diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..ba06928ffb75663687453978c525421331e92339 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-8500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-8600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-8680/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..41d3e94dd696d33a4d8c7c5a1bd1a93787cbe885 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: peft +license: other +base_model: Qwen/Qwen2.5-VL-7B-Instruct +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +pipeline_tag: text-generation +model-index: +- name: Qwen2.5-VL-7B-diversifier + results: [] +--- + + + +# Qwen2.5-VL-7B-diversifier + +This model is a fine-tuned version of [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on the pc-agent-e-diversifier-sliding_window dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-06 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- gradient_accumulation_steps: 2 +- total_train_batch_size: 4 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 0.05 +- num_epochs: 2 + +### Training results + + + +### Framework versions + +- PEFT 0.18.1 +- Transformers 5.2.0 +- Pytorch 2.6.0+cu126 +- Datasets 4.0.0 +- Tokenizers 0.22.2 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..07855d838b18d52ab3ab7a1ec1a852f57cf14fd8 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "layers.0.mlp.down_proj", + "v_proj", + "layers.10.mlp.gate_proj", + "layers.1.mlp.up_proj", + 
"layers.20.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.3.mlp.gate_proj", + "layers.5.mlp.down_proj", + "layers.8.mlp.up_proj", + "layers.4.mlp.down_proj", + "layers.1.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.25.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.23.mlp.down_proj", + "layers.15.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.26.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.9.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.12.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.16.mlp.gate_proj", + "layers.25.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.0.mlp.up_proj", + "layers.15.mlp.gate_proj", + "layers.9.mlp.gate_proj", + "layers.22.mlp.gate_proj", + "layers.24.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.14.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "k_proj", + "layers.27.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.19.mlp.up_proj", + "q_proj", + "layers.17.mlp.gate_proj", + "layers.0.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.27.mlp.up_proj", + "layers.2.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.6.mlp.up_proj", + "layers.21.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.10.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.2.mlp.up_proj", + "o_proj", + "layers.16.mlp.down_proj", + "layers.4.mlp.gate_proj", + "layers.20.mlp.gate_proj", + "layers.13.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.22.mlp.up_proj", + "layers.11.mlp.down_proj", + "layers.3.mlp.down_proj", + 
"layers.14.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.23.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.17.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.6.mlp.gate_proj", + "layers.16.mlp.up_proj", + "layers.18.mlp.up_proj", + "layers.7.mlp.gate_proj", + "layers.8.mlp.down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98f120b5eede66c5f031ca2bc6952df22cd29d41 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80f97cfb7cba38b2ff84ae927993a7eb6fcc670bc1c05a488c870cc11232bc01 +size 40428088 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..874b9fe8fde883c45d6df6e99e75cc964841790a --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 7249753014763520.0, + "train_loss": 0.8227015781100444, + "train_runtime": 44224.0669, + "train_samples_per_second": 0.785, + "train_steps_per_second": 0.196 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content 
%}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-8500/README.md b/checkpoint-8500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-8500/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made 
aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-8500/adapter_config.json b/checkpoint-8500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..07855d838b18d52ab3ab7a1ec1a852f57cf14fd8 --- /dev/null +++ b/checkpoint-8500/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.0.mlp.down_proj", + "v_proj", + "layers.10.mlp.gate_proj", + "layers.1.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.3.mlp.gate_proj", + "layers.5.mlp.down_proj", + "layers.8.mlp.up_proj", + "layers.4.mlp.down_proj", + "layers.1.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.25.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.23.mlp.down_proj", + "layers.15.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.26.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.9.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.12.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.16.mlp.gate_proj", + "layers.25.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.0.mlp.up_proj", + "layers.15.mlp.gate_proj", + "layers.9.mlp.gate_proj", + "layers.22.mlp.gate_proj", + "layers.24.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.14.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "k_proj", + "layers.27.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.19.mlp.up_proj", + "q_proj", + "layers.17.mlp.gate_proj", + "layers.0.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.27.mlp.up_proj", + "layers.2.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.6.mlp.up_proj", + "layers.21.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.10.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.2.mlp.up_proj", + "o_proj", + "layers.16.mlp.down_proj", + "layers.4.mlp.gate_proj", + "layers.20.mlp.gate_proj", + "layers.13.mlp.down_proj", + 
"layers.13.mlp.gate_proj", + "layers.22.mlp.up_proj", + "layers.11.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.23.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.17.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.6.mlp.gate_proj", + "layers.16.mlp.up_proj", + "layers.18.mlp.up_proj", + "layers.7.mlp.gate_proj", + "layers.8.mlp.down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-8500/adapter_model.safetensors b/checkpoint-8500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69979fb11257fed0c018b48da8953c53c88de110 --- /dev/null +++ b/checkpoint-8500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c363e49fcc4a53c19d70099e81fcbe69a8677a8e8b493609c71864cccd90fb1 +size 40428088 diff --git a/checkpoint-8500/chat_template.jinja b/checkpoint-8500/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-8500/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% 
set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-8500/global_step8500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-8500/global_step8500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6df5cec3b8ebbdb067872223683aac64d50bebe --- /dev/null +++ b/checkpoint-8500/global_step8500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4683a8628de90ca0fc4cfec5e406fa7f5b880531cd0fa98ff6c183e55e4dc70d +size 242224880 diff --git a/checkpoint-8500/global_step8500/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-8500/global_step8500/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f2e65d015c5b20981505ca45f3436f33368caea --- /dev/null +++ b/checkpoint-8500/global_step8500/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8974a75b5d07f3ed0ce487bb7f3e8b36379d743de40566f2b8e7a6c8e0d1c168 +size 460630 diff --git a/checkpoint-8500/latest b/checkpoint-8500/latest new file mode 100644 index 0000000000000000000000000000000000000000..a450348b0838b4468f619777572a0bf08a822fa9 --- /dev/null +++ b/checkpoint-8500/latest @@ -0,0 +1 @@ +global_step8500 \ No newline at end of file diff --git a/checkpoint-8500/processor_config.json b/checkpoint-8500/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-8500/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-8500/rng_state.pth b/checkpoint-8500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fcf11c9b78de2c2c55fdfc44daef09cd9181c14 --- /dev/null +++ b/checkpoint-8500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc398a73e46bca50defc25b4467441315246a33383a5d6c80985d238e57127f +size 14244 diff --git a/checkpoint-8500/scheduler.pt b/checkpoint-8500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d28a98a53e644b2b665ad314b262386e918159a7 --- /dev/null +++ b/checkpoint-8500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad75a7044b15f88fe4e09f96b6e1aa63c9e76f67718ab86836b0a2f0f3ca74e8 +size 
1000 diff --git a/checkpoint-8500/tokenizer.json b/checkpoint-8500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-8500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-8500/tokenizer_config.json b/checkpoint-8500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-8500/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-8500/trainer_state.json b/checkpoint-8500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b32a754cf1398d9f118c2dbf46eacdea65491027 --- /dev/null +++ b/checkpoint-8500/trainer_state.json @@ -0,0 +1,59534 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9585253456221197, + "eval_steps": 500, + "global_step": 8500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002304147465437788, + "grad_norm": 
0.3584135221139379, + "learning_rate": 0.0, + "loss": 1.1575632095336914, + "step": 1 + }, + { + "epoch": 0.0004608294930875576, + "grad_norm": 0.3035367055626511, + "learning_rate": 4.6082949308755755e-09, + "loss": 0.9973502159118652, + "step": 2 + }, + { + "epoch": 0.0006912442396313364, + "grad_norm": 0.39685233086299543, + "learning_rate": 9.216589861751151e-09, + "loss": 1.0778999328613281, + "step": 3 + }, + { + "epoch": 0.0009216589861751152, + "grad_norm": 0.4029042979509503, + "learning_rate": 1.3824884792626728e-08, + "loss": 1.1912263631820679, + "step": 4 + }, + { + "epoch": 0.001152073732718894, + "grad_norm": 0.3943812894307851, + "learning_rate": 1.8433179723502302e-08, + "loss": 1.136031150817871, + "step": 5 + }, + { + "epoch": 0.0013824884792626728, + "grad_norm": 0.472718552613566, + "learning_rate": 2.304147465437788e-08, + "loss": 1.1647956371307373, + "step": 6 + }, + { + "epoch": 0.0016129032258064516, + "grad_norm": 0.4378363913681294, + "learning_rate": 2.7649769585253456e-08, + "loss": 1.144924283027649, + "step": 7 + }, + { + "epoch": 0.0018433179723502304, + "grad_norm": 0.412264706125121, + "learning_rate": 3.225806451612903e-08, + "loss": 1.1821019649505615, + "step": 8 + }, + { + "epoch": 0.0020737327188940094, + "grad_norm": 0.35864626774735575, + "learning_rate": 3.6866359447004604e-08, + "loss": 1.0586045980453491, + "step": 9 + }, + { + "epoch": 0.002304147465437788, + "grad_norm": 0.497058147699291, + "learning_rate": 4.1474654377880186e-08, + "loss": 1.2029818296432495, + "step": 10 + }, + { + "epoch": 0.002534562211981567, + "grad_norm": 0.465265464928516, + "learning_rate": 4.608294930875576e-08, + "loss": 1.1411634683609009, + "step": 11 + }, + { + "epoch": 0.0027649769585253456, + "grad_norm": 0.4356529753705429, + "learning_rate": 5.069124423963134e-08, + "loss": 1.2719087600708008, + "step": 12 + }, + { + "epoch": 0.0029953917050691246, + "grad_norm": 0.4469831586732583, + "learning_rate": 5.529953917050691e-08, + "loss": 
1.1132495403289795, + "step": 13 + }, + { + "epoch": 0.0032258064516129032, + "grad_norm": 0.3918942421249174, + "learning_rate": 5.990783410138249e-08, + "loss": 1.1900808811187744, + "step": 14 + }, + { + "epoch": 0.0034562211981566822, + "grad_norm": 0.33446734054876004, + "learning_rate": 6.451612903225806e-08, + "loss": 1.2273608446121216, + "step": 15 + }, + { + "epoch": 0.003686635944700461, + "grad_norm": 0.4610551419026991, + "learning_rate": 6.912442396313364e-08, + "loss": 1.2130601406097412, + "step": 16 + }, + { + "epoch": 0.00391705069124424, + "grad_norm": 0.4765520188128542, + "learning_rate": 7.373271889400921e-08, + "loss": 1.0534124374389648, + "step": 17 + }, + { + "epoch": 0.004147465437788019, + "grad_norm": 0.4247458361448018, + "learning_rate": 7.834101382488478e-08, + "loss": 1.1796221733093262, + "step": 18 + }, + { + "epoch": 0.004377880184331797, + "grad_norm": 0.42651087679972033, + "learning_rate": 8.294930875576037e-08, + "loss": 1.118175745010376, + "step": 19 + }, + { + "epoch": 0.004608294930875576, + "grad_norm": 0.37538111415149067, + "learning_rate": 8.755760368663594e-08, + "loss": 1.140963077545166, + "step": 20 + }, + { + "epoch": 0.004838709677419355, + "grad_norm": 0.39373769301837386, + "learning_rate": 9.216589861751152e-08, + "loss": 1.107339859008789, + "step": 21 + }, + { + "epoch": 0.005069124423963134, + "grad_norm": 0.5053900590341595, + "learning_rate": 9.677419354838709e-08, + "loss": 1.171803593635559, + "step": 22 + }, + { + "epoch": 0.005299539170506912, + "grad_norm": 0.32897537004851696, + "learning_rate": 1.0138248847926267e-07, + "loss": 0.9935251474380493, + "step": 23 + }, + { + "epoch": 0.005529953917050691, + "grad_norm": 0.4355535799950001, + "learning_rate": 1.0599078341013824e-07, + "loss": 1.0870952606201172, + "step": 24 + }, + { + "epoch": 0.00576036866359447, + "grad_norm": 0.5215895570336967, + "learning_rate": 1.1059907834101383e-07, + "loss": 1.1520278453826904, + "step": 25 + }, + { + 
"epoch": 0.005990783410138249, + "grad_norm": 0.4878994851998504, + "learning_rate": 1.152073732718894e-07, + "loss": 1.3603750467300415, + "step": 26 + }, + { + "epoch": 0.006221198156682027, + "grad_norm": 0.3985371704289713, + "learning_rate": 1.1981566820276498e-07, + "loss": 1.230550765991211, + "step": 27 + }, + { + "epoch": 0.0064516129032258064, + "grad_norm": 0.4105556408349015, + "learning_rate": 1.2442396313364054e-07, + "loss": 1.267604112625122, + "step": 28 + }, + { + "epoch": 0.0066820276497695855, + "grad_norm": 0.3604672745500653, + "learning_rate": 1.2903225806451611e-07, + "loss": 1.344348669052124, + "step": 29 + }, + { + "epoch": 0.0069124423963133645, + "grad_norm": 0.42234881975895605, + "learning_rate": 1.336405529953917e-07, + "loss": 1.2794291973114014, + "step": 30 + }, + { + "epoch": 0.007142857142857143, + "grad_norm": 0.39749887698930225, + "learning_rate": 1.3824884792626728e-07, + "loss": 1.2841103076934814, + "step": 31 + }, + { + "epoch": 0.007373271889400922, + "grad_norm": 0.34204310388035036, + "learning_rate": 1.4285714285714285e-07, + "loss": 1.1505224704742432, + "step": 32 + }, + { + "epoch": 0.007603686635944701, + "grad_norm": 0.36676388907062357, + "learning_rate": 1.4746543778801842e-07, + "loss": 0.9800833463668823, + "step": 33 + }, + { + "epoch": 0.00783410138248848, + "grad_norm": 0.4216809539302965, + "learning_rate": 1.52073732718894e-07, + "loss": 1.3712589740753174, + "step": 34 + }, + { + "epoch": 0.008064516129032258, + "grad_norm": 0.46644559931224167, + "learning_rate": 1.5668202764976955e-07, + "loss": 1.2274689674377441, + "step": 35 + }, + { + "epoch": 0.008294930875576038, + "grad_norm": 0.41359150478695417, + "learning_rate": 1.6129032258064515e-07, + "loss": 1.0673755407333374, + "step": 36 + }, + { + "epoch": 0.008525345622119816, + "grad_norm": 0.534062363030203, + "learning_rate": 1.6589861751152074e-07, + "loss": 1.242164134979248, + "step": 37 + }, + { + "epoch": 0.008755760368663594, + 
"grad_norm": 0.48756247774131056, + "learning_rate": 1.705069124423963e-07, + "loss": 1.190554141998291, + "step": 38 + }, + { + "epoch": 0.008986175115207374, + "grad_norm": 0.35848282094721656, + "learning_rate": 1.7511520737327188e-07, + "loss": 1.3119773864746094, + "step": 39 + }, + { + "epoch": 0.009216589861751152, + "grad_norm": 0.4466769921356875, + "learning_rate": 1.7972350230414745e-07, + "loss": 1.2532517910003662, + "step": 40 + }, + { + "epoch": 0.00944700460829493, + "grad_norm": 0.4271763580587928, + "learning_rate": 1.8433179723502305e-07, + "loss": 1.307154655456543, + "step": 41 + }, + { + "epoch": 0.00967741935483871, + "grad_norm": 0.432221455567464, + "learning_rate": 1.889400921658986e-07, + "loss": 1.1899281740188599, + "step": 42 + }, + { + "epoch": 0.009907834101382488, + "grad_norm": 0.48501644393966153, + "learning_rate": 1.9354838709677418e-07, + "loss": 1.1928249597549438, + "step": 43 + }, + { + "epoch": 0.010138248847926268, + "grad_norm": 0.35170632131851265, + "learning_rate": 1.9815668202764975e-07, + "loss": 1.1663157939910889, + "step": 44 + }, + { + "epoch": 0.010368663594470046, + "grad_norm": 0.43449129429745276, + "learning_rate": 2.0276497695852535e-07, + "loss": 1.1806118488311768, + "step": 45 + }, + { + "epoch": 0.010599078341013824, + "grad_norm": 0.39933118678172597, + "learning_rate": 2.073732718894009e-07, + "loss": 1.1704952716827393, + "step": 46 + }, + { + "epoch": 0.010829493087557604, + "grad_norm": 0.46071358975984034, + "learning_rate": 2.1198156682027649e-07, + "loss": 1.2124149799346924, + "step": 47 + }, + { + "epoch": 0.011059907834101382, + "grad_norm": 0.325920139351066, + "learning_rate": 2.1658986175115208e-07, + "loss": 1.041813850402832, + "step": 48 + }, + { + "epoch": 0.01129032258064516, + "grad_norm": 0.4189805583015969, + "learning_rate": 2.2119815668202765e-07, + "loss": 1.255402684211731, + "step": 49 + }, + { + "epoch": 0.01152073732718894, + "grad_norm": 0.369986826532368, + 
"learning_rate": 2.2580645161290322e-07, + "loss": 1.1115221977233887, + "step": 50 + }, + { + "epoch": 0.011751152073732719, + "grad_norm": 0.501835295036206, + "learning_rate": 2.304147465437788e-07, + "loss": 1.4048426151275635, + "step": 51 + }, + { + "epoch": 0.011981566820276499, + "grad_norm": 0.38759638044019523, + "learning_rate": 2.3502304147465438e-07, + "loss": 1.1690936088562012, + "step": 52 + }, + { + "epoch": 0.012211981566820277, + "grad_norm": 0.43771993971927803, + "learning_rate": 2.3963133640552995e-07, + "loss": 1.164888620376587, + "step": 53 + }, + { + "epoch": 0.012442396313364055, + "grad_norm": 0.5047093250847474, + "learning_rate": 2.442396313364055e-07, + "loss": 1.004424810409546, + "step": 54 + }, + { + "epoch": 0.012672811059907835, + "grad_norm": 0.371768250028493, + "learning_rate": 2.488479262672811e-07, + "loss": 0.8810856342315674, + "step": 55 + }, + { + "epoch": 0.012903225806451613, + "grad_norm": 0.41437582347111235, + "learning_rate": 2.534562211981567e-07, + "loss": 1.300262451171875, + "step": 56 + }, + { + "epoch": 0.013133640552995391, + "grad_norm": 0.44923919860912964, + "learning_rate": 2.5806451612903223e-07, + "loss": 1.3624285459518433, + "step": 57 + }, + { + "epoch": 0.013364055299539171, + "grad_norm": 0.37916325568511644, + "learning_rate": 2.6267281105990777e-07, + "loss": 1.2133375406265259, + "step": 58 + }, + { + "epoch": 0.013594470046082949, + "grad_norm": 0.3665676434937369, + "learning_rate": 2.672811059907834e-07, + "loss": 1.2203283309936523, + "step": 59 + }, + { + "epoch": 0.013824884792626729, + "grad_norm": 0.4314731168039537, + "learning_rate": 2.7188940092165896e-07, + "loss": 1.291412353515625, + "step": 60 + }, + { + "epoch": 0.014055299539170507, + "grad_norm": 0.46787898249820037, + "learning_rate": 2.7649769585253456e-07, + "loss": 1.1596577167510986, + "step": 61 + }, + { + "epoch": 0.014285714285714285, + "grad_norm": 0.34850075759056304, + "learning_rate": 2.8110599078341015e-07, + 
"loss": 0.9789823889732361, + "step": 62 + }, + { + "epoch": 0.014516129032258065, + "grad_norm": 0.46810420323672, + "learning_rate": 2.857142857142857e-07, + "loss": 1.220383882522583, + "step": 63 + }, + { + "epoch": 0.014746543778801843, + "grad_norm": 0.36577992953429955, + "learning_rate": 2.903225806451613e-07, + "loss": 1.0961871147155762, + "step": 64 + }, + { + "epoch": 0.014976958525345621, + "grad_norm": 0.4155727286496237, + "learning_rate": 2.9493087557603683e-07, + "loss": 1.2281936407089233, + "step": 65 + }, + { + "epoch": 0.015207373271889401, + "grad_norm": 0.48770399467414544, + "learning_rate": 2.9953917050691243e-07, + "loss": 1.279728889465332, + "step": 66 + }, + { + "epoch": 0.01543778801843318, + "grad_norm": 0.3697109399388579, + "learning_rate": 3.04147465437788e-07, + "loss": 1.0932798385620117, + "step": 67 + }, + { + "epoch": 0.01566820276497696, + "grad_norm": 0.4768828309013543, + "learning_rate": 3.0875576036866356e-07, + "loss": 1.1612955331802368, + "step": 68 + }, + { + "epoch": 0.015898617511520736, + "grad_norm": 0.335260500319883, + "learning_rate": 3.133640552995391e-07, + "loss": 1.193152666091919, + "step": 69 + }, + { + "epoch": 0.016129032258064516, + "grad_norm": 0.3754577001974335, + "learning_rate": 3.1797235023041476e-07, + "loss": 1.3303695917129517, + "step": 70 + }, + { + "epoch": 0.016359447004608296, + "grad_norm": 0.5384978005623245, + "learning_rate": 3.225806451612903e-07, + "loss": 1.3735731840133667, + "step": 71 + }, + { + "epoch": 0.016589861751152075, + "grad_norm": 0.44147085813841874, + "learning_rate": 3.271889400921659e-07, + "loss": 1.162925124168396, + "step": 72 + }, + { + "epoch": 0.016820276497695852, + "grad_norm": 0.46260262466297236, + "learning_rate": 3.317972350230415e-07, + "loss": 1.3879203796386719, + "step": 73 + }, + { + "epoch": 0.017050691244239632, + "grad_norm": 0.33864035083037825, + "learning_rate": 3.3640552995391703e-07, + "loss": 1.2721638679504395, + "step": 74 + }, + { + 
"epoch": 0.01728110599078341, + "grad_norm": 0.5797449954735189, + "learning_rate": 3.410138248847926e-07, + "loss": 1.3997783660888672, + "step": 75 + }, + { + "epoch": 0.017511520737327188, + "grad_norm": 0.3824734589731608, + "learning_rate": 3.4562211981566817e-07, + "loss": 1.1099059581756592, + "step": 76 + }, + { + "epoch": 0.017741935483870968, + "grad_norm": 0.6286343528066216, + "learning_rate": 3.5023041474654376e-07, + "loss": 1.341759204864502, + "step": 77 + }, + { + "epoch": 0.017972350230414748, + "grad_norm": 0.41058458963409694, + "learning_rate": 3.5483870967741936e-07, + "loss": 1.343479871749878, + "step": 78 + }, + { + "epoch": 0.018202764976958524, + "grad_norm": 0.41653629518149576, + "learning_rate": 3.594470046082949e-07, + "loss": 1.2225772142410278, + "step": 79 + }, + { + "epoch": 0.018433179723502304, + "grad_norm": 0.37871730557010347, + "learning_rate": 3.6405529953917044e-07, + "loss": 1.1934573650360107, + "step": 80 + }, + { + "epoch": 0.018663594470046084, + "grad_norm": 0.36930989407616927, + "learning_rate": 3.686635944700461e-07, + "loss": 1.099440336227417, + "step": 81 + }, + { + "epoch": 0.01889400921658986, + "grad_norm": 0.4445938548359885, + "learning_rate": 3.7327188940092163e-07, + "loss": 1.0864269733428955, + "step": 82 + }, + { + "epoch": 0.01912442396313364, + "grad_norm": 0.4183127094774659, + "learning_rate": 3.778801843317972e-07, + "loss": 1.0706703662872314, + "step": 83 + }, + { + "epoch": 0.01935483870967742, + "grad_norm": 0.3377183372891763, + "learning_rate": 3.824884792626728e-07, + "loss": 1.1675662994384766, + "step": 84 + }, + { + "epoch": 0.019585253456221197, + "grad_norm": 0.4219766455348787, + "learning_rate": 3.8709677419354837e-07, + "loss": 1.3294553756713867, + "step": 85 + }, + { + "epoch": 0.019815668202764977, + "grad_norm": 0.39357768126078463, + "learning_rate": 3.9170506912442396e-07, + "loss": 1.050878882408142, + "step": 86 + }, + { + "epoch": 0.020046082949308756, + "grad_norm": 
0.5263429396452582, + "learning_rate": 3.963133640552995e-07, + "loss": 1.3243739604949951, + "step": 87 + }, + { + "epoch": 0.020276497695852536, + "grad_norm": 0.4373425676890139, + "learning_rate": 4.009216589861751e-07, + "loss": 1.1350429058074951, + "step": 88 + }, + { + "epoch": 0.020506912442396313, + "grad_norm": 0.39555461421299365, + "learning_rate": 4.055299539170507e-07, + "loss": 1.24526047706604, + "step": 89 + }, + { + "epoch": 0.020737327188940093, + "grad_norm": 0.5372699223271491, + "learning_rate": 4.1013824884792624e-07, + "loss": 1.3459908962249756, + "step": 90 + }, + { + "epoch": 0.020967741935483872, + "grad_norm": 0.45711998906450413, + "learning_rate": 4.147465437788018e-07, + "loss": 1.2129223346710205, + "step": 91 + }, + { + "epoch": 0.02119815668202765, + "grad_norm": 0.396171288478396, + "learning_rate": 4.1935483870967743e-07, + "loss": 1.0522969961166382, + "step": 92 + }, + { + "epoch": 0.02142857142857143, + "grad_norm": 0.4102245507283394, + "learning_rate": 4.2396313364055297e-07, + "loss": 1.3128937482833862, + "step": 93 + }, + { + "epoch": 0.02165898617511521, + "grad_norm": 0.4498995421630644, + "learning_rate": 4.285714285714285e-07, + "loss": 1.3582855463027954, + "step": 94 + }, + { + "epoch": 0.021889400921658985, + "grad_norm": 0.401280081593378, + "learning_rate": 4.3317972350230416e-07, + "loss": 1.3959028720855713, + "step": 95 + }, + { + "epoch": 0.022119815668202765, + "grad_norm": 0.34811166324547105, + "learning_rate": 4.377880184331797e-07, + "loss": 1.149501085281372, + "step": 96 + }, + { + "epoch": 0.022350230414746545, + "grad_norm": 0.48133121679013907, + "learning_rate": 4.423963133640553e-07, + "loss": 1.024135708808899, + "step": 97 + }, + { + "epoch": 0.02258064516129032, + "grad_norm": 0.42298775317954185, + "learning_rate": 4.4700460829493084e-07, + "loss": 0.9255483150482178, + "step": 98 + }, + { + "epoch": 0.0228110599078341, + "grad_norm": 0.4429779269301727, + "learning_rate": 
4.5161290322580644e-07, + "loss": 1.1694722175598145, + "step": 99 + }, + { + "epoch": 0.02304147465437788, + "grad_norm": 0.5257102588195529, + "learning_rate": 4.5622119815668203e-07, + "loss": 1.1588457822799683, + "step": 100 + }, + { + "epoch": 0.023271889400921657, + "grad_norm": 0.37478821324150746, + "learning_rate": 4.608294930875576e-07, + "loss": 1.172672986984253, + "step": 101 + }, + { + "epoch": 0.023502304147465437, + "grad_norm": 0.5416446977134604, + "learning_rate": 4.654377880184331e-07, + "loss": 1.092405915260315, + "step": 102 + }, + { + "epoch": 0.023732718894009217, + "grad_norm": 0.40304171727239163, + "learning_rate": 4.7004608294930877e-07, + "loss": 1.11540687084198, + "step": 103 + }, + { + "epoch": 0.023963133640552997, + "grad_norm": 0.46185115643683655, + "learning_rate": 4.746543778801843e-07, + "loss": 1.1380189657211304, + "step": 104 + }, + { + "epoch": 0.024193548387096774, + "grad_norm": 0.4705857339336588, + "learning_rate": 4.792626728110599e-07, + "loss": 1.1031086444854736, + "step": 105 + }, + { + "epoch": 0.024423963133640553, + "grad_norm": 0.38094574356569405, + "learning_rate": 4.838709677419355e-07, + "loss": 1.1988024711608887, + "step": 106 + }, + { + "epoch": 0.024654377880184333, + "grad_norm": 0.48794686062473364, + "learning_rate": 4.88479262672811e-07, + "loss": 1.0814614295959473, + "step": 107 + }, + { + "epoch": 0.02488479262672811, + "grad_norm": 0.41304010922593737, + "learning_rate": 4.930875576036866e-07, + "loss": 1.0541695356369019, + "step": 108 + }, + { + "epoch": 0.02511520737327189, + "grad_norm": 0.4262047073398665, + "learning_rate": 4.976958525345622e-07, + "loss": 1.2281692028045654, + "step": 109 + }, + { + "epoch": 0.02534562211981567, + "grad_norm": 0.4617413170072456, + "learning_rate": 5.023041474654378e-07, + "loss": 1.2542369365692139, + "step": 110 + }, + { + "epoch": 0.025576036866359446, + "grad_norm": 0.46571699511286535, + "learning_rate": 5.069124423963134e-07, + "loss": 
1.36039137840271, + "step": 111 + }, + { + "epoch": 0.025806451612903226, + "grad_norm": 0.3893860976585314, + "learning_rate": 5.11520737327189e-07, + "loss": 1.1092976331710815, + "step": 112 + }, + { + "epoch": 0.026036866359447006, + "grad_norm": 0.4636216593448083, + "learning_rate": 5.161290322580645e-07, + "loss": 1.0634076595306396, + "step": 113 + }, + { + "epoch": 0.026267281105990782, + "grad_norm": 0.3440530135190564, + "learning_rate": 5.2073732718894e-07, + "loss": 1.0024809837341309, + "step": 114 + }, + { + "epoch": 0.026497695852534562, + "grad_norm": 0.4346835070660911, + "learning_rate": 5.253456221198155e-07, + "loss": 1.1691724061965942, + "step": 115 + }, + { + "epoch": 0.026728110599078342, + "grad_norm": 0.46992230717269323, + "learning_rate": 5.299539170506912e-07, + "loss": 1.2053219079971313, + "step": 116 + }, + { + "epoch": 0.02695852534562212, + "grad_norm": 0.3668719861525143, + "learning_rate": 5.345622119815668e-07, + "loss": 1.119420051574707, + "step": 117 + }, + { + "epoch": 0.027188940092165898, + "grad_norm": 0.44063509410116297, + "learning_rate": 5.391705069124423e-07, + "loss": 1.1640167236328125, + "step": 118 + }, + { + "epoch": 0.027419354838709678, + "grad_norm": 0.41158620514350025, + "learning_rate": 5.437788018433179e-07, + "loss": 1.180116057395935, + "step": 119 + }, + { + "epoch": 0.027649769585253458, + "grad_norm": 0.4684655855415561, + "learning_rate": 5.483870967741935e-07, + "loss": 1.0726159811019897, + "step": 120 + }, + { + "epoch": 0.027880184331797234, + "grad_norm": 0.44443528947779826, + "learning_rate": 5.529953917050691e-07, + "loss": 1.03219473361969, + "step": 121 + }, + { + "epoch": 0.028110599078341014, + "grad_norm": 0.4615930748718386, + "learning_rate": 5.576036866359447e-07, + "loss": 1.1545735597610474, + "step": 122 + }, + { + "epoch": 0.028341013824884794, + "grad_norm": 0.4154044637047318, + "learning_rate": 5.622119815668203e-07, + "loss": 1.2409746646881104, + "step": 123 + }, + { + 
"epoch": 0.02857142857142857, + "grad_norm": 0.48642203067509454, + "learning_rate": 5.668202764976958e-07, + "loss": 1.2717409133911133, + "step": 124 + }, + { + "epoch": 0.02880184331797235, + "grad_norm": 0.5633308049530943, + "learning_rate": 5.714285714285714e-07, + "loss": 1.523846983909607, + "step": 125 + }, + { + "epoch": 0.02903225806451613, + "grad_norm": 0.47068700261388136, + "learning_rate": 5.760368663594469e-07, + "loss": 1.3386890888214111, + "step": 126 + }, + { + "epoch": 0.029262672811059907, + "grad_norm": 0.5199142981609907, + "learning_rate": 5.806451612903226e-07, + "loss": 1.3080404996871948, + "step": 127 + }, + { + "epoch": 0.029493087557603687, + "grad_norm": 0.530224330517059, + "learning_rate": 5.852534562211982e-07, + "loss": 1.3194537162780762, + "step": 128 + }, + { + "epoch": 0.029723502304147466, + "grad_norm": 0.49119251759787413, + "learning_rate": 5.898617511520737e-07, + "loss": 1.0546228885650635, + "step": 129 + }, + { + "epoch": 0.029953917050691243, + "grad_norm": 0.44238233872112126, + "learning_rate": 5.944700460829493e-07, + "loss": 1.3160395622253418, + "step": 130 + }, + { + "epoch": 0.030184331797235023, + "grad_norm": 0.5551864793339897, + "learning_rate": 5.990783410138249e-07, + "loss": 1.3497555255889893, + "step": 131 + }, + { + "epoch": 0.030414746543778803, + "grad_norm": 0.41383181378393813, + "learning_rate": 6.036866359447004e-07, + "loss": 1.0863350629806519, + "step": 132 + }, + { + "epoch": 0.03064516129032258, + "grad_norm": 0.4913368059485873, + "learning_rate": 6.08294930875576e-07, + "loss": 1.1640913486480713, + "step": 133 + }, + { + "epoch": 0.03087557603686636, + "grad_norm": 0.4309615007654084, + "learning_rate": 6.129032258064516e-07, + "loss": 1.398510217666626, + "step": 134 + }, + { + "epoch": 0.03110599078341014, + "grad_norm": 0.46249423735581563, + "learning_rate": 6.175115207373271e-07, + "loss": 1.3015594482421875, + "step": 135 + }, + { + "epoch": 0.03133640552995392, + "grad_norm": 
0.5511951371835903, + "learning_rate": 6.221198156682027e-07, + "loss": 1.2786016464233398, + "step": 136 + }, + { + "epoch": 0.031566820276497695, + "grad_norm": 0.35056112177409643, + "learning_rate": 6.267281105990782e-07, + "loss": 1.0863161087036133, + "step": 137 + }, + { + "epoch": 0.03179723502304147, + "grad_norm": 0.49469780540978775, + "learning_rate": 6.313364055299539e-07, + "loss": 1.1590030193328857, + "step": 138 + }, + { + "epoch": 0.032027649769585255, + "grad_norm": 0.4498097850802204, + "learning_rate": 6.359447004608295e-07, + "loss": 1.2473185062408447, + "step": 139 + }, + { + "epoch": 0.03225806451612903, + "grad_norm": 0.46996183926649465, + "learning_rate": 6.40552995391705e-07, + "loss": 1.1982496976852417, + "step": 140 + }, + { + "epoch": 0.03248847926267281, + "grad_norm": 0.39627654459475076, + "learning_rate": 6.451612903225806e-07, + "loss": 1.078690528869629, + "step": 141 + }, + { + "epoch": 0.03271889400921659, + "grad_norm": 0.4831308537053794, + "learning_rate": 6.497695852534562e-07, + "loss": 1.1540311574935913, + "step": 142 + }, + { + "epoch": 0.03294930875576037, + "grad_norm": 0.4510531995801552, + "learning_rate": 6.543778801843318e-07, + "loss": 1.319035530090332, + "step": 143 + }, + { + "epoch": 0.03317972350230415, + "grad_norm": 0.46683155201608206, + "learning_rate": 6.589861751152074e-07, + "loss": 1.199448585510254, + "step": 144 + }, + { + "epoch": 0.03341013824884793, + "grad_norm": 0.526397133846452, + "learning_rate": 6.63594470046083e-07, + "loss": 1.212646484375, + "step": 145 + }, + { + "epoch": 0.033640552995391704, + "grad_norm": 0.6339080221663279, + "learning_rate": 6.682027649769585e-07, + "loss": 1.2833064794540405, + "step": 146 + }, + { + "epoch": 0.03387096774193549, + "grad_norm": 0.6111094782416204, + "learning_rate": 6.728110599078341e-07, + "loss": 1.2852118015289307, + "step": 147 + }, + { + "epoch": 0.034101382488479264, + "grad_norm": 0.36790627555446376, + "learning_rate": 
6.774193548387096e-07, + "loss": 1.0287699699401855, + "step": 148 + }, + { + "epoch": 0.03433179723502304, + "grad_norm": 0.4705970251054534, + "learning_rate": 6.820276497695853e-07, + "loss": 1.2580914497375488, + "step": 149 + }, + { + "epoch": 0.03456221198156682, + "grad_norm": 0.4446865658925291, + "learning_rate": 6.866359447004608e-07, + "loss": 1.0557801723480225, + "step": 150 + }, + { + "epoch": 0.0347926267281106, + "grad_norm": 0.4962737867323335, + "learning_rate": 6.912442396313363e-07, + "loss": 1.1820557117462158, + "step": 151 + }, + { + "epoch": 0.035023041474654376, + "grad_norm": 0.4496579463689646, + "learning_rate": 6.958525345622119e-07, + "loss": 1.2777981758117676, + "step": 152 + }, + { + "epoch": 0.03525345622119816, + "grad_norm": 0.4664315599937052, + "learning_rate": 7.004608294930875e-07, + "loss": 1.1465356349945068, + "step": 153 + }, + { + "epoch": 0.035483870967741936, + "grad_norm": 0.5245233624695497, + "learning_rate": 7.05069124423963e-07, + "loss": 1.3553744554519653, + "step": 154 + }, + { + "epoch": 0.03571428571428571, + "grad_norm": 0.5474513239817841, + "learning_rate": 7.096774193548387e-07, + "loss": 1.176223874092102, + "step": 155 + }, + { + "epoch": 0.035944700460829496, + "grad_norm": 0.4022708922904972, + "learning_rate": 7.142857142857143e-07, + "loss": 1.1771761178970337, + "step": 156 + }, + { + "epoch": 0.03617511520737327, + "grad_norm": 0.5000685120319052, + "learning_rate": 7.188940092165898e-07, + "loss": 1.1598860025405884, + "step": 157 + }, + { + "epoch": 0.03640552995391705, + "grad_norm": 0.4955460688514832, + "learning_rate": 7.235023041474654e-07, + "loss": 1.0689195394515991, + "step": 158 + }, + { + "epoch": 0.03663594470046083, + "grad_norm": 0.5324202700222229, + "learning_rate": 7.281105990783409e-07, + "loss": 1.1444990634918213, + "step": 159 + }, + { + "epoch": 0.03686635944700461, + "grad_norm": 0.441885052912425, + "learning_rate": 7.327188940092166e-07, + "loss": 1.2261321544647217, + 
"step": 160 + }, + { + "epoch": 0.037096774193548385, + "grad_norm": 0.47946473640002796, + "learning_rate": 7.373271889400922e-07, + "loss": 0.9325876235961914, + "step": 161 + }, + { + "epoch": 0.03732718894009217, + "grad_norm": 0.46688477365444836, + "learning_rate": 7.419354838709677e-07, + "loss": 1.071167230606079, + "step": 162 + }, + { + "epoch": 0.037557603686635944, + "grad_norm": 0.5188018198616766, + "learning_rate": 7.465437788018433e-07, + "loss": 1.1856298446655273, + "step": 163 + }, + { + "epoch": 0.03778801843317972, + "grad_norm": 0.5279511073474723, + "learning_rate": 7.511520737327189e-07, + "loss": 1.13883376121521, + "step": 164 + }, + { + "epoch": 0.038018433179723504, + "grad_norm": 0.4671725091927055, + "learning_rate": 7.557603686635944e-07, + "loss": 1.2896685600280762, + "step": 165 + }, + { + "epoch": 0.03824884792626728, + "grad_norm": 0.6286776240106037, + "learning_rate": 7.603686635944701e-07, + "loss": 1.3122754096984863, + "step": 166 + }, + { + "epoch": 0.03847926267281106, + "grad_norm": 0.5120060171404104, + "learning_rate": 7.649769585253457e-07, + "loss": 1.165675163269043, + "step": 167 + }, + { + "epoch": 0.03870967741935484, + "grad_norm": 0.5132036652169082, + "learning_rate": 7.695852534562211e-07, + "loss": 1.1348214149475098, + "step": 168 + }, + { + "epoch": 0.03894009216589862, + "grad_norm": 0.5816469452243797, + "learning_rate": 7.741935483870967e-07, + "loss": 1.287818431854248, + "step": 169 + }, + { + "epoch": 0.03917050691244239, + "grad_norm": 0.4886112893618036, + "learning_rate": 7.788018433179722e-07, + "loss": 1.0723031759262085, + "step": 170 + }, + { + "epoch": 0.03940092165898618, + "grad_norm": 0.5572220637370465, + "learning_rate": 7.834101382488479e-07, + "loss": 1.29054594039917, + "step": 171 + }, + { + "epoch": 0.03963133640552995, + "grad_norm": 0.4996602061858042, + "learning_rate": 7.880184331797235e-07, + "loss": 1.201147198677063, + "step": 172 + }, + { + "epoch": 0.03986175115207373, + 
"grad_norm": 0.47488604971715725, + "learning_rate": 7.92626728110599e-07, + "loss": 1.2529574632644653, + "step": 173 + }, + { + "epoch": 0.04009216589861751, + "grad_norm": 0.5420947446150967, + "learning_rate": 7.972350230414746e-07, + "loss": 1.3255105018615723, + "step": 174 + }, + { + "epoch": 0.04032258064516129, + "grad_norm": 0.5367164884336, + "learning_rate": 8.018433179723502e-07, + "loss": 1.3167433738708496, + "step": 175 + }, + { + "epoch": 0.04055299539170507, + "grad_norm": 0.5124027812324866, + "learning_rate": 8.064516129032257e-07, + "loss": 1.4780502319335938, + "step": 176 + }, + { + "epoch": 0.04078341013824885, + "grad_norm": 0.49049200777499574, + "learning_rate": 8.110599078341014e-07, + "loss": 1.3096996545791626, + "step": 177 + }, + { + "epoch": 0.041013824884792625, + "grad_norm": 0.5684690759624818, + "learning_rate": 8.15668202764977e-07, + "loss": 1.3124895095825195, + "step": 178 + }, + { + "epoch": 0.04124423963133641, + "grad_norm": 0.5746940747619091, + "learning_rate": 8.202764976958525e-07, + "loss": 1.2589681148529053, + "step": 179 + }, + { + "epoch": 0.041474654377880185, + "grad_norm": 0.5351550863930432, + "learning_rate": 8.248847926267281e-07, + "loss": 1.0576659440994263, + "step": 180 + }, + { + "epoch": 0.04170506912442396, + "grad_norm": 0.5804930108989373, + "learning_rate": 8.294930875576036e-07, + "loss": 1.2647404670715332, + "step": 181 + }, + { + "epoch": 0.041935483870967745, + "grad_norm": 0.5527713530674592, + "learning_rate": 8.341013824884793e-07, + "loss": 1.072542428970337, + "step": 182 + }, + { + "epoch": 0.04216589861751152, + "grad_norm": 0.636913740412271, + "learning_rate": 8.387096774193549e-07, + "loss": 1.2417643070220947, + "step": 183 + }, + { + "epoch": 0.0423963133640553, + "grad_norm": 0.4636179655744076, + "learning_rate": 8.433179723502303e-07, + "loss": 1.2490241527557373, + "step": 184 + }, + { + "epoch": 0.04262672811059908, + "grad_norm": 0.5714553493227277, + "learning_rate": 
8.479262672811059e-07, + "loss": 1.1169328689575195, + "step": 185 + }, + { + "epoch": 0.04285714285714286, + "grad_norm": 0.5893436962226742, + "learning_rate": 8.525345622119815e-07, + "loss": 1.1799774169921875, + "step": 186 + }, + { + "epoch": 0.043087557603686634, + "grad_norm": 0.4840759402042485, + "learning_rate": 8.57142857142857e-07, + "loss": 0.9655753374099731, + "step": 187 + }, + { + "epoch": 0.04331797235023042, + "grad_norm": 0.5473512318665162, + "learning_rate": 8.617511520737327e-07, + "loss": 1.2863562107086182, + "step": 188 + }, + { + "epoch": 0.043548387096774194, + "grad_norm": 0.5971573505450626, + "learning_rate": 8.663594470046083e-07, + "loss": 1.056877613067627, + "step": 189 + }, + { + "epoch": 0.04377880184331797, + "grad_norm": 0.5903656134268881, + "learning_rate": 8.709677419354838e-07, + "loss": 1.2128019332885742, + "step": 190 + }, + { + "epoch": 0.044009216589861753, + "grad_norm": 0.5042165136835149, + "learning_rate": 8.755760368663594e-07, + "loss": 1.1397441625595093, + "step": 191 + }, + { + "epoch": 0.04423963133640553, + "grad_norm": 0.5007324461761941, + "learning_rate": 8.801843317972349e-07, + "loss": 1.062232255935669, + "step": 192 + }, + { + "epoch": 0.044470046082949306, + "grad_norm": 0.5077694656116347, + "learning_rate": 8.847926267281106e-07, + "loss": 1.0102736949920654, + "step": 193 + }, + { + "epoch": 0.04470046082949309, + "grad_norm": 0.5039275409209952, + "learning_rate": 8.894009216589862e-07, + "loss": 1.155517339706421, + "step": 194 + }, + { + "epoch": 0.044930875576036866, + "grad_norm": 0.4568536555143312, + "learning_rate": 8.940092165898617e-07, + "loss": 1.042372703552246, + "step": 195 + }, + { + "epoch": 0.04516129032258064, + "grad_norm": 0.6118356615587064, + "learning_rate": 8.986175115207373e-07, + "loss": 1.1158320903778076, + "step": 196 + }, + { + "epoch": 0.045391705069124426, + "grad_norm": 0.6547758969058546, + "learning_rate": 9.032258064516129e-07, + "loss": 1.4693050384521484, + 
"step": 197 + }, + { + "epoch": 0.0456221198156682, + "grad_norm": 0.5189200191294998, + "learning_rate": 9.078341013824884e-07, + "loss": 1.0990574359893799, + "step": 198 + }, + { + "epoch": 0.04585253456221198, + "grad_norm": 0.5123720508165549, + "learning_rate": 9.124423963133641e-07, + "loss": 1.0259861946105957, + "step": 199 + }, + { + "epoch": 0.04608294930875576, + "grad_norm": 0.4638504791285932, + "learning_rate": 9.170506912442397e-07, + "loss": 1.2708477973937988, + "step": 200 + }, + { + "epoch": 0.04631336405529954, + "grad_norm": 0.426472351706666, + "learning_rate": 9.216589861751152e-07, + "loss": 1.052978754043579, + "step": 201 + }, + { + "epoch": 0.046543778801843315, + "grad_norm": 0.5548008737632977, + "learning_rate": 9.262672811059907e-07, + "loss": 1.3405938148498535, + "step": 202 + }, + { + "epoch": 0.0467741935483871, + "grad_norm": 0.4311530218247671, + "learning_rate": 9.308755760368662e-07, + "loss": 0.9464558362960815, + "step": 203 + }, + { + "epoch": 0.047004608294930875, + "grad_norm": 0.6377195135282403, + "learning_rate": 9.354838709677418e-07, + "loss": 1.3019077777862549, + "step": 204 + }, + { + "epoch": 0.04723502304147465, + "grad_norm": 0.6029329005096047, + "learning_rate": 9.400921658986175e-07, + "loss": 1.146841049194336, + "step": 205 + }, + { + "epoch": 0.047465437788018434, + "grad_norm": 0.6136536598800337, + "learning_rate": 9.44700460829493e-07, + "loss": 1.106084942817688, + "step": 206 + }, + { + "epoch": 0.04769585253456221, + "grad_norm": 0.6661299934206126, + "learning_rate": 9.493087557603686e-07, + "loss": 1.2930629253387451, + "step": 207 + }, + { + "epoch": 0.047926267281105994, + "grad_norm": 0.5555271013101563, + "learning_rate": 9.539170506912442e-07, + "loss": 1.1637842655181885, + "step": 208 + }, + { + "epoch": 0.04815668202764977, + "grad_norm": 0.444081897230925, + "learning_rate": 9.585253456221198e-07, + "loss": 1.1753308773040771, + "step": 209 + }, + { + "epoch": 0.04838709677419355, + 
"grad_norm": 0.5362299776231612, + "learning_rate": 9.631336405529954e-07, + "loss": 1.2304046154022217, + "step": 210 + }, + { + "epoch": 0.04861751152073733, + "grad_norm": 0.6898819231347578, + "learning_rate": 9.67741935483871e-07, + "loss": 1.4326789379119873, + "step": 211 + }, + { + "epoch": 0.04884792626728111, + "grad_norm": 0.614044501232848, + "learning_rate": 9.723502304147466e-07, + "loss": 1.0759861469268799, + "step": 212 + }, + { + "epoch": 0.04907834101382488, + "grad_norm": 0.5971609176488232, + "learning_rate": 9.76958525345622e-07, + "loss": 1.1514811515808105, + "step": 213 + }, + { + "epoch": 0.04930875576036867, + "grad_norm": 0.49252816443356506, + "learning_rate": 9.815668202764976e-07, + "loss": 1.1618578433990479, + "step": 214 + }, + { + "epoch": 0.04953917050691244, + "grad_norm": 0.5677669382006955, + "learning_rate": 9.861751152073732e-07, + "loss": 1.0321345329284668, + "step": 215 + }, + { + "epoch": 0.04976958525345622, + "grad_norm": 0.4551655972629908, + "learning_rate": 9.907834101382488e-07, + "loss": 1.0391438007354736, + "step": 216 + }, + { + "epoch": 0.05, + "grad_norm": 0.6188957189455181, + "learning_rate": 9.953917050691244e-07, + "loss": 1.080418586730957, + "step": 217 + }, + { + "epoch": 0.05023041474654378, + "grad_norm": 0.6531841586974683, + "learning_rate": 1e-06, + "loss": 1.2095223665237427, + "step": 218 + }, + { + "epoch": 0.050460829493087556, + "grad_norm": 0.5036313537560552, + "learning_rate": 1.0046082949308756e-06, + "loss": 1.1144485473632812, + "step": 219 + }, + { + "epoch": 0.05069124423963134, + "grad_norm": 0.6466646674884302, + "learning_rate": 1.0092165898617511e-06, + "loss": 1.2560818195343018, + "step": 220 + }, + { + "epoch": 0.050921658986175115, + "grad_norm": 0.586777516357483, + "learning_rate": 1.0138248847926267e-06, + "loss": 1.1043426990509033, + "step": 221 + }, + { + "epoch": 0.05115207373271889, + "grad_norm": 0.41448570454396455, + "learning_rate": 1.0184331797235021e-06, + 
"loss": 1.0725831985473633, + "step": 222 + }, + { + "epoch": 0.051382488479262675, + "grad_norm": 0.5713867853647446, + "learning_rate": 1.023041474654378e-06, + "loss": 0.9764004349708557, + "step": 223 + }, + { + "epoch": 0.05161290322580645, + "grad_norm": 0.6662412690615445, + "learning_rate": 1.0276497695852535e-06, + "loss": 1.2172776460647583, + "step": 224 + }, + { + "epoch": 0.05184331797235023, + "grad_norm": 0.610800258000843, + "learning_rate": 1.032258064516129e-06, + "loss": 1.1065070629119873, + "step": 225 + }, + { + "epoch": 0.05207373271889401, + "grad_norm": 0.5057724484519791, + "learning_rate": 1.0368663594470047e-06, + "loss": 1.0840628147125244, + "step": 226 + }, + { + "epoch": 0.05230414746543779, + "grad_norm": 0.5250793281243177, + "learning_rate": 1.04147465437788e-06, + "loss": 1.109276294708252, + "step": 227 + }, + { + "epoch": 0.052534562211981564, + "grad_norm": 0.7348582040933043, + "learning_rate": 1.0460829493087557e-06, + "loss": 1.186352252960205, + "step": 228 + }, + { + "epoch": 0.05276497695852535, + "grad_norm": 0.48569306871313883, + "learning_rate": 1.050691244239631e-06, + "loss": 1.1605256795883179, + "step": 229 + }, + { + "epoch": 0.052995391705069124, + "grad_norm": 0.6312799860168967, + "learning_rate": 1.0552995391705069e-06, + "loss": 1.0269646644592285, + "step": 230 + }, + { + "epoch": 0.0532258064516129, + "grad_norm": 0.6446173917231129, + "learning_rate": 1.0599078341013825e-06, + "loss": 0.9595874547958374, + "step": 231 + }, + { + "epoch": 0.053456221198156684, + "grad_norm": 0.6010998567907583, + "learning_rate": 1.0645161290322579e-06, + "loss": 1.1606154441833496, + "step": 232 + }, + { + "epoch": 0.05368663594470046, + "grad_norm": 0.6379425251609956, + "learning_rate": 1.0691244239631337e-06, + "loss": 0.9920428991317749, + "step": 233 + }, + { + "epoch": 0.05391705069124424, + "grad_norm": 0.6346840342097714, + "learning_rate": 1.073732718894009e-06, + "loss": 1.2124650478363037, + "step": 234 + }, + 
{ + "epoch": 0.05414746543778802, + "grad_norm": 0.5761223431136224, + "learning_rate": 1.0783410138248847e-06, + "loss": 1.2237420082092285, + "step": 235 + }, + { + "epoch": 0.054377880184331796, + "grad_norm": 0.5178799666370111, + "learning_rate": 1.0829493087557605e-06, + "loss": 1.1484715938568115, + "step": 236 + }, + { + "epoch": 0.05460829493087557, + "grad_norm": 0.5910590598999479, + "learning_rate": 1.0875576036866358e-06, + "loss": 1.2143291234970093, + "step": 237 + }, + { + "epoch": 0.054838709677419356, + "grad_norm": 0.568116947952991, + "learning_rate": 1.0921658986175114e-06, + "loss": 1.1995420455932617, + "step": 238 + }, + { + "epoch": 0.05506912442396313, + "grad_norm": 0.6128333972066793, + "learning_rate": 1.096774193548387e-06, + "loss": 1.2577292919158936, + "step": 239 + }, + { + "epoch": 0.055299539170506916, + "grad_norm": 0.6177738975799152, + "learning_rate": 1.1013824884792626e-06, + "loss": 1.2170629501342773, + "step": 240 + }, + { + "epoch": 0.05552995391705069, + "grad_norm": 0.3580107479174479, + "learning_rate": 1.1059907834101382e-06, + "loss": 0.8318669199943542, + "step": 241 + }, + { + "epoch": 0.05576036866359447, + "grad_norm": 0.4976235536822315, + "learning_rate": 1.1105990783410138e-06, + "loss": 1.0760166645050049, + "step": 242 + }, + { + "epoch": 0.05599078341013825, + "grad_norm": 0.7197455436310494, + "learning_rate": 1.1152073732718894e-06, + "loss": 1.2437031269073486, + "step": 243 + }, + { + "epoch": 0.05622119815668203, + "grad_norm": 0.5957655407019126, + "learning_rate": 1.1198156682027648e-06, + "loss": 1.1680852174758911, + "step": 244 + }, + { + "epoch": 0.056451612903225805, + "grad_norm": 0.6708075502500678, + "learning_rate": 1.1244239631336406e-06, + "loss": 1.051478385925293, + "step": 245 + }, + { + "epoch": 0.05668202764976959, + "grad_norm": 0.547285271256248, + "learning_rate": 1.1290322580645162e-06, + "loss": 1.1433100700378418, + "step": 246 + }, + { + "epoch": 0.056912442396313365, + 
"grad_norm": 0.6428413238154085, + "learning_rate": 1.1336405529953916e-06, + "loss": 0.9521546363830566, + "step": 247 + }, + { + "epoch": 0.05714285714285714, + "grad_norm": 0.6790518899839243, + "learning_rate": 1.1382488479262674e-06, + "loss": 1.226189136505127, + "step": 248 + }, + { + "epoch": 0.057373271889400924, + "grad_norm": 0.7178538920010674, + "learning_rate": 1.1428571428571428e-06, + "loss": 1.108027696609497, + "step": 249 + }, + { + "epoch": 0.0576036866359447, + "grad_norm": 0.4608432366288286, + "learning_rate": 1.1474654377880184e-06, + "loss": 1.042288064956665, + "step": 250 + }, + { + "epoch": 0.05783410138248848, + "grad_norm": 0.8171244559521852, + "learning_rate": 1.1520737327188938e-06, + "loss": 1.193603754043579, + "step": 251 + }, + { + "epoch": 0.05806451612903226, + "grad_norm": 0.6766522772283506, + "learning_rate": 1.1566820276497696e-06, + "loss": 1.193584680557251, + "step": 252 + }, + { + "epoch": 0.05829493087557604, + "grad_norm": 0.5714710938556213, + "learning_rate": 1.1612903225806452e-06, + "loss": 1.2318934202194214, + "step": 253 + }, + { + "epoch": 0.05852534562211981, + "grad_norm": 0.6443899979691422, + "learning_rate": 1.1658986175115205e-06, + "loss": 1.1626521348953247, + "step": 254 + }, + { + "epoch": 0.0587557603686636, + "grad_norm": 0.6336855527034527, + "learning_rate": 1.1705069124423963e-06, + "loss": 1.2402286529541016, + "step": 255 + }, + { + "epoch": 0.05898617511520737, + "grad_norm": 0.599628545600123, + "learning_rate": 1.1751152073732717e-06, + "loss": 1.190323829650879, + "step": 256 + }, + { + "epoch": 0.05921658986175115, + "grad_norm": 0.655955321737197, + "learning_rate": 1.1797235023041473e-06, + "loss": 1.121636986732483, + "step": 257 + }, + { + "epoch": 0.05944700460829493, + "grad_norm": 0.5349922437861245, + "learning_rate": 1.1843317972350231e-06, + "loss": 1.099304437637329, + "step": 258 + }, + { + "epoch": 0.05967741935483871, + "grad_norm": 0.5611568770807159, + "learning_rate": 
1.1889400921658985e-06, + "loss": 1.1730690002441406, + "step": 259 + }, + { + "epoch": 0.059907834101382486, + "grad_norm": 0.5874751551203973, + "learning_rate": 1.1935483870967741e-06, + "loss": 1.1450574398040771, + "step": 260 + }, + { + "epoch": 0.06013824884792627, + "grad_norm": 0.6634311667010621, + "learning_rate": 1.1981566820276497e-06, + "loss": 1.1435421705245972, + "step": 261 + }, + { + "epoch": 0.060368663594470046, + "grad_norm": 0.6113712565981082, + "learning_rate": 1.2027649769585253e-06, + "loss": 1.2153000831604004, + "step": 262 + }, + { + "epoch": 0.06059907834101382, + "grad_norm": 0.4715675476477507, + "learning_rate": 1.207373271889401e-06, + "loss": 1.0380406379699707, + "step": 263 + }, + { + "epoch": 0.060829493087557605, + "grad_norm": 0.5396758253019809, + "learning_rate": 1.2119815668202765e-06, + "loss": 1.1639207601547241, + "step": 264 + }, + { + "epoch": 0.06105990783410138, + "grad_norm": 0.7193765184254299, + "learning_rate": 1.216589861751152e-06, + "loss": 1.1862819194793701, + "step": 265 + }, + { + "epoch": 0.06129032258064516, + "grad_norm": 0.5621136552568688, + "learning_rate": 1.2211981566820275e-06, + "loss": 1.2122020721435547, + "step": 266 + }, + { + "epoch": 0.06152073732718894, + "grad_norm": 0.506518590231947, + "learning_rate": 1.2258064516129033e-06, + "loss": 1.1201646327972412, + "step": 267 + }, + { + "epoch": 0.06175115207373272, + "grad_norm": 0.6015371724768855, + "learning_rate": 1.2304147465437787e-06, + "loss": 0.9520926475524902, + "step": 268 + }, + { + "epoch": 0.061981566820276494, + "grad_norm": 0.6815507447701216, + "learning_rate": 1.2350230414746543e-06, + "loss": 1.0426976680755615, + "step": 269 + }, + { + "epoch": 0.06221198156682028, + "grad_norm": 0.5129880337213574, + "learning_rate": 1.23963133640553e-06, + "loss": 0.934493899345398, + "step": 270 + }, + { + "epoch": 0.062442396313364054, + "grad_norm": 0.5416312735509534, + "learning_rate": 1.2442396313364054e-06, + "loss": 
1.23980712890625, + "step": 271 + }, + { + "epoch": 0.06267281105990784, + "grad_norm": 0.5947336924258313, + "learning_rate": 1.248847926267281e-06, + "loss": 1.094742774963379, + "step": 272 + }, + { + "epoch": 0.06290322580645161, + "grad_norm": 0.5496219212827214, + "learning_rate": 1.2534562211981564e-06, + "loss": 1.0271551609039307, + "step": 273 + }, + { + "epoch": 0.06313364055299539, + "grad_norm": 0.43924704821878574, + "learning_rate": 1.2580645161290322e-06, + "loss": 1.159210205078125, + "step": 274 + }, + { + "epoch": 0.06336405529953917, + "grad_norm": 0.6336734571964621, + "learning_rate": 1.2626728110599078e-06, + "loss": 1.127510666847229, + "step": 275 + }, + { + "epoch": 0.06359447004608294, + "grad_norm": 0.564136508309977, + "learning_rate": 1.2672811059907832e-06, + "loss": 1.1371517181396484, + "step": 276 + }, + { + "epoch": 0.06382488479262673, + "grad_norm": 0.5092569849346139, + "learning_rate": 1.271889400921659e-06, + "loss": 1.0296730995178223, + "step": 277 + }, + { + "epoch": 0.06405529953917051, + "grad_norm": 0.47819096787751125, + "learning_rate": 1.2764976958525344e-06, + "loss": 1.036975383758545, + "step": 278 + }, + { + "epoch": 0.06428571428571428, + "grad_norm": 0.5933788958917384, + "learning_rate": 1.28110599078341e-06, + "loss": 1.2120393514633179, + "step": 279 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 0.5094532117085869, + "learning_rate": 1.2857142857142858e-06, + "loss": 1.0084068775177002, + "step": 280 + }, + { + "epoch": 0.06474654377880185, + "grad_norm": 0.5556672645421422, + "learning_rate": 1.2903225806451612e-06, + "loss": 1.2005786895751953, + "step": 281 + }, + { + "epoch": 0.06497695852534562, + "grad_norm": 0.5273275990471241, + "learning_rate": 1.2949308755760368e-06, + "loss": 1.1506783962249756, + "step": 282 + }, + { + "epoch": 0.0652073732718894, + "grad_norm": 0.6565311834699108, + "learning_rate": 1.2995391705069124e-06, + "loss": 1.1219947338104248, + "step": 283 + }, + { + "epoch": 
0.06543778801843318, + "grad_norm": 0.5392805741788703, + "learning_rate": 1.304147465437788e-06, + "loss": 1.2041170597076416, + "step": 284 + }, + { + "epoch": 0.06566820276497695, + "grad_norm": 0.4958618059812673, + "learning_rate": 1.3087557603686636e-06, + "loss": 1.0903037786483765, + "step": 285 + }, + { + "epoch": 0.06589861751152074, + "grad_norm": 0.5739593792710319, + "learning_rate": 1.3133640552995392e-06, + "loss": 1.2140064239501953, + "step": 286 + }, + { + "epoch": 0.06612903225806452, + "grad_norm": 0.6611408054194472, + "learning_rate": 1.3179723502304148e-06, + "loss": 1.3026092052459717, + "step": 287 + }, + { + "epoch": 0.0663594470046083, + "grad_norm": 0.5994162091601994, + "learning_rate": 1.3225806451612901e-06, + "loss": 1.0937910079956055, + "step": 288 + }, + { + "epoch": 0.06658986175115207, + "grad_norm": 0.5087892316212932, + "learning_rate": 1.327188940092166e-06, + "loss": 1.1768109798431396, + "step": 289 + }, + { + "epoch": 0.06682027649769585, + "grad_norm": 0.6601843016778813, + "learning_rate": 1.3317972350230413e-06, + "loss": 1.0796440839767456, + "step": 290 + }, + { + "epoch": 0.06705069124423964, + "grad_norm": 0.5059222364831474, + "learning_rate": 1.336405529953917e-06, + "loss": 0.9972932934761047, + "step": 291 + }, + { + "epoch": 0.06728110599078341, + "grad_norm": 0.5571474335328804, + "learning_rate": 1.3410138248847927e-06, + "loss": 0.9860717058181763, + "step": 292 + }, + { + "epoch": 0.06751152073732719, + "grad_norm": 0.5418320654969337, + "learning_rate": 1.3456221198156681e-06, + "loss": 1.045119047164917, + "step": 293 + }, + { + "epoch": 0.06774193548387097, + "grad_norm": 0.5469511174229076, + "learning_rate": 1.3502304147465437e-06, + "loss": 1.2740920782089233, + "step": 294 + }, + { + "epoch": 0.06797235023041474, + "grad_norm": 0.5280888059979016, + "learning_rate": 1.354838709677419e-06, + "loss": 1.0860114097595215, + "step": 295 + }, + { + "epoch": 0.06820276497695853, + "grad_norm": 
0.6361673375880608, + "learning_rate": 1.359447004608295e-06, + "loss": 1.111539602279663, + "step": 296 + }, + { + "epoch": 0.06843317972350231, + "grad_norm": 0.6640553054344481, + "learning_rate": 1.3640552995391705e-06, + "loss": 1.1628870964050293, + "step": 297 + }, + { + "epoch": 0.06866359447004608, + "grad_norm": 0.5665129055040568, + "learning_rate": 1.3686635944700459e-06, + "loss": 1.042768955230713, + "step": 298 + }, + { + "epoch": 0.06889400921658986, + "grad_norm": 0.43340931133190164, + "learning_rate": 1.3732718894009217e-06, + "loss": 0.9970331192016602, + "step": 299 + }, + { + "epoch": 0.06912442396313365, + "grad_norm": 0.5645710736996077, + "learning_rate": 1.377880184331797e-06, + "loss": 1.1270179748535156, + "step": 300 + }, + { + "epoch": 0.06935483870967742, + "grad_norm": 0.5065704773498506, + "learning_rate": 1.3824884792626727e-06, + "loss": 0.9505646824836731, + "step": 301 + }, + { + "epoch": 0.0695852534562212, + "grad_norm": 0.5178052985950043, + "learning_rate": 1.3870967741935485e-06, + "loss": 1.0997588634490967, + "step": 302 + }, + { + "epoch": 0.06981566820276498, + "grad_norm": 0.46976885146719827, + "learning_rate": 1.3917050691244239e-06, + "loss": 1.1512106657028198, + "step": 303 + }, + { + "epoch": 0.07004608294930875, + "grad_norm": 0.5368431131511487, + "learning_rate": 1.3963133640552995e-06, + "loss": 1.1340759992599487, + "step": 304 + }, + { + "epoch": 0.07027649769585254, + "grad_norm": 0.6153911846871725, + "learning_rate": 1.400921658986175e-06, + "loss": 1.187511682510376, + "step": 305 + }, + { + "epoch": 0.07050691244239632, + "grad_norm": 0.511555535336468, + "learning_rate": 1.4055299539170507e-06, + "loss": 1.0711122751235962, + "step": 306 + }, + { + "epoch": 0.07073732718894009, + "grad_norm": 0.48287298633713555, + "learning_rate": 1.410138248847926e-06, + "loss": 0.9636896848678589, + "step": 307 + }, + { + "epoch": 0.07096774193548387, + "grad_norm": 0.5910127759130634, + "learning_rate": 
1.4147465437788018e-06, + "loss": 1.0506833791732788, + "step": 308 + }, + { + "epoch": 0.07119815668202766, + "grad_norm": 0.46621570534633416, + "learning_rate": 1.4193548387096774e-06, + "loss": 1.1076349020004272, + "step": 309 + }, + { + "epoch": 0.07142857142857142, + "grad_norm": 0.5023143786431462, + "learning_rate": 1.4239631336405528e-06, + "loss": 1.0878944396972656, + "step": 310 + }, + { + "epoch": 0.07165898617511521, + "grad_norm": 0.5894127846415432, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.0808600187301636, + "step": 311 + }, + { + "epoch": 0.07188940092165899, + "grad_norm": 0.6608655757057322, + "learning_rate": 1.433179723502304e-06, + "loss": 1.2117588520050049, + "step": 312 + }, + { + "epoch": 0.07211981566820276, + "grad_norm": 0.49227698344069387, + "learning_rate": 1.4377880184331796e-06, + "loss": 1.0899101495742798, + "step": 313 + }, + { + "epoch": 0.07235023041474654, + "grad_norm": 0.4429228185732454, + "learning_rate": 1.4423963133640554e-06, + "loss": 0.9540426135063171, + "step": 314 + }, + { + "epoch": 0.07258064516129033, + "grad_norm": 0.6262415135725191, + "learning_rate": 1.4470046082949308e-06, + "loss": 1.1170068979263306, + "step": 315 + }, + { + "epoch": 0.0728110599078341, + "grad_norm": 0.5398534028349639, + "learning_rate": 1.4516129032258064e-06, + "loss": 1.2030160427093506, + "step": 316 + }, + { + "epoch": 0.07304147465437788, + "grad_norm": 0.5750696017486863, + "learning_rate": 1.4562211981566818e-06, + "loss": 1.1599903106689453, + "step": 317 + }, + { + "epoch": 0.07327188940092166, + "grad_norm": 0.4940370201046436, + "learning_rate": 1.4608294930875576e-06, + "loss": 1.0777950286865234, + "step": 318 + }, + { + "epoch": 0.07350230414746543, + "grad_norm": 0.5527232601625117, + "learning_rate": 1.4654377880184332e-06, + "loss": 1.1250553131103516, + "step": 319 + }, + { + "epoch": 0.07373271889400922, + "grad_norm": 0.4905671836592669, + "learning_rate": 1.4700460829493086e-06, + "loss": 
1.10176420211792, + "step": 320 + }, + { + "epoch": 0.073963133640553, + "grad_norm": 0.5457078879226115, + "learning_rate": 1.4746543778801844e-06, + "loss": 1.111799716949463, + "step": 321 + }, + { + "epoch": 0.07419354838709677, + "grad_norm": 0.4195800331952007, + "learning_rate": 1.4792626728110598e-06, + "loss": 1.1555054187774658, + "step": 322 + }, + { + "epoch": 0.07442396313364055, + "grad_norm": 0.46236670595596, + "learning_rate": 1.4838709677419353e-06, + "loss": 1.0977535247802734, + "step": 323 + }, + { + "epoch": 0.07465437788018434, + "grad_norm": 0.5097860724223924, + "learning_rate": 1.4884792626728112e-06, + "loss": 0.9058012962341309, + "step": 324 + }, + { + "epoch": 0.0748847926267281, + "grad_norm": 0.5077577953430894, + "learning_rate": 1.4930875576036865e-06, + "loss": 1.1147960424423218, + "step": 325 + }, + { + "epoch": 0.07511520737327189, + "grad_norm": 0.44169448790763116, + "learning_rate": 1.4976958525345621e-06, + "loss": 1.1315648555755615, + "step": 326 + }, + { + "epoch": 0.07534562211981567, + "grad_norm": 0.5088086763700569, + "learning_rate": 1.5023041474654377e-06, + "loss": 0.9134868383407593, + "step": 327 + }, + { + "epoch": 0.07557603686635944, + "grad_norm": 0.44118138965972864, + "learning_rate": 1.5069124423963133e-06, + "loss": 1.017493724822998, + "step": 328 + }, + { + "epoch": 0.07580645161290323, + "grad_norm": 0.5038134502792564, + "learning_rate": 1.5115207373271887e-06, + "loss": 1.220658540725708, + "step": 329 + }, + { + "epoch": 0.07603686635944701, + "grad_norm": 0.49196264739665124, + "learning_rate": 1.5161290322580645e-06, + "loss": 1.2254307270050049, + "step": 330 + }, + { + "epoch": 0.07626728110599078, + "grad_norm": 0.6442066774537618, + "learning_rate": 1.5207373271889401e-06, + "loss": 1.2653989791870117, + "step": 331 + }, + { + "epoch": 0.07649769585253456, + "grad_norm": 0.5214989033274106, + "learning_rate": 1.5253456221198155e-06, + "loss": 1.199981451034546, + "step": 332 + }, + { + 
"epoch": 0.07672811059907834, + "grad_norm": 0.5987857165424706, + "learning_rate": 1.5299539170506913e-06, + "loss": 1.1141018867492676, + "step": 333 + }, + { + "epoch": 0.07695852534562211, + "grad_norm": 0.5942646354683767, + "learning_rate": 1.5345622119815667e-06, + "loss": 1.2139991521835327, + "step": 334 + }, + { + "epoch": 0.0771889400921659, + "grad_norm": 0.46506211352562865, + "learning_rate": 1.5391705069124423e-06, + "loss": 1.0647475719451904, + "step": 335 + }, + { + "epoch": 0.07741935483870968, + "grad_norm": 0.41334086285294086, + "learning_rate": 1.543778801843318e-06, + "loss": 0.9740357398986816, + "step": 336 + }, + { + "epoch": 0.07764976958525345, + "grad_norm": 0.3730662683323707, + "learning_rate": 1.5483870967741935e-06, + "loss": 0.877153754234314, + "step": 337 + }, + { + "epoch": 0.07788018433179723, + "grad_norm": 0.5608914234538745, + "learning_rate": 1.552995391705069e-06, + "loss": 1.2472789287567139, + "step": 338 + }, + { + "epoch": 0.07811059907834102, + "grad_norm": 0.49369711494641005, + "learning_rate": 1.5576036866359445e-06, + "loss": 1.1873078346252441, + "step": 339 + }, + { + "epoch": 0.07834101382488479, + "grad_norm": 0.47054639516827856, + "learning_rate": 1.5622119815668203e-06, + "loss": 1.0728449821472168, + "step": 340 + }, + { + "epoch": 0.07857142857142857, + "grad_norm": 0.5084311239727589, + "learning_rate": 1.5668202764976959e-06, + "loss": 0.9974904656410217, + "step": 341 + }, + { + "epoch": 0.07880184331797235, + "grad_norm": 0.5100945633220172, + "learning_rate": 1.5714285714285712e-06, + "loss": 1.0591039657592773, + "step": 342 + }, + { + "epoch": 0.07903225806451612, + "grad_norm": 0.5927330256525677, + "learning_rate": 1.576036866359447e-06, + "loss": 1.04117751121521, + "step": 343 + }, + { + "epoch": 0.0792626728110599, + "grad_norm": 0.40984725482311923, + "learning_rate": 1.5806451612903224e-06, + "loss": 0.934100866317749, + "step": 344 + }, + { + "epoch": 0.07949308755760369, + "grad_norm": 
0.4545690285130126, + "learning_rate": 1.585253456221198e-06, + "loss": 1.0333890914916992, + "step": 345 + }, + { + "epoch": 0.07972350230414746, + "grad_norm": 0.4848318013907446, + "learning_rate": 1.5898617511520738e-06, + "loss": 1.1762741804122925, + "step": 346 + }, + { + "epoch": 0.07995391705069124, + "grad_norm": 0.4082821303075448, + "learning_rate": 1.5944700460829492e-06, + "loss": 1.081842303276062, + "step": 347 + }, + { + "epoch": 0.08018433179723503, + "grad_norm": 0.48343962912190763, + "learning_rate": 1.5990783410138248e-06, + "loss": 1.140712022781372, + "step": 348 + }, + { + "epoch": 0.0804147465437788, + "grad_norm": 0.3519464028715392, + "learning_rate": 1.6036866359447004e-06, + "loss": 1.0155198574066162, + "step": 349 + }, + { + "epoch": 0.08064516129032258, + "grad_norm": 0.4719922940268299, + "learning_rate": 1.608294930875576e-06, + "loss": 1.0673280954360962, + "step": 350 + }, + { + "epoch": 0.08087557603686636, + "grad_norm": 0.44336917730159625, + "learning_rate": 1.6129032258064514e-06, + "loss": 1.1061692237854004, + "step": 351 + }, + { + "epoch": 0.08110599078341015, + "grad_norm": 0.6227306591455409, + "learning_rate": 1.6175115207373272e-06, + "loss": 1.0120354890823364, + "step": 352 + }, + { + "epoch": 0.08133640552995391, + "grad_norm": 0.5343939607764295, + "learning_rate": 1.6221198156682028e-06, + "loss": 1.1260986328125, + "step": 353 + }, + { + "epoch": 0.0815668202764977, + "grad_norm": 0.514597043189326, + "learning_rate": 1.6267281105990782e-06, + "loss": 1.0376214981079102, + "step": 354 + }, + { + "epoch": 0.08179723502304148, + "grad_norm": 0.41314458702115897, + "learning_rate": 1.631336405529954e-06, + "loss": 1.0802130699157715, + "step": 355 + }, + { + "epoch": 0.08202764976958525, + "grad_norm": 0.5113844239661658, + "learning_rate": 1.6359447004608294e-06, + "loss": 1.217378854751587, + "step": 356 + }, + { + "epoch": 0.08225806451612903, + "grad_norm": 0.3681604891354872, + "learning_rate": 
1.640552995391705e-06, + "loss": 0.9107617139816284, + "step": 357 + }, + { + "epoch": 0.08248847926267282, + "grad_norm": 0.4567828094638987, + "learning_rate": 1.6451612903225808e-06, + "loss": 1.089385986328125, + "step": 358 + }, + { + "epoch": 0.08271889400921659, + "grad_norm": 0.42382031863730735, + "learning_rate": 1.6497695852534561e-06, + "loss": 1.1420392990112305, + "step": 359 + }, + { + "epoch": 0.08294930875576037, + "grad_norm": 0.4385300551654332, + "learning_rate": 1.6543778801843317e-06, + "loss": 0.9308648705482483, + "step": 360 + }, + { + "epoch": 0.08317972350230415, + "grad_norm": 0.4691608891369802, + "learning_rate": 1.6589861751152071e-06, + "loss": 0.9463413953781128, + "step": 361 + }, + { + "epoch": 0.08341013824884792, + "grad_norm": 0.4312953553305326, + "learning_rate": 1.663594470046083e-06, + "loss": 1.0364834070205688, + "step": 362 + }, + { + "epoch": 0.0836405529953917, + "grad_norm": 0.4601141894995307, + "learning_rate": 1.6682027649769585e-06, + "loss": 0.9992797374725342, + "step": 363 + }, + { + "epoch": 0.08387096774193549, + "grad_norm": 0.4110829230093509, + "learning_rate": 1.672811059907834e-06, + "loss": 0.9862687587738037, + "step": 364 + }, + { + "epoch": 0.08410138248847926, + "grad_norm": 0.538237007116734, + "learning_rate": 1.6774193548387097e-06, + "loss": 1.0882744789123535, + "step": 365 + }, + { + "epoch": 0.08433179723502304, + "grad_norm": 0.38129891099780466, + "learning_rate": 1.682027649769585e-06, + "loss": 0.9217149615287781, + "step": 366 + }, + { + "epoch": 0.08456221198156683, + "grad_norm": 0.47566005804946043, + "learning_rate": 1.6866359447004607e-06, + "loss": 1.0384632349014282, + "step": 367 + }, + { + "epoch": 0.0847926267281106, + "grad_norm": 0.41334228678230484, + "learning_rate": 1.6912442396313363e-06, + "loss": 0.8760565519332886, + "step": 368 + }, + { + "epoch": 0.08502304147465438, + "grad_norm": 0.38194490761666694, + "learning_rate": 1.6958525345622119e-06, + "loss": 
0.9868614077568054, + "step": 369 + }, + { + "epoch": 0.08525345622119816, + "grad_norm": 0.41853356164893474, + "learning_rate": 1.7004608294930875e-06, + "loss": 1.0386936664581299, + "step": 370 + }, + { + "epoch": 0.08548387096774193, + "grad_norm": 0.4969082634490474, + "learning_rate": 1.705069124423963e-06, + "loss": 1.2201364040374756, + "step": 371 + }, + { + "epoch": 0.08571428571428572, + "grad_norm": 0.45684500070085005, + "learning_rate": 1.7096774193548387e-06, + "loss": 0.9892920255661011, + "step": 372 + }, + { + "epoch": 0.0859447004608295, + "grad_norm": 0.3411435628885881, + "learning_rate": 1.714285714285714e-06, + "loss": 0.9379667639732361, + "step": 373 + }, + { + "epoch": 0.08617511520737327, + "grad_norm": 0.4493279942699278, + "learning_rate": 1.7188940092165899e-06, + "loss": 1.0150624513626099, + "step": 374 + }, + { + "epoch": 0.08640552995391705, + "grad_norm": 0.3873317793310882, + "learning_rate": 1.7235023041474655e-06, + "loss": 0.8724589943885803, + "step": 375 + }, + { + "epoch": 0.08663594470046083, + "grad_norm": 0.451020649692039, + "learning_rate": 1.7281105990783408e-06, + "loss": 1.005715012550354, + "step": 376 + }, + { + "epoch": 0.0868663594470046, + "grad_norm": 0.40515583321904614, + "learning_rate": 1.7327188940092167e-06, + "loss": 1.0238345861434937, + "step": 377 + }, + { + "epoch": 0.08709677419354839, + "grad_norm": 0.5713304603163627, + "learning_rate": 1.737327188940092e-06, + "loss": 1.061020851135254, + "step": 378 + }, + { + "epoch": 0.08732718894009217, + "grad_norm": 0.31543151666242697, + "learning_rate": 1.7419354838709676e-06, + "loss": 0.8607133626937866, + "step": 379 + }, + { + "epoch": 0.08755760368663594, + "grad_norm": 0.396586403800106, + "learning_rate": 1.7465437788018434e-06, + "loss": 0.9070740938186646, + "step": 380 + }, + { + "epoch": 0.08778801843317972, + "grad_norm": 0.4114853593210784, + "learning_rate": 1.7511520737327188e-06, + "loss": 0.993092954158783, + "step": 381 + }, + { + 
"epoch": 0.08801843317972351, + "grad_norm": 0.5030976624666732, + "learning_rate": 1.7557603686635944e-06, + "loss": 1.1119567155838013, + "step": 382 + }, + { + "epoch": 0.08824884792626728, + "grad_norm": 0.3947649464982104, + "learning_rate": 1.7603686635944698e-06, + "loss": 1.030786395072937, + "step": 383 + }, + { + "epoch": 0.08847926267281106, + "grad_norm": 0.413233744996873, + "learning_rate": 1.7649769585253456e-06, + "loss": 1.0578559637069702, + "step": 384 + }, + { + "epoch": 0.08870967741935484, + "grad_norm": 0.5116874225270758, + "learning_rate": 1.7695852534562212e-06, + "loss": 1.1282391548156738, + "step": 385 + }, + { + "epoch": 0.08894009216589861, + "grad_norm": 0.36883704269137796, + "learning_rate": 1.7741935483870966e-06, + "loss": 0.7838784456253052, + "step": 386 + }, + { + "epoch": 0.0891705069124424, + "grad_norm": 0.4028833159886203, + "learning_rate": 1.7788018433179724e-06, + "loss": 0.9244300127029419, + "step": 387 + }, + { + "epoch": 0.08940092165898618, + "grad_norm": 0.37786066556159736, + "learning_rate": 1.7834101382488478e-06, + "loss": 0.916866660118103, + "step": 388 + }, + { + "epoch": 0.08963133640552995, + "grad_norm": 0.3466207390337416, + "learning_rate": 1.7880184331797234e-06, + "loss": 0.9918155670166016, + "step": 389 + }, + { + "epoch": 0.08986175115207373, + "grad_norm": 0.49146787648511026, + "learning_rate": 1.792626728110599e-06, + "loss": 0.9879001379013062, + "step": 390 + }, + { + "epoch": 0.09009216589861752, + "grad_norm": 0.4467876721911936, + "learning_rate": 1.7972350230414746e-06, + "loss": 1.0252082347869873, + "step": 391 + }, + { + "epoch": 0.09032258064516129, + "grad_norm": 0.4519090202816701, + "learning_rate": 1.8018433179723502e-06, + "loss": 1.0376901626586914, + "step": 392 + }, + { + "epoch": 0.09055299539170507, + "grad_norm": 0.4158305964101772, + "learning_rate": 1.8064516129032258e-06, + "loss": 1.0237072706222534, + "step": 393 + }, + { + "epoch": 0.09078341013824885, + "grad_norm": 
0.3903660894092682, + "learning_rate": 1.8110599078341013e-06, + "loss": 1.004181146621704, + "step": 394 + }, + { + "epoch": 0.09101382488479262, + "grad_norm": 0.4844697096481946, + "learning_rate": 1.8156682027649767e-06, + "loss": 1.1162958145141602, + "step": 395 + }, + { + "epoch": 0.0912442396313364, + "grad_norm": 0.43484007077470926, + "learning_rate": 1.8202764976958525e-06, + "loss": 0.9634548425674438, + "step": 396 + }, + { + "epoch": 0.09147465437788019, + "grad_norm": 0.34256483920586434, + "learning_rate": 1.8248847926267281e-06, + "loss": 0.9306463599205017, + "step": 397 + }, + { + "epoch": 0.09170506912442396, + "grad_norm": 0.4291772843094685, + "learning_rate": 1.8294930875576035e-06, + "loss": 1.0243630409240723, + "step": 398 + }, + { + "epoch": 0.09193548387096774, + "grad_norm": 0.37150575195192614, + "learning_rate": 1.8341013824884793e-06, + "loss": 0.9261370897293091, + "step": 399 + }, + { + "epoch": 0.09216589861751152, + "grad_norm": 0.41574639263883495, + "learning_rate": 1.8387096774193547e-06, + "loss": 0.9929264783859253, + "step": 400 + }, + { + "epoch": 0.0923963133640553, + "grad_norm": 0.4086620199652483, + "learning_rate": 1.8433179723502303e-06, + "loss": 1.0245590209960938, + "step": 401 + }, + { + "epoch": 0.09262672811059908, + "grad_norm": 0.4485366734014856, + "learning_rate": 1.8479262672811061e-06, + "loss": 0.9801148176193237, + "step": 402 + }, + { + "epoch": 0.09285714285714286, + "grad_norm": 0.48045286204627596, + "learning_rate": 1.8525345622119815e-06, + "loss": 1.181383728981018, + "step": 403 + }, + { + "epoch": 0.09308755760368663, + "grad_norm": 0.41845043157279344, + "learning_rate": 1.857142857142857e-06, + "loss": 0.9493411779403687, + "step": 404 + }, + { + "epoch": 0.09331797235023041, + "grad_norm": 0.4897744794150158, + "learning_rate": 1.8617511520737325e-06, + "loss": 1.1096491813659668, + "step": 405 + }, + { + "epoch": 0.0935483870967742, + "grad_norm": 0.4480175053230346, + "learning_rate": 
1.8663594470046083e-06, + "loss": 1.1019275188446045, + "step": 406 + }, + { + "epoch": 0.09377880184331797, + "grad_norm": 0.3732577959232657, + "learning_rate": 1.8709677419354837e-06, + "loss": 0.973988950252533, + "step": 407 + }, + { + "epoch": 0.09400921658986175, + "grad_norm": 0.4400203989690802, + "learning_rate": 1.8755760368663593e-06, + "loss": 1.1670622825622559, + "step": 408 + }, + { + "epoch": 0.09423963133640553, + "grad_norm": 0.3329146322312322, + "learning_rate": 1.880184331797235e-06, + "loss": 0.8550488948822021, + "step": 409 + }, + { + "epoch": 0.0944700460829493, + "grad_norm": 0.4080056832475701, + "learning_rate": 1.8847926267281104e-06, + "loss": 1.0501651763916016, + "step": 410 + }, + { + "epoch": 0.09470046082949309, + "grad_norm": 0.4667020783139675, + "learning_rate": 1.889400921658986e-06, + "loss": 1.1323202848434448, + "step": 411 + }, + { + "epoch": 0.09493087557603687, + "grad_norm": 0.4438011539128225, + "learning_rate": 1.8940092165898616e-06, + "loss": 1.168154001235962, + "step": 412 + }, + { + "epoch": 0.09516129032258064, + "grad_norm": 0.5043395094497101, + "learning_rate": 1.8986175115207372e-06, + "loss": 1.0667431354522705, + "step": 413 + }, + { + "epoch": 0.09539170506912442, + "grad_norm": 0.42921175733784445, + "learning_rate": 1.9032258064516128e-06, + "loss": 1.1447162628173828, + "step": 414 + }, + { + "epoch": 0.0956221198156682, + "grad_norm": 0.42501454608228506, + "learning_rate": 1.9078341013824884e-06, + "loss": 0.9403433799743652, + "step": 415 + }, + { + "epoch": 0.09585253456221199, + "grad_norm": 0.4016688989337606, + "learning_rate": 1.912442396313364e-06, + "loss": 0.9837527275085449, + "step": 416 + }, + { + "epoch": 0.09608294930875576, + "grad_norm": 0.422068085350648, + "learning_rate": 1.9170506912442396e-06, + "loss": 1.071333408355713, + "step": 417 + }, + { + "epoch": 0.09631336405529954, + "grad_norm": 0.5124388054628781, + "learning_rate": 1.921658986175115e-06, + "loss": 
1.0156168937683105, + "step": 418 + }, + { + "epoch": 0.09654377880184332, + "grad_norm": 0.4338501331744671, + "learning_rate": 1.926267281105991e-06, + "loss": 0.9705266952514648, + "step": 419 + }, + { + "epoch": 0.0967741935483871, + "grad_norm": 0.407144156286867, + "learning_rate": 1.930875576036866e-06, + "loss": 1.0570204257965088, + "step": 420 + }, + { + "epoch": 0.09700460829493088, + "grad_norm": 0.43729360857600713, + "learning_rate": 1.935483870967742e-06, + "loss": 1.141861915588379, + "step": 421 + }, + { + "epoch": 0.09723502304147466, + "grad_norm": 0.4507835554387818, + "learning_rate": 1.9400921658986174e-06, + "loss": 0.9849745631217957, + "step": 422 + }, + { + "epoch": 0.09746543778801843, + "grad_norm": 0.4932195036683519, + "learning_rate": 1.944700460829493e-06, + "loss": 1.0279912948608398, + "step": 423 + }, + { + "epoch": 0.09769585253456221, + "grad_norm": 0.4014365475110759, + "learning_rate": 1.9493087557603686e-06, + "loss": 1.0707788467407227, + "step": 424 + }, + { + "epoch": 0.097926267281106, + "grad_norm": 0.37856248369077095, + "learning_rate": 1.953917050691244e-06, + "loss": 0.9391129016876221, + "step": 425 + }, + { + "epoch": 0.09815668202764977, + "grad_norm": 0.3604046417791118, + "learning_rate": 1.9585253456221198e-06, + "loss": 0.9792884588241577, + "step": 426 + }, + { + "epoch": 0.09838709677419355, + "grad_norm": 0.42091691400517506, + "learning_rate": 1.963133640552995e-06, + "loss": 1.0111792087554932, + "step": 427 + }, + { + "epoch": 0.09861751152073733, + "grad_norm": 0.2951881364083913, + "learning_rate": 1.967741935483871e-06, + "loss": 1.0020272731781006, + "step": 428 + }, + { + "epoch": 0.0988479262672811, + "grad_norm": 0.42473763380817414, + "learning_rate": 1.9723502304147463e-06, + "loss": 1.1002991199493408, + "step": 429 + }, + { + "epoch": 0.09907834101382489, + "grad_norm": 0.3977328364337887, + "learning_rate": 1.976958525345622e-06, + "loss": 0.9656131267547607, + "step": 430 + }, + { + "epoch": 
0.09930875576036867, + "grad_norm": 0.4163794190517341, + "learning_rate": 1.9815668202764975e-06, + "loss": 1.1845166683197021, + "step": 431 + }, + { + "epoch": 0.09953917050691244, + "grad_norm": 0.4102761511182145, + "learning_rate": 1.9861751152073733e-06, + "loss": 0.8743879795074463, + "step": 432 + }, + { + "epoch": 0.09976958525345622, + "grad_norm": 0.48299006340600875, + "learning_rate": 1.9907834101382487e-06, + "loss": 1.0800082683563232, + "step": 433 + }, + { + "epoch": 0.1, + "grad_norm": 0.39412754669182365, + "learning_rate": 1.995391705069124e-06, + "loss": 1.0410808324813843, + "step": 434 + }, + { + "epoch": 0.10023041474654378, + "grad_norm": 0.4817128357084655, + "learning_rate": 2e-06, + "loss": 1.0214624404907227, + "step": 435 + }, + { + "epoch": 0.10046082949308756, + "grad_norm": 0.4738161753055533, + "learning_rate": 1.9999999274256618e-06, + "loss": 1.0304028987884521, + "step": 436 + }, + { + "epoch": 0.10069124423963134, + "grad_norm": 0.3946923205513698, + "learning_rate": 1.9999997097026583e-06, + "loss": 1.0457626581192017, + "step": 437 + }, + { + "epoch": 0.10092165898617511, + "grad_norm": 0.43567215904100204, + "learning_rate": 1.9999993468310205e-06, + "loss": 0.9837691187858582, + "step": 438 + }, + { + "epoch": 0.1011520737327189, + "grad_norm": 0.5216317957588074, + "learning_rate": 1.9999988388108013e-06, + "loss": 1.0819612741470337, + "step": 439 + }, + { + "epoch": 0.10138248847926268, + "grad_norm": 0.31182314858852395, + "learning_rate": 1.9999981856420743e-06, + "loss": 1.0417449474334717, + "step": 440 + }, + { + "epoch": 0.10161290322580645, + "grad_norm": 0.5477105048499294, + "learning_rate": 1.999997387324935e-06, + "loss": 1.0501068830490112, + "step": 441 + }, + { + "epoch": 0.10184331797235023, + "grad_norm": 0.4106183150059033, + "learning_rate": 1.999996443859498e-06, + "loss": 1.0635120868682861, + "step": 442 + }, + { + "epoch": 0.10207373271889401, + "grad_norm": 0.4873224989082174, + "learning_rate": 
1.999995355245902e-06, + "loss": 0.9732234477996826, + "step": 443 + }, + { + "epoch": 0.10230414746543778, + "grad_norm": 0.3718846857755592, + "learning_rate": 1.9999941214843034e-06, + "loss": 0.9493811130523682, + "step": 444 + }, + { + "epoch": 0.10253456221198157, + "grad_norm": 0.5595191439491263, + "learning_rate": 1.9999927425748817e-06, + "loss": 1.1455141305923462, + "step": 445 + }, + { + "epoch": 0.10276497695852535, + "grad_norm": 0.4237177518607636, + "learning_rate": 1.9999912185178374e-06, + "loss": 0.9341592788696289, + "step": 446 + }, + { + "epoch": 0.10299539170506912, + "grad_norm": 0.3913224265375377, + "learning_rate": 1.9999895493133916e-06, + "loss": 0.9535608291625977, + "step": 447 + }, + { + "epoch": 0.1032258064516129, + "grad_norm": 0.4687207319213409, + "learning_rate": 1.999987734961787e-06, + "loss": 1.1977221965789795, + "step": 448 + }, + { + "epoch": 0.10345622119815669, + "grad_norm": 0.45995634872516833, + "learning_rate": 1.999985775463286e-06, + "loss": 1.1658375263214111, + "step": 449 + }, + { + "epoch": 0.10368663594470046, + "grad_norm": 0.47830181543951694, + "learning_rate": 1.9999836708181734e-06, + "loss": 1.1171612739562988, + "step": 450 + }, + { + "epoch": 0.10391705069124424, + "grad_norm": 0.3823354001067843, + "learning_rate": 1.999981421026755e-06, + "loss": 1.0864373445510864, + "step": 451 + }, + { + "epoch": 0.10414746543778802, + "grad_norm": 0.43518989690984766, + "learning_rate": 1.999979026089357e-06, + "loss": 1.1211299896240234, + "step": 452 + }, + { + "epoch": 0.10437788018433179, + "grad_norm": 0.45163820634554874, + "learning_rate": 1.9999764860063277e-06, + "loss": 1.071751594543457, + "step": 453 + }, + { + "epoch": 0.10460829493087558, + "grad_norm": 0.3749468590501543, + "learning_rate": 1.9999738007780347e-06, + "loss": 1.0377576351165771, + "step": 454 + }, + { + "epoch": 0.10483870967741936, + "grad_norm": 0.42625340690366553, + "learning_rate": 1.9999709704048685e-06, + "loss": 
0.9658410549163818, + "step": 455 + }, + { + "epoch": 0.10506912442396313, + "grad_norm": 0.4022888050751363, + "learning_rate": 1.9999679948872395e-06, + "loss": 0.9070194959640503, + "step": 456 + }, + { + "epoch": 0.10529953917050691, + "grad_norm": 0.5570523464378584, + "learning_rate": 1.9999648742255803e-06, + "loss": 1.2197664976119995, + "step": 457 + }, + { + "epoch": 0.1055299539170507, + "grad_norm": 0.3961372853294897, + "learning_rate": 1.9999616084203426e-06, + "loss": 0.9032889604568481, + "step": 458 + }, + { + "epoch": 0.10576036866359446, + "grad_norm": 0.39060467678942784, + "learning_rate": 1.9999581974720017e-06, + "loss": 0.9458762407302856, + "step": 459 + }, + { + "epoch": 0.10599078341013825, + "grad_norm": 0.5068153216782157, + "learning_rate": 1.9999546413810526e-06, + "loss": 1.0024757385253906, + "step": 460 + }, + { + "epoch": 0.10622119815668203, + "grad_norm": 0.38148764403186025, + "learning_rate": 1.9999509401480108e-06, + "loss": 0.9499050378799438, + "step": 461 + }, + { + "epoch": 0.1064516129032258, + "grad_norm": 0.4354491299812492, + "learning_rate": 1.9999470937734132e-06, + "loss": 1.0764188766479492, + "step": 462 + }, + { + "epoch": 0.10668202764976958, + "grad_norm": 0.42800401210878014, + "learning_rate": 1.9999431022578194e-06, + "loss": 0.9858300089836121, + "step": 463 + }, + { + "epoch": 0.10691244239631337, + "grad_norm": 0.41132718920336847, + "learning_rate": 1.999938965601808e-06, + "loss": 0.8965580463409424, + "step": 464 + }, + { + "epoch": 0.10714285714285714, + "grad_norm": 0.39699129711694964, + "learning_rate": 1.9999346838059788e-06, + "loss": 0.8860410451889038, + "step": 465 + }, + { + "epoch": 0.10737327188940092, + "grad_norm": 0.48300723462768347, + "learning_rate": 1.9999302568709546e-06, + "loss": 1.0621274709701538, + "step": 466 + }, + { + "epoch": 0.1076036866359447, + "grad_norm": 0.45149909069714367, + "learning_rate": 1.9999256847973774e-06, + "loss": 0.8894643783569336, + "step": 467 + }, + 
{ + "epoch": 0.10783410138248847, + "grad_norm": 0.3529913357119793, + "learning_rate": 1.999920967585911e-06, + "loss": 0.98856520652771, + "step": 468 + }, + { + "epoch": 0.10806451612903226, + "grad_norm": 0.3260735960256147, + "learning_rate": 1.999916105237239e-06, + "loss": 0.7885239124298096, + "step": 469 + }, + { + "epoch": 0.10829493087557604, + "grad_norm": 0.4477697599226733, + "learning_rate": 1.9999110977520687e-06, + "loss": 1.0274477005004883, + "step": 470 + }, + { + "epoch": 0.10852534562211981, + "grad_norm": 0.3938409891368368, + "learning_rate": 1.999905945131126e-06, + "loss": 0.8672109842300415, + "step": 471 + }, + { + "epoch": 0.10875576036866359, + "grad_norm": 0.37173415889586336, + "learning_rate": 1.9999006473751594e-06, + "loss": 0.852576732635498, + "step": 472 + }, + { + "epoch": 0.10898617511520738, + "grad_norm": 0.3670138423827908, + "learning_rate": 1.9998952044849375e-06, + "loss": 0.9553557634353638, + "step": 473 + }, + { + "epoch": 0.10921658986175115, + "grad_norm": 0.4402707979796638, + "learning_rate": 1.99988961646125e-06, + "loss": 1.1375620365142822, + "step": 474 + }, + { + "epoch": 0.10944700460829493, + "grad_norm": 0.4045716386517098, + "learning_rate": 1.9998838833049083e-06, + "loss": 0.9653681516647339, + "step": 475 + }, + { + "epoch": 0.10967741935483871, + "grad_norm": 0.3653559897200667, + "learning_rate": 1.999878005016745e-06, + "loss": 1.1139185428619385, + "step": 476 + }, + { + "epoch": 0.10990783410138248, + "grad_norm": 0.37459420946595523, + "learning_rate": 1.9998719815976127e-06, + "loss": 0.8375418186187744, + "step": 477 + }, + { + "epoch": 0.11013824884792627, + "grad_norm": 0.33053822521695836, + "learning_rate": 1.999865813048386e-06, + "loss": 1.0005979537963867, + "step": 478 + }, + { + "epoch": 0.11036866359447005, + "grad_norm": 0.39083306344420843, + "learning_rate": 1.99985949936996e-06, + "loss": 0.8499772548675537, + "step": 479 + }, + { + "epoch": 0.11059907834101383, + "grad_norm": 
0.3575835338316839, + "learning_rate": 1.999853040563252e-06, + "loss": 0.9805284738540649, + "step": 480 + }, + { + "epoch": 0.1108294930875576, + "grad_norm": 0.43340835059987204, + "learning_rate": 1.9998464366291983e-06, + "loss": 0.9462177753448486, + "step": 481 + }, + { + "epoch": 0.11105990783410138, + "grad_norm": 0.44706726559657484, + "learning_rate": 1.999839687568758e-06, + "loss": 1.1023187637329102, + "step": 482 + }, + { + "epoch": 0.11129032258064517, + "grad_norm": 0.3754824087757579, + "learning_rate": 1.9998327933829103e-06, + "loss": 0.9361279010772705, + "step": 483 + }, + { + "epoch": 0.11152073732718894, + "grad_norm": 0.38419186899738067, + "learning_rate": 1.9998257540726567e-06, + "loss": 0.9811379909515381, + "step": 484 + }, + { + "epoch": 0.11175115207373272, + "grad_norm": 0.4030421476721474, + "learning_rate": 1.9998185696390184e-06, + "loss": 1.0246069431304932, + "step": 485 + }, + { + "epoch": 0.1119815668202765, + "grad_norm": 0.4555360249805513, + "learning_rate": 1.9998112400830385e-06, + "loss": 1.0614899396896362, + "step": 486 + }, + { + "epoch": 0.11221198156682027, + "grad_norm": 0.4347652169333907, + "learning_rate": 1.9998037654057803e-06, + "loss": 1.02305269241333, + "step": 487 + }, + { + "epoch": 0.11244239631336406, + "grad_norm": 0.43672158413630835, + "learning_rate": 1.999796145608329e-06, + "loss": 1.044907808303833, + "step": 488 + }, + { + "epoch": 0.11267281105990784, + "grad_norm": 0.4917956866782855, + "learning_rate": 1.999788380691791e-06, + "loss": 0.9669852256774902, + "step": 489 + }, + { + "epoch": 0.11290322580645161, + "grad_norm": 0.3857920087478492, + "learning_rate": 1.9997804706572933e-06, + "loss": 1.0235236883163452, + "step": 490 + }, + { + "epoch": 0.1131336405529954, + "grad_norm": 0.4541175977583441, + "learning_rate": 1.9997724155059835e-06, + "loss": 0.8982692360877991, + "step": 491 + }, + { + "epoch": 0.11336405529953918, + "grad_norm": 0.481910238333043, + "learning_rate": 
1.9997642152390312e-06, + "loss": 0.8390282988548279, + "step": 492 + }, + { + "epoch": 0.11359447004608295, + "grad_norm": 0.39882686276748835, + "learning_rate": 1.9997558698576266e-06, + "loss": 0.8938695192337036, + "step": 493 + }, + { + "epoch": 0.11382488479262673, + "grad_norm": 0.5064684870077569, + "learning_rate": 1.9997473793629813e-06, + "loss": 0.9747422933578491, + "step": 494 + }, + { + "epoch": 0.11405529953917051, + "grad_norm": 0.443509358045386, + "learning_rate": 1.999738743756327e-06, + "loss": 1.050918698310852, + "step": 495 + }, + { + "epoch": 0.11428571428571428, + "grad_norm": 0.5368423996158629, + "learning_rate": 1.9997299630389174e-06, + "loss": 0.9169312715530396, + "step": 496 + }, + { + "epoch": 0.11451612903225807, + "grad_norm": 0.452695866401899, + "learning_rate": 1.9997210372120272e-06, + "loss": 1.0258065462112427, + "step": 497 + }, + { + "epoch": 0.11474654377880185, + "grad_norm": 0.3831239007423439, + "learning_rate": 1.9997119662769523e-06, + "loss": 1.066356897354126, + "step": 498 + }, + { + "epoch": 0.11497695852534562, + "grad_norm": 0.4319474855040805, + "learning_rate": 1.9997027502350086e-06, + "loss": 1.0336101055145264, + "step": 499 + }, + { + "epoch": 0.1152073732718894, + "grad_norm": 0.36856882435983085, + "learning_rate": 1.9996933890875342e-06, + "loss": 1.0434989929199219, + "step": 500 + }, + { + "epoch": 0.11543778801843319, + "grad_norm": 0.4366750071509639, + "learning_rate": 1.9996838828358876e-06, + "loss": 1.0081424713134766, + "step": 501 + }, + { + "epoch": 0.11566820276497695, + "grad_norm": 0.4424253641379215, + "learning_rate": 1.999674231481449e-06, + "loss": 1.0998575687408447, + "step": 502 + }, + { + "epoch": 0.11589861751152074, + "grad_norm": 0.43915567985422416, + "learning_rate": 1.9996644350256193e-06, + "loss": 1.0325868129730225, + "step": 503 + }, + { + "epoch": 0.11612903225806452, + "grad_norm": 0.39758687932867864, + "learning_rate": 1.99965449346982e-06, + "loss": 
1.0520741939544678, + "step": 504 + }, + { + "epoch": 0.11635944700460829, + "grad_norm": 0.4373332869451062, + "learning_rate": 1.9996444068154943e-06, + "loss": 0.9355484247207642, + "step": 505 + }, + { + "epoch": 0.11658986175115207, + "grad_norm": 0.478944942365821, + "learning_rate": 1.9996341750641067e-06, + "loss": 1.2088062763214111, + "step": 506 + }, + { + "epoch": 0.11682027649769586, + "grad_norm": 0.45703939880277317, + "learning_rate": 1.9996237982171416e-06, + "loss": 1.007477045059204, + "step": 507 + }, + { + "epoch": 0.11705069124423963, + "grad_norm": 0.516029780444843, + "learning_rate": 1.9996132762761054e-06, + "loss": 0.9528911113739014, + "step": 508 + }, + { + "epoch": 0.11728110599078341, + "grad_norm": 0.44144049831872473, + "learning_rate": 1.9996026092425258e-06, + "loss": 1.0906065702438354, + "step": 509 + }, + { + "epoch": 0.1175115207373272, + "grad_norm": 0.45635386377861326, + "learning_rate": 1.9995917971179507e-06, + "loss": 1.1328812837600708, + "step": 510 + }, + { + "epoch": 0.11774193548387096, + "grad_norm": 0.5010986511700435, + "learning_rate": 1.9995808399039493e-06, + "loss": 1.1367099285125732, + "step": 511 + }, + { + "epoch": 0.11797235023041475, + "grad_norm": 0.5738525299064665, + "learning_rate": 1.999569737602112e-06, + "loss": 1.22605562210083, + "step": 512 + }, + { + "epoch": 0.11820276497695853, + "grad_norm": 0.40700112362856533, + "learning_rate": 1.9995584902140514e-06, + "loss": 0.8814148306846619, + "step": 513 + }, + { + "epoch": 0.1184331797235023, + "grad_norm": 0.4018062947026822, + "learning_rate": 1.9995470977413988e-06, + "loss": 0.916766881942749, + "step": 514 + }, + { + "epoch": 0.11866359447004608, + "grad_norm": 0.3907370494982875, + "learning_rate": 1.999535560185808e-06, + "loss": 0.8088599443435669, + "step": 515 + }, + { + "epoch": 0.11889400921658987, + "grad_norm": 0.5585215819507526, + "learning_rate": 1.9995238775489538e-06, + "loss": 1.0029397010803223, + "step": 516 + }, + { + 
"epoch": 0.11912442396313364, + "grad_norm": 0.47103060321263474, + "learning_rate": 1.9995120498325322e-06, + "loss": 1.157515287399292, + "step": 517 + }, + { + "epoch": 0.11935483870967742, + "grad_norm": 0.43934234876750516, + "learning_rate": 1.99950007703826e-06, + "loss": 0.989453911781311, + "step": 518 + }, + { + "epoch": 0.1195852534562212, + "grad_norm": 0.501533126043576, + "learning_rate": 1.999487959167874e-06, + "loss": 0.9791898727416992, + "step": 519 + }, + { + "epoch": 0.11981566820276497, + "grad_norm": 0.3947583681206324, + "learning_rate": 1.9994756962231343e-06, + "loss": 0.9994203448295593, + "step": 520 + }, + { + "epoch": 0.12004608294930876, + "grad_norm": 0.4064680989752179, + "learning_rate": 1.999463288205821e-06, + "loss": 0.9096299409866333, + "step": 521 + }, + { + "epoch": 0.12027649769585254, + "grad_norm": 0.5675118509929592, + "learning_rate": 1.999450735117734e-06, + "loss": 0.9956046342849731, + "step": 522 + }, + { + "epoch": 0.12050691244239631, + "grad_norm": 0.40854646192247485, + "learning_rate": 1.9994380369606956e-06, + "loss": 1.0336079597473145, + "step": 523 + }, + { + "epoch": 0.12073732718894009, + "grad_norm": 0.4028964743045085, + "learning_rate": 1.99942519373655e-06, + "loss": 0.8828116655349731, + "step": 524 + }, + { + "epoch": 0.12096774193548387, + "grad_norm": 0.4113573248244064, + "learning_rate": 1.9994122054471597e-06, + "loss": 0.8733093738555908, + "step": 525 + }, + { + "epoch": 0.12119815668202764, + "grad_norm": 0.4633889976755098, + "learning_rate": 1.9993990720944114e-06, + "loss": 1.0312494039535522, + "step": 526 + }, + { + "epoch": 0.12142857142857143, + "grad_norm": 0.39342421435973574, + "learning_rate": 1.9993857936802105e-06, + "loss": 0.9229701161384583, + "step": 527 + }, + { + "epoch": 0.12165898617511521, + "grad_norm": 0.4629141668744642, + "learning_rate": 1.9993723702064853e-06, + "loss": 0.8980100154876709, + "step": 528 + }, + { + "epoch": 0.12188940092165898, + "grad_norm": 
0.42208035145091816, + "learning_rate": 1.999358801675183e-06, + "loss": 0.939933180809021, + "step": 529 + }, + { + "epoch": 0.12211981566820276, + "grad_norm": 0.3966309171286601, + "learning_rate": 1.9993450880882733e-06, + "loss": 1.0014444589614868, + "step": 530 + }, + { + "epoch": 0.12235023041474655, + "grad_norm": 0.4166874579150977, + "learning_rate": 1.9993312294477477e-06, + "loss": 0.9995889663696289, + "step": 531 + }, + { + "epoch": 0.12258064516129032, + "grad_norm": 0.37598019229960666, + "learning_rate": 1.9993172257556167e-06, + "loss": 1.0010197162628174, + "step": 532 + }, + { + "epoch": 0.1228110599078341, + "grad_norm": 0.3629842057209114, + "learning_rate": 1.9993030770139135e-06, + "loss": 0.972966194152832, + "step": 533 + }, + { + "epoch": 0.12304147465437788, + "grad_norm": 0.4160633061352588, + "learning_rate": 1.9992887832246917e-06, + "loss": 0.8033444881439209, + "step": 534 + }, + { + "epoch": 0.12327188940092165, + "grad_norm": 0.3895553967201257, + "learning_rate": 1.9992743443900254e-06, + "loss": 0.7532742619514465, + "step": 535 + }, + { + "epoch": 0.12350230414746544, + "grad_norm": 0.46964696388446997, + "learning_rate": 1.9992597605120113e-06, + "loss": 1.058760643005371, + "step": 536 + }, + { + "epoch": 0.12373271889400922, + "grad_norm": 0.37591416731208094, + "learning_rate": 1.9992450315927658e-06, + "loss": 0.8559634685516357, + "step": 537 + }, + { + "epoch": 0.12396313364055299, + "grad_norm": 0.4216079229956694, + "learning_rate": 1.9992301576344267e-06, + "loss": 1.053638219833374, + "step": 538 + }, + { + "epoch": 0.12419354838709677, + "grad_norm": 0.5423293655738015, + "learning_rate": 1.9992151386391528e-06, + "loss": 0.8841970562934875, + "step": 539 + }, + { + "epoch": 0.12442396313364056, + "grad_norm": 0.5667972752402203, + "learning_rate": 1.9991999746091247e-06, + "loss": 0.9355173110961914, + "step": 540 + }, + { + "epoch": 0.12465437788018432, + "grad_norm": 0.43323548094659586, + "learning_rate": 
1.999184665546543e-06, + "loss": 0.9978284239768982, + "step": 541 + }, + { + "epoch": 0.12488479262672811, + "grad_norm": 0.4166718713190779, + "learning_rate": 1.99916921145363e-06, + "loss": 0.8855264782905579, + "step": 542 + }, + { + "epoch": 0.1251152073732719, + "grad_norm": 0.5314416958418489, + "learning_rate": 1.9991536123326283e-06, + "loss": 0.885519802570343, + "step": 543 + }, + { + "epoch": 0.12534562211981568, + "grad_norm": 0.4381118612604031, + "learning_rate": 1.9991378681858024e-06, + "loss": 0.9772528409957886, + "step": 544 + }, + { + "epoch": 0.12557603686635946, + "grad_norm": 0.46876887659201405, + "learning_rate": 1.999121979015438e-06, + "loss": 0.8817745447158813, + "step": 545 + }, + { + "epoch": 0.12580645161290321, + "grad_norm": 0.36530562318650095, + "learning_rate": 1.9991059448238404e-06, + "loss": 0.9374080896377563, + "step": 546 + }, + { + "epoch": 0.126036866359447, + "grad_norm": 0.3669313811039727, + "learning_rate": 1.9990897656133383e-06, + "loss": 0.9174116253852844, + "step": 547 + }, + { + "epoch": 0.12626728110599078, + "grad_norm": 0.401361126928626, + "learning_rate": 1.999073441386279e-06, + "loss": 0.9514039158821106, + "step": 548 + }, + { + "epoch": 0.12649769585253456, + "grad_norm": 0.4665811721686224, + "learning_rate": 1.999056972145032e-06, + "loss": 1.10535728931427, + "step": 549 + }, + { + "epoch": 0.12672811059907835, + "grad_norm": 0.4609610092344924, + "learning_rate": 1.999040357891989e-06, + "loss": 1.0641597509384155, + "step": 550 + }, + { + "epoch": 0.12695852534562213, + "grad_norm": 0.39409304359090785, + "learning_rate": 1.99902359862956e-06, + "loss": 0.9596017599105835, + "step": 551 + }, + { + "epoch": 0.1271889400921659, + "grad_norm": 0.4899166130843387, + "learning_rate": 1.9990066943601777e-06, + "loss": 1.083927869796753, + "step": 552 + }, + { + "epoch": 0.12741935483870967, + "grad_norm": 0.42007806110658624, + "learning_rate": 1.998989645086297e-06, + "loss": 0.9146738052368164, + 
"step": 553 + }, + { + "epoch": 0.12764976958525345, + "grad_norm": 0.41224202627344914, + "learning_rate": 1.998972450810391e-06, + "loss": 0.9038050770759583, + "step": 554 + }, + { + "epoch": 0.12788018433179724, + "grad_norm": 0.45759233489952406, + "learning_rate": 1.9989551115349574e-06, + "loss": 0.973220705986023, + "step": 555 + }, + { + "epoch": 0.12811059907834102, + "grad_norm": 0.424280511041039, + "learning_rate": 1.998937627262511e-06, + "loss": 0.8804281949996948, + "step": 556 + }, + { + "epoch": 0.1283410138248848, + "grad_norm": 0.47603807991909786, + "learning_rate": 1.9989199979955903e-06, + "loss": 1.100919485092163, + "step": 557 + }, + { + "epoch": 0.12857142857142856, + "grad_norm": 0.5871199693144976, + "learning_rate": 1.998902223736755e-06, + "loss": 1.1152353286743164, + "step": 558 + }, + { + "epoch": 0.12880184331797234, + "grad_norm": 0.4236469989661471, + "learning_rate": 1.9988843044885837e-06, + "loss": 1.0721793174743652, + "step": 559 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 0.4234271408586371, + "learning_rate": 1.9988662402536783e-06, + "loss": 0.9035133123397827, + "step": 560 + }, + { + "epoch": 0.1292626728110599, + "grad_norm": 0.4210053632602843, + "learning_rate": 1.9988480310346603e-06, + "loss": 1.0053937435150146, + "step": 561 + }, + { + "epoch": 0.1294930875576037, + "grad_norm": 0.5230478085674195, + "learning_rate": 1.9988296768341728e-06, + "loss": 0.8536228537559509, + "step": 562 + }, + { + "epoch": 0.12972350230414748, + "grad_norm": 0.41493118398063783, + "learning_rate": 1.9988111776548797e-06, + "loss": 0.9673396348953247, + "step": 563 + }, + { + "epoch": 0.12995391705069123, + "grad_norm": 0.36295429679681995, + "learning_rate": 1.998792533499467e-06, + "loss": 0.9402456879615784, + "step": 564 + }, + { + "epoch": 0.13018433179723501, + "grad_norm": 0.3983153697524455, + "learning_rate": 1.99877374437064e-06, + "loss": 0.8900678157806396, + "step": 565 + }, + { + "epoch": 
0.1304147465437788, + "grad_norm": 0.47587146443270817, + "learning_rate": 1.9987548102711264e-06, + "loss": 0.9112892150878906, + "step": 566 + }, + { + "epoch": 0.13064516129032258, + "grad_norm": 0.3969666466780631, + "learning_rate": 1.9987357312036743e-06, + "loss": 0.763452410697937, + "step": 567 + }, + { + "epoch": 0.13087557603686636, + "grad_norm": 0.44573355289133143, + "learning_rate": 1.9987165071710527e-06, + "loss": 1.0410873889923096, + "step": 568 + }, + { + "epoch": 0.13110599078341015, + "grad_norm": 0.389372329442145, + "learning_rate": 1.9986971381760524e-06, + "loss": 1.029583215713501, + "step": 569 + }, + { + "epoch": 0.1313364055299539, + "grad_norm": 0.46351745512727555, + "learning_rate": 1.9986776242214845e-06, + "loss": 0.994928777217865, + "step": 570 + }, + { + "epoch": 0.1315668202764977, + "grad_norm": 0.49139035828687805, + "learning_rate": 1.9986579653101817e-06, + "loss": 1.001985788345337, + "step": 571 + }, + { + "epoch": 0.13179723502304147, + "grad_norm": 0.45831221525956994, + "learning_rate": 1.998638161444997e-06, + "loss": 0.9813050031661987, + "step": 572 + }, + { + "epoch": 0.13202764976958525, + "grad_norm": 0.45157644768988, + "learning_rate": 1.9986182126288053e-06, + "loss": 0.8678451180458069, + "step": 573 + }, + { + "epoch": 0.13225806451612904, + "grad_norm": 0.42446769343835317, + "learning_rate": 1.998598118864502e-06, + "loss": 1.0393729209899902, + "step": 574 + }, + { + "epoch": 0.13248847926267282, + "grad_norm": 0.39102315770569207, + "learning_rate": 1.998577880155004e-06, + "loss": 0.9631935358047485, + "step": 575 + }, + { + "epoch": 0.1327188940092166, + "grad_norm": 0.37655183711017187, + "learning_rate": 1.9985574965032483e-06, + "loss": 0.8480437994003296, + "step": 576 + }, + { + "epoch": 0.13294930875576036, + "grad_norm": 0.432103661547375, + "learning_rate": 1.998536967912194e-06, + "loss": 1.0450071096420288, + "step": 577 + }, + { + "epoch": 0.13317972350230414, + "grad_norm": 
0.5144084645376303, + "learning_rate": 1.9985162943848207e-06, + "loss": 0.9374763369560242, + "step": 578 + }, + { + "epoch": 0.13341013824884793, + "grad_norm": 0.45444537143479036, + "learning_rate": 1.9984954759241293e-06, + "loss": 0.9405182600021362, + "step": 579 + }, + { + "epoch": 0.1336405529953917, + "grad_norm": 0.42824704739155545, + "learning_rate": 1.998474512533141e-06, + "loss": 0.8406375646591187, + "step": 580 + }, + { + "epoch": 0.1338709677419355, + "grad_norm": 0.48847321743709643, + "learning_rate": 1.9984534042148994e-06, + "loss": 0.9323312044143677, + "step": 581 + }, + { + "epoch": 0.13410138248847928, + "grad_norm": 0.43641992007971325, + "learning_rate": 1.998432150972468e-06, + "loss": 1.0704214572906494, + "step": 582 + }, + { + "epoch": 0.13433179723502303, + "grad_norm": 0.38681502715760335, + "learning_rate": 1.9984107528089315e-06, + "loss": 0.8636025190353394, + "step": 583 + }, + { + "epoch": 0.13456221198156681, + "grad_norm": 0.4361205380771583, + "learning_rate": 1.998389209727396e-06, + "loss": 0.9616303443908691, + "step": 584 + }, + { + "epoch": 0.1347926267281106, + "grad_norm": 0.4406937724242653, + "learning_rate": 1.998367521730988e-06, + "loss": 1.0457193851470947, + "step": 585 + }, + { + "epoch": 0.13502304147465438, + "grad_norm": 0.4060450620979949, + "learning_rate": 1.9983456888228566e-06, + "loss": 1.0460572242736816, + "step": 586 + }, + { + "epoch": 0.13525345622119817, + "grad_norm": 0.3771944294411782, + "learning_rate": 1.9983237110061695e-06, + "loss": 0.9693883657455444, + "step": 587 + }, + { + "epoch": 0.13548387096774195, + "grad_norm": 0.4355709352067949, + "learning_rate": 1.9983015882841175e-06, + "loss": 0.8823472857475281, + "step": 588 + }, + { + "epoch": 0.1357142857142857, + "grad_norm": 0.5606637533068962, + "learning_rate": 1.998279320659912e-06, + "loss": 1.1602983474731445, + "step": 589 + }, + { + "epoch": 0.1359447004608295, + "grad_norm": 0.40130101265364443, + "learning_rate": 
1.9982569081367843e-06, + "loss": 0.8191353678703308, + "step": 590 + }, + { + "epoch": 0.13617511520737327, + "grad_norm": 0.40863931644700857, + "learning_rate": 1.9982343507179876e-06, + "loss": 1.141557216644287, + "step": 591 + }, + { + "epoch": 0.13640552995391705, + "grad_norm": 0.4712969186607289, + "learning_rate": 1.998211648406797e-06, + "loss": 0.9688570499420166, + "step": 592 + }, + { + "epoch": 0.13663594470046084, + "grad_norm": 0.4543844570436241, + "learning_rate": 1.9981888012065068e-06, + "loss": 1.0218561887741089, + "step": 593 + }, + { + "epoch": 0.13686635944700462, + "grad_norm": 0.5219271265911207, + "learning_rate": 1.9981658091204334e-06, + "loss": 0.9531952142715454, + "step": 594 + }, + { + "epoch": 0.13709677419354838, + "grad_norm": 0.5314208269690397, + "learning_rate": 1.9981426721519143e-06, + "loss": 1.1421492099761963, + "step": 595 + }, + { + "epoch": 0.13732718894009216, + "grad_norm": 0.3970137466851754, + "learning_rate": 1.9981193903043074e-06, + "loss": 0.8173041343688965, + "step": 596 + }, + { + "epoch": 0.13755760368663594, + "grad_norm": 0.43200053855682263, + "learning_rate": 1.998095963580993e-06, + "loss": 0.8842465877532959, + "step": 597 + }, + { + "epoch": 0.13778801843317973, + "grad_norm": 0.6492506358781442, + "learning_rate": 1.9980723919853703e-06, + "loss": 0.8547788858413696, + "step": 598 + }, + { + "epoch": 0.1380184331797235, + "grad_norm": 0.5287255050220753, + "learning_rate": 1.998048675520861e-06, + "loss": 1.0085712671279907, + "step": 599 + }, + { + "epoch": 0.1382488479262673, + "grad_norm": 0.5226769291219134, + "learning_rate": 1.9980248141909083e-06, + "loss": 0.9276378750801086, + "step": 600 + }, + { + "epoch": 0.13847926267281105, + "grad_norm": 0.44292446989501455, + "learning_rate": 1.998000807998975e-06, + "loss": 0.9236693382263184, + "step": 601 + }, + { + "epoch": 0.13870967741935483, + "grad_norm": 0.43514287150953085, + "learning_rate": 1.9979766569485454e-06, + "loss": 
1.0353924036026, + "step": 602 + }, + { + "epoch": 0.13894009216589862, + "grad_norm": 0.3831256791535214, + "learning_rate": 1.9979523610431246e-06, + "loss": 0.8456567525863647, + "step": 603 + }, + { + "epoch": 0.1391705069124424, + "grad_norm": 0.46736174894260846, + "learning_rate": 1.997927920286241e-06, + "loss": 0.997468888759613, + "step": 604 + }, + { + "epoch": 0.13940092165898618, + "grad_norm": 0.38558572890255066, + "learning_rate": 1.9979033346814397e-06, + "loss": 0.8962260484695435, + "step": 605 + }, + { + "epoch": 0.13963133640552997, + "grad_norm": 0.4829548009529998, + "learning_rate": 1.997878604232291e-06, + "loss": 0.8586266040802002, + "step": 606 + }, + { + "epoch": 0.13986175115207372, + "grad_norm": 0.4776734917637134, + "learning_rate": 1.9978537289423837e-06, + "loss": 0.9639670848846436, + "step": 607 + }, + { + "epoch": 0.1400921658986175, + "grad_norm": 0.4115822234384495, + "learning_rate": 1.9978287088153286e-06, + "loss": 1.005727767944336, + "step": 608 + }, + { + "epoch": 0.1403225806451613, + "grad_norm": 0.44858527541471366, + "learning_rate": 1.9978035438547575e-06, + "loss": 1.148871898651123, + "step": 609 + }, + { + "epoch": 0.14055299539170507, + "grad_norm": 0.4357664217922314, + "learning_rate": 1.9977782340643226e-06, + "loss": 1.0459539890289307, + "step": 610 + }, + { + "epoch": 0.14078341013824885, + "grad_norm": 0.43229915305128663, + "learning_rate": 1.9977527794476985e-06, + "loss": 0.92689448595047, + "step": 611 + }, + { + "epoch": 0.14101382488479264, + "grad_norm": 0.5514025110097415, + "learning_rate": 1.997727180008579e-06, + "loss": 0.9735790491104126, + "step": 612 + }, + { + "epoch": 0.1412442396313364, + "grad_norm": 0.5114055458545007, + "learning_rate": 1.99770143575068e-06, + "loss": 0.8882870674133301, + "step": 613 + }, + { + "epoch": 0.14147465437788018, + "grad_norm": 0.47604166837238787, + "learning_rate": 1.9976755466777386e-06, + "loss": 0.9229795932769775, + "step": 614 + }, + { + "epoch": 
0.14170506912442396, + "grad_norm": 0.39391923738635765, + "learning_rate": 1.997649512793512e-06, + "loss": 0.9097769260406494, + "step": 615 + }, + { + "epoch": 0.14193548387096774, + "grad_norm": 0.429877903042447, + "learning_rate": 1.9976233341017798e-06, + "loss": 0.7751711010932922, + "step": 616 + }, + { + "epoch": 0.14216589861751153, + "grad_norm": 0.4585028421290768, + "learning_rate": 1.9975970106063414e-06, + "loss": 0.9071080684661865, + "step": 617 + }, + { + "epoch": 0.1423963133640553, + "grad_norm": 0.372835081071011, + "learning_rate": 1.997570542311017e-06, + "loss": 0.8444115519523621, + "step": 618 + }, + { + "epoch": 0.14262672811059907, + "grad_norm": 0.46125503087530084, + "learning_rate": 1.9975439292196496e-06, + "loss": 0.9159516096115112, + "step": 619 + }, + { + "epoch": 0.14285714285714285, + "grad_norm": 0.45879242474243875, + "learning_rate": 1.997517171336101e-06, + "loss": 0.9697242975234985, + "step": 620 + }, + { + "epoch": 0.14308755760368663, + "grad_norm": 0.4605305033840643, + "learning_rate": 1.9974902686642557e-06, + "loss": 0.9894170761108398, + "step": 621 + }, + { + "epoch": 0.14331797235023042, + "grad_norm": 0.48517122287493847, + "learning_rate": 1.9974632212080184e-06, + "loss": 1.0364127159118652, + "step": 622 + }, + { + "epoch": 0.1435483870967742, + "grad_norm": 0.39169164845291754, + "learning_rate": 1.997436028971315e-06, + "loss": 0.8980219960212708, + "step": 623 + }, + { + "epoch": 0.14377880184331798, + "grad_norm": 0.4857070397144096, + "learning_rate": 1.9974086919580925e-06, + "loss": 1.0293703079223633, + "step": 624 + }, + { + "epoch": 0.14400921658986174, + "grad_norm": 0.46693238253454916, + "learning_rate": 1.9973812101723186e-06, + "loss": 1.006148099899292, + "step": 625 + }, + { + "epoch": 0.14423963133640552, + "grad_norm": 0.5525790188158035, + "learning_rate": 1.9973535836179825e-06, + "loss": 0.9489799737930298, + "step": 626 + }, + { + "epoch": 0.1444700460829493, + "grad_norm": 
0.3704152285915921, + "learning_rate": 1.997325812299094e-06, + "loss": 0.7601498961448669, + "step": 627 + }, + { + "epoch": 0.1447004608294931, + "grad_norm": 0.6225002321802279, + "learning_rate": 1.9972978962196843e-06, + "loss": 0.8345643281936646, + "step": 628 + }, + { + "epoch": 0.14493087557603687, + "grad_norm": 0.48694459235316484, + "learning_rate": 1.9972698353838053e-06, + "loss": 0.8705894947052002, + "step": 629 + }, + { + "epoch": 0.14516129032258066, + "grad_norm": 0.42033173985472694, + "learning_rate": 1.9972416297955294e-06, + "loss": 0.9515185356140137, + "step": 630 + }, + { + "epoch": 0.1453917050691244, + "grad_norm": 0.38157222553103914, + "learning_rate": 1.9972132794589514e-06, + "loss": 0.7616517543792725, + "step": 631 + }, + { + "epoch": 0.1456221198156682, + "grad_norm": 0.47593248323635307, + "learning_rate": 1.9971847843781862e-06, + "loss": 0.8870444297790527, + "step": 632 + }, + { + "epoch": 0.14585253456221198, + "grad_norm": 0.45987330163099194, + "learning_rate": 1.9971561445573696e-06, + "loss": 0.8709393739700317, + "step": 633 + }, + { + "epoch": 0.14608294930875576, + "grad_norm": 0.35616612587319196, + "learning_rate": 1.997127360000658e-06, + "loss": 0.865444540977478, + "step": 634 + }, + { + "epoch": 0.14631336405529954, + "grad_norm": 0.4431578416665891, + "learning_rate": 1.997098430712231e-06, + "loss": 0.9560728073120117, + "step": 635 + }, + { + "epoch": 0.14654377880184333, + "grad_norm": 0.4107966614124612, + "learning_rate": 1.9970693566962866e-06, + "loss": 0.7579058408737183, + "step": 636 + }, + { + "epoch": 0.14677419354838708, + "grad_norm": 0.4609569973718347, + "learning_rate": 1.997040137957045e-06, + "loss": 1.0709021091461182, + "step": 637 + }, + { + "epoch": 0.14700460829493087, + "grad_norm": 0.5029329480654331, + "learning_rate": 1.9970107744987474e-06, + "loss": 0.9911563396453857, + "step": 638 + }, + { + "epoch": 0.14723502304147465, + "grad_norm": 0.45338591583748106, + "learning_rate": 
1.996981266325655e-06, + "loss": 0.9673472046852112, + "step": 639 + }, + { + "epoch": 0.14746543778801843, + "grad_norm": 0.3918341582647882, + "learning_rate": 1.9969516134420523e-06, + "loss": 0.7728441953659058, + "step": 640 + }, + { + "epoch": 0.14769585253456222, + "grad_norm": 0.532382418423259, + "learning_rate": 1.9969218158522426e-06, + "loss": 1.0198101997375488, + "step": 641 + }, + { + "epoch": 0.147926267281106, + "grad_norm": 0.45259693038053805, + "learning_rate": 1.996891873560551e-06, + "loss": 0.9710760116577148, + "step": 642 + }, + { + "epoch": 0.14815668202764978, + "grad_norm": 0.41281494255735757, + "learning_rate": 1.9968617865713237e-06, + "loss": 0.9956847429275513, + "step": 643 + }, + { + "epoch": 0.14838709677419354, + "grad_norm": 0.40081796016292187, + "learning_rate": 1.996831554888928e-06, + "loss": 1.0974771976470947, + "step": 644 + }, + { + "epoch": 0.14861751152073732, + "grad_norm": 0.5353172634899142, + "learning_rate": 1.9968011785177513e-06, + "loss": 0.914455771446228, + "step": 645 + }, + { + "epoch": 0.1488479262672811, + "grad_norm": 0.5511418094652546, + "learning_rate": 1.9967706574622033e-06, + "loss": 1.1308314800262451, + "step": 646 + }, + { + "epoch": 0.1490783410138249, + "grad_norm": 0.5114786055194052, + "learning_rate": 1.9967399917267142e-06, + "loss": 0.981814444065094, + "step": 647 + }, + { + "epoch": 0.14930875576036867, + "grad_norm": 0.431645238473459, + "learning_rate": 1.9967091813157345e-06, + "loss": 0.874076247215271, + "step": 648 + }, + { + "epoch": 0.14953917050691246, + "grad_norm": 0.39621973386547166, + "learning_rate": 1.9966782262337365e-06, + "loss": 0.8496171832084656, + "step": 649 + }, + { + "epoch": 0.1497695852534562, + "grad_norm": 0.49468581823361646, + "learning_rate": 1.9966471264852136e-06, + "loss": 0.9395674467086792, + "step": 650 + }, + { + "epoch": 0.15, + "grad_norm": 0.4120224768195847, + "learning_rate": 1.99661588207468e-06, + "loss": 0.8363018035888672, + "step": 651 
+ }, + { + "epoch": 0.15023041474654378, + "grad_norm": 0.4552124844336583, + "learning_rate": 1.9965844930066696e-06, + "loss": 1.0035831928253174, + "step": 652 + }, + { + "epoch": 0.15046082949308756, + "grad_norm": 0.3910663219458386, + "learning_rate": 1.99655295928574e-06, + "loss": 1.0316795110702515, + "step": 653 + }, + { + "epoch": 0.15069124423963134, + "grad_norm": 0.4287067909796643, + "learning_rate": 1.9965212809164676e-06, + "loss": 0.9545150995254517, + "step": 654 + }, + { + "epoch": 0.15092165898617513, + "grad_norm": 0.4577708396372056, + "learning_rate": 1.99648945790345e-06, + "loss": 0.993801474571228, + "step": 655 + }, + { + "epoch": 0.15115207373271888, + "grad_norm": 0.4032410507172632, + "learning_rate": 1.9964574902513075e-06, + "loss": 0.8666588664054871, + "step": 656 + }, + { + "epoch": 0.15138248847926267, + "grad_norm": 0.48179220104835324, + "learning_rate": 1.9964253779646787e-06, + "loss": 0.9507651925086975, + "step": 657 + }, + { + "epoch": 0.15161290322580645, + "grad_norm": 0.4899406622119438, + "learning_rate": 1.996393121048226e-06, + "loss": 0.8700851202011108, + "step": 658 + }, + { + "epoch": 0.15184331797235023, + "grad_norm": 0.40256613113119405, + "learning_rate": 1.9963607195066307e-06, + "loss": 0.9966975450515747, + "step": 659 + }, + { + "epoch": 0.15207373271889402, + "grad_norm": 0.44964674216674483, + "learning_rate": 1.9963281733445957e-06, + "loss": 0.9552028179168701, + "step": 660 + }, + { + "epoch": 0.1523041474654378, + "grad_norm": 0.47921018457871023, + "learning_rate": 1.9962954825668456e-06, + "loss": 1.0182740688323975, + "step": 661 + }, + { + "epoch": 0.15253456221198156, + "grad_norm": 0.5096203598929419, + "learning_rate": 1.996262647178125e-06, + "loss": 1.0001778602600098, + "step": 662 + }, + { + "epoch": 0.15276497695852534, + "grad_norm": 0.44730944505165277, + "learning_rate": 1.9962296671832e-06, + "loss": 0.9902865886688232, + "step": 663 + }, + { + "epoch": 0.15299539170506912, + 
"grad_norm": 0.44977913840647327, + "learning_rate": 1.9961965425868575e-06, + "loss": 0.9272845983505249, + "step": 664 + }, + { + "epoch": 0.1532258064516129, + "grad_norm": 0.5011405916103636, + "learning_rate": 1.996163273393906e-06, + "loss": 0.9705777168273926, + "step": 665 + }, + { + "epoch": 0.1534562211981567, + "grad_norm": 0.5035568947424544, + "learning_rate": 1.9961298596091736e-06, + "loss": 0.9472209215164185, + "step": 666 + }, + { + "epoch": 0.15368663594470047, + "grad_norm": 0.3982786140702462, + "learning_rate": 1.9960963012375113e-06, + "loss": 0.9734043478965759, + "step": 667 + }, + { + "epoch": 0.15391705069124423, + "grad_norm": 0.516464356110248, + "learning_rate": 1.9960625982837894e-06, + "loss": 0.8765468001365662, + "step": 668 + }, + { + "epoch": 0.154147465437788, + "grad_norm": 0.6158206412964224, + "learning_rate": 1.9960287507529e-06, + "loss": 1.0224063396453857, + "step": 669 + }, + { + "epoch": 0.1543778801843318, + "grad_norm": 0.4417623345727513, + "learning_rate": 1.995994758649756e-06, + "loss": 0.983299970626831, + "step": 670 + }, + { + "epoch": 0.15460829493087558, + "grad_norm": 0.4680475004359934, + "learning_rate": 1.9959606219792914e-06, + "loss": 1.0524147748947144, + "step": 671 + }, + { + "epoch": 0.15483870967741936, + "grad_norm": 0.45758073401288785, + "learning_rate": 1.9959263407464606e-06, + "loss": 1.1012977361679077, + "step": 672 + }, + { + "epoch": 0.15506912442396314, + "grad_norm": 0.6298296463565526, + "learning_rate": 1.99589191495624e-06, + "loss": 0.8494049310684204, + "step": 673 + }, + { + "epoch": 0.1552995391705069, + "grad_norm": 0.4795860182347848, + "learning_rate": 1.9958573446136263e-06, + "loss": 0.8677045106887817, + "step": 674 + }, + { + "epoch": 0.15552995391705068, + "grad_norm": 0.5514260857685808, + "learning_rate": 1.995822629723638e-06, + "loss": 1.1034941673278809, + "step": 675 + }, + { + "epoch": 0.15576036866359447, + "grad_norm": 0.3959041890885462, + "learning_rate": 
1.9957877702913128e-06, + "loss": 0.8428820371627808, + "step": 676 + }, + { + "epoch": 0.15599078341013825, + "grad_norm": 0.5435721054179383, + "learning_rate": 1.9957527663217107e-06, + "loss": 0.8584408760070801, + "step": 677 + }, + { + "epoch": 0.15622119815668203, + "grad_norm": 0.47044010204436115, + "learning_rate": 1.995717617819913e-06, + "loss": 0.8089514970779419, + "step": 678 + }, + { + "epoch": 0.15645161290322582, + "grad_norm": 0.48360065475233177, + "learning_rate": 1.9956823247910217e-06, + "loss": 0.8459775447845459, + "step": 679 + }, + { + "epoch": 0.15668202764976957, + "grad_norm": 0.441023419118176, + "learning_rate": 1.9956468872401583e-06, + "loss": 1.0583066940307617, + "step": 680 + }, + { + "epoch": 0.15691244239631336, + "grad_norm": 0.4427871322496545, + "learning_rate": 1.995611305172468e-06, + "loss": 0.9396135807037354, + "step": 681 + }, + { + "epoch": 0.15714285714285714, + "grad_norm": 0.4888169944824013, + "learning_rate": 1.995575578593114e-06, + "loss": 1.0143593549728394, + "step": 682 + }, + { + "epoch": 0.15737327188940092, + "grad_norm": 0.44801312951365924, + "learning_rate": 1.9955397075072833e-06, + "loss": 0.8822500109672546, + "step": 683 + }, + { + "epoch": 0.1576036866359447, + "grad_norm": 0.4936771776275296, + "learning_rate": 1.995503691920182e-06, + "loss": 0.8841962218284607, + "step": 684 + }, + { + "epoch": 0.1578341013824885, + "grad_norm": 0.4240877666200064, + "learning_rate": 1.9954675318370374e-06, + "loss": 0.8537080883979797, + "step": 685 + }, + { + "epoch": 0.15806451612903225, + "grad_norm": 0.5056113314098377, + "learning_rate": 1.9954312272630985e-06, + "loss": 1.0292394161224365, + "step": 686 + }, + { + "epoch": 0.15829493087557603, + "grad_norm": 0.5106923922410934, + "learning_rate": 1.995394778203635e-06, + "loss": 0.8741706013679504, + "step": 687 + }, + { + "epoch": 0.1585253456221198, + "grad_norm": 0.47911475912836377, + "learning_rate": 1.995358184663937e-06, + "loss": 
0.9429572820663452, + "step": 688 + }, + { + "epoch": 0.1587557603686636, + "grad_norm": 0.5562334593954328, + "learning_rate": 1.995321446649316e-06, + "loss": 0.9522494077682495, + "step": 689 + }, + { + "epoch": 0.15898617511520738, + "grad_norm": 0.5394048021515351, + "learning_rate": 1.9952845641651046e-06, + "loss": 0.9743782877922058, + "step": 690 + }, + { + "epoch": 0.15921658986175116, + "grad_norm": 0.4663620909245047, + "learning_rate": 1.995247537216657e-06, + "loss": 0.926364541053772, + "step": 691 + }, + { + "epoch": 0.15944700460829492, + "grad_norm": 0.4595450639525932, + "learning_rate": 1.995210365809346e-06, + "loss": 0.8355565071105957, + "step": 692 + }, + { + "epoch": 0.1596774193548387, + "grad_norm": 0.44548737988500176, + "learning_rate": 1.9951730499485684e-06, + "loss": 0.9200692772865295, + "step": 693 + }, + { + "epoch": 0.15990783410138248, + "grad_norm": 0.36513232613054547, + "learning_rate": 1.99513558963974e-06, + "loss": 0.7571361064910889, + "step": 694 + }, + { + "epoch": 0.16013824884792627, + "grad_norm": 0.48187866859107054, + "learning_rate": 1.995097984888298e-06, + "loss": 0.935307502746582, + "step": 695 + }, + { + "epoch": 0.16036866359447005, + "grad_norm": 0.5833897193983939, + "learning_rate": 1.995060235699701e-06, + "loss": 1.1118557453155518, + "step": 696 + }, + { + "epoch": 0.16059907834101383, + "grad_norm": 0.3866866326578979, + "learning_rate": 1.995022342079428e-06, + "loss": 0.8024749755859375, + "step": 697 + }, + { + "epoch": 0.1608294930875576, + "grad_norm": 0.44217187311148026, + "learning_rate": 1.994984304032979e-06, + "loss": 0.9018943309783936, + "step": 698 + }, + { + "epoch": 0.16105990783410137, + "grad_norm": 0.4729402911259197, + "learning_rate": 1.9949461215658757e-06, + "loss": 0.8571128249168396, + "step": 699 + }, + { + "epoch": 0.16129032258064516, + "grad_norm": 0.4822593475964477, + "learning_rate": 1.99490779468366e-06, + "loss": 0.9707971215248108, + "step": 700 + }, + { + "epoch": 
0.16152073732718894, + "grad_norm": 0.4341551988253619, + "learning_rate": 1.994869323391895e-06, + "loss": 0.8157618045806885, + "step": 701 + }, + { + "epoch": 0.16175115207373272, + "grad_norm": 0.4620050649733586, + "learning_rate": 1.994830707696165e-06, + "loss": 0.9009906053543091, + "step": 702 + }, + { + "epoch": 0.1619815668202765, + "grad_norm": 0.5270647594020066, + "learning_rate": 1.9947919476020745e-06, + "loss": 1.0093860626220703, + "step": 703 + }, + { + "epoch": 0.1622119815668203, + "grad_norm": 0.4233068308539462, + "learning_rate": 1.9947530431152494e-06, + "loss": 1.018160343170166, + "step": 704 + }, + { + "epoch": 0.16244239631336405, + "grad_norm": 0.5753809013533212, + "learning_rate": 1.9947139942413378e-06, + "loss": 0.9755370616912842, + "step": 705 + }, + { + "epoch": 0.16267281105990783, + "grad_norm": 0.490686071812002, + "learning_rate": 1.994674800986006e-06, + "loss": 0.9406822919845581, + "step": 706 + }, + { + "epoch": 0.1629032258064516, + "grad_norm": 0.4856505350445516, + "learning_rate": 1.994635463354944e-06, + "loss": 0.9128296971321106, + "step": 707 + }, + { + "epoch": 0.1631336405529954, + "grad_norm": 0.42889971607025285, + "learning_rate": 1.994595981353861e-06, + "loss": 0.929735541343689, + "step": 708 + }, + { + "epoch": 0.16336405529953918, + "grad_norm": 0.5176054911036664, + "learning_rate": 1.994556354988488e-06, + "loss": 0.9021023511886597, + "step": 709 + }, + { + "epoch": 0.16359447004608296, + "grad_norm": 0.46567553841056064, + "learning_rate": 1.994516584264577e-06, + "loss": 0.9187623262405396, + "step": 710 + }, + { + "epoch": 0.16382488479262672, + "grad_norm": 0.4564071002670219, + "learning_rate": 1.9944766691879e-06, + "loss": 0.8283985257148743, + "step": 711 + }, + { + "epoch": 0.1640552995391705, + "grad_norm": 0.5448909609220928, + "learning_rate": 1.994436609764251e-06, + "loss": 1.0592901706695557, + "step": 712 + }, + { + "epoch": 0.16428571428571428, + "grad_norm": 0.5512946720093808, + 
"learning_rate": 1.9943964059994446e-06, + "loss": 0.98726487159729, + "step": 713 + }, + { + "epoch": 0.16451612903225807, + "grad_norm": 0.5060774432164115, + "learning_rate": 1.9943560578993165e-06, + "loss": 0.8761749267578125, + "step": 714 + }, + { + "epoch": 0.16474654377880185, + "grad_norm": 0.4759569802502017, + "learning_rate": 1.9943155654697227e-06, + "loss": 0.878170371055603, + "step": 715 + }, + { + "epoch": 0.16497695852534563, + "grad_norm": 0.5212205127966931, + "learning_rate": 1.9942749287165414e-06, + "loss": 0.9444767236709595, + "step": 716 + }, + { + "epoch": 0.1652073732718894, + "grad_norm": 0.436107073640643, + "learning_rate": 1.9942341476456697e-06, + "loss": 0.8270057439804077, + "step": 717 + }, + { + "epoch": 0.16543778801843317, + "grad_norm": 0.36828111446023454, + "learning_rate": 1.9941932222630284e-06, + "loss": 0.825955867767334, + "step": 718 + }, + { + "epoch": 0.16566820276497696, + "grad_norm": 0.4748059596727922, + "learning_rate": 1.9941521525745564e-06, + "loss": 0.9384286403656006, + "step": 719 + }, + { + "epoch": 0.16589861751152074, + "grad_norm": 0.5968010950850139, + "learning_rate": 1.994110938586216e-06, + "loss": 0.9627010226249695, + "step": 720 + }, + { + "epoch": 0.16612903225806452, + "grad_norm": 0.40665371786149496, + "learning_rate": 1.9940695803039886e-06, + "loss": 0.8436836004257202, + "step": 721 + }, + { + "epoch": 0.1663594470046083, + "grad_norm": 0.48219849106464674, + "learning_rate": 1.994028077733878e-06, + "loss": 1.0689928531646729, + "step": 722 + }, + { + "epoch": 0.16658986175115206, + "grad_norm": 0.4600242469407339, + "learning_rate": 1.993986430881907e-06, + "loss": 0.911309003829956, + "step": 723 + }, + { + "epoch": 0.16682027649769585, + "grad_norm": 0.5404195969690949, + "learning_rate": 1.993944639754122e-06, + "loss": 0.9897152185440063, + "step": 724 + }, + { + "epoch": 0.16705069124423963, + "grad_norm": 0.48212503869308937, + "learning_rate": 1.9939027043565883e-06, + "loss": 
1.0230367183685303, + "step": 725 + }, + { + "epoch": 0.1672811059907834, + "grad_norm": 0.4398728967426152, + "learning_rate": 1.993860624695393e-06, + "loss": 0.8067069053649902, + "step": 726 + }, + { + "epoch": 0.1675115207373272, + "grad_norm": 0.5835576425821721, + "learning_rate": 1.9938184007766434e-06, + "loss": 0.9784343242645264, + "step": 727 + }, + { + "epoch": 0.16774193548387098, + "grad_norm": 0.5139557651921927, + "learning_rate": 1.9937760326064686e-06, + "loss": 0.8617877960205078, + "step": 728 + }, + { + "epoch": 0.16797235023041474, + "grad_norm": 0.5276605551773887, + "learning_rate": 1.9937335201910183e-06, + "loss": 1.0390141010284424, + "step": 729 + }, + { + "epoch": 0.16820276497695852, + "grad_norm": 0.5007165894606777, + "learning_rate": 1.9936908635364633e-06, + "loss": 1.0478965044021606, + "step": 730 + }, + { + "epoch": 0.1684331797235023, + "grad_norm": 0.46789644745982956, + "learning_rate": 1.9936480626489944e-06, + "loss": 0.8396252393722534, + "step": 731 + }, + { + "epoch": 0.16866359447004609, + "grad_norm": 0.4366381763655398, + "learning_rate": 1.9936051175348256e-06, + "loss": 0.8690099120140076, + "step": 732 + }, + { + "epoch": 0.16889400921658987, + "grad_norm": 0.44373038767323764, + "learning_rate": 1.993562028200189e-06, + "loss": 0.944722056388855, + "step": 733 + }, + { + "epoch": 0.16912442396313365, + "grad_norm": 0.4480067961897654, + "learning_rate": 1.9935187946513385e-06, + "loss": 0.7134733200073242, + "step": 734 + }, + { + "epoch": 0.1693548387096774, + "grad_norm": 0.44081731431481436, + "learning_rate": 1.993475416894551e-06, + "loss": 0.8102486729621887, + "step": 735 + }, + { + "epoch": 0.1695852534562212, + "grad_norm": 0.5621249368486638, + "learning_rate": 1.9934318949361215e-06, + "loss": 0.924787163734436, + "step": 736 + }, + { + "epoch": 0.16981566820276497, + "grad_norm": 0.4621168425652111, + "learning_rate": 1.993388228782368e-06, + "loss": 0.9595087766647339, + "step": 737 + }, + { + 
"epoch": 0.17004608294930876, + "grad_norm": 0.4164356485660062, + "learning_rate": 1.993344418439628e-06, + "loss": 0.9949792623519897, + "step": 738 + }, + { + "epoch": 0.17027649769585254, + "grad_norm": 0.6359964400004778, + "learning_rate": 1.9933004639142604e-06, + "loss": 1.0905860662460327, + "step": 739 + }, + { + "epoch": 0.17050691244239632, + "grad_norm": 0.39800173884382345, + "learning_rate": 1.9932563652126455e-06, + "loss": 0.9638324975967407, + "step": 740 + }, + { + "epoch": 0.17073732718894008, + "grad_norm": 0.4909114039853375, + "learning_rate": 1.9932121223411844e-06, + "loss": 0.9434946179389954, + "step": 741 + }, + { + "epoch": 0.17096774193548386, + "grad_norm": 0.49072837958490606, + "learning_rate": 1.9931677353062983e-06, + "loss": 0.9050095081329346, + "step": 742 + }, + { + "epoch": 0.17119815668202765, + "grad_norm": 0.509303736181324, + "learning_rate": 1.9931232041144303e-06, + "loss": 1.0698316097259521, + "step": 743 + }, + { + "epoch": 0.17142857142857143, + "grad_norm": 0.393391743712663, + "learning_rate": 1.993078528772044e-06, + "loss": 0.7938296794891357, + "step": 744 + }, + { + "epoch": 0.1716589861751152, + "grad_norm": 0.46597408496400117, + "learning_rate": 1.993033709285624e-06, + "loss": 0.8485043048858643, + "step": 745 + }, + { + "epoch": 0.171889400921659, + "grad_norm": 0.4736797887475262, + "learning_rate": 1.9929887456616754e-06, + "loss": 0.8605694770812988, + "step": 746 + }, + { + "epoch": 0.17211981566820275, + "grad_norm": 0.40523028160004354, + "learning_rate": 1.9929436379067253e-06, + "loss": 0.7101563215255737, + "step": 747 + }, + { + "epoch": 0.17235023041474654, + "grad_norm": 0.4519555914654837, + "learning_rate": 1.9928983860273205e-06, + "loss": 1.093912959098816, + "step": 748 + }, + { + "epoch": 0.17258064516129032, + "grad_norm": 0.4930830686705908, + "learning_rate": 1.9928529900300294e-06, + "loss": 0.8099753856658936, + "step": 749 + }, + { + "epoch": 0.1728110599078341, + "grad_norm": 
0.3752662958180716, + "learning_rate": 1.992807449921441e-06, + "loss": 0.7816359400749207, + "step": 750 + }, + { + "epoch": 0.17304147465437789, + "grad_norm": 0.5180432792159949, + "learning_rate": 1.9927617657081656e-06, + "loss": 0.8887455463409424, + "step": 751 + }, + { + "epoch": 0.17327188940092167, + "grad_norm": 0.6260862232080928, + "learning_rate": 1.992715937396834e-06, + "loss": 1.0926017761230469, + "step": 752 + }, + { + "epoch": 0.17350230414746542, + "grad_norm": 0.5546410088380269, + "learning_rate": 1.9926699649940985e-06, + "loss": 0.7657707929611206, + "step": 753 + }, + { + "epoch": 0.1737327188940092, + "grad_norm": 0.5766197712214459, + "learning_rate": 1.992623848506632e-06, + "loss": 0.9350340366363525, + "step": 754 + }, + { + "epoch": 0.173963133640553, + "grad_norm": 0.5011774306610247, + "learning_rate": 1.9925775879411276e-06, + "loss": 0.883575439453125, + "step": 755 + }, + { + "epoch": 0.17419354838709677, + "grad_norm": 0.3678933943457833, + "learning_rate": 1.9925311833043e-06, + "loss": 0.814304769039154, + "step": 756 + }, + { + "epoch": 0.17442396313364056, + "grad_norm": 0.5857143887476359, + "learning_rate": 1.992484634602886e-06, + "loss": 0.9263690710067749, + "step": 757 + }, + { + "epoch": 0.17465437788018434, + "grad_norm": 0.49862680540203774, + "learning_rate": 1.9924379418436402e-06, + "loss": 1.0321627855300903, + "step": 758 + }, + { + "epoch": 0.1748847926267281, + "grad_norm": 0.5062063825952041, + "learning_rate": 1.9923911050333413e-06, + "loss": 0.969459056854248, + "step": 759 + }, + { + "epoch": 0.17511520737327188, + "grad_norm": 0.4554436665394103, + "learning_rate": 1.9923441241787874e-06, + "loss": 0.9926396012306213, + "step": 760 + }, + { + "epoch": 0.17534562211981566, + "grad_norm": 0.43315077691547155, + "learning_rate": 1.9922969992867975e-06, + "loss": 0.776180624961853, + "step": 761 + }, + { + "epoch": 0.17557603686635945, + "grad_norm": 0.5350913373105377, + "learning_rate": 
1.992249730364212e-06, + "loss": 0.9413800239562988, + "step": 762 + }, + { + "epoch": 0.17580645161290323, + "grad_norm": 0.48045178893419493, + "learning_rate": 1.9922023174178913e-06, + "loss": 0.8365576267242432, + "step": 763 + }, + { + "epoch": 0.17603686635944701, + "grad_norm": 0.47752363664412967, + "learning_rate": 1.992154760454718e-06, + "loss": 1.023102879524231, + "step": 764 + }, + { + "epoch": 0.17626728110599077, + "grad_norm": 0.6035875388891613, + "learning_rate": 1.9921070594815944e-06, + "loss": 1.079930067062378, + "step": 765 + }, + { + "epoch": 0.17649769585253455, + "grad_norm": 0.44885698296531085, + "learning_rate": 1.9920592145054445e-06, + "loss": 0.8974392414093018, + "step": 766 + }, + { + "epoch": 0.17672811059907834, + "grad_norm": 0.5363940338283703, + "learning_rate": 1.9920112255332133e-06, + "loss": 0.9509298205375671, + "step": 767 + }, + { + "epoch": 0.17695852534562212, + "grad_norm": 0.3960858930926947, + "learning_rate": 1.991963092571866e-06, + "loss": 0.938835620880127, + "step": 768 + }, + { + "epoch": 0.1771889400921659, + "grad_norm": 0.3409332869225393, + "learning_rate": 1.9919148156283888e-06, + "loss": 0.7918044328689575, + "step": 769 + }, + { + "epoch": 0.1774193548387097, + "grad_norm": 0.46985590284048473, + "learning_rate": 1.9918663947097893e-06, + "loss": 0.8235958814620972, + "step": 770 + }, + { + "epoch": 0.17764976958525347, + "grad_norm": 0.4734643903674827, + "learning_rate": 1.9918178298230953e-06, + "loss": 0.9079158902168274, + "step": 771 + }, + { + "epoch": 0.17788018433179723, + "grad_norm": 0.5764167010482935, + "learning_rate": 1.9917691209753563e-06, + "loss": 0.8548607230186462, + "step": 772 + }, + { + "epoch": 0.178110599078341, + "grad_norm": 0.47446352682333093, + "learning_rate": 1.9917202681736428e-06, + "loss": 0.8327757120132446, + "step": 773 + }, + { + "epoch": 0.1783410138248848, + "grad_norm": 0.5415533792438672, + "learning_rate": 1.991671271425045e-06, + "loss": 
1.0511503219604492, + "step": 774 + }, + { + "epoch": 0.17857142857142858, + "grad_norm": 0.4310425860855909, + "learning_rate": 1.991622130736675e-06, + "loss": 0.9168857932090759, + "step": 775 + }, + { + "epoch": 0.17880184331797236, + "grad_norm": 0.44391822434593214, + "learning_rate": 1.9915728461156654e-06, + "loss": 0.8740782737731934, + "step": 776 + }, + { + "epoch": 0.17903225806451614, + "grad_norm": 0.5841506637592749, + "learning_rate": 1.99152341756917e-06, + "loss": 0.9706588983535767, + "step": 777 + }, + { + "epoch": 0.1792626728110599, + "grad_norm": 0.5492923015057676, + "learning_rate": 1.9914738451043627e-06, + "loss": 1.144281268119812, + "step": 778 + }, + { + "epoch": 0.17949308755760368, + "grad_norm": 0.4170516305027483, + "learning_rate": 1.9914241287284403e-06, + "loss": 0.973777174949646, + "step": 779 + }, + { + "epoch": 0.17972350230414746, + "grad_norm": 0.4502683719091688, + "learning_rate": 1.991374268448617e-06, + "loss": 0.9002145528793335, + "step": 780 + }, + { + "epoch": 0.17995391705069125, + "grad_norm": 0.5526460425242373, + "learning_rate": 1.9913242642721316e-06, + "loss": 0.9234670400619507, + "step": 781 + }, + { + "epoch": 0.18018433179723503, + "grad_norm": 0.4959743401985291, + "learning_rate": 1.9912741162062415e-06, + "loss": 0.9552402496337891, + "step": 782 + }, + { + "epoch": 0.18041474654377881, + "grad_norm": 0.5510111451188886, + "learning_rate": 1.9912238242582257e-06, + "loss": 1.0485708713531494, + "step": 783 + }, + { + "epoch": 0.18064516129032257, + "grad_norm": 0.5447745918227888, + "learning_rate": 1.991173388435384e-06, + "loss": 0.9852809906005859, + "step": 784 + }, + { + "epoch": 0.18087557603686635, + "grad_norm": 0.4726322734582533, + "learning_rate": 1.991122808745037e-06, + "loss": 0.7824808359146118, + "step": 785 + }, + { + "epoch": 0.18110599078341014, + "grad_norm": 0.6534462420793078, + "learning_rate": 1.9910720851945268e-06, + "loss": 1.0380492210388184, + "step": 786 + }, + { + 
"epoch": 0.18133640552995392, + "grad_norm": 0.48532232647089923, + "learning_rate": 1.991021217791215e-06, + "loss": 0.9808282256126404, + "step": 787 + }, + { + "epoch": 0.1815668202764977, + "grad_norm": 0.4791928008108061, + "learning_rate": 1.9909702065424854e-06, + "loss": 0.8636116981506348, + "step": 788 + }, + { + "epoch": 0.1817972350230415, + "grad_norm": 0.45783287516468024, + "learning_rate": 1.9909190514557427e-06, + "loss": 0.8179407715797424, + "step": 789 + }, + { + "epoch": 0.18202764976958524, + "grad_norm": 0.4760021295113364, + "learning_rate": 1.990867752538411e-06, + "loss": 0.9424594044685364, + "step": 790 + }, + { + "epoch": 0.18225806451612903, + "grad_norm": 0.5558557995369799, + "learning_rate": 1.9908163097979366e-06, + "loss": 0.9429298043251038, + "step": 791 + }, + { + "epoch": 0.1824884792626728, + "grad_norm": 0.5944732273868478, + "learning_rate": 1.990764723241787e-06, + "loss": 0.9671716690063477, + "step": 792 + }, + { + "epoch": 0.1827188940092166, + "grad_norm": 0.6041148299127167, + "learning_rate": 1.9907129928774494e-06, + "loss": 1.0063345432281494, + "step": 793 + }, + { + "epoch": 0.18294930875576038, + "grad_norm": 0.4817475331580677, + "learning_rate": 1.990661118712432e-06, + "loss": 0.9932061433792114, + "step": 794 + }, + { + "epoch": 0.18317972350230416, + "grad_norm": 0.4648544131499562, + "learning_rate": 1.990609100754265e-06, + "loss": 0.859153151512146, + "step": 795 + }, + { + "epoch": 0.18341013824884791, + "grad_norm": 0.4738825500961963, + "learning_rate": 1.9905569390104984e-06, + "loss": 0.9328111410140991, + "step": 796 + }, + { + "epoch": 0.1836405529953917, + "grad_norm": 0.542624486663781, + "learning_rate": 1.9905046334887033e-06, + "loss": 0.9970628619194031, + "step": 797 + }, + { + "epoch": 0.18387096774193548, + "grad_norm": 0.41971271798029636, + "learning_rate": 1.990452184196472e-06, + "loss": 1.0347282886505127, + "step": 798 + }, + { + "epoch": 0.18410138248847926, + "grad_norm": 
0.4270967132251902, + "learning_rate": 1.990399591141417e-06, + "loss": 0.9167106747627258, + "step": 799 + }, + { + "epoch": 0.18433179723502305, + "grad_norm": 0.5046236893106074, + "learning_rate": 1.990346854331173e-06, + "loss": 0.8895610570907593, + "step": 800 + }, + { + "epoch": 0.18456221198156683, + "grad_norm": 0.5237845429219861, + "learning_rate": 1.990293973773394e-06, + "loss": 0.8525041341781616, + "step": 801 + }, + { + "epoch": 0.1847926267281106, + "grad_norm": 0.4894836264572075, + "learning_rate": 1.9902409494757553e-06, + "loss": 0.8184069395065308, + "step": 802 + }, + { + "epoch": 0.18502304147465437, + "grad_norm": 0.430895578738413, + "learning_rate": 1.9901877814459544e-06, + "loss": 0.8342509269714355, + "step": 803 + }, + { + "epoch": 0.18525345622119815, + "grad_norm": 0.49779999067704434, + "learning_rate": 1.9901344696917072e-06, + "loss": 0.9254395365715027, + "step": 804 + }, + { + "epoch": 0.18548387096774194, + "grad_norm": 0.5124892914660328, + "learning_rate": 1.990081014220753e-06, + "loss": 0.9537396430969238, + "step": 805 + }, + { + "epoch": 0.18571428571428572, + "grad_norm": 0.47100696643896606, + "learning_rate": 1.99002741504085e-06, + "loss": 0.871498167514801, + "step": 806 + }, + { + "epoch": 0.1859447004608295, + "grad_norm": 0.43363760401100476, + "learning_rate": 1.9899736721597786e-06, + "loss": 0.879954993724823, + "step": 807 + }, + { + "epoch": 0.18617511520737326, + "grad_norm": 0.5651525829110051, + "learning_rate": 1.9899197855853386e-06, + "loss": 0.9238240718841553, + "step": 808 + }, + { + "epoch": 0.18640552995391704, + "grad_norm": 0.43185548411741037, + "learning_rate": 1.9898657553253527e-06, + "loss": 0.7939119935035706, + "step": 809 + }, + { + "epoch": 0.18663594470046083, + "grad_norm": 0.42423118388289394, + "learning_rate": 1.989811581387663e-06, + "loss": 0.8536086082458496, + "step": 810 + }, + { + "epoch": 0.1868663594470046, + "grad_norm": 0.7488569193689159, + "learning_rate": 
1.9897572637801322e-06, + "loss": 0.8272225856781006, + "step": 811 + }, + { + "epoch": 0.1870967741935484, + "grad_norm": 0.5639808995976617, + "learning_rate": 1.989702802510645e-06, + "loss": 0.9187904596328735, + "step": 812 + }, + { + "epoch": 0.18732718894009218, + "grad_norm": 0.5096509814307604, + "learning_rate": 1.989648197587106e-06, + "loss": 0.905516505241394, + "step": 813 + }, + { + "epoch": 0.18755760368663593, + "grad_norm": 0.46349746061643887, + "learning_rate": 1.9895934490174415e-06, + "loss": 0.7548567056655884, + "step": 814 + }, + { + "epoch": 0.18778801843317972, + "grad_norm": 0.5916446556749395, + "learning_rate": 1.9895385568095978e-06, + "loss": 0.8242576122283936, + "step": 815 + }, + { + "epoch": 0.1880184331797235, + "grad_norm": 0.47871736963615374, + "learning_rate": 1.9894835209715427e-06, + "loss": 0.9861007928848267, + "step": 816 + }, + { + "epoch": 0.18824884792626728, + "grad_norm": 0.5325996448618295, + "learning_rate": 1.989428341511264e-06, + "loss": 0.9705426096916199, + "step": 817 + }, + { + "epoch": 0.18847926267281107, + "grad_norm": 0.5222036147665577, + "learning_rate": 1.9893730184367722e-06, + "loss": 0.9773565530776978, + "step": 818 + }, + { + "epoch": 0.18870967741935485, + "grad_norm": 0.42837248272258044, + "learning_rate": 1.989317551756096e-06, + "loss": 0.7929856777191162, + "step": 819 + }, + { + "epoch": 0.1889400921658986, + "grad_norm": 0.48925051722314383, + "learning_rate": 1.9892619414772866e-06, + "loss": 0.9749126434326172, + "step": 820 + }, + { + "epoch": 0.1891705069124424, + "grad_norm": 0.49968815355517815, + "learning_rate": 1.9892061876084166e-06, + "loss": 0.9945374727249146, + "step": 821 + }, + { + "epoch": 0.18940092165898617, + "grad_norm": 0.3942389156154952, + "learning_rate": 1.9891502901575776e-06, + "loss": 0.8016892075538635, + "step": 822 + }, + { + "epoch": 0.18963133640552995, + "grad_norm": 0.5604199160430772, + "learning_rate": 1.9890942491328837e-06, + "loss": 
0.9389557838439941, + "step": 823 + }, + { + "epoch": 0.18986175115207374, + "grad_norm": 0.38179956879765936, + "learning_rate": 1.9890380645424686e-06, + "loss": 0.724082887172699, + "step": 824 + }, + { + "epoch": 0.19009216589861752, + "grad_norm": 0.5409880819899738, + "learning_rate": 1.988981736394488e-06, + "loss": 0.8877915143966675, + "step": 825 + }, + { + "epoch": 0.19032258064516128, + "grad_norm": 0.6992705135248997, + "learning_rate": 1.9889252646971177e-06, + "loss": 1.207446813583374, + "step": 826 + }, + { + "epoch": 0.19055299539170506, + "grad_norm": 0.5040994233955279, + "learning_rate": 1.9888686494585542e-06, + "loss": 0.9155057668685913, + "step": 827 + }, + { + "epoch": 0.19078341013824884, + "grad_norm": 0.5532998867192596, + "learning_rate": 1.9888118906870154e-06, + "loss": 1.005772352218628, + "step": 828 + }, + { + "epoch": 0.19101382488479263, + "grad_norm": 0.42790166152469256, + "learning_rate": 1.9887549883907394e-06, + "loss": 0.9060605764389038, + "step": 829 + }, + { + "epoch": 0.1912442396313364, + "grad_norm": 0.5177028577691919, + "learning_rate": 1.988697942577986e-06, + "loss": 0.7652161717414856, + "step": 830 + }, + { + "epoch": 0.1914746543778802, + "grad_norm": 0.5981838434161031, + "learning_rate": 1.9886407532570354e-06, + "loss": 1.0191380977630615, + "step": 831 + }, + { + "epoch": 0.19170506912442398, + "grad_norm": 0.4987711114148914, + "learning_rate": 1.9885834204361876e-06, + "loss": 0.9497933387756348, + "step": 832 + }, + { + "epoch": 0.19193548387096773, + "grad_norm": 0.462035144334916, + "learning_rate": 1.9885259441237657e-06, + "loss": 0.7728058099746704, + "step": 833 + }, + { + "epoch": 0.19216589861751152, + "grad_norm": 0.517810203206895, + "learning_rate": 1.9884683243281113e-06, + "loss": 0.8961999416351318, + "step": 834 + }, + { + "epoch": 0.1923963133640553, + "grad_norm": 0.49386963761649333, + "learning_rate": 1.9884105610575885e-06, + "loss": 0.9218904972076416, + "step": 835 + }, + { + 
"epoch": 0.19262672811059908, + "grad_norm": 0.49785428541631027, + "learning_rate": 1.9883526543205807e-06, + "loss": 0.8411329984664917, + "step": 836 + }, + { + "epoch": 0.19285714285714287, + "grad_norm": 0.42947794662366, + "learning_rate": 1.988294604125494e-06, + "loss": 0.9536285400390625, + "step": 837 + }, + { + "epoch": 0.19308755760368665, + "grad_norm": 0.589338261376726, + "learning_rate": 1.9882364104807535e-06, + "loss": 0.9404321908950806, + "step": 838 + }, + { + "epoch": 0.1933179723502304, + "grad_norm": 0.6889982860652113, + "learning_rate": 1.9881780733948066e-06, + "loss": 1.2520880699157715, + "step": 839 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 0.5071547317768794, + "learning_rate": 1.9881195928761205e-06, + "loss": 0.8961449861526489, + "step": 840 + }, + { + "epoch": 0.19377880184331797, + "grad_norm": 0.5612915327251169, + "learning_rate": 1.9880609689331833e-06, + "loss": 0.8844394683837891, + "step": 841 + }, + { + "epoch": 0.19400921658986175, + "grad_norm": 0.6383643268501873, + "learning_rate": 1.9880022015745044e-06, + "loss": 1.1305835247039795, + "step": 842 + }, + { + "epoch": 0.19423963133640554, + "grad_norm": 0.5396685716999928, + "learning_rate": 1.9879432908086143e-06, + "loss": 0.9980956315994263, + "step": 843 + }, + { + "epoch": 0.19447004608294932, + "grad_norm": 0.46511386172638836, + "learning_rate": 1.987884236644063e-06, + "loss": 0.7613730430603027, + "step": 844 + }, + { + "epoch": 0.19470046082949308, + "grad_norm": 0.6010725617242704, + "learning_rate": 1.987825039089423e-06, + "loss": 0.9742579460144043, + "step": 845 + }, + { + "epoch": 0.19493087557603686, + "grad_norm": 0.4022001131058661, + "learning_rate": 1.9877656981532864e-06, + "loss": 0.7118766903877258, + "step": 846 + }, + { + "epoch": 0.19516129032258064, + "grad_norm": 0.48902949112989696, + "learning_rate": 1.9877062138442657e-06, + "loss": 0.8657095432281494, + "step": 847 + }, + { + "epoch": 0.19539170506912443, + "grad_norm": 
0.42720754806325495, + "learning_rate": 1.987646586170996e-06, + "loss": 0.8543902039527893, + "step": 848 + }, + { + "epoch": 0.1956221198156682, + "grad_norm": 0.4842820004763047, + "learning_rate": 1.9875868151421317e-06, + "loss": 0.8896970748901367, + "step": 849 + }, + { + "epoch": 0.195852534562212, + "grad_norm": 0.5225855938017534, + "learning_rate": 1.9875269007663486e-06, + "loss": 0.8662775754928589, + "step": 850 + }, + { + "epoch": 0.19608294930875575, + "grad_norm": 0.48460338230512107, + "learning_rate": 1.9874668430523434e-06, + "loss": 0.8241516351699829, + "step": 851 + }, + { + "epoch": 0.19631336405529953, + "grad_norm": 0.5278134062893883, + "learning_rate": 1.987406642008833e-06, + "loss": 0.973886251449585, + "step": 852 + }, + { + "epoch": 0.19654377880184332, + "grad_norm": 0.48464213201098744, + "learning_rate": 1.9873462976445554e-06, + "loss": 0.8133533000946045, + "step": 853 + }, + { + "epoch": 0.1967741935483871, + "grad_norm": 0.6657370368562822, + "learning_rate": 1.9872858099682697e-06, + "loss": 1.120869755744934, + "step": 854 + }, + { + "epoch": 0.19700460829493088, + "grad_norm": 0.47886128108046017, + "learning_rate": 1.9872251789887562e-06, + "loss": 0.9376444816589355, + "step": 855 + }, + { + "epoch": 0.19723502304147467, + "grad_norm": 0.4627008078705538, + "learning_rate": 1.9871644047148148e-06, + "loss": 0.8763699531555176, + "step": 856 + }, + { + "epoch": 0.19746543778801842, + "grad_norm": 0.5436736732062664, + "learning_rate": 1.9871034871552667e-06, + "loss": 0.7993260622024536, + "step": 857 + }, + { + "epoch": 0.1976958525345622, + "grad_norm": 0.5225344117964711, + "learning_rate": 1.9870424263189542e-06, + "loss": 1.0312654972076416, + "step": 858 + }, + { + "epoch": 0.197926267281106, + "grad_norm": 0.6040828842975151, + "learning_rate": 1.98698122221474e-06, + "loss": 1.0784629583358765, + "step": 859 + }, + { + "epoch": 0.19815668202764977, + "grad_norm": 0.5681257026488339, + "learning_rate": 
1.9869198748515085e-06, + "loss": 1.136039137840271, + "step": 860 + }, + { + "epoch": 0.19838709677419356, + "grad_norm": 0.5123381612546825, + "learning_rate": 1.986858384238163e-06, + "loss": 0.834873378276825, + "step": 861 + }, + { + "epoch": 0.19861751152073734, + "grad_norm": 0.5505167057841309, + "learning_rate": 1.98679675038363e-06, + "loss": 0.9705442190170288, + "step": 862 + }, + { + "epoch": 0.1988479262672811, + "grad_norm": 0.6567761197272963, + "learning_rate": 1.9867349732968547e-06, + "loss": 0.9343886375427246, + "step": 863 + }, + { + "epoch": 0.19907834101382488, + "grad_norm": 0.49387008808397015, + "learning_rate": 1.986673052986805e-06, + "loss": 0.9140456914901733, + "step": 864 + }, + { + "epoch": 0.19930875576036866, + "grad_norm": 0.5850607327811402, + "learning_rate": 1.986610989462467e-06, + "loss": 0.9121139049530029, + "step": 865 + }, + { + "epoch": 0.19953917050691244, + "grad_norm": 0.4775789448856378, + "learning_rate": 1.9865487827328505e-06, + "loss": 0.7333672642707825, + "step": 866 + }, + { + "epoch": 0.19976958525345623, + "grad_norm": 0.5039450613377916, + "learning_rate": 1.986486432806984e-06, + "loss": 0.8405989408493042, + "step": 867 + }, + { + "epoch": 0.2, + "grad_norm": 0.47371690470710304, + "learning_rate": 1.9864239396939176e-06, + "loss": 0.8693375587463379, + "step": 868 + }, + { + "epoch": 0.20023041474654377, + "grad_norm": 0.5727654616233698, + "learning_rate": 1.9863613034027223e-06, + "loss": 1.0137104988098145, + "step": 869 + }, + { + "epoch": 0.20046082949308755, + "grad_norm": 0.5382771457657299, + "learning_rate": 1.9862985239424895e-06, + "loss": 1.0283832550048828, + "step": 870 + }, + { + "epoch": 0.20069124423963133, + "grad_norm": 0.6200501422886965, + "learning_rate": 1.9862356013223316e-06, + "loss": 1.117444634437561, + "step": 871 + }, + { + "epoch": 0.20092165898617512, + "grad_norm": 0.6309070895129882, + "learning_rate": 1.986172535551382e-06, + "loss": 0.8861427307128906, + "step": 872 
+ }, + { + "epoch": 0.2011520737327189, + "grad_norm": 0.5017852774763055, + "learning_rate": 1.9861093266387946e-06, + "loss": 1.0273747444152832, + "step": 873 + }, + { + "epoch": 0.20138248847926268, + "grad_norm": 0.5141875246573869, + "learning_rate": 1.9860459745937437e-06, + "loss": 0.918023943901062, + "step": 874 + }, + { + "epoch": 0.20161290322580644, + "grad_norm": 0.5278755996885149, + "learning_rate": 1.9859824794254246e-06, + "loss": 0.8983356952667236, + "step": 875 + }, + { + "epoch": 0.20184331797235022, + "grad_norm": 0.5803540160351622, + "learning_rate": 1.985918841143054e-06, + "loss": 1.0180974006652832, + "step": 876 + }, + { + "epoch": 0.202073732718894, + "grad_norm": 0.48253787858386377, + "learning_rate": 1.985855059755869e-06, + "loss": 0.9656573534011841, + "step": 877 + }, + { + "epoch": 0.2023041474654378, + "grad_norm": 0.5015537059540116, + "learning_rate": 1.9857911352731273e-06, + "loss": 0.8522181510925293, + "step": 878 + }, + { + "epoch": 0.20253456221198157, + "grad_norm": 0.4883752495192941, + "learning_rate": 1.985727067704107e-06, + "loss": 0.9180892705917358, + "step": 879 + }, + { + "epoch": 0.20276497695852536, + "grad_norm": 0.5817140345419661, + "learning_rate": 1.985662857058108e-06, + "loss": 0.9979432821273804, + "step": 880 + }, + { + "epoch": 0.2029953917050691, + "grad_norm": 0.5608420179715049, + "learning_rate": 1.98559850334445e-06, + "loss": 0.8916480541229248, + "step": 881 + }, + { + "epoch": 0.2032258064516129, + "grad_norm": 0.41973060059994494, + "learning_rate": 1.9855340065724738e-06, + "loss": 0.8755770921707153, + "step": 882 + }, + { + "epoch": 0.20345622119815668, + "grad_norm": 0.5596516763963291, + "learning_rate": 1.9854693667515418e-06, + "loss": 1.0200350284576416, + "step": 883 + }, + { + "epoch": 0.20368663594470046, + "grad_norm": 0.5199867730002389, + "learning_rate": 1.9854045838910353e-06, + "loss": 0.928024172782898, + "step": 884 + }, + { + "epoch": 0.20391705069124424, + "grad_norm": 
0.5756725941645391, + "learning_rate": 1.9853396580003582e-06, + "loss": 0.8617212176322937, + "step": 885 + }, + { + "epoch": 0.20414746543778803, + "grad_norm": 0.5415263717139983, + "learning_rate": 1.985274589088934e-06, + "loss": 0.9383209943771362, + "step": 886 + }, + { + "epoch": 0.20437788018433178, + "grad_norm": 0.48094986017269503, + "learning_rate": 1.985209377166208e-06, + "loss": 0.7217687368392944, + "step": 887 + }, + { + "epoch": 0.20460829493087557, + "grad_norm": 0.612593081169746, + "learning_rate": 1.9851440222416446e-06, + "loss": 1.0717028379440308, + "step": 888 + }, + { + "epoch": 0.20483870967741935, + "grad_norm": 0.6063882651782059, + "learning_rate": 1.9850785243247303e-06, + "loss": 1.0137064456939697, + "step": 889 + }, + { + "epoch": 0.20506912442396313, + "grad_norm": 0.5244411173844509, + "learning_rate": 1.985012883424973e-06, + "loss": 0.8569058179855347, + "step": 890 + }, + { + "epoch": 0.20529953917050692, + "grad_norm": 0.6524290996376207, + "learning_rate": 1.9849470995518993e-06, + "loss": 0.9398901462554932, + "step": 891 + }, + { + "epoch": 0.2055299539170507, + "grad_norm": 0.3752296846015947, + "learning_rate": 1.9848811727150577e-06, + "loss": 0.731800377368927, + "step": 892 + }, + { + "epoch": 0.20576036866359446, + "grad_norm": 0.5142990565199794, + "learning_rate": 1.984815102924018e-06, + "loss": 0.8543055653572083, + "step": 893 + }, + { + "epoch": 0.20599078341013824, + "grad_norm": 0.5278314343821748, + "learning_rate": 1.98474889018837e-06, + "loss": 0.9112114906311035, + "step": 894 + }, + { + "epoch": 0.20622119815668202, + "grad_norm": 0.50708997202126, + "learning_rate": 1.984682534517724e-06, + "loss": 0.8272690773010254, + "step": 895 + }, + { + "epoch": 0.2064516129032258, + "grad_norm": 0.5912295968473946, + "learning_rate": 1.984616035921712e-06, + "loss": 0.9680918455123901, + "step": 896 + }, + { + "epoch": 0.2066820276497696, + "grad_norm": 0.6089139321115737, + "learning_rate": 
1.984549394409985e-06, + "loss": 0.815123438835144, + "step": 897 + }, + { + "epoch": 0.20691244239631337, + "grad_norm": 0.4952276433479721, + "learning_rate": 1.984482609992218e-06, + "loss": 0.8035521507263184, + "step": 898 + }, + { + "epoch": 0.20714285714285716, + "grad_norm": 0.548354244530079, + "learning_rate": 1.9844156826781027e-06, + "loss": 0.9000132083892822, + "step": 899 + }, + { + "epoch": 0.2073732718894009, + "grad_norm": 0.6652515011666116, + "learning_rate": 1.9843486124773543e-06, + "loss": 1.06328547000885, + "step": 900 + }, + { + "epoch": 0.2076036866359447, + "grad_norm": 0.4596762245312169, + "learning_rate": 1.9842813993997083e-06, + "loss": 0.9028425216674805, + "step": 901 + }, + { + "epoch": 0.20783410138248848, + "grad_norm": 0.5779573613376965, + "learning_rate": 1.9842140434549196e-06, + "loss": 0.7786350250244141, + "step": 902 + }, + { + "epoch": 0.20806451612903226, + "grad_norm": 0.5102795361356062, + "learning_rate": 1.9841465446527656e-06, + "loss": 0.8041539788246155, + "step": 903 + }, + { + "epoch": 0.20829493087557605, + "grad_norm": 0.4348300351835264, + "learning_rate": 1.9840789030030434e-06, + "loss": 0.8380184173583984, + "step": 904 + }, + { + "epoch": 0.20852534562211983, + "grad_norm": 0.7151525379978475, + "learning_rate": 1.984011118515572e-06, + "loss": 0.8191432952880859, + "step": 905 + }, + { + "epoch": 0.20875576036866358, + "grad_norm": 0.5006646807997585, + "learning_rate": 1.9839431912001885e-06, + "loss": 0.8236384391784668, + "step": 906 + }, + { + "epoch": 0.20898617511520737, + "grad_norm": 0.4959155947407375, + "learning_rate": 1.9838751210667534e-06, + "loss": 0.8218076825141907, + "step": 907 + }, + { + "epoch": 0.20921658986175115, + "grad_norm": 0.5127899266702147, + "learning_rate": 1.983806908125147e-06, + "loss": 0.9140353202819824, + "step": 908 + }, + { + "epoch": 0.20944700460829493, + "grad_norm": 0.5063732794644019, + "learning_rate": 1.9837385523852706e-06, + "loss": 0.9179826974868774, 
+ "step": 909 + }, + { + "epoch": 0.20967741935483872, + "grad_norm": 0.5385574519868781, + "learning_rate": 1.9836700538570456e-06, + "loss": 0.8888909816741943, + "step": 910 + }, + { + "epoch": 0.2099078341013825, + "grad_norm": 0.5208969379705799, + "learning_rate": 1.9836014125504143e-06, + "loss": 0.8951253890991211, + "step": 911 + }, + { + "epoch": 0.21013824884792626, + "grad_norm": 0.6093988535410455, + "learning_rate": 1.98353262847534e-06, + "loss": 1.084958553314209, + "step": 912 + }, + { + "epoch": 0.21036866359447004, + "grad_norm": 0.5061127496745415, + "learning_rate": 1.983463701641807e-06, + "loss": 0.8590713739395142, + "step": 913 + }, + { + "epoch": 0.21059907834101382, + "grad_norm": 0.6396228440899432, + "learning_rate": 1.9833946320598195e-06, + "loss": 1.0393706560134888, + "step": 914 + }, + { + "epoch": 0.2108294930875576, + "grad_norm": 0.49567487165870866, + "learning_rate": 1.983325419739403e-06, + "loss": 0.9403085708618164, + "step": 915 + }, + { + "epoch": 0.2110599078341014, + "grad_norm": 0.49912224081019996, + "learning_rate": 1.9832560646906038e-06, + "loss": 0.8431342244148254, + "step": 916 + }, + { + "epoch": 0.21129032258064517, + "grad_norm": 0.5558843704958377, + "learning_rate": 1.9831865669234884e-06, + "loss": 0.9024044871330261, + "step": 917 + }, + { + "epoch": 0.21152073732718893, + "grad_norm": 0.44775113902692637, + "learning_rate": 1.9831169264481443e-06, + "loss": 0.747347354888916, + "step": 918 + }, + { + "epoch": 0.2117511520737327, + "grad_norm": 0.46715914917156914, + "learning_rate": 1.9830471432746796e-06, + "loss": 0.8266197443008423, + "step": 919 + }, + { + "epoch": 0.2119815668202765, + "grad_norm": 0.5566270603086758, + "learning_rate": 1.9829772174132235e-06, + "loss": 0.8633416295051575, + "step": 920 + }, + { + "epoch": 0.21221198156682028, + "grad_norm": 0.5228096908540074, + "learning_rate": 1.9829071488739256e-06, + "loss": 1.0290095806121826, + "step": 921 + }, + { + "epoch": 
0.21244239631336406, + "grad_norm": 0.667274912811163, + "learning_rate": 1.9828369376669566e-06, + "loss": 0.8193448781967163, + "step": 922 + }, + { + "epoch": 0.21267281105990785, + "grad_norm": 0.5677549533509479, + "learning_rate": 1.982766583802507e-06, + "loss": 0.8828415870666504, + "step": 923 + }, + { + "epoch": 0.2129032258064516, + "grad_norm": 0.597806988660978, + "learning_rate": 1.9826960872907885e-06, + "loss": 0.8806191682815552, + "step": 924 + }, + { + "epoch": 0.21313364055299538, + "grad_norm": 0.40902701240404726, + "learning_rate": 1.982625448142034e-06, + "loss": 0.8441533446311951, + "step": 925 + }, + { + "epoch": 0.21336405529953917, + "grad_norm": 0.5142754504345473, + "learning_rate": 1.9825546663664963e-06, + "loss": 0.9084080457687378, + "step": 926 + }, + { + "epoch": 0.21359447004608295, + "grad_norm": 0.7318607240255686, + "learning_rate": 1.98248374197445e-06, + "loss": 0.9005601406097412, + "step": 927 + }, + { + "epoch": 0.21382488479262673, + "grad_norm": 0.48930991442842664, + "learning_rate": 1.9824126749761893e-06, + "loss": 1.0415414571762085, + "step": 928 + }, + { + "epoch": 0.21405529953917052, + "grad_norm": 0.4380456409582823, + "learning_rate": 1.982341465382029e-06, + "loss": 0.8130594491958618, + "step": 929 + }, + { + "epoch": 0.21428571428571427, + "grad_norm": 0.4623167832467728, + "learning_rate": 1.9822701132023053e-06, + "loss": 0.9178205728530884, + "step": 930 + }, + { + "epoch": 0.21451612903225806, + "grad_norm": 0.5894382821211327, + "learning_rate": 1.9821986184473754e-06, + "loss": 0.9927947521209717, + "step": 931 + }, + { + "epoch": 0.21474654377880184, + "grad_norm": 0.5621440238225328, + "learning_rate": 1.982126981127616e-06, + "loss": 0.9172670841217041, + "step": 932 + }, + { + "epoch": 0.21497695852534562, + "grad_norm": 0.5805773191302366, + "learning_rate": 1.9820552012534255e-06, + "loss": 0.9513058066368103, + "step": 933 + }, + { + "epoch": 0.2152073732718894, + "grad_norm": 
0.6596090379041671, + "learning_rate": 1.9819832788352227e-06, + "loss": 1.014827013015747, + "step": 934 + }, + { + "epoch": 0.2154377880184332, + "grad_norm": 0.5483468550441934, + "learning_rate": 1.9819112138834473e-06, + "loss": 1.0225746631622314, + "step": 935 + }, + { + "epoch": 0.21566820276497695, + "grad_norm": 0.46659867801168237, + "learning_rate": 1.9818390064085584e-06, + "loss": 0.8804227113723755, + "step": 936 + }, + { + "epoch": 0.21589861751152073, + "grad_norm": 0.42738644934381204, + "learning_rate": 1.9817666564210376e-06, + "loss": 0.7215760350227356, + "step": 937 + }, + { + "epoch": 0.2161290322580645, + "grad_norm": 0.6620668522422565, + "learning_rate": 1.981694163931387e-06, + "loss": 0.9978986978530884, + "step": 938 + }, + { + "epoch": 0.2163594470046083, + "grad_norm": 0.5846107454293807, + "learning_rate": 1.981621528950128e-06, + "loss": 0.8646233081817627, + "step": 939 + }, + { + "epoch": 0.21658986175115208, + "grad_norm": 0.44150430663795637, + "learning_rate": 1.981548751487803e-06, + "loss": 0.9619132876396179, + "step": 940 + }, + { + "epoch": 0.21682027649769586, + "grad_norm": 0.543839377462045, + "learning_rate": 1.981475831554976e-06, + "loss": 0.9209504127502441, + "step": 941 + }, + { + "epoch": 0.21705069124423962, + "grad_norm": 0.563351483363654, + "learning_rate": 1.9814027691622318e-06, + "loss": 0.7629299163818359, + "step": 942 + }, + { + "epoch": 0.2172811059907834, + "grad_norm": 0.4885334834965844, + "learning_rate": 1.9813295643201747e-06, + "loss": 0.8702583312988281, + "step": 943 + }, + { + "epoch": 0.21751152073732719, + "grad_norm": 0.5579102568918498, + "learning_rate": 1.9812562170394305e-06, + "loss": 0.9571657180786133, + "step": 944 + }, + { + "epoch": 0.21774193548387097, + "grad_norm": 0.43227127189367615, + "learning_rate": 1.9811827273306456e-06, + "loss": 0.7271617650985718, + "step": 945 + }, + { + "epoch": 0.21797235023041475, + "grad_norm": 0.46137899963900864, + "learning_rate": 
1.9811090952044865e-06, + "loss": 0.8189597725868225, + "step": 946 + }, + { + "epoch": 0.21820276497695854, + "grad_norm": 0.49142212284435566, + "learning_rate": 1.981035320671641e-06, + "loss": 0.7933987379074097, + "step": 947 + }, + { + "epoch": 0.2184331797235023, + "grad_norm": 0.48207328184354004, + "learning_rate": 1.9809614037428174e-06, + "loss": 0.9687645435333252, + "step": 948 + }, + { + "epoch": 0.21866359447004607, + "grad_norm": 0.5647695490676888, + "learning_rate": 1.980887344428745e-06, + "loss": 0.8293745517730713, + "step": 949 + }, + { + "epoch": 0.21889400921658986, + "grad_norm": 0.6489579503887147, + "learning_rate": 1.9808131427401727e-06, + "loss": 1.0447471141815186, + "step": 950 + }, + { + "epoch": 0.21912442396313364, + "grad_norm": 0.48010625791746325, + "learning_rate": 1.9807387986878715e-06, + "loss": 0.8916672468185425, + "step": 951 + }, + { + "epoch": 0.21935483870967742, + "grad_norm": 0.5436399520986829, + "learning_rate": 1.980664312282632e-06, + "loss": 0.8380981683731079, + "step": 952 + }, + { + "epoch": 0.2195852534562212, + "grad_norm": 0.4634469099281989, + "learning_rate": 1.9805896835352656e-06, + "loss": 0.887790322303772, + "step": 953 + }, + { + "epoch": 0.21981566820276496, + "grad_norm": 0.5184548533508342, + "learning_rate": 1.9805149124566048e-06, + "loss": 0.8353140950202942, + "step": 954 + }, + { + "epoch": 0.22004608294930875, + "grad_norm": 0.7177333773715296, + "learning_rate": 1.9804399990575026e-06, + "loss": 1.0337531566619873, + "step": 955 + }, + { + "epoch": 0.22027649769585253, + "grad_norm": 0.4262367777660272, + "learning_rate": 1.9803649433488324e-06, + "loss": 0.8845529556274414, + "step": 956 + }, + { + "epoch": 0.2205069124423963, + "grad_norm": 0.4271901286679727, + "learning_rate": 1.9802897453414884e-06, + "loss": 0.7408445477485657, + "step": 957 + }, + { + "epoch": 0.2207373271889401, + "grad_norm": 0.5478873632644168, + "learning_rate": 1.980214405046386e-06, + "loss": 
0.873178243637085, + "step": 958 + }, + { + "epoch": 0.22096774193548388, + "grad_norm": 0.556535747180833, + "learning_rate": 1.98013892247446e-06, + "loss": 1.0207639932632446, + "step": 959 + }, + { + "epoch": 0.22119815668202766, + "grad_norm": 0.5890989419509002, + "learning_rate": 1.980063297636667e-06, + "loss": 0.8626997470855713, + "step": 960 + }, + { + "epoch": 0.22142857142857142, + "grad_norm": 0.5912616927968722, + "learning_rate": 1.9799875305439836e-06, + "loss": 0.8961347341537476, + "step": 961 + }, + { + "epoch": 0.2216589861751152, + "grad_norm": 0.495639914718092, + "learning_rate": 1.9799116212074075e-06, + "loss": 0.8115944862365723, + "step": 962 + }, + { + "epoch": 0.22188940092165899, + "grad_norm": 0.5281413221179645, + "learning_rate": 1.979835569637957e-06, + "loss": 0.8274029493331909, + "step": 963 + }, + { + "epoch": 0.22211981566820277, + "grad_norm": 0.5782364794204825, + "learning_rate": 1.9797593758466706e-06, + "loss": 1.020345687866211, + "step": 964 + }, + { + "epoch": 0.22235023041474655, + "grad_norm": 0.586333023609623, + "learning_rate": 1.979683039844608e-06, + "loss": 0.8164723515510559, + "step": 965 + }, + { + "epoch": 0.22258064516129034, + "grad_norm": 0.48956655235723145, + "learning_rate": 1.979606561642849e-06, + "loss": 0.832849383354187, + "step": 966 + }, + { + "epoch": 0.2228110599078341, + "grad_norm": 0.5810232623043905, + "learning_rate": 1.9795299412524945e-06, + "loss": 0.9765876531600952, + "step": 967 + }, + { + "epoch": 0.22304147465437787, + "grad_norm": 0.5610292572060406, + "learning_rate": 1.9794531786846657e-06, + "loss": 0.9280411005020142, + "step": 968 + }, + { + "epoch": 0.22327188940092166, + "grad_norm": 0.6528516733941818, + "learning_rate": 1.9793762739505042e-06, + "loss": 1.122058629989624, + "step": 969 + }, + { + "epoch": 0.22350230414746544, + "grad_norm": 0.4582570301724996, + "learning_rate": 1.9792992270611737e-06, + "loss": 0.824627161026001, + "step": 970 + }, + { + "epoch": 
0.22373271889400922, + "grad_norm": 0.750391550156154, + "learning_rate": 1.9792220380278565e-06, + "loss": 1.0583840608596802, + "step": 971 + }, + { + "epoch": 0.223963133640553, + "grad_norm": 0.5277817422831291, + "learning_rate": 1.979144706861757e-06, + "loss": 1.053803563117981, + "step": 972 + }, + { + "epoch": 0.22419354838709676, + "grad_norm": 0.5197675200798639, + "learning_rate": 1.9790672335740993e-06, + "loss": 0.8572183847427368, + "step": 973 + }, + { + "epoch": 0.22442396313364055, + "grad_norm": 0.5956201422774761, + "learning_rate": 1.978989618176129e-06, + "loss": 0.7955416440963745, + "step": 974 + }, + { + "epoch": 0.22465437788018433, + "grad_norm": 0.6931203377433601, + "learning_rate": 1.9789118606791113e-06, + "loss": 0.9455063343048096, + "step": 975 + }, + { + "epoch": 0.2248847926267281, + "grad_norm": 0.5553738972507489, + "learning_rate": 1.978833961094333e-06, + "loss": 0.788895845413208, + "step": 976 + }, + { + "epoch": 0.2251152073732719, + "grad_norm": 0.4854852275390097, + "learning_rate": 1.9787559194331014e-06, + "loss": 0.8344719409942627, + "step": 977 + }, + { + "epoch": 0.22534562211981568, + "grad_norm": 0.5098723288351352, + "learning_rate": 1.9786777357067436e-06, + "loss": 0.85140061378479, + "step": 978 + }, + { + "epoch": 0.22557603686635944, + "grad_norm": 0.43945689098482754, + "learning_rate": 1.978599409926608e-06, + "loss": 0.8511399030685425, + "step": 979 + }, + { + "epoch": 0.22580645161290322, + "grad_norm": 0.4893125980217, + "learning_rate": 1.9785209421040636e-06, + "loss": 0.9243351221084595, + "step": 980 + }, + { + "epoch": 0.226036866359447, + "grad_norm": 0.5349074342918002, + "learning_rate": 1.9784423322504996e-06, + "loss": 0.9043580293655396, + "step": 981 + }, + { + "epoch": 0.2262672811059908, + "grad_norm": 0.654146848198394, + "learning_rate": 1.978363580377327e-06, + "loss": 0.854049563407898, + "step": 982 + }, + { + "epoch": 0.22649769585253457, + "grad_norm": 0.43507484708504635, + 
"learning_rate": 1.9782846864959754e-06, + "loss": 0.7785296440124512, + "step": 983 + }, + { + "epoch": 0.22672811059907835, + "grad_norm": 0.5830354059161934, + "learning_rate": 1.9782056506178965e-06, + "loss": 0.8464720845222473, + "step": 984 + }, + { + "epoch": 0.2269585253456221, + "grad_norm": 0.5249975809892665, + "learning_rate": 1.9781264727545624e-06, + "loss": 0.8519179821014404, + "step": 985 + }, + { + "epoch": 0.2271889400921659, + "grad_norm": 0.6176158235785483, + "learning_rate": 1.978047152917466e-06, + "loss": 0.956415057182312, + "step": 986 + }, + { + "epoch": 0.22741935483870968, + "grad_norm": 0.5046722242039021, + "learning_rate": 1.97796769111812e-06, + "loss": 1.028620719909668, + "step": 987 + }, + { + "epoch": 0.22764976958525346, + "grad_norm": 0.4889451789926323, + "learning_rate": 1.9778880873680585e-06, + "loss": 0.8707184195518494, + "step": 988 + }, + { + "epoch": 0.22788018433179724, + "grad_norm": 0.5212071576326044, + "learning_rate": 1.9778083416788355e-06, + "loss": 0.9842795729637146, + "step": 989 + }, + { + "epoch": 0.22811059907834103, + "grad_norm": 0.5963522406410062, + "learning_rate": 1.977728454062026e-06, + "loss": 0.8827522993087769, + "step": 990 + }, + { + "epoch": 0.22834101382488478, + "grad_norm": 0.5285989804764033, + "learning_rate": 1.9776484245292256e-06, + "loss": 0.8608568906784058, + "step": 991 + }, + { + "epoch": 0.22857142857142856, + "grad_norm": 0.7428648265675979, + "learning_rate": 1.977568253092051e-06, + "loss": 0.8512595891952515, + "step": 992 + }, + { + "epoch": 0.22880184331797235, + "grad_norm": 0.520235896024025, + "learning_rate": 1.9774879397621383e-06, + "loss": 0.7335344552993774, + "step": 993 + }, + { + "epoch": 0.22903225806451613, + "grad_norm": 0.6711607827981731, + "learning_rate": 1.9774074845511457e-06, + "loss": 1.0301114320755005, + "step": 994 + }, + { + "epoch": 0.22926267281105991, + "grad_norm": 0.515409965463074, + "learning_rate": 1.97732688747075e-06, + "loss": 
0.9011565446853638, + "step": 995 + }, + { + "epoch": 0.2294930875576037, + "grad_norm": 0.5657170632178228, + "learning_rate": 1.9772461485326507e-06, + "loss": 0.8644282221794128, + "step": 996 + }, + { + "epoch": 0.22972350230414745, + "grad_norm": 0.49795498598042737, + "learning_rate": 1.9771652677485664e-06, + "loss": 0.8107467889785767, + "step": 997 + }, + { + "epoch": 0.22995391705069124, + "grad_norm": 0.5832229133316258, + "learning_rate": 1.9770842451302373e-06, + "loss": 1.0090508460998535, + "step": 998 + }, + { + "epoch": 0.23018433179723502, + "grad_norm": 0.4910768822506593, + "learning_rate": 1.977003080689424e-06, + "loss": 0.8153292536735535, + "step": 999 + }, + { + "epoch": 0.2304147465437788, + "grad_norm": 0.6502643477323704, + "learning_rate": 1.976921774437906e-06, + "loss": 0.8446916341781616, + "step": 1000 + }, + { + "epoch": 0.2306451612903226, + "grad_norm": 0.5179047651030808, + "learning_rate": 1.9768403263874865e-06, + "loss": 0.759350597858429, + "step": 1001 + }, + { + "epoch": 0.23087557603686637, + "grad_norm": 0.5414654559095757, + "learning_rate": 1.9767587365499862e-06, + "loss": 0.9181695580482483, + "step": 1002 + }, + { + "epoch": 0.23110599078341013, + "grad_norm": 0.4755050115257823, + "learning_rate": 1.976677004937249e-06, + "loss": 0.8450978994369507, + "step": 1003 + }, + { + "epoch": 0.2313364055299539, + "grad_norm": 0.5616575268963485, + "learning_rate": 1.9765951315611365e-06, + "loss": 0.775252640247345, + "step": 1004 + }, + { + "epoch": 0.2315668202764977, + "grad_norm": 0.5248180263396327, + "learning_rate": 1.976513116433534e-06, + "loss": 0.8682440519332886, + "step": 1005 + }, + { + "epoch": 0.23179723502304148, + "grad_norm": 0.6093284414229693, + "learning_rate": 1.9764309595663457e-06, + "loss": 1.0701451301574707, + "step": 1006 + }, + { + "epoch": 0.23202764976958526, + "grad_norm": 0.5747684398408948, + "learning_rate": 1.976348660971496e-06, + "loss": 0.9381946921348572, + "step": 1007 + }, + { + 
"epoch": 0.23225806451612904, + "grad_norm": 0.5225356801303237, + "learning_rate": 1.976266220660931e-06, + "loss": 0.7836539149284363, + "step": 1008 + }, + { + "epoch": 0.2324884792626728, + "grad_norm": 0.5379097818020191, + "learning_rate": 1.9761836386466156e-06, + "loss": 0.9271948337554932, + "step": 1009 + }, + { + "epoch": 0.23271889400921658, + "grad_norm": 0.514797473753123, + "learning_rate": 1.976100914940538e-06, + "loss": 0.8268035650253296, + "step": 1010 + }, + { + "epoch": 0.23294930875576036, + "grad_norm": 0.5105764513310544, + "learning_rate": 1.976018049554705e-06, + "loss": 0.8266786336898804, + "step": 1011 + }, + { + "epoch": 0.23317972350230415, + "grad_norm": 0.6250953922330988, + "learning_rate": 1.9759350425011435e-06, + "loss": 0.9437457323074341, + "step": 1012 + }, + { + "epoch": 0.23341013824884793, + "grad_norm": 0.5629533372281755, + "learning_rate": 1.9758518937919033e-06, + "loss": 0.9078803062438965, + "step": 1013 + }, + { + "epoch": 0.23364055299539171, + "grad_norm": 0.5994095472581402, + "learning_rate": 1.975768603439052e-06, + "loss": 0.9873687624931335, + "step": 1014 + }, + { + "epoch": 0.23387096774193547, + "grad_norm": 0.5010269853722422, + "learning_rate": 1.97568517145468e-06, + "loss": 0.9450196027755737, + "step": 1015 + }, + { + "epoch": 0.23410138248847925, + "grad_norm": 0.5173338079683222, + "learning_rate": 1.975601597850897e-06, + "loss": 0.8804495334625244, + "step": 1016 + }, + { + "epoch": 0.23433179723502304, + "grad_norm": 0.5286639294307074, + "learning_rate": 1.9755178826398333e-06, + "loss": 0.9646104574203491, + "step": 1017 + }, + { + "epoch": 0.23456221198156682, + "grad_norm": 0.5917923655178416, + "learning_rate": 1.9754340258336403e-06, + "loss": 0.9829385280609131, + "step": 1018 + }, + { + "epoch": 0.2347926267281106, + "grad_norm": 0.5022802882731887, + "learning_rate": 1.97535002744449e-06, + "loss": 0.8433707356452942, + "step": 1019 + }, + { + "epoch": 0.2350230414746544, + "grad_norm": 
0.5984717862988072, + "learning_rate": 1.9752658874845744e-06, + "loss": 0.9892767071723938, + "step": 1020 + }, + { + "epoch": 0.23525345622119814, + "grad_norm": 0.5038568694461213, + "learning_rate": 1.9751816059661065e-06, + "loss": 0.8367536664009094, + "step": 1021 + }, + { + "epoch": 0.23548387096774193, + "grad_norm": 0.6009503951092086, + "learning_rate": 1.9750971829013194e-06, + "loss": 0.8947298526763916, + "step": 1022 + }, + { + "epoch": 0.2357142857142857, + "grad_norm": 0.4955473883987944, + "learning_rate": 1.975012618302467e-06, + "loss": 0.9218910336494446, + "step": 1023 + }, + { + "epoch": 0.2359447004608295, + "grad_norm": 0.46527028147066757, + "learning_rate": 1.9749279121818236e-06, + "loss": 0.8744943141937256, + "step": 1024 + }, + { + "epoch": 0.23617511520737328, + "grad_norm": 0.5457797851350515, + "learning_rate": 1.9748430645516845e-06, + "loss": 0.9023007154464722, + "step": 1025 + }, + { + "epoch": 0.23640552995391706, + "grad_norm": 0.5361296427556177, + "learning_rate": 1.974758075424365e-06, + "loss": 0.8475106954574585, + "step": 1026 + }, + { + "epoch": 0.23663594470046084, + "grad_norm": 0.5535275060374267, + "learning_rate": 1.9746729448122013e-06, + "loss": 0.8594635725021362, + "step": 1027 + }, + { + "epoch": 0.2368663594470046, + "grad_norm": 0.6574105474773485, + "learning_rate": 1.97458767272755e-06, + "loss": 0.9601756930351257, + "step": 1028 + }, + { + "epoch": 0.23709677419354838, + "grad_norm": 0.5454698959338334, + "learning_rate": 1.9745022591827886e-06, + "loss": 0.9281105399131775, + "step": 1029 + }, + { + "epoch": 0.23732718894009217, + "grad_norm": 0.4631930883062957, + "learning_rate": 1.9744167041903136e-06, + "loss": 0.8240020275115967, + "step": 1030 + }, + { + "epoch": 0.23755760368663595, + "grad_norm": 0.5116113956014486, + "learning_rate": 1.9743310077625446e-06, + "loss": 0.807030200958252, + "step": 1031 + }, + { + "epoch": 0.23778801843317973, + "grad_norm": 0.5399356518827937, + "learning_rate": 
1.9742451699119194e-06, + "loss": 0.8044267892837524, + "step": 1032 + }, + { + "epoch": 0.23801843317972352, + "grad_norm": 0.5022311335968053, + "learning_rate": 1.9741591906508975e-06, + "loss": 0.9198760390281677, + "step": 1033 + }, + { + "epoch": 0.23824884792626727, + "grad_norm": 0.6382005412114766, + "learning_rate": 1.974073069991959e-06, + "loss": 0.7951973676681519, + "step": 1034 + }, + { + "epoch": 0.23847926267281105, + "grad_norm": 0.5488288386867366, + "learning_rate": 1.9739868079476035e-06, + "loss": 0.8366928100585938, + "step": 1035 + }, + { + "epoch": 0.23870967741935484, + "grad_norm": 0.5327938531465227, + "learning_rate": 1.9739004045303524e-06, + "loss": 0.9644484519958496, + "step": 1036 + }, + { + "epoch": 0.23894009216589862, + "grad_norm": 0.47502000880743445, + "learning_rate": 1.9738138597527464e-06, + "loss": 0.8332105875015259, + "step": 1037 + }, + { + "epoch": 0.2391705069124424, + "grad_norm": 0.4812648524584188, + "learning_rate": 1.9737271736273482e-06, + "loss": 0.8923197388648987, + "step": 1038 + }, + { + "epoch": 0.2394009216589862, + "grad_norm": 0.48693803999160823, + "learning_rate": 1.97364034616674e-06, + "loss": 0.861129879951477, + "step": 1039 + }, + { + "epoch": 0.23963133640552994, + "grad_norm": 0.49858003070315154, + "learning_rate": 1.973553377383524e-06, + "loss": 0.8042281270027161, + "step": 1040 + }, + { + "epoch": 0.23986175115207373, + "grad_norm": 0.603264823916037, + "learning_rate": 1.9734662672903247e-06, + "loss": 1.0315792560577393, + "step": 1041 + }, + { + "epoch": 0.2400921658986175, + "grad_norm": 0.524902457294173, + "learning_rate": 1.973379015899785e-06, + "loss": 0.8165839910507202, + "step": 1042 + }, + { + "epoch": 0.2403225806451613, + "grad_norm": 0.5868579839473654, + "learning_rate": 1.97329162322457e-06, + "loss": 1.0002663135528564, + "step": 1043 + }, + { + "epoch": 0.24055299539170508, + "grad_norm": 0.579630177733921, + "learning_rate": 1.9732040892773642e-06, + "loss": 
0.9340938925743103, + "step": 1044 + }, + { + "epoch": 0.24078341013824886, + "grad_norm": 0.40394518210500746, + "learning_rate": 1.973116414070873e-06, + "loss": 0.7457709312438965, + "step": 1045 + }, + { + "epoch": 0.24101382488479262, + "grad_norm": 0.5468265646556031, + "learning_rate": 1.9730285976178227e-06, + "loss": 0.846583366394043, + "step": 1046 + }, + { + "epoch": 0.2412442396313364, + "grad_norm": 0.597351972991794, + "learning_rate": 1.9729406399309594e-06, + "loss": 0.9701514840126038, + "step": 1047 + }, + { + "epoch": 0.24147465437788018, + "grad_norm": 0.430042606733588, + "learning_rate": 1.9728525410230506e-06, + "loss": 0.7943054437637329, + "step": 1048 + }, + { + "epoch": 0.24170506912442397, + "grad_norm": 0.690774172762037, + "learning_rate": 1.972764300906883e-06, + "loss": 0.8885551691055298, + "step": 1049 + }, + { + "epoch": 0.24193548387096775, + "grad_norm": 0.522936671850185, + "learning_rate": 1.9726759195952653e-06, + "loss": 0.8258899450302124, + "step": 1050 + }, + { + "epoch": 0.24216589861751153, + "grad_norm": 0.586622666679495, + "learning_rate": 1.9725873971010255e-06, + "loss": 1.0085303783416748, + "step": 1051 + }, + { + "epoch": 0.2423963133640553, + "grad_norm": 0.49596210148454095, + "learning_rate": 1.9724987334370124e-06, + "loss": 0.814777135848999, + "step": 1052 + }, + { + "epoch": 0.24262672811059907, + "grad_norm": 0.5592433145931486, + "learning_rate": 1.9724099286160953e-06, + "loss": 0.8328995704650879, + "step": 1053 + }, + { + "epoch": 0.24285714285714285, + "grad_norm": 0.5857793622474846, + "learning_rate": 1.9723209826511645e-06, + "loss": 0.8699138164520264, + "step": 1054 + }, + { + "epoch": 0.24308755760368664, + "grad_norm": 0.5678867062742812, + "learning_rate": 1.9722318955551303e-06, + "loss": 0.8298562169075012, + "step": 1055 + }, + { + "epoch": 0.24331797235023042, + "grad_norm": 0.5976489688453608, + "learning_rate": 1.9721426673409236e-06, + "loss": 0.9470195770263672, + "step": 1056 + }, 
+ { + "epoch": 0.2435483870967742, + "grad_norm": 0.48875505327809854, + "learning_rate": 1.9720532980214955e-06, + "loss": 0.7733730673789978, + "step": 1057 + }, + { + "epoch": 0.24377880184331796, + "grad_norm": 0.46823524678841166, + "learning_rate": 1.9719637876098184e-06, + "loss": 0.7761770486831665, + "step": 1058 + }, + { + "epoch": 0.24400921658986174, + "grad_norm": 0.445725356281168, + "learning_rate": 1.971874136118884e-06, + "loss": 0.9270585775375366, + "step": 1059 + }, + { + "epoch": 0.24423963133640553, + "grad_norm": 0.42406381632115403, + "learning_rate": 1.971784343561705e-06, + "loss": 0.906977653503418, + "step": 1060 + }, + { + "epoch": 0.2444700460829493, + "grad_norm": 0.6412884076264423, + "learning_rate": 1.971694409951316e-06, + "loss": 0.9668625593185425, + "step": 1061 + }, + { + "epoch": 0.2447004608294931, + "grad_norm": 0.49415949875048953, + "learning_rate": 1.971604335300769e-06, + "loss": 0.8215349316596985, + "step": 1062 + }, + { + "epoch": 0.24493087557603688, + "grad_norm": 0.5322070043492434, + "learning_rate": 1.971514119623139e-06, + "loss": 0.8351551294326782, + "step": 1063 + }, + { + "epoch": 0.24516129032258063, + "grad_norm": 0.47999809865085763, + "learning_rate": 1.9714237629315206e-06, + "loss": 0.8778517246246338, + "step": 1064 + }, + { + "epoch": 0.24539170506912442, + "grad_norm": 0.5396014898113735, + "learning_rate": 1.9713332652390293e-06, + "loss": 0.9415761232376099, + "step": 1065 + }, + { + "epoch": 0.2456221198156682, + "grad_norm": 0.5420605598116663, + "learning_rate": 1.9712426265588e-06, + "loss": 0.9040292501449585, + "step": 1066 + }, + { + "epoch": 0.24585253456221198, + "grad_norm": 0.6005715295467339, + "learning_rate": 1.9711518469039894e-06, + "loss": 0.8886675834655762, + "step": 1067 + }, + { + "epoch": 0.24608294930875577, + "grad_norm": 0.6273079636247865, + "learning_rate": 1.971060926287774e-06, + "loss": 0.8439750671386719, + "step": 1068 + }, + { + "epoch": 0.24631336405529955, + 
"grad_norm": 0.5872743245126388, + "learning_rate": 1.9709698647233507e-06, + "loss": 0.8698763251304626, + "step": 1069 + }, + { + "epoch": 0.2465437788018433, + "grad_norm": 0.5858508124188764, + "learning_rate": 1.970878662223937e-06, + "loss": 0.7866508364677429, + "step": 1070 + }, + { + "epoch": 0.2467741935483871, + "grad_norm": 0.46529709331014274, + "learning_rate": 1.97078731880277e-06, + "loss": 0.8652541637420654, + "step": 1071 + }, + { + "epoch": 0.24700460829493087, + "grad_norm": 0.4617144249036463, + "learning_rate": 1.97069583447311e-06, + "loss": 0.8614386320114136, + "step": 1072 + }, + { + "epoch": 0.24723502304147466, + "grad_norm": 0.5647954006429063, + "learning_rate": 1.970604209248234e-06, + "loss": 0.9367830753326416, + "step": 1073 + }, + { + "epoch": 0.24746543778801844, + "grad_norm": 0.5744177103855904, + "learning_rate": 1.9705124431414417e-06, + "loss": 0.8851934671401978, + "step": 1074 + }, + { + "epoch": 0.24769585253456222, + "grad_norm": 0.49563724633359013, + "learning_rate": 1.9704205361660534e-06, + "loss": 0.9619653224945068, + "step": 1075 + }, + { + "epoch": 0.24792626728110598, + "grad_norm": 0.5649060756387019, + "learning_rate": 1.9703284883354094e-06, + "loss": 0.8826392889022827, + "step": 1076 + }, + { + "epoch": 0.24815668202764976, + "grad_norm": 0.6563751938003036, + "learning_rate": 1.970236299662869e-06, + "loss": 0.9075444340705872, + "step": 1077 + }, + { + "epoch": 0.24838709677419354, + "grad_norm": 0.5796370649143662, + "learning_rate": 1.9701439701618147e-06, + "loss": 1.048058032989502, + "step": 1078 + }, + { + "epoch": 0.24861751152073733, + "grad_norm": 0.5313768074192232, + "learning_rate": 1.970051499845647e-06, + "loss": 0.8460798263549805, + "step": 1079 + }, + { + "epoch": 0.2488479262672811, + "grad_norm": 0.7193266180122563, + "learning_rate": 1.9699588887277886e-06, + "loss": 0.9410982131958008, + "step": 1080 + }, + { + "epoch": 0.2490783410138249, + "grad_norm": 0.5102129399153178, + 
"learning_rate": 1.9698661368216816e-06, + "loss": 0.8247401714324951, + "step": 1081 + }, + { + "epoch": 0.24930875576036865, + "grad_norm": 0.5269386839997043, + "learning_rate": 1.969773244140789e-06, + "loss": 0.8543484210968018, + "step": 1082 + }, + { + "epoch": 0.24953917050691243, + "grad_norm": 0.6681776129080308, + "learning_rate": 1.9696802106985933e-06, + "loss": 0.9339861273765564, + "step": 1083 + }, + { + "epoch": 0.24976958525345622, + "grad_norm": 0.6394378735221973, + "learning_rate": 1.969587036508599e-06, + "loss": 0.8268687725067139, + "step": 1084 + }, + { + "epoch": 0.25, + "grad_norm": 0.5565533707237263, + "learning_rate": 1.96949372158433e-06, + "loss": 0.9990735054016113, + "step": 1085 + }, + { + "epoch": 0.2502304147465438, + "grad_norm": 0.5875792221187977, + "learning_rate": 1.9694002659393305e-06, + "loss": 0.871169924736023, + "step": 1086 + }, + { + "epoch": 0.25046082949308757, + "grad_norm": 0.5066699305192991, + "learning_rate": 1.9693066695871657e-06, + "loss": 0.9275476932525635, + "step": 1087 + }, + { + "epoch": 0.25069124423963135, + "grad_norm": 0.5987932412868929, + "learning_rate": 1.969212932541421e-06, + "loss": 0.802006721496582, + "step": 1088 + }, + { + "epoch": 0.25092165898617513, + "grad_norm": 0.6594060142183631, + "learning_rate": 1.9691190548157023e-06, + "loss": 1.158774495124817, + "step": 1089 + }, + { + "epoch": 0.2511520737327189, + "grad_norm": 0.5926971423347241, + "learning_rate": 1.969025036423636e-06, + "loss": 0.8979278802871704, + "step": 1090 + }, + { + "epoch": 0.2513824884792627, + "grad_norm": 0.48149308442816224, + "learning_rate": 1.968930877378868e-06, + "loss": 0.9486579895019531, + "step": 1091 + }, + { + "epoch": 0.25161290322580643, + "grad_norm": 0.5203236583717573, + "learning_rate": 1.968836577695066e-06, + "loss": 0.8661590814590454, + "step": 1092 + }, + { + "epoch": 0.2518433179723502, + "grad_norm": 0.5636787742284843, + "learning_rate": 1.9687421373859173e-06, + "loss": 
0.9224900007247925, + "step": 1093 + }, + { + "epoch": 0.252073732718894, + "grad_norm": 0.6117977186323622, + "learning_rate": 1.96864755646513e-06, + "loss": 0.9563734531402588, + "step": 1094 + }, + { + "epoch": 0.2523041474654378, + "grad_norm": 0.535175631127211, + "learning_rate": 1.968552834946432e-06, + "loss": 0.7457284927368164, + "step": 1095 + }, + { + "epoch": 0.25253456221198156, + "grad_norm": 0.5387959310508903, + "learning_rate": 1.9684579728435727e-06, + "loss": 0.8763077259063721, + "step": 1096 + }, + { + "epoch": 0.25276497695852534, + "grad_norm": 0.5765732282352442, + "learning_rate": 1.9683629701703203e-06, + "loss": 0.8476013541221619, + "step": 1097 + }, + { + "epoch": 0.25299539170506913, + "grad_norm": 0.6265041816963897, + "learning_rate": 1.9682678269404647e-06, + "loss": 0.9706464409828186, + "step": 1098 + }, + { + "epoch": 0.2532258064516129, + "grad_norm": 0.5592313042434921, + "learning_rate": 1.968172543167816e-06, + "loss": 0.9898370504379272, + "step": 1099 + }, + { + "epoch": 0.2534562211981567, + "grad_norm": 0.5273265970472166, + "learning_rate": 1.9680771188662043e-06, + "loss": 0.9073352813720703, + "step": 1100 + }, + { + "epoch": 0.2536866359447005, + "grad_norm": 0.5101975110861352, + "learning_rate": 1.9679815540494805e-06, + "loss": 0.698054850101471, + "step": 1101 + }, + { + "epoch": 0.25391705069124426, + "grad_norm": 0.5334723333803978, + "learning_rate": 1.967885848731515e-06, + "loss": 0.8755865097045898, + "step": 1102 + }, + { + "epoch": 0.25414746543778804, + "grad_norm": 0.7353231676630018, + "learning_rate": 1.9677900029262004e-06, + "loss": 0.8884447813034058, + "step": 1103 + }, + { + "epoch": 0.2543778801843318, + "grad_norm": 0.48855032311862734, + "learning_rate": 1.967694016647448e-06, + "loss": 0.738738477230072, + "step": 1104 + }, + { + "epoch": 0.25460829493087556, + "grad_norm": 0.5363150933196312, + "learning_rate": 1.96759788990919e-06, + "loss": 0.8024383783340454, + "step": 1105 + }, + { + 
"epoch": 0.25483870967741934, + "grad_norm": 0.703802110686274, + "learning_rate": 1.967501622725379e-06, + "loss": 0.8780910968780518, + "step": 1106 + }, + { + "epoch": 0.2550691244239631, + "grad_norm": 0.47799328608287317, + "learning_rate": 1.967405215109989e-06, + "loss": 0.8709204196929932, + "step": 1107 + }, + { + "epoch": 0.2552995391705069, + "grad_norm": 0.5771096865101828, + "learning_rate": 1.9673086670770122e-06, + "loss": 0.8838910460472107, + "step": 1108 + }, + { + "epoch": 0.2555299539170507, + "grad_norm": 0.6122299943883392, + "learning_rate": 1.967211978640463e-06, + "loss": 0.9310617446899414, + "step": 1109 + }, + { + "epoch": 0.2557603686635945, + "grad_norm": 0.5172180782022067, + "learning_rate": 1.9671151498143756e-06, + "loss": 0.8453254699707031, + "step": 1110 + }, + { + "epoch": 0.25599078341013826, + "grad_norm": 0.6724028308795985, + "learning_rate": 1.967018180612804e-06, + "loss": 1.0201973915100098, + "step": 1111 + }, + { + "epoch": 0.25622119815668204, + "grad_norm": 0.5304279166188671, + "learning_rate": 1.9669210710498242e-06, + "loss": 0.84140944480896, + "step": 1112 + }, + { + "epoch": 0.2564516129032258, + "grad_norm": 0.5850181467371437, + "learning_rate": 1.9668238211395308e-06, + "loss": 0.9012273550033569, + "step": 1113 + }, + { + "epoch": 0.2566820276497696, + "grad_norm": 0.5516270166899023, + "learning_rate": 1.9667264308960394e-06, + "loss": 0.820103645324707, + "step": 1114 + }, + { + "epoch": 0.2569124423963134, + "grad_norm": 0.7253674338479518, + "learning_rate": 1.9666289003334868e-06, + "loss": 1.0709048509597778, + "step": 1115 + }, + { + "epoch": 0.2571428571428571, + "grad_norm": 0.6606805333344365, + "learning_rate": 1.966531229466029e-06, + "loss": 0.9408602714538574, + "step": 1116 + }, + { + "epoch": 0.2573732718894009, + "grad_norm": 0.7074764796406602, + "learning_rate": 1.9664334183078425e-06, + "loss": 0.967316210269928, + "step": 1117 + }, + { + "epoch": 0.2576036866359447, + "grad_norm": 
0.7069704403267734, + "learning_rate": 1.9663354668731248e-06, + "loss": 0.9483754634857178, + "step": 1118 + }, + { + "epoch": 0.25783410138248847, + "grad_norm": 0.7072881911304519, + "learning_rate": 1.966237375176093e-06, + "loss": 0.7978509664535522, + "step": 1119 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.5719987288484106, + "learning_rate": 1.9661391432309862e-06, + "loss": 0.8720531463623047, + "step": 1120 + }, + { + "epoch": 0.25829493087557603, + "grad_norm": 0.6673697559796071, + "learning_rate": 1.966040771052061e-06, + "loss": 0.7984024286270142, + "step": 1121 + }, + { + "epoch": 0.2585253456221198, + "grad_norm": 0.5693036626081565, + "learning_rate": 1.965942258653597e-06, + "loss": 0.9255385398864746, + "step": 1122 + }, + { + "epoch": 0.2587557603686636, + "grad_norm": 0.5886763980683305, + "learning_rate": 1.9658436060498927e-06, + "loss": 0.9028007984161377, + "step": 1123 + }, + { + "epoch": 0.2589861751152074, + "grad_norm": 0.5256574840125579, + "learning_rate": 1.9657448132552677e-06, + "loss": 0.8773014545440674, + "step": 1124 + }, + { + "epoch": 0.25921658986175117, + "grad_norm": 0.5356122505196939, + "learning_rate": 1.9656458802840617e-06, + "loss": 0.9280908107757568, + "step": 1125 + }, + { + "epoch": 0.25944700460829495, + "grad_norm": 0.6473213250874083, + "learning_rate": 1.9655468071506344e-06, + "loss": 0.820783793926239, + "step": 1126 + }, + { + "epoch": 0.25967741935483873, + "grad_norm": 0.490374992394704, + "learning_rate": 1.9654475938693663e-06, + "loss": 0.7832465171813965, + "step": 1127 + }, + { + "epoch": 0.25990783410138246, + "grad_norm": 0.6097626342555662, + "learning_rate": 1.965348240454658e-06, + "loss": 0.8824669122695923, + "step": 1128 + }, + { + "epoch": 0.26013824884792625, + "grad_norm": 0.5472888524636408, + "learning_rate": 1.9652487469209305e-06, + "loss": 0.8782131671905518, + "step": 1129 + }, + { + "epoch": 0.26036866359447003, + "grad_norm": 0.6689126051687625, + "learning_rate": 
1.9651491132826255e-06, + "loss": 0.938920259475708, + "step": 1130 + }, + { + "epoch": 0.2605990783410138, + "grad_norm": 0.5811243675216263, + "learning_rate": 1.965049339554204e-06, + "loss": 0.8733320236206055, + "step": 1131 + }, + { + "epoch": 0.2608294930875576, + "grad_norm": 0.5773916722243296, + "learning_rate": 1.9649494257501485e-06, + "loss": 0.8688358664512634, + "step": 1132 + }, + { + "epoch": 0.2610599078341014, + "grad_norm": 0.5867794198483245, + "learning_rate": 1.9648493718849617e-06, + "loss": 0.9250427484512329, + "step": 1133 + }, + { + "epoch": 0.26129032258064516, + "grad_norm": 0.5093685293336041, + "learning_rate": 1.9647491779731655e-06, + "loss": 0.7890609502792358, + "step": 1134 + }, + { + "epoch": 0.26152073732718895, + "grad_norm": 0.5526465355704269, + "learning_rate": 1.964648844029303e-06, + "loss": 0.83612060546875, + "step": 1135 + }, + { + "epoch": 0.26175115207373273, + "grad_norm": 0.597714005790405, + "learning_rate": 1.9645483700679387e-06, + "loss": 0.7951240539550781, + "step": 1136 + }, + { + "epoch": 0.2619815668202765, + "grad_norm": 0.5785889079746135, + "learning_rate": 1.9644477561036546e-06, + "loss": 0.9746277332305908, + "step": 1137 + }, + { + "epoch": 0.2622119815668203, + "grad_norm": 0.6092572079482067, + "learning_rate": 1.9643470021510556e-06, + "loss": 0.856966495513916, + "step": 1138 + }, + { + "epoch": 0.2624423963133641, + "grad_norm": 0.5158468607686231, + "learning_rate": 1.9642461082247663e-06, + "loss": 0.7419042587280273, + "step": 1139 + }, + { + "epoch": 0.2626728110599078, + "grad_norm": 0.6141847224483623, + "learning_rate": 1.9641450743394304e-06, + "loss": 0.8868693709373474, + "step": 1140 + }, + { + "epoch": 0.2629032258064516, + "grad_norm": 0.6400145867633011, + "learning_rate": 1.9640439005097133e-06, + "loss": 1.0111520290374756, + "step": 1141 + }, + { + "epoch": 0.2631336405529954, + "grad_norm": 0.5946199662941717, + "learning_rate": 1.9639425867503006e-06, + "loss": 
0.9379187226295471, + "step": 1142 + }, + { + "epoch": 0.26336405529953916, + "grad_norm": 0.6188285038344139, + "learning_rate": 1.9638411330758973e-06, + "loss": 0.8451071977615356, + "step": 1143 + }, + { + "epoch": 0.26359447004608294, + "grad_norm": 0.6988429276503174, + "learning_rate": 1.9637395395012295e-06, + "loss": 1.0407288074493408, + "step": 1144 + }, + { + "epoch": 0.2638248847926267, + "grad_norm": 0.7122851693009883, + "learning_rate": 1.9636378060410433e-06, + "loss": 0.9594388008117676, + "step": 1145 + }, + { + "epoch": 0.2640552995391705, + "grad_norm": 0.4400072369022715, + "learning_rate": 1.9635359327101057e-06, + "loss": 0.7940789461135864, + "step": 1146 + }, + { + "epoch": 0.2642857142857143, + "grad_norm": 0.6347840140846547, + "learning_rate": 1.9634339195232025e-06, + "loss": 0.9707269668579102, + "step": 1147 + }, + { + "epoch": 0.2645161290322581, + "grad_norm": 0.6349984514987448, + "learning_rate": 1.9633317664951417e-06, + "loss": 0.9554522037506104, + "step": 1148 + }, + { + "epoch": 0.26474654377880186, + "grad_norm": 0.7144693638673882, + "learning_rate": 1.9632294736407497e-06, + "loss": 1.009516716003418, + "step": 1149 + }, + { + "epoch": 0.26497695852534564, + "grad_norm": 0.5429306162333095, + "learning_rate": 1.9631270409748754e-06, + "loss": 0.8337735533714294, + "step": 1150 + }, + { + "epoch": 0.2652073732718894, + "grad_norm": 0.5901765838606909, + "learning_rate": 1.963024468512386e-06, + "loss": 0.9103367328643799, + "step": 1151 + }, + { + "epoch": 0.2654377880184332, + "grad_norm": 0.3703807183273661, + "learning_rate": 1.9629217562681694e-06, + "loss": 0.7258249521255493, + "step": 1152 + }, + { + "epoch": 0.26566820276497694, + "grad_norm": 0.6322578847379198, + "learning_rate": 1.962818904257135e-06, + "loss": 0.7696776390075684, + "step": 1153 + }, + { + "epoch": 0.2658986175115207, + "grad_norm": 0.5842074670437798, + "learning_rate": 1.962715912494211e-06, + "loss": 0.9027894139289856, + "step": 1154 + }, + 
{ + "epoch": 0.2661290322580645, + "grad_norm": 0.6016444551454023, + "learning_rate": 1.962612780994347e-06, + "loss": 1.0412788391113281, + "step": 1155 + }, + { + "epoch": 0.2663594470046083, + "grad_norm": 0.5483158655152818, + "learning_rate": 1.962509509772512e-06, + "loss": 0.8656542897224426, + "step": 1156 + }, + { + "epoch": 0.26658986175115207, + "grad_norm": 0.56350579921959, + "learning_rate": 1.9624060988436964e-06, + "loss": 0.9541186094284058, + "step": 1157 + }, + { + "epoch": 0.26682027649769585, + "grad_norm": 0.6019903664727945, + "learning_rate": 1.962302548222909e-06, + "loss": 0.7684942483901978, + "step": 1158 + }, + { + "epoch": 0.26705069124423964, + "grad_norm": 0.5978642328134118, + "learning_rate": 1.962198857925181e-06, + "loss": 0.8934941291809082, + "step": 1159 + }, + { + "epoch": 0.2672811059907834, + "grad_norm": 0.8041491872239377, + "learning_rate": 1.962095027965562e-06, + "loss": 0.8674842715263367, + "step": 1160 + }, + { + "epoch": 0.2675115207373272, + "grad_norm": 0.5520577783269698, + "learning_rate": 1.9619910583591237e-06, + "loss": 0.8850778937339783, + "step": 1161 + }, + { + "epoch": 0.267741935483871, + "grad_norm": 0.5547632066870658, + "learning_rate": 1.961886949120957e-06, + "loss": 0.9140915870666504, + "step": 1162 + }, + { + "epoch": 0.26797235023041477, + "grad_norm": 0.5171975434439527, + "learning_rate": 1.9617827002661733e-06, + "loss": 0.7557287812232971, + "step": 1163 + }, + { + "epoch": 0.26820276497695855, + "grad_norm": 0.6409514019909783, + "learning_rate": 1.9616783118099032e-06, + "loss": 0.8780542612075806, + "step": 1164 + }, + { + "epoch": 0.2684331797235023, + "grad_norm": 0.5407478984703894, + "learning_rate": 1.9615737837672995e-06, + "loss": 0.8352043628692627, + "step": 1165 + }, + { + "epoch": 0.26866359447004606, + "grad_norm": 0.5628947650252879, + "learning_rate": 1.961469116153534e-06, + "loss": 0.8119357228279114, + "step": 1166 + }, + { + "epoch": 0.26889400921658985, + 
"grad_norm": 0.5744461460266088, + "learning_rate": 1.9613643089837992e-06, + "loss": 0.8953120708465576, + "step": 1167 + }, + { + "epoch": 0.26912442396313363, + "grad_norm": 0.5867925171054906, + "learning_rate": 1.9612593622733074e-06, + "loss": 0.9078162908554077, + "step": 1168 + }, + { + "epoch": 0.2693548387096774, + "grad_norm": 0.5358654275940312, + "learning_rate": 1.961154276037292e-06, + "loss": 0.9118859767913818, + "step": 1169 + }, + { + "epoch": 0.2695852534562212, + "grad_norm": 0.5501238198976731, + "learning_rate": 1.9610490502910056e-06, + "loss": 0.8456159234046936, + "step": 1170 + }, + { + "epoch": 0.269815668202765, + "grad_norm": 0.6291583788438779, + "learning_rate": 1.9609436850497222e-06, + "loss": 0.7860552072525024, + "step": 1171 + }, + { + "epoch": 0.27004608294930876, + "grad_norm": 0.5078912747038423, + "learning_rate": 1.9608381803287343e-06, + "loss": 0.8121567368507385, + "step": 1172 + }, + { + "epoch": 0.27027649769585255, + "grad_norm": 0.6271384929565738, + "learning_rate": 1.9607325361433574e-06, + "loss": 0.9212384819984436, + "step": 1173 + }, + { + "epoch": 0.27050691244239633, + "grad_norm": 0.5704107274797215, + "learning_rate": 1.960626752508924e-06, + "loss": 0.9528858661651611, + "step": 1174 + }, + { + "epoch": 0.2707373271889401, + "grad_norm": 0.5901390376692353, + "learning_rate": 1.9605208294407894e-06, + "loss": 0.8561227321624756, + "step": 1175 + }, + { + "epoch": 0.2709677419354839, + "grad_norm": 0.5308748660328867, + "learning_rate": 1.960414766954328e-06, + "loss": 0.9333669543266296, + "step": 1176 + }, + { + "epoch": 0.2711981566820276, + "grad_norm": 0.5146250417484006, + "learning_rate": 1.9603085650649345e-06, + "loss": 0.8879388570785522, + "step": 1177 + }, + { + "epoch": 0.2714285714285714, + "grad_norm": 0.6699060572110628, + "learning_rate": 1.9602022237880244e-06, + "loss": 1.0099214315414429, + "step": 1178 + }, + { + "epoch": 0.2716589861751152, + "grad_norm": 0.5456103597772948, + 
"learning_rate": 1.9600957431390324e-06, + "loss": 0.9341822862625122, + "step": 1179 + }, + { + "epoch": 0.271889400921659, + "grad_norm": 0.48145703185786454, + "learning_rate": 1.9599891231334144e-06, + "loss": 0.7616428136825562, + "step": 1180 + }, + { + "epoch": 0.27211981566820276, + "grad_norm": 0.4889684884403523, + "learning_rate": 1.959882363786646e-06, + "loss": 0.8270235061645508, + "step": 1181 + }, + { + "epoch": 0.27235023041474654, + "grad_norm": 0.5354748169041671, + "learning_rate": 1.9597754651142233e-06, + "loss": 0.8715114593505859, + "step": 1182 + }, + { + "epoch": 0.2725806451612903, + "grad_norm": 0.5251650427533354, + "learning_rate": 1.959668427131662e-06, + "loss": 0.6910781860351562, + "step": 1183 + }, + { + "epoch": 0.2728110599078341, + "grad_norm": 0.5425639259870759, + "learning_rate": 1.9595612498544997e-06, + "loss": 0.9158545136451721, + "step": 1184 + }, + { + "epoch": 0.2730414746543779, + "grad_norm": 0.4274378587816055, + "learning_rate": 1.9594539332982917e-06, + "loss": 0.7129944562911987, + "step": 1185 + }, + { + "epoch": 0.2732718894009217, + "grad_norm": 0.5549453334752472, + "learning_rate": 1.9593464774786155e-06, + "loss": 0.9487595558166504, + "step": 1186 + }, + { + "epoch": 0.27350230414746546, + "grad_norm": 0.490496609840347, + "learning_rate": 1.959238882411068e-06, + "loss": 0.9455368518829346, + "step": 1187 + }, + { + "epoch": 0.27373271889400924, + "grad_norm": 0.5638225468967204, + "learning_rate": 1.959131148111267e-06, + "loss": 0.9005390405654907, + "step": 1188 + }, + { + "epoch": 0.27396313364055297, + "grad_norm": 0.6239187759866925, + "learning_rate": 1.9590232745948494e-06, + "loss": 0.91117262840271, + "step": 1189 + }, + { + "epoch": 0.27419354838709675, + "grad_norm": 0.46530917608588857, + "learning_rate": 1.958915261877473e-06, + "loss": 0.7940579652786255, + "step": 1190 + }, + { + "epoch": 0.27442396313364054, + "grad_norm": 0.5621028227805456, + "learning_rate": 1.9588071099748155e-06, + 
"loss": 1.0705196857452393, + "step": 1191 + }, + { + "epoch": 0.2746543778801843, + "grad_norm": 0.7402334674842445, + "learning_rate": 1.9586988189025756e-06, + "loss": 0.9311869740486145, + "step": 1192 + }, + { + "epoch": 0.2748847926267281, + "grad_norm": 0.5809380189675816, + "learning_rate": 1.9585903886764715e-06, + "loss": 0.9400506019592285, + "step": 1193 + }, + { + "epoch": 0.2751152073732719, + "grad_norm": 0.5097271764516258, + "learning_rate": 1.958481819312241e-06, + "loss": 0.8282920122146606, + "step": 1194 + }, + { + "epoch": 0.27534562211981567, + "grad_norm": 0.6446418001070287, + "learning_rate": 1.9583731108256435e-06, + "loss": 0.9111119508743286, + "step": 1195 + }, + { + "epoch": 0.27557603686635945, + "grad_norm": 0.6208204199981331, + "learning_rate": 1.9582642632324576e-06, + "loss": 0.9486548900604248, + "step": 1196 + }, + { + "epoch": 0.27580645161290324, + "grad_norm": 0.634036768829364, + "learning_rate": 1.9581552765484828e-06, + "loss": 0.8452764749526978, + "step": 1197 + }, + { + "epoch": 0.276036866359447, + "grad_norm": 0.6457489846855801, + "learning_rate": 1.958046150789538e-06, + "loss": 0.8636663556098938, + "step": 1198 + }, + { + "epoch": 0.2762672811059908, + "grad_norm": 0.6308230498005049, + "learning_rate": 1.9579368859714623e-06, + "loss": 0.9819158315658569, + "step": 1199 + }, + { + "epoch": 0.2764976958525346, + "grad_norm": 0.6100305190055095, + "learning_rate": 1.957827482110116e-06, + "loss": 0.8010607957839966, + "step": 1200 + }, + { + "epoch": 0.2767281105990783, + "grad_norm": 0.44236661935550003, + "learning_rate": 1.957717939221379e-06, + "loss": 0.7686241865158081, + "step": 1201 + }, + { + "epoch": 0.2769585253456221, + "grad_norm": 0.5324278038856628, + "learning_rate": 1.9576082573211507e-06, + "loss": 0.8548723459243774, + "step": 1202 + }, + { + "epoch": 0.2771889400921659, + "grad_norm": 0.5873649231612361, + "learning_rate": 1.957498436425351e-06, + "loss": 0.7866852283477783, + "step": 1203 + 
}, + { + "epoch": 0.27741935483870966, + "grad_norm": 0.5578610745935356, + "learning_rate": 1.9573884765499215e-06, + "loss": 0.8086235523223877, + "step": 1204 + }, + { + "epoch": 0.27764976958525345, + "grad_norm": 0.6489442522213279, + "learning_rate": 1.9572783777108217e-06, + "loss": 1.0310871601104736, + "step": 1205 + }, + { + "epoch": 0.27788018433179723, + "grad_norm": 0.6639195648959771, + "learning_rate": 1.957168139924033e-06, + "loss": 0.9482970237731934, + "step": 1206 + }, + { + "epoch": 0.278110599078341, + "grad_norm": 0.5595205782283428, + "learning_rate": 1.957057763205556e-06, + "loss": 0.809493899345398, + "step": 1207 + }, + { + "epoch": 0.2783410138248848, + "grad_norm": 0.5835729385419335, + "learning_rate": 1.956947247571411e-06, + "loss": 0.8679298162460327, + "step": 1208 + }, + { + "epoch": 0.2785714285714286, + "grad_norm": 0.5339273489408208, + "learning_rate": 1.95683659303764e-06, + "loss": 0.8870571255683899, + "step": 1209 + }, + { + "epoch": 0.27880184331797236, + "grad_norm": 0.6400258685482293, + "learning_rate": 1.9567257996203046e-06, + "loss": 0.8452431559562683, + "step": 1210 + }, + { + "epoch": 0.27903225806451615, + "grad_norm": 0.585371400581961, + "learning_rate": 1.9566148673354855e-06, + "loss": 0.8376550674438477, + "step": 1211 + }, + { + "epoch": 0.27926267281105993, + "grad_norm": 0.468171015360779, + "learning_rate": 1.9565037961992853e-06, + "loss": 0.7686463594436646, + "step": 1212 + }, + { + "epoch": 0.2794930875576037, + "grad_norm": 0.6305180956441923, + "learning_rate": 1.956392586227825e-06, + "loss": 1.0064536333084106, + "step": 1213 + }, + { + "epoch": 0.27972350230414744, + "grad_norm": 0.5204866621768998, + "learning_rate": 1.956281237437247e-06, + "loss": 0.9087784290313721, + "step": 1214 + }, + { + "epoch": 0.2799539170506912, + "grad_norm": 0.5800831908467822, + "learning_rate": 1.9561697498437133e-06, + "loss": 0.8528383374214172, + "step": 1215 + }, + { + "epoch": 0.280184331797235, + 
"grad_norm": 0.492586251170718, + "learning_rate": 1.9560581234634062e-06, + "loss": 0.8229737281799316, + "step": 1216 + }, + { + "epoch": 0.2804147465437788, + "grad_norm": 0.6543530371868361, + "learning_rate": 1.9559463583125285e-06, + "loss": 0.8957454562187195, + "step": 1217 + }, + { + "epoch": 0.2806451612903226, + "grad_norm": 0.6116476174626837, + "learning_rate": 1.955834454407302e-06, + "loss": 0.8373404741287231, + "step": 1218 + }, + { + "epoch": 0.28087557603686636, + "grad_norm": 0.6339166918490768, + "learning_rate": 1.9557224117639698e-06, + "loss": 0.9117659330368042, + "step": 1219 + }, + { + "epoch": 0.28110599078341014, + "grad_norm": 0.7009847380548185, + "learning_rate": 1.9556102303987946e-06, + "loss": 0.9079498052597046, + "step": 1220 + }, + { + "epoch": 0.2813364055299539, + "grad_norm": 0.6797187898490639, + "learning_rate": 1.9554979103280597e-06, + "loss": 0.8127235174179077, + "step": 1221 + }, + { + "epoch": 0.2815668202764977, + "grad_norm": 0.4430544694455362, + "learning_rate": 1.9553854515680684e-06, + "loss": 0.6790676712989807, + "step": 1222 + }, + { + "epoch": 0.2817972350230415, + "grad_norm": 0.547920786044559, + "learning_rate": 1.955272854135143e-06, + "loss": 0.93434739112854, + "step": 1223 + }, + { + "epoch": 0.2820276497695853, + "grad_norm": 0.5831429716678932, + "learning_rate": 1.9551601180456274e-06, + "loss": 0.8624403476715088, + "step": 1224 + }, + { + "epoch": 0.28225806451612906, + "grad_norm": 0.5942670172250124, + "learning_rate": 1.9550472433158856e-06, + "loss": 0.8871273994445801, + "step": 1225 + }, + { + "epoch": 0.2824884792626728, + "grad_norm": 0.6403907324028919, + "learning_rate": 1.9549342299623007e-06, + "loss": 1.0226445198059082, + "step": 1226 + }, + { + "epoch": 0.28271889400921657, + "grad_norm": 0.5570530371692032, + "learning_rate": 1.9548210780012764e-06, + "loss": 0.9232503771781921, + "step": 1227 + }, + { + "epoch": 0.28294930875576035, + "grad_norm": 0.5562171255847491, + 
"learning_rate": 1.9547077874492367e-06, + "loss": 0.944965124130249, + "step": 1228 + }, + { + "epoch": 0.28317972350230414, + "grad_norm": 0.7815951055502713, + "learning_rate": 1.9545943583226255e-06, + "loss": 0.9491870403289795, + "step": 1229 + }, + { + "epoch": 0.2834101382488479, + "grad_norm": 0.5531880644641158, + "learning_rate": 1.9544807906379065e-06, + "loss": 0.8477638363838196, + "step": 1230 + }, + { + "epoch": 0.2836405529953917, + "grad_norm": 0.6334904267465776, + "learning_rate": 1.9543670844115647e-06, + "loss": 0.9733752012252808, + "step": 1231 + }, + { + "epoch": 0.2838709677419355, + "grad_norm": 0.5077250781055755, + "learning_rate": 1.954253239660104e-06, + "loss": 0.8158911466598511, + "step": 1232 + }, + { + "epoch": 0.28410138248847927, + "grad_norm": 0.47003121688563365, + "learning_rate": 1.9541392564000487e-06, + "loss": 0.8814271092414856, + "step": 1233 + }, + { + "epoch": 0.28433179723502305, + "grad_norm": 0.5974631149552703, + "learning_rate": 1.9540251346479435e-06, + "loss": 0.8366897106170654, + "step": 1234 + }, + { + "epoch": 0.28456221198156684, + "grad_norm": 0.5122641090735244, + "learning_rate": 1.953910874420353e-06, + "loss": 0.8043497800827026, + "step": 1235 + }, + { + "epoch": 0.2847926267281106, + "grad_norm": 0.6923450749153209, + "learning_rate": 1.953796475733862e-06, + "loss": 0.904765248298645, + "step": 1236 + }, + { + "epoch": 0.2850230414746544, + "grad_norm": 0.6316427864189956, + "learning_rate": 1.953681938605075e-06, + "loss": 0.9092245101928711, + "step": 1237 + }, + { + "epoch": 0.28525345622119813, + "grad_norm": 0.44433825637231683, + "learning_rate": 1.953567263050617e-06, + "loss": 0.9119021892547607, + "step": 1238 + }, + { + "epoch": 0.2854838709677419, + "grad_norm": 0.5258256580858013, + "learning_rate": 1.9534524490871336e-06, + "loss": 0.8380709886550903, + "step": 1239 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 0.6731382971935342, + "learning_rate": 1.9533374967312894e-06, + 
"loss": 0.9410983324050903, + "step": 1240 + }, + { + "epoch": 0.2859447004608295, + "grad_norm": 0.5901005556596554, + "learning_rate": 1.953222405999769e-06, + "loss": 0.882665753364563, + "step": 1241 + }, + { + "epoch": 0.28617511520737327, + "grad_norm": 0.600142706864601, + "learning_rate": 1.953107176909279e-06, + "loss": 0.9334039688110352, + "step": 1242 + }, + { + "epoch": 0.28640552995391705, + "grad_norm": 0.649506044390801, + "learning_rate": 1.9529918094765433e-06, + "loss": 0.8743090033531189, + "step": 1243 + }, + { + "epoch": 0.28663594470046083, + "grad_norm": 0.5149777367828677, + "learning_rate": 1.9528763037183086e-06, + "loss": 0.9017846584320068, + "step": 1244 + }, + { + "epoch": 0.2868663594470046, + "grad_norm": 0.6718877038666831, + "learning_rate": 1.95276065965134e-06, + "loss": 0.9412289261817932, + "step": 1245 + }, + { + "epoch": 0.2870967741935484, + "grad_norm": 0.5829455891585096, + "learning_rate": 1.9526448772924222e-06, + "loss": 0.9008835554122925, + "step": 1246 + }, + { + "epoch": 0.2873271889400922, + "grad_norm": 0.5850809594667484, + "learning_rate": 1.9525289566583622e-06, + "loss": 0.803752064704895, + "step": 1247 + }, + { + "epoch": 0.28755760368663597, + "grad_norm": 0.642250740432813, + "learning_rate": 1.952412897765985e-06, + "loss": 0.8354049921035767, + "step": 1248 + }, + { + "epoch": 0.28778801843317975, + "grad_norm": 0.711123311118831, + "learning_rate": 1.9522967006321363e-06, + "loss": 1.047461748123169, + "step": 1249 + }, + { + "epoch": 0.2880184331797235, + "grad_norm": 0.5664585984555107, + "learning_rate": 1.9521803652736826e-06, + "loss": 0.9036056399345398, + "step": 1250 + }, + { + "epoch": 0.28824884792626726, + "grad_norm": 0.6380477461120507, + "learning_rate": 1.952063891707509e-06, + "loss": 0.9534894227981567, + "step": 1251 + }, + { + "epoch": 0.28847926267281104, + "grad_norm": 0.6213868500155985, + "learning_rate": 1.9519472799505217e-06, + "loss": 0.9200841188430786, + "step": 1252 + }, + 
{ + "epoch": 0.2887096774193548, + "grad_norm": 0.6071864938745559, + "learning_rate": 1.9518305300196475e-06, + "loss": 0.8917449712753296, + "step": 1253 + }, + { + "epoch": 0.2889400921658986, + "grad_norm": 0.43859246681042113, + "learning_rate": 1.9517136419318317e-06, + "loss": 0.92131507396698, + "step": 1254 + }, + { + "epoch": 0.2891705069124424, + "grad_norm": 0.5459214675052779, + "learning_rate": 1.951596615704041e-06, + "loss": 0.8862432241439819, + "step": 1255 + }, + { + "epoch": 0.2894009216589862, + "grad_norm": 0.5238034407201325, + "learning_rate": 1.951479451353261e-06, + "loss": 0.7789605855941772, + "step": 1256 + }, + { + "epoch": 0.28963133640552996, + "grad_norm": 0.6480376013887345, + "learning_rate": 1.951362148896498e-06, + "loss": 0.8187062740325928, + "step": 1257 + }, + { + "epoch": 0.28986175115207374, + "grad_norm": 0.651824990199355, + "learning_rate": 1.9512447083507784e-06, + "loss": 1.0575072765350342, + "step": 1258 + }, + { + "epoch": 0.2900921658986175, + "grad_norm": 0.5300946141437952, + "learning_rate": 1.9511271297331493e-06, + "loss": 0.8027279376983643, + "step": 1259 + }, + { + "epoch": 0.2903225806451613, + "grad_norm": 0.549023479491683, + "learning_rate": 1.951009413060676e-06, + "loss": 0.6641743183135986, + "step": 1260 + }, + { + "epoch": 0.2905529953917051, + "grad_norm": 0.4919566770154341, + "learning_rate": 1.950891558350446e-06, + "loss": 0.7937613725662231, + "step": 1261 + }, + { + "epoch": 0.2907834101382488, + "grad_norm": 0.6213972326398296, + "learning_rate": 1.950773565619564e-06, + "loss": 0.9600511193275452, + "step": 1262 + }, + { + "epoch": 0.2910138248847926, + "grad_norm": 0.6514763319649333, + "learning_rate": 1.9506554348851585e-06, + "loss": 0.8275980353355408, + "step": 1263 + }, + { + "epoch": 0.2912442396313364, + "grad_norm": 0.598467260157347, + "learning_rate": 1.950537166164375e-06, + "loss": 0.9008789658546448, + "step": 1264 + }, + { + "epoch": 0.29147465437788017, + "grad_norm": 
0.5520168646542984, + "learning_rate": 1.95041875947438e-06, + "loss": 0.8701465129852295, + "step": 1265 + }, + { + "epoch": 0.29170506912442395, + "grad_norm": 0.5793489097336151, + "learning_rate": 1.95030021483236e-06, + "loss": 0.9313883781433105, + "step": 1266 + }, + { + "epoch": 0.29193548387096774, + "grad_norm": 0.5738973536331494, + "learning_rate": 1.9501815322555222e-06, + "loss": 0.883125901222229, + "step": 1267 + }, + { + "epoch": 0.2921658986175115, + "grad_norm": 0.5430628147775056, + "learning_rate": 1.9500627117610927e-06, + "loss": 0.8856269121170044, + "step": 1268 + }, + { + "epoch": 0.2923963133640553, + "grad_norm": 0.4857560088008075, + "learning_rate": 1.9499437533663184e-06, + "loss": 0.8817840218544006, + "step": 1269 + }, + { + "epoch": 0.2926267281105991, + "grad_norm": 0.7079159031386842, + "learning_rate": 1.949824657088466e-06, + "loss": 0.9911330342292786, + "step": 1270 + }, + { + "epoch": 0.29285714285714287, + "grad_norm": 0.6283382634413396, + "learning_rate": 1.949705422944822e-06, + "loss": 0.8902890682220459, + "step": 1271 + }, + { + "epoch": 0.29308755760368665, + "grad_norm": 0.5381213123876506, + "learning_rate": 1.949586050952693e-06, + "loss": 0.6846401691436768, + "step": 1272 + }, + { + "epoch": 0.29331797235023044, + "grad_norm": 0.6164805880844991, + "learning_rate": 1.9494665411294057e-06, + "loss": 0.9186165928840637, + "step": 1273 + }, + { + "epoch": 0.29354838709677417, + "grad_norm": 0.4648178531483389, + "learning_rate": 1.949346893492307e-06, + "loss": 0.8614095449447632, + "step": 1274 + }, + { + "epoch": 0.29377880184331795, + "grad_norm": 0.6146731068970395, + "learning_rate": 1.9492271080587637e-06, + "loss": 0.7824405431747437, + "step": 1275 + }, + { + "epoch": 0.29400921658986173, + "grad_norm": 0.5415059908334089, + "learning_rate": 1.949107184846162e-06, + "loss": 0.8694697618484497, + "step": 1276 + }, + { + "epoch": 0.2942396313364055, + "grad_norm": 0.6070495052767576, + "learning_rate": 
1.948987123871909e-06, + "loss": 0.8839597105979919, + "step": 1277 + }, + { + "epoch": 0.2944700460829493, + "grad_norm": 0.5155544169686388, + "learning_rate": 1.948866925153431e-06, + "loss": 0.832268238067627, + "step": 1278 + }, + { + "epoch": 0.2947004608294931, + "grad_norm": 0.48264272480740306, + "learning_rate": 1.948746588708175e-06, + "loss": 0.8243123888969421, + "step": 1279 + }, + { + "epoch": 0.29493087557603687, + "grad_norm": 0.7516695382591614, + "learning_rate": 1.948626114553608e-06, + "loss": 0.99314284324646, + "step": 1280 + }, + { + "epoch": 0.29516129032258065, + "grad_norm": 0.6001488755214682, + "learning_rate": 1.948505502707216e-06, + "loss": 0.8853542804718018, + "step": 1281 + }, + { + "epoch": 0.29539170506912443, + "grad_norm": 0.7940640499991963, + "learning_rate": 1.948384753186506e-06, + "loss": 0.9623305797576904, + "step": 1282 + }, + { + "epoch": 0.2956221198156682, + "grad_norm": 0.64774993620639, + "learning_rate": 1.948263866009005e-06, + "loss": 0.8321142792701721, + "step": 1283 + }, + { + "epoch": 0.295852534562212, + "grad_norm": 0.6059595321597901, + "learning_rate": 1.948142841192258e-06, + "loss": 0.8911606669425964, + "step": 1284 + }, + { + "epoch": 0.2960829493087558, + "grad_norm": 0.6228210357050852, + "learning_rate": 1.948021678753834e-06, + "loss": 0.9501996040344238, + "step": 1285 + }, + { + "epoch": 0.29631336405529957, + "grad_norm": 0.5846881548888203, + "learning_rate": 1.947900378711318e-06, + "loss": 0.8555784225463867, + "step": 1286 + }, + { + "epoch": 0.2965437788018433, + "grad_norm": 0.5726752466099971, + "learning_rate": 1.9477789410823163e-06, + "loss": 0.7703878283500671, + "step": 1287 + }, + { + "epoch": 0.2967741935483871, + "grad_norm": 0.5629458043150717, + "learning_rate": 1.947657365884457e-06, + "loss": 1.0072009563446045, + "step": 1288 + }, + { + "epoch": 0.29700460829493086, + "grad_norm": 0.5698014348408978, + "learning_rate": 1.9475356531353847e-06, + "loss": 0.7633493542671204, 
+ "step": 1289 + }, + { + "epoch": 0.29723502304147464, + "grad_norm": 0.5241558601711666, + "learning_rate": 1.9474138028527674e-06, + "loss": 0.88579261302948, + "step": 1290 + }, + { + "epoch": 0.2974654377880184, + "grad_norm": 0.6037880677787516, + "learning_rate": 1.94729181505429e-06, + "loss": 0.8356794118881226, + "step": 1291 + }, + { + "epoch": 0.2976958525345622, + "grad_norm": 0.6197051238228268, + "learning_rate": 1.94716968975766e-06, + "loss": 0.8330395817756653, + "step": 1292 + }, + { + "epoch": 0.297926267281106, + "grad_norm": 0.6667932213948545, + "learning_rate": 1.947047426980604e-06, + "loss": 0.9219698905944824, + "step": 1293 + }, + { + "epoch": 0.2981566820276498, + "grad_norm": 0.5409653154450632, + "learning_rate": 1.9469250267408674e-06, + "loss": 0.880803644657135, + "step": 1294 + }, + { + "epoch": 0.29838709677419356, + "grad_norm": 0.5789679620224094, + "learning_rate": 1.9468024890562165e-06, + "loss": 0.8212012052536011, + "step": 1295 + }, + { + "epoch": 0.29861751152073734, + "grad_norm": 0.6209106243517916, + "learning_rate": 1.946679813944438e-06, + "loss": 1.0118587017059326, + "step": 1296 + }, + { + "epoch": 0.2988479262672811, + "grad_norm": 0.6374046746708436, + "learning_rate": 1.9465570014233377e-06, + "loss": 0.8708915710449219, + "step": 1297 + }, + { + "epoch": 0.2990783410138249, + "grad_norm": 0.6373146041782783, + "learning_rate": 1.9464340515107415e-06, + "loss": 0.9386067986488342, + "step": 1298 + }, + { + "epoch": 0.29930875576036864, + "grad_norm": 0.5346925830356088, + "learning_rate": 1.9463109642244958e-06, + "loss": 0.8672319650650024, + "step": 1299 + }, + { + "epoch": 0.2995391705069124, + "grad_norm": 0.7198371333215221, + "learning_rate": 1.9461877395824662e-06, + "loss": 0.9002958536148071, + "step": 1300 + }, + { + "epoch": 0.2997695852534562, + "grad_norm": 0.6247724220238058, + "learning_rate": 1.946064377602539e-06, + "loss": 0.9206029772758484, + "step": 1301 + }, + { + "epoch": 0.3, + 
"grad_norm": 0.8295443472719992, + "learning_rate": 1.94594087830262e-06, + "loss": 1.0063598155975342, + "step": 1302 + }, + { + "epoch": 0.3002304147465438, + "grad_norm": 0.5149695005553171, + "learning_rate": 1.9458172417006346e-06, + "loss": 0.7616912126541138, + "step": 1303 + }, + { + "epoch": 0.30046082949308756, + "grad_norm": 0.5462398029065331, + "learning_rate": 1.945693467814529e-06, + "loss": 0.8385730385780334, + "step": 1304 + }, + { + "epoch": 0.30069124423963134, + "grad_norm": 0.4854220181479302, + "learning_rate": 1.9455695566622677e-06, + "loss": 0.7032216787338257, + "step": 1305 + }, + { + "epoch": 0.3009216589861751, + "grad_norm": 0.5554776786626977, + "learning_rate": 1.9454455082618373e-06, + "loss": 0.7647181749343872, + "step": 1306 + }, + { + "epoch": 0.3011520737327189, + "grad_norm": 0.7119385935860951, + "learning_rate": 1.945321322631243e-06, + "loss": 0.9918918013572693, + "step": 1307 + }, + { + "epoch": 0.3013824884792627, + "grad_norm": 0.5689741757687454, + "learning_rate": 1.945196999788511e-06, + "loss": 0.838451623916626, + "step": 1308 + }, + { + "epoch": 0.3016129032258065, + "grad_norm": 0.7156229049064139, + "learning_rate": 1.945072539751685e-06, + "loss": 0.9739303588867188, + "step": 1309 + }, + { + "epoch": 0.30184331797235026, + "grad_norm": 0.4850858592361209, + "learning_rate": 1.9449479425388305e-06, + "loss": 0.8233742713928223, + "step": 1310 + }, + { + "epoch": 0.302073732718894, + "grad_norm": 0.666231819455408, + "learning_rate": 1.944823208168034e-06, + "loss": 0.9765088558197021, + "step": 1311 + }, + { + "epoch": 0.30230414746543777, + "grad_norm": 0.5940530240559707, + "learning_rate": 1.944698336657399e-06, + "loss": 0.7614048719406128, + "step": 1312 + }, + { + "epoch": 0.30253456221198155, + "grad_norm": 0.5807403996402337, + "learning_rate": 1.9445733280250512e-06, + "loss": 0.760692834854126, + "step": 1313 + }, + { + "epoch": 0.30276497695852533, + "grad_norm": 0.710580819926471, + 
"learning_rate": 1.944448182289135e-06, + "loss": 0.8484706878662109, + "step": 1314 + }, + { + "epoch": 0.3029953917050691, + "grad_norm": 0.6131916776262658, + "learning_rate": 1.944322899467816e-06, + "loss": 0.8857289552688599, + "step": 1315 + }, + { + "epoch": 0.3032258064516129, + "grad_norm": 0.7120330171482998, + "learning_rate": 1.944197479579278e-06, + "loss": 0.8375179171562195, + "step": 1316 + }, + { + "epoch": 0.3034562211981567, + "grad_norm": 0.5402001956337824, + "learning_rate": 1.9440719226417263e-06, + "loss": 0.8141925930976868, + "step": 1317 + }, + { + "epoch": 0.30368663594470047, + "grad_norm": 0.7607357810019435, + "learning_rate": 1.943946228673384e-06, + "loss": 0.9970111846923828, + "step": 1318 + }, + { + "epoch": 0.30391705069124425, + "grad_norm": 0.5721230302327327, + "learning_rate": 1.9438203976924966e-06, + "loss": 0.9542866349220276, + "step": 1319 + }, + { + "epoch": 0.30414746543778803, + "grad_norm": 0.5904074306009988, + "learning_rate": 1.943694429717328e-06, + "loss": 0.8808399438858032, + "step": 1320 + }, + { + "epoch": 0.3043778801843318, + "grad_norm": 0.5734964183027593, + "learning_rate": 1.9435683247661623e-06, + "loss": 0.8541150093078613, + "step": 1321 + }, + { + "epoch": 0.3046082949308756, + "grad_norm": 0.7749551173384804, + "learning_rate": 1.943442082857303e-06, + "loss": 0.8887044191360474, + "step": 1322 + }, + { + "epoch": 0.30483870967741933, + "grad_norm": 0.6530281616907251, + "learning_rate": 1.9433157040090746e-06, + "loss": 0.8699131011962891, + "step": 1323 + }, + { + "epoch": 0.3050691244239631, + "grad_norm": 0.6811202971751444, + "learning_rate": 1.9431891882398205e-06, + "loss": 0.7096077799797058, + "step": 1324 + }, + { + "epoch": 0.3052995391705069, + "grad_norm": 0.5279135582200482, + "learning_rate": 1.9430625355679045e-06, + "loss": 0.8040453195571899, + "step": 1325 + }, + { + "epoch": 0.3055299539170507, + "grad_norm": 0.5904456084555657, + "learning_rate": 1.9429357460117093e-06, + 
"loss": 0.8275970220565796, + "step": 1326 + }, + { + "epoch": 0.30576036866359446, + "grad_norm": 0.5947614996956965, + "learning_rate": 1.9428088195896393e-06, + "loss": 0.9724141359329224, + "step": 1327 + }, + { + "epoch": 0.30599078341013825, + "grad_norm": 0.6696756628924122, + "learning_rate": 1.9426817563201174e-06, + "loss": 0.9293274879455566, + "step": 1328 + }, + { + "epoch": 0.30622119815668203, + "grad_norm": 0.5976334939970911, + "learning_rate": 1.9425545562215865e-06, + "loss": 0.9454036951065063, + "step": 1329 + }, + { + "epoch": 0.3064516129032258, + "grad_norm": 0.48928245529374687, + "learning_rate": 1.9424272193125094e-06, + "loss": 0.7751365900039673, + "step": 1330 + }, + { + "epoch": 0.3066820276497696, + "grad_norm": 0.5211050083614731, + "learning_rate": 1.942299745611369e-06, + "loss": 0.8444688320159912, + "step": 1331 + }, + { + "epoch": 0.3069124423963134, + "grad_norm": 0.6370602856216532, + "learning_rate": 1.9421721351366684e-06, + "loss": 0.7751414775848389, + "step": 1332 + }, + { + "epoch": 0.30714285714285716, + "grad_norm": 0.6732034032956694, + "learning_rate": 1.9420443879069287e-06, + "loss": 0.912209153175354, + "step": 1333 + }, + { + "epoch": 0.30737327188940095, + "grad_norm": 0.4990267188564962, + "learning_rate": 1.941916503940694e-06, + "loss": 0.8897542357444763, + "step": 1334 + }, + { + "epoch": 0.3076036866359447, + "grad_norm": 0.6319943447022882, + "learning_rate": 1.9417884832565257e-06, + "loss": 0.8562046885490417, + "step": 1335 + }, + { + "epoch": 0.30783410138248846, + "grad_norm": 0.6422557067377674, + "learning_rate": 1.9416603258730055e-06, + "loss": 0.8886401653289795, + "step": 1336 + }, + { + "epoch": 0.30806451612903224, + "grad_norm": 0.5477788033894715, + "learning_rate": 1.9415320318087354e-06, + "loss": 0.7401903867721558, + "step": 1337 + }, + { + "epoch": 0.308294930875576, + "grad_norm": 0.5300494853019255, + "learning_rate": 1.941403601082338e-06, + "loss": 0.761360764503479, + "step": 
1338 + }, + { + "epoch": 0.3085253456221198, + "grad_norm": 0.5079078108497779, + "learning_rate": 1.9412750337124537e-06, + "loss": 0.9223028421401978, + "step": 1339 + }, + { + "epoch": 0.3087557603686636, + "grad_norm": 0.6370349934611669, + "learning_rate": 1.9411463297177446e-06, + "loss": 0.9287113547325134, + "step": 1340 + }, + { + "epoch": 0.3089861751152074, + "grad_norm": 0.6186566628026451, + "learning_rate": 1.941017489116891e-06, + "loss": 0.8548502922058105, + "step": 1341 + }, + { + "epoch": 0.30921658986175116, + "grad_norm": 0.6058300330585435, + "learning_rate": 1.9408885119285953e-06, + "loss": 0.8885709643363953, + "step": 1342 + }, + { + "epoch": 0.30944700460829494, + "grad_norm": 0.6807261513363189, + "learning_rate": 1.940759398171577e-06, + "loss": 0.8970856666564941, + "step": 1343 + }, + { + "epoch": 0.3096774193548387, + "grad_norm": 0.5664251996297385, + "learning_rate": 1.9406301478645783e-06, + "loss": 0.847138524055481, + "step": 1344 + }, + { + "epoch": 0.3099078341013825, + "grad_norm": 0.5354847877369665, + "learning_rate": 1.9405007610263584e-06, + "loss": 0.7892216444015503, + "step": 1345 + }, + { + "epoch": 0.3101382488479263, + "grad_norm": 0.6610649492142503, + "learning_rate": 1.940371237675698e-06, + "loss": 0.8869141340255737, + "step": 1346 + }, + { + "epoch": 0.3103686635944701, + "grad_norm": 0.6628677961578048, + "learning_rate": 1.9402415778313976e-06, + "loss": 0.8405635356903076, + "step": 1347 + }, + { + "epoch": 0.3105990783410138, + "grad_norm": 0.6646875425992601, + "learning_rate": 1.9401117815122768e-06, + "loss": 0.914352536201477, + "step": 1348 + }, + { + "epoch": 0.3108294930875576, + "grad_norm": 0.5494930695935469, + "learning_rate": 1.9399818487371756e-06, + "loss": 0.9059416055679321, + "step": 1349 + }, + { + "epoch": 0.31105990783410137, + "grad_norm": 0.7196818748440269, + "learning_rate": 1.939851779524953e-06, + "loss": 1.0513644218444824, + "step": 1350 + }, + { + "epoch": 0.31129032258064515, 
+ "grad_norm": 0.6538820317800585, + "learning_rate": 1.9397215738944893e-06, + "loss": 0.8673127889633179, + "step": 1351 + }, + { + "epoch": 0.31152073732718893, + "grad_norm": 0.5818727011440767, + "learning_rate": 1.9395912318646827e-06, + "loss": 0.7907043695449829, + "step": 1352 + }, + { + "epoch": 0.3117511520737327, + "grad_norm": 0.6065295506543811, + "learning_rate": 1.9394607534544527e-06, + "loss": 0.8127990961074829, + "step": 1353 + }, + { + "epoch": 0.3119815668202765, + "grad_norm": 0.598693369273182, + "learning_rate": 1.9393301386827387e-06, + "loss": 0.877837061882019, + "step": 1354 + }, + { + "epoch": 0.3122119815668203, + "grad_norm": 0.734976387219602, + "learning_rate": 1.939199387568498e-06, + "loss": 0.8518592715263367, + "step": 1355 + }, + { + "epoch": 0.31244239631336407, + "grad_norm": 0.5473192670176156, + "learning_rate": 1.9390685001307093e-06, + "loss": 0.7151869535446167, + "step": 1356 + }, + { + "epoch": 0.31267281105990785, + "grad_norm": 0.5581062201544951, + "learning_rate": 1.9389374763883716e-06, + "loss": 0.8325271606445312, + "step": 1357 + }, + { + "epoch": 0.31290322580645163, + "grad_norm": 0.6052904551524719, + "learning_rate": 1.938806316360502e-06, + "loss": 0.6875327825546265, + "step": 1358 + }, + { + "epoch": 0.3131336405529954, + "grad_norm": 0.5481097616797531, + "learning_rate": 1.9386750200661382e-06, + "loss": 0.8944000005722046, + "step": 1359 + }, + { + "epoch": 0.31336405529953915, + "grad_norm": 0.4954617799257055, + "learning_rate": 1.938543587524338e-06, + "loss": 0.8544747829437256, + "step": 1360 + }, + { + "epoch": 0.31359447004608293, + "grad_norm": 0.6938391730058001, + "learning_rate": 1.9384120187541788e-06, + "loss": 0.9216448068618774, + "step": 1361 + }, + { + "epoch": 0.3138248847926267, + "grad_norm": 0.6834174981389874, + "learning_rate": 1.938280313774757e-06, + "loss": 0.9239494800567627, + "step": 1362 + }, + { + "epoch": 0.3140552995391705, + "grad_norm": 0.6816430700209432, + 
"learning_rate": 1.9381484726051896e-06, + "loss": 0.8903297185897827, + "step": 1363 + }, + { + "epoch": 0.3142857142857143, + "grad_norm": 0.46405748253250195, + "learning_rate": 1.9380164952646137e-06, + "loss": 0.7019625306129456, + "step": 1364 + }, + { + "epoch": 0.31451612903225806, + "grad_norm": 0.6844663370999042, + "learning_rate": 1.9378843817721854e-06, + "loss": 0.8668909072875977, + "step": 1365 + }, + { + "epoch": 0.31474654377880185, + "grad_norm": 0.6454844689755169, + "learning_rate": 1.9377521321470804e-06, + "loss": 0.8124282360076904, + "step": 1366 + }, + { + "epoch": 0.31497695852534563, + "grad_norm": 0.5251795296125459, + "learning_rate": 1.937619746408495e-06, + "loss": 0.7543507814407349, + "step": 1367 + }, + { + "epoch": 0.3152073732718894, + "grad_norm": 0.6140420297919054, + "learning_rate": 1.9374872245756448e-06, + "loss": 0.8355506062507629, + "step": 1368 + }, + { + "epoch": 0.3154377880184332, + "grad_norm": 0.5898778959170111, + "learning_rate": 1.937354566667765e-06, + "loss": 0.9642060399055481, + "step": 1369 + }, + { + "epoch": 0.315668202764977, + "grad_norm": 0.5556038505388771, + "learning_rate": 1.93722177270411e-06, + "loss": 0.9044197201728821, + "step": 1370 + }, + { + "epoch": 0.31589861751152076, + "grad_norm": 0.4271939145273097, + "learning_rate": 1.937088842703956e-06, + "loss": 0.8077869415283203, + "step": 1371 + }, + { + "epoch": 0.3161290322580645, + "grad_norm": 0.6032982707731396, + "learning_rate": 1.9369557766865968e-06, + "loss": 0.8319793939590454, + "step": 1372 + }, + { + "epoch": 0.3163594470046083, + "grad_norm": 0.6304953638761566, + "learning_rate": 1.9368225746713475e-06, + "loss": 0.8233131170272827, + "step": 1373 + }, + { + "epoch": 0.31658986175115206, + "grad_norm": 0.6631214954178034, + "learning_rate": 1.936689236677541e-06, + "loss": 0.7898514270782471, + "step": 1374 + }, + { + "epoch": 0.31682027649769584, + "grad_norm": 0.6121849479571054, + "learning_rate": 1.9365557627245326e-06, + 
"loss": 0.9243249893188477, + "step": 1375 + }, + { + "epoch": 0.3170506912442396, + "grad_norm": 0.5673475924264754, + "learning_rate": 1.9364221528316946e-06, + "loss": 0.8153131008148193, + "step": 1376 + }, + { + "epoch": 0.3172811059907834, + "grad_norm": 0.6767166003638188, + "learning_rate": 1.936288407018421e-06, + "loss": 0.9203826189041138, + "step": 1377 + }, + { + "epoch": 0.3175115207373272, + "grad_norm": 0.6187562743125278, + "learning_rate": 1.936154525304124e-06, + "loss": 0.902605414390564, + "step": 1378 + }, + { + "epoch": 0.317741935483871, + "grad_norm": 0.6256929156852202, + "learning_rate": 1.936020507708238e-06, + "loss": 0.9504558444023132, + "step": 1379 + }, + { + "epoch": 0.31797235023041476, + "grad_norm": 0.6737932441495208, + "learning_rate": 1.9358863542502133e-06, + "loss": 0.8068373203277588, + "step": 1380 + }, + { + "epoch": 0.31820276497695854, + "grad_norm": 0.6309381884158767, + "learning_rate": 1.935752064949524e-06, + "loss": 1.00711989402771, + "step": 1381 + }, + { + "epoch": 0.3184331797235023, + "grad_norm": 0.6297604875594859, + "learning_rate": 1.935617639825661e-06, + "loss": 0.8271746039390564, + "step": 1382 + }, + { + "epoch": 0.3186635944700461, + "grad_norm": 0.658739150286029, + "learning_rate": 1.9354830788981363e-06, + "loss": 0.8478754758834839, + "step": 1383 + }, + { + "epoch": 0.31889400921658984, + "grad_norm": 0.6165108812612344, + "learning_rate": 1.935348382186481e-06, + "loss": 0.9240723252296448, + "step": 1384 + }, + { + "epoch": 0.3191244239631336, + "grad_norm": 0.6446571506984649, + "learning_rate": 1.935213549710246e-06, + "loss": 0.9275645613670349, + "step": 1385 + }, + { + "epoch": 0.3193548387096774, + "grad_norm": 0.6060948743586713, + "learning_rate": 1.9350785814890027e-06, + "loss": 0.9838275909423828, + "step": 1386 + }, + { + "epoch": 0.3195852534562212, + "grad_norm": 0.5765714017880346, + "learning_rate": 1.934943477542341e-06, + "loss": 0.9259177446365356, + "step": 1387 + }, + { + 
"epoch": 0.31981566820276497, + "grad_norm": 0.6051365106169855, + "learning_rate": 1.9348082378898714e-06, + "loss": 0.9252835512161255, + "step": 1388 + }, + { + "epoch": 0.32004608294930875, + "grad_norm": 0.5670107070091258, + "learning_rate": 1.9346728625512235e-06, + "loss": 0.8929460048675537, + "step": 1389 + }, + { + "epoch": 0.32027649769585254, + "grad_norm": 0.5325931239107909, + "learning_rate": 1.934537351546047e-06, + "loss": 0.8909564018249512, + "step": 1390 + }, + { + "epoch": 0.3205069124423963, + "grad_norm": 0.6295332947946368, + "learning_rate": 1.934401704894011e-06, + "loss": 0.8745983839035034, + "step": 1391 + }, + { + "epoch": 0.3207373271889401, + "grad_norm": 0.5987888846505133, + "learning_rate": 1.934265922614805e-06, + "loss": 0.8622266054153442, + "step": 1392 + }, + { + "epoch": 0.3209677419354839, + "grad_norm": 0.5587707056179402, + "learning_rate": 1.9341300047281365e-06, + "loss": 0.6796590089797974, + "step": 1393 + }, + { + "epoch": 0.32119815668202767, + "grad_norm": 0.6156409956015295, + "learning_rate": 1.9339939512537344e-06, + "loss": 0.9012733697891235, + "step": 1394 + }, + { + "epoch": 0.32142857142857145, + "grad_norm": 0.5898128750933246, + "learning_rate": 1.933857762211347e-06, + "loss": 0.9196282625198364, + "step": 1395 + }, + { + "epoch": 0.3216589861751152, + "grad_norm": 0.716981638669288, + "learning_rate": 1.9337214376207417e-06, + "loss": 0.7717788219451904, + "step": 1396 + }, + { + "epoch": 0.32188940092165896, + "grad_norm": 0.6574432706431985, + "learning_rate": 1.9335849775017057e-06, + "loss": 0.8516619801521301, + "step": 1397 + }, + { + "epoch": 0.32211981566820275, + "grad_norm": 0.6319036543472709, + "learning_rate": 1.933448381874046e-06, + "loss": 0.8089120388031006, + "step": 1398 + }, + { + "epoch": 0.32235023041474653, + "grad_norm": 0.7117992019263996, + "learning_rate": 1.9333116507575895e-06, + "loss": 0.8940925598144531, + "step": 1399 + }, + { + "epoch": 0.3225806451612903, + 
"grad_norm": 1.1103495530975782, + "learning_rate": 1.9331747841721827e-06, + "loss": 1.0240859985351562, + "step": 1400 + }, + { + "epoch": 0.3228110599078341, + "grad_norm": 0.6110124319562482, + "learning_rate": 1.9330377821376916e-06, + "loss": 0.742689847946167, + "step": 1401 + }, + { + "epoch": 0.3230414746543779, + "grad_norm": 0.6830153635526487, + "learning_rate": 1.932900644674001e-06, + "loss": 0.9843875169754028, + "step": 1402 + }, + { + "epoch": 0.32327188940092166, + "grad_norm": 0.6043326796009376, + "learning_rate": 1.932763371801017e-06, + "loss": 0.7289329767227173, + "step": 1403 + }, + { + "epoch": 0.32350230414746545, + "grad_norm": 0.676828647698979, + "learning_rate": 1.9326259635386644e-06, + "loss": 0.7706295251846313, + "step": 1404 + }, + { + "epoch": 0.32373271889400923, + "grad_norm": 0.526047650367784, + "learning_rate": 1.932488419906888e-06, + "loss": 0.87788325548172, + "step": 1405 + }, + { + "epoch": 0.323963133640553, + "grad_norm": 0.5971998478662486, + "learning_rate": 1.9323507409256515e-06, + "loss": 0.863690972328186, + "step": 1406 + }, + { + "epoch": 0.3241935483870968, + "grad_norm": 0.700825296208237, + "learning_rate": 1.9322129266149396e-06, + "loss": 0.9333875179290771, + "step": 1407 + }, + { + "epoch": 0.3244239631336406, + "grad_norm": 0.6642455421211582, + "learning_rate": 1.9320749769947555e-06, + "loss": 0.9170523881912231, + "step": 1408 + }, + { + "epoch": 0.3246543778801843, + "grad_norm": 0.7524235771818621, + "learning_rate": 1.931936892085122e-06, + "loss": 0.9337698221206665, + "step": 1409 + }, + { + "epoch": 0.3248847926267281, + "grad_norm": 0.5832115844679703, + "learning_rate": 1.9317986719060824e-06, + "loss": 0.8436682224273682, + "step": 1410 + }, + { + "epoch": 0.3251152073732719, + "grad_norm": 0.5569674571153642, + "learning_rate": 1.9316603164776996e-06, + "loss": 0.6652755737304688, + "step": 1411 + }, + { + "epoch": 0.32534562211981566, + "grad_norm": 0.5895248621851672, + "learning_rate": 
1.931521825820055e-06, + "loss": 0.7966932654380798, + "step": 1412 + }, + { + "epoch": 0.32557603686635944, + "grad_norm": 0.7207375493085693, + "learning_rate": 1.93138319995325e-06, + "loss": 0.9791682958602905, + "step": 1413 + }, + { + "epoch": 0.3258064516129032, + "grad_norm": 0.6505701538481653, + "learning_rate": 1.931244438897407e-06, + "loss": 0.7403467297554016, + "step": 1414 + }, + { + "epoch": 0.326036866359447, + "grad_norm": 0.5881243698924259, + "learning_rate": 1.931105542672667e-06, + "loss": 0.7758523225784302, + "step": 1415 + }, + { + "epoch": 0.3262672811059908, + "grad_norm": 0.6866613437755184, + "learning_rate": 1.9309665112991894e-06, + "loss": 0.8444551229476929, + "step": 1416 + }, + { + "epoch": 0.3264976958525346, + "grad_norm": 0.6987387290897759, + "learning_rate": 1.9308273447971553e-06, + "loss": 0.8796061277389526, + "step": 1417 + }, + { + "epoch": 0.32672811059907836, + "grad_norm": 0.6235742967720523, + "learning_rate": 1.9306880431867643e-06, + "loss": 0.8386640548706055, + "step": 1418 + }, + { + "epoch": 0.32695852534562214, + "grad_norm": 0.669578268248941, + "learning_rate": 1.930548606488236e-06, + "loss": 0.9229142665863037, + "step": 1419 + }, + { + "epoch": 0.3271889400921659, + "grad_norm": 0.6307605261613933, + "learning_rate": 1.9304090347218094e-06, + "loss": 0.9938615560531616, + "step": 1420 + }, + { + "epoch": 0.32741935483870965, + "grad_norm": 0.6526253572614591, + "learning_rate": 1.930269327907743e-06, + "loss": 0.7946186661720276, + "step": 1421 + }, + { + "epoch": 0.32764976958525344, + "grad_norm": 0.6717401804422498, + "learning_rate": 1.930129486066315e-06, + "loss": 0.9456713199615479, + "step": 1422 + }, + { + "epoch": 0.3278801843317972, + "grad_norm": 0.5156577436912951, + "learning_rate": 1.929989509217824e-06, + "loss": 0.844656765460968, + "step": 1423 + }, + { + "epoch": 0.328110599078341, + "grad_norm": 0.5219846430026822, + "learning_rate": 1.9298493973825862e-06, + "loss": 
0.7534950971603394, + "step": 1424 + }, + { + "epoch": 0.3283410138248848, + "grad_norm": 0.7328149629860281, + "learning_rate": 1.92970915058094e-06, + "loss": 0.934429407119751, + "step": 1425 + }, + { + "epoch": 0.32857142857142857, + "grad_norm": 0.6913075282966522, + "learning_rate": 1.929568768833241e-06, + "loss": 0.9491959810256958, + "step": 1426 + }, + { + "epoch": 0.32880184331797235, + "grad_norm": 0.6938433783461605, + "learning_rate": 1.9294282521598657e-06, + "loss": 0.9739001989364624, + "step": 1427 + }, + { + "epoch": 0.32903225806451614, + "grad_norm": 0.7260904191446513, + "learning_rate": 1.92928760058121e-06, + "loss": 0.8159639835357666, + "step": 1428 + }, + { + "epoch": 0.3292626728110599, + "grad_norm": 0.6287238530590293, + "learning_rate": 1.9291468141176894e-06, + "loss": 0.8752772808074951, + "step": 1429 + }, + { + "epoch": 0.3294930875576037, + "grad_norm": 0.6480201898337635, + "learning_rate": 1.929005892789739e-06, + "loss": 0.8543882369995117, + "step": 1430 + }, + { + "epoch": 0.3297235023041475, + "grad_norm": 0.7294679881265868, + "learning_rate": 1.928864836617813e-06, + "loss": 0.8837493658065796, + "step": 1431 + }, + { + "epoch": 0.32995391705069127, + "grad_norm": 0.7638461032292205, + "learning_rate": 1.9287236456223854e-06, + "loss": 0.9320387840270996, + "step": 1432 + }, + { + "epoch": 0.330184331797235, + "grad_norm": 0.5042343025936808, + "learning_rate": 1.92858231982395e-06, + "loss": 0.8272919654846191, + "step": 1433 + }, + { + "epoch": 0.3304147465437788, + "grad_norm": 0.6965906133224807, + "learning_rate": 1.9284408592430207e-06, + "loss": 0.9415527582168579, + "step": 1434 + }, + { + "epoch": 0.33064516129032256, + "grad_norm": 0.7215035047368656, + "learning_rate": 1.928299263900129e-06, + "loss": 0.91558837890625, + "step": 1435 + }, + { + "epoch": 0.33087557603686635, + "grad_norm": 0.5956823050741555, + "learning_rate": 1.9281575338158287e-06, + "loss": 0.9333036541938782, + "step": 1436 + }, + { + 
"epoch": 0.33110599078341013, + "grad_norm": 0.6051938214219355, + "learning_rate": 1.928015669010691e-06, + "loss": 0.7823847532272339, + "step": 1437 + }, + { + "epoch": 0.3313364055299539, + "grad_norm": 0.7462826372754077, + "learning_rate": 1.9278736695053075e-06, + "loss": 0.8436610102653503, + "step": 1438 + }, + { + "epoch": 0.3315668202764977, + "grad_norm": 0.7254037554281902, + "learning_rate": 1.927731535320289e-06, + "loss": 0.8658925890922546, + "step": 1439 + }, + { + "epoch": 0.3317972350230415, + "grad_norm": 0.6229809292573231, + "learning_rate": 1.9275892664762665e-06, + "loss": 0.8510075807571411, + "step": 1440 + }, + { + "epoch": 0.33202764976958526, + "grad_norm": 0.6349856559462502, + "learning_rate": 1.9274468629938897e-06, + "loss": 0.8002004623413086, + "step": 1441 + }, + { + "epoch": 0.33225806451612905, + "grad_norm": 0.6766111098462606, + "learning_rate": 1.9273043248938287e-06, + "loss": 1.0030219554901123, + "step": 1442 + }, + { + "epoch": 0.33248847926267283, + "grad_norm": 0.6313930076569801, + "learning_rate": 1.9271616521967723e-06, + "loss": 0.8415981531143188, + "step": 1443 + }, + { + "epoch": 0.3327188940092166, + "grad_norm": 0.5599899399531522, + "learning_rate": 1.9270188449234295e-06, + "loss": 0.7704254388809204, + "step": 1444 + }, + { + "epoch": 0.33294930875576034, + "grad_norm": 0.5742869826690059, + "learning_rate": 1.9268759030945294e-06, + "loss": 0.8350723385810852, + "step": 1445 + }, + { + "epoch": 0.3331797235023041, + "grad_norm": 0.7177949171518314, + "learning_rate": 1.926732826730818e-06, + "loss": 0.8729690313339233, + "step": 1446 + }, + { + "epoch": 0.3334101382488479, + "grad_norm": 0.64691268148931, + "learning_rate": 1.926589615853064e-06, + "loss": 0.7758746147155762, + "step": 1447 + }, + { + "epoch": 0.3336405529953917, + "grad_norm": 0.6330035443782508, + "learning_rate": 1.926446270482054e-06, + "loss": 0.7895134687423706, + "step": 1448 + }, + { + "epoch": 0.3338709677419355, + "grad_norm": 
0.5710370240153678, + "learning_rate": 1.9263027906385936e-06, + "loss": 1.0239053964614868, + "step": 1449 + }, + { + "epoch": 0.33410138248847926, + "grad_norm": 0.6423159813237256, + "learning_rate": 1.9261591763435104e-06, + "loss": 0.9294595122337341, + "step": 1450 + }, + { + "epoch": 0.33433179723502304, + "grad_norm": 0.690830605411519, + "learning_rate": 1.9260154276176484e-06, + "loss": 0.9786148071289062, + "step": 1451 + }, + { + "epoch": 0.3345622119815668, + "grad_norm": 0.5115027993477321, + "learning_rate": 1.925871544481873e-06, + "loss": 0.8513587117195129, + "step": 1452 + }, + { + "epoch": 0.3347926267281106, + "grad_norm": 0.4974492616751121, + "learning_rate": 1.9257275269570686e-06, + "loss": 0.7737371921539307, + "step": 1453 + }, + { + "epoch": 0.3350230414746544, + "grad_norm": 0.6186615203368176, + "learning_rate": 1.9255833750641392e-06, + "loss": 0.8567382097244263, + "step": 1454 + }, + { + "epoch": 0.3352534562211982, + "grad_norm": 0.5498745898568592, + "learning_rate": 1.9254390888240078e-06, + "loss": 0.893741250038147, + "step": 1455 + }, + { + "epoch": 0.33548387096774196, + "grad_norm": 0.5996544133152318, + "learning_rate": 1.9252946682576184e-06, + "loss": 0.9558119773864746, + "step": 1456 + }, + { + "epoch": 0.3357142857142857, + "grad_norm": 0.6629164295929078, + "learning_rate": 1.9251501133859323e-06, + "loss": 0.7055593729019165, + "step": 1457 + }, + { + "epoch": 0.33594470046082947, + "grad_norm": 0.652213418545905, + "learning_rate": 1.9250054242299326e-06, + "loss": 0.8409907817840576, + "step": 1458 + }, + { + "epoch": 0.33617511520737325, + "grad_norm": 0.5648924790833157, + "learning_rate": 1.9248606008106196e-06, + "loss": 0.9459772109985352, + "step": 1459 + }, + { + "epoch": 0.33640552995391704, + "grad_norm": 0.6285611694534835, + "learning_rate": 1.924715643149015e-06, + "loss": 0.7848879098892212, + "step": 1460 + }, + { + "epoch": 0.3366359447004608, + "grad_norm": 0.8030718131506138, + "learning_rate": 
1.924570551266159e-06, + "loss": 1.0365980863571167, + "step": 1461 + }, + { + "epoch": 0.3368663594470046, + "grad_norm": 0.6014174038703485, + "learning_rate": 1.924425325183111e-06, + "loss": 0.7331318855285645, + "step": 1462 + }, + { + "epoch": 0.3370967741935484, + "grad_norm": 0.6427865459032713, + "learning_rate": 1.9242799649209515e-06, + "loss": 0.8536237478256226, + "step": 1463 + }, + { + "epoch": 0.33732718894009217, + "grad_norm": 0.6525839289073214, + "learning_rate": 1.9241344705007784e-06, + "loss": 0.9296326637268066, + "step": 1464 + }, + { + "epoch": 0.33755760368663595, + "grad_norm": 0.887947392639257, + "learning_rate": 1.92398884194371e-06, + "loss": 0.9084932804107666, + "step": 1465 + }, + { + "epoch": 0.33778801843317974, + "grad_norm": 0.5270165853452017, + "learning_rate": 1.9238430792708847e-06, + "loss": 0.7426833510398865, + "step": 1466 + }, + { + "epoch": 0.3380184331797235, + "grad_norm": 0.5410658114261949, + "learning_rate": 1.9236971825034595e-06, + "loss": 0.7655431032180786, + "step": 1467 + }, + { + "epoch": 0.3382488479262673, + "grad_norm": 0.8331011387344854, + "learning_rate": 1.923551151662611e-06, + "loss": 0.9463646411895752, + "step": 1468 + }, + { + "epoch": 0.3384792626728111, + "grad_norm": 0.5486811314665706, + "learning_rate": 1.9234049867695355e-06, + "loss": 0.75661301612854, + "step": 1469 + }, + { + "epoch": 0.3387096774193548, + "grad_norm": 0.6386489226368193, + "learning_rate": 1.9232586878454486e-06, + "loss": 0.7411723136901855, + "step": 1470 + }, + { + "epoch": 0.3389400921658986, + "grad_norm": 0.6921074075590697, + "learning_rate": 1.9231122549115854e-06, + "loss": 0.9537360072135925, + "step": 1471 + }, + { + "epoch": 0.3391705069124424, + "grad_norm": 0.6895160542670777, + "learning_rate": 1.9229656879892004e-06, + "loss": 0.9527197480201721, + "step": 1472 + }, + { + "epoch": 0.33940092165898617, + "grad_norm": 0.7025720730409266, + "learning_rate": 1.9228189870995674e-06, + "loss": 
0.9083822965621948, + "step": 1473 + }, + { + "epoch": 0.33963133640552995, + "grad_norm": 0.5301970222083436, + "learning_rate": 1.9226721522639804e-06, + "loss": 0.8546823263168335, + "step": 1474 + }, + { + "epoch": 0.33986175115207373, + "grad_norm": 0.6709689097402769, + "learning_rate": 1.922525183503752e-06, + "loss": 0.7429832816123962, + "step": 1475 + }, + { + "epoch": 0.3400921658986175, + "grad_norm": 0.62032231336291, + "learning_rate": 1.922378080840214e-06, + "loss": 0.8805499076843262, + "step": 1476 + }, + { + "epoch": 0.3403225806451613, + "grad_norm": 0.681736765273056, + "learning_rate": 1.9222308442947193e-06, + "loss": 1.0177074670791626, + "step": 1477 + }, + { + "epoch": 0.3405529953917051, + "grad_norm": 0.5202393927717802, + "learning_rate": 1.922083473888638e-06, + "loss": 0.778317391872406, + "step": 1478 + }, + { + "epoch": 0.34078341013824887, + "grad_norm": 0.5628134051805, + "learning_rate": 1.921935969643361e-06, + "loss": 0.8461896181106567, + "step": 1479 + }, + { + "epoch": 0.34101382488479265, + "grad_norm": 0.5553667327802273, + "learning_rate": 1.921788331580299e-06, + "loss": 0.8028895258903503, + "step": 1480 + }, + { + "epoch": 0.34124423963133643, + "grad_norm": 0.5368047903298083, + "learning_rate": 1.9216405597208803e-06, + "loss": 0.9071121215820312, + "step": 1481 + }, + { + "epoch": 0.34147465437788016, + "grad_norm": 0.6427007304701287, + "learning_rate": 1.921492654086555e-06, + "loss": 0.7715062499046326, + "step": 1482 + }, + { + "epoch": 0.34170506912442394, + "grad_norm": 0.5552851307839923, + "learning_rate": 1.9213446146987907e-06, + "loss": 0.8446664810180664, + "step": 1483 + }, + { + "epoch": 0.3419354838709677, + "grad_norm": 0.712846002939772, + "learning_rate": 1.9211964415790754e-06, + "loss": 0.9835283756256104, + "step": 1484 + }, + { + "epoch": 0.3421658986175115, + "grad_norm": 0.8210412746012221, + "learning_rate": 1.921048134748916e-06, + "loss": 1.0630817413330078, + "step": 1485 + }, + { + 
"epoch": 0.3423963133640553, + "grad_norm": 0.6748930312757173, + "learning_rate": 1.920899694229839e-06, + "loss": 0.8514837622642517, + "step": 1486 + }, + { + "epoch": 0.3426267281105991, + "grad_norm": 0.6222560657794074, + "learning_rate": 1.920751120043391e-06, + "loss": 0.7302432060241699, + "step": 1487 + }, + { + "epoch": 0.34285714285714286, + "grad_norm": 0.7079869651359869, + "learning_rate": 1.920602412211136e-06, + "loss": 0.778337836265564, + "step": 1488 + }, + { + "epoch": 0.34308755760368664, + "grad_norm": 0.6890026561089317, + "learning_rate": 1.92045357075466e-06, + "loss": 0.815348207950592, + "step": 1489 + }, + { + "epoch": 0.3433179723502304, + "grad_norm": 0.5476065495891982, + "learning_rate": 1.920304595695567e-06, + "loss": 0.7844003438949585, + "step": 1490 + }, + { + "epoch": 0.3435483870967742, + "grad_norm": 0.6758218109549144, + "learning_rate": 1.92015548705548e-06, + "loss": 0.9513435363769531, + "step": 1491 + }, + { + "epoch": 0.343778801843318, + "grad_norm": 0.6450445262879821, + "learning_rate": 1.9200062448560424e-06, + "loss": 0.7506752610206604, + "step": 1492 + }, + { + "epoch": 0.3440092165898618, + "grad_norm": 0.6233205865485715, + "learning_rate": 1.919856869118916e-06, + "loss": 0.739554762840271, + "step": 1493 + }, + { + "epoch": 0.3442396313364055, + "grad_norm": 0.7436551378630792, + "learning_rate": 1.9197073598657826e-06, + "loss": 0.8167033791542053, + "step": 1494 + }, + { + "epoch": 0.3444700460829493, + "grad_norm": 0.6904439986569212, + "learning_rate": 1.919557717118344e-06, + "loss": 0.9308677911758423, + "step": 1495 + }, + { + "epoch": 0.34470046082949307, + "grad_norm": 0.6340340245140523, + "learning_rate": 1.9194079408983197e-06, + "loss": 0.8601467609405518, + "step": 1496 + }, + { + "epoch": 0.34493087557603686, + "grad_norm": 0.5645119744435318, + "learning_rate": 1.91925803122745e-06, + "loss": 0.8062653541564941, + "step": 1497 + }, + { + "epoch": 0.34516129032258064, + "grad_norm": 
0.6267130901098985, + "learning_rate": 1.9191079881274943e-06, + "loss": 0.8910555839538574, + "step": 1498 + }, + { + "epoch": 0.3453917050691244, + "grad_norm": 0.6398235864437706, + "learning_rate": 1.9189578116202307e-06, + "loss": 0.8604668378829956, + "step": 1499 + }, + { + "epoch": 0.3456221198156682, + "grad_norm": 0.660935387898433, + "learning_rate": 1.918807501727457e-06, + "loss": 0.7255126237869263, + "step": 1500 + }, + { + "epoch": 0.345852534562212, + "grad_norm": 0.6873891579533423, + "learning_rate": 1.9186570584709912e-06, + "loss": 0.998108983039856, + "step": 1501 + }, + { + "epoch": 0.34608294930875577, + "grad_norm": 0.6220147185177797, + "learning_rate": 1.918506481872669e-06, + "loss": 0.7660422325134277, + "step": 1502 + }, + { + "epoch": 0.34631336405529956, + "grad_norm": 0.6579892645247903, + "learning_rate": 1.9183557719543472e-06, + "loss": 0.868739902973175, + "step": 1503 + }, + { + "epoch": 0.34654377880184334, + "grad_norm": 0.5789973673480234, + "learning_rate": 1.918204928737901e-06, + "loss": 0.6630350351333618, + "step": 1504 + }, + { + "epoch": 0.3467741935483871, + "grad_norm": 0.5444610824332694, + "learning_rate": 1.9180539522452247e-06, + "loss": 0.8651586771011353, + "step": 1505 + }, + { + "epoch": 0.34700460829493085, + "grad_norm": 0.5927111235913876, + "learning_rate": 1.9179028424982326e-06, + "loss": 0.8584417700767517, + "step": 1506 + }, + { + "epoch": 0.34723502304147463, + "grad_norm": 0.5575547611441275, + "learning_rate": 1.917751599518858e-06, + "loss": 0.7793893814086914, + "step": 1507 + }, + { + "epoch": 0.3474654377880184, + "grad_norm": 0.768634414143097, + "learning_rate": 1.9176002233290542e-06, + "loss": 0.8499815464019775, + "step": 1508 + }, + { + "epoch": 0.3476958525345622, + "grad_norm": 0.7795460044280101, + "learning_rate": 1.917448713950792e-06, + "loss": 0.7914199829101562, + "step": 1509 + }, + { + "epoch": 0.347926267281106, + "grad_norm": 0.8510793838671106, + "learning_rate": 
1.9172970714060637e-06, + "loss": 0.942331850528717, + "step": 1510 + }, + { + "epoch": 0.34815668202764977, + "grad_norm": 0.621963787262809, + "learning_rate": 1.9171452957168803e-06, + "loss": 0.7780032157897949, + "step": 1511 + }, + { + "epoch": 0.34838709677419355, + "grad_norm": 0.6399045325995384, + "learning_rate": 1.916993386905271e-06, + "loss": 0.8544708490371704, + "step": 1512 + }, + { + "epoch": 0.34861751152073733, + "grad_norm": 0.6890752127070114, + "learning_rate": 1.9168413449932855e-06, + "loss": 0.798173725605011, + "step": 1513 + }, + { + "epoch": 0.3488479262672811, + "grad_norm": 0.7396810139453504, + "learning_rate": 1.9166891700029922e-06, + "loss": 0.9426852464675903, + "step": 1514 + }, + { + "epoch": 0.3490783410138249, + "grad_norm": 0.7455227520654529, + "learning_rate": 1.91653686195648e-06, + "loss": 0.922240138053894, + "step": 1515 + }, + { + "epoch": 0.3493087557603687, + "grad_norm": 0.630161091555718, + "learning_rate": 1.9163844208758556e-06, + "loss": 0.7997978925704956, + "step": 1516 + }, + { + "epoch": 0.34953917050691247, + "grad_norm": 0.7560374253096135, + "learning_rate": 1.9162318467832455e-06, + "loss": 1.0597525835037231, + "step": 1517 + }, + { + "epoch": 0.3497695852534562, + "grad_norm": 0.6669142658812499, + "learning_rate": 1.9160791397007957e-06, + "loss": 0.8211681842803955, + "step": 1518 + }, + { + "epoch": 0.35, + "grad_norm": 0.6134468456903489, + "learning_rate": 1.9159262996506716e-06, + "loss": 0.8078022003173828, + "step": 1519 + }, + { + "epoch": 0.35023041474654376, + "grad_norm": 0.8800559709758627, + "learning_rate": 1.915773326655057e-06, + "loss": 0.9449256658554077, + "step": 1520 + }, + { + "epoch": 0.35046082949308754, + "grad_norm": 0.6806561068219223, + "learning_rate": 1.915620220736157e-06, + "loss": 0.8744012117385864, + "step": 1521 + }, + { + "epoch": 0.35069124423963133, + "grad_norm": 0.501693303726274, + "learning_rate": 1.9154669819161946e-06, + "loss": 0.9503095746040344, + 
"step": 1522 + }, + { + "epoch": 0.3509216589861751, + "grad_norm": 0.7422922368497302, + "learning_rate": 1.9153136102174106e-06, + "loss": 1.055432915687561, + "step": 1523 + }, + { + "epoch": 0.3511520737327189, + "grad_norm": 0.7420134076461076, + "learning_rate": 1.9151601056620684e-06, + "loss": 0.8540226221084595, + "step": 1524 + }, + { + "epoch": 0.3513824884792627, + "grad_norm": 0.6432500784024293, + "learning_rate": 1.915006468272448e-06, + "loss": 0.8846266865730286, + "step": 1525 + }, + { + "epoch": 0.35161290322580646, + "grad_norm": 0.6065038491164693, + "learning_rate": 1.9148526980708507e-06, + "loss": 0.8941656947135925, + "step": 1526 + }, + { + "epoch": 0.35184331797235024, + "grad_norm": 0.657637251938276, + "learning_rate": 1.914698795079595e-06, + "loss": 0.868419885635376, + "step": 1527 + }, + { + "epoch": 0.35207373271889403, + "grad_norm": 0.6471997072963731, + "learning_rate": 1.91454475932102e-06, + "loss": 0.7375580072402954, + "step": 1528 + }, + { + "epoch": 0.3523041474654378, + "grad_norm": 0.5813494020686044, + "learning_rate": 1.9143905908174844e-06, + "loss": 0.9415492415428162, + "step": 1529 + }, + { + "epoch": 0.35253456221198154, + "grad_norm": 0.5845641741459107, + "learning_rate": 1.9142362895913646e-06, + "loss": 0.8395911455154419, + "step": 1530 + }, + { + "epoch": 0.3527649769585253, + "grad_norm": 0.6214793611789142, + "learning_rate": 1.914081855665057e-06, + "loss": 0.831234335899353, + "step": 1531 + }, + { + "epoch": 0.3529953917050691, + "grad_norm": 0.6337865377576076, + "learning_rate": 1.9139272890609794e-06, + "loss": 0.8975566029548645, + "step": 1532 + }, + { + "epoch": 0.3532258064516129, + "grad_norm": 0.629586080319263, + "learning_rate": 1.913772589801565e-06, + "loss": 0.8134264945983887, + "step": 1533 + }, + { + "epoch": 0.3534562211981567, + "grad_norm": 0.6728325426784268, + "learning_rate": 1.913617757909269e-06, + "loss": 0.9507275819778442, + "step": 1534 + }, + { + "epoch": 
0.35368663594470046, + "grad_norm": 0.6431752162471284, + "learning_rate": 1.913462793406565e-06, + "loss": 0.8839038610458374, + "step": 1535 + }, + { + "epoch": 0.35391705069124424, + "grad_norm": 0.5543997844984022, + "learning_rate": 1.9133076963159453e-06, + "loss": 0.8708392381668091, + "step": 1536 + }, + { + "epoch": 0.354147465437788, + "grad_norm": 0.6062385114401656, + "learning_rate": 1.913152466659923e-06, + "loss": 0.7609391212463379, + "step": 1537 + }, + { + "epoch": 0.3543778801843318, + "grad_norm": 0.7180303128257083, + "learning_rate": 1.912997104461029e-06, + "loss": 0.9231283664703369, + "step": 1538 + }, + { + "epoch": 0.3546082949308756, + "grad_norm": 0.6890910539107805, + "learning_rate": 1.912841609741814e-06, + "loss": 1.0297726392745972, + "step": 1539 + }, + { + "epoch": 0.3548387096774194, + "grad_norm": 0.75971130189085, + "learning_rate": 1.9126859825248475e-06, + "loss": 0.8798987865447998, + "step": 1540 + }, + { + "epoch": 0.35506912442396316, + "grad_norm": 0.7030378763019209, + "learning_rate": 1.912530222832719e-06, + "loss": 0.9104069471359253, + "step": 1541 + }, + { + "epoch": 0.35529953917050694, + "grad_norm": 0.6534729730017157, + "learning_rate": 1.9123743306880368e-06, + "loss": 0.7618073225021362, + "step": 1542 + }, + { + "epoch": 0.35552995391705067, + "grad_norm": 0.7461748863693719, + "learning_rate": 1.912218306113428e-06, + "loss": 0.8397510051727295, + "step": 1543 + }, + { + "epoch": 0.35576036866359445, + "grad_norm": 0.7060377086024656, + "learning_rate": 1.91206214913154e-06, + "loss": 0.9884299039840698, + "step": 1544 + }, + { + "epoch": 0.35599078341013823, + "grad_norm": 0.9576081524625122, + "learning_rate": 1.9119058597650385e-06, + "loss": 0.9878349304199219, + "step": 1545 + }, + { + "epoch": 0.356221198156682, + "grad_norm": 0.6493274093007226, + "learning_rate": 1.9117494380366086e-06, + "loss": 0.8790488243103027, + "step": 1546 + }, + { + "epoch": 0.3564516129032258, + "grad_norm": 
0.5310131973918355, + "learning_rate": 1.9115928839689546e-06, + "loss": 0.7390745878219604, + "step": 1547 + }, + { + "epoch": 0.3566820276497696, + "grad_norm": 0.6882029258971281, + "learning_rate": 1.9114361975848004e-06, + "loss": 0.7354288101196289, + "step": 1548 + }, + { + "epoch": 0.35691244239631337, + "grad_norm": 0.7667535594605746, + "learning_rate": 1.911279378906889e-06, + "loss": 0.9234673976898193, + "step": 1549 + }, + { + "epoch": 0.35714285714285715, + "grad_norm": 0.6115013610277281, + "learning_rate": 1.911122427957982e-06, + "loss": 0.8913710117340088, + "step": 1550 + }, + { + "epoch": 0.35737327188940093, + "grad_norm": 0.7050561523779678, + "learning_rate": 1.9109653447608605e-06, + "loss": 0.754358172416687, + "step": 1551 + }, + { + "epoch": 0.3576036866359447, + "grad_norm": 0.784312775933048, + "learning_rate": 1.910808129338325e-06, + "loss": 0.7361906170845032, + "step": 1552 + }, + { + "epoch": 0.3578341013824885, + "grad_norm": 0.7799572736490341, + "learning_rate": 1.9106507817131957e-06, + "loss": 0.8167279362678528, + "step": 1553 + }, + { + "epoch": 0.3580645161290323, + "grad_norm": 0.5335250967831215, + "learning_rate": 1.910493301908311e-06, + "loss": 0.7504739761352539, + "step": 1554 + }, + { + "epoch": 0.358294930875576, + "grad_norm": 0.7032319483863736, + "learning_rate": 1.9103356899465287e-06, + "loss": 0.8452355861663818, + "step": 1555 + }, + { + "epoch": 0.3585253456221198, + "grad_norm": 0.6126249946093243, + "learning_rate": 1.9101779458507263e-06, + "loss": 0.891547679901123, + "step": 1556 + }, + { + "epoch": 0.3587557603686636, + "grad_norm": 0.6935978783962933, + "learning_rate": 1.9100200696438e-06, + "loss": 0.8132680654525757, + "step": 1557 + }, + { + "epoch": 0.35898617511520736, + "grad_norm": 0.6519674133121284, + "learning_rate": 1.9098620613486646e-06, + "loss": 0.799482524394989, + "step": 1558 + }, + { + "epoch": 0.35921658986175115, + "grad_norm": 0.5904521460015955, + "learning_rate": 
1.909703920988256e-06, + "loss": 0.8490267992019653, + "step": 1559 + }, + { + "epoch": 0.35944700460829493, + "grad_norm": 0.6819976276562522, + "learning_rate": 1.9095456485855277e-06, + "loss": 0.8608428239822388, + "step": 1560 + }, + { + "epoch": 0.3596774193548387, + "grad_norm": 0.710056379748393, + "learning_rate": 1.9093872441634526e-06, + "loss": 0.8460499048233032, + "step": 1561 + }, + { + "epoch": 0.3599078341013825, + "grad_norm": 0.7727130217690178, + "learning_rate": 1.9092287077450226e-06, + "loss": 0.9268433451652527, + "step": 1562 + }, + { + "epoch": 0.3601382488479263, + "grad_norm": 0.612809776724531, + "learning_rate": 1.90907003935325e-06, + "loss": 0.7354154586791992, + "step": 1563 + }, + { + "epoch": 0.36036866359447006, + "grad_norm": 0.6941943523357101, + "learning_rate": 1.9089112390111637e-06, + "loss": 0.87982177734375, + "step": 1564 + }, + { + "epoch": 0.36059907834101385, + "grad_norm": 0.7092001355075633, + "learning_rate": 1.9087523067418148e-06, + "loss": 0.994953453540802, + "step": 1565 + }, + { + "epoch": 0.36082949308755763, + "grad_norm": 0.7240785511234525, + "learning_rate": 1.9085932425682715e-06, + "loss": 0.8623256087303162, + "step": 1566 + }, + { + "epoch": 0.36105990783410136, + "grad_norm": 0.7577571727617612, + "learning_rate": 1.908434046513622e-06, + "loss": 0.8752846717834473, + "step": 1567 + }, + { + "epoch": 0.36129032258064514, + "grad_norm": 0.7538020694732109, + "learning_rate": 1.908274718600973e-06, + "loss": 0.9002033472061157, + "step": 1568 + }, + { + "epoch": 0.3615207373271889, + "grad_norm": 0.6751938160957709, + "learning_rate": 1.908115258853451e-06, + "loss": 0.7290444374084473, + "step": 1569 + }, + { + "epoch": 0.3617511520737327, + "grad_norm": 0.5739449847646289, + "learning_rate": 1.9079556672942016e-06, + "loss": 0.6833889484405518, + "step": 1570 + }, + { + "epoch": 0.3619815668202765, + "grad_norm": 0.7271514059808825, + "learning_rate": 1.907795943946389e-06, + "loss": 
1.0033842325210571, + "step": 1571 + }, + { + "epoch": 0.3622119815668203, + "grad_norm": 0.7261786878454322, + "learning_rate": 1.907636088833197e-06, + "loss": 0.9590950012207031, + "step": 1572 + }, + { + "epoch": 0.36244239631336406, + "grad_norm": 0.6796147019608265, + "learning_rate": 1.907476101977828e-06, + "loss": 0.8812122344970703, + "step": 1573 + }, + { + "epoch": 0.36267281105990784, + "grad_norm": 0.5509770826635522, + "learning_rate": 1.9073159834035045e-06, + "loss": 0.7549433708190918, + "step": 1574 + }, + { + "epoch": 0.3629032258064516, + "grad_norm": 0.8344983468044503, + "learning_rate": 1.9071557331334667e-06, + "loss": 0.9235562086105347, + "step": 1575 + }, + { + "epoch": 0.3631336405529954, + "grad_norm": 0.6317903590715543, + "learning_rate": 1.9069953511909755e-06, + "loss": 0.8468542098999023, + "step": 1576 + }, + { + "epoch": 0.3633640552995392, + "grad_norm": 0.5574642699953357, + "learning_rate": 1.9068348375993096e-06, + "loss": 0.8804000616073608, + "step": 1577 + }, + { + "epoch": 0.363594470046083, + "grad_norm": 0.5912501411899118, + "learning_rate": 1.9066741923817676e-06, + "loss": 0.762598991394043, + "step": 1578 + }, + { + "epoch": 0.3638248847926267, + "grad_norm": 0.7706966706442087, + "learning_rate": 1.9065134155616666e-06, + "loss": 0.8791940212249756, + "step": 1579 + }, + { + "epoch": 0.3640552995391705, + "grad_norm": 0.7168527524200441, + "learning_rate": 1.9063525071623439e-06, + "loss": 0.7041842937469482, + "step": 1580 + }, + { + "epoch": 0.36428571428571427, + "grad_norm": 0.6160916310238944, + "learning_rate": 1.9061914672071543e-06, + "loss": 0.9526468515396118, + "step": 1581 + }, + { + "epoch": 0.36451612903225805, + "grad_norm": 0.7118890640067297, + "learning_rate": 1.906030295719473e-06, + "loss": 0.9388316869735718, + "step": 1582 + }, + { + "epoch": 0.36474654377880183, + "grad_norm": 0.6899284739234433, + "learning_rate": 1.9058689927226936e-06, + "loss": 0.7295777797698975, + "step": 1583 + }, + { 
+ "epoch": 0.3649769585253456, + "grad_norm": 0.773766722090894, + "learning_rate": 1.905707558240229e-06, + "loss": 0.7540932297706604, + "step": 1584 + }, + { + "epoch": 0.3652073732718894, + "grad_norm": 0.7012558071518832, + "learning_rate": 1.9055459922955118e-06, + "loss": 0.9457792639732361, + "step": 1585 + }, + { + "epoch": 0.3654377880184332, + "grad_norm": 0.8248538436303866, + "learning_rate": 1.9053842949119923e-06, + "loss": 0.9121883511543274, + "step": 1586 + }, + { + "epoch": 0.36566820276497697, + "grad_norm": 0.7283384308967912, + "learning_rate": 1.905222466113141e-06, + "loss": 0.8140746355056763, + "step": 1587 + }, + { + "epoch": 0.36589861751152075, + "grad_norm": 0.6419705545105435, + "learning_rate": 1.905060505922447e-06, + "loss": 0.7403484582901001, + "step": 1588 + }, + { + "epoch": 0.36612903225806454, + "grad_norm": 0.581047347336086, + "learning_rate": 1.9048984143634188e-06, + "loss": 0.9040734171867371, + "step": 1589 + }, + { + "epoch": 0.3663594470046083, + "grad_norm": 0.8763582049227886, + "learning_rate": 1.9047361914595834e-06, + "loss": 0.9060958623886108, + "step": 1590 + }, + { + "epoch": 0.36658986175115205, + "grad_norm": 0.563240407907546, + "learning_rate": 1.904573837234488e-06, + "loss": 0.6925936937332153, + "step": 1591 + }, + { + "epoch": 0.36682027649769583, + "grad_norm": 0.6465995527416484, + "learning_rate": 1.9044113517116973e-06, + "loss": 0.8120197057723999, + "step": 1592 + }, + { + "epoch": 0.3670506912442396, + "grad_norm": 0.6544256373051048, + "learning_rate": 1.9042487349147965e-06, + "loss": 0.796414852142334, + "step": 1593 + }, + { + "epoch": 0.3672811059907834, + "grad_norm": 0.5916998574283423, + "learning_rate": 1.9040859868673885e-06, + "loss": 0.8390822410583496, + "step": 1594 + }, + { + "epoch": 0.3675115207373272, + "grad_norm": 0.6567403008386238, + "learning_rate": 1.9039231075930967e-06, + "loss": 0.990093469619751, + "step": 1595 + }, + { + "epoch": 0.36774193548387096, + "grad_norm": 
0.733917290012865, + "learning_rate": 1.9037600971155623e-06, + "loss": 0.8548597097396851, + "step": 1596 + }, + { + "epoch": 0.36797235023041475, + "grad_norm": 0.5429475903618856, + "learning_rate": 1.9035969554584464e-06, + "loss": 0.687299370765686, + "step": 1597 + }, + { + "epoch": 0.36820276497695853, + "grad_norm": 0.9276548262086025, + "learning_rate": 1.9034336826454282e-06, + "loss": 0.7857942581176758, + "step": 1598 + }, + { + "epoch": 0.3684331797235023, + "grad_norm": 0.7345227244712206, + "learning_rate": 1.9032702787002072e-06, + "loss": 0.8836538195610046, + "step": 1599 + }, + { + "epoch": 0.3686635944700461, + "grad_norm": 0.723858907192251, + "learning_rate": 1.9031067436465011e-06, + "loss": 0.8132715225219727, + "step": 1600 + }, + { + "epoch": 0.3688940092165899, + "grad_norm": 0.6649285274594987, + "learning_rate": 1.9029430775080467e-06, + "loss": 0.7632347345352173, + "step": 1601 + }, + { + "epoch": 0.36912442396313366, + "grad_norm": 0.6319858893374919, + "learning_rate": 1.9027792803086e-06, + "loss": 0.8616297841072083, + "step": 1602 + }, + { + "epoch": 0.36935483870967745, + "grad_norm": 0.6067565637769744, + "learning_rate": 1.9026153520719358e-06, + "loss": 0.8418172597885132, + "step": 1603 + }, + { + "epoch": 0.3695852534562212, + "grad_norm": 0.7094320350542224, + "learning_rate": 1.902451292821848e-06, + "loss": 0.7253717184066772, + "step": 1604 + }, + { + "epoch": 0.36981566820276496, + "grad_norm": 0.8059000016280097, + "learning_rate": 1.90228710258215e-06, + "loss": 0.9746035933494568, + "step": 1605 + }, + { + "epoch": 0.37004608294930874, + "grad_norm": 0.5259402340057983, + "learning_rate": 1.9021227813766733e-06, + "loss": 0.7722853422164917, + "step": 1606 + }, + { + "epoch": 0.3702764976958525, + "grad_norm": 0.6925264238716391, + "learning_rate": 1.9019583292292693e-06, + "loss": 0.8278614282608032, + "step": 1607 + }, + { + "epoch": 0.3705069124423963, + "grad_norm": 0.6439238935194896, + "learning_rate": 
1.9017937461638078e-06, + "loss": 0.7433085441589355, + "step": 1608 + }, + { + "epoch": 0.3707373271889401, + "grad_norm": 0.5505689424398915, + "learning_rate": 1.901629032204178e-06, + "loss": 0.9194153547286987, + "step": 1609 + }, + { + "epoch": 0.3709677419354839, + "grad_norm": 0.5866951472740422, + "learning_rate": 1.9014641873742877e-06, + "loss": 0.8502616882324219, + "step": 1610 + }, + { + "epoch": 0.37119815668202766, + "grad_norm": 0.6242266615517361, + "learning_rate": 1.9012992116980637e-06, + "loss": 0.8494570255279541, + "step": 1611 + }, + { + "epoch": 0.37142857142857144, + "grad_norm": 0.7369836132356214, + "learning_rate": 1.9011341051994526e-06, + "loss": 0.8567800521850586, + "step": 1612 + }, + { + "epoch": 0.3716589861751152, + "grad_norm": 0.6246604791910833, + "learning_rate": 1.9009688679024189e-06, + "loss": 0.7739682197570801, + "step": 1613 + }, + { + "epoch": 0.371889400921659, + "grad_norm": 0.754158311495332, + "learning_rate": 1.900803499830947e-06, + "loss": 0.8548814058303833, + "step": 1614 + }, + { + "epoch": 0.3721198156682028, + "grad_norm": 0.5813822362984273, + "learning_rate": 1.9006380010090395e-06, + "loss": 0.7444359064102173, + "step": 1615 + }, + { + "epoch": 0.3723502304147465, + "grad_norm": 1.02732235167255, + "learning_rate": 1.9004723714607183e-06, + "loss": 1.0483827590942383, + "step": 1616 + }, + { + "epoch": 0.3725806451612903, + "grad_norm": 0.7020606936102383, + "learning_rate": 1.9003066112100248e-06, + "loss": 0.7734435200691223, + "step": 1617 + }, + { + "epoch": 0.3728110599078341, + "grad_norm": 0.7388837596699729, + "learning_rate": 1.9001407202810181e-06, + "loss": 0.856806755065918, + "step": 1618 + }, + { + "epoch": 0.37304147465437787, + "grad_norm": 0.6630252498689021, + "learning_rate": 1.8999746986977776e-06, + "loss": 0.8708832263946533, + "step": 1619 + }, + { + "epoch": 0.37327188940092165, + "grad_norm": 0.7833548721469644, + "learning_rate": 1.899808546484401e-06, + "loss": 
0.9295653104782104, + "step": 1620 + }, + { + "epoch": 0.37350230414746544, + "grad_norm": 0.8120612065986471, + "learning_rate": 1.8996422636650054e-06, + "loss": 0.8799598217010498, + "step": 1621 + }, + { + "epoch": 0.3737327188940092, + "grad_norm": 0.6113644757026901, + "learning_rate": 1.8994758502637259e-06, + "loss": 0.8014140725135803, + "step": 1622 + }, + { + "epoch": 0.373963133640553, + "grad_norm": 0.7305462035644114, + "learning_rate": 1.8993093063047174e-06, + "loss": 0.8252615928649902, + "step": 1623 + }, + { + "epoch": 0.3741935483870968, + "grad_norm": 0.5571708900709818, + "learning_rate": 1.899142631812154e-06, + "loss": 0.8617361783981323, + "step": 1624 + }, + { + "epoch": 0.37442396313364057, + "grad_norm": 0.7088005059034134, + "learning_rate": 1.8989758268102274e-06, + "loss": 0.9316745400428772, + "step": 1625 + }, + { + "epoch": 0.37465437788018435, + "grad_norm": 0.5449801119846465, + "learning_rate": 1.89880889132315e-06, + "loss": 0.8195457458496094, + "step": 1626 + }, + { + "epoch": 0.37488479262672814, + "grad_norm": 0.7143201633211917, + "learning_rate": 1.8986418253751516e-06, + "loss": 0.7828787565231323, + "step": 1627 + }, + { + "epoch": 0.37511520737327186, + "grad_norm": 0.6506165386805676, + "learning_rate": 1.898474628990482e-06, + "loss": 0.8130955696105957, + "step": 1628 + }, + { + "epoch": 0.37534562211981565, + "grad_norm": 0.7388682274593752, + "learning_rate": 1.8983073021934097e-06, + "loss": 0.9925695657730103, + "step": 1629 + }, + { + "epoch": 0.37557603686635943, + "grad_norm": 0.7851734301973293, + "learning_rate": 1.8981398450082216e-06, + "loss": 0.8547999858856201, + "step": 1630 + }, + { + "epoch": 0.3758064516129032, + "grad_norm": 0.7016894400602667, + "learning_rate": 1.897972257459224e-06, + "loss": 0.8922954797744751, + "step": 1631 + }, + { + "epoch": 0.376036866359447, + "grad_norm": 0.641235710173759, + "learning_rate": 1.8978045395707415e-06, + "loss": 0.8553646802902222, + "step": 1632 + }, + { 
+ "epoch": 0.3762672811059908, + "grad_norm": 0.6780369843564141, + "learning_rate": 1.897636691367119e-06, + "loss": 0.7854139804840088, + "step": 1633 + }, + { + "epoch": 0.37649769585253456, + "grad_norm": 0.8291834208164379, + "learning_rate": 1.897468712872719e-06, + "loss": 0.8968626260757446, + "step": 1634 + }, + { + "epoch": 0.37672811059907835, + "grad_norm": 0.8135056284613995, + "learning_rate": 1.8973006041119234e-06, + "loss": 0.8898152112960815, + "step": 1635 + }, + { + "epoch": 0.37695852534562213, + "grad_norm": 0.7215595529410248, + "learning_rate": 1.8971323651091332e-06, + "loss": 0.8499374389648438, + "step": 1636 + }, + { + "epoch": 0.3771889400921659, + "grad_norm": 0.5955881573233954, + "learning_rate": 1.8969639958887677e-06, + "loss": 0.7803430557250977, + "step": 1637 + }, + { + "epoch": 0.3774193548387097, + "grad_norm": 0.672225539346555, + "learning_rate": 1.8967954964752657e-06, + "loss": 0.7669799327850342, + "step": 1638 + }, + { + "epoch": 0.3776497695852535, + "grad_norm": 0.7164416850564317, + "learning_rate": 1.8966268668930845e-06, + "loss": 0.9085204601287842, + "step": 1639 + }, + { + "epoch": 0.3778801843317972, + "grad_norm": 0.8492247946008473, + "learning_rate": 1.8964581071667005e-06, + "loss": 0.7793002724647522, + "step": 1640 + }, + { + "epoch": 0.378110599078341, + "grad_norm": 0.6359200183287212, + "learning_rate": 1.896289217320609e-06, + "loss": 0.8649430274963379, + "step": 1641 + }, + { + "epoch": 0.3783410138248848, + "grad_norm": 0.6424804906800053, + "learning_rate": 1.8961201973793243e-06, + "loss": 0.856898844242096, + "step": 1642 + }, + { + "epoch": 0.37857142857142856, + "grad_norm": 0.7702312360726356, + "learning_rate": 1.895951047367379e-06, + "loss": 0.8221957087516785, + "step": 1643 + }, + { + "epoch": 0.37880184331797234, + "grad_norm": 0.7163935487823062, + "learning_rate": 1.8957817673093256e-06, + "loss": 0.8158079385757446, + "step": 1644 + }, + { + "epoch": 0.3790322580645161, + "grad_norm": 
0.8008902981825888, + "learning_rate": 1.8956123572297343e-06, + "loss": 0.7803312540054321, + "step": 1645 + }, + { + "epoch": 0.3792626728110599, + "grad_norm": 0.7902834195938876, + "learning_rate": 1.8954428171531949e-06, + "loss": 1.035685420036316, + "step": 1646 + }, + { + "epoch": 0.3794930875576037, + "grad_norm": 0.6044824314396153, + "learning_rate": 1.8952731471043161e-06, + "loss": 0.6871123313903809, + "step": 1647 + }, + { + "epoch": 0.3797235023041475, + "grad_norm": 0.6400629937897654, + "learning_rate": 1.8951033471077253e-06, + "loss": 0.9651780128479004, + "step": 1648 + }, + { + "epoch": 0.37995391705069126, + "grad_norm": 0.7485926311468839, + "learning_rate": 1.8949334171880687e-06, + "loss": 1.018349528312683, + "step": 1649 + }, + { + "epoch": 0.38018433179723504, + "grad_norm": 0.6571349103626993, + "learning_rate": 1.894763357370011e-06, + "loss": 0.6839278936386108, + "step": 1650 + }, + { + "epoch": 0.3804147465437788, + "grad_norm": 0.6757724586058976, + "learning_rate": 1.894593167678237e-06, + "loss": 0.8442174196243286, + "step": 1651 + }, + { + "epoch": 0.38064516129032255, + "grad_norm": 0.6368918088972565, + "learning_rate": 1.8944228481374484e-06, + "loss": 0.8224585056304932, + "step": 1652 + }, + { + "epoch": 0.38087557603686634, + "grad_norm": 0.6970802562618803, + "learning_rate": 1.8942523987723678e-06, + "loss": 0.8570500612258911, + "step": 1653 + }, + { + "epoch": 0.3811059907834101, + "grad_norm": 0.731718201815575, + "learning_rate": 1.8940818196077354e-06, + "loss": 0.7696554660797119, + "step": 1654 + }, + { + "epoch": 0.3813364055299539, + "grad_norm": 0.7456139352122005, + "learning_rate": 1.8939111106683103e-06, + "loss": 0.822563886642456, + "step": 1655 + }, + { + "epoch": 0.3815668202764977, + "grad_norm": 0.46565320695076334, + "learning_rate": 1.8937402719788711e-06, + "loss": 0.6537219882011414, + "step": 1656 + }, + { + "epoch": 0.38179723502304147, + "grad_norm": 0.8414098679023442, + "learning_rate": 
1.8935693035642145e-06, + "loss": 0.9081932306289673, + "step": 1657 + }, + { + "epoch": 0.38202764976958525, + "grad_norm": 0.5018818977531995, + "learning_rate": 1.8933982054491563e-06, + "loss": 0.6839661598205566, + "step": 1658 + }, + { + "epoch": 0.38225806451612904, + "grad_norm": 0.6964355972832653, + "learning_rate": 1.8932269776585313e-06, + "loss": 0.9187283515930176, + "step": 1659 + }, + { + "epoch": 0.3824884792626728, + "grad_norm": 0.8100260748701062, + "learning_rate": 1.893055620217193e-06, + "loss": 0.9567047357559204, + "step": 1660 + }, + { + "epoch": 0.3827188940092166, + "grad_norm": 0.7345697660292878, + "learning_rate": 1.8928841331500136e-06, + "loss": 0.785561203956604, + "step": 1661 + }, + { + "epoch": 0.3829493087557604, + "grad_norm": 0.882033286363023, + "learning_rate": 1.8927125164818842e-06, + "loss": 0.8986088037490845, + "step": 1662 + }, + { + "epoch": 0.38317972350230417, + "grad_norm": 0.7191553093714457, + "learning_rate": 1.892540770237715e-06, + "loss": 1.0027087926864624, + "step": 1663 + }, + { + "epoch": 0.38341013824884795, + "grad_norm": 0.6970721775230337, + "learning_rate": 1.8923688944424346e-06, + "loss": 0.8502041697502136, + "step": 1664 + }, + { + "epoch": 0.3836405529953917, + "grad_norm": 0.6684142159321271, + "learning_rate": 1.8921968891209907e-06, + "loss": 0.8526991605758667, + "step": 1665 + }, + { + "epoch": 0.38387096774193546, + "grad_norm": 0.7082372977886758, + "learning_rate": 1.8920247542983492e-06, + "loss": 0.8084676265716553, + "step": 1666 + }, + { + "epoch": 0.38410138248847925, + "grad_norm": 0.6206558140284871, + "learning_rate": 1.8918524899994957e-06, + "loss": 0.8922938704490662, + "step": 1667 + }, + { + "epoch": 0.38433179723502303, + "grad_norm": 0.768771022868596, + "learning_rate": 1.8916800962494337e-06, + "loss": 0.7965600490570068, + "step": 1668 + }, + { + "epoch": 0.3845622119815668, + "grad_norm": 0.6752105100256773, + "learning_rate": 1.8915075730731865e-06, + "loss": 
0.9505549073219299, + "step": 1669 + }, + { + "epoch": 0.3847926267281106, + "grad_norm": 0.6897214722687708, + "learning_rate": 1.8913349204957947e-06, + "loss": 0.9459924697875977, + "step": 1670 + }, + { + "epoch": 0.3850230414746544, + "grad_norm": 0.6215985429421047, + "learning_rate": 1.8911621385423195e-06, + "loss": 0.8433674573898315, + "step": 1671 + }, + { + "epoch": 0.38525345622119817, + "grad_norm": 0.7790027974124772, + "learning_rate": 1.8909892272378398e-06, + "loss": 0.8945955038070679, + "step": 1672 + }, + { + "epoch": 0.38548387096774195, + "grad_norm": 0.6828005324330048, + "learning_rate": 1.890816186607453e-06, + "loss": 0.8580358624458313, + "step": 1673 + }, + { + "epoch": 0.38571428571428573, + "grad_norm": 0.6249387555876122, + "learning_rate": 1.8906430166762761e-06, + "loss": 0.7708698511123657, + "step": 1674 + }, + { + "epoch": 0.3859447004608295, + "grad_norm": 0.7418139824839276, + "learning_rate": 1.8904697174694446e-06, + "loss": 0.8647153377532959, + "step": 1675 + }, + { + "epoch": 0.3861751152073733, + "grad_norm": 0.7428074816121766, + "learning_rate": 1.890296289012112e-06, + "loss": 0.9380506277084351, + "step": 1676 + }, + { + "epoch": 0.386405529953917, + "grad_norm": 0.6218965089791644, + "learning_rate": 1.8901227313294519e-06, + "loss": 0.8814103603363037, + "step": 1677 + }, + { + "epoch": 0.3866359447004608, + "grad_norm": 0.7768206335574417, + "learning_rate": 1.8899490444466556e-06, + "loss": 0.9348419904708862, + "step": 1678 + }, + { + "epoch": 0.3868663594470046, + "grad_norm": 0.5956095891599564, + "learning_rate": 1.8897752283889338e-06, + "loss": 0.7502046823501587, + "step": 1679 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 0.567040551050712, + "learning_rate": 1.8896012831815155e-06, + "loss": 0.8499769568443298, + "step": 1680 + }, + { + "epoch": 0.38732718894009216, + "grad_norm": 0.6506272613615357, + "learning_rate": 1.8894272088496487e-06, + "loss": 0.8253993391990662, + "step": 1681 + }, + { 
+ "epoch": 0.38755760368663594, + "grad_norm": 0.7707626449058277, + "learning_rate": 1.8892530054185998e-06, + "loss": 0.8494073152542114, + "step": 1682 + }, + { + "epoch": 0.3877880184331797, + "grad_norm": 0.7608738547672518, + "learning_rate": 1.8890786729136546e-06, + "loss": 0.8836106061935425, + "step": 1683 + }, + { + "epoch": 0.3880184331797235, + "grad_norm": 0.636256009552465, + "learning_rate": 1.8889042113601166e-06, + "loss": 0.8949145078659058, + "step": 1684 + }, + { + "epoch": 0.3882488479262673, + "grad_norm": 0.5966436023392323, + "learning_rate": 1.8887296207833095e-06, + "loss": 0.6210965514183044, + "step": 1685 + }, + { + "epoch": 0.3884792626728111, + "grad_norm": 0.8527942588919344, + "learning_rate": 1.8885549012085744e-06, + "loss": 0.9216527938842773, + "step": 1686 + }, + { + "epoch": 0.38870967741935486, + "grad_norm": 0.6878600463475216, + "learning_rate": 1.8883800526612715e-06, + "loss": 0.9266358613967896, + "step": 1687 + }, + { + "epoch": 0.38894009216589864, + "grad_norm": 0.7261249184769291, + "learning_rate": 1.88820507516678e-06, + "loss": 0.8550606966018677, + "step": 1688 + }, + { + "epoch": 0.38917050691244237, + "grad_norm": 0.702582367534852, + "learning_rate": 1.888029968750498e-06, + "loss": 0.8632181882858276, + "step": 1689 + }, + { + "epoch": 0.38940092165898615, + "grad_norm": 0.8055419508573982, + "learning_rate": 1.8878547334378415e-06, + "loss": 0.8795493841171265, + "step": 1690 + }, + { + "epoch": 0.38963133640552994, + "grad_norm": 0.8491490559655837, + "learning_rate": 1.8876793692542456e-06, + "loss": 0.9750456809997559, + "step": 1691 + }, + { + "epoch": 0.3898617511520737, + "grad_norm": 0.7818793926101317, + "learning_rate": 1.8875038762251645e-06, + "loss": 0.9270161390304565, + "step": 1692 + }, + { + "epoch": 0.3900921658986175, + "grad_norm": 0.7260894881906815, + "learning_rate": 1.8873282543760705e-06, + "loss": 0.8154089450836182, + "step": 1693 + }, + { + "epoch": 0.3903225806451613, + 
"grad_norm": 0.692223503364103, + "learning_rate": 1.887152503732455e-06, + "loss": 0.9245043992996216, + "step": 1694 + }, + { + "epoch": 0.39055299539170507, + "grad_norm": 0.7622355519095229, + "learning_rate": 1.8869766243198284e-06, + "loss": 0.9218056201934814, + "step": 1695 + }, + { + "epoch": 0.39078341013824885, + "grad_norm": 0.5749624768358436, + "learning_rate": 1.8868006161637192e-06, + "loss": 0.7753894329071045, + "step": 1696 + }, + { + "epoch": 0.39101382488479264, + "grad_norm": 0.7181901167791495, + "learning_rate": 1.8866244792896739e-06, + "loss": 0.8455277681350708, + "step": 1697 + }, + { + "epoch": 0.3912442396313364, + "grad_norm": 0.7361657621974459, + "learning_rate": 1.8864482137232596e-06, + "loss": 0.8301571607589722, + "step": 1698 + }, + { + "epoch": 0.3914746543778802, + "grad_norm": 0.5504243602930398, + "learning_rate": 1.8862718194900602e-06, + "loss": 0.9768285155296326, + "step": 1699 + }, + { + "epoch": 0.391705069124424, + "grad_norm": 0.7416616964447972, + "learning_rate": 1.8860952966156798e-06, + "loss": 0.9659395217895508, + "step": 1700 + }, + { + "epoch": 0.3919354838709677, + "grad_norm": 0.731283063502841, + "learning_rate": 1.8859186451257401e-06, + "loss": 0.9975444078445435, + "step": 1701 + }, + { + "epoch": 0.3921658986175115, + "grad_norm": 0.712824030540976, + "learning_rate": 1.8857418650458816e-06, + "loss": 0.9248796701431274, + "step": 1702 + }, + { + "epoch": 0.3923963133640553, + "grad_norm": 0.6864309886370629, + "learning_rate": 1.8855649564017642e-06, + "loss": 0.8792428970336914, + "step": 1703 + }, + { + "epoch": 0.39262672811059907, + "grad_norm": 0.7264626081176593, + "learning_rate": 1.8853879192190657e-06, + "loss": 0.8387417197227478, + "step": 1704 + }, + { + "epoch": 0.39285714285714285, + "grad_norm": 0.707677593822268, + "learning_rate": 1.8852107535234828e-06, + "loss": 0.7020218372344971, + "step": 1705 + }, + { + "epoch": 0.39308755760368663, + "grad_norm": 0.673092322659609, + 
"learning_rate": 1.885033459340731e-06, + "loss": 0.7388321161270142, + "step": 1706 + }, + { + "epoch": 0.3933179723502304, + "grad_norm": 0.7503922468030345, + "learning_rate": 1.8848560366965441e-06, + "loss": 0.7536240220069885, + "step": 1707 + }, + { + "epoch": 0.3935483870967742, + "grad_norm": 0.7237343332600692, + "learning_rate": 1.8846784856166746e-06, + "loss": 0.747667670249939, + "step": 1708 + }, + { + "epoch": 0.393778801843318, + "grad_norm": 0.7263541821971573, + "learning_rate": 1.8845008061268945e-06, + "loss": 0.8068975210189819, + "step": 1709 + }, + { + "epoch": 0.39400921658986177, + "grad_norm": 0.7581453840562968, + "learning_rate": 1.8843229982529932e-06, + "loss": 0.7613410949707031, + "step": 1710 + }, + { + "epoch": 0.39423963133640555, + "grad_norm": 0.6546080156681554, + "learning_rate": 1.8841450620207793e-06, + "loss": 0.8579158782958984, + "step": 1711 + }, + { + "epoch": 0.39447004608294933, + "grad_norm": 0.6400652758844664, + "learning_rate": 1.88396699745608e-06, + "loss": 0.8754673004150391, + "step": 1712 + }, + { + "epoch": 0.39470046082949306, + "grad_norm": 0.7227539443635326, + "learning_rate": 1.8837888045847415e-06, + "loss": 0.7988177537918091, + "step": 1713 + }, + { + "epoch": 0.39493087557603684, + "grad_norm": 0.7533730909693769, + "learning_rate": 1.8836104834326279e-06, + "loss": 0.8658367395401001, + "step": 1714 + }, + { + "epoch": 0.3951612903225806, + "grad_norm": 0.7819630929666835, + "learning_rate": 1.8834320340256223e-06, + "loss": 0.8777489066123962, + "step": 1715 + }, + { + "epoch": 0.3953917050691244, + "grad_norm": 0.6763778401068745, + "learning_rate": 1.8832534563896264e-06, + "loss": 0.9785901308059692, + "step": 1716 + }, + { + "epoch": 0.3956221198156682, + "grad_norm": 0.7796554840537433, + "learning_rate": 1.883074750550561e-06, + "loss": 0.847503125667572, + "step": 1717 + }, + { + "epoch": 0.395852534562212, + "grad_norm": 0.7786503806499795, + "learning_rate": 1.8828959165343643e-06, + 
"loss": 1.0159538984298706, + "step": 1718 + }, + { + "epoch": 0.39608294930875576, + "grad_norm": 0.8472423063084373, + "learning_rate": 1.882716954366994e-06, + "loss": 0.9064888954162598, + "step": 1719 + }, + { + "epoch": 0.39631336405529954, + "grad_norm": 0.7664117713246195, + "learning_rate": 1.8825378640744264e-06, + "loss": 0.956849217414856, + "step": 1720 + }, + { + "epoch": 0.3965437788018433, + "grad_norm": 0.758389558529891, + "learning_rate": 1.882358645682656e-06, + "loss": 0.8983441591262817, + "step": 1721 + }, + { + "epoch": 0.3967741935483871, + "grad_norm": 0.5702990900386659, + "learning_rate": 1.8821792992176967e-06, + "loss": 0.7698956727981567, + "step": 1722 + }, + { + "epoch": 0.3970046082949309, + "grad_norm": 0.8118873070872795, + "learning_rate": 1.8819998247055797e-06, + "loss": 0.9376351833343506, + "step": 1723 + }, + { + "epoch": 0.3972350230414747, + "grad_norm": 0.8486728692509508, + "learning_rate": 1.881820222172356e-06, + "loss": 0.8776079416275024, + "step": 1724 + }, + { + "epoch": 0.39746543778801846, + "grad_norm": 0.9552617438975642, + "learning_rate": 1.8816404916440942e-06, + "loss": 0.9776726961135864, + "step": 1725 + }, + { + "epoch": 0.3976958525345622, + "grad_norm": 0.5841959382882552, + "learning_rate": 1.8814606331468822e-06, + "loss": 0.7699686288833618, + "step": 1726 + }, + { + "epoch": 0.39792626728110597, + "grad_norm": 0.7581748259398383, + "learning_rate": 1.8812806467068265e-06, + "loss": 0.8256866931915283, + "step": 1727 + }, + { + "epoch": 0.39815668202764976, + "grad_norm": 0.6320724280659841, + "learning_rate": 1.881100532350051e-06, + "loss": 0.8493847846984863, + "step": 1728 + }, + { + "epoch": 0.39838709677419354, + "grad_norm": 0.6592895509903398, + "learning_rate": 1.8809202901027002e-06, + "loss": 0.8138688802719116, + "step": 1729 + }, + { + "epoch": 0.3986175115207373, + "grad_norm": 0.7569638843586648, + "learning_rate": 1.880739919990935e-06, + "loss": 0.8637882471084595, + "step": 1730 + 
}, + { + "epoch": 0.3988479262672811, + "grad_norm": 0.5847233582227849, + "learning_rate": 1.880559422040937e-06, + "loss": 0.8988152742385864, + "step": 1731 + }, + { + "epoch": 0.3990783410138249, + "grad_norm": 0.4724369020135308, + "learning_rate": 1.880378796278904e-06, + "loss": 0.8247279524803162, + "step": 1732 + }, + { + "epoch": 0.39930875576036867, + "grad_norm": 0.8071560192562027, + "learning_rate": 1.8801980427310546e-06, + "loss": 0.9699070453643799, + "step": 1733 + }, + { + "epoch": 0.39953917050691246, + "grad_norm": 0.8108307817175047, + "learning_rate": 1.8800171614236241e-06, + "loss": 0.9516465663909912, + "step": 1734 + }, + { + "epoch": 0.39976958525345624, + "grad_norm": 0.655632769560408, + "learning_rate": 1.879836152382868e-06, + "loss": 0.9553602933883667, + "step": 1735 + }, + { + "epoch": 0.4, + "grad_norm": 0.666214042250043, + "learning_rate": 1.879655015635059e-06, + "loss": 0.7805094718933105, + "step": 1736 + }, + { + "epoch": 0.4002304147465438, + "grad_norm": 0.730264537734651, + "learning_rate": 1.8794737512064888e-06, + "loss": 0.9509962797164917, + "step": 1737 + }, + { + "epoch": 0.40046082949308753, + "grad_norm": 0.6755335543884481, + "learning_rate": 1.8792923591234683e-06, + "loss": 0.8663454055786133, + "step": 1738 + }, + { + "epoch": 0.4006912442396313, + "grad_norm": 0.7325230471707477, + "learning_rate": 1.8791108394123257e-06, + "loss": 0.8773336410522461, + "step": 1739 + }, + { + "epoch": 0.4009216589861751, + "grad_norm": 0.6493515009165077, + "learning_rate": 1.8789291920994086e-06, + "loss": 0.7201284766197205, + "step": 1740 + }, + { + "epoch": 0.4011520737327189, + "grad_norm": 0.6665806307840867, + "learning_rate": 1.8787474172110826e-06, + "loss": 0.799161434173584, + "step": 1741 + }, + { + "epoch": 0.40138248847926267, + "grad_norm": 0.8651407328311, + "learning_rate": 1.8785655147737326e-06, + "loss": 0.8987375497817993, + "step": 1742 + }, + { + "epoch": 0.40161290322580645, + "grad_norm": 
0.8706739093465035, + "learning_rate": 1.878383484813761e-06, + "loss": 0.8553296327590942, + "step": 1743 + }, + { + "epoch": 0.40184331797235023, + "grad_norm": 0.6706596266673751, + "learning_rate": 1.8782013273575895e-06, + "loss": 0.8376551270484924, + "step": 1744 + }, + { + "epoch": 0.402073732718894, + "grad_norm": 0.7963067027250083, + "learning_rate": 1.8780190424316578e-06, + "loss": 0.8220775723457336, + "step": 1745 + }, + { + "epoch": 0.4023041474654378, + "grad_norm": 0.7339356821882034, + "learning_rate": 1.8778366300624244e-06, + "loss": 0.8614820241928101, + "step": 1746 + }, + { + "epoch": 0.4025345622119816, + "grad_norm": 0.8065421465945496, + "learning_rate": 1.8776540902763665e-06, + "loss": 0.9434851408004761, + "step": 1747 + }, + { + "epoch": 0.40276497695852537, + "grad_norm": 0.8102544073977809, + "learning_rate": 1.877471423099979e-06, + "loss": 0.8150373101234436, + "step": 1748 + }, + { + "epoch": 0.40299539170506915, + "grad_norm": 0.5910178895755134, + "learning_rate": 1.8772886285597762e-06, + "loss": 0.7660368084907532, + "step": 1749 + }, + { + "epoch": 0.4032258064516129, + "grad_norm": 0.7262631962712356, + "learning_rate": 1.8771057066822903e-06, + "loss": 0.7647032141685486, + "step": 1750 + }, + { + "epoch": 0.40345622119815666, + "grad_norm": 0.6238918567790319, + "learning_rate": 1.8769226574940723e-06, + "loss": 0.6034061908721924, + "step": 1751 + }, + { + "epoch": 0.40368663594470044, + "grad_norm": 0.7344154412243011, + "learning_rate": 1.8767394810216914e-06, + "loss": 1.0062675476074219, + "step": 1752 + }, + { + "epoch": 0.40391705069124423, + "grad_norm": 0.6966552417777933, + "learning_rate": 1.8765561772917354e-06, + "loss": 0.9791489839553833, + "step": 1753 + }, + { + "epoch": 0.404147465437788, + "grad_norm": 0.5825611392130148, + "learning_rate": 1.8763727463308108e-06, + "loss": 0.9054251909255981, + "step": 1754 + }, + { + "epoch": 0.4043778801843318, + "grad_norm": 0.7455727854900284, + "learning_rate": 
1.8761891881655423e-06, + "loss": 0.9156093597412109, + "step": 1755 + }, + { + "epoch": 0.4046082949308756, + "grad_norm": 0.6983601123297067, + "learning_rate": 1.876005502822573e-06, + "loss": 0.7525647878646851, + "step": 1756 + }, + { + "epoch": 0.40483870967741936, + "grad_norm": 0.6156689393045622, + "learning_rate": 1.8758216903285643e-06, + "loss": 0.8321493864059448, + "step": 1757 + }, + { + "epoch": 0.40506912442396314, + "grad_norm": 0.888147060404811, + "learning_rate": 1.8756377507101973e-06, + "loss": 0.9937042593955994, + "step": 1758 + }, + { + "epoch": 0.40529953917050693, + "grad_norm": 0.553604524827559, + "learning_rate": 1.8754536839941694e-06, + "loss": 0.7001460790634155, + "step": 1759 + }, + { + "epoch": 0.4055299539170507, + "grad_norm": 0.7747422377442987, + "learning_rate": 1.8752694902071986e-06, + "loss": 1.0062569379806519, + "step": 1760 + }, + { + "epoch": 0.4057603686635945, + "grad_norm": 0.7145787925683823, + "learning_rate": 1.8750851693760199e-06, + "loss": 0.7414188385009766, + "step": 1761 + }, + { + "epoch": 0.4059907834101382, + "grad_norm": 0.6306403135362045, + "learning_rate": 1.8749007215273873e-06, + "loss": 0.7181771397590637, + "step": 1762 + }, + { + "epoch": 0.406221198156682, + "grad_norm": 0.7763317855361268, + "learning_rate": 1.8747161466880732e-06, + "loss": 0.8797845244407654, + "step": 1763 + }, + { + "epoch": 0.4064516129032258, + "grad_norm": 0.6123636271862207, + "learning_rate": 1.8745314448848684e-06, + "loss": 0.7774960398674011, + "step": 1764 + }, + { + "epoch": 0.4066820276497696, + "grad_norm": 0.9110978120854332, + "learning_rate": 1.874346616144582e-06, + "loss": 0.8499422073364258, + "step": 1765 + }, + { + "epoch": 0.40691244239631336, + "grad_norm": 0.6306854745937814, + "learning_rate": 1.874161660494042e-06, + "loss": 0.7070250511169434, + "step": 1766 + }, + { + "epoch": 0.40714285714285714, + "grad_norm": 0.6762437905211294, + "learning_rate": 1.8739765779600939e-06, + "loss": 
0.8009281158447266, + "step": 1767 + }, + { + "epoch": 0.4073732718894009, + "grad_norm": 0.6084135312041689, + "learning_rate": 1.8737913685696027e-06, + "loss": 0.6866155862808228, + "step": 1768 + }, + { + "epoch": 0.4076036866359447, + "grad_norm": 0.7813040754942882, + "learning_rate": 1.873606032349451e-06, + "loss": 0.8200059533119202, + "step": 1769 + }, + { + "epoch": 0.4078341013824885, + "grad_norm": 0.629385301974861, + "learning_rate": 1.8734205693265404e-06, + "loss": 0.8413814902305603, + "step": 1770 + }, + { + "epoch": 0.4080645161290323, + "grad_norm": 0.776612651465312, + "learning_rate": 1.8732349795277903e-06, + "loss": 0.9935271143913269, + "step": 1771 + }, + { + "epoch": 0.40829493087557606, + "grad_norm": 0.6589503544607032, + "learning_rate": 1.873049262980139e-06, + "loss": 0.8718058466911316, + "step": 1772 + }, + { + "epoch": 0.40852534562211984, + "grad_norm": 0.8620050398467397, + "learning_rate": 1.8728634197105428e-06, + "loss": 0.9009358882904053, + "step": 1773 + }, + { + "epoch": 0.40875576036866357, + "grad_norm": 0.7755306532739165, + "learning_rate": 1.8726774497459768e-06, + "loss": 0.9128156900405884, + "step": 1774 + }, + { + "epoch": 0.40898617511520735, + "grad_norm": 0.6450271750629438, + "learning_rate": 1.8724913531134342e-06, + "loss": 0.8524078130722046, + "step": 1775 + }, + { + "epoch": 0.40921658986175113, + "grad_norm": 0.7569328214438452, + "learning_rate": 1.872305129839927e-06, + "loss": 0.9431420564651489, + "step": 1776 + }, + { + "epoch": 0.4094470046082949, + "grad_norm": 0.6746261931292995, + "learning_rate": 1.8721187799524846e-06, + "loss": 0.7666694521903992, + "step": 1777 + }, + { + "epoch": 0.4096774193548387, + "grad_norm": 0.6448149830483173, + "learning_rate": 1.871932303478156e-06, + "loss": 0.872551679611206, + "step": 1778 + }, + { + "epoch": 0.4099078341013825, + "grad_norm": 0.6320914450645303, + "learning_rate": 1.8717457004440079e-06, + "loss": 0.7596250176429749, + "step": 1779 + }, + { + 
"epoch": 0.41013824884792627, + "grad_norm": 0.9751786230729174, + "learning_rate": 1.8715589708771253e-06, + "loss": 1.0098414421081543, + "step": 1780 + }, + { + "epoch": 0.41036866359447005, + "grad_norm": 0.9695096083628231, + "learning_rate": 1.871372114804612e-06, + "loss": 0.9961523413658142, + "step": 1781 + }, + { + "epoch": 0.41059907834101383, + "grad_norm": 0.8458697864526913, + "learning_rate": 1.8711851322535896e-06, + "loss": 0.9065390825271606, + "step": 1782 + }, + { + "epoch": 0.4108294930875576, + "grad_norm": 0.5445685826440523, + "learning_rate": 1.8709980232511987e-06, + "loss": 0.7906428575515747, + "step": 1783 + }, + { + "epoch": 0.4110599078341014, + "grad_norm": 0.5783797348856774, + "learning_rate": 1.8708107878245976e-06, + "loss": 0.798285722732544, + "step": 1784 + }, + { + "epoch": 0.4112903225806452, + "grad_norm": 0.7492534516122694, + "learning_rate": 1.870623426000964e-06, + "loss": 0.7809790372848511, + "step": 1785 + }, + { + "epoch": 0.4115207373271889, + "grad_norm": 0.8776810150838931, + "learning_rate": 1.8704359378074921e-06, + "loss": 0.8931630849838257, + "step": 1786 + }, + { + "epoch": 0.4117511520737327, + "grad_norm": 0.6321595970525742, + "learning_rate": 1.870248323271396e-06, + "loss": 0.8219889402389526, + "step": 1787 + }, + { + "epoch": 0.4119815668202765, + "grad_norm": 0.9973808347817518, + "learning_rate": 1.8700605824199084e-06, + "loss": 0.8371819257736206, + "step": 1788 + }, + { + "epoch": 0.41221198156682026, + "grad_norm": 0.7869196176383942, + "learning_rate": 1.8698727152802789e-06, + "loss": 0.951171875, + "step": 1789 + }, + { + "epoch": 0.41244239631336405, + "grad_norm": 0.6763081680317143, + "learning_rate": 1.8696847218797763e-06, + "loss": 0.7678385972976685, + "step": 1790 + }, + { + "epoch": 0.41267281105990783, + "grad_norm": 0.567634539573834, + "learning_rate": 1.8694966022456872e-06, + "loss": 0.9296993017196655, + "step": 1791 + }, + { + "epoch": 0.4129032258064516, + "grad_norm": 
0.5450828031444163, + "learning_rate": 1.8693083564053178e-06, + "loss": 0.8991763591766357, + "step": 1792 + }, + { + "epoch": 0.4131336405529954, + "grad_norm": 0.5967294444907658, + "learning_rate": 1.8691199843859913e-06, + "loss": 0.8332901000976562, + "step": 1793 + }, + { + "epoch": 0.4133640552995392, + "grad_norm": 0.7571962190593917, + "learning_rate": 1.8689314862150497e-06, + "loss": 0.7723548412322998, + "step": 1794 + }, + { + "epoch": 0.41359447004608296, + "grad_norm": 0.6588409150246594, + "learning_rate": 1.868742861919853e-06, + "loss": 0.7768993377685547, + "step": 1795 + }, + { + "epoch": 0.41382488479262675, + "grad_norm": 0.43193778142300604, + "learning_rate": 1.86855411152778e-06, + "loss": 0.6058932542800903, + "step": 1796 + }, + { + "epoch": 0.41405529953917053, + "grad_norm": 0.8667574432138021, + "learning_rate": 1.8683652350662274e-06, + "loss": 0.8711605072021484, + "step": 1797 + }, + { + "epoch": 0.4142857142857143, + "grad_norm": 0.8780154463369872, + "learning_rate": 1.8681762325626104e-06, + "loss": 0.9023469090461731, + "step": 1798 + }, + { + "epoch": 0.41451612903225804, + "grad_norm": 0.6070102500189553, + "learning_rate": 1.867987104044363e-06, + "loss": 0.7735910415649414, + "step": 1799 + }, + { + "epoch": 0.4147465437788018, + "grad_norm": 0.6293725885471063, + "learning_rate": 1.8677978495389364e-06, + "loss": 0.6609020829200745, + "step": 1800 + }, + { + "epoch": 0.4149769585253456, + "grad_norm": 0.6485782104038655, + "learning_rate": 1.8676084690738005e-06, + "loss": 0.7823291420936584, + "step": 1801 + }, + { + "epoch": 0.4152073732718894, + "grad_norm": 0.8472581681306268, + "learning_rate": 1.867418962676444e-06, + "loss": 0.9076563715934753, + "step": 1802 + }, + { + "epoch": 0.4154377880184332, + "grad_norm": 0.561807586977654, + "learning_rate": 1.8672293303743735e-06, + "loss": 0.8645772933959961, + "step": 1803 + }, + { + "epoch": 0.41566820276497696, + "grad_norm": 0.6821058596015542, + "learning_rate": 
1.8670395721951135e-06, + "loss": 0.8071421384811401, + "step": 1804 + }, + { + "epoch": 0.41589861751152074, + "grad_norm": 0.7396557376618352, + "learning_rate": 1.8668496881662077e-06, + "loss": 0.8459846377372742, + "step": 1805 + }, + { + "epoch": 0.4161290322580645, + "grad_norm": 0.7167052224732033, + "learning_rate": 1.866659678315217e-06, + "loss": 0.8467865586280823, + "step": 1806 + }, + { + "epoch": 0.4163594470046083, + "grad_norm": 0.8262164291061972, + "learning_rate": 1.8664695426697215e-06, + "loss": 0.8963291645050049, + "step": 1807 + }, + { + "epoch": 0.4165898617511521, + "grad_norm": 0.528766323006704, + "learning_rate": 1.8662792812573188e-06, + "loss": 0.7901826500892639, + "step": 1808 + }, + { + "epoch": 0.4168202764976959, + "grad_norm": 0.8974116604603759, + "learning_rate": 1.8660888941056252e-06, + "loss": 0.807115912437439, + "step": 1809 + }, + { + "epoch": 0.41705069124423966, + "grad_norm": 0.6271237317374816, + "learning_rate": 1.8658983812422753e-06, + "loss": 0.8439537286758423, + "step": 1810 + }, + { + "epoch": 0.4172811059907834, + "grad_norm": 0.8360600380108553, + "learning_rate": 1.8657077426949214e-06, + "loss": 0.6920834183692932, + "step": 1811 + }, + { + "epoch": 0.41751152073732717, + "grad_norm": 0.7603232216568709, + "learning_rate": 1.865516978491235e-06, + "loss": 0.8712124824523926, + "step": 1812 + }, + { + "epoch": 0.41774193548387095, + "grad_norm": 0.718498571919399, + "learning_rate": 1.865326088658905e-06, + "loss": 0.7720927596092224, + "step": 1813 + }, + { + "epoch": 0.41797235023041474, + "grad_norm": 0.6953832780918029, + "learning_rate": 1.8651350732256386e-06, + "loss": 0.8003814220428467, + "step": 1814 + }, + { + "epoch": 0.4182027649769585, + "grad_norm": 0.838076886250554, + "learning_rate": 1.8649439322191616e-06, + "loss": 0.8999850749969482, + "step": 1815 + }, + { + "epoch": 0.4184331797235023, + "grad_norm": 0.584714014216153, + "learning_rate": 1.8647526656672179e-06, + "loss": 
0.6752324104309082, + "step": 1816 + }, + { + "epoch": 0.4186635944700461, + "grad_norm": 0.7365325720475113, + "learning_rate": 1.8645612735975696e-06, + "loss": 0.8521262407302856, + "step": 1817 + }, + { + "epoch": 0.41889400921658987, + "grad_norm": 0.7194058023938104, + "learning_rate": 1.864369756037997e-06, + "loss": 0.8813315629959106, + "step": 1818 + }, + { + "epoch": 0.41912442396313365, + "grad_norm": 0.742428235010686, + "learning_rate": 1.8641781130162986e-06, + "loss": 0.8358273506164551, + "step": 1819 + }, + { + "epoch": 0.41935483870967744, + "grad_norm": 0.591500867449821, + "learning_rate": 1.863986344560291e-06, + "loss": 0.8051023483276367, + "step": 1820 + }, + { + "epoch": 0.4195852534562212, + "grad_norm": 0.7791039105049288, + "learning_rate": 1.863794450697809e-06, + "loss": 0.768791675567627, + "step": 1821 + }, + { + "epoch": 0.419815668202765, + "grad_norm": 0.9369354252226071, + "learning_rate": 1.8636024314567065e-06, + "loss": 0.8420040607452393, + "step": 1822 + }, + { + "epoch": 0.42004608294930873, + "grad_norm": 0.673055652482875, + "learning_rate": 1.8634102868648542e-06, + "loss": 0.7670450806617737, + "step": 1823 + }, + { + "epoch": 0.4202764976958525, + "grad_norm": 0.6699812957272996, + "learning_rate": 1.863218016950142e-06, + "loss": 0.8292283415794373, + "step": 1824 + }, + { + "epoch": 0.4205069124423963, + "grad_norm": 0.6058254395333167, + "learning_rate": 1.8630256217404767e-06, + "loss": 0.8005781769752502, + "step": 1825 + }, + { + "epoch": 0.4207373271889401, + "grad_norm": 0.923190166351158, + "learning_rate": 1.8628331012637854e-06, + "loss": 0.8214897513389587, + "step": 1826 + }, + { + "epoch": 0.42096774193548386, + "grad_norm": 0.6734314204378448, + "learning_rate": 1.8626404555480118e-06, + "loss": 0.7938524484634399, + "step": 1827 + }, + { + "epoch": 0.42119815668202765, + "grad_norm": 0.7824933974022145, + "learning_rate": 1.862447684621118e-06, + "loss": 1.0047048330307007, + "step": 1828 + }, + { + 
"epoch": 0.42142857142857143, + "grad_norm": 0.7060449091561402, + "learning_rate": 1.862254788511084e-06, + "loss": 0.7660601139068604, + "step": 1829 + }, + { + "epoch": 0.4216589861751152, + "grad_norm": 0.7940468118829026, + "learning_rate": 1.8620617672459096e-06, + "loss": 0.8227912783622742, + "step": 1830 + }, + { + "epoch": 0.421889400921659, + "grad_norm": 0.8322274877206185, + "learning_rate": 1.8618686208536106e-06, + "loss": 0.8570956587791443, + "step": 1831 + }, + { + "epoch": 0.4221198156682028, + "grad_norm": 0.6215191834076389, + "learning_rate": 1.8616753493622221e-06, + "loss": 0.7472532987594604, + "step": 1832 + }, + { + "epoch": 0.42235023041474656, + "grad_norm": 0.702673502332975, + "learning_rate": 1.8614819527997976e-06, + "loss": 0.812872052192688, + "step": 1833 + }, + { + "epoch": 0.42258064516129035, + "grad_norm": 0.7168526420375322, + "learning_rate": 1.861288431194408e-06, + "loss": 0.7801386117935181, + "step": 1834 + }, + { + "epoch": 0.4228110599078341, + "grad_norm": 0.8740851917776313, + "learning_rate": 1.8610947845741426e-06, + "loss": 0.7834687829017639, + "step": 1835 + }, + { + "epoch": 0.42304147465437786, + "grad_norm": 0.8009990500080056, + "learning_rate": 1.8609010129671097e-06, + "loss": 0.786865234375, + "step": 1836 + }, + { + "epoch": 0.42327188940092164, + "grad_norm": 0.6559457181196078, + "learning_rate": 1.860707116401434e-06, + "loss": 0.7728738784790039, + "step": 1837 + }, + { + "epoch": 0.4235023041474654, + "grad_norm": 0.6384024302830484, + "learning_rate": 1.8605130949052598e-06, + "loss": 0.6508793830871582, + "step": 1838 + }, + { + "epoch": 0.4237327188940092, + "grad_norm": 0.6544986461362278, + "learning_rate": 1.8603189485067492e-06, + "loss": 0.7949484586715698, + "step": 1839 + }, + { + "epoch": 0.423963133640553, + "grad_norm": 0.7679729608195138, + "learning_rate": 1.8601246772340822e-06, + "loss": 0.7151408195495605, + "step": 1840 + }, + { + "epoch": 0.4241935483870968, + "grad_norm": 
0.6910188883895837, + "learning_rate": 1.859930281115457e-06, + "loss": 0.7678598165512085, + "step": 1841 + }, + { + "epoch": 0.42442396313364056, + "grad_norm": 0.6547923584739629, + "learning_rate": 1.8597357601790895e-06, + "loss": 0.8042058944702148, + "step": 1842 + }, + { + "epoch": 0.42465437788018434, + "grad_norm": 0.6889925049755639, + "learning_rate": 1.859541114453215e-06, + "loss": 0.7328081130981445, + "step": 1843 + }, + { + "epoch": 0.4248847926267281, + "grad_norm": 0.7385850960276812, + "learning_rate": 1.8593463439660853e-06, + "loss": 0.7646626234054565, + "step": 1844 + }, + { + "epoch": 0.4251152073732719, + "grad_norm": 0.7455331415840897, + "learning_rate": 1.8591514487459717e-06, + "loss": 0.8965721726417542, + "step": 1845 + }, + { + "epoch": 0.4253456221198157, + "grad_norm": 0.6783955368622289, + "learning_rate": 1.8589564288211623e-06, + "loss": 0.8892468810081482, + "step": 1846 + }, + { + "epoch": 0.4255760368663594, + "grad_norm": 0.669354336924349, + "learning_rate": 1.8587612842199648e-06, + "loss": 0.8314409255981445, + "step": 1847 + }, + { + "epoch": 0.4258064516129032, + "grad_norm": 0.7299222952808436, + "learning_rate": 1.8585660149707034e-06, + "loss": 0.7713892459869385, + "step": 1848 + }, + { + "epoch": 0.426036866359447, + "grad_norm": 0.7583328231707663, + "learning_rate": 1.8583706211017216e-06, + "loss": 0.9349459409713745, + "step": 1849 + }, + { + "epoch": 0.42626728110599077, + "grad_norm": 0.7309436500165829, + "learning_rate": 1.8581751026413805e-06, + "loss": 0.8438700437545776, + "step": 1850 + }, + { + "epoch": 0.42649769585253455, + "grad_norm": 1.0171962155435006, + "learning_rate": 1.8579794596180594e-06, + "loss": 0.9559776782989502, + "step": 1851 + }, + { + "epoch": 0.42672811059907834, + "grad_norm": 0.6701533748146308, + "learning_rate": 1.8577836920601556e-06, + "loss": 0.7124872803688049, + "step": 1852 + }, + { + "epoch": 0.4269585253456221, + "grad_norm": 0.8613289026694887, + "learning_rate": 
1.8575877999960842e-06, + "loss": 0.7935503125190735, + "step": 1853 + }, + { + "epoch": 0.4271889400921659, + "grad_norm": 0.7107096707504692, + "learning_rate": 1.8573917834542792e-06, + "loss": 0.9145890474319458, + "step": 1854 + }, + { + "epoch": 0.4274193548387097, + "grad_norm": 0.7290504646059204, + "learning_rate": 1.8571956424631918e-06, + "loss": 0.8239228129386902, + "step": 1855 + }, + { + "epoch": 0.42764976958525347, + "grad_norm": 0.6018983094431002, + "learning_rate": 1.8569993770512916e-06, + "loss": 0.8767688274383545, + "step": 1856 + }, + { + "epoch": 0.42788018433179725, + "grad_norm": 0.6742014961339767, + "learning_rate": 1.8568029872470663e-06, + "loss": 0.7860859632492065, + "step": 1857 + }, + { + "epoch": 0.42811059907834104, + "grad_norm": 0.6990668023927343, + "learning_rate": 1.8566064730790218e-06, + "loss": 0.8855729103088379, + "step": 1858 + }, + { + "epoch": 0.4283410138248848, + "grad_norm": 0.8518974155898882, + "learning_rate": 1.8564098345756815e-06, + "loss": 1.023299217224121, + "step": 1859 + }, + { + "epoch": 0.42857142857142855, + "grad_norm": 0.7174059285774532, + "learning_rate": 1.8562130717655878e-06, + "loss": 0.7665202617645264, + "step": 1860 + }, + { + "epoch": 0.42880184331797233, + "grad_norm": 0.7036772811538429, + "learning_rate": 1.8560161846773e-06, + "loss": 0.8456651568412781, + "step": 1861 + }, + { + "epoch": 0.4290322580645161, + "grad_norm": 0.7229483822116546, + "learning_rate": 1.8558191733393964e-06, + "loss": 0.8920061588287354, + "step": 1862 + }, + { + "epoch": 0.4292626728110599, + "grad_norm": 0.8104170426239989, + "learning_rate": 1.8556220377804723e-06, + "loss": 0.8686853051185608, + "step": 1863 + }, + { + "epoch": 0.4294930875576037, + "grad_norm": 0.5832986779631602, + "learning_rate": 1.8554247780291425e-06, + "loss": 0.6976242065429688, + "step": 1864 + }, + { + "epoch": 0.42972350230414746, + "grad_norm": 0.7347161353185314, + "learning_rate": 1.8552273941140387e-06, + "loss": 
0.9612032771110535, + "step": 1865 + }, + { + "epoch": 0.42995391705069125, + "grad_norm": 0.6243829709767468, + "learning_rate": 1.8550298860638108e-06, + "loss": 0.9288003444671631, + "step": 1866 + }, + { + "epoch": 0.43018433179723503, + "grad_norm": 0.6743712494799082, + "learning_rate": 1.8548322539071263e-06, + "loss": 0.8397525548934937, + "step": 1867 + }, + { + "epoch": 0.4304147465437788, + "grad_norm": 0.5881426126037044, + "learning_rate": 1.8546344976726722e-06, + "loss": 0.6311365365982056, + "step": 1868 + }, + { + "epoch": 0.4306451612903226, + "grad_norm": 0.7497017851812813, + "learning_rate": 1.8544366173891523e-06, + "loss": 0.7868270874023438, + "step": 1869 + }, + { + "epoch": 0.4308755760368664, + "grad_norm": 0.6265515804052451, + "learning_rate": 1.8542386130852883e-06, + "loss": 0.9197052717208862, + "step": 1870 + }, + { + "epoch": 0.43110599078341016, + "grad_norm": 0.7018278829983491, + "learning_rate": 1.8540404847898206e-06, + "loss": 0.7875635027885437, + "step": 1871 + }, + { + "epoch": 0.4313364055299539, + "grad_norm": 0.7789284724063816, + "learning_rate": 1.853842232531507e-06, + "loss": 0.9805077910423279, + "step": 1872 + }, + { + "epoch": 0.4315668202764977, + "grad_norm": 0.838470325159009, + "learning_rate": 1.8536438563391236e-06, + "loss": 0.8906866312026978, + "step": 1873 + }, + { + "epoch": 0.43179723502304146, + "grad_norm": 0.73247587866706, + "learning_rate": 1.8534453562414649e-06, + "loss": 0.7506693601608276, + "step": 1874 + }, + { + "epoch": 0.43202764976958524, + "grad_norm": 0.6576915367586517, + "learning_rate": 1.8532467322673422e-06, + "loss": 0.6173181533813477, + "step": 1875 + }, + { + "epoch": 0.432258064516129, + "grad_norm": 0.6907344817423696, + "learning_rate": 1.853047984445586e-06, + "loss": 0.9217972755432129, + "step": 1876 + }, + { + "epoch": 0.4324884792626728, + "grad_norm": 0.8808471726659616, + "learning_rate": 1.8528491128050442e-06, + "loss": 0.8300588130950928, + "step": 1877 + }, + { 
+ "epoch": 0.4327188940092166, + "grad_norm": 0.7869544847637374, + "learning_rate": 1.8526501173745826e-06, + "loss": 0.8109279870986938, + "step": 1878 + }, + { + "epoch": 0.4329493087557604, + "grad_norm": 0.8253705845492948, + "learning_rate": 1.852450998183085e-06, + "loss": 0.9243700504302979, + "step": 1879 + }, + { + "epoch": 0.43317972350230416, + "grad_norm": 0.7291726511705204, + "learning_rate": 1.8522517552594539e-06, + "loss": 0.7983531951904297, + "step": 1880 + }, + { + "epoch": 0.43341013824884794, + "grad_norm": 0.837506072245515, + "learning_rate": 1.8520523886326088e-06, + "loss": 0.9931240081787109, + "step": 1881 + }, + { + "epoch": 0.4336405529953917, + "grad_norm": 0.7782064692415819, + "learning_rate": 1.8518528983314874e-06, + "loss": 0.923255443572998, + "step": 1882 + }, + { + "epoch": 0.4338709677419355, + "grad_norm": 0.5003052765919304, + "learning_rate": 1.8516532843850454e-06, + "loss": 0.8470325469970703, + "step": 1883 + }, + { + "epoch": 0.43410138248847924, + "grad_norm": 0.7497886449083292, + "learning_rate": 1.8514535468222566e-06, + "loss": 0.9175074696540833, + "step": 1884 + }, + { + "epoch": 0.434331797235023, + "grad_norm": 0.7474680310474195, + "learning_rate": 1.8512536856721126e-06, + "loss": 0.8617827892303467, + "step": 1885 + }, + { + "epoch": 0.4345622119815668, + "grad_norm": 0.6779026169933022, + "learning_rate": 1.8510537009636231e-06, + "loss": 0.6787248849868774, + "step": 1886 + }, + { + "epoch": 0.4347926267281106, + "grad_norm": 0.6948062534132075, + "learning_rate": 1.8508535927258157e-06, + "loss": 0.8031569719314575, + "step": 1887 + }, + { + "epoch": 0.43502304147465437, + "grad_norm": 0.8219581995376891, + "learning_rate": 1.8506533609877354e-06, + "loss": 1.0252577066421509, + "step": 1888 + }, + { + "epoch": 0.43525345622119815, + "grad_norm": 0.6297691459816858, + "learning_rate": 1.850453005778446e-06, + "loss": 0.7947444915771484, + "step": 1889 + }, + { + "epoch": 0.43548387096774194, + 
"grad_norm": 0.7974729793994046, + "learning_rate": 1.8502525271270288e-06, + "loss": 0.817523717880249, + "step": 1890 + }, + { + "epoch": 0.4357142857142857, + "grad_norm": 0.905445482286677, + "learning_rate": 1.850051925062583e-06, + "loss": 0.8029658794403076, + "step": 1891 + }, + { + "epoch": 0.4359447004608295, + "grad_norm": 0.7902601112013473, + "learning_rate": 1.8498511996142253e-06, + "loss": 0.871408224105835, + "step": 1892 + }, + { + "epoch": 0.4361751152073733, + "grad_norm": 0.7279346643764769, + "learning_rate": 1.849650350811091e-06, + "loss": 1.0133098363876343, + "step": 1893 + }, + { + "epoch": 0.43640552995391707, + "grad_norm": 0.5859043876213773, + "learning_rate": 1.8494493786823333e-06, + "loss": 0.8320624828338623, + "step": 1894 + }, + { + "epoch": 0.43663594470046085, + "grad_norm": 0.7240549495084485, + "learning_rate": 1.8492482832571225e-06, + "loss": 0.7757631540298462, + "step": 1895 + }, + { + "epoch": 0.4368663594470046, + "grad_norm": 0.7606146142454437, + "learning_rate": 1.8490470645646479e-06, + "loss": 0.8503100872039795, + "step": 1896 + }, + { + "epoch": 0.43709677419354837, + "grad_norm": 0.7560932530175453, + "learning_rate": 1.8488457226341158e-06, + "loss": 0.8145939707756042, + "step": 1897 + }, + { + "epoch": 0.43732718894009215, + "grad_norm": 0.8041258430075643, + "learning_rate": 1.848644257494751e-06, + "loss": 0.831500232219696, + "step": 1898 + }, + { + "epoch": 0.43755760368663593, + "grad_norm": 0.6473340838552745, + "learning_rate": 1.8484426691757956e-06, + "loss": 0.9340692758560181, + "step": 1899 + }, + { + "epoch": 0.4377880184331797, + "grad_norm": 0.7851684163129825, + "learning_rate": 1.8482409577065097e-06, + "loss": 1.011988639831543, + "step": 1900 + }, + { + "epoch": 0.4380184331797235, + "grad_norm": 0.6819650200659566, + "learning_rate": 1.848039123116172e-06, + "loss": 0.8110378980636597, + "step": 1901 + }, + { + "epoch": 0.4382488479262673, + "grad_norm": 0.6310651453357742, + 
"learning_rate": 1.8478371654340779e-06, + "loss": 0.8230330944061279, + "step": 1902 + }, + { + "epoch": 0.43847926267281107, + "grad_norm": 0.8335502206603579, + "learning_rate": 1.8476350846895419e-06, + "loss": 0.875052809715271, + "step": 1903 + }, + { + "epoch": 0.43870967741935485, + "grad_norm": 0.7394371211482306, + "learning_rate": 1.8474328809118953e-06, + "loss": 0.9373071193695068, + "step": 1904 + }, + { + "epoch": 0.43894009216589863, + "grad_norm": 0.7538115820848524, + "learning_rate": 1.847230554130488e-06, + "loss": 0.8341633677482605, + "step": 1905 + }, + { + "epoch": 0.4391705069124424, + "grad_norm": 0.6579829053639499, + "learning_rate": 1.8470281043746873e-06, + "loss": 0.8147767782211304, + "step": 1906 + }, + { + "epoch": 0.4394009216589862, + "grad_norm": 0.6022228592985512, + "learning_rate": 1.8468255316738785e-06, + "loss": 0.740512490272522, + "step": 1907 + }, + { + "epoch": 0.4396313364055299, + "grad_norm": 0.7743265443588842, + "learning_rate": 1.846622836057465e-06, + "loss": 0.7754743099212646, + "step": 1908 + }, + { + "epoch": 0.4398617511520737, + "grad_norm": 0.7535493986684056, + "learning_rate": 1.8464200175548677e-06, + "loss": 0.9131484031677246, + "step": 1909 + }, + { + "epoch": 0.4400921658986175, + "grad_norm": 0.7099012564704421, + "learning_rate": 1.8462170761955252e-06, + "loss": 0.7084713578224182, + "step": 1910 + }, + { + "epoch": 0.4403225806451613, + "grad_norm": 0.7949281739735957, + "learning_rate": 1.8460140120088945e-06, + "loss": 0.8535224199295044, + "step": 1911 + }, + { + "epoch": 0.44055299539170506, + "grad_norm": 0.8579322326008002, + "learning_rate": 1.8458108250244498e-06, + "loss": 0.7661323547363281, + "step": 1912 + }, + { + "epoch": 0.44078341013824884, + "grad_norm": 0.7355189670899542, + "learning_rate": 1.8456075152716837e-06, + "loss": 0.8064024448394775, + "step": 1913 + }, + { + "epoch": 0.4410138248847926, + "grad_norm": 0.7422340222781728, + "learning_rate": 1.8454040827801058e-06, + 
"loss": 0.7858735918998718, + "step": 1914 + }, + { + "epoch": 0.4412442396313364, + "grad_norm": 0.6589873136371734, + "learning_rate": 1.8452005275792448e-06, + "loss": 0.9251735210418701, + "step": 1915 + }, + { + "epoch": 0.4414746543778802, + "grad_norm": 0.718018605876598, + "learning_rate": 1.8449968496986461e-06, + "loss": 0.7237124443054199, + "step": 1916 + }, + { + "epoch": 0.441705069124424, + "grad_norm": 0.7573893032737062, + "learning_rate": 1.8447930491678732e-06, + "loss": 0.8939133882522583, + "step": 1917 + }, + { + "epoch": 0.44193548387096776, + "grad_norm": 0.8373489922925343, + "learning_rate": 1.8445891260165076e-06, + "loss": 0.8815577626228333, + "step": 1918 + }, + { + "epoch": 0.44216589861751154, + "grad_norm": 0.8703539982402225, + "learning_rate": 1.8443850802741485e-06, + "loss": 0.943426787853241, + "step": 1919 + }, + { + "epoch": 0.4423963133640553, + "grad_norm": 0.6998600920537428, + "learning_rate": 1.8441809119704126e-06, + "loss": 0.8001632690429688, + "step": 1920 + }, + { + "epoch": 0.44262672811059905, + "grad_norm": 0.8531362441371287, + "learning_rate": 1.8439766211349352e-06, + "loss": 0.8656308650970459, + "step": 1921 + }, + { + "epoch": 0.44285714285714284, + "grad_norm": 0.7261410922718881, + "learning_rate": 1.8437722077973686e-06, + "loss": 0.9774024486541748, + "step": 1922 + }, + { + "epoch": 0.4430875576036866, + "grad_norm": 0.728823767818971, + "learning_rate": 1.8435676719873827e-06, + "loss": 0.7655738592147827, + "step": 1923 + }, + { + "epoch": 0.4433179723502304, + "grad_norm": 0.6595509202419896, + "learning_rate": 1.8433630137346657e-06, + "loss": 0.6455004811286926, + "step": 1924 + }, + { + "epoch": 0.4435483870967742, + "grad_norm": 0.7214853647491487, + "learning_rate": 1.8431582330689243e-06, + "loss": 0.8221153020858765, + "step": 1925 + }, + { + "epoch": 0.44377880184331797, + "grad_norm": 0.7718374957528886, + "learning_rate": 1.8429533300198816e-06, + "loss": 0.7878339886665344, + "step": 1926 
+ }, + { + "epoch": 0.44400921658986175, + "grad_norm": 0.7666174978175726, + "learning_rate": 1.8427483046172787e-06, + "loss": 0.8292763829231262, + "step": 1927 + }, + { + "epoch": 0.44423963133640554, + "grad_norm": 0.7395800766154846, + "learning_rate": 1.842543156890875e-06, + "loss": 0.7774572372436523, + "step": 1928 + }, + { + "epoch": 0.4444700460829493, + "grad_norm": 0.7419338266362171, + "learning_rate": 1.8423378868704476e-06, + "loss": 0.7327601909637451, + "step": 1929 + }, + { + "epoch": 0.4447004608294931, + "grad_norm": 0.7176112305038147, + "learning_rate": 1.8421324945857909e-06, + "loss": 0.8067511320114136, + "step": 1930 + }, + { + "epoch": 0.4449308755760369, + "grad_norm": 0.780684647138278, + "learning_rate": 1.8419269800667173e-06, + "loss": 0.851010799407959, + "step": 1931 + }, + { + "epoch": 0.44516129032258067, + "grad_norm": 0.7848772154457995, + "learning_rate": 1.8417213433430576e-06, + "loss": 0.8402234315872192, + "step": 1932 + }, + { + "epoch": 0.4453917050691244, + "grad_norm": 0.7848428302916386, + "learning_rate": 1.8415155844446591e-06, + "loss": 0.8857355117797852, + "step": 1933 + }, + { + "epoch": 0.4456221198156682, + "grad_norm": 0.6465222204250215, + "learning_rate": 1.841309703401387e-06, + "loss": 0.7517881393432617, + "step": 1934 + }, + { + "epoch": 0.44585253456221197, + "grad_norm": 0.8220839741097039, + "learning_rate": 1.8411037002431257e-06, + "loss": 0.8583779335021973, + "step": 1935 + }, + { + "epoch": 0.44608294930875575, + "grad_norm": 0.7149579567670102, + "learning_rate": 1.8408975749997758e-06, + "loss": 0.7691524028778076, + "step": 1936 + }, + { + "epoch": 0.44631336405529953, + "grad_norm": 0.6891731440130011, + "learning_rate": 1.8406913277012558e-06, + "loss": 0.9164496660232544, + "step": 1937 + }, + { + "epoch": 0.4465437788018433, + "grad_norm": 0.6382978906826758, + "learning_rate": 1.8404849583775025e-06, + "loss": 0.843226432800293, + "step": 1938 + }, + { + "epoch": 0.4467741935483871, + 
"grad_norm": 0.843769912689158, + "learning_rate": 1.8402784670584706e-06, + "loss": 0.8492633104324341, + "step": 1939 + }, + { + "epoch": 0.4470046082949309, + "grad_norm": 0.7117202181402426, + "learning_rate": 1.8400718537741314e-06, + "loss": 0.8088324069976807, + "step": 1940 + }, + { + "epoch": 0.44723502304147467, + "grad_norm": 0.8584564611753391, + "learning_rate": 1.8398651185544746e-06, + "loss": 0.8879667520523071, + "step": 1941 + }, + { + "epoch": 0.44746543778801845, + "grad_norm": 0.6515549607308898, + "learning_rate": 1.8396582614295078e-06, + "loss": 0.8926588892936707, + "step": 1942 + }, + { + "epoch": 0.44769585253456223, + "grad_norm": 0.6885634929225364, + "learning_rate": 1.8394512824292558e-06, + "loss": 0.8007583618164062, + "step": 1943 + }, + { + "epoch": 0.447926267281106, + "grad_norm": 0.6940540666117992, + "learning_rate": 1.8392441815837613e-06, + "loss": 0.7420827746391296, + "step": 1944 + }, + { + "epoch": 0.44815668202764974, + "grad_norm": 0.6846873323136197, + "learning_rate": 1.839036958923085e-06, + "loss": 0.7653264999389648, + "step": 1945 + }, + { + "epoch": 0.4483870967741935, + "grad_norm": 0.6684685460178057, + "learning_rate": 1.838829614477305e-06, + "loss": 0.886576771736145, + "step": 1946 + }, + { + "epoch": 0.4486175115207373, + "grad_norm": 0.7769567865097903, + "learning_rate": 1.8386221482765168e-06, + "loss": 0.904376745223999, + "step": 1947 + }, + { + "epoch": 0.4488479262672811, + "grad_norm": 0.6833196213451335, + "learning_rate": 1.838414560350834e-06, + "loss": 0.6791579723358154, + "step": 1948 + }, + { + "epoch": 0.4490783410138249, + "grad_norm": 0.8296885335278092, + "learning_rate": 1.838206850730388e-06, + "loss": 0.9402183294296265, + "step": 1949 + }, + { + "epoch": 0.44930875576036866, + "grad_norm": 0.9215175287627321, + "learning_rate": 1.8379990194453265e-06, + "loss": 0.9756022691726685, + "step": 1950 + }, + { + "epoch": 0.44953917050691244, + "grad_norm": 0.9502651388093868, + 
"learning_rate": 1.8377910665258173e-06, + "loss": 0.7311051487922668, + "step": 1951 + }, + { + "epoch": 0.4497695852534562, + "grad_norm": 0.5687721596613555, + "learning_rate": 1.8375829920020438e-06, + "loss": 0.6966956853866577, + "step": 1952 + }, + { + "epoch": 0.45, + "grad_norm": 0.7191813033419734, + "learning_rate": 1.8373747959042076e-06, + "loss": 0.7327426671981812, + "step": 1953 + }, + { + "epoch": 0.4502304147465438, + "grad_norm": 0.8067848664348717, + "learning_rate": 1.8371664782625285e-06, + "loss": 0.8650925755500793, + "step": 1954 + }, + { + "epoch": 0.4504608294930876, + "grad_norm": 0.8028206677205298, + "learning_rate": 1.8369580391072431e-06, + "loss": 0.876739501953125, + "step": 1955 + }, + { + "epoch": 0.45069124423963136, + "grad_norm": 0.7092651204784524, + "learning_rate": 1.8367494784686066e-06, + "loss": 0.7787455320358276, + "step": 1956 + }, + { + "epoch": 0.4509216589861751, + "grad_norm": 0.7762123563340246, + "learning_rate": 1.836540796376891e-06, + "loss": 0.8874029517173767, + "step": 1957 + }, + { + "epoch": 0.4511520737327189, + "grad_norm": 0.7670080315961673, + "learning_rate": 1.8363319928623862e-06, + "loss": 0.8944835662841797, + "step": 1958 + }, + { + "epoch": 0.45138248847926266, + "grad_norm": 0.570293089893543, + "learning_rate": 1.8361230679553996e-06, + "loss": 0.7106739282608032, + "step": 1959 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.7068996407627426, + "learning_rate": 1.835914021686257e-06, + "loss": 0.8668634295463562, + "step": 1960 + }, + { + "epoch": 0.4518433179723502, + "grad_norm": 0.7818076957354034, + "learning_rate": 1.8357048540853003e-06, + "loss": 0.8123712539672852, + "step": 1961 + }, + { + "epoch": 0.452073732718894, + "grad_norm": 0.7369058807274856, + "learning_rate": 1.8354955651828907e-06, + "loss": 0.865728497505188, + "step": 1962 + }, + { + "epoch": 0.4523041474654378, + "grad_norm": 0.7502978391788373, + "learning_rate": 1.8352861550094056e-06, + "loss": 
0.8066651225090027, + "step": 1963 + }, + { + "epoch": 0.4525345622119816, + "grad_norm": 1.2076261262226256, + "learning_rate": 1.835076623595241e-06, + "loss": 1.020591139793396, + "step": 1964 + }, + { + "epoch": 0.45276497695852536, + "grad_norm": 0.7642119123557376, + "learning_rate": 1.83486697097081e-06, + "loss": 0.839346706867218, + "step": 1965 + }, + { + "epoch": 0.45299539170506914, + "grad_norm": 0.663652311830839, + "learning_rate": 1.8346571971665434e-06, + "loss": 0.7707340121269226, + "step": 1966 + }, + { + "epoch": 0.4532258064516129, + "grad_norm": 0.6603686601649886, + "learning_rate": 1.8344473022128897e-06, + "loss": 0.7969534397125244, + "step": 1967 + }, + { + "epoch": 0.4534562211981567, + "grad_norm": 0.8431782882642489, + "learning_rate": 1.8342372861403143e-06, + "loss": 0.9371283650398254, + "step": 1968 + }, + { + "epoch": 0.45368663594470043, + "grad_norm": 0.7102966402282939, + "learning_rate": 1.8340271489793015e-06, + "loss": 0.7915256023406982, + "step": 1969 + }, + { + "epoch": 0.4539170506912442, + "grad_norm": 0.6028172078632871, + "learning_rate": 1.8338168907603522e-06, + "loss": 0.8394884467124939, + "step": 1970 + }, + { + "epoch": 0.454147465437788, + "grad_norm": 0.8133055611447335, + "learning_rate": 1.833606511513985e-06, + "loss": 0.7786067128181458, + "step": 1971 + }, + { + "epoch": 0.4543778801843318, + "grad_norm": 0.905741517676821, + "learning_rate": 1.833396011270736e-06, + "loss": 0.9237443208694458, + "step": 1972 + }, + { + "epoch": 0.45460829493087557, + "grad_norm": 0.9055049100464759, + "learning_rate": 1.8331853900611596e-06, + "loss": 0.7530162334442139, + "step": 1973 + }, + { + "epoch": 0.45483870967741935, + "grad_norm": 0.7172947421019107, + "learning_rate": 1.8329746479158263e-06, + "loss": 0.8349624872207642, + "step": 1974 + }, + { + "epoch": 0.45506912442396313, + "grad_norm": 0.9222448487169791, + "learning_rate": 1.8327637848653259e-06, + "loss": 0.8748637437820435, + "step": 1975 + }, + { + 
"epoch": 0.4552995391705069, + "grad_norm": 0.7416851295200875, + "learning_rate": 1.832552800940265e-06, + "loss": 0.9111478924751282, + "step": 1976 + }, + { + "epoch": 0.4555299539170507, + "grad_norm": 0.6251856024732342, + "learning_rate": 1.8323416961712665e-06, + "loss": 0.8108797073364258, + "step": 1977 + }, + { + "epoch": 0.4557603686635945, + "grad_norm": 0.9459625715160394, + "learning_rate": 1.832130470588973e-06, + "loss": 0.9266520738601685, + "step": 1978 + }, + { + "epoch": 0.45599078341013827, + "grad_norm": 0.7773850051724754, + "learning_rate": 1.831919124224043e-06, + "loss": 0.9092522859573364, + "step": 1979 + }, + { + "epoch": 0.45622119815668205, + "grad_norm": 0.664954530341155, + "learning_rate": 1.8317076571071536e-06, + "loss": 0.8249068260192871, + "step": 1980 + }, + { + "epoch": 0.45645161290322583, + "grad_norm": 0.770896895795481, + "learning_rate": 1.8314960692689992e-06, + "loss": 0.7497084140777588, + "step": 1981 + }, + { + "epoch": 0.45668202764976956, + "grad_norm": 0.7450904317902424, + "learning_rate": 1.8312843607402907e-06, + "loss": 0.7360142469406128, + "step": 1982 + }, + { + "epoch": 0.45691244239631335, + "grad_norm": 0.7224490513690306, + "learning_rate": 1.8310725315517578e-06, + "loss": 0.8443512320518494, + "step": 1983 + }, + { + "epoch": 0.45714285714285713, + "grad_norm": 0.6770718154001021, + "learning_rate": 1.830860581734147e-06, + "loss": 0.7995656728744507, + "step": 1984 + }, + { + "epoch": 0.4573732718894009, + "grad_norm": 0.8305927985197211, + "learning_rate": 1.8306485113182229e-06, + "loss": 0.7396436929702759, + "step": 1985 + }, + { + "epoch": 0.4576036866359447, + "grad_norm": 0.7351757860546534, + "learning_rate": 1.8304363203347668e-06, + "loss": 0.7415385246276855, + "step": 1986 + }, + { + "epoch": 0.4578341013824885, + "grad_norm": 0.8416697439034252, + "learning_rate": 1.8302240088145784e-06, + "loss": 0.9316694736480713, + "step": 1987 + }, + { + "epoch": 0.45806451612903226, + 
"grad_norm": 0.6482250359686991, + "learning_rate": 1.830011576788474e-06, + "loss": 0.7692697048187256, + "step": 1988 + }, + { + "epoch": 0.45829493087557605, + "grad_norm": 0.7546540101557039, + "learning_rate": 1.829799024287288e-06, + "loss": 0.8377524614334106, + "step": 1989 + }, + { + "epoch": 0.45852534562211983, + "grad_norm": 0.800432018333432, + "learning_rate": 1.8295863513418724e-06, + "loss": 0.8005630970001221, + "step": 1990 + }, + { + "epoch": 0.4587557603686636, + "grad_norm": 0.6132717130341248, + "learning_rate": 1.829373557983096e-06, + "loss": 0.8609297275543213, + "step": 1991 + }, + { + "epoch": 0.4589861751152074, + "grad_norm": 0.7611348757483902, + "learning_rate": 1.8291606442418454e-06, + "loss": 0.9111521244049072, + "step": 1992 + }, + { + "epoch": 0.4592165898617512, + "grad_norm": 0.6486046074488622, + "learning_rate": 1.8289476101490254e-06, + "loss": 0.7540388107299805, + "step": 1993 + }, + { + "epoch": 0.4594470046082949, + "grad_norm": 0.7891604292973137, + "learning_rate": 1.8287344557355565e-06, + "loss": 0.9018936157226562, + "step": 1994 + }, + { + "epoch": 0.4596774193548387, + "grad_norm": 0.8558307889574596, + "learning_rate": 1.8285211810323791e-06, + "loss": 0.918912947177887, + "step": 1995 + }, + { + "epoch": 0.4599078341013825, + "grad_norm": 0.6889746928021416, + "learning_rate": 1.8283077860704488e-06, + "loss": 0.7777351140975952, + "step": 1996 + }, + { + "epoch": 0.46013824884792626, + "grad_norm": 0.8546199279018112, + "learning_rate": 1.82809427088074e-06, + "loss": 0.9283437132835388, + "step": 1997 + }, + { + "epoch": 0.46036866359447004, + "grad_norm": 0.7206983576837674, + "learning_rate": 1.8278806354942442e-06, + "loss": 0.7032894492149353, + "step": 1998 + }, + { + "epoch": 0.4605990783410138, + "grad_norm": 0.7084552833839082, + "learning_rate": 1.8276668799419696e-06, + "loss": 0.8392905592918396, + "step": 1999 + }, + { + "epoch": 0.4608294930875576, + "grad_norm": 0.8216520324249929, + 
"learning_rate": 1.8274530042549434e-06, + "loss": 0.8059369325637817, + "step": 2000 + }, + { + "epoch": 0.4610599078341014, + "grad_norm": 0.7022225516164876, + "learning_rate": 1.827239008464209e-06, + "loss": 0.7738519906997681, + "step": 2001 + }, + { + "epoch": 0.4612903225806452, + "grad_norm": 0.894321981759021, + "learning_rate": 1.8270248926008275e-06, + "loss": 0.9189014434814453, + "step": 2002 + }, + { + "epoch": 0.46152073732718896, + "grad_norm": 0.9750927332357222, + "learning_rate": 1.8268106566958782e-06, + "loss": 0.8878552913665771, + "step": 2003 + }, + { + "epoch": 0.46175115207373274, + "grad_norm": 0.7601663032895281, + "learning_rate": 1.826596300780456e-06, + "loss": 0.9786058664321899, + "step": 2004 + }, + { + "epoch": 0.4619815668202765, + "grad_norm": 0.7513085122069586, + "learning_rate": 1.8263818248856754e-06, + "loss": 0.7887653112411499, + "step": 2005 + }, + { + "epoch": 0.46221198156682025, + "grad_norm": 0.7571825247765968, + "learning_rate": 1.8261672290426668e-06, + "loss": 0.8773549795150757, + "step": 2006 + }, + { + "epoch": 0.46244239631336403, + "grad_norm": 0.6543768471355319, + "learning_rate": 1.8259525132825786e-06, + "loss": 0.6929831504821777, + "step": 2007 + }, + { + "epoch": 0.4626728110599078, + "grad_norm": 0.8544099497368944, + "learning_rate": 1.8257376776365765e-06, + "loss": 0.9438232183456421, + "step": 2008 + }, + { + "epoch": 0.4629032258064516, + "grad_norm": 0.6803330432545487, + "learning_rate": 1.8255227221358435e-06, + "loss": 0.7559594511985779, + "step": 2009 + }, + { + "epoch": 0.4631336405529954, + "grad_norm": 0.7347158890455135, + "learning_rate": 1.8253076468115805e-06, + "loss": 0.8990212678909302, + "step": 2010 + }, + { + "epoch": 0.46336405529953917, + "grad_norm": 0.7325838411869188, + "learning_rate": 1.825092451695005e-06, + "loss": 0.8638331890106201, + "step": 2011 + }, + { + "epoch": 0.46359447004608295, + "grad_norm": 0.7537964319175384, + "learning_rate": 1.8248771368173522e-06, 
+ "loss": 0.9262570142745972, + "step": 2012 + }, + { + "epoch": 0.46382488479262673, + "grad_norm": 0.770620841657562, + "learning_rate": 1.8246617022098754e-06, + "loss": 0.7412514090538025, + "step": 2013 + }, + { + "epoch": 0.4640552995391705, + "grad_norm": 0.8304378021605247, + "learning_rate": 1.8244461479038437e-06, + "loss": 0.8680287599563599, + "step": 2014 + }, + { + "epoch": 0.4642857142857143, + "grad_norm": 0.7004084931574237, + "learning_rate": 1.8242304739305457e-06, + "loss": 0.7774302959442139, + "step": 2015 + }, + { + "epoch": 0.4645161290322581, + "grad_norm": 0.8275882534036313, + "learning_rate": 1.824014680321285e-06, + "loss": 0.9278442859649658, + "step": 2016 + }, + { + "epoch": 0.46474654377880187, + "grad_norm": 0.6808747325759799, + "learning_rate": 1.8237987671073846e-06, + "loss": 0.9617106914520264, + "step": 2017 + }, + { + "epoch": 0.4649769585253456, + "grad_norm": 0.682915952128137, + "learning_rate": 1.8235827343201838e-06, + "loss": 0.7983255386352539, + "step": 2018 + }, + { + "epoch": 0.4652073732718894, + "grad_norm": 0.7878897167758285, + "learning_rate": 1.8233665819910393e-06, + "loss": 0.7966747283935547, + "step": 2019 + }, + { + "epoch": 0.46543778801843316, + "grad_norm": 0.893729443286113, + "learning_rate": 1.8231503101513253e-06, + "loss": 0.8977803587913513, + "step": 2020 + }, + { + "epoch": 0.46566820276497695, + "grad_norm": 0.6522874054217892, + "learning_rate": 1.8229339188324334e-06, + "loss": 0.7098231911659241, + "step": 2021 + }, + { + "epoch": 0.46589861751152073, + "grad_norm": 0.6971785978535421, + "learning_rate": 1.822717408065773e-06, + "loss": 0.6402776837348938, + "step": 2022 + }, + { + "epoch": 0.4661290322580645, + "grad_norm": 0.7272467550896602, + "learning_rate": 1.8225007778827698e-06, + "loss": 0.797479510307312, + "step": 2023 + }, + { + "epoch": 0.4663594470046083, + "grad_norm": 0.7464543289112394, + "learning_rate": 1.8222840283148675e-06, + "loss": 0.8205317258834839, + "step": 2024 
+ }, + { + "epoch": 0.4665898617511521, + "grad_norm": 0.755319646803663, + "learning_rate": 1.822067159393527e-06, + "loss": 0.8123108148574829, + "step": 2025 + }, + { + "epoch": 0.46682027649769586, + "grad_norm": 0.7470494916721893, + "learning_rate": 1.8218501711502262e-06, + "loss": 0.9103116989135742, + "step": 2026 + }, + { + "epoch": 0.46705069124423965, + "grad_norm": 0.8399971318490079, + "learning_rate": 1.8216330636164617e-06, + "loss": 0.725040078163147, + "step": 2027 + }, + { + "epoch": 0.46728110599078343, + "grad_norm": 0.8693243601175246, + "learning_rate": 1.8214158368237456e-06, + "loss": 0.8598217964172363, + "step": 2028 + }, + { + "epoch": 0.4675115207373272, + "grad_norm": 0.9587381766929439, + "learning_rate": 1.821198490803608e-06, + "loss": 0.9139465093612671, + "step": 2029 + }, + { + "epoch": 0.46774193548387094, + "grad_norm": 0.7850806397253399, + "learning_rate": 1.8209810255875966e-06, + "loss": 0.8331620097160339, + "step": 2030 + }, + { + "epoch": 0.4679723502304147, + "grad_norm": 0.8908286579751021, + "learning_rate": 1.8207634412072764e-06, + "loss": 0.7901387810707092, + "step": 2031 + }, + { + "epoch": 0.4682027649769585, + "grad_norm": 0.6861413854458724, + "learning_rate": 1.8205457376942288e-06, + "loss": 0.7651060819625854, + "step": 2032 + }, + { + "epoch": 0.4684331797235023, + "grad_norm": 0.7738923235394239, + "learning_rate": 1.820327915080054e-06, + "loss": 0.7382134199142456, + "step": 2033 + }, + { + "epoch": 0.4686635944700461, + "grad_norm": 0.6962774548883505, + "learning_rate": 1.8201099733963682e-06, + "loss": 0.7851507067680359, + "step": 2034 + }, + { + "epoch": 0.46889400921658986, + "grad_norm": 0.8995005169228616, + "learning_rate": 1.8198919126748056e-06, + "loss": 0.9357708692550659, + "step": 2035 + }, + { + "epoch": 0.46912442396313364, + "grad_norm": 0.8238296907521364, + "learning_rate": 1.819673732947017e-06, + "loss": 0.8188502788543701, + "step": 2036 + }, + { + "epoch": 0.4693548387096774, + 
"grad_norm": 1.0258349340262545, + "learning_rate": 1.8194554342446712e-06, + "loss": 0.81590735912323, + "step": 2037 + }, + { + "epoch": 0.4695852534562212, + "grad_norm": 0.811644542087897, + "learning_rate": 1.8192370165994544e-06, + "loss": 0.6879743933677673, + "step": 2038 + }, + { + "epoch": 0.469815668202765, + "grad_norm": 0.8669848845646889, + "learning_rate": 1.8190184800430686e-06, + "loss": 0.9287742376327515, + "step": 2039 + }, + { + "epoch": 0.4700460829493088, + "grad_norm": 0.9807524438459786, + "learning_rate": 1.818799824607235e-06, + "loss": 0.9625484943389893, + "step": 2040 + }, + { + "epoch": 0.47027649769585256, + "grad_norm": 0.8259194997097902, + "learning_rate": 1.8185810503236904e-06, + "loss": 0.8267782926559448, + "step": 2041 + }, + { + "epoch": 0.4705069124423963, + "grad_norm": 0.8404148332122154, + "learning_rate": 1.8183621572241904e-06, + "loss": 0.8827054500579834, + "step": 2042 + }, + { + "epoch": 0.47073732718894007, + "grad_norm": 0.7550183773883651, + "learning_rate": 1.8181431453405067e-06, + "loss": 0.7755721807479858, + "step": 2043 + }, + { + "epoch": 0.47096774193548385, + "grad_norm": 0.9234865066349518, + "learning_rate": 1.8179240147044285e-06, + "loss": 0.8320283889770508, + "step": 2044 + }, + { + "epoch": 0.47119815668202764, + "grad_norm": 0.7077773446032107, + "learning_rate": 1.8177047653477619e-06, + "loss": 0.8737574815750122, + "step": 2045 + }, + { + "epoch": 0.4714285714285714, + "grad_norm": 0.8821209974643925, + "learning_rate": 1.8174853973023317e-06, + "loss": 0.7007719278335571, + "step": 2046 + }, + { + "epoch": 0.4716589861751152, + "grad_norm": 0.822666216900424, + "learning_rate": 1.817265910599978e-06, + "loss": 0.8062577247619629, + "step": 2047 + }, + { + "epoch": 0.471889400921659, + "grad_norm": 0.6775605665320994, + "learning_rate": 1.8170463052725594e-06, + "loss": 0.7059667110443115, + "step": 2048 + }, + { + "epoch": 0.47211981566820277, + "grad_norm": 0.7830423922028903, + 
"learning_rate": 1.816826581351951e-06, + "loss": 0.9025841951370239, + "step": 2049 + }, + { + "epoch": 0.47235023041474655, + "grad_norm": 0.8388278274768075, + "learning_rate": 1.8166067388700458e-06, + "loss": 0.7534186840057373, + "step": 2050 + }, + { + "epoch": 0.47258064516129034, + "grad_norm": 0.7623620329649421, + "learning_rate": 1.8163867778587534e-06, + "loss": 0.9447616338729858, + "step": 2051 + }, + { + "epoch": 0.4728110599078341, + "grad_norm": 0.6423913345578718, + "learning_rate": 1.8161666983500012e-06, + "loss": 0.7092128992080688, + "step": 2052 + }, + { + "epoch": 0.4730414746543779, + "grad_norm": 0.8648864734786782, + "learning_rate": 1.815946500375733e-06, + "loss": 0.8689497113227844, + "step": 2053 + }, + { + "epoch": 0.4732718894009217, + "grad_norm": 0.8941588190294093, + "learning_rate": 1.8157261839679105e-06, + "loss": 0.9298638105392456, + "step": 2054 + }, + { + "epoch": 0.4735023041474654, + "grad_norm": 0.6527064378770876, + "learning_rate": 1.8155057491585125e-06, + "loss": 0.7138030529022217, + "step": 2055 + }, + { + "epoch": 0.4737327188940092, + "grad_norm": 0.6699370139228978, + "learning_rate": 1.815285195979534e-06, + "loss": 0.825221836566925, + "step": 2056 + }, + { + "epoch": 0.473963133640553, + "grad_norm": 0.8559190132682327, + "learning_rate": 1.8150645244629891e-06, + "loss": 0.8643208742141724, + "step": 2057 + }, + { + "epoch": 0.47419354838709676, + "grad_norm": 0.8338353738235549, + "learning_rate": 1.8148437346409073e-06, + "loss": 0.9611828327178955, + "step": 2058 + }, + { + "epoch": 0.47442396313364055, + "grad_norm": 0.8119567978397472, + "learning_rate": 1.8146228265453363e-06, + "loss": 0.8609912991523743, + "step": 2059 + }, + { + "epoch": 0.47465437788018433, + "grad_norm": 0.7540582566966652, + "learning_rate": 1.8144018002083404e-06, + "loss": 0.8277603387832642, + "step": 2060 + }, + { + "epoch": 0.4748847926267281, + "grad_norm": 0.8438703930452028, + "learning_rate": 1.814180655662001e-06, + 
"loss": 0.8601360321044922, + "step": 2061 + }, + { + "epoch": 0.4751152073732719, + "grad_norm": 0.7023202538855939, + "learning_rate": 1.8139593929384178e-06, + "loss": 0.8454653024673462, + "step": 2062 + }, + { + "epoch": 0.4753456221198157, + "grad_norm": 0.8270167900724995, + "learning_rate": 1.8137380120697059e-06, + "loss": 0.870082437992096, + "step": 2063 + }, + { + "epoch": 0.47557603686635946, + "grad_norm": 0.8497953303327396, + "learning_rate": 1.8135165130879988e-06, + "loss": 0.8064073324203491, + "step": 2064 + }, + { + "epoch": 0.47580645161290325, + "grad_norm": 0.5532170457954219, + "learning_rate": 1.813294896025447e-06, + "loss": 0.829608678817749, + "step": 2065 + }, + { + "epoch": 0.47603686635944703, + "grad_norm": 0.7131662100806325, + "learning_rate": 1.8130731609142176e-06, + "loss": 0.8185791969299316, + "step": 2066 + }, + { + "epoch": 0.47626728110599076, + "grad_norm": 0.9405207635689381, + "learning_rate": 1.812851307786495e-06, + "loss": 0.8855293989181519, + "step": 2067 + }, + { + "epoch": 0.47649769585253454, + "grad_norm": 0.6766659884445188, + "learning_rate": 1.8126293366744815e-06, + "loss": 0.7495461106300354, + "step": 2068 + }, + { + "epoch": 0.4767281105990783, + "grad_norm": 0.9706294845402844, + "learning_rate": 1.8124072476103956e-06, + "loss": 0.9435098171234131, + "step": 2069 + }, + { + "epoch": 0.4769585253456221, + "grad_norm": 0.7637936743615437, + "learning_rate": 1.8121850406264727e-06, + "loss": 0.9299448728561401, + "step": 2070 + }, + { + "epoch": 0.4771889400921659, + "grad_norm": 0.9500813357187163, + "learning_rate": 1.8119627157549665e-06, + "loss": 0.9011991024017334, + "step": 2071 + }, + { + "epoch": 0.4774193548387097, + "grad_norm": 0.6847341374863515, + "learning_rate": 1.8117402730281476e-06, + "loss": 0.7326598167419434, + "step": 2072 + }, + { + "epoch": 0.47764976958525346, + "grad_norm": 0.7364560962143368, + "learning_rate": 1.8115177124783024e-06, + "loss": 0.8137445449829102, + "step": 
2073 + }, + { + "epoch": 0.47788018433179724, + "grad_norm": 0.9429635333298672, + "learning_rate": 1.811295034137735e-06, + "loss": 0.8653519153594971, + "step": 2074 + }, + { + "epoch": 0.478110599078341, + "grad_norm": 0.8511205154632088, + "learning_rate": 1.811072238038768e-06, + "loss": 0.9140677452087402, + "step": 2075 + }, + { + "epoch": 0.4783410138248848, + "grad_norm": 0.8012710450337872, + "learning_rate": 1.810849324213739e-06, + "loss": 0.8878934979438782, + "step": 2076 + }, + { + "epoch": 0.4785714285714286, + "grad_norm": 0.6571390792752639, + "learning_rate": 1.8106262926950045e-06, + "loss": 0.8238190412521362, + "step": 2077 + }, + { + "epoch": 0.4788018433179724, + "grad_norm": 0.8097531572330602, + "learning_rate": 1.8104031435149362e-06, + "loss": 0.7722488641738892, + "step": 2078 + }, + { + "epoch": 0.4790322580645161, + "grad_norm": 0.890992078514086, + "learning_rate": 1.8101798767059248e-06, + "loss": 0.9338192939758301, + "step": 2079 + }, + { + "epoch": 0.4792626728110599, + "grad_norm": 0.8000986035452533, + "learning_rate": 1.8099564923003767e-06, + "loss": 0.7342168688774109, + "step": 2080 + }, + { + "epoch": 0.47949308755760367, + "grad_norm": 0.7644530181466097, + "learning_rate": 1.809732990330716e-06, + "loss": 0.8445772528648376, + "step": 2081 + }, + { + "epoch": 0.47972350230414745, + "grad_norm": 0.7291725333905612, + "learning_rate": 1.8095093708293839e-06, + "loss": 0.825678825378418, + "step": 2082 + }, + { + "epoch": 0.47995391705069124, + "grad_norm": 0.8072481370959372, + "learning_rate": 1.8092856338288381e-06, + "loss": 0.7995405197143555, + "step": 2083 + }, + { + "epoch": 0.480184331797235, + "grad_norm": 0.8193777121106555, + "learning_rate": 1.8090617793615536e-06, + "loss": 0.7811745405197144, + "step": 2084 + }, + { + "epoch": 0.4804147465437788, + "grad_norm": 0.7364459454678961, + "learning_rate": 1.8088378074600231e-06, + "loss": 0.842727780342102, + "step": 2085 + }, + { + "epoch": 0.4806451612903226, + 
"grad_norm": 0.7640299868769393, + "learning_rate": 1.808613718156756e-06, + "loss": 0.840941309928894, + "step": 2086 + }, + { + "epoch": 0.48087557603686637, + "grad_norm": 0.7783965916533324, + "learning_rate": 1.808389511484278e-06, + "loss": 0.9024466872215271, + "step": 2087 + }, + { + "epoch": 0.48110599078341015, + "grad_norm": 0.8943218774431004, + "learning_rate": 1.8081651874751325e-06, + "loss": 0.9112771153450012, + "step": 2088 + }, + { + "epoch": 0.48133640552995394, + "grad_norm": 0.6675207900987881, + "learning_rate": 1.8079407461618797e-06, + "loss": 0.834719181060791, + "step": 2089 + }, + { + "epoch": 0.4815668202764977, + "grad_norm": 0.8421358450475633, + "learning_rate": 1.8077161875770971e-06, + "loss": 0.8472555875778198, + "step": 2090 + }, + { + "epoch": 0.48179723502304145, + "grad_norm": 0.7303169649115268, + "learning_rate": 1.8074915117533796e-06, + "loss": 0.8459140062332153, + "step": 2091 + }, + { + "epoch": 0.48202764976958523, + "grad_norm": 0.6945162401362365, + "learning_rate": 1.807266718723338e-06, + "loss": 0.6570066213607788, + "step": 2092 + }, + { + "epoch": 0.482258064516129, + "grad_norm": 0.7314212575092469, + "learning_rate": 1.8070418085196006e-06, + "loss": 0.8897342681884766, + "step": 2093 + }, + { + "epoch": 0.4824884792626728, + "grad_norm": 0.8312385191950623, + "learning_rate": 1.8068167811748132e-06, + "loss": 0.8339060544967651, + "step": 2094 + }, + { + "epoch": 0.4827188940092166, + "grad_norm": 0.7547678583050421, + "learning_rate": 1.8065916367216383e-06, + "loss": 0.7972484827041626, + "step": 2095 + }, + { + "epoch": 0.48294930875576036, + "grad_norm": 0.7424060773179767, + "learning_rate": 1.806366375192755e-06, + "loss": 0.7894760966300964, + "step": 2096 + }, + { + "epoch": 0.48317972350230415, + "grad_norm": 0.7408232706643347, + "learning_rate": 1.8061409966208597e-06, + "loss": 0.713944673538208, + "step": 2097 + }, + { + "epoch": 0.48341013824884793, + "grad_norm": 0.8423029874540192, + 
"learning_rate": 1.8059155010386662e-06, + "loss": 0.7832180261611938, + "step": 2098 + }, + { + "epoch": 0.4836405529953917, + "grad_norm": 0.6563887159918735, + "learning_rate": 1.8056898884789043e-06, + "loss": 0.8873809576034546, + "step": 2099 + }, + { + "epoch": 0.4838709677419355, + "grad_norm": 0.8864132111812594, + "learning_rate": 1.8054641589743218e-06, + "loss": 0.8174929618835449, + "step": 2100 + }, + { + "epoch": 0.4841013824884793, + "grad_norm": 0.6797946394214075, + "learning_rate": 1.805238312557683e-06, + "loss": 0.876921534538269, + "step": 2101 + }, + { + "epoch": 0.48433179723502306, + "grad_norm": 0.7629892942789464, + "learning_rate": 1.8050123492617693e-06, + "loss": 0.9455937147140503, + "step": 2102 + }, + { + "epoch": 0.4845622119815668, + "grad_norm": 0.6880522665173857, + "learning_rate": 1.8047862691193784e-06, + "loss": 0.8146508932113647, + "step": 2103 + }, + { + "epoch": 0.4847926267281106, + "grad_norm": 0.762873599305404, + "learning_rate": 1.8045600721633262e-06, + "loss": 0.8513495326042175, + "step": 2104 + }, + { + "epoch": 0.48502304147465436, + "grad_norm": 0.8329533644475985, + "learning_rate": 1.8043337584264443e-06, + "loss": 0.8430027961730957, + "step": 2105 + }, + { + "epoch": 0.48525345622119814, + "grad_norm": 0.6323595862794837, + "learning_rate": 1.8041073279415826e-06, + "loss": 0.7683960199356079, + "step": 2106 + }, + { + "epoch": 0.4854838709677419, + "grad_norm": 0.6620613064117244, + "learning_rate": 1.8038807807416067e-06, + "loss": 0.7099664211273193, + "step": 2107 + }, + { + "epoch": 0.4857142857142857, + "grad_norm": 0.725415262213876, + "learning_rate": 1.8036541168593994e-06, + "loss": 0.8046330213546753, + "step": 2108 + }, + { + "epoch": 0.4859447004608295, + "grad_norm": 0.7817858416968994, + "learning_rate": 1.803427336327861e-06, + "loss": 0.8387504816055298, + "step": 2109 + }, + { + "epoch": 0.4861751152073733, + "grad_norm": 0.7135784962709865, + "learning_rate": 1.8032004391799085e-06, + 
"loss": 0.883955717086792, + "step": 2110 + }, + { + "epoch": 0.48640552995391706, + "grad_norm": 0.7408960119431725, + "learning_rate": 1.8029734254484756e-06, + "loss": 0.7622070908546448, + "step": 2111 + }, + { + "epoch": 0.48663594470046084, + "grad_norm": 0.7726145388563513, + "learning_rate": 1.802746295166513e-06, + "loss": 0.6625584363937378, + "step": 2112 + }, + { + "epoch": 0.4868663594470046, + "grad_norm": 0.8189497209718242, + "learning_rate": 1.8025190483669878e-06, + "loss": 0.8232327699661255, + "step": 2113 + }, + { + "epoch": 0.4870967741935484, + "grad_norm": 0.8528139298235252, + "learning_rate": 1.8022916850828857e-06, + "loss": 0.9083148241043091, + "step": 2114 + }, + { + "epoch": 0.4873271889400922, + "grad_norm": 0.7392938308731752, + "learning_rate": 1.8020642053472074e-06, + "loss": 0.8248398303985596, + "step": 2115 + }, + { + "epoch": 0.4875576036866359, + "grad_norm": 0.7121240208517446, + "learning_rate": 1.8018366091929717e-06, + "loss": 0.8055423498153687, + "step": 2116 + }, + { + "epoch": 0.4877880184331797, + "grad_norm": 0.778973471543998, + "learning_rate": 1.8016088966532135e-06, + "loss": 0.8716787695884705, + "step": 2117 + }, + { + "epoch": 0.4880184331797235, + "grad_norm": 0.7561230225795058, + "learning_rate": 1.801381067760985e-06, + "loss": 0.8530780673027039, + "step": 2118 + }, + { + "epoch": 0.48824884792626727, + "grad_norm": 0.6774037273322415, + "learning_rate": 1.8011531225493557e-06, + "loss": 0.7958484888076782, + "step": 2119 + }, + { + "epoch": 0.48847926267281105, + "grad_norm": 0.8596146173926187, + "learning_rate": 1.800925061051411e-06, + "loss": 0.8312872648239136, + "step": 2120 + }, + { + "epoch": 0.48870967741935484, + "grad_norm": 0.8135900564482533, + "learning_rate": 1.8006968833002541e-06, + "loss": 0.8097391128540039, + "step": 2121 + }, + { + "epoch": 0.4889400921658986, + "grad_norm": 0.9139337120301166, + "learning_rate": 1.8004685893290046e-06, + "loss": 0.8636112213134766, + "step": 2122 
+ }, + { + "epoch": 0.4891705069124424, + "grad_norm": 0.9088930992891967, + "learning_rate": 1.800240179170799e-06, + "loss": 0.9122721552848816, + "step": 2123 + }, + { + "epoch": 0.4894009216589862, + "grad_norm": 0.914017678688966, + "learning_rate": 1.8000116528587907e-06, + "loss": 0.8172330856323242, + "step": 2124 + }, + { + "epoch": 0.48963133640552997, + "grad_norm": 0.8007018337125341, + "learning_rate": 1.7997830104261502e-06, + "loss": 0.7377575635910034, + "step": 2125 + }, + { + "epoch": 0.48986175115207375, + "grad_norm": 0.9218847107737449, + "learning_rate": 1.7995542519060644e-06, + "loss": 0.7278136014938354, + "step": 2126 + }, + { + "epoch": 0.49009216589861754, + "grad_norm": 0.8808842591031234, + "learning_rate": 1.7993253773317374e-06, + "loss": 0.8977715969085693, + "step": 2127 + }, + { + "epoch": 0.49032258064516127, + "grad_norm": 0.7019593909183576, + "learning_rate": 1.7990963867363902e-06, + "loss": 0.789979100227356, + "step": 2128 + }, + { + "epoch": 0.49055299539170505, + "grad_norm": 0.7069412826082713, + "learning_rate": 1.7988672801532602e-06, + "loss": 0.8304328322410583, + "step": 2129 + }, + { + "epoch": 0.49078341013824883, + "grad_norm": 0.7922910084647693, + "learning_rate": 1.7986380576156019e-06, + "loss": 0.7597516179084778, + "step": 2130 + }, + { + "epoch": 0.4910138248847926, + "grad_norm": 0.6007262757544611, + "learning_rate": 1.7984087191566873e-06, + "loss": 0.661639928817749, + "step": 2131 + }, + { + "epoch": 0.4912442396313364, + "grad_norm": 0.7484873666922557, + "learning_rate": 1.7981792648098035e-06, + "loss": 0.7871333360671997, + "step": 2132 + }, + { + "epoch": 0.4914746543778802, + "grad_norm": 0.7758289248832314, + "learning_rate": 1.7979496946082565e-06, + "loss": 0.8166402578353882, + "step": 2133 + }, + { + "epoch": 0.49170506912442397, + "grad_norm": 0.6906377275927077, + "learning_rate": 1.7977200085853674e-06, + "loss": 0.7112412452697754, + "step": 2134 + }, + { + "epoch": 0.49193548387096775, 
+ "grad_norm": 0.8103572300867555, + "learning_rate": 1.7974902067744752e-06, + "loss": 0.8358132839202881, + "step": 2135 + }, + { + "epoch": 0.49216589861751153, + "grad_norm": 0.7103875590554449, + "learning_rate": 1.7972602892089353e-06, + "loss": 0.8544377088546753, + "step": 2136 + }, + { + "epoch": 0.4923963133640553, + "grad_norm": 0.9004573017295656, + "learning_rate": 1.7970302559221197e-06, + "loss": 1.0105161666870117, + "step": 2137 + }, + { + "epoch": 0.4926267281105991, + "grad_norm": 0.7525179633837843, + "learning_rate": 1.7968001069474176e-06, + "loss": 0.7666197419166565, + "step": 2138 + }, + { + "epoch": 0.4928571428571429, + "grad_norm": 0.9209694432294897, + "learning_rate": 1.7965698423182349e-06, + "loss": 0.9250742197036743, + "step": 2139 + }, + { + "epoch": 0.4930875576036866, + "grad_norm": 0.8066717978287462, + "learning_rate": 1.7963394620679942e-06, + "loss": 0.8269995450973511, + "step": 2140 + }, + { + "epoch": 0.4933179723502304, + "grad_norm": 0.9533305612537857, + "learning_rate": 1.7961089662301346e-06, + "loss": 1.0431339740753174, + "step": 2141 + }, + { + "epoch": 0.4935483870967742, + "grad_norm": 0.7107784117562762, + "learning_rate": 1.7958783548381125e-06, + "loss": 0.7474809288978577, + "step": 2142 + }, + { + "epoch": 0.49377880184331796, + "grad_norm": 0.7729911498332706, + "learning_rate": 1.7956476279254007e-06, + "loss": 0.8850520849227905, + "step": 2143 + }, + { + "epoch": 0.49400921658986174, + "grad_norm": 0.8566824172714074, + "learning_rate": 1.7954167855254893e-06, + "loss": 0.8898880481719971, + "step": 2144 + }, + { + "epoch": 0.4942396313364055, + "grad_norm": 0.886855392770134, + "learning_rate": 1.7951858276718842e-06, + "loss": 0.8718239068984985, + "step": 2145 + }, + { + "epoch": 0.4944700460829493, + "grad_norm": 0.7604278475621951, + "learning_rate": 1.794954754398109e-06, + "loss": 0.8407484292984009, + "step": 2146 + }, + { + "epoch": 0.4947004608294931, + "grad_norm": 0.9582215314216729, + 
"learning_rate": 1.7947235657377036e-06, + "loss": 0.8453764915466309, + "step": 2147 + }, + { + "epoch": 0.4949308755760369, + "grad_norm": 0.6332693049941237, + "learning_rate": 1.794492261724225e-06, + "loss": 0.5795568227767944, + "step": 2148 + }, + { + "epoch": 0.49516129032258066, + "grad_norm": 0.9864343717736791, + "learning_rate": 1.794260842391246e-06, + "loss": 0.8601347208023071, + "step": 2149 + }, + { + "epoch": 0.49539170506912444, + "grad_norm": 0.8909931853274754, + "learning_rate": 1.7940293077723573e-06, + "loss": 0.8328324556350708, + "step": 2150 + }, + { + "epoch": 0.4956221198156682, + "grad_norm": 0.6691517417241877, + "learning_rate": 1.7937976579011655e-06, + "loss": 0.8924463391304016, + "step": 2151 + }, + { + "epoch": 0.49585253456221196, + "grad_norm": 0.7983254161536232, + "learning_rate": 1.7935658928112947e-06, + "loss": 0.9725968837738037, + "step": 2152 + }, + { + "epoch": 0.49608294930875574, + "grad_norm": 0.7649378566504706, + "learning_rate": 1.7933340125363855e-06, + "loss": 0.7814322710037231, + "step": 2153 + }, + { + "epoch": 0.4963133640552995, + "grad_norm": 0.795129549448148, + "learning_rate": 1.793102017110094e-06, + "loss": 0.8022886514663696, + "step": 2154 + }, + { + "epoch": 0.4965437788018433, + "grad_norm": 0.9455352743035539, + "learning_rate": 1.7928699065660951e-06, + "loss": 0.9747333526611328, + "step": 2155 + }, + { + "epoch": 0.4967741935483871, + "grad_norm": 1.0353782305768249, + "learning_rate": 1.7926376809380783e-06, + "loss": 0.9039797782897949, + "step": 2156 + }, + { + "epoch": 0.49700460829493087, + "grad_norm": 1.000992925643121, + "learning_rate": 1.7924053402597518e-06, + "loss": 0.9444677829742432, + "step": 2157 + }, + { + "epoch": 0.49723502304147466, + "grad_norm": 0.7688551400180308, + "learning_rate": 1.7921728845648393e-06, + "loss": 0.8442031741142273, + "step": 2158 + }, + { + "epoch": 0.49746543778801844, + "grad_norm": 0.8590371435800439, + "learning_rate": 1.7919403138870813e-06, 
+ "loss": 0.9410362839698792, + "step": 2159 + }, + { + "epoch": 0.4976958525345622, + "grad_norm": 0.8168398725206235, + "learning_rate": 1.791707628260235e-06, + "loss": 0.8929172158241272, + "step": 2160 + }, + { + "epoch": 0.497926267281106, + "grad_norm": 0.970370102226972, + "learning_rate": 1.7914748277180745e-06, + "loss": 0.9259560108184814, + "step": 2161 + }, + { + "epoch": 0.4981566820276498, + "grad_norm": 0.7778204252845836, + "learning_rate": 1.7912419122943904e-06, + "loss": 0.8201638460159302, + "step": 2162 + }, + { + "epoch": 0.49838709677419357, + "grad_norm": 0.7628075269760098, + "learning_rate": 1.7910088820229907e-06, + "loss": 0.7554556131362915, + "step": 2163 + }, + { + "epoch": 0.4986175115207373, + "grad_norm": 0.7698860809397133, + "learning_rate": 1.7907757369376984e-06, + "loss": 0.8206801414489746, + "step": 2164 + }, + { + "epoch": 0.4988479262672811, + "grad_norm": 0.7606971261006891, + "learning_rate": 1.7905424770723551e-06, + "loss": 0.765400767326355, + "step": 2165 + }, + { + "epoch": 0.49907834101382487, + "grad_norm": 0.9629614917036793, + "learning_rate": 1.7903091024608177e-06, + "loss": 0.9191527366638184, + "step": 2166 + }, + { + "epoch": 0.49930875576036865, + "grad_norm": 1.0883591834210613, + "learning_rate": 1.7900756131369601e-06, + "loss": 0.8515042662620544, + "step": 2167 + }, + { + "epoch": 0.49953917050691243, + "grad_norm": 0.7623230395498896, + "learning_rate": 1.7898420091346736e-06, + "loss": 0.8509752750396729, + "step": 2168 + }, + { + "epoch": 0.4997695852534562, + "grad_norm": 0.7417934516303272, + "learning_rate": 1.7896082904878647e-06, + "loss": 0.8007084131240845, + "step": 2169 + }, + { + "epoch": 0.5, + "grad_norm": 0.8597818097533757, + "learning_rate": 1.789374457230458e-06, + "loss": 0.8395413756370544, + "step": 2170 + }, + { + "epoch": 0.5002304147465437, + "grad_norm": 0.7232889708808644, + "learning_rate": 1.7891405093963937e-06, + "loss": 0.8624853491783142, + "step": 2171 + }, + { + 
"epoch": 0.5004608294930876, + "grad_norm": 0.6629899968556545, + "learning_rate": 1.788906447019629e-06, + "loss": 0.8141548037528992, + "step": 2172 + }, + { + "epoch": 0.5006912442396313, + "grad_norm": 0.6495144260680482, + "learning_rate": 1.7886722701341382e-06, + "loss": 0.6764500141143799, + "step": 2173 + }, + { + "epoch": 0.5009216589861751, + "grad_norm": 0.6701022764652186, + "learning_rate": 1.7884379787739112e-06, + "loss": 0.710756778717041, + "step": 2174 + }, + { + "epoch": 0.5011520737327189, + "grad_norm": 0.8273999117205362, + "learning_rate": 1.7882035729729555e-06, + "loss": 0.8090574145317078, + "step": 2175 + }, + { + "epoch": 0.5013824884792627, + "grad_norm": 0.6977221855783239, + "learning_rate": 1.7879690527652943e-06, + "loss": 0.7639138102531433, + "step": 2176 + }, + { + "epoch": 0.5016129032258064, + "grad_norm": 0.9185836860641033, + "learning_rate": 1.7877344181849687e-06, + "loss": 0.8093903660774231, + "step": 2177 + }, + { + "epoch": 0.5018433179723503, + "grad_norm": 0.7610855435865236, + "learning_rate": 1.7874996692660348e-06, + "loss": 0.8705824017524719, + "step": 2178 + }, + { + "epoch": 0.502073732718894, + "grad_norm": 0.7815265219501579, + "learning_rate": 1.7872648060425666e-06, + "loss": 0.7365947961807251, + "step": 2179 + }, + { + "epoch": 0.5023041474654378, + "grad_norm": 0.8989287933893153, + "learning_rate": 1.787029828548654e-06, + "loss": 0.9405299425125122, + "step": 2180 + }, + { + "epoch": 0.5025345622119816, + "grad_norm": 0.907417749032586, + "learning_rate": 1.7867947368184036e-06, + "loss": 0.9232017993927002, + "step": 2181 + }, + { + "epoch": 0.5027649769585254, + "grad_norm": 1.0801728154122552, + "learning_rate": 1.7865595308859388e-06, + "loss": 0.9941537380218506, + "step": 2182 + }, + { + "epoch": 0.5029953917050691, + "grad_norm": 0.7341611336832391, + "learning_rate": 1.7863242107853993e-06, + "loss": 0.6981802582740784, + "step": 2183 + }, + { + "epoch": 0.5032258064516129, + "grad_norm": 
0.8346521198909456, + "learning_rate": 1.7860887765509417e-06, + "loss": 0.8155109882354736, + "step": 2184 + }, + { + "epoch": 0.5034562211981567, + "grad_norm": 0.8846374910749497, + "learning_rate": 1.7858532282167385e-06, + "loss": 0.7246255874633789, + "step": 2185 + }, + { + "epoch": 0.5036866359447004, + "grad_norm": 0.7027049895049993, + "learning_rate": 1.7856175658169796e-06, + "loss": 0.7042064666748047, + "step": 2186 + }, + { + "epoch": 0.5039170506912443, + "grad_norm": 0.8633735424450812, + "learning_rate": 1.7853817893858714e-06, + "loss": 0.7522145509719849, + "step": 2187 + }, + { + "epoch": 0.504147465437788, + "grad_norm": 0.8170927084265063, + "learning_rate": 1.7851458989576359e-06, + "loss": 1.0157709121704102, + "step": 2188 + }, + { + "epoch": 0.5043778801843318, + "grad_norm": 0.8537305826863457, + "learning_rate": 1.7849098945665127e-06, + "loss": 0.7096433639526367, + "step": 2189 + }, + { + "epoch": 0.5046082949308756, + "grad_norm": 0.8293401368813538, + "learning_rate": 1.7846737762467572e-06, + "loss": 0.7743037939071655, + "step": 2190 + }, + { + "epoch": 0.5048387096774194, + "grad_norm": 0.802261593558941, + "learning_rate": 1.784437544032642e-06, + "loss": 0.7907241582870483, + "step": 2191 + }, + { + "epoch": 0.5050691244239631, + "grad_norm": 0.9488985791352184, + "learning_rate": 1.7842011979584557e-06, + "loss": 0.8692185878753662, + "step": 2192 + }, + { + "epoch": 0.505299539170507, + "grad_norm": 1.0636987469588612, + "learning_rate": 1.783964738058504e-06, + "loss": 0.9678715467453003, + "step": 2193 + }, + { + "epoch": 0.5055299539170507, + "grad_norm": 0.7713527005281836, + "learning_rate": 1.7837281643671077e-06, + "loss": 0.855170726776123, + "step": 2194 + }, + { + "epoch": 0.5057603686635944, + "grad_norm": 0.7469430705420217, + "learning_rate": 1.7834914769186065e-06, + "loss": 0.8452733755111694, + "step": 2195 + }, + { + "epoch": 0.5059907834101383, + "grad_norm": 0.6866121153572871, + "learning_rate": 
1.7832546757473543e-06, + "loss": 0.7517217397689819, + "step": 2196 + }, + { + "epoch": 0.506221198156682, + "grad_norm": 0.7453227048555126, + "learning_rate": 1.783017760887723e-06, + "loss": 0.6971632838249207, + "step": 2197 + }, + { + "epoch": 0.5064516129032258, + "grad_norm": 0.7964964192157018, + "learning_rate": 1.7827807323741002e-06, + "loss": 0.8638256192207336, + "step": 2198 + }, + { + "epoch": 0.5066820276497696, + "grad_norm": 0.7941877452524988, + "learning_rate": 1.7825435902408903e-06, + "loss": 0.8410143256187439, + "step": 2199 + }, + { + "epoch": 0.5069124423963134, + "grad_norm": 0.7902588767037179, + "learning_rate": 1.7823063345225143e-06, + "loss": 0.8127691745758057, + "step": 2200 + }, + { + "epoch": 0.5071428571428571, + "grad_norm": 0.7618481515663807, + "learning_rate": 1.7820689652534096e-06, + "loss": 0.7351404428482056, + "step": 2201 + }, + { + "epoch": 0.507373271889401, + "grad_norm": 0.6691944306500267, + "learning_rate": 1.7818314824680298e-06, + "loss": 0.7258716821670532, + "step": 2202 + }, + { + "epoch": 0.5076036866359447, + "grad_norm": 1.0029859864492747, + "learning_rate": 1.7815938862008454e-06, + "loss": 0.9509599208831787, + "step": 2203 + }, + { + "epoch": 0.5078341013824885, + "grad_norm": 0.7738532710061052, + "learning_rate": 1.7813561764863429e-06, + "loss": 0.8600929379463196, + "step": 2204 + }, + { + "epoch": 0.5080645161290323, + "grad_norm": 0.9689099485850551, + "learning_rate": 1.7811183533590257e-06, + "loss": 0.8688119649887085, + "step": 2205 + }, + { + "epoch": 0.5082949308755761, + "grad_norm": 0.7599344683888546, + "learning_rate": 1.780880416853414e-06, + "loss": 0.8447986841201782, + "step": 2206 + }, + { + "epoch": 0.5085253456221198, + "grad_norm": 0.6953642388755117, + "learning_rate": 1.7806423670040433e-06, + "loss": 0.8262573480606079, + "step": 2207 + }, + { + "epoch": 0.5087557603686635, + "grad_norm": 0.7640117945069856, + "learning_rate": 1.7804042038454666e-06, + "loss": 
0.9534487724304199, + "step": 2208 + }, + { + "epoch": 0.5089861751152074, + "grad_norm": 0.7513792438385134, + "learning_rate": 1.7801659274122527e-06, + "loss": 0.7712565064430237, + "step": 2209 + }, + { + "epoch": 0.5092165898617511, + "grad_norm": 0.8714588056175714, + "learning_rate": 1.7799275377389873e-06, + "loss": 0.8190760016441345, + "step": 2210 + }, + { + "epoch": 0.509447004608295, + "grad_norm": 0.9379540710774249, + "learning_rate": 1.7796890348602722e-06, + "loss": 0.8647592067718506, + "step": 2211 + }, + { + "epoch": 0.5096774193548387, + "grad_norm": 0.7912467632232041, + "learning_rate": 1.7794504188107257e-06, + "loss": 0.7788198590278625, + "step": 2212 + }, + { + "epoch": 0.5099078341013825, + "grad_norm": 0.7053754197084299, + "learning_rate": 1.779211689624983e-06, + "loss": 0.8610718250274658, + "step": 2213 + }, + { + "epoch": 0.5101382488479262, + "grad_norm": 0.7783569383566119, + "learning_rate": 1.7789728473376952e-06, + "loss": 0.832200825214386, + "step": 2214 + }, + { + "epoch": 0.5103686635944701, + "grad_norm": 0.7823482622118234, + "learning_rate": 1.7787338919835298e-06, + "loss": 0.7325488328933716, + "step": 2215 + }, + { + "epoch": 0.5105990783410138, + "grad_norm": 0.8903627357495159, + "learning_rate": 1.7784948235971707e-06, + "loss": 0.8038203716278076, + "step": 2216 + }, + { + "epoch": 0.5108294930875577, + "grad_norm": 0.6275186054972087, + "learning_rate": 1.7782556422133185e-06, + "loss": 0.7016317248344421, + "step": 2217 + }, + { + "epoch": 0.5110599078341014, + "grad_norm": 0.8951545762278973, + "learning_rate": 1.7780163478666905e-06, + "loss": 0.7964655160903931, + "step": 2218 + }, + { + "epoch": 0.5112903225806451, + "grad_norm": 0.7709224710894249, + "learning_rate": 1.777776940592019e-06, + "loss": 0.6681785583496094, + "step": 2219 + }, + { + "epoch": 0.511520737327189, + "grad_norm": 0.8934880823893885, + "learning_rate": 1.7775374204240547e-06, + "loss": 0.835777759552002, + "step": 2220 + }, + { + 
"epoch": 0.5117511520737327, + "grad_norm": 1.0248178001051076, + "learning_rate": 1.777297787397563e-06, + "loss": 0.9442443251609802, + "step": 2221 + }, + { + "epoch": 0.5119815668202765, + "grad_norm": 1.072158922361294, + "learning_rate": 1.7770580415473267e-06, + "loss": 0.9351231455802917, + "step": 2222 + }, + { + "epoch": 0.5122119815668202, + "grad_norm": 0.878332211622375, + "learning_rate": 1.776818182908144e-06, + "loss": 0.7238374352455139, + "step": 2223 + }, + { + "epoch": 0.5124423963133641, + "grad_norm": 0.7001659306792695, + "learning_rate": 1.7765782115148308e-06, + "loss": 0.8206230998039246, + "step": 2224 + }, + { + "epoch": 0.5126728110599078, + "grad_norm": 0.6546302150578799, + "learning_rate": 1.7763381274022176e-06, + "loss": 0.748784065246582, + "step": 2225 + }, + { + "epoch": 0.5129032258064516, + "grad_norm": 0.7566703422977776, + "learning_rate": 1.7760979306051533e-06, + "loss": 0.7980858087539673, + "step": 2226 + }, + { + "epoch": 0.5131336405529954, + "grad_norm": 0.8877968508757134, + "learning_rate": 1.7758576211585018e-06, + "loss": 0.8631168603897095, + "step": 2227 + }, + { + "epoch": 0.5133640552995392, + "grad_norm": 0.7405217897025548, + "learning_rate": 1.7756171990971441e-06, + "loss": 0.9405999779701233, + "step": 2228 + }, + { + "epoch": 0.5135944700460829, + "grad_norm": 0.8867257371824923, + "learning_rate": 1.7753766644559763e-06, + "loss": 0.9055094718933105, + "step": 2229 + }, + { + "epoch": 0.5138248847926268, + "grad_norm": 0.827493910498757, + "learning_rate": 1.775136017269912e-06, + "loss": 0.7583146691322327, + "step": 2230 + }, + { + "epoch": 0.5140552995391705, + "grad_norm": 0.8689067612775456, + "learning_rate": 1.7748952575738811e-06, + "loss": 0.8728743195533752, + "step": 2231 + }, + { + "epoch": 0.5142857142857142, + "grad_norm": 0.7067707521741841, + "learning_rate": 1.7746543854028295e-06, + "loss": 0.8133460283279419, + "step": 2232 + }, + { + "epoch": 0.5145161290322581, + "grad_norm": 
0.7177694794353267, + "learning_rate": 1.7744134007917194e-06, + "loss": 0.8389721512794495, + "step": 2233 + }, + { + "epoch": 0.5147465437788018, + "grad_norm": 0.9617522193850644, + "learning_rate": 1.774172303775529e-06, + "loss": 0.7016798257827759, + "step": 2234 + }, + { + "epoch": 0.5149769585253456, + "grad_norm": 0.7999711451764379, + "learning_rate": 1.7739310943892538e-06, + "loss": 0.7920540571212769, + "step": 2235 + }, + { + "epoch": 0.5152073732718894, + "grad_norm": 0.6990088891534603, + "learning_rate": 1.7736897726679048e-06, + "loss": 0.900149405002594, + "step": 2236 + }, + { + "epoch": 0.5154377880184332, + "grad_norm": 0.743220745754201, + "learning_rate": 1.7734483386465096e-06, + "loss": 0.8537915349006653, + "step": 2237 + }, + { + "epoch": 0.5156682027649769, + "grad_norm": 0.8134323205434837, + "learning_rate": 1.7732067923601121e-06, + "loss": 0.7418123483657837, + "step": 2238 + }, + { + "epoch": 0.5158986175115208, + "grad_norm": 1.108361921569266, + "learning_rate": 1.7729651338437721e-06, + "loss": 0.8890011310577393, + "step": 2239 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9841321811418366, + "learning_rate": 1.7727233631325663e-06, + "loss": 0.9082813262939453, + "step": 2240 + }, + { + "epoch": 0.5163594470046083, + "grad_norm": 0.9268737545625799, + "learning_rate": 1.7724814802615868e-06, + "loss": 0.8337695598602295, + "step": 2241 + }, + { + "epoch": 0.5165898617511521, + "grad_norm": 1.1037050608526282, + "learning_rate": 1.7722394852659437e-06, + "loss": 0.8990765810012817, + "step": 2242 + }, + { + "epoch": 0.5168202764976959, + "grad_norm": 0.8552834719912825, + "learning_rate": 1.7719973781807614e-06, + "loss": 0.720890998840332, + "step": 2243 + }, + { + "epoch": 0.5170506912442396, + "grad_norm": 0.6406815235154244, + "learning_rate": 1.7717551590411817e-06, + "loss": 0.7966938018798828, + "step": 2244 + }, + { + "epoch": 0.5172811059907834, + "grad_norm": 0.8614270693246835, + "learning_rate": 
1.7715128278823622e-06, + "loss": 0.9290107488632202, + "step": 2245 + }, + { + "epoch": 0.5175115207373272, + "grad_norm": 0.8755598994931274, + "learning_rate": 1.771270384739477e-06, + "loss": 0.8388533592224121, + "step": 2246 + }, + { + "epoch": 0.5177419354838709, + "grad_norm": 0.8200932411512113, + "learning_rate": 1.7710278296477169e-06, + "loss": 0.8845043182373047, + "step": 2247 + }, + { + "epoch": 0.5179723502304148, + "grad_norm": 0.8499976704860752, + "learning_rate": 1.7707851626422875e-06, + "loss": 0.879709780216217, + "step": 2248 + }, + { + "epoch": 0.5182027649769585, + "grad_norm": 0.8407815201465851, + "learning_rate": 1.7705423837584123e-06, + "loss": 0.8215152025222778, + "step": 2249 + }, + { + "epoch": 0.5184331797235023, + "grad_norm": 0.8770027311962882, + "learning_rate": 1.7702994930313305e-06, + "loss": 0.8108627796173096, + "step": 2250 + }, + { + "epoch": 0.5186635944700461, + "grad_norm": 0.9106818329739914, + "learning_rate": 1.7700564904962966e-06, + "loss": 0.8391602039337158, + "step": 2251 + }, + { + "epoch": 0.5188940092165899, + "grad_norm": 0.82724043269172, + "learning_rate": 1.769813376188583e-06, + "loss": 0.8664923906326294, + "step": 2252 + }, + { + "epoch": 0.5191244239631336, + "grad_norm": 0.8478256896643234, + "learning_rate": 1.7695701501434765e-06, + "loss": 0.9670882821083069, + "step": 2253 + }, + { + "epoch": 0.5193548387096775, + "grad_norm": 0.8831524743377538, + "learning_rate": 1.7693268123962816e-06, + "loss": 0.946273684501648, + "step": 2254 + }, + { + "epoch": 0.5195852534562212, + "grad_norm": 0.7643743435262689, + "learning_rate": 1.7690833629823184e-06, + "loss": 0.9691795706748962, + "step": 2255 + }, + { + "epoch": 0.5198156682027649, + "grad_norm": 0.7833370135674333, + "learning_rate": 1.7688398019369232e-06, + "loss": 0.8086103200912476, + "step": 2256 + }, + { + "epoch": 0.5200460829493088, + "grad_norm": 0.8183770044685874, + "learning_rate": 1.7685961292954486e-06, + "loss": 
0.8574277758598328, + "step": 2257 + }, + { + "epoch": 0.5202764976958525, + "grad_norm": 0.7089387180946831, + "learning_rate": 1.7683523450932633e-06, + "loss": 0.7841963171958923, + "step": 2258 + }, + { + "epoch": 0.5205069124423963, + "grad_norm": 0.7629735238937895, + "learning_rate": 1.7681084493657523e-06, + "loss": 0.6972980499267578, + "step": 2259 + }, + { + "epoch": 0.5207373271889401, + "grad_norm": 0.7917333859989639, + "learning_rate": 1.7678644421483163e-06, + "loss": 0.9193723201751709, + "step": 2260 + }, + { + "epoch": 0.5209677419354839, + "grad_norm": 0.9714597630384237, + "learning_rate": 1.7676203234763736e-06, + "loss": 0.7902654409408569, + "step": 2261 + }, + { + "epoch": 0.5211981566820276, + "grad_norm": 0.7983060164629807, + "learning_rate": 1.767376093385357e-06, + "loss": 0.8804734945297241, + "step": 2262 + }, + { + "epoch": 0.5214285714285715, + "grad_norm": 0.9065709846386143, + "learning_rate": 1.7671317519107163e-06, + "loss": 0.7884976863861084, + "step": 2263 + }, + { + "epoch": 0.5216589861751152, + "grad_norm": 0.9252417906886758, + "learning_rate": 1.7668872990879173e-06, + "loss": 0.8233190774917603, + "step": 2264 + }, + { + "epoch": 0.521889400921659, + "grad_norm": 0.7126124532622758, + "learning_rate": 1.766642734952442e-06, + "loss": 0.7985334396362305, + "step": 2265 + }, + { + "epoch": 0.5221198156682028, + "grad_norm": 0.8073440338214538, + "learning_rate": 1.7663980595397887e-06, + "loss": 0.7805646657943726, + "step": 2266 + }, + { + "epoch": 0.5223502304147466, + "grad_norm": 0.9455838488830395, + "learning_rate": 1.7661532728854718e-06, + "loss": 0.8528248071670532, + "step": 2267 + }, + { + "epoch": 0.5225806451612903, + "grad_norm": 0.882590365173732, + "learning_rate": 1.7659083750250215e-06, + "loss": 0.7714066505432129, + "step": 2268 + }, + { + "epoch": 0.522811059907834, + "grad_norm": 0.7632999883965862, + "learning_rate": 1.7656633659939843e-06, + "loss": 0.8250499963760376, + "step": 2269 + }, + { + 
"epoch": 0.5230414746543779, + "grad_norm": 0.6787990523098465, + "learning_rate": 1.7654182458279231e-06, + "loss": 0.7878777384757996, + "step": 2270 + }, + { + "epoch": 0.5232718894009216, + "grad_norm": 0.8263772967033729, + "learning_rate": 1.7651730145624174e-06, + "loss": 0.9080224633216858, + "step": 2271 + }, + { + "epoch": 0.5235023041474655, + "grad_norm": 0.8137376292994275, + "learning_rate": 1.7649276722330607e-06, + "loss": 0.8010937571525574, + "step": 2272 + }, + { + "epoch": 0.5237327188940092, + "grad_norm": 0.8996847055009526, + "learning_rate": 1.7646822188754658e-06, + "loss": 0.903404951095581, + "step": 2273 + }, + { + "epoch": 0.523963133640553, + "grad_norm": 0.928692707021516, + "learning_rate": 1.7644366545252589e-06, + "loss": 0.9009061455726624, + "step": 2274 + }, + { + "epoch": 0.5241935483870968, + "grad_norm": 0.7651260343716183, + "learning_rate": 1.7641909792180834e-06, + "loss": 0.7158697843551636, + "step": 2275 + }, + { + "epoch": 0.5244239631336406, + "grad_norm": 0.8041302440889452, + "learning_rate": 1.763945192989599e-06, + "loss": 0.8101463317871094, + "step": 2276 + }, + { + "epoch": 0.5246543778801843, + "grad_norm": 0.8174455436475604, + "learning_rate": 1.7636992958754812e-06, + "loss": 0.758610725402832, + "step": 2277 + }, + { + "epoch": 0.5248847926267282, + "grad_norm": 0.9651314388158028, + "learning_rate": 1.7634532879114216e-06, + "loss": 0.9469501972198486, + "step": 2278 + }, + { + "epoch": 0.5251152073732719, + "grad_norm": 0.6853415956002341, + "learning_rate": 1.7632071691331281e-06, + "loss": 0.7528036236763, + "step": 2279 + }, + { + "epoch": 0.5253456221198156, + "grad_norm": 0.9124447697867164, + "learning_rate": 1.7629609395763242e-06, + "loss": 0.8519324064254761, + "step": 2280 + }, + { + "epoch": 0.5255760368663595, + "grad_norm": 0.9239480610002251, + "learning_rate": 1.7627145992767498e-06, + "loss": 0.8620004653930664, + "step": 2281 + }, + { + "epoch": 0.5258064516129032, + "grad_norm": 
0.7831738680942184, + "learning_rate": 1.762468148270161e-06, + "loss": 0.8066067695617676, + "step": 2282 + }, + { + "epoch": 0.526036866359447, + "grad_norm": 0.8314773622163678, + "learning_rate": 1.7622215865923301e-06, + "loss": 0.865642786026001, + "step": 2283 + }, + { + "epoch": 0.5262672811059907, + "grad_norm": 0.7269170910166286, + "learning_rate": 1.761974914279045e-06, + "loss": 0.8478001356124878, + "step": 2284 + }, + { + "epoch": 0.5264976958525346, + "grad_norm": 0.8461811606118353, + "learning_rate": 1.7617281313661098e-06, + "loss": 0.7984344363212585, + "step": 2285 + }, + { + "epoch": 0.5267281105990783, + "grad_norm": 0.8489168247147351, + "learning_rate": 1.7614812378893444e-06, + "loss": 0.8480801582336426, + "step": 2286 + }, + { + "epoch": 0.5269585253456222, + "grad_norm": 0.9126795310234661, + "learning_rate": 1.7612342338845859e-06, + "loss": 0.8667479753494263, + "step": 2287 + }, + { + "epoch": 0.5271889400921659, + "grad_norm": 0.9533468835174431, + "learning_rate": 1.7609871193876854e-06, + "loss": 0.8431364297866821, + "step": 2288 + }, + { + "epoch": 0.5274193548387097, + "grad_norm": 0.8628781350943807, + "learning_rate": 1.7607398944345127e-06, + "loss": 0.8544220924377441, + "step": 2289 + }, + { + "epoch": 0.5276497695852534, + "grad_norm": 0.9575259696859837, + "learning_rate": 1.760492559060951e-06, + "loss": 0.9298971891403198, + "step": 2290 + }, + { + "epoch": 0.5278801843317973, + "grad_norm": 0.8854664005974592, + "learning_rate": 1.760245113302901e-06, + "loss": 0.739667534828186, + "step": 2291 + }, + { + "epoch": 0.528110599078341, + "grad_norm": 0.9418693515744256, + "learning_rate": 1.7599975571962796e-06, + "loss": 0.8981268405914307, + "step": 2292 + }, + { + "epoch": 0.5283410138248847, + "grad_norm": 0.8489202000746718, + "learning_rate": 1.7597498907770185e-06, + "loss": 0.8027834892272949, + "step": 2293 + }, + { + "epoch": 0.5285714285714286, + "grad_norm": 0.7244957329263912, + "learning_rate": 
1.7595021140810669e-06, + "loss": 0.7018242478370667, + "step": 2294 + }, + { + "epoch": 0.5288018433179723, + "grad_norm": 0.8699196704594798, + "learning_rate": 1.7592542271443887e-06, + "loss": 0.7655147910118103, + "step": 2295 + }, + { + "epoch": 0.5290322580645161, + "grad_norm": 0.8169123509935803, + "learning_rate": 1.7590062300029644e-06, + "loss": 0.8283153772354126, + "step": 2296 + }, + { + "epoch": 0.5292626728110599, + "grad_norm": 1.0550792201388366, + "learning_rate": 1.7587581226927907e-06, + "loss": 1.0430598258972168, + "step": 2297 + }, + { + "epoch": 0.5294930875576037, + "grad_norm": 0.7609036061197976, + "learning_rate": 1.7585099052498802e-06, + "loss": 0.6683472990989685, + "step": 2298 + }, + { + "epoch": 0.5297235023041474, + "grad_norm": 0.7278178698575015, + "learning_rate": 1.7582615777102609e-06, + "loss": 0.7254939079284668, + "step": 2299 + }, + { + "epoch": 0.5299539170506913, + "grad_norm": 0.7049477325497308, + "learning_rate": 1.7580131401099774e-06, + "loss": 0.7913245558738708, + "step": 2300 + }, + { + "epoch": 0.530184331797235, + "grad_norm": 0.8416230641508338, + "learning_rate": 1.75776459248509e-06, + "loss": 0.7832915782928467, + "step": 2301 + }, + { + "epoch": 0.5304147465437788, + "grad_norm": 0.7722959383546871, + "learning_rate": 1.7575159348716754e-06, + "loss": 0.9754987955093384, + "step": 2302 + }, + { + "epoch": 0.5306451612903226, + "grad_norm": 0.8614799765536667, + "learning_rate": 1.7572671673058254e-06, + "loss": 0.8343901634216309, + "step": 2303 + }, + { + "epoch": 0.5308755760368664, + "grad_norm": 0.862069962418511, + "learning_rate": 1.757018289823649e-06, + "loss": 0.9836198091506958, + "step": 2304 + }, + { + "epoch": 0.5311059907834101, + "grad_norm": 0.7978699236275345, + "learning_rate": 1.7567693024612695e-06, + "loss": 0.8258972764015198, + "step": 2305 + }, + { + "epoch": 0.5313364055299539, + "grad_norm": 0.8169244061103897, + "learning_rate": 1.7565202052548277e-06, + "loss": 
0.8822964429855347, + "step": 2306 + }, + { + "epoch": 0.5315668202764977, + "grad_norm": 0.8094894252842574, + "learning_rate": 1.7562709982404797e-06, + "loss": 0.721222996711731, + "step": 2307 + }, + { + "epoch": 0.5317972350230414, + "grad_norm": 0.7759663122688174, + "learning_rate": 1.7560216814543974e-06, + "loss": 0.7273069620132446, + "step": 2308 + }, + { + "epoch": 0.5320276497695853, + "grad_norm": 0.749740659090673, + "learning_rate": 1.755772254932769e-06, + "loss": 0.8031520843505859, + "step": 2309 + }, + { + "epoch": 0.532258064516129, + "grad_norm": 0.8746676083569236, + "learning_rate": 1.7555227187117982e-06, + "loss": 0.8767163157463074, + "step": 2310 + }, + { + "epoch": 0.5324884792626728, + "grad_norm": 1.052374988916139, + "learning_rate": 1.755273072827705e-06, + "loss": 0.8018463850021362, + "step": 2311 + }, + { + "epoch": 0.5327188940092166, + "grad_norm": 0.9632384627648846, + "learning_rate": 1.7550233173167252e-06, + "loss": 0.8281232118606567, + "step": 2312 + }, + { + "epoch": 0.5329493087557604, + "grad_norm": 0.9472067369973646, + "learning_rate": 1.7547734522151103e-06, + "loss": 0.8802565336227417, + "step": 2313 + }, + { + "epoch": 0.5331797235023041, + "grad_norm": 0.7195582219345643, + "learning_rate": 1.754523477559128e-06, + "loss": 0.8055544495582581, + "step": 2314 + }, + { + "epoch": 0.533410138248848, + "grad_norm": 0.9358658916449707, + "learning_rate": 1.754273393385062e-06, + "loss": 0.8163481950759888, + "step": 2315 + }, + { + "epoch": 0.5336405529953917, + "grad_norm": 0.9365559775291885, + "learning_rate": 1.7540231997292111e-06, + "loss": 0.8308255076408386, + "step": 2316 + }, + { + "epoch": 0.5338709677419354, + "grad_norm": 0.9031429015213124, + "learning_rate": 1.7537728966278913e-06, + "loss": 0.8387685418128967, + "step": 2317 + }, + { + "epoch": 0.5341013824884793, + "grad_norm": 0.7470153179334161, + "learning_rate": 1.7535224841174333e-06, + "loss": 0.8668780326843262, + "step": 2318 + }, + { + 
"epoch": 0.534331797235023, + "grad_norm": 0.7449540611731051, + "learning_rate": 1.7532719622341842e-06, + "loss": 0.8394712209701538, + "step": 2319 + }, + { + "epoch": 0.5345622119815668, + "grad_norm": 0.7539905771593468, + "learning_rate": 1.7530213310145073e-06, + "loss": 0.7755688428878784, + "step": 2320 + }, + { + "epoch": 0.5347926267281106, + "grad_norm": 0.8150738821263226, + "learning_rate": 1.7527705904947805e-06, + "loss": 0.7714632749557495, + "step": 2321 + }, + { + "epoch": 0.5350230414746544, + "grad_norm": 0.807680924946579, + "learning_rate": 1.7525197407113997e-06, + "loss": 0.8810869455337524, + "step": 2322 + }, + { + "epoch": 0.5352534562211981, + "grad_norm": 1.0672299468188131, + "learning_rate": 1.7522687817007742e-06, + "loss": 0.8445242643356323, + "step": 2323 + }, + { + "epoch": 0.535483870967742, + "grad_norm": 1.1338085945775938, + "learning_rate": 1.7520177134993311e-06, + "loss": 0.9602948427200317, + "step": 2324 + }, + { + "epoch": 0.5357142857142857, + "grad_norm": 0.7789379367396811, + "learning_rate": 1.7517665361435126e-06, + "loss": 0.7865237593650818, + "step": 2325 + }, + { + "epoch": 0.5359447004608295, + "grad_norm": 0.8870578602537817, + "learning_rate": 1.7515152496697763e-06, + "loss": 0.8062880039215088, + "step": 2326 + }, + { + "epoch": 0.5361751152073733, + "grad_norm": 0.9742037408160464, + "learning_rate": 1.7512638541145966e-06, + "loss": 0.8386664986610413, + "step": 2327 + }, + { + "epoch": 0.5364055299539171, + "grad_norm": 1.0154937609139327, + "learning_rate": 1.7510123495144629e-06, + "loss": 0.973692774772644, + "step": 2328 + }, + { + "epoch": 0.5366359447004608, + "grad_norm": 0.9023959356834507, + "learning_rate": 1.7507607359058808e-06, + "loss": 0.8250089883804321, + "step": 2329 + }, + { + "epoch": 0.5368663594470046, + "grad_norm": 0.8457870176131529, + "learning_rate": 1.750509013325372e-06, + "loss": 0.8578102588653564, + "step": 2330 + }, + { + "epoch": 0.5370967741935484, + "grad_norm": 
0.8804595958614453, + "learning_rate": 1.7502571818094732e-06, + "loss": 0.916475236415863, + "step": 2331 + }, + { + "epoch": 0.5373271889400921, + "grad_norm": 0.9225430635370255, + "learning_rate": 1.7500052413947377e-06, + "loss": 0.8210046291351318, + "step": 2332 + }, + { + "epoch": 0.537557603686636, + "grad_norm": 0.7091387099201478, + "learning_rate": 1.7497531921177344e-06, + "loss": 0.816267728805542, + "step": 2333 + }, + { + "epoch": 0.5377880184331797, + "grad_norm": 0.9764630645457667, + "learning_rate": 1.7495010340150478e-06, + "loss": 1.0091882944107056, + "step": 2334 + }, + { + "epoch": 0.5380184331797235, + "grad_norm": 0.982812584725329, + "learning_rate": 1.7492487671232783e-06, + "loss": 0.7549277544021606, + "step": 2335 + }, + { + "epoch": 0.5382488479262673, + "grad_norm": 0.8589431412898547, + "learning_rate": 1.7489963914790423e-06, + "loss": 0.9584934711456299, + "step": 2336 + }, + { + "epoch": 0.5384792626728111, + "grad_norm": 0.7167225081500926, + "learning_rate": 1.7487439071189713e-06, + "loss": 0.8189069628715515, + "step": 2337 + }, + { + "epoch": 0.5387096774193548, + "grad_norm": 0.976466384445042, + "learning_rate": 1.7484913140797138e-06, + "loss": 0.7529993057250977, + "step": 2338 + }, + { + "epoch": 0.5389400921658987, + "grad_norm": 0.9894954868399615, + "learning_rate": 1.7482386123979324e-06, + "loss": 0.8611496686935425, + "step": 2339 + }, + { + "epoch": 0.5391705069124424, + "grad_norm": 1.2753256885249857, + "learning_rate": 1.7479858021103074e-06, + "loss": 0.9400241374969482, + "step": 2340 + }, + { + "epoch": 0.5394009216589861, + "grad_norm": 0.7513824016722385, + "learning_rate": 1.7477328832535332e-06, + "loss": 0.6686737537384033, + "step": 2341 + }, + { + "epoch": 0.53963133640553, + "grad_norm": 0.7834119073150019, + "learning_rate": 1.747479855864321e-06, + "loss": 0.864795982837677, + "step": 2342 + }, + { + "epoch": 0.5398617511520737, + "grad_norm": 0.9942068845664563, + "learning_rate": 
1.7472267199793971e-06, + "loss": 0.9579563140869141, + "step": 2343 + }, + { + "epoch": 0.5400921658986175, + "grad_norm": 0.9464284115225821, + "learning_rate": 1.746973475635504e-06, + "loss": 0.7492884397506714, + "step": 2344 + }, + { + "epoch": 0.5403225806451613, + "grad_norm": 1.1301826150440575, + "learning_rate": 1.7467201228694e-06, + "loss": 1.020420789718628, + "step": 2345 + }, + { + "epoch": 0.5405529953917051, + "grad_norm": 0.8996882097606888, + "learning_rate": 1.7464666617178585e-06, + "loss": 0.8277238011360168, + "step": 2346 + }, + { + "epoch": 0.5407834101382488, + "grad_norm": 0.8343415166384458, + "learning_rate": 1.7462130922176694e-06, + "loss": 0.8160337209701538, + "step": 2347 + }, + { + "epoch": 0.5410138248847927, + "grad_norm": 0.940177897473061, + "learning_rate": 1.7459594144056378e-06, + "loss": 0.8742454648017883, + "step": 2348 + }, + { + "epoch": 0.5412442396313364, + "grad_norm": 0.8263630155636004, + "learning_rate": 1.7457056283185847e-06, + "loss": 0.7987914085388184, + "step": 2349 + }, + { + "epoch": 0.5414746543778802, + "grad_norm": 0.8096196719588583, + "learning_rate": 1.7454517339933467e-06, + "loss": 0.6917734146118164, + "step": 2350 + }, + { + "epoch": 0.541705069124424, + "grad_norm": 0.9860357050478065, + "learning_rate": 1.7451977314667763e-06, + "loss": 0.8338258266448975, + "step": 2351 + }, + { + "epoch": 0.5419354838709678, + "grad_norm": 0.6906626367704619, + "learning_rate": 1.7449436207757418e-06, + "loss": 0.8308743238449097, + "step": 2352 + }, + { + "epoch": 0.5421658986175115, + "grad_norm": 0.7126371911422212, + "learning_rate": 1.744689401957127e-06, + "loss": 0.7843145728111267, + "step": 2353 + }, + { + "epoch": 0.5423963133640552, + "grad_norm": 0.6637904176126797, + "learning_rate": 1.7444350750478314e-06, + "loss": 0.9088687896728516, + "step": 2354 + }, + { + "epoch": 0.5426267281105991, + "grad_norm": 1.1601519737508017, + "learning_rate": 1.74418064008477e-06, + "loss": 0.876841127872467, 
+ "step": 2355 + }, + { + "epoch": 0.5428571428571428, + "grad_norm": 0.804702758707697, + "learning_rate": 1.743926097104874e-06, + "loss": 0.7169051170349121, + "step": 2356 + }, + { + "epoch": 0.5430875576036867, + "grad_norm": 0.8414445338031196, + "learning_rate": 1.7436714461450897e-06, + "loss": 0.7979093194007874, + "step": 2357 + }, + { + "epoch": 0.5433179723502304, + "grad_norm": 0.796767744969521, + "learning_rate": 1.7434166872423795e-06, + "loss": 0.9152545928955078, + "step": 2358 + }, + { + "epoch": 0.5435483870967742, + "grad_norm": 0.8612716514728646, + "learning_rate": 1.7431618204337212e-06, + "loss": 0.8968983888626099, + "step": 2359 + }, + { + "epoch": 0.543778801843318, + "grad_norm": 0.7451796864953032, + "learning_rate": 1.7429068457561086e-06, + "loss": 0.7591085433959961, + "step": 2360 + }, + { + "epoch": 0.5440092165898618, + "grad_norm": 0.8434007797764556, + "learning_rate": 1.7426517632465508e-06, + "loss": 0.6931861639022827, + "step": 2361 + }, + { + "epoch": 0.5442396313364055, + "grad_norm": 0.816030716232177, + "learning_rate": 1.7423965729420729e-06, + "loss": 0.7715095281600952, + "step": 2362 + }, + { + "epoch": 0.5444700460829494, + "grad_norm": 0.7333839549943538, + "learning_rate": 1.742141274879715e-06, + "loss": 0.8282119035720825, + "step": 2363 + }, + { + "epoch": 0.5447004608294931, + "grad_norm": 0.8282161479585932, + "learning_rate": 1.7418858690965337e-06, + "loss": 0.7595704197883606, + "step": 2364 + }, + { + "epoch": 0.5449308755760369, + "grad_norm": 0.8861519618227073, + "learning_rate": 1.7416303556296005e-06, + "loss": 0.8738422393798828, + "step": 2365 + }, + { + "epoch": 0.5451612903225806, + "grad_norm": 0.819062403403448, + "learning_rate": 1.741374734516003e-06, + "loss": 0.8399837017059326, + "step": 2366 + }, + { + "epoch": 0.5453917050691244, + "grad_norm": 0.9147252373002325, + "learning_rate": 1.7411190057928442e-06, + "loss": 0.8213151693344116, + "step": 2367 + }, + { + "epoch": 
0.5456221198156682, + "grad_norm": 0.862161359681962, + "learning_rate": 1.740863169497243e-06, + "loss": 0.748835563659668, + "step": 2368 + }, + { + "epoch": 0.5458525345622119, + "grad_norm": 0.6925915187477067, + "learning_rate": 1.7406072256663333e-06, + "loss": 0.9222339391708374, + "step": 2369 + }, + { + "epoch": 0.5460829493087558, + "grad_norm": 0.6352006169320189, + "learning_rate": 1.7403511743372655e-06, + "loss": 0.6543160676956177, + "step": 2370 + }, + { + "epoch": 0.5463133640552995, + "grad_norm": 0.9993386394035012, + "learning_rate": 1.7400950155472046e-06, + "loss": 0.9828567504882812, + "step": 2371 + }, + { + "epoch": 0.5465437788018433, + "grad_norm": 0.9620494284169527, + "learning_rate": 1.739838749333332e-06, + "loss": 0.95346599817276, + "step": 2372 + }, + { + "epoch": 0.5467741935483871, + "grad_norm": 0.4533946729074916, + "learning_rate": 1.7395823757328442e-06, + "loss": 0.626889705657959, + "step": 2373 + }, + { + "epoch": 0.5470046082949309, + "grad_norm": 0.6641652944774505, + "learning_rate": 1.739325894782954e-06, + "loss": 0.8152071833610535, + "step": 2374 + }, + { + "epoch": 0.5472350230414746, + "grad_norm": 0.7149653321076401, + "learning_rate": 1.7390693065208889e-06, + "loss": 0.8244980573654175, + "step": 2375 + }, + { + "epoch": 0.5474654377880185, + "grad_norm": 0.8801604517186058, + "learning_rate": 1.738812610983892e-06, + "loss": 0.8234372138977051, + "step": 2376 + }, + { + "epoch": 0.5476958525345622, + "grad_norm": 0.8626749383303203, + "learning_rate": 1.7385558082092228e-06, + "loss": 0.9334712624549866, + "step": 2377 + }, + { + "epoch": 0.5479262672811059, + "grad_norm": 0.8866496689156442, + "learning_rate": 1.7382988982341557e-06, + "loss": 0.7873882055282593, + "step": 2378 + }, + { + "epoch": 0.5481566820276498, + "grad_norm": 0.7814140858155267, + "learning_rate": 1.7380418810959814e-06, + "loss": 0.7971000671386719, + "step": 2379 + }, + { + "epoch": 0.5483870967741935, + "grad_norm": 
0.7452714019733373, + "learning_rate": 1.7377847568320046e-06, + "loss": 0.8617004156112671, + "step": 2380 + }, + { + "epoch": 0.5486175115207373, + "grad_norm": 0.7316280745753603, + "learning_rate": 1.7375275254795472e-06, + "loss": 0.6798374056816101, + "step": 2381 + }, + { + "epoch": 0.5488479262672811, + "grad_norm": 0.8600424341995414, + "learning_rate": 1.7372701870759459e-06, + "loss": 0.8621633052825928, + "step": 2382 + }, + { + "epoch": 0.5490783410138249, + "grad_norm": 0.78685909041996, + "learning_rate": 1.7370127416585527e-06, + "loss": 0.6533470153808594, + "step": 2383 + }, + { + "epoch": 0.5493087557603686, + "grad_norm": 0.9199843580999427, + "learning_rate": 1.736755189264736e-06, + "loss": 0.8854461908340454, + "step": 2384 + }, + { + "epoch": 0.5495391705069125, + "grad_norm": 1.0020485772603467, + "learning_rate": 1.7364975299318786e-06, + "loss": 0.9461240768432617, + "step": 2385 + }, + { + "epoch": 0.5497695852534562, + "grad_norm": 1.0179837516521926, + "learning_rate": 1.73623976369738e-06, + "loss": 0.8936882019042969, + "step": 2386 + }, + { + "epoch": 0.55, + "grad_norm": 0.7527230779520249, + "learning_rate": 1.7359818905986544e-06, + "loss": 0.8177640438079834, + "step": 2387 + }, + { + "epoch": 0.5502304147465438, + "grad_norm": 0.7539178622826256, + "learning_rate": 1.7357239106731317e-06, + "loss": 0.793328046798706, + "step": 2388 + }, + { + "epoch": 0.5504608294930876, + "grad_norm": 0.8548599569350254, + "learning_rate": 1.7354658239582572e-06, + "loss": 0.8837069272994995, + "step": 2389 + }, + { + "epoch": 0.5506912442396313, + "grad_norm": 0.8764277126116193, + "learning_rate": 1.7352076304914918e-06, + "loss": 0.8801138401031494, + "step": 2390 + }, + { + "epoch": 0.5509216589861751, + "grad_norm": 0.7981260720892804, + "learning_rate": 1.7349493303103123e-06, + "loss": 0.865073025226593, + "step": 2391 + }, + { + "epoch": 0.5511520737327189, + "grad_norm": 0.5938962289027067, + "learning_rate": 1.7346909234522107e-06, + 
"loss": 0.8712339401245117, + "step": 2392 + }, + { + "epoch": 0.5513824884792626, + "grad_norm": 0.6857068624612402, + "learning_rate": 1.7344324099546938e-06, + "loss": 0.7689294815063477, + "step": 2393 + }, + { + "epoch": 0.5516129032258065, + "grad_norm": 0.6784843872797971, + "learning_rate": 1.7341737898552851e-06, + "loss": 0.9228999614715576, + "step": 2394 + }, + { + "epoch": 0.5518433179723502, + "grad_norm": 1.025443261317525, + "learning_rate": 1.7339150631915228e-06, + "loss": 0.9473327398300171, + "step": 2395 + }, + { + "epoch": 0.552073732718894, + "grad_norm": 0.9317831571882359, + "learning_rate": 1.7336562300009604e-06, + "loss": 0.7724621295928955, + "step": 2396 + }, + { + "epoch": 0.5523041474654378, + "grad_norm": 0.7823556125482615, + "learning_rate": 1.7333972903211675e-06, + "loss": 0.8646600246429443, + "step": 2397 + }, + { + "epoch": 0.5525345622119816, + "grad_norm": 0.6673069571562762, + "learning_rate": 1.7331382441897286e-06, + "loss": 0.7143402099609375, + "step": 2398 + }, + { + "epoch": 0.5527649769585253, + "grad_norm": 0.9600129950475998, + "learning_rate": 1.7328790916442446e-06, + "loss": 0.8229624032974243, + "step": 2399 + }, + { + "epoch": 0.5529953917050692, + "grad_norm": 0.8815652742153803, + "learning_rate": 1.7326198327223303e-06, + "loss": 0.7244875431060791, + "step": 2400 + }, + { + "epoch": 0.5532258064516129, + "grad_norm": 0.8586401947703556, + "learning_rate": 1.7323604674616173e-06, + "loss": 0.7797688245773315, + "step": 2401 + }, + { + "epoch": 0.5534562211981566, + "grad_norm": 0.7923271764392044, + "learning_rate": 1.7321009958997519e-06, + "loss": 0.752421498298645, + "step": 2402 + }, + { + "epoch": 0.5536866359447005, + "grad_norm": 0.880725843060538, + "learning_rate": 1.7318414180743962e-06, + "loss": 0.8285892009735107, + "step": 2403 + }, + { + "epoch": 0.5539170506912442, + "grad_norm": 0.7844500606150882, + "learning_rate": 1.7315817340232272e-06, + "loss": 0.8247888088226318, + "step": 2404 + }, 
+ { + "epoch": 0.554147465437788, + "grad_norm": 0.7041289847587934, + "learning_rate": 1.7313219437839384e-06, + "loss": 0.7713418006896973, + "step": 2405 + }, + { + "epoch": 0.5543778801843318, + "grad_norm": 0.8575067968238488, + "learning_rate": 1.7310620473942374e-06, + "loss": 0.8748825788497925, + "step": 2406 + }, + { + "epoch": 0.5546082949308756, + "grad_norm": 0.899949436927101, + "learning_rate": 1.730802044891848e-06, + "loss": 0.9255902767181396, + "step": 2407 + }, + { + "epoch": 0.5548387096774193, + "grad_norm": 0.7968868837370462, + "learning_rate": 1.7305419363145093e-06, + "loss": 0.7226976156234741, + "step": 2408 + }, + { + "epoch": 0.5550691244239632, + "grad_norm": 0.8868777191693532, + "learning_rate": 1.7302817216999754e-06, + "loss": 0.9024704694747925, + "step": 2409 + }, + { + "epoch": 0.5552995391705069, + "grad_norm": 0.8331382998314191, + "learning_rate": 1.7300214010860168e-06, + "loss": 0.7857767343521118, + "step": 2410 + }, + { + "epoch": 0.5555299539170507, + "grad_norm": 0.7111146090264087, + "learning_rate": 1.7297609745104183e-06, + "loss": 0.7280064821243286, + "step": 2411 + }, + { + "epoch": 0.5557603686635945, + "grad_norm": 0.8916895272866717, + "learning_rate": 1.72950044201098e-06, + "loss": 0.8909369111061096, + "step": 2412 + }, + { + "epoch": 0.5559907834101383, + "grad_norm": 0.8724458169518867, + "learning_rate": 1.7292398036255183e-06, + "loss": 0.8543871641159058, + "step": 2413 + }, + { + "epoch": 0.556221198156682, + "grad_norm": 0.7364121573266219, + "learning_rate": 1.7289790593918648e-06, + "loss": 0.6934928894042969, + "step": 2414 + }, + { + "epoch": 0.5564516129032258, + "grad_norm": 0.7288921937743348, + "learning_rate": 1.7287182093478658e-06, + "loss": 0.6323058605194092, + "step": 2415 + }, + { + "epoch": 0.5566820276497696, + "grad_norm": 0.9203399963548066, + "learning_rate": 1.7284572535313833e-06, + "loss": 0.8607437014579773, + "step": 2416 + }, + { + "epoch": 0.5569124423963133, + "grad_norm": 
0.8312318653257402, + "learning_rate": 1.7281961919802948e-06, + "loss": 0.932594358921051, + "step": 2417 + }, + { + "epoch": 0.5571428571428572, + "grad_norm": 0.8132622554262421, + "learning_rate": 1.727935024732493e-06, + "loss": 0.7239062786102295, + "step": 2418 + }, + { + "epoch": 0.5573732718894009, + "grad_norm": 0.770772581447816, + "learning_rate": 1.727673751825886e-06, + "loss": 0.7600498199462891, + "step": 2419 + }, + { + "epoch": 0.5576036866359447, + "grad_norm": 0.9553759629640377, + "learning_rate": 1.7274123732983977e-06, + "loss": 0.6888710260391235, + "step": 2420 + }, + { + "epoch": 0.5578341013824885, + "grad_norm": 0.9472816188704319, + "learning_rate": 1.7271508891879657e-06, + "loss": 0.9768370389938354, + "step": 2421 + }, + { + "epoch": 0.5580645161290323, + "grad_norm": 0.7612474564207412, + "learning_rate": 1.7268892995325453e-06, + "loss": 0.7302272319793701, + "step": 2422 + }, + { + "epoch": 0.558294930875576, + "grad_norm": 0.952809818405442, + "learning_rate": 1.7266276043701052e-06, + "loss": 0.7664496898651123, + "step": 2423 + }, + { + "epoch": 0.5585253456221199, + "grad_norm": 0.7105308716985692, + "learning_rate": 1.72636580373863e-06, + "loss": 0.7672723531723022, + "step": 2424 + }, + { + "epoch": 0.5587557603686636, + "grad_norm": 0.9094827818764729, + "learning_rate": 1.7261038976761203e-06, + "loss": 0.7467625141143799, + "step": 2425 + }, + { + "epoch": 0.5589861751152074, + "grad_norm": 1.0609555724090778, + "learning_rate": 1.7258418862205908e-06, + "loss": 0.899692177772522, + "step": 2426 + }, + { + "epoch": 0.5592165898617512, + "grad_norm": 0.8726314105037919, + "learning_rate": 1.7255797694100724e-06, + "loss": 0.9654138088226318, + "step": 2427 + }, + { + "epoch": 0.5594470046082949, + "grad_norm": 1.0261431779245342, + "learning_rate": 1.725317547282611e-06, + "loss": 0.8487396836280823, + "step": 2428 + }, + { + "epoch": 0.5596774193548387, + "grad_norm": 0.7692614118612008, + "learning_rate": 
1.7250552198762682e-06, + "loss": 0.7785199284553528, + "step": 2429 + }, + { + "epoch": 0.5599078341013825, + "grad_norm": 0.7931069179642137, + "learning_rate": 1.7247927872291198e-06, + "loss": 0.9243934750556946, + "step": 2430 + }, + { + "epoch": 0.5601382488479263, + "grad_norm": 0.6935679959823647, + "learning_rate": 1.724530249379258e-06, + "loss": 0.8674443960189819, + "step": 2431 + }, + { + "epoch": 0.56036866359447, + "grad_norm": 0.7564063858493598, + "learning_rate": 1.7242676063647895e-06, + "loss": 0.8022270202636719, + "step": 2432 + }, + { + "epoch": 0.5605990783410139, + "grad_norm": 0.8222900385869091, + "learning_rate": 1.7240048582238367e-06, + "loss": 0.8696796894073486, + "step": 2433 + }, + { + "epoch": 0.5608294930875576, + "grad_norm": 0.8560234672396506, + "learning_rate": 1.7237420049945374e-06, + "loss": 0.7752439975738525, + "step": 2434 + }, + { + "epoch": 0.5610599078341014, + "grad_norm": 0.9286340475505503, + "learning_rate": 1.723479046715044e-06, + "loss": 0.7660201787948608, + "step": 2435 + }, + { + "epoch": 0.5612903225806452, + "grad_norm": 0.7639410477119124, + "learning_rate": 1.7232159834235249e-06, + "loss": 0.9319918155670166, + "step": 2436 + }, + { + "epoch": 0.561520737327189, + "grad_norm": 0.8121463742755932, + "learning_rate": 1.722952815158163e-06, + "loss": 0.8175421357154846, + "step": 2437 + }, + { + "epoch": 0.5617511520737327, + "grad_norm": 0.5646145066796834, + "learning_rate": 1.7226895419571573e-06, + "loss": 0.6959598064422607, + "step": 2438 + }, + { + "epoch": 0.5619815668202764, + "grad_norm": 0.9804875774075569, + "learning_rate": 1.722426163858721e-06, + "loss": 0.8629111051559448, + "step": 2439 + }, + { + "epoch": 0.5622119815668203, + "grad_norm": 1.1148628556143985, + "learning_rate": 1.7221626809010833e-06, + "loss": 0.8222612142562866, + "step": 2440 + }, + { + "epoch": 0.562442396313364, + "grad_norm": 0.7126052614291007, + "learning_rate": 1.721899093122489e-06, + "loss": 
0.8329352140426636, + "step": 2441 + }, + { + "epoch": 0.5626728110599079, + "grad_norm": 0.7803804718208336, + "learning_rate": 1.7216354005611966e-06, + "loss": 0.8777236938476562, + "step": 2442 + }, + { + "epoch": 0.5629032258064516, + "grad_norm": 0.8601336969746237, + "learning_rate": 1.7213716032554814e-06, + "loss": 0.8487246036529541, + "step": 2443 + }, + { + "epoch": 0.5631336405529954, + "grad_norm": 0.9035051311861264, + "learning_rate": 1.7211077012436327e-06, + "loss": 0.8429645299911499, + "step": 2444 + }, + { + "epoch": 0.5633640552995391, + "grad_norm": 0.9883668092610399, + "learning_rate": 1.720843694563956e-06, + "loss": 0.7683241367340088, + "step": 2445 + }, + { + "epoch": 0.563594470046083, + "grad_norm": 0.839045001132387, + "learning_rate": 1.7205795832547715e-06, + "loss": 0.8468153476715088, + "step": 2446 + }, + { + "epoch": 0.5638248847926267, + "grad_norm": 0.7865527461309724, + "learning_rate": 1.7203153673544136e-06, + "loss": 0.7957276105880737, + "step": 2447 + }, + { + "epoch": 0.5640552995391706, + "grad_norm": 0.7301149604369097, + "learning_rate": 1.7200510469012343e-06, + "loss": 0.703586757183075, + "step": 2448 + }, + { + "epoch": 0.5642857142857143, + "grad_norm": 0.9237896103754119, + "learning_rate": 1.7197866219335988e-06, + "loss": 0.8399583101272583, + "step": 2449 + }, + { + "epoch": 0.5645161290322581, + "grad_norm": 0.9147331037465749, + "learning_rate": 1.7195220924898882e-06, + "loss": 0.8198127746582031, + "step": 2450 + }, + { + "epoch": 0.5647465437788018, + "grad_norm": 0.8751939719560463, + "learning_rate": 1.7192574586084977e-06, + "loss": 0.8345620632171631, + "step": 2451 + }, + { + "epoch": 0.5649769585253456, + "grad_norm": 0.5798955427424709, + "learning_rate": 1.71899272032784e-06, + "loss": 0.7717207670211792, + "step": 2452 + }, + { + "epoch": 0.5652073732718894, + "grad_norm": 1.0279650439820616, + "learning_rate": 1.7187278776863402e-06, + "loss": 0.9178022146224976, + "step": 2453 + }, + { + 
"epoch": 0.5654377880184331, + "grad_norm": 0.8586126622693072, + "learning_rate": 1.7184629307224405e-06, + "loss": 0.802221417427063, + "step": 2454 + }, + { + "epoch": 0.565668202764977, + "grad_norm": 0.9691589621671786, + "learning_rate": 1.718197879474598e-06, + "loss": 0.8785420656204224, + "step": 2455 + }, + { + "epoch": 0.5658986175115207, + "grad_norm": 0.8087978885886937, + "learning_rate": 1.7179327239812835e-06, + "loss": 0.866797924041748, + "step": 2456 + }, + { + "epoch": 0.5661290322580645, + "grad_norm": 0.7850858892434726, + "learning_rate": 1.7176674642809848e-06, + "loss": 0.8483223915100098, + "step": 2457 + }, + { + "epoch": 0.5663594470046083, + "grad_norm": 0.7634922973789945, + "learning_rate": 1.7174021004122038e-06, + "loss": 0.815066933631897, + "step": 2458 + }, + { + "epoch": 0.5665898617511521, + "grad_norm": 0.7286124953848899, + "learning_rate": 1.7171366324134575e-06, + "loss": 0.8584767580032349, + "step": 2459 + }, + { + "epoch": 0.5668202764976958, + "grad_norm": 0.8250445352678845, + "learning_rate": 1.7168710603232783e-06, + "loss": 0.8710953593254089, + "step": 2460 + }, + { + "epoch": 0.5670506912442397, + "grad_norm": 0.9434416859632441, + "learning_rate": 1.7166053841802137e-06, + "loss": 0.8174586892127991, + "step": 2461 + }, + { + "epoch": 0.5672811059907834, + "grad_norm": 0.8270311207697365, + "learning_rate": 1.7163396040228263e-06, + "loss": 0.7240795493125916, + "step": 2462 + }, + { + "epoch": 0.5675115207373271, + "grad_norm": 0.9011815170935621, + "learning_rate": 1.7160737198896938e-06, + "loss": 0.8026313781738281, + "step": 2463 + }, + { + "epoch": 0.567741935483871, + "grad_norm": 0.906377679717593, + "learning_rate": 1.7158077318194088e-06, + "loss": 0.8170863389968872, + "step": 2464 + }, + { + "epoch": 0.5679723502304147, + "grad_norm": 0.7708394273236241, + "learning_rate": 1.7155416398505794e-06, + "loss": 0.7524861097335815, + "step": 2465 + }, + { + "epoch": 0.5682027649769585, + "grad_norm": 
1.053627484653556, + "learning_rate": 1.7152754440218278e-06, + "loss": 0.9895739555358887, + "step": 2466 + }, + { + "epoch": 0.5684331797235023, + "grad_norm": 0.8044893250734789, + "learning_rate": 1.7150091443717924e-06, + "loss": 0.840786874294281, + "step": 2467 + }, + { + "epoch": 0.5686635944700461, + "grad_norm": 0.7235386782272144, + "learning_rate": 1.7147427409391265e-06, + "loss": 0.8896929025650024, + "step": 2468 + }, + { + "epoch": 0.5688940092165898, + "grad_norm": 0.930785639448215, + "learning_rate": 1.714476233762498e-06, + "loss": 0.9940589666366577, + "step": 2469 + }, + { + "epoch": 0.5691244239631337, + "grad_norm": 0.8541894175832414, + "learning_rate": 1.7142096228805896e-06, + "loss": 0.8827046155929565, + "step": 2470 + }, + { + "epoch": 0.5693548387096774, + "grad_norm": 0.8477738552913107, + "learning_rate": 1.7139429083321003e-06, + "loss": 0.8402417302131653, + "step": 2471 + }, + { + "epoch": 0.5695852534562212, + "grad_norm": 1.0681644319875638, + "learning_rate": 1.7136760901557428e-06, + "loss": 0.9298208951950073, + "step": 2472 + }, + { + "epoch": 0.569815668202765, + "grad_norm": 0.799198798955049, + "learning_rate": 1.7134091683902456e-06, + "loss": 0.7272841930389404, + "step": 2473 + }, + { + "epoch": 0.5700460829493088, + "grad_norm": 0.9504491625382946, + "learning_rate": 1.7131421430743522e-06, + "loss": 0.7767274379730225, + "step": 2474 + }, + { + "epoch": 0.5702764976958525, + "grad_norm": 0.8321899881110706, + "learning_rate": 1.7128750142468205e-06, + "loss": 0.8381883502006531, + "step": 2475 + }, + { + "epoch": 0.5705069124423963, + "grad_norm": 0.722993858034587, + "learning_rate": 1.7126077819464247e-06, + "loss": 0.6917109489440918, + "step": 2476 + }, + { + "epoch": 0.5707373271889401, + "grad_norm": 0.8529687693157456, + "learning_rate": 1.712340446211952e-06, + "loss": 0.848122239112854, + "step": 2477 + }, + { + "epoch": 0.5709677419354838, + "grad_norm": 0.8115142651418973, + "learning_rate": 
1.7120730070822074e-06, + "loss": 0.7880194187164307, + "step": 2478 + }, + { + "epoch": 0.5711981566820277, + "grad_norm": 0.7900923038142705, + "learning_rate": 1.7118054645960077e-06, + "loss": 0.8782297372817993, + "step": 2479 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 0.8386744568018749, + "learning_rate": 1.7115378187921876e-06, + "loss": 0.9030005931854248, + "step": 2480 + }, + { + "epoch": 0.5716589861751152, + "grad_norm": 1.0512780177061767, + "learning_rate": 1.7112700697095953e-06, + "loss": 0.9950683116912842, + "step": 2481 + }, + { + "epoch": 0.571889400921659, + "grad_norm": 0.7851257012482162, + "learning_rate": 1.7110022173870933e-06, + "loss": 0.8825187683105469, + "step": 2482 + }, + { + "epoch": 0.5721198156682028, + "grad_norm": 0.7742449968104124, + "learning_rate": 1.710734261863561e-06, + "loss": 0.7918775081634521, + "step": 2483 + }, + { + "epoch": 0.5723502304147465, + "grad_norm": 0.8385191739759446, + "learning_rate": 1.7104662031778916e-06, + "loss": 1.0219467878341675, + "step": 2484 + }, + { + "epoch": 0.5725806451612904, + "grad_norm": 0.7273611559924746, + "learning_rate": 1.7101980413689931e-06, + "loss": 0.7633316516876221, + "step": 2485 + }, + { + "epoch": 0.5728110599078341, + "grad_norm": 0.9207367628977638, + "learning_rate": 1.7099297764757891e-06, + "loss": 0.8972171545028687, + "step": 2486 + }, + { + "epoch": 0.5730414746543778, + "grad_norm": 0.9268590747994748, + "learning_rate": 1.7096614085372183e-06, + "loss": 0.9467268586158752, + "step": 2487 + }, + { + "epoch": 0.5732718894009217, + "grad_norm": 0.6697903314360253, + "learning_rate": 1.709392937592233e-06, + "loss": 0.7688668370246887, + "step": 2488 + }, + { + "epoch": 0.5735023041474654, + "grad_norm": 0.9069250629096394, + "learning_rate": 1.7091243636798022e-06, + "loss": 0.8521163463592529, + "step": 2489 + }, + { + "epoch": 0.5737327188940092, + "grad_norm": 1.1876566208797892, + "learning_rate": 1.7088556868389087e-06, + "loss": 
0.937403678894043, + "step": 2490 + }, + { + "epoch": 0.573963133640553, + "grad_norm": 0.7484200220587712, + "learning_rate": 1.7085869071085507e-06, + "loss": 0.929175853729248, + "step": 2491 + }, + { + "epoch": 0.5741935483870968, + "grad_norm": 0.75868423962596, + "learning_rate": 1.708318024527741e-06, + "loss": 0.8213154673576355, + "step": 2492 + }, + { + "epoch": 0.5744239631336405, + "grad_norm": 0.8570973138589657, + "learning_rate": 1.708049039135508e-06, + "loss": 0.7666962146759033, + "step": 2493 + }, + { + "epoch": 0.5746543778801844, + "grad_norm": 0.944726193523685, + "learning_rate": 1.707779950970894e-06, + "loss": 0.9787846803665161, + "step": 2494 + }, + { + "epoch": 0.5748847926267281, + "grad_norm": 0.9499725243145639, + "learning_rate": 1.7075107600729575e-06, + "loss": 0.9688804149627686, + "step": 2495 + }, + { + "epoch": 0.5751152073732719, + "grad_norm": 0.7169812071362754, + "learning_rate": 1.7072414664807706e-06, + "loss": 0.7186019420623779, + "step": 2496 + }, + { + "epoch": 0.5753456221198157, + "grad_norm": 0.8737696103531859, + "learning_rate": 1.706972070233421e-06, + "loss": 0.814068615436554, + "step": 2497 + }, + { + "epoch": 0.5755760368663595, + "grad_norm": 0.8930538892783126, + "learning_rate": 1.7067025713700111e-06, + "loss": 0.8439940214157104, + "step": 2498 + }, + { + "epoch": 0.5758064516129032, + "grad_norm": 1.0358274070142592, + "learning_rate": 1.706432969929659e-06, + "loss": 1.0199556350708008, + "step": 2499 + }, + { + "epoch": 0.576036866359447, + "grad_norm": 0.8418547467759998, + "learning_rate": 1.7061632659514964e-06, + "loss": 0.9422338008880615, + "step": 2500 + }, + { + "epoch": 0.5762672811059908, + "grad_norm": 0.8692517624840741, + "learning_rate": 1.7058934594746704e-06, + "loss": 0.9307081699371338, + "step": 2501 + }, + { + "epoch": 0.5764976958525345, + "grad_norm": 0.8121605874769848, + "learning_rate": 1.7056235505383433e-06, + "loss": 0.7202768325805664, + "step": 2502 + }, + { + "epoch": 
0.5767281105990784, + "grad_norm": 0.915285295701684, + "learning_rate": 1.7053535391816923e-06, + "loss": 1.0184223651885986, + "step": 2503 + }, + { + "epoch": 0.5769585253456221, + "grad_norm": 0.8238573361353964, + "learning_rate": 1.7050834254439085e-06, + "loss": 0.7957574129104614, + "step": 2504 + }, + { + "epoch": 0.5771889400921659, + "grad_norm": 0.9632097611385487, + "learning_rate": 1.7048132093641989e-06, + "loss": 0.9694541096687317, + "step": 2505 + }, + { + "epoch": 0.5774193548387097, + "grad_norm": 0.7406781740567284, + "learning_rate": 1.704542890981785e-06, + "loss": 0.8427075147628784, + "step": 2506 + }, + { + "epoch": 0.5776497695852535, + "grad_norm": 0.7137957479223747, + "learning_rate": 1.7042724703359032e-06, + "loss": 0.7745763063430786, + "step": 2507 + }, + { + "epoch": 0.5778801843317972, + "grad_norm": 0.8935647722203462, + "learning_rate": 1.7040019474658047e-06, + "loss": 0.8179641962051392, + "step": 2508 + }, + { + "epoch": 0.5781105990783411, + "grad_norm": 0.9010033541227577, + "learning_rate": 1.7037313224107557e-06, + "loss": 0.8118200302124023, + "step": 2509 + }, + { + "epoch": 0.5783410138248848, + "grad_norm": 0.7297456575398072, + "learning_rate": 1.7034605952100364e-06, + "loss": 0.7892665863037109, + "step": 2510 + }, + { + "epoch": 0.5785714285714286, + "grad_norm": 0.736874372872981, + "learning_rate": 1.7031897659029434e-06, + "loss": 0.7442026734352112, + "step": 2511 + }, + { + "epoch": 0.5788018433179724, + "grad_norm": 0.9375581770522491, + "learning_rate": 1.7029188345287865e-06, + "loss": 0.8179585933685303, + "step": 2512 + }, + { + "epoch": 0.5790322580645161, + "grad_norm": 0.8710660194733852, + "learning_rate": 1.7026478011268918e-06, + "loss": 0.7569797039031982, + "step": 2513 + }, + { + "epoch": 0.5792626728110599, + "grad_norm": 0.8952615874674131, + "learning_rate": 1.7023766657365984e-06, + "loss": 0.8464581966400146, + "step": 2514 + }, + { + "epoch": 0.5794930875576036, + "grad_norm": 
0.9645554070219402, + "learning_rate": 1.702105428397262e-06, + "loss": 0.7326645255088806, + "step": 2515 + }, + { + "epoch": 0.5797235023041475, + "grad_norm": 0.8243138835822689, + "learning_rate": 1.7018340891482522e-06, + "loss": 0.7993732690811157, + "step": 2516 + }, + { + "epoch": 0.5799539170506912, + "grad_norm": 0.7406582307230963, + "learning_rate": 1.7015626480289532e-06, + "loss": 0.8124513626098633, + "step": 2517 + }, + { + "epoch": 0.580184331797235, + "grad_norm": 0.7758431888553803, + "learning_rate": 1.701291105078765e-06, + "loss": 0.9075840711593628, + "step": 2518 + }, + { + "epoch": 0.5804147465437788, + "grad_norm": 0.8900052121004013, + "learning_rate": 1.7010194603371009e-06, + "loss": 0.8212069272994995, + "step": 2519 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.8737089153257858, + "learning_rate": 1.7007477138433903e-06, + "loss": 0.7582074999809265, + "step": 2520 + }, + { + "epoch": 0.5808755760368663, + "grad_norm": 0.7402264811343096, + "learning_rate": 1.7004758656370769e-06, + "loss": 0.8917636871337891, + "step": 2521 + }, + { + "epoch": 0.5811059907834102, + "grad_norm": 0.9496944008191128, + "learning_rate": 1.7002039157576186e-06, + "loss": 0.8919704556465149, + "step": 2522 + }, + { + "epoch": 0.5813364055299539, + "grad_norm": 0.8803733592170607, + "learning_rate": 1.699931864244489e-06, + "loss": 0.7474988698959351, + "step": 2523 + }, + { + "epoch": 0.5815668202764976, + "grad_norm": 0.9179665061824968, + "learning_rate": 1.6996597111371758e-06, + "loss": 0.8596241474151611, + "step": 2524 + }, + { + "epoch": 0.5817972350230415, + "grad_norm": 0.8260474861422493, + "learning_rate": 1.699387456475182e-06, + "loss": 0.9316335916519165, + "step": 2525 + }, + { + "epoch": 0.5820276497695852, + "grad_norm": 0.7937616616577486, + "learning_rate": 1.6991151002980248e-06, + "loss": 0.7364813089370728, + "step": 2526 + }, + { + "epoch": 0.582258064516129, + "grad_norm": 0.9072210580359311, + "learning_rate": 
1.698842642645236e-06, + "loss": 0.789472758769989, + "step": 2527 + }, + { + "epoch": 0.5824884792626728, + "grad_norm": 0.9988239379820413, + "learning_rate": 1.6985700835563627e-06, + "loss": 1.024861216545105, + "step": 2528 + }, + { + "epoch": 0.5827188940092166, + "grad_norm": 0.9746619752287254, + "learning_rate": 1.6982974230709667e-06, + "loss": 0.8465025424957275, + "step": 2529 + }, + { + "epoch": 0.5829493087557603, + "grad_norm": 1.0146741583341603, + "learning_rate": 1.6980246612286244e-06, + "loss": 0.7502799034118652, + "step": 2530 + }, + { + "epoch": 0.5831797235023042, + "grad_norm": 0.866831185770848, + "learning_rate": 1.6977517980689264e-06, + "loss": 0.8019870519638062, + "step": 2531 + }, + { + "epoch": 0.5834101382488479, + "grad_norm": 0.783761351839215, + "learning_rate": 1.6974788336314788e-06, + "loss": 0.9048774242401123, + "step": 2532 + }, + { + "epoch": 0.5836405529953917, + "grad_norm": 0.8577409607010705, + "learning_rate": 1.6972057679559018e-06, + "loss": 0.8411067724227905, + "step": 2533 + }, + { + "epoch": 0.5838709677419355, + "grad_norm": 0.7158353942796929, + "learning_rate": 1.6969326010818304e-06, + "loss": 0.7399133443832397, + "step": 2534 + }, + { + "epoch": 0.5841013824884793, + "grad_norm": 0.7309631229110555, + "learning_rate": 1.6966593330489144e-06, + "loss": 0.7553995847702026, + "step": 2535 + }, + { + "epoch": 0.584331797235023, + "grad_norm": 0.7563702103772202, + "learning_rate": 1.6963859638968188e-06, + "loss": 0.8405054807662964, + "step": 2536 + }, + { + "epoch": 0.5845622119815668, + "grad_norm": 0.739785555800379, + "learning_rate": 1.6961124936652223e-06, + "loss": 0.7619640231132507, + "step": 2537 + }, + { + "epoch": 0.5847926267281106, + "grad_norm": 0.6189871014888121, + "learning_rate": 1.6958389223938187e-06, + "loss": 0.7785576581954956, + "step": 2538 + }, + { + "epoch": 0.5850230414746543, + "grad_norm": 1.0593569746028593, + "learning_rate": 1.695565250122317e-06, + "loss": 
0.9230754375457764, + "step": 2539 + }, + { + "epoch": 0.5852534562211982, + "grad_norm": 0.9087046574881754, + "learning_rate": 1.69529147689044e-06, + "loss": 0.798599362373352, + "step": 2540 + }, + { + "epoch": 0.5854838709677419, + "grad_norm": 0.7546263570181881, + "learning_rate": 1.6950176027379253e-06, + "loss": 0.8491491079330444, + "step": 2541 + }, + { + "epoch": 0.5857142857142857, + "grad_norm": 0.9063392015432612, + "learning_rate": 1.694743627704526e-06, + "loss": 0.7906054854393005, + "step": 2542 + }, + { + "epoch": 0.5859447004608295, + "grad_norm": 0.8834118839199732, + "learning_rate": 1.6944695518300084e-06, + "loss": 0.8178746700286865, + "step": 2543 + }, + { + "epoch": 0.5861751152073733, + "grad_norm": 0.9444844508582247, + "learning_rate": 1.6941953751541552e-06, + "loss": 0.867972731590271, + "step": 2544 + }, + { + "epoch": 0.586405529953917, + "grad_norm": 0.8815618278989616, + "learning_rate": 1.6939210977167622e-06, + "loss": 0.8000613451004028, + "step": 2545 + }, + { + "epoch": 0.5866359447004609, + "grad_norm": 0.938056940810552, + "learning_rate": 1.6936467195576403e-06, + "loss": 0.8473562002182007, + "step": 2546 + }, + { + "epoch": 0.5868663594470046, + "grad_norm": 0.960324746454341, + "learning_rate": 1.6933722407166156e-06, + "loss": 0.971686065196991, + "step": 2547 + }, + { + "epoch": 0.5870967741935483, + "grad_norm": 0.718798566737211, + "learning_rate": 1.6930976612335276e-06, + "loss": 0.6679604053497314, + "step": 2548 + }, + { + "epoch": 0.5873271889400922, + "grad_norm": 0.8662288511956259, + "learning_rate": 1.692822981148232e-06, + "loss": 0.81952303647995, + "step": 2549 + }, + { + "epoch": 0.5875576036866359, + "grad_norm": 0.7171085968938, + "learning_rate": 1.6925482005005978e-06, + "loss": 0.8711779713630676, + "step": 2550 + }, + { + "epoch": 0.5877880184331797, + "grad_norm": 0.8419799604008648, + "learning_rate": 1.6922733193305093e-06, + "loss": 0.930451512336731, + "step": 2551 + }, + { + "epoch": 
0.5880184331797235, + "grad_norm": 0.8349862719015169, + "learning_rate": 1.6919983376778647e-06, + "loss": 0.8435598611831665, + "step": 2552 + }, + { + "epoch": 0.5882488479262673, + "grad_norm": 0.8491940209701643, + "learning_rate": 1.6917232555825774e-06, + "loss": 0.8868621587753296, + "step": 2553 + }, + { + "epoch": 0.588479262672811, + "grad_norm": 0.7537041162487105, + "learning_rate": 1.6914480730845752e-06, + "loss": 0.6821786165237427, + "step": 2554 + }, + { + "epoch": 0.5887096774193549, + "grad_norm": 0.8487688242201222, + "learning_rate": 1.691172790223801e-06, + "loss": 0.7241402864456177, + "step": 2555 + }, + { + "epoch": 0.5889400921658986, + "grad_norm": 0.7422220828348832, + "learning_rate": 1.690897407040211e-06, + "loss": 0.7477490305900574, + "step": 2556 + }, + { + "epoch": 0.5891705069124424, + "grad_norm": 0.7636915444427955, + "learning_rate": 1.690621923573777e-06, + "loss": 0.7881484031677246, + "step": 2557 + }, + { + "epoch": 0.5894009216589862, + "grad_norm": 0.959692830610789, + "learning_rate": 1.6903463398644848e-06, + "loss": 0.8292979001998901, + "step": 2558 + }, + { + "epoch": 0.58963133640553, + "grad_norm": 0.711937804642515, + "learning_rate": 1.690070655952336e-06, + "loss": 0.7068917751312256, + "step": 2559 + }, + { + "epoch": 0.5898617511520737, + "grad_norm": 1.1143023950252693, + "learning_rate": 1.6897948718773443e-06, + "loss": 0.8907356262207031, + "step": 2560 + }, + { + "epoch": 0.5900921658986175, + "grad_norm": 0.7930222105996996, + "learning_rate": 1.6895189876795405e-06, + "loss": 0.7762824892997742, + "step": 2561 + }, + { + "epoch": 0.5903225806451613, + "grad_norm": 1.0922797891559575, + "learning_rate": 1.6892430033989685e-06, + "loss": 0.9682759046554565, + "step": 2562 + }, + { + "epoch": 0.590552995391705, + "grad_norm": 0.8231082510824629, + "learning_rate": 1.6889669190756866e-06, + "loss": 0.7594735622406006, + "step": 2563 + }, + { + "epoch": 0.5907834101382489, + "grad_norm": 
0.8117866090414669, + "learning_rate": 1.6886907347497687e-06, + "loss": 0.8161605000495911, + "step": 2564 + }, + { + "epoch": 0.5910138248847926, + "grad_norm": 0.8557086150703954, + "learning_rate": 1.6884144504613023e-06, + "loss": 0.9390331506729126, + "step": 2565 + }, + { + "epoch": 0.5912442396313364, + "grad_norm": 0.9387748138594502, + "learning_rate": 1.68813806625039e-06, + "loss": 0.8895832300186157, + "step": 2566 + }, + { + "epoch": 0.5914746543778802, + "grad_norm": 0.8802161511936953, + "learning_rate": 1.687861582157148e-06, + "loss": 0.7779919505119324, + "step": 2567 + }, + { + "epoch": 0.591705069124424, + "grad_norm": 1.139110447936057, + "learning_rate": 1.687584998221708e-06, + "loss": 0.8974252343177795, + "step": 2568 + }, + { + "epoch": 0.5919354838709677, + "grad_norm": 0.8073269492940187, + "learning_rate": 1.687308314484216e-06, + "loss": 0.8487393856048584, + "step": 2569 + }, + { + "epoch": 0.5921658986175116, + "grad_norm": 0.8310515688854938, + "learning_rate": 1.6870315309848318e-06, + "loss": 0.8356295824050903, + "step": 2570 + }, + { + "epoch": 0.5923963133640553, + "grad_norm": 0.9033360313158958, + "learning_rate": 1.6867546477637307e-06, + "loss": 0.8180248737335205, + "step": 2571 + }, + { + "epoch": 0.5926267281105991, + "grad_norm": 0.6950974205275126, + "learning_rate": 1.6864776648611013e-06, + "loss": 0.8456830978393555, + "step": 2572 + }, + { + "epoch": 0.5928571428571429, + "grad_norm": 0.9039181033590447, + "learning_rate": 1.6862005823171476e-06, + "loss": 0.8378905057907104, + "step": 2573 + }, + { + "epoch": 0.5930875576036866, + "grad_norm": 0.835432630485808, + "learning_rate": 1.685923400172088e-06, + "loss": 0.8060408234596252, + "step": 2574 + }, + { + "epoch": 0.5933179723502304, + "grad_norm": 0.8354491785263655, + "learning_rate": 1.685646118466155e-06, + "loss": 0.7550709247589111, + "step": 2575 + }, + { + "epoch": 0.5935483870967742, + "grad_norm": 0.805260271869055, + "learning_rate": 
1.6853687372395955e-06, + "loss": 0.8475208282470703, + "step": 2576 + }, + { + "epoch": 0.593778801843318, + "grad_norm": 1.0626255995304192, + "learning_rate": 1.6850912565326709e-06, + "loss": 0.8681533336639404, + "step": 2577 + }, + { + "epoch": 0.5940092165898617, + "grad_norm": 0.9000714044087056, + "learning_rate": 1.6848136763856573e-06, + "loss": 0.7756578922271729, + "step": 2578 + }, + { + "epoch": 0.5942396313364056, + "grad_norm": 1.1163759985623336, + "learning_rate": 1.6845359968388456e-06, + "loss": 0.8910564184188843, + "step": 2579 + }, + { + "epoch": 0.5944700460829493, + "grad_norm": 0.7484768523036672, + "learning_rate": 1.6842582179325397e-06, + "loss": 0.7293382883071899, + "step": 2580 + }, + { + "epoch": 0.5947004608294931, + "grad_norm": 0.8208214849988605, + "learning_rate": 1.6839803397070597e-06, + "loss": 0.8497427105903625, + "step": 2581 + }, + { + "epoch": 0.5949308755760369, + "grad_norm": 0.9124854441462121, + "learning_rate": 1.6837023622027386e-06, + "loss": 0.800891637802124, + "step": 2582 + }, + { + "epoch": 0.5951612903225807, + "grad_norm": 0.8887114325795745, + "learning_rate": 1.683424285459925e-06, + "loss": 0.889703631401062, + "step": 2583 + }, + { + "epoch": 0.5953917050691244, + "grad_norm": 0.83139201735135, + "learning_rate": 1.6831461095189808e-06, + "loss": 0.7500913143157959, + "step": 2584 + }, + { + "epoch": 0.5956221198156681, + "grad_norm": 0.8260167845821169, + "learning_rate": 1.6828678344202834e-06, + "loss": 0.8575263023376465, + "step": 2585 + }, + { + "epoch": 0.595852534562212, + "grad_norm": 0.8796083393133354, + "learning_rate": 1.6825894602042238e-06, + "loss": 0.7754372358322144, + "step": 2586 + }, + { + "epoch": 0.5960829493087557, + "grad_norm": 1.0529816523070568, + "learning_rate": 1.6823109869112074e-06, + "loss": 0.8861502408981323, + "step": 2587 + }, + { + "epoch": 0.5963133640552996, + "grad_norm": 0.7738036894554111, + "learning_rate": 1.6820324145816548e-06, + "loss": 
0.725920557975769, + "step": 2588 + }, + { + "epoch": 0.5965437788018433, + "grad_norm": 0.7887605961214393, + "learning_rate": 1.6817537432559998e-06, + "loss": 0.6195499897003174, + "step": 2589 + }, + { + "epoch": 0.5967741935483871, + "grad_norm": 0.8405918169035362, + "learning_rate": 1.6814749729746918e-06, + "loss": 0.8757472038269043, + "step": 2590 + }, + { + "epoch": 0.5970046082949308, + "grad_norm": 0.8710168774832879, + "learning_rate": 1.6811961037781934e-06, + "loss": 0.8024059534072876, + "step": 2591 + }, + { + "epoch": 0.5972350230414747, + "grad_norm": 1.1763814328442668, + "learning_rate": 1.6809171357069825e-06, + "loss": 0.8397082090377808, + "step": 2592 + }, + { + "epoch": 0.5974654377880184, + "grad_norm": 0.8163820389720032, + "learning_rate": 1.6806380688015507e-06, + "loss": 0.7693872451782227, + "step": 2593 + }, + { + "epoch": 0.5976958525345623, + "grad_norm": 0.7668441612993817, + "learning_rate": 1.6803589031024043e-06, + "loss": 0.7918043732643127, + "step": 2594 + }, + { + "epoch": 0.597926267281106, + "grad_norm": 0.7951277033960863, + "learning_rate": 1.680079638650064e-06, + "loss": 0.8046969175338745, + "step": 2595 + }, + { + "epoch": 0.5981566820276498, + "grad_norm": 0.9724191958452253, + "learning_rate": 1.6798002754850643e-06, + "loss": 0.7889789938926697, + "step": 2596 + }, + { + "epoch": 0.5983870967741935, + "grad_norm": 0.8356070849986357, + "learning_rate": 1.6795208136479543e-06, + "loss": 0.874780535697937, + "step": 2597 + }, + { + "epoch": 0.5986175115207373, + "grad_norm": 0.8380940855873632, + "learning_rate": 1.679241253179298e-06, + "loss": 0.8728631734848022, + "step": 2598 + }, + { + "epoch": 0.5988479262672811, + "grad_norm": 0.7909132896338992, + "learning_rate": 1.678961594119673e-06, + "loss": 0.5940345525741577, + "step": 2599 + }, + { + "epoch": 0.5990783410138248, + "grad_norm": 0.7873638428289793, + "learning_rate": 1.6786818365096712e-06, + "loss": 0.8524528741836548, + "step": 2600 + }, + { + 
"epoch": 0.5993087557603687, + "grad_norm": 1.2099119623298256, + "learning_rate": 1.6784019803899e-06, + "loss": 1.0738554000854492, + "step": 2601 + }, + { + "epoch": 0.5995391705069124, + "grad_norm": 0.9987206599474828, + "learning_rate": 1.6781220258009787e-06, + "loss": 0.9146362543106079, + "step": 2602 + }, + { + "epoch": 0.5997695852534562, + "grad_norm": 0.9546196333490053, + "learning_rate": 1.6778419727835434e-06, + "loss": 0.8846019506454468, + "step": 2603 + }, + { + "epoch": 0.6, + "grad_norm": 1.0356705992849526, + "learning_rate": 1.6775618213782427e-06, + "loss": 0.9564694166183472, + "step": 2604 + }, + { + "epoch": 0.6002304147465438, + "grad_norm": 0.8649265876220377, + "learning_rate": 1.6772815716257411e-06, + "loss": 0.7311475276947021, + "step": 2605 + }, + { + "epoch": 0.6004608294930875, + "grad_norm": 0.9996641063184493, + "learning_rate": 1.6770012235667157e-06, + "loss": 0.8198719024658203, + "step": 2606 + }, + { + "epoch": 0.6006912442396314, + "grad_norm": 0.8625199282325245, + "learning_rate": 1.676720777241859e-06, + "loss": 0.7667897939682007, + "step": 2607 + }, + { + "epoch": 0.6009216589861751, + "grad_norm": 0.8068998344787891, + "learning_rate": 1.6764402326918775e-06, + "loss": 0.8438166379928589, + "step": 2608 + }, + { + "epoch": 0.6011520737327188, + "grad_norm": 0.8540979807575545, + "learning_rate": 1.6761595899574913e-06, + "loss": 0.801039457321167, + "step": 2609 + }, + { + "epoch": 0.6013824884792627, + "grad_norm": 0.8234203241271092, + "learning_rate": 1.6758788490794362e-06, + "loss": 0.8063384294509888, + "step": 2610 + }, + { + "epoch": 0.6016129032258064, + "grad_norm": 0.6526013686548677, + "learning_rate": 1.6755980100984609e-06, + "loss": 0.7574378848075867, + "step": 2611 + }, + { + "epoch": 0.6018433179723502, + "grad_norm": 0.9515660687698646, + "learning_rate": 1.6753170730553285e-06, + "loss": 0.7640282511711121, + "step": 2612 + }, + { + "epoch": 0.602073732718894, + "grad_norm": 0.8028588885811085, 
+ "learning_rate": 1.675036037990817e-06, + "loss": 0.8366582989692688, + "step": 2613 + }, + { + "epoch": 0.6023041474654378, + "grad_norm": 0.9790278189412774, + "learning_rate": 1.6747549049457184e-06, + "loss": 0.851488471031189, + "step": 2614 + }, + { + "epoch": 0.6025345622119815, + "grad_norm": 0.8888933014827352, + "learning_rate": 1.6744736739608385e-06, + "loss": 0.6821870803833008, + "step": 2615 + }, + { + "epoch": 0.6027649769585254, + "grad_norm": 0.9884428615602953, + "learning_rate": 1.6741923450769977e-06, + "loss": 0.9263452887535095, + "step": 2616 + }, + { + "epoch": 0.6029953917050691, + "grad_norm": 0.7660541738576696, + "learning_rate": 1.6739109183350303e-06, + "loss": 0.7471155524253845, + "step": 2617 + }, + { + "epoch": 0.603225806451613, + "grad_norm": 0.8463548916487829, + "learning_rate": 1.6736293937757858e-06, + "loss": 0.8859940767288208, + "step": 2618 + }, + { + "epoch": 0.6034562211981567, + "grad_norm": 0.7725702923302962, + "learning_rate": 1.673347771440126e-06, + "loss": 0.8078656792640686, + "step": 2619 + }, + { + "epoch": 0.6036866359447005, + "grad_norm": 0.8796637852565455, + "learning_rate": 1.673066051368929e-06, + "loss": 0.7663185596466064, + "step": 2620 + }, + { + "epoch": 0.6039170506912442, + "grad_norm": 0.7762146466532337, + "learning_rate": 1.6727842336030855e-06, + "loss": 0.7924770712852478, + "step": 2621 + }, + { + "epoch": 0.604147465437788, + "grad_norm": 0.6362525346897695, + "learning_rate": 1.672502318183501e-06, + "loss": 0.7781439423561096, + "step": 2622 + }, + { + "epoch": 0.6043778801843318, + "grad_norm": 0.7824821748809755, + "learning_rate": 1.6722203051510953e-06, + "loss": 0.9342260360717773, + "step": 2623 + }, + { + "epoch": 0.6046082949308755, + "grad_norm": 0.9113412146225311, + "learning_rate": 1.6719381945468024e-06, + "loss": 0.8589230179786682, + "step": 2624 + }, + { + "epoch": 0.6048387096774194, + "grad_norm": 0.9092021688294594, + "learning_rate": 1.67165598641157e-06, + "loss": 
0.8692198991775513, + "step": 2625 + }, + { + "epoch": 0.6050691244239631, + "grad_norm": 0.9811252814075038, + "learning_rate": 1.6713736807863606e-06, + "loss": 0.9220771789550781, + "step": 2626 + }, + { + "epoch": 0.6052995391705069, + "grad_norm": 0.7869789442575379, + "learning_rate": 1.6710912777121497e-06, + "loss": 0.670639157295227, + "step": 2627 + }, + { + "epoch": 0.6055299539170507, + "grad_norm": 0.8458627233906328, + "learning_rate": 1.6708087772299287e-06, + "loss": 0.780914306640625, + "step": 2628 + }, + { + "epoch": 0.6057603686635945, + "grad_norm": 0.7718782555310939, + "learning_rate": 1.6705261793807014e-06, + "loss": 0.836430549621582, + "step": 2629 + }, + { + "epoch": 0.6059907834101382, + "grad_norm": 0.8965474432723056, + "learning_rate": 1.670243484205487e-06, + "loss": 0.84266197681427, + "step": 2630 + }, + { + "epoch": 0.6062211981566821, + "grad_norm": 0.8992013517980091, + "learning_rate": 1.6699606917453184e-06, + "loss": 0.9276752471923828, + "step": 2631 + }, + { + "epoch": 0.6064516129032258, + "grad_norm": 0.8740634897243095, + "learning_rate": 1.6696778020412418e-06, + "loss": 0.8319100141525269, + "step": 2632 + }, + { + "epoch": 0.6066820276497696, + "grad_norm": 0.9778851785690291, + "learning_rate": 1.669394815134319e-06, + "loss": 0.7511987686157227, + "step": 2633 + }, + { + "epoch": 0.6069124423963134, + "grad_norm": 0.9559089829828732, + "learning_rate": 1.6691117310656249e-06, + "loss": 0.7847566604614258, + "step": 2634 + }, + { + "epoch": 0.6071428571428571, + "grad_norm": 0.7352732117136743, + "learning_rate": 1.668828549876249e-06, + "loss": 0.8598428964614868, + "step": 2635 + }, + { + "epoch": 0.6073732718894009, + "grad_norm": 0.9632462301651329, + "learning_rate": 1.6685452716072942e-06, + "loss": 0.8676267266273499, + "step": 2636 + }, + { + "epoch": 0.6076036866359447, + "grad_norm": 0.9796050613045469, + "learning_rate": 1.6682618962998787e-06, + "loss": 0.8139858841896057, + "step": 2637 + }, + { + 
"epoch": 0.6078341013824885, + "grad_norm": 0.9214980939594923, + "learning_rate": 1.6679784239951334e-06, + "loss": 0.878848671913147, + "step": 2638 + }, + { + "epoch": 0.6080645161290322, + "grad_norm": 0.8942413316087445, + "learning_rate": 1.6676948547342038e-06, + "loss": 0.7094229459762573, + "step": 2639 + }, + { + "epoch": 0.6082949308755761, + "grad_norm": 0.7183954232108332, + "learning_rate": 1.6674111885582502e-06, + "loss": 0.7908186912536621, + "step": 2640 + }, + { + "epoch": 0.6085253456221198, + "grad_norm": 0.705517985038791, + "learning_rate": 1.6671274255084465e-06, + "loss": 0.7205992341041565, + "step": 2641 + }, + { + "epoch": 0.6087557603686636, + "grad_norm": 0.937951031991606, + "learning_rate": 1.6668435656259796e-06, + "loss": 0.8098955750465393, + "step": 2642 + }, + { + "epoch": 0.6089861751152074, + "grad_norm": 0.8047793122116887, + "learning_rate": 1.6665596089520522e-06, + "loss": 0.9344205856323242, + "step": 2643 + }, + { + "epoch": 0.6092165898617512, + "grad_norm": 0.73132257965357, + "learning_rate": 1.6662755555278798e-06, + "loss": 0.6149121522903442, + "step": 2644 + }, + { + "epoch": 0.6094470046082949, + "grad_norm": 1.1550816011183633, + "learning_rate": 1.6659914053946929e-06, + "loss": 0.790631115436554, + "step": 2645 + }, + { + "epoch": 0.6096774193548387, + "grad_norm": 0.9832349740984434, + "learning_rate": 1.6657071585937349e-06, + "loss": 0.7789372801780701, + "step": 2646 + }, + { + "epoch": 0.6099078341013825, + "grad_norm": 0.7425679816784971, + "learning_rate": 1.6654228151662641e-06, + "loss": 0.9119753837585449, + "step": 2647 + }, + { + "epoch": 0.6101382488479262, + "grad_norm": 1.0635804319271085, + "learning_rate": 1.6651383751535526e-06, + "loss": 0.827568769454956, + "step": 2648 + }, + { + "epoch": 0.6103686635944701, + "grad_norm": 0.9620609244203838, + "learning_rate": 1.6648538385968865e-06, + "loss": 0.8862377405166626, + "step": 2649 + }, + { + "epoch": 0.6105990783410138, + "grad_norm": 
0.7954209003880245, + "learning_rate": 1.6645692055375658e-06, + "loss": 0.7765665054321289, + "step": 2650 + }, + { + "epoch": 0.6108294930875576, + "grad_norm": 0.7698374340240739, + "learning_rate": 1.6642844760169048e-06, + "loss": 0.7673745155334473, + "step": 2651 + }, + { + "epoch": 0.6110599078341014, + "grad_norm": 1.051257553540871, + "learning_rate": 1.6639996500762313e-06, + "loss": 0.8539090752601624, + "step": 2652 + }, + { + "epoch": 0.6112903225806452, + "grad_norm": 0.8676017636407886, + "learning_rate": 1.663714727756888e-06, + "loss": 0.9146299362182617, + "step": 2653 + }, + { + "epoch": 0.6115207373271889, + "grad_norm": 0.9802646170879412, + "learning_rate": 1.6634297091002304e-06, + "loss": 0.6720675230026245, + "step": 2654 + }, + { + "epoch": 0.6117511520737328, + "grad_norm": 0.9963804792413621, + "learning_rate": 1.6631445941476287e-06, + "loss": 0.876419186592102, + "step": 2655 + }, + { + "epoch": 0.6119815668202765, + "grad_norm": 0.8251901500966289, + "learning_rate": 1.6628593829404673e-06, + "loss": 0.781826376914978, + "step": 2656 + }, + { + "epoch": 0.6122119815668203, + "grad_norm": 1.0156308960299383, + "learning_rate": 1.662574075520144e-06, + "loss": 0.8700725436210632, + "step": 2657 + }, + { + "epoch": 0.6124423963133641, + "grad_norm": 0.8730333366815507, + "learning_rate": 1.6622886719280703e-06, + "loss": 0.7927212715148926, + "step": 2658 + }, + { + "epoch": 0.6126728110599078, + "grad_norm": 0.9472958125063492, + "learning_rate": 1.6620031722056732e-06, + "loss": 0.8402982354164124, + "step": 2659 + }, + { + "epoch": 0.6129032258064516, + "grad_norm": 0.9246784332742947, + "learning_rate": 1.6617175763943916e-06, + "loss": 0.844031572341919, + "step": 2660 + }, + { + "epoch": 0.6131336405529954, + "grad_norm": 1.1749754124811849, + "learning_rate": 1.66143188453568e-06, + "loss": 0.7927590608596802, + "step": 2661 + }, + { + "epoch": 0.6133640552995392, + "grad_norm": 0.7562363270320578, + "learning_rate": 
1.6611460966710057e-06, + "loss": 0.6881238222122192, + "step": 2662 + }, + { + "epoch": 0.6135944700460829, + "grad_norm": 0.7503304726479316, + "learning_rate": 1.6608602128418512e-06, + "loss": 0.8782250881195068, + "step": 2663 + }, + { + "epoch": 0.6138248847926268, + "grad_norm": 0.764429872232153, + "learning_rate": 1.6605742330897112e-06, + "loss": 0.810072124004364, + "step": 2664 + }, + { + "epoch": 0.6140552995391705, + "grad_norm": 0.7959070796498304, + "learning_rate": 1.660288157456096e-06, + "loss": 0.9278649091720581, + "step": 2665 + }, + { + "epoch": 0.6142857142857143, + "grad_norm": 0.8518702716538695, + "learning_rate": 1.6600019859825287e-06, + "loss": 0.7821990251541138, + "step": 2666 + }, + { + "epoch": 0.614516129032258, + "grad_norm": 0.8000150810917545, + "learning_rate": 1.6597157187105474e-06, + "loss": 0.7945138216018677, + "step": 2667 + }, + { + "epoch": 0.6147465437788019, + "grad_norm": 0.9158855636867193, + "learning_rate": 1.659429355681702e-06, + "loss": 0.7796168327331543, + "step": 2668 + }, + { + "epoch": 0.6149769585253456, + "grad_norm": 0.8778480996767207, + "learning_rate": 1.659142896937559e-06, + "loss": 0.8412867784500122, + "step": 2669 + }, + { + "epoch": 0.6152073732718893, + "grad_norm": 0.8776586025383009, + "learning_rate": 1.6588563425196976e-06, + "loss": 0.8507891893386841, + "step": 2670 + }, + { + "epoch": 0.6154377880184332, + "grad_norm": 0.7470530836348557, + "learning_rate": 1.6585696924697097e-06, + "loss": 0.7538737654685974, + "step": 2671 + }, + { + "epoch": 0.6156682027649769, + "grad_norm": 0.7938343055651664, + "learning_rate": 1.6582829468292027e-06, + "loss": 0.7241994142532349, + "step": 2672 + }, + { + "epoch": 0.6158986175115208, + "grad_norm": 0.7740707689038899, + "learning_rate": 1.6579961056397979e-06, + "loss": 0.8282276391983032, + "step": 2673 + }, + { + "epoch": 0.6161290322580645, + "grad_norm": 0.9834275785675608, + "learning_rate": 1.657709168943129e-06, + "loss": 
0.7823094725608826, + "step": 2674 + }, + { + "epoch": 0.6163594470046083, + "grad_norm": 0.7814560466718257, + "learning_rate": 1.6574221367808452e-06, + "loss": 0.7682117819786072, + "step": 2675 + }, + { + "epoch": 0.616589861751152, + "grad_norm": 0.791790817396352, + "learning_rate": 1.6571350091946084e-06, + "loss": 0.7483188509941101, + "step": 2676 + }, + { + "epoch": 0.6168202764976959, + "grad_norm": 0.7904062559480196, + "learning_rate": 1.656847786226095e-06, + "loss": 0.8244579434394836, + "step": 2677 + }, + { + "epoch": 0.6170506912442396, + "grad_norm": 0.935192090002093, + "learning_rate": 1.6565604679169951e-06, + "loss": 0.9741685390472412, + "step": 2678 + }, + { + "epoch": 0.6172811059907835, + "grad_norm": 1.2715516239943523, + "learning_rate": 1.6562730543090122e-06, + "loss": 1.0004706382751465, + "step": 2679 + }, + { + "epoch": 0.6175115207373272, + "grad_norm": 0.7382412100690486, + "learning_rate": 1.6559855454438644e-06, + "loss": 0.6897011399269104, + "step": 2680 + }, + { + "epoch": 0.617741935483871, + "grad_norm": 0.6330897297720288, + "learning_rate": 1.6556979413632833e-06, + "loss": 0.7250478267669678, + "step": 2681 + }, + { + "epoch": 0.6179723502304147, + "grad_norm": 0.9717515360338855, + "learning_rate": 1.6554102421090137e-06, + "loss": 0.850714385509491, + "step": 2682 + }, + { + "epoch": 0.6182027649769585, + "grad_norm": 0.917367886199939, + "learning_rate": 1.6551224477228152e-06, + "loss": 0.8389794230461121, + "step": 2683 + }, + { + "epoch": 0.6184331797235023, + "grad_norm": 0.8244704754842406, + "learning_rate": 1.6548345582464608e-06, + "loss": 0.8004277944564819, + "step": 2684 + }, + { + "epoch": 0.618663594470046, + "grad_norm": 0.9438052955461359, + "learning_rate": 1.654546573721737e-06, + "loss": 0.8439298868179321, + "step": 2685 + }, + { + "epoch": 0.6188940092165899, + "grad_norm": 0.9506767899718855, + "learning_rate": 1.6542584941904448e-06, + "loss": 0.7715939283370972, + "step": 2686 + }, + { + 
"epoch": 0.6191244239631336, + "grad_norm": 0.7277066195828455, + "learning_rate": 1.6539703196943982e-06, + "loss": 0.8521275520324707, + "step": 2687 + }, + { + "epoch": 0.6193548387096774, + "grad_norm": 0.9502964788805838, + "learning_rate": 1.6536820502754249e-06, + "loss": 0.8773370981216431, + "step": 2688 + }, + { + "epoch": 0.6195852534562212, + "grad_norm": 0.8896877670997408, + "learning_rate": 1.653393685975368e-06, + "loss": 0.7613356113433838, + "step": 2689 + }, + { + "epoch": 0.619815668202765, + "grad_norm": 0.7872525626089157, + "learning_rate": 1.6531052268360823e-06, + "loss": 0.7534692287445068, + "step": 2690 + }, + { + "epoch": 0.6200460829493087, + "grad_norm": 0.8888603991720845, + "learning_rate": 1.652816672899438e-06, + "loss": 0.861242413520813, + "step": 2691 + }, + { + "epoch": 0.6202764976958526, + "grad_norm": 1.0955455640383855, + "learning_rate": 1.652528024207317e-06, + "loss": 0.9778954982757568, + "step": 2692 + }, + { + "epoch": 0.6205069124423963, + "grad_norm": 0.8389124431813023, + "learning_rate": 1.6522392808016176e-06, + "loss": 0.7874879240989685, + "step": 2693 + }, + { + "epoch": 0.6207373271889401, + "grad_norm": 1.038077147354541, + "learning_rate": 1.6519504427242503e-06, + "loss": 0.8306739330291748, + "step": 2694 + }, + { + "epoch": 0.6209677419354839, + "grad_norm": 0.890554970207788, + "learning_rate": 1.651661510017139e-06, + "loss": 0.7617331743240356, + "step": 2695 + }, + { + "epoch": 0.6211981566820276, + "grad_norm": 0.8325839299854928, + "learning_rate": 1.6513724827222223e-06, + "loss": 0.8912776708602905, + "step": 2696 + }, + { + "epoch": 0.6214285714285714, + "grad_norm": 0.9626202232237234, + "learning_rate": 1.6510833608814519e-06, + "loss": 0.832025945186615, + "step": 2697 + }, + { + "epoch": 0.6216589861751152, + "grad_norm": 0.8573045739455887, + "learning_rate": 1.6507941445367934e-06, + "loss": 0.7391358613967896, + "step": 2698 + }, + { + "epoch": 0.621889400921659, + "grad_norm": 
0.8417803604945624, + "learning_rate": 1.6505048337302267e-06, + "loss": 0.7968891263008118, + "step": 2699 + }, + { + "epoch": 0.6221198156682027, + "grad_norm": 0.7943584636642551, + "learning_rate": 1.6502154285037446e-06, + "loss": 0.8268226981163025, + "step": 2700 + }, + { + "epoch": 0.6223502304147466, + "grad_norm": 0.8943748659016423, + "learning_rate": 1.6499259288993536e-06, + "loss": 0.8727509379386902, + "step": 2701 + }, + { + "epoch": 0.6225806451612903, + "grad_norm": 0.9781149876582625, + "learning_rate": 1.6496363349590746e-06, + "loss": 0.8419584035873413, + "step": 2702 + }, + { + "epoch": 0.6228110599078341, + "grad_norm": 0.9222004845701074, + "learning_rate": 1.6493466467249415e-06, + "loss": 0.7753620743751526, + "step": 2703 + }, + { + "epoch": 0.6230414746543779, + "grad_norm": 0.8188505837862442, + "learning_rate": 1.6490568642390022e-06, + "loss": 0.7735302448272705, + "step": 2704 + }, + { + "epoch": 0.6232718894009217, + "grad_norm": 0.892742684163995, + "learning_rate": 1.6487669875433183e-06, + "loss": 0.8730747699737549, + "step": 2705 + }, + { + "epoch": 0.6235023041474654, + "grad_norm": 1.081206789540213, + "learning_rate": 1.648477016679965e-06, + "loss": 1.026259183883667, + "step": 2706 + }, + { + "epoch": 0.6237327188940092, + "grad_norm": 1.1700615414540931, + "learning_rate": 1.6481869516910314e-06, + "loss": 1.0710067749023438, + "step": 2707 + }, + { + "epoch": 0.623963133640553, + "grad_norm": 0.8750649396873535, + "learning_rate": 1.6478967926186196e-06, + "loss": 0.8451842069625854, + "step": 2708 + }, + { + "epoch": 0.6241935483870967, + "grad_norm": 1.0025312740636694, + "learning_rate": 1.6476065395048463e-06, + "loss": 0.8114550113677979, + "step": 2709 + }, + { + "epoch": 0.6244239631336406, + "grad_norm": 0.9543936745980088, + "learning_rate": 1.6473161923918408e-06, + "loss": 0.9158897399902344, + "step": 2710 + }, + { + "epoch": 0.6246543778801843, + "grad_norm": 0.9073320322912862, + "learning_rate": 
1.6470257513217471e-06, + "loss": 0.8455985188484192, + "step": 2711 + }, + { + "epoch": 0.6248847926267281, + "grad_norm": 0.9409835862192949, + "learning_rate": 1.6467352163367224e-06, + "loss": 0.7869806885719299, + "step": 2712 + }, + { + "epoch": 0.6251152073732719, + "grad_norm": 0.9720046165998673, + "learning_rate": 1.6464445874789369e-06, + "loss": 0.7813467979431152, + "step": 2713 + }, + { + "epoch": 0.6253456221198157, + "grad_norm": 0.9253768349404401, + "learning_rate": 1.646153864790575e-06, + "loss": 0.7607834339141846, + "step": 2714 + }, + { + "epoch": 0.6255760368663594, + "grad_norm": 0.7655542834849622, + "learning_rate": 1.6458630483138354e-06, + "loss": 0.6316394209861755, + "step": 2715 + }, + { + "epoch": 0.6258064516129033, + "grad_norm": 1.0037920503955002, + "learning_rate": 1.6455721380909293e-06, + "loss": 0.8613089323043823, + "step": 2716 + }, + { + "epoch": 0.626036866359447, + "grad_norm": 0.900314234710346, + "learning_rate": 1.6452811341640823e-06, + "loss": 0.8521597385406494, + "step": 2717 + }, + { + "epoch": 0.6262672811059908, + "grad_norm": 0.863334614503053, + "learning_rate": 1.6449900365755322e-06, + "loss": 0.7649816870689392, + "step": 2718 + }, + { + "epoch": 0.6264976958525346, + "grad_norm": 0.7921235061169694, + "learning_rate": 1.6446988453675327e-06, + "loss": 0.669215738773346, + "step": 2719 + }, + { + "epoch": 0.6267281105990783, + "grad_norm": 1.0085146323707468, + "learning_rate": 1.6444075605823491e-06, + "loss": 0.7795897722244263, + "step": 2720 + }, + { + "epoch": 0.6269585253456221, + "grad_norm": 1.0985096718321175, + "learning_rate": 1.6441161822622612e-06, + "loss": 0.9773029088973999, + "step": 2721 + }, + { + "epoch": 0.6271889400921659, + "grad_norm": 0.88062279724108, + "learning_rate": 1.6438247104495622e-06, + "loss": 0.8313496112823486, + "step": 2722 + }, + { + "epoch": 0.6274193548387097, + "grad_norm": 0.8741823244787398, + "learning_rate": 1.6435331451865589e-06, + "loss": 
0.822803258895874, + "step": 2723 + }, + { + "epoch": 0.6276497695852534, + "grad_norm": 1.1191623839144935, + "learning_rate": 1.643241486515571e-06, + "loss": 0.8933405876159668, + "step": 2724 + }, + { + "epoch": 0.6278801843317973, + "grad_norm": 0.8721873626078817, + "learning_rate": 1.6429497344789334e-06, + "loss": 0.865382194519043, + "step": 2725 + }, + { + "epoch": 0.628110599078341, + "grad_norm": 0.6623424743433429, + "learning_rate": 1.6426578891189929e-06, + "loss": 0.5955609679222107, + "step": 2726 + }, + { + "epoch": 0.6283410138248848, + "grad_norm": 0.9379654908769754, + "learning_rate": 1.6423659504781102e-06, + "loss": 0.7832648754119873, + "step": 2727 + }, + { + "epoch": 0.6285714285714286, + "grad_norm": 0.9904172136436726, + "learning_rate": 1.6420739185986606e-06, + "loss": 0.8939651250839233, + "step": 2728 + }, + { + "epoch": 0.6288018433179724, + "grad_norm": 0.8754504203733118, + "learning_rate": 1.6417817935230316e-06, + "loss": 0.7950553894042969, + "step": 2729 + }, + { + "epoch": 0.6290322580645161, + "grad_norm": 0.7473547756110924, + "learning_rate": 1.6414895752936247e-06, + "loss": 0.7011410593986511, + "step": 2730 + }, + { + "epoch": 0.6292626728110599, + "grad_norm": 0.8298073820867625, + "learning_rate": 1.6411972639528553e-06, + "loss": 0.8745814561843872, + "step": 2731 + }, + { + "epoch": 0.6294930875576037, + "grad_norm": 0.9643129286331958, + "learning_rate": 1.640904859543152e-06, + "loss": 0.9487906694412231, + "step": 2732 + }, + { + "epoch": 0.6297235023041474, + "grad_norm": 1.0003996457820634, + "learning_rate": 1.6406123621069565e-06, + "loss": 0.8493598103523254, + "step": 2733 + }, + { + "epoch": 0.6299539170506913, + "grad_norm": 0.7043952970778223, + "learning_rate": 1.640319771686725e-06, + "loss": 0.8176105618476868, + "step": 2734 + }, + { + "epoch": 0.630184331797235, + "grad_norm": 1.1365398207749948, + "learning_rate": 1.640027088324926e-06, + "loss": 0.8331952691078186, + "step": 2735 + }, + { + 
"epoch": 0.6304147465437788, + "grad_norm": 0.9152153352251905, + "learning_rate": 1.6397343120640428e-06, + "loss": 0.7507727146148682, + "step": 2736 + }, + { + "epoch": 0.6306451612903226, + "grad_norm": 0.8498087936716523, + "learning_rate": 1.6394414429465707e-06, + "loss": 0.7681083679199219, + "step": 2737 + }, + { + "epoch": 0.6308755760368664, + "grad_norm": 1.0207970870125542, + "learning_rate": 1.6391484810150197e-06, + "loss": 0.86592036485672, + "step": 2738 + }, + { + "epoch": 0.6311059907834101, + "grad_norm": 0.7893726077346048, + "learning_rate": 1.6388554263119133e-06, + "loss": 0.6561422348022461, + "step": 2739 + }, + { + "epoch": 0.631336405529954, + "grad_norm": 0.8691518888981297, + "learning_rate": 1.6385622788797871e-06, + "loss": 1.0149214267730713, + "step": 2740 + }, + { + "epoch": 0.6315668202764977, + "grad_norm": 3.1459869291369578, + "learning_rate": 1.6382690387611912e-06, + "loss": 0.8542313575744629, + "step": 2741 + }, + { + "epoch": 0.6317972350230415, + "grad_norm": 0.8459688860048273, + "learning_rate": 1.6379757059986898e-06, + "loss": 0.8561190366744995, + "step": 2742 + }, + { + "epoch": 0.6320276497695853, + "grad_norm": 0.8945733601522768, + "learning_rate": 1.6376822806348591e-06, + "loss": 0.7487457990646362, + "step": 2743 + }, + { + "epoch": 0.632258064516129, + "grad_norm": 0.7710656021686645, + "learning_rate": 1.6373887627122894e-06, + "loss": 0.6169087886810303, + "step": 2744 + }, + { + "epoch": 0.6324884792626728, + "grad_norm": 0.9363459151732765, + "learning_rate": 1.6370951522735848e-06, + "loss": 0.8384301662445068, + "step": 2745 + }, + { + "epoch": 0.6327188940092165, + "grad_norm": 0.8816116065345285, + "learning_rate": 1.636801449361362e-06, + "loss": 0.8009958267211914, + "step": 2746 + }, + { + "epoch": 0.6329493087557604, + "grad_norm": 0.7782605199549586, + "learning_rate": 1.6365076540182518e-06, + "loss": 0.7277840375900269, + "step": 2747 + }, + { + "epoch": 0.6331797235023041, + "grad_norm": 
0.8629211607674182, + "learning_rate": 1.6362137662868988e-06, + "loss": 0.7994974255561829, + "step": 2748 + }, + { + "epoch": 0.633410138248848, + "grad_norm": 0.9972871876044257, + "learning_rate": 1.6359197862099592e-06, + "loss": 0.9940546751022339, + "step": 2749 + }, + { + "epoch": 0.6336405529953917, + "grad_norm": 0.7083636808435892, + "learning_rate": 1.6356257138301048e-06, + "loss": 0.776983916759491, + "step": 2750 + }, + { + "epoch": 0.6338709677419355, + "grad_norm": 1.0813287689618403, + "learning_rate": 1.6353315491900194e-06, + "loss": 0.8218704462051392, + "step": 2751 + }, + { + "epoch": 0.6341013824884792, + "grad_norm": 0.9285197745822434, + "learning_rate": 1.635037292332401e-06, + "loss": 0.8437784910202026, + "step": 2752 + }, + { + "epoch": 0.6343317972350231, + "grad_norm": 0.7951039096878332, + "learning_rate": 1.63474294329996e-06, + "loss": 0.7774004340171814, + "step": 2753 + }, + { + "epoch": 0.6345622119815668, + "grad_norm": 0.7998446978982631, + "learning_rate": 1.634448502135421e-06, + "loss": 0.8480523824691772, + "step": 2754 + }, + { + "epoch": 0.6347926267281107, + "grad_norm": 0.8710356721404071, + "learning_rate": 1.634153968881522e-06, + "loss": 0.838944673538208, + "step": 2755 + }, + { + "epoch": 0.6350230414746544, + "grad_norm": 0.9609360504840417, + "learning_rate": 1.633859343581014e-06, + "loss": 0.7989159822463989, + "step": 2756 + }, + { + "epoch": 0.6352534562211981, + "grad_norm": 0.8906618388597183, + "learning_rate": 1.6335646262766612e-06, + "loss": 0.8122522234916687, + "step": 2757 + }, + { + "epoch": 0.635483870967742, + "grad_norm": 1.0306905026592958, + "learning_rate": 1.6332698170112418e-06, + "loss": 0.7472352981567383, + "step": 2758 + }, + { + "epoch": 0.6357142857142857, + "grad_norm": 0.7470082329854858, + "learning_rate": 1.6329749158275466e-06, + "loss": 0.7160866260528564, + "step": 2759 + }, + { + "epoch": 0.6359447004608295, + "grad_norm": 0.9276359862380839, + "learning_rate": 
1.6326799227683803e-06, + "loss": 0.850339412689209, + "step": 2760 + }, + { + "epoch": 0.6361751152073732, + "grad_norm": 0.8334408182150722, + "learning_rate": 1.632384837876561e-06, + "loss": 0.7683566808700562, + "step": 2761 + }, + { + "epoch": 0.6364055299539171, + "grad_norm": 1.0070287688728312, + "learning_rate": 1.6320896611949197e-06, + "loss": 0.820326030254364, + "step": 2762 + }, + { + "epoch": 0.6366359447004608, + "grad_norm": 0.9088399606663712, + "learning_rate": 1.6317943927663005e-06, + "loss": 0.9319206476211548, + "step": 2763 + }, + { + "epoch": 0.6368663594470046, + "grad_norm": 0.854101738795234, + "learning_rate": 1.6314990326335619e-06, + "loss": 0.8473616242408752, + "step": 2764 + }, + { + "epoch": 0.6370967741935484, + "grad_norm": 0.9083270544798837, + "learning_rate": 1.6312035808395746e-06, + "loss": 0.7515239715576172, + "step": 2765 + }, + { + "epoch": 0.6373271889400922, + "grad_norm": 0.9691327918436982, + "learning_rate": 1.630908037427223e-06, + "loss": 0.8780150413513184, + "step": 2766 + }, + { + "epoch": 0.6375576036866359, + "grad_norm": 0.8183908015853972, + "learning_rate": 1.6306124024394051e-06, + "loss": 0.7502909898757935, + "step": 2767 + }, + { + "epoch": 0.6377880184331797, + "grad_norm": 1.0244030314506845, + "learning_rate": 1.630316675919032e-06, + "loss": 0.8440920114517212, + "step": 2768 + }, + { + "epoch": 0.6380184331797235, + "grad_norm": 0.9479398820781787, + "learning_rate": 1.6300208579090275e-06, + "loss": 0.7769831418991089, + "step": 2769 + }, + { + "epoch": 0.6382488479262672, + "grad_norm": 0.7616107153752498, + "learning_rate": 1.6297249484523297e-06, + "loss": 0.6217764616012573, + "step": 2770 + }, + { + "epoch": 0.6384792626728111, + "grad_norm": 0.7961962297717475, + "learning_rate": 1.6294289475918891e-06, + "loss": 0.8726013898849487, + "step": 2771 + }, + { + "epoch": 0.6387096774193548, + "grad_norm": 0.9993347618775529, + "learning_rate": 1.6291328553706702e-06, + "loss": 
0.9624546766281128, + "step": 2772 + }, + { + "epoch": 0.6389400921658986, + "grad_norm": 0.9073330627878557, + "learning_rate": 1.62883667183165e-06, + "loss": 0.733322024345398, + "step": 2773 + }, + { + "epoch": 0.6391705069124424, + "grad_norm": 0.828990327728417, + "learning_rate": 1.6285403970178197e-06, + "loss": 0.7944040298461914, + "step": 2774 + }, + { + "epoch": 0.6394009216589862, + "grad_norm": 0.945508092850191, + "learning_rate": 1.6282440309721825e-06, + "loss": 0.8006964921951294, + "step": 2775 + }, + { + "epoch": 0.6396313364055299, + "grad_norm": 0.8235251563991838, + "learning_rate": 1.6279475737377562e-06, + "loss": 0.8226393461227417, + "step": 2776 + }, + { + "epoch": 0.6398617511520738, + "grad_norm": 0.9205648176506509, + "learning_rate": 1.6276510253575707e-06, + "loss": 0.8216049671173096, + "step": 2777 + }, + { + "epoch": 0.6400921658986175, + "grad_norm": 1.2879339929003093, + "learning_rate": 1.6273543858746698e-06, + "loss": 0.9556760191917419, + "step": 2778 + }, + { + "epoch": 0.6403225806451613, + "grad_norm": 1.226309717633737, + "learning_rate": 1.6270576553321103e-06, + "loss": 0.9736160039901733, + "step": 2779 + }, + { + "epoch": 0.6405529953917051, + "grad_norm": 0.7107959971647043, + "learning_rate": 1.6267608337729622e-06, + "loss": 0.6930527687072754, + "step": 2780 + }, + { + "epoch": 0.6407834101382488, + "grad_norm": 0.8158686811134676, + "learning_rate": 1.6264639212403089e-06, + "loss": 0.8047456741333008, + "step": 2781 + }, + { + "epoch": 0.6410138248847926, + "grad_norm": 0.8454524938044947, + "learning_rate": 1.6261669177772465e-06, + "loss": 0.7278450727462769, + "step": 2782 + }, + { + "epoch": 0.6412442396313364, + "grad_norm": 0.8520417006771478, + "learning_rate": 1.6258698234268852e-06, + "loss": 0.7768574357032776, + "step": 2783 + }, + { + "epoch": 0.6414746543778802, + "grad_norm": 1.0890287289964238, + "learning_rate": 1.6255726382323475e-06, + "loss": 0.7621645331382751, + "step": 2784 + }, + { + 
"epoch": 0.6417050691244239, + "grad_norm": 0.7437513689171984, + "learning_rate": 1.6252753622367695e-06, + "loss": 0.7566754221916199, + "step": 2785 + }, + { + "epoch": 0.6419354838709678, + "grad_norm": 0.8832427803322862, + "learning_rate": 1.6249779954833005e-06, + "loss": 0.7609840631484985, + "step": 2786 + }, + { + "epoch": 0.6421658986175115, + "grad_norm": 0.7482883809435998, + "learning_rate": 1.6246805380151028e-06, + "loss": 0.7360000610351562, + "step": 2787 + }, + { + "epoch": 0.6423963133640553, + "grad_norm": 1.1130271498528226, + "learning_rate": 1.624382989875352e-06, + "loss": 0.7951081395149231, + "step": 2788 + }, + { + "epoch": 0.6426267281105991, + "grad_norm": 0.7939855049580037, + "learning_rate": 1.6240853511072367e-06, + "loss": 0.7273311614990234, + "step": 2789 + }, + { + "epoch": 0.6428571428571429, + "grad_norm": 1.0416971384804878, + "learning_rate": 1.6237876217539588e-06, + "loss": 0.9270737171173096, + "step": 2790 + }, + { + "epoch": 0.6430875576036866, + "grad_norm": 0.97801359210753, + "learning_rate": 1.6234898018587336e-06, + "loss": 0.7624385356903076, + "step": 2791 + }, + { + "epoch": 0.6433179723502304, + "grad_norm": 0.8529799225121792, + "learning_rate": 1.6231918914647889e-06, + "loss": 0.8266719579696655, + "step": 2792 + }, + { + "epoch": 0.6435483870967742, + "grad_norm": 0.6435153338840431, + "learning_rate": 1.6228938906153663e-06, + "loss": 0.7606902122497559, + "step": 2793 + }, + { + "epoch": 0.6437788018433179, + "grad_norm": 1.022572162531227, + "learning_rate": 1.6225957993537197e-06, + "loss": 0.8239191174507141, + "step": 2794 + }, + { + "epoch": 0.6440092165898618, + "grad_norm": 0.8871272102711673, + "learning_rate": 1.6222976177231174e-06, + "loss": 0.8313608169555664, + "step": 2795 + }, + { + "epoch": 0.6442396313364055, + "grad_norm": 0.7541910127898682, + "learning_rate": 1.6219993457668396e-06, + "loss": 0.7725037932395935, + "step": 2796 + }, + { + "epoch": 0.6444700460829493, + "grad_norm": 
0.8887584465014293, + "learning_rate": 1.6217009835281802e-06, + "loss": 0.8791182041168213, + "step": 2797 + }, + { + "epoch": 0.6447004608294931, + "grad_norm": 0.9285171614449231, + "learning_rate": 1.621402531050446e-06, + "loss": 0.7157453298568726, + "step": 2798 + }, + { + "epoch": 0.6449308755760369, + "grad_norm": 0.9675001114911925, + "learning_rate": 1.621103988376957e-06, + "loss": 0.8248307704925537, + "step": 2799 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8114025469253138, + "learning_rate": 1.6208053555510467e-06, + "loss": 0.7094661593437195, + "step": 2800 + }, + { + "epoch": 0.6453917050691245, + "grad_norm": 0.997320269594231, + "learning_rate": 1.6205066326160605e-06, + "loss": 0.9130781888961792, + "step": 2801 + }, + { + "epoch": 0.6456221198156682, + "grad_norm": 0.8555561883924394, + "learning_rate": 1.620207819615358e-06, + "loss": 0.7140541076660156, + "step": 2802 + }, + { + "epoch": 0.645852534562212, + "grad_norm": 0.8223075667705522, + "learning_rate": 1.6199089165923116e-06, + "loss": 0.8638602495193481, + "step": 2803 + }, + { + "epoch": 0.6460829493087558, + "grad_norm": 0.8487880176317714, + "learning_rate": 1.6196099235903068e-06, + "loss": 0.9055536389350891, + "step": 2804 + }, + { + "epoch": 0.6463133640552995, + "grad_norm": 0.9356547902583738, + "learning_rate": 1.6193108406527416e-06, + "loss": 0.7694590091705322, + "step": 2805 + }, + { + "epoch": 0.6465437788018433, + "grad_norm": 0.9047595380936525, + "learning_rate": 1.619011667823028e-06, + "loss": 0.7512019872665405, + "step": 2806 + }, + { + "epoch": 0.646774193548387, + "grad_norm": 0.8406537006369587, + "learning_rate": 1.6187124051445903e-06, + "loss": 0.6362565159797668, + "step": 2807 + }, + { + "epoch": 0.6470046082949309, + "grad_norm": 1.328031327807814, + "learning_rate": 1.6184130526608656e-06, + "loss": 0.885259747505188, + "step": 2808 + }, + { + "epoch": 0.6472350230414746, + "grad_norm": 0.9445009081248091, + "learning_rate": 
1.6181136104153054e-06, + "loss": 0.7868754863739014, + "step": 2809 + }, + { + "epoch": 0.6474654377880185, + "grad_norm": 0.901923102146858, + "learning_rate": 1.6178140784513729e-06, + "loss": 0.889660120010376, + "step": 2810 + }, + { + "epoch": 0.6476958525345622, + "grad_norm": 0.7380215273328754, + "learning_rate": 1.6175144568125444e-06, + "loss": 0.8460343480110168, + "step": 2811 + }, + { + "epoch": 0.647926267281106, + "grad_norm": 0.9963582050847237, + "learning_rate": 1.6172147455423105e-06, + "loss": 0.8729731440544128, + "step": 2812 + }, + { + "epoch": 0.6481566820276498, + "grad_norm": 0.9500689129739934, + "learning_rate": 1.616914944684173e-06, + "loss": 0.7937173843383789, + "step": 2813 + }, + { + "epoch": 0.6483870967741936, + "grad_norm": 1.068299419221943, + "learning_rate": 1.6166150542816483e-06, + "loss": 0.8764641284942627, + "step": 2814 + }, + { + "epoch": 0.6486175115207373, + "grad_norm": 0.8942547003902331, + "learning_rate": 1.6163150743782645e-06, + "loss": 0.8078420758247375, + "step": 2815 + }, + { + "epoch": 0.6488479262672812, + "grad_norm": 0.9410598977678883, + "learning_rate": 1.6160150050175636e-06, + "loss": 0.9124993085861206, + "step": 2816 + }, + { + "epoch": 0.6490783410138249, + "grad_norm": 0.8852573714623596, + "learning_rate": 1.6157148462431003e-06, + "loss": 0.9584136009216309, + "step": 2817 + }, + { + "epoch": 0.6493087557603686, + "grad_norm": 1.0833527157774228, + "learning_rate": 1.6154145980984422e-06, + "loss": 0.8404672145843506, + "step": 2818 + }, + { + "epoch": 0.6495391705069125, + "grad_norm": 0.9498348014278839, + "learning_rate": 1.6151142606271695e-06, + "loss": 0.7928001880645752, + "step": 2819 + }, + { + "epoch": 0.6497695852534562, + "grad_norm": 0.8444903444994009, + "learning_rate": 1.6148138338728766e-06, + "loss": 0.7877479791641235, + "step": 2820 + }, + { + "epoch": 0.65, + "grad_norm": 0.814898961059689, + "learning_rate": 1.6145133178791695e-06, + "loss": 0.9502429366111755, + "step": 
2821 + }, + { + "epoch": 0.6502304147465438, + "grad_norm": 0.791549779828082, + "learning_rate": 1.6142127126896679e-06, + "loss": 0.7866412401199341, + "step": 2822 + }, + { + "epoch": 0.6504608294930876, + "grad_norm": 0.7841896313928699, + "learning_rate": 1.613912018348004e-06, + "loss": 0.8315345644950867, + "step": 2823 + }, + { + "epoch": 0.6506912442396313, + "grad_norm": 0.6841019539216254, + "learning_rate": 1.6136112348978236e-06, + "loss": 0.9718044400215149, + "step": 2824 + }, + { + "epoch": 0.6509216589861752, + "grad_norm": 0.6502753552916141, + "learning_rate": 1.6133103623827843e-06, + "loss": 0.5874941349029541, + "step": 2825 + }, + { + "epoch": 0.6511520737327189, + "grad_norm": 0.8954999916723304, + "learning_rate": 1.613009400846558e-06, + "loss": 0.9498391151428223, + "step": 2826 + }, + { + "epoch": 0.6513824884792627, + "grad_norm": 0.9527387242959447, + "learning_rate": 1.612708350332829e-06, + "loss": 0.858715295791626, + "step": 2827 + }, + { + "epoch": 0.6516129032258065, + "grad_norm": 0.7771583744459308, + "learning_rate": 1.6124072108852938e-06, + "loss": 0.8618113994598389, + "step": 2828 + }, + { + "epoch": 0.6518433179723502, + "grad_norm": 0.7504136233680345, + "learning_rate": 1.6121059825476628e-06, + "loss": 0.8024446964263916, + "step": 2829 + }, + { + "epoch": 0.652073732718894, + "grad_norm": 0.8461077162414828, + "learning_rate": 1.6118046653636586e-06, + "loss": 0.8021122813224792, + "step": 2830 + }, + { + "epoch": 0.6523041474654377, + "grad_norm": 0.8330044091738112, + "learning_rate": 1.6115032593770176e-06, + "loss": 0.8092107772827148, + "step": 2831 + }, + { + "epoch": 0.6525345622119816, + "grad_norm": 0.8480183578387018, + "learning_rate": 1.6112017646314872e-06, + "loss": 0.9842641353607178, + "step": 2832 + }, + { + "epoch": 0.6527649769585253, + "grad_norm": 0.8051494817524167, + "learning_rate": 1.6109001811708305e-06, + "loss": 0.744353175163269, + "step": 2833 + }, + { + "epoch": 0.6529953917050692, + 
"grad_norm": 1.0610555371871784, + "learning_rate": 1.6105985090388209e-06, + "loss": 0.7089616060256958, + "step": 2834 + }, + { + "epoch": 0.6532258064516129, + "grad_norm": 0.9119028582239228, + "learning_rate": 1.610296748279246e-06, + "loss": 0.9043736457824707, + "step": 2835 + }, + { + "epoch": 0.6534562211981567, + "grad_norm": 1.0078987757698072, + "learning_rate": 1.6099948989359061e-06, + "loss": 0.9170948266983032, + "step": 2836 + }, + { + "epoch": 0.6536866359447004, + "grad_norm": 0.9289963097672949, + "learning_rate": 1.6096929610526145e-06, + "loss": 0.8275802135467529, + "step": 2837 + }, + { + "epoch": 0.6539170506912443, + "grad_norm": 0.9146670757237039, + "learning_rate": 1.6093909346731965e-06, + "loss": 0.9180251955986023, + "step": 2838 + }, + { + "epoch": 0.654147465437788, + "grad_norm": 0.708269208459363, + "learning_rate": 1.6090888198414908e-06, + "loss": 0.8041235208511353, + "step": 2839 + }, + { + "epoch": 0.6543778801843319, + "grad_norm": 0.9431191202102605, + "learning_rate": 1.6087866166013492e-06, + "loss": 0.7833176851272583, + "step": 2840 + }, + { + "epoch": 0.6546082949308756, + "grad_norm": 0.8680924352570318, + "learning_rate": 1.6084843249966364e-06, + "loss": 0.838886022567749, + "step": 2841 + }, + { + "epoch": 0.6548387096774193, + "grad_norm": 0.8317233103954151, + "learning_rate": 1.6081819450712293e-06, + "loss": 0.837687611579895, + "step": 2842 + }, + { + "epoch": 0.6550691244239631, + "grad_norm": 0.8737630969117387, + "learning_rate": 1.607879476869018e-06, + "loss": 0.6572843790054321, + "step": 2843 + }, + { + "epoch": 0.6552995391705069, + "grad_norm": 0.8513917948170456, + "learning_rate": 1.6075769204339053e-06, + "loss": 0.7698653936386108, + "step": 2844 + }, + { + "epoch": 0.6555299539170507, + "grad_norm": 0.9469558820500475, + "learning_rate": 1.607274275809807e-06, + "loss": 0.8639169335365295, + "step": 2845 + }, + { + "epoch": 0.6557603686635944, + "grad_norm": 0.8250799867539951, + 
"learning_rate": 1.6069715430406517e-06, + "loss": 0.837492823600769, + "step": 2846 + }, + { + "epoch": 0.6559907834101383, + "grad_norm": 0.9277000604833184, + "learning_rate": 1.6066687221703803e-06, + "loss": 0.8824087381362915, + "step": 2847 + }, + { + "epoch": 0.656221198156682, + "grad_norm": 0.9304701724719217, + "learning_rate": 1.6063658132429468e-06, + "loss": 0.8161731958389282, + "step": 2848 + }, + { + "epoch": 0.6564516129032258, + "grad_norm": 0.7988044282931124, + "learning_rate": 1.6060628163023183e-06, + "loss": 0.8365877270698547, + "step": 2849 + }, + { + "epoch": 0.6566820276497696, + "grad_norm": 0.8477393490951164, + "learning_rate": 1.6057597313924745e-06, + "loss": 0.877829909324646, + "step": 2850 + }, + { + "epoch": 0.6569124423963134, + "grad_norm": 0.857078285622655, + "learning_rate": 1.6054565585574075e-06, + "loss": 0.756903886795044, + "step": 2851 + }, + { + "epoch": 0.6571428571428571, + "grad_norm": 1.0124401818225557, + "learning_rate": 1.6051532978411223e-06, + "loss": 0.7777276039123535, + "step": 2852 + }, + { + "epoch": 0.6573732718894009, + "grad_norm": 0.9464152715401636, + "learning_rate": 1.6048499492876375e-06, + "loss": 0.9191532135009766, + "step": 2853 + }, + { + "epoch": 0.6576036866359447, + "grad_norm": 0.7885787618366824, + "learning_rate": 1.6045465129409829e-06, + "loss": 0.7693309783935547, + "step": 2854 + }, + { + "epoch": 0.6578341013824884, + "grad_norm": 0.8787314035574895, + "learning_rate": 1.6042429888452024e-06, + "loss": 0.7865023612976074, + "step": 2855 + }, + { + "epoch": 0.6580645161290323, + "grad_norm": 0.8588996745183644, + "learning_rate": 1.6039393770443521e-06, + "loss": 0.844336748123169, + "step": 2856 + }, + { + "epoch": 0.658294930875576, + "grad_norm": 0.9455502994869639, + "learning_rate": 1.6036356775825009e-06, + "loss": 0.9590705633163452, + "step": 2857 + }, + { + "epoch": 0.6585253456221198, + "grad_norm": 0.904582718768817, + "learning_rate": 1.6033318905037297e-06, + "loss": 
0.8687748312950134, + "step": 2858 + }, + { + "epoch": 0.6587557603686636, + "grad_norm": 0.8848681311153475, + "learning_rate": 1.6030280158521336e-06, + "loss": 0.8669745922088623, + "step": 2859 + }, + { + "epoch": 0.6589861751152074, + "grad_norm": 0.8829211466390271, + "learning_rate": 1.6027240536718191e-06, + "loss": 0.6929436922073364, + "step": 2860 + }, + { + "epoch": 0.6592165898617511, + "grad_norm": 0.9047325967091919, + "learning_rate": 1.6024200040069065e-06, + "loss": 0.6965433359146118, + "step": 2861 + }, + { + "epoch": 0.659447004608295, + "grad_norm": 0.9743729570848424, + "learning_rate": 1.6021158669015273e-06, + "loss": 0.780353307723999, + "step": 2862 + }, + { + "epoch": 0.6596774193548387, + "grad_norm": 0.7726382879850381, + "learning_rate": 1.6018116423998277e-06, + "loss": 0.685762882232666, + "step": 2863 + }, + { + "epoch": 0.6599078341013825, + "grad_norm": 0.8607619933867399, + "learning_rate": 1.6015073305459646e-06, + "loss": 0.8249918222427368, + "step": 2864 + }, + { + "epoch": 0.6601382488479263, + "grad_norm": 0.7388237148259402, + "learning_rate": 1.6012029313841086e-06, + "loss": 0.7327184677124023, + "step": 2865 + }, + { + "epoch": 0.66036866359447, + "grad_norm": 0.9554378042614118, + "learning_rate": 1.6008984449584433e-06, + "loss": 0.7785891890525818, + "step": 2866 + }, + { + "epoch": 0.6605990783410138, + "grad_norm": 0.7196967379779726, + "learning_rate": 1.600593871313164e-06, + "loss": 0.7307751178741455, + "step": 2867 + }, + { + "epoch": 0.6608294930875576, + "grad_norm": 1.2601680054093507, + "learning_rate": 1.6002892104924796e-06, + "loss": 0.8802257180213928, + "step": 2868 + }, + { + "epoch": 0.6610599078341014, + "grad_norm": 1.0302753711943056, + "learning_rate": 1.5999844625406106e-06, + "loss": 0.8699140548706055, + "step": 2869 + }, + { + "epoch": 0.6612903225806451, + "grad_norm": 0.8146336951608913, + "learning_rate": 1.5996796275017914e-06, + "loss": 0.6453604102134705, + "step": 2870 + }, + { + 
"epoch": 0.661520737327189, + "grad_norm": 0.807532897551279, + "learning_rate": 1.5993747054202682e-06, + "loss": 0.7319324016571045, + "step": 2871 + }, + { + "epoch": 0.6617511520737327, + "grad_norm": 0.9337023535064233, + "learning_rate": 1.5990696963402998e-06, + "loss": 0.8357574343681335, + "step": 2872 + }, + { + "epoch": 0.6619815668202765, + "grad_norm": 0.854915024221744, + "learning_rate": 1.5987646003061581e-06, + "loss": 0.7647984027862549, + "step": 2873 + }, + { + "epoch": 0.6622119815668203, + "grad_norm": 1.0099884737934117, + "learning_rate": 1.5984594173621274e-06, + "loss": 0.8542075753211975, + "step": 2874 + }, + { + "epoch": 0.6624423963133641, + "grad_norm": 0.9685596460194386, + "learning_rate": 1.5981541475525044e-06, + "loss": 0.7689328193664551, + "step": 2875 + }, + { + "epoch": 0.6626728110599078, + "grad_norm": 0.8183777315007433, + "learning_rate": 1.5978487909215987e-06, + "loss": 0.7459174990653992, + "step": 2876 + }, + { + "epoch": 0.6629032258064517, + "grad_norm": 0.8697380019030229, + "learning_rate": 1.5975433475137329e-06, + "loss": 0.8268495202064514, + "step": 2877 + }, + { + "epoch": 0.6631336405529954, + "grad_norm": 0.9013422410425754, + "learning_rate": 1.5972378173732406e-06, + "loss": 0.8254266977310181, + "step": 2878 + }, + { + "epoch": 0.6633640552995391, + "grad_norm": 1.0427681980244552, + "learning_rate": 1.59693220054447e-06, + "loss": 0.8552727103233337, + "step": 2879 + }, + { + "epoch": 0.663594470046083, + "grad_norm": 0.7469699255899254, + "learning_rate": 1.596626497071781e-06, + "loss": 0.7196269035339355, + "step": 2880 + }, + { + "epoch": 0.6638248847926267, + "grad_norm": 0.9146202447996906, + "learning_rate": 1.5963207069995455e-06, + "loss": 0.815540075302124, + "step": 2881 + }, + { + "epoch": 0.6640552995391705, + "grad_norm": 0.8585411055523222, + "learning_rate": 1.596014830372149e-06, + "loss": 0.8040128350257874, + "step": 2882 + }, + { + "epoch": 0.6642857142857143, + "grad_norm": 
0.8592608746136836, + "learning_rate": 1.5957088672339887e-06, + "loss": 0.7990812659263611, + "step": 2883 + }, + { + "epoch": 0.6645161290322581, + "grad_norm": 0.9139395957334936, + "learning_rate": 1.5954028176294746e-06, + "loss": 0.956179141998291, + "step": 2884 + }, + { + "epoch": 0.6647465437788018, + "grad_norm": 0.9544806325504157, + "learning_rate": 1.5950966816030304e-06, + "loss": 0.7730144262313843, + "step": 2885 + }, + { + "epoch": 0.6649769585253457, + "grad_norm": 1.0230957824823068, + "learning_rate": 1.5947904591990904e-06, + "loss": 0.902834415435791, + "step": 2886 + }, + { + "epoch": 0.6652073732718894, + "grad_norm": 0.8987169052425068, + "learning_rate": 1.5944841504621027e-06, + "loss": 0.7234599590301514, + "step": 2887 + }, + { + "epoch": 0.6654377880184332, + "grad_norm": 0.9849005395145788, + "learning_rate": 1.5941777554365271e-06, + "loss": 1.0267843008041382, + "step": 2888 + }, + { + "epoch": 0.665668202764977, + "grad_norm": 1.1615941669691254, + "learning_rate": 1.5938712741668376e-06, + "loss": 0.7431002855300903, + "step": 2889 + }, + { + "epoch": 0.6658986175115207, + "grad_norm": 0.8013605201375282, + "learning_rate": 1.5935647066975185e-06, + "loss": 0.7843111753463745, + "step": 2890 + }, + { + "epoch": 0.6661290322580645, + "grad_norm": 0.9498522711625995, + "learning_rate": 1.593258053073068e-06, + "loss": 0.8775256872177124, + "step": 2891 + }, + { + "epoch": 0.6663594470046083, + "grad_norm": 0.8363878343517416, + "learning_rate": 1.5929513133379966e-06, + "loss": 0.7861695289611816, + "step": 2892 + }, + { + "epoch": 0.6665898617511521, + "grad_norm": 1.1446598361432248, + "learning_rate": 1.5926444875368267e-06, + "loss": 0.8721977472305298, + "step": 2893 + }, + { + "epoch": 0.6668202764976958, + "grad_norm": 0.7591669830135314, + "learning_rate": 1.5923375757140941e-06, + "loss": 0.648263692855835, + "step": 2894 + }, + { + "epoch": 0.6670506912442397, + "grad_norm": 0.8984763952333247, + "learning_rate": 
1.592030577914347e-06, + "loss": 0.8334729075431824, + "step": 2895 + }, + { + "epoch": 0.6672811059907834, + "grad_norm": 0.7757586607492352, + "learning_rate": 1.591723494182145e-06, + "loss": 0.6105949878692627, + "step": 2896 + }, + { + "epoch": 0.6675115207373272, + "grad_norm": 0.8562379620561761, + "learning_rate": 1.5914163245620608e-06, + "loss": 0.7895448207855225, + "step": 2897 + }, + { + "epoch": 0.667741935483871, + "grad_norm": 0.9487051467126763, + "learning_rate": 1.5911090690986805e-06, + "loss": 0.8728576302528381, + "step": 2898 + }, + { + "epoch": 0.6679723502304148, + "grad_norm": 0.7480056751597441, + "learning_rate": 1.590801727836601e-06, + "loss": 0.7637856006622314, + "step": 2899 + }, + { + "epoch": 0.6682027649769585, + "grad_norm": 1.0125939986027075, + "learning_rate": 1.590494300820433e-06, + "loss": 0.8988397717475891, + "step": 2900 + }, + { + "epoch": 0.6684331797235024, + "grad_norm": 0.9324485554010499, + "learning_rate": 1.590186788094799e-06, + "loss": 0.7486827373504639, + "step": 2901 + }, + { + "epoch": 0.6686635944700461, + "grad_norm": 0.7629631437151, + "learning_rate": 1.589879189704334e-06, + "loss": 0.8212865591049194, + "step": 2902 + }, + { + "epoch": 0.6688940092165898, + "grad_norm": 0.7640149838894683, + "learning_rate": 1.5895715056936853e-06, + "loss": 0.7421284914016724, + "step": 2903 + }, + { + "epoch": 0.6691244239631337, + "grad_norm": 0.8407199034997399, + "learning_rate": 1.5892637361075132e-06, + "loss": 0.8721676468849182, + "step": 2904 + }, + { + "epoch": 0.6693548387096774, + "grad_norm": 0.9214400782360851, + "learning_rate": 1.58895588099049e-06, + "loss": 0.7265836000442505, + "step": 2905 + }, + { + "epoch": 0.6695852534562212, + "grad_norm": 0.959235173078028, + "learning_rate": 1.5886479403873e-06, + "loss": 0.863615870475769, + "step": 2906 + }, + { + "epoch": 0.669815668202765, + "grad_norm": 0.788219849900096, + "learning_rate": 1.588339914342641e-06, + "loss": 0.8362177610397339, + "step": 
2907 + }, + { + "epoch": 0.6700460829493088, + "grad_norm": 1.0142262876785297, + "learning_rate": 1.5880318029012223e-06, + "loss": 0.9076892137527466, + "step": 2908 + }, + { + "epoch": 0.6702764976958525, + "grad_norm": 0.957653217332238, + "learning_rate": 1.5877236061077658e-06, + "loss": 0.9149065017700195, + "step": 2909 + }, + { + "epoch": 0.6705069124423964, + "grad_norm": 0.8820705070600866, + "learning_rate": 1.5874153240070062e-06, + "loss": 0.7761013507843018, + "step": 2910 + }, + { + "epoch": 0.6707373271889401, + "grad_norm": 1.049261864076062, + "learning_rate": 1.5871069566436894e-06, + "loss": 0.8671830892562866, + "step": 2911 + }, + { + "epoch": 0.6709677419354839, + "grad_norm": 0.9461120142941367, + "learning_rate": 1.5867985040625755e-06, + "loss": 0.9433870315551758, + "step": 2912 + }, + { + "epoch": 0.6711981566820276, + "grad_norm": 0.934114103387592, + "learning_rate": 1.5864899663084352e-06, + "loss": 0.8009352684020996, + "step": 2913 + }, + { + "epoch": 0.6714285714285714, + "grad_norm": 0.9285902098427739, + "learning_rate": 1.5861813434260528e-06, + "loss": 0.6813808083534241, + "step": 2914 + }, + { + "epoch": 0.6716589861751152, + "grad_norm": 0.7891360814530397, + "learning_rate": 1.5858726354602248e-06, + "loss": 0.712783932685852, + "step": 2915 + }, + { + "epoch": 0.6718894009216589, + "grad_norm": 0.9971879600214522, + "learning_rate": 1.5855638424557588e-06, + "loss": 0.7871056795120239, + "step": 2916 + }, + { + "epoch": 0.6721198156682028, + "grad_norm": 0.9551471269364743, + "learning_rate": 1.5852549644574766e-06, + "loss": 0.8590981960296631, + "step": 2917 + }, + { + "epoch": 0.6723502304147465, + "grad_norm": 0.9338373296128487, + "learning_rate": 1.584946001510211e-06, + "loss": 0.7952913641929626, + "step": 2918 + }, + { + "epoch": 0.6725806451612903, + "grad_norm": 1.0716689971646949, + "learning_rate": 1.5846369536588078e-06, + "loss": 0.8567384481430054, + "step": 2919 + }, + { + "epoch": 0.6728110599078341, + 
"grad_norm": 1.0797852963412387, + "learning_rate": 1.5843278209481246e-06, + "loss": 0.859541654586792, + "step": 2920 + }, + { + "epoch": 0.6730414746543779, + "grad_norm": 1.1734504357127358, + "learning_rate": 1.5840186034230318e-06, + "loss": 0.7843801975250244, + "step": 2921 + }, + { + "epoch": 0.6732718894009216, + "grad_norm": 0.7736885985619673, + "learning_rate": 1.5837093011284118e-06, + "loss": 0.7448940277099609, + "step": 2922 + }, + { + "epoch": 0.6735023041474655, + "grad_norm": 1.0803788544256392, + "learning_rate": 1.5833999141091593e-06, + "loss": 0.9325242042541504, + "step": 2923 + }, + { + "epoch": 0.6737327188940092, + "grad_norm": 1.2302390941080075, + "learning_rate": 1.5830904424101816e-06, + "loss": 0.8005647659301758, + "step": 2924 + }, + { + "epoch": 0.673963133640553, + "grad_norm": 0.9271295903754758, + "learning_rate": 1.5827808860763984e-06, + "loss": 0.8897464275360107, + "step": 2925 + }, + { + "epoch": 0.6741935483870968, + "grad_norm": 1.0218758099034497, + "learning_rate": 1.5824712451527409e-06, + "loss": 0.8319039344787598, + "step": 2926 + }, + { + "epoch": 0.6744239631336405, + "grad_norm": 1.0734614103347653, + "learning_rate": 1.5821615196841533e-06, + "loss": 0.7638111114501953, + "step": 2927 + }, + { + "epoch": 0.6746543778801843, + "grad_norm": 0.8552316991076688, + "learning_rate": 1.581851709715592e-06, + "loss": 0.7617092132568359, + "step": 2928 + }, + { + "epoch": 0.6748847926267281, + "grad_norm": 1.0119419737078916, + "learning_rate": 1.581541815292025e-06, + "loss": 0.813319742679596, + "step": 2929 + }, + { + "epoch": 0.6751152073732719, + "grad_norm": 0.8324815306646182, + "learning_rate": 1.5812318364584334e-06, + "loss": 0.7495343089103699, + "step": 2930 + }, + { + "epoch": 0.6753456221198156, + "grad_norm": 1.0070331562925772, + "learning_rate": 1.5809217732598103e-06, + "loss": 0.9064745306968689, + "step": 2931 + }, + { + "epoch": 0.6755760368663595, + "grad_norm": 0.77529378116571, + 
"learning_rate": 1.580611625741161e-06, + "loss": 0.699098527431488, + "step": 2932 + }, + { + "epoch": 0.6758064516129032, + "grad_norm": 0.9525126023464006, + "learning_rate": 1.5803013939475025e-06, + "loss": 0.9168096780776978, + "step": 2933 + }, + { + "epoch": 0.676036866359447, + "grad_norm": 0.8145178437764095, + "learning_rate": 1.5799910779238652e-06, + "loss": 0.8848644495010376, + "step": 2934 + }, + { + "epoch": 0.6762672811059908, + "grad_norm": 0.8852934324704809, + "learning_rate": 1.5796806777152903e-06, + "loss": 0.7795228958129883, + "step": 2935 + }, + { + "epoch": 0.6764976958525346, + "grad_norm": 0.9901973226971541, + "learning_rate": 1.5793701933668327e-06, + "loss": 0.9287698268890381, + "step": 2936 + }, + { + "epoch": 0.6767281105990783, + "grad_norm": 0.9605403793187631, + "learning_rate": 1.5790596249235587e-06, + "loss": 0.8661396503448486, + "step": 2937 + }, + { + "epoch": 0.6769585253456222, + "grad_norm": 1.0073544692346657, + "learning_rate": 1.5787489724305464e-06, + "loss": 0.7544706463813782, + "step": 2938 + }, + { + "epoch": 0.6771889400921659, + "grad_norm": 1.350397583464208, + "learning_rate": 1.5784382359328872e-06, + "loss": 0.8613651990890503, + "step": 2939 + }, + { + "epoch": 0.6774193548387096, + "grad_norm": 1.0225856960398716, + "learning_rate": 1.5781274154756833e-06, + "loss": 0.8695065975189209, + "step": 2940 + }, + { + "epoch": 0.6776497695852535, + "grad_norm": 1.1450515007973723, + "learning_rate": 1.577816511104051e-06, + "loss": 0.9453287720680237, + "step": 2941 + }, + { + "epoch": 0.6778801843317972, + "grad_norm": 0.7720442193305806, + "learning_rate": 1.577505522863117e-06, + "loss": 0.8599261045455933, + "step": 2942 + }, + { + "epoch": 0.678110599078341, + "grad_norm": 0.8831442525084486, + "learning_rate": 1.5771944507980205e-06, + "loss": 0.8143391609191895, + "step": 2943 + }, + { + "epoch": 0.6783410138248848, + "grad_norm": 0.9328639928073722, + "learning_rate": 1.576883294953914e-06, + "loss": 
0.9558438062667847, + "step": 2944 + }, + { + "epoch": 0.6785714285714286, + "grad_norm": 0.6484366074680237, + "learning_rate": 1.5765720553759605e-06, + "loss": 0.7348268628120422, + "step": 2945 + }, + { + "epoch": 0.6788018433179723, + "grad_norm": 1.0387482604326927, + "learning_rate": 1.5762607321093366e-06, + "loss": 0.9361155033111572, + "step": 2946 + }, + { + "epoch": 0.6790322580645162, + "grad_norm": 0.9855095789147831, + "learning_rate": 1.5759493251992303e-06, + "loss": 0.8094985485076904, + "step": 2947 + }, + { + "epoch": 0.6792626728110599, + "grad_norm": 1.631714554631539, + "learning_rate": 1.575637834690842e-06, + "loss": 0.8746658563613892, + "step": 2948 + }, + { + "epoch": 0.6794930875576037, + "grad_norm": 0.9249217331606766, + "learning_rate": 1.575326260629384e-06, + "loss": 0.7433050870895386, + "step": 2949 + }, + { + "epoch": 0.6797235023041475, + "grad_norm": 0.9856239464338491, + "learning_rate": 1.5750146030600808e-06, + "loss": 0.8621053695678711, + "step": 2950 + }, + { + "epoch": 0.6799539170506912, + "grad_norm": 0.9119478915395727, + "learning_rate": 1.5747028620281695e-06, + "loss": 0.7541971206665039, + "step": 2951 + }, + { + "epoch": 0.680184331797235, + "grad_norm": 1.0099311239329205, + "learning_rate": 1.5743910375788982e-06, + "loss": 0.9817987680435181, + "step": 2952 + }, + { + "epoch": 0.6804147465437788, + "grad_norm": 1.046074262522893, + "learning_rate": 1.5740791297575283e-06, + "loss": 0.7763534188270569, + "step": 2953 + }, + { + "epoch": 0.6806451612903226, + "grad_norm": 1.0303747349913415, + "learning_rate": 1.573767138609333e-06, + "loss": 0.7482337355613708, + "step": 2954 + }, + { + "epoch": 0.6808755760368663, + "grad_norm": 1.0308347032013807, + "learning_rate": 1.5734550641795967e-06, + "loss": 0.7352473735809326, + "step": 2955 + }, + { + "epoch": 0.6811059907834102, + "grad_norm": 0.9086715245515472, + "learning_rate": 1.573142906513617e-06, + "loss": 0.8657293319702148, + "step": 2956 + }, + { + 
"epoch": 0.6813364055299539, + "grad_norm": 0.9597438975913184, + "learning_rate": 1.5728306656567033e-06, + "loss": 0.8035376667976379, + "step": 2957 + }, + { + "epoch": 0.6815668202764977, + "grad_norm": 0.9481340627224691, + "learning_rate": 1.572518341654177e-06, + "loss": 0.8030140399932861, + "step": 2958 + }, + { + "epoch": 0.6817972350230415, + "grad_norm": 0.956950799259568, + "learning_rate": 1.5722059345513711e-06, + "loss": 0.797377347946167, + "step": 2959 + }, + { + "epoch": 0.6820276497695853, + "grad_norm": 0.7086079395333297, + "learning_rate": 1.5718934443936311e-06, + "loss": 0.7041053175926208, + "step": 2960 + }, + { + "epoch": 0.682258064516129, + "grad_norm": 1.0251660128790803, + "learning_rate": 1.571580871226315e-06, + "loss": 0.7911885976791382, + "step": 2961 + }, + { + "epoch": 0.6824884792626729, + "grad_norm": 0.8834527581303466, + "learning_rate": 1.5712682150947922e-06, + "loss": 0.7908599376678467, + "step": 2962 + }, + { + "epoch": 0.6827188940092166, + "grad_norm": 0.8159267525070817, + "learning_rate": 1.5709554760444442e-06, + "loss": 0.860281229019165, + "step": 2963 + }, + { + "epoch": 0.6829493087557603, + "grad_norm": 0.8226887233242035, + "learning_rate": 1.5706426541206645e-06, + "loss": 0.6987707018852234, + "step": 2964 + }, + { + "epoch": 0.6831797235023042, + "grad_norm": 0.8719992040747229, + "learning_rate": 1.5703297493688592e-06, + "loss": 0.7198495864868164, + "step": 2965 + }, + { + "epoch": 0.6834101382488479, + "grad_norm": 1.1775957395401402, + "learning_rate": 1.5700167618344455e-06, + "loss": 0.8232598304748535, + "step": 2966 + }, + { + "epoch": 0.6836405529953917, + "grad_norm": 0.8962037845514019, + "learning_rate": 1.569703691562854e-06, + "loss": 0.8425456285476685, + "step": 2967 + }, + { + "epoch": 0.6838709677419355, + "grad_norm": 0.8746880672166448, + "learning_rate": 1.5693905385995252e-06, + "loss": 0.7758797407150269, + "step": 2968 + }, + { + "epoch": 0.6841013824884793, + "grad_norm": 
0.9739325658587258, + "learning_rate": 1.569077302989914e-06, + "loss": 0.7478910684585571, + "step": 2969 + }, + { + "epoch": 0.684331797235023, + "grad_norm": 0.88099670074057, + "learning_rate": 1.5687639847794854e-06, + "loss": 0.8274309635162354, + "step": 2970 + }, + { + "epoch": 0.6845622119815669, + "grad_norm": 0.9125307567181903, + "learning_rate": 1.5684505840137173e-06, + "loss": 0.6800183653831482, + "step": 2971 + }, + { + "epoch": 0.6847926267281106, + "grad_norm": 1.1416810893109246, + "learning_rate": 1.5681371007380996e-06, + "loss": 0.7768006324768066, + "step": 2972 + }, + { + "epoch": 0.6850230414746544, + "grad_norm": 0.8308804334079786, + "learning_rate": 1.5678235349981338e-06, + "loss": 0.7462732195854187, + "step": 2973 + }, + { + "epoch": 0.6852534562211982, + "grad_norm": 0.935725297382271, + "learning_rate": 1.5675098868393335e-06, + "loss": 0.8461781144142151, + "step": 2974 + }, + { + "epoch": 0.6854838709677419, + "grad_norm": 0.9717984846524689, + "learning_rate": 1.5671961563072244e-06, + "loss": 0.7968491911888123, + "step": 2975 + }, + { + "epoch": 0.6857142857142857, + "grad_norm": 0.9710985084042064, + "learning_rate": 1.5668823434473443e-06, + "loss": 0.805394172668457, + "step": 2976 + }, + { + "epoch": 0.6859447004608294, + "grad_norm": 0.9297793560483373, + "learning_rate": 1.5665684483052424e-06, + "loss": 0.7241736650466919, + "step": 2977 + }, + { + "epoch": 0.6861751152073733, + "grad_norm": 0.9673260038513803, + "learning_rate": 1.5662544709264801e-06, + "loss": 0.7345866560935974, + "step": 2978 + }, + { + "epoch": 0.686405529953917, + "grad_norm": 0.8604134561659843, + "learning_rate": 1.5659404113566312e-06, + "loss": 0.7605085372924805, + "step": 2979 + }, + { + "epoch": 0.6866359447004609, + "grad_norm": 0.9618303204830516, + "learning_rate": 1.5656262696412808e-06, + "loss": 0.8555188179016113, + "step": 2980 + }, + { + "epoch": 0.6868663594470046, + "grad_norm": 0.8604009092225049, + "learning_rate": 
1.5653120458260261e-06, + "loss": 0.7139542698860168, + "step": 2981 + }, + { + "epoch": 0.6870967741935484, + "grad_norm": 0.9290410772154322, + "learning_rate": 1.564997739956476e-06, + "loss": 0.8676587343215942, + "step": 2982 + }, + { + "epoch": 0.6873271889400921, + "grad_norm": 0.9524807718966832, + "learning_rate": 1.5646833520782523e-06, + "loss": 0.8121025562286377, + "step": 2983 + }, + { + "epoch": 0.687557603686636, + "grad_norm": 0.7889521702672326, + "learning_rate": 1.5643688822369873e-06, + "loss": 0.7757136821746826, + "step": 2984 + }, + { + "epoch": 0.6877880184331797, + "grad_norm": 0.8884194014759353, + "learning_rate": 1.5640543304783264e-06, + "loss": 0.8357381820678711, + "step": 2985 + }, + { + "epoch": 0.6880184331797236, + "grad_norm": 0.9725078170053829, + "learning_rate": 1.563739696847926e-06, + "loss": 0.8635811805725098, + "step": 2986 + }, + { + "epoch": 0.6882488479262673, + "grad_norm": 0.9539959391598165, + "learning_rate": 1.563424981391455e-06, + "loss": 0.90900057554245, + "step": 2987 + }, + { + "epoch": 0.688479262672811, + "grad_norm": 1.056070683011334, + "learning_rate": 1.563110184154594e-06, + "loss": 0.9001314043998718, + "step": 2988 + }, + { + "epoch": 0.6887096774193548, + "grad_norm": 0.7893194308475292, + "learning_rate": 1.5627953051830353e-06, + "loss": 0.7482000589370728, + "step": 2989 + }, + { + "epoch": 0.6889400921658986, + "grad_norm": 1.0183435769639337, + "learning_rate": 1.5624803445224829e-06, + "loss": 0.8504235744476318, + "step": 2990 + }, + { + "epoch": 0.6891705069124424, + "grad_norm": 0.9687684393899343, + "learning_rate": 1.5621653022186526e-06, + "loss": 0.7887089252471924, + "step": 2991 + }, + { + "epoch": 0.6894009216589861, + "grad_norm": 0.9412995775666883, + "learning_rate": 1.5618501783172735e-06, + "loss": 0.8745719790458679, + "step": 2992 + }, + { + "epoch": 0.68963133640553, + "grad_norm": 0.8960957701589951, + "learning_rate": 1.5615349728640848e-06, + "loss": 0.8269633054733276, 
+ "step": 2993 + }, + { + "epoch": 0.6898617511520737, + "grad_norm": 0.802430248071724, + "learning_rate": 1.5612196859048382e-06, + "loss": 0.7355072498321533, + "step": 2994 + }, + { + "epoch": 0.6900921658986175, + "grad_norm": 0.9768940563158048, + "learning_rate": 1.5609043174852966e-06, + "loss": 0.857653021812439, + "step": 2995 + }, + { + "epoch": 0.6903225806451613, + "grad_norm": 1.0766498115550724, + "learning_rate": 1.5605888676512365e-06, + "loss": 0.8575785160064697, + "step": 2996 + }, + { + "epoch": 0.6905529953917051, + "grad_norm": 0.8803208034747956, + "learning_rate": 1.560273336448444e-06, + "loss": 0.8631561994552612, + "step": 2997 + }, + { + "epoch": 0.6907834101382488, + "grad_norm": 1.0014936433552548, + "learning_rate": 1.5599577239227185e-06, + "loss": 0.7993800044059753, + "step": 2998 + }, + { + "epoch": 0.6910138248847926, + "grad_norm": 0.8990076202156756, + "learning_rate": 1.5596420301198707e-06, + "loss": 0.7961007356643677, + "step": 2999 + }, + { + "epoch": 0.6912442396313364, + "grad_norm": 1.0216355950582598, + "learning_rate": 1.5593262550857232e-06, + "loss": 0.7536421418190002, + "step": 3000 + }, + { + "epoch": 0.6914746543778801, + "grad_norm": 0.8348839196110558, + "learning_rate": 1.55901039886611e-06, + "loss": 0.70341956615448, + "step": 3001 + }, + { + "epoch": 0.691705069124424, + "grad_norm": 1.0093771985733984, + "learning_rate": 1.5586944615068776e-06, + "loss": 0.8152127265930176, + "step": 3002 + }, + { + "epoch": 0.6919354838709677, + "grad_norm": 0.9332692294841357, + "learning_rate": 1.5583784430538838e-06, + "loss": 0.6728770732879639, + "step": 3003 + }, + { + "epoch": 0.6921658986175115, + "grad_norm": 1.0871891474224546, + "learning_rate": 1.558062343552998e-06, + "loss": 0.8406884670257568, + "step": 3004 + }, + { + "epoch": 0.6923963133640553, + "grad_norm": 0.8920706269230131, + "learning_rate": 1.5577461630501018e-06, + "loss": 0.766754686832428, + "step": 3005 + }, + { + "epoch": 
0.6926267281105991, + "grad_norm": 0.714004026253109, + "learning_rate": 1.5574299015910889e-06, + "loss": 0.7456642389297485, + "step": 3006 + }, + { + "epoch": 0.6928571428571428, + "grad_norm": 0.8290815943958627, + "learning_rate": 1.557113559221863e-06, + "loss": 0.7834097743034363, + "step": 3007 + }, + { + "epoch": 0.6930875576036867, + "grad_norm": 0.91346801287595, + "learning_rate": 1.556797135988342e-06, + "loss": 0.7425946593284607, + "step": 3008 + }, + { + "epoch": 0.6933179723502304, + "grad_norm": 1.0483330104966306, + "learning_rate": 1.5564806319364534e-06, + "loss": 0.7914093732833862, + "step": 3009 + }, + { + "epoch": 0.6935483870967742, + "grad_norm": 0.9665010461345012, + "learning_rate": 1.556164047112138e-06, + "loss": 0.819783091545105, + "step": 3010 + }, + { + "epoch": 0.693778801843318, + "grad_norm": 0.985903986481312, + "learning_rate": 1.5558473815613474e-06, + "loss": 0.7147302627563477, + "step": 3011 + }, + { + "epoch": 0.6940092165898617, + "grad_norm": 1.1240220664371217, + "learning_rate": 1.5555306353300452e-06, + "loss": 0.7247470617294312, + "step": 3012 + }, + { + "epoch": 0.6942396313364055, + "grad_norm": 1.2403633886338306, + "learning_rate": 1.5552138084642067e-06, + "loss": 0.8277294635772705, + "step": 3013 + }, + { + "epoch": 0.6944700460829493, + "grad_norm": 0.9054626931882043, + "learning_rate": 1.554896901009819e-06, + "loss": 0.8014394640922546, + "step": 3014 + }, + { + "epoch": 0.6947004608294931, + "grad_norm": 0.9274937399954835, + "learning_rate": 1.5545799130128808e-06, + "loss": 0.7468869686126709, + "step": 3015 + }, + { + "epoch": 0.6949308755760368, + "grad_norm": 0.8904964499744723, + "learning_rate": 1.554262844519402e-06, + "loss": 0.7854933142662048, + "step": 3016 + }, + { + "epoch": 0.6951612903225807, + "grad_norm": 0.9536718451900233, + "learning_rate": 1.5539456955754053e-06, + "loss": 0.8359543681144714, + "step": 3017 + }, + { + "epoch": 0.6953917050691244, + "grad_norm": 0.8313774511874621, 
+ "learning_rate": 1.5536284662269243e-06, + "loss": 0.7767773866653442, + "step": 3018 + }, + { + "epoch": 0.6956221198156682, + "grad_norm": 0.7370790678700915, + "learning_rate": 1.5533111565200044e-06, + "loss": 0.8388162851333618, + "step": 3019 + }, + { + "epoch": 0.695852534562212, + "grad_norm": 0.9159856551917743, + "learning_rate": 1.5529937665007024e-06, + "loss": 0.7791208028793335, + "step": 3020 + }, + { + "epoch": 0.6960829493087558, + "grad_norm": 0.9740300384215894, + "learning_rate": 1.5526762962150875e-06, + "loss": 0.8662698864936829, + "step": 3021 + }, + { + "epoch": 0.6963133640552995, + "grad_norm": 0.7004253764922403, + "learning_rate": 1.5523587457092394e-06, + "loss": 0.737492024898529, + "step": 3022 + }, + { + "epoch": 0.6965437788018434, + "grad_norm": 1.0408775765092733, + "learning_rate": 1.552041115029251e-06, + "loss": 0.83610999584198, + "step": 3023 + }, + { + "epoch": 0.6967741935483871, + "grad_norm": 1.1134023704947162, + "learning_rate": 1.5517234042212254e-06, + "loss": 0.930977463722229, + "step": 3024 + }, + { + "epoch": 0.6970046082949308, + "grad_norm": 0.8756044667716456, + "learning_rate": 1.551405613331278e-06, + "loss": 0.7587058544158936, + "step": 3025 + }, + { + "epoch": 0.6972350230414747, + "grad_norm": 0.7720525053545241, + "learning_rate": 1.551087742405536e-06, + "loss": 0.7549247741699219, + "step": 3026 + }, + { + "epoch": 0.6974654377880184, + "grad_norm": 0.8108175030001162, + "learning_rate": 1.5507697914901376e-06, + "loss": 0.6906812787055969, + "step": 3027 + }, + { + "epoch": 0.6976958525345622, + "grad_norm": 0.7358502568670926, + "learning_rate": 1.5504517606312332e-06, + "loss": 0.7806124687194824, + "step": 3028 + }, + { + "epoch": 0.697926267281106, + "grad_norm": 0.8191496367359047, + "learning_rate": 1.5501336498749846e-06, + "loss": 0.8091036081314087, + "step": 3029 + }, + { + "epoch": 0.6981566820276498, + "grad_norm": 0.923718506351422, + "learning_rate": 1.5498154592675646e-06, + "loss": 
0.721937894821167, + "step": 3030 + }, + { + "epoch": 0.6983870967741935, + "grad_norm": 0.729194360630959, + "learning_rate": 1.5494971888551587e-06, + "loss": 0.712378740310669, + "step": 3031 + }, + { + "epoch": 0.6986175115207374, + "grad_norm": 0.9809936276606201, + "learning_rate": 1.5491788386839635e-06, + "loss": 0.8106495141983032, + "step": 3032 + }, + { + "epoch": 0.6988479262672811, + "grad_norm": 1.0550994014291641, + "learning_rate": 1.5488604088001866e-06, + "loss": 0.7886521816253662, + "step": 3033 + }, + { + "epoch": 0.6990783410138249, + "grad_norm": 0.9413909460240358, + "learning_rate": 1.5485418992500479e-06, + "loss": 0.7483402490615845, + "step": 3034 + }, + { + "epoch": 0.6993087557603687, + "grad_norm": 0.9735513924670123, + "learning_rate": 1.5482233100797788e-06, + "loss": 0.6236725449562073, + "step": 3035 + }, + { + "epoch": 0.6995391705069124, + "grad_norm": 1.023064942988146, + "learning_rate": 1.5479046413356222e-06, + "loss": 0.9477910995483398, + "step": 3036 + }, + { + "epoch": 0.6997695852534562, + "grad_norm": 1.0993186685690193, + "learning_rate": 1.5475858930638322e-06, + "loss": 0.8921213746070862, + "step": 3037 + }, + { + "epoch": 0.7, + "grad_norm": 0.7179145673247356, + "learning_rate": 1.5472670653106744e-06, + "loss": 0.7460963726043701, + "step": 3038 + }, + { + "epoch": 0.7002304147465438, + "grad_norm": 0.8319225077693166, + "learning_rate": 1.5469481581224271e-06, + "loss": 0.6135849356651306, + "step": 3039 + }, + { + "epoch": 0.7004608294930875, + "grad_norm": 0.8739744675210649, + "learning_rate": 1.546629171545378e-06, + "loss": 0.8039313554763794, + "step": 3040 + }, + { + "epoch": 0.7006912442396314, + "grad_norm": 1.2210857419731846, + "learning_rate": 1.5463101056258289e-06, + "loss": 0.8751651048660278, + "step": 3041 + }, + { + "epoch": 0.7009216589861751, + "grad_norm": 0.9070575590392688, + "learning_rate": 1.545990960410091e-06, + "loss": 0.7600879669189453, + "step": 3042 + }, + { + "epoch": 
0.7011520737327189, + "grad_norm": 0.9983949583794295, + "learning_rate": 1.545671735944488e-06, + "loss": 0.8118841648101807, + "step": 3043 + }, + { + "epoch": 0.7013824884792627, + "grad_norm": 0.7470799565000998, + "learning_rate": 1.5453524322753546e-06, + "loss": 0.7144184112548828, + "step": 3044 + }, + { + "epoch": 0.7016129032258065, + "grad_norm": 1.149288210915265, + "learning_rate": 1.545033049449038e-06, + "loss": 0.9730075001716614, + "step": 3045 + }, + { + "epoch": 0.7018433179723502, + "grad_norm": 0.9334735321523672, + "learning_rate": 1.5447135875118957e-06, + "loss": 0.6930910348892212, + "step": 3046 + }, + { + "epoch": 0.7020737327188941, + "grad_norm": 1.0190518922073715, + "learning_rate": 1.5443940465102973e-06, + "loss": 0.8517031669616699, + "step": 3047 + }, + { + "epoch": 0.7023041474654378, + "grad_norm": 0.9199109424213672, + "learning_rate": 1.5440744264906237e-06, + "loss": 0.7939779758453369, + "step": 3048 + }, + { + "epoch": 0.7025345622119815, + "grad_norm": 1.0310125567194028, + "learning_rate": 1.5437547274992672e-06, + "loss": 0.8946782350540161, + "step": 3049 + }, + { + "epoch": 0.7027649769585254, + "grad_norm": 1.1682685309372194, + "learning_rate": 1.543434949582632e-06, + "loss": 0.9273954033851624, + "step": 3050 + }, + { + "epoch": 0.7029953917050691, + "grad_norm": 0.8496559046178408, + "learning_rate": 1.5431150927871333e-06, + "loss": 0.7731457352638245, + "step": 3051 + }, + { + "epoch": 0.7032258064516129, + "grad_norm": 0.9900519408386056, + "learning_rate": 1.542795157159198e-06, + "loss": 0.7982608079910278, + "step": 3052 + }, + { + "epoch": 0.7034562211981567, + "grad_norm": 1.0252185126476046, + "learning_rate": 1.542475142745264e-06, + "loss": 0.8422989845275879, + "step": 3053 + }, + { + "epoch": 0.7036866359447005, + "grad_norm": 1.1364598749635721, + "learning_rate": 1.542155049591781e-06, + "loss": 0.8344876766204834, + "step": 3054 + }, + { + "epoch": 0.7039170506912442, + "grad_norm": 
1.3240029855230715, + "learning_rate": 1.541834877745211e-06, + "loss": 0.8830629587173462, + "step": 3055 + }, + { + "epoch": 0.7041474654377881, + "grad_norm": 0.8841605120149971, + "learning_rate": 1.5415146272520247e-06, + "loss": 0.823864221572876, + "step": 3056 + }, + { + "epoch": 0.7043778801843318, + "grad_norm": 1.226256029650695, + "learning_rate": 1.5411942981587077e-06, + "loss": 0.8577016592025757, + "step": 3057 + }, + { + "epoch": 0.7046082949308756, + "grad_norm": 0.9938154526101401, + "learning_rate": 1.540873890511755e-06, + "loss": 0.7431750297546387, + "step": 3058 + }, + { + "epoch": 0.7048387096774194, + "grad_norm": 1.3100911793106818, + "learning_rate": 1.5405534043576729e-06, + "loss": 0.8219394683837891, + "step": 3059 + }, + { + "epoch": 0.7050691244239631, + "grad_norm": 0.8179546123014678, + "learning_rate": 1.5402328397429795e-06, + "loss": 0.706437349319458, + "step": 3060 + }, + { + "epoch": 0.7052995391705069, + "grad_norm": 0.9400567182130463, + "learning_rate": 1.5399121967142051e-06, + "loss": 0.8669443130493164, + "step": 3061 + }, + { + "epoch": 0.7055299539170506, + "grad_norm": 0.9808762608140087, + "learning_rate": 1.5395914753178897e-06, + "loss": 0.7995564937591553, + "step": 3062 + }, + { + "epoch": 0.7057603686635945, + "grad_norm": 1.0691077372052262, + "learning_rate": 1.5392706756005862e-06, + "loss": 0.7840889692306519, + "step": 3063 + }, + { + "epoch": 0.7059907834101382, + "grad_norm": 0.9593102373354429, + "learning_rate": 1.5389497976088582e-06, + "loss": 0.8231604695320129, + "step": 3064 + }, + { + "epoch": 0.706221198156682, + "grad_norm": 1.0423471516482703, + "learning_rate": 1.5386288413892801e-06, + "loss": 0.7821571826934814, + "step": 3065 + }, + { + "epoch": 0.7064516129032258, + "grad_norm": 0.9221304357539406, + "learning_rate": 1.538307806988439e-06, + "loss": 0.736830472946167, + "step": 3066 + }, + { + "epoch": 0.7066820276497696, + "grad_norm": 0.8124713959576904, + "learning_rate": 
1.537986694452932e-06, + "loss": 0.7783113718032837, + "step": 3067 + }, + { + "epoch": 0.7069124423963133, + "grad_norm": 0.8679700879266566, + "learning_rate": 1.5376655038293692e-06, + "loss": 0.8000421524047852, + "step": 3068 + }, + { + "epoch": 0.7071428571428572, + "grad_norm": 0.8513728527683974, + "learning_rate": 1.5373442351643696e-06, + "loss": 0.7446980476379395, + "step": 3069 + }, + { + "epoch": 0.7073732718894009, + "grad_norm": 0.8188336762916474, + "learning_rate": 1.537022888504566e-06, + "loss": 0.7018321752548218, + "step": 3070 + }, + { + "epoch": 0.7076036866359448, + "grad_norm": 0.8259052522128728, + "learning_rate": 1.5367014638966008e-06, + "loss": 0.6903716325759888, + "step": 3071 + }, + { + "epoch": 0.7078341013824885, + "grad_norm": 1.0909385113291765, + "learning_rate": 1.5363799613871289e-06, + "loss": 0.9635254144668579, + "step": 3072 + }, + { + "epoch": 0.7080645161290322, + "grad_norm": 0.7335179559352851, + "learning_rate": 1.5360583810228156e-06, + "loss": 0.8612154722213745, + "step": 3073 + }, + { + "epoch": 0.708294930875576, + "grad_norm": 0.9395034612023028, + "learning_rate": 1.5357367228503376e-06, + "loss": 0.8632407784461975, + "step": 3074 + }, + { + "epoch": 0.7085253456221198, + "grad_norm": 0.9383639731759232, + "learning_rate": 1.5354149869163839e-06, + "loss": 0.8117856979370117, + "step": 3075 + }, + { + "epoch": 0.7087557603686636, + "grad_norm": 0.9770895875008837, + "learning_rate": 1.5350931732676538e-06, + "loss": 0.8062559366226196, + "step": 3076 + }, + { + "epoch": 0.7089861751152073, + "grad_norm": 0.9191794034062433, + "learning_rate": 1.5347712819508576e-06, + "loss": 0.7918965816497803, + "step": 3077 + }, + { + "epoch": 0.7092165898617512, + "grad_norm": 0.7897301018455927, + "learning_rate": 1.534449313012718e-06, + "loss": 0.7564986944198608, + "step": 3078 + }, + { + "epoch": 0.7094470046082949, + "grad_norm": 0.774017262501344, + "learning_rate": 1.534127266499968e-06, + "loss": 
0.8261928558349609, + "step": 3079 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.9288792217475005, + "learning_rate": 1.5338051424593524e-06, + "loss": 0.705269455909729, + "step": 3080 + }, + { + "epoch": 0.7099078341013825, + "grad_norm": 0.8500383243043894, + "learning_rate": 1.5334829409376271e-06, + "loss": 0.823144793510437, + "step": 3081 + }, + { + "epoch": 0.7101382488479263, + "grad_norm": 0.7512588375717618, + "learning_rate": 1.5331606619815588e-06, + "loss": 0.7772066593170166, + "step": 3082 + }, + { + "epoch": 0.71036866359447, + "grad_norm": 1.0827682012637947, + "learning_rate": 1.5328383056379265e-06, + "loss": 0.8901097178459167, + "step": 3083 + }, + { + "epoch": 0.7105990783410139, + "grad_norm": 0.9540489638748495, + "learning_rate": 1.5325158719535196e-06, + "loss": 0.8454819917678833, + "step": 3084 + }, + { + "epoch": 0.7108294930875576, + "grad_norm": 0.8879734338037916, + "learning_rate": 1.5321933609751388e-06, + "loss": 0.8444693684577942, + "step": 3085 + }, + { + "epoch": 0.7110599078341013, + "grad_norm": 1.0157021807199436, + "learning_rate": 1.5318707727495964e-06, + "loss": 0.7893826961517334, + "step": 3086 + }, + { + "epoch": 0.7112903225806452, + "grad_norm": 0.9711563338551928, + "learning_rate": 1.531548107323715e-06, + "loss": 0.7536686658859253, + "step": 3087 + }, + { + "epoch": 0.7115207373271889, + "grad_norm": 1.1272305964721914, + "learning_rate": 1.53122536474433e-06, + "loss": 0.8105358481407166, + "step": 3088 + }, + { + "epoch": 0.7117511520737327, + "grad_norm": 0.8430783893005721, + "learning_rate": 1.530902545058286e-06, + "loss": 0.8104212284088135, + "step": 3089 + }, + { + "epoch": 0.7119815668202765, + "grad_norm": 1.1740010494566606, + "learning_rate": 1.5305796483124405e-06, + "loss": 0.7738373279571533, + "step": 3090 + }, + { + "epoch": 0.7122119815668203, + "grad_norm": 0.8346644560955941, + "learning_rate": 1.5302566745536618e-06, + "loss": 0.7583746910095215, + "step": 3091 + }, + { + 
"epoch": 0.712442396313364, + "grad_norm": 1.0290772907257426, + "learning_rate": 1.5299336238288286e-06, + "loss": 0.8370871543884277, + "step": 3092 + }, + { + "epoch": 0.7126728110599079, + "grad_norm": 0.8908237623549358, + "learning_rate": 1.5296104961848314e-06, + "loss": 0.7833988666534424, + "step": 3093 + }, + { + "epoch": 0.7129032258064516, + "grad_norm": 1.135734716262211, + "learning_rate": 1.5292872916685717e-06, + "loss": 0.8024515509605408, + "step": 3094 + }, + { + "epoch": 0.7131336405529954, + "grad_norm": 0.8156588034123838, + "learning_rate": 1.5289640103269623e-06, + "loss": 0.8044738173484802, + "step": 3095 + }, + { + "epoch": 0.7133640552995392, + "grad_norm": 0.846268334708117, + "learning_rate": 1.5286406522069273e-06, + "loss": 0.7783721685409546, + "step": 3096 + }, + { + "epoch": 0.7135944700460829, + "grad_norm": 0.8004616169511741, + "learning_rate": 1.5283172173554014e-06, + "loss": 0.693443238735199, + "step": 3097 + }, + { + "epoch": 0.7138248847926267, + "grad_norm": 0.9862921565687749, + "learning_rate": 1.527993705819331e-06, + "loss": 0.8142237663269043, + "step": 3098 + }, + { + "epoch": 0.7140552995391705, + "grad_norm": 0.9077662799949481, + "learning_rate": 1.5276701176456726e-06, + "loss": 0.790626049041748, + "step": 3099 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 1.0485200242859731, + "learning_rate": 1.5273464528813953e-06, + "loss": 0.9460805654525757, + "step": 3100 + }, + { + "epoch": 0.714516129032258, + "grad_norm": 0.902776913050398, + "learning_rate": 1.5270227115734789e-06, + "loss": 0.6906337738037109, + "step": 3101 + }, + { + "epoch": 0.7147465437788019, + "grad_norm": 0.8514512995363496, + "learning_rate": 1.526698893768913e-06, + "loss": 0.8828556537628174, + "step": 3102 + }, + { + "epoch": 0.7149769585253456, + "grad_norm": 1.0568586756231748, + "learning_rate": 1.5263749995147004e-06, + "loss": 0.8395771980285645, + "step": 3103 + }, + { + "epoch": 0.7152073732718894, + "grad_norm": 
0.814014727084384, + "learning_rate": 1.5260510288578535e-06, + "loss": 0.7103895545005798, + "step": 3104 + }, + { + "epoch": 0.7154377880184332, + "grad_norm": 1.0670304040497072, + "learning_rate": 1.5257269818453956e-06, + "loss": 0.9780298471450806, + "step": 3105 + }, + { + "epoch": 0.715668202764977, + "grad_norm": 0.777700102492748, + "learning_rate": 1.525402858524363e-06, + "loss": 0.8176128268241882, + "step": 3106 + }, + { + "epoch": 0.7158986175115207, + "grad_norm": 0.8127092170976247, + "learning_rate": 1.5250786589418008e-06, + "loss": 0.6766567230224609, + "step": 3107 + }, + { + "epoch": 0.7161290322580646, + "grad_norm": 0.8076252538068988, + "learning_rate": 1.5247543831447662e-06, + "loss": 0.7910950183868408, + "step": 3108 + }, + { + "epoch": 0.7163594470046083, + "grad_norm": 0.76882132080824, + "learning_rate": 1.5244300311803275e-06, + "loss": 0.8444501161575317, + "step": 3109 + }, + { + "epoch": 0.716589861751152, + "grad_norm": 0.9073390489490682, + "learning_rate": 1.5241056030955642e-06, + "loss": 0.7180038690567017, + "step": 3110 + }, + { + "epoch": 0.7168202764976959, + "grad_norm": 0.8535510406326756, + "learning_rate": 1.5237810989375663e-06, + "loss": 0.8563181757926941, + "step": 3111 + }, + { + "epoch": 0.7170506912442396, + "grad_norm": 0.7281554723991874, + "learning_rate": 1.5234565187534353e-06, + "loss": 0.7792840003967285, + "step": 3112 + }, + { + "epoch": 0.7172811059907834, + "grad_norm": 1.2546504724448617, + "learning_rate": 1.5231318625902835e-06, + "loss": 0.8414837121963501, + "step": 3113 + }, + { + "epoch": 0.7175115207373272, + "grad_norm": 0.9151299107605344, + "learning_rate": 1.5228071304952348e-06, + "loss": 0.8549888134002686, + "step": 3114 + }, + { + "epoch": 0.717741935483871, + "grad_norm": 0.8858229770055023, + "learning_rate": 1.5224823225154228e-06, + "loss": 0.7973321676254272, + "step": 3115 + }, + { + "epoch": 0.7179723502304147, + "grad_norm": 0.8923496131316503, + "learning_rate": 
1.5221574386979937e-06, + "loss": 0.7328228950500488, + "step": 3116 + }, + { + "epoch": 0.7182027649769586, + "grad_norm": 0.8315355877258431, + "learning_rate": 1.5218324790901033e-06, + "loss": 0.8953883051872253, + "step": 3117 + }, + { + "epoch": 0.7184331797235023, + "grad_norm": 0.8252416441396693, + "learning_rate": 1.5215074437389195e-06, + "loss": 0.7804527282714844, + "step": 3118 + }, + { + "epoch": 0.7186635944700461, + "grad_norm": 1.0592650685202745, + "learning_rate": 1.5211823326916204e-06, + "loss": 0.7581363320350647, + "step": 3119 + }, + { + "epoch": 0.7188940092165899, + "grad_norm": 0.9812896234713268, + "learning_rate": 1.520857145995396e-06, + "loss": 0.7720214128494263, + "step": 3120 + }, + { + "epoch": 0.7191244239631336, + "grad_norm": 0.8448153689850479, + "learning_rate": 1.5205318836974463e-06, + "loss": 0.7142826914787292, + "step": 3121 + }, + { + "epoch": 0.7193548387096774, + "grad_norm": 1.0627992363231917, + "learning_rate": 1.520206545844983e-06, + "loss": 0.715612530708313, + "step": 3122 + }, + { + "epoch": 0.7195852534562212, + "grad_norm": 1.1048993433011334, + "learning_rate": 1.5198811324852277e-06, + "loss": 0.8851219415664673, + "step": 3123 + }, + { + "epoch": 0.719815668202765, + "grad_norm": 0.9292687584217408, + "learning_rate": 1.5195556436654146e-06, + "loss": 0.981631875038147, + "step": 3124 + }, + { + "epoch": 0.7200460829493087, + "grad_norm": 1.043088312445038, + "learning_rate": 1.5192300794327876e-06, + "loss": 0.8586313724517822, + "step": 3125 + }, + { + "epoch": 0.7202764976958526, + "grad_norm": 1.082548105463139, + "learning_rate": 1.518904439834602e-06, + "loss": 0.8863250017166138, + "step": 3126 + }, + { + "epoch": 0.7205069124423963, + "grad_norm": 0.8136107336174612, + "learning_rate": 1.5185787249181239e-06, + "loss": 0.864910900592804, + "step": 3127 + }, + { + "epoch": 0.7207373271889401, + "grad_norm": 0.9898417106954193, + "learning_rate": 1.5182529347306302e-06, + "loss": 
0.8120951652526855, + "step": 3128 + }, + { + "epoch": 0.7209677419354839, + "grad_norm": 1.008844559262399, + "learning_rate": 1.517927069319409e-06, + "loss": 0.7866026163101196, + "step": 3129 + }, + { + "epoch": 0.7211981566820277, + "grad_norm": 0.9577789377394936, + "learning_rate": 1.5176011287317598e-06, + "loss": 0.8610655069351196, + "step": 3130 + }, + { + "epoch": 0.7214285714285714, + "grad_norm": 0.8861108738387133, + "learning_rate": 1.5172751130149915e-06, + "loss": 0.7463846206665039, + "step": 3131 + }, + { + "epoch": 0.7216589861751153, + "grad_norm": 0.7361410685782023, + "learning_rate": 1.5169490222164254e-06, + "loss": 0.6578936576843262, + "step": 3132 + }, + { + "epoch": 0.721889400921659, + "grad_norm": 0.9361369886672088, + "learning_rate": 1.516622856383393e-06, + "loss": 0.6849668025970459, + "step": 3133 + }, + { + "epoch": 0.7221198156682027, + "grad_norm": 1.0686822202217916, + "learning_rate": 1.5162966155632372e-06, + "loss": 0.9549611806869507, + "step": 3134 + }, + { + "epoch": 0.7223502304147466, + "grad_norm": 0.9063080856885865, + "learning_rate": 1.5159702998033113e-06, + "loss": 0.8005616664886475, + "step": 3135 + }, + { + "epoch": 0.7225806451612903, + "grad_norm": 1.089721709643384, + "learning_rate": 1.5156439091509793e-06, + "loss": 0.8980830311775208, + "step": 3136 + }, + { + "epoch": 0.7228110599078341, + "grad_norm": 1.012161312959267, + "learning_rate": 1.5153174436536166e-06, + "loss": 0.8247464895248413, + "step": 3137 + }, + { + "epoch": 0.7230414746543778, + "grad_norm": 0.9582357561913161, + "learning_rate": 1.5149909033586088e-06, + "loss": 0.818629264831543, + "step": 3138 + }, + { + "epoch": 0.7232718894009217, + "grad_norm": 0.7730251673290138, + "learning_rate": 1.5146642883133532e-06, + "loss": 0.8928704261779785, + "step": 3139 + }, + { + "epoch": 0.7235023041474654, + "grad_norm": 1.199560365249708, + "learning_rate": 1.5143375985652576e-06, + "loss": 0.9330282807350159, + "step": 3140 + }, + { + 
"epoch": 0.7237327188940093, + "grad_norm": 0.9749101527395967, + "learning_rate": 1.5140108341617405e-06, + "loss": 0.7961822748184204, + "step": 3141 + }, + { + "epoch": 0.723963133640553, + "grad_norm": 0.9244859383947029, + "learning_rate": 1.513683995150231e-06, + "loss": 0.8073769807815552, + "step": 3142 + }, + { + "epoch": 0.7241935483870968, + "grad_norm": 1.0469784848396728, + "learning_rate": 1.51335708157817e-06, + "loss": 0.946292519569397, + "step": 3143 + }, + { + "epoch": 0.7244239631336405, + "grad_norm": 0.8214787899217685, + "learning_rate": 1.513030093493008e-06, + "loss": 0.806084156036377, + "step": 3144 + }, + { + "epoch": 0.7246543778801844, + "grad_norm": 0.9086362129225068, + "learning_rate": 1.5127030309422072e-06, + "loss": 0.8804534673690796, + "step": 3145 + }, + { + "epoch": 0.7248847926267281, + "grad_norm": 0.973773267534968, + "learning_rate": 1.51237589397324e-06, + "loss": 0.7489848136901855, + "step": 3146 + }, + { + "epoch": 0.7251152073732718, + "grad_norm": 1.047973105384132, + "learning_rate": 1.5120486826335905e-06, + "loss": 0.875586986541748, + "step": 3147 + }, + { + "epoch": 0.7253456221198157, + "grad_norm": 0.8473382638758681, + "learning_rate": 1.5117213969707522e-06, + "loss": 0.8334758281707764, + "step": 3148 + }, + { + "epoch": 0.7255760368663594, + "grad_norm": 0.8693445792084491, + "learning_rate": 1.5113940370322306e-06, + "loss": 0.8010859489440918, + "step": 3149 + }, + { + "epoch": 0.7258064516129032, + "grad_norm": 0.8638975130346471, + "learning_rate": 1.5110666028655417e-06, + "loss": 0.7907547950744629, + "step": 3150 + }, + { + "epoch": 0.726036866359447, + "grad_norm": 0.9542895726151109, + "learning_rate": 1.5107390945182117e-06, + "loss": 0.8922848105430603, + "step": 3151 + }, + { + "epoch": 0.7262672811059908, + "grad_norm": 0.7865624103758176, + "learning_rate": 1.5104115120377783e-06, + "loss": 0.7418628931045532, + "step": 3152 + }, + { + "epoch": 0.7264976958525345, + "grad_norm": 
1.0285540479216404, + "learning_rate": 1.51008385547179e-06, + "loss": 0.9063338041305542, + "step": 3153 + }, + { + "epoch": 0.7267281105990784, + "grad_norm": 1.0080575916686718, + "learning_rate": 1.5097561248678047e-06, + "loss": 0.8718822002410889, + "step": 3154 + }, + { + "epoch": 0.7269585253456221, + "grad_norm": 1.0055226715830414, + "learning_rate": 1.5094283202733934e-06, + "loss": 0.950742244720459, + "step": 3155 + }, + { + "epoch": 0.727188940092166, + "grad_norm": 1.126636802719941, + "learning_rate": 1.5091004417361353e-06, + "loss": 0.7963443994522095, + "step": 3156 + }, + { + "epoch": 0.7274193548387097, + "grad_norm": 1.0644638923319971, + "learning_rate": 1.5087724893036225e-06, + "loss": 0.8428621888160706, + "step": 3157 + }, + { + "epoch": 0.7276497695852534, + "grad_norm": 1.0421355661787988, + "learning_rate": 1.508444463023456e-06, + "loss": 0.8271539211273193, + "step": 3158 + }, + { + "epoch": 0.7278801843317972, + "grad_norm": 0.7345991655152693, + "learning_rate": 1.508116362943249e-06, + "loss": 0.7899917364120483, + "step": 3159 + }, + { + "epoch": 0.728110599078341, + "grad_norm": 1.1916065857121023, + "learning_rate": 1.5077881891106246e-06, + "loss": 0.8734809160232544, + "step": 3160 + }, + { + "epoch": 0.7283410138248848, + "grad_norm": 1.0138536766133128, + "learning_rate": 1.5074599415732164e-06, + "loss": 0.7740491628646851, + "step": 3161 + }, + { + "epoch": 0.7285714285714285, + "grad_norm": 0.8952462084516831, + "learning_rate": 1.5071316203786698e-06, + "loss": 0.7219515442848206, + "step": 3162 + }, + { + "epoch": 0.7288018433179724, + "grad_norm": 0.7779518912065628, + "learning_rate": 1.50680322557464e-06, + "loss": 0.8122725486755371, + "step": 3163 + }, + { + "epoch": 0.7290322580645161, + "grad_norm": 0.9965727720770509, + "learning_rate": 1.5064747572087923e-06, + "loss": 0.8280072212219238, + "step": 3164 + }, + { + "epoch": 0.7292626728110599, + "grad_norm": 0.9097690003119847, + "learning_rate": 
1.5061462153288047e-06, + "loss": 0.7287842035293579, + "step": 3165 + }, + { + "epoch": 0.7294930875576037, + "grad_norm": 1.0497146109580189, + "learning_rate": 1.5058175999823639e-06, + "loss": 0.8404949903488159, + "step": 3166 + }, + { + "epoch": 0.7297235023041475, + "grad_norm": 0.9887517999095412, + "learning_rate": 1.505488911217168e-06, + "loss": 0.6572415828704834, + "step": 3167 + }, + { + "epoch": 0.7299539170506912, + "grad_norm": 1.0946078663351873, + "learning_rate": 1.5051601490809257e-06, + "loss": 0.8924484848976135, + "step": 3168 + }, + { + "epoch": 0.7301843317972351, + "grad_norm": 1.1648951213224894, + "learning_rate": 1.5048313136213566e-06, + "loss": 0.8701428174972534, + "step": 3169 + }, + { + "epoch": 0.7304147465437788, + "grad_norm": 1.1475520143482136, + "learning_rate": 1.5045024048861906e-06, + "loss": 0.8327716588973999, + "step": 3170 + }, + { + "epoch": 0.7306451612903225, + "grad_norm": 0.9261768702303601, + "learning_rate": 1.5041734229231686e-06, + "loss": 0.8379253149032593, + "step": 3171 + }, + { + "epoch": 0.7308755760368664, + "grad_norm": 0.944084791074753, + "learning_rate": 1.5038443677800413e-06, + "loss": 0.7475664019584656, + "step": 3172 + }, + { + "epoch": 0.7311059907834101, + "grad_norm": 1.2226580752686416, + "learning_rate": 1.5035152395045714e-06, + "loss": 0.9002243280410767, + "step": 3173 + }, + { + "epoch": 0.7313364055299539, + "grad_norm": 0.8355701729873874, + "learning_rate": 1.503186038144531e-06, + "loss": 0.6718685626983643, + "step": 3174 + }, + { + "epoch": 0.7315668202764977, + "grad_norm": 0.8961232238271665, + "learning_rate": 1.5028567637477033e-06, + "loss": 0.6836501359939575, + "step": 3175 + }, + { + "epoch": 0.7317972350230415, + "grad_norm": 0.8859536342600928, + "learning_rate": 1.502527416361882e-06, + "loss": 0.7548954486846924, + "step": 3176 + }, + { + "epoch": 0.7320276497695852, + "grad_norm": 0.9826706955950207, + "learning_rate": 1.5021979960348714e-06, + "loss": 
0.8385212421417236, + "step": 3177 + }, + { + "epoch": 0.7322580645161291, + "grad_norm": 0.8341383572022868, + "learning_rate": 1.5018685028144864e-06, + "loss": 0.8605425357818604, + "step": 3178 + }, + { + "epoch": 0.7324884792626728, + "grad_norm": 0.9464588739740442, + "learning_rate": 1.501538936748553e-06, + "loss": 0.8831393718719482, + "step": 3179 + }, + { + "epoch": 0.7327188940092166, + "grad_norm": 0.8991947067614845, + "learning_rate": 1.5012092978849062e-06, + "loss": 0.6965172290802002, + "step": 3180 + }, + { + "epoch": 0.7329493087557604, + "grad_norm": 1.0090692893685214, + "learning_rate": 1.500879586271394e-06, + "loss": 0.8062859773635864, + "step": 3181 + }, + { + "epoch": 0.7331797235023041, + "grad_norm": 0.7952177607289516, + "learning_rate": 1.5005498019558724e-06, + "loss": 0.8285790681838989, + "step": 3182 + }, + { + "epoch": 0.7334101382488479, + "grad_norm": 0.9848452236152132, + "learning_rate": 1.50021994498621e-06, + "loss": 0.612429141998291, + "step": 3183 + }, + { + "epoch": 0.7336405529953917, + "grad_norm": 0.9156545700522013, + "learning_rate": 1.4998900154102847e-06, + "loss": 0.8271423578262329, + "step": 3184 + }, + { + "epoch": 0.7338709677419355, + "grad_norm": 1.033787601007848, + "learning_rate": 1.499560013275986e-06, + "loss": 0.838964581489563, + "step": 3185 + }, + { + "epoch": 0.7341013824884792, + "grad_norm": 0.973220548768116, + "learning_rate": 1.4992299386312119e-06, + "loss": 0.7902333736419678, + "step": 3186 + }, + { + "epoch": 0.7343317972350231, + "grad_norm": 1.0086369878855088, + "learning_rate": 1.4988997915238735e-06, + "loss": 0.8520635366439819, + "step": 3187 + }, + { + "epoch": 0.7345622119815668, + "grad_norm": 0.9892742658321851, + "learning_rate": 1.4985695720018905e-06, + "loss": 0.8666567206382751, + "step": 3188 + }, + { + "epoch": 0.7347926267281106, + "grad_norm": 0.9672613309802366, + "learning_rate": 1.4982392801131944e-06, + "loss": 0.6930691003799438, + "step": 3189 + }, + { + 
"epoch": 0.7350230414746544, + "grad_norm": 0.7049869743164157, + "learning_rate": 1.4979089159057263e-06, + "loss": 0.7957722544670105, + "step": 3190 + }, + { + "epoch": 0.7352534562211982, + "grad_norm": 1.0247601673009343, + "learning_rate": 1.4975784794274383e-06, + "loss": 0.8966697454452515, + "step": 3191 + }, + { + "epoch": 0.7354838709677419, + "grad_norm": 0.9082832739975722, + "learning_rate": 1.4972479707262926e-06, + "loss": 0.7478537559509277, + "step": 3192 + }, + { + "epoch": 0.7357142857142858, + "grad_norm": 0.9541041339746362, + "learning_rate": 1.4969173898502624e-06, + "loss": 0.8862416744232178, + "step": 3193 + }, + { + "epoch": 0.7359447004608295, + "grad_norm": 0.8171852448254098, + "learning_rate": 1.4965867368473306e-06, + "loss": 0.7910712957382202, + "step": 3194 + }, + { + "epoch": 0.7361751152073732, + "grad_norm": 1.1219879646982642, + "learning_rate": 1.4962560117654916e-06, + "loss": 0.7371944785118103, + "step": 3195 + }, + { + "epoch": 0.7364055299539171, + "grad_norm": 1.097733223938739, + "learning_rate": 1.4959252146527496e-06, + "loss": 0.7966737151145935, + "step": 3196 + }, + { + "epoch": 0.7366359447004608, + "grad_norm": 1.0499505243286467, + "learning_rate": 1.4955943455571188e-06, + "loss": 0.8474653363227844, + "step": 3197 + }, + { + "epoch": 0.7368663594470046, + "grad_norm": 1.1042914253537062, + "learning_rate": 1.4952634045266249e-06, + "loss": 1.0197458267211914, + "step": 3198 + }, + { + "epoch": 0.7370967741935484, + "grad_norm": 1.054872102822339, + "learning_rate": 1.4949323916093036e-06, + "loss": 0.8813979625701904, + "step": 3199 + }, + { + "epoch": 0.7373271889400922, + "grad_norm": 0.9264193586497762, + "learning_rate": 1.4946013068532008e-06, + "loss": 0.9323042631149292, + "step": 3200 + }, + { + "epoch": 0.7375576036866359, + "grad_norm": 1.1184797510334814, + "learning_rate": 1.494270150306373e-06, + "loss": 0.8637902736663818, + "step": 3201 + }, + { + "epoch": 0.7377880184331798, + "grad_norm": 
1.1006860616870338, + "learning_rate": 1.4939389220168875e-06, + "loss": 0.8046854734420776, + "step": 3202 + }, + { + "epoch": 0.7380184331797235, + "grad_norm": 0.9882241685181946, + "learning_rate": 1.4936076220328211e-06, + "loss": 0.7616177201271057, + "step": 3203 + }, + { + "epoch": 0.7382488479262673, + "grad_norm": 1.0795779512267711, + "learning_rate": 1.4932762504022619e-06, + "loss": 0.8548959493637085, + "step": 3204 + }, + { + "epoch": 0.738479262672811, + "grad_norm": 0.7907178615166577, + "learning_rate": 1.492944807173308e-06, + "loss": 0.8062562942504883, + "step": 3205 + }, + { + "epoch": 0.7387096774193549, + "grad_norm": 1.3004819436990922, + "learning_rate": 1.492613292394068e-06, + "loss": 0.8776403069496155, + "step": 3206 + }, + { + "epoch": 0.7389400921658986, + "grad_norm": 1.0654471822316505, + "learning_rate": 1.4922817061126605e-06, + "loss": 0.7528336048126221, + "step": 3207 + }, + { + "epoch": 0.7391705069124423, + "grad_norm": 0.9288011243231857, + "learning_rate": 1.4919500483772152e-06, + "loss": 0.7441881895065308, + "step": 3208 + }, + { + "epoch": 0.7394009216589862, + "grad_norm": 0.9496581250230889, + "learning_rate": 1.4916183192358715e-06, + "loss": 0.8925758004188538, + "step": 3209 + }, + { + "epoch": 0.7396313364055299, + "grad_norm": 0.999519243113449, + "learning_rate": 1.4912865187367798e-06, + "loss": 0.7527008652687073, + "step": 3210 + }, + { + "epoch": 0.7398617511520738, + "grad_norm": 0.8631940848050832, + "learning_rate": 1.4909546469281e-06, + "loss": 0.753572404384613, + "step": 3211 + }, + { + "epoch": 0.7400921658986175, + "grad_norm": 0.938203260102219, + "learning_rate": 1.4906227038580036e-06, + "loss": 0.8884274959564209, + "step": 3212 + }, + { + "epoch": 0.7403225806451613, + "grad_norm": 0.7835821294972823, + "learning_rate": 1.4902906895746707e-06, + "loss": 0.7702244520187378, + "step": 3213 + }, + { + "epoch": 0.740552995391705, + "grad_norm": 1.0140732775513552, + "learning_rate": 
1.4899586041262936e-06, + "loss": 0.8662835359573364, + "step": 3214 + }, + { + "epoch": 0.7407834101382489, + "grad_norm": 1.0357827096613574, + "learning_rate": 1.4896264475610736e-06, + "loss": 0.9819997549057007, + "step": 3215 + }, + { + "epoch": 0.7410138248847926, + "grad_norm": 1.0094197188590162, + "learning_rate": 1.4892942199272232e-06, + "loss": 0.9137614965438843, + "step": 3216 + }, + { + "epoch": 0.7412442396313365, + "grad_norm": 0.8442315992670393, + "learning_rate": 1.488961921272964e-06, + "loss": 0.7554785013198853, + "step": 3217 + }, + { + "epoch": 0.7414746543778802, + "grad_norm": 1.1172745597106868, + "learning_rate": 1.4886295516465296e-06, + "loss": 0.8528940677642822, + "step": 3218 + }, + { + "epoch": 0.7417050691244239, + "grad_norm": 0.9056918439443091, + "learning_rate": 1.4882971110961626e-06, + "loss": 0.7212377786636353, + "step": 3219 + }, + { + "epoch": 0.7419354838709677, + "grad_norm": 0.9349124518247459, + "learning_rate": 1.4879645996701161e-06, + "loss": 0.7767617702484131, + "step": 3220 + }, + { + "epoch": 0.7421658986175115, + "grad_norm": 0.8749389005214587, + "learning_rate": 1.4876320174166542e-06, + "loss": 0.8083292245864868, + "step": 3221 + }, + { + "epoch": 0.7423963133640553, + "grad_norm": 1.14484646357819, + "learning_rate": 1.4872993643840506e-06, + "loss": 0.8652364015579224, + "step": 3222 + }, + { + "epoch": 0.742626728110599, + "grad_norm": 0.9176030431238368, + "learning_rate": 1.486966640620589e-06, + "loss": 0.7455019950866699, + "step": 3223 + }, + { + "epoch": 0.7428571428571429, + "grad_norm": 1.0637469159007076, + "learning_rate": 1.4866338461745644e-06, + "loss": 0.7881917953491211, + "step": 3224 + }, + { + "epoch": 0.7430875576036866, + "grad_norm": 1.0955814961304737, + "learning_rate": 1.4863009810942813e-06, + "loss": 0.8148372173309326, + "step": 3225 + }, + { + "epoch": 0.7433179723502304, + "grad_norm": 0.7991384008669099, + "learning_rate": 1.4859680454280547e-06, + "loss": 
0.6574658751487732, + "step": 3226 + }, + { + "epoch": 0.7435483870967742, + "grad_norm": 0.9231484623709659, + "learning_rate": 1.4856350392242094e-06, + "loss": 0.7831655740737915, + "step": 3227 + }, + { + "epoch": 0.743778801843318, + "grad_norm": 0.8080817272772121, + "learning_rate": 1.485301962531081e-06, + "loss": 0.7406231164932251, + "step": 3228 + }, + { + "epoch": 0.7440092165898617, + "grad_norm": 0.9500561612529754, + "learning_rate": 1.4849688153970154e-06, + "loss": 0.8092324733734131, + "step": 3229 + }, + { + "epoch": 0.7442396313364056, + "grad_norm": 0.969093760928221, + "learning_rate": 1.4846355978703679e-06, + "loss": 0.6662560701370239, + "step": 3230 + }, + { + "epoch": 0.7444700460829493, + "grad_norm": 0.8941354868939383, + "learning_rate": 1.4843023099995052e-06, + "loss": 0.8064731359481812, + "step": 3231 + }, + { + "epoch": 0.744700460829493, + "grad_norm": 1.0463529761361023, + "learning_rate": 1.4839689518328037e-06, + "loss": 0.7424519658088684, + "step": 3232 + }, + { + "epoch": 0.7449308755760369, + "grad_norm": 0.9618875213680247, + "learning_rate": 1.4836355234186489e-06, + "loss": 0.7851438522338867, + "step": 3233 + }, + { + "epoch": 0.7451612903225806, + "grad_norm": 1.2534680382280676, + "learning_rate": 1.4833020248054381e-06, + "loss": 0.896986722946167, + "step": 3234 + }, + { + "epoch": 0.7453917050691244, + "grad_norm": 1.3688846458082455, + "learning_rate": 1.4829684560415787e-06, + "loss": 0.9469928741455078, + "step": 3235 + }, + { + "epoch": 0.7456221198156682, + "grad_norm": 0.8653442286827894, + "learning_rate": 1.4826348171754872e-06, + "loss": 0.7527188062667847, + "step": 3236 + }, + { + "epoch": 0.745852534562212, + "grad_norm": 0.9575212903893582, + "learning_rate": 1.4823011082555907e-06, + "loss": 0.7758080959320068, + "step": 3237 + }, + { + "epoch": 0.7460829493087557, + "grad_norm": 0.9454436343118328, + "learning_rate": 1.481967329330327e-06, + "loss": 0.8359881043434143, + "step": 3238 + }, + { + 
"epoch": 0.7463133640552996, + "grad_norm": 0.7567559878181612, + "learning_rate": 1.4816334804481434e-06, + "loss": 0.6576982736587524, + "step": 3239 + }, + { + "epoch": 0.7465437788018433, + "grad_norm": 1.0012365138594377, + "learning_rate": 1.4812995616574978e-06, + "loss": 0.7919917106628418, + "step": 3240 + }, + { + "epoch": 0.7467741935483871, + "grad_norm": 0.7865137499791297, + "learning_rate": 1.480965573006858e-06, + "loss": 0.7682263851165771, + "step": 3241 + }, + { + "epoch": 0.7470046082949309, + "grad_norm": 1.0123241682054298, + "learning_rate": 1.4806315145447017e-06, + "loss": 0.8573193550109863, + "step": 3242 + }, + { + "epoch": 0.7472350230414746, + "grad_norm": 0.8191884786597581, + "learning_rate": 1.4802973863195174e-06, + "loss": 0.8473606109619141, + "step": 3243 + }, + { + "epoch": 0.7474654377880184, + "grad_norm": 0.8754073951862541, + "learning_rate": 1.4799631883798033e-06, + "loss": 0.8110678195953369, + "step": 3244 + }, + { + "epoch": 0.7476958525345622, + "grad_norm": 1.2161581760732987, + "learning_rate": 1.4796289207740681e-06, + "loss": 0.6624661087989807, + "step": 3245 + }, + { + "epoch": 0.747926267281106, + "grad_norm": 0.7356293873938221, + "learning_rate": 1.47929458355083e-06, + "loss": 0.8145536184310913, + "step": 3246 + }, + { + "epoch": 0.7481566820276497, + "grad_norm": 0.921128997158793, + "learning_rate": 1.4789601767586172e-06, + "loss": 0.7819876074790955, + "step": 3247 + }, + { + "epoch": 0.7483870967741936, + "grad_norm": 0.973465003660405, + "learning_rate": 1.4786257004459692e-06, + "loss": 0.7573810815811157, + "step": 3248 + }, + { + "epoch": 0.7486175115207373, + "grad_norm": 1.061603620628762, + "learning_rate": 1.4782911546614343e-06, + "loss": 0.8149522542953491, + "step": 3249 + }, + { + "epoch": 0.7488479262672811, + "grad_norm": 1.023358335101362, + "learning_rate": 1.4779565394535714e-06, + "loss": 0.9935284852981567, + "step": 3250 + }, + { + "epoch": 0.7490783410138249, + "grad_norm": 
0.8488935416479958, + "learning_rate": 1.4776218548709497e-06, + "loss": 0.8673371076583862, + "step": 3251 + }, + { + "epoch": 0.7493087557603687, + "grad_norm": 1.0304468521950305, + "learning_rate": 1.4772871009621477e-06, + "loss": 0.8569149374961853, + "step": 3252 + }, + { + "epoch": 0.7495391705069124, + "grad_norm": 0.8613722173703313, + "learning_rate": 1.4769522777757551e-06, + "loss": 0.7177854776382446, + "step": 3253 + }, + { + "epoch": 0.7497695852534563, + "grad_norm": 1.0681726446759283, + "learning_rate": 1.4766173853603706e-06, + "loss": 0.8115622997283936, + "step": 3254 + }, + { + "epoch": 0.75, + "grad_norm": 0.782977490159237, + "learning_rate": 1.4762824237646038e-06, + "loss": 0.7209019660949707, + "step": 3255 + }, + { + "epoch": 0.7502304147465437, + "grad_norm": 0.9264325214188774, + "learning_rate": 1.4759473930370736e-06, + "loss": 0.8433470726013184, + "step": 3256 + }, + { + "epoch": 0.7504608294930876, + "grad_norm": 1.0399152705693322, + "learning_rate": 1.4756122932264093e-06, + "loss": 0.853674054145813, + "step": 3257 + }, + { + "epoch": 0.7506912442396313, + "grad_norm": 0.9978956076189626, + "learning_rate": 1.4752771243812503e-06, + "loss": 0.8645769357681274, + "step": 3258 + }, + { + "epoch": 0.7509216589861751, + "grad_norm": 1.4046905803968728, + "learning_rate": 1.474941886550246e-06, + "loss": 0.927452564239502, + "step": 3259 + }, + { + "epoch": 0.7511520737327189, + "grad_norm": 0.8642581213790671, + "learning_rate": 1.4746065797820552e-06, + "loss": 0.7461255788803101, + "step": 3260 + }, + { + "epoch": 0.7513824884792627, + "grad_norm": 0.9230380534710827, + "learning_rate": 1.4742712041253481e-06, + "loss": 0.8737163543701172, + "step": 3261 + }, + { + "epoch": 0.7516129032258064, + "grad_norm": 0.8624828182814519, + "learning_rate": 1.4739357596288036e-06, + "loss": 0.7148758172988892, + "step": 3262 + }, + { + "epoch": 0.7518433179723503, + "grad_norm": 0.8930446588032352, + "learning_rate": 
1.4736002463411108e-06, + "loss": 0.738334596157074, + "step": 3263 + }, + { + "epoch": 0.752073732718894, + "grad_norm": 0.9237791770446419, + "learning_rate": 1.4732646643109692e-06, + "loss": 0.7733340263366699, + "step": 3264 + }, + { + "epoch": 0.7523041474654378, + "grad_norm": 0.8815526032135323, + "learning_rate": 1.4729290135870883e-06, + "loss": 0.7882881164550781, + "step": 3265 + }, + { + "epoch": 0.7525345622119816, + "grad_norm": 1.029688172185613, + "learning_rate": 1.472593294218187e-06, + "loss": 0.7908357381820679, + "step": 3266 + }, + { + "epoch": 0.7527649769585254, + "grad_norm": 1.0791156682188368, + "learning_rate": 1.4722575062529946e-06, + "loss": 0.8818062543869019, + "step": 3267 + }, + { + "epoch": 0.7529953917050691, + "grad_norm": 0.9552677127935061, + "learning_rate": 1.4719216497402504e-06, + "loss": 0.7152599692344666, + "step": 3268 + }, + { + "epoch": 0.7532258064516129, + "grad_norm": 0.8322037056106782, + "learning_rate": 1.4715857247287036e-06, + "loss": 0.8503165245056152, + "step": 3269 + }, + { + "epoch": 0.7534562211981567, + "grad_norm": 0.9223729567181368, + "learning_rate": 1.4712497312671128e-06, + "loss": 0.8382623195648193, + "step": 3270 + }, + { + "epoch": 0.7536866359447004, + "grad_norm": 1.0456882119229616, + "learning_rate": 1.4709136694042479e-06, + "loss": 0.8358533382415771, + "step": 3271 + }, + { + "epoch": 0.7539170506912443, + "grad_norm": 0.850717529465525, + "learning_rate": 1.4705775391888868e-06, + "loss": 0.6735624670982361, + "step": 3272 + }, + { + "epoch": 0.754147465437788, + "grad_norm": 0.8890452669379437, + "learning_rate": 1.470241340669819e-06, + "loss": 0.8343949317932129, + "step": 3273 + }, + { + "epoch": 0.7543778801843318, + "grad_norm": 0.9508610560109901, + "learning_rate": 1.4699050738958434e-06, + "loss": 0.8204318284988403, + "step": 3274 + }, + { + "epoch": 0.7546082949308756, + "grad_norm": 0.9484772286558124, + "learning_rate": 1.4695687389157684e-06, + "loss": 
0.7541854977607727, + "step": 3275 + }, + { + "epoch": 0.7548387096774194, + "grad_norm": 0.8425504123859369, + "learning_rate": 1.4692323357784122e-06, + "loss": 0.8144943714141846, + "step": 3276 + }, + { + "epoch": 0.7550691244239631, + "grad_norm": 0.8699783126306536, + "learning_rate": 1.468895864532604e-06, + "loss": 0.9045677781105042, + "step": 3277 + }, + { + "epoch": 0.755299539170507, + "grad_norm": 1.1586104318366583, + "learning_rate": 1.4685593252271816e-06, + "loss": 0.8818730115890503, + "step": 3278 + }, + { + "epoch": 0.7555299539170507, + "grad_norm": 1.013621065000431, + "learning_rate": 1.4682227179109932e-06, + "loss": 0.8582229614257812, + "step": 3279 + }, + { + "epoch": 0.7557603686635944, + "grad_norm": 1.016541372354986, + "learning_rate": 1.4678860426328977e-06, + "loss": 0.8769974708557129, + "step": 3280 + }, + { + "epoch": 0.7559907834101383, + "grad_norm": 0.8474484944100091, + "learning_rate": 1.467549299441762e-06, + "loss": 0.8034937381744385, + "step": 3281 + }, + { + "epoch": 0.756221198156682, + "grad_norm": 0.9998169463505984, + "learning_rate": 1.4672124883864646e-06, + "loss": 0.9057378768920898, + "step": 3282 + }, + { + "epoch": 0.7564516129032258, + "grad_norm": 0.9160359407680143, + "learning_rate": 1.4668756095158929e-06, + "loss": 0.8039969205856323, + "step": 3283 + }, + { + "epoch": 0.7566820276497696, + "grad_norm": 0.7311572278532684, + "learning_rate": 1.4665386628789448e-06, + "loss": 0.887493908405304, + "step": 3284 + }, + { + "epoch": 0.7569124423963134, + "grad_norm": 0.9749833066021305, + "learning_rate": 1.4662016485245271e-06, + "loss": 0.783561646938324, + "step": 3285 + }, + { + "epoch": 0.7571428571428571, + "grad_norm": 1.1972955361865625, + "learning_rate": 1.4658645665015579e-06, + "loss": 0.7526337504386902, + "step": 3286 + }, + { + "epoch": 0.757373271889401, + "grad_norm": 1.0074911468135093, + "learning_rate": 1.4655274168589633e-06, + "loss": 0.8583099842071533, + "step": 3287 + }, + { + 
"epoch": 0.7576036866359447, + "grad_norm": 0.9193819222275846, + "learning_rate": 1.4651901996456802e-06, + "loss": 0.743253767490387, + "step": 3288 + }, + { + "epoch": 0.7578341013824885, + "grad_norm": 0.9481332173734432, + "learning_rate": 1.4648529149106555e-06, + "loss": 0.8763987421989441, + "step": 3289 + }, + { + "epoch": 0.7580645161290323, + "grad_norm": 0.9531439206540595, + "learning_rate": 1.4645155627028455e-06, + "loss": 0.8388645648956299, + "step": 3290 + }, + { + "epoch": 0.7582949308755761, + "grad_norm": 0.9430549047432926, + "learning_rate": 1.4641781430712167e-06, + "loss": 0.8943589925765991, + "step": 3291 + }, + { + "epoch": 0.7585253456221198, + "grad_norm": 0.897306276129885, + "learning_rate": 1.463840656064745e-06, + "loss": 0.9224259257316589, + "step": 3292 + }, + { + "epoch": 0.7587557603686635, + "grad_norm": 0.7118962108569266, + "learning_rate": 1.463503101732416e-06, + "loss": 0.5836232900619507, + "step": 3293 + }, + { + "epoch": 0.7589861751152074, + "grad_norm": 1.2610309452085111, + "learning_rate": 1.4631654801232255e-06, + "loss": 0.6700382828712463, + "step": 3294 + }, + { + "epoch": 0.7592165898617511, + "grad_norm": 0.9159006934526643, + "learning_rate": 1.4628277912861785e-06, + "loss": 0.7876112461090088, + "step": 3295 + }, + { + "epoch": 0.759447004608295, + "grad_norm": 0.9073380438964382, + "learning_rate": 1.4624900352702905e-06, + "loss": 0.8410799503326416, + "step": 3296 + }, + { + "epoch": 0.7596774193548387, + "grad_norm": 0.931630117662002, + "learning_rate": 1.4621522121245859e-06, + "loss": 0.9615974426269531, + "step": 3297 + }, + { + "epoch": 0.7599078341013825, + "grad_norm": 1.1213393394374043, + "learning_rate": 1.4618143218980996e-06, + "loss": 0.7973389625549316, + "step": 3298 + }, + { + "epoch": 0.7601382488479262, + "grad_norm": 0.7835636014361216, + "learning_rate": 1.461476364639876e-06, + "loss": 0.7734094858169556, + "step": 3299 + }, + { + "epoch": 0.7603686635944701, + "grad_norm": 
0.9681758067915807, + "learning_rate": 1.461138340398969e-06, + "loss": 0.7365939617156982, + "step": 3300 + }, + { + "epoch": 0.7605990783410138, + "grad_norm": 0.9251627601521192, + "learning_rate": 1.4608002492244421e-06, + "loss": 0.822052001953125, + "step": 3301 + }, + { + "epoch": 0.7608294930875577, + "grad_norm": 0.83536047590978, + "learning_rate": 1.460462091165369e-06, + "loss": 0.7220577001571655, + "step": 3302 + }, + { + "epoch": 0.7610599078341014, + "grad_norm": 0.9806834080573716, + "learning_rate": 1.4601238662708332e-06, + "loss": 0.9795923233032227, + "step": 3303 + }, + { + "epoch": 0.7612903225806451, + "grad_norm": 1.0452301496717684, + "learning_rate": 1.4597855745899273e-06, + "loss": 0.804523229598999, + "step": 3304 + }, + { + "epoch": 0.761520737327189, + "grad_norm": 0.936039712838613, + "learning_rate": 1.4594472161717536e-06, + "loss": 0.7630297541618347, + "step": 3305 + }, + { + "epoch": 0.7617511520737327, + "grad_norm": 1.008258749087615, + "learning_rate": 1.4591087910654254e-06, + "loss": 0.7088560461997986, + "step": 3306 + }, + { + "epoch": 0.7619815668202765, + "grad_norm": 0.8612515545716848, + "learning_rate": 1.4587702993200637e-06, + "loss": 0.6627416014671326, + "step": 3307 + }, + { + "epoch": 0.7622119815668202, + "grad_norm": 1.0700034611745908, + "learning_rate": 1.4584317409848001e-06, + "loss": 0.7931111454963684, + "step": 3308 + }, + { + "epoch": 0.7624423963133641, + "grad_norm": 0.918004873184285, + "learning_rate": 1.4580931161087763e-06, + "loss": 0.8107850551605225, + "step": 3309 + }, + { + "epoch": 0.7626728110599078, + "grad_norm": 1.1251596055699022, + "learning_rate": 1.4577544247411431e-06, + "loss": 0.8211404085159302, + "step": 3310 + }, + { + "epoch": 0.7629032258064516, + "grad_norm": 1.1825093837600291, + "learning_rate": 1.457415666931061e-06, + "loss": 0.9861341714859009, + "step": 3311 + }, + { + "epoch": 0.7631336405529954, + "grad_norm": 1.0573079532917569, + "learning_rate": 
1.4570768427277007e-06, + "loss": 0.8963409662246704, + "step": 3312 + }, + { + "epoch": 0.7633640552995392, + "grad_norm": 1.1183054914337, + "learning_rate": 1.4567379521802416e-06, + "loss": 0.7510147094726562, + "step": 3313 + }, + { + "epoch": 0.7635944700460829, + "grad_norm": 1.0312269750408198, + "learning_rate": 1.4563989953378734e-06, + "loss": 0.7761805057525635, + "step": 3314 + }, + { + "epoch": 0.7638248847926268, + "grad_norm": 0.782434581691777, + "learning_rate": 1.4560599722497953e-06, + "loss": 0.6202781200408936, + "step": 3315 + }, + { + "epoch": 0.7640552995391705, + "grad_norm": 0.9114320197488165, + "learning_rate": 1.4557208829652159e-06, + "loss": 0.711891770362854, + "step": 3316 + }, + { + "epoch": 0.7642857142857142, + "grad_norm": 1.0888571874972786, + "learning_rate": 1.4553817275333537e-06, + "loss": 0.8689517974853516, + "step": 3317 + }, + { + "epoch": 0.7645161290322581, + "grad_norm": 0.847547372029402, + "learning_rate": 1.4550425060034365e-06, + "loss": 0.7323688268661499, + "step": 3318 + }, + { + "epoch": 0.7647465437788018, + "grad_norm": 0.954006429800706, + "learning_rate": 1.4547032184247022e-06, + "loss": 0.8934407234191895, + "step": 3319 + }, + { + "epoch": 0.7649769585253456, + "grad_norm": 0.9830574702749578, + "learning_rate": 1.4543638648463975e-06, + "loss": 0.7729885578155518, + "step": 3320 + }, + { + "epoch": 0.7652073732718894, + "grad_norm": 0.9967355019103026, + "learning_rate": 1.454024445317779e-06, + "loss": 0.8962388038635254, + "step": 3321 + }, + { + "epoch": 0.7654377880184332, + "grad_norm": 0.8821073382766633, + "learning_rate": 1.4536849598881137e-06, + "loss": 0.8655213117599487, + "step": 3322 + }, + { + "epoch": 0.7656682027649769, + "grad_norm": 0.8780656658271131, + "learning_rate": 1.453345408606677e-06, + "loss": 0.6471779346466064, + "step": 3323 + }, + { + "epoch": 0.7658986175115208, + "grad_norm": 0.7335596828312507, + "learning_rate": 1.4530057915227545e-06, + "loss": 
0.8665071129798889, + "step": 3324 + }, + { + "epoch": 0.7661290322580645, + "grad_norm": 1.054528188345679, + "learning_rate": 1.4526661086856407e-06, + "loss": 0.9504371285438538, + "step": 3325 + }, + { + "epoch": 0.7663594470046083, + "grad_norm": 1.017396914206461, + "learning_rate": 1.452326360144641e-06, + "loss": 0.8122013807296753, + "step": 3326 + }, + { + "epoch": 0.7665898617511521, + "grad_norm": 1.0019111601549837, + "learning_rate": 1.4519865459490687e-06, + "loss": 0.817001223564148, + "step": 3327 + }, + { + "epoch": 0.7668202764976959, + "grad_norm": 0.9387626004792055, + "learning_rate": 1.4516466661482474e-06, + "loss": 0.732322096824646, + "step": 3328 + }, + { + "epoch": 0.7670506912442396, + "grad_norm": 0.8844021324185192, + "learning_rate": 1.4513067207915106e-06, + "loss": 0.7961580157279968, + "step": 3329 + }, + { + "epoch": 0.7672811059907834, + "grad_norm": 0.9579783239612414, + "learning_rate": 1.4509667099282007e-06, + "loss": 0.7660717368125916, + "step": 3330 + }, + { + "epoch": 0.7675115207373272, + "grad_norm": 0.8487336367256668, + "learning_rate": 1.4506266336076698e-06, + "loss": 0.8279193639755249, + "step": 3331 + }, + { + "epoch": 0.7677419354838709, + "grad_norm": 0.8431407438554851, + "learning_rate": 1.4502864918792796e-06, + "loss": 0.7050153017044067, + "step": 3332 + }, + { + "epoch": 0.7679723502304148, + "grad_norm": 0.9386347952909049, + "learning_rate": 1.4499462847924013e-06, + "loss": 0.8146064877510071, + "step": 3333 + }, + { + "epoch": 0.7682027649769585, + "grad_norm": 0.8248232070769104, + "learning_rate": 1.4496060123964153e-06, + "loss": 0.8300814628601074, + "step": 3334 + }, + { + "epoch": 0.7684331797235023, + "grad_norm": 0.848400587593364, + "learning_rate": 1.4492656747407117e-06, + "loss": 0.8240403532981873, + "step": 3335 + }, + { + "epoch": 0.7686635944700461, + "grad_norm": 1.1661360506901004, + "learning_rate": 1.4489252718746908e-06, + "loss": 0.901625394821167, + "step": 3336 + }, + { + 
"epoch": 0.7688940092165899, + "grad_norm": 0.8620744709914054, + "learning_rate": 1.4485848038477604e-06, + "loss": 0.827139675617218, + "step": 3337 + }, + { + "epoch": 0.7691244239631336, + "grad_norm": 1.111541176491108, + "learning_rate": 1.4482442707093397e-06, + "loss": 0.7032946348190308, + "step": 3338 + }, + { + "epoch": 0.7693548387096775, + "grad_norm": 0.8506038004087974, + "learning_rate": 1.4479036725088564e-06, + "loss": 0.6805816888809204, + "step": 3339 + }, + { + "epoch": 0.7695852534562212, + "grad_norm": 0.8063208135295213, + "learning_rate": 1.447563009295748e-06, + "loss": 0.673591136932373, + "step": 3340 + }, + { + "epoch": 0.7698156682027649, + "grad_norm": 0.8116035277545482, + "learning_rate": 1.4472222811194614e-06, + "loss": 0.6513386964797974, + "step": 3341 + }, + { + "epoch": 0.7700460829493088, + "grad_norm": 0.7654089652768199, + "learning_rate": 1.4468814880294529e-06, + "loss": 0.7367297410964966, + "step": 3342 + }, + { + "epoch": 0.7702764976958525, + "grad_norm": 1.0405555538712603, + "learning_rate": 1.4465406300751878e-06, + "loss": 0.7393670082092285, + "step": 3343 + }, + { + "epoch": 0.7705069124423963, + "grad_norm": 0.7135144631405288, + "learning_rate": 1.4461997073061411e-06, + "loss": 0.7525930404663086, + "step": 3344 + }, + { + "epoch": 0.7707373271889401, + "grad_norm": 0.7583677101512988, + "learning_rate": 1.445858719771798e-06, + "loss": 0.6679942011833191, + "step": 3345 + }, + { + "epoch": 0.7709677419354839, + "grad_norm": 1.0903018310329022, + "learning_rate": 1.4455176675216518e-06, + "loss": 0.8440653085708618, + "step": 3346 + }, + { + "epoch": 0.7711981566820276, + "grad_norm": 0.9929368208299709, + "learning_rate": 1.4451765506052063e-06, + "loss": 0.8765773177146912, + "step": 3347 + }, + { + "epoch": 0.7714285714285715, + "grad_norm": 0.9183070258317377, + "learning_rate": 1.4448353690719732e-06, + "loss": 0.7309157848358154, + "step": 3348 + }, + { + "epoch": 0.7716589861751152, + "grad_norm": 
0.8130162073408548, + "learning_rate": 1.4444941229714758e-06, + "loss": 0.8043340444564819, + "step": 3349 + }, + { + "epoch": 0.771889400921659, + "grad_norm": 0.8488386913998837, + "learning_rate": 1.4441528123532443e-06, + "loss": 0.6528831124305725, + "step": 3350 + }, + { + "epoch": 0.7721198156682028, + "grad_norm": 0.7632405080168834, + "learning_rate": 1.4438114372668202e-06, + "loss": 0.7973155975341797, + "step": 3351 + }, + { + "epoch": 0.7723502304147466, + "grad_norm": 0.8366450624031991, + "learning_rate": 1.443469997761754e-06, + "loss": 0.940142810344696, + "step": 3352 + }, + { + "epoch": 0.7725806451612903, + "grad_norm": 1.0048812991349738, + "learning_rate": 1.443128493887604e-06, + "loss": 0.7936829328536987, + "step": 3353 + }, + { + "epoch": 0.772811059907834, + "grad_norm": 0.8583665989338275, + "learning_rate": 1.44278692569394e-06, + "loss": 0.8369218111038208, + "step": 3354 + }, + { + "epoch": 0.7730414746543779, + "grad_norm": 1.313808566044562, + "learning_rate": 1.4424452932303398e-06, + "loss": 0.9305802583694458, + "step": 3355 + }, + { + "epoch": 0.7732718894009216, + "grad_norm": 0.8862565116465879, + "learning_rate": 1.4421035965463916e-06, + "loss": 0.913454532623291, + "step": 3356 + }, + { + "epoch": 0.7735023041474655, + "grad_norm": 1.0772806984700294, + "learning_rate": 1.4417618356916912e-06, + "loss": 0.8552114963531494, + "step": 3357 + }, + { + "epoch": 0.7737327188940092, + "grad_norm": 1.080720564237515, + "learning_rate": 1.4414200107158452e-06, + "loss": 0.8674488067626953, + "step": 3358 + }, + { + "epoch": 0.773963133640553, + "grad_norm": 1.0999604158561203, + "learning_rate": 1.441078121668469e-06, + "loss": 0.9142898321151733, + "step": 3359 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 1.0964749277789683, + "learning_rate": 1.4407361685991872e-06, + "loss": 0.8258639574050903, + "step": 3360 + }, + { + "epoch": 0.7744239631336406, + "grad_norm": 1.062716295700188, + "learning_rate": 
1.4403941515576343e-06, + "loss": 0.773646354675293, + "step": 3361 + }, + { + "epoch": 0.7746543778801843, + "grad_norm": 1.1397221950146432, + "learning_rate": 1.440052070593453e-06, + "loss": 0.9481985569000244, + "step": 3362 + }, + { + "epoch": 0.7748847926267282, + "grad_norm": 1.0332478363266029, + "learning_rate": 1.4397099257562965e-06, + "loss": 0.7915977239608765, + "step": 3363 + }, + { + "epoch": 0.7751152073732719, + "grad_norm": 1.057946693927254, + "learning_rate": 1.4393677170958261e-06, + "loss": 0.887650191783905, + "step": 3364 + }, + { + "epoch": 0.7753456221198156, + "grad_norm": 0.8250912024788589, + "learning_rate": 1.4390254446617137e-06, + "loss": 0.8516546487808228, + "step": 3365 + }, + { + "epoch": 0.7755760368663595, + "grad_norm": 0.9895329351481195, + "learning_rate": 1.4386831085036386e-06, + "loss": 0.8076090812683105, + "step": 3366 + }, + { + "epoch": 0.7758064516129032, + "grad_norm": 0.9203902257484836, + "learning_rate": 1.4383407086712913e-06, + "loss": 0.7480059862136841, + "step": 3367 + }, + { + "epoch": 0.776036866359447, + "grad_norm": 1.1101542314671893, + "learning_rate": 1.4379982452143704e-06, + "loss": 0.8586190938949585, + "step": 3368 + }, + { + "epoch": 0.7762672811059907, + "grad_norm": 0.9197679868181698, + "learning_rate": 1.4376557181825842e-06, + "loss": 0.7581472396850586, + "step": 3369 + }, + { + "epoch": 0.7764976958525346, + "grad_norm": 1.2064630913320733, + "learning_rate": 1.4373131276256495e-06, + "loss": 0.7482568621635437, + "step": 3370 + }, + { + "epoch": 0.7767281105990783, + "grad_norm": 1.2204489088505164, + "learning_rate": 1.4369704735932935e-06, + "loss": 0.8822590112686157, + "step": 3371 + }, + { + "epoch": 0.7769585253456222, + "grad_norm": 0.9171528830764245, + "learning_rate": 1.4366277561352517e-06, + "loss": 0.7762279510498047, + "step": 3372 + }, + { + "epoch": 0.7771889400921659, + "grad_norm": 0.9649262790570658, + "learning_rate": 1.4362849753012692e-06, + "loss": 
0.8059147596359253, + "step": 3373 + }, + { + "epoch": 0.7774193548387097, + "grad_norm": 1.0529652703364816, + "learning_rate": 1.4359421311411e-06, + "loss": 0.778538703918457, + "step": 3374 + }, + { + "epoch": 0.7776497695852534, + "grad_norm": 1.1587212424703164, + "learning_rate": 1.4355992237045077e-06, + "loss": 0.9422975778579712, + "step": 3375 + }, + { + "epoch": 0.7778801843317973, + "grad_norm": 1.0109308621512796, + "learning_rate": 1.4352562530412645e-06, + "loss": 0.7437118291854858, + "step": 3376 + }, + { + "epoch": 0.778110599078341, + "grad_norm": 0.8961203034935337, + "learning_rate": 1.4349132192011525e-06, + "loss": 0.6935930252075195, + "step": 3377 + }, + { + "epoch": 0.7783410138248847, + "grad_norm": 1.1629979064489353, + "learning_rate": 1.4345701222339628e-06, + "loss": 0.7797117829322815, + "step": 3378 + }, + { + "epoch": 0.7785714285714286, + "grad_norm": 1.0591342199366531, + "learning_rate": 1.434226962189495e-06, + "loss": 0.8795931339263916, + "step": 3379 + }, + { + "epoch": 0.7788018433179723, + "grad_norm": 1.071603440273884, + "learning_rate": 1.433883739117558e-06, + "loss": 0.8936992287635803, + "step": 3380 + }, + { + "epoch": 0.7790322580645161, + "grad_norm": 1.0412928095771106, + "learning_rate": 1.4335404530679708e-06, + "loss": 0.9142701625823975, + "step": 3381 + }, + { + "epoch": 0.7792626728110599, + "grad_norm": 1.0966643259622728, + "learning_rate": 1.4331971040905613e-06, + "loss": 0.8996907472610474, + "step": 3382 + }, + { + "epoch": 0.7794930875576037, + "grad_norm": 1.020250921022328, + "learning_rate": 1.4328536922351654e-06, + "loss": 0.9645330905914307, + "step": 3383 + }, + { + "epoch": 0.7797235023041474, + "grad_norm": 0.7173807290755059, + "learning_rate": 1.4325102175516289e-06, + "loss": 0.5122036933898926, + "step": 3384 + }, + { + "epoch": 0.7799539170506913, + "grad_norm": 0.8487864939918429, + "learning_rate": 1.432166680089807e-06, + "loss": 0.6556990742683411, + "step": 3385 + }, + { + 
"epoch": 0.780184331797235, + "grad_norm": 0.7980125905366343, + "learning_rate": 1.4318230798995634e-06, + "loss": 0.6642920970916748, + "step": 3386 + }, + { + "epoch": 0.7804147465437788, + "grad_norm": 1.1205844690065134, + "learning_rate": 1.4314794170307718e-06, + "loss": 0.9373915195465088, + "step": 3387 + }, + { + "epoch": 0.7806451612903226, + "grad_norm": 1.1583496011366634, + "learning_rate": 1.4311356915333139e-06, + "loss": 0.8295063972473145, + "step": 3388 + }, + { + "epoch": 0.7808755760368664, + "grad_norm": 1.0075666840710995, + "learning_rate": 1.4307919034570809e-06, + "loss": 0.8167035579681396, + "step": 3389 + }, + { + "epoch": 0.7811059907834101, + "grad_norm": 1.045465756545736, + "learning_rate": 1.4304480528519736e-06, + "loss": 0.8444087505340576, + "step": 3390 + }, + { + "epoch": 0.7813364055299539, + "grad_norm": 0.9731986846355507, + "learning_rate": 1.4301041397679012e-06, + "loss": 0.7753941416740417, + "step": 3391 + }, + { + "epoch": 0.7815668202764977, + "grad_norm": 1.0117493931274548, + "learning_rate": 1.4297601642547824e-06, + "loss": 0.7885915040969849, + "step": 3392 + }, + { + "epoch": 0.7817972350230414, + "grad_norm": 0.9902641403084854, + "learning_rate": 1.4294161263625444e-06, + "loss": 0.730733335018158, + "step": 3393 + }, + { + "epoch": 0.7820276497695853, + "grad_norm": 0.8781208509199174, + "learning_rate": 1.4290720261411241e-06, + "loss": 0.8505427837371826, + "step": 3394 + }, + { + "epoch": 0.782258064516129, + "grad_norm": 0.9435888376510791, + "learning_rate": 1.4287278636404676e-06, + "loss": 0.7370787858963013, + "step": 3395 + }, + { + "epoch": 0.7824884792626728, + "grad_norm": 0.8683550268652552, + "learning_rate": 1.428383638910529e-06, + "loss": 0.6776250600814819, + "step": 3396 + }, + { + "epoch": 0.7827188940092166, + "grad_norm": 1.158711583120319, + "learning_rate": 1.4280393520012726e-06, + "loss": 0.8878101706504822, + "step": 3397 + }, + { + "epoch": 0.7829493087557604, + "grad_norm": 
1.0028929146104306, + "learning_rate": 1.427695002962671e-06, + "loss": 0.789238691329956, + "step": 3398 + }, + { + "epoch": 0.7831797235023041, + "grad_norm": 1.0382561381902518, + "learning_rate": 1.4273505918447052e-06, + "loss": 0.772524356842041, + "step": 3399 + }, + { + "epoch": 0.783410138248848, + "grad_norm": 0.8483839499127978, + "learning_rate": 1.4270061186973673e-06, + "loss": 0.682374119758606, + "step": 3400 + }, + { + "epoch": 0.7836405529953917, + "grad_norm": 0.9396222987314208, + "learning_rate": 1.4266615835706566e-06, + "loss": 0.874775767326355, + "step": 3401 + }, + { + "epoch": 0.7838709677419354, + "grad_norm": 1.3780294752863322, + "learning_rate": 1.4263169865145816e-06, + "loss": 0.9141736626625061, + "step": 3402 + }, + { + "epoch": 0.7841013824884793, + "grad_norm": 1.0849695477918648, + "learning_rate": 1.4259723275791603e-06, + "loss": 0.8533145189285278, + "step": 3403 + }, + { + "epoch": 0.784331797235023, + "grad_norm": 0.9340136683520418, + "learning_rate": 1.4256276068144198e-06, + "loss": 0.7920266389846802, + "step": 3404 + }, + { + "epoch": 0.7845622119815668, + "grad_norm": 0.9462841256440514, + "learning_rate": 1.4252828242703957e-06, + "loss": 0.7822731733322144, + "step": 3405 + }, + { + "epoch": 0.7847926267281106, + "grad_norm": 0.9890597976168253, + "learning_rate": 1.4249379799971324e-06, + "loss": 0.7103791832923889, + "step": 3406 + }, + { + "epoch": 0.7850230414746544, + "grad_norm": 1.0298833059227221, + "learning_rate": 1.4245930740446841e-06, + "loss": 0.7857639789581299, + "step": 3407 + }, + { + "epoch": 0.7852534562211981, + "grad_norm": 1.1065594183312877, + "learning_rate": 1.4242481064631134e-06, + "loss": 0.8069730997085571, + "step": 3408 + }, + { + "epoch": 0.785483870967742, + "grad_norm": 1.0472042802008708, + "learning_rate": 1.4239030773024912e-06, + "loss": 0.8758031129837036, + "step": 3409 + }, + { + "epoch": 0.7857142857142857, + "grad_norm": 1.015785019886056, + "learning_rate": 
1.4235579866128983e-06, + "loss": 0.895712673664093, + "step": 3410 + }, + { + "epoch": 0.7859447004608295, + "grad_norm": 0.9442660407745113, + "learning_rate": 1.423212834444425e-06, + "loss": 0.7904561758041382, + "step": 3411 + }, + { + "epoch": 0.7861751152073733, + "grad_norm": 1.0957623852355893, + "learning_rate": 1.4228676208471685e-06, + "loss": 0.9322203993797302, + "step": 3412 + }, + { + "epoch": 0.7864055299539171, + "grad_norm": 0.7668753687506044, + "learning_rate": 1.422522345871237e-06, + "loss": 0.9693628549575806, + "step": 3413 + }, + { + "epoch": 0.7866359447004608, + "grad_norm": 0.8417164970136307, + "learning_rate": 1.4221770095667462e-06, + "loss": 0.6737014651298523, + "step": 3414 + }, + { + "epoch": 0.7868663594470046, + "grad_norm": 1.1466654292657967, + "learning_rate": 1.4218316119838215e-06, + "loss": 0.8682050108909607, + "step": 3415 + }, + { + "epoch": 0.7870967741935484, + "grad_norm": 1.058324160083765, + "learning_rate": 1.4214861531725966e-06, + "loss": 0.7920347452163696, + "step": 3416 + }, + { + "epoch": 0.7873271889400921, + "grad_norm": 1.0147867893383273, + "learning_rate": 1.4211406331832144e-06, + "loss": 0.8330510854721069, + "step": 3417 + }, + { + "epoch": 0.787557603686636, + "grad_norm": 0.8802491842183522, + "learning_rate": 1.4207950520658272e-06, + "loss": 0.8314074873924255, + "step": 3418 + }, + { + "epoch": 0.7877880184331797, + "grad_norm": 1.069355954495663, + "learning_rate": 1.420449409870595e-06, + "loss": 0.7045331001281738, + "step": 3419 + }, + { + "epoch": 0.7880184331797235, + "grad_norm": 0.9484390721895568, + "learning_rate": 1.4201037066476876e-06, + "loss": 0.7825411558151245, + "step": 3420 + }, + { + "epoch": 0.7882488479262673, + "grad_norm": 0.86611108370867, + "learning_rate": 1.4197579424472834e-06, + "loss": 0.6960075497627258, + "step": 3421 + }, + { + "epoch": 0.7884792626728111, + "grad_norm": 1.038692849963906, + "learning_rate": 1.4194121173195694e-06, + "loss": 0.8366748094558716, 
+ "step": 3422 + }, + { + "epoch": 0.7887096774193548, + "grad_norm": 0.8605441828045868, + "learning_rate": 1.4190662313147419e-06, + "loss": 0.8859039545059204, + "step": 3423 + }, + { + "epoch": 0.7889400921658987, + "grad_norm": 1.0572382908005622, + "learning_rate": 1.4187202844830057e-06, + "loss": 0.7098245620727539, + "step": 3424 + }, + { + "epoch": 0.7891705069124424, + "grad_norm": 0.9126448008384304, + "learning_rate": 1.4183742768745743e-06, + "loss": 0.7410455942153931, + "step": 3425 + }, + { + "epoch": 0.7894009216589861, + "grad_norm": 0.8007200450015498, + "learning_rate": 1.4180282085396706e-06, + "loss": 0.7414010763168335, + "step": 3426 + }, + { + "epoch": 0.78963133640553, + "grad_norm": 1.090062212374054, + "learning_rate": 1.417682079528526e-06, + "loss": 0.9043526649475098, + "step": 3427 + }, + { + "epoch": 0.7898617511520737, + "grad_norm": 0.8510201071166715, + "learning_rate": 1.4173358898913804e-06, + "loss": 0.7709499597549438, + "step": 3428 + }, + { + "epoch": 0.7900921658986175, + "grad_norm": 1.0829385459770577, + "learning_rate": 1.416989639678483e-06, + "loss": 0.7499940395355225, + "step": 3429 + }, + { + "epoch": 0.7903225806451613, + "grad_norm": 0.766744185733082, + "learning_rate": 1.4166433289400911e-06, + "loss": 0.7401680946350098, + "step": 3430 + }, + { + "epoch": 0.7905529953917051, + "grad_norm": 0.8802012939982503, + "learning_rate": 1.4162969577264718e-06, + "loss": 1.0132567882537842, + "step": 3431 + }, + { + "epoch": 0.7907834101382488, + "grad_norm": 0.9758763490715631, + "learning_rate": 1.4159505260879004e-06, + "loss": 0.8438389301300049, + "step": 3432 + }, + { + "epoch": 0.7910138248847927, + "grad_norm": 1.2075583274029744, + "learning_rate": 1.4156040340746603e-06, + "loss": 0.9149703979492188, + "step": 3433 + }, + { + "epoch": 0.7912442396313364, + "grad_norm": 1.4960555955584764, + "learning_rate": 1.4152574817370451e-06, + "loss": 0.9141047596931458, + "step": 3434 + }, + { + "epoch": 
0.7914746543778802, + "grad_norm": 0.924125511762228, + "learning_rate": 1.414910869125356e-06, + "loss": 0.6896570324897766, + "step": 3435 + }, + { + "epoch": 0.791705069124424, + "grad_norm": 0.9277571830040596, + "learning_rate": 1.4145641962899035e-06, + "loss": 0.742916464805603, + "step": 3436 + }, + { + "epoch": 0.7919354838709678, + "grad_norm": 1.0041274553911197, + "learning_rate": 1.414217463281007e-06, + "loss": 0.9315029382705688, + "step": 3437 + }, + { + "epoch": 0.7921658986175115, + "grad_norm": 0.9532695013501692, + "learning_rate": 1.4138706701489942e-06, + "loss": 0.7645175457000732, + "step": 3438 + }, + { + "epoch": 0.7923963133640552, + "grad_norm": 1.0166687927137474, + "learning_rate": 1.413523816944201e-06, + "loss": 0.8253934383392334, + "step": 3439 + }, + { + "epoch": 0.7926267281105991, + "grad_norm": 1.055807296618818, + "learning_rate": 1.4131769037169736e-06, + "loss": 0.8650136590003967, + "step": 3440 + }, + { + "epoch": 0.7928571428571428, + "grad_norm": 1.0239985264965783, + "learning_rate": 1.4128299305176654e-06, + "loss": 0.7453975677490234, + "step": 3441 + }, + { + "epoch": 0.7930875576036867, + "grad_norm": 1.1689392671270256, + "learning_rate": 1.4124828973966392e-06, + "loss": 0.9121813774108887, + "step": 3442 + }, + { + "epoch": 0.7933179723502304, + "grad_norm": 1.16007005259146, + "learning_rate": 1.4121358044042667e-06, + "loss": 0.9097952842712402, + "step": 3443 + }, + { + "epoch": 0.7935483870967742, + "grad_norm": 0.9263687778783555, + "learning_rate": 1.4117886515909277e-06, + "loss": 0.7185770273208618, + "step": 3444 + }, + { + "epoch": 0.793778801843318, + "grad_norm": 0.9816189958888628, + "learning_rate": 1.4114414390070111e-06, + "loss": 0.8192715644836426, + "step": 3445 + }, + { + "epoch": 0.7940092165898618, + "grad_norm": 0.8830372557771754, + "learning_rate": 1.4110941667029143e-06, + "loss": 0.7864251136779785, + "step": 3446 + }, + { + "epoch": 0.7942396313364055, + "grad_norm": 
0.9262266668392852, + "learning_rate": 1.4107468347290431e-06, + "loss": 0.7433357834815979, + "step": 3447 + }, + { + "epoch": 0.7944700460829494, + "grad_norm": 0.8826486406616629, + "learning_rate": 1.4103994431358133e-06, + "loss": 0.8196350336074829, + "step": 3448 + }, + { + "epoch": 0.7947004608294931, + "grad_norm": 1.0379031741076927, + "learning_rate": 1.410051991973647e-06, + "loss": 0.7698987126350403, + "step": 3449 + }, + { + "epoch": 0.7949308755760369, + "grad_norm": 1.228700210939763, + "learning_rate": 1.4097044812929776e-06, + "loss": 0.9404128789901733, + "step": 3450 + }, + { + "epoch": 0.7951612903225806, + "grad_norm": 0.9114628140508482, + "learning_rate": 1.4093569111442443e-06, + "loss": 0.827290952205658, + "step": 3451 + }, + { + "epoch": 0.7953917050691244, + "grad_norm": 1.0612294009838623, + "learning_rate": 1.4090092815778976e-06, + "loss": 0.8126389384269714, + "step": 3452 + }, + { + "epoch": 0.7956221198156682, + "grad_norm": 0.9598694992596972, + "learning_rate": 1.4086615926443953e-06, + "loss": 0.7439650297164917, + "step": 3453 + }, + { + "epoch": 0.7958525345622119, + "grad_norm": 0.9952168701899716, + "learning_rate": 1.4083138443942036e-06, + "loss": 0.7505590915679932, + "step": 3454 + }, + { + "epoch": 0.7960829493087558, + "grad_norm": 0.8299073365871691, + "learning_rate": 1.407966036877798e-06, + "loss": 0.7070168256759644, + "step": 3455 + }, + { + "epoch": 0.7963133640552995, + "grad_norm": 0.9422601313607071, + "learning_rate": 1.4076181701456623e-06, + "loss": 0.8271987438201904, + "step": 3456 + }, + { + "epoch": 0.7965437788018433, + "grad_norm": 0.8558890366072001, + "learning_rate": 1.4072702442482886e-06, + "loss": 0.72886061668396, + "step": 3457 + }, + { + "epoch": 0.7967741935483871, + "grad_norm": 1.1355616522222822, + "learning_rate": 1.4069222592361784e-06, + "loss": 0.838603138923645, + "step": 3458 + }, + { + "epoch": 0.7970046082949309, + "grad_norm": 1.1314183210174298, + "learning_rate": 
1.4065742151598408e-06, + "loss": 0.9829634428024292, + "step": 3459 + }, + { + "epoch": 0.7972350230414746, + "grad_norm": 1.0528251173572156, + "learning_rate": 1.406226112069794e-06, + "loss": 0.8269632458686829, + "step": 3460 + }, + { + "epoch": 0.7974654377880185, + "grad_norm": 1.0290510208624037, + "learning_rate": 1.405877950016565e-06, + "loss": 0.7234654426574707, + "step": 3461 + }, + { + "epoch": 0.7976958525345622, + "grad_norm": 0.89079385428478, + "learning_rate": 1.4055297290506887e-06, + "loss": 0.7843908071517944, + "step": 3462 + }, + { + "epoch": 0.7979262672811059, + "grad_norm": 0.8247890912721374, + "learning_rate": 1.4051814492227094e-06, + "loss": 0.7294371128082275, + "step": 3463 + }, + { + "epoch": 0.7981566820276498, + "grad_norm": 1.1727486785997119, + "learning_rate": 1.4048331105831787e-06, + "loss": 0.8805780410766602, + "step": 3464 + }, + { + "epoch": 0.7983870967741935, + "grad_norm": 0.9922079942807702, + "learning_rate": 1.404484713182658e-06, + "loss": 0.6933708190917969, + "step": 3465 + }, + { + "epoch": 0.7986175115207373, + "grad_norm": 1.0638183747733119, + "learning_rate": 1.404136257071717e-06, + "loss": 0.8720458745956421, + "step": 3466 + }, + { + "epoch": 0.7988479262672811, + "grad_norm": 1.1404138575251217, + "learning_rate": 1.403787742300933e-06, + "loss": 0.7675988674163818, + "step": 3467 + }, + { + "epoch": 0.7990783410138249, + "grad_norm": 1.0188982193786602, + "learning_rate": 1.403439168920893e-06, + "loss": 0.7630051374435425, + "step": 3468 + }, + { + "epoch": 0.7993087557603686, + "grad_norm": 0.9607713149142998, + "learning_rate": 1.4030905369821914e-06, + "loss": 0.9195173978805542, + "step": 3469 + }, + { + "epoch": 0.7995391705069125, + "grad_norm": 0.966603725031027, + "learning_rate": 1.402741846535432e-06, + "loss": 0.9347431659698486, + "step": 3470 + }, + { + "epoch": 0.7997695852534562, + "grad_norm": 1.0423944793385256, + "learning_rate": 1.4023930976312271e-06, + "loss": 0.7812551259994507, 
+ "step": 3471 + }, + { + "epoch": 0.8, + "grad_norm": 1.0230073164776583, + "learning_rate": 1.4020442903201963e-06, + "loss": 0.7655330896377563, + "step": 3472 + }, + { + "epoch": 0.8002304147465438, + "grad_norm": 1.2791975931288466, + "learning_rate": 1.4016954246529694e-06, + "loss": 0.7543904185295105, + "step": 3473 + }, + { + "epoch": 0.8004608294930876, + "grad_norm": 0.8246426244987128, + "learning_rate": 1.4013465006801833e-06, + "loss": 0.9343980550765991, + "step": 3474 + }, + { + "epoch": 0.8006912442396313, + "grad_norm": 1.1458439395589735, + "learning_rate": 1.4009975184524838e-06, + "loss": 0.7366182208061218, + "step": 3475 + }, + { + "epoch": 0.8009216589861751, + "grad_norm": 1.0109168818205314, + "learning_rate": 1.4006484780205254e-06, + "loss": 0.7028899192810059, + "step": 3476 + }, + { + "epoch": 0.8011520737327189, + "grad_norm": 1.1092959183189253, + "learning_rate": 1.4002993794349708e-06, + "loss": 0.9259153604507446, + "step": 3477 + }, + { + "epoch": 0.8013824884792626, + "grad_norm": 1.091442085001374, + "learning_rate": 1.3999502227464914e-06, + "loss": 0.7263842225074768, + "step": 3478 + }, + { + "epoch": 0.8016129032258065, + "grad_norm": 0.9964781390280828, + "learning_rate": 1.3996010080057664e-06, + "loss": 0.8177748918533325, + "step": 3479 + }, + { + "epoch": 0.8018433179723502, + "grad_norm": 1.080145531043834, + "learning_rate": 1.3992517352634842e-06, + "loss": 0.8526895046234131, + "step": 3480 + }, + { + "epoch": 0.802073732718894, + "grad_norm": 1.031018616296166, + "learning_rate": 1.398902404570341e-06, + "loss": 0.7914575338363647, + "step": 3481 + }, + { + "epoch": 0.8023041474654378, + "grad_norm": 0.816157508913072, + "learning_rate": 1.398553015977042e-06, + "loss": 0.7546013593673706, + "step": 3482 + }, + { + "epoch": 0.8025345622119816, + "grad_norm": 1.0408293581677805, + "learning_rate": 1.3982035695343005e-06, + "loss": 0.7250038385391235, + "step": 3483 + }, + { + "epoch": 0.8027649769585253, + 
"grad_norm": 1.023275477136697, + "learning_rate": 1.3978540652928376e-06, + "loss": 0.8650141954421997, + "step": 3484 + }, + { + "epoch": 0.8029953917050692, + "grad_norm": 0.9633891302798026, + "learning_rate": 1.3975045033033838e-06, + "loss": 0.8020066022872925, + "step": 3485 + }, + { + "epoch": 0.8032258064516129, + "grad_norm": 0.9146174916063312, + "learning_rate": 1.3971548836166782e-06, + "loss": 0.7376772165298462, + "step": 3486 + }, + { + "epoch": 0.8034562211981566, + "grad_norm": 0.9278800283054291, + "learning_rate": 1.3968052062834665e-06, + "loss": 0.8440769910812378, + "step": 3487 + }, + { + "epoch": 0.8036866359447005, + "grad_norm": 0.8964312010034259, + "learning_rate": 1.3964554713545047e-06, + "loss": 0.7886836528778076, + "step": 3488 + }, + { + "epoch": 0.8039170506912442, + "grad_norm": 0.9177920963823754, + "learning_rate": 1.396105678880556e-06, + "loss": 0.9167575836181641, + "step": 3489 + }, + { + "epoch": 0.804147465437788, + "grad_norm": 0.8367032180339474, + "learning_rate": 1.3957558289123922e-06, + "loss": 0.6761677861213684, + "step": 3490 + }, + { + "epoch": 0.8043778801843318, + "grad_norm": 0.9716984065235628, + "learning_rate": 1.3954059215007938e-06, + "loss": 0.7775592803955078, + "step": 3491 + }, + { + "epoch": 0.8046082949308756, + "grad_norm": 1.00005526663364, + "learning_rate": 1.3950559566965494e-06, + "loss": 0.8127217292785645, + "step": 3492 + }, + { + "epoch": 0.8048387096774193, + "grad_norm": 1.007116682040637, + "learning_rate": 1.394705934550456e-06, + "loss": 0.8134229779243469, + "step": 3493 + }, + { + "epoch": 0.8050691244239632, + "grad_norm": 1.3224030787110577, + "learning_rate": 1.3943558551133186e-06, + "loss": 0.8853167295455933, + "step": 3494 + }, + { + "epoch": 0.8052995391705069, + "grad_norm": 1.0544152264027669, + "learning_rate": 1.3940057184359506e-06, + "loss": 0.8024332523345947, + "step": 3495 + }, + { + "epoch": 0.8055299539170507, + "grad_norm": 0.6779010833647611, + 
"learning_rate": 1.3936555245691745e-06, + "loss": 0.7581099271774292, + "step": 3496 + }, + { + "epoch": 0.8057603686635945, + "grad_norm": 1.0509729333579008, + "learning_rate": 1.3933052735638203e-06, + "loss": 0.979412317276001, + "step": 3497 + }, + { + "epoch": 0.8059907834101383, + "grad_norm": 0.9816833973848147, + "learning_rate": 1.392954965470726e-06, + "loss": 0.7917830944061279, + "step": 3498 + }, + { + "epoch": 0.806221198156682, + "grad_norm": 0.9622725908619084, + "learning_rate": 1.392604600340739e-06, + "loss": 0.8565326929092407, + "step": 3499 + }, + { + "epoch": 0.8064516129032258, + "grad_norm": 1.0170451339424116, + "learning_rate": 1.3922541782247136e-06, + "loss": 0.7276358604431152, + "step": 3500 + }, + { + "epoch": 0.8066820276497696, + "grad_norm": 0.8351645839157906, + "learning_rate": 1.3919036991735138e-06, + "loss": 0.734528660774231, + "step": 3501 + }, + { + "epoch": 0.8069124423963133, + "grad_norm": 1.1746648423168138, + "learning_rate": 1.391553163238011e-06, + "loss": 0.8786039352416992, + "step": 3502 + }, + { + "epoch": 0.8071428571428572, + "grad_norm": 1.1050955424788658, + "learning_rate": 1.3912025704690844e-06, + "loss": 0.9509482383728027, + "step": 3503 + }, + { + "epoch": 0.8073732718894009, + "grad_norm": 0.8741751886687131, + "learning_rate": 1.3908519209176225e-06, + "loss": 0.7188615202903748, + "step": 3504 + }, + { + "epoch": 0.8076036866359447, + "grad_norm": 1.0307846021250762, + "learning_rate": 1.3905012146345221e-06, + "loss": 0.7681115865707397, + "step": 3505 + }, + { + "epoch": 0.8078341013824885, + "grad_norm": 1.0988034793572021, + "learning_rate": 1.3901504516706874e-06, + "loss": 0.8835415840148926, + "step": 3506 + }, + { + "epoch": 0.8080645161290323, + "grad_norm": 1.0724177836810997, + "learning_rate": 1.389799632077031e-06, + "loss": 0.8179003000259399, + "step": 3507 + }, + { + "epoch": 0.808294930875576, + "grad_norm": 1.1244187286361234, + "learning_rate": 1.3894487559044742e-06, + "loss": 
0.9690247774124146, + "step": 3508 + }, + { + "epoch": 0.8085253456221199, + "grad_norm": 0.9601740737567672, + "learning_rate": 1.389097823203946e-06, + "loss": 0.9759812951087952, + "step": 3509 + }, + { + "epoch": 0.8087557603686636, + "grad_norm": 0.8953376224758026, + "learning_rate": 1.3887468340263838e-06, + "loss": 0.6649112105369568, + "step": 3510 + }, + { + "epoch": 0.8089861751152074, + "grad_norm": 0.8803647716437188, + "learning_rate": 1.388395788422733e-06, + "loss": 0.7824583053588867, + "step": 3511 + }, + { + "epoch": 0.8092165898617512, + "grad_norm": 1.0776551292843717, + "learning_rate": 1.3880446864439482e-06, + "loss": 0.8226176500320435, + "step": 3512 + }, + { + "epoch": 0.8094470046082949, + "grad_norm": 1.0775758718001336, + "learning_rate": 1.3876935281409904e-06, + "loss": 0.7708876729011536, + "step": 3513 + }, + { + "epoch": 0.8096774193548387, + "grad_norm": 1.1275141981575327, + "learning_rate": 1.3873423135648303e-06, + "loss": 0.7162825465202332, + "step": 3514 + }, + { + "epoch": 0.8099078341013825, + "grad_norm": 1.1973823780619761, + "learning_rate": 1.3869910427664464e-06, + "loss": 0.815816342830658, + "step": 3515 + }, + { + "epoch": 0.8101382488479263, + "grad_norm": 1.0491570029475803, + "learning_rate": 1.3866397157968248e-06, + "loss": 0.9166251420974731, + "step": 3516 + }, + { + "epoch": 0.81036866359447, + "grad_norm": 1.185963303947227, + "learning_rate": 1.3862883327069606e-06, + "loss": 0.9193897843360901, + "step": 3517 + }, + { + "epoch": 0.8105990783410139, + "grad_norm": 1.1492579516601074, + "learning_rate": 1.3859368935478557e-06, + "loss": 0.9019489288330078, + "step": 3518 + }, + { + "epoch": 0.8108294930875576, + "grad_norm": 1.0706438739080621, + "learning_rate": 1.3855853983705222e-06, + "loss": 0.8616153597831726, + "step": 3519 + }, + { + "epoch": 0.8110599078341014, + "grad_norm": 0.9368530229676858, + "learning_rate": 1.3852338472259782e-06, + "loss": 0.8898462057113647, + "step": 3520 + }, + { + 
"epoch": 0.8112903225806452, + "grad_norm": 0.9891797921278073, + "learning_rate": 1.3848822401652513e-06, + "loss": 0.770263135433197, + "step": 3521 + }, + { + "epoch": 0.811520737327189, + "grad_norm": 0.950594228231774, + "learning_rate": 1.384530577239377e-06, + "loss": 0.7524563074111938, + "step": 3522 + }, + { + "epoch": 0.8117511520737327, + "grad_norm": 0.8975349550091929, + "learning_rate": 1.3841788584993981e-06, + "loss": 0.776715874671936, + "step": 3523 + }, + { + "epoch": 0.8119815668202764, + "grad_norm": 0.6412822466784485, + "learning_rate": 1.3838270839963666e-06, + "loss": 0.7165439128875732, + "step": 3524 + }, + { + "epoch": 0.8122119815668203, + "grad_norm": 1.0082147827954213, + "learning_rate": 1.383475253781342e-06, + "loss": 0.7641004323959351, + "step": 3525 + }, + { + "epoch": 0.812442396313364, + "grad_norm": 0.9278762834298543, + "learning_rate": 1.3831233679053921e-06, + "loss": 0.7493933439254761, + "step": 3526 + }, + { + "epoch": 0.8126728110599079, + "grad_norm": 1.1064599998463516, + "learning_rate": 1.3827714264195924e-06, + "loss": 0.7981607913970947, + "step": 3527 + }, + { + "epoch": 0.8129032258064516, + "grad_norm": 1.2555949352929368, + "learning_rate": 1.3824194293750272e-06, + "loss": 0.9130103588104248, + "step": 3528 + }, + { + "epoch": 0.8131336405529954, + "grad_norm": 1.0192840808161379, + "learning_rate": 1.3820673768227878e-06, + "loss": 0.7208644151687622, + "step": 3529 + }, + { + "epoch": 0.8133640552995391, + "grad_norm": 0.9880323858602741, + "learning_rate": 1.3817152688139745e-06, + "loss": 0.9134006500244141, + "step": 3530 + }, + { + "epoch": 0.813594470046083, + "grad_norm": 0.836575472485664, + "learning_rate": 1.381363105399695e-06, + "loss": 0.7383376359939575, + "step": 3531 + }, + { + "epoch": 0.8138248847926267, + "grad_norm": 1.4743208995655537, + "learning_rate": 1.381010886631066e-06, + "loss": 0.9143035411834717, + "step": 3532 + }, + { + "epoch": 0.8140552995391706, + "grad_norm": 
0.8030889519622723, + "learning_rate": 1.3806586125592107e-06, + "loss": 0.7972506284713745, + "step": 3533 + }, + { + "epoch": 0.8142857142857143, + "grad_norm": 0.9706054308316248, + "learning_rate": 1.380306283235262e-06, + "loss": 0.8999859094619751, + "step": 3534 + }, + { + "epoch": 0.8145161290322581, + "grad_norm": 1.4136312048518, + "learning_rate": 1.37995389871036e-06, + "loss": 0.7759672999382019, + "step": 3535 + }, + { + "epoch": 0.8147465437788018, + "grad_norm": 0.8852561621502252, + "learning_rate": 1.3796014590356522e-06, + "loss": 0.7915023565292358, + "step": 3536 + }, + { + "epoch": 0.8149769585253456, + "grad_norm": 1.0626460640648143, + "learning_rate": 1.3792489642622956e-06, + "loss": 0.8259623050689697, + "step": 3537 + }, + { + "epoch": 0.8152073732718894, + "grad_norm": 0.9193643373115533, + "learning_rate": 1.3788964144414534e-06, + "loss": 0.7786526679992676, + "step": 3538 + }, + { + "epoch": 0.8154377880184331, + "grad_norm": 0.8743120056652736, + "learning_rate": 1.3785438096242987e-06, + "loss": 0.8655314445495605, + "step": 3539 + }, + { + "epoch": 0.815668202764977, + "grad_norm": 1.073925215345039, + "learning_rate": 1.3781911498620108e-06, + "loss": 0.8116016387939453, + "step": 3540 + }, + { + "epoch": 0.8158986175115207, + "grad_norm": 1.07781870851745, + "learning_rate": 1.3778384352057781e-06, + "loss": 0.712907075881958, + "step": 3541 + }, + { + "epoch": 0.8161290322580645, + "grad_norm": 0.9419481549244654, + "learning_rate": 1.377485665706797e-06, + "loss": 0.8271318674087524, + "step": 3542 + }, + { + "epoch": 0.8163594470046083, + "grad_norm": 1.231349694992367, + "learning_rate": 1.3771328414162713e-06, + "loss": 0.9161353707313538, + "step": 3543 + }, + { + "epoch": 0.8165898617511521, + "grad_norm": 1.1900246832578463, + "learning_rate": 1.3767799623854125e-06, + "loss": 0.9555908441543579, + "step": 3544 + }, + { + "epoch": 0.8168202764976958, + "grad_norm": 0.9121338000164769, + "learning_rate": 
1.3764270286654414e-06, + "loss": 0.7863249778747559, + "step": 3545 + }, + { + "epoch": 0.8170506912442397, + "grad_norm": 1.0362996056258458, + "learning_rate": 1.3760740403075853e-06, + "loss": 0.9086883068084717, + "step": 3546 + }, + { + "epoch": 0.8172811059907834, + "grad_norm": 0.9211768991499883, + "learning_rate": 1.37572099736308e-06, + "loss": 0.6231412887573242, + "step": 3547 + }, + { + "epoch": 0.8175115207373271, + "grad_norm": 0.94903309328564, + "learning_rate": 1.3753678998831692e-06, + "loss": 0.8221716284751892, + "step": 3548 + }, + { + "epoch": 0.817741935483871, + "grad_norm": 1.0641797094094223, + "learning_rate": 1.375014747919105e-06, + "loss": 0.8077783584594727, + "step": 3549 + }, + { + "epoch": 0.8179723502304147, + "grad_norm": 1.0675643850007648, + "learning_rate": 1.3746615415221463e-06, + "loss": 0.6882060766220093, + "step": 3550 + }, + { + "epoch": 0.8182027649769585, + "grad_norm": 0.8393670588117293, + "learning_rate": 1.3743082807435614e-06, + "loss": 0.700161337852478, + "step": 3551 + }, + { + "epoch": 0.8184331797235023, + "grad_norm": 0.8856084645963668, + "learning_rate": 1.3739549656346243e-06, + "loss": 0.737981915473938, + "step": 3552 + }, + { + "epoch": 0.8186635944700461, + "grad_norm": 0.8562104816360829, + "learning_rate": 1.3736015962466193e-06, + "loss": 0.8025717735290527, + "step": 3553 + }, + { + "epoch": 0.8188940092165898, + "grad_norm": 1.1233745076434911, + "learning_rate": 1.3732481726308372e-06, + "loss": 0.8855722546577454, + "step": 3554 + }, + { + "epoch": 0.8191244239631337, + "grad_norm": 1.2861487220187957, + "learning_rate": 1.3728946948385768e-06, + "loss": 0.819130539894104, + "step": 3555 + }, + { + "epoch": 0.8193548387096774, + "grad_norm": 1.086213399760416, + "learning_rate": 1.3725411629211454e-06, + "loss": 0.8419625759124756, + "step": 3556 + }, + { + "epoch": 0.8195852534562212, + "grad_norm": 0.8659477904111433, + "learning_rate": 1.3721875769298575e-06, + "loss": 0.8478890657424927, 
+ "step": 3557 + }, + { + "epoch": 0.819815668202765, + "grad_norm": 0.9446742102947047, + "learning_rate": 1.371833936916035e-06, + "loss": 0.8654077053070068, + "step": 3558 + }, + { + "epoch": 0.8200460829493088, + "grad_norm": 1.132873117876266, + "learning_rate": 1.371480242931009e-06, + "loss": 0.8898686170578003, + "step": 3559 + }, + { + "epoch": 0.8202764976958525, + "grad_norm": 1.0419861877874252, + "learning_rate": 1.3711264950261176e-06, + "loss": 0.873773455619812, + "step": 3560 + }, + { + "epoch": 0.8205069124423963, + "grad_norm": 0.8068261635969198, + "learning_rate": 1.3707726932527068e-06, + "loss": 0.6323572397232056, + "step": 3561 + }, + { + "epoch": 0.8207373271889401, + "grad_norm": 1.1038849604905803, + "learning_rate": 1.3704188376621304e-06, + "loss": 0.7018281817436218, + "step": 3562 + }, + { + "epoch": 0.8209677419354838, + "grad_norm": 1.084497532058705, + "learning_rate": 1.37006492830575e-06, + "loss": 0.8052775859832764, + "step": 3563 + }, + { + "epoch": 0.8211981566820277, + "grad_norm": 1.0795040103988192, + "learning_rate": 1.3697109652349352e-06, + "loss": 0.8057233095169067, + "step": 3564 + }, + { + "epoch": 0.8214285714285714, + "grad_norm": 1.1240440402053398, + "learning_rate": 1.3693569485010633e-06, + "loss": 0.8647899627685547, + "step": 3565 + }, + { + "epoch": 0.8216589861751152, + "grad_norm": 0.9167509343069911, + "learning_rate": 1.369002878155519e-06, + "loss": 0.8022265434265137, + "step": 3566 + }, + { + "epoch": 0.821889400921659, + "grad_norm": 1.0569217144551386, + "learning_rate": 1.368648754249696e-06, + "loss": 0.8534140586853027, + "step": 3567 + }, + { + "epoch": 0.8221198156682028, + "grad_norm": 1.1336199597215886, + "learning_rate": 1.3682945768349935e-06, + "loss": 0.905183732509613, + "step": 3568 + }, + { + "epoch": 0.8223502304147465, + "grad_norm": 1.0114816874699049, + "learning_rate": 1.3679403459628215e-06, + "loss": 0.6096831560134888, + "step": 3569 + }, + { + "epoch": 0.8225806451612904, 
+ "grad_norm": 1.0433167842442863, + "learning_rate": 1.367586061684595e-06, + "loss": 0.7220188975334167, + "step": 3570 + }, + { + "epoch": 0.8228110599078341, + "grad_norm": 1.2434665139770538, + "learning_rate": 1.3672317240517386e-06, + "loss": 0.8028903007507324, + "step": 3571 + }, + { + "epoch": 0.8230414746543778, + "grad_norm": 0.8999816334081224, + "learning_rate": 1.3668773331156831e-06, + "loss": 0.8121141791343689, + "step": 3572 + }, + { + "epoch": 0.8232718894009217, + "grad_norm": 0.9985064007808814, + "learning_rate": 1.3665228889278687e-06, + "loss": 0.8259282112121582, + "step": 3573 + }, + { + "epoch": 0.8235023041474654, + "grad_norm": 1.0492496227314838, + "learning_rate": 1.3661683915397423e-06, + "loss": 0.9356029033660889, + "step": 3574 + }, + { + "epoch": 0.8237327188940092, + "grad_norm": 0.9103215470779688, + "learning_rate": 1.3658138410027582e-06, + "loss": 0.738788366317749, + "step": 3575 + }, + { + "epoch": 0.823963133640553, + "grad_norm": 0.9813034370683628, + "learning_rate": 1.3654592373683794e-06, + "loss": 0.7775605320930481, + "step": 3576 + }, + { + "epoch": 0.8241935483870968, + "grad_norm": 1.0650813981062164, + "learning_rate": 1.3651045806880766e-06, + "loss": 0.7645376324653625, + "step": 3577 + }, + { + "epoch": 0.8244239631336405, + "grad_norm": 0.9731809944135928, + "learning_rate": 1.3647498710133272e-06, + "loss": 0.7713958024978638, + "step": 3578 + }, + { + "epoch": 0.8246543778801844, + "grad_norm": 1.148498187573576, + "learning_rate": 1.3643951083956165e-06, + "loss": 0.6920947432518005, + "step": 3579 + }, + { + "epoch": 0.8248847926267281, + "grad_norm": 0.8263814798727009, + "learning_rate": 1.3640402928864382e-06, + "loss": 0.7108405828475952, + "step": 3580 + }, + { + "epoch": 0.8251152073732719, + "grad_norm": 1.0141959867722847, + "learning_rate": 1.3636854245372936e-06, + "loss": 0.7879295945167542, + "step": 3581 + }, + { + "epoch": 0.8253456221198157, + "grad_norm": 0.8796188222287911, + 
"learning_rate": 1.3633305033996909e-06, + "loss": 0.8173119425773621, + "step": 3582 + }, + { + "epoch": 0.8255760368663595, + "grad_norm": 1.230625652029921, + "learning_rate": 1.3629755295251466e-06, + "loss": 0.8530454635620117, + "step": 3583 + }, + { + "epoch": 0.8258064516129032, + "grad_norm": 0.7851178128331011, + "learning_rate": 1.3626205029651846e-06, + "loss": 0.7749553918838501, + "step": 3584 + }, + { + "epoch": 0.826036866359447, + "grad_norm": 0.9879629515788971, + "learning_rate": 1.362265423771337e-06, + "loss": 0.8313847780227661, + "step": 3585 + }, + { + "epoch": 0.8262672811059908, + "grad_norm": 0.9997153587851354, + "learning_rate": 1.3619102919951424e-06, + "loss": 0.7285455465316772, + "step": 3586 + }, + { + "epoch": 0.8264976958525345, + "grad_norm": 1.053529475482116, + "learning_rate": 1.361555107688148e-06, + "loss": 0.8084003925323486, + "step": 3587 + }, + { + "epoch": 0.8267281105990784, + "grad_norm": 1.1979034262658517, + "learning_rate": 1.3611998709019088e-06, + "loss": 0.8506543040275574, + "step": 3588 + }, + { + "epoch": 0.8269585253456221, + "grad_norm": 1.150137696376644, + "learning_rate": 1.3608445816879864e-06, + "loss": 0.8320293426513672, + "step": 3589 + }, + { + "epoch": 0.8271889400921659, + "grad_norm": 1.0954200087136678, + "learning_rate": 1.3604892400979501e-06, + "loss": 0.8116205930709839, + "step": 3590 + }, + { + "epoch": 0.8274193548387097, + "grad_norm": 0.988607654244707, + "learning_rate": 1.3601338461833785e-06, + "loss": 0.8317450284957886, + "step": 3591 + }, + { + "epoch": 0.8276497695852535, + "grad_norm": 1.0502248139840338, + "learning_rate": 1.3597783999958553e-06, + "loss": 0.7348642349243164, + "step": 3592 + }, + { + "epoch": 0.8278801843317972, + "grad_norm": 0.8829971344500126, + "learning_rate": 1.359422901586974e-06, + "loss": 0.8087270259857178, + "step": 3593 + }, + { + "epoch": 0.8281105990783411, + "grad_norm": 1.1012699484003496, + "learning_rate": 1.3590673510083345e-06, + "loss": 
0.7964637875556946, + "step": 3594 + }, + { + "epoch": 0.8283410138248848, + "grad_norm": 0.8597833865541051, + "learning_rate": 1.358711748311544e-06, + "loss": 0.6192176342010498, + "step": 3595 + }, + { + "epoch": 0.8285714285714286, + "grad_norm": 1.458647590594062, + "learning_rate": 1.3583560935482182e-06, + "loss": 0.7735739946365356, + "step": 3596 + }, + { + "epoch": 0.8288018433179724, + "grad_norm": 1.209934555151429, + "learning_rate": 1.35800038676998e-06, + "loss": 0.7965315580368042, + "step": 3597 + }, + { + "epoch": 0.8290322580645161, + "grad_norm": 1.0086229436787473, + "learning_rate": 1.3576446280284595e-06, + "loss": 0.6489244699478149, + "step": 3598 + }, + { + "epoch": 0.8292626728110599, + "grad_norm": 1.041271189758682, + "learning_rate": 1.3572888173752946e-06, + "loss": 0.8073695302009583, + "step": 3599 + }, + { + "epoch": 0.8294930875576036, + "grad_norm": 0.7544591630478071, + "learning_rate": 1.3569329548621309e-06, + "loss": 0.7925900816917419, + "step": 3600 + }, + { + "epoch": 0.8297235023041475, + "grad_norm": 1.1274353505725723, + "learning_rate": 1.356577040540621e-06, + "loss": 0.83954918384552, + "step": 3601 + }, + { + "epoch": 0.8299539170506912, + "grad_norm": 0.69092010707332, + "learning_rate": 1.356221074462426e-06, + "loss": 0.6384706497192383, + "step": 3602 + }, + { + "epoch": 0.830184331797235, + "grad_norm": 0.8604009933780791, + "learning_rate": 1.3558650566792136e-06, + "loss": 0.8308184146881104, + "step": 3603 + }, + { + "epoch": 0.8304147465437788, + "grad_norm": 0.9893567222365065, + "learning_rate": 1.3555089872426596e-06, + "loss": 0.7972864508628845, + "step": 3604 + }, + { + "epoch": 0.8306451612903226, + "grad_norm": 1.0575497381629144, + "learning_rate": 1.3551528662044463e-06, + "loss": 0.8038849830627441, + "step": 3605 + }, + { + "epoch": 0.8308755760368663, + "grad_norm": 1.0146034272672162, + "learning_rate": 1.3547966936162646e-06, + "loss": 0.7735980749130249, + "step": 3606 + }, + { + "epoch": 
0.8311059907834102, + "grad_norm": 1.169701687059532, + "learning_rate": 1.354440469529813e-06, + "loss": 0.7717504501342773, + "step": 3607 + }, + { + "epoch": 0.8313364055299539, + "grad_norm": 0.8981514617249363, + "learning_rate": 1.3540841939967962e-06, + "loss": 0.9405615329742432, + "step": 3608 + }, + { + "epoch": 0.8315668202764976, + "grad_norm": 0.9913743440349779, + "learning_rate": 1.3537278670689273e-06, + "loss": 0.7730603814125061, + "step": 3609 + }, + { + "epoch": 0.8317972350230415, + "grad_norm": 1.1958069213876743, + "learning_rate": 1.353371488797927e-06, + "loss": 0.8677463531494141, + "step": 3610 + }, + { + "epoch": 0.8320276497695852, + "grad_norm": 1.0362704574624084, + "learning_rate": 1.3530150592355227e-06, + "loss": 0.8261700868606567, + "step": 3611 + }, + { + "epoch": 0.832258064516129, + "grad_norm": 0.9430749395940993, + "learning_rate": 1.35265857843345e-06, + "loss": 0.6799050569534302, + "step": 3612 + }, + { + "epoch": 0.8324884792626728, + "grad_norm": 1.0479319081515341, + "learning_rate": 1.3523020464434514e-06, + "loss": 0.9117664098739624, + "step": 3613 + }, + { + "epoch": 0.8327188940092166, + "grad_norm": 1.0691436327470698, + "learning_rate": 1.3519454633172771e-06, + "loss": 0.8637168407440186, + "step": 3614 + }, + { + "epoch": 0.8329493087557603, + "grad_norm": 0.8579929983536723, + "learning_rate": 1.3515888291066848e-06, + "loss": 0.8169793486595154, + "step": 3615 + }, + { + "epoch": 0.8331797235023042, + "grad_norm": 0.920659117563804, + "learning_rate": 1.3512321438634392e-06, + "loss": 0.6901019811630249, + "step": 3616 + }, + { + "epoch": 0.8334101382488479, + "grad_norm": 1.350300242304736, + "learning_rate": 1.3508754076393133e-06, + "loss": 0.868461012840271, + "step": 3617 + }, + { + "epoch": 0.8336405529953917, + "grad_norm": 0.9765625383196332, + "learning_rate": 1.3505186204860864e-06, + "loss": 0.7916195392608643, + "step": 3618 + }, + { + "epoch": 0.8338709677419355, + "grad_norm": 
0.9685384546753151, + "learning_rate": 1.3501617824555456e-06, + "loss": 0.7078498601913452, + "step": 3619 + }, + { + "epoch": 0.8341013824884793, + "grad_norm": 1.2242730037688179, + "learning_rate": 1.3498048935994857e-06, + "loss": 0.890669584274292, + "step": 3620 + }, + { + "epoch": 0.834331797235023, + "grad_norm": 0.8358453705503323, + "learning_rate": 1.3494479539697087e-06, + "loss": 0.8162761926651001, + "step": 3621 + }, + { + "epoch": 0.8345622119815668, + "grad_norm": 1.013077112717635, + "learning_rate": 1.3490909636180233e-06, + "loss": 0.7743235230445862, + "step": 3622 + }, + { + "epoch": 0.8347926267281106, + "grad_norm": 1.0099386147746707, + "learning_rate": 1.3487339225962472e-06, + "loss": 0.8297950029373169, + "step": 3623 + }, + { + "epoch": 0.8350230414746543, + "grad_norm": 1.1865830325248257, + "learning_rate": 1.3483768309562035e-06, + "loss": 0.9550352692604065, + "step": 3624 + }, + { + "epoch": 0.8352534562211982, + "grad_norm": 0.9576603479694407, + "learning_rate": 1.3480196887497242e-06, + "loss": 0.7343823909759521, + "step": 3625 + }, + { + "epoch": 0.8354838709677419, + "grad_norm": 1.0312198523972542, + "learning_rate": 1.3476624960286479e-06, + "loss": 0.8942683935165405, + "step": 3626 + }, + { + "epoch": 0.8357142857142857, + "grad_norm": 1.0216203737583824, + "learning_rate": 1.34730525284482e-06, + "loss": 0.778289794921875, + "step": 3627 + }, + { + "epoch": 0.8359447004608295, + "grad_norm": 0.8374039418656565, + "learning_rate": 1.3469479592500951e-06, + "loss": 0.5924088954925537, + "step": 3628 + }, + { + "epoch": 0.8361751152073733, + "grad_norm": 1.6640914693337763, + "learning_rate": 1.3465906152963329e-06, + "loss": 1.0363706350326538, + "step": 3629 + }, + { + "epoch": 0.836405529953917, + "grad_norm": 1.1094517477504633, + "learning_rate": 1.346233221035402e-06, + "loss": 0.7927669286727905, + "step": 3630 + }, + { + "epoch": 0.8366359447004609, + "grad_norm": 1.017803676905956, + "learning_rate": 
1.345875776519177e-06, + "loss": 0.8428707718849182, + "step": 3631 + }, + { + "epoch": 0.8368663594470046, + "grad_norm": 1.0894705086513103, + "learning_rate": 1.345518281799541e-06, + "loss": 0.7975403070449829, + "step": 3632 + }, + { + "epoch": 0.8370967741935483, + "grad_norm": 1.0032068733109394, + "learning_rate": 1.3451607369283842e-06, + "loss": 0.8383880853652954, + "step": 3633 + }, + { + "epoch": 0.8373271889400922, + "grad_norm": 1.007543360201824, + "learning_rate": 1.3448031419576028e-06, + "loss": 0.9033386707305908, + "step": 3634 + }, + { + "epoch": 0.8375576036866359, + "grad_norm": 1.1312406567077748, + "learning_rate": 1.3444454969391021e-06, + "loss": 0.8913514018058777, + "step": 3635 + }, + { + "epoch": 0.8377880184331797, + "grad_norm": 1.4041014769308477, + "learning_rate": 1.3440878019247936e-06, + "loss": 0.9051915407180786, + "step": 3636 + }, + { + "epoch": 0.8380184331797235, + "grad_norm": 0.9777048211867199, + "learning_rate": 1.343730056966596e-06, + "loss": 0.8240993618965149, + "step": 3637 + }, + { + "epoch": 0.8382488479262673, + "grad_norm": 1.1788464491037272, + "learning_rate": 1.3433722621164358e-06, + "loss": 0.8276345133781433, + "step": 3638 + }, + { + "epoch": 0.838479262672811, + "grad_norm": 1.1512835626079758, + "learning_rate": 1.343014417426246e-06, + "loss": 0.8250508904457092, + "step": 3639 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 1.0066201319773938, + "learning_rate": 1.342656522947968e-06, + "loss": 0.7872868180274963, + "step": 3640 + }, + { + "epoch": 0.8389400921658986, + "grad_norm": 0.8473767849665474, + "learning_rate": 1.3422985787335491e-06, + "loss": 0.7634146809577942, + "step": 3641 + }, + { + "epoch": 0.8391705069124424, + "grad_norm": 0.9991956505737468, + "learning_rate": 1.3419405848349448e-06, + "loss": 0.63923180103302, + "step": 3642 + }, + { + "epoch": 0.8394009216589862, + "grad_norm": 0.8936657519523178, + "learning_rate": 1.3415825413041173e-06, + "loss": 0.900942325592041, 
+ "step": 3643 + }, + { + "epoch": 0.83963133640553, + "grad_norm": 0.8086145892134451, + "learning_rate": 1.341224448193036e-06, + "loss": 0.6415199041366577, + "step": 3644 + }, + { + "epoch": 0.8398617511520737, + "grad_norm": 0.7541710851332, + "learning_rate": 1.3408663055536775e-06, + "loss": 0.7750275135040283, + "step": 3645 + }, + { + "epoch": 0.8400921658986175, + "grad_norm": 1.0677810215945565, + "learning_rate": 1.3405081134380264e-06, + "loss": 0.8159983158111572, + "step": 3646 + }, + { + "epoch": 0.8403225806451613, + "grad_norm": 1.0361250834896671, + "learning_rate": 1.3401498718980733e-06, + "loss": 0.6870952844619751, + "step": 3647 + }, + { + "epoch": 0.840552995391705, + "grad_norm": 1.0057736881312165, + "learning_rate": 1.3397915809858168e-06, + "loss": 0.8588749170303345, + "step": 3648 + }, + { + "epoch": 0.8407834101382489, + "grad_norm": 0.8944864050117411, + "learning_rate": 1.3394332407532619e-06, + "loss": 0.6926778554916382, + "step": 3649 + }, + { + "epoch": 0.8410138248847926, + "grad_norm": 0.9996715673645244, + "learning_rate": 1.3390748512524213e-06, + "loss": 0.7165309190750122, + "step": 3650 + }, + { + "epoch": 0.8412442396313364, + "grad_norm": 0.8676606625906299, + "learning_rate": 1.3387164125353149e-06, + "loss": 0.7782741189002991, + "step": 3651 + }, + { + "epoch": 0.8414746543778802, + "grad_norm": 1.2076812224962883, + "learning_rate": 1.3383579246539698e-06, + "loss": 0.9153795838356018, + "step": 3652 + }, + { + "epoch": 0.841705069124424, + "grad_norm": 0.9194313077193984, + "learning_rate": 1.33799938766042e-06, + "loss": 0.8419643044471741, + "step": 3653 + }, + { + "epoch": 0.8419354838709677, + "grad_norm": 0.9325821466469247, + "learning_rate": 1.3376408016067064e-06, + "loss": 0.6927728652954102, + "step": 3654 + }, + { + "epoch": 0.8421658986175116, + "grad_norm": 0.8795285549516815, + "learning_rate": 1.3372821665448774e-06, + "loss": 0.7721414566040039, + "step": 3655 + }, + { + "epoch": 
0.8423963133640553, + "grad_norm": 0.8650877944504008, + "learning_rate": 1.3369234825269887e-06, + "loss": 0.7277967929840088, + "step": 3656 + }, + { + "epoch": 0.8426267281105991, + "grad_norm": 0.8893990009557013, + "learning_rate": 1.336564749605102e-06, + "loss": 0.7764936089515686, + "step": 3657 + }, + { + "epoch": 0.8428571428571429, + "grad_norm": 1.0366422012708214, + "learning_rate": 1.336205967831288e-06, + "loss": 0.7445545196533203, + "step": 3658 + }, + { + "epoch": 0.8430875576036866, + "grad_norm": 0.9883734306246509, + "learning_rate": 1.3358471372576227e-06, + "loss": 0.8359465599060059, + "step": 3659 + }, + { + "epoch": 0.8433179723502304, + "grad_norm": 1.1992732184975974, + "learning_rate": 1.33548825793619e-06, + "loss": 0.8634141683578491, + "step": 3660 + }, + { + "epoch": 0.8435483870967742, + "grad_norm": 0.9932267949840192, + "learning_rate": 1.3351293299190804e-06, + "loss": 0.7365708351135254, + "step": 3661 + }, + { + "epoch": 0.843778801843318, + "grad_norm": 1.0553779905834517, + "learning_rate": 1.3347703532583927e-06, + "loss": 0.7135465145111084, + "step": 3662 + }, + { + "epoch": 0.8440092165898617, + "grad_norm": 0.9366872036776951, + "learning_rate": 1.3344113280062313e-06, + "loss": 0.7411447763442993, + "step": 3663 + }, + { + "epoch": 0.8442396313364056, + "grad_norm": 1.1654296408446096, + "learning_rate": 1.3340522542147081e-06, + "loss": 0.7765100002288818, + "step": 3664 + }, + { + "epoch": 0.8444700460829493, + "grad_norm": 0.9657216098787882, + "learning_rate": 1.3336931319359426e-06, + "loss": 0.7638096809387207, + "step": 3665 + }, + { + "epoch": 0.8447004608294931, + "grad_norm": 0.8148482611092309, + "learning_rate": 1.3333339612220606e-06, + "loss": 0.7114577889442444, + "step": 3666 + }, + { + "epoch": 0.8449308755760369, + "grad_norm": 1.075345107734405, + "learning_rate": 1.3329747421251955e-06, + "loss": 0.8702960014343262, + "step": 3667 + }, + { + "epoch": 0.8451612903225807, + "grad_norm": 
0.8702936794654799, + "learning_rate": 1.3326154746974878e-06, + "loss": 0.7248300313949585, + "step": 3668 + }, + { + "epoch": 0.8453917050691244, + "grad_norm": 1.0810218150457531, + "learning_rate": 1.332256158991084e-06, + "loss": 0.7648389339447021, + "step": 3669 + }, + { + "epoch": 0.8456221198156681, + "grad_norm": 1.1179174327015893, + "learning_rate": 1.3318967950581383e-06, + "loss": 0.7075401544570923, + "step": 3670 + }, + { + "epoch": 0.845852534562212, + "grad_norm": 0.9497106076514022, + "learning_rate": 1.3315373829508122e-06, + "loss": 0.6923220157623291, + "step": 3671 + }, + { + "epoch": 0.8460829493087557, + "grad_norm": 1.100773813694407, + "learning_rate": 1.3311779227212742e-06, + "loss": 0.7522361874580383, + "step": 3672 + }, + { + "epoch": 0.8463133640552996, + "grad_norm": 1.026931960572947, + "learning_rate": 1.3308184144216989e-06, + "loss": 0.7087293863296509, + "step": 3673 + }, + { + "epoch": 0.8465437788018433, + "grad_norm": 0.793322008156401, + "learning_rate": 1.3304588581042688e-06, + "loss": 0.782098650932312, + "step": 3674 + }, + { + "epoch": 0.8467741935483871, + "grad_norm": 1.029621860148689, + "learning_rate": 1.330099253821173e-06, + "loss": 0.7671197652816772, + "step": 3675 + }, + { + "epoch": 0.8470046082949308, + "grad_norm": 0.8604911309489864, + "learning_rate": 1.3297396016246073e-06, + "loss": 0.8098698258399963, + "step": 3676 + }, + { + "epoch": 0.8472350230414747, + "grad_norm": 0.9021265860196932, + "learning_rate": 1.3293799015667751e-06, + "loss": 0.7671023011207581, + "step": 3677 + }, + { + "epoch": 0.8474654377880184, + "grad_norm": 0.9115553667327773, + "learning_rate": 1.3290201536998862e-06, + "loss": 0.7448668479919434, + "step": 3678 + }, + { + "epoch": 0.8476958525345623, + "grad_norm": 1.4463207292378697, + "learning_rate": 1.3286603580761576e-06, + "loss": 0.946117639541626, + "step": 3679 + }, + { + "epoch": 0.847926267281106, + "grad_norm": 0.932975472082494, + "learning_rate": 
1.328300514747813e-06, + "loss": 0.8134163618087769, + "step": 3680 + }, + { + "epoch": 0.8481566820276498, + "grad_norm": 1.0433920810873991, + "learning_rate": 1.327940623767083e-06, + "loss": 0.725477933883667, + "step": 3681 + }, + { + "epoch": 0.8483870967741935, + "grad_norm": 0.9434209059724857, + "learning_rate": 1.3275806851862061e-06, + "loss": 0.8278200626373291, + "step": 3682 + }, + { + "epoch": 0.8486175115207373, + "grad_norm": 1.2837572025692205, + "learning_rate": 1.327220699057426e-06, + "loss": 0.8437181711196899, + "step": 3683 + }, + { + "epoch": 0.8488479262672811, + "grad_norm": 1.0932618965520366, + "learning_rate": 1.326860665432995e-06, + "loss": 0.8921856880187988, + "step": 3684 + }, + { + "epoch": 0.8490783410138248, + "grad_norm": 0.9850919430921788, + "learning_rate": 1.326500584365171e-06, + "loss": 0.7285119295120239, + "step": 3685 + }, + { + "epoch": 0.8493087557603687, + "grad_norm": 1.0119244636074918, + "learning_rate": 1.3261404559062196e-06, + "loss": 0.8968918323516846, + "step": 3686 + }, + { + "epoch": 0.8495391705069124, + "grad_norm": 0.9862869524570133, + "learning_rate": 1.3257802801084123e-06, + "loss": 0.6794285774230957, + "step": 3687 + }, + { + "epoch": 0.8497695852534562, + "grad_norm": 1.1495746754769118, + "learning_rate": 1.3254200570240291e-06, + "loss": 0.869774341583252, + "step": 3688 + }, + { + "epoch": 0.85, + "grad_norm": 1.1620464557259493, + "learning_rate": 1.3250597867053553e-06, + "loss": 0.7862332463264465, + "step": 3689 + }, + { + "epoch": 0.8502304147465438, + "grad_norm": 1.1253065949092746, + "learning_rate": 1.3246994692046835e-06, + "loss": 0.8424299955368042, + "step": 3690 + }, + { + "epoch": 0.8504608294930875, + "grad_norm": 0.7041532260107465, + "learning_rate": 1.3243391045743137e-06, + "loss": 0.6232138276100159, + "step": 3691 + }, + { + "epoch": 0.8506912442396314, + "grad_norm": 0.9563538572085633, + "learning_rate": 1.3239786928665523e-06, + "loss": 0.7108159065246582, + "step": 
3692 + }, + { + "epoch": 0.8509216589861751, + "grad_norm": 1.0262733388108027, + "learning_rate": 1.3236182341337126e-06, + "loss": 0.7282330393791199, + "step": 3693 + }, + { + "epoch": 0.8511520737327188, + "grad_norm": 1.2079736335999256, + "learning_rate": 1.3232577284281147e-06, + "loss": 0.7864304780960083, + "step": 3694 + }, + { + "epoch": 0.8513824884792627, + "grad_norm": 0.9682428596442779, + "learning_rate": 1.3228971758020852e-06, + "loss": 0.7826365232467651, + "step": 3695 + }, + { + "epoch": 0.8516129032258064, + "grad_norm": 1.0308498953586989, + "learning_rate": 1.322536576307958e-06, + "loss": 0.8429988026618958, + "step": 3696 + }, + { + "epoch": 0.8518433179723502, + "grad_norm": 1.106791902142165, + "learning_rate": 1.322175929998074e-06, + "loss": 0.771148145198822, + "step": 3697 + }, + { + "epoch": 0.852073732718894, + "grad_norm": 1.2323556662321768, + "learning_rate": 1.3218152369247804e-06, + "loss": 0.9610496759414673, + "step": 3698 + }, + { + "epoch": 0.8523041474654378, + "grad_norm": 1.0124488299649408, + "learning_rate": 1.321454497140431e-06, + "loss": 0.7286547422409058, + "step": 3699 + }, + { + "epoch": 0.8525345622119815, + "grad_norm": 0.8362780560832063, + "learning_rate": 1.321093710697387e-06, + "loss": 0.7446750402450562, + "step": 3700 + }, + { + "epoch": 0.8527649769585254, + "grad_norm": 0.8774754337310029, + "learning_rate": 1.3207328776480156e-06, + "loss": 0.7211639881134033, + "step": 3701 + }, + { + "epoch": 0.8529953917050691, + "grad_norm": 0.9667628641735269, + "learning_rate": 1.320371998044692e-06, + "loss": 0.765962541103363, + "step": 3702 + }, + { + "epoch": 0.853225806451613, + "grad_norm": 1.0775083181101466, + "learning_rate": 1.3200110719397967e-06, + "loss": 0.9090084433555603, + "step": 3703 + }, + { + "epoch": 0.8534562211981567, + "grad_norm": 0.9604272002153474, + "learning_rate": 1.319650099385718e-06, + "loss": 0.8222901225090027, + "step": 3704 + }, + { + "epoch": 0.8536866359447005, + 
"grad_norm": 1.0297311955715076, + "learning_rate": 1.3192890804348508e-06, + "loss": 0.7929965853691101, + "step": 3705 + }, + { + "epoch": 0.8539170506912442, + "grad_norm": 0.9788103737354025, + "learning_rate": 1.318928015139596e-06, + "loss": 0.89229816198349, + "step": 3706 + }, + { + "epoch": 0.854147465437788, + "grad_norm": 1.1185541946390394, + "learning_rate": 1.3185669035523621e-06, + "loss": 0.8348276615142822, + "step": 3707 + }, + { + "epoch": 0.8543778801843318, + "grad_norm": 1.0960703003892842, + "learning_rate": 1.3182057457255639e-06, + "loss": 0.9006820917129517, + "step": 3708 + }, + { + "epoch": 0.8546082949308755, + "grad_norm": 0.8300224623954644, + "learning_rate": 1.3178445417116233e-06, + "loss": 0.665691614151001, + "step": 3709 + }, + { + "epoch": 0.8548387096774194, + "grad_norm": 0.6677558949928035, + "learning_rate": 1.3174832915629677e-06, + "loss": 0.7073110342025757, + "step": 3710 + }, + { + "epoch": 0.8550691244239631, + "grad_norm": 1.0807205184602706, + "learning_rate": 1.317121995332033e-06, + "loss": 0.7125800848007202, + "step": 3711 + }, + { + "epoch": 0.8552995391705069, + "grad_norm": 1.1504081133401938, + "learning_rate": 1.31676065307126e-06, + "loss": 0.847205638885498, + "step": 3712 + }, + { + "epoch": 0.8555299539170507, + "grad_norm": 1.1272186923536152, + "learning_rate": 1.3163992648330979e-06, + "loss": 0.860866904258728, + "step": 3713 + }, + { + "epoch": 0.8557603686635945, + "grad_norm": 0.9974272492162177, + "learning_rate": 1.3160378306700014e-06, + "loss": 0.811161994934082, + "step": 3714 + }, + { + "epoch": 0.8559907834101382, + "grad_norm": 1.059693566679631, + "learning_rate": 1.3156763506344318e-06, + "loss": 1.0276790857315063, + "step": 3715 + }, + { + "epoch": 0.8562211981566821, + "grad_norm": 0.8617440282777447, + "learning_rate": 1.3153148247788584e-06, + "loss": 0.7462253570556641, + "step": 3716 + }, + { + "epoch": 0.8564516129032258, + "grad_norm": 1.281384523734545, + "learning_rate": 
1.314953253155755e-06, + "loss": 0.9181896448135376, + "step": 3717 + }, + { + "epoch": 0.8566820276497696, + "grad_norm": 0.7940667691684741, + "learning_rate": 1.3145916358176044e-06, + "loss": 0.5943678021430969, + "step": 3718 + }, + { + "epoch": 0.8569124423963134, + "grad_norm": 0.9268739898787507, + "learning_rate": 1.3142299728168942e-06, + "loss": 0.7908656597137451, + "step": 3719 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 1.2242140267734891, + "learning_rate": 1.3138682642061192e-06, + "loss": 0.8716393709182739, + "step": 3720 + }, + { + "epoch": 0.8573732718894009, + "grad_norm": 0.9921811812486295, + "learning_rate": 1.3135065100377814e-06, + "loss": 0.76909339427948, + "step": 3721 + }, + { + "epoch": 0.8576036866359447, + "grad_norm": 1.0272733292998222, + "learning_rate": 1.3131447103643884e-06, + "loss": 0.7896728515625, + "step": 3722 + }, + { + "epoch": 0.8578341013824885, + "grad_norm": 1.0326134494637835, + "learning_rate": 1.3127828652384554e-06, + "loss": 0.8458575010299683, + "step": 3723 + }, + { + "epoch": 0.8580645161290322, + "grad_norm": 0.9849414066001893, + "learning_rate": 1.3124209747125036e-06, + "loss": 0.7419729232788086, + "step": 3724 + }, + { + "epoch": 0.8582949308755761, + "grad_norm": 0.9131603734827297, + "learning_rate": 1.3120590388390608e-06, + "loss": 0.8801093697547913, + "step": 3725 + }, + { + "epoch": 0.8585253456221198, + "grad_norm": 0.7986933302941567, + "learning_rate": 1.3116970576706617e-06, + "loss": 0.6337816715240479, + "step": 3726 + }, + { + "epoch": 0.8587557603686636, + "grad_norm": 1.1352865331161706, + "learning_rate": 1.3113350312598472e-06, + "loss": 0.8099665641784668, + "step": 3727 + }, + { + "epoch": 0.8589861751152074, + "grad_norm": 1.0467011868433627, + "learning_rate": 1.3109729596591651e-06, + "loss": 0.7430413961410522, + "step": 3728 + }, + { + "epoch": 0.8592165898617512, + "grad_norm": 1.0569982664185076, + "learning_rate": 1.3106108429211699e-06, + "loss": 
0.7374905347824097, + "step": 3729 + }, + { + "epoch": 0.8594470046082949, + "grad_norm": 0.7857724004075162, + "learning_rate": 1.3102486810984217e-06, + "loss": 0.71753990650177, + "step": 3730 + }, + { + "epoch": 0.8596774193548387, + "grad_norm": 1.0554970253272185, + "learning_rate": 1.3098864742434885e-06, + "loss": 0.9126461744308472, + "step": 3731 + }, + { + "epoch": 0.8599078341013825, + "grad_norm": 1.1141466235187625, + "learning_rate": 1.3095242224089434e-06, + "loss": 0.846487283706665, + "step": 3732 + }, + { + "epoch": 0.8601382488479262, + "grad_norm": 0.9640305278845377, + "learning_rate": 1.3091619256473671e-06, + "loss": 0.7026070952415466, + "step": 3733 + }, + { + "epoch": 0.8603686635944701, + "grad_norm": 1.2209599470129553, + "learning_rate": 1.3087995840113471e-06, + "loss": 1.0044158697128296, + "step": 3734 + }, + { + "epoch": 0.8605990783410138, + "grad_norm": 1.2732308696122019, + "learning_rate": 1.3084371975534759e-06, + "loss": 0.8061608076095581, + "step": 3735 + }, + { + "epoch": 0.8608294930875576, + "grad_norm": 1.2155874878372677, + "learning_rate": 1.308074766326354e-06, + "loss": 0.9189345836639404, + "step": 3736 + }, + { + "epoch": 0.8610599078341014, + "grad_norm": 3.0839554304770314, + "learning_rate": 1.3077122903825875e-06, + "loss": 0.8183290958404541, + "step": 3737 + }, + { + "epoch": 0.8612903225806452, + "grad_norm": 0.9202037098580877, + "learning_rate": 1.3073497697747893e-06, + "loss": 0.860893726348877, + "step": 3738 + }, + { + "epoch": 0.8615207373271889, + "grad_norm": 0.7717429741205805, + "learning_rate": 1.306987204555579e-06, + "loss": 0.6732957363128662, + "step": 3739 + }, + { + "epoch": 0.8617511520737328, + "grad_norm": 0.9444170667577415, + "learning_rate": 1.3066245947775821e-06, + "loss": 0.7910758256912231, + "step": 3740 + }, + { + "epoch": 0.8619815668202765, + "grad_norm": 1.316217805471382, + "learning_rate": 1.3062619404934317e-06, + "loss": 0.9422181844711304, + "step": 3741 + }, + { + 
"epoch": 0.8622119815668203, + "grad_norm": 0.9698503213179374, + "learning_rate": 1.3058992417557657e-06, + "loss": 0.7731142044067383, + "step": 3742 + }, + { + "epoch": 0.8624423963133641, + "grad_norm": 0.9561313394387324, + "learning_rate": 1.3055364986172296e-06, + "loss": 0.8419089317321777, + "step": 3743 + }, + { + "epoch": 0.8626728110599078, + "grad_norm": 0.8852750785802604, + "learning_rate": 1.3051737111304757e-06, + "loss": 0.7535419464111328, + "step": 3744 + }, + { + "epoch": 0.8629032258064516, + "grad_norm": 0.8636514927767351, + "learning_rate": 1.3048108793481614e-06, + "loss": 0.7744847536087036, + "step": 3745 + }, + { + "epoch": 0.8631336405529954, + "grad_norm": 1.04058809416254, + "learning_rate": 1.3044480033229513e-06, + "loss": 0.7578398585319519, + "step": 3746 + }, + { + "epoch": 0.8633640552995392, + "grad_norm": 1.2334871836764278, + "learning_rate": 1.3040850831075168e-06, + "loss": 0.8767418265342712, + "step": 3747 + }, + { + "epoch": 0.8635944700460829, + "grad_norm": 1.1256734507930313, + "learning_rate": 1.303722118754535e-06, + "loss": 0.7484671473503113, + "step": 3748 + }, + { + "epoch": 0.8638248847926268, + "grad_norm": 0.9064086460386975, + "learning_rate": 1.3033591103166897e-06, + "loss": 0.7231101989746094, + "step": 3749 + }, + { + "epoch": 0.8640552995391705, + "grad_norm": 0.896473034432068, + "learning_rate": 1.3029960578466709e-06, + "loss": 0.7626307606697083, + "step": 3750 + }, + { + "epoch": 0.8642857142857143, + "grad_norm": 1.0608055188685264, + "learning_rate": 1.302632961397176e-06, + "loss": 0.7244704961776733, + "step": 3751 + }, + { + "epoch": 0.864516129032258, + "grad_norm": 1.0368271143877468, + "learning_rate": 1.3022698210209066e-06, + "loss": 0.8575884103775024, + "step": 3752 + }, + { + "epoch": 0.8647465437788019, + "grad_norm": 1.050928094888414, + "learning_rate": 1.3019066367705733e-06, + "loss": 0.7617322206497192, + "step": 3753 + }, + { + "epoch": 0.8649769585253456, + "grad_norm": 
1.0524737157850867, + "learning_rate": 1.3015434086988914e-06, + "loss": 0.7899904251098633, + "step": 3754 + }, + { + "epoch": 0.8652073732718893, + "grad_norm": 0.7826254299372721, + "learning_rate": 1.3011801368585825e-06, + "loss": 0.6405949592590332, + "step": 3755 + }, + { + "epoch": 0.8654377880184332, + "grad_norm": 1.004484214855527, + "learning_rate": 1.300816821302376e-06, + "loss": 0.8473223447799683, + "step": 3756 + }, + { + "epoch": 0.8656682027649769, + "grad_norm": 1.0318183916575985, + "learning_rate": 1.3004534620830059e-06, + "loss": 0.7843037843704224, + "step": 3757 + }, + { + "epoch": 0.8658986175115208, + "grad_norm": 0.8527211236886993, + "learning_rate": 1.3000900592532134e-06, + "loss": 0.7418329119682312, + "step": 3758 + }, + { + "epoch": 0.8661290322580645, + "grad_norm": 1.1686967012789897, + "learning_rate": 1.2997266128657462e-06, + "loss": 0.9007542133331299, + "step": 3759 + }, + { + "epoch": 0.8663594470046083, + "grad_norm": 1.0002999248018631, + "learning_rate": 1.2993631229733582e-06, + "loss": 0.7214536666870117, + "step": 3760 + }, + { + "epoch": 0.866589861751152, + "grad_norm": 1.060698383579802, + "learning_rate": 1.2989995896288085e-06, + "loss": 0.6538300514221191, + "step": 3761 + }, + { + "epoch": 0.8668202764976959, + "grad_norm": 0.8939424364373206, + "learning_rate": 1.2986360128848647e-06, + "loss": 0.8132497668266296, + "step": 3762 + }, + { + "epoch": 0.8670506912442396, + "grad_norm": 1.2692579875098073, + "learning_rate": 1.2982723927942987e-06, + "loss": 0.8940386176109314, + "step": 3763 + }, + { + "epoch": 0.8672811059907835, + "grad_norm": 0.9095968882110219, + "learning_rate": 1.2979087294098904e-06, + "loss": 0.7426153421401978, + "step": 3764 + }, + { + "epoch": 0.8675115207373272, + "grad_norm": 1.2314721218727755, + "learning_rate": 1.2975450227844236e-06, + "loss": 0.8140754103660583, + "step": 3765 + }, + { + "epoch": 0.867741935483871, + "grad_norm": 1.165847048536148, + "learning_rate": 
1.2971812729706907e-06, + "loss": 0.9078278541564941, + "step": 3766 + }, + { + "epoch": 0.8679723502304147, + "grad_norm": 0.8581444329277982, + "learning_rate": 1.29681748002149e-06, + "loss": 0.6632627248764038, + "step": 3767 + }, + { + "epoch": 0.8682027649769585, + "grad_norm": 1.0737542944031577, + "learning_rate": 1.2964536439896245e-06, + "loss": 0.913419246673584, + "step": 3768 + }, + { + "epoch": 0.8684331797235023, + "grad_norm": 0.9232699220030103, + "learning_rate": 1.2960897649279054e-06, + "loss": 0.776391863822937, + "step": 3769 + }, + { + "epoch": 0.868663594470046, + "grad_norm": 0.7836255693570048, + "learning_rate": 1.2957258428891488e-06, + "loss": 0.7171014547348022, + "step": 3770 + }, + { + "epoch": 0.8688940092165899, + "grad_norm": 1.072840063629104, + "learning_rate": 1.2953618779261776e-06, + "loss": 0.8848521709442139, + "step": 3771 + }, + { + "epoch": 0.8691244239631336, + "grad_norm": 0.9374655640180731, + "learning_rate": 1.2949978700918207e-06, + "loss": 0.6794570684432983, + "step": 3772 + }, + { + "epoch": 0.8693548387096774, + "grad_norm": 1.1765914680464367, + "learning_rate": 1.2946338194389137e-06, + "loss": 0.7128770351409912, + "step": 3773 + }, + { + "epoch": 0.8695852534562212, + "grad_norm": 1.0061805151394425, + "learning_rate": 1.2942697260202976e-06, + "loss": 0.7794370651245117, + "step": 3774 + }, + { + "epoch": 0.869815668202765, + "grad_norm": 0.8201503807835805, + "learning_rate": 1.2939055898888203e-06, + "loss": 0.7946528196334839, + "step": 3775 + }, + { + "epoch": 0.8700460829493087, + "grad_norm": 0.8253544658473864, + "learning_rate": 1.2935414110973357e-06, + "loss": 0.7052137851715088, + "step": 3776 + }, + { + "epoch": 0.8702764976958526, + "grad_norm": 1.1148062721900278, + "learning_rate": 1.293177189698704e-06, + "loss": 0.785929799079895, + "step": 3777 + }, + { + "epoch": 0.8705069124423963, + "grad_norm": 1.0434715730493578, + "learning_rate": 1.2928129257457915e-06, + "loss": 
0.7907861471176147, + "step": 3778 + }, + { + "epoch": 0.8707373271889401, + "grad_norm": 1.0141295879138945, + "learning_rate": 1.2924486192914704e-06, + "loss": 0.9145845770835876, + "step": 3779 + }, + { + "epoch": 0.8709677419354839, + "grad_norm": 1.2821040685334846, + "learning_rate": 1.2920842703886191e-06, + "loss": 0.8332167863845825, + "step": 3780 + }, + { + "epoch": 0.8711981566820276, + "grad_norm": 1.1443987508087015, + "learning_rate": 1.2917198790901229e-06, + "loss": 0.9593367576599121, + "step": 3781 + }, + { + "epoch": 0.8714285714285714, + "grad_norm": 1.1001262078147525, + "learning_rate": 1.2913554454488723e-06, + "loss": 0.9269144535064697, + "step": 3782 + }, + { + "epoch": 0.8716589861751152, + "grad_norm": 0.8577227656018163, + "learning_rate": 1.2909909695177645e-06, + "loss": 0.8474053144454956, + "step": 3783 + }, + { + "epoch": 0.871889400921659, + "grad_norm": 1.0482742591675172, + "learning_rate": 1.2906264513497027e-06, + "loss": 0.8098207116127014, + "step": 3784 + }, + { + "epoch": 0.8721198156682027, + "grad_norm": 0.9400670599728106, + "learning_rate": 1.2902618909975962e-06, + "loss": 0.7394517064094543, + "step": 3785 + }, + { + "epoch": 0.8723502304147466, + "grad_norm": 1.199479550356467, + "learning_rate": 1.2898972885143606e-06, + "loss": 0.8667110204696655, + "step": 3786 + }, + { + "epoch": 0.8725806451612903, + "grad_norm": 1.2600204383371998, + "learning_rate": 1.289532643952917e-06, + "loss": 0.826819121837616, + "step": 3787 + }, + { + "epoch": 0.8728110599078341, + "grad_norm": 0.9212030006613351, + "learning_rate": 1.2891679573661937e-06, + "loss": 0.7765695452690125, + "step": 3788 + }, + { + "epoch": 0.8730414746543779, + "grad_norm": 0.8409152224560986, + "learning_rate": 1.2888032288071245e-06, + "loss": 0.7180448770523071, + "step": 3789 + }, + { + "epoch": 0.8732718894009217, + "grad_norm": 0.9734045628890519, + "learning_rate": 1.2884384583286486e-06, + "loss": 0.7619662880897522, + "step": 3790 + }, + { + 
"epoch": 0.8735023041474654, + "grad_norm": 1.0439158459354512, + "learning_rate": 1.2880736459837123e-06, + "loss": 0.8332309126853943, + "step": 3791 + }, + { + "epoch": 0.8737327188940092, + "grad_norm": 1.019583919621154, + "learning_rate": 1.2877087918252676e-06, + "loss": 0.9314864277839661, + "step": 3792 + }, + { + "epoch": 0.873963133640553, + "grad_norm": 1.0252621742811456, + "learning_rate": 1.287343895906273e-06, + "loss": 0.8505650758743286, + "step": 3793 + }, + { + "epoch": 0.8741935483870967, + "grad_norm": 1.1808911521686665, + "learning_rate": 1.286978958279692e-06, + "loss": 0.8086442351341248, + "step": 3794 + }, + { + "epoch": 0.8744239631336406, + "grad_norm": 0.9931096763073582, + "learning_rate": 1.2866139789984951e-06, + "loss": 0.9369934797286987, + "step": 3795 + }, + { + "epoch": 0.8746543778801843, + "grad_norm": 1.0923174237783717, + "learning_rate": 1.2862489581156585e-06, + "loss": 0.6776204705238342, + "step": 3796 + }, + { + "epoch": 0.8748847926267281, + "grad_norm": 1.1437930163109349, + "learning_rate": 1.2858838956841646e-06, + "loss": 0.8742507100105286, + "step": 3797 + }, + { + "epoch": 0.8751152073732719, + "grad_norm": 0.8088256156858264, + "learning_rate": 1.285518791757002e-06, + "loss": 0.6592123508453369, + "step": 3798 + }, + { + "epoch": 0.8753456221198157, + "grad_norm": 1.064419209573929, + "learning_rate": 1.2851536463871646e-06, + "loss": 0.727974534034729, + "step": 3799 + }, + { + "epoch": 0.8755760368663594, + "grad_norm": 1.1114963626056278, + "learning_rate": 1.284788459627653e-06, + "loss": 0.734921395778656, + "step": 3800 + }, + { + "epoch": 0.8758064516129033, + "grad_norm": 1.1341924912712853, + "learning_rate": 1.2844232315314734e-06, + "loss": 0.8848391771316528, + "step": 3801 + }, + { + "epoch": 0.876036866359447, + "grad_norm": 0.9036415522550547, + "learning_rate": 1.284057962151638e-06, + "loss": 0.7014757394790649, + "step": 3802 + }, + { + "epoch": 0.8762672811059908, + "grad_norm": 
1.1253352689452834, + "learning_rate": 1.2836926515411662e-06, + "loss": 0.9037606716156006, + "step": 3803 + }, + { + "epoch": 0.8764976958525346, + "grad_norm": 1.0304179621449525, + "learning_rate": 1.2833272997530808e-06, + "loss": 0.7842103242874146, + "step": 3804 + }, + { + "epoch": 0.8767281105990783, + "grad_norm": 0.8881021582469312, + "learning_rate": 1.282961906840413e-06, + "loss": 0.7233899831771851, + "step": 3805 + }, + { + "epoch": 0.8769585253456221, + "grad_norm": 1.0965629604169354, + "learning_rate": 1.2825964728561995e-06, + "loss": 0.8439977169036865, + "step": 3806 + }, + { + "epoch": 0.8771889400921659, + "grad_norm": 0.9011702646392625, + "learning_rate": 1.2822309978534817e-06, + "loss": 0.6734062433242798, + "step": 3807 + }, + { + "epoch": 0.8774193548387097, + "grad_norm": 0.8611901516189409, + "learning_rate": 1.2818654818853082e-06, + "loss": 0.8132908344268799, + "step": 3808 + }, + { + "epoch": 0.8776497695852534, + "grad_norm": 1.0055540352806662, + "learning_rate": 1.2814999250047334e-06, + "loss": 0.7867386341094971, + "step": 3809 + }, + { + "epoch": 0.8778801843317973, + "grad_norm": 0.9631857828899055, + "learning_rate": 1.2811343272648172e-06, + "loss": 0.7367507219314575, + "step": 3810 + }, + { + "epoch": 0.878110599078341, + "grad_norm": 0.9475758390620135, + "learning_rate": 1.280768688718625e-06, + "loss": 0.8154586553573608, + "step": 3811 + }, + { + "epoch": 0.8783410138248848, + "grad_norm": 1.2471162716233217, + "learning_rate": 1.2804030094192297e-06, + "loss": 0.9962621331214905, + "step": 3812 + }, + { + "epoch": 0.8785714285714286, + "grad_norm": 0.9442759022004834, + "learning_rate": 1.280037289419709e-06, + "loss": 0.8720508813858032, + "step": 3813 + }, + { + "epoch": 0.8788018433179724, + "grad_norm": 0.9970556206238078, + "learning_rate": 1.2796715287731461e-06, + "loss": 0.7211558818817139, + "step": 3814 + }, + { + "epoch": 0.8790322580645161, + "grad_norm": 1.0985560987492957, + "learning_rate": 
1.279305727532631e-06, + "loss": 0.8354029059410095, + "step": 3815 + }, + { + "epoch": 0.8792626728110599, + "grad_norm": 1.2983425606164107, + "learning_rate": 1.2789398857512597e-06, + "loss": 0.9136772155761719, + "step": 3816 + }, + { + "epoch": 0.8794930875576037, + "grad_norm": 1.099731879502331, + "learning_rate": 1.2785740034821328e-06, + "loss": 0.7603391408920288, + "step": 3817 + }, + { + "epoch": 0.8797235023041474, + "grad_norm": 1.0043618459346715, + "learning_rate": 1.2782080807783582e-06, + "loss": 0.8938640356063843, + "step": 3818 + }, + { + "epoch": 0.8799539170506913, + "grad_norm": 0.9668042432935031, + "learning_rate": 1.2778421176930492e-06, + "loss": 0.8041675090789795, + "step": 3819 + }, + { + "epoch": 0.880184331797235, + "grad_norm": 0.858269124078789, + "learning_rate": 1.2774761142793246e-06, + "loss": 0.7128704786300659, + "step": 3820 + }, + { + "epoch": 0.8804147465437788, + "grad_norm": 1.01263470571454, + "learning_rate": 1.277110070590309e-06, + "loss": 0.7927603721618652, + "step": 3821 + }, + { + "epoch": 0.8806451612903226, + "grad_norm": 0.8447601312860044, + "learning_rate": 1.2767439866791342e-06, + "loss": 0.8294891119003296, + "step": 3822 + }, + { + "epoch": 0.8808755760368664, + "grad_norm": 1.0620381421224903, + "learning_rate": 1.2763778625989354e-06, + "loss": 0.8058860301971436, + "step": 3823 + }, + { + "epoch": 0.8811059907834101, + "grad_norm": 1.1264235058600618, + "learning_rate": 1.2760116984028559e-06, + "loss": 0.9073271751403809, + "step": 3824 + }, + { + "epoch": 0.881336405529954, + "grad_norm": 0.9871957246708625, + "learning_rate": 1.2756454941440439e-06, + "loss": 0.755131721496582, + "step": 3825 + }, + { + "epoch": 0.8815668202764977, + "grad_norm": 0.9177831986454672, + "learning_rate": 1.2752792498756532e-06, + "loss": 0.7571133375167847, + "step": 3826 + }, + { + "epoch": 0.8817972350230415, + "grad_norm": 1.0303718222421674, + "learning_rate": 1.2749129656508438e-06, + "loss": 
0.8021755218505859, + "step": 3827 + }, + { + "epoch": 0.8820276497695853, + "grad_norm": 0.9628359079626025, + "learning_rate": 1.2745466415227812e-06, + "loss": 0.7817519903182983, + "step": 3828 + }, + { + "epoch": 0.882258064516129, + "grad_norm": 0.9923984386602839, + "learning_rate": 1.2741802775446375e-06, + "loss": 0.7144416570663452, + "step": 3829 + }, + { + "epoch": 0.8824884792626728, + "grad_norm": 1.1770010674703593, + "learning_rate": 1.2738138737695894e-06, + "loss": 0.8154206275939941, + "step": 3830 + }, + { + "epoch": 0.8827188940092165, + "grad_norm": 1.0860031408073831, + "learning_rate": 1.2734474302508199e-06, + "loss": 0.7478733062744141, + "step": 3831 + }, + { + "epoch": 0.8829493087557604, + "grad_norm": 0.9998255564669785, + "learning_rate": 1.2730809470415177e-06, + "loss": 0.7792314291000366, + "step": 3832 + }, + { + "epoch": 0.8831797235023041, + "grad_norm": 1.1952265957395494, + "learning_rate": 1.2727144241948776e-06, + "loss": 0.8550708293914795, + "step": 3833 + }, + { + "epoch": 0.883410138248848, + "grad_norm": 1.14972903127367, + "learning_rate": 1.2723478617641e-06, + "loss": 0.9415113925933838, + "step": 3834 + }, + { + "epoch": 0.8836405529953917, + "grad_norm": 1.1062517985394071, + "learning_rate": 1.2719812598023909e-06, + "loss": 0.8359560370445251, + "step": 3835 + }, + { + "epoch": 0.8838709677419355, + "grad_norm": 1.2039080793867758, + "learning_rate": 1.2716146183629618e-06, + "loss": 0.9515634775161743, + "step": 3836 + }, + { + "epoch": 0.8841013824884792, + "grad_norm": 1.1195735084656264, + "learning_rate": 1.2712479374990302e-06, + "loss": 0.9433277249336243, + "step": 3837 + }, + { + "epoch": 0.8843317972350231, + "grad_norm": 1.022594144324791, + "learning_rate": 1.27088121726382e-06, + "loss": 0.809203028678894, + "step": 3838 + }, + { + "epoch": 0.8845622119815668, + "grad_norm": 1.0243153152488458, + "learning_rate": 1.2705144577105596e-06, + "loss": 0.8003803491592407, + "step": 3839 + }, + { + "epoch": 
0.8847926267281107, + "grad_norm": 1.0509871208480976, + "learning_rate": 1.2701476588924837e-06, + "loss": 0.8258087038993835, + "step": 3840 + }, + { + "epoch": 0.8850230414746544, + "grad_norm": 0.8336199164135607, + "learning_rate": 1.2697808208628326e-06, + "loss": 0.7337249517440796, + "step": 3841 + }, + { + "epoch": 0.8852534562211981, + "grad_norm": 1.1988508685394492, + "learning_rate": 1.269413943674853e-06, + "loss": 0.6963306665420532, + "step": 3842 + }, + { + "epoch": 0.885483870967742, + "grad_norm": 1.1494175494849699, + "learning_rate": 1.2690470273817955e-06, + "loss": 0.8849321603775024, + "step": 3843 + }, + { + "epoch": 0.8857142857142857, + "grad_norm": 0.9311581320318796, + "learning_rate": 1.2686800720369183e-06, + "loss": 0.804117739200592, + "step": 3844 + }, + { + "epoch": 0.8859447004608295, + "grad_norm": 0.9139368239237865, + "learning_rate": 1.2683130776934848e-06, + "loss": 0.7873985767364502, + "step": 3845 + }, + { + "epoch": 0.8861751152073732, + "grad_norm": 1.0475484077031534, + "learning_rate": 1.2679460444047627e-06, + "loss": 0.7401156425476074, + "step": 3846 + }, + { + "epoch": 0.8864055299539171, + "grad_norm": 1.1867976153376456, + "learning_rate": 1.2675789722240274e-06, + "loss": 0.8216343522071838, + "step": 3847 + }, + { + "epoch": 0.8866359447004608, + "grad_norm": 1.1126927795380483, + "learning_rate": 1.2672118612045583e-06, + "loss": 0.9367883205413818, + "step": 3848 + }, + { + "epoch": 0.8868663594470046, + "grad_norm": 1.333436966015092, + "learning_rate": 1.2668447113996411e-06, + "loss": 0.959208607673645, + "step": 3849 + }, + { + "epoch": 0.8870967741935484, + "grad_norm": 1.019926575329533, + "learning_rate": 1.2664775228625678e-06, + "loss": 0.754011869430542, + "step": 3850 + }, + { + "epoch": 0.8873271889400922, + "grad_norm": 1.0679613059424808, + "learning_rate": 1.2661102956466343e-06, + "loss": 0.7200918793678284, + "step": 3851 + }, + { + "epoch": 0.8875576036866359, + "grad_norm": 
1.1470470713937198, + "learning_rate": 1.2657430298051441e-06, + "loss": 0.7819997072219849, + "step": 3852 + }, + { + "epoch": 0.8877880184331797, + "grad_norm": 0.7442261609023784, + "learning_rate": 1.2653757253914045e-06, + "loss": 0.6145305037498474, + "step": 3853 + }, + { + "epoch": 0.8880184331797235, + "grad_norm": 1.0307629205268725, + "learning_rate": 1.2650083824587298e-06, + "loss": 0.8730908036231995, + "step": 3854 + }, + { + "epoch": 0.8882488479262672, + "grad_norm": 0.8412211397931054, + "learning_rate": 1.2646410010604395e-06, + "loss": 0.7595944404602051, + "step": 3855 + }, + { + "epoch": 0.8884792626728111, + "grad_norm": 1.1742884385001073, + "learning_rate": 1.264273581249858e-06, + "loss": 0.8533104658126831, + "step": 3856 + }, + { + "epoch": 0.8887096774193548, + "grad_norm": 0.9075889816265436, + "learning_rate": 1.263906123080316e-06, + "loss": 0.7239818572998047, + "step": 3857 + }, + { + "epoch": 0.8889400921658986, + "grad_norm": 1.1211735744208717, + "learning_rate": 1.2635386266051498e-06, + "loss": 0.7675650119781494, + "step": 3858 + }, + { + "epoch": 0.8891705069124424, + "grad_norm": 1.03231156560467, + "learning_rate": 1.2631710918777007e-06, + "loss": 0.8886630535125732, + "step": 3859 + }, + { + "epoch": 0.8894009216589862, + "grad_norm": 1.078590523668252, + "learning_rate": 1.2628035189513159e-06, + "loss": 0.798930287361145, + "step": 3860 + }, + { + "epoch": 0.8896313364055299, + "grad_norm": 0.9635414297502106, + "learning_rate": 1.2624359078793484e-06, + "loss": 0.7189278602600098, + "step": 3861 + }, + { + "epoch": 0.8898617511520738, + "grad_norm": 1.0909939790359444, + "learning_rate": 1.2620682587151565e-06, + "loss": 0.8187342882156372, + "step": 3862 + }, + { + "epoch": 0.8900921658986175, + "grad_norm": 1.1174191800105742, + "learning_rate": 1.2617005715121034e-06, + "loss": 0.880839467048645, + "step": 3863 + }, + { + "epoch": 0.8903225806451613, + "grad_norm": 0.9160208180175933, + "learning_rate": 
1.2613328463235586e-06, + "loss": 0.84575355052948, + "step": 3864 + }, + { + "epoch": 0.8905529953917051, + "grad_norm": 0.8361425077510937, + "learning_rate": 1.2609650832028978e-06, + "loss": 0.6823658347129822, + "step": 3865 + }, + { + "epoch": 0.8907834101382488, + "grad_norm": 1.0695425966983703, + "learning_rate": 1.2605972822035e-06, + "loss": 0.8295711278915405, + "step": 3866 + }, + { + "epoch": 0.8910138248847926, + "grad_norm": 1.1932993089448705, + "learning_rate": 1.2602294433787518e-06, + "loss": 0.8684213161468506, + "step": 3867 + }, + { + "epoch": 0.8912442396313364, + "grad_norm": 0.8493371065418897, + "learning_rate": 1.2598615667820447e-06, + "loss": 0.6560889482498169, + "step": 3868 + }, + { + "epoch": 0.8914746543778802, + "grad_norm": 1.0552959260029386, + "learning_rate": 1.259493652466775e-06, + "loss": 0.740487277507782, + "step": 3869 + }, + { + "epoch": 0.8917050691244239, + "grad_norm": 0.9680726179927289, + "learning_rate": 1.2591257004863453e-06, + "loss": 0.8167253732681274, + "step": 3870 + }, + { + "epoch": 0.8919354838709678, + "grad_norm": 0.8741208745575088, + "learning_rate": 1.2587577108941634e-06, + "loss": 0.8521690368652344, + "step": 3871 + }, + { + "epoch": 0.8921658986175115, + "grad_norm": 1.263426910808872, + "learning_rate": 1.2583896837436418e-06, + "loss": 0.8830848932266235, + "step": 3872 + }, + { + "epoch": 0.8923963133640553, + "grad_norm": 0.9234650272103238, + "learning_rate": 1.2580216190881999e-06, + "loss": 0.7080649137496948, + "step": 3873 + }, + { + "epoch": 0.8926267281105991, + "grad_norm": 0.9098984938292525, + "learning_rate": 1.2576535169812614e-06, + "loss": 0.8013911247253418, + "step": 3874 + }, + { + "epoch": 0.8928571428571429, + "grad_norm": 0.9781454154869316, + "learning_rate": 1.2572853774762564e-06, + "loss": 0.8307033777236938, + "step": 3875 + }, + { + "epoch": 0.8930875576036866, + "grad_norm": 1.003074779947638, + "learning_rate": 1.256917200626619e-06, + "loss": 0.7514123916625977, 
+ "step": 3876 + }, + { + "epoch": 0.8933179723502304, + "grad_norm": 1.3024082731165083, + "learning_rate": 1.2565489864857903e-06, + "loss": 0.7608132362365723, + "step": 3877 + }, + { + "epoch": 0.8935483870967742, + "grad_norm": 0.9570998315665514, + "learning_rate": 1.256180735107216e-06, + "loss": 0.8011139631271362, + "step": 3878 + }, + { + "epoch": 0.8937788018433179, + "grad_norm": 1.134653936381734, + "learning_rate": 1.2558124465443467e-06, + "loss": 0.9760414958000183, + "step": 3879 + }, + { + "epoch": 0.8940092165898618, + "grad_norm": 1.0547420638261442, + "learning_rate": 1.2554441208506399e-06, + "loss": 0.7292976379394531, + "step": 3880 + }, + { + "epoch": 0.8942396313364055, + "grad_norm": 1.0683215421992245, + "learning_rate": 1.255075758079557e-06, + "loss": 0.819061279296875, + "step": 3881 + }, + { + "epoch": 0.8944700460829493, + "grad_norm": 1.006803716245281, + "learning_rate": 1.2547073582845652e-06, + "loss": 0.8407306671142578, + "step": 3882 + }, + { + "epoch": 0.8947004608294931, + "grad_norm": 0.8233707920449198, + "learning_rate": 1.2543389215191379e-06, + "loss": 0.7452164888381958, + "step": 3883 + }, + { + "epoch": 0.8949308755760369, + "grad_norm": 1.049978361878961, + "learning_rate": 1.2539704478367525e-06, + "loss": 0.9001756310462952, + "step": 3884 + }, + { + "epoch": 0.8951612903225806, + "grad_norm": 0.8057583780945189, + "learning_rate": 1.253601937290893e-06, + "loss": 0.7006322741508484, + "step": 3885 + }, + { + "epoch": 0.8953917050691245, + "grad_norm": 0.9116907763776896, + "learning_rate": 1.253233389935048e-06, + "loss": 0.8464070558547974, + "step": 3886 + }, + { + "epoch": 0.8956221198156682, + "grad_norm": 0.9768693849406578, + "learning_rate": 1.2528648058227117e-06, + "loss": 0.8153925538063049, + "step": 3887 + }, + { + "epoch": 0.895852534562212, + "grad_norm": 0.9311867207234187, + "learning_rate": 1.2524961850073835e-06, + "loss": 0.7093103528022766, + "step": 3888 + }, + { + "epoch": 
0.8960829493087558, + "grad_norm": 0.8533841155936702, + "learning_rate": 1.2521275275425685e-06, + "loss": 0.676047682762146, + "step": 3889 + }, + { + "epoch": 0.8963133640552995, + "grad_norm": 0.87097687176947, + "learning_rate": 1.2517588334817765e-06, + "loss": 0.6980170011520386, + "step": 3890 + }, + { + "epoch": 0.8965437788018433, + "grad_norm": 0.9291831127411667, + "learning_rate": 1.2513901028785232e-06, + "loss": 0.7343952655792236, + "step": 3891 + }, + { + "epoch": 0.896774193548387, + "grad_norm": 1.0285752510532034, + "learning_rate": 1.251021335786329e-06, + "loss": 0.6836012005805969, + "step": 3892 + }, + { + "epoch": 0.8970046082949309, + "grad_norm": 0.9328635468922583, + "learning_rate": 1.2506525322587204e-06, + "loss": 0.7405731678009033, + "step": 3893 + }, + { + "epoch": 0.8972350230414746, + "grad_norm": 0.9162563014074782, + "learning_rate": 1.2502836923492288e-06, + "loss": 0.7626791596412659, + "step": 3894 + }, + { + "epoch": 0.8974654377880185, + "grad_norm": 0.8530894630449782, + "learning_rate": 1.2499148161113904e-06, + "loss": 0.951126754283905, + "step": 3895 + }, + { + "epoch": 0.8976958525345622, + "grad_norm": 1.0356266230162976, + "learning_rate": 1.249545903598747e-06, + "loss": 0.8248430490493774, + "step": 3896 + }, + { + "epoch": 0.897926267281106, + "grad_norm": 1.0696916510331513, + "learning_rate": 1.2491769548648466e-06, + "loss": 0.9306991100311279, + "step": 3897 + }, + { + "epoch": 0.8981566820276498, + "grad_norm": 1.2546361240375576, + "learning_rate": 1.2488079699632406e-06, + "loss": 0.8529196977615356, + "step": 3898 + }, + { + "epoch": 0.8983870967741936, + "grad_norm": 1.1432122269665714, + "learning_rate": 1.2484389489474873e-06, + "loss": 0.8614317178726196, + "step": 3899 + }, + { + "epoch": 0.8986175115207373, + "grad_norm": 0.8777341649032664, + "learning_rate": 1.2480698918711494e-06, + "loss": 0.723548173904419, + "step": 3900 + }, + { + "epoch": 0.8988479262672812, + "grad_norm": 
0.8559428728446495, + "learning_rate": 1.2477007987877953e-06, + "loss": 0.9424235820770264, + "step": 3901 + }, + { + "epoch": 0.8990783410138249, + "grad_norm": 1.1966583189697881, + "learning_rate": 1.2473316697509982e-06, + "loss": 0.8307658433914185, + "step": 3902 + }, + { + "epoch": 0.8993087557603686, + "grad_norm": 0.9430977683906336, + "learning_rate": 1.2469625048143364e-06, + "loss": 0.7164772748947144, + "step": 3903 + }, + { + "epoch": 0.8995391705069125, + "grad_norm": 1.0578567003352413, + "learning_rate": 1.2465933040313941e-06, + "loss": 0.824491024017334, + "step": 3904 + }, + { + "epoch": 0.8997695852534562, + "grad_norm": 0.9955753469888821, + "learning_rate": 1.24622406745576e-06, + "loss": 0.7468826770782471, + "step": 3905 + }, + { + "epoch": 0.9, + "grad_norm": 1.0419833775918754, + "learning_rate": 1.2458547951410285e-06, + "loss": 0.8049126863479614, + "step": 3906 + }, + { + "epoch": 0.9002304147465438, + "grad_norm": 1.0794114769462158, + "learning_rate": 1.245485487140799e-06, + "loss": 0.658754825592041, + "step": 3907 + }, + { + "epoch": 0.9004608294930876, + "grad_norm": 0.9848364091798514, + "learning_rate": 1.245116143508676e-06, + "loss": 0.6772202849388123, + "step": 3908 + }, + { + "epoch": 0.9006912442396313, + "grad_norm": 0.9291487276824166, + "learning_rate": 1.2447467642982697e-06, + "loss": 0.8160394430160522, + "step": 3909 + }, + { + "epoch": 0.9009216589861752, + "grad_norm": 1.3459000002689838, + "learning_rate": 1.244377349563194e-06, + "loss": 0.8289823532104492, + "step": 3910 + }, + { + "epoch": 0.9011520737327189, + "grad_norm": 1.0130598759262572, + "learning_rate": 1.24400789935707e-06, + "loss": 0.7574084997177124, + "step": 3911 + }, + { + "epoch": 0.9013824884792627, + "grad_norm": 0.9665886404424858, + "learning_rate": 1.2436384137335218e-06, + "loss": 0.8116365671157837, + "step": 3912 + }, + { + "epoch": 0.9016129032258065, + "grad_norm": 1.0860329839978788, + "learning_rate": 1.2432688927461808e-06, + 
"loss": 0.814805805683136, + "step": 3913 + }, + { + "epoch": 0.9018433179723502, + "grad_norm": 0.9783977746996081, + "learning_rate": 1.2428993364486822e-06, + "loss": 0.7947453260421753, + "step": 3914 + }, + { + "epoch": 0.902073732718894, + "grad_norm": 1.1432103627131167, + "learning_rate": 1.2425297448946661e-06, + "loss": 0.939562976360321, + "step": 3915 + }, + { + "epoch": 0.9023041474654377, + "grad_norm": 0.9342812306918719, + "learning_rate": 1.2421601181377787e-06, + "loss": 0.9460225105285645, + "step": 3916 + }, + { + "epoch": 0.9025345622119816, + "grad_norm": 1.1417876456910938, + "learning_rate": 1.241790456231671e-06, + "loss": 0.9183799028396606, + "step": 3917 + }, + { + "epoch": 0.9027649769585253, + "grad_norm": 1.1195959115117728, + "learning_rate": 1.2414207592299984e-06, + "loss": 0.6793398857116699, + "step": 3918 + }, + { + "epoch": 0.9029953917050692, + "grad_norm": 0.9758451113738527, + "learning_rate": 1.2410510271864222e-06, + "loss": 0.7796125411987305, + "step": 3919 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 1.127885346985943, + "learning_rate": 1.2406812601546085e-06, + "loss": 0.8164567351341248, + "step": 3920 + }, + { + "epoch": 0.9034562211981567, + "grad_norm": 1.327729370966401, + "learning_rate": 1.2403114581882288e-06, + "loss": 0.7267879247665405, + "step": 3921 + }, + { + "epoch": 0.9036866359447004, + "grad_norm": 0.9644037075475709, + "learning_rate": 1.2399416213409586e-06, + "loss": 0.7277103066444397, + "step": 3922 + }, + { + "epoch": 0.9039170506912443, + "grad_norm": 1.1653209742127064, + "learning_rate": 1.23957174966648e-06, + "loss": 0.8507979512214661, + "step": 3923 + }, + { + "epoch": 0.904147465437788, + "grad_norm": 1.2024221808183382, + "learning_rate": 1.2392018432184792e-06, + "loss": 0.9431333541870117, + "step": 3924 + }, + { + "epoch": 0.9043778801843319, + "grad_norm": 0.9610849982223711, + "learning_rate": 1.2388319020506473e-06, + "loss": 0.669041633605957, + "step": 3925 + }, + { + 
"epoch": 0.9046082949308756, + "grad_norm": 1.0428863031922808, + "learning_rate": 1.2384619262166808e-06, + "loss": 0.7639964818954468, + "step": 3926 + }, + { + "epoch": 0.9048387096774193, + "grad_norm": 0.9055700075744166, + "learning_rate": 1.2380919157702819e-06, + "loss": 0.7390594482421875, + "step": 3927 + }, + { + "epoch": 0.9050691244239631, + "grad_norm": 1.0183193149474203, + "learning_rate": 1.2377218707651562e-06, + "loss": 0.8320105075836182, + "step": 3928 + }, + { + "epoch": 0.9052995391705069, + "grad_norm": 0.9604555269461571, + "learning_rate": 1.237351791255016e-06, + "loss": 0.6820249557495117, + "step": 3929 + }, + { + "epoch": 0.9055299539170507, + "grad_norm": 1.0758012435150028, + "learning_rate": 1.2369816772935773e-06, + "loss": 0.8548537492752075, + "step": 3930 + }, + { + "epoch": 0.9057603686635944, + "grad_norm": 1.0169473440313737, + "learning_rate": 1.236611528934562e-06, + "loss": 0.7226318120956421, + "step": 3931 + }, + { + "epoch": 0.9059907834101383, + "grad_norm": 1.2196278844047388, + "learning_rate": 1.2362413462316963e-06, + "loss": 0.879987359046936, + "step": 3932 + }, + { + "epoch": 0.906221198156682, + "grad_norm": 0.8628507992206548, + "learning_rate": 1.2358711292387122e-06, + "loss": 0.7919881343841553, + "step": 3933 + }, + { + "epoch": 0.9064516129032258, + "grad_norm": 1.0779297510278616, + "learning_rate": 1.2355008780093456e-06, + "loss": 0.8232694268226624, + "step": 3934 + }, + { + "epoch": 0.9066820276497696, + "grad_norm": 1.249487252121194, + "learning_rate": 1.2351305925973385e-06, + "loss": 0.80347740650177, + "step": 3935 + }, + { + "epoch": 0.9069124423963134, + "grad_norm": 1.2510529509996382, + "learning_rate": 1.234760273056437e-06, + "loss": 0.7818408012390137, + "step": 3936 + }, + { + "epoch": 0.9071428571428571, + "grad_norm": 1.1620371895322128, + "learning_rate": 1.2343899194403931e-06, + "loss": 0.8391210436820984, + "step": 3937 + }, + { + "epoch": 0.9073732718894009, + "grad_norm": 
1.1380529418025975, + "learning_rate": 1.2340195318029622e-06, + "loss": 0.7937500476837158, + "step": 3938 + }, + { + "epoch": 0.9076036866359447, + "grad_norm": 0.973433345758839, + "learning_rate": 1.2336491101979065e-06, + "loss": 0.7158668041229248, + "step": 3939 + }, + { + "epoch": 0.9078341013824884, + "grad_norm": 0.9549803277521113, + "learning_rate": 1.2332786546789915e-06, + "loss": 0.6956034898757935, + "step": 3940 + }, + { + "epoch": 0.9080645161290323, + "grad_norm": 1.035574155623001, + "learning_rate": 1.2329081652999887e-06, + "loss": 0.7252948880195618, + "step": 3941 + }, + { + "epoch": 0.908294930875576, + "grad_norm": 1.2086784459715743, + "learning_rate": 1.2325376421146739e-06, + "loss": 0.7131162881851196, + "step": 3942 + }, + { + "epoch": 0.9085253456221198, + "grad_norm": 0.8781165558243194, + "learning_rate": 1.2321670851768285e-06, + "loss": 0.7383663654327393, + "step": 3943 + }, + { + "epoch": 0.9087557603686636, + "grad_norm": 0.9355062944038273, + "learning_rate": 1.2317964945402374e-06, + "loss": 0.8296892642974854, + "step": 3944 + }, + { + "epoch": 0.9089861751152074, + "grad_norm": 1.1131069336270092, + "learning_rate": 1.2314258702586923e-06, + "loss": 0.8314273357391357, + "step": 3945 + }, + { + "epoch": 0.9092165898617511, + "grad_norm": 0.9647703306046335, + "learning_rate": 1.2310552123859888e-06, + "loss": 0.7264384031295776, + "step": 3946 + }, + { + "epoch": 0.909447004608295, + "grad_norm": 0.7580621867286127, + "learning_rate": 1.230684520975927e-06, + "loss": 0.6757937073707581, + "step": 3947 + }, + { + "epoch": 0.9096774193548387, + "grad_norm": 0.8884108342506404, + "learning_rate": 1.230313796082312e-06, + "loss": 0.8318504691123962, + "step": 3948 + }, + { + "epoch": 0.9099078341013825, + "grad_norm": 0.7767337233620181, + "learning_rate": 1.2299430377589547e-06, + "loss": 0.7043207883834839, + "step": 3949 + }, + { + "epoch": 0.9101382488479263, + "grad_norm": 1.0668368590995472, + "learning_rate": 
1.2295722460596696e-06, + "loss": 0.8499487638473511, + "step": 3950 + }, + { + "epoch": 0.91036866359447, + "grad_norm": 1.1145902688644103, + "learning_rate": 1.2292014210382772e-06, + "loss": 0.8219600319862366, + "step": 3951 + }, + { + "epoch": 0.9105990783410138, + "grad_norm": 1.2329010539695853, + "learning_rate": 1.2288305627486017e-06, + "loss": 0.8136317133903503, + "step": 3952 + }, + { + "epoch": 0.9108294930875576, + "grad_norm": 1.1220482069317936, + "learning_rate": 1.2284596712444735e-06, + "loss": 0.7858958840370178, + "step": 3953 + }, + { + "epoch": 0.9110599078341014, + "grad_norm": 1.182019995516566, + "learning_rate": 1.2280887465797259e-06, + "loss": 0.8108563423156738, + "step": 3954 + }, + { + "epoch": 0.9112903225806451, + "grad_norm": 1.17197106565382, + "learning_rate": 1.2277177888081987e-06, + "loss": 0.8061145544052124, + "step": 3955 + }, + { + "epoch": 0.911520737327189, + "grad_norm": 1.1140830632516712, + "learning_rate": 1.2273467979837361e-06, + "loss": 0.7769665718078613, + "step": 3956 + }, + { + "epoch": 0.9117511520737327, + "grad_norm": 1.5134088570090107, + "learning_rate": 1.2269757741601867e-06, + "loss": 1.0548570156097412, + "step": 3957 + }, + { + "epoch": 0.9119815668202765, + "grad_norm": 0.9732476833800602, + "learning_rate": 1.226604717391404e-06, + "loss": 0.7095952033996582, + "step": 3958 + }, + { + "epoch": 0.9122119815668203, + "grad_norm": 0.8435340807921997, + "learning_rate": 1.226233627731247e-06, + "loss": 0.7330363392829895, + "step": 3959 + }, + { + "epoch": 0.9124423963133641, + "grad_norm": 0.9706068481575616, + "learning_rate": 1.225862505233578e-06, + "loss": 0.7328442931175232, + "step": 3960 + }, + { + "epoch": 0.9126728110599078, + "grad_norm": 1.059740258312267, + "learning_rate": 1.2254913499522656e-06, + "loss": 0.7572993040084839, + "step": 3961 + }, + { + "epoch": 0.9129032258064517, + "grad_norm": 1.0542941153492202, + "learning_rate": 1.2251201619411823e-06, + "loss": 0.7706469297409058, 
+ "step": 3962 + }, + { + "epoch": 0.9131336405529954, + "grad_norm": 1.1436826868313579, + "learning_rate": 1.2247489412542053e-06, + "loss": 0.7830193042755127, + "step": 3963 + }, + { + "epoch": 0.9133640552995391, + "grad_norm": 1.0827904871592715, + "learning_rate": 1.224377687945217e-06, + "loss": 0.8415955901145935, + "step": 3964 + }, + { + "epoch": 0.913594470046083, + "grad_norm": 1.1895924425921953, + "learning_rate": 1.2240064020681044e-06, + "loss": 0.7383062839508057, + "step": 3965 + }, + { + "epoch": 0.9138248847926267, + "grad_norm": 1.1432920832791855, + "learning_rate": 1.2236350836767593e-06, + "loss": 0.7372882962226868, + "step": 3966 + }, + { + "epoch": 0.9140552995391705, + "grad_norm": 1.0941013432151616, + "learning_rate": 1.2232637328250776e-06, + "loss": 0.7914254665374756, + "step": 3967 + }, + { + "epoch": 0.9142857142857143, + "grad_norm": 0.9886213418734634, + "learning_rate": 1.2228923495669605e-06, + "loss": 0.8510675430297852, + "step": 3968 + }, + { + "epoch": 0.9145161290322581, + "grad_norm": 1.045281864627849, + "learning_rate": 1.2225209339563143e-06, + "loss": 0.7391757369041443, + "step": 3969 + }, + { + "epoch": 0.9147465437788018, + "grad_norm": 0.8746728562097662, + "learning_rate": 1.2221494860470491e-06, + "loss": 0.69194495677948, + "step": 3970 + }, + { + "epoch": 0.9149769585253457, + "grad_norm": 1.0907421288179358, + "learning_rate": 1.22177800589308e-06, + "loss": 0.7593865394592285, + "step": 3971 + }, + { + "epoch": 0.9152073732718894, + "grad_norm": 1.037234739347401, + "learning_rate": 1.2214064935483268e-06, + "loss": 0.7831966876983643, + "step": 3972 + }, + { + "epoch": 0.9154377880184332, + "grad_norm": 1.1150279108134162, + "learning_rate": 1.2210349490667145e-06, + "loss": 0.8858723640441895, + "step": 3973 + }, + { + "epoch": 0.915668202764977, + "grad_norm": 1.1381126617682915, + "learning_rate": 1.2206633725021715e-06, + "loss": 0.8645567893981934, + "step": 3974 + }, + { + "epoch": 
0.9158986175115207, + "grad_norm": 0.9188905804582469, + "learning_rate": 1.2202917639086322e-06, + "loss": 0.7619047164916992, + "step": 3975 + }, + { + "epoch": 0.9161290322580645, + "grad_norm": 1.0126992141273314, + "learning_rate": 1.2199201233400355e-06, + "loss": 0.8652681112289429, + "step": 3976 + }, + { + "epoch": 0.9163594470046083, + "grad_norm": 0.9961259698766619, + "learning_rate": 1.2195484508503234e-06, + "loss": 0.6860940456390381, + "step": 3977 + }, + { + "epoch": 0.9165898617511521, + "grad_norm": 0.8860870600955693, + "learning_rate": 1.2191767464934444e-06, + "loss": 0.7372464537620544, + "step": 3978 + }, + { + "epoch": 0.9168202764976958, + "grad_norm": 1.3495413684840594, + "learning_rate": 1.218805010323351e-06, + "loss": 0.8719853162765503, + "step": 3979 + }, + { + "epoch": 0.9170506912442397, + "grad_norm": 0.9968927276513252, + "learning_rate": 1.2184332423940003e-06, + "loss": 0.8203779458999634, + "step": 3980 + }, + { + "epoch": 0.9172811059907834, + "grad_norm": 1.197176686739939, + "learning_rate": 1.218061442759353e-06, + "loss": 0.8648861646652222, + "step": 3981 + }, + { + "epoch": 0.9175115207373272, + "grad_norm": 1.0630748229990676, + "learning_rate": 1.2176896114733766e-06, + "loss": 0.7651659250259399, + "step": 3982 + }, + { + "epoch": 0.917741935483871, + "grad_norm": 1.20459191964974, + "learning_rate": 1.2173177485900408e-06, + "loss": 0.8495512008666992, + "step": 3983 + }, + { + "epoch": 0.9179723502304148, + "grad_norm": 1.3559959351470627, + "learning_rate": 1.2169458541633216e-06, + "loss": 0.7997228503227234, + "step": 3984 + }, + { + "epoch": 0.9182027649769585, + "grad_norm": 0.9870494686008755, + "learning_rate": 1.2165739282471987e-06, + "loss": 0.8353173136711121, + "step": 3985 + }, + { + "epoch": 0.9184331797235024, + "grad_norm": 1.2277323881843956, + "learning_rate": 1.216201970895657e-06, + "loss": 0.9039655327796936, + "step": 3986 + }, + { + "epoch": 0.9186635944700461, + "grad_norm": 
0.9209288499077958, + "learning_rate": 1.2158299821626854e-06, + "loss": 0.8158592581748962, + "step": 3987 + }, + { + "epoch": 0.9188940092165898, + "grad_norm": 1.2007654555954255, + "learning_rate": 1.2154579621022776e-06, + "loss": 0.8443971872329712, + "step": 3988 + }, + { + "epoch": 0.9191244239631337, + "grad_norm": 0.916322848733307, + "learning_rate": 1.2150859107684318e-06, + "loss": 0.7934167385101318, + "step": 3989 + }, + { + "epoch": 0.9193548387096774, + "grad_norm": 1.1576910593833736, + "learning_rate": 1.2147138282151512e-06, + "loss": 0.750052809715271, + "step": 3990 + }, + { + "epoch": 0.9195852534562212, + "grad_norm": 1.0948767691124337, + "learning_rate": 1.2143417144964423e-06, + "loss": 0.813056468963623, + "step": 3991 + }, + { + "epoch": 0.919815668202765, + "grad_norm": 1.1487977592190233, + "learning_rate": 1.2139695696663174e-06, + "loss": 0.9478945732116699, + "step": 3992 + }, + { + "epoch": 0.9200460829493088, + "grad_norm": 0.9711264468634061, + "learning_rate": 1.2135973937787927e-06, + "loss": 0.687637448310852, + "step": 3993 + }, + { + "epoch": 0.9202764976958525, + "grad_norm": 1.071392128639805, + "learning_rate": 1.213225186887889e-06, + "loss": 0.8073818683624268, + "step": 3994 + }, + { + "epoch": 0.9205069124423964, + "grad_norm": 1.1074324196567935, + "learning_rate": 1.2128529490476318e-06, + "loss": 0.6684166789054871, + "step": 3995 + }, + { + "epoch": 0.9207373271889401, + "grad_norm": 1.1910033963986806, + "learning_rate": 1.2124806803120506e-06, + "loss": 0.7897466421127319, + "step": 3996 + }, + { + "epoch": 0.9209677419354839, + "grad_norm": 1.0375797321803883, + "learning_rate": 1.21210838073518e-06, + "loss": 0.832312822341919, + "step": 3997 + }, + { + "epoch": 0.9211981566820276, + "grad_norm": 1.036059468253791, + "learning_rate": 1.2117360503710588e-06, + "loss": 0.9536067247390747, + "step": 3998 + }, + { + "epoch": 0.9214285714285714, + "grad_norm": 1.123926651312402, + "learning_rate": 
1.2113636892737302e-06, + "loss": 0.8959759473800659, + "step": 3999 + }, + { + "epoch": 0.9216589861751152, + "grad_norm": 0.9405530325495998, + "learning_rate": 1.2109912974972422e-06, + "loss": 0.6789166927337646, + "step": 4000 + }, + { + "epoch": 0.9218894009216589, + "grad_norm": 0.9327551909921717, + "learning_rate": 1.2106188750956464e-06, + "loss": 0.7336491346359253, + "step": 4001 + }, + { + "epoch": 0.9221198156682028, + "grad_norm": 0.8000293761487048, + "learning_rate": 1.2102464221229997e-06, + "loss": 0.7838259935379028, + "step": 4002 + }, + { + "epoch": 0.9223502304147465, + "grad_norm": 1.2907858896278495, + "learning_rate": 1.2098739386333631e-06, + "loss": 0.9147623777389526, + "step": 4003 + }, + { + "epoch": 0.9225806451612903, + "grad_norm": 1.3691019040487797, + "learning_rate": 1.2095014246808022e-06, + "loss": 0.7296491265296936, + "step": 4004 + }, + { + "epoch": 0.9228110599078341, + "grad_norm": 1.1028104717001235, + "learning_rate": 1.2091288803193868e-06, + "loss": 0.7898432016372681, + "step": 4005 + }, + { + "epoch": 0.9230414746543779, + "grad_norm": 1.1562470474736035, + "learning_rate": 1.2087563056031914e-06, + "loss": 0.8190659284591675, + "step": 4006 + }, + { + "epoch": 0.9232718894009216, + "grad_norm": 1.4146112766933352, + "learning_rate": 1.2083837005862945e-06, + "loss": 0.8383443355560303, + "step": 4007 + }, + { + "epoch": 0.9235023041474655, + "grad_norm": 0.7251077105825574, + "learning_rate": 1.2080110653227796e-06, + "loss": 0.5987120866775513, + "step": 4008 + }, + { + "epoch": 0.9237327188940092, + "grad_norm": 1.056645940510342, + "learning_rate": 1.2076383998667334e-06, + "loss": 0.8811358213424683, + "step": 4009 + }, + { + "epoch": 0.923963133640553, + "grad_norm": 0.8867108269493398, + "learning_rate": 1.2072657042722486e-06, + "loss": 0.7958807349205017, + "step": 4010 + }, + { + "epoch": 0.9241935483870968, + "grad_norm": 1.1776412427000924, + "learning_rate": 1.2068929785934215e-06, + "loss": 
0.7192457914352417, + "step": 4011 + }, + { + "epoch": 0.9244239631336405, + "grad_norm": 1.0545419352254402, + "learning_rate": 1.2065202228843523e-06, + "loss": 0.6854838132858276, + "step": 4012 + }, + { + "epoch": 0.9246543778801843, + "grad_norm": 1.0759672957343283, + "learning_rate": 1.2061474371991457e-06, + "loss": 0.7334680557250977, + "step": 4013 + }, + { + "epoch": 0.9248847926267281, + "grad_norm": 0.9536076812745731, + "learning_rate": 1.205774621591912e-06, + "loss": 0.7614402770996094, + "step": 4014 + }, + { + "epoch": 0.9251152073732719, + "grad_norm": 1.3871826739545572, + "learning_rate": 1.2054017761167644e-06, + "loss": 0.7502505779266357, + "step": 4015 + }, + { + "epoch": 0.9253456221198156, + "grad_norm": 1.044146949688276, + "learning_rate": 1.2050289008278205e-06, + "loss": 0.7922523021697998, + "step": 4016 + }, + { + "epoch": 0.9255760368663595, + "grad_norm": 1.2025329853302307, + "learning_rate": 1.2046559957792032e-06, + "loss": 0.7534265518188477, + "step": 4017 + }, + { + "epoch": 0.9258064516129032, + "grad_norm": 0.9478426591249515, + "learning_rate": 1.2042830610250395e-06, + "loss": 0.6997093558311462, + "step": 4018 + }, + { + "epoch": 0.926036866359447, + "grad_norm": 1.050086676036124, + "learning_rate": 1.2039100966194594e-06, + "loss": 0.7009599208831787, + "step": 4019 + }, + { + "epoch": 0.9262672811059908, + "grad_norm": 1.108108705874163, + "learning_rate": 1.203537102616599e-06, + "loss": 0.795873761177063, + "step": 4020 + }, + { + "epoch": 0.9264976958525346, + "grad_norm": 1.1836803264586404, + "learning_rate": 1.2031640790705972e-06, + "loss": 0.7860225439071655, + "step": 4021 + }, + { + "epoch": 0.9267281105990783, + "grad_norm": 0.9036535621632875, + "learning_rate": 1.2027910260355989e-06, + "loss": 0.7657063007354736, + "step": 4022 + }, + { + "epoch": 0.9269585253456222, + "grad_norm": 1.0407468417409953, + "learning_rate": 1.2024179435657512e-06, + "loss": 0.782909631729126, + "step": 4023 + }, + { + 
"epoch": 0.9271889400921659, + "grad_norm": 0.8628791908243046, + "learning_rate": 1.202044831715207e-06, + "loss": 0.713431715965271, + "step": 4024 + }, + { + "epoch": 0.9274193548387096, + "grad_norm": 0.9826922843740741, + "learning_rate": 1.201671690538123e-06, + "loss": 0.9126790165901184, + "step": 4025 + }, + { + "epoch": 0.9276497695852535, + "grad_norm": 0.9552497173996132, + "learning_rate": 1.20129852008866e-06, + "loss": 0.8640999794006348, + "step": 4026 + }, + { + "epoch": 0.9278801843317972, + "grad_norm": 1.0290580406520045, + "learning_rate": 1.2009253204209832e-06, + "loss": 0.723473072052002, + "step": 4027 + }, + { + "epoch": 0.928110599078341, + "grad_norm": 0.9995947167655078, + "learning_rate": 1.2005520915892626e-06, + "loss": 0.6764041185379028, + "step": 4028 + }, + { + "epoch": 0.9283410138248848, + "grad_norm": 1.1315388960653066, + "learning_rate": 1.200178833647671e-06, + "loss": 0.8525882959365845, + "step": 4029 + }, + { + "epoch": 0.9285714285714286, + "grad_norm": 1.1279047416289067, + "learning_rate": 1.1998055466503872e-06, + "loss": 0.714957058429718, + "step": 4030 + }, + { + "epoch": 0.9288018433179723, + "grad_norm": 0.9055007840106456, + "learning_rate": 1.1994322306515925e-06, + "loss": 0.8015910387039185, + "step": 4031 + }, + { + "epoch": 0.9290322580645162, + "grad_norm": 1.1314666315910753, + "learning_rate": 1.1990588857054733e-06, + "loss": 1.0306739807128906, + "step": 4032 + }, + { + "epoch": 0.9292626728110599, + "grad_norm": 1.0078215910327748, + "learning_rate": 1.1986855118662205e-06, + "loss": 0.8307464122772217, + "step": 4033 + }, + { + "epoch": 0.9294930875576037, + "grad_norm": 0.9974753472669955, + "learning_rate": 1.1983121091880286e-06, + "loss": 0.8720347881317139, + "step": 4034 + }, + { + "epoch": 0.9297235023041475, + "grad_norm": 1.0249437684832297, + "learning_rate": 1.1979386777250968e-06, + "loss": 0.7716174721717834, + "step": 4035 + }, + { + "epoch": 0.9299539170506912, + "grad_norm": 
0.9533075514678258, + "learning_rate": 1.1975652175316279e-06, + "loss": 0.8968960046768188, + "step": 4036 + }, + { + "epoch": 0.930184331797235, + "grad_norm": 1.0235472692311864, + "learning_rate": 1.197191728661829e-06, + "loss": 0.7472472786903381, + "step": 4037 + }, + { + "epoch": 0.9304147465437788, + "grad_norm": 1.209577738801564, + "learning_rate": 1.196818211169912e-06, + "loss": 0.7969691753387451, + "step": 4038 + }, + { + "epoch": 0.9306451612903226, + "grad_norm": 0.8592343628435503, + "learning_rate": 1.196444665110092e-06, + "loss": 0.6187525987625122, + "step": 4039 + }, + { + "epoch": 0.9308755760368663, + "grad_norm": 1.0503056259771648, + "learning_rate": 1.1960710905365893e-06, + "loss": 0.8715502619743347, + "step": 4040 + }, + { + "epoch": 0.9311059907834102, + "grad_norm": 0.9918268480034713, + "learning_rate": 1.1956974875036273e-06, + "loss": 0.7174774408340454, + "step": 4041 + }, + { + "epoch": 0.9313364055299539, + "grad_norm": 0.8743867275561935, + "learning_rate": 1.1953238560654337e-06, + "loss": 0.6546192169189453, + "step": 4042 + }, + { + "epoch": 0.9315668202764977, + "grad_norm": 1.1024794232135675, + "learning_rate": 1.194950196276241e-06, + "loss": 0.8688700199127197, + "step": 4043 + }, + { + "epoch": 0.9317972350230415, + "grad_norm": 1.0449187982587707, + "learning_rate": 1.1945765081902856e-06, + "loss": 0.7679718732833862, + "step": 4044 + }, + { + "epoch": 0.9320276497695853, + "grad_norm": 0.9426197124643214, + "learning_rate": 1.1942027918618073e-06, + "loss": 0.6335175037384033, + "step": 4045 + }, + { + "epoch": 0.932258064516129, + "grad_norm": 1.0452657366695544, + "learning_rate": 1.1938290473450513e-06, + "loss": 0.785153865814209, + "step": 4046 + }, + { + "epoch": 0.9324884792626729, + "grad_norm": 0.9145063707903602, + "learning_rate": 1.1934552746942653e-06, + "loss": 0.6873019337654114, + "step": 4047 + }, + { + "epoch": 0.9327188940092166, + "grad_norm": 0.9707470479007109, + "learning_rate": 
1.1930814739637025e-06, + "loss": 0.7416094541549683, + "step": 4048 + }, + { + "epoch": 0.9329493087557603, + "grad_norm": 1.2103943548089806, + "learning_rate": 1.1927076452076193e-06, + "loss": 0.7206372618675232, + "step": 4049 + }, + { + "epoch": 0.9331797235023042, + "grad_norm": 1.1043264858931607, + "learning_rate": 1.1923337884802767e-06, + "loss": 0.8352477550506592, + "step": 4050 + }, + { + "epoch": 0.9334101382488479, + "grad_norm": 1.116832001192149, + "learning_rate": 1.191959903835939e-06, + "loss": 0.8243483304977417, + "step": 4051 + }, + { + "epoch": 0.9336405529953917, + "grad_norm": 1.4110893804735163, + "learning_rate": 1.1915859913288756e-06, + "loss": 0.827987790107727, + "step": 4052 + }, + { + "epoch": 0.9338709677419355, + "grad_norm": 1.1514055762505417, + "learning_rate": 1.1912120510133589e-06, + "loss": 0.8624123334884644, + "step": 4053 + }, + { + "epoch": 0.9341013824884793, + "grad_norm": 1.2091942284642192, + "learning_rate": 1.1908380829436667e-06, + "loss": 0.8615037202835083, + "step": 4054 + }, + { + "epoch": 0.934331797235023, + "grad_norm": 1.2500115524653743, + "learning_rate": 1.190464087174079e-06, + "loss": 0.9367121458053589, + "step": 4055 + }, + { + "epoch": 0.9345622119815669, + "grad_norm": 1.4503623207353766, + "learning_rate": 1.190090063758881e-06, + "loss": 0.927996039390564, + "step": 4056 + }, + { + "epoch": 0.9347926267281106, + "grad_norm": 1.0709061746508743, + "learning_rate": 1.1897160127523623e-06, + "loss": 0.841314435005188, + "step": 4057 + }, + { + "epoch": 0.9350230414746544, + "grad_norm": 1.1021939339887863, + "learning_rate": 1.189341934208815e-06, + "loss": 0.864904522895813, + "step": 4058 + }, + { + "epoch": 0.9352534562211982, + "grad_norm": 1.148301781904619, + "learning_rate": 1.188967828182537e-06, + "loss": 0.9505404829978943, + "step": 4059 + }, + { + "epoch": 0.9354838709677419, + "grad_norm": 1.0791372441668663, + "learning_rate": 1.188593694727829e-06, + "loss": 0.7347132563591003, + 
"step": 4060 + }, + { + "epoch": 0.9357142857142857, + "grad_norm": 1.1367351426324537, + "learning_rate": 1.1882195338989958e-06, + "loss": 0.6267231106758118, + "step": 4061 + }, + { + "epoch": 0.9359447004608294, + "grad_norm": 1.0946102482081315, + "learning_rate": 1.1878453457503464e-06, + "loss": 0.8052406907081604, + "step": 4062 + }, + { + "epoch": 0.9361751152073733, + "grad_norm": 1.1032845960202522, + "learning_rate": 1.1874711303361933e-06, + "loss": 0.7928211688995361, + "step": 4063 + }, + { + "epoch": 0.936405529953917, + "grad_norm": 1.1265414942472118, + "learning_rate": 1.1870968877108545e-06, + "loss": 0.8863959312438965, + "step": 4064 + }, + { + "epoch": 0.9366359447004609, + "grad_norm": 1.0592501761240638, + "learning_rate": 1.1867226179286496e-06, + "loss": 0.8749874830245972, + "step": 4065 + }, + { + "epoch": 0.9368663594470046, + "grad_norm": 0.9223254168257967, + "learning_rate": 1.186348321043904e-06, + "loss": 0.7516318559646606, + "step": 4066 + }, + { + "epoch": 0.9370967741935484, + "grad_norm": 1.0863969007807137, + "learning_rate": 1.1859739971109467e-06, + "loss": 0.8435031771659851, + "step": 4067 + }, + { + "epoch": 0.9373271889400921, + "grad_norm": 1.08570563607149, + "learning_rate": 1.1855996461841093e-06, + "loss": 0.8766932487487793, + "step": 4068 + }, + { + "epoch": 0.937557603686636, + "grad_norm": 1.2630999347152494, + "learning_rate": 1.1852252683177293e-06, + "loss": 0.8748513460159302, + "step": 4069 + }, + { + "epoch": 0.9377880184331797, + "grad_norm": 1.2689555695038703, + "learning_rate": 1.184850863566147e-06, + "loss": 0.8917855024337769, + "step": 4070 + }, + { + "epoch": 0.9380184331797236, + "grad_norm": 1.0628114663297852, + "learning_rate": 1.1844764319837064e-06, + "loss": 0.7631640434265137, + "step": 4071 + }, + { + "epoch": 0.9382488479262673, + "grad_norm": 1.0140155614547266, + "learning_rate": 1.1841019736247557e-06, + "loss": 0.8354158401489258, + "step": 4072 + }, + { + "epoch": 
0.938479262672811, + "grad_norm": 0.8561335978546013, + "learning_rate": 1.1837274885436473e-06, + "loss": 0.8122761845588684, + "step": 4073 + }, + { + "epoch": 0.9387096774193548, + "grad_norm": 1.5776279194471237, + "learning_rate": 1.1833529767947374e-06, + "loss": 0.8281430006027222, + "step": 4074 + }, + { + "epoch": 0.9389400921658986, + "grad_norm": 1.3828203317822199, + "learning_rate": 1.1829784384323856e-06, + "loss": 0.8291982412338257, + "step": 4075 + }, + { + "epoch": 0.9391705069124424, + "grad_norm": 1.3096607265096822, + "learning_rate": 1.1826038735109553e-06, + "loss": 0.8951852321624756, + "step": 4076 + }, + { + "epoch": 0.9394009216589861, + "grad_norm": 1.2165058417213606, + "learning_rate": 1.182229282084815e-06, + "loss": 0.7006446123123169, + "step": 4077 + }, + { + "epoch": 0.93963133640553, + "grad_norm": 1.1269330295000342, + "learning_rate": 1.1818546642083353e-06, + "loss": 0.8944047689437866, + "step": 4078 + }, + { + "epoch": 0.9398617511520737, + "grad_norm": 0.9351299115123082, + "learning_rate": 1.1814800199358919e-06, + "loss": 0.8252646923065186, + "step": 4079 + }, + { + "epoch": 0.9400921658986175, + "grad_norm": 1.2255680666736817, + "learning_rate": 1.181105349321864e-06, + "loss": 0.7852828502655029, + "step": 4080 + }, + { + "epoch": 0.9403225806451613, + "grad_norm": 1.0734973037527151, + "learning_rate": 1.1807306524206347e-06, + "loss": 0.7758563160896301, + "step": 4081 + }, + { + "epoch": 0.9405529953917051, + "grad_norm": 1.0672387708424669, + "learning_rate": 1.1803559292865899e-06, + "loss": 0.7297114133834839, + "step": 4082 + }, + { + "epoch": 0.9407834101382488, + "grad_norm": 1.1802096748579922, + "learning_rate": 1.1799811799741209e-06, + "loss": 0.7974321842193604, + "step": 4083 + }, + { + "epoch": 0.9410138248847926, + "grad_norm": 1.2930194654348013, + "learning_rate": 1.179606404537622e-06, + "loss": 0.6406733989715576, + "step": 4084 + }, + { + "epoch": 0.9412442396313364, + "grad_norm": 
0.9862268230007224, + "learning_rate": 1.179231603031491e-06, + "loss": 0.6925486326217651, + "step": 4085 + }, + { + "epoch": 0.9414746543778801, + "grad_norm": 0.9201295652583962, + "learning_rate": 1.17885677551013e-06, + "loss": 0.792647123336792, + "step": 4086 + }, + { + "epoch": 0.941705069124424, + "grad_norm": 1.0460531669846371, + "learning_rate": 1.1784819220279454e-06, + "loss": 0.7499191761016846, + "step": 4087 + }, + { + "epoch": 0.9419354838709677, + "grad_norm": 1.120763335726602, + "learning_rate": 1.1781070426393455e-06, + "loss": 0.8307451009750366, + "step": 4088 + }, + { + "epoch": 0.9421658986175115, + "grad_norm": 1.1015455973526673, + "learning_rate": 1.1777321373987445e-06, + "loss": 0.7859289646148682, + "step": 4089 + }, + { + "epoch": 0.9423963133640553, + "grad_norm": 1.0291702780651948, + "learning_rate": 1.177357206360559e-06, + "loss": 0.761134922504425, + "step": 4090 + }, + { + "epoch": 0.9426267281105991, + "grad_norm": 1.240188832472171, + "learning_rate": 1.1769822495792098e-06, + "loss": 0.8697078227996826, + "step": 4091 + }, + { + "epoch": 0.9428571428571428, + "grad_norm": 1.0395615260234665, + "learning_rate": 1.1766072671091212e-06, + "loss": 0.731541633605957, + "step": 4092 + }, + { + "epoch": 0.9430875576036867, + "grad_norm": 1.1056530512213054, + "learning_rate": 1.1762322590047219e-06, + "loss": 0.7501940727233887, + "step": 4093 + }, + { + "epoch": 0.9433179723502304, + "grad_norm": 1.1531150840189341, + "learning_rate": 1.1758572253204431e-06, + "loss": 0.9448602199554443, + "step": 4094 + }, + { + "epoch": 0.9435483870967742, + "grad_norm": 0.8884441593083074, + "learning_rate": 1.175482166110721e-06, + "loss": 0.7704026699066162, + "step": 4095 + }, + { + "epoch": 0.943778801843318, + "grad_norm": 0.8973060402184874, + "learning_rate": 1.1751070814299947e-06, + "loss": 0.7905057668685913, + "step": 4096 + }, + { + "epoch": 0.9440092165898617, + "grad_norm": 1.238350046583652, + "learning_rate": 
1.1747319713327078e-06, + "loss": 0.8957202434539795, + "step": 4097 + }, + { + "epoch": 0.9442396313364055, + "grad_norm": 0.9896078596502195, + "learning_rate": 1.174356835873306e-06, + "loss": 0.7922521233558655, + "step": 4098 + }, + { + "epoch": 0.9444700460829493, + "grad_norm": 0.9974151293119675, + "learning_rate": 1.1739816751062404e-06, + "loss": 0.6501933336257935, + "step": 4099 + }, + { + "epoch": 0.9447004608294931, + "grad_norm": 0.9673699554437744, + "learning_rate": 1.1736064890859654e-06, + "loss": 0.6743361353874207, + "step": 4100 + }, + { + "epoch": 0.9449308755760368, + "grad_norm": 1.0381670362595088, + "learning_rate": 1.173231277866938e-06, + "loss": 0.920632004737854, + "step": 4101 + }, + { + "epoch": 0.9451612903225807, + "grad_norm": 0.872889135902432, + "learning_rate": 1.1728560415036199e-06, + "loss": 0.7498964071273804, + "step": 4102 + }, + { + "epoch": 0.9453917050691244, + "grad_norm": 0.8444235514312883, + "learning_rate": 1.1724807800504765e-06, + "loss": 0.7665064334869385, + "step": 4103 + }, + { + "epoch": 0.9456221198156682, + "grad_norm": 0.8729439782855682, + "learning_rate": 1.172105493561976e-06, + "loss": 0.75946044921875, + "step": 4104 + }, + { + "epoch": 0.945852534562212, + "grad_norm": 1.016811663523364, + "learning_rate": 1.1717301820925908e-06, + "loss": 0.7701961398124695, + "step": 4105 + }, + { + "epoch": 0.9460829493087558, + "grad_norm": 0.9708618505769702, + "learning_rate": 1.1713548456967974e-06, + "loss": 0.7775348424911499, + "step": 4106 + }, + { + "epoch": 0.9463133640552995, + "grad_norm": 0.8519325609053343, + "learning_rate": 1.1709794844290745e-06, + "loss": 0.8149436712265015, + "step": 4107 + }, + { + "epoch": 0.9465437788018434, + "grad_norm": 0.8519085263981432, + "learning_rate": 1.170604098343906e-06, + "loss": 0.7136009335517883, + "step": 4108 + }, + { + "epoch": 0.9467741935483871, + "grad_norm": 1.2048256186284507, + "learning_rate": 1.1702286874957786e-06, + "loss": 0.7678873538970947, 
+ "step": 4109 + }, + { + "epoch": 0.9470046082949308, + "grad_norm": 0.9842223659547223, + "learning_rate": 1.1698532519391827e-06, + "loss": 0.7506710290908813, + "step": 4110 + }, + { + "epoch": 0.9472350230414747, + "grad_norm": 0.900893049038478, + "learning_rate": 1.1694777917286118e-06, + "loss": 0.6646897792816162, + "step": 4111 + }, + { + "epoch": 0.9474654377880184, + "grad_norm": 1.3857066059132386, + "learning_rate": 1.1691023069185639e-06, + "loss": 0.820647120475769, + "step": 4112 + }, + { + "epoch": 0.9476958525345622, + "grad_norm": 0.9795728799566645, + "learning_rate": 1.1687267975635402e-06, + "loss": 0.872378408908844, + "step": 4113 + }, + { + "epoch": 0.947926267281106, + "grad_norm": 1.0760361173899362, + "learning_rate": 1.168351263718045e-06, + "loss": 0.7920655608177185, + "step": 4114 + }, + { + "epoch": 0.9481566820276498, + "grad_norm": 1.1709025489256302, + "learning_rate": 1.1679757054365866e-06, + "loss": 0.6593836545944214, + "step": 4115 + }, + { + "epoch": 0.9483870967741935, + "grad_norm": 1.0965626572699905, + "learning_rate": 1.1676001227736772e-06, + "loss": 0.7473627328872681, + "step": 4116 + }, + { + "epoch": 0.9486175115207374, + "grad_norm": 1.2027339281506744, + "learning_rate": 1.1672245157838317e-06, + "loss": 0.8001665472984314, + "step": 4117 + }, + { + "epoch": 0.9488479262672811, + "grad_norm": 0.9543944768909415, + "learning_rate": 1.1668488845215689e-06, + "loss": 0.7342571020126343, + "step": 4118 + }, + { + "epoch": 0.9490783410138249, + "grad_norm": 1.2428163281726954, + "learning_rate": 1.1664732290414118e-06, + "loss": 0.7616822719573975, + "step": 4119 + }, + { + "epoch": 0.9493087557603687, + "grad_norm": 1.2486031522636918, + "learning_rate": 1.1660975493978857e-06, + "loss": 0.8885634541511536, + "step": 4120 + }, + { + "epoch": 0.9495391705069124, + "grad_norm": 1.1323168185847523, + "learning_rate": 1.1657218456455205e-06, + "loss": 0.7816281318664551, + "step": 4121 + }, + { + "epoch": 
0.9497695852534562, + "grad_norm": 0.9570364600334796, + "learning_rate": 1.1653461178388485e-06, + "loss": 0.7412079572677612, + "step": 4122 + }, + { + "epoch": 0.95, + "grad_norm": 0.957883425985998, + "learning_rate": 1.1649703660324064e-06, + "loss": 0.8096172213554382, + "step": 4123 + }, + { + "epoch": 0.9502304147465438, + "grad_norm": 1.0359903594582591, + "learning_rate": 1.164594590280734e-06, + "loss": 0.6690856218338013, + "step": 4124 + }, + { + "epoch": 0.9504608294930875, + "grad_norm": 0.9697541149080181, + "learning_rate": 1.1642187906383746e-06, + "loss": 0.7509289979934692, + "step": 4125 + }, + { + "epoch": 0.9506912442396314, + "grad_norm": 0.8506285939807987, + "learning_rate": 1.1638429671598754e-06, + "loss": 0.6643730401992798, + "step": 4126 + }, + { + "epoch": 0.9509216589861751, + "grad_norm": 0.994475544194171, + "learning_rate": 1.1634671198997864e-06, + "loss": 0.8100850582122803, + "step": 4127 + }, + { + "epoch": 0.9511520737327189, + "grad_norm": 1.392121351288023, + "learning_rate": 1.1630912489126612e-06, + "loss": 0.919742226600647, + "step": 4128 + }, + { + "epoch": 0.9513824884792627, + "grad_norm": 1.144319413666889, + "learning_rate": 1.1627153542530571e-06, + "loss": 0.8953771591186523, + "step": 4129 + }, + { + "epoch": 0.9516129032258065, + "grad_norm": 0.9663802093818391, + "learning_rate": 1.162339435975535e-06, + "loss": 0.7401770949363708, + "step": 4130 + }, + { + "epoch": 0.9518433179723502, + "grad_norm": 1.0071840947097435, + "learning_rate": 1.1619634941346585e-06, + "loss": 0.7618032097816467, + "step": 4131 + }, + { + "epoch": 0.9520737327188941, + "grad_norm": 1.3156218418351784, + "learning_rate": 1.1615875287849955e-06, + "loss": 0.9134000539779663, + "step": 4132 + }, + { + "epoch": 0.9523041474654378, + "grad_norm": 0.9617492928251477, + "learning_rate": 1.1612115399811162e-06, + "loss": 0.7555145025253296, + "step": 4133 + }, + { + "epoch": 0.9525345622119815, + "grad_norm": 0.9434517704683025, + 
"learning_rate": 1.1608355277775955e-06, + "loss": 0.9125050902366638, + "step": 4134 + }, + { + "epoch": 0.9527649769585254, + "grad_norm": 0.9082549396493419, + "learning_rate": 1.1604594922290106e-06, + "loss": 0.6575542688369751, + "step": 4135 + }, + { + "epoch": 0.9529953917050691, + "grad_norm": 1.0750997369204898, + "learning_rate": 1.1600834333899431e-06, + "loss": 0.7530527114868164, + "step": 4136 + }, + { + "epoch": 0.9532258064516129, + "grad_norm": 0.9603596342147773, + "learning_rate": 1.159707351314977e-06, + "loss": 0.8818701505661011, + "step": 4137 + }, + { + "epoch": 0.9534562211981567, + "grad_norm": 0.9491169409805379, + "learning_rate": 1.1593312460587003e-06, + "loss": 0.7172919511795044, + "step": 4138 + }, + { + "epoch": 0.9536866359447005, + "grad_norm": 1.1122266085503043, + "learning_rate": 1.1589551176757044e-06, + "loss": 0.8701400756835938, + "step": 4139 + }, + { + "epoch": 0.9539170506912442, + "grad_norm": 1.3285866575691943, + "learning_rate": 1.1585789662205834e-06, + "loss": 0.867475152015686, + "step": 4140 + }, + { + "epoch": 0.9541474654377881, + "grad_norm": 1.1851362026267, + "learning_rate": 1.1582027917479356e-06, + "loss": 0.7809052467346191, + "step": 4141 + }, + { + "epoch": 0.9543778801843318, + "grad_norm": 1.1986202884801196, + "learning_rate": 1.1578265943123619e-06, + "loss": 0.8589099645614624, + "step": 4142 + }, + { + "epoch": 0.9546082949308756, + "grad_norm": 0.893566517908755, + "learning_rate": 1.157450373968467e-06, + "loss": 0.7826642394065857, + "step": 4143 + }, + { + "epoch": 0.9548387096774194, + "grad_norm": 1.3652425128856092, + "learning_rate": 1.1570741307708585e-06, + "loss": 0.9550029635429382, + "step": 4144 + }, + { + "epoch": 0.9550691244239631, + "grad_norm": 1.0826442844044148, + "learning_rate": 1.1566978647741478e-06, + "loss": 0.8607431650161743, + "step": 4145 + }, + { + "epoch": 0.9552995391705069, + "grad_norm": 0.8247649155112424, + "learning_rate": 1.15632157603295e-06, + "loss": 
0.7350449562072754, + "step": 4146 + }, + { + "epoch": 0.9555299539170506, + "grad_norm": 1.033301557916291, + "learning_rate": 1.1559452646018818e-06, + "loss": 0.853142261505127, + "step": 4147 + }, + { + "epoch": 0.9557603686635945, + "grad_norm": 1.0495554531445934, + "learning_rate": 1.1555689305355651e-06, + "loss": 0.7137192487716675, + "step": 4148 + }, + { + "epoch": 0.9559907834101382, + "grad_norm": 1.158813208265862, + "learning_rate": 1.1551925738886244e-06, + "loss": 0.9007513523101807, + "step": 4149 + }, + { + "epoch": 0.956221198156682, + "grad_norm": 1.1071306366128357, + "learning_rate": 1.1548161947156867e-06, + "loss": 0.8499083518981934, + "step": 4150 + }, + { + "epoch": 0.9564516129032258, + "grad_norm": 0.874419574252059, + "learning_rate": 1.1544397930713836e-06, + "loss": 0.8068628311157227, + "step": 4151 + }, + { + "epoch": 0.9566820276497696, + "grad_norm": 1.1729788609256337, + "learning_rate": 1.1540633690103487e-06, + "loss": 0.8357307314872742, + "step": 4152 + }, + { + "epoch": 0.9569124423963133, + "grad_norm": 1.262397502444813, + "learning_rate": 1.1536869225872198e-06, + "loss": 0.7650378942489624, + "step": 4153 + }, + { + "epoch": 0.9571428571428572, + "grad_norm": 0.9933463317010283, + "learning_rate": 1.1533104538566376e-06, + "loss": 0.8717354536056519, + "step": 4154 + }, + { + "epoch": 0.9573732718894009, + "grad_norm": 0.9807638290234347, + "learning_rate": 1.152933962873246e-06, + "loss": 0.6314762830734253, + "step": 4155 + }, + { + "epoch": 0.9576036866359448, + "grad_norm": 1.1279705073097503, + "learning_rate": 1.152557449691692e-06, + "loss": 0.8949059844017029, + "step": 4156 + }, + { + "epoch": 0.9578341013824885, + "grad_norm": 1.137203803563717, + "learning_rate": 1.1521809143666261e-06, + "loss": 0.7862699031829834, + "step": 4157 + }, + { + "epoch": 0.9580645161290322, + "grad_norm": 0.8970512868442762, + "learning_rate": 1.151804356952702e-06, + "loss": 0.7954641580581665, + "step": 4158 + }, + { + 
"epoch": 0.958294930875576, + "grad_norm": 1.0478069911824797, + "learning_rate": 1.1514277775045766e-06, + "loss": 0.7654163241386414, + "step": 4159 + }, + { + "epoch": 0.9585253456221198, + "grad_norm": 1.0321973050954667, + "learning_rate": 1.1510511760769097e-06, + "loss": 0.7050681114196777, + "step": 4160 + }, + { + "epoch": 0.9587557603686636, + "grad_norm": 1.0667493196933242, + "learning_rate": 1.1506745527243646e-06, + "loss": 0.8646515607833862, + "step": 4161 + }, + { + "epoch": 0.9589861751152073, + "grad_norm": 0.9392654190881413, + "learning_rate": 1.1502979075016078e-06, + "loss": 0.7427883148193359, + "step": 4162 + }, + { + "epoch": 0.9592165898617512, + "grad_norm": 1.2506151155745373, + "learning_rate": 1.1499212404633083e-06, + "loss": 0.7800190448760986, + "step": 4163 + }, + { + "epoch": 0.9594470046082949, + "grad_norm": 1.0487739651932841, + "learning_rate": 1.1495445516641394e-06, + "loss": 0.789481520652771, + "step": 4164 + }, + { + "epoch": 0.9596774193548387, + "grad_norm": 0.8332785453272284, + "learning_rate": 1.1491678411587768e-06, + "loss": 0.7975008487701416, + "step": 4165 + }, + { + "epoch": 0.9599078341013825, + "grad_norm": 0.9306560917040928, + "learning_rate": 1.1487911090018994e-06, + "loss": 0.7964596748352051, + "step": 4166 + }, + { + "epoch": 0.9601382488479263, + "grad_norm": 0.8915843631095149, + "learning_rate": 1.1484143552481895e-06, + "loss": 0.7008803486824036, + "step": 4167 + }, + { + "epoch": 0.96036866359447, + "grad_norm": 0.888889684402262, + "learning_rate": 1.1480375799523328e-06, + "loss": 0.708189070224762, + "step": 4168 + }, + { + "epoch": 0.9605990783410139, + "grad_norm": 1.1069917813185677, + "learning_rate": 1.1476607831690167e-06, + "loss": 0.8207682371139526, + "step": 4169 + }, + { + "epoch": 0.9608294930875576, + "grad_norm": 1.200280235865814, + "learning_rate": 1.1472839649529337e-06, + "loss": 0.7682942152023315, + "step": 4170 + }, + { + "epoch": 0.9610599078341013, + "grad_norm": 
1.0122999990692296, + "learning_rate": 1.1469071253587785e-06, + "loss": 0.8435598611831665, + "step": 4171 + }, + { + "epoch": 0.9612903225806452, + "grad_norm": 0.79536207500534, + "learning_rate": 1.1465302644412483e-06, + "loss": 0.7516113519668579, + "step": 4172 + }, + { + "epoch": 0.9615207373271889, + "grad_norm": 0.881539477347835, + "learning_rate": 1.1461533822550442e-06, + "loss": 0.7125411629676819, + "step": 4173 + }, + { + "epoch": 0.9617511520737327, + "grad_norm": 0.9108745928942158, + "learning_rate": 1.14577647885487e-06, + "loss": 0.7560747861862183, + "step": 4174 + }, + { + "epoch": 0.9619815668202765, + "grad_norm": 0.9027443230900505, + "learning_rate": 1.1453995542954332e-06, + "loss": 0.6702673435211182, + "step": 4175 + }, + { + "epoch": 0.9622119815668203, + "grad_norm": 1.1520258504461998, + "learning_rate": 1.1450226086314433e-06, + "loss": 0.8083088397979736, + "step": 4176 + }, + { + "epoch": 0.962442396313364, + "grad_norm": 0.9906259449003554, + "learning_rate": 1.1446456419176135e-06, + "loss": 0.7579925060272217, + "step": 4177 + }, + { + "epoch": 0.9626728110599079, + "grad_norm": 0.9460352601625827, + "learning_rate": 1.1442686542086609e-06, + "loss": 0.713416576385498, + "step": 4178 + }, + { + "epoch": 0.9629032258064516, + "grad_norm": 1.1770844867552515, + "learning_rate": 1.1438916455593035e-06, + "loss": 0.7767639756202698, + "step": 4179 + }, + { + "epoch": 0.9631336405529954, + "grad_norm": 1.0244180953454374, + "learning_rate": 1.1435146160242645e-06, + "loss": 0.7493964433670044, + "step": 4180 + }, + { + "epoch": 0.9633640552995392, + "grad_norm": 1.1249907720020325, + "learning_rate": 1.1431375656582692e-06, + "loss": 0.8789365291595459, + "step": 4181 + }, + { + "epoch": 0.9635944700460829, + "grad_norm": 1.177047767616621, + "learning_rate": 1.1427604945160457e-06, + "loss": 0.7750524878501892, + "step": 4182 + }, + { + "epoch": 0.9638248847926267, + "grad_norm": 1.1195166665130392, + "learning_rate": 
1.142383402652325e-06, + "loss": 0.9330715537071228, + "step": 4183 + }, + { + "epoch": 0.9640552995391705, + "grad_norm": 0.933339002257347, + "learning_rate": 1.142006290121842e-06, + "loss": 0.6845035552978516, + "step": 4184 + }, + { + "epoch": 0.9642857142857143, + "grad_norm": 0.9794843601160967, + "learning_rate": 1.1416291569793343e-06, + "loss": 0.7295390963554382, + "step": 4185 + }, + { + "epoch": 0.964516129032258, + "grad_norm": 1.0666753158619988, + "learning_rate": 1.1412520032795419e-06, + "loss": 0.6869080066680908, + "step": 4186 + }, + { + "epoch": 0.9647465437788019, + "grad_norm": 1.506743316898968, + "learning_rate": 1.140874829077208e-06, + "loss": 1.0916842222213745, + "step": 4187 + }, + { + "epoch": 0.9649769585253456, + "grad_norm": 1.0539994363877199, + "learning_rate": 1.1404976344270793e-06, + "loss": 0.7487984299659729, + "step": 4188 + }, + { + "epoch": 0.9652073732718894, + "grad_norm": 1.024674697115665, + "learning_rate": 1.140120419383905e-06, + "loss": 0.8852604627609253, + "step": 4189 + }, + { + "epoch": 0.9654377880184332, + "grad_norm": 1.065174441144157, + "learning_rate": 1.139743184002437e-06, + "loss": 0.7384698987007141, + "step": 4190 + }, + { + "epoch": 0.965668202764977, + "grad_norm": 1.2009691028192717, + "learning_rate": 1.1393659283374312e-06, + "loss": 0.8033223152160645, + "step": 4191 + }, + { + "epoch": 0.9658986175115207, + "grad_norm": 1.2698866658546557, + "learning_rate": 1.1389886524436453e-06, + "loss": 0.8870355486869812, + "step": 4192 + }, + { + "epoch": 0.9661290322580646, + "grad_norm": 1.1198376045036553, + "learning_rate": 1.1386113563758405e-06, + "loss": 0.869537353515625, + "step": 4193 + }, + { + "epoch": 0.9663594470046083, + "grad_norm": 1.027781409519754, + "learning_rate": 1.1382340401887808e-06, + "loss": 0.8564068675041199, + "step": 4194 + }, + { + "epoch": 0.966589861751152, + "grad_norm": 0.9894593103049535, + "learning_rate": 1.1378567039372332e-06, + "loss": 0.7988623380661011, + 
"step": 4195 + }, + { + "epoch": 0.9668202764976959, + "grad_norm": 1.0843651981255995, + "learning_rate": 1.1374793476759673e-06, + "loss": 0.9405556917190552, + "step": 4196 + }, + { + "epoch": 0.9670506912442396, + "grad_norm": 0.8756334921680484, + "learning_rate": 1.137101971459756e-06, + "loss": 0.6757407188415527, + "step": 4197 + }, + { + "epoch": 0.9672811059907834, + "grad_norm": 1.1855730012050456, + "learning_rate": 1.1367245753433757e-06, + "loss": 0.7521541118621826, + "step": 4198 + }, + { + "epoch": 0.9675115207373272, + "grad_norm": 1.0137943151941313, + "learning_rate": 1.1363471593816037e-06, + "loss": 0.7306162714958191, + "step": 4199 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8912209844157076, + "learning_rate": 1.135969723629222e-06, + "loss": 0.6884766817092896, + "step": 4200 + }, + { + "epoch": 0.9679723502304147, + "grad_norm": 1.2084507323846643, + "learning_rate": 1.1355922681410152e-06, + "loss": 0.8420373201370239, + "step": 4201 + }, + { + "epoch": 0.9682027649769586, + "grad_norm": 0.7638761509020496, + "learning_rate": 1.1352147929717704e-06, + "loss": 0.7252322435379028, + "step": 4202 + }, + { + "epoch": 0.9684331797235023, + "grad_norm": 0.9448982669089191, + "learning_rate": 1.134837298176277e-06, + "loss": 0.6375538110733032, + "step": 4203 + }, + { + "epoch": 0.9686635944700461, + "grad_norm": 1.0629192948024473, + "learning_rate": 1.1344597838093283e-06, + "loss": 0.713671863079071, + "step": 4204 + }, + { + "epoch": 0.9688940092165899, + "grad_norm": 1.0319385361068514, + "learning_rate": 1.1340822499257201e-06, + "loss": 0.8591479063034058, + "step": 4205 + }, + { + "epoch": 0.9691244239631336, + "grad_norm": 1.0671754327237228, + "learning_rate": 1.1337046965802505e-06, + "loss": 0.7638808488845825, + "step": 4206 + }, + { + "epoch": 0.9693548387096774, + "grad_norm": 1.1032489557963816, + "learning_rate": 1.1333271238277215e-06, + "loss": 0.8133253455162048, + "step": 4207 + }, + { + "epoch": 
0.9695852534562212, + "grad_norm": 0.9621754998556686, + "learning_rate": 1.132949531722937e-06, + "loss": 0.6938756704330444, + "step": 4208 + }, + { + "epoch": 0.969815668202765, + "grad_norm": 1.171557608199449, + "learning_rate": 1.132571920320704e-06, + "loss": 0.793639063835144, + "step": 4209 + }, + { + "epoch": 0.9700460829493087, + "grad_norm": 1.066219056403929, + "learning_rate": 1.132194289675832e-06, + "loss": 0.7188536524772644, + "step": 4210 + }, + { + "epoch": 0.9702764976958526, + "grad_norm": 1.2873690827507545, + "learning_rate": 1.1318166398431343e-06, + "loss": 0.8076587319374084, + "step": 4211 + }, + { + "epoch": 0.9705069124423963, + "grad_norm": 1.2434961707112964, + "learning_rate": 1.1314389708774258e-06, + "loss": 0.8390023708343506, + "step": 4212 + }, + { + "epoch": 0.9707373271889401, + "grad_norm": 1.2800250293744322, + "learning_rate": 1.1310612828335243e-06, + "loss": 0.8395706415176392, + "step": 4213 + }, + { + "epoch": 0.9709677419354839, + "grad_norm": 1.1156221851257155, + "learning_rate": 1.1306835757662515e-06, + "loss": 0.9672995805740356, + "step": 4214 + }, + { + "epoch": 0.9711981566820277, + "grad_norm": 1.1859433022618981, + "learning_rate": 1.1303058497304303e-06, + "loss": 0.7716202735900879, + "step": 4215 + }, + { + "epoch": 0.9714285714285714, + "grad_norm": 0.9257750691433206, + "learning_rate": 1.1299281047808876e-06, + "loss": 0.6318329572677612, + "step": 4216 + }, + { + "epoch": 0.9716589861751153, + "grad_norm": 1.1802189065520408, + "learning_rate": 1.1295503409724525e-06, + "loss": 0.8287553787231445, + "step": 4217 + }, + { + "epoch": 0.971889400921659, + "grad_norm": 0.835147088990129, + "learning_rate": 1.129172558359957e-06, + "loss": 0.6903107762336731, + "step": 4218 + }, + { + "epoch": 0.9721198156682027, + "grad_norm": 0.9693907793654548, + "learning_rate": 1.1287947569982355e-06, + "loss": 0.684443473815918, + "step": 4219 + }, + { + "epoch": 0.9723502304147466, + "grad_norm": 1.2152908203730401, 
+ "learning_rate": 1.1284169369421254e-06, + "loss": 0.8566167950630188, + "step": 4220 + }, + { + "epoch": 0.9725806451612903, + "grad_norm": 1.0787740661687364, + "learning_rate": 1.1280390982464673e-06, + "loss": 0.8103536367416382, + "step": 4221 + }, + { + "epoch": 0.9728110599078341, + "grad_norm": 1.115333195517037, + "learning_rate": 1.1276612409661036e-06, + "loss": 0.8027071356773376, + "step": 4222 + }, + { + "epoch": 0.9730414746543778, + "grad_norm": 1.1442493875477038, + "learning_rate": 1.1272833651558796e-06, + "loss": 0.8251115679740906, + "step": 4223 + }, + { + "epoch": 0.9732718894009217, + "grad_norm": 1.1151561398542829, + "learning_rate": 1.1269054708706437e-06, + "loss": 0.6468047499656677, + "step": 4224 + }, + { + "epoch": 0.9735023041474654, + "grad_norm": 1.129830296326307, + "learning_rate": 1.1265275581652465e-06, + "loss": 0.8085706233978271, + "step": 4225 + }, + { + "epoch": 0.9737327188940093, + "grad_norm": 1.139574441171448, + "learning_rate": 1.1261496270945418e-06, + "loss": 0.8396503925323486, + "step": 4226 + }, + { + "epoch": 0.973963133640553, + "grad_norm": 0.9978900351940978, + "learning_rate": 1.1257716777133861e-06, + "loss": 0.7860006093978882, + "step": 4227 + }, + { + "epoch": 0.9741935483870968, + "grad_norm": 1.1484873689809545, + "learning_rate": 1.1253937100766373e-06, + "loss": 0.8630701303482056, + "step": 4228 + }, + { + "epoch": 0.9744239631336405, + "grad_norm": 0.9488769562872501, + "learning_rate": 1.1250157242391577e-06, + "loss": 0.8363114595413208, + "step": 4229 + }, + { + "epoch": 0.9746543778801844, + "grad_norm": 1.1415512207130691, + "learning_rate": 1.1246377202558114e-06, + "loss": 0.7837141156196594, + "step": 4230 + }, + { + "epoch": 0.9748847926267281, + "grad_norm": 1.3474534084840375, + "learning_rate": 1.1242596981814648e-06, + "loss": 0.8283151984214783, + "step": 4231 + }, + { + "epoch": 0.9751152073732718, + "grad_norm": 1.2728043293758005, + "learning_rate": 1.1238816580709878e-06, + 
"loss": 0.9232061505317688, + "step": 4232 + }, + { + "epoch": 0.9753456221198157, + "grad_norm": 1.125514954365521, + "learning_rate": 1.123503599979252e-06, + "loss": 0.8721164464950562, + "step": 4233 + }, + { + "epoch": 0.9755760368663594, + "grad_norm": 1.0382014546922784, + "learning_rate": 1.1231255239611321e-06, + "loss": 0.9398131370544434, + "step": 4234 + }, + { + "epoch": 0.9758064516129032, + "grad_norm": 1.0916134182788353, + "learning_rate": 1.1227474300715054e-06, + "loss": 0.8124324083328247, + "step": 4235 + }, + { + "epoch": 0.976036866359447, + "grad_norm": 0.8607187401974831, + "learning_rate": 1.1223693183652515e-06, + "loss": 0.8532534837722778, + "step": 4236 + }, + { + "epoch": 0.9762672811059908, + "grad_norm": 1.10871517745179, + "learning_rate": 1.1219911888972536e-06, + "loss": 0.7547662258148193, + "step": 4237 + }, + { + "epoch": 0.9764976958525345, + "grad_norm": 1.036940513326952, + "learning_rate": 1.1216130417223956e-06, + "loss": 0.7407231330871582, + "step": 4238 + }, + { + "epoch": 0.9767281105990784, + "grad_norm": 1.0573090435680337, + "learning_rate": 1.1212348768955657e-06, + "loss": 0.8190197944641113, + "step": 4239 + }, + { + "epoch": 0.9769585253456221, + "grad_norm": 1.111465926757279, + "learning_rate": 1.1208566944716542e-06, + "loss": 0.6641337871551514, + "step": 4240 + }, + { + "epoch": 0.977188940092166, + "grad_norm": 1.224342353107687, + "learning_rate": 1.120478494505553e-06, + "loss": 0.8953202962875366, + "step": 4241 + }, + { + "epoch": 0.9774193548387097, + "grad_norm": 0.9676272600083323, + "learning_rate": 1.1201002770521583e-06, + "loss": 0.7803191542625427, + "step": 4242 + }, + { + "epoch": 0.9776497695852534, + "grad_norm": 1.1107043139306134, + "learning_rate": 1.1197220421663674e-06, + "loss": 0.6827100515365601, + "step": 4243 + }, + { + "epoch": 0.9778801843317972, + "grad_norm": 1.2085442462659117, + "learning_rate": 1.1193437899030802e-06, + "loss": 0.8513565063476562, + "step": 4244 + }, + { + 
"epoch": 0.978110599078341, + "grad_norm": 0.9785496460004156, + "learning_rate": 1.1189655203172e-06, + "loss": 0.7196829915046692, + "step": 4245 + }, + { + "epoch": 0.9783410138248848, + "grad_norm": 1.0764048064511267, + "learning_rate": 1.1185872334636319e-06, + "loss": 0.7823485136032104, + "step": 4246 + }, + { + "epoch": 0.9785714285714285, + "grad_norm": 1.0963006166840967, + "learning_rate": 1.1182089293972841e-06, + "loss": 0.7178136110305786, + "step": 4247 + }, + { + "epoch": 0.9788018433179724, + "grad_norm": 1.0782886091125194, + "learning_rate": 1.1178306081730664e-06, + "loss": 0.7746715545654297, + "step": 4248 + }, + { + "epoch": 0.9790322580645161, + "grad_norm": 0.9177757629071243, + "learning_rate": 1.117452269845892e-06, + "loss": 0.8829167485237122, + "step": 4249 + }, + { + "epoch": 0.9792626728110599, + "grad_norm": 0.9096983569344097, + "learning_rate": 1.1170739144706764e-06, + "loss": 0.7592206001281738, + "step": 4250 + }, + { + "epoch": 0.9794930875576037, + "grad_norm": 0.8361017174057647, + "learning_rate": 1.1166955421023368e-06, + "loss": 0.8107382655143738, + "step": 4251 + }, + { + "epoch": 0.9797235023041475, + "grad_norm": 0.9837092835211146, + "learning_rate": 1.116317152795794e-06, + "loss": 0.6807001829147339, + "step": 4252 + }, + { + "epoch": 0.9799539170506912, + "grad_norm": 1.1872199804636603, + "learning_rate": 1.1159387466059705e-06, + "loss": 0.7752517461776733, + "step": 4253 + }, + { + "epoch": 0.9801843317972351, + "grad_norm": 0.8560133871531077, + "learning_rate": 1.115560323587791e-06, + "loss": 0.7484745383262634, + "step": 4254 + }, + { + "epoch": 0.9804147465437788, + "grad_norm": 1.153488759551228, + "learning_rate": 1.1151818837961838e-06, + "loss": 0.877413809299469, + "step": 4255 + }, + { + "epoch": 0.9806451612903225, + "grad_norm": 1.0087457568089837, + "learning_rate": 1.1148034272860785e-06, + "loss": 0.7806656360626221, + "step": 4256 + }, + { + "epoch": 0.9808755760368664, + "grad_norm": 
0.849135201735791, + "learning_rate": 1.1144249541124078e-06, + "loss": 0.6938076019287109, + "step": 4257 + }, + { + "epoch": 0.9811059907834101, + "grad_norm": 1.0559339187336096, + "learning_rate": 1.1140464643301064e-06, + "loss": 0.8832957148551941, + "step": 4258 + }, + { + "epoch": 0.9813364055299539, + "grad_norm": 1.1632523287766907, + "learning_rate": 1.1136679579941117e-06, + "loss": 0.7794016003608704, + "step": 4259 + }, + { + "epoch": 0.9815668202764977, + "grad_norm": 0.9689102084269609, + "learning_rate": 1.1132894351593636e-06, + "loss": 0.6877585053443909, + "step": 4260 + }, + { + "epoch": 0.9817972350230415, + "grad_norm": 1.0902109747190951, + "learning_rate": 1.1129108958808037e-06, + "loss": 0.8268473148345947, + "step": 4261 + }, + { + "epoch": 0.9820276497695852, + "grad_norm": 1.0260596307079526, + "learning_rate": 1.112532340213377e-06, + "loss": 0.6717547178268433, + "step": 4262 + }, + { + "epoch": 0.9822580645161291, + "grad_norm": 1.0646130416760407, + "learning_rate": 1.11215376821203e-06, + "loss": 0.849999725818634, + "step": 4263 + }, + { + "epoch": 0.9824884792626728, + "grad_norm": 1.005034332417578, + "learning_rate": 1.1117751799317118e-06, + "loss": 0.6562552452087402, + "step": 4264 + }, + { + "epoch": 0.9827188940092166, + "grad_norm": 1.0885536317886024, + "learning_rate": 1.1113965754273743e-06, + "loss": 0.7734784483909607, + "step": 4265 + }, + { + "epoch": 0.9829493087557604, + "grad_norm": 1.0527283904271951, + "learning_rate": 1.1110179547539717e-06, + "loss": 0.7580564022064209, + "step": 4266 + }, + { + "epoch": 0.9831797235023041, + "grad_norm": 1.121984331535499, + "learning_rate": 1.1106393179664595e-06, + "loss": 0.9207481145858765, + "step": 4267 + }, + { + "epoch": 0.9834101382488479, + "grad_norm": 1.1182241685665208, + "learning_rate": 1.1102606651197968e-06, + "loss": 0.8987482786178589, + "step": 4268 + }, + { + "epoch": 0.9836405529953917, + "grad_norm": 0.8558732255272679, + "learning_rate": 
1.1098819962689445e-06, + "loss": 0.7486778497695923, + "step": 4269 + }, + { + "epoch": 0.9838709677419355, + "grad_norm": 0.9905311956335509, + "learning_rate": 1.1095033114688662e-06, + "loss": 0.7387109994888306, + "step": 4270 + }, + { + "epoch": 0.9841013824884792, + "grad_norm": 0.913366940312768, + "learning_rate": 1.109124610774527e-06, + "loss": 0.7337637543678284, + "step": 4271 + }, + { + "epoch": 0.9843317972350231, + "grad_norm": 1.1127819698251733, + "learning_rate": 1.1087458942408952e-06, + "loss": 0.7419463396072388, + "step": 4272 + }, + { + "epoch": 0.9845622119815668, + "grad_norm": 1.0024132905496845, + "learning_rate": 1.1083671619229407e-06, + "loss": 0.7525068521499634, + "step": 4273 + }, + { + "epoch": 0.9847926267281106, + "grad_norm": 1.2794306882440036, + "learning_rate": 1.107988413875636e-06, + "loss": 0.8593931198120117, + "step": 4274 + }, + { + "epoch": 0.9850230414746544, + "grad_norm": 1.1058497522784536, + "learning_rate": 1.107609650153956e-06, + "loss": 0.9123519659042358, + "step": 4275 + }, + { + "epoch": 0.9852534562211982, + "grad_norm": 1.0134863035075283, + "learning_rate": 1.107230870812878e-06, + "loss": 0.7099615335464478, + "step": 4276 + }, + { + "epoch": 0.9854838709677419, + "grad_norm": 1.0305482113277953, + "learning_rate": 1.1068520759073807e-06, + "loss": 0.9525141716003418, + "step": 4277 + }, + { + "epoch": 0.9857142857142858, + "grad_norm": 1.078520213597711, + "learning_rate": 1.106473265492446e-06, + "loss": 0.8360154628753662, + "step": 4278 + }, + { + "epoch": 0.9859447004608295, + "grad_norm": 0.835665323629814, + "learning_rate": 1.106094439623058e-06, + "loss": 0.7788960933685303, + "step": 4279 + }, + { + "epoch": 0.9861751152073732, + "grad_norm": 1.4332707697001132, + "learning_rate": 1.1057155983542024e-06, + "loss": 0.76897132396698, + "step": 4280 + }, + { + "epoch": 0.9864055299539171, + "grad_norm": 1.2788839563876278, + "learning_rate": 1.1053367417408678e-06, + "loss": 0.8062764406204224, 
+ "step": 4281 + }, + { + "epoch": 0.9866359447004608, + "grad_norm": 1.0759322336892816, + "learning_rate": 1.1049578698380446e-06, + "loss": 0.6796555519104004, + "step": 4282 + }, + { + "epoch": 0.9868663594470046, + "grad_norm": 1.2156156083740777, + "learning_rate": 1.1045789827007256e-06, + "loss": 0.8495693206787109, + "step": 4283 + }, + { + "epoch": 0.9870967741935484, + "grad_norm": 1.1065961656311563, + "learning_rate": 1.1042000803839054e-06, + "loss": 0.9202588200569153, + "step": 4284 + }, + { + "epoch": 0.9873271889400922, + "grad_norm": 1.0492103887070696, + "learning_rate": 1.1038211629425815e-06, + "loss": 0.8204039335250854, + "step": 4285 + }, + { + "epoch": 0.9875576036866359, + "grad_norm": 1.3424135227199923, + "learning_rate": 1.1034422304317534e-06, + "loss": 0.921082615852356, + "step": 4286 + }, + { + "epoch": 0.9877880184331798, + "grad_norm": 1.1158968493314756, + "learning_rate": 1.1030632829064225e-06, + "loss": 0.8114739656448364, + "step": 4287 + }, + { + "epoch": 0.9880184331797235, + "grad_norm": 1.160400130956272, + "learning_rate": 1.1026843204215924e-06, + "loss": 0.7394933700561523, + "step": 4288 + }, + { + "epoch": 0.9882488479262673, + "grad_norm": 1.102093260654992, + "learning_rate": 1.1023053430322692e-06, + "loss": 0.9515210390090942, + "step": 4289 + }, + { + "epoch": 0.988479262672811, + "grad_norm": 1.0914130901392678, + "learning_rate": 1.1019263507934611e-06, + "loss": 0.6729186773300171, + "step": 4290 + }, + { + "epoch": 0.9887096774193549, + "grad_norm": 0.9547635126100301, + "learning_rate": 1.1015473437601776e-06, + "loss": 0.6455283164978027, + "step": 4291 + }, + { + "epoch": 0.9889400921658986, + "grad_norm": 1.1259220869244864, + "learning_rate": 1.1011683219874322e-06, + "loss": 0.8071424961090088, + "step": 4292 + }, + { + "epoch": 0.9891705069124423, + "grad_norm": 0.8980294635582122, + "learning_rate": 1.1007892855302385e-06, + "loss": 0.7287160754203796, + "step": 4293 + }, + { + "epoch": 
0.9894009216589862, + "grad_norm": 0.956104694967055, + "learning_rate": 1.1004102344436135e-06, + "loss": 0.7916513681411743, + "step": 4294 + }, + { + "epoch": 0.9896313364055299, + "grad_norm": 0.948939194234829, + "learning_rate": 1.1000311687825757e-06, + "loss": 0.8075610399246216, + "step": 4295 + }, + { + "epoch": 0.9898617511520738, + "grad_norm": 0.8467724433306772, + "learning_rate": 1.0996520886021465e-06, + "loss": 0.6144437193870544, + "step": 4296 + }, + { + "epoch": 0.9900921658986175, + "grad_norm": 1.1816936561057356, + "learning_rate": 1.0992729939573482e-06, + "loss": 0.830337643623352, + "step": 4297 + }, + { + "epoch": 0.9903225806451613, + "grad_norm": 1.1631921516982922, + "learning_rate": 1.0988938849032063e-06, + "loss": 0.7104393243789673, + "step": 4298 + }, + { + "epoch": 0.990552995391705, + "grad_norm": 1.0166827801425276, + "learning_rate": 1.0985147614947484e-06, + "loss": 0.746238112449646, + "step": 4299 + }, + { + "epoch": 0.9907834101382489, + "grad_norm": 0.8744941548736713, + "learning_rate": 1.0981356237870027e-06, + "loss": 0.7309597730636597, + "step": 4300 + }, + { + "epoch": 0.9910138248847926, + "grad_norm": 1.1787483382236952, + "learning_rate": 1.0977564718350013e-06, + "loss": 0.799136757850647, + "step": 4301 + }, + { + "epoch": 0.9912442396313365, + "grad_norm": 1.146252036070138, + "learning_rate": 1.0973773056937776e-06, + "loss": 0.7477747201919556, + "step": 4302 + }, + { + "epoch": 0.9914746543778802, + "grad_norm": 1.1466743668258872, + "learning_rate": 1.0969981254183668e-06, + "loss": 0.8051053285598755, + "step": 4303 + }, + { + "epoch": 0.9917050691244239, + "grad_norm": 0.9910519080633017, + "learning_rate": 1.0966189310638063e-06, + "loss": 0.8023163080215454, + "step": 4304 + }, + { + "epoch": 0.9919354838709677, + "grad_norm": 0.9483313078672773, + "learning_rate": 1.096239722685136e-06, + "loss": 0.6804348230361938, + "step": 4305 + }, + { + "epoch": 0.9921658986175115, + "grad_norm": 
1.119857177527024, + "learning_rate": 1.0958605003373976e-06, + "loss": 0.8276509046554565, + "step": 4306 + }, + { + "epoch": 0.9923963133640553, + "grad_norm": 1.2511674827094457, + "learning_rate": 1.095481264075634e-06, + "loss": 0.9733830690383911, + "step": 4307 + }, + { + "epoch": 0.992626728110599, + "grad_norm": 1.070745120202566, + "learning_rate": 1.0951020139548917e-06, + "loss": 0.824803352355957, + "step": 4308 + }, + { + "epoch": 0.9928571428571429, + "grad_norm": 1.100108017822232, + "learning_rate": 1.094722750030218e-06, + "loss": 0.8144090175628662, + "step": 4309 + }, + { + "epoch": 0.9930875576036866, + "grad_norm": 1.1329325704330306, + "learning_rate": 1.0943434723566623e-06, + "loss": 0.8394016027450562, + "step": 4310 + }, + { + "epoch": 0.9933179723502304, + "grad_norm": 1.0464489724076296, + "learning_rate": 1.0939641809892766e-06, + "loss": 0.7688177824020386, + "step": 4311 + }, + { + "epoch": 0.9935483870967742, + "grad_norm": 1.0599291427198123, + "learning_rate": 1.0935848759831144e-06, + "loss": 0.8157391548156738, + "step": 4312 + }, + { + "epoch": 0.993778801843318, + "grad_norm": 1.0072726544693649, + "learning_rate": 1.0932055573932316e-06, + "loss": 0.7618423700332642, + "step": 4313 + }, + { + "epoch": 0.9940092165898617, + "grad_norm": 0.8996295977906229, + "learning_rate": 1.0928262252746848e-06, + "loss": 0.7404567003250122, + "step": 4314 + }, + { + "epoch": 0.9942396313364056, + "grad_norm": 0.8729845318677907, + "learning_rate": 1.092446879682535e-06, + "loss": 0.6825613975524902, + "step": 4315 + }, + { + "epoch": 0.9944700460829493, + "grad_norm": 0.886318283085954, + "learning_rate": 1.0920675206718428e-06, + "loss": 0.6607732772827148, + "step": 4316 + }, + { + "epoch": 0.994700460829493, + "grad_norm": 1.1703494407740602, + "learning_rate": 1.0916881482976716e-06, + "loss": 0.715195894241333, + "step": 4317 + }, + { + "epoch": 0.9949308755760369, + "grad_norm": 1.0266525014281969, + "learning_rate": 
1.0913087626150872e-06, + "loss": 0.7593914270401001, + "step": 4318 + }, + { + "epoch": 0.9951612903225806, + "grad_norm": 0.9546142286310197, + "learning_rate": 1.090929363679157e-06, + "loss": 0.8368399143218994, + "step": 4319 + }, + { + "epoch": 0.9953917050691244, + "grad_norm": 1.0080836713071024, + "learning_rate": 1.0905499515449499e-06, + "loss": 0.7799170613288879, + "step": 4320 + }, + { + "epoch": 0.9956221198156682, + "grad_norm": 1.0450181436512773, + "learning_rate": 1.0901705262675372e-06, + "loss": 0.8194636702537537, + "step": 4321 + }, + { + "epoch": 0.995852534562212, + "grad_norm": 0.7482572391575254, + "learning_rate": 1.0897910879019917e-06, + "loss": 0.7150344848632812, + "step": 4322 + }, + { + "epoch": 0.9960829493087557, + "grad_norm": 1.0624528328831144, + "learning_rate": 1.089411636503389e-06, + "loss": 0.737568736076355, + "step": 4323 + }, + { + "epoch": 0.9963133640552996, + "grad_norm": 0.9578129661977193, + "learning_rate": 1.0890321721268056e-06, + "loss": 0.7037359476089478, + "step": 4324 + }, + { + "epoch": 0.9965437788018433, + "grad_norm": 1.1660806477651886, + "learning_rate": 1.0886526948273206e-06, + "loss": 0.7664542198181152, + "step": 4325 + }, + { + "epoch": 0.9967741935483871, + "grad_norm": 1.1927624722703807, + "learning_rate": 1.0882732046600138e-06, + "loss": 0.7700943946838379, + "step": 4326 + }, + { + "epoch": 0.9970046082949309, + "grad_norm": 0.9828460552540413, + "learning_rate": 1.0878937016799683e-06, + "loss": 0.7634885311126709, + "step": 4327 + }, + { + "epoch": 0.9972350230414746, + "grad_norm": 0.9138031795649807, + "learning_rate": 1.0875141859422685e-06, + "loss": 0.6784960031509399, + "step": 4328 + }, + { + "epoch": 0.9974654377880184, + "grad_norm": 0.9227707667287056, + "learning_rate": 1.0871346575020002e-06, + "loss": 0.7224948406219482, + "step": 4329 + }, + { + "epoch": 0.9976958525345622, + "grad_norm": 1.140456315375248, + "learning_rate": 1.086755116414252e-06, + "loss": 
0.7886664867401123, + "step": 4330 + }, + { + "epoch": 0.997926267281106, + "grad_norm": 0.8735584486255558, + "learning_rate": 1.0863755627341133e-06, + "loss": 0.7871295809745789, + "step": 4331 + }, + { + "epoch": 0.9981566820276497, + "grad_norm": 0.9703663985745814, + "learning_rate": 1.085995996516676e-06, + "loss": 0.700717568397522, + "step": 4332 + }, + { + "epoch": 0.9983870967741936, + "grad_norm": 1.0137806073331785, + "learning_rate": 1.085616417817034e-06, + "loss": 0.9090461730957031, + "step": 4333 + }, + { + "epoch": 0.9986175115207373, + "grad_norm": 0.8161279565195018, + "learning_rate": 1.0852368266902818e-06, + "loss": 0.7697109580039978, + "step": 4334 + }, + { + "epoch": 0.9988479262672811, + "grad_norm": 1.1335275167371797, + "learning_rate": 1.0848572231915177e-06, + "loss": 0.8135972023010254, + "step": 4335 + }, + { + "epoch": 0.9990783410138249, + "grad_norm": 0.9620227504979613, + "learning_rate": 1.0844776073758392e-06, + "loss": 0.803811252117157, + "step": 4336 + }, + { + "epoch": 0.9993087557603687, + "grad_norm": 1.1159399325844028, + "learning_rate": 1.0840979792983482e-06, + "loss": 0.874006986618042, + "step": 4337 + }, + { + "epoch": 0.9995391705069124, + "grad_norm": 1.0695664725891423, + "learning_rate": 1.0837183390141472e-06, + "loss": 0.7424730062484741, + "step": 4338 + }, + { + "epoch": 0.9997695852534563, + "grad_norm": 1.0413618177070603, + "learning_rate": 1.0833386865783393e-06, + "loss": 0.8219665884971619, + "step": 4339 + }, + { + "epoch": 1.0, + "grad_norm": 1.2200287736254531, + "learning_rate": 1.0829590220460319e-06, + "loss": 0.7065195441246033, + "step": 4340 + }, + { + "epoch": 1.0002304147465437, + "grad_norm": 1.4255251627812264, + "learning_rate": 1.0825793454723324e-06, + "loss": 0.7988346219062805, + "step": 4341 + }, + { + "epoch": 1.0004608294930875, + "grad_norm": 0.9544404961531333, + "learning_rate": 1.08219965691235e-06, + "loss": 0.6731617450714111, + "step": 4342 + }, + { + "epoch": 
1.0006912442396314, + "grad_norm": 1.0713203032897287, + "learning_rate": 1.0818199564211964e-06, + "loss": 0.8058687448501587, + "step": 4343 + }, + { + "epoch": 1.0009216589861751, + "grad_norm": 1.2330384736552804, + "learning_rate": 1.081440244053984e-06, + "loss": 0.8351448178291321, + "step": 4344 + }, + { + "epoch": 1.0011520737327189, + "grad_norm": 0.9578484310628987, + "learning_rate": 1.0810605198658286e-06, + "loss": 0.8619185090065002, + "step": 4345 + }, + { + "epoch": 1.0013824884792626, + "grad_norm": 1.030004028036847, + "learning_rate": 1.0806807839118455e-06, + "loss": 0.7600966691970825, + "step": 4346 + }, + { + "epoch": 1.0016129032258065, + "grad_norm": 1.103182000242006, + "learning_rate": 1.0803010362471536e-06, + "loss": 0.8123422265052795, + "step": 4347 + }, + { + "epoch": 1.0018433179723503, + "grad_norm": 1.0359331933938025, + "learning_rate": 1.0799212769268727e-06, + "loss": 0.8277603983879089, + "step": 4348 + }, + { + "epoch": 1.002073732718894, + "grad_norm": 0.7466130076646643, + "learning_rate": 1.079541506006124e-06, + "loss": 0.6666774153709412, + "step": 4349 + }, + { + "epoch": 1.0023041474654377, + "grad_norm": 1.0582236596847403, + "learning_rate": 1.0791617235400313e-06, + "loss": 0.8483254909515381, + "step": 4350 + }, + { + "epoch": 1.0025345622119817, + "grad_norm": 0.9094409000603249, + "learning_rate": 1.0787819295837193e-06, + "loss": 0.6585661172866821, + "step": 4351 + }, + { + "epoch": 1.0027649769585254, + "grad_norm": 1.0274936512349702, + "learning_rate": 1.0784021241923142e-06, + "loss": 0.7591124773025513, + "step": 4352 + }, + { + "epoch": 1.0029953917050691, + "grad_norm": 1.0201165998262116, + "learning_rate": 1.078022307420945e-06, + "loss": 0.7305805683135986, + "step": 4353 + }, + { + "epoch": 1.0032258064516129, + "grad_norm": 0.8894858318623733, + "learning_rate": 1.0776424793247407e-06, + "loss": 0.6558996438980103, + "step": 4354 + }, + { + "epoch": 1.0034562211981566, + "grad_norm": 
1.313034349644303, + "learning_rate": 1.0772626399588336e-06, + "loss": 0.6837360262870789, + "step": 4355 + }, + { + "epoch": 1.0036866359447005, + "grad_norm": 0.9187212026563307, + "learning_rate": 1.0768827893783562e-06, + "loss": 0.778124988079071, + "step": 4356 + }, + { + "epoch": 1.0039170506912443, + "grad_norm": 1.0828207561971888, + "learning_rate": 1.0765029276384438e-06, + "loss": 0.7676408886909485, + "step": 4357 + }, + { + "epoch": 1.004147465437788, + "grad_norm": 1.1604376015370672, + "learning_rate": 1.0761230547942333e-06, + "loss": 0.854246973991394, + "step": 4358 + }, + { + "epoch": 1.0043778801843317, + "grad_norm": 0.9177073619188721, + "learning_rate": 1.0757431709008615e-06, + "loss": 0.716766893863678, + "step": 4359 + }, + { + "epoch": 1.0046082949308757, + "grad_norm": 0.9439720321299626, + "learning_rate": 1.075363276013469e-06, + "loss": 0.6827799081802368, + "step": 4360 + }, + { + "epoch": 1.0048387096774194, + "grad_norm": 0.9539231430903122, + "learning_rate": 1.074983370187197e-06, + "loss": 0.7977348566055298, + "step": 4361 + }, + { + "epoch": 1.0050691244239631, + "grad_norm": 1.1227456227969494, + "learning_rate": 1.0746034534771878e-06, + "loss": 0.6958035826683044, + "step": 4362 + }, + { + "epoch": 1.0052995391705069, + "grad_norm": 0.9288361874867539, + "learning_rate": 1.0742235259385861e-06, + "loss": 0.8407979607582092, + "step": 4363 + }, + { + "epoch": 1.0055299539170508, + "grad_norm": 0.8466973629768922, + "learning_rate": 1.073843587626538e-06, + "loss": 0.8180495500564575, + "step": 4364 + }, + { + "epoch": 1.0057603686635945, + "grad_norm": 0.9973113541484702, + "learning_rate": 1.0734636385961907e-06, + "loss": 0.7551306486129761, + "step": 4365 + }, + { + "epoch": 1.0059907834101383, + "grad_norm": 1.1054013447474482, + "learning_rate": 1.0730836789026936e-06, + "loss": 0.6598455309867859, + "step": 4366 + }, + { + "epoch": 1.006221198156682, + "grad_norm": 0.9578758202335947, + "learning_rate": 
1.0727037086011971e-06, + "loss": 0.9186126589775085, + "step": 4367 + }, + { + "epoch": 1.0064516129032257, + "grad_norm": 1.0208878451508383, + "learning_rate": 1.0723237277468538e-06, + "loss": 0.8491259813308716, + "step": 4368 + }, + { + "epoch": 1.0066820276497697, + "grad_norm": 1.0678483382751343, + "learning_rate": 1.071943736394817e-06, + "loss": 0.6938691139221191, + "step": 4369 + }, + { + "epoch": 1.0069124423963134, + "grad_norm": 1.1084737690479445, + "learning_rate": 1.0715637346002423e-06, + "loss": 0.801313579082489, + "step": 4370 + }, + { + "epoch": 1.0071428571428571, + "grad_norm": 0.983698557868892, + "learning_rate": 1.071183722418286e-06, + "loss": 0.7663706541061401, + "step": 4371 + }, + { + "epoch": 1.0073732718894008, + "grad_norm": 0.8508185045615759, + "learning_rate": 1.070803699904107e-06, + "loss": 0.7434467077255249, + "step": 4372 + }, + { + "epoch": 1.0076036866359448, + "grad_norm": 1.331303605136832, + "learning_rate": 1.0704236671128643e-06, + "loss": 0.8366774320602417, + "step": 4373 + }, + { + "epoch": 1.0078341013824885, + "grad_norm": 1.276875198714222, + "learning_rate": 1.07004362409972e-06, + "loss": 0.7027710676193237, + "step": 4374 + }, + { + "epoch": 1.0080645161290323, + "grad_norm": 1.1122995966371962, + "learning_rate": 1.0696635709198357e-06, + "loss": 0.7965548038482666, + "step": 4375 + }, + { + "epoch": 1.008294930875576, + "grad_norm": 1.0387807228424288, + "learning_rate": 1.0692835076283768e-06, + "loss": 0.8058432340621948, + "step": 4376 + }, + { + "epoch": 1.0085253456221197, + "grad_norm": 1.1870264013217662, + "learning_rate": 1.0689034342805085e-06, + "loss": 0.9056248068809509, + "step": 4377 + }, + { + "epoch": 1.0087557603686637, + "grad_norm": 1.0069765876574615, + "learning_rate": 1.0685233509313979e-06, + "loss": 0.8407673835754395, + "step": 4378 + }, + { + "epoch": 1.0089861751152074, + "grad_norm": 1.3133023777292065, + "learning_rate": 1.0681432576362133e-06, + "loss": 0.9138794541358948, 
+ "step": 4379 + }, + { + "epoch": 1.0092165898617511, + "grad_norm": 1.3361237624577444, + "learning_rate": 1.067763154450125e-06, + "loss": 0.6640630960464478, + "step": 4380 + }, + { + "epoch": 1.0094470046082948, + "grad_norm": 1.4646712113013267, + "learning_rate": 1.0673830414283051e-06, + "loss": 0.9387146234512329, + "step": 4381 + }, + { + "epoch": 1.0096774193548388, + "grad_norm": 1.0228212242769696, + "learning_rate": 1.067002918625926e-06, + "loss": 0.7288271188735962, + "step": 4382 + }, + { + "epoch": 1.0099078341013825, + "grad_norm": 1.1693551967727813, + "learning_rate": 1.0666227860981613e-06, + "loss": 0.7886035442352295, + "step": 4383 + }, + { + "epoch": 1.0101382488479262, + "grad_norm": 1.056596025284508, + "learning_rate": 1.066242643900188e-06, + "loss": 0.6929852962493896, + "step": 4384 + }, + { + "epoch": 1.01036866359447, + "grad_norm": 0.9057033157053335, + "learning_rate": 1.065862492087182e-06, + "loss": 0.7709990739822388, + "step": 4385 + }, + { + "epoch": 1.010599078341014, + "grad_norm": 1.0362803754904506, + "learning_rate": 1.065482330714323e-06, + "loss": 0.811382532119751, + "step": 4386 + }, + { + "epoch": 1.0108294930875577, + "grad_norm": 1.2204693151649666, + "learning_rate": 1.0651021598367905e-06, + "loss": 0.8274353742599487, + "step": 4387 + }, + { + "epoch": 1.0110599078341014, + "grad_norm": 0.9995911348883496, + "learning_rate": 1.0647219795097651e-06, + "loss": 0.7449204921722412, + "step": 4388 + }, + { + "epoch": 1.011290322580645, + "grad_norm": 0.906861932756066, + "learning_rate": 1.0643417897884303e-06, + "loss": 0.675945520401001, + "step": 4389 + }, + { + "epoch": 1.0115207373271888, + "grad_norm": 1.183632210098949, + "learning_rate": 1.06396159072797e-06, + "loss": 0.7329400777816772, + "step": 4390 + }, + { + "epoch": 1.0117511520737328, + "grad_norm": 0.9566645616399831, + "learning_rate": 1.0635813823835692e-06, + "loss": 0.7809139490127563, + "step": 4391 + }, + { + "epoch": 1.0119815668202765, + 
"grad_norm": 1.0167427862718812, + "learning_rate": 1.0632011648104155e-06, + "loss": 0.799081563949585, + "step": 4392 + }, + { + "epoch": 1.0122119815668202, + "grad_norm": 1.0484890321007356, + "learning_rate": 1.062820938063696e-06, + "loss": 0.7738279104232788, + "step": 4393 + }, + { + "epoch": 1.012442396313364, + "grad_norm": 0.9791695127555486, + "learning_rate": 1.0624407021986007e-06, + "loss": 0.895797610282898, + "step": 4394 + }, + { + "epoch": 1.012672811059908, + "grad_norm": 0.9476041908693101, + "learning_rate": 1.0620604572703198e-06, + "loss": 0.6887848973274231, + "step": 4395 + }, + { + "epoch": 1.0129032258064516, + "grad_norm": 1.0915270783702586, + "learning_rate": 1.0616802033340457e-06, + "loss": 0.9540888071060181, + "step": 4396 + }, + { + "epoch": 1.0131336405529954, + "grad_norm": 1.3368596619746418, + "learning_rate": 1.0612999404449721e-06, + "loss": 0.9047783017158508, + "step": 4397 + }, + { + "epoch": 1.013364055299539, + "grad_norm": 0.924946076870977, + "learning_rate": 1.0609196686582931e-06, + "loss": 0.7030448913574219, + "step": 4398 + }, + { + "epoch": 1.013594470046083, + "grad_norm": 0.9501232585433265, + "learning_rate": 1.0605393880292046e-06, + "loss": 0.8097348213195801, + "step": 4399 + }, + { + "epoch": 1.0138248847926268, + "grad_norm": 1.0163791343408108, + "learning_rate": 1.0601590986129045e-06, + "loss": 0.7446185350418091, + "step": 4400 + }, + { + "epoch": 1.0140552995391705, + "grad_norm": 1.0548185515811, + "learning_rate": 1.0597788004645908e-06, + "loss": 0.7450964450836182, + "step": 4401 + }, + { + "epoch": 1.0142857142857142, + "grad_norm": 1.1891450532947472, + "learning_rate": 1.0593984936394632e-06, + "loss": 0.8326355218887329, + "step": 4402 + }, + { + "epoch": 1.014516129032258, + "grad_norm": 1.0194370020803867, + "learning_rate": 1.0590181781927227e-06, + "loss": 0.7013953924179077, + "step": 4403 + }, + { + "epoch": 1.014746543778802, + "grad_norm": 1.2634402455639506, + "learning_rate": 
1.0586378541795723e-06, + "loss": 0.7806364297866821, + "step": 4404 + }, + { + "epoch": 1.0149769585253456, + "grad_norm": 1.2061797737844093, + "learning_rate": 1.0582575216552146e-06, + "loss": 0.8207389116287231, + "step": 4405 + }, + { + "epoch": 1.0152073732718894, + "grad_norm": 1.123863770924685, + "learning_rate": 1.0578771806748545e-06, + "loss": 0.8042873740196228, + "step": 4406 + }, + { + "epoch": 1.015437788018433, + "grad_norm": 0.9837741196260199, + "learning_rate": 1.057496831293699e-06, + "loss": 0.7225071787834167, + "step": 4407 + }, + { + "epoch": 1.015668202764977, + "grad_norm": 0.8165867352878113, + "learning_rate": 1.0571164735669538e-06, + "loss": 0.7783743143081665, + "step": 4408 + }, + { + "epoch": 1.0158986175115208, + "grad_norm": 1.1050702802288892, + "learning_rate": 1.0567361075498286e-06, + "loss": 0.7455039024353027, + "step": 4409 + }, + { + "epoch": 1.0161290322580645, + "grad_norm": 1.0331220241961572, + "learning_rate": 1.0563557332975322e-06, + "loss": 0.7819615602493286, + "step": 4410 + }, + { + "epoch": 1.0163594470046082, + "grad_norm": 1.052305833495017, + "learning_rate": 1.0559753508652758e-06, + "loss": 0.6466404795646667, + "step": 4411 + }, + { + "epoch": 1.0165898617511522, + "grad_norm": 0.9503687927611121, + "learning_rate": 1.0555949603082715e-06, + "loss": 0.8728539943695068, + "step": 4412 + }, + { + "epoch": 1.016820276497696, + "grad_norm": 0.9080353373358744, + "learning_rate": 1.055214561681732e-06, + "loss": 0.6082659959793091, + "step": 4413 + }, + { + "epoch": 1.0170506912442396, + "grad_norm": 1.1401384988886654, + "learning_rate": 1.054834155040872e-06, + "loss": 0.8429103493690491, + "step": 4414 + }, + { + "epoch": 1.0172811059907834, + "grad_norm": 0.9060045457810262, + "learning_rate": 1.0544537404409073e-06, + "loss": 0.7953135967254639, + "step": 4415 + }, + { + "epoch": 1.017511520737327, + "grad_norm": 0.6713482182574511, + "learning_rate": 1.0540733179370542e-06, + "loss": 
0.7243527173995972, + "step": 4416 + }, + { + "epoch": 1.017741935483871, + "grad_norm": 1.4572192259453962, + "learning_rate": 1.0536928875845303e-06, + "loss": 0.6882613897323608, + "step": 4417 + }, + { + "epoch": 1.0179723502304148, + "grad_norm": 0.9719982264568039, + "learning_rate": 1.053312449438555e-06, + "loss": 0.9157286882400513, + "step": 4418 + }, + { + "epoch": 1.0182027649769585, + "grad_norm": 1.1196456434566004, + "learning_rate": 1.0529320035543482e-06, + "loss": 0.7224643230438232, + "step": 4419 + }, + { + "epoch": 1.0184331797235022, + "grad_norm": 1.4712628070157254, + "learning_rate": 1.0525515499871311e-06, + "loss": 0.874829888343811, + "step": 4420 + }, + { + "epoch": 1.0186635944700462, + "grad_norm": 0.9184049522457163, + "learning_rate": 1.0521710887921262e-06, + "loss": 0.6911267042160034, + "step": 4421 + }, + { + "epoch": 1.01889400921659, + "grad_norm": 1.1423796554253005, + "learning_rate": 1.051790620024557e-06, + "loss": 0.9065574407577515, + "step": 4422 + }, + { + "epoch": 1.0191244239631336, + "grad_norm": 1.225714416603257, + "learning_rate": 1.0514101437396474e-06, + "loss": 0.7671108245849609, + "step": 4423 + }, + { + "epoch": 1.0193548387096774, + "grad_norm": 1.3506661037387142, + "learning_rate": 1.051029659992624e-06, + "loss": 0.8706510066986084, + "step": 4424 + }, + { + "epoch": 1.019585253456221, + "grad_norm": 1.4185673299670827, + "learning_rate": 1.0506491688387128e-06, + "loss": 0.741087794303894, + "step": 4425 + }, + { + "epoch": 1.019815668202765, + "grad_norm": 1.0122076007105019, + "learning_rate": 1.0502686703331419e-06, + "loss": 0.8045330047607422, + "step": 4426 + }, + { + "epoch": 1.0200460829493088, + "grad_norm": 1.1768435258548835, + "learning_rate": 1.0498881645311398e-06, + "loss": 0.8464969992637634, + "step": 4427 + }, + { + "epoch": 1.0202764976958525, + "grad_norm": 1.1260966872974236, + "learning_rate": 1.0495076514879367e-06, + "loss": 0.7660650610923767, + "step": 4428 + }, + { + "epoch": 
1.0205069124423962, + "grad_norm": 1.0026539513539563, + "learning_rate": 1.0491271312587636e-06, + "loss": 0.8565669059753418, + "step": 4429 + }, + { + "epoch": 1.0207373271889402, + "grad_norm": 1.306851956145893, + "learning_rate": 1.0487466038988525e-06, + "loss": 0.8884295225143433, + "step": 4430 + }, + { + "epoch": 1.020967741935484, + "grad_norm": 1.0672501887857282, + "learning_rate": 1.0483660694634361e-06, + "loss": 0.7300036549568176, + "step": 4431 + }, + { + "epoch": 1.0211981566820276, + "grad_norm": 1.261937486377886, + "learning_rate": 1.0479855280077493e-06, + "loss": 0.7879898548126221, + "step": 4432 + }, + { + "epoch": 1.0214285714285714, + "grad_norm": 1.5182696761272942, + "learning_rate": 1.0476049795870263e-06, + "loss": 0.9811698198318481, + "step": 4433 + }, + { + "epoch": 1.0216589861751153, + "grad_norm": 1.1962738461411733, + "learning_rate": 1.0472244242565034e-06, + "loss": 0.7706241607666016, + "step": 4434 + }, + { + "epoch": 1.021889400921659, + "grad_norm": 1.289215010975763, + "learning_rate": 1.046843862071418e-06, + "loss": 0.761093020439148, + "step": 4435 + }, + { + "epoch": 1.0221198156682028, + "grad_norm": 1.2142929670752842, + "learning_rate": 1.046463293087008e-06, + "loss": 0.8306092619895935, + "step": 4436 + }, + { + "epoch": 1.0223502304147465, + "grad_norm": 1.0820298518439184, + "learning_rate": 1.0460827173585125e-06, + "loss": 0.9669788479804993, + "step": 4437 + }, + { + "epoch": 1.0225806451612902, + "grad_norm": 1.173748576404213, + "learning_rate": 1.0457021349411715e-06, + "loss": 0.8461639285087585, + "step": 4438 + }, + { + "epoch": 1.0228110599078342, + "grad_norm": 1.0738697424760002, + "learning_rate": 1.0453215458902262e-06, + "loss": 0.7230383157730103, + "step": 4439 + }, + { + "epoch": 1.023041474654378, + "grad_norm": 1.195555915731222, + "learning_rate": 1.0449409502609186e-06, + "loss": 0.7506514191627502, + "step": 4440 + }, + { + "epoch": 1.0232718894009216, + "grad_norm": 1.2468090783946124, 
+ "learning_rate": 1.0445603481084914e-06, + "loss": 0.7530048489570618, + "step": 4441 + }, + { + "epoch": 1.0235023041474653, + "grad_norm": 1.1659142578592716, + "learning_rate": 1.044179739488189e-06, + "loss": 0.8402249813079834, + "step": 4442 + }, + { + "epoch": 1.0237327188940093, + "grad_norm": 0.9379480482149454, + "learning_rate": 1.0437991244552557e-06, + "loss": 0.7661963701248169, + "step": 4443 + }, + { + "epoch": 1.023963133640553, + "grad_norm": 1.484925993605904, + "learning_rate": 1.043418503064937e-06, + "loss": 0.7982668876647949, + "step": 4444 + }, + { + "epoch": 1.0241935483870968, + "grad_norm": 1.5153078123946815, + "learning_rate": 1.0430378753724807e-06, + "loss": 0.899538516998291, + "step": 4445 + }, + { + "epoch": 1.0244239631336405, + "grad_norm": 1.0283178313705175, + "learning_rate": 1.0426572414331337e-06, + "loss": 0.8027441501617432, + "step": 4446 + }, + { + "epoch": 1.0246543778801844, + "grad_norm": 1.0275551729897887, + "learning_rate": 1.0422766013021442e-06, + "loss": 0.8575221300125122, + "step": 4447 + }, + { + "epoch": 1.0248847926267282, + "grad_norm": 1.0529216327738424, + "learning_rate": 1.0418959550347622e-06, + "loss": 0.7001699209213257, + "step": 4448 + }, + { + "epoch": 1.0251152073732719, + "grad_norm": 1.344629476023339, + "learning_rate": 1.041515302686238e-06, + "loss": 0.9296507835388184, + "step": 4449 + }, + { + "epoch": 1.0253456221198156, + "grad_norm": 1.1736142719382505, + "learning_rate": 1.0411346443118222e-06, + "loss": 0.8214550018310547, + "step": 4450 + }, + { + "epoch": 1.0255760368663593, + "grad_norm": 1.111485424859677, + "learning_rate": 1.0407539799667673e-06, + "loss": 0.7598673701286316, + "step": 4451 + }, + { + "epoch": 1.0258064516129033, + "grad_norm": 1.1453890077051856, + "learning_rate": 1.0403733097063265e-06, + "loss": 0.8222990036010742, + "step": 4452 + }, + { + "epoch": 1.026036866359447, + "grad_norm": 0.8681765527907143, + "learning_rate": 1.039992633585753e-06, + "loss": 
0.7860872745513916, + "step": 4453 + }, + { + "epoch": 1.0262672811059907, + "grad_norm": 0.7352315377021262, + "learning_rate": 1.0396119516603018e-06, + "loss": 0.6602796912193298, + "step": 4454 + }, + { + "epoch": 1.0264976958525345, + "grad_norm": 0.7865024675454858, + "learning_rate": 1.0392312639852278e-06, + "loss": 0.554654598236084, + "step": 4455 + }, + { + "epoch": 1.0267281105990784, + "grad_norm": 0.997694873166315, + "learning_rate": 1.0388505706157885e-06, + "loss": 0.7977210879325867, + "step": 4456 + }, + { + "epoch": 1.0269585253456222, + "grad_norm": 0.9315155505189272, + "learning_rate": 1.0384698716072398e-06, + "loss": 0.8770938515663147, + "step": 4457 + }, + { + "epoch": 1.0271889400921659, + "grad_norm": 1.1958306146081352, + "learning_rate": 1.0380891670148403e-06, + "loss": 0.710452675819397, + "step": 4458 + }, + { + "epoch": 1.0274193548387096, + "grad_norm": 1.0231453414790668, + "learning_rate": 1.0377084568938485e-06, + "loss": 0.8876768946647644, + "step": 4459 + }, + { + "epoch": 1.0276497695852536, + "grad_norm": 1.1707146109643827, + "learning_rate": 1.0373277412995241e-06, + "loss": 0.7770971059799194, + "step": 4460 + }, + { + "epoch": 1.0278801843317973, + "grad_norm": 1.2438301523835749, + "learning_rate": 1.0369470202871275e-06, + "loss": 0.9199050068855286, + "step": 4461 + }, + { + "epoch": 1.028110599078341, + "grad_norm": 1.225766455591599, + "learning_rate": 1.0365662939119199e-06, + "loss": 0.7931548357009888, + "step": 4462 + }, + { + "epoch": 1.0283410138248847, + "grad_norm": 0.9403888957806107, + "learning_rate": 1.0361855622291636e-06, + "loss": 0.7484941482543945, + "step": 4463 + }, + { + "epoch": 1.0285714285714285, + "grad_norm": 1.1077517121943607, + "learning_rate": 1.03580482529412e-06, + "loss": 0.7639475464820862, + "step": 4464 + }, + { + "epoch": 1.0288018433179724, + "grad_norm": 0.9266455289292281, + "learning_rate": 1.035424083162054e-06, + "loss": 0.7705268859863281, + "step": 4465 + }, + { + 
"epoch": 1.0290322580645161, + "grad_norm": 1.0602296301972336, + "learning_rate": 1.0350433358882288e-06, + "loss": 0.7714117169380188, + "step": 4466 + }, + { + "epoch": 1.0292626728110599, + "grad_norm": 0.9812855436464868, + "learning_rate": 1.0346625835279102e-06, + "loss": 0.851073145866394, + "step": 4467 + }, + { + "epoch": 1.0294930875576036, + "grad_norm": 0.9352903997309275, + "learning_rate": 1.0342818261363631e-06, + "loss": 0.8001583218574524, + "step": 4468 + }, + { + "epoch": 1.0297235023041476, + "grad_norm": 1.1158901092617035, + "learning_rate": 1.0339010637688547e-06, + "loss": 0.8352588415145874, + "step": 4469 + }, + { + "epoch": 1.0299539170506913, + "grad_norm": 0.91245372061127, + "learning_rate": 1.0335202964806515e-06, + "loss": 0.8136032223701477, + "step": 4470 + }, + { + "epoch": 1.030184331797235, + "grad_norm": 1.1248571903620148, + "learning_rate": 1.0331395243270215e-06, + "loss": 0.8041108846664429, + "step": 4471 + }, + { + "epoch": 1.0304147465437787, + "grad_norm": 0.9370378251466553, + "learning_rate": 1.032758747363234e-06, + "loss": 0.6961067914962769, + "step": 4472 + }, + { + "epoch": 1.0306451612903227, + "grad_norm": 0.8328897533850071, + "learning_rate": 1.0323779656445572e-06, + "loss": 0.8063983917236328, + "step": 4473 + }, + { + "epoch": 1.0308755760368664, + "grad_norm": 1.01915176563276, + "learning_rate": 1.0319971792262618e-06, + "loss": 0.706061601638794, + "step": 4474 + }, + { + "epoch": 1.0311059907834101, + "grad_norm": 1.1193687254143303, + "learning_rate": 1.0316163881636181e-06, + "loss": 0.8510581254959106, + "step": 4475 + }, + { + "epoch": 1.0313364055299539, + "grad_norm": 0.8459775762451333, + "learning_rate": 1.0312355925118975e-06, + "loss": 0.7169028520584106, + "step": 4476 + }, + { + "epoch": 1.0315668202764976, + "grad_norm": 0.8345675502163972, + "learning_rate": 1.0308547923263718e-06, + "loss": 0.7513360977172852, + "step": 4477 + }, + { + "epoch": 1.0317972350230415, + "grad_norm": 
1.1826641384928935, + "learning_rate": 1.030473987662314e-06, + "loss": 0.7408783435821533, + "step": 4478 + }, + { + "epoch": 1.0320276497695853, + "grad_norm": 1.2135549739175484, + "learning_rate": 1.0300931785749974e-06, + "loss": 0.8177747130393982, + "step": 4479 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 1.074036475926982, + "learning_rate": 1.0297123651196954e-06, + "loss": 0.7530791759490967, + "step": 4480 + }, + { + "epoch": 1.0324884792626727, + "grad_norm": 1.2947307404575235, + "learning_rate": 1.0293315473516832e-06, + "loss": 0.7958859205245972, + "step": 4481 + }, + { + "epoch": 1.0327188940092167, + "grad_norm": 1.2482360288136136, + "learning_rate": 1.0289507253262357e-06, + "loss": 0.8719943761825562, + "step": 4482 + }, + { + "epoch": 1.0329493087557604, + "grad_norm": 1.0347953021678673, + "learning_rate": 1.028569899098629e-06, + "loss": 0.7584139108657837, + "step": 4483 + }, + { + "epoch": 1.0331797235023041, + "grad_norm": 1.1621251755994506, + "learning_rate": 1.0281890687241387e-06, + "loss": 0.852983832359314, + "step": 4484 + }, + { + "epoch": 1.0334101382488479, + "grad_norm": 0.995758429643109, + "learning_rate": 1.027808234258043e-06, + "loss": 0.7455692291259766, + "step": 4485 + }, + { + "epoch": 1.0336405529953918, + "grad_norm": 0.9126434588001895, + "learning_rate": 1.0274273957556185e-06, + "loss": 0.7078343629837036, + "step": 4486 + }, + { + "epoch": 1.0338709677419355, + "grad_norm": 1.056440353383354, + "learning_rate": 1.027046553272144e-06, + "loss": 0.7580842971801758, + "step": 4487 + }, + { + "epoch": 1.0341013824884793, + "grad_norm": 0.9071452550966383, + "learning_rate": 1.026665706862898e-06, + "loss": 0.7271389961242676, + "step": 4488 + }, + { + "epoch": 1.034331797235023, + "grad_norm": 1.3819767756673818, + "learning_rate": 1.0262848565831599e-06, + "loss": 0.8271546363830566, + "step": 4489 + }, + { + "epoch": 1.0345622119815667, + "grad_norm": 1.1533046933911033, + "learning_rate": 
1.0259040024882098e-06, + "loss": 0.6799920201301575, + "step": 4490 + }, + { + "epoch": 1.0347926267281107, + "grad_norm": 0.7837273040397605, + "learning_rate": 1.0255231446333277e-06, + "loss": 0.6962645053863525, + "step": 4491 + }, + { + "epoch": 1.0350230414746544, + "grad_norm": 1.2060107344479347, + "learning_rate": 1.0251422830737955e-06, + "loss": 0.8722797632217407, + "step": 4492 + }, + { + "epoch": 1.0352534562211981, + "grad_norm": 1.0328841633467782, + "learning_rate": 1.024761417864894e-06, + "loss": 0.8054880499839783, + "step": 4493 + }, + { + "epoch": 1.0354838709677419, + "grad_norm": 0.9178345615112383, + "learning_rate": 1.0243805490619053e-06, + "loss": 0.8196548223495483, + "step": 4494 + }, + { + "epoch": 1.0357142857142858, + "grad_norm": 1.5010413914558958, + "learning_rate": 1.0239996767201122e-06, + "loss": 0.8197275400161743, + "step": 4495 + }, + { + "epoch": 1.0359447004608295, + "grad_norm": 1.1223467429515472, + "learning_rate": 1.0236188008947978e-06, + "loss": 0.7704858779907227, + "step": 4496 + }, + { + "epoch": 1.0361751152073733, + "grad_norm": 1.2288506828429187, + "learning_rate": 1.0232379216412459e-06, + "loss": 0.8296232223510742, + "step": 4497 + }, + { + "epoch": 1.036405529953917, + "grad_norm": 1.1910482399414777, + "learning_rate": 1.0228570390147404e-06, + "loss": 0.6546601057052612, + "step": 4498 + }, + { + "epoch": 1.036635944700461, + "grad_norm": 1.0493042801064925, + "learning_rate": 1.0224761530705656e-06, + "loss": 0.808987021446228, + "step": 4499 + }, + { + "epoch": 1.0368663594470047, + "grad_norm": 1.0198435860671902, + "learning_rate": 1.0220952638640073e-06, + "loss": 0.862627387046814, + "step": 4500 + }, + { + "epoch": 1.0370967741935484, + "grad_norm": 0.9314966888515314, + "learning_rate": 1.0217143714503507e-06, + "loss": 0.781114935874939, + "step": 4501 + }, + { + "epoch": 1.0373271889400921, + "grad_norm": 1.1732597442137338, + "learning_rate": 1.0213334758848814e-06, + "loss": 
0.7186112403869629, + "step": 4502 + }, + { + "epoch": 1.0375576036866359, + "grad_norm": 0.9870711221115687, + "learning_rate": 1.0209525772228868e-06, + "loss": 0.8112529516220093, + "step": 4503 + }, + { + "epoch": 1.0377880184331798, + "grad_norm": 1.1558866878107408, + "learning_rate": 1.020571675519653e-06, + "loss": 0.7364751100540161, + "step": 4504 + }, + { + "epoch": 1.0380184331797235, + "grad_norm": 1.296821231113786, + "learning_rate": 1.0201907708304681e-06, + "loss": 0.7015886902809143, + "step": 4505 + }, + { + "epoch": 1.0382488479262673, + "grad_norm": 0.8755063657778166, + "learning_rate": 1.0198098632106197e-06, + "loss": 0.7018470168113708, + "step": 4506 + }, + { + "epoch": 1.038479262672811, + "grad_norm": 0.9958013421397902, + "learning_rate": 1.0194289527153953e-06, + "loss": 0.820391058921814, + "step": 4507 + }, + { + "epoch": 1.038709677419355, + "grad_norm": 1.2026544914516983, + "learning_rate": 1.0190480394000844e-06, + "loss": 0.8341129422187805, + "step": 4508 + }, + { + "epoch": 1.0389400921658987, + "grad_norm": 0.8606365913019236, + "learning_rate": 1.0186671233199757e-06, + "loss": 0.7345695495605469, + "step": 4509 + }, + { + "epoch": 1.0391705069124424, + "grad_norm": 1.375974242893794, + "learning_rate": 1.0182862045303589e-06, + "loss": 0.8899500370025635, + "step": 4510 + }, + { + "epoch": 1.0394009216589861, + "grad_norm": 1.001562990779633, + "learning_rate": 1.0179052830865238e-06, + "loss": 0.8158663511276245, + "step": 4511 + }, + { + "epoch": 1.0396313364055298, + "grad_norm": 1.1574048409080129, + "learning_rate": 1.0175243590437604e-06, + "loss": 0.734848141670227, + "step": 4512 + }, + { + "epoch": 1.0398617511520738, + "grad_norm": 1.062511127484639, + "learning_rate": 1.0171434324573596e-06, + "loss": 0.7920876741409302, + "step": 4513 + }, + { + "epoch": 1.0400921658986175, + "grad_norm": 1.2131341489328324, + "learning_rate": 1.0167625033826122e-06, + "loss": 0.9224791526794434, + "step": 4514 + }, + { + 
"epoch": 1.0403225806451613, + "grad_norm": 1.152494191321953, + "learning_rate": 1.0163815718748096e-06, + "loss": 0.7086025476455688, + "step": 4515 + }, + { + "epoch": 1.040552995391705, + "grad_norm": 1.0223491213154539, + "learning_rate": 1.0160006379892434e-06, + "loss": 0.7657936811447144, + "step": 4516 + }, + { + "epoch": 1.040783410138249, + "grad_norm": 1.11296257844156, + "learning_rate": 1.0156197017812058e-06, + "loss": 0.786298394203186, + "step": 4517 + }, + { + "epoch": 1.0410138248847927, + "grad_norm": 1.1998728834800867, + "learning_rate": 1.0152387633059895e-06, + "loss": 0.8667294979095459, + "step": 4518 + }, + { + "epoch": 1.0412442396313364, + "grad_norm": 1.0233425185279803, + "learning_rate": 1.0148578226188866e-06, + "loss": 0.8479517102241516, + "step": 4519 + }, + { + "epoch": 1.0414746543778801, + "grad_norm": 0.8930216519245627, + "learning_rate": 1.0144768797751904e-06, + "loss": 0.6430692076683044, + "step": 4520 + }, + { + "epoch": 1.041705069124424, + "grad_norm": 1.122852329570553, + "learning_rate": 1.0140959348301946e-06, + "loss": 0.874313473701477, + "step": 4521 + }, + { + "epoch": 1.0419354838709678, + "grad_norm": 1.101097598838231, + "learning_rate": 1.013714987839192e-06, + "loss": 0.8439676761627197, + "step": 4522 + }, + { + "epoch": 1.0421658986175115, + "grad_norm": 1.2477053670484948, + "learning_rate": 1.0133340388574774e-06, + "loss": 0.7480089664459229, + "step": 4523 + }, + { + "epoch": 1.0423963133640552, + "grad_norm": 1.3143250159570112, + "learning_rate": 1.012953087940345e-06, + "loss": 0.8786139488220215, + "step": 4524 + }, + { + "epoch": 1.042626728110599, + "grad_norm": 1.1897211165926171, + "learning_rate": 1.0125721351430885e-06, + "loss": 0.8333299160003662, + "step": 4525 + }, + { + "epoch": 1.042857142857143, + "grad_norm": 1.055645356383861, + "learning_rate": 1.0121911805210032e-06, + "loss": 0.8201998472213745, + "step": 4526 + }, + { + "epoch": 1.0430875576036867, + "grad_norm": 
1.160199033506195, + "learning_rate": 1.0118102241293847e-06, + "loss": 0.7793110609054565, + "step": 4527 + }, + { + "epoch": 1.0433179723502304, + "grad_norm": 1.045720270383819, + "learning_rate": 1.0114292660235272e-06, + "loss": 0.7148817777633667, + "step": 4528 + }, + { + "epoch": 1.043548387096774, + "grad_norm": 1.0726942336798908, + "learning_rate": 1.011048306258727e-06, + "loss": 0.7945176362991333, + "step": 4529 + }, + { + "epoch": 1.043778801843318, + "grad_norm": 1.0532791972453868, + "learning_rate": 1.01066734489028e-06, + "loss": 0.7246826887130737, + "step": 4530 + }, + { + "epoch": 1.0440092165898618, + "grad_norm": 1.230297656368, + "learning_rate": 1.0102863819734822e-06, + "loss": 0.7342358827590942, + "step": 4531 + }, + { + "epoch": 1.0442396313364055, + "grad_norm": 1.1072867148521375, + "learning_rate": 1.0099054175636292e-06, + "loss": 0.6837234497070312, + "step": 4532 + }, + { + "epoch": 1.0444700460829492, + "grad_norm": 0.8847188010063922, + "learning_rate": 1.0095244517160184e-06, + "loss": 0.6941408514976501, + "step": 4533 + }, + { + "epoch": 1.0447004608294932, + "grad_norm": 0.9992175314765978, + "learning_rate": 1.009143484485946e-06, + "loss": 0.7835201025009155, + "step": 4534 + }, + { + "epoch": 1.044930875576037, + "grad_norm": 1.1533173348493126, + "learning_rate": 1.0087625159287086e-06, + "loss": 0.7887566089630127, + "step": 4535 + }, + { + "epoch": 1.0451612903225806, + "grad_norm": 0.9980831932241371, + "learning_rate": 1.0083815460996036e-06, + "loss": 0.7106727361679077, + "step": 4536 + }, + { + "epoch": 1.0453917050691244, + "grad_norm": 1.1003103489016812, + "learning_rate": 1.0080005750539287e-06, + "loss": 0.8316382169723511, + "step": 4537 + }, + { + "epoch": 1.045622119815668, + "grad_norm": 1.278017855977623, + "learning_rate": 1.0076196028469805e-06, + "loss": 0.7535592317581177, + "step": 4538 + }, + { + "epoch": 1.045852534562212, + "grad_norm": 1.2167524484109087, + "learning_rate": 
1.0072386295340571e-06, + "loss": 0.9255459308624268, + "step": 4539 + }, + { + "epoch": 1.0460829493087558, + "grad_norm": 0.9884104383515986, + "learning_rate": 1.0068576551704561e-06, + "loss": 0.7415009140968323, + "step": 4540 + }, + { + "epoch": 1.0463133640552995, + "grad_norm": 0.9221193872044946, + "learning_rate": 1.0064766798114758e-06, + "loss": 0.673210620880127, + "step": 4541 + }, + { + "epoch": 1.0465437788018432, + "grad_norm": 1.2907861596502346, + "learning_rate": 1.006095703512414e-06, + "loss": 0.7063118815422058, + "step": 4542 + }, + { + "epoch": 1.0467741935483872, + "grad_norm": 1.0344490200256125, + "learning_rate": 1.005714726328569e-06, + "loss": 0.73606276512146, + "step": 4543 + }, + { + "epoch": 1.047004608294931, + "grad_norm": 1.1024687809140408, + "learning_rate": 1.005333748315239e-06, + "loss": 0.6723713874816895, + "step": 4544 + }, + { + "epoch": 1.0472350230414746, + "grad_norm": 1.0566239460690536, + "learning_rate": 1.0049527695277223e-06, + "loss": 0.643845796585083, + "step": 4545 + }, + { + "epoch": 1.0474654377880184, + "grad_norm": 1.1196128686458957, + "learning_rate": 1.0045717900213175e-06, + "loss": 0.8820847272872925, + "step": 4546 + }, + { + "epoch": 1.047695852534562, + "grad_norm": 1.177142500227169, + "learning_rate": 1.0041908098513239e-06, + "loss": 0.6555176973342896, + "step": 4547 + }, + { + "epoch": 1.047926267281106, + "grad_norm": 1.4046987769414077, + "learning_rate": 1.0038098290730394e-06, + "loss": 0.8142974376678467, + "step": 4548 + }, + { + "epoch": 1.0481566820276498, + "grad_norm": 1.3843242800793498, + "learning_rate": 1.0034288477417634e-06, + "loss": 0.8107532262802124, + "step": 4549 + }, + { + "epoch": 1.0483870967741935, + "grad_norm": 1.093115680939654, + "learning_rate": 1.0030478659127947e-06, + "loss": 0.7078464031219482, + "step": 4550 + }, + { + "epoch": 1.0486175115207372, + "grad_norm": 1.3647000829373368, + "learning_rate": 1.0026668836414322e-06, + "loss": 0.9168295860290527, + 
"step": 4551 + }, + { + "epoch": 1.0488479262672812, + "grad_norm": 0.7154125463388302, + "learning_rate": 1.0022859009829752e-06, + "loss": 0.7384864091873169, + "step": 4552 + }, + { + "epoch": 1.049078341013825, + "grad_norm": 0.9459016715465385, + "learning_rate": 1.0019049179927229e-06, + "loss": 0.6092562675476074, + "step": 4553 + }, + { + "epoch": 1.0493087557603686, + "grad_norm": 1.159695075830992, + "learning_rate": 1.001523934725974e-06, + "loss": 0.713464617729187, + "step": 4554 + }, + { + "epoch": 1.0495391705069124, + "grad_norm": 0.9471368467961162, + "learning_rate": 1.001142951238028e-06, + "loss": 0.7514123916625977, + "step": 4555 + }, + { + "epoch": 1.0497695852534563, + "grad_norm": 1.1414214053095963, + "learning_rate": 1.000761967584184e-06, + "loss": 0.8092095851898193, + "step": 4556 + }, + { + "epoch": 1.05, + "grad_norm": 0.830509770117895, + "learning_rate": 1.000380983819742e-06, + "loss": 0.7609254717826843, + "step": 4557 + }, + { + "epoch": 1.0502304147465438, + "grad_norm": 0.8874333429433436, + "learning_rate": 1e-06, + "loss": 0.8363404273986816, + "step": 4558 + }, + { + "epoch": 1.0504608294930875, + "grad_norm": 1.1983399653767088, + "learning_rate": 9.996190161802584e-07, + "loss": 0.8139501810073853, + "step": 4559 + }, + { + "epoch": 1.0506912442396312, + "grad_norm": 0.8984420952696672, + "learning_rate": 9.992380324158157e-07, + "loss": 0.8064978122711182, + "step": 4560 + }, + { + "epoch": 1.0509216589861752, + "grad_norm": 0.9258651657418774, + "learning_rate": 9.988570487619721e-07, + "loss": 0.7162975072860718, + "step": 4561 + }, + { + "epoch": 1.051152073732719, + "grad_norm": 1.2196516767947119, + "learning_rate": 9.984760652740261e-07, + "loss": 0.9298074245452881, + "step": 4562 + }, + { + "epoch": 1.0513824884792626, + "grad_norm": 1.0770268299074148, + "learning_rate": 9.980950820072773e-07, + "loss": 0.6929144859313965, + "step": 4563 + }, + { + "epoch": 1.0516129032258064, + "grad_norm": 0.919564091111097, + 
"learning_rate": 9.97714099017025e-07, + "loss": 0.6516381502151489, + "step": 4564 + }, + { + "epoch": 1.0518433179723503, + "grad_norm": 1.091105354713726, + "learning_rate": 9.97333116358568e-07, + "loss": 0.864730715751648, + "step": 4565 + }, + { + "epoch": 1.052073732718894, + "grad_norm": 0.9113453911026408, + "learning_rate": 9.969521340872052e-07, + "loss": 0.7911246418952942, + "step": 4566 + }, + { + "epoch": 1.0523041474654378, + "grad_norm": 1.032556518691269, + "learning_rate": 9.965711522582367e-07, + "loss": 0.7766593098640442, + "step": 4567 + }, + { + "epoch": 1.0525345622119815, + "grad_norm": 1.1309615036566574, + "learning_rate": 9.961901709269607e-07, + "loss": 0.7703378200531006, + "step": 4568 + }, + { + "epoch": 1.0527649769585254, + "grad_norm": 0.9296180823184125, + "learning_rate": 9.958091901486762e-07, + "loss": 0.7068926692008972, + "step": 4569 + }, + { + "epoch": 1.0529953917050692, + "grad_norm": 1.0589255494911889, + "learning_rate": 9.954282099786824e-07, + "loss": 0.740556538105011, + "step": 4570 + }, + { + "epoch": 1.053225806451613, + "grad_norm": 1.1264720214776667, + "learning_rate": 9.950472304722778e-07, + "loss": 0.798403263092041, + "step": 4571 + }, + { + "epoch": 1.0534562211981566, + "grad_norm": 0.9551633921802427, + "learning_rate": 9.94666251684761e-07, + "loss": 0.6945887804031372, + "step": 4572 + }, + { + "epoch": 1.0536866359447004, + "grad_norm": 1.0978186377940822, + "learning_rate": 9.942852736714312e-07, + "loss": 0.8257915377616882, + "step": 4573 + }, + { + "epoch": 1.0539170506912443, + "grad_norm": 1.108870855150134, + "learning_rate": 9.939042964875859e-07, + "loss": 0.751315712928772, + "step": 4574 + }, + { + "epoch": 1.054147465437788, + "grad_norm": 0.8929134755319279, + "learning_rate": 9.935233201885241e-07, + "loss": 0.6607721447944641, + "step": 4575 + }, + { + "epoch": 1.0543778801843318, + "grad_norm": 1.1623094406064765, + "learning_rate": 9.931423448295438e-07, + "loss": 
0.9135023355484009, + "step": 4576 + }, + { + "epoch": 1.0546082949308755, + "grad_norm": 1.1079901137426853, + "learning_rate": 9.927613704659428e-07, + "loss": 0.8238483667373657, + "step": 4577 + }, + { + "epoch": 1.0548387096774194, + "grad_norm": 1.0927838633299076, + "learning_rate": 9.923803971530196e-07, + "loss": 0.7657001614570618, + "step": 4578 + }, + { + "epoch": 1.0550691244239632, + "grad_norm": 1.0858899027259339, + "learning_rate": 9.919994249460717e-07, + "loss": 0.6360250115394592, + "step": 4579 + }, + { + "epoch": 1.055299539170507, + "grad_norm": 3.1983788784304843, + "learning_rate": 9.916184539003963e-07, + "loss": 0.6958763003349304, + "step": 4580 + }, + { + "epoch": 1.0555299539170506, + "grad_norm": 1.0079237517587447, + "learning_rate": 9.912374840712915e-07, + "loss": 0.7093038558959961, + "step": 4581 + }, + { + "epoch": 1.0557603686635946, + "grad_norm": 1.0680215254508902, + "learning_rate": 9.908565155140544e-07, + "loss": 0.7641304731369019, + "step": 4582 + }, + { + "epoch": 1.0559907834101383, + "grad_norm": 0.8923201066182703, + "learning_rate": 9.904755482839817e-07, + "loss": 0.7976446151733398, + "step": 4583 + }, + { + "epoch": 1.056221198156682, + "grad_norm": 1.0963737907088362, + "learning_rate": 9.900945824363707e-07, + "loss": 0.8407114744186401, + "step": 4584 + }, + { + "epoch": 1.0564516129032258, + "grad_norm": 1.0695401976763876, + "learning_rate": 9.897136180265181e-07, + "loss": 0.7988634705543518, + "step": 4585 + }, + { + "epoch": 1.0566820276497695, + "grad_norm": 1.072342293651018, + "learning_rate": 9.893326551097198e-07, + "loss": 0.7847359776496887, + "step": 4586 + }, + { + "epoch": 1.0569124423963134, + "grad_norm": 1.0629893453410204, + "learning_rate": 9.889516937412728e-07, + "loss": 0.8458963632583618, + "step": 4587 + }, + { + "epoch": 1.0571428571428572, + "grad_norm": 1.1301054626559641, + "learning_rate": 9.88570733976473e-07, + "loss": 0.8479788899421692, + "step": 4588 + }, + { + "epoch": 
1.057373271889401, + "grad_norm": 1.180492999769349, + "learning_rate": 9.881897758706154e-07, + "loss": 0.7467283010482788, + "step": 4589 + }, + { + "epoch": 1.0576036866359446, + "grad_norm": 1.1676226241505752, + "learning_rate": 9.878088194789967e-07, + "loss": 0.9400098323822021, + "step": 4590 + }, + { + "epoch": 1.0578341013824886, + "grad_norm": 1.2151292863225376, + "learning_rate": 9.874278648569118e-07, + "loss": 0.8901257514953613, + "step": 4591 + }, + { + "epoch": 1.0580645161290323, + "grad_norm": 1.2956773767909102, + "learning_rate": 9.870469120596552e-07, + "loss": 0.840053379535675, + "step": 4592 + }, + { + "epoch": 1.058294930875576, + "grad_norm": 0.9938952111506293, + "learning_rate": 9.866659611425225e-07, + "loss": 0.6825235486030579, + "step": 4593 + }, + { + "epoch": 1.0585253456221198, + "grad_norm": 1.2521534530730631, + "learning_rate": 9.86285012160808e-07, + "loss": 0.7783857583999634, + "step": 4594 + }, + { + "epoch": 1.0587557603686637, + "grad_norm": 1.0517032997656734, + "learning_rate": 9.859040651698055e-07, + "loss": 0.7901174426078796, + "step": 4595 + }, + { + "epoch": 1.0589861751152074, + "grad_norm": 1.2211963787816231, + "learning_rate": 9.855231202248097e-07, + "loss": 0.9475124478340149, + "step": 4596 + }, + { + "epoch": 1.0592165898617512, + "grad_norm": 1.1872676544788658, + "learning_rate": 9.851421773811133e-07, + "loss": 0.8582692742347717, + "step": 4597 + }, + { + "epoch": 1.0594470046082949, + "grad_norm": 1.1723948726757356, + "learning_rate": 9.847612366940106e-07, + "loss": 0.7885586023330688, + "step": 4598 + }, + { + "epoch": 1.0596774193548386, + "grad_norm": 1.17635061110199, + "learning_rate": 9.843802982187943e-07, + "loss": 0.7981748580932617, + "step": 4599 + }, + { + "epoch": 1.0599078341013826, + "grad_norm": 0.9066343519689628, + "learning_rate": 9.839993620107563e-07, + "loss": 0.7060403823852539, + "step": 4600 + }, + { + "epoch": 1.0601382488479263, + "grad_norm": 1.2126688495293467, + 
"learning_rate": 9.836184281251905e-07, + "loss": 0.7902223467826843, + "step": 4601 + }, + { + "epoch": 1.06036866359447, + "grad_norm": 0.9972491115312556, + "learning_rate": 9.83237496617388e-07, + "loss": 0.7074719071388245, + "step": 4602 + }, + { + "epoch": 1.0605990783410137, + "grad_norm": 0.9455936494800175, + "learning_rate": 9.828565675426405e-07, + "loss": 0.7180163264274597, + "step": 4603 + }, + { + "epoch": 1.0608294930875577, + "grad_norm": 0.8990997781996365, + "learning_rate": 9.824756409562397e-07, + "loss": 0.7040787935256958, + "step": 4604 + }, + { + "epoch": 1.0610599078341014, + "grad_norm": 1.0311368456712493, + "learning_rate": 9.820947169134765e-07, + "loss": 0.8387063145637512, + "step": 4605 + }, + { + "epoch": 1.0612903225806452, + "grad_norm": 1.0692817612993422, + "learning_rate": 9.81713795469641e-07, + "loss": 0.8587188124656677, + "step": 4606 + }, + { + "epoch": 1.0615207373271889, + "grad_norm": 1.0418289468184643, + "learning_rate": 9.813328766800242e-07, + "loss": 0.729094386100769, + "step": 4607 + }, + { + "epoch": 1.0617511520737328, + "grad_norm": 1.1884134090864242, + "learning_rate": 9.809519605999158e-07, + "loss": 1.0576609373092651, + "step": 4608 + }, + { + "epoch": 1.0619815668202766, + "grad_norm": 1.1124938149620707, + "learning_rate": 9.805710472846044e-07, + "loss": 0.7605572938919067, + "step": 4609 + }, + { + "epoch": 1.0622119815668203, + "grad_norm": 0.9566684121068049, + "learning_rate": 9.801901367893807e-07, + "loss": 0.722477912902832, + "step": 4610 + }, + { + "epoch": 1.062442396313364, + "grad_norm": 0.9185071862681494, + "learning_rate": 9.79809229169532e-07, + "loss": 0.7335925698280334, + "step": 4611 + }, + { + "epoch": 1.0626728110599077, + "grad_norm": 1.0494538531790283, + "learning_rate": 9.794283244803466e-07, + "loss": 0.8116357922554016, + "step": 4612 + }, + { + "epoch": 1.0629032258064517, + "grad_norm": 1.0519905027101895, + "learning_rate": 9.79047422777113e-07, + "loss": 
0.8004311323165894, + "step": 4613 + }, + { + "epoch": 1.0631336405529954, + "grad_norm": 0.9803128568921189, + "learning_rate": 9.786665241151185e-07, + "loss": 0.8198168277740479, + "step": 4614 + }, + { + "epoch": 1.0633640552995391, + "grad_norm": 0.9841178854805237, + "learning_rate": 9.782856285496494e-07, + "loss": 0.7031205892562866, + "step": 4615 + }, + { + "epoch": 1.0635944700460829, + "grad_norm": 1.055262322588535, + "learning_rate": 9.779047361359928e-07, + "loss": 0.7303737998008728, + "step": 4616 + }, + { + "epoch": 1.0638248847926268, + "grad_norm": 1.1694198331033647, + "learning_rate": 9.775238469294345e-07, + "loss": 0.8775424957275391, + "step": 4617 + }, + { + "epoch": 1.0640552995391706, + "grad_norm": 0.9013154484602001, + "learning_rate": 9.771429609852597e-07, + "loss": 0.7463759183883667, + "step": 4618 + }, + { + "epoch": 1.0642857142857143, + "grad_norm": 0.8792691967623277, + "learning_rate": 9.767620783587542e-07, + "loss": 0.7200205326080322, + "step": 4619 + }, + { + "epoch": 1.064516129032258, + "grad_norm": 0.9102194522316246, + "learning_rate": 9.763811991052019e-07, + "loss": 0.8255786299705505, + "step": 4620 + }, + { + "epoch": 1.064746543778802, + "grad_norm": 1.2552865619465912, + "learning_rate": 9.760003232798877e-07, + "loss": 0.7975195050239563, + "step": 4621 + }, + { + "epoch": 1.0649769585253457, + "grad_norm": 0.9993977940644363, + "learning_rate": 9.756194509380948e-07, + "loss": 0.6993064880371094, + "step": 4622 + }, + { + "epoch": 1.0652073732718894, + "grad_norm": 1.314757658160511, + "learning_rate": 9.752385821351062e-07, + "loss": 0.818634033203125, + "step": 4623 + }, + { + "epoch": 1.0654377880184331, + "grad_norm": 1.0949894149977886, + "learning_rate": 9.748577169262046e-07, + "loss": 0.707933783531189, + "step": 4624 + }, + { + "epoch": 1.0656682027649769, + "grad_norm": 1.1439419332653986, + "learning_rate": 9.744768553666723e-07, + "loss": 0.8133440017700195, + "step": 4625 + }, + { + "epoch": 
1.0658986175115208, + "grad_norm": 1.1394394770433072, + "learning_rate": 9.740959975117901e-07, + "loss": 0.8818857669830322, + "step": 4626 + }, + { + "epoch": 1.0661290322580645, + "grad_norm": 0.9617616601353652, + "learning_rate": 9.737151434168402e-07, + "loss": 0.6057544946670532, + "step": 4627 + }, + { + "epoch": 1.0663594470046083, + "grad_norm": 1.047486055121172, + "learning_rate": 9.733342931371023e-07, + "loss": 0.7560185194015503, + "step": 4628 + }, + { + "epoch": 1.066589861751152, + "grad_norm": 1.233360971442642, + "learning_rate": 9.72953446727856e-07, + "loss": 0.8196524381637573, + "step": 4629 + }, + { + "epoch": 1.066820276497696, + "grad_norm": 1.031309795003994, + "learning_rate": 9.725726042443814e-07, + "loss": 0.8695862889289856, + "step": 4630 + }, + { + "epoch": 1.0670506912442397, + "grad_norm": 0.9769847065094724, + "learning_rate": 9.721917657419573e-07, + "loss": 0.7753207683563232, + "step": 4631 + }, + { + "epoch": 1.0672811059907834, + "grad_norm": 1.0908524037443617, + "learning_rate": 9.718109312758612e-07, + "loss": 0.8245481252670288, + "step": 4632 + }, + { + "epoch": 1.0675115207373271, + "grad_norm": 1.201628166799481, + "learning_rate": 9.71430100901371e-07, + "loss": 0.8654806613922119, + "step": 4633 + }, + { + "epoch": 1.067741935483871, + "grad_norm": 1.22982718965067, + "learning_rate": 9.710492746737642e-07, + "loss": 0.8667370080947876, + "step": 4634 + }, + { + "epoch": 1.0679723502304148, + "grad_norm": 1.2635323967888392, + "learning_rate": 9.706684526483167e-07, + "loss": 0.7786421775817871, + "step": 4635 + }, + { + "epoch": 1.0682027649769585, + "grad_norm": 1.037203898616246, + "learning_rate": 9.702876348803045e-07, + "loss": 0.7788090705871582, + "step": 4636 + }, + { + "epoch": 1.0684331797235023, + "grad_norm": 1.1815160856137523, + "learning_rate": 9.69906821425003e-07, + "loss": 0.812332034111023, + "step": 4637 + }, + { + "epoch": 1.068663594470046, + "grad_norm": 1.2578908038434822, + 
"learning_rate": 9.69526012337686e-07, + "loss": 0.7884202599525452, + "step": 4638 + }, + { + "epoch": 1.06889400921659, + "grad_norm": 1.0539526708204177, + "learning_rate": 9.69145207673628e-07, + "loss": 0.725990891456604, + "step": 4639 + }, + { + "epoch": 1.0691244239631337, + "grad_norm": 1.01343921612526, + "learning_rate": 9.687644074881028e-07, + "loss": 0.7277272343635559, + "step": 4640 + }, + { + "epoch": 1.0693548387096774, + "grad_norm": 1.0871506025213427, + "learning_rate": 9.683836118363818e-07, + "loss": 0.8081945180892944, + "step": 4641 + }, + { + "epoch": 1.0695852534562211, + "grad_norm": 1.1050642405984226, + "learning_rate": 9.680028207737383e-07, + "loss": 0.8633503913879395, + "step": 4642 + }, + { + "epoch": 1.069815668202765, + "grad_norm": 0.9415461517108813, + "learning_rate": 9.67622034355443e-07, + "loss": 0.7873313426971436, + "step": 4643 + }, + { + "epoch": 1.0700460829493088, + "grad_norm": 1.269353126640295, + "learning_rate": 9.67241252636766e-07, + "loss": 0.7927644848823547, + "step": 4644 + }, + { + "epoch": 1.0702764976958525, + "grad_norm": 1.395156348091843, + "learning_rate": 9.668604756729784e-07, + "loss": 0.9458138942718506, + "step": 4645 + }, + { + "epoch": 1.0705069124423963, + "grad_norm": 1.2621680271291411, + "learning_rate": 9.664797035193484e-07, + "loss": 0.7471280097961426, + "step": 4646 + }, + { + "epoch": 1.07073732718894, + "grad_norm": 1.0373772164844823, + "learning_rate": 9.660989362311455e-07, + "loss": 0.7666789293289185, + "step": 4647 + }, + { + "epoch": 1.070967741935484, + "grad_norm": 0.8355654249705468, + "learning_rate": 9.65718173863637e-07, + "loss": 0.7846331000328064, + "step": 4648 + }, + { + "epoch": 1.0711981566820277, + "grad_norm": 1.1393955111251446, + "learning_rate": 9.653374164720897e-07, + "loss": 0.7790371179580688, + "step": 4649 + }, + { + "epoch": 1.0714285714285714, + "grad_norm": 1.110758470727215, + "learning_rate": 9.64956664111771e-07, + "loss": 0.9056169986724854, + 
"step": 4650 + }, + { + "epoch": 1.0716589861751151, + "grad_norm": 0.84240400487228, + "learning_rate": 9.645759168379461e-07, + "loss": 0.6839256286621094, + "step": 4651 + }, + { + "epoch": 1.071889400921659, + "grad_norm": 1.377334701305697, + "learning_rate": 9.641951747058799e-07, + "loss": 0.7071784138679504, + "step": 4652 + }, + { + "epoch": 1.0721198156682028, + "grad_norm": 1.1683127374870803, + "learning_rate": 9.638144377708366e-07, + "loss": 0.8166929483413696, + "step": 4653 + }, + { + "epoch": 1.0723502304147465, + "grad_norm": 1.239204160701412, + "learning_rate": 9.6343370608808e-07, + "loss": 0.8013010621070862, + "step": 4654 + }, + { + "epoch": 1.0725806451612903, + "grad_norm": 1.0825444957318084, + "learning_rate": 9.630529797128722e-07, + "loss": 0.8157169818878174, + "step": 4655 + }, + { + "epoch": 1.072811059907834, + "grad_norm": 1.0890180382455945, + "learning_rate": 9.626722587004758e-07, + "loss": 0.6467397212982178, + "step": 4656 + }, + { + "epoch": 1.073041474654378, + "grad_norm": 0.840613071204114, + "learning_rate": 9.622915431061519e-07, + "loss": 0.6623806953430176, + "step": 4657 + }, + { + "epoch": 1.0732718894009217, + "grad_norm": 0.9242647901691624, + "learning_rate": 9.619108329851596e-07, + "loss": 0.8333703279495239, + "step": 4658 + }, + { + "epoch": 1.0735023041474654, + "grad_norm": 1.1552752606597634, + "learning_rate": 9.615301283927603e-07, + "loss": 0.8798840045928955, + "step": 4659 + }, + { + "epoch": 1.0737327188940091, + "grad_norm": 1.1547075721097313, + "learning_rate": 9.611494293842119e-07, + "loss": 0.8712242841720581, + "step": 4660 + }, + { + "epoch": 1.073963133640553, + "grad_norm": 1.030127804248938, + "learning_rate": 9.60768736014772e-07, + "loss": 0.720801591873169, + "step": 4661 + }, + { + "epoch": 1.0741935483870968, + "grad_norm": 1.0305643381766019, + "learning_rate": 9.603880483396983e-07, + "loss": 0.7974982857704163, + "step": 4662 + }, + { + "epoch": 1.0744239631336405, + "grad_norm": 
1.1569753217458012, + "learning_rate": 9.600073664142471e-07, + "loss": 0.7656542062759399, + "step": 4663 + }, + { + "epoch": 1.0746543778801843, + "grad_norm": 1.2831377014983525, + "learning_rate": 9.596266902936737e-07, + "loss": 0.8274385333061218, + "step": 4664 + }, + { + "epoch": 1.0748847926267282, + "grad_norm": 1.1261587516242995, + "learning_rate": 9.592460200332328e-07, + "loss": 0.6508798599243164, + "step": 4665 + }, + { + "epoch": 1.075115207373272, + "grad_norm": 0.8712727383997491, + "learning_rate": 9.588653556881781e-07, + "loss": 0.6393407583236694, + "step": 4666 + }, + { + "epoch": 1.0753456221198157, + "grad_norm": 0.8300127743505744, + "learning_rate": 9.58484697313762e-07, + "loss": 0.7857781052589417, + "step": 4667 + }, + { + "epoch": 1.0755760368663594, + "grad_norm": 1.0591582120645788, + "learning_rate": 9.58104044965238e-07, + "loss": 0.7433615922927856, + "step": 4668 + }, + { + "epoch": 1.0758064516129031, + "grad_norm": 0.9252765779736452, + "learning_rate": 9.57723398697856e-07, + "loss": 0.6694349646568298, + "step": 4669 + }, + { + "epoch": 1.076036866359447, + "grad_norm": 1.06633744555344, + "learning_rate": 9.573427585668664e-07, + "loss": 0.7849506735801697, + "step": 4670 + }, + { + "epoch": 1.0762672811059908, + "grad_norm": 0.948086558097784, + "learning_rate": 9.569621246275194e-07, + "loss": 0.5924462080001831, + "step": 4671 + }, + { + "epoch": 1.0764976958525345, + "grad_norm": 1.0764379613448063, + "learning_rate": 9.565814969350628e-07, + "loss": 0.7679359316825867, + "step": 4672 + }, + { + "epoch": 1.0767281105990782, + "grad_norm": 0.8770076747846444, + "learning_rate": 9.562008755447444e-07, + "loss": 0.803286612033844, + "step": 4673 + }, + { + "epoch": 1.0769585253456222, + "grad_norm": 0.9139287879253918, + "learning_rate": 9.558202605118112e-07, + "loss": 0.6302975416183472, + "step": 4674 + }, + { + "epoch": 1.077188940092166, + "grad_norm": 1.1929014758233443, + "learning_rate": 9.554396518915085e-07, + 
"loss": 0.7441667914390564, + "step": 4675 + }, + { + "epoch": 1.0774193548387097, + "grad_norm": 1.1469726623234646, + "learning_rate": 9.550590497390815e-07, + "loss": 0.805221438407898, + "step": 4676 + }, + { + "epoch": 1.0776497695852534, + "grad_norm": 1.1540692428304171, + "learning_rate": 9.54678454109774e-07, + "loss": 0.9557743072509766, + "step": 4677 + }, + { + "epoch": 1.0778801843317973, + "grad_norm": 1.0781366924036009, + "learning_rate": 9.542978650588284e-07, + "loss": 0.7361980080604553, + "step": 4678 + }, + { + "epoch": 1.078110599078341, + "grad_norm": 1.2143012487351885, + "learning_rate": 9.539172826414876e-07, + "loss": 0.7474843263626099, + "step": 4679 + }, + { + "epoch": 1.0783410138248848, + "grad_norm": 1.0143818885553835, + "learning_rate": 9.535367069129923e-07, + "loss": 0.595927357673645, + "step": 4680 + }, + { + "epoch": 1.0785714285714285, + "grad_norm": 1.1128254146821686, + "learning_rate": 9.531561379285818e-07, + "loss": 0.894598126411438, + "step": 4681 + }, + { + "epoch": 1.0788018433179722, + "grad_norm": 1.3233034879697116, + "learning_rate": 9.527755757434966e-07, + "loss": 0.915902853012085, + "step": 4682 + }, + { + "epoch": 1.0790322580645162, + "grad_norm": 1.3436084997047495, + "learning_rate": 9.523950204129739e-07, + "loss": 0.8670432567596436, + "step": 4683 + }, + { + "epoch": 1.07926267281106, + "grad_norm": 1.119487791223308, + "learning_rate": 9.520144719922508e-07, + "loss": 0.7829893231391907, + "step": 4684 + }, + { + "epoch": 1.0794930875576036, + "grad_norm": 1.1633745895382166, + "learning_rate": 9.516339305365638e-07, + "loss": 0.6584970951080322, + "step": 4685 + }, + { + "epoch": 1.0797235023041474, + "grad_norm": 1.0240703451548752, + "learning_rate": 9.512533961011478e-07, + "loss": 0.7853457927703857, + "step": 4686 + }, + { + "epoch": 1.0799539170506913, + "grad_norm": 0.8755927642296618, + "learning_rate": 9.508728687412364e-07, + "loss": 0.7890632152557373, + "step": 4687 + }, + { + "epoch": 
1.080184331797235, + "grad_norm": 1.1475809434863895, + "learning_rate": 9.504923485120634e-07, + "loss": 0.8281408548355103, + "step": 4688 + }, + { + "epoch": 1.0804147465437788, + "grad_norm": 0.9222741947208914, + "learning_rate": 9.501118354688605e-07, + "loss": 0.7878601551055908, + "step": 4689 + }, + { + "epoch": 1.0806451612903225, + "grad_norm": 1.3827368592572105, + "learning_rate": 9.497313296668582e-07, + "loss": 0.8332592844963074, + "step": 4690 + }, + { + "epoch": 1.0808755760368665, + "grad_norm": 1.0564274993228098, + "learning_rate": 9.493508311612874e-07, + "loss": 0.7680759429931641, + "step": 4691 + }, + { + "epoch": 1.0811059907834102, + "grad_norm": 0.9446139934289677, + "learning_rate": 9.489703400073762e-07, + "loss": 0.6368690729141235, + "step": 4692 + }, + { + "epoch": 1.081336405529954, + "grad_norm": 1.1588361552017052, + "learning_rate": 9.485898562603525e-07, + "loss": 0.7018477916717529, + "step": 4693 + }, + { + "epoch": 1.0815668202764976, + "grad_norm": 1.057066552712669, + "learning_rate": 9.482093799754432e-07, + "loss": 0.8494987487792969, + "step": 4694 + }, + { + "epoch": 1.0817972350230414, + "grad_norm": 1.0119994692546468, + "learning_rate": 9.478289112078736e-07, + "loss": 0.8146306276321411, + "step": 4695 + }, + { + "epoch": 1.0820276497695853, + "grad_norm": 1.054771760893497, + "learning_rate": 9.474484500128689e-07, + "loss": 0.7832612991333008, + "step": 4696 + }, + { + "epoch": 1.082258064516129, + "grad_norm": 1.0487197763357414, + "learning_rate": 9.470679964456519e-07, + "loss": 0.8569360971450806, + "step": 4697 + }, + { + "epoch": 1.0824884792626728, + "grad_norm": 1.1432115985173055, + "learning_rate": 9.466875505614449e-07, + "loss": 0.8145112991333008, + "step": 4698 + }, + { + "epoch": 1.0827188940092165, + "grad_norm": 1.0578814317560323, + "learning_rate": 9.463071124154697e-07, + "loss": 0.6632689237594604, + "step": 4699 + }, + { + "epoch": 1.0829493087557605, + "grad_norm": 1.1233922356996344, + 
"learning_rate": 9.459266820629461e-07, + "loss": 0.6299769878387451, + "step": 4700 + }, + { + "epoch": 1.0831797235023042, + "grad_norm": 1.0275349813599226, + "learning_rate": 9.455462595590925e-07, + "loss": 0.7722063064575195, + "step": 4701 + }, + { + "epoch": 1.083410138248848, + "grad_norm": 1.2023285008908922, + "learning_rate": 9.451658449591278e-07, + "loss": 0.8219027519226074, + "step": 4702 + }, + { + "epoch": 1.0836405529953916, + "grad_norm": 1.1618110682341312, + "learning_rate": 9.44785438318268e-07, + "loss": 0.9078400731086731, + "step": 4703 + }, + { + "epoch": 1.0838709677419356, + "grad_norm": 1.087404948952653, + "learning_rate": 9.444050396917286e-07, + "loss": 0.8062041997909546, + "step": 4704 + }, + { + "epoch": 1.0841013824884793, + "grad_norm": 0.9599318157385525, + "learning_rate": 9.440246491347242e-07, + "loss": 0.6379001140594482, + "step": 4705 + }, + { + "epoch": 1.084331797235023, + "grad_norm": 1.179840039843376, + "learning_rate": 9.436442667024679e-07, + "loss": 0.919986367225647, + "step": 4706 + }, + { + "epoch": 1.0845622119815668, + "grad_norm": 1.025427308273649, + "learning_rate": 9.432638924501715e-07, + "loss": 0.6534138917922974, + "step": 4707 + }, + { + "epoch": 1.0847926267281105, + "grad_norm": 1.1537368190719173, + "learning_rate": 9.428835264330462e-07, + "loss": 0.8340045809745789, + "step": 4708 + }, + { + "epoch": 1.0850230414746544, + "grad_norm": 1.2598648406656967, + "learning_rate": 9.425031687063014e-07, + "loss": 0.8347625732421875, + "step": 4709 + }, + { + "epoch": 1.0852534562211982, + "grad_norm": 1.080310831214647, + "learning_rate": 9.421228193251452e-07, + "loss": 0.807063639163971, + "step": 4710 + }, + { + "epoch": 1.085483870967742, + "grad_norm": 0.8480154931503633, + "learning_rate": 9.417424783447855e-07, + "loss": 0.7375985383987427, + "step": 4711 + }, + { + "epoch": 1.0857142857142856, + "grad_norm": 0.9219258926876724, + "learning_rate": 9.413621458204281e-07, + "loss": 
0.5723168849945068, + "step": 4712 + }, + { + "epoch": 1.0859447004608296, + "grad_norm": 1.20469026899904, + "learning_rate": 9.409818218072772e-07, + "loss": 0.8272668123245239, + "step": 4713 + }, + { + "epoch": 1.0861751152073733, + "grad_norm": 1.0744380351617728, + "learning_rate": 9.406015063605368e-07, + "loss": 0.6400803327560425, + "step": 4714 + }, + { + "epoch": 1.086405529953917, + "grad_norm": 0.9959690478635643, + "learning_rate": 9.402211995354095e-07, + "loss": 0.6829795837402344, + "step": 4715 + }, + { + "epoch": 1.0866359447004608, + "grad_norm": 1.0434747079590168, + "learning_rate": 9.398409013870954e-07, + "loss": 0.8509865999221802, + "step": 4716 + }, + { + "epoch": 1.0868663594470047, + "grad_norm": 1.0730582514021882, + "learning_rate": 9.394606119707954e-07, + "loss": 0.895818829536438, + "step": 4717 + }, + { + "epoch": 1.0870967741935484, + "grad_norm": 1.2584943519033869, + "learning_rate": 9.390803313417072e-07, + "loss": 0.8534268140792847, + "step": 4718 + }, + { + "epoch": 1.0873271889400922, + "grad_norm": 1.0910485662903118, + "learning_rate": 9.38700059555028e-07, + "loss": 0.8603401184082031, + "step": 4719 + }, + { + "epoch": 1.087557603686636, + "grad_norm": 1.1060380385520165, + "learning_rate": 9.383197966659542e-07, + "loss": 0.8810417652130127, + "step": 4720 + }, + { + "epoch": 1.0877880184331796, + "grad_norm": 1.078874247367276, + "learning_rate": 9.3793954272968e-07, + "loss": 0.7144299149513245, + "step": 4721 + }, + { + "epoch": 1.0880184331797236, + "grad_norm": 1.3140311568193026, + "learning_rate": 9.375592978013994e-07, + "loss": 0.8780069351196289, + "step": 4722 + }, + { + "epoch": 1.0882488479262673, + "grad_norm": 1.1329108063995987, + "learning_rate": 9.371790619363041e-07, + "loss": 0.7976780533790588, + "step": 4723 + }, + { + "epoch": 1.088479262672811, + "grad_norm": 1.0979402846559465, + "learning_rate": 9.367988351895846e-07, + "loss": 0.9183385372161865, + "step": 4724 + }, + { + "epoch": 
1.0887096774193548, + "grad_norm": 1.0551038276717553, + "learning_rate": 9.364186176164306e-07, + "loss": 0.7891188859939575, + "step": 4725 + }, + { + "epoch": 1.0889400921658987, + "grad_norm": 0.9930223107211231, + "learning_rate": 9.360384092720301e-07, + "loss": 0.7586535215377808, + "step": 4726 + }, + { + "epoch": 1.0891705069124424, + "grad_norm": 1.1542507976324667, + "learning_rate": 9.356582102115696e-07, + "loss": 0.7915316224098206, + "step": 4727 + }, + { + "epoch": 1.0894009216589862, + "grad_norm": 0.901378484170352, + "learning_rate": 9.352780204902349e-07, + "loss": 0.6608257293701172, + "step": 4728 + }, + { + "epoch": 1.08963133640553, + "grad_norm": 1.1982692712799377, + "learning_rate": 9.3489784016321e-07, + "loss": 0.8375273942947388, + "step": 4729 + }, + { + "epoch": 1.0898617511520738, + "grad_norm": 1.43591815259741, + "learning_rate": 9.345176692856768e-07, + "loss": 0.7629055976867676, + "step": 4730 + }, + { + "epoch": 1.0900921658986176, + "grad_norm": 1.3741081876453818, + "learning_rate": 9.341375079128177e-07, + "loss": 0.8037875890731812, + "step": 4731 + }, + { + "epoch": 1.0903225806451613, + "grad_norm": 1.1252370555828741, + "learning_rate": 9.337573560998123e-07, + "loss": 0.8843437433242798, + "step": 4732 + }, + { + "epoch": 1.090552995391705, + "grad_norm": 1.058447534132799, + "learning_rate": 9.333772139018387e-07, + "loss": 0.7164910435676575, + "step": 4733 + }, + { + "epoch": 1.0907834101382488, + "grad_norm": 1.144703504042011, + "learning_rate": 9.329970813740742e-07, + "loss": 0.8076978921890259, + "step": 4734 + }, + { + "epoch": 1.0910138248847927, + "grad_norm": 1.091507904535434, + "learning_rate": 9.326169585716949e-07, + "loss": 0.7265340089797974, + "step": 4735 + }, + { + "epoch": 1.0912442396313364, + "grad_norm": 0.9010611551057135, + "learning_rate": 9.322368455498747e-07, + "loss": 0.7438681125640869, + "step": 4736 + }, + { + "epoch": 1.0914746543778802, + "grad_norm": 1.455573835192626, + 
"learning_rate": 9.318567423637868e-07, + "loss": 0.8760604858398438, + "step": 4737 + }, + { + "epoch": 1.0917050691244239, + "grad_norm": 1.064698472707054, + "learning_rate": 9.314766490686026e-07, + "loss": 0.7216911315917969, + "step": 4738 + }, + { + "epoch": 1.0919354838709678, + "grad_norm": 1.207051606070953, + "learning_rate": 9.310965657194916e-07, + "loss": 0.8003707528114319, + "step": 4739 + }, + { + "epoch": 1.0921658986175116, + "grad_norm": 0.9484074376515712, + "learning_rate": 9.307164923716233e-07, + "loss": 0.6496548652648926, + "step": 4740 + }, + { + "epoch": 1.0923963133640553, + "grad_norm": 1.0304975730869472, + "learning_rate": 9.303364290801644e-07, + "loss": 0.7659108638763428, + "step": 4741 + }, + { + "epoch": 1.092626728110599, + "grad_norm": 1.016478094690519, + "learning_rate": 9.299563759002802e-07, + "loss": 0.7799512147903442, + "step": 4742 + }, + { + "epoch": 1.092857142857143, + "grad_norm": 0.9921566283768914, + "learning_rate": 9.295763328871357e-07, + "loss": 0.7675691246986389, + "step": 4743 + }, + { + "epoch": 1.0930875576036867, + "grad_norm": 1.0513054078420998, + "learning_rate": 9.291963000958931e-07, + "loss": 0.677080512046814, + "step": 4744 + }, + { + "epoch": 1.0933179723502304, + "grad_norm": 1.0842277521538888, + "learning_rate": 9.28816277581714e-07, + "loss": 0.7885928153991699, + "step": 4745 + }, + { + "epoch": 1.0935483870967742, + "grad_norm": 1.07543209238493, + "learning_rate": 9.28436265399758e-07, + "loss": 0.6568010449409485, + "step": 4746 + }, + { + "epoch": 1.0937788018433179, + "grad_norm": 1.076830779801181, + "learning_rate": 9.280562636051827e-07, + "loss": 0.9438225030899048, + "step": 4747 + }, + { + "epoch": 1.0940092165898618, + "grad_norm": 1.0420094595322553, + "learning_rate": 9.276762722531461e-07, + "loss": 0.8119498491287231, + "step": 4748 + }, + { + "epoch": 1.0942396313364056, + "grad_norm": 0.8228863679585698, + "learning_rate": 9.272962913988029e-07, + "loss": 
0.7570452690124512, + "step": 4749 + }, + { + "epoch": 1.0944700460829493, + "grad_norm": 1.0990726312613297, + "learning_rate": 9.269163210973063e-07, + "loss": 0.7541190385818481, + "step": 4750 + }, + { + "epoch": 1.094700460829493, + "grad_norm": 1.015570437282189, + "learning_rate": 9.265363614038093e-07, + "loss": 0.6481921672821045, + "step": 4751 + }, + { + "epoch": 1.094930875576037, + "grad_norm": 1.1173263478947815, + "learning_rate": 9.261564123734623e-07, + "loss": 0.7997267246246338, + "step": 4752 + }, + { + "epoch": 1.0951612903225807, + "grad_norm": 1.4388540160892265, + "learning_rate": 9.25776474061414e-07, + "loss": 0.9093008637428284, + "step": 4753 + }, + { + "epoch": 1.0953917050691244, + "grad_norm": 1.3909093606880625, + "learning_rate": 9.253965465228122e-07, + "loss": 0.7609673142433167, + "step": 4754 + }, + { + "epoch": 1.0956221198156681, + "grad_norm": 1.311027419629587, + "learning_rate": 9.250166298128032e-07, + "loss": 0.8338878154754639, + "step": 4755 + }, + { + "epoch": 1.095852534562212, + "grad_norm": 1.1912490488387477, + "learning_rate": 9.246367239865308e-07, + "loss": 0.7503781318664551, + "step": 4756 + }, + { + "epoch": 1.0960829493087558, + "grad_norm": 1.0417471668794835, + "learning_rate": 9.242568290991384e-07, + "loss": 0.7630816698074341, + "step": 4757 + }, + { + "epoch": 1.0963133640552996, + "grad_norm": 1.4287601409586015, + "learning_rate": 9.238769452057671e-07, + "loss": 0.8026378154754639, + "step": 4758 + }, + { + "epoch": 1.0965437788018433, + "grad_norm": 1.0309152969100308, + "learning_rate": 9.234970723615558e-07, + "loss": 0.8256090879440308, + "step": 4759 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 1.1197681925892131, + "learning_rate": 9.231172106216437e-07, + "loss": 0.7331836223602295, + "step": 4760 + }, + { + "epoch": 1.097004608294931, + "grad_norm": 1.1300301361381715, + "learning_rate": 9.227373600411667e-07, + "loss": 0.886203944683075, + "step": 4761 + }, + { + "epoch": 
1.0972350230414747, + "grad_norm": 1.113695044174903, + "learning_rate": 9.223575206752592e-07, + "loss": 0.7802814245223999, + "step": 4762 + }, + { + "epoch": 1.0974654377880184, + "grad_norm": 1.3075634566953063, + "learning_rate": 9.219776925790552e-07, + "loss": 0.9682798385620117, + "step": 4763 + }, + { + "epoch": 1.0976958525345621, + "grad_norm": 1.1689607681364365, + "learning_rate": 9.215978758076858e-07, + "loss": 0.8733793497085571, + "step": 4764 + }, + { + "epoch": 1.097926267281106, + "grad_norm": 1.0890238577837303, + "learning_rate": 9.212180704162809e-07, + "loss": 0.8403818607330322, + "step": 4765 + }, + { + "epoch": 1.0981566820276498, + "grad_norm": 1.0898706001284595, + "learning_rate": 9.208382764599688e-07, + "loss": 0.7957059144973755, + "step": 4766 + }, + { + "epoch": 1.0983870967741935, + "grad_norm": 1.290224136897281, + "learning_rate": 9.204584939938761e-07, + "loss": 0.8943477272987366, + "step": 4767 + }, + { + "epoch": 1.0986175115207373, + "grad_norm": 1.0710230295284595, + "learning_rate": 9.200787230731273e-07, + "loss": 0.7084406018257141, + "step": 4768 + }, + { + "epoch": 1.098847926267281, + "grad_norm": 1.190836398847277, + "learning_rate": 9.196989637528465e-07, + "loss": 0.8374637365341187, + "step": 4769 + }, + { + "epoch": 1.099078341013825, + "grad_norm": 1.3757022429132086, + "learning_rate": 9.193192160881543e-07, + "loss": 0.6963578462600708, + "step": 4770 + }, + { + "epoch": 1.0993087557603687, + "grad_norm": 0.9887346096468936, + "learning_rate": 9.189394801341716e-07, + "loss": 0.6732540130615234, + "step": 4771 + }, + { + "epoch": 1.0995391705069124, + "grad_norm": 1.092710990198668, + "learning_rate": 9.185597559460159e-07, + "loss": 0.7104849219322205, + "step": 4772 + }, + { + "epoch": 1.0997695852534561, + "grad_norm": 1.3885045688613133, + "learning_rate": 9.181800435788037e-07, + "loss": 0.8461153507232666, + "step": 4773 + }, + { + "epoch": 1.1, + "grad_norm": 1.0447899457724443, + "learning_rate": 
9.178003430876502e-07, + "loss": 0.7120847105979919, + "step": 4774 + }, + { + "epoch": 1.1002304147465438, + "grad_norm": 1.0881207229188647, + "learning_rate": 9.174206545276677e-07, + "loss": 0.8108617067337036, + "step": 4775 + }, + { + "epoch": 1.1004608294930875, + "grad_norm": 0.9153115264713604, + "learning_rate": 9.170409779539678e-07, + "loss": 0.7019558548927307, + "step": 4776 + }, + { + "epoch": 1.1006912442396313, + "grad_norm": 0.9272452690627847, + "learning_rate": 9.166613134216605e-07, + "loss": 0.7563629150390625, + "step": 4777 + }, + { + "epoch": 1.100921658986175, + "grad_norm": 0.9795708897837844, + "learning_rate": 9.162816609858533e-07, + "loss": 0.777009129524231, + "step": 4778 + }, + { + "epoch": 1.101152073732719, + "grad_norm": 1.143317572483065, + "learning_rate": 9.159020207016516e-07, + "loss": 0.812334418296814, + "step": 4779 + }, + { + "epoch": 1.1013824884792627, + "grad_norm": 0.8685579046345627, + "learning_rate": 9.155223926241608e-07, + "loss": 0.609114408493042, + "step": 4780 + }, + { + "epoch": 1.1016129032258064, + "grad_norm": 1.1689773804888128, + "learning_rate": 9.151427768084828e-07, + "loss": 0.8277549147605896, + "step": 4781 + }, + { + "epoch": 1.1018433179723501, + "grad_norm": 1.2556834532396843, + "learning_rate": 9.147631733097179e-07, + "loss": 0.8649400472640991, + "step": 4782 + }, + { + "epoch": 1.102073732718894, + "grad_norm": 0.8878271909604711, + "learning_rate": 9.14383582182966e-07, + "loss": 0.7894293665885925, + "step": 4783 + }, + { + "epoch": 1.1023041474654378, + "grad_norm": 1.3844953995401048, + "learning_rate": 9.14004003483324e-07, + "loss": 0.9121778011322021, + "step": 4784 + }, + { + "epoch": 1.1025345622119815, + "grad_norm": 1.0899535734318635, + "learning_rate": 9.136244372658867e-07, + "loss": 0.7162299156188965, + "step": 4785 + }, + { + "epoch": 1.1027649769585253, + "grad_norm": 1.1193596859001855, + "learning_rate": 9.132448835857482e-07, + "loss": 0.7059808969497681, + "step": 
4786 + }, + { + "epoch": 1.1029953917050692, + "grad_norm": 1.2034226051758443, + "learning_rate": 9.128653424979999e-07, + "loss": 0.8172405958175659, + "step": 4787 + }, + { + "epoch": 1.103225806451613, + "grad_norm": 0.876114016677297, + "learning_rate": 9.124858140577316e-07, + "loss": 0.7672706842422485, + "step": 4788 + }, + { + "epoch": 1.1034562211981567, + "grad_norm": 1.2578760464526295, + "learning_rate": 9.121062983200318e-07, + "loss": 0.7054900527000427, + "step": 4789 + }, + { + "epoch": 1.1036866359447004, + "grad_norm": 1.0063162295686867, + "learning_rate": 9.117267953399865e-07, + "loss": 0.888538122177124, + "step": 4790 + }, + { + "epoch": 1.1039170506912441, + "grad_norm": 1.1758406583219614, + "learning_rate": 9.113473051726796e-07, + "loss": 0.7918668985366821, + "step": 4791 + }, + { + "epoch": 1.104147465437788, + "grad_norm": 1.220328177578168, + "learning_rate": 9.109678278731942e-07, + "loss": 0.7385697960853577, + "step": 4792 + }, + { + "epoch": 1.1043778801843318, + "grad_norm": 1.0627777124669568, + "learning_rate": 9.105883634966107e-07, + "loss": 0.6394056081771851, + "step": 4793 + }, + { + "epoch": 1.1046082949308755, + "grad_norm": 1.2147960582385422, + "learning_rate": 9.102089120980081e-07, + "loss": 0.8372077941894531, + "step": 4794 + }, + { + "epoch": 1.1048387096774193, + "grad_norm": 1.0764884273918471, + "learning_rate": 9.098294737324628e-07, + "loss": 0.6944066286087036, + "step": 4795 + }, + { + "epoch": 1.1050691244239632, + "grad_norm": 1.3210680270500303, + "learning_rate": 9.0945004845505e-07, + "loss": 0.8480994701385498, + "step": 4796 + }, + { + "epoch": 1.105299539170507, + "grad_norm": 1.3778825395187644, + "learning_rate": 9.090706363208431e-07, + "loss": 0.837437629699707, + "step": 4797 + }, + { + "epoch": 1.1055299539170507, + "grad_norm": 1.2126670676110476, + "learning_rate": 9.086912373849128e-07, + "loss": 0.8610002398490906, + "step": 4798 + }, + { + "epoch": 1.1057603686635944, + "grad_norm": 
1.1204211704902753, + "learning_rate": 9.083118517023281e-07, + "loss": 0.7323784828186035, + "step": 4799 + }, + { + "epoch": 1.1059907834101383, + "grad_norm": 1.394483021595883, + "learning_rate": 9.079324793281573e-07, + "loss": 0.7838932871818542, + "step": 4800 + }, + { + "epoch": 1.106221198156682, + "grad_norm": 1.1333807320340106, + "learning_rate": 9.075531203174651e-07, + "loss": 0.7655705213546753, + "step": 4801 + }, + { + "epoch": 1.1064516129032258, + "grad_norm": 1.199812107745982, + "learning_rate": 9.071737747253148e-07, + "loss": 0.8320151567459106, + "step": 4802 + }, + { + "epoch": 1.1066820276497695, + "grad_norm": 1.0428789095876687, + "learning_rate": 9.067944426067687e-07, + "loss": 0.7434612512588501, + "step": 4803 + }, + { + "epoch": 1.1069124423963133, + "grad_norm": 1.348302596081637, + "learning_rate": 9.064151240168857e-07, + "loss": 0.8351321220397949, + "step": 4804 + }, + { + "epoch": 1.1071428571428572, + "grad_norm": 0.9731377071478325, + "learning_rate": 9.060358190107233e-07, + "loss": 0.6648053526878357, + "step": 4805 + }, + { + "epoch": 1.107373271889401, + "grad_norm": 1.236779616553706, + "learning_rate": 9.056565276433377e-07, + "loss": 0.7507585287094116, + "step": 4806 + }, + { + "epoch": 1.1076036866359447, + "grad_norm": 1.0866303306873377, + "learning_rate": 9.052772499697823e-07, + "loss": 0.7638635635375977, + "step": 4807 + }, + { + "epoch": 1.1078341013824884, + "grad_norm": 1.3204341922490346, + "learning_rate": 9.048979860451081e-07, + "loss": 0.8066626191139221, + "step": 4808 + }, + { + "epoch": 1.1080645161290323, + "grad_norm": 0.9459322006964221, + "learning_rate": 9.045187359243659e-07, + "loss": 0.7090466022491455, + "step": 4809 + }, + { + "epoch": 1.108294930875576, + "grad_norm": 1.1112578831827626, + "learning_rate": 9.041394996626027e-07, + "loss": 0.7071142792701721, + "step": 4810 + }, + { + "epoch": 1.1085253456221198, + "grad_norm": 1.0134445673972028, + "learning_rate": 9.037602773148638e-07, 
+ "loss": 0.7103942036628723, + "step": 4811 + }, + { + "epoch": 1.1087557603686635, + "grad_norm": 1.1348721368793189, + "learning_rate": 9.033810689361936e-07, + "loss": 0.8408492207527161, + "step": 4812 + }, + { + "epoch": 1.1089861751152075, + "grad_norm": 0.9439878571651674, + "learning_rate": 9.030018745816335e-07, + "loss": 0.7621495723724365, + "step": 4813 + }, + { + "epoch": 1.1092165898617512, + "grad_norm": 1.152461687801826, + "learning_rate": 9.026226943062225e-07, + "loss": 0.7105196714401245, + "step": 4814 + }, + { + "epoch": 1.109447004608295, + "grad_norm": 1.079152769158689, + "learning_rate": 9.022435281649986e-07, + "loss": 0.8733636140823364, + "step": 4815 + }, + { + "epoch": 1.1096774193548387, + "grad_norm": 1.223534472251507, + "learning_rate": 9.018643762129974e-07, + "loss": 0.9097845554351807, + "step": 4816 + }, + { + "epoch": 1.1099078341013824, + "grad_norm": 1.2220607424054495, + "learning_rate": 9.014852385052519e-07, + "loss": 0.8743059635162354, + "step": 4817 + }, + { + "epoch": 1.1101382488479263, + "grad_norm": 1.0404677289419784, + "learning_rate": 9.011061150967937e-07, + "loss": 0.7898736000061035, + "step": 4818 + }, + { + "epoch": 1.11036866359447, + "grad_norm": 1.1698125073586854, + "learning_rate": 9.007270060426516e-07, + "loss": 0.871254563331604, + "step": 4819 + }, + { + "epoch": 1.1105990783410138, + "grad_norm": 1.323286168379092, + "learning_rate": 9.003479113978536e-07, + "loss": 0.6833579540252686, + "step": 4820 + }, + { + "epoch": 1.1108294930875575, + "grad_norm": 1.285642784687423, + "learning_rate": 8.999688312174243e-07, + "loss": 0.8289071321487427, + "step": 4821 + }, + { + "epoch": 1.1110599078341015, + "grad_norm": 1.1884737282905606, + "learning_rate": 8.995897655563864e-07, + "loss": 0.6798583269119263, + "step": 4822 + }, + { + "epoch": 1.1112903225806452, + "grad_norm": 1.1108358813410262, + "learning_rate": 8.992107144697614e-07, + "loss": 0.6518250703811646, + "step": 4823 + }, + { + "epoch": 
1.111520737327189, + "grad_norm": 1.3596600109698966, + "learning_rate": 8.988316780125679e-07, + "loss": 0.9316667318344116, + "step": 4824 + }, + { + "epoch": 1.1117511520737327, + "grad_norm": 0.9951654747842746, + "learning_rate": 8.98452656239822e-07, + "loss": 0.755483865737915, + "step": 4825 + }, + { + "epoch": 1.1119815668202766, + "grad_norm": 1.0146600815927005, + "learning_rate": 8.980736492065391e-07, + "loss": 0.7892755270004272, + "step": 4826 + }, + { + "epoch": 1.1122119815668203, + "grad_norm": 0.9930161298314518, + "learning_rate": 8.976946569677308e-07, + "loss": 0.703255295753479, + "step": 4827 + }, + { + "epoch": 1.112442396313364, + "grad_norm": 1.1559327578235137, + "learning_rate": 8.973156795784073e-07, + "loss": 0.7885171175003052, + "step": 4828 + }, + { + "epoch": 1.1126728110599078, + "grad_norm": 1.1407519814570228, + "learning_rate": 8.969367170935776e-07, + "loss": 0.8035199642181396, + "step": 4829 + }, + { + "epoch": 1.1129032258064515, + "grad_norm": 1.0245821351407076, + "learning_rate": 8.965577695682467e-07, + "loss": 0.8272112607955933, + "step": 4830 + }, + { + "epoch": 1.1131336405529955, + "grad_norm": 1.1104598721433627, + "learning_rate": 8.961788370574182e-07, + "loss": 0.8734478950500488, + "step": 4831 + }, + { + "epoch": 1.1133640552995392, + "grad_norm": 1.2722110058519596, + "learning_rate": 8.957999196160946e-07, + "loss": 0.7487469911575317, + "step": 4832 + }, + { + "epoch": 1.113594470046083, + "grad_norm": 1.3783344397611896, + "learning_rate": 8.954210172992748e-07, + "loss": 0.9193693399429321, + "step": 4833 + }, + { + "epoch": 1.1138248847926266, + "grad_norm": 1.4522583636726432, + "learning_rate": 8.950421301619555e-07, + "loss": 0.8228428959846497, + "step": 4834 + }, + { + "epoch": 1.1140552995391706, + "grad_norm": 0.9646412535671615, + "learning_rate": 8.946632582591324e-07, + "loss": 0.7419015169143677, + "step": 4835 + }, + { + "epoch": 1.1142857142857143, + "grad_norm": 1.1957500872812925, + 
"learning_rate": 8.942844016457975e-07, + "loss": 0.827411949634552, + "step": 4836 + }, + { + "epoch": 1.114516129032258, + "grad_norm": 0.9975223373000859, + "learning_rate": 8.93905560376942e-07, + "loss": 0.7066754102706909, + "step": 4837 + }, + { + "epoch": 1.1147465437788018, + "grad_norm": 1.2336329306802043, + "learning_rate": 8.93526734507554e-07, + "loss": 0.7201621532440186, + "step": 4838 + }, + { + "epoch": 1.1149769585253457, + "grad_norm": 0.8521980282185057, + "learning_rate": 8.931479240926196e-07, + "loss": 0.6363521814346313, + "step": 4839 + }, + { + "epoch": 1.1152073732718895, + "grad_norm": 1.0065898101647581, + "learning_rate": 8.927691291871223e-07, + "loss": 0.8232909440994263, + "step": 4840 + }, + { + "epoch": 1.1154377880184332, + "grad_norm": 1.0354249430711853, + "learning_rate": 8.923903498460441e-07, + "loss": 0.7006033658981323, + "step": 4841 + }, + { + "epoch": 1.115668202764977, + "grad_norm": 1.1957171429651339, + "learning_rate": 8.920115861243638e-07, + "loss": 0.6982721090316772, + "step": 4842 + }, + { + "epoch": 1.1158986175115206, + "grad_norm": 1.039109039901578, + "learning_rate": 8.916328380770593e-07, + "loss": 0.7735922336578369, + "step": 4843 + }, + { + "epoch": 1.1161290322580646, + "grad_norm": 1.189307260310029, + "learning_rate": 8.912541057591049e-07, + "loss": 0.7430423498153687, + "step": 4844 + }, + { + "epoch": 1.1163594470046083, + "grad_norm": 1.0189703427385546, + "learning_rate": 8.908753892254729e-07, + "loss": 0.7783932685852051, + "step": 4845 + }, + { + "epoch": 1.116589861751152, + "grad_norm": 0.895546986970967, + "learning_rate": 8.904966885311339e-07, + "loss": 0.726211428642273, + "step": 4846 + }, + { + "epoch": 1.1168202764976958, + "grad_norm": 1.0042101088511581, + "learning_rate": 8.901180037310555e-07, + "loss": 0.664351761341095, + "step": 4847 + }, + { + "epoch": 1.1170506912442397, + "grad_norm": 1.192545271664204, + "learning_rate": 8.897393348802031e-07, + "loss": 
0.8246554136276245, + "step": 4848 + }, + { + "epoch": 1.1172811059907835, + "grad_norm": 1.3113785088290244, + "learning_rate": 8.893606820335405e-07, + "loss": 0.9435447454452515, + "step": 4849 + }, + { + "epoch": 1.1175115207373272, + "grad_norm": 1.1196400925650334, + "learning_rate": 8.889820452460286e-07, + "loss": 0.8471171855926514, + "step": 4850 + }, + { + "epoch": 1.117741935483871, + "grad_norm": 0.9950597161448561, + "learning_rate": 8.886034245726254e-07, + "loss": 0.6038233041763306, + "step": 4851 + }, + { + "epoch": 1.1179723502304149, + "grad_norm": 1.1171540360532777, + "learning_rate": 8.882248200682881e-07, + "loss": 0.8186997771263123, + "step": 4852 + }, + { + "epoch": 1.1182027649769586, + "grad_norm": 1.2436642718372632, + "learning_rate": 8.878462317879702e-07, + "loss": 0.789948582649231, + "step": 4853 + }, + { + "epoch": 1.1184331797235023, + "grad_norm": 1.0789321556804603, + "learning_rate": 8.87467659786623e-07, + "loss": 0.7543652057647705, + "step": 4854 + }, + { + "epoch": 1.118663594470046, + "grad_norm": 1.0717127208024606, + "learning_rate": 8.870891041191963e-07, + "loss": 0.5985269546508789, + "step": 4855 + }, + { + "epoch": 1.1188940092165898, + "grad_norm": 1.109115113465042, + "learning_rate": 8.867105648406364e-07, + "loss": 0.7676643133163452, + "step": 4856 + }, + { + "epoch": 1.1191244239631337, + "grad_norm": 1.0078052507528568, + "learning_rate": 8.863320420058881e-07, + "loss": 0.7317303419113159, + "step": 4857 + }, + { + "epoch": 1.1193548387096774, + "grad_norm": 1.117240479042085, + "learning_rate": 8.859535356698936e-07, + "loss": 0.8357843160629272, + "step": 4858 + }, + { + "epoch": 1.1195852534562212, + "grad_norm": 1.2827717071860176, + "learning_rate": 8.855750458875923e-07, + "loss": 0.7149945497512817, + "step": 4859 + }, + { + "epoch": 1.119815668202765, + "grad_norm": 1.1258754685876486, + "learning_rate": 8.851965727139214e-07, + "loss": 0.7059169411659241, + "step": 4860 + }, + { + "epoch": 
1.1200460829493089, + "grad_norm": 1.0779991100813224, + "learning_rate": 8.848181162038163e-07, + "loss": 0.7530190944671631, + "step": 4861 + }, + { + "epoch": 1.1202764976958526, + "grad_norm": 1.12578616970897, + "learning_rate": 8.844396764122092e-07, + "loss": 0.808814287185669, + "step": 4862 + }, + { + "epoch": 1.1205069124423963, + "grad_norm": 1.174668121226261, + "learning_rate": 8.840612533940295e-07, + "loss": 0.7205604910850525, + "step": 4863 + }, + { + "epoch": 1.12073732718894, + "grad_norm": 1.0284636891818573, + "learning_rate": 8.83682847204206e-07, + "loss": 0.7493274211883545, + "step": 4864 + }, + { + "epoch": 1.120967741935484, + "grad_norm": 1.1974475439930412, + "learning_rate": 8.833044578976631e-07, + "loss": 0.8115849494934082, + "step": 4865 + }, + { + "epoch": 1.1211981566820277, + "grad_norm": 1.2224514970634248, + "learning_rate": 8.829260855293237e-07, + "loss": 0.8188419342041016, + "step": 4866 + }, + { + "epoch": 1.1214285714285714, + "grad_norm": 1.372584236180193, + "learning_rate": 8.82547730154108e-07, + "loss": 0.6152349710464478, + "step": 4867 + }, + { + "epoch": 1.1216589861751152, + "grad_norm": 0.9364210771252817, + "learning_rate": 8.821693918269333e-07, + "loss": 0.7629969120025635, + "step": 4868 + }, + { + "epoch": 1.121889400921659, + "grad_norm": 1.0637191210851928, + "learning_rate": 8.81791070602716e-07, + "loss": 0.7063733339309692, + "step": 4869 + }, + { + "epoch": 1.1221198156682028, + "grad_norm": 1.2221996591019166, + "learning_rate": 8.814127665363682e-07, + "loss": 0.729676365852356, + "step": 4870 + }, + { + "epoch": 1.1223502304147466, + "grad_norm": 1.2363948838699006, + "learning_rate": 8.810344796827999e-07, + "loss": 0.8188877105712891, + "step": 4871 + }, + { + "epoch": 1.1225806451612903, + "grad_norm": 1.4364824515163135, + "learning_rate": 8.806562100969199e-07, + "loss": 0.70793217420578, + "step": 4872 + }, + { + "epoch": 1.122811059907834, + "grad_norm": 1.2471671753090219, + 
"learning_rate": 8.802779578336329e-07, + "loss": 0.8086484670639038, + "step": 4873 + }, + { + "epoch": 1.123041474654378, + "grad_norm": 1.209058465827679, + "learning_rate": 8.798997229478417e-07, + "loss": 0.8954081535339355, + "step": 4874 + }, + { + "epoch": 1.1232718894009217, + "grad_norm": 1.0352094557860352, + "learning_rate": 8.795215054944469e-07, + "loss": 0.6615205407142639, + "step": 4875 + }, + { + "epoch": 1.1235023041474654, + "grad_norm": 1.3182700744777898, + "learning_rate": 8.79143305528346e-07, + "loss": 0.6851116418838501, + "step": 4876 + }, + { + "epoch": 1.1237327188940092, + "grad_norm": 0.9311237252586447, + "learning_rate": 8.787651231044342e-07, + "loss": 0.7594672441482544, + "step": 4877 + }, + { + "epoch": 1.123963133640553, + "grad_norm": 1.2505187148095604, + "learning_rate": 8.783869582776044e-07, + "loss": 0.7170572280883789, + "step": 4878 + }, + { + "epoch": 1.1241935483870968, + "grad_norm": 1.1244851690255748, + "learning_rate": 8.780088111027467e-07, + "loss": 0.9139137864112854, + "step": 4879 + }, + { + "epoch": 1.1244239631336406, + "grad_norm": 1.2468380143920514, + "learning_rate": 8.776306816347482e-07, + "loss": 0.8716791868209839, + "step": 4880 + }, + { + "epoch": 1.1246543778801843, + "grad_norm": 1.5043743610246187, + "learning_rate": 8.772525699284946e-07, + "loss": 0.840330958366394, + "step": 4881 + }, + { + "epoch": 1.124884792626728, + "grad_norm": 1.28802116274467, + "learning_rate": 8.768744760388681e-07, + "loss": 0.7713445425033569, + "step": 4882 + }, + { + "epoch": 1.125115207373272, + "grad_norm": 1.2058132743835892, + "learning_rate": 8.764964000207479e-07, + "loss": 0.8964767456054688, + "step": 4883 + }, + { + "epoch": 1.1253456221198157, + "grad_norm": 1.12361515551762, + "learning_rate": 8.761183419290121e-07, + "loss": 0.8038421869277954, + "step": 4884 + }, + { + "epoch": 1.1255760368663594, + "grad_norm": 0.7722654284456119, + "learning_rate": 8.757403018185351e-07, + "loss": 
0.6601011753082275, + "step": 4885 + }, + { + "epoch": 1.1258064516129032, + "grad_norm": 0.8011265369746955, + "learning_rate": 8.753622797441885e-07, + "loss": 0.8226664066314697, + "step": 4886 + }, + { + "epoch": 1.1260368663594469, + "grad_norm": 1.0633366554284305, + "learning_rate": 8.749842757608422e-07, + "loss": 0.7062248587608337, + "step": 4887 + }, + { + "epoch": 1.1262672811059908, + "grad_norm": 1.318395948514478, + "learning_rate": 8.746062899233628e-07, + "loss": 0.8642051815986633, + "step": 4888 + }, + { + "epoch": 1.1264976958525346, + "grad_norm": 1.2332349128972684, + "learning_rate": 8.74228322286614e-07, + "loss": 0.8194048404693604, + "step": 4889 + }, + { + "epoch": 1.1267281105990783, + "grad_norm": 1.121678775220638, + "learning_rate": 8.738503729054583e-07, + "loss": 0.6957820653915405, + "step": 4890 + }, + { + "epoch": 1.1269585253456222, + "grad_norm": 0.9775692035561586, + "learning_rate": 8.734724418347537e-07, + "loss": 0.8107770681381226, + "step": 4891 + }, + { + "epoch": 1.127188940092166, + "grad_norm": 1.1508754542191086, + "learning_rate": 8.730945291293563e-07, + "loss": 0.7727551460266113, + "step": 4892 + }, + { + "epoch": 1.1274193548387097, + "grad_norm": 1.1347047929449647, + "learning_rate": 8.727166348441207e-07, + "loss": 0.7389936447143555, + "step": 4893 + }, + { + "epoch": 1.1276497695852534, + "grad_norm": 1.2733389095695957, + "learning_rate": 8.723387590338964e-07, + "loss": 0.7666463851928711, + "step": 4894 + }, + { + "epoch": 1.1278801843317972, + "grad_norm": 1.1990629153183452, + "learning_rate": 8.719609017535328e-07, + "loss": 0.7795453071594238, + "step": 4895 + }, + { + "epoch": 1.128110599078341, + "grad_norm": 1.1062968437903737, + "learning_rate": 8.715830630578746e-07, + "loss": 0.8560752272605896, + "step": 4896 + }, + { + "epoch": 1.1283410138248848, + "grad_norm": 1.2251043883259816, + "learning_rate": 8.712052430017645e-07, + "loss": 0.7574455738067627, + "step": 4897 + }, + { + "epoch": 
1.1285714285714286, + "grad_norm": 1.3025894471719623, + "learning_rate": 8.708274416400432e-07, + "loss": 0.8017276525497437, + "step": 4898 + }, + { + "epoch": 1.1288018433179723, + "grad_norm": 0.9942840399227726, + "learning_rate": 8.704496590275477e-07, + "loss": 0.7046157121658325, + "step": 4899 + }, + { + "epoch": 1.129032258064516, + "grad_norm": 1.187705347283351, + "learning_rate": 8.700718952191124e-07, + "loss": 0.7352035641670227, + "step": 4900 + }, + { + "epoch": 1.12926267281106, + "grad_norm": 0.9471130432852718, + "learning_rate": 8.696941502695698e-07, + "loss": 0.6444690227508545, + "step": 4901 + }, + { + "epoch": 1.1294930875576037, + "grad_norm": 1.0628821586759927, + "learning_rate": 8.69316424233749e-07, + "loss": 0.7909440994262695, + "step": 4902 + }, + { + "epoch": 1.1297235023041474, + "grad_norm": 0.9483928902743061, + "learning_rate": 8.689387171664756e-07, + "loss": 0.646790087223053, + "step": 4903 + }, + { + "epoch": 1.1299539170506911, + "grad_norm": 1.2796319408131067, + "learning_rate": 8.685610291225744e-07, + "loss": 0.786831796169281, + "step": 4904 + }, + { + "epoch": 1.130184331797235, + "grad_norm": 1.143272972798168, + "learning_rate": 8.681833601568657e-07, + "loss": 0.8004348278045654, + "step": 4905 + }, + { + "epoch": 1.1304147465437788, + "grad_norm": 0.996600703731369, + "learning_rate": 8.678057103241677e-07, + "loss": 0.6846532821655273, + "step": 4906 + }, + { + "epoch": 1.1306451612903226, + "grad_norm": 1.299426572962062, + "learning_rate": 8.67428079679296e-07, + "loss": 0.7555707693099976, + "step": 4907 + }, + { + "epoch": 1.1308755760368663, + "grad_norm": 1.3809719247833205, + "learning_rate": 8.67050468277063e-07, + "loss": 0.852725625038147, + "step": 4908 + }, + { + "epoch": 1.1311059907834102, + "grad_norm": 0.9844151846464619, + "learning_rate": 8.666728761722782e-07, + "loss": 0.6990044713020325, + "step": 4909 + }, + { + "epoch": 1.131336405529954, + "grad_norm": 1.223366973696945, + 
"learning_rate": 8.662953034197493e-07, + "loss": 0.8050999641418457, + "step": 4910 + }, + { + "epoch": 1.1315668202764977, + "grad_norm": 1.3085197840977536, + "learning_rate": 8.659177500742802e-07, + "loss": 0.8169291019439697, + "step": 4911 + }, + { + "epoch": 1.1317972350230414, + "grad_norm": 1.081294035300873, + "learning_rate": 8.655402161906716e-07, + "loss": 0.7814679145812988, + "step": 4912 + }, + { + "epoch": 1.1320276497695851, + "grad_norm": 1.237970773045493, + "learning_rate": 8.651627018237231e-07, + "loss": 0.6734834313392639, + "step": 4913 + }, + { + "epoch": 1.132258064516129, + "grad_norm": 1.1143770605215586, + "learning_rate": 8.647852070282299e-07, + "loss": 0.8765416145324707, + "step": 4914 + }, + { + "epoch": 1.1324884792626728, + "grad_norm": 1.3797966848789986, + "learning_rate": 8.644077318589847e-07, + "loss": 1.0023764371871948, + "step": 4915 + }, + { + "epoch": 1.1327188940092165, + "grad_norm": 1.0387287080137257, + "learning_rate": 8.64030276370778e-07, + "loss": 0.7561393976211548, + "step": 4916 + }, + { + "epoch": 1.1329493087557603, + "grad_norm": 1.123376400728965, + "learning_rate": 8.636528406183961e-07, + "loss": 0.8252062797546387, + "step": 4917 + }, + { + "epoch": 1.1331797235023042, + "grad_norm": 1.3939443114820729, + "learning_rate": 8.632754246566246e-07, + "loss": 0.7598097324371338, + "step": 4918 + }, + { + "epoch": 1.133410138248848, + "grad_norm": 0.8823184534346743, + "learning_rate": 8.628980285402438e-07, + "loss": 0.6113640069961548, + "step": 4919 + }, + { + "epoch": 1.1336405529953917, + "grad_norm": 1.096652563873467, + "learning_rate": 8.625206523240325e-07, + "loss": 0.7457853555679321, + "step": 4920 + }, + { + "epoch": 1.1338709677419354, + "grad_norm": 1.0304826450193199, + "learning_rate": 8.62143296062767e-07, + "loss": 0.7334161996841431, + "step": 4921 + }, + { + "epoch": 1.1341013824884794, + "grad_norm": 1.1383631487720753, + "learning_rate": 8.617659598112195e-07, + "loss": 
0.7446962594985962, + "step": 4922 + }, + { + "epoch": 1.134331797235023, + "grad_norm": 0.9360514056176105, + "learning_rate": 8.613886436241594e-07, + "loss": 0.7074497938156128, + "step": 4923 + }, + { + "epoch": 1.1345622119815668, + "grad_norm": 0.9945384740922374, + "learning_rate": 8.610113475563547e-07, + "loss": 0.6728851795196533, + "step": 4924 + }, + { + "epoch": 1.1347926267281105, + "grad_norm": 1.0533766436674836, + "learning_rate": 8.606340716625689e-07, + "loss": 0.7732793092727661, + "step": 4925 + }, + { + "epoch": 1.1350230414746543, + "grad_norm": 1.2301857240081557, + "learning_rate": 8.60256815997563e-07, + "loss": 0.7514671683311462, + "step": 4926 + }, + { + "epoch": 1.1352534562211982, + "grad_norm": 1.2507291163181513, + "learning_rate": 8.598795806160952e-07, + "loss": 0.7824795842170715, + "step": 4927 + }, + { + "epoch": 1.135483870967742, + "grad_norm": 1.1585997268920079, + "learning_rate": 8.59502365572921e-07, + "loss": 0.789236307144165, + "step": 4928 + }, + { + "epoch": 1.1357142857142857, + "grad_norm": 1.1796078109098491, + "learning_rate": 8.591251709227919e-07, + "loss": 0.7005175948143005, + "step": 4929 + }, + { + "epoch": 1.1359447004608294, + "grad_norm": 1.2299124062921447, + "learning_rate": 8.587479967204582e-07, + "loss": 0.7851300239562988, + "step": 4930 + }, + { + "epoch": 1.1361751152073734, + "grad_norm": 1.5129438725714193, + "learning_rate": 8.583708430206658e-07, + "loss": 0.8901405334472656, + "step": 4931 + }, + { + "epoch": 1.136405529953917, + "grad_norm": 1.1049343524856345, + "learning_rate": 8.579937098781576e-07, + "loss": 0.8118528127670288, + "step": 4932 + }, + { + "epoch": 1.1366359447004608, + "grad_norm": 1.0631974751851168, + "learning_rate": 8.57616597347675e-07, + "loss": 0.6500028371810913, + "step": 4933 + }, + { + "epoch": 1.1368663594470045, + "grad_norm": 1.057066415615051, + "learning_rate": 8.572395054839547e-07, + "loss": 0.7752922773361206, + "step": 4934 + }, + { + "epoch": 
1.1370967741935485, + "grad_norm": 1.124364781444334, + "learning_rate": 8.568624343417309e-07, + "loss": 0.7346245050430298, + "step": 4935 + }, + { + "epoch": 1.1373271889400922, + "grad_norm": 1.4547001781507483, + "learning_rate": 8.564853839757356e-07, + "loss": 0.9249104261398315, + "step": 4936 + }, + { + "epoch": 1.137557603686636, + "grad_norm": 1.0350864816884677, + "learning_rate": 8.561083544406965e-07, + "loss": 0.7407078742980957, + "step": 4937 + }, + { + "epoch": 1.1377880184331797, + "grad_norm": 1.197156559440129, + "learning_rate": 8.557313457913393e-07, + "loss": 0.7615865468978882, + "step": 4938 + }, + { + "epoch": 1.1380184331797234, + "grad_norm": 1.2125718427071739, + "learning_rate": 8.553543580823866e-07, + "loss": 0.757561445236206, + "step": 4939 + }, + { + "epoch": 1.1382488479262673, + "grad_norm": 1.1468001082336654, + "learning_rate": 8.549773913685572e-07, + "loss": 0.7130411863327026, + "step": 4940 + }, + { + "epoch": 1.138479262672811, + "grad_norm": 1.1282357144069963, + "learning_rate": 8.54600445704567e-07, + "loss": 0.7507551312446594, + "step": 4941 + }, + { + "epoch": 1.1387096774193548, + "grad_norm": 1.0556143227749322, + "learning_rate": 8.542235211451301e-07, + "loss": 0.896443247795105, + "step": 4942 + }, + { + "epoch": 1.1389400921658985, + "grad_norm": 1.145222677509159, + "learning_rate": 8.538466177449557e-07, + "loss": 0.7530815601348877, + "step": 4943 + }, + { + "epoch": 1.1391705069124425, + "grad_norm": 1.2481258172783056, + "learning_rate": 8.534697355587517e-07, + "loss": 0.8730431795120239, + "step": 4944 + }, + { + "epoch": 1.1394009216589862, + "grad_norm": 1.3010516024158107, + "learning_rate": 8.530928746412216e-07, + "loss": 0.6452720165252686, + "step": 4945 + }, + { + "epoch": 1.13963133640553, + "grad_norm": 1.1712957128451178, + "learning_rate": 8.527160350470661e-07, + "loss": 0.7679018974304199, + "step": 4946 + }, + { + "epoch": 1.1398617511520737, + "grad_norm": 1.402874429077297, + 
"learning_rate": 8.523392168309832e-07, + "loss": 0.8186824321746826, + "step": 4947 + }, + { + "epoch": 1.1400921658986176, + "grad_norm": 1.1669467278440648, + "learning_rate": 8.519624200476676e-07, + "loss": 0.666642427444458, + "step": 4948 + }, + { + "epoch": 1.1403225806451613, + "grad_norm": 1.0160881327834055, + "learning_rate": 8.515856447518104e-07, + "loss": 0.7478682994842529, + "step": 4949 + }, + { + "epoch": 1.140552995391705, + "grad_norm": 1.2340329971083113, + "learning_rate": 8.512088909981007e-07, + "loss": 0.7527793645858765, + "step": 4950 + }, + { + "epoch": 1.1407834101382488, + "grad_norm": 1.136863530366948, + "learning_rate": 8.508321588412235e-07, + "loss": 0.7614094018936157, + "step": 4951 + }, + { + "epoch": 1.1410138248847925, + "grad_norm": 1.2371366016065355, + "learning_rate": 8.504554483358605e-07, + "loss": 0.8294994831085205, + "step": 4952 + }, + { + "epoch": 1.1412442396313365, + "grad_norm": 1.4759487382386114, + "learning_rate": 8.500787595366919e-07, + "loss": 0.8900095224380493, + "step": 4953 + }, + { + "epoch": 1.1414746543778802, + "grad_norm": 1.0721192735972314, + "learning_rate": 8.497020924983926e-07, + "loss": 0.8403744697570801, + "step": 4954 + }, + { + "epoch": 1.141705069124424, + "grad_norm": 1.0449510164412683, + "learning_rate": 8.493254472756355e-07, + "loss": 0.7046208381652832, + "step": 4955 + }, + { + "epoch": 1.1419354838709677, + "grad_norm": 1.3018714779233174, + "learning_rate": 8.489488239230904e-07, + "loss": 0.8226789832115173, + "step": 4956 + }, + { + "epoch": 1.1421658986175116, + "grad_norm": 1.058902427650911, + "learning_rate": 8.485722224954236e-07, + "loss": 0.7248969674110413, + "step": 4957 + }, + { + "epoch": 1.1423963133640553, + "grad_norm": 1.1327549620980084, + "learning_rate": 8.481956430472979e-07, + "loss": 0.8116840124130249, + "step": 4958 + }, + { + "epoch": 1.142626728110599, + "grad_norm": 1.062622286893391, + "learning_rate": 8.478190856333739e-07, + "loss": 
0.7534138560295105, + "step": 4959 + }, + { + "epoch": 1.1428571428571428, + "grad_norm": 1.3427980825750856, + "learning_rate": 8.474425503083082e-07, + "loss": 0.8945306539535522, + "step": 4960 + }, + { + "epoch": 1.1430875576036867, + "grad_norm": 1.1592346473165394, + "learning_rate": 8.47066037126754e-07, + "loss": 0.7554503083229065, + "step": 4961 + }, + { + "epoch": 1.1433179723502305, + "grad_norm": 1.4596388821753403, + "learning_rate": 8.466895461433625e-07, + "loss": 0.832726776599884, + "step": 4962 + }, + { + "epoch": 1.1435483870967742, + "grad_norm": 1.250046955776058, + "learning_rate": 8.463130774127804e-07, + "loss": 0.8312773704528809, + "step": 4963 + }, + { + "epoch": 1.143778801843318, + "grad_norm": 0.9153601791246997, + "learning_rate": 8.459366309896512e-07, + "loss": 0.6484537124633789, + "step": 4964 + }, + { + "epoch": 1.1440092165898617, + "grad_norm": 1.2863432770713337, + "learning_rate": 8.455602069286165e-07, + "loss": 0.9216604828834534, + "step": 4965 + }, + { + "epoch": 1.1442396313364056, + "grad_norm": 1.134985678431753, + "learning_rate": 8.451838052843131e-07, + "loss": 0.6213096380233765, + "step": 4966 + }, + { + "epoch": 1.1444700460829493, + "grad_norm": 0.9562822723791001, + "learning_rate": 8.448074261113756e-07, + "loss": 0.6873677968978882, + "step": 4967 + }, + { + "epoch": 1.144700460829493, + "grad_norm": 1.215560824144924, + "learning_rate": 8.444310694644348e-07, + "loss": 0.7883448600769043, + "step": 4968 + }, + { + "epoch": 1.1449308755760368, + "grad_norm": 1.1944176371651494, + "learning_rate": 8.440547353981178e-07, + "loss": 0.724172830581665, + "step": 4969 + }, + { + "epoch": 1.1451612903225807, + "grad_norm": 1.0792006702141475, + "learning_rate": 8.4367842396705e-07, + "loss": 0.7115252017974854, + "step": 4970 + }, + { + "epoch": 1.1453917050691245, + "grad_norm": 1.0823773323138404, + "learning_rate": 8.433021352258521e-07, + "loss": 0.7165110111236572, + "step": 4971 + }, + { + "epoch": 
1.1456221198156682, + "grad_norm": 1.0874360604645514, + "learning_rate": 8.429258692291413e-07, + "loss": 0.7563315629959106, + "step": 4972 + }, + { + "epoch": 1.145852534562212, + "grad_norm": 1.1334099478279698, + "learning_rate": 8.425496260315331e-07, + "loss": 0.7528449892997742, + "step": 4973 + }, + { + "epoch": 1.1460829493087559, + "grad_norm": 1.1141426795021205, + "learning_rate": 8.421734056876383e-07, + "loss": 0.7976171970367432, + "step": 4974 + }, + { + "epoch": 1.1463133640552996, + "grad_norm": 1.020985144100356, + "learning_rate": 8.417972082520644e-07, + "loss": 0.7498095035552979, + "step": 4975 + }, + { + "epoch": 1.1465437788018433, + "grad_norm": 1.3446642320448154, + "learning_rate": 8.414210337794165e-07, + "loss": 0.9568856954574585, + "step": 4976 + }, + { + "epoch": 1.146774193548387, + "grad_norm": 0.9499457055768262, + "learning_rate": 8.410448823242957e-07, + "loss": 0.6402908563613892, + "step": 4977 + }, + { + "epoch": 1.1470046082949308, + "grad_norm": 1.1759709167305108, + "learning_rate": 8.406687539412995e-07, + "loss": 0.8224657773971558, + "step": 4978 + }, + { + "epoch": 1.1472350230414747, + "grad_norm": 1.2886598107348421, + "learning_rate": 8.402926486850229e-07, + "loss": 0.7804544568061829, + "step": 4979 + }, + { + "epoch": 1.1474654377880185, + "grad_norm": 1.1861127295236977, + "learning_rate": 8.39916566610057e-07, + "loss": 0.7920527458190918, + "step": 4980 + }, + { + "epoch": 1.1476958525345622, + "grad_norm": 1.1244888328051699, + "learning_rate": 8.395405077709891e-07, + "loss": 0.7672078609466553, + "step": 4981 + }, + { + "epoch": 1.147926267281106, + "grad_norm": 1.2427545332028853, + "learning_rate": 8.391644722224047e-07, + "loss": 0.6997950077056885, + "step": 4982 + }, + { + "epoch": 1.1481566820276499, + "grad_norm": 1.057637628401912, + "learning_rate": 8.38788460018884e-07, + "loss": 0.7754349708557129, + "step": 4983 + }, + { + "epoch": 1.1483870967741936, + "grad_norm": 1.1458978330134115, + 
"learning_rate": 8.384124712150046e-07, + "loss": 0.706238329410553, + "step": 4984 + }, + { + "epoch": 1.1486175115207373, + "grad_norm": 0.8874927618348325, + "learning_rate": 8.380365058653415e-07, + "loss": 0.7115224599838257, + "step": 4985 + }, + { + "epoch": 1.148847926267281, + "grad_norm": 1.349182229007694, + "learning_rate": 8.376605640244652e-07, + "loss": 0.9026098847389221, + "step": 4986 + }, + { + "epoch": 1.149078341013825, + "grad_norm": 1.359066441839043, + "learning_rate": 8.372846457469428e-07, + "loss": 0.9123632311820984, + "step": 4987 + }, + { + "epoch": 1.1493087557603687, + "grad_norm": 1.1389830084868187, + "learning_rate": 8.369087510873389e-07, + "loss": 0.8365681171417236, + "step": 4988 + }, + { + "epoch": 1.1495391705069125, + "grad_norm": 1.1572327597453433, + "learning_rate": 8.36532880100214e-07, + "loss": 0.7506389617919922, + "step": 4989 + }, + { + "epoch": 1.1497695852534562, + "grad_norm": 1.1932866122784214, + "learning_rate": 8.361570328401246e-07, + "loss": 0.7736936807632446, + "step": 4990 + }, + { + "epoch": 1.15, + "grad_norm": 1.0939095427412457, + "learning_rate": 8.357812093616254e-07, + "loss": 0.7364238500595093, + "step": 4991 + }, + { + "epoch": 1.1502304147465439, + "grad_norm": 1.154457809524142, + "learning_rate": 8.354054097192659e-07, + "loss": 0.8588067293167114, + "step": 4992 + }, + { + "epoch": 1.1504608294930876, + "grad_norm": 1.0040260335609983, + "learning_rate": 8.350296339675938e-07, + "loss": 0.777319073677063, + "step": 4993 + }, + { + "epoch": 1.1506912442396313, + "grad_norm": 1.2472613338245313, + "learning_rate": 8.346538821611517e-07, + "loss": 0.6695454716682434, + "step": 4994 + }, + { + "epoch": 1.150921658986175, + "grad_norm": 1.1333204343634593, + "learning_rate": 8.342781543544796e-07, + "loss": 0.7785383462905884, + "step": 4995 + }, + { + "epoch": 1.1511520737327188, + "grad_norm": 1.2063502081148214, + "learning_rate": 8.339024506021143e-07, + "loss": 0.7386239767074585, + 
"step": 4996 + }, + { + "epoch": 1.1513824884792627, + "grad_norm": 1.015973129089863, + "learning_rate": 8.335267709585884e-07, + "loss": 0.8044750690460205, + "step": 4997 + }, + { + "epoch": 1.1516129032258065, + "grad_norm": 0.991689333823338, + "learning_rate": 8.331511154784307e-07, + "loss": 0.6925652623176575, + "step": 4998 + }, + { + "epoch": 1.1518433179723502, + "grad_norm": 1.1362021503644928, + "learning_rate": 8.327754842161684e-07, + "loss": 0.7906935214996338, + "step": 4999 + }, + { + "epoch": 1.1520737327188941, + "grad_norm": 1.0865966340855062, + "learning_rate": 8.323998772263231e-07, + "loss": 0.7131960988044739, + "step": 5000 + }, + { + "epoch": 1.1523041474654379, + "grad_norm": 1.0459163670419733, + "learning_rate": 8.320242945634132e-07, + "loss": 0.8412370085716248, + "step": 5001 + }, + { + "epoch": 1.1525345622119816, + "grad_norm": 1.219248495471204, + "learning_rate": 8.316487362819551e-07, + "loss": 0.7800952792167664, + "step": 5002 + }, + { + "epoch": 1.1527649769585253, + "grad_norm": 1.2269188284281454, + "learning_rate": 8.312732024364602e-07, + "loss": 0.8620247840881348, + "step": 5003 + }, + { + "epoch": 1.152995391705069, + "grad_norm": 1.1576962368399284, + "learning_rate": 8.30897693081436e-07, + "loss": 0.7551721334457397, + "step": 5004 + }, + { + "epoch": 1.153225806451613, + "grad_norm": 1.1081098689134552, + "learning_rate": 8.305222082713882e-07, + "loss": 0.8510593175888062, + "step": 5005 + }, + { + "epoch": 1.1534562211981567, + "grad_norm": 1.0356186889640762, + "learning_rate": 8.301467480608176e-07, + "loss": 0.6503845453262329, + "step": 5006 + }, + { + "epoch": 1.1536866359447004, + "grad_norm": 1.1593829978588668, + "learning_rate": 8.297713125042212e-07, + "loss": 0.7729237079620361, + "step": 5007 + }, + { + "epoch": 1.1539170506912442, + "grad_norm": 1.0812796919286354, + "learning_rate": 8.293959016560939e-07, + "loss": 0.77802574634552, + "step": 5008 + }, + { + "epoch": 1.154147465437788, + 
"grad_norm": 0.9915519400035699, + "learning_rate": 8.290205155709256e-07, + "loss": 0.7977825999259949, + "step": 5009 + }, + { + "epoch": 1.1543778801843319, + "grad_norm": 1.1128731733324948, + "learning_rate": 8.286451543032027e-07, + "loss": 0.7479745149612427, + "step": 5010 + }, + { + "epoch": 1.1546082949308756, + "grad_norm": 1.0554376798438097, + "learning_rate": 8.282698179074092e-07, + "loss": 0.7631532549858093, + "step": 5011 + }, + { + "epoch": 1.1548387096774193, + "grad_norm": 1.1424098237872247, + "learning_rate": 8.278945064380243e-07, + "loss": 0.7437061071395874, + "step": 5012 + }, + { + "epoch": 1.1550691244239633, + "grad_norm": 1.2208599961881346, + "learning_rate": 8.275192199495236e-07, + "loss": 0.9334282875061035, + "step": 5013 + }, + { + "epoch": 1.155299539170507, + "grad_norm": 1.1846438304674103, + "learning_rate": 8.2714395849638e-07, + "loss": 0.7119227647781372, + "step": 5014 + }, + { + "epoch": 1.1555299539170507, + "grad_norm": 1.202224273678675, + "learning_rate": 8.267687221330619e-07, + "loss": 0.8335816860198975, + "step": 5015 + }, + { + "epoch": 1.1557603686635944, + "grad_norm": 1.290989413518125, + "learning_rate": 8.263935109140347e-07, + "loss": 0.6130940914154053, + "step": 5016 + }, + { + "epoch": 1.1559907834101382, + "grad_norm": 1.1118999574659398, + "learning_rate": 8.260183248937595e-07, + "loss": 0.8223903179168701, + "step": 5017 + }, + { + "epoch": 1.1562211981566821, + "grad_norm": 1.1042026567968168, + "learning_rate": 8.256431641266938e-07, + "loss": 0.8024790287017822, + "step": 5018 + }, + { + "epoch": 1.1564516129032258, + "grad_norm": 1.2308316211864536, + "learning_rate": 8.252680286672924e-07, + "loss": 0.7425345182418823, + "step": 5019 + }, + { + "epoch": 1.1566820276497696, + "grad_norm": 0.9907420981370885, + "learning_rate": 8.248929185700053e-07, + "loss": 0.7729727029800415, + "step": 5020 + }, + { + "epoch": 1.1569124423963133, + "grad_norm": 1.096476255015683, + "learning_rate": 
8.245178338892788e-07, + "loss": 0.8451874256134033, + "step": 5021 + }, + { + "epoch": 1.157142857142857, + "grad_norm": 1.1584589365926052, + "learning_rate": 8.241427746795569e-07, + "loss": 0.8666542768478394, + "step": 5022 + }, + { + "epoch": 1.157373271889401, + "grad_norm": 1.2897904410488261, + "learning_rate": 8.237677409952784e-07, + "loss": 0.740352988243103, + "step": 5023 + }, + { + "epoch": 1.1576036866359447, + "grad_norm": 0.9937724952342799, + "learning_rate": 8.233927328908788e-07, + "loss": 0.6325985193252563, + "step": 5024 + }, + { + "epoch": 1.1578341013824884, + "grad_norm": 1.0099472902179978, + "learning_rate": 8.230177504207901e-07, + "loss": 0.8075892925262451, + "step": 5025 + }, + { + "epoch": 1.1580645161290322, + "grad_norm": 1.0459718249244707, + "learning_rate": 8.22642793639441e-07, + "loss": 0.7176432609558105, + "step": 5026 + }, + { + "epoch": 1.1582949308755761, + "grad_norm": 1.1804726429614583, + "learning_rate": 8.222678626012554e-07, + "loss": 0.7734829187393188, + "step": 5027 + }, + { + "epoch": 1.1585253456221198, + "grad_norm": 1.3220222245590558, + "learning_rate": 8.218929573606544e-07, + "loss": 0.8642655611038208, + "step": 5028 + }, + { + "epoch": 1.1587557603686636, + "grad_norm": 1.0337487495481472, + "learning_rate": 8.215180779720548e-07, + "loss": 0.7788450121879578, + "step": 5029 + }, + { + "epoch": 1.1589861751152073, + "grad_norm": 0.9361659768144168, + "learning_rate": 8.211432244898696e-07, + "loss": 0.7470313310623169, + "step": 5030 + }, + { + "epoch": 1.1592165898617512, + "grad_norm": 0.9907043815397547, + "learning_rate": 8.207683969685091e-07, + "loss": 0.7691675424575806, + "step": 5031 + }, + { + "epoch": 1.159447004608295, + "grad_norm": 0.9920310393320094, + "learning_rate": 8.203935954623783e-07, + "loss": 0.7060209512710571, + "step": 5032 + }, + { + "epoch": 1.1596774193548387, + "grad_norm": 1.189958639239752, + "learning_rate": 8.20018820025879e-07, + "loss": 0.7617488503456116, + "step": 
5033 + }, + { + "epoch": 1.1599078341013824, + "grad_norm": 1.2174023482004634, + "learning_rate": 8.196440707134102e-07, + "loss": 0.7016350626945496, + "step": 5034 + }, + { + "epoch": 1.1601382488479262, + "grad_norm": 1.3407340114210469, + "learning_rate": 8.192693475793657e-07, + "loss": 0.8375445604324341, + "step": 5035 + }, + { + "epoch": 1.16036866359447, + "grad_norm": 1.2333127293881232, + "learning_rate": 8.188946506781359e-07, + "loss": 0.8903663158416748, + "step": 5036 + }, + { + "epoch": 1.1605990783410138, + "grad_norm": 1.1046448662682735, + "learning_rate": 8.18519980064108e-07, + "loss": 0.7613073587417603, + "step": 5037 + }, + { + "epoch": 1.1608294930875576, + "grad_norm": 1.2358045096315418, + "learning_rate": 8.181453357916649e-07, + "loss": 0.7443521022796631, + "step": 5038 + }, + { + "epoch": 1.1610599078341013, + "grad_norm": 1.0132222940739166, + "learning_rate": 8.17770717915185e-07, + "loss": 0.7986443042755127, + "step": 5039 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 1.1475221794766963, + "learning_rate": 8.173961264890447e-07, + "loss": 0.7128815650939941, + "step": 5040 + }, + { + "epoch": 1.161520737327189, + "grad_norm": 2.1353174029488593, + "learning_rate": 8.170215615676144e-07, + "loss": 0.7189117074012756, + "step": 5041 + }, + { + "epoch": 1.1617511520737327, + "grad_norm": 1.0970239097626442, + "learning_rate": 8.166470232052626e-07, + "loss": 0.8358731269836426, + "step": 5042 + }, + { + "epoch": 1.1619815668202764, + "grad_norm": 1.3103703595946257, + "learning_rate": 8.162725114563527e-07, + "loss": 0.7734829187393188, + "step": 5043 + }, + { + "epoch": 1.1622119815668204, + "grad_norm": 1.0836793655881298, + "learning_rate": 8.158980263752443e-07, + "loss": 0.842268705368042, + "step": 5044 + }, + { + "epoch": 1.162442396313364, + "grad_norm": 1.0953254817646525, + "learning_rate": 8.155235680162937e-07, + "loss": 0.7973036766052246, + "step": 5045 + }, + { + "epoch": 1.1626728110599078, + "grad_norm": 
1.1431491680692596, + "learning_rate": 8.151491364338532e-07, + "loss": 0.743615984916687, + "step": 5046 + }, + { + "epoch": 1.1629032258064516, + "grad_norm": 1.2354800674331334, + "learning_rate": 8.147747316822705e-07, + "loss": 0.799458384513855, + "step": 5047 + }, + { + "epoch": 1.1631336405529953, + "grad_norm": 1.4365906916451476, + "learning_rate": 8.144003538158907e-07, + "loss": 0.8368128538131714, + "step": 5048 + }, + { + "epoch": 1.1633640552995392, + "grad_norm": 1.0543438991079201, + "learning_rate": 8.140260028890537e-07, + "loss": 0.8543322086334229, + "step": 5049 + }, + { + "epoch": 1.163594470046083, + "grad_norm": 1.4010693577495907, + "learning_rate": 8.136516789560957e-07, + "loss": 0.9586522579193115, + "step": 5050 + }, + { + "epoch": 1.1638248847926267, + "grad_norm": 1.0831898931931903, + "learning_rate": 8.132773820713505e-07, + "loss": 0.7781316041946411, + "step": 5051 + }, + { + "epoch": 1.1640552995391704, + "grad_norm": 1.1820241176000723, + "learning_rate": 8.129031122891459e-07, + "loss": 0.7726340293884277, + "step": 5052 + }, + { + "epoch": 1.1642857142857144, + "grad_norm": 1.2561245635498344, + "learning_rate": 8.125288696638064e-07, + "loss": 0.886093258857727, + "step": 5053 + }, + { + "epoch": 1.164516129032258, + "grad_norm": 1.1568232893052595, + "learning_rate": 8.121546542496538e-07, + "loss": 0.7896960973739624, + "step": 5054 + }, + { + "epoch": 1.1647465437788018, + "grad_norm": 1.066019166680275, + "learning_rate": 8.117804661010045e-07, + "loss": 0.8272452354431152, + "step": 5055 + }, + { + "epoch": 1.1649769585253456, + "grad_norm": 1.216096321256879, + "learning_rate": 8.11406305272171e-07, + "loss": 0.8452264070510864, + "step": 5056 + }, + { + "epoch": 1.1652073732718895, + "grad_norm": 1.1423033593169452, + "learning_rate": 8.11032171817463e-07, + "loss": 0.7973369359970093, + "step": 5057 + }, + { + "epoch": 1.1654377880184332, + "grad_norm": 0.9573952961126706, + "learning_rate": 8.10658065791185e-07, + 
"loss": 0.8045153617858887, + "step": 5058 + }, + { + "epoch": 1.165668202764977, + "grad_norm": 1.2070626820317865, + "learning_rate": 8.102839872476378e-07, + "loss": 0.8921254873275757, + "step": 5059 + }, + { + "epoch": 1.1658986175115207, + "grad_norm": 1.1196640968944265, + "learning_rate": 8.099099362411191e-07, + "loss": 0.7633669376373291, + "step": 5060 + }, + { + "epoch": 1.1661290322580644, + "grad_norm": 1.4676357149183228, + "learning_rate": 8.095359128259214e-07, + "loss": 0.9303205013275146, + "step": 5061 + }, + { + "epoch": 1.1663594470046084, + "grad_norm": 1.1532839170590041, + "learning_rate": 8.091619170563335e-07, + "loss": 0.867104709148407, + "step": 5062 + }, + { + "epoch": 1.166589861751152, + "grad_norm": 1.2071495700843942, + "learning_rate": 8.087879489866409e-07, + "loss": 0.8136844038963318, + "step": 5063 + }, + { + "epoch": 1.1668202764976958, + "grad_norm": 1.5482117252744063, + "learning_rate": 8.084140086711246e-07, + "loss": 0.9016939997673035, + "step": 5064 + }, + { + "epoch": 1.1670506912442395, + "grad_norm": 1.5795186850129557, + "learning_rate": 8.080400961640608e-07, + "loss": 0.8621236085891724, + "step": 5065 + }, + { + "epoch": 1.1672811059907835, + "grad_norm": 1.336449231038986, + "learning_rate": 8.076662115197234e-07, + "loss": 0.856648862361908, + "step": 5066 + }, + { + "epoch": 1.1675115207373272, + "grad_norm": 1.3107118910408024, + "learning_rate": 8.072923547923805e-07, + "loss": 0.7752784490585327, + "step": 5067 + }, + { + "epoch": 1.167741935483871, + "grad_norm": 1.3093385224686542, + "learning_rate": 8.069185260362974e-07, + "loss": 0.8573904037475586, + "step": 5068 + }, + { + "epoch": 1.1679723502304147, + "grad_norm": 1.1636599679682322, + "learning_rate": 8.065447253057347e-07, + "loss": 0.724372148513794, + "step": 5069 + }, + { + "epoch": 1.1682027649769586, + "grad_norm": 1.146758460237727, + "learning_rate": 8.061709526549486e-07, + "loss": 0.7428436875343323, + "step": 5070 + }, + { + "epoch": 
1.1684331797235024, + "grad_norm": 1.273017047999111, + "learning_rate": 8.057972081381925e-07, + "loss": 0.8888595104217529, + "step": 5071 + }, + { + "epoch": 1.168663594470046, + "grad_norm": 0.9497262022662447, + "learning_rate": 8.054234918097146e-07, + "loss": 0.5753290057182312, + "step": 5072 + }, + { + "epoch": 1.1688940092165898, + "grad_norm": 1.037170746248572, + "learning_rate": 8.050498037237589e-07, + "loss": 0.6724086999893188, + "step": 5073 + }, + { + "epoch": 1.1691244239631335, + "grad_norm": 1.1504888789916348, + "learning_rate": 8.046761439345664e-07, + "loss": 0.7410751581192017, + "step": 5074 + }, + { + "epoch": 1.1693548387096775, + "grad_norm": 1.2658920818717738, + "learning_rate": 8.043025124963731e-07, + "loss": 0.8522979021072388, + "step": 5075 + }, + { + "epoch": 1.1695852534562212, + "grad_norm": 0.9918624551952729, + "learning_rate": 8.039289094634109e-07, + "loss": 0.6243441700935364, + "step": 5076 + }, + { + "epoch": 1.169815668202765, + "grad_norm": 1.113826210544245, + "learning_rate": 8.03555334889908e-07, + "loss": 0.9332150220870972, + "step": 5077 + }, + { + "epoch": 1.1700460829493087, + "grad_norm": 1.17170377289517, + "learning_rate": 8.031817888300883e-07, + "loss": 0.7620645761489868, + "step": 5078 + }, + { + "epoch": 1.1702764976958526, + "grad_norm": 1.2693395517069683, + "learning_rate": 8.028082713381708e-07, + "loss": 0.6983245015144348, + "step": 5079 + }, + { + "epoch": 1.1705069124423964, + "grad_norm": 1.049572082944252, + "learning_rate": 8.024347824683723e-07, + "loss": 0.6220129728317261, + "step": 5080 + }, + { + "epoch": 1.17073732718894, + "grad_norm": 1.0906919021349344, + "learning_rate": 8.020613222749034e-07, + "loss": 0.7363810539245605, + "step": 5081 + }, + { + "epoch": 1.1709677419354838, + "grad_norm": 1.1450127350480972, + "learning_rate": 8.016878908119713e-07, + "loss": 0.6864198446273804, + "step": 5082 + }, + { + "epoch": 1.1711981566820278, + "grad_norm": 1.061738817269073, + 
"learning_rate": 8.013144881337795e-07, + "loss": 0.758607029914856, + "step": 5083 + }, + { + "epoch": 1.1714285714285715, + "grad_norm": 1.038630253415404, + "learning_rate": 8.009411142945269e-07, + "loss": 0.7519336938858032, + "step": 5084 + }, + { + "epoch": 1.1716589861751152, + "grad_norm": 1.132431622302542, + "learning_rate": 8.005677693484076e-07, + "loss": 0.7681798934936523, + "step": 5085 + }, + { + "epoch": 1.171889400921659, + "grad_norm": 1.1022208744006678, + "learning_rate": 8.00194453349613e-07, + "loss": 0.6808522939682007, + "step": 5086 + }, + { + "epoch": 1.1721198156682027, + "grad_norm": 1.039877694159321, + "learning_rate": 7.99821166352329e-07, + "loss": 0.7373358607292175, + "step": 5087 + }, + { + "epoch": 1.1723502304147466, + "grad_norm": 1.0199898679930943, + "learning_rate": 7.994479084107374e-07, + "loss": 0.7272510528564453, + "step": 5088 + }, + { + "epoch": 1.1725806451612903, + "grad_norm": 1.2473385255320408, + "learning_rate": 7.990746795790166e-07, + "loss": 0.845584511756897, + "step": 5089 + }, + { + "epoch": 1.172811059907834, + "grad_norm": 1.188342902392479, + "learning_rate": 7.987014799113397e-07, + "loss": 0.7751157283782959, + "step": 5090 + }, + { + "epoch": 1.1730414746543778, + "grad_norm": 1.1193246813934836, + "learning_rate": 7.98328309461877e-07, + "loss": 0.679701566696167, + "step": 5091 + }, + { + "epoch": 1.1732718894009218, + "grad_norm": 1.1116687434739936, + "learning_rate": 7.979551682847932e-07, + "loss": 0.7630679607391357, + "step": 5092 + }, + { + "epoch": 1.1735023041474655, + "grad_norm": 1.0309555153446328, + "learning_rate": 7.975820564342487e-07, + "loss": 0.700912594795227, + "step": 5093 + }, + { + "epoch": 1.1737327188940092, + "grad_norm": 1.097867809116453, + "learning_rate": 7.972089739644012e-07, + "loss": 0.6789706945419312, + "step": 5094 + }, + { + "epoch": 1.173963133640553, + "grad_norm": 1.411041629986285, + "learning_rate": 7.968359209294027e-07, + "loss": 0.6744855642318726, + 
"step": 5095 + }, + { + "epoch": 1.1741935483870969, + "grad_norm": 1.060959542495881, + "learning_rate": 7.964628973834011e-07, + "loss": 0.7551798820495605, + "step": 5096 + }, + { + "epoch": 1.1744239631336406, + "grad_norm": 0.9743982939550204, + "learning_rate": 7.960899033805407e-07, + "loss": 0.711478054523468, + "step": 5097 + }, + { + "epoch": 1.1746543778801843, + "grad_norm": 1.1281696794434548, + "learning_rate": 7.95716938974961e-07, + "loss": 0.7464019060134888, + "step": 5098 + }, + { + "epoch": 1.174884792626728, + "grad_norm": 1.2269121334355921, + "learning_rate": 7.953440042207966e-07, + "loss": 0.7667930126190186, + "step": 5099 + }, + { + "epoch": 1.1751152073732718, + "grad_norm": 0.9314104563097803, + "learning_rate": 7.949710991721796e-07, + "loss": 0.7574796676635742, + "step": 5100 + }, + { + "epoch": 1.1753456221198157, + "grad_norm": 0.9285474016256665, + "learning_rate": 7.945982238832361e-07, + "loss": 0.6627304553985596, + "step": 5101 + }, + { + "epoch": 1.1755760368663595, + "grad_norm": 1.2503590742658475, + "learning_rate": 7.942253784080879e-07, + "loss": 0.6803916692733765, + "step": 5102 + }, + { + "epoch": 1.1758064516129032, + "grad_norm": 1.1622603764445048, + "learning_rate": 7.938525628008541e-07, + "loss": 0.7107337713241577, + "step": 5103 + }, + { + "epoch": 1.176036866359447, + "grad_norm": 1.0411872319848583, + "learning_rate": 7.934797771156481e-07, + "loss": 0.7669517993927002, + "step": 5104 + }, + { + "epoch": 1.1762672811059907, + "grad_norm": 1.185214338142044, + "learning_rate": 7.931070214065787e-07, + "loss": 0.7431854605674744, + "step": 5105 + }, + { + "epoch": 1.1764976958525346, + "grad_norm": 1.121798206744332, + "learning_rate": 7.927342957277512e-07, + "loss": 0.7778047323226929, + "step": 5106 + }, + { + "epoch": 1.1767281105990783, + "grad_norm": 1.1095356364162186, + "learning_rate": 7.923616001332666e-07, + "loss": 0.7759886980056763, + "step": 5107 + }, + { + "epoch": 1.176958525345622, + 
"grad_norm": 1.236811676128496, + "learning_rate": 7.919889346772206e-07, + "loss": 0.8010379076004028, + "step": 5108 + }, + { + "epoch": 1.177188940092166, + "grad_norm": 1.06629818182004, + "learning_rate": 7.916162994137055e-07, + "loss": 0.6671626567840576, + "step": 5109 + }, + { + "epoch": 1.1774193548387097, + "grad_norm": 1.3043487682811514, + "learning_rate": 7.912436943968088e-07, + "loss": 0.7521620988845825, + "step": 5110 + }, + { + "epoch": 1.1776497695852535, + "grad_norm": 1.0243889894502596, + "learning_rate": 7.908711196806131e-07, + "loss": 0.7626729011535645, + "step": 5111 + }, + { + "epoch": 1.1778801843317972, + "grad_norm": 1.2636422633100723, + "learning_rate": 7.904985753191979e-07, + "loss": 0.8247047066688538, + "step": 5112 + }, + { + "epoch": 1.178110599078341, + "grad_norm": 0.9958902943746148, + "learning_rate": 7.901260613666372e-07, + "loss": 0.6851831078529358, + "step": 5113 + }, + { + "epoch": 1.1783410138248849, + "grad_norm": 1.114469339271613, + "learning_rate": 7.897535778770003e-07, + "loss": 0.7752102613449097, + "step": 5114 + }, + { + "epoch": 1.1785714285714286, + "grad_norm": 1.0998339013097813, + "learning_rate": 7.893811249043537e-07, + "loss": 0.8885148167610168, + "step": 5115 + }, + { + "epoch": 1.1788018433179723, + "grad_norm": 1.3062040351627935, + "learning_rate": 7.890087025027579e-07, + "loss": 0.7530373334884644, + "step": 5116 + }, + { + "epoch": 1.179032258064516, + "grad_norm": 1.0400370692656624, + "learning_rate": 7.886363107262697e-07, + "loss": 0.7795672416687012, + "step": 5117 + }, + { + "epoch": 1.1792626728110598, + "grad_norm": 1.0719443222612952, + "learning_rate": 7.882639496289413e-07, + "loss": 0.7563966512680054, + "step": 5118 + }, + { + "epoch": 1.1794930875576037, + "grad_norm": 0.9799024359449507, + "learning_rate": 7.878916192648198e-07, + "loss": 0.7218793630599976, + "step": 5119 + }, + { + "epoch": 1.1797235023041475, + "grad_norm": 1.3292879414667447, + "learning_rate": 
7.875193196879494e-07, + "loss": 0.8213250637054443, + "step": 5120 + }, + { + "epoch": 1.1799539170506912, + "grad_norm": 1.118163280715499, + "learning_rate": 7.871470509523685e-07, + "loss": 0.8134827613830566, + "step": 5121 + }, + { + "epoch": 1.1801843317972351, + "grad_norm": 0.9613119464109229, + "learning_rate": 7.867748131121109e-07, + "loss": 0.6135407090187073, + "step": 5122 + }, + { + "epoch": 1.1804147465437789, + "grad_norm": 1.2999694720426915, + "learning_rate": 7.864026062212073e-07, + "loss": 0.8110366463661194, + "step": 5123 + }, + { + "epoch": 1.1806451612903226, + "grad_norm": 0.9962674732824631, + "learning_rate": 7.860304303336827e-07, + "loss": 0.6723964214324951, + "step": 5124 + }, + { + "epoch": 1.1808755760368663, + "grad_norm": 1.2942490465484493, + "learning_rate": 7.856582855035577e-07, + "loss": 0.8308886885643005, + "step": 5125 + }, + { + "epoch": 1.18110599078341, + "grad_norm": 1.023999175845692, + "learning_rate": 7.852861717848488e-07, + "loss": 0.7960010766983032, + "step": 5126 + }, + { + "epoch": 1.181336405529954, + "grad_norm": 1.2456351777125307, + "learning_rate": 7.84914089231568e-07, + "loss": 0.7931640148162842, + "step": 5127 + }, + { + "epoch": 1.1815668202764977, + "grad_norm": 1.2288164842517166, + "learning_rate": 7.845420378977222e-07, + "loss": 0.762995719909668, + "step": 5128 + }, + { + "epoch": 1.1817972350230415, + "grad_norm": 1.373671152705427, + "learning_rate": 7.841700178373146e-07, + "loss": 0.9416301250457764, + "step": 5129 + }, + { + "epoch": 1.1820276497695852, + "grad_norm": 1.0032147289786453, + "learning_rate": 7.837980291043431e-07, + "loss": 0.7666923999786377, + "step": 5130 + }, + { + "epoch": 1.182258064516129, + "grad_norm": 1.1123898953678502, + "learning_rate": 7.834260717528012e-07, + "loss": 0.7668861150741577, + "step": 5131 + }, + { + "epoch": 1.1824884792626729, + "grad_norm": 1.1236616956881595, + "learning_rate": 7.830541458366786e-07, + "loss": 0.7576566934585571, + "step": 
5132 + }, + { + "epoch": 1.1827188940092166, + "grad_norm": 1.0432406760791426, + "learning_rate": 7.826822514099595e-07, + "loss": 0.6288204193115234, + "step": 5133 + }, + { + "epoch": 1.1829493087557603, + "grad_norm": 1.2747953745069134, + "learning_rate": 7.823103885266236e-07, + "loss": 0.8332630395889282, + "step": 5134 + }, + { + "epoch": 1.1831797235023043, + "grad_norm": 1.3987532245853456, + "learning_rate": 7.819385572406469e-07, + "loss": 0.9294546246528625, + "step": 5135 + }, + { + "epoch": 1.183410138248848, + "grad_norm": 0.9911973140133253, + "learning_rate": 7.81566757606e-07, + "loss": 0.637617826461792, + "step": 5136 + }, + { + "epoch": 1.1836405529953917, + "grad_norm": 1.2295561738436023, + "learning_rate": 7.81194989676649e-07, + "loss": 0.7614878416061401, + "step": 5137 + }, + { + "epoch": 1.1838709677419355, + "grad_norm": 1.2939539056978149, + "learning_rate": 7.808232535065556e-07, + "loss": 0.8612164258956909, + "step": 5138 + }, + { + "epoch": 1.1841013824884792, + "grad_norm": 1.0758125620247463, + "learning_rate": 7.804515491496765e-07, + "loss": 0.7530151605606079, + "step": 5139 + }, + { + "epoch": 1.1843317972350231, + "grad_norm": 0.9883281570065391, + "learning_rate": 7.800798766599648e-07, + "loss": 0.7739782929420471, + "step": 5140 + }, + { + "epoch": 1.1845622119815669, + "grad_norm": 1.0835226521428547, + "learning_rate": 7.797082360913678e-07, + "loss": 0.7992277145385742, + "step": 5141 + }, + { + "epoch": 1.1847926267281106, + "grad_norm": 1.2343955942215838, + "learning_rate": 7.793366274978284e-07, + "loss": 0.8744574785232544, + "step": 5142 + }, + { + "epoch": 1.1850230414746543, + "grad_norm": 0.9992165946111031, + "learning_rate": 7.789650509332857e-07, + "loss": 0.7522493600845337, + "step": 5143 + }, + { + "epoch": 1.185253456221198, + "grad_norm": 1.1095107175779666, + "learning_rate": 7.785935064516733e-07, + "loss": 0.8811007142066956, + "step": 5144 + }, + { + "epoch": 1.185483870967742, + "grad_norm": 
0.9512882648642599, + "learning_rate": 7.782219941069201e-07, + "loss": 0.8141417503356934, + "step": 5145 + }, + { + "epoch": 1.1857142857142857, + "grad_norm": 1.3048397777053706, + "learning_rate": 7.778505139529509e-07, + "loss": 0.9473680257797241, + "step": 5146 + }, + { + "epoch": 1.1859447004608294, + "grad_norm": 1.1561666933094623, + "learning_rate": 7.774790660436857e-07, + "loss": 0.740132212638855, + "step": 5147 + }, + { + "epoch": 1.1861751152073732, + "grad_norm": 1.1265716565789026, + "learning_rate": 7.771076504330392e-07, + "loss": 0.7904594540596008, + "step": 5148 + }, + { + "epoch": 1.1864055299539171, + "grad_norm": 1.1481555737803508, + "learning_rate": 7.767362671749224e-07, + "loss": 0.8085094690322876, + "step": 5149 + }, + { + "epoch": 1.1866359447004609, + "grad_norm": 1.3362082879917547, + "learning_rate": 7.76364916323241e-07, + "loss": 0.6954756379127502, + "step": 5150 + }, + { + "epoch": 1.1868663594470046, + "grad_norm": 1.175085216674836, + "learning_rate": 7.759935979318953e-07, + "loss": 0.8575167059898376, + "step": 5151 + }, + { + "epoch": 1.1870967741935483, + "grad_norm": 0.9330545417113619, + "learning_rate": 7.756223120547829e-07, + "loss": 0.6125110387802124, + "step": 5152 + }, + { + "epoch": 1.1873271889400923, + "grad_norm": 1.1387987197615417, + "learning_rate": 7.752510587457949e-07, + "loss": 0.7737400531768799, + "step": 5153 + }, + { + "epoch": 1.187557603686636, + "grad_norm": 0.9473095115528148, + "learning_rate": 7.748798380588177e-07, + "loss": 0.7300955653190613, + "step": 5154 + }, + { + "epoch": 1.1877880184331797, + "grad_norm": 0.9479432315278626, + "learning_rate": 7.745086500477343e-07, + "loss": 0.7974356412887573, + "step": 5155 + }, + { + "epoch": 1.1880184331797234, + "grad_norm": 1.120213603018525, + "learning_rate": 7.74137494766422e-07, + "loss": 0.8158693313598633, + "step": 5156 + }, + { + "epoch": 1.1882488479262672, + "grad_norm": 0.9086968377624679, + "learning_rate": 7.737663722687531e-07, 
+ "loss": 0.6656177639961243, + "step": 5157 + }, + { + "epoch": 1.1884792626728111, + "grad_norm": 1.284345958176322, + "learning_rate": 7.733952826085958e-07, + "loss": 0.7796640992164612, + "step": 5158 + }, + { + "epoch": 1.1887096774193548, + "grad_norm": 1.1079992534891525, + "learning_rate": 7.730242258398135e-07, + "loss": 0.9224779009819031, + "step": 5159 + }, + { + "epoch": 1.1889400921658986, + "grad_norm": 1.2013047291849663, + "learning_rate": 7.726532020162639e-07, + "loss": 0.7105277180671692, + "step": 5160 + }, + { + "epoch": 1.1891705069124423, + "grad_norm": 0.9139263319393289, + "learning_rate": 7.722822111918012e-07, + "loss": 0.5793930292129517, + "step": 5161 + }, + { + "epoch": 1.1894009216589863, + "grad_norm": 0.9419478266668957, + "learning_rate": 7.719112534202743e-07, + "loss": 0.7319367527961731, + "step": 5162 + }, + { + "epoch": 1.18963133640553, + "grad_norm": 1.182614737199728, + "learning_rate": 7.715403287555266e-07, + "loss": 0.7517954111099243, + "step": 5163 + }, + { + "epoch": 1.1898617511520737, + "grad_norm": 1.1800441614309307, + "learning_rate": 7.711694372513981e-07, + "loss": 0.8633241057395935, + "step": 5164 + }, + { + "epoch": 1.1900921658986174, + "grad_norm": 1.280920610105802, + "learning_rate": 7.707985789617227e-07, + "loss": 0.6453210115432739, + "step": 5165 + }, + { + "epoch": 1.1903225806451614, + "grad_norm": 1.1209224749220659, + "learning_rate": 7.704277539403303e-07, + "loss": 0.7609909772872925, + "step": 5166 + }, + { + "epoch": 1.1905529953917051, + "grad_norm": 1.1829891287159422, + "learning_rate": 7.700569622410453e-07, + "loss": 0.7419755458831787, + "step": 5167 + }, + { + "epoch": 1.1907834101382488, + "grad_norm": 1.0759571852853795, + "learning_rate": 7.696862039176879e-07, + "loss": 0.849078357219696, + "step": 5168 + }, + { + "epoch": 1.1910138248847926, + "grad_norm": 1.3077976619104341, + "learning_rate": 7.693154790240732e-07, + "loss": 0.8147921562194824, + "step": 5169 + }, + { + 
"epoch": 1.1912442396313363, + "grad_norm": 1.1349568865686221, + "learning_rate": 7.689447876140114e-07, + "loss": 0.7660118937492371, + "step": 5170 + }, + { + "epoch": 1.1914746543778802, + "grad_norm": 0.9919046297525586, + "learning_rate": 7.685741297413075e-07, + "loss": 0.7775185108184814, + "step": 5171 + }, + { + "epoch": 1.191705069124424, + "grad_norm": 1.0634336005518812, + "learning_rate": 7.682035054597624e-07, + "loss": 0.7184321880340576, + "step": 5172 + }, + { + "epoch": 1.1919354838709677, + "grad_norm": 0.9191067866194278, + "learning_rate": 7.678329148231719e-07, + "loss": 0.7108585834503174, + "step": 5173 + }, + { + "epoch": 1.1921658986175114, + "grad_norm": 1.169972531551494, + "learning_rate": 7.674623578853259e-07, + "loss": 0.7252670526504517, + "step": 5174 + }, + { + "epoch": 1.1923963133640554, + "grad_norm": 1.0227424567448893, + "learning_rate": 7.670918347000113e-07, + "loss": 0.818352460861206, + "step": 5175 + }, + { + "epoch": 1.192626728110599, + "grad_norm": 0.8768631462521176, + "learning_rate": 7.667213453210086e-07, + "loss": 0.6538013815879822, + "step": 5176 + }, + { + "epoch": 1.1928571428571428, + "grad_norm": 1.1216359209528128, + "learning_rate": 7.663508898020935e-07, + "loss": 0.7058148384094238, + "step": 5177 + }, + { + "epoch": 1.1930875576036866, + "grad_norm": 1.0528263608484594, + "learning_rate": 7.659804681970377e-07, + "loss": 0.7003160715103149, + "step": 5178 + }, + { + "epoch": 1.1933179723502305, + "grad_norm": 1.2339709506043992, + "learning_rate": 7.656100805596072e-07, + "loss": 0.84567791223526, + "step": 5179 + }, + { + "epoch": 1.1935483870967742, + "grad_norm": 1.239861543806107, + "learning_rate": 7.652397269435626e-07, + "loss": 0.7994743585586548, + "step": 5180 + }, + { + "epoch": 1.193778801843318, + "grad_norm": 1.3106444419652792, + "learning_rate": 7.648694074026615e-07, + "loss": 0.8177791833877563, + "step": 5181 + }, + { + "epoch": 1.1940092165898617, + "grad_norm": 1.362939104353802, 
+ "learning_rate": 7.644991219906545e-07, + "loss": 0.6663975715637207, + "step": 5182 + }, + { + "epoch": 1.1942396313364054, + "grad_norm": 1.1422405746222943, + "learning_rate": 7.641288707612878e-07, + "loss": 0.8275883197784424, + "step": 5183 + }, + { + "epoch": 1.1944700460829494, + "grad_norm": 1.1201157873973466, + "learning_rate": 7.637586537683036e-07, + "loss": 0.7710767388343811, + "step": 5184 + }, + { + "epoch": 1.194700460829493, + "grad_norm": 1.1629669577400157, + "learning_rate": 7.633884710654382e-07, + "loss": 0.7628582715988159, + "step": 5185 + }, + { + "epoch": 1.1949308755760368, + "grad_norm": 1.3793540006541976, + "learning_rate": 7.630183227064227e-07, + "loss": 0.7002676725387573, + "step": 5186 + }, + { + "epoch": 1.1951612903225806, + "grad_norm": 0.9948455527839576, + "learning_rate": 7.626482087449841e-07, + "loss": 0.8272073268890381, + "step": 5187 + }, + { + "epoch": 1.1953917050691245, + "grad_norm": 1.0711227380559258, + "learning_rate": 7.622781292348435e-07, + "loss": 0.7881417274475098, + "step": 5188 + }, + { + "epoch": 1.1956221198156682, + "grad_norm": 1.0728428578693516, + "learning_rate": 7.61908084229718e-07, + "loss": 0.797294020652771, + "step": 5189 + }, + { + "epoch": 1.195852534562212, + "grad_norm": 1.0264450399364256, + "learning_rate": 7.615380737833191e-07, + "loss": 0.7752290964126587, + "step": 5190 + }, + { + "epoch": 1.1960829493087557, + "grad_norm": 1.0830464595218987, + "learning_rate": 7.611680979493525e-07, + "loss": 0.7299143075942993, + "step": 5191 + }, + { + "epoch": 1.1963133640552996, + "grad_norm": 1.4839567137751186, + "learning_rate": 7.60798156781521e-07, + "loss": 0.6749997138977051, + "step": 5192 + }, + { + "epoch": 1.1965437788018434, + "grad_norm": 1.2717197322235172, + "learning_rate": 7.6042825033352e-07, + "loss": 0.7933796048164368, + "step": 5193 + }, + { + "epoch": 1.196774193548387, + "grad_norm": 1.1254669600910374, + "learning_rate": 7.600583786590411e-07, + "loss": 
0.7214919328689575, + "step": 5194 + }, + { + "epoch": 1.1970046082949308, + "grad_norm": 1.0000165841598083, + "learning_rate": 7.596885418117713e-07, + "loss": 0.7804256081581116, + "step": 5195 + }, + { + "epoch": 1.1972350230414746, + "grad_norm": 1.2738023107912249, + "learning_rate": 7.593187398453915e-07, + "loss": 0.7615138292312622, + "step": 5196 + }, + { + "epoch": 1.1974654377880185, + "grad_norm": 1.0493977127227612, + "learning_rate": 7.589489728135778e-07, + "loss": 0.8473657369613647, + "step": 5197 + }, + { + "epoch": 1.1976958525345622, + "grad_norm": 1.2204301678409606, + "learning_rate": 7.585792407700018e-07, + "loss": 0.7302027940750122, + "step": 5198 + }, + { + "epoch": 1.197926267281106, + "grad_norm": 1.123276567811957, + "learning_rate": 7.582095437683294e-07, + "loss": 0.7631692886352539, + "step": 5199 + }, + { + "epoch": 1.1981566820276497, + "grad_norm": 1.339389807954867, + "learning_rate": 7.578398818622211e-07, + "loss": 0.7982754707336426, + "step": 5200 + }, + { + "epoch": 1.1983870967741936, + "grad_norm": 1.3949436336418501, + "learning_rate": 7.574702551053339e-07, + "loss": 0.8445635437965393, + "step": 5201 + }, + { + "epoch": 1.1986175115207374, + "grad_norm": 1.267881130363425, + "learning_rate": 7.571006635513182e-07, + "loss": 0.8486276268959045, + "step": 5202 + }, + { + "epoch": 1.198847926267281, + "grad_norm": 1.2841422228776138, + "learning_rate": 7.567311072538191e-07, + "loss": 0.8433184623718262, + "step": 5203 + }, + { + "epoch": 1.1990783410138248, + "grad_norm": 1.5895945882971518, + "learning_rate": 7.56361586266478e-07, + "loss": 0.9772260189056396, + "step": 5204 + }, + { + "epoch": 1.1993087557603688, + "grad_norm": 1.1927959868338558, + "learning_rate": 7.559921006429304e-07, + "loss": 0.8349692821502686, + "step": 5205 + }, + { + "epoch": 1.1995391705069125, + "grad_norm": 1.070076083870323, + "learning_rate": 7.556226504368059e-07, + "loss": 0.7454575300216675, + "step": 5206 + }, + { + "epoch": 
1.1997695852534562, + "grad_norm": 0.882927792535501, + "learning_rate": 7.552532357017303e-07, + "loss": 0.6680991649627686, + "step": 5207 + }, + { + "epoch": 1.2, + "grad_norm": 1.1844993546767875, + "learning_rate": 7.54883856491324e-07, + "loss": 0.6528318524360657, + "step": 5208 + }, + { + "epoch": 1.2002304147465437, + "grad_norm": 1.0482736751922475, + "learning_rate": 7.545145128592008e-07, + "loss": 0.7711834907531738, + "step": 5209 + }, + { + "epoch": 1.2004608294930876, + "grad_norm": 1.022603342926927, + "learning_rate": 7.541452048589714e-07, + "loss": 0.6378746628761292, + "step": 5210 + }, + { + "epoch": 1.2006912442396314, + "grad_norm": 0.9309859008896244, + "learning_rate": 7.537759325442402e-07, + "loss": 0.7489340305328369, + "step": 5211 + }, + { + "epoch": 1.200921658986175, + "grad_norm": 1.0825673838806515, + "learning_rate": 7.53406695968606e-07, + "loss": 0.7869534492492676, + "step": 5212 + }, + { + "epoch": 1.2011520737327188, + "grad_norm": 1.1316888770375757, + "learning_rate": 7.530374951856637e-07, + "loss": 0.7252482175827026, + "step": 5213 + }, + { + "epoch": 1.2013824884792628, + "grad_norm": 1.1337087819491523, + "learning_rate": 7.526683302490018e-07, + "loss": 0.763259768486023, + "step": 5214 + }, + { + "epoch": 1.2016129032258065, + "grad_norm": 1.405277715760194, + "learning_rate": 7.522992012122046e-07, + "loss": 0.8135688304901123, + "step": 5215 + }, + { + "epoch": 1.2018433179723502, + "grad_norm": 1.5589534049714566, + "learning_rate": 7.519301081288504e-07, + "loss": 0.9282290935516357, + "step": 5216 + }, + { + "epoch": 1.202073732718894, + "grad_norm": 1.2621340712897178, + "learning_rate": 7.515610510525125e-07, + "loss": 0.7968727946281433, + "step": 5217 + }, + { + "epoch": 1.202304147465438, + "grad_norm": 1.4154309582650375, + "learning_rate": 7.511920300367594e-07, + "loss": 0.9495606422424316, + "step": 5218 + }, + { + "epoch": 1.2025345622119816, + "grad_norm": 1.120709992771365, + "learning_rate": 
7.508230451351537e-07, + "loss": 0.6790425181388855, + "step": 5219 + }, + { + "epoch": 1.2027649769585254, + "grad_norm": 1.1216778132469425, + "learning_rate": 7.504540964012527e-07, + "loss": 0.7269036173820496, + "step": 5220 + }, + { + "epoch": 1.202995391705069, + "grad_norm": 1.4394573291388193, + "learning_rate": 7.500851838886097e-07, + "loss": 0.820799708366394, + "step": 5221 + }, + { + "epoch": 1.2032258064516128, + "grad_norm": 1.1080457725700354, + "learning_rate": 7.497163076507715e-07, + "loss": 0.7693401575088501, + "step": 5222 + }, + { + "epoch": 1.2034562211981568, + "grad_norm": 1.1611837511561531, + "learning_rate": 7.493474677412793e-07, + "loss": 0.7687606811523438, + "step": 5223 + }, + { + "epoch": 1.2036866359447005, + "grad_norm": 0.9784122136232752, + "learning_rate": 7.489786642136709e-07, + "loss": 0.6858488321304321, + "step": 5224 + }, + { + "epoch": 1.2039170506912442, + "grad_norm": 0.8776412008252917, + "learning_rate": 7.486098971214769e-07, + "loss": 0.7575044631958008, + "step": 5225 + }, + { + "epoch": 1.204147465437788, + "grad_norm": 0.8129887936087057, + "learning_rate": 7.482411665182236e-07, + "loss": 0.6799627542495728, + "step": 5226 + }, + { + "epoch": 1.2043778801843317, + "grad_norm": 1.4994332488998736, + "learning_rate": 7.478724724574317e-07, + "loss": 0.8882759809494019, + "step": 5227 + }, + { + "epoch": 1.2046082949308756, + "grad_norm": 1.10750930167245, + "learning_rate": 7.475038149926165e-07, + "loss": 0.7835016250610352, + "step": 5228 + }, + { + "epoch": 1.2048387096774194, + "grad_norm": 1.3325922049902164, + "learning_rate": 7.471351941772883e-07, + "loss": 0.9264512062072754, + "step": 5229 + }, + { + "epoch": 1.205069124423963, + "grad_norm": 1.225862576818596, + "learning_rate": 7.467666100649521e-07, + "loss": 0.8094228506088257, + "step": 5230 + }, + { + "epoch": 1.205299539170507, + "grad_norm": 1.167425367358343, + "learning_rate": 7.463980627091073e-07, + "loss": 0.7782102823257446, + "step": 
5231 + }, + { + "epoch": 1.2055299539170508, + "grad_norm": 1.2892161969383955, + "learning_rate": 7.460295521632474e-07, + "loss": 0.7946768999099731, + "step": 5232 + }, + { + "epoch": 1.2057603686635945, + "grad_norm": 1.2538288509415036, + "learning_rate": 7.456610784808624e-07, + "loss": 0.7571625709533691, + "step": 5233 + }, + { + "epoch": 1.2059907834101382, + "grad_norm": 1.3786667467707436, + "learning_rate": 7.45292641715435e-07, + "loss": 0.9760236144065857, + "step": 5234 + }, + { + "epoch": 1.206221198156682, + "grad_norm": 1.0717694328508904, + "learning_rate": 7.449242419204431e-07, + "loss": 0.6370055675506592, + "step": 5235 + }, + { + "epoch": 1.206451612903226, + "grad_norm": 1.226412390848778, + "learning_rate": 7.445558791493603e-07, + "loss": 0.7991320490837097, + "step": 5236 + }, + { + "epoch": 1.2066820276497696, + "grad_norm": 1.0607083796487833, + "learning_rate": 7.441875534556531e-07, + "loss": 0.8840054273605347, + "step": 5237 + }, + { + "epoch": 1.2069124423963133, + "grad_norm": 1.0615184698087237, + "learning_rate": 7.438192648927841e-07, + "loss": 0.8634533882141113, + "step": 5238 + }, + { + "epoch": 1.207142857142857, + "grad_norm": 0.9816687263450602, + "learning_rate": 7.434510135142098e-07, + "loss": 0.7081723213195801, + "step": 5239 + }, + { + "epoch": 1.2073732718894008, + "grad_norm": 1.1398058732045784, + "learning_rate": 7.430827993733808e-07, + "loss": 0.7160249352455139, + "step": 5240 + }, + { + "epoch": 1.2076036866359448, + "grad_norm": 0.8011837684152103, + "learning_rate": 7.427146225237438e-07, + "loss": 0.5323421955108643, + "step": 5241 + }, + { + "epoch": 1.2078341013824885, + "grad_norm": 1.0448270993907307, + "learning_rate": 7.423464830187386e-07, + "loss": 0.6439197063446045, + "step": 5242 + }, + { + "epoch": 1.2080645161290322, + "grad_norm": 1.2861588666790074, + "learning_rate": 7.419783809117999e-07, + "loss": 0.8268016576766968, + "step": 5243 + }, + { + "epoch": 1.2082949308755762, + "grad_norm": 
1.0010661947708184, + "learning_rate": 7.416103162563582e-07, + "loss": 0.8115339279174805, + "step": 5244 + }, + { + "epoch": 1.2085253456221199, + "grad_norm": 1.05524382659239, + "learning_rate": 7.41242289105837e-07, + "loss": 0.8677197694778442, + "step": 5245 + }, + { + "epoch": 1.2087557603686636, + "grad_norm": 1.3337261104998102, + "learning_rate": 7.408742995136547e-07, + "loss": 0.7942948937416077, + "step": 5246 + }, + { + "epoch": 1.2089861751152073, + "grad_norm": 1.4261507552200647, + "learning_rate": 7.405063475332249e-07, + "loss": 0.8457766771316528, + "step": 5247 + }, + { + "epoch": 1.209216589861751, + "grad_norm": 1.2992145711475631, + "learning_rate": 7.401384332179552e-07, + "loss": 0.8463923931121826, + "step": 5248 + }, + { + "epoch": 1.209447004608295, + "grad_norm": 1.2576660242210724, + "learning_rate": 7.397705566212479e-07, + "loss": 0.9192875623703003, + "step": 5249 + }, + { + "epoch": 1.2096774193548387, + "grad_norm": 1.257257688865163, + "learning_rate": 7.394027177964999e-07, + "loss": 0.7461347579956055, + "step": 5250 + }, + { + "epoch": 1.2099078341013825, + "grad_norm": 1.150791607540225, + "learning_rate": 7.390349167971025e-07, + "loss": 0.6953321695327759, + "step": 5251 + }, + { + "epoch": 1.2101382488479262, + "grad_norm": 1.0284326235023098, + "learning_rate": 7.38667153676441e-07, + "loss": 0.7226089835166931, + "step": 5252 + }, + { + "epoch": 1.21036866359447, + "grad_norm": 0.8781484717910895, + "learning_rate": 7.382994284878967e-07, + "loss": 0.6746406555175781, + "step": 5253 + }, + { + "epoch": 1.2105990783410139, + "grad_norm": 1.109396083619457, + "learning_rate": 7.379317412848438e-07, + "loss": 0.7600215673446655, + "step": 5254 + }, + { + "epoch": 1.2108294930875576, + "grad_norm": 1.0821310147954002, + "learning_rate": 7.375640921206514e-07, + "loss": 0.7530734539031982, + "step": 5255 + }, + { + "epoch": 1.2110599078341013, + "grad_norm": 1.0572444642243028, + "learning_rate": 7.371964810486839e-07, + 
"loss": 0.8103033304214478, + "step": 5256 + }, + { + "epoch": 1.2112903225806453, + "grad_norm": 1.5370115848017, + "learning_rate": 7.368289081222994e-07, + "loss": 0.8916831016540527, + "step": 5257 + }, + { + "epoch": 1.211520737327189, + "grad_norm": 0.9972990737801745, + "learning_rate": 7.364613733948501e-07, + "loss": 0.6728129386901855, + "step": 5258 + }, + { + "epoch": 1.2117511520737327, + "grad_norm": 1.2459715050980873, + "learning_rate": 7.360938769196841e-07, + "loss": 0.8609380722045898, + "step": 5259 + }, + { + "epoch": 1.2119815668202765, + "grad_norm": 1.2704694196315967, + "learning_rate": 7.357264187501422e-07, + "loss": 0.9370373487472534, + "step": 5260 + }, + { + "epoch": 1.2122119815668202, + "grad_norm": 1.1080973982930933, + "learning_rate": 7.353589989395604e-07, + "loss": 0.6812434196472168, + "step": 5261 + }, + { + "epoch": 1.2124423963133641, + "grad_norm": 1.1917998982451765, + "learning_rate": 7.349916175412701e-07, + "loss": 0.7661731243133545, + "step": 5262 + }, + { + "epoch": 1.2126728110599079, + "grad_norm": 1.175052294784061, + "learning_rate": 7.346242746085951e-07, + "loss": 0.7306643128395081, + "step": 5263 + }, + { + "epoch": 1.2129032258064516, + "grad_norm": 1.2065862060559862, + "learning_rate": 7.34256970194856e-07, + "loss": 0.7189076542854309, + "step": 5264 + }, + { + "epoch": 1.2131336405529953, + "grad_norm": 0.8932044441494517, + "learning_rate": 7.338897043533656e-07, + "loss": 0.6935977935791016, + "step": 5265 + }, + { + "epoch": 1.213364055299539, + "grad_norm": 1.1224428177486496, + "learning_rate": 7.335224771374323e-07, + "loss": 0.8451323509216309, + "step": 5266 + }, + { + "epoch": 1.213594470046083, + "grad_norm": 1.1211043364668347, + "learning_rate": 7.331552886003589e-07, + "loss": 0.7936843037605286, + "step": 5267 + }, + { + "epoch": 1.2138248847926267, + "grad_norm": 1.1507587511456696, + "learning_rate": 7.327881387954418e-07, + "loss": 0.7989950776100159, + "step": 5268 + }, + { + "epoch": 
1.2140552995391705, + "grad_norm": 1.1166217189865624, + "learning_rate": 7.324210277759726e-07, + "loss": 0.7579236030578613, + "step": 5269 + }, + { + "epoch": 1.2142857142857142, + "grad_norm": 1.1276787851795544, + "learning_rate": 7.320539555952372e-07, + "loss": 0.7101268768310547, + "step": 5270 + }, + { + "epoch": 1.2145161290322581, + "grad_norm": 1.0342829920040018, + "learning_rate": 7.316869223065155e-07, + "loss": 0.7920513153076172, + "step": 5271 + }, + { + "epoch": 1.2147465437788019, + "grad_norm": 1.4357028015234437, + "learning_rate": 7.313199279630814e-07, + "loss": 0.9241428375244141, + "step": 5272 + }, + { + "epoch": 1.2149769585253456, + "grad_norm": 1.1653282891915406, + "learning_rate": 7.309529726182044e-07, + "loss": 0.8278338313102722, + "step": 5273 + }, + { + "epoch": 1.2152073732718893, + "grad_norm": 0.9443953324177181, + "learning_rate": 7.305860563251473e-07, + "loss": 0.8230598568916321, + "step": 5274 + }, + { + "epoch": 1.2154377880184333, + "grad_norm": 0.9783962526324749, + "learning_rate": 7.302191791371672e-07, + "loss": 0.7791799902915955, + "step": 5275 + }, + { + "epoch": 1.215668202764977, + "grad_norm": 1.1070826926760935, + "learning_rate": 7.298523411075163e-07, + "loss": 0.705475926399231, + "step": 5276 + }, + { + "epoch": 1.2158986175115207, + "grad_norm": 1.2064718691511076, + "learning_rate": 7.294855422894406e-07, + "loss": 0.8078421354293823, + "step": 5277 + }, + { + "epoch": 1.2161290322580645, + "grad_norm": 1.2182160993977798, + "learning_rate": 7.2911878273618e-07, + "loss": 0.8115853667259216, + "step": 5278 + }, + { + "epoch": 1.2163594470046082, + "grad_norm": 1.0596504935928797, + "learning_rate": 7.287520625009698e-07, + "loss": 0.6917247772216797, + "step": 5279 + }, + { + "epoch": 1.2165898617511521, + "grad_norm": 1.0522660082790807, + "learning_rate": 7.283853816370386e-07, + "loss": 0.7131551504135132, + "step": 5280 + }, + { + "epoch": 1.2168202764976959, + "grad_norm": 0.9495683492221387, + 
"learning_rate": 7.280187401976093e-07, + "loss": 0.713994562625885, + "step": 5281 + }, + { + "epoch": 1.2170506912442396, + "grad_norm": 1.0845439765546743, + "learning_rate": 7.276521382359001e-07, + "loss": 0.7123454809188843, + "step": 5282 + }, + { + "epoch": 1.2172811059907833, + "grad_norm": 1.395671188469518, + "learning_rate": 7.272855758051226e-07, + "loss": 0.7805770635604858, + "step": 5283 + }, + { + "epoch": 1.2175115207373273, + "grad_norm": 0.9191020761831104, + "learning_rate": 7.269190529584823e-07, + "loss": 0.756670355796814, + "step": 5284 + }, + { + "epoch": 1.217741935483871, + "grad_norm": 0.9614002237797926, + "learning_rate": 7.265525697491804e-07, + "loss": 0.5992655754089355, + "step": 5285 + }, + { + "epoch": 1.2179723502304147, + "grad_norm": 1.1857893348181308, + "learning_rate": 7.26186126230411e-07, + "loss": 0.7552722692489624, + "step": 5286 + }, + { + "epoch": 1.2182027649769585, + "grad_norm": 1.3153742960319537, + "learning_rate": 7.258197224553627e-07, + "loss": 0.7189064025878906, + "step": 5287 + }, + { + "epoch": 1.2184331797235024, + "grad_norm": 1.115820306372996, + "learning_rate": 7.254533584772188e-07, + "loss": 0.8277319669723511, + "step": 5288 + }, + { + "epoch": 1.2186635944700461, + "grad_norm": 1.0584826489222536, + "learning_rate": 7.250870343491561e-07, + "loss": 0.6655987501144409, + "step": 5289 + }, + { + "epoch": 1.2188940092165899, + "grad_norm": 1.3888484350972408, + "learning_rate": 7.247207501243469e-07, + "loss": 0.8654178380966187, + "step": 5290 + }, + { + "epoch": 1.2191244239631336, + "grad_norm": 1.1781514985004269, + "learning_rate": 7.243545058559564e-07, + "loss": 0.9148486852645874, + "step": 5291 + }, + { + "epoch": 1.2193548387096773, + "grad_norm": 1.0525236851594717, + "learning_rate": 7.239883015971439e-07, + "loss": 0.8003618717193604, + "step": 5292 + }, + { + "epoch": 1.2195852534562213, + "grad_norm": 1.1614945814905475, + "learning_rate": 7.236221374010647e-07, + "loss": 
0.7290889024734497, + "step": 5293 + }, + { + "epoch": 1.219815668202765, + "grad_norm": 0.963434252776205, + "learning_rate": 7.232560133208663e-07, + "loss": 0.5989147424697876, + "step": 5294 + }, + { + "epoch": 1.2200460829493087, + "grad_norm": 0.8766403983792901, + "learning_rate": 7.228899294096907e-07, + "loss": 0.8424522876739502, + "step": 5295 + }, + { + "epoch": 1.2202764976958524, + "grad_norm": 1.1686896205403536, + "learning_rate": 7.225238857206754e-07, + "loss": 0.7753746509552002, + "step": 5296 + }, + { + "epoch": 1.2205069124423964, + "grad_norm": 1.1424848742103464, + "learning_rate": 7.221578823069508e-07, + "loss": 0.693191647529602, + "step": 5297 + }, + { + "epoch": 1.2207373271889401, + "grad_norm": 1.177332636609729, + "learning_rate": 7.217919192216417e-07, + "loss": 0.7561964988708496, + "step": 5298 + }, + { + "epoch": 1.2209677419354839, + "grad_norm": 0.9927977088932712, + "learning_rate": 7.214259965178673e-07, + "loss": 0.7721199989318848, + "step": 5299 + }, + { + "epoch": 1.2211981566820276, + "grad_norm": 1.39798744468456, + "learning_rate": 7.210601142487407e-07, + "loss": 0.8100659251213074, + "step": 5300 + }, + { + "epoch": 1.2214285714285715, + "grad_norm": 1.0570396078634527, + "learning_rate": 7.206942724673688e-07, + "loss": 0.6753256916999817, + "step": 5301 + }, + { + "epoch": 1.2216589861751153, + "grad_norm": 1.1020954128293505, + "learning_rate": 7.20328471226854e-07, + "loss": 0.7534425854682922, + "step": 5302 + }, + { + "epoch": 1.221889400921659, + "grad_norm": 1.5962153366210945, + "learning_rate": 7.199627105802913e-07, + "loss": 0.8275027275085449, + "step": 5303 + }, + { + "epoch": 1.2221198156682027, + "grad_norm": 1.1431238814592317, + "learning_rate": 7.195969905807702e-07, + "loss": 0.728579580783844, + "step": 5304 + }, + { + "epoch": 1.2223502304147464, + "grad_norm": 1.1008777946014818, + "learning_rate": 7.192313112813749e-07, + "loss": 0.8221413493156433, + "step": 5305 + }, + { + "epoch": 
1.2225806451612904, + "grad_norm": 1.0255386420970887, + "learning_rate": 7.188656727351832e-07, + "loss": 0.7819123268127441, + "step": 5306 + }, + { + "epoch": 1.2228110599078341, + "grad_norm": 1.1141595278176613, + "learning_rate": 7.185000749952666e-07, + "loss": 0.7474294900894165, + "step": 5307 + }, + { + "epoch": 1.2230414746543778, + "grad_norm": 1.4333018176649106, + "learning_rate": 7.181345181146919e-07, + "loss": 0.8072259426116943, + "step": 5308 + }, + { + "epoch": 1.2232718894009216, + "grad_norm": 1.3449246489382425, + "learning_rate": 7.177690021465184e-07, + "loss": 0.8718069791793823, + "step": 5309 + }, + { + "epoch": 1.2235023041474655, + "grad_norm": 1.1090181258933243, + "learning_rate": 7.174035271438006e-07, + "loss": 0.8374875783920288, + "step": 5310 + }, + { + "epoch": 1.2237327188940093, + "grad_norm": 1.2085386756305507, + "learning_rate": 7.170380931595869e-07, + "loss": 0.6669566631317139, + "step": 5311 + }, + { + "epoch": 1.223963133640553, + "grad_norm": 1.1706882886588135, + "learning_rate": 7.16672700246919e-07, + "loss": 0.8735665678977966, + "step": 5312 + }, + { + "epoch": 1.2241935483870967, + "grad_norm": 1.1826163019402958, + "learning_rate": 7.16307348458834e-07, + "loss": 0.8312361240386963, + "step": 5313 + }, + { + "epoch": 1.2244239631336407, + "grad_norm": 1.1102424714986416, + "learning_rate": 7.159420378483619e-07, + "loss": 0.7927724123001099, + "step": 5314 + }, + { + "epoch": 1.2246543778801844, + "grad_norm": 1.0527049283172933, + "learning_rate": 7.155767684685264e-07, + "loss": 0.7641698122024536, + "step": 5315 + }, + { + "epoch": 1.2248847926267281, + "grad_norm": 1.0508850668326304, + "learning_rate": 7.15211540372347e-07, + "loss": 0.7490028142929077, + "step": 5316 + }, + { + "epoch": 1.2251152073732718, + "grad_norm": 1.0604993776512237, + "learning_rate": 7.148463536128354e-07, + "loss": 0.7194815874099731, + "step": 5317 + }, + { + "epoch": 1.2253456221198156, + "grad_norm": 1.2779756064695784, + 
"learning_rate": 7.144812082429979e-07, + "loss": 0.8328256607055664, + "step": 5318 + }, + { + "epoch": 1.2255760368663595, + "grad_norm": 1.1539197608232337, + "learning_rate": 7.141161043158352e-07, + "loss": 0.9124876260757446, + "step": 5319 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 1.346989410896588, + "learning_rate": 7.137510418843416e-07, + "loss": 0.8183319568634033, + "step": 5320 + }, + { + "epoch": 1.226036866359447, + "grad_norm": 1.0902088619882297, + "learning_rate": 7.133860210015048e-07, + "loss": 0.8423885107040405, + "step": 5321 + }, + { + "epoch": 1.2262672811059907, + "grad_norm": 1.064962271727849, + "learning_rate": 7.130210417203082e-07, + "loss": 0.8175387382507324, + "step": 5322 + }, + { + "epoch": 1.2264976958525347, + "grad_norm": 1.0111617635250245, + "learning_rate": 7.126561040937274e-07, + "loss": 0.8415048718452454, + "step": 5323 + }, + { + "epoch": 1.2267281105990784, + "grad_norm": 1.4241774929740556, + "learning_rate": 7.122912081747321e-07, + "loss": 0.6891156435012817, + "step": 5324 + }, + { + "epoch": 1.226958525345622, + "grad_norm": 1.1236132104045742, + "learning_rate": 7.119263540162876e-07, + "loss": 0.667617678642273, + "step": 5325 + }, + { + "epoch": 1.2271889400921658, + "grad_norm": 1.21591291521647, + "learning_rate": 7.115615416713517e-07, + "loss": 0.7752082347869873, + "step": 5326 + }, + { + "epoch": 1.2274193548387098, + "grad_norm": 1.0094697644265302, + "learning_rate": 7.111967711928757e-07, + "loss": 0.6582639813423157, + "step": 5327 + }, + { + "epoch": 1.2276497695852535, + "grad_norm": 0.9823209869062589, + "learning_rate": 7.108320426338063e-07, + "loss": 0.6996462345123291, + "step": 5328 + }, + { + "epoch": 1.2278801843317972, + "grad_norm": 1.1364634127826816, + "learning_rate": 7.104673560470828e-07, + "loss": 0.7132028341293335, + "step": 5329 + }, + { + "epoch": 1.228110599078341, + "grad_norm": 1.1959075580849723, + "learning_rate": 7.101027114856395e-07, + "loss": 
0.7344096899032593, + "step": 5330 + }, + { + "epoch": 1.2283410138248847, + "grad_norm": 1.2810764573761082, + "learning_rate": 7.097381090024039e-07, + "loss": 0.7805585861206055, + "step": 5331 + }, + { + "epoch": 1.2285714285714286, + "grad_norm": 1.2310137220528714, + "learning_rate": 7.093735486502976e-07, + "loss": 0.6785855889320374, + "step": 5332 + }, + { + "epoch": 1.2288018433179724, + "grad_norm": 1.3226389203047557, + "learning_rate": 7.090090304822355e-07, + "loss": 0.7465041875839233, + "step": 5333 + }, + { + "epoch": 1.229032258064516, + "grad_norm": 1.0465247410006058, + "learning_rate": 7.086445545511278e-07, + "loss": 0.7400432825088501, + "step": 5334 + }, + { + "epoch": 1.2292626728110598, + "grad_norm": 0.9732969942350592, + "learning_rate": 7.082801209098774e-07, + "loss": 0.8567768335342407, + "step": 5335 + }, + { + "epoch": 1.2294930875576038, + "grad_norm": 1.133102602749406, + "learning_rate": 7.079157296113807e-07, + "loss": 0.7451025247573853, + "step": 5336 + }, + { + "epoch": 1.2297235023041475, + "grad_norm": 1.2953309888801026, + "learning_rate": 7.075513807085299e-07, + "loss": 0.7178194522857666, + "step": 5337 + }, + { + "epoch": 1.2299539170506912, + "grad_norm": 1.114794382407599, + "learning_rate": 7.071870742542086e-07, + "loss": 0.7538058161735535, + "step": 5338 + }, + { + "epoch": 1.230184331797235, + "grad_norm": 1.2706015052011863, + "learning_rate": 7.068228103012959e-07, + "loss": 0.7853896021842957, + "step": 5339 + }, + { + "epoch": 1.230414746543779, + "grad_norm": 1.6145088717882257, + "learning_rate": 7.064585889026644e-07, + "loss": 0.9359887838363647, + "step": 5340 + }, + { + "epoch": 1.2306451612903226, + "grad_norm": 1.2876289498435494, + "learning_rate": 7.060944101111797e-07, + "loss": 0.8590530753135681, + "step": 5341 + }, + { + "epoch": 1.2308755760368664, + "grad_norm": 1.0245387562303532, + "learning_rate": 7.057302739797025e-07, + "loss": 0.7047204971313477, + "step": 5342 + }, + { + "epoch": 
1.23110599078341, + "grad_norm": 1.3069544437359595, + "learning_rate": 7.053661805610867e-07, + "loss": 0.8826072216033936, + "step": 5343 + }, + { + "epoch": 1.2313364055299538, + "grad_norm": 1.2593962984780245, + "learning_rate": 7.050021299081792e-07, + "loss": 0.9394192695617676, + "step": 5344 + }, + { + "epoch": 1.2315668202764978, + "grad_norm": 1.1109567819341923, + "learning_rate": 7.046381220738224e-07, + "loss": 0.7814885377883911, + "step": 5345 + }, + { + "epoch": 1.2317972350230415, + "grad_norm": 1.1819250736895568, + "learning_rate": 7.042741571108512e-07, + "loss": 0.781699538230896, + "step": 5346 + }, + { + "epoch": 1.2320276497695852, + "grad_norm": 1.1116588757864085, + "learning_rate": 7.039102350720946e-07, + "loss": 0.6554632186889648, + "step": 5347 + }, + { + "epoch": 1.232258064516129, + "grad_norm": 0.9564548780258206, + "learning_rate": 7.035463560103753e-07, + "loss": 0.6449903249740601, + "step": 5348 + }, + { + "epoch": 1.2324884792626727, + "grad_norm": 1.3130676696714008, + "learning_rate": 7.031825199785101e-07, + "loss": 0.8222958445549011, + "step": 5349 + }, + { + "epoch": 1.2327188940092166, + "grad_norm": 1.073654969776922, + "learning_rate": 7.02818727029309e-07, + "loss": 0.8315533399581909, + "step": 5350 + }, + { + "epoch": 1.2329493087557604, + "grad_norm": 0.9980466179862664, + "learning_rate": 7.024549772155764e-07, + "loss": 0.8065732717514038, + "step": 5351 + }, + { + "epoch": 1.233179723502304, + "grad_norm": 1.3823215182318742, + "learning_rate": 7.020912705901101e-07, + "loss": 0.7607216835021973, + "step": 5352 + }, + { + "epoch": 1.233410138248848, + "grad_norm": 1.3000097773568569, + "learning_rate": 7.01727607205701e-07, + "loss": 0.877311110496521, + "step": 5353 + }, + { + "epoch": 1.2336405529953918, + "grad_norm": 1.1855641794195606, + "learning_rate": 7.013639871151354e-07, + "loss": 0.7352526187896729, + "step": 5354 + }, + { + "epoch": 1.2338709677419355, + "grad_norm": 1.1123782494693044, + 
"learning_rate": 7.010004103711915e-07, + "loss": 0.7676074504852295, + "step": 5355 + }, + { + "epoch": 1.2341013824884792, + "grad_norm": 1.1035546011135826, + "learning_rate": 7.00636877026642e-07, + "loss": 0.7802003622055054, + "step": 5356 + }, + { + "epoch": 1.234331797235023, + "grad_norm": 1.0576568317960378, + "learning_rate": 7.002733871342537e-07, + "loss": 0.747033953666687, + "step": 5357 + }, + { + "epoch": 1.234562211981567, + "grad_norm": 1.1565555542506367, + "learning_rate": 6.999099407467865e-07, + "loss": 0.8086956739425659, + "step": 5358 + }, + { + "epoch": 1.2347926267281106, + "grad_norm": 1.450692015608809, + "learning_rate": 6.995465379169941e-07, + "loss": 0.9362099170684814, + "step": 5359 + }, + { + "epoch": 1.2350230414746544, + "grad_norm": 1.0699993470783844, + "learning_rate": 6.991831786976241e-07, + "loss": 0.6784812211990356, + "step": 5360 + }, + { + "epoch": 1.235253456221198, + "grad_norm": 1.0206889971672557, + "learning_rate": 6.988198631414171e-07, + "loss": 0.7733708620071411, + "step": 5361 + }, + { + "epoch": 1.2354838709677418, + "grad_norm": 1.1745502344238163, + "learning_rate": 6.984565913011087e-07, + "loss": 0.8747115135192871, + "step": 5362 + }, + { + "epoch": 1.2357142857142858, + "grad_norm": 1.0659966645754941, + "learning_rate": 6.980933632294268e-07, + "loss": 0.6947430372238159, + "step": 5363 + }, + { + "epoch": 1.2359447004608295, + "grad_norm": 1.206089262306805, + "learning_rate": 6.97730178979093e-07, + "loss": 0.7128404378890991, + "step": 5364 + }, + { + "epoch": 1.2361751152073732, + "grad_norm": 1.1120167642627505, + "learning_rate": 6.973670386028242e-07, + "loss": 0.7190830707550049, + "step": 5365 + }, + { + "epoch": 1.2364055299539172, + "grad_norm": 1.1367562157166997, + "learning_rate": 6.970039421533291e-07, + "loss": 0.7625770568847656, + "step": 5366 + }, + { + "epoch": 1.236635944700461, + "grad_norm": 1.109720416461976, + "learning_rate": 6.966408896833104e-07, + "loss": 
0.7942707538604736, + "step": 5367 + }, + { + "epoch": 1.2368663594470046, + "grad_norm": 1.2413354296268997, + "learning_rate": 6.962778812454652e-07, + "loss": 0.8329455852508545, + "step": 5368 + }, + { + "epoch": 1.2370967741935484, + "grad_norm": 0.8823115581397621, + "learning_rate": 6.959149168924833e-07, + "loss": 0.6034290790557861, + "step": 5369 + }, + { + "epoch": 1.237327188940092, + "grad_norm": 1.1119487486974622, + "learning_rate": 6.955519966770486e-07, + "loss": 0.8424680233001709, + "step": 5370 + }, + { + "epoch": 1.237557603686636, + "grad_norm": 1.4443979353165184, + "learning_rate": 6.951891206518388e-07, + "loss": 0.8670322895050049, + "step": 5371 + }, + { + "epoch": 1.2377880184331798, + "grad_norm": 1.2577295715670245, + "learning_rate": 6.948262888695244e-07, + "loss": 0.7283621430397034, + "step": 5372 + }, + { + "epoch": 1.2380184331797235, + "grad_norm": 1.1772858057268798, + "learning_rate": 6.9446350138277e-07, + "loss": 0.7990118265151978, + "step": 5373 + }, + { + "epoch": 1.2382488479262672, + "grad_norm": 1.3359682917878526, + "learning_rate": 6.941007582442342e-07, + "loss": 0.945558488368988, + "step": 5374 + }, + { + "epoch": 1.238479262672811, + "grad_norm": 1.186182272846314, + "learning_rate": 6.937380595065685e-07, + "loss": 0.6905936002731323, + "step": 5375 + }, + { + "epoch": 1.238709677419355, + "grad_norm": 1.1665515184197677, + "learning_rate": 6.933754052224176e-07, + "loss": 0.7757662534713745, + "step": 5376 + }, + { + "epoch": 1.2389400921658986, + "grad_norm": 1.1107589407670702, + "learning_rate": 6.930127954444209e-07, + "loss": 0.63062584400177, + "step": 5377 + }, + { + "epoch": 1.2391705069124423, + "grad_norm": 1.2453155093106256, + "learning_rate": 6.926502302252109e-07, + "loss": 0.7341021299362183, + "step": 5378 + }, + { + "epoch": 1.2394009216589863, + "grad_norm": 0.9019761448377311, + "learning_rate": 6.922877096174127e-07, + "loss": 0.572767972946167, + "step": 5379 + }, + { + "epoch": 
1.23963133640553, + "grad_norm": 1.274761976544521, + "learning_rate": 6.919252336736463e-07, + "loss": 0.630276083946228, + "step": 5380 + }, + { + "epoch": 1.2398617511520738, + "grad_norm": 1.0769631455551745, + "learning_rate": 6.915628024465244e-07, + "loss": 0.668334424495697, + "step": 5381 + }, + { + "epoch": 1.2400921658986175, + "grad_norm": 0.9444198657704267, + "learning_rate": 6.912004159886529e-07, + "loss": 0.6766513586044312, + "step": 5382 + }, + { + "epoch": 1.2403225806451612, + "grad_norm": 1.3884668691330446, + "learning_rate": 6.908380743526328e-07, + "loss": 0.7016473412513733, + "step": 5383 + }, + { + "epoch": 1.2405529953917052, + "grad_norm": 1.378738366714881, + "learning_rate": 6.904757775910568e-07, + "loss": 0.8837979435920715, + "step": 5384 + }, + { + "epoch": 1.2407834101382489, + "grad_norm": 0.9305030195638431, + "learning_rate": 6.901135257565116e-07, + "loss": 0.7187714576721191, + "step": 5385 + }, + { + "epoch": 1.2410138248847926, + "grad_norm": 1.0935814864632027, + "learning_rate": 6.897513189015782e-07, + "loss": 0.8227157592773438, + "step": 5386 + }, + { + "epoch": 1.2412442396313363, + "grad_norm": 1.278600897043475, + "learning_rate": 6.893891570788301e-07, + "loss": 0.8812209367752075, + "step": 5387 + }, + { + "epoch": 1.24147465437788, + "grad_norm": 1.0426681195674332, + "learning_rate": 6.890270403408348e-07, + "loss": 0.6702297925949097, + "step": 5388 + }, + { + "epoch": 1.241705069124424, + "grad_norm": 1.1718249382850798, + "learning_rate": 6.886649687401529e-07, + "loss": 0.646358847618103, + "step": 5389 + }, + { + "epoch": 1.2419354838709677, + "grad_norm": 1.1131010301922042, + "learning_rate": 6.883029423293383e-07, + "loss": 0.6514080762863159, + "step": 5390 + }, + { + "epoch": 1.2421658986175115, + "grad_norm": 1.0826812738863971, + "learning_rate": 6.879409611609393e-07, + "loss": 0.6938437819480896, + "step": 5391 + }, + { + "epoch": 1.2423963133640552, + "grad_norm": 1.3710627721954263, + 
"learning_rate": 6.875790252874967e-07, + "loss": 0.8601399064064026, + "step": 5392 + }, + { + "epoch": 1.2426267281105992, + "grad_norm": 1.1590300352526421, + "learning_rate": 6.872171347615445e-07, + "loss": 0.6641080379486084, + "step": 5393 + }, + { + "epoch": 1.2428571428571429, + "grad_norm": 1.0046628491787142, + "learning_rate": 6.868552896356117e-07, + "loss": 0.7109012603759766, + "step": 5394 + }, + { + "epoch": 1.2430875576036866, + "grad_norm": 1.261042767669179, + "learning_rate": 6.864934899622191e-07, + "loss": 0.8558728694915771, + "step": 5395 + }, + { + "epoch": 1.2433179723502303, + "grad_norm": 1.1243133400823155, + "learning_rate": 6.861317357938807e-07, + "loss": 0.6119382977485657, + "step": 5396 + }, + { + "epoch": 1.2435483870967743, + "grad_norm": 1.2850449121793286, + "learning_rate": 6.857700271831059e-07, + "loss": 0.7527587413787842, + "step": 5397 + }, + { + "epoch": 1.243778801843318, + "grad_norm": 1.3104214277299573, + "learning_rate": 6.854083641823957e-07, + "loss": 0.8082761168479919, + "step": 5398 + }, + { + "epoch": 1.2440092165898617, + "grad_norm": 1.0664271007055484, + "learning_rate": 6.850467468442447e-07, + "loss": 0.7289307117462158, + "step": 5399 + }, + { + "epoch": 1.2442396313364055, + "grad_norm": 1.2684124709337747, + "learning_rate": 6.846851752211418e-07, + "loss": 0.8824148178100586, + "step": 5400 + }, + { + "epoch": 1.2444700460829492, + "grad_norm": 1.2011621536911168, + "learning_rate": 6.843236493655682e-07, + "loss": 0.7046724557876587, + "step": 5401 + }, + { + "epoch": 1.2447004608294931, + "grad_norm": 1.0456601321771188, + "learning_rate": 6.839621693299987e-07, + "loss": 0.8192921876907349, + "step": 5402 + }, + { + "epoch": 1.2449308755760369, + "grad_norm": 1.1031705508374716, + "learning_rate": 6.83600735166902e-07, + "loss": 0.7651070356369019, + "step": 5403 + }, + { + "epoch": 1.2451612903225806, + "grad_norm": 1.10155120943284, + "learning_rate": 6.832393469287401e-07, + "loss": 
0.7689340114593506, + "step": 5404 + }, + { + "epoch": 1.2453917050691243, + "grad_norm": 1.438313566898243, + "learning_rate": 6.828780046679671e-07, + "loss": 0.9214832782745361, + "step": 5405 + }, + { + "epoch": 1.2456221198156683, + "grad_norm": 1.1160237214981186, + "learning_rate": 6.825167084370322e-07, + "loss": 0.7210682034492493, + "step": 5406 + }, + { + "epoch": 1.245852534562212, + "grad_norm": 1.1608936823977416, + "learning_rate": 6.82155458288377e-07, + "loss": 0.871317446231842, + "step": 5407 + }, + { + "epoch": 1.2460829493087557, + "grad_norm": 1.2750147741770517, + "learning_rate": 6.817942542744359e-07, + "loss": 0.7669065594673157, + "step": 5408 + }, + { + "epoch": 1.2463133640552995, + "grad_norm": 1.0693548196930358, + "learning_rate": 6.814330964476379e-07, + "loss": 0.7317448854446411, + "step": 5409 + }, + { + "epoch": 1.2465437788018434, + "grad_norm": 1.2936969678285373, + "learning_rate": 6.810719848604036e-07, + "loss": 0.7873220443725586, + "step": 5410 + }, + { + "epoch": 1.2467741935483871, + "grad_norm": 1.2973675980536, + "learning_rate": 6.807109195651492e-07, + "loss": 0.713294267654419, + "step": 5411 + }, + { + "epoch": 1.2470046082949309, + "grad_norm": 1.2551238151306954, + "learning_rate": 6.803499006142819e-07, + "loss": 0.7592979669570923, + "step": 5412 + }, + { + "epoch": 1.2472350230414746, + "grad_norm": 1.3113983649465133, + "learning_rate": 6.79988928060203e-07, + "loss": 0.7805737257003784, + "step": 5413 + }, + { + "epoch": 1.2474654377880183, + "grad_norm": 0.8180058983934718, + "learning_rate": 6.79628001955308e-07, + "loss": 0.7706440687179565, + "step": 5414 + }, + { + "epoch": 1.2476958525345623, + "grad_norm": 1.3696824329137627, + "learning_rate": 6.792671223519844e-07, + "loss": 0.772534966468811, + "step": 5415 + }, + { + "epoch": 1.247926267281106, + "grad_norm": 1.2283026355612159, + "learning_rate": 6.789062893026129e-07, + "loss": 0.7939096093177795, + "step": 5416 + }, + { + "epoch": 
1.2481566820276497, + "grad_norm": 1.263037130888269, + "learning_rate": 6.78545502859569e-07, + "loss": 0.7062902450561523, + "step": 5417 + }, + { + "epoch": 1.2483870967741935, + "grad_norm": 1.042353004558378, + "learning_rate": 6.781847630752197e-07, + "loss": 0.8296496868133545, + "step": 5418 + }, + { + "epoch": 1.2486175115207374, + "grad_norm": 1.4186103660131706, + "learning_rate": 6.778240700019258e-07, + "loss": 0.926125168800354, + "step": 5419 + }, + { + "epoch": 1.2488479262672811, + "grad_norm": 1.1816532525816696, + "learning_rate": 6.774634236920419e-07, + "loss": 0.7301739454269409, + "step": 5420 + }, + { + "epoch": 1.2490783410138249, + "grad_norm": 1.366957713339659, + "learning_rate": 6.771028241979151e-07, + "loss": 0.7313426733016968, + "step": 5421 + }, + { + "epoch": 1.2493087557603686, + "grad_norm": 0.9539446793763906, + "learning_rate": 6.767422715718853e-07, + "loss": 0.7193025946617126, + "step": 5422 + }, + { + "epoch": 1.2495391705069125, + "grad_norm": 1.1735826178809459, + "learning_rate": 6.763817658662874e-07, + "loss": 0.6544638872146606, + "step": 5423 + }, + { + "epoch": 1.2497695852534563, + "grad_norm": 1.1828661707349362, + "learning_rate": 6.760213071334478e-07, + "loss": 0.8402822613716125, + "step": 5424 + }, + { + "epoch": 1.25, + "grad_norm": 1.1854670368859663, + "learning_rate": 6.756608954256861e-07, + "loss": 0.6840100288391113, + "step": 5425 + }, + { + "epoch": 1.2502304147465437, + "grad_norm": 1.1842873946027908, + "learning_rate": 6.753005307953165e-07, + "loss": 0.7315107583999634, + "step": 5426 + }, + { + "epoch": 1.2504608294930875, + "grad_norm": 0.9743094512393712, + "learning_rate": 6.74940213294645e-07, + "loss": 0.6369785070419312, + "step": 5427 + }, + { + "epoch": 1.2506912442396314, + "grad_norm": 1.0769824502789231, + "learning_rate": 6.745799429759711e-07, + "loss": 0.7700424790382385, + "step": 5428 + }, + { + "epoch": 1.2509216589861751, + "grad_norm": 1.2719323162039158, + "learning_rate": 
6.742197198915877e-07, + "loss": 0.7436221241950989, + "step": 5429 + }, + { + "epoch": 1.2511520737327189, + "grad_norm": 1.235326047289827, + "learning_rate": 6.738595440937809e-07, + "loss": 0.8028342723846436, + "step": 5430 + }, + { + "epoch": 1.2513824884792628, + "grad_norm": 1.1651221420823998, + "learning_rate": 6.734994156348288e-07, + "loss": 0.7705515623092651, + "step": 5431 + }, + { + "epoch": 1.2516129032258063, + "grad_norm": 1.509633589240068, + "learning_rate": 6.73139334567005e-07, + "loss": 0.7110899686813354, + "step": 5432 + }, + { + "epoch": 1.2518433179723503, + "grad_norm": 1.0701201128505256, + "learning_rate": 6.727793009425739e-07, + "loss": 0.7495337128639221, + "step": 5433 + }, + { + "epoch": 1.252073732718894, + "grad_norm": 1.1393040143384143, + "learning_rate": 6.724193148137938e-07, + "loss": 0.7735337018966675, + "step": 5434 + }, + { + "epoch": 1.2523041474654377, + "grad_norm": 1.5709409365174263, + "learning_rate": 6.720593762329167e-07, + "loss": 0.8655617237091064, + "step": 5435 + }, + { + "epoch": 1.2525345622119817, + "grad_norm": 1.0969772466203969, + "learning_rate": 6.716994852521871e-07, + "loss": 0.7989616394042969, + "step": 5436 + }, + { + "epoch": 1.2527649769585254, + "grad_norm": 1.2186152186967236, + "learning_rate": 6.713396419238424e-07, + "loss": 0.8090296983718872, + "step": 5437 + }, + { + "epoch": 1.2529953917050691, + "grad_norm": 1.175751705980128, + "learning_rate": 6.709798463001138e-07, + "loss": 0.7150726318359375, + "step": 5438 + }, + { + "epoch": 1.2532258064516129, + "grad_norm": 1.1350361891486582, + "learning_rate": 6.706200984332249e-07, + "loss": 0.7136287689208984, + "step": 5439 + }, + { + "epoch": 1.2534562211981566, + "grad_norm": 1.2991395376590593, + "learning_rate": 6.702603983753927e-07, + "loss": 0.8538687229156494, + "step": 5440 + }, + { + "epoch": 1.2536866359447005, + "grad_norm": 1.5253402941485412, + "learning_rate": 6.699007461788272e-07, + "loss": 0.7960666418075562, + 
"step": 5441 + }, + { + "epoch": 1.2539170506912443, + "grad_norm": 0.9539757778238315, + "learning_rate": 6.695411418957309e-07, + "loss": 0.7462595701217651, + "step": 5442 + }, + { + "epoch": 1.254147465437788, + "grad_norm": 1.482445221768143, + "learning_rate": 6.691815855783009e-07, + "loss": 0.795913577079773, + "step": 5443 + }, + { + "epoch": 1.2543778801843317, + "grad_norm": 1.071717267875031, + "learning_rate": 6.688220772787258e-07, + "loss": 0.7589330077171326, + "step": 5444 + }, + { + "epoch": 1.2546082949308754, + "grad_norm": 1.4795497320121442, + "learning_rate": 6.684626170491874e-07, + "loss": 0.7719615697860718, + "step": 5445 + }, + { + "epoch": 1.2548387096774194, + "grad_norm": 1.06581311441289, + "learning_rate": 6.681032049418616e-07, + "loss": 0.8516664505004883, + "step": 5446 + }, + { + "epoch": 1.2550691244239631, + "grad_norm": 1.466555451116343, + "learning_rate": 6.677438410089163e-07, + "loss": 0.8597210049629211, + "step": 5447 + }, + { + "epoch": 1.2552995391705069, + "grad_norm": 1.2172979010742704, + "learning_rate": 6.673845253025124e-07, + "loss": 0.7101171016693115, + "step": 5448 + }, + { + "epoch": 1.2555299539170508, + "grad_norm": 1.105900547055049, + "learning_rate": 6.670252578748044e-07, + "loss": 0.6946178078651428, + "step": 5449 + }, + { + "epoch": 1.2557603686635945, + "grad_norm": 1.687580161954866, + "learning_rate": 6.666660387779395e-07, + "loss": 0.9912126660346985, + "step": 5450 + }, + { + "epoch": 1.2559907834101383, + "grad_norm": 1.087382323913162, + "learning_rate": 6.663068680640573e-07, + "loss": 0.6495379209518433, + "step": 5451 + }, + { + "epoch": 1.256221198156682, + "grad_norm": 1.0213661473677353, + "learning_rate": 6.65947745785292e-07, + "loss": 0.6276426315307617, + "step": 5452 + }, + { + "epoch": 1.2564516129032257, + "grad_norm": 1.082562870265783, + "learning_rate": 6.655886719937691e-07, + "loss": 0.7273461818695068, + "step": 5453 + }, + { + "epoch": 1.2566820276497697, + "grad_norm": 
1.258671733492057, + "learning_rate": 6.652296467416073e-07, + "loss": 0.8248249292373657, + "step": 5454 + }, + { + "epoch": 1.2569124423963134, + "grad_norm": 1.2124691152915896, + "learning_rate": 6.648706700809196e-07, + "loss": 0.8709753751754761, + "step": 5455 + }, + { + "epoch": 1.2571428571428571, + "grad_norm": 1.4025604957471465, + "learning_rate": 6.645117420638105e-07, + "loss": 0.8207283020019531, + "step": 5456 + }, + { + "epoch": 1.2573732718894008, + "grad_norm": 1.0867491150840567, + "learning_rate": 6.641528627423774e-07, + "loss": 0.8222801685333252, + "step": 5457 + }, + { + "epoch": 1.2576036866359446, + "grad_norm": 1.0891862457945214, + "learning_rate": 6.637940321687121e-07, + "loss": 0.7684904336929321, + "step": 5458 + }, + { + "epoch": 1.2578341013824885, + "grad_norm": 1.106565522930133, + "learning_rate": 6.634352503948979e-07, + "loss": 0.7930517196655273, + "step": 5459 + }, + { + "epoch": 1.2580645161290323, + "grad_norm": 1.255727738748605, + "learning_rate": 6.630765174730116e-07, + "loss": 0.7414563298225403, + "step": 5460 + }, + { + "epoch": 1.258294930875576, + "grad_norm": 1.0415923536335177, + "learning_rate": 6.627178334551227e-07, + "loss": 0.7959232926368713, + "step": 5461 + }, + { + "epoch": 1.25852534562212, + "grad_norm": 1.2823788828450395, + "learning_rate": 6.623591983932935e-07, + "loss": 0.6722866296768188, + "step": 5462 + }, + { + "epoch": 1.2587557603686637, + "grad_norm": 1.0428819037253236, + "learning_rate": 6.620006123395799e-07, + "loss": 0.7688727378845215, + "step": 5463 + }, + { + "epoch": 1.2589861751152074, + "grad_norm": 1.1454091886933473, + "learning_rate": 6.616420753460301e-07, + "loss": 0.7543724179267883, + "step": 5464 + }, + { + "epoch": 1.2592165898617511, + "grad_norm": 1.3156243556780545, + "learning_rate": 6.612835874646847e-07, + "loss": 0.7097430229187012, + "step": 5465 + }, + { + "epoch": 1.2594470046082948, + "grad_norm": 1.1699591097632744, + "learning_rate": 6.609251487475786e-07, 
+ "loss": 0.8640443682670593, + "step": 5466 + }, + { + "epoch": 1.2596774193548388, + "grad_norm": 1.4552439697890553, + "learning_rate": 6.605667592467384e-07, + "loss": 0.7872523069381714, + "step": 5467 + }, + { + "epoch": 1.2599078341013825, + "grad_norm": 1.3601390048962447, + "learning_rate": 6.602084190141835e-07, + "loss": 0.8647557497024536, + "step": 5468 + }, + { + "epoch": 1.2601382488479262, + "grad_norm": 0.9953963267515464, + "learning_rate": 6.598501281019268e-07, + "loss": 0.7323553562164307, + "step": 5469 + }, + { + "epoch": 1.26036866359447, + "grad_norm": 1.2478057023441294, + "learning_rate": 6.594918865619739e-07, + "loss": 0.8214852809906006, + "step": 5470 + }, + { + "epoch": 1.2605990783410137, + "grad_norm": 1.1743890995374524, + "learning_rate": 6.591336944463223e-07, + "loss": 0.8011265397071838, + "step": 5471 + }, + { + "epoch": 1.2608294930875577, + "grad_norm": 0.9651307194588488, + "learning_rate": 6.587755518069642e-07, + "loss": 0.798862636089325, + "step": 5472 + }, + { + "epoch": 1.2610599078341014, + "grad_norm": 1.1888872240865054, + "learning_rate": 6.58417458695883e-07, + "loss": 0.7231202721595764, + "step": 5473 + }, + { + "epoch": 1.261290322580645, + "grad_norm": 1.25713690411949, + "learning_rate": 6.580594151650551e-07, + "loss": 0.8816685676574707, + "step": 5474 + }, + { + "epoch": 1.261520737327189, + "grad_norm": 1.0218552259688816, + "learning_rate": 6.577014212664509e-07, + "loss": 0.6343427300453186, + "step": 5475 + }, + { + "epoch": 1.2617511520737328, + "grad_norm": 1.2062270864209526, + "learning_rate": 6.573434770520321e-07, + "loss": 0.7785895466804504, + "step": 5476 + }, + { + "epoch": 1.2619815668202765, + "grad_norm": 1.2086458816060426, + "learning_rate": 6.569855825737536e-07, + "loss": 0.7408698797225952, + "step": 5477 + }, + { + "epoch": 1.2622119815668202, + "grad_norm": 1.2755490666336102, + "learning_rate": 6.566277378835643e-07, + "loss": 0.8481286764144897, + "step": 5478 + }, + { + 
"epoch": 1.262442396313364, + "grad_norm": 1.0772225233745287, + "learning_rate": 6.56269943033404e-07, + "loss": 0.8221831917762756, + "step": 5479 + }, + { + "epoch": 1.262672811059908, + "grad_norm": 1.1202704150930312, + "learning_rate": 6.559121980752065e-07, + "loss": 0.805405855178833, + "step": 5480 + }, + { + "epoch": 1.2629032258064516, + "grad_norm": 1.4925713527432443, + "learning_rate": 6.55554503060898e-07, + "loss": 0.8643565773963928, + "step": 5481 + }, + { + "epoch": 1.2631336405529954, + "grad_norm": 1.038997236699539, + "learning_rate": 6.551968580423973e-07, + "loss": 0.7087225914001465, + "step": 5482 + }, + { + "epoch": 1.263364055299539, + "grad_norm": 1.3080505612178328, + "learning_rate": 6.54839263071616e-07, + "loss": 0.8401756882667542, + "step": 5483 + }, + { + "epoch": 1.2635944700460828, + "grad_norm": 0.974231759030553, + "learning_rate": 6.544817182004589e-07, + "loss": 0.76345294713974, + "step": 5484 + }, + { + "epoch": 1.2638248847926268, + "grad_norm": 0.9975788463971886, + "learning_rate": 6.541242234808228e-07, + "loss": 0.7177271842956543, + "step": 5485 + }, + { + "epoch": 1.2640552995391705, + "grad_norm": 1.0524467641617976, + "learning_rate": 6.537667789645981e-07, + "loss": 0.7436186075210571, + "step": 5486 + }, + { + "epoch": 1.2642857142857142, + "grad_norm": 1.025347292021162, + "learning_rate": 6.53409384703667e-07, + "loss": 0.6526673436164856, + "step": 5487 + }, + { + "epoch": 1.2645161290322582, + "grad_norm": 1.4422505610217646, + "learning_rate": 6.530520407499049e-07, + "loss": 0.879219651222229, + "step": 5488 + }, + { + "epoch": 1.264746543778802, + "grad_norm": 1.1643268817299548, + "learning_rate": 6.526947471551798e-07, + "loss": 0.7005003690719604, + "step": 5489 + }, + { + "epoch": 1.2649769585253456, + "grad_norm": 1.276974659887974, + "learning_rate": 6.523375039713525e-07, + "loss": 0.716349720954895, + "step": 5490 + }, + { + "epoch": 1.2652073732718894, + "grad_norm": 1.307490301718017, + 
"learning_rate": 6.519803112502758e-07, + "loss": 0.8524413704872131, + "step": 5491 + }, + { + "epoch": 1.265437788018433, + "grad_norm": 1.3886244481055607, + "learning_rate": 6.516231690437966e-07, + "loss": 0.8032857179641724, + "step": 5492 + }, + { + "epoch": 1.265668202764977, + "grad_norm": 1.3026581508138244, + "learning_rate": 6.512660774037531e-07, + "loss": 0.8912144899368286, + "step": 5493 + }, + { + "epoch": 1.2658986175115208, + "grad_norm": 1.1001846572449894, + "learning_rate": 6.509090363819764e-07, + "loss": 0.6526974439620972, + "step": 5494 + }, + { + "epoch": 1.2661290322580645, + "grad_norm": 1.1539964772442708, + "learning_rate": 6.505520460302916e-07, + "loss": 0.7436610460281372, + "step": 5495 + }, + { + "epoch": 1.2663594470046082, + "grad_norm": 1.0590907210895066, + "learning_rate": 6.501951064005145e-07, + "loss": 0.7112951874732971, + "step": 5496 + }, + { + "epoch": 1.266589861751152, + "grad_norm": 1.136772271419419, + "learning_rate": 6.498382175444545e-07, + "loss": 0.6908622980117798, + "step": 5497 + }, + { + "epoch": 1.266820276497696, + "grad_norm": 1.2936126009346398, + "learning_rate": 6.494813795139137e-07, + "loss": 0.8169400691986084, + "step": 5498 + }, + { + "epoch": 1.2670506912442396, + "grad_norm": 1.1611805763062155, + "learning_rate": 6.491245923606868e-07, + "loss": 0.7577871084213257, + "step": 5499 + }, + { + "epoch": 1.2672811059907834, + "grad_norm": 1.2166617406598321, + "learning_rate": 6.487678561365606e-07, + "loss": 0.7470887303352356, + "step": 5500 + }, + { + "epoch": 1.2675115207373273, + "grad_norm": 1.2499100792685887, + "learning_rate": 6.484111708933153e-07, + "loss": 0.7862193584442139, + "step": 5501 + }, + { + "epoch": 1.267741935483871, + "grad_norm": 1.0856856438170979, + "learning_rate": 6.48054536682723e-07, + "loss": 0.6809444427490234, + "step": 5502 + }, + { + "epoch": 1.2679723502304148, + "grad_norm": 1.1883483456973896, + "learning_rate": 6.476979535565486e-07, + "loss": 
0.7560738921165466, + "step": 5503 + }, + { + "epoch": 1.2682027649769585, + "grad_norm": 1.060654462751894, + "learning_rate": 6.473414215665501e-07, + "loss": 0.6961003541946411, + "step": 5504 + }, + { + "epoch": 1.2684331797235022, + "grad_norm": 1.1318601167609275, + "learning_rate": 6.469849407644775e-07, + "loss": 0.762688159942627, + "step": 5505 + }, + { + "epoch": 1.2686635944700462, + "grad_norm": 1.3318780914664468, + "learning_rate": 6.46628511202073e-07, + "loss": 0.8735007047653198, + "step": 5506 + }, + { + "epoch": 1.26889400921659, + "grad_norm": 1.2498993266864264, + "learning_rate": 6.462721329310727e-07, + "loss": 0.7127432823181152, + "step": 5507 + }, + { + "epoch": 1.2691244239631336, + "grad_norm": 1.1810894491038926, + "learning_rate": 6.45915806003204e-07, + "loss": 0.7720422744750977, + "step": 5508 + }, + { + "epoch": 1.2693548387096774, + "grad_norm": 1.3742393921911886, + "learning_rate": 6.455595304701871e-07, + "loss": 0.8046890497207642, + "step": 5509 + }, + { + "epoch": 1.269585253456221, + "grad_norm": 1.433035812490825, + "learning_rate": 6.452033063837354e-07, + "loss": 0.8218742609024048, + "step": 5510 + }, + { + "epoch": 1.269815668202765, + "grad_norm": 1.3642640568886157, + "learning_rate": 6.448471337955536e-07, + "loss": 0.912622332572937, + "step": 5511 + }, + { + "epoch": 1.2700460829493088, + "grad_norm": 1.3101181049427244, + "learning_rate": 6.444910127573407e-07, + "loss": 0.7940733432769775, + "step": 5512 + }, + { + "epoch": 1.2702764976958525, + "grad_norm": 1.0982469100789136, + "learning_rate": 6.441349433207864e-07, + "loss": 0.7085565328598022, + "step": 5513 + }, + { + "epoch": 1.2705069124423964, + "grad_norm": 1.241687978637031, + "learning_rate": 6.437789255375739e-07, + "loss": 0.9316935539245605, + "step": 5514 + }, + { + "epoch": 1.2707373271889402, + "grad_norm": 0.9697190322352798, + "learning_rate": 6.43422959459379e-07, + "loss": 0.7412574291229248, + "step": 5515 + }, + { + "epoch": 
1.270967741935484, + "grad_norm": 0.9713506680995111, + "learning_rate": 6.430670451378695e-07, + "loss": 0.7476450204849243, + "step": 5516 + }, + { + "epoch": 1.2711981566820276, + "grad_norm": 1.1272976564667934, + "learning_rate": 6.427111826247056e-07, + "loss": 0.8530189990997314, + "step": 5517 + }, + { + "epoch": 1.2714285714285714, + "grad_norm": 1.3163108639601895, + "learning_rate": 6.423553719715406e-07, + "loss": 0.8193017840385437, + "step": 5518 + }, + { + "epoch": 1.2716589861751153, + "grad_norm": 1.002275086425174, + "learning_rate": 6.419996132300203e-07, + "loss": 0.7444974780082703, + "step": 5519 + }, + { + "epoch": 1.271889400921659, + "grad_norm": 1.0214749663440856, + "learning_rate": 6.416439064517818e-07, + "loss": 0.7422837018966675, + "step": 5520 + }, + { + "epoch": 1.2721198156682028, + "grad_norm": 1.2499390785362547, + "learning_rate": 6.412882516884562e-07, + "loss": 1.0155640840530396, + "step": 5521 + }, + { + "epoch": 1.2723502304147465, + "grad_norm": 1.489615968336023, + "learning_rate": 6.409326489916658e-07, + "loss": 0.8097087144851685, + "step": 5522 + }, + { + "epoch": 1.2725806451612902, + "grad_norm": 1.293861875643454, + "learning_rate": 6.405770984130257e-07, + "loss": 0.8545565009117126, + "step": 5523 + }, + { + "epoch": 1.2728110599078342, + "grad_norm": 0.9914622760341439, + "learning_rate": 6.402216000041445e-07, + "loss": 0.6765652298927307, + "step": 5524 + }, + { + "epoch": 1.273041474654378, + "grad_norm": 1.103390848542702, + "learning_rate": 6.398661538166217e-07, + "loss": 0.7964426875114441, + "step": 5525 + }, + { + "epoch": 1.2732718894009216, + "grad_norm": 1.2196724846653912, + "learning_rate": 6.395107599020495e-07, + "loss": 0.7449651956558228, + "step": 5526 + }, + { + "epoch": 1.2735023041474656, + "grad_norm": 1.5614043870867116, + "learning_rate": 6.391554183120138e-07, + "loss": 0.8639888167381287, + "step": 5527 + }, + { + "epoch": 1.2737327188940093, + "grad_norm": 1.046130673497984, + 
"learning_rate": 6.388001290980914e-07, + "loss": 0.7668901681900024, + "step": 5528 + }, + { + "epoch": 1.273963133640553, + "grad_norm": 1.082923428749424, + "learning_rate": 6.384448923118517e-07, + "loss": 0.6461849212646484, + "step": 5529 + }, + { + "epoch": 1.2741935483870968, + "grad_norm": 1.1539877219125736, + "learning_rate": 6.380897080048576e-07, + "loss": 0.7045707702636719, + "step": 5530 + }, + { + "epoch": 1.2744239631336405, + "grad_norm": 1.1893221959186644, + "learning_rate": 6.377345762286632e-07, + "loss": 0.8303793668746948, + "step": 5531 + }, + { + "epoch": 1.2746543778801844, + "grad_norm": 1.112799220738114, + "learning_rate": 6.373794970348152e-07, + "loss": 0.808259129524231, + "step": 5532 + }, + { + "epoch": 1.2748847926267282, + "grad_norm": 1.527249581557179, + "learning_rate": 6.370244704748535e-07, + "loss": 0.8224689960479736, + "step": 5533 + }, + { + "epoch": 1.2751152073732719, + "grad_norm": 1.4408900318423565, + "learning_rate": 6.366694966003089e-07, + "loss": 0.8559266328811646, + "step": 5534 + }, + { + "epoch": 1.2753456221198156, + "grad_norm": 1.3225808297843282, + "learning_rate": 6.363145754627063e-07, + "loss": 0.7972407341003418, + "step": 5535 + }, + { + "epoch": 1.2755760368663593, + "grad_norm": 0.9700139233174567, + "learning_rate": 6.359597071135618e-07, + "loss": 0.7750328779220581, + "step": 5536 + }, + { + "epoch": 1.2758064516129033, + "grad_norm": 1.3472908531853058, + "learning_rate": 6.356048916043836e-07, + "loss": 0.807072639465332, + "step": 5537 + }, + { + "epoch": 1.276036866359447, + "grad_norm": 1.2153299361350896, + "learning_rate": 6.35250128986673e-07, + "loss": 0.8459323048591614, + "step": 5538 + }, + { + "epoch": 1.2762672811059907, + "grad_norm": 1.1921452547723677, + "learning_rate": 6.348954193119233e-07, + "loss": 0.7874447107315063, + "step": 5539 + }, + { + "epoch": 1.2764976958525347, + "grad_norm": 1.243785118643696, + "learning_rate": 6.345407626316202e-07, + "loss": 
0.8817394971847534, + "step": 5540 + }, + { + "epoch": 1.2767281105990782, + "grad_norm": 1.0210963009280363, + "learning_rate": 6.341861589972417e-07, + "loss": 0.7936382293701172, + "step": 5541 + }, + { + "epoch": 1.2769585253456222, + "grad_norm": 1.1288567171733945, + "learning_rate": 6.33831608460258e-07, + "loss": 0.7301348447799683, + "step": 5542 + }, + { + "epoch": 1.2771889400921659, + "grad_norm": 0.9930019172389213, + "learning_rate": 6.334771110721311e-07, + "loss": 0.6546784043312073, + "step": 5543 + }, + { + "epoch": 1.2774193548387096, + "grad_norm": 1.1320345708885517, + "learning_rate": 6.331226668843168e-07, + "loss": 0.798918604850769, + "step": 5544 + }, + { + "epoch": 1.2776497695852536, + "grad_norm": 1.0677491026042323, + "learning_rate": 6.327682759482618e-07, + "loss": 0.6275264620780945, + "step": 5545 + }, + { + "epoch": 1.2778801843317973, + "grad_norm": 1.1056891749814017, + "learning_rate": 6.324139383154048e-07, + "loss": 0.6870732307434082, + "step": 5546 + }, + { + "epoch": 1.278110599078341, + "grad_norm": 1.113302907194177, + "learning_rate": 6.320596540371785e-07, + "loss": 0.8280556201934814, + "step": 5547 + }, + { + "epoch": 1.2783410138248847, + "grad_norm": 1.0958194382001605, + "learning_rate": 6.317054231650063e-07, + "loss": 0.8053648471832275, + "step": 5548 + }, + { + "epoch": 1.2785714285714285, + "grad_norm": 1.1500355966221105, + "learning_rate": 6.313512457503043e-07, + "loss": 0.7628893852233887, + "step": 5549 + }, + { + "epoch": 1.2788018433179724, + "grad_norm": 1.1770420137500979, + "learning_rate": 6.30997121844481e-07, + "loss": 0.8075753450393677, + "step": 5550 + }, + { + "epoch": 1.2790322580645161, + "grad_norm": 1.1420933628102303, + "learning_rate": 6.306430514989371e-07, + "loss": 0.7883275747299194, + "step": 5551 + }, + { + "epoch": 1.2792626728110599, + "grad_norm": 1.238710939895555, + "learning_rate": 6.302890347650648e-07, + "loss": 0.7438768744468689, + "step": 5552 + }, + { + "epoch": 
1.2794930875576038, + "grad_norm": 1.261177122589368, + "learning_rate": 6.299350716942501e-07, + "loss": 0.7756023406982422, + "step": 5553 + }, + { + "epoch": 1.2797235023041473, + "grad_norm": 1.0915753285175969, + "learning_rate": 6.295811623378698e-07, + "loss": 0.7128444910049438, + "step": 5554 + }, + { + "epoch": 1.2799539170506913, + "grad_norm": 0.9707581386208312, + "learning_rate": 6.292273067472931e-07, + "loss": 0.7611228823661804, + "step": 5555 + }, + { + "epoch": 1.280184331797235, + "grad_norm": 1.0553125250063393, + "learning_rate": 6.288735049738822e-07, + "loss": 0.7803670167922974, + "step": 5556 + }, + { + "epoch": 1.2804147465437787, + "grad_norm": 1.0703973986821036, + "learning_rate": 6.28519757068991e-07, + "loss": 0.958204448223114, + "step": 5557 + }, + { + "epoch": 1.2806451612903227, + "grad_norm": 1.1879640741186497, + "learning_rate": 6.28166063083965e-07, + "loss": 0.7220249772071838, + "step": 5558 + }, + { + "epoch": 1.2808755760368664, + "grad_norm": 1.4250311227945265, + "learning_rate": 6.278124230701427e-07, + "loss": 0.7396695613861084, + "step": 5559 + }, + { + "epoch": 1.2811059907834101, + "grad_norm": 1.1549531480718158, + "learning_rate": 6.274588370788545e-07, + "loss": 0.819474458694458, + "step": 5560 + }, + { + "epoch": 1.2813364055299539, + "grad_norm": 1.0583859146786307, + "learning_rate": 6.271053051614231e-07, + "loss": 0.6997617483139038, + "step": 5561 + }, + { + "epoch": 1.2815668202764976, + "grad_norm": 1.1462805534929357, + "learning_rate": 6.26751827369163e-07, + "loss": 0.7526183128356934, + "step": 5562 + }, + { + "epoch": 1.2817972350230415, + "grad_norm": 1.3576714493720627, + "learning_rate": 6.263984037533805e-07, + "loss": 0.7185813188552856, + "step": 5563 + }, + { + "epoch": 1.2820276497695853, + "grad_norm": 0.9722151716418193, + "learning_rate": 6.260450343653757e-07, + "loss": 0.7739845514297485, + "step": 5564 + }, + { + "epoch": 1.282258064516129, + "grad_norm": 1.0387058407540612, + 
"learning_rate": 6.25691719256439e-07, + "loss": 0.698557436466217, + "step": 5565 + }, + { + "epoch": 1.2824884792626727, + "grad_norm": 1.1402265972621366, + "learning_rate": 6.253384584778534e-07, + "loss": 0.6946271657943726, + "step": 5566 + }, + { + "epoch": 1.2827188940092165, + "grad_norm": 1.2349626326096388, + "learning_rate": 6.24985252080895e-07, + "loss": 0.7746025323867798, + "step": 5567 + }, + { + "epoch": 1.2829493087557604, + "grad_norm": 1.050385772264468, + "learning_rate": 6.246321001168306e-07, + "loss": 0.8759660720825195, + "step": 5568 + }, + { + "epoch": 1.2831797235023041, + "grad_norm": 1.1535965526965875, + "learning_rate": 6.2427900263692e-07, + "loss": 0.741111159324646, + "step": 5569 + }, + { + "epoch": 1.2834101382488479, + "grad_norm": 1.2619269860039752, + "learning_rate": 6.239259596924149e-07, + "loss": 0.8580630421638489, + "step": 5570 + }, + { + "epoch": 1.2836405529953918, + "grad_norm": 1.0890841483076914, + "learning_rate": 6.235729713345588e-07, + "loss": 0.7139618992805481, + "step": 5571 + }, + { + "epoch": 1.2838709677419355, + "grad_norm": 1.1260979019373678, + "learning_rate": 6.232200376145873e-07, + "loss": 0.8300976753234863, + "step": 5572 + }, + { + "epoch": 1.2841013824884793, + "grad_norm": 1.091655687939806, + "learning_rate": 6.228671585837288e-07, + "loss": 0.7193114757537842, + "step": 5573 + }, + { + "epoch": 1.284331797235023, + "grad_norm": 1.289214780103651, + "learning_rate": 6.225143342932031e-07, + "loss": 0.8802851438522339, + "step": 5574 + }, + { + "epoch": 1.2845622119815667, + "grad_norm": 1.069264068692084, + "learning_rate": 6.221615647942217e-07, + "loss": 0.749543309211731, + "step": 5575 + }, + { + "epoch": 1.2847926267281107, + "grad_norm": 1.1044047193035296, + "learning_rate": 6.218088501379892e-07, + "loss": 0.703508734703064, + "step": 5576 + }, + { + "epoch": 1.2850230414746544, + "grad_norm": 1.4722305319077136, + "learning_rate": 6.214561903757017e-07, + "loss": 
0.7519023418426514, + "step": 5577 + }, + { + "epoch": 1.2852534562211981, + "grad_norm": 1.4130549197431626, + "learning_rate": 6.211035855585466e-07, + "loss": 0.9525241851806641, + "step": 5578 + }, + { + "epoch": 1.2854838709677419, + "grad_norm": 1.3149636986285136, + "learning_rate": 6.207510357377046e-07, + "loss": 0.8288872241973877, + "step": 5579 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.3691241647074333, + "learning_rate": 6.203985409643478e-07, + "loss": 0.8531112670898438, + "step": 5580 + }, + { + "epoch": 1.2859447004608295, + "grad_norm": 1.121519108666965, + "learning_rate": 6.200461012896401e-07, + "loss": 0.7106495499610901, + "step": 5581 + }, + { + "epoch": 1.2861751152073733, + "grad_norm": 1.426451214846877, + "learning_rate": 6.19693716764738e-07, + "loss": 0.714931845664978, + "step": 5582 + }, + { + "epoch": 1.286405529953917, + "grad_norm": 1.3296169647206766, + "learning_rate": 6.19341387440789e-07, + "loss": 0.8281360268592834, + "step": 5583 + }, + { + "epoch": 1.286635944700461, + "grad_norm": 1.4833656768811476, + "learning_rate": 6.189891133689342e-07, + "loss": 0.9155910611152649, + "step": 5584 + }, + { + "epoch": 1.2868663594470047, + "grad_norm": 1.3432683189972507, + "learning_rate": 6.186368946003051e-07, + "loss": 0.7573060989379883, + "step": 5585 + }, + { + "epoch": 1.2870967741935484, + "grad_norm": 1.2055594370265132, + "learning_rate": 6.182847311860255e-07, + "loss": 0.6994235515594482, + "step": 5586 + }, + { + "epoch": 1.2873271889400921, + "grad_norm": 1.0775806715124838, + "learning_rate": 6.179326231772123e-07, + "loss": 0.771092414855957, + "step": 5587 + }, + { + "epoch": 1.2875576036866359, + "grad_norm": 1.269208775599209, + "learning_rate": 6.17580570624973e-07, + "loss": 0.7470684051513672, + "step": 5588 + }, + { + "epoch": 1.2877880184331798, + "grad_norm": 1.5425254092924614, + "learning_rate": 6.172285735804075e-07, + "loss": 0.918886125087738, + "step": 5589 + }, + { + "epoch": 
1.2880184331797235, + "grad_norm": 1.0377944178544696, + "learning_rate": 6.16876632094608e-07, + "loss": 0.7232617139816284, + "step": 5590 + }, + { + "epoch": 1.2882488479262673, + "grad_norm": 1.1703799662994099, + "learning_rate": 6.16524746218658e-07, + "loss": 0.7367006540298462, + "step": 5591 + }, + { + "epoch": 1.288479262672811, + "grad_norm": 1.1904508940632728, + "learning_rate": 6.161729160036333e-07, + "loss": 0.8783999681472778, + "step": 5592 + }, + { + "epoch": 1.2887096774193547, + "grad_norm": 1.1869935665885074, + "learning_rate": 6.158211415006019e-07, + "loss": 0.8266523480415344, + "step": 5593 + }, + { + "epoch": 1.2889400921658987, + "grad_norm": 1.1675308279856504, + "learning_rate": 6.154694227606234e-07, + "loss": 0.8528730869293213, + "step": 5594 + }, + { + "epoch": 1.2891705069124424, + "grad_norm": 1.3182250244296418, + "learning_rate": 6.151177598347485e-07, + "loss": 0.7586283683776855, + "step": 5595 + }, + { + "epoch": 1.2894009216589861, + "grad_norm": 1.4182043487427547, + "learning_rate": 6.147661527740217e-07, + "loss": 0.8671954870223999, + "step": 5596 + }, + { + "epoch": 1.28963133640553, + "grad_norm": 1.081063839615246, + "learning_rate": 6.14414601629478e-07, + "loss": 0.7354376316070557, + "step": 5597 + }, + { + "epoch": 1.2898617511520738, + "grad_norm": 1.051384434692424, + "learning_rate": 6.140631064521443e-07, + "loss": 0.8515663146972656, + "step": 5598 + }, + { + "epoch": 1.2900921658986175, + "grad_norm": 1.3608023513745535, + "learning_rate": 6.137116672930395e-07, + "loss": 0.9068351984024048, + "step": 5599 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 1.4956373283031226, + "learning_rate": 6.133602842031752e-07, + "loss": 0.7260826230049133, + "step": 5600 + }, + { + "epoch": 1.290552995391705, + "grad_norm": 1.1400144341772105, + "learning_rate": 6.130089572335535e-07, + "loss": 0.7162504196166992, + "step": 5601 + }, + { + "epoch": 1.290783410138249, + "grad_norm": 1.2203621133034757, + 
"learning_rate": 6.126576864351695e-07, + "loss": 0.7625414133071899, + "step": 5602 + }, + { + "epoch": 1.2910138248847927, + "grad_norm": 1.0985405517526388, + "learning_rate": 6.123064718590099e-07, + "loss": 0.787274956703186, + "step": 5603 + }, + { + "epoch": 1.2912442396313364, + "grad_norm": 1.0173148522997915, + "learning_rate": 6.119553135560519e-07, + "loss": 0.6539326310157776, + "step": 5604 + }, + { + "epoch": 1.2914746543778801, + "grad_norm": 1.0405810111847797, + "learning_rate": 6.11604211577267e-07, + "loss": 0.8481189012527466, + "step": 5605 + }, + { + "epoch": 1.2917050691244238, + "grad_norm": 1.1908108884253377, + "learning_rate": 6.112531659736164e-07, + "loss": 0.794892430305481, + "step": 5606 + }, + { + "epoch": 1.2919354838709678, + "grad_norm": 1.0728869697567227, + "learning_rate": 6.10902176796054e-07, + "loss": 0.6738630533218384, + "step": 5607 + }, + { + "epoch": 1.2921658986175115, + "grad_norm": 1.2190379429225964, + "learning_rate": 6.105512440955258e-07, + "loss": 0.7220937609672546, + "step": 5608 + }, + { + "epoch": 1.2923963133640552, + "grad_norm": 0.9117229942004119, + "learning_rate": 6.102003679229688e-07, + "loss": 0.6831785440444946, + "step": 5609 + }, + { + "epoch": 1.2926267281105992, + "grad_norm": 1.0925904509799125, + "learning_rate": 6.098495483293125e-07, + "loss": 0.7033277750015259, + "step": 5610 + }, + { + "epoch": 1.292857142857143, + "grad_norm": 0.9024231402190447, + "learning_rate": 6.094987853654779e-07, + "loss": 0.7063429355621338, + "step": 5611 + }, + { + "epoch": 1.2930875576036867, + "grad_norm": 1.1531814321684226, + "learning_rate": 6.091480790823771e-07, + "loss": 0.7791472673416138, + "step": 5612 + }, + { + "epoch": 1.2933179723502304, + "grad_norm": 1.3904591821034944, + "learning_rate": 6.087974295309157e-07, + "loss": 0.8674220442771912, + "step": 5613 + }, + { + "epoch": 1.293548387096774, + "grad_norm": 1.0513898416349883, + "learning_rate": 6.084468367619895e-07, + "loss": 
0.7878479957580566, + "step": 5614 + }, + { + "epoch": 1.293778801843318, + "grad_norm": 0.9253694996288483, + "learning_rate": 6.080963008264861e-07, + "loss": 0.7019612789154053, + "step": 5615 + }, + { + "epoch": 1.2940092165898618, + "grad_norm": 1.1163623788947772, + "learning_rate": 6.077458217752863e-07, + "loss": 0.68759685754776, + "step": 5616 + }, + { + "epoch": 1.2942396313364055, + "grad_norm": 1.1326420080908837, + "learning_rate": 6.073953996592612e-07, + "loss": 0.851733922958374, + "step": 5617 + }, + { + "epoch": 1.2944700460829492, + "grad_norm": 1.1539848484030915, + "learning_rate": 6.070450345292739e-07, + "loss": 0.699798047542572, + "step": 5618 + }, + { + "epoch": 1.294700460829493, + "grad_norm": 1.3439745934739915, + "learning_rate": 6.066947264361798e-07, + "loss": 0.8625125885009766, + "step": 5619 + }, + { + "epoch": 1.294930875576037, + "grad_norm": 1.2395704270447963, + "learning_rate": 6.063444754308253e-07, + "loss": 0.759062647819519, + "step": 5620 + }, + { + "epoch": 1.2951612903225806, + "grad_norm": 1.1349706072725887, + "learning_rate": 6.059942815640491e-07, + "loss": 0.7549973726272583, + "step": 5621 + }, + { + "epoch": 1.2953917050691244, + "grad_norm": 1.2217826699562653, + "learning_rate": 6.056441448866816e-07, + "loss": 0.8142743110656738, + "step": 5622 + }, + { + "epoch": 1.2956221198156683, + "grad_norm": 1.0818175637274867, + "learning_rate": 6.052940654495442e-07, + "loss": 0.7881144881248474, + "step": 5623 + }, + { + "epoch": 1.295852534562212, + "grad_norm": 1.2201407031885296, + "learning_rate": 6.049440433034505e-07, + "loss": 0.7922053933143616, + "step": 5624 + }, + { + "epoch": 1.2960829493087558, + "grad_norm": 1.1955381878542082, + "learning_rate": 6.045940784992061e-07, + "loss": 0.6808311939239502, + "step": 5625 + }, + { + "epoch": 1.2963133640552995, + "grad_norm": 1.203534246478074, + "learning_rate": 6.04244171087608e-07, + "loss": 0.933373749256134, + "step": 5626 + }, + { + "epoch": 
1.2965437788018432, + "grad_norm": 1.3722573775025653, + "learning_rate": 6.038943211194439e-07, + "loss": 0.8077404499053955, + "step": 5627 + }, + { + "epoch": 1.2967741935483872, + "grad_norm": 1.2263754202708472, + "learning_rate": 6.035445286454953e-07, + "loss": 0.7920867204666138, + "step": 5628 + }, + { + "epoch": 1.297004608294931, + "grad_norm": 1.1574994086499075, + "learning_rate": 6.031947937165335e-07, + "loss": 0.5872117280960083, + "step": 5629 + }, + { + "epoch": 1.2972350230414746, + "grad_norm": 1.2959093642025599, + "learning_rate": 6.02845116383322e-07, + "loss": 0.8593505620956421, + "step": 5630 + }, + { + "epoch": 1.2974654377880184, + "grad_norm": 1.4149025135483138, + "learning_rate": 6.02495496696616e-07, + "loss": 0.8352359533309937, + "step": 5631 + }, + { + "epoch": 1.297695852534562, + "grad_norm": 1.1724909355958724, + "learning_rate": 6.021459347071623e-07, + "loss": 0.7316182255744934, + "step": 5632 + }, + { + "epoch": 1.297926267281106, + "grad_norm": 1.1972298924235394, + "learning_rate": 6.017964304656997e-07, + "loss": 0.7294400334358215, + "step": 5633 + }, + { + "epoch": 1.2981566820276498, + "grad_norm": 1.0769002788322786, + "learning_rate": 6.014469840229581e-07, + "loss": 0.6595947742462158, + "step": 5634 + }, + { + "epoch": 1.2983870967741935, + "grad_norm": 1.308087510592029, + "learning_rate": 6.010975954296587e-07, + "loss": 0.7849195003509521, + "step": 5635 + }, + { + "epoch": 1.2986175115207375, + "grad_norm": 1.0709465804551583, + "learning_rate": 6.007482647365159e-07, + "loss": 0.6915944218635559, + "step": 5636 + }, + { + "epoch": 1.2988479262672812, + "grad_norm": 1.1595852934519908, + "learning_rate": 6.003989919942338e-07, + "loss": 0.6821994781494141, + "step": 5637 + }, + { + "epoch": 1.299078341013825, + "grad_norm": 1.0472078656298618, + "learning_rate": 6.000497772535087e-07, + "loss": 0.7333718538284302, + "step": 5638 + }, + { + "epoch": 1.2993087557603686, + "grad_norm": 1.0656731272596272, + 
"learning_rate": 5.997006205650292e-07, + "loss": 0.8069280385971069, + "step": 5639 + }, + { + "epoch": 1.2995391705069124, + "grad_norm": 1.0655856429852437, + "learning_rate": 5.993515219794745e-07, + "loss": 0.6989297866821289, + "step": 5640 + }, + { + "epoch": 1.2997695852534563, + "grad_norm": 1.187477589278957, + "learning_rate": 5.990024815475161e-07, + "loss": 0.7784403562545776, + "step": 5641 + }, + { + "epoch": 1.3, + "grad_norm": 1.2512602653388225, + "learning_rate": 5.986534993198168e-07, + "loss": 0.6554181575775146, + "step": 5642 + }, + { + "epoch": 1.3002304147465438, + "grad_norm": 1.298436931300319, + "learning_rate": 5.983045753470307e-07, + "loss": 0.7647836208343506, + "step": 5643 + }, + { + "epoch": 1.3004608294930875, + "grad_norm": 0.9269247679622435, + "learning_rate": 5.979557096798033e-07, + "loss": 0.7787084579467773, + "step": 5644 + }, + { + "epoch": 1.3006912442396312, + "grad_norm": 1.0646184845326898, + "learning_rate": 5.97606902368773e-07, + "loss": 0.6367940902709961, + "step": 5645 + }, + { + "epoch": 1.3009216589861752, + "grad_norm": 1.0481428990706296, + "learning_rate": 5.972581534645679e-07, + "loss": 0.7650243043899536, + "step": 5646 + }, + { + "epoch": 1.301152073732719, + "grad_norm": 0.9452672150266047, + "learning_rate": 5.969094630178084e-07, + "loss": 0.6506018042564392, + "step": 5647 + }, + { + "epoch": 1.3013824884792626, + "grad_norm": 1.4764262273840163, + "learning_rate": 5.965608310791071e-07, + "loss": 0.7351242303848267, + "step": 5648 + }, + { + "epoch": 1.3016129032258066, + "grad_norm": 1.2210251097969258, + "learning_rate": 5.96212257699067e-07, + "loss": 0.7327077984809875, + "step": 5649 + }, + { + "epoch": 1.3018433179723503, + "grad_norm": 1.0681197005600311, + "learning_rate": 5.958637429282831e-07, + "loss": 0.6448171138763428, + "step": 5650 + }, + { + "epoch": 1.302073732718894, + "grad_norm": 1.18574113940407, + "learning_rate": 5.955152868173418e-07, + "loss": 0.8347861766815186, + 
"step": 5651 + }, + { + "epoch": 1.3023041474654378, + "grad_norm": 1.2733315501094051, + "learning_rate": 5.951668894168215e-07, + "loss": 0.736280620098114, + "step": 5652 + }, + { + "epoch": 1.3025345622119815, + "grad_norm": 1.2627292373923777, + "learning_rate": 5.948185507772908e-07, + "loss": 0.8677594661712646, + "step": 5653 + }, + { + "epoch": 1.3027649769585254, + "grad_norm": 1.1729788728933164, + "learning_rate": 5.944702709493113e-07, + "loss": 0.6598676443099976, + "step": 5654 + }, + { + "epoch": 1.3029953917050692, + "grad_norm": 1.1072155159392119, + "learning_rate": 5.941220499834352e-07, + "loss": 0.7795349359512329, + "step": 5655 + }, + { + "epoch": 1.303225806451613, + "grad_norm": 1.1312979891837796, + "learning_rate": 5.937738879302058e-07, + "loss": 0.6929318904876709, + "step": 5656 + }, + { + "epoch": 1.3034562211981566, + "grad_norm": 1.19931324162024, + "learning_rate": 5.934257848401593e-07, + "loss": 0.859328031539917, + "step": 5657 + }, + { + "epoch": 1.3036866359447004, + "grad_norm": 1.435339518052459, + "learning_rate": 5.930777407638216e-07, + "loss": 1.0015549659729004, + "step": 5658 + }, + { + "epoch": 1.3039170506912443, + "grad_norm": 1.0471647927751007, + "learning_rate": 5.927297557517115e-07, + "loss": 0.6775785088539124, + "step": 5659 + }, + { + "epoch": 1.304147465437788, + "grad_norm": 1.0488503999959857, + "learning_rate": 5.923818298543378e-07, + "loss": 0.7228262424468994, + "step": 5660 + }, + { + "epoch": 1.3043778801843318, + "grad_norm": 0.9177755631443217, + "learning_rate": 5.92033963122202e-07, + "loss": 0.6139897108078003, + "step": 5661 + }, + { + "epoch": 1.3046082949308757, + "grad_norm": 1.062819188029367, + "learning_rate": 5.916861556057965e-07, + "loss": 0.7336323261260986, + "step": 5662 + }, + { + "epoch": 1.3048387096774192, + "grad_norm": 1.1985877666304134, + "learning_rate": 5.913384073556049e-07, + "loss": 0.9223559498786926, + "step": 5663 + }, + { + "epoch": 1.3050691244239632, + 
"grad_norm": 1.1960311086176088, + "learning_rate": 5.909907184221023e-07, + "loss": 0.7230484485626221, + "step": 5664 + }, + { + "epoch": 1.305299539170507, + "grad_norm": 1.1557586988240278, + "learning_rate": 5.906430888557556e-07, + "loss": 0.753510594367981, + "step": 5665 + }, + { + "epoch": 1.3055299539170506, + "grad_norm": 1.2167084005991546, + "learning_rate": 5.902955187070229e-07, + "loss": 0.8960593938827515, + "step": 5666 + }, + { + "epoch": 1.3057603686635946, + "grad_norm": 0.9226031223011045, + "learning_rate": 5.899480080263527e-07, + "loss": 0.6865993738174438, + "step": 5667 + }, + { + "epoch": 1.3059907834101383, + "grad_norm": 1.2350884878154553, + "learning_rate": 5.896005568641868e-07, + "loss": 0.7748720645904541, + "step": 5668 + }, + { + "epoch": 1.306221198156682, + "grad_norm": 1.437104451012044, + "learning_rate": 5.892531652709567e-07, + "loss": 0.834233283996582, + "step": 5669 + }, + { + "epoch": 1.3064516129032258, + "grad_norm": 1.2209490689427414, + "learning_rate": 5.889058332970858e-07, + "loss": 0.8398417234420776, + "step": 5670 + }, + { + "epoch": 1.3066820276497695, + "grad_norm": 0.8546573405192346, + "learning_rate": 5.885585609929891e-07, + "loss": 0.6889529228210449, + "step": 5671 + }, + { + "epoch": 1.3069124423963134, + "grad_norm": 1.1935289122089947, + "learning_rate": 5.882113484090725e-07, + "loss": 0.6625782251358032, + "step": 5672 + }, + { + "epoch": 1.3071428571428572, + "grad_norm": 1.2286244905882078, + "learning_rate": 5.878641955957334e-07, + "loss": 0.7774407267570496, + "step": 5673 + }, + { + "epoch": 1.307373271889401, + "grad_norm": 1.066003573867245, + "learning_rate": 5.875171026033608e-07, + "loss": 0.7799595594406128, + "step": 5674 + }, + { + "epoch": 1.3076036866359446, + "grad_norm": 1.2859461118878832, + "learning_rate": 5.87170069482335e-07, + "loss": 0.800041913986206, + "step": 5675 + }, + { + "epoch": 1.3078341013824883, + "grad_norm": 1.2986825545894243, + "learning_rate": 
5.868230962830265e-07, + "loss": 0.7478667497634888, + "step": 5676 + }, + { + "epoch": 1.3080645161290323, + "grad_norm": 0.9705514903251621, + "learning_rate": 5.86476183055799e-07, + "loss": 0.7538981437683105, + "step": 5677 + }, + { + "epoch": 1.308294930875576, + "grad_norm": 1.4195819337110585, + "learning_rate": 5.861293298510061e-07, + "loss": 0.7556810975074768, + "step": 5678 + }, + { + "epoch": 1.3085253456221198, + "grad_norm": 0.9225289666667563, + "learning_rate": 5.85782536718993e-07, + "loss": 0.670037031173706, + "step": 5679 + }, + { + "epoch": 1.3087557603686637, + "grad_norm": 1.1667524105558311, + "learning_rate": 5.854358037100964e-07, + "loss": 0.6238662600517273, + "step": 5680 + }, + { + "epoch": 1.3089861751152074, + "grad_norm": 1.1817165911107195, + "learning_rate": 5.85089130874644e-07, + "loss": 0.7972823977470398, + "step": 5681 + }, + { + "epoch": 1.3092165898617512, + "grad_norm": 1.0746427307389195, + "learning_rate": 5.847425182629549e-07, + "loss": 0.7332338094711304, + "step": 5682 + }, + { + "epoch": 1.3094470046082949, + "grad_norm": 1.2496997052714673, + "learning_rate": 5.843959659253398e-07, + "loss": 0.8186966180801392, + "step": 5683 + }, + { + "epoch": 1.3096774193548386, + "grad_norm": 1.2708999919485935, + "learning_rate": 5.840494739120996e-07, + "loss": 0.8207032680511475, + "step": 5684 + }, + { + "epoch": 1.3099078341013826, + "grad_norm": 1.4960688490449285, + "learning_rate": 5.83703042273528e-07, + "loss": 0.848265528678894, + "step": 5685 + }, + { + "epoch": 1.3101382488479263, + "grad_norm": 1.0212687278019523, + "learning_rate": 5.833566710599088e-07, + "loss": 0.7766404151916504, + "step": 5686 + }, + { + "epoch": 1.31036866359447, + "grad_norm": 1.2185059104564926, + "learning_rate": 5.830103603215168e-07, + "loss": 0.7570784687995911, + "step": 5687 + }, + { + "epoch": 1.3105990783410137, + "grad_norm": 1.1006353524996257, + "learning_rate": 5.826641101086194e-07, + "loss": 0.7551493644714355, + "step": 
5688 + }, + { + "epoch": 1.3108294930875575, + "grad_norm": 1.3664942507199704, + "learning_rate": 5.823179204714739e-07, + "loss": 0.8589804172515869, + "step": 5689 + }, + { + "epoch": 1.3110599078341014, + "grad_norm": 1.2869604696659869, + "learning_rate": 5.819717914603288e-07, + "loss": 0.8252761960029602, + "step": 5690 + }, + { + "epoch": 1.3112903225806452, + "grad_norm": 1.0886628872971145, + "learning_rate": 5.816257231254254e-07, + "loss": 0.7784370183944702, + "step": 5691 + }, + { + "epoch": 1.3115207373271889, + "grad_norm": 1.1343775846575583, + "learning_rate": 5.812797155169942e-07, + "loss": 0.8040215969085693, + "step": 5692 + }, + { + "epoch": 1.3117511520737328, + "grad_norm": 1.013609351306971, + "learning_rate": 5.809337686852582e-07, + "loss": 0.8355100154876709, + "step": 5693 + }, + { + "epoch": 1.3119815668202766, + "grad_norm": 1.466649672488184, + "learning_rate": 5.805878826804303e-07, + "loss": 0.8233312368392944, + "step": 5694 + }, + { + "epoch": 1.3122119815668203, + "grad_norm": 1.1563119764352225, + "learning_rate": 5.802420575527165e-07, + "loss": 0.7756507992744446, + "step": 5695 + }, + { + "epoch": 1.312442396313364, + "grad_norm": 1.1867005828091945, + "learning_rate": 5.798962933523124e-07, + "loss": 0.7503829002380371, + "step": 5696 + }, + { + "epoch": 1.3126728110599077, + "grad_norm": 1.506327103479739, + "learning_rate": 5.795505901294051e-07, + "loss": 0.749663770198822, + "step": 5697 + }, + { + "epoch": 1.3129032258064517, + "grad_norm": 1.440884605575443, + "learning_rate": 5.792049479341732e-07, + "loss": 0.9003115296363831, + "step": 5698 + }, + { + "epoch": 1.3131336405529954, + "grad_norm": 1.059615932759845, + "learning_rate": 5.788593668167854e-07, + "loss": 0.655732274055481, + "step": 5699 + }, + { + "epoch": 1.3133640552995391, + "grad_norm": 0.9900775273356892, + "learning_rate": 5.785138468274036e-07, + "loss": 0.7318822145462036, + "step": 5700 + }, + { + "epoch": 1.3135944700460829, + "grad_norm": 
0.9099775921199348, + "learning_rate": 5.781683880161788e-07, + "loss": 0.6512752771377563, + "step": 5701 + }, + { + "epoch": 1.3138248847926266, + "grad_norm": 1.1289875219473309, + "learning_rate": 5.778229904332537e-07, + "loss": 0.7232785820960999, + "step": 5702 + }, + { + "epoch": 1.3140552995391706, + "grad_norm": 1.2645196269426846, + "learning_rate": 5.77477654128763e-07, + "loss": 0.837032675743103, + "step": 5703 + }, + { + "epoch": 1.3142857142857143, + "grad_norm": 1.4984544841183642, + "learning_rate": 5.771323791528315e-07, + "loss": 0.926714301109314, + "step": 5704 + }, + { + "epoch": 1.314516129032258, + "grad_norm": 1.1221666474084682, + "learning_rate": 5.76787165555575e-07, + "loss": 0.7228986620903015, + "step": 5705 + }, + { + "epoch": 1.314746543778802, + "grad_norm": 1.3618848390091767, + "learning_rate": 5.764420133871015e-07, + "loss": 0.8330450057983398, + "step": 5706 + }, + { + "epoch": 1.3149769585253457, + "grad_norm": 1.2680150111326054, + "learning_rate": 5.760969226975088e-07, + "loss": 0.793700098991394, + "step": 5707 + }, + { + "epoch": 1.3152073732718894, + "grad_norm": 1.2897950240071954, + "learning_rate": 5.757518935368868e-07, + "loss": 0.8797321319580078, + "step": 5708 + }, + { + "epoch": 1.3154377880184331, + "grad_norm": 1.1147531221594877, + "learning_rate": 5.754069259553159e-07, + "loss": 0.8772039413452148, + "step": 5709 + }, + { + "epoch": 1.3156682027649769, + "grad_norm": 0.820739065285044, + "learning_rate": 5.750620200028672e-07, + "loss": 0.5998358726501465, + "step": 5710 + }, + { + "epoch": 1.3158986175115208, + "grad_norm": 1.7932534766511148, + "learning_rate": 5.747171757296041e-07, + "loss": 0.7694767713546753, + "step": 5711 + }, + { + "epoch": 1.3161290322580645, + "grad_norm": 1.2782062967169578, + "learning_rate": 5.7437239318558e-07, + "loss": 0.8526760339736938, + "step": 5712 + }, + { + "epoch": 1.3163594470046083, + "grad_norm": 1.199230266468518, + "learning_rate": 5.740276724208396e-07, + 
"loss": 0.8407987356185913, + "step": 5713 + }, + { + "epoch": 1.316589861751152, + "grad_norm": 1.289466266523787, + "learning_rate": 5.736830134854183e-07, + "loss": 0.9731476306915283, + "step": 5714 + }, + { + "epoch": 1.3168202764976957, + "grad_norm": 1.134122607422213, + "learning_rate": 5.733384164293434e-07, + "loss": 0.7230468988418579, + "step": 5715 + }, + { + "epoch": 1.3170506912442397, + "grad_norm": 1.2031868742095575, + "learning_rate": 5.729938813026327e-07, + "loss": 0.8260238766670227, + "step": 5716 + }, + { + "epoch": 1.3172811059907834, + "grad_norm": 1.0909604007760305, + "learning_rate": 5.726494081552948e-07, + "loss": 0.7616437673568726, + "step": 5717 + }, + { + "epoch": 1.3175115207373271, + "grad_norm": 1.1614064666034054, + "learning_rate": 5.723049970373295e-07, + "loss": 0.7628509998321533, + "step": 5718 + }, + { + "epoch": 1.317741935483871, + "grad_norm": 1.2522299219195512, + "learning_rate": 5.719606479987273e-07, + "loss": 0.744842529296875, + "step": 5719 + }, + { + "epoch": 1.3179723502304148, + "grad_norm": 0.9975745357037148, + "learning_rate": 5.716163610894708e-07, + "loss": 0.7228065133094788, + "step": 5720 + }, + { + "epoch": 1.3182027649769585, + "grad_norm": 1.5461378865588107, + "learning_rate": 5.712721363595325e-07, + "loss": 0.8764907121658325, + "step": 5721 + }, + { + "epoch": 1.3184331797235023, + "grad_norm": 1.0737882176659082, + "learning_rate": 5.709279738588757e-07, + "loss": 0.7966248393058777, + "step": 5722 + }, + { + "epoch": 1.318663594470046, + "grad_norm": 1.4239755183906653, + "learning_rate": 5.705838736374558e-07, + "loss": 0.8983157873153687, + "step": 5723 + }, + { + "epoch": 1.31889400921659, + "grad_norm": 1.1693207378088453, + "learning_rate": 5.70239835745218e-07, + "loss": 0.7349347472190857, + "step": 5724 + }, + { + "epoch": 1.3191244239631337, + "grad_norm": 1.4511397115268243, + "learning_rate": 5.698958602320988e-07, + "loss": 0.9297066926956177, + "step": 5725 + }, + { + "epoch": 
1.3193548387096774, + "grad_norm": 1.0721204261694746, + "learning_rate": 5.695519471480266e-07, + "loss": 0.7106038331985474, + "step": 5726 + }, + { + "epoch": 1.3195852534562211, + "grad_norm": 1.3074916303787611, + "learning_rate": 5.692080965429193e-07, + "loss": 0.8759022951126099, + "step": 5727 + }, + { + "epoch": 1.3198156682027649, + "grad_norm": 1.2039841953988952, + "learning_rate": 5.688643084666862e-07, + "loss": 0.8337300419807434, + "step": 5728 + }, + { + "epoch": 1.3200460829493088, + "grad_norm": 1.2975435530580146, + "learning_rate": 5.685205829692283e-07, + "loss": 0.8543391227722168, + "step": 5729 + }, + { + "epoch": 1.3202764976958525, + "grad_norm": 0.9960252179140261, + "learning_rate": 5.681769201004366e-07, + "loss": 0.7497329711914062, + "step": 5730 + }, + { + "epoch": 1.3205069124423963, + "grad_norm": 1.0615580947761494, + "learning_rate": 5.678333199101929e-07, + "loss": 0.8190964460372925, + "step": 5731 + }, + { + "epoch": 1.3207373271889402, + "grad_norm": 1.1486652227224357, + "learning_rate": 5.674897824483711e-07, + "loss": 0.8233011960983276, + "step": 5732 + }, + { + "epoch": 1.320967741935484, + "grad_norm": 1.2086113696285639, + "learning_rate": 5.671463077648348e-07, + "loss": 0.75257408618927, + "step": 5733 + }, + { + "epoch": 1.3211981566820277, + "grad_norm": 1.0357997575051858, + "learning_rate": 5.668028959094386e-07, + "loss": 0.6468796133995056, + "step": 5734 + }, + { + "epoch": 1.3214285714285714, + "grad_norm": 0.869693175338726, + "learning_rate": 5.664595469320288e-07, + "loss": 0.6756174564361572, + "step": 5735 + }, + { + "epoch": 1.3216589861751151, + "grad_norm": 1.2928038093451135, + "learning_rate": 5.661162608824419e-07, + "loss": 0.9040344953536987, + "step": 5736 + }, + { + "epoch": 1.321889400921659, + "grad_norm": 1.013287726627938, + "learning_rate": 5.657730378105055e-07, + "loss": 0.8082150816917419, + "step": 5737 + }, + { + "epoch": 1.3221198156682028, + "grad_norm": 1.2602760490074278, + 
"learning_rate": 5.654298777660375e-07, + "loss": 0.8760210275650024, + "step": 5738 + }, + { + "epoch": 1.3223502304147465, + "grad_norm": 1.4464070872810626, + "learning_rate": 5.650867807988473e-07, + "loss": 0.6980990171432495, + "step": 5739 + }, + { + "epoch": 1.3225806451612903, + "grad_norm": 0.927469939331727, + "learning_rate": 5.647437469587355e-07, + "loss": 0.6552839279174805, + "step": 5740 + }, + { + "epoch": 1.322811059907834, + "grad_norm": 0.9934566913252004, + "learning_rate": 5.644007762954925e-07, + "loss": 0.8304816484451294, + "step": 5741 + }, + { + "epoch": 1.323041474654378, + "grad_norm": 1.1691146043820817, + "learning_rate": 5.640578688589e-07, + "loss": 0.7977567315101624, + "step": 5742 + }, + { + "epoch": 1.3232718894009217, + "grad_norm": 1.4376891352576404, + "learning_rate": 5.637150246987308e-07, + "loss": 0.7656992673873901, + "step": 5743 + }, + { + "epoch": 1.3235023041474654, + "grad_norm": 1.1120822444951537, + "learning_rate": 5.633722438647483e-07, + "loss": 0.921256422996521, + "step": 5744 + }, + { + "epoch": 1.3237327188940093, + "grad_norm": 1.2718785752085355, + "learning_rate": 5.630295264067063e-07, + "loss": 0.8012785315513611, + "step": 5745 + }, + { + "epoch": 1.323963133640553, + "grad_norm": 1.2403067439539972, + "learning_rate": 5.626868723743504e-07, + "loss": 0.613241970539093, + "step": 5746 + }, + { + "epoch": 1.3241935483870968, + "grad_norm": 1.34086331204533, + "learning_rate": 5.623442818174161e-07, + "loss": 0.7134846448898315, + "step": 5747 + }, + { + "epoch": 1.3244239631336405, + "grad_norm": 1.3127547947642921, + "learning_rate": 5.620017547856295e-07, + "loss": 0.8963242173194885, + "step": 5748 + }, + { + "epoch": 1.3246543778801843, + "grad_norm": 1.3476788930677732, + "learning_rate": 5.616592913287087e-07, + "loss": 0.8401378393173218, + "step": 5749 + }, + { + "epoch": 1.3248847926267282, + "grad_norm": 1.0346861015576712, + "learning_rate": 5.613168914963615e-07, + "loss": 
0.6455308198928833, + "step": 5750 + }, + { + "epoch": 1.325115207373272, + "grad_norm": 1.105933895384034, + "learning_rate": 5.609745553382863e-07, + "loss": 0.6920031905174255, + "step": 5751 + }, + { + "epoch": 1.3253456221198157, + "grad_norm": 1.1001754091297298, + "learning_rate": 5.606322829041737e-07, + "loss": 0.9099706411361694, + "step": 5752 + }, + { + "epoch": 1.3255760368663594, + "grad_norm": 1.3286482905641974, + "learning_rate": 5.602900742437036e-07, + "loss": 0.8034265637397766, + "step": 5753 + }, + { + "epoch": 1.3258064516129031, + "grad_norm": 0.9956708814709011, + "learning_rate": 5.599479294065471e-07, + "loss": 0.7216918468475342, + "step": 5754 + }, + { + "epoch": 1.326036866359447, + "grad_norm": 1.1406371859334326, + "learning_rate": 5.596058484423655e-07, + "loss": 0.7428277730941772, + "step": 5755 + }, + { + "epoch": 1.3262672811059908, + "grad_norm": 1.3052741120899958, + "learning_rate": 5.592638314008127e-07, + "loss": 0.7636011838912964, + "step": 5756 + }, + { + "epoch": 1.3264976958525345, + "grad_norm": 1.3474656843000283, + "learning_rate": 5.589218783315311e-07, + "loss": 0.7765215635299683, + "step": 5757 + }, + { + "epoch": 1.3267281105990785, + "grad_norm": 1.0612768168901736, + "learning_rate": 5.585799892841551e-07, + "loss": 0.6524033546447754, + "step": 5758 + }, + { + "epoch": 1.3269585253456222, + "grad_norm": 1.134076107561282, + "learning_rate": 5.582381643083087e-07, + "loss": 0.8105186223983765, + "step": 5759 + }, + { + "epoch": 1.327188940092166, + "grad_norm": 1.2647095323418043, + "learning_rate": 5.578964034536084e-07, + "loss": 0.7654449939727783, + "step": 5760 + }, + { + "epoch": 1.3274193548387097, + "grad_norm": 1.0086580295069412, + "learning_rate": 5.5755470676966e-07, + "loss": 0.6545592546463013, + "step": 5761 + }, + { + "epoch": 1.3276497695852534, + "grad_norm": 1.1744400728961766, + "learning_rate": 5.572130743060597e-07, + "loss": 0.7116275429725647, + "step": 5762 + }, + { + "epoch": 
1.3278801843317973, + "grad_norm": 1.246651079531453, + "learning_rate": 5.568715061123959e-07, + "loss": 0.8396822214126587, + "step": 5763 + }, + { + "epoch": 1.328110599078341, + "grad_norm": 1.2492632037634621, + "learning_rate": 5.565300022382464e-07, + "loss": 0.6729685664176941, + "step": 5764 + }, + { + "epoch": 1.3283410138248848, + "grad_norm": 1.1356361065972511, + "learning_rate": 5.561885627331795e-07, + "loss": 0.6891340017318726, + "step": 5765 + }, + { + "epoch": 1.3285714285714285, + "grad_norm": 1.1361488307123824, + "learning_rate": 5.558471876467556e-07, + "loss": 0.7232956886291504, + "step": 5766 + }, + { + "epoch": 1.3288018433179722, + "grad_norm": 1.3213229777584583, + "learning_rate": 5.555058770285246e-07, + "loss": 0.7800660133361816, + "step": 5767 + }, + { + "epoch": 1.3290322580645162, + "grad_norm": 1.106817082140827, + "learning_rate": 5.551646309280266e-07, + "loss": 0.6794005036354065, + "step": 5768 + }, + { + "epoch": 1.32926267281106, + "grad_norm": 1.223898149625345, + "learning_rate": 5.548234493947939e-07, + "loss": 0.7739551067352295, + "step": 5769 + }, + { + "epoch": 1.3294930875576036, + "grad_norm": 1.0605861943491384, + "learning_rate": 5.544823324783482e-07, + "loss": 0.759978711605072, + "step": 5770 + }, + { + "epoch": 1.3297235023041476, + "grad_norm": 0.9593168779270222, + "learning_rate": 5.541412802282017e-07, + "loss": 0.7563333511352539, + "step": 5771 + }, + { + "epoch": 1.3299539170506913, + "grad_norm": 1.2126531853296405, + "learning_rate": 5.538002926938587e-07, + "loss": 0.6705852746963501, + "step": 5772 + }, + { + "epoch": 1.330184331797235, + "grad_norm": 1.4207541622240418, + "learning_rate": 5.534593699248124e-07, + "loss": 0.8343281745910645, + "step": 5773 + }, + { + "epoch": 1.3304147465437788, + "grad_norm": 1.4106880574063376, + "learning_rate": 5.531185119705474e-07, + "loss": 0.7158486843109131, + "step": 5774 + }, + { + "epoch": 1.3306451612903225, + "grad_norm": 1.5132468140839932, + 
"learning_rate": 5.527777188805385e-07, + "loss": 0.8888766765594482, + "step": 5775 + }, + { + "epoch": 1.3308755760368665, + "grad_norm": 1.0939731159249404, + "learning_rate": 5.524369907042519e-07, + "loss": 0.873813271522522, + "step": 5776 + }, + { + "epoch": 1.3311059907834102, + "grad_norm": 1.1685988919933143, + "learning_rate": 5.520963274911437e-07, + "loss": 0.7654919624328613, + "step": 5777 + }, + { + "epoch": 1.331336405529954, + "grad_norm": 0.8790821291361985, + "learning_rate": 5.517557292906606e-07, + "loss": 0.6976190805435181, + "step": 5778 + }, + { + "epoch": 1.3315668202764976, + "grad_norm": 1.0920428500423505, + "learning_rate": 5.5141519615224e-07, + "loss": 0.8356388807296753, + "step": 5779 + }, + { + "epoch": 1.3317972350230414, + "grad_norm": 1.1881219631842852, + "learning_rate": 5.510747281253094e-07, + "loss": 0.719998836517334, + "step": 5780 + }, + { + "epoch": 1.3320276497695853, + "grad_norm": 1.4093262324111957, + "learning_rate": 5.507343252592882e-07, + "loss": 0.8432124853134155, + "step": 5781 + }, + { + "epoch": 1.332258064516129, + "grad_norm": 1.2484869478133402, + "learning_rate": 5.503939876035845e-07, + "loss": 0.8426402807235718, + "step": 5782 + }, + { + "epoch": 1.3324884792626728, + "grad_norm": 1.1843136616988323, + "learning_rate": 5.500537152075986e-07, + "loss": 0.8133292198181152, + "step": 5783 + }, + { + "epoch": 1.3327188940092167, + "grad_norm": 1.2203561788081314, + "learning_rate": 5.497135081207205e-07, + "loss": 0.8097467422485352, + "step": 5784 + }, + { + "epoch": 1.3329493087557602, + "grad_norm": 0.9964838781032684, + "learning_rate": 5.493733663923299e-07, + "loss": 0.6943382024765015, + "step": 5785 + }, + { + "epoch": 1.3331797235023042, + "grad_norm": 0.8958647048569846, + "learning_rate": 5.490332900717993e-07, + "loss": 0.5896245837211609, + "step": 5786 + }, + { + "epoch": 1.333410138248848, + "grad_norm": 1.2066217319918868, + "learning_rate": 5.486932792084895e-07, + "loss": 
0.6837725639343262, + "step": 5787 + }, + { + "epoch": 1.3336405529953916, + "grad_norm": 1.3459237431541746, + "learning_rate": 5.483533338517523e-07, + "loss": 0.8371915221214294, + "step": 5788 + }, + { + "epoch": 1.3338709677419356, + "grad_norm": 1.1649657355507903, + "learning_rate": 5.480134540509313e-07, + "loss": 0.8001077175140381, + "step": 5789 + }, + { + "epoch": 1.3341013824884793, + "grad_norm": 1.4458040399946648, + "learning_rate": 5.476736398553591e-07, + "loss": 0.9070717096328735, + "step": 5790 + }, + { + "epoch": 1.334331797235023, + "grad_norm": 1.256932465914866, + "learning_rate": 5.473338913143589e-07, + "loss": 0.9061849117279053, + "step": 5791 + }, + { + "epoch": 1.3345622119815668, + "grad_norm": 1.2993559451282939, + "learning_rate": 5.469942084772454e-07, + "loss": 0.8465786576271057, + "step": 5792 + }, + { + "epoch": 1.3347926267281105, + "grad_norm": 1.2333173266054418, + "learning_rate": 5.466545913933229e-07, + "loss": 0.8221259117126465, + "step": 5793 + }, + { + "epoch": 1.3350230414746544, + "grad_norm": 1.1214356414954587, + "learning_rate": 5.463150401118864e-07, + "loss": 0.594088077545166, + "step": 5794 + }, + { + "epoch": 1.3352534562211982, + "grad_norm": 1.0900215939620008, + "learning_rate": 5.459755546822207e-07, + "loss": 0.6983529925346375, + "step": 5795 + }, + { + "epoch": 1.335483870967742, + "grad_norm": 1.3561793320800521, + "learning_rate": 5.456361351536027e-07, + "loss": 0.7720709443092346, + "step": 5796 + }, + { + "epoch": 1.3357142857142856, + "grad_norm": 1.1798730390657586, + "learning_rate": 5.45296781575298e-07, + "loss": 0.8087977766990662, + "step": 5797 + }, + { + "epoch": 1.3359447004608294, + "grad_norm": 1.123982224882924, + "learning_rate": 5.449574939965636e-07, + "loss": 0.6808000802993774, + "step": 5798 + }, + { + "epoch": 1.3361751152073733, + "grad_norm": 1.0634688756756818, + "learning_rate": 5.446182724666466e-07, + "loss": 0.7222881317138672, + "step": 5799 + }, + { + "epoch": 
1.336405529953917, + "grad_norm": 1.1919279054292256, + "learning_rate": 5.44279117034784e-07, + "loss": 0.872687578201294, + "step": 5800 + }, + { + "epoch": 1.3366359447004608, + "grad_norm": 1.3648460603559815, + "learning_rate": 5.439400277502048e-07, + "loss": 0.7728114128112793, + "step": 5801 + }, + { + "epoch": 1.3368663594470047, + "grad_norm": 1.0445795610107291, + "learning_rate": 5.436010046621267e-07, + "loss": 0.807528018951416, + "step": 5802 + }, + { + "epoch": 1.3370967741935484, + "grad_norm": 1.151575262421327, + "learning_rate": 5.432620478197583e-07, + "loss": 0.6997063159942627, + "step": 5803 + }, + { + "epoch": 1.3373271889400922, + "grad_norm": 1.309125931504039, + "learning_rate": 5.429231572722995e-07, + "loss": 0.797568678855896, + "step": 5804 + }, + { + "epoch": 1.337557603686636, + "grad_norm": 1.0057472643211554, + "learning_rate": 5.425843330689386e-07, + "loss": 0.6412359476089478, + "step": 5805 + }, + { + "epoch": 1.3377880184331796, + "grad_norm": 1.1290308654729904, + "learning_rate": 5.422455752588569e-07, + "loss": 0.8605507612228394, + "step": 5806 + }, + { + "epoch": 1.3380184331797236, + "grad_norm": 1.0459165137008808, + "learning_rate": 5.419068838912238e-07, + "loss": 0.856192946434021, + "step": 5807 + }, + { + "epoch": 1.3382488479262673, + "grad_norm": 1.1354202830657318, + "learning_rate": 5.415682590151998e-07, + "loss": 0.8614650368690491, + "step": 5808 + }, + { + "epoch": 1.338479262672811, + "grad_norm": 1.6619444336877072, + "learning_rate": 5.412297006799365e-07, + "loss": 0.9675840139389038, + "step": 5809 + }, + { + "epoch": 1.3387096774193548, + "grad_norm": 1.0659647985310448, + "learning_rate": 5.408912089345747e-07, + "loss": 0.7333405017852783, + "step": 5810 + }, + { + "epoch": 1.3389400921658985, + "grad_norm": 1.3540380425785927, + "learning_rate": 5.405527838282457e-07, + "loss": 0.8271909952163696, + "step": 5811 + }, + { + "epoch": 1.3391705069124424, + "grad_norm": 1.3562249096622705, + 
"learning_rate": 5.402144254100724e-07, + "loss": 0.8036069869995117, + "step": 5812 + }, + { + "epoch": 1.3394009216589862, + "grad_norm": 1.3975514954701582, + "learning_rate": 5.398761337291667e-07, + "loss": 0.855912446975708, + "step": 5813 + }, + { + "epoch": 1.33963133640553, + "grad_norm": 1.2830545749549949, + "learning_rate": 5.395379088346309e-07, + "loss": 0.8198536038398743, + "step": 5814 + }, + { + "epoch": 1.3398617511520738, + "grad_norm": 1.2130249913700057, + "learning_rate": 5.391997507755581e-07, + "loss": 0.8931646347045898, + "step": 5815 + }, + { + "epoch": 1.3400921658986176, + "grad_norm": 0.9981529734228639, + "learning_rate": 5.388616596010312e-07, + "loss": 0.7073954343795776, + "step": 5816 + }, + { + "epoch": 1.3403225806451613, + "grad_norm": 1.2450035085912274, + "learning_rate": 5.385236353601241e-07, + "loss": 0.7758424282073975, + "step": 5817 + }, + { + "epoch": 1.340552995391705, + "grad_norm": 1.1707291956273551, + "learning_rate": 5.381856781019005e-07, + "loss": 0.6805497407913208, + "step": 5818 + }, + { + "epoch": 1.3407834101382488, + "grad_norm": 1.251540768140409, + "learning_rate": 5.378477878754144e-07, + "loss": 0.8956538438796997, + "step": 5819 + }, + { + "epoch": 1.3410138248847927, + "grad_norm": 1.0594639846580987, + "learning_rate": 5.375099647297096e-07, + "loss": 0.7819657921791077, + "step": 5820 + }, + { + "epoch": 1.3412442396313364, + "grad_norm": 1.0523114055014655, + "learning_rate": 5.371722087138217e-07, + "loss": 0.5764007568359375, + "step": 5821 + }, + { + "epoch": 1.3414746543778802, + "grad_norm": 1.1661401559135987, + "learning_rate": 5.368345198767749e-07, + "loss": 0.697022557258606, + "step": 5822 + }, + { + "epoch": 1.3417050691244239, + "grad_norm": 1.3065346909259405, + "learning_rate": 5.364968982675839e-07, + "loss": 0.7773014307022095, + "step": 5823 + }, + { + "epoch": 1.3419354838709676, + "grad_norm": 1.3340944249973796, + "learning_rate": 5.361593439352551e-07, + "loss": 
0.7395004034042358, + "step": 5824 + }, + { + "epoch": 1.3421658986175116, + "grad_norm": 1.0762295080363014, + "learning_rate": 5.358218569287834e-07, + "loss": 0.7989716529846191, + "step": 5825 + }, + { + "epoch": 1.3423963133640553, + "grad_norm": 1.280549478612159, + "learning_rate": 5.354844372971543e-07, + "loss": 0.8894884586334229, + "step": 5826 + }, + { + "epoch": 1.342626728110599, + "grad_norm": 1.5586577225053506, + "learning_rate": 5.351470850893446e-07, + "loss": 0.8415021300315857, + "step": 5827 + }, + { + "epoch": 1.342857142857143, + "grad_norm": 1.4272313895508615, + "learning_rate": 5.3480980035432e-07, + "loss": 0.9963078498840332, + "step": 5828 + }, + { + "epoch": 1.3430875576036867, + "grad_norm": 1.1680739887228044, + "learning_rate": 5.344725831410368e-07, + "loss": 0.8489943742752075, + "step": 5829 + }, + { + "epoch": 1.3433179723502304, + "grad_norm": 0.9897462108554296, + "learning_rate": 5.341354334984422e-07, + "loss": 0.6949954032897949, + "step": 5830 + }, + { + "epoch": 1.3435483870967742, + "grad_norm": 1.1225897948987795, + "learning_rate": 5.337983514754722e-07, + "loss": 0.878408670425415, + "step": 5831 + }, + { + "epoch": 1.3437788018433179, + "grad_norm": 1.2067617593706235, + "learning_rate": 5.334613371210549e-07, + "loss": 0.722877025604248, + "step": 5832 + }, + { + "epoch": 1.3440092165898618, + "grad_norm": 1.04123853110292, + "learning_rate": 5.331243904841068e-07, + "loss": 0.670013427734375, + "step": 5833 + }, + { + "epoch": 1.3442396313364056, + "grad_norm": 1.0789084686611892, + "learning_rate": 5.327875116135354e-07, + "loss": 0.8336968421936035, + "step": 5834 + }, + { + "epoch": 1.3444700460829493, + "grad_norm": 1.2348261826059375, + "learning_rate": 5.324507005582381e-07, + "loss": 0.7917020916938782, + "step": 5835 + }, + { + "epoch": 1.344700460829493, + "grad_norm": 1.288528901659057, + "learning_rate": 5.321139573671024e-07, + "loss": 0.7479217052459717, + "step": 5836 + }, + { + "epoch": 
1.3449308755760367, + "grad_norm": 1.206901718846971, + "learning_rate": 5.317772820890068e-07, + "loss": 0.8059084415435791, + "step": 5837 + }, + { + "epoch": 1.3451612903225807, + "grad_norm": 1.0687058344207596, + "learning_rate": 5.314406747728186e-07, + "loss": 0.6853187680244446, + "step": 5838 + }, + { + "epoch": 1.3453917050691244, + "grad_norm": 1.2007310227541288, + "learning_rate": 5.311041354673964e-07, + "loss": 0.7769491672515869, + "step": 5839 + }, + { + "epoch": 1.3456221198156681, + "grad_norm": 1.007121872066712, + "learning_rate": 5.307676642215877e-07, + "loss": 0.6669384241104126, + "step": 5840 + }, + { + "epoch": 1.345852534562212, + "grad_norm": 1.091111253411437, + "learning_rate": 5.304312610842319e-07, + "loss": 0.7884945869445801, + "step": 5841 + }, + { + "epoch": 1.3460829493087558, + "grad_norm": 1.2799296704263758, + "learning_rate": 5.300949261041567e-07, + "loss": 0.8030047416687012, + "step": 5842 + }, + { + "epoch": 1.3463133640552996, + "grad_norm": 1.293856241707333, + "learning_rate": 5.297586593301806e-07, + "loss": 0.7792675495147705, + "step": 5843 + }, + { + "epoch": 1.3465437788018433, + "grad_norm": 1.450964712660266, + "learning_rate": 5.29422460811113e-07, + "loss": 0.8699119091033936, + "step": 5844 + }, + { + "epoch": 1.346774193548387, + "grad_norm": 1.1164478098944863, + "learning_rate": 5.290863305957523e-07, + "loss": 0.8075394630432129, + "step": 5845 + }, + { + "epoch": 1.347004608294931, + "grad_norm": 1.2025668698948455, + "learning_rate": 5.287502687328868e-07, + "loss": 0.7875077128410339, + "step": 5846 + }, + { + "epoch": 1.3472350230414747, + "grad_norm": 1.2743475952279586, + "learning_rate": 5.284142752712965e-07, + "loss": 0.6799413561820984, + "step": 5847 + }, + { + "epoch": 1.3474654377880184, + "grad_norm": 1.3570475044053845, + "learning_rate": 5.280783502597496e-07, + "loss": 0.914801299571991, + "step": 5848 + }, + { + "epoch": 1.3476958525345621, + "grad_norm": 1.4096481978785727, + 
"learning_rate": 5.277424937470052e-07, + "loss": 0.8591992855072021, + "step": 5849 + }, + { + "epoch": 1.3479262672811059, + "grad_norm": 1.1971358109064123, + "learning_rate": 5.27406705781813e-07, + "loss": 0.7830478549003601, + "step": 5850 + }, + { + "epoch": 1.3481566820276498, + "grad_norm": 1.397466179292115, + "learning_rate": 5.270709864129119e-07, + "loss": 0.8365499973297119, + "step": 5851 + }, + { + "epoch": 1.3483870967741935, + "grad_norm": 1.5417932199175834, + "learning_rate": 5.267353356890305e-07, + "loss": 0.8342669010162354, + "step": 5852 + }, + { + "epoch": 1.3486175115207373, + "grad_norm": 1.0532947941417055, + "learning_rate": 5.263997536588891e-07, + "loss": 0.7802393436431885, + "step": 5853 + }, + { + "epoch": 1.3488479262672812, + "grad_norm": 1.2005511445865484, + "learning_rate": 5.260642403711964e-07, + "loss": 0.8245328068733215, + "step": 5854 + }, + { + "epoch": 1.349078341013825, + "grad_norm": 1.043405656704728, + "learning_rate": 5.257287958746519e-07, + "loss": 0.7209265232086182, + "step": 5855 + }, + { + "epoch": 1.3493087557603687, + "grad_norm": 1.254105643009189, + "learning_rate": 5.253934202179444e-07, + "loss": 0.9258058071136475, + "step": 5856 + }, + { + "epoch": 1.3495391705069124, + "grad_norm": 1.3493584028342165, + "learning_rate": 5.25058113449754e-07, + "loss": 0.6889467835426331, + "step": 5857 + }, + { + "epoch": 1.3497695852534561, + "grad_norm": 1.113027412487739, + "learning_rate": 5.247228756187498e-07, + "loss": 0.8810057640075684, + "step": 5858 + }, + { + "epoch": 1.35, + "grad_norm": 1.140989478824924, + "learning_rate": 5.243877067735909e-07, + "loss": 0.7236393690109253, + "step": 5859 + }, + { + "epoch": 1.3502304147465438, + "grad_norm": 1.1712872152312954, + "learning_rate": 5.240526069629264e-07, + "loss": 0.8287979364395142, + "step": 5860 + }, + { + "epoch": 1.3504608294930875, + "grad_norm": 0.9764543402246563, + "learning_rate": 5.237175762353964e-07, + "loss": 0.8268846869468689, + 
"step": 5861 + }, + { + "epoch": 1.3506912442396313, + "grad_norm": 1.08770217121451, + "learning_rate": 5.233826146396296e-07, + "loss": 0.7995575666427612, + "step": 5862 + }, + { + "epoch": 1.350921658986175, + "grad_norm": 1.185939350431103, + "learning_rate": 5.230477222242449e-07, + "loss": 0.7379493713378906, + "step": 5863 + }, + { + "epoch": 1.351152073732719, + "grad_norm": 1.1532350043824988, + "learning_rate": 5.227128990378524e-07, + "loss": 0.729906439781189, + "step": 5864 + }, + { + "epoch": 1.3513824884792627, + "grad_norm": 1.3775772205538213, + "learning_rate": 5.223781451290506e-07, + "loss": 0.8356789350509644, + "step": 5865 + }, + { + "epoch": 1.3516129032258064, + "grad_norm": 1.4707388081384496, + "learning_rate": 5.220434605464285e-07, + "loss": 0.8130582571029663, + "step": 5866 + }, + { + "epoch": 1.3518433179723504, + "grad_norm": 1.3840431554185126, + "learning_rate": 5.217088453385658e-07, + "loss": 0.7686447501182556, + "step": 5867 + }, + { + "epoch": 1.352073732718894, + "grad_norm": 1.4824685151456765, + "learning_rate": 5.213742995540309e-07, + "loss": 0.7945844531059265, + "step": 5868 + }, + { + "epoch": 1.3523041474654378, + "grad_norm": 0.9715413572597766, + "learning_rate": 5.210398232413824e-07, + "loss": 0.8082837462425232, + "step": 5869 + }, + { + "epoch": 1.3525345622119815, + "grad_norm": 1.2398246007417328, + "learning_rate": 5.2070541644917e-07, + "loss": 0.7826153039932251, + "step": 5870 + }, + { + "epoch": 1.3527649769585253, + "grad_norm": 1.2471684178108737, + "learning_rate": 5.203710792259318e-07, + "loss": 0.6853276491165161, + "step": 5871 + }, + { + "epoch": 1.3529953917050692, + "grad_norm": 1.2891891865978977, + "learning_rate": 5.200368116201962e-07, + "loss": 0.8354780673980713, + "step": 5872 + }, + { + "epoch": 1.353225806451613, + "grad_norm": 1.1178862343459024, + "learning_rate": 5.197026136804823e-07, + "loss": 0.7857648134231567, + "step": 5873 + }, + { + "epoch": 1.3534562211981567, + 
"grad_norm": 0.9168225851850988, + "learning_rate": 5.193684854552982e-07, + "loss": 0.663504958152771, + "step": 5874 + }, + { + "epoch": 1.3536866359447004, + "grad_norm": 1.329771615602396, + "learning_rate": 5.190344269931423e-07, + "loss": 0.8192203044891357, + "step": 5875 + }, + { + "epoch": 1.3539170506912441, + "grad_norm": 1.4861685476717017, + "learning_rate": 5.187004383425024e-07, + "loss": 0.801753044128418, + "step": 5876 + }, + { + "epoch": 1.354147465437788, + "grad_norm": 1.3551621393598028, + "learning_rate": 5.183665195518566e-07, + "loss": 0.9427206516265869, + "step": 5877 + }, + { + "epoch": 1.3543778801843318, + "grad_norm": 1.1121835630605517, + "learning_rate": 5.18032670669673e-07, + "loss": 0.7801729440689087, + "step": 5878 + }, + { + "epoch": 1.3546082949308755, + "grad_norm": 1.3936797390586833, + "learning_rate": 5.176988917444094e-07, + "loss": 0.8224533796310425, + "step": 5879 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.9505008459531469, + "learning_rate": 5.173651828245127e-07, + "loss": 0.7800098657608032, + "step": 5880 + }, + { + "epoch": 1.3550691244239632, + "grad_norm": 0.9654380749861797, + "learning_rate": 5.170315439584212e-07, + "loss": 0.7612746953964233, + "step": 5881 + }, + { + "epoch": 1.355299539170507, + "grad_norm": 1.191616140078335, + "learning_rate": 5.166979751945617e-07, + "loss": 0.8027492761611938, + "step": 5882 + }, + { + "epoch": 1.3555299539170507, + "grad_norm": 1.167147993456773, + "learning_rate": 5.163644765813508e-07, + "loss": 0.7509280443191528, + "step": 5883 + }, + { + "epoch": 1.3557603686635944, + "grad_norm": 1.2102231125675782, + "learning_rate": 5.160310481671966e-07, + "loss": 0.7663145661354065, + "step": 5884 + }, + { + "epoch": 1.3559907834101383, + "grad_norm": 1.246862901799125, + "learning_rate": 5.156976900004948e-07, + "loss": 0.7598870396614075, + "step": 5885 + }, + { + "epoch": 1.356221198156682, + "grad_norm": 1.127184650819857, + "learning_rate": 
5.153644021296317e-07, + "loss": 0.7923038005828857, + "step": 5886 + }, + { + "epoch": 1.3564516129032258, + "grad_norm": 1.2664053097126295, + "learning_rate": 5.150311846029846e-07, + "loss": 0.8711799383163452, + "step": 5887 + }, + { + "epoch": 1.3566820276497695, + "grad_norm": 1.294570667250746, + "learning_rate": 5.146980374689191e-07, + "loss": 0.7852096557617188, + "step": 5888 + }, + { + "epoch": 1.3569124423963133, + "grad_norm": 1.1426360408928755, + "learning_rate": 5.143649607757905e-07, + "loss": 0.7259876132011414, + "step": 5889 + }, + { + "epoch": 1.3571428571428572, + "grad_norm": 0.9810253925795782, + "learning_rate": 5.140319545719454e-07, + "loss": 0.7612321376800537, + "step": 5890 + }, + { + "epoch": 1.357373271889401, + "grad_norm": 1.2919477789807814, + "learning_rate": 5.136990189057187e-07, + "loss": 0.7881298661231995, + "step": 5891 + }, + { + "epoch": 1.3576036866359447, + "grad_norm": 1.0310706760740191, + "learning_rate": 5.133661538254353e-07, + "loss": 0.6956340074539185, + "step": 5892 + }, + { + "epoch": 1.3578341013824886, + "grad_norm": 1.0277045355993415, + "learning_rate": 5.130333593794107e-07, + "loss": 0.7800698280334473, + "step": 5893 + }, + { + "epoch": 1.3580645161290323, + "grad_norm": 1.0373100274796343, + "learning_rate": 5.127006356159496e-07, + "loss": 0.6920318603515625, + "step": 5894 + }, + { + "epoch": 1.358294930875576, + "grad_norm": 0.9870224446835288, + "learning_rate": 5.123679825833458e-07, + "loss": 0.6972872018814087, + "step": 5895 + }, + { + "epoch": 1.3585253456221198, + "grad_norm": 1.1473583592012562, + "learning_rate": 5.12035400329884e-07, + "loss": 0.8820276260375977, + "step": 5896 + }, + { + "epoch": 1.3587557603686635, + "grad_norm": 1.1566218274104645, + "learning_rate": 5.117028889038375e-07, + "loss": 0.8834109306335449, + "step": 5897 + }, + { + "epoch": 1.3589861751152075, + "grad_norm": 1.1393544418506285, + "learning_rate": 5.113704483534704e-07, + "loss": 0.6981096267700195, + 
"step": 5898 + }, + { + "epoch": 1.3592165898617512, + "grad_norm": 1.329102048560067, + "learning_rate": 5.11038078727036e-07, + "loss": 0.7617249488830566, + "step": 5899 + }, + { + "epoch": 1.359447004608295, + "grad_norm": 1.7116569149164136, + "learning_rate": 5.107057800727773e-07, + "loss": 0.8373798131942749, + "step": 5900 + }, + { + "epoch": 1.3596774193548387, + "grad_norm": 1.3064563550321244, + "learning_rate": 5.103735524389264e-07, + "loss": 0.7176666855812073, + "step": 5901 + }, + { + "epoch": 1.3599078341013824, + "grad_norm": 0.9003342699900779, + "learning_rate": 5.100413958737067e-07, + "loss": 0.7872966527938843, + "step": 5902 + }, + { + "epoch": 1.3601382488479263, + "grad_norm": 1.1723157653802474, + "learning_rate": 5.097093104253295e-07, + "loss": 0.6668897271156311, + "step": 5903 + }, + { + "epoch": 1.36036866359447, + "grad_norm": 1.2119302484042467, + "learning_rate": 5.093772961419967e-07, + "loss": 0.8413408994674683, + "step": 5904 + }, + { + "epoch": 1.3605990783410138, + "grad_norm": 0.9857990015136971, + "learning_rate": 5.090453530719e-07, + "loss": 0.632825493812561, + "step": 5905 + }, + { + "epoch": 1.3608294930875577, + "grad_norm": 1.2378128322555926, + "learning_rate": 5.087134812632201e-07, + "loss": 0.737346887588501, + "step": 5906 + }, + { + "epoch": 1.3610599078341012, + "grad_norm": 1.2614434601380542, + "learning_rate": 5.083816807641283e-07, + "loss": 1.00008225440979, + "step": 5907 + }, + { + "epoch": 1.3612903225806452, + "grad_norm": 1.2168755174090398, + "learning_rate": 5.08049951622785e-07, + "loss": 0.7844079732894897, + "step": 5908 + }, + { + "epoch": 1.361520737327189, + "grad_norm": 1.0532681425474226, + "learning_rate": 5.077182938873393e-07, + "loss": 0.8615080118179321, + "step": 5909 + }, + { + "epoch": 1.3617511520737327, + "grad_norm": 1.279562028421048, + "learning_rate": 5.073867076059321e-07, + "loss": 0.6930621862411499, + "step": 5910 + }, + { + "epoch": 1.3619815668202766, + "grad_norm": 
1.4077453728560791, + "learning_rate": 5.07055192826692e-07, + "loss": 0.7020307183265686, + "step": 5911 + }, + { + "epoch": 1.3622119815668203, + "grad_norm": 1.4403791813866107, + "learning_rate": 5.067237495977379e-07, + "loss": 0.7281042337417603, + "step": 5912 + }, + { + "epoch": 1.362442396313364, + "grad_norm": 1.014203177200115, + "learning_rate": 5.063923779671789e-07, + "loss": 0.8092719316482544, + "step": 5913 + }, + { + "epoch": 1.3626728110599078, + "grad_norm": 1.2597384594296865, + "learning_rate": 5.060610779831125e-07, + "loss": 0.7323317527770996, + "step": 5914 + }, + { + "epoch": 1.3629032258064515, + "grad_norm": 0.9804861205409557, + "learning_rate": 5.05729849693627e-07, + "loss": 0.7370069622993469, + "step": 5915 + }, + { + "epoch": 1.3631336405529955, + "grad_norm": 1.1355071333670705, + "learning_rate": 5.053986931467994e-07, + "loss": 0.7175320386886597, + "step": 5916 + }, + { + "epoch": 1.3633640552995392, + "grad_norm": 1.2834592450306632, + "learning_rate": 5.050676083906964e-07, + "loss": 0.8643501996994019, + "step": 5917 + }, + { + "epoch": 1.363594470046083, + "grad_norm": 1.2479698704612106, + "learning_rate": 5.047365954733752e-07, + "loss": 0.9110950827598572, + "step": 5918 + }, + { + "epoch": 1.3638248847926266, + "grad_norm": 1.6104451195946936, + "learning_rate": 5.044056544428814e-07, + "loss": 0.9242197275161743, + "step": 5919 + }, + { + "epoch": 1.3640552995391704, + "grad_norm": 1.2769108446030992, + "learning_rate": 5.040747853472509e-07, + "loss": 0.9218860864639282, + "step": 5920 + }, + { + "epoch": 1.3642857142857143, + "grad_norm": 1.3302527755174611, + "learning_rate": 5.037439882345084e-07, + "loss": 0.970054030418396, + "step": 5921 + }, + { + "epoch": 1.364516129032258, + "grad_norm": 1.0075132364725619, + "learning_rate": 5.034132631526695e-07, + "loss": 0.7707182168960571, + "step": 5922 + }, + { + "epoch": 1.3647465437788018, + "grad_norm": 1.1036594577594991, + "learning_rate": 5.03082610149738e-07, + 
"loss": 0.7673811912536621, + "step": 5923 + }, + { + "epoch": 1.3649769585253457, + "grad_norm": 1.2758650519526258, + "learning_rate": 5.027520292737073e-07, + "loss": 0.7387198209762573, + "step": 5924 + }, + { + "epoch": 1.3652073732718895, + "grad_norm": 1.139448521744241, + "learning_rate": 5.024215205725619e-07, + "loss": 0.7803019881248474, + "step": 5925 + }, + { + "epoch": 1.3654377880184332, + "grad_norm": 1.3985269621197394, + "learning_rate": 5.020910840942738e-07, + "loss": 0.8753018379211426, + "step": 5926 + }, + { + "epoch": 1.365668202764977, + "grad_norm": 1.0358625157915384, + "learning_rate": 5.017607198868055e-07, + "loss": 0.7917389869689941, + "step": 5927 + }, + { + "epoch": 1.3658986175115206, + "grad_norm": 1.2995608187995562, + "learning_rate": 5.014304279981095e-07, + "loss": 0.8393691182136536, + "step": 5928 + }, + { + "epoch": 1.3661290322580646, + "grad_norm": 1.2671721961788391, + "learning_rate": 5.011002084761264e-07, + "loss": 0.6635205745697021, + "step": 5929 + }, + { + "epoch": 1.3663594470046083, + "grad_norm": 1.2038857805513816, + "learning_rate": 5.007700613687879e-07, + "loss": 0.7058769464492798, + "step": 5930 + }, + { + "epoch": 1.366589861751152, + "grad_norm": 1.1784688857731938, + "learning_rate": 5.004399867240143e-07, + "loss": 0.841168224811554, + "step": 5931 + }, + { + "epoch": 1.3668202764976958, + "grad_norm": 1.3760327619217738, + "learning_rate": 5.001099845897148e-07, + "loss": 0.7385121583938599, + "step": 5932 + }, + { + "epoch": 1.3670506912442395, + "grad_norm": 1.1633525983686732, + "learning_rate": 4.997800550137897e-07, + "loss": 0.6525158882141113, + "step": 5933 + }, + { + "epoch": 1.3672811059907835, + "grad_norm": 1.2331358286597804, + "learning_rate": 4.994501980441274e-07, + "loss": 0.7838844060897827, + "step": 5934 + }, + { + "epoch": 1.3675115207373272, + "grad_norm": 1.4450953979822279, + "learning_rate": 4.991204137286061e-07, + "loss": 0.8831999897956848, + "step": 5935 + }, + { + 
"epoch": 1.367741935483871, + "grad_norm": 1.0408031352355525, + "learning_rate": 4.987907021150938e-07, + "loss": 0.8053784966468811, + "step": 5936 + }, + { + "epoch": 1.3679723502304149, + "grad_norm": 1.1356206370071746, + "learning_rate": 4.984610632514475e-07, + "loss": 0.8093301057815552, + "step": 5937 + }, + { + "epoch": 1.3682027649769586, + "grad_norm": 1.0230530705292329, + "learning_rate": 4.981314971855136e-07, + "loss": 0.7609653472900391, + "step": 5938 + }, + { + "epoch": 1.3684331797235023, + "grad_norm": 1.4109994154981755, + "learning_rate": 4.978020039651288e-07, + "loss": 0.7131600379943848, + "step": 5939 + }, + { + "epoch": 1.368663594470046, + "grad_norm": 1.3192550042799691, + "learning_rate": 4.974725836381184e-07, + "loss": 0.6555063724517822, + "step": 5940 + }, + { + "epoch": 1.3688940092165898, + "grad_norm": 1.1278604970222592, + "learning_rate": 4.971432362522968e-07, + "loss": 0.8349519968032837, + "step": 5941 + }, + { + "epoch": 1.3691244239631337, + "grad_norm": 1.2138732932202303, + "learning_rate": 4.968139618554691e-07, + "loss": 0.7335611581802368, + "step": 5942 + }, + { + "epoch": 1.3693548387096774, + "grad_norm": 1.050807913168598, + "learning_rate": 4.964847604954287e-07, + "loss": 0.8349814414978027, + "step": 5943 + }, + { + "epoch": 1.3695852534562212, + "grad_norm": 1.07716704849378, + "learning_rate": 4.961556322199585e-07, + "loss": 0.6816729307174683, + "step": 5944 + }, + { + "epoch": 1.369815668202765, + "grad_norm": 1.5220059571304148, + "learning_rate": 4.958265770768315e-07, + "loss": 0.847672164440155, + "step": 5945 + }, + { + "epoch": 1.3700460829493086, + "grad_norm": 1.267067930725286, + "learning_rate": 4.954975951138095e-07, + "loss": 0.6674519777297974, + "step": 5946 + }, + { + "epoch": 1.3702764976958526, + "grad_norm": 1.0820409905680344, + "learning_rate": 4.951686863786432e-07, + "loss": 0.7836427092552185, + "step": 5947 + }, + { + "epoch": 1.3705069124423963, + "grad_norm": 1.0577780792239002, 
+ "learning_rate": 4.948398509190742e-07, + "loss": 0.640183687210083, + "step": 5948 + }, + { + "epoch": 1.37073732718894, + "grad_norm": 1.223963669470004, + "learning_rate": 4.945110887828322e-07, + "loss": 0.8438451290130615, + "step": 5949 + }, + { + "epoch": 1.370967741935484, + "grad_norm": 1.5483267377377474, + "learning_rate": 4.94182400017636e-07, + "loss": 0.9311714172363281, + "step": 5950 + }, + { + "epoch": 1.3711981566820277, + "grad_norm": 1.2352509732193302, + "learning_rate": 4.938537846711952e-07, + "loss": 0.7332801818847656, + "step": 5951 + }, + { + "epoch": 1.3714285714285714, + "grad_norm": 1.127354832681604, + "learning_rate": 4.935252427912075e-07, + "loss": 0.7189289331436157, + "step": 5952 + }, + { + "epoch": 1.3716589861751152, + "grad_norm": 1.451594181977691, + "learning_rate": 4.9319677442536e-07, + "loss": 0.827372670173645, + "step": 5953 + }, + { + "epoch": 1.371889400921659, + "grad_norm": 1.2273788913776413, + "learning_rate": 4.9286837962133e-07, + "loss": 0.7607625722885132, + "step": 5954 + }, + { + "epoch": 1.3721198156682028, + "grad_norm": 1.1935199245873378, + "learning_rate": 4.925400584267836e-07, + "loss": 0.9420886635780334, + "step": 5955 + }, + { + "epoch": 1.3723502304147466, + "grad_norm": 1.1557325656206936, + "learning_rate": 4.922118108893757e-07, + "loss": 0.7605317831039429, + "step": 5956 + }, + { + "epoch": 1.3725806451612903, + "grad_norm": 1.059494459687004, + "learning_rate": 4.918836370567513e-07, + "loss": 0.8353599309921265, + "step": 5957 + }, + { + "epoch": 1.372811059907834, + "grad_norm": 1.2571100340874592, + "learning_rate": 4.915555369765439e-07, + "loss": 0.8540027141571045, + "step": 5958 + }, + { + "epoch": 1.3730414746543778, + "grad_norm": 1.027809306304352, + "learning_rate": 4.912275106963778e-07, + "loss": 0.6965712308883667, + "step": 5959 + }, + { + "epoch": 1.3732718894009217, + "grad_norm": 1.0356479101830274, + "learning_rate": 4.908995582638648e-07, + "loss": 0.7460787296295166, 
+ "step": 5960 + }, + { + "epoch": 1.3735023041474654, + "grad_norm": 1.1845566109999182, + "learning_rate": 4.905716797266067e-07, + "loss": 0.8652873039245605, + "step": 5961 + }, + { + "epoch": 1.3737327188940092, + "grad_norm": 1.1300176885770365, + "learning_rate": 4.902438751321952e-07, + "loss": 0.7757953405380249, + "step": 5962 + }, + { + "epoch": 1.3739631336405531, + "grad_norm": 1.2945741727860514, + "learning_rate": 4.899161445282102e-07, + "loss": 0.8842452168464661, + "step": 5963 + }, + { + "epoch": 1.3741935483870968, + "grad_norm": 1.1415902309445607, + "learning_rate": 4.895884879622215e-07, + "loss": 0.7259113788604736, + "step": 5964 + }, + { + "epoch": 1.3744239631336406, + "grad_norm": 1.3855842779268248, + "learning_rate": 4.892609054817883e-07, + "loss": 0.8871402144432068, + "step": 5965 + }, + { + "epoch": 1.3746543778801843, + "grad_norm": 1.3262407740428463, + "learning_rate": 4.889333971344586e-07, + "loss": 0.7564518451690674, + "step": 5966 + }, + { + "epoch": 1.374884792626728, + "grad_norm": 1.2010368462649357, + "learning_rate": 4.886059629677692e-07, + "loss": 0.7886015176773071, + "step": 5967 + }, + { + "epoch": 1.375115207373272, + "grad_norm": 1.199947155848343, + "learning_rate": 4.882786030292479e-07, + "loss": 0.8256035447120667, + "step": 5968 + }, + { + "epoch": 1.3753456221198157, + "grad_norm": 1.3084738837241086, + "learning_rate": 4.879513173664099e-07, + "loss": 0.9351227283477783, + "step": 5969 + }, + { + "epoch": 1.3755760368663594, + "grad_norm": 1.1794682657820328, + "learning_rate": 4.876241060267598e-07, + "loss": 0.7221553921699524, + "step": 5970 + }, + { + "epoch": 1.3758064516129032, + "grad_norm": 1.3959950512058854, + "learning_rate": 4.872969690577928e-07, + "loss": 0.7451514005661011, + "step": 5971 + }, + { + "epoch": 1.3760368663594469, + "grad_norm": 2.704793745814284, + "learning_rate": 4.86969906506992e-07, + "loss": 0.810903787612915, + "step": 5972 + }, + { + "epoch": 1.3762672811059908, + 
"grad_norm": 1.0363767093510534, + "learning_rate": 4.866429184218298e-07, + "loss": 0.6279938817024231, + "step": 5973 + }, + { + "epoch": 1.3764976958525346, + "grad_norm": 1.4075128359986724, + "learning_rate": 4.863160048497688e-07, + "loss": 0.7742956876754761, + "step": 5974 + }, + { + "epoch": 1.3767281105990783, + "grad_norm": 1.0416061346586747, + "learning_rate": 4.859891658382597e-07, + "loss": 0.7423844933509827, + "step": 5975 + }, + { + "epoch": 1.3769585253456222, + "grad_norm": 1.0348526250721313, + "learning_rate": 4.856624014347426e-07, + "loss": 0.8387676477432251, + "step": 5976 + }, + { + "epoch": 1.377188940092166, + "grad_norm": 1.3906652341525882, + "learning_rate": 4.853357116866471e-07, + "loss": 0.7959855794906616, + "step": 5977 + }, + { + "epoch": 1.3774193548387097, + "grad_norm": 1.2781418274310543, + "learning_rate": 4.850090966413913e-07, + "loss": 0.7086259722709656, + "step": 5978 + }, + { + "epoch": 1.3776497695852534, + "grad_norm": 1.113262974989995, + "learning_rate": 4.846825563463838e-07, + "loss": 0.7219396829605103, + "step": 5979 + }, + { + "epoch": 1.3778801843317972, + "grad_norm": 1.2693838975886846, + "learning_rate": 4.84356090849021e-07, + "loss": 0.8383582830429077, + "step": 5980 + }, + { + "epoch": 1.378110599078341, + "grad_norm": 1.2004259850017622, + "learning_rate": 4.840297001966887e-07, + "loss": 0.7624244689941406, + "step": 5981 + }, + { + "epoch": 1.3783410138248848, + "grad_norm": 1.3275243269089372, + "learning_rate": 4.837033844367626e-07, + "loss": 0.7901623249053955, + "step": 5982 + }, + { + "epoch": 1.3785714285714286, + "grad_norm": 1.0665581903589285, + "learning_rate": 4.833771436166068e-07, + "loss": 0.7732094526290894, + "step": 5983 + }, + { + "epoch": 1.3788018433179723, + "grad_norm": 1.221680510593368, + "learning_rate": 4.830509777835744e-07, + "loss": 0.7882228493690491, + "step": 5984 + }, + { + "epoch": 1.379032258064516, + "grad_norm": 1.3954212415484932, + "learning_rate": 
4.827248869850086e-07, + "loss": 0.8601159453392029, + "step": 5985 + }, + { + "epoch": 1.37926267281106, + "grad_norm": 1.442537797357167, + "learning_rate": 4.823988712682406e-07, + "loss": 0.8828538656234741, + "step": 5986 + }, + { + "epoch": 1.3794930875576037, + "grad_norm": 1.2814445672112398, + "learning_rate": 4.820729306805907e-07, + "loss": 0.8586058020591736, + "step": 5987 + }, + { + "epoch": 1.3797235023041474, + "grad_norm": 1.3476469386797916, + "learning_rate": 4.8174706526937e-07, + "loss": 0.8276243209838867, + "step": 5988 + }, + { + "epoch": 1.3799539170506914, + "grad_norm": 1.1504215702512235, + "learning_rate": 4.814212750818764e-07, + "loss": 0.837665855884552, + "step": 5989 + }, + { + "epoch": 1.380184331797235, + "grad_norm": 1.0830851541320008, + "learning_rate": 4.810955601653978e-07, + "loss": 0.7493194341659546, + "step": 5990 + }, + { + "epoch": 1.3804147465437788, + "grad_norm": 0.9470923738615639, + "learning_rate": 4.807699205672123e-07, + "loss": 0.8382525444030762, + "step": 5991 + }, + { + "epoch": 1.3806451612903226, + "grad_norm": 1.302996846441217, + "learning_rate": 4.804443563345854e-07, + "loss": 0.8152645826339722, + "step": 5992 + }, + { + "epoch": 1.3808755760368663, + "grad_norm": 1.1087518210488847, + "learning_rate": 4.801188675147719e-07, + "loss": 0.7168164849281311, + "step": 5993 + }, + { + "epoch": 1.3811059907834102, + "grad_norm": 1.3971974855003246, + "learning_rate": 4.79793454155017e-07, + "loss": 0.883512556552887, + "step": 5994 + }, + { + "epoch": 1.381336405529954, + "grad_norm": 1.1775999496250547, + "learning_rate": 4.794681163025536e-07, + "loss": 0.7258438467979431, + "step": 5995 + }, + { + "epoch": 1.3815668202764977, + "grad_norm": 1.102316858629444, + "learning_rate": 4.79142854004604e-07, + "loss": 0.8408991098403931, + "step": 5996 + }, + { + "epoch": 1.3817972350230414, + "grad_norm": 1.2549882230845555, + "learning_rate": 4.788176673083796e-07, + "loss": 0.6506227254867554, + "step": 5997 
+ }, + { + "epoch": 1.3820276497695851, + "grad_norm": 1.145761304273299, + "learning_rate": 4.784925562610809e-07, + "loss": 0.6971127986907959, + "step": 5998 + }, + { + "epoch": 1.382258064516129, + "grad_norm": 1.3037562977083754, + "learning_rate": 4.781675209098967e-07, + "loss": 0.8399784564971924, + "step": 5999 + }, + { + "epoch": 1.3824884792626728, + "grad_norm": 1.1085204750545832, + "learning_rate": 4.778425613020067e-07, + "loss": 0.6451772451400757, + "step": 6000 + }, + { + "epoch": 1.3827188940092165, + "grad_norm": 1.2906420363235995, + "learning_rate": 4.775176774845774e-07, + "loss": 0.7794390916824341, + "step": 6001 + }, + { + "epoch": 1.3829493087557605, + "grad_norm": 1.2681207047961411, + "learning_rate": 4.771928695047652e-07, + "loss": 0.7743663191795349, + "step": 6002 + }, + { + "epoch": 1.3831797235023042, + "grad_norm": 1.3900227492937691, + "learning_rate": 4.768681374097165e-07, + "loss": 0.7654878497123718, + "step": 6003 + }, + { + "epoch": 1.383410138248848, + "grad_norm": 0.9597367840932265, + "learning_rate": 4.765434812465645e-07, + "loss": 0.634769082069397, + "step": 6004 + }, + { + "epoch": 1.3836405529953917, + "grad_norm": 1.506039076037628, + "learning_rate": 4.762189010624337e-07, + "loss": 0.7941944599151611, + "step": 6005 + }, + { + "epoch": 1.3838709677419354, + "grad_norm": 1.015987334283248, + "learning_rate": 4.75894396904436e-07, + "loss": 0.7437179088592529, + "step": 6006 + }, + { + "epoch": 1.3841013824884794, + "grad_norm": 1.4064808788220893, + "learning_rate": 4.7556996881967236e-07, + "loss": 0.7854535579681396, + "step": 6007 + }, + { + "epoch": 1.384331797235023, + "grad_norm": 1.1454067558015728, + "learning_rate": 4.752456168552339e-07, + "loss": 0.7506910562515259, + "step": 6008 + }, + { + "epoch": 1.3845622119815668, + "grad_norm": 1.3378490743548084, + "learning_rate": 4.749213410581995e-07, + "loss": 0.8967334032058716, + "step": 6009 + }, + { + "epoch": 1.3847926267281105, + "grad_norm": 
0.9073367214802157, + "learning_rate": 4.7459714147563677e-07, + "loss": 0.7053096294403076, + "step": 6010 + }, + { + "epoch": 1.3850230414746543, + "grad_norm": 1.4011875457574152, + "learning_rate": 4.7427301815460396e-07, + "loss": 0.8759415149688721, + "step": 6011 + }, + { + "epoch": 1.3852534562211982, + "grad_norm": 1.2083846258038176, + "learning_rate": 4.739489711421466e-07, + "loss": 0.8827483654022217, + "step": 6012 + }, + { + "epoch": 1.385483870967742, + "grad_norm": 0.9892327750407551, + "learning_rate": 4.736250004852993e-07, + "loss": 0.7268258929252625, + "step": 6013 + }, + { + "epoch": 1.3857142857142857, + "grad_norm": 1.3354283922456354, + "learning_rate": 4.7330110623108665e-07, + "loss": 0.7142586708068848, + "step": 6014 + }, + { + "epoch": 1.3859447004608296, + "grad_norm": 0.9791582073391492, + "learning_rate": 4.7297728842652116e-07, + "loss": 0.7123303413391113, + "step": 6015 + }, + { + "epoch": 1.3861751152073734, + "grad_norm": 1.1089770586845422, + "learning_rate": 4.726535471186047e-07, + "loss": 0.7548067569732666, + "step": 6016 + }, + { + "epoch": 1.386405529953917, + "grad_norm": 1.205868893691031, + "learning_rate": 4.723298823543277e-07, + "loss": 0.7792191505432129, + "step": 6017 + }, + { + "epoch": 1.3866359447004608, + "grad_norm": 1.313401532453458, + "learning_rate": 4.7200629418066975e-07, + "loss": 0.8658785820007324, + "step": 6018 + }, + { + "epoch": 1.3868663594470045, + "grad_norm": 1.20345203638671, + "learning_rate": 4.716827826445987e-07, + "loss": 0.7173904776573181, + "step": 6019 + }, + { + "epoch": 1.3870967741935485, + "grad_norm": 1.0016118220950732, + "learning_rate": 4.7135934779307284e-07, + "loss": 0.6675543785095215, + "step": 6020 + }, + { + "epoch": 1.3873271889400922, + "grad_norm": 1.2559637316001069, + "learning_rate": 4.710359896730378e-07, + "loss": 0.8164724111557007, + "step": 6021 + }, + { + "epoch": 1.387557603686636, + "grad_norm": 1.474439832240672, + "learning_rate": 
4.707127083314283e-07, + "loss": 0.8354332447052002, + "step": 6022 + }, + { + "epoch": 1.3877880184331797, + "grad_norm": 1.1544900465349175, + "learning_rate": 4.7038950381516885e-07, + "loss": 0.8414663672447205, + "step": 6023 + }, + { + "epoch": 1.3880184331797234, + "grad_norm": 1.2150035811173532, + "learning_rate": 4.700663761711717e-07, + "loss": 0.7693418264389038, + "step": 6024 + }, + { + "epoch": 1.3882488479262673, + "grad_norm": 1.0071958767588902, + "learning_rate": 4.697433254463382e-07, + "loss": 0.7809267044067383, + "step": 6025 + }, + { + "epoch": 1.388479262672811, + "grad_norm": 1.203482571104156, + "learning_rate": 4.6942035168755944e-07, + "loss": 0.7455927133560181, + "step": 6026 + }, + { + "epoch": 1.3887096774193548, + "grad_norm": 1.3018105004563159, + "learning_rate": 4.6909745494171383e-07, + "loss": 0.8217881917953491, + "step": 6027 + }, + { + "epoch": 1.3889400921658988, + "grad_norm": 1.3723027057230852, + "learning_rate": 4.687746352556703e-07, + "loss": 0.8138882517814636, + "step": 6028 + }, + { + "epoch": 1.3891705069124423, + "grad_norm": 1.241759909967513, + "learning_rate": 4.6845189267628505e-07, + "loss": 0.8926469087600708, + "step": 6029 + }, + { + "epoch": 1.3894009216589862, + "grad_norm": 1.3027918343739477, + "learning_rate": 4.681292272504036e-07, + "loss": 0.797023355960846, + "step": 6030 + }, + { + "epoch": 1.38963133640553, + "grad_norm": 0.8383796462842409, + "learning_rate": 4.6780663902486104e-07, + "loss": 0.6767498254776001, + "step": 6031 + }, + { + "epoch": 1.3898617511520737, + "grad_norm": 1.2727364252127855, + "learning_rate": 4.674841280464804e-07, + "loss": 0.7514280080795288, + "step": 6032 + }, + { + "epoch": 1.3900921658986176, + "grad_norm": 1.3853363805552346, + "learning_rate": 4.671616943620731e-07, + "loss": 0.8879726529121399, + "step": 6033 + }, + { + "epoch": 1.3903225806451613, + "grad_norm": 0.8270134553121277, + "learning_rate": 4.66839338018441e-07, + "loss": 0.6674140095710754, + 
"step": 6034 + }, + { + "epoch": 1.390552995391705, + "grad_norm": 1.078021820178179, + "learning_rate": 4.6651705906237307e-07, + "loss": 0.9094855785369873, + "step": 6035 + }, + { + "epoch": 1.3907834101382488, + "grad_norm": 1.2561393182724931, + "learning_rate": 4.661948575406478e-07, + "loss": 0.8334506750106812, + "step": 6036 + }, + { + "epoch": 1.3910138248847925, + "grad_norm": 1.040119500616202, + "learning_rate": 4.658727335000323e-07, + "loss": 0.6545997858047485, + "step": 6037 + }, + { + "epoch": 1.3912442396313365, + "grad_norm": 1.1967093206075838, + "learning_rate": 4.6555068698728237e-07, + "loss": 0.7810590267181396, + "step": 6038 + }, + { + "epoch": 1.3914746543778802, + "grad_norm": 1.0756703494881659, + "learning_rate": 4.652287180491424e-07, + "loss": 0.7581864595413208, + "step": 6039 + }, + { + "epoch": 1.391705069124424, + "grad_norm": 1.2754594039466507, + "learning_rate": 4.649068267323465e-07, + "loss": 0.7134817242622375, + "step": 6040 + }, + { + "epoch": 1.3919354838709677, + "grad_norm": 0.9730020123763279, + "learning_rate": 4.645850130836162e-07, + "loss": 0.7050445079803467, + "step": 6041 + }, + { + "epoch": 1.3921658986175114, + "grad_norm": 1.146073776977597, + "learning_rate": 4.642632771496622e-07, + "loss": 0.8510535955429077, + "step": 6042 + }, + { + "epoch": 1.3923963133640553, + "grad_norm": 1.3940656685053847, + "learning_rate": 4.6394161897718454e-07, + "loss": 0.8627035617828369, + "step": 6043 + }, + { + "epoch": 1.392626728110599, + "grad_norm": 1.2671457951329919, + "learning_rate": 4.6362003861287127e-07, + "loss": 0.89891517162323, + "step": 6044 + }, + { + "epoch": 1.3928571428571428, + "grad_norm": 1.3215265337916509, + "learning_rate": 4.6329853610339896e-07, + "loss": 0.7267141342163086, + "step": 6045 + }, + { + "epoch": 1.3930875576036867, + "grad_norm": 1.4814794045534565, + "learning_rate": 4.6297711149543405e-07, + "loss": 0.8021189570426941, + "step": 6046 + }, + { + "epoch": 1.3933179723502305, + 
"grad_norm": 1.0954918085269951, + "learning_rate": 4.6265576483563054e-07, + "loss": 0.7836861610412598, + "step": 6047 + }, + { + "epoch": 1.3935483870967742, + "grad_norm": 1.1158269152355589, + "learning_rate": 4.623344961706309e-07, + "loss": 0.816940188407898, + "step": 6048 + }, + { + "epoch": 1.393778801843318, + "grad_norm": 1.4383712223724088, + "learning_rate": 4.6201330554706773e-07, + "loss": 0.77923583984375, + "step": 6049 + }, + { + "epoch": 1.3940092165898617, + "grad_norm": 1.3116759273395542, + "learning_rate": 4.6169219301156117e-07, + "loss": 0.8017981052398682, + "step": 6050 + }, + { + "epoch": 1.3942396313364056, + "grad_norm": 0.9886522563222937, + "learning_rate": 4.6137115861071973e-07, + "loss": 0.6786847114562988, + "step": 6051 + }, + { + "epoch": 1.3944700460829493, + "grad_norm": 1.1651814302030006, + "learning_rate": 4.61050202391142e-07, + "loss": 0.7802412509918213, + "step": 6052 + }, + { + "epoch": 1.394700460829493, + "grad_norm": 1.1955845105043188, + "learning_rate": 4.6072932439941347e-07, + "loss": 0.7434886693954468, + "step": 6053 + }, + { + "epoch": 1.3949308755760368, + "grad_norm": 1.2231160523968054, + "learning_rate": 4.6040852468211e-07, + "loss": 0.7590811252593994, + "step": 6054 + }, + { + "epoch": 1.3951612903225805, + "grad_norm": 1.5534904257800726, + "learning_rate": 4.600878032857949e-07, + "loss": 0.8952670097351074, + "step": 6055 + }, + { + "epoch": 1.3953917050691245, + "grad_norm": 1.1221688640413483, + "learning_rate": 4.5976716025702036e-07, + "loss": 0.8055328130722046, + "step": 6056 + }, + { + "epoch": 1.3956221198156682, + "grad_norm": 1.2064570897657243, + "learning_rate": 4.5944659564232725e-07, + "loss": 0.8919316530227661, + "step": 6057 + }, + { + "epoch": 1.395852534562212, + "grad_norm": 1.1074605434156857, + "learning_rate": 4.591261094882453e-07, + "loss": 0.701945960521698, + "step": 6058 + }, + { + "epoch": 1.3960829493087559, + "grad_norm": 1.1766452414586335, + "learning_rate": 
4.5880570184129206e-07, + "loss": 0.7457436323165894, + "step": 6059 + }, + { + "epoch": 1.3963133640552996, + "grad_norm": 1.193782401804385, + "learning_rate": 4.5848537274797527e-07, + "loss": 0.8093513250350952, + "step": 6060 + }, + { + "epoch": 1.3965437788018433, + "grad_norm": 1.5454221039375025, + "learning_rate": 4.5816512225478965e-07, + "loss": 0.7098822593688965, + "step": 6061 + }, + { + "epoch": 1.396774193548387, + "grad_norm": 1.2339994165792372, + "learning_rate": 4.578449504082189e-07, + "loss": 0.7423167824745178, + "step": 6062 + }, + { + "epoch": 1.3970046082949308, + "grad_norm": 1.1302042774482615, + "learning_rate": 4.5752485725473624e-07, + "loss": 0.8730076551437378, + "step": 6063 + }, + { + "epoch": 1.3972350230414747, + "grad_norm": 1.124374396794659, + "learning_rate": 4.572048428408024e-07, + "loss": 0.6914420127868652, + "step": 6064 + }, + { + "epoch": 1.3974654377880185, + "grad_norm": 1.3148006815381303, + "learning_rate": 4.5688490721286664e-07, + "loss": 0.8051402568817139, + "step": 6065 + }, + { + "epoch": 1.3976958525345622, + "grad_norm": 1.548390651351193, + "learning_rate": 4.5656505041736803e-07, + "loss": 0.9185452461242676, + "step": 6066 + }, + { + "epoch": 1.397926267281106, + "grad_norm": 1.1772485518113056, + "learning_rate": 4.5624527250073287e-07, + "loss": 0.766645073890686, + "step": 6067 + }, + { + "epoch": 1.3981566820276496, + "grad_norm": 1.3246112666718692, + "learning_rate": 4.559255735093763e-07, + "loss": 0.8005224466323853, + "step": 6068 + }, + { + "epoch": 1.3983870967741936, + "grad_norm": 1.2624209909197728, + "learning_rate": 4.5560595348970275e-07, + "loss": 0.8072810173034668, + "step": 6069 + }, + { + "epoch": 1.3986175115207373, + "grad_norm": 1.2197415999956105, + "learning_rate": 4.552864124881045e-07, + "loss": 0.7537474632263184, + "step": 6070 + }, + { + "epoch": 1.398847926267281, + "grad_norm": 1.3524984308216321, + "learning_rate": 4.549669505509619e-07, + "loss": 0.8396750092506409, + 
"step": 6071 + }, + { + "epoch": 1.399078341013825, + "grad_norm": 1.3095033527266953, + "learning_rate": 4.546475677246453e-07, + "loss": 0.8456804752349854, + "step": 6072 + }, + { + "epoch": 1.3993087557603687, + "grad_norm": 1.212970447769736, + "learning_rate": 4.543282640555123e-07, + "loss": 0.6150076389312744, + "step": 6073 + }, + { + "epoch": 1.3995391705069125, + "grad_norm": 1.1345047277741707, + "learning_rate": 4.540090395899089e-07, + "loss": 0.667172908782959, + "step": 6074 + }, + { + "epoch": 1.3997695852534562, + "grad_norm": 1.1269214154073468, + "learning_rate": 4.5368989437417116e-07, + "loss": 0.7918317914009094, + "step": 6075 + }, + { + "epoch": 1.4, + "grad_norm": 1.070411671989194, + "learning_rate": 4.5337082845462193e-07, + "loss": 0.6800580024719238, + "step": 6076 + }, + { + "epoch": 1.4002304147465439, + "grad_norm": 1.3908779413221009, + "learning_rate": 4.530518418775733e-07, + "loss": 0.9205034971237183, + "step": 6077 + }, + { + "epoch": 1.4004608294930876, + "grad_norm": 0.9376373503434607, + "learning_rate": 4.5273293468932585e-07, + "loss": 0.7228822708129883, + "step": 6078 + }, + { + "epoch": 1.4006912442396313, + "grad_norm": 1.0019153673681407, + "learning_rate": 4.524141069361679e-07, + "loss": 0.6827987432479858, + "step": 6079 + }, + { + "epoch": 1.400921658986175, + "grad_norm": 1.086076018779761, + "learning_rate": 4.520953586643779e-07, + "loss": 0.6272581815719604, + "step": 6080 + }, + { + "epoch": 1.4011520737327188, + "grad_norm": 1.1153873233388363, + "learning_rate": 4.5177668992022125e-07, + "loss": 0.8041881322860718, + "step": 6081 + }, + { + "epoch": 1.4013824884792627, + "grad_norm": 0.986104576594979, + "learning_rate": 4.5145810074995194e-07, + "loss": 0.7284958362579346, + "step": 6082 + }, + { + "epoch": 1.4016129032258065, + "grad_norm": 1.227152604501521, + "learning_rate": 4.511395911998135e-07, + "loss": 0.7653781175613403, + "step": 6083 + }, + { + "epoch": 1.4018433179723502, + "grad_norm": 
1.0466936448387898, + "learning_rate": 4.5082116131603677e-07, + "loss": 0.8037170171737671, + "step": 6084 + }, + { + "epoch": 1.4020737327188941, + "grad_norm": 1.1911735797842866, + "learning_rate": 4.505028111448411e-07, + "loss": 0.783043384552002, + "step": 6085 + }, + { + "epoch": 1.4023041474654379, + "grad_norm": 1.0547410930732963, + "learning_rate": 4.501845407324354e-07, + "loss": 0.6712161302566528, + "step": 6086 + }, + { + "epoch": 1.4025345622119816, + "grad_norm": 1.6406574524985842, + "learning_rate": 4.4986635012501575e-07, + "loss": 0.9537261724472046, + "step": 6087 + }, + { + "epoch": 1.4027649769585253, + "grad_norm": 1.4091085059994304, + "learning_rate": 4.495482393687666e-07, + "loss": 0.8984304666519165, + "step": 6088 + }, + { + "epoch": 1.402995391705069, + "grad_norm": 1.0430973660752654, + "learning_rate": 4.4923020850986224e-07, + "loss": 0.6894555687904358, + "step": 6089 + }, + { + "epoch": 1.403225806451613, + "grad_norm": 1.1542541609725157, + "learning_rate": 4.489122575944639e-07, + "loss": 0.685502290725708, + "step": 6090 + }, + { + "epoch": 1.4034562211981567, + "grad_norm": 1.1082950627991512, + "learning_rate": 4.485943866687216e-07, + "loss": 0.6794239282608032, + "step": 6091 + }, + { + "epoch": 1.4036866359447004, + "grad_norm": 1.0717636346133315, + "learning_rate": 4.482765957787744e-07, + "loss": 0.7647888660430908, + "step": 6092 + }, + { + "epoch": 1.4039170506912442, + "grad_norm": 1.3476206179513355, + "learning_rate": 4.4795888497074896e-07, + "loss": 0.798794150352478, + "step": 6093 + }, + { + "epoch": 1.404147465437788, + "grad_norm": 1.0358789181259667, + "learning_rate": 4.4764125429076026e-07, + "loss": 0.79430091381073, + "step": 6094 + }, + { + "epoch": 1.4043778801843319, + "grad_norm": 1.4040182367122596, + "learning_rate": 4.4732370378491255e-07, + "loss": 0.9089795351028442, + "step": 6095 + }, + { + "epoch": 1.4046082949308756, + "grad_norm": 0.9307801992196251, + "learning_rate": 
4.4700623349929757e-07, + "loss": 0.8270718455314636, + "step": 6096 + }, + { + "epoch": 1.4048387096774193, + "grad_norm": 1.082228260794844, + "learning_rate": 4.466888434799958e-07, + "loss": 0.7550361156463623, + "step": 6097 + }, + { + "epoch": 1.4050691244239633, + "grad_norm": 1.15557625190535, + "learning_rate": 4.463715337730759e-07, + "loss": 0.7406442165374756, + "step": 6098 + }, + { + "epoch": 1.405299539170507, + "grad_norm": 1.4065045960279658, + "learning_rate": 4.460543044245949e-07, + "loss": 0.830552875995636, + "step": 6099 + }, + { + "epoch": 1.4055299539170507, + "grad_norm": 1.4160409051991987, + "learning_rate": 4.45737155480598e-07, + "loss": 0.8961822390556335, + "step": 6100 + }, + { + "epoch": 1.4057603686635944, + "grad_norm": 1.2630678724710616, + "learning_rate": 4.454200869871195e-07, + "loss": 0.6307489275932312, + "step": 6101 + }, + { + "epoch": 1.4059907834101382, + "grad_norm": 1.437795392364305, + "learning_rate": 4.451030989901808e-07, + "loss": 0.8682084083557129, + "step": 6102 + }, + { + "epoch": 1.4062211981566821, + "grad_norm": 1.1897592960029226, + "learning_rate": 4.4478619153579323e-07, + "loss": 0.7157681584358215, + "step": 6103 + }, + { + "epoch": 1.4064516129032258, + "grad_norm": 1.196767224907471, + "learning_rate": 4.4446936466995486e-07, + "loss": 0.7267071008682251, + "step": 6104 + }, + { + "epoch": 1.4066820276497696, + "grad_norm": 1.1191501401801882, + "learning_rate": 4.4415261843865246e-07, + "loss": 0.8435063362121582, + "step": 6105 + }, + { + "epoch": 1.4069124423963133, + "grad_norm": 1.2220260712556485, + "learning_rate": 4.43835952887862e-07, + "loss": 0.8895175457000732, + "step": 6106 + }, + { + "epoch": 1.407142857142857, + "grad_norm": 1.0150052474935476, + "learning_rate": 4.435193680635467e-07, + "loss": 0.7470073699951172, + "step": 6107 + }, + { + "epoch": 1.407373271889401, + "grad_norm": 1.376675993117338, + "learning_rate": 4.432028640116581e-07, + "loss": 0.7993630170822144, + "step": 
6108 + }, + { + "epoch": 1.4076036866359447, + "grad_norm": 1.2675455750766673, + "learning_rate": 4.4288644077813695e-07, + "loss": 0.823069155216217, + "step": 6109 + }, + { + "epoch": 1.4078341013824884, + "grad_norm": 1.374585518914166, + "learning_rate": 4.4257009840891146e-07, + "loss": 0.7665367126464844, + "step": 6110 + }, + { + "epoch": 1.4080645161290324, + "grad_norm": 1.1174810423449963, + "learning_rate": 4.422538369498979e-07, + "loss": 0.7173991799354553, + "step": 6111 + }, + { + "epoch": 1.4082949308755761, + "grad_norm": 0.9476955630635919, + "learning_rate": 4.4193765644700186e-07, + "loss": 0.8288347125053406, + "step": 6112 + }, + { + "epoch": 1.4085253456221198, + "grad_norm": 1.206088367901853, + "learning_rate": 4.4162155694611636e-07, + "loss": 0.8589911460876465, + "step": 6113 + }, + { + "epoch": 1.4087557603686636, + "grad_norm": 1.2884473987369411, + "learning_rate": 4.4130553849312213e-07, + "loss": 0.8783868551254272, + "step": 6114 + }, + { + "epoch": 1.4089861751152073, + "grad_norm": 1.0994332560949611, + "learning_rate": 4.409896011338898e-07, + "loss": 0.7625287771224976, + "step": 6115 + }, + { + "epoch": 1.4092165898617512, + "grad_norm": 1.1571434855502665, + "learning_rate": 4.406737449142769e-07, + "loss": 0.7412571907043457, + "step": 6116 + }, + { + "epoch": 1.409447004608295, + "grad_norm": 0.9525276096114424, + "learning_rate": 4.4035796988012943e-07, + "loss": 0.6248455047607422, + "step": 6117 + }, + { + "epoch": 1.4096774193548387, + "grad_norm": 1.1843810443395109, + "learning_rate": 4.400422760772817e-07, + "loss": 0.7970919609069824, + "step": 6118 + }, + { + "epoch": 1.4099078341013824, + "grad_norm": 1.0403384039115238, + "learning_rate": 4.397266635515563e-07, + "loss": 0.6184223294258118, + "step": 6119 + }, + { + "epoch": 1.4101382488479262, + "grad_norm": 1.07818776364935, + "learning_rate": 4.394111323487637e-07, + "loss": 0.9014843702316284, + "step": 6120 + }, + { + "epoch": 1.41036866359447, + 
"grad_norm": 1.1660248005288976, + "learning_rate": 4.390956825147034e-07, + "loss": 0.8468939661979675, + "step": 6121 + }, + { + "epoch": 1.4105990783410138, + "grad_norm": 1.0810631729189881, + "learning_rate": 4.3878031409516234e-07, + "loss": 0.7832604646682739, + "step": 6122 + }, + { + "epoch": 1.4108294930875576, + "grad_norm": 1.0700225295832282, + "learning_rate": 4.3846502713591527e-07, + "loss": 0.7202898263931274, + "step": 6123 + }, + { + "epoch": 1.4110599078341015, + "grad_norm": 1.1788285042234896, + "learning_rate": 4.3814982168272664e-07, + "loss": 0.6785540580749512, + "step": 6124 + }, + { + "epoch": 1.4112903225806452, + "grad_norm": 1.3040233352486812, + "learning_rate": 4.378346977813474e-07, + "loss": 0.795532763004303, + "step": 6125 + }, + { + "epoch": 1.411520737327189, + "grad_norm": 0.8875056644654742, + "learning_rate": 4.3751965547751735e-07, + "loss": 0.7715259790420532, + "step": 6126 + }, + { + "epoch": 1.4117511520737327, + "grad_norm": 1.4252318364105403, + "learning_rate": 4.37204694816965e-07, + "loss": 0.8657132983207703, + "step": 6127 + }, + { + "epoch": 1.4119815668202764, + "grad_norm": 1.0207817658354317, + "learning_rate": 4.3688981584540586e-07, + "loss": 0.7253363132476807, + "step": 6128 + }, + { + "epoch": 1.4122119815668204, + "grad_norm": 1.2055823367063212, + "learning_rate": 4.365750186085447e-07, + "loss": 0.8511998653411865, + "step": 6129 + }, + { + "epoch": 1.412442396313364, + "grad_norm": 1.3256931814656627, + "learning_rate": 4.3626030315207386e-07, + "loss": 0.7936528921127319, + "step": 6130 + }, + { + "epoch": 1.4126728110599078, + "grad_norm": 1.1878967804503957, + "learning_rate": 4.3594566952167324e-07, + "loss": 0.758521556854248, + "step": 6131 + }, + { + "epoch": 1.4129032258064516, + "grad_norm": 1.242405288398936, + "learning_rate": 4.3563111776301243e-07, + "loss": 0.8202048540115356, + "step": 6132 + }, + { + "epoch": 1.4131336405529953, + "grad_norm": 1.075213759854547, + "learning_rate": 
4.3531664792174773e-07, + "loss": 0.7864067554473877, + "step": 6133 + }, + { + "epoch": 1.4133640552995392, + "grad_norm": 1.472991105564755, + "learning_rate": 4.350022600435236e-07, + "loss": 0.8051233291625977, + "step": 6134 + }, + { + "epoch": 1.413594470046083, + "grad_norm": 1.0811225554895896, + "learning_rate": 4.34687954173974e-07, + "loss": 0.7617348432540894, + "step": 6135 + }, + { + "epoch": 1.4138248847926267, + "grad_norm": 1.299621377240526, + "learning_rate": 4.3437373035871927e-07, + "loss": 0.7899652719497681, + "step": 6136 + }, + { + "epoch": 1.4140552995391706, + "grad_norm": 1.1704157180732915, + "learning_rate": 4.340595886433689e-07, + "loss": 0.8467222452163696, + "step": 6137 + }, + { + "epoch": 1.4142857142857144, + "grad_norm": 1.294364382858993, + "learning_rate": 4.3374552907352003e-07, + "loss": 0.8451426029205322, + "step": 6138 + }, + { + "epoch": 1.414516129032258, + "grad_norm": 1.1053072195052795, + "learning_rate": 4.3343155169475797e-07, + "loss": 0.7140414714813232, + "step": 6139 + }, + { + "epoch": 1.4147465437788018, + "grad_norm": 1.365344165744123, + "learning_rate": 4.331176565526558e-07, + "loss": 0.7680803537368774, + "step": 6140 + }, + { + "epoch": 1.4149769585253456, + "grad_norm": 1.0970331390876962, + "learning_rate": 4.328038436927757e-07, + "loss": 0.7262120246887207, + "step": 6141 + }, + { + "epoch": 1.4152073732718895, + "grad_norm": 1.2176292189863585, + "learning_rate": 4.3249011316066676e-07, + "loss": 0.7788687944412231, + "step": 6142 + }, + { + "epoch": 1.4154377880184332, + "grad_norm": 1.4880584379115793, + "learning_rate": 4.321764650018662e-07, + "loss": 0.7613503336906433, + "step": 6143 + }, + { + "epoch": 1.415668202764977, + "grad_norm": 0.9554644370778598, + "learning_rate": 4.3186289926190056e-07, + "loss": 0.6778309345245361, + "step": 6144 + }, + { + "epoch": 1.4158986175115207, + "grad_norm": 1.5159867718873894, + "learning_rate": 4.315494159862829e-07, + "loss": 0.8626673221588135, + 
"step": 6145 + }, + { + "epoch": 1.4161290322580644, + "grad_norm": 1.194727935560369, + "learning_rate": 4.312360152205147e-07, + "loss": 0.8321051597595215, + "step": 6146 + }, + { + "epoch": 1.4163594470046084, + "grad_norm": 1.146293428483721, + "learning_rate": 4.309226970100861e-07, + "loss": 0.9317119717597961, + "step": 6147 + }, + { + "epoch": 1.416589861751152, + "grad_norm": 1.4669878139895565, + "learning_rate": 4.306094614004748e-07, + "loss": 0.9479870200157166, + "step": 6148 + }, + { + "epoch": 1.4168202764976958, + "grad_norm": 1.0166991353273056, + "learning_rate": 4.3029630843714606e-07, + "loss": 0.8222699165344238, + "step": 6149 + }, + { + "epoch": 1.4170506912442398, + "grad_norm": 1.427356205375722, + "learning_rate": 4.2998323816555427e-07, + "loss": 0.8232519030570984, + "step": 6150 + }, + { + "epoch": 1.4172811059907833, + "grad_norm": 1.156719588287236, + "learning_rate": 4.2967025063114057e-07, + "loss": 0.7423735857009888, + "step": 6151 + }, + { + "epoch": 1.4175115207373272, + "grad_norm": 1.1009896479281802, + "learning_rate": 4.2935734587933527e-07, + "loss": 0.6947557926177979, + "step": 6152 + }, + { + "epoch": 1.417741935483871, + "grad_norm": 1.2980025668504918, + "learning_rate": 4.290445239555558e-07, + "loss": 0.789128303527832, + "step": 6153 + }, + { + "epoch": 1.4179723502304147, + "grad_norm": 1.344185599290992, + "learning_rate": 4.2873178490520745e-07, + "loss": 0.8025885820388794, + "step": 6154 + }, + { + "epoch": 1.4182027649769586, + "grad_norm": 1.3491619317054568, + "learning_rate": 4.284191287736847e-07, + "loss": 0.8139045238494873, + "step": 6155 + }, + { + "epoch": 1.4184331797235024, + "grad_norm": 1.1246209635446252, + "learning_rate": 4.2810655560636864e-07, + "loss": 0.8154167532920837, + "step": 6156 + }, + { + "epoch": 1.418663594470046, + "grad_norm": 1.0954033524128675, + "learning_rate": 4.2779406544862896e-07, + "loss": 0.6383910775184631, + "step": 6157 + }, + { + "epoch": 1.4188940092165898, + 
"grad_norm": 1.217902628448707, + "learning_rate": 4.2748165834582316e-07, + "loss": 0.7008179426193237, + "step": 6158 + }, + { + "epoch": 1.4191244239631335, + "grad_norm": 1.2584275851601723, + "learning_rate": 4.2716933434329684e-07, + "loss": 0.9458012580871582, + "step": 6159 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 1.1170402428175406, + "learning_rate": 4.268570934863829e-07, + "loss": 0.7354133725166321, + "step": 6160 + }, + { + "epoch": 1.4195852534562212, + "grad_norm": 1.050503834766047, + "learning_rate": 4.265449358204034e-07, + "loss": 0.7146268486976624, + "step": 6161 + }, + { + "epoch": 1.419815668202765, + "grad_norm": 1.3602740783757037, + "learning_rate": 4.262328613906674e-07, + "loss": 0.7357315421104431, + "step": 6162 + }, + { + "epoch": 1.4200460829493087, + "grad_norm": 1.5139772991772644, + "learning_rate": 4.2592087024247157e-07, + "loss": 0.8006314039230347, + "step": 6163 + }, + { + "epoch": 1.4202764976958524, + "grad_norm": 1.2194249079603743, + "learning_rate": 4.256089624211018e-07, + "loss": 0.8299369812011719, + "step": 6164 + }, + { + "epoch": 1.4205069124423964, + "grad_norm": 1.3878054713959478, + "learning_rate": 4.252971379718308e-07, + "loss": 0.7018890380859375, + "step": 6165 + }, + { + "epoch": 1.42073732718894, + "grad_norm": 1.0332854509364862, + "learning_rate": 4.24985396939919e-07, + "loss": 0.6501315236091614, + "step": 6166 + }, + { + "epoch": 1.4209677419354838, + "grad_norm": 1.6385767983913562, + "learning_rate": 4.24673739370616e-07, + "loss": 0.8379749059677124, + "step": 6167 + }, + { + "epoch": 1.4211981566820278, + "grad_norm": 1.3590615179836698, + "learning_rate": 4.24362165309158e-07, + "loss": 0.7996747493743896, + "step": 6168 + }, + { + "epoch": 1.4214285714285715, + "grad_norm": 1.2270246479776195, + "learning_rate": 4.240506748007695e-07, + "loss": 0.7258181571960449, + "step": 6169 + }, + { + "epoch": 1.4216589861751152, + "grad_norm": 0.9997463365032918, + "learning_rate": 
4.237392678906633e-07, + "loss": 0.6035803556442261, + "step": 6170 + }, + { + "epoch": 1.421889400921659, + "grad_norm": 1.1041316785012205, + "learning_rate": 4.2342794462403954e-07, + "loss": 0.7668799757957458, + "step": 6171 + }, + { + "epoch": 1.4221198156682027, + "grad_norm": 0.9385556238542058, + "learning_rate": 4.23116705046086e-07, + "loss": 0.7816733121871948, + "step": 6172 + }, + { + "epoch": 1.4223502304147466, + "grad_norm": 1.2003519134278278, + "learning_rate": 4.228055492019793e-07, + "loss": 0.8753983974456787, + "step": 6173 + }, + { + "epoch": 1.4225806451612903, + "grad_norm": 1.1591394093837553, + "learning_rate": 4.224944771368831e-07, + "loss": 0.8319464921951294, + "step": 6174 + }, + { + "epoch": 1.422811059907834, + "grad_norm": 1.1444278460686073, + "learning_rate": 4.2218348889594866e-07, + "loss": 0.6670328378677368, + "step": 6175 + }, + { + "epoch": 1.4230414746543778, + "grad_norm": 0.9949133230999909, + "learning_rate": 4.218725845243163e-07, + "loss": 0.7879645824432373, + "step": 6176 + }, + { + "epoch": 1.4232718894009215, + "grad_norm": 1.1897456513351008, + "learning_rate": 4.2156176406711287e-07, + "loss": 0.709680438041687, + "step": 6177 + }, + { + "epoch": 1.4235023041474655, + "grad_norm": 1.2454467445687987, + "learning_rate": 4.2125102756945364e-07, + "loss": 0.7990894317626953, + "step": 6178 + }, + { + "epoch": 1.4237327188940092, + "grad_norm": 0.899401568311558, + "learning_rate": 4.2094037507644165e-07, + "loss": 0.7283308506011963, + "step": 6179 + }, + { + "epoch": 1.423963133640553, + "grad_norm": 1.1017464258775596, + "learning_rate": 4.2062980663316715e-07, + "loss": 0.8763309717178345, + "step": 6180 + }, + { + "epoch": 1.4241935483870969, + "grad_norm": 1.5313476968397717, + "learning_rate": 4.2031932228470966e-07, + "loss": 0.9370014667510986, + "step": 6181 + }, + { + "epoch": 1.4244239631336406, + "grad_norm": 1.2317913481286529, + "learning_rate": 4.2000892207613526e-07, + "loss": 0.7883036136627197, 
+ "step": 6182 + }, + { + "epoch": 1.4246543778801843, + "grad_norm": 1.0986212570485994, + "learning_rate": 4.196986060524975e-07, + "loss": 0.7021682262420654, + "step": 6183 + }, + { + "epoch": 1.424884792626728, + "grad_norm": 1.6809928588875014, + "learning_rate": 4.193883742588393e-07, + "loss": 0.842636227607727, + "step": 6184 + }, + { + "epoch": 1.4251152073732718, + "grad_norm": 1.3804520546599122, + "learning_rate": 4.190782267401899e-07, + "loss": 0.8003957867622375, + "step": 6185 + }, + { + "epoch": 1.4253456221198157, + "grad_norm": 1.4234115388616575, + "learning_rate": 4.1876816354156655e-07, + "loss": 0.9799495935440063, + "step": 6186 + }, + { + "epoch": 1.4255760368663595, + "grad_norm": 1.4430834747300494, + "learning_rate": 4.184581847079751e-07, + "loss": 0.8726102113723755, + "step": 6187 + }, + { + "epoch": 1.4258064516129032, + "grad_norm": 1.4779961873749974, + "learning_rate": 4.181482902844082e-07, + "loss": 0.8771729469299316, + "step": 6188 + }, + { + "epoch": 1.426036866359447, + "grad_norm": 0.932904262005563, + "learning_rate": 4.1783848031584644e-07, + "loss": 0.5891281962394714, + "step": 6189 + }, + { + "epoch": 1.4262672811059907, + "grad_norm": 1.0356433358815755, + "learning_rate": 4.1752875484725904e-07, + "loss": 0.8133054971694946, + "step": 6190 + }, + { + "epoch": 1.4264976958525346, + "grad_norm": 1.2051464792634443, + "learning_rate": 4.1721911392360164e-07, + "loss": 0.7175684571266174, + "step": 6191 + }, + { + "epoch": 1.4267281105990783, + "grad_norm": 1.2483759508518841, + "learning_rate": 4.16909557589818e-07, + "loss": 0.7112927436828613, + "step": 6192 + }, + { + "epoch": 1.426958525345622, + "grad_norm": 1.3756845434805187, + "learning_rate": 4.166000858908406e-07, + "loss": 0.8564406037330627, + "step": 6193 + }, + { + "epoch": 1.427188940092166, + "grad_norm": 1.2070686503198162, + "learning_rate": 4.162906988715883e-07, + "loss": 0.7630729675292969, + "step": 6194 + }, + { + "epoch": 1.4274193548387097, + 
"grad_norm": 0.971140934311516, + "learning_rate": 4.1598139657696806e-07, + "loss": 0.6810768246650696, + "step": 6195 + }, + { + "epoch": 1.4276497695852535, + "grad_norm": 0.9185719080310675, + "learning_rate": 4.1567217905187535e-07, + "loss": 0.8482312560081482, + "step": 6196 + }, + { + "epoch": 1.4278801843317972, + "grad_norm": 1.4356078879259653, + "learning_rate": 4.1536304634119225e-07, + "loss": 0.845355749130249, + "step": 6197 + }, + { + "epoch": 1.428110599078341, + "grad_norm": 1.3990653285356356, + "learning_rate": 4.1505399848978896e-07, + "loss": 0.8082824349403381, + "step": 6198 + }, + { + "epoch": 1.4283410138248849, + "grad_norm": 1.5497395393382225, + "learning_rate": 4.147450355425235e-07, + "loss": 0.8141404390335083, + "step": 6199 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 1.0209015709753073, + "learning_rate": 4.14436157544241e-07, + "loss": 0.8144549131393433, + "step": 6200 + }, + { + "epoch": 1.4288018433179723, + "grad_norm": 1.2316152605954584, + "learning_rate": 4.141273645397754e-07, + "loss": 0.6554359793663025, + "step": 6201 + }, + { + "epoch": 1.429032258064516, + "grad_norm": 1.2095729612520494, + "learning_rate": 4.138186565739472e-07, + "loss": 0.8035449981689453, + "step": 6202 + }, + { + "epoch": 1.4292626728110598, + "grad_norm": 1.348688453980758, + "learning_rate": 4.1351003369156467e-07, + "loss": 0.7848105430603027, + "step": 6203 + }, + { + "epoch": 1.4294930875576037, + "grad_norm": 1.167048125389705, + "learning_rate": 4.132014959374246e-07, + "loss": 0.7064214944839478, + "step": 6204 + }, + { + "epoch": 1.4297235023041475, + "grad_norm": 1.236002479887974, + "learning_rate": 4.128930433563107e-07, + "loss": 0.7636318802833557, + "step": 6205 + }, + { + "epoch": 1.4299539170506912, + "grad_norm": 1.2440935326289273, + "learning_rate": 4.1258467599299395e-07, + "loss": 0.6839499473571777, + "step": 6206 + }, + { + "epoch": 1.4301843317972351, + "grad_norm": 1.1802386777878584, + "learning_rate": 
4.122763938922341e-07, + "loss": 0.8355294466018677, + "step": 6207 + }, + { + "epoch": 1.4304147465437789, + "grad_norm": 1.1238131581281627, + "learning_rate": 4.1196819709877773e-07, + "loss": 0.7563334107398987, + "step": 6208 + }, + { + "epoch": 1.4306451612903226, + "grad_norm": 1.1336601077663977, + "learning_rate": 4.116600856573588e-07, + "loss": 0.6991991996765137, + "step": 6209 + }, + { + "epoch": 1.4308755760368663, + "grad_norm": 1.2669311049959366, + "learning_rate": 4.113520596126998e-07, + "loss": 0.7249872088432312, + "step": 6210 + }, + { + "epoch": 1.43110599078341, + "grad_norm": 0.9386622429459606, + "learning_rate": 4.110441190095101e-07, + "loss": 0.6570736169815063, + "step": 6211 + }, + { + "epoch": 1.431336405529954, + "grad_norm": 1.0652944602016763, + "learning_rate": 4.107362638924865e-07, + "loss": 0.7137724161148071, + "step": 6212 + }, + { + "epoch": 1.4315668202764977, + "grad_norm": 1.1571956532799377, + "learning_rate": 4.1042849430631453e-07, + "loss": 0.7620561122894287, + "step": 6213 + }, + { + "epoch": 1.4317972350230415, + "grad_norm": 1.118516282963539, + "learning_rate": 4.1012081029566616e-07, + "loss": 0.8186367750167847, + "step": 6214 + }, + { + "epoch": 1.4320276497695852, + "grad_norm": 1.2414517851095686, + "learning_rate": 4.098132119052008e-07, + "loss": 0.8068171739578247, + "step": 6215 + }, + { + "epoch": 1.432258064516129, + "grad_norm": 1.3160335320341774, + "learning_rate": 4.095056991795668e-07, + "loss": 0.8640002012252808, + "step": 6216 + }, + { + "epoch": 1.4324884792626729, + "grad_norm": 1.4376158954775202, + "learning_rate": 4.0919827216339887e-07, + "loss": 0.8886386156082153, + "step": 6217 + }, + { + "epoch": 1.4327188940092166, + "grad_norm": 1.072787779438559, + "learning_rate": 4.0889093090131965e-07, + "loss": 0.6853137016296387, + "step": 6218 + }, + { + "epoch": 1.4329493087557603, + "grad_norm": 1.0751813749856631, + "learning_rate": 4.0858367543793923e-07, + "loss": 0.7423670291900635, + 
"step": 6219 + }, + { + "epoch": 1.4331797235023043, + "grad_norm": 1.2596005033506457, + "learning_rate": 4.0827650581785544e-07, + "loss": 0.7969200611114502, + "step": 6220 + }, + { + "epoch": 1.433410138248848, + "grad_norm": 1.1441853902577663, + "learning_rate": 4.079694220856531e-07, + "loss": 0.8506221771240234, + "step": 6221 + }, + { + "epoch": 1.4336405529953917, + "grad_norm": 1.107985966829949, + "learning_rate": 4.076624242859058e-07, + "loss": 0.6755083799362183, + "step": 6222 + }, + { + "epoch": 1.4338709677419355, + "grad_norm": 1.0751582832116895, + "learning_rate": 4.0735551246317333e-07, + "loss": 0.7734944820404053, + "step": 6223 + }, + { + "epoch": 1.4341013824884792, + "grad_norm": 1.1828392807290495, + "learning_rate": 4.0704868666200345e-07, + "loss": 0.8564216494560242, + "step": 6224 + }, + { + "epoch": 1.4343317972350231, + "grad_norm": 0.8521811929477493, + "learning_rate": 4.067419469269321e-07, + "loss": 0.6858065128326416, + "step": 6225 + }, + { + "epoch": 1.4345622119815669, + "grad_norm": 1.4454169020848073, + "learning_rate": 4.064352933024813e-07, + "loss": 0.684749960899353, + "step": 6226 + }, + { + "epoch": 1.4347926267281106, + "grad_norm": 1.0124943930771644, + "learning_rate": 4.061287258331624e-07, + "loss": 0.7648766040802002, + "step": 6227 + }, + { + "epoch": 1.4350230414746543, + "grad_norm": 1.2226521022766697, + "learning_rate": 4.058222445634727e-07, + "loss": 0.924850583076477, + "step": 6228 + }, + { + "epoch": 1.435253456221198, + "grad_norm": 1.2841804739911125, + "learning_rate": 4.055158495378972e-07, + "loss": 0.906406581401825, + "step": 6229 + }, + { + "epoch": 1.435483870967742, + "grad_norm": 1.1497462597145154, + "learning_rate": 4.052095408009095e-07, + "loss": 0.9169156551361084, + "step": 6230 + }, + { + "epoch": 1.4357142857142857, + "grad_norm": 0.9291011874506654, + "learning_rate": 4.0490331839696967e-07, + "loss": 0.7367587685585022, + "step": 6231 + }, + { + "epoch": 1.4359447004608294, + 
"grad_norm": 0.9837392218179005, + "learning_rate": 4.045971823705249e-07, + "loss": 0.7608749270439148, + "step": 6232 + }, + { + "epoch": 1.4361751152073734, + "grad_norm": 1.006459600101246, + "learning_rate": 4.0429113276601134e-07, + "loss": 0.7008038759231567, + "step": 6233 + }, + { + "epoch": 1.4364055299539171, + "grad_norm": 1.3644950830796674, + "learning_rate": 4.039851696278511e-07, + "loss": 0.8581372499465942, + "step": 6234 + }, + { + "epoch": 1.4366359447004609, + "grad_norm": 1.1117269621825037, + "learning_rate": 4.036792930004542e-07, + "loss": 0.6602354049682617, + "step": 6235 + }, + { + "epoch": 1.4368663594470046, + "grad_norm": 1.1136625894629528, + "learning_rate": 4.0337350292821893e-07, + "loss": 0.8560018539428711, + "step": 6236 + }, + { + "epoch": 1.4370967741935483, + "grad_norm": 1.5699670277885023, + "learning_rate": 4.030677994555298e-07, + "loss": 0.8837640285491943, + "step": 6237 + }, + { + "epoch": 1.4373271889400923, + "grad_norm": 1.1788518631283098, + "learning_rate": 4.027621826267593e-07, + "loss": 0.8214797973632812, + "step": 6238 + }, + { + "epoch": 1.437557603686636, + "grad_norm": 1.091488147712342, + "learning_rate": 4.024566524862675e-07, + "loss": 0.7590944766998291, + "step": 6239 + }, + { + "epoch": 1.4377880184331797, + "grad_norm": 1.5224250495012106, + "learning_rate": 4.021512090784014e-07, + "loss": 0.8792011141777039, + "step": 6240 + }, + { + "epoch": 1.4380184331797234, + "grad_norm": 0.9801567843215049, + "learning_rate": 4.0184585244749556e-07, + "loss": 0.8309401273727417, + "step": 6241 + }, + { + "epoch": 1.4382488479262672, + "grad_norm": 1.2518924977337436, + "learning_rate": 4.015405826378727e-07, + "loss": 0.7474797964096069, + "step": 6242 + }, + { + "epoch": 1.4384792626728111, + "grad_norm": 1.0203221096159534, + "learning_rate": 4.012353996938421e-07, + "loss": 0.7376091480255127, + "step": 6243 + }, + { + "epoch": 1.4387096774193548, + "grad_norm": 1.4049798692682764, + "learning_rate": 
4.0093030365970014e-07, + "loss": 0.7809054851531982, + "step": 6244 + }, + { + "epoch": 1.4389400921658986, + "grad_norm": 1.206100995388555, + "learning_rate": 4.0062529457973194e-07, + "loss": 0.8551669120788574, + "step": 6245 + }, + { + "epoch": 1.4391705069124425, + "grad_norm": 1.3285364918408127, + "learning_rate": 4.0032037249820874e-07, + "loss": 0.7874705791473389, + "step": 6246 + }, + { + "epoch": 1.4394009216589863, + "grad_norm": 1.220500481419073, + "learning_rate": 4.0001553745938923e-07, + "loss": 0.8032190799713135, + "step": 6247 + }, + { + "epoch": 1.43963133640553, + "grad_norm": 1.1833761956090303, + "learning_rate": 3.9971078950752057e-07, + "loss": 0.7600107192993164, + "step": 6248 + }, + { + "epoch": 1.4398617511520737, + "grad_norm": 1.0770488794400255, + "learning_rate": 3.994061286868361e-07, + "loss": 0.7738933563232422, + "step": 6249 + }, + { + "epoch": 1.4400921658986174, + "grad_norm": 1.2036013798832181, + "learning_rate": 3.9910155504155665e-07, + "loss": 0.701007604598999, + "step": 6250 + }, + { + "epoch": 1.4403225806451614, + "grad_norm": 1.2067244620095277, + "learning_rate": 3.9879706861589126e-07, + "loss": 0.8962818384170532, + "step": 6251 + }, + { + "epoch": 1.4405529953917051, + "grad_norm": 1.4532648423769148, + "learning_rate": 3.9849266945403513e-07, + "loss": 0.7636146545410156, + "step": 6252 + }, + { + "epoch": 1.4407834101382488, + "grad_norm": 1.4158432417231142, + "learning_rate": 3.981883576001722e-07, + "loss": 0.8816943168640137, + "step": 6253 + }, + { + "epoch": 1.4410138248847926, + "grad_norm": 1.2321816109724755, + "learning_rate": 3.978841330984725e-07, + "loss": 0.7252858877182007, + "step": 6254 + }, + { + "epoch": 1.4412442396313363, + "grad_norm": 1.1568327683598156, + "learning_rate": 3.975799959930932e-07, + "loss": 0.6720175743103027, + "step": 6255 + }, + { + "epoch": 1.4414746543778802, + "grad_norm": 0.981779637597959, + "learning_rate": 3.972759463281805e-07, + "loss": 0.8000779151916504, 
+ "step": 6256 + }, + { + "epoch": 1.441705069124424, + "grad_norm": 1.2561538909400267, + "learning_rate": 3.9697198414786626e-07, + "loss": 0.7356371283531189, + "step": 6257 + }, + { + "epoch": 1.4419354838709677, + "grad_norm": 1.3228468777834088, + "learning_rate": 3.966681094962703e-07, + "loss": 0.708438515663147, + "step": 6258 + }, + { + "epoch": 1.4421658986175117, + "grad_norm": 1.1635121950639566, + "learning_rate": 3.963643224174994e-07, + "loss": 0.709287166595459, + "step": 6259 + }, + { + "epoch": 1.4423963133640554, + "grad_norm": 1.2638923885979756, + "learning_rate": 3.9606062295564813e-07, + "loss": 0.743755578994751, + "step": 6260 + }, + { + "epoch": 1.442626728110599, + "grad_norm": 1.119467668131696, + "learning_rate": 3.9575701115479744e-07, + "loss": 0.9727948904037476, + "step": 6261 + }, + { + "epoch": 1.4428571428571428, + "grad_norm": 1.165539680123963, + "learning_rate": 3.9545348705901703e-07, + "loss": 0.9070688486099243, + "step": 6262 + }, + { + "epoch": 1.4430875576036866, + "grad_norm": 1.3995169117674358, + "learning_rate": 3.951500507123627e-07, + "loss": 0.8167496919631958, + "step": 6263 + }, + { + "epoch": 1.4433179723502305, + "grad_norm": 1.1204443462300027, + "learning_rate": 3.948467021588775e-07, + "loss": 0.7691773772239685, + "step": 6264 + }, + { + "epoch": 1.4435483870967742, + "grad_norm": 1.2915211655205685, + "learning_rate": 3.945434414425927e-07, + "loss": 0.7638411521911621, + "step": 6265 + }, + { + "epoch": 1.443778801843318, + "grad_norm": 1.0311097608426527, + "learning_rate": 3.942402686075258e-07, + "loss": 0.8138284683227539, + "step": 6266 + }, + { + "epoch": 1.4440092165898617, + "grad_norm": 1.430800234304149, + "learning_rate": 3.939371836976816e-07, + "loss": 0.8404628038406372, + "step": 6267 + }, + { + "epoch": 1.4442396313364054, + "grad_norm": 1.0744818989251388, + "learning_rate": 3.936341867570533e-07, + "loss": 0.7354726791381836, + "step": 6268 + }, + { + "epoch": 1.4444700460829494, + 
"grad_norm": 1.2516347720495873, + "learning_rate": 3.9333127782962003e-07, + "loss": 0.8607511520385742, + "step": 6269 + }, + { + "epoch": 1.444700460829493, + "grad_norm": 1.03787633948696, + "learning_rate": 3.930284569593483e-07, + "loss": 0.7372239232063293, + "step": 6270 + }, + { + "epoch": 1.4449308755760368, + "grad_norm": 1.205690175362699, + "learning_rate": 3.927257241901929e-07, + "loss": 0.8902593851089478, + "step": 6271 + }, + { + "epoch": 1.4451612903225808, + "grad_norm": 1.0978426997676995, + "learning_rate": 3.924230795660947e-07, + "loss": 0.7481765747070312, + "step": 6272 + }, + { + "epoch": 1.4453917050691243, + "grad_norm": 1.1624854693895736, + "learning_rate": 3.9212052313098177e-07, + "loss": 0.6868888139724731, + "step": 6273 + }, + { + "epoch": 1.4456221198156682, + "grad_norm": 1.219538424407328, + "learning_rate": 3.918180549287705e-07, + "loss": 0.6867324709892273, + "step": 6274 + }, + { + "epoch": 1.445852534562212, + "grad_norm": 1.4192898010151693, + "learning_rate": 3.9151567500336323e-07, + "loss": 0.8473105430603027, + "step": 6275 + }, + { + "epoch": 1.4460829493087557, + "grad_norm": 1.2236253801186994, + "learning_rate": 3.912133833986504e-07, + "loss": 0.7629631757736206, + "step": 6276 + }, + { + "epoch": 1.4463133640552996, + "grad_norm": 1.0502703605539807, + "learning_rate": 3.909111801585091e-07, + "loss": 0.9501597881317139, + "step": 6277 + }, + { + "epoch": 1.4465437788018434, + "grad_norm": 1.0568805239624584, + "learning_rate": 3.906090653268037e-07, + "loss": 0.7330536842346191, + "step": 6278 + }, + { + "epoch": 1.446774193548387, + "grad_norm": 1.199243558298224, + "learning_rate": 3.903070389473857e-07, + "loss": 0.907101571559906, + "step": 6279 + }, + { + "epoch": 1.4470046082949308, + "grad_norm": 1.1269939172893009, + "learning_rate": 3.900051010640939e-07, + "loss": 0.8177503347396851, + "step": 6280 + }, + { + "epoch": 1.4472350230414746, + "grad_norm": 1.373102048695832, + "learning_rate": 
3.897032517207538e-07, + "loss": 0.7851059436798096, + "step": 6281 + }, + { + "epoch": 1.4474654377880185, + "grad_norm": 0.8801777971944739, + "learning_rate": 3.8940149096117914e-07, + "loss": 0.7056214809417725, + "step": 6282 + }, + { + "epoch": 1.4476958525345622, + "grad_norm": 1.0831833275731695, + "learning_rate": 3.8909981882916975e-07, + "loss": 0.784143328666687, + "step": 6283 + }, + { + "epoch": 1.447926267281106, + "grad_norm": 1.2368924313085696, + "learning_rate": 3.8879823536851253e-07, + "loss": 0.8157210350036621, + "step": 6284 + }, + { + "epoch": 1.4481566820276497, + "grad_norm": 1.276176943713772, + "learning_rate": 3.884967406229828e-07, + "loss": 0.7329680323600769, + "step": 6285 + }, + { + "epoch": 1.4483870967741934, + "grad_norm": 1.4518343581804805, + "learning_rate": 3.8819533463634145e-07, + "loss": 0.9214208722114563, + "step": 6286 + }, + { + "epoch": 1.4486175115207374, + "grad_norm": 1.835142969551997, + "learning_rate": 3.8789401745233706e-07, + "loss": 0.8118722438812256, + "step": 6287 + }, + { + "epoch": 1.448847926267281, + "grad_norm": 1.0485981202236783, + "learning_rate": 3.8759278911470615e-07, + "loss": 0.7517364025115967, + "step": 6288 + }, + { + "epoch": 1.4490783410138248, + "grad_norm": 1.0879409814064, + "learning_rate": 3.872916496671711e-07, + "loss": 0.8979834318161011, + "step": 6289 + }, + { + "epoch": 1.4493087557603688, + "grad_norm": 1.6674549792368192, + "learning_rate": 3.8699059915344166e-07, + "loss": 0.9159818887710571, + "step": 6290 + }, + { + "epoch": 1.4495391705069125, + "grad_norm": 1.2582380909324238, + "learning_rate": 3.8668963761721563e-07, + "loss": 0.8176029324531555, + "step": 6291 + }, + { + "epoch": 1.4497695852534562, + "grad_norm": 1.3257834277786367, + "learning_rate": 3.8638876510217666e-07, + "loss": 0.7077589631080627, + "step": 6292 + }, + { + "epoch": 1.45, + "grad_norm": 1.0304546829516872, + "learning_rate": 3.8608798165199585e-07, + "loss": 0.8107718825340271, + "step": 6293 
+ }, + { + "epoch": 1.4502304147465437, + "grad_norm": 1.278146889045901, + "learning_rate": 3.8578728731033214e-07, + "loss": 0.9021201133728027, + "step": 6294 + }, + { + "epoch": 1.4504608294930876, + "grad_norm": 1.5907360314325336, + "learning_rate": 3.854866821208306e-07, + "loss": 0.9134507179260254, + "step": 6295 + }, + { + "epoch": 1.4506912442396314, + "grad_norm": 1.2431886164023473, + "learning_rate": 3.8518616612712317e-07, + "loss": 0.9081463813781738, + "step": 6296 + }, + { + "epoch": 1.450921658986175, + "grad_norm": 1.394869861453301, + "learning_rate": 3.848857393728303e-07, + "loss": 0.7892032861709595, + "step": 6297 + }, + { + "epoch": 1.4511520737327188, + "grad_norm": 1.1702087372951315, + "learning_rate": 3.8458540190155796e-07, + "loss": 0.753928542137146, + "step": 6298 + }, + { + "epoch": 1.4513824884792625, + "grad_norm": 1.1800339185606825, + "learning_rate": 3.8428515375689996e-07, + "loss": 0.6316792964935303, + "step": 6299 + }, + { + "epoch": 1.4516129032258065, + "grad_norm": 1.0510746352372813, + "learning_rate": 3.8398499498243665e-07, + "loss": 0.6569210290908813, + "step": 6300 + }, + { + "epoch": 1.4518433179723502, + "grad_norm": 1.2827982624069105, + "learning_rate": 3.836849256217355e-07, + "loss": 0.9082256555557251, + "step": 6301 + }, + { + "epoch": 1.452073732718894, + "grad_norm": 1.2539326790404104, + "learning_rate": 3.833849457183519e-07, + "loss": 0.6533655524253845, + "step": 6302 + }, + { + "epoch": 1.452304147465438, + "grad_norm": 1.1962706885387824, + "learning_rate": 3.830850553158271e-07, + "loss": 0.8181168437004089, + "step": 6303 + }, + { + "epoch": 1.4525345622119816, + "grad_norm": 1.191632474290621, + "learning_rate": 3.827852544576895e-07, + "loss": 0.8258780241012573, + "step": 6304 + }, + { + "epoch": 1.4527649769585254, + "grad_norm": 1.2200843626761786, + "learning_rate": 3.824855431874555e-07, + "loss": 0.7917114496231079, + "step": 6305 + }, + { + "epoch": 1.452995391705069, + "grad_norm": 
1.1119249100754447, + "learning_rate": 3.821859215486274e-07, + "loss": 0.7523643970489502, + "step": 6306 + }, + { + "epoch": 1.4532258064516128, + "grad_norm": 1.173507656799684, + "learning_rate": 3.818863895846945e-07, + "loss": 0.7248106002807617, + "step": 6307 + }, + { + "epoch": 1.4534562211981568, + "grad_norm": 1.0384099625968284, + "learning_rate": 3.815869473391343e-07, + "loss": 0.6663920879364014, + "step": 6308 + }, + { + "epoch": 1.4536866359447005, + "grad_norm": 1.2904533830018654, + "learning_rate": 3.8128759485540995e-07, + "loss": 0.887082576751709, + "step": 6309 + }, + { + "epoch": 1.4539170506912442, + "grad_norm": 1.176731626067417, + "learning_rate": 3.8098833217697193e-07, + "loss": 0.8491328954696655, + "step": 6310 + }, + { + "epoch": 1.454147465437788, + "grad_norm": 0.995531509886264, + "learning_rate": 3.806891593472582e-07, + "loss": 0.6749746799468994, + "step": 6311 + }, + { + "epoch": 1.4543778801843317, + "grad_norm": 1.2359927269681388, + "learning_rate": 3.803900764096932e-07, + "loss": 0.7607502937316895, + "step": 6312 + }, + { + "epoch": 1.4546082949308756, + "grad_norm": 0.9855772687954082, + "learning_rate": 3.8009108340768804e-07, + "loss": 0.6713626980781555, + "step": 6313 + }, + { + "epoch": 1.4548387096774194, + "grad_norm": 1.0335982949651026, + "learning_rate": 3.797921803846419e-07, + "loss": 0.7031810879707336, + "step": 6314 + }, + { + "epoch": 1.455069124423963, + "grad_norm": 1.2499044478276522, + "learning_rate": 3.7949336738393955e-07, + "loss": 0.7233775854110718, + "step": 6315 + }, + { + "epoch": 1.455299539170507, + "grad_norm": 1.1902627494977487, + "learning_rate": 3.791946444489532e-07, + "loss": 0.7446990013122559, + "step": 6316 + }, + { + "epoch": 1.4555299539170508, + "grad_norm": 1.0356528338667375, + "learning_rate": 3.7889601162304273e-07, + "loss": 0.731992244720459, + "step": 6317 + }, + { + "epoch": 1.4557603686635945, + "grad_norm": 0.9012124257356037, + "learning_rate": 
3.785974689495539e-07, + "loss": 0.7167335152626038, + "step": 6318 + }, + { + "epoch": 1.4559907834101382, + "grad_norm": 1.0367746360279544, + "learning_rate": 3.7829901647181993e-07, + "loss": 0.7634297609329224, + "step": 6319 + }, + { + "epoch": 1.456221198156682, + "grad_norm": 1.323601627974345, + "learning_rate": 3.7800065423316066e-07, + "loss": 0.7584050893783569, + "step": 6320 + }, + { + "epoch": 1.456451612903226, + "grad_norm": 1.3168506305563585, + "learning_rate": 3.777023822768829e-07, + "loss": 0.7150899171829224, + "step": 6321 + }, + { + "epoch": 1.4566820276497696, + "grad_norm": 1.3142694869577929, + "learning_rate": 3.7740420064628034e-07, + "loss": 0.7821052670478821, + "step": 6322 + }, + { + "epoch": 1.4569124423963133, + "grad_norm": 1.1890463822517086, + "learning_rate": 3.7710610938463405e-07, + "loss": 0.8678094148635864, + "step": 6323 + }, + { + "epoch": 1.457142857142857, + "grad_norm": 1.0929926711457507, + "learning_rate": 3.7680810853521107e-07, + "loss": 0.6953635215759277, + "step": 6324 + }, + { + "epoch": 1.4573732718894008, + "grad_norm": 1.392687245093679, + "learning_rate": 3.765101981412665e-07, + "loss": 0.765946626663208, + "step": 6325 + }, + { + "epoch": 1.4576036866359448, + "grad_norm": 1.2287803375758581, + "learning_rate": 3.7621237824604137e-07, + "loss": 0.8828680515289307, + "step": 6326 + }, + { + "epoch": 1.4578341013824885, + "grad_norm": 1.4191080683791804, + "learning_rate": 3.7591464889276326e-07, + "loss": 0.8916178345680237, + "step": 6327 + }, + { + "epoch": 1.4580645161290322, + "grad_norm": 1.4414543071479498, + "learning_rate": 3.756170101246481e-07, + "loss": 0.7563039064407349, + "step": 6328 + }, + { + "epoch": 1.4582949308755762, + "grad_norm": 1.1488058177567217, + "learning_rate": 3.7531946198489725e-07, + "loss": 0.8548855781555176, + "step": 6329 + }, + { + "epoch": 1.4585253456221199, + "grad_norm": 1.2471941201918813, + "learning_rate": 3.750220045166993e-07, + "loss": 0.8337546586990356, 
+ "step": 6330 + }, + { + "epoch": 1.4587557603686636, + "grad_norm": 1.2665043024049272, + "learning_rate": 3.7472463776323036e-07, + "loss": 0.8909939527511597, + "step": 6331 + }, + { + "epoch": 1.4589861751152073, + "grad_norm": 0.9459101838544814, + "learning_rate": 3.744273617676524e-07, + "loss": 0.629026472568512, + "step": 6332 + }, + { + "epoch": 1.459216589861751, + "grad_norm": 1.245577103796106, + "learning_rate": 3.7413017657311454e-07, + "loss": 0.7264849543571472, + "step": 6333 + }, + { + "epoch": 1.459447004608295, + "grad_norm": 1.0987416494814488, + "learning_rate": 3.738330822227532e-07, + "loss": 0.808081865310669, + "step": 6334 + }, + { + "epoch": 1.4596774193548387, + "grad_norm": 1.145687515640666, + "learning_rate": 3.7353607875969115e-07, + "loss": 0.6092932820320129, + "step": 6335 + }, + { + "epoch": 1.4599078341013825, + "grad_norm": 1.2636271324745916, + "learning_rate": 3.7323916622703756e-07, + "loss": 0.8700584173202515, + "step": 6336 + }, + { + "epoch": 1.4601382488479262, + "grad_norm": 1.2867446987977476, + "learning_rate": 3.7294234466788954e-07, + "loss": 0.8424433469772339, + "step": 6337 + }, + { + "epoch": 1.46036866359447, + "grad_norm": 1.1929868573019329, + "learning_rate": 3.7264561412533013e-07, + "loss": 0.8587443828582764, + "step": 6338 + }, + { + "epoch": 1.4605990783410139, + "grad_norm": 1.1369944171843958, + "learning_rate": 3.7234897464242934e-07, + "loss": 0.7708064913749695, + "step": 6339 + }, + { + "epoch": 1.4608294930875576, + "grad_norm": 0.9599493655503268, + "learning_rate": 3.7205242626224395e-07, + "loss": 0.8226567506790161, + "step": 6340 + }, + { + "epoch": 1.4610599078341013, + "grad_norm": 1.6926769297162396, + "learning_rate": 3.717559690278176e-07, + "loss": 0.8414342403411865, + "step": 6341 + }, + { + "epoch": 1.4612903225806453, + "grad_norm": 1.136325082903018, + "learning_rate": 3.714596029821804e-07, + "loss": 0.765863299369812, + "step": 6342 + }, + { + "epoch": 1.461520737327189, + 
"grad_norm": 1.2033696575950952, + "learning_rate": 3.7116332816834997e-07, + "loss": 0.7253202199935913, + "step": 6343 + }, + { + "epoch": 1.4617511520737327, + "grad_norm": 1.2614732245354896, + "learning_rate": 3.7086714462933e-07, + "loss": 0.786415696144104, + "step": 6344 + }, + { + "epoch": 1.4619815668202765, + "grad_norm": 1.3398597613096093, + "learning_rate": 3.705710524081108e-07, + "loss": 0.8382824659347534, + "step": 6345 + }, + { + "epoch": 1.4622119815668202, + "grad_norm": 1.1421503229190921, + "learning_rate": 3.702750515476705e-07, + "loss": 0.7953319549560547, + "step": 6346 + }, + { + "epoch": 1.4624423963133641, + "grad_norm": 1.1953524657169348, + "learning_rate": 3.699791420909727e-07, + "loss": 0.7897430658340454, + "step": 6347 + }, + { + "epoch": 1.4626728110599079, + "grad_norm": 1.0462269201726477, + "learning_rate": 3.6968332408096804e-07, + "loss": 0.7276254892349243, + "step": 6348 + }, + { + "epoch": 1.4629032258064516, + "grad_norm": 1.2576670635193097, + "learning_rate": 3.693875975605949e-07, + "loss": 0.7318450212478638, + "step": 6349 + }, + { + "epoch": 1.4631336405529953, + "grad_norm": 1.3298595608160129, + "learning_rate": 3.6909196257277676e-07, + "loss": 0.8438090085983276, + "step": 6350 + }, + { + "epoch": 1.463364055299539, + "grad_norm": 1.1958819221255177, + "learning_rate": 3.6879641916042534e-07, + "loss": 0.7977915406227112, + "step": 6351 + }, + { + "epoch": 1.463594470046083, + "grad_norm": 1.5876789525233332, + "learning_rate": 3.685009673664382e-07, + "loss": 0.8845348358154297, + "step": 6352 + }, + { + "epoch": 1.4638248847926267, + "grad_norm": 1.1089282393569035, + "learning_rate": 3.682056072336992e-07, + "loss": 0.8971320986747742, + "step": 6353 + }, + { + "epoch": 1.4640552995391705, + "grad_norm": 1.1499585685789093, + "learning_rate": 3.679103388050803e-07, + "loss": 0.7015302181243896, + "step": 6354 + }, + { + "epoch": 1.4642857142857144, + "grad_norm": 1.058413373940715, + "learning_rate": 
3.676151621234389e-07, + "loss": 0.5953146815299988, + "step": 6355 + }, + { + "epoch": 1.4645161290322581, + "grad_norm": 0.940762320723037, + "learning_rate": 3.673200772316193e-07, + "loss": 0.5794636011123657, + "step": 6356 + }, + { + "epoch": 1.4647465437788019, + "grad_norm": 1.4093031765021824, + "learning_rate": 3.6702508417245324e-07, + "loss": 0.8272292017936707, + "step": 6357 + }, + { + "epoch": 1.4649769585253456, + "grad_norm": 1.2004626750502272, + "learning_rate": 3.6673018298875826e-07, + "loss": 0.7239755392074585, + "step": 6358 + }, + { + "epoch": 1.4652073732718893, + "grad_norm": 1.0592207409293348, + "learning_rate": 3.6643537372333886e-07, + "loss": 0.8597465753555298, + "step": 6359 + }, + { + "epoch": 1.4654377880184333, + "grad_norm": 1.3768417389873642, + "learning_rate": 3.661406564189862e-07, + "loss": 0.7540475130081177, + "step": 6360 + }, + { + "epoch": 1.465668202764977, + "grad_norm": 1.2300552177842492, + "learning_rate": 3.658460311184782e-07, + "loss": 0.793259859085083, + "step": 6361 + }, + { + "epoch": 1.4658986175115207, + "grad_norm": 1.1933122341650848, + "learning_rate": 3.6555149786457883e-07, + "loss": 0.797966718673706, + "step": 6362 + }, + { + "epoch": 1.4661290322580645, + "grad_norm": 1.082541374270611, + "learning_rate": 3.6525705670004016e-07, + "loss": 0.7466796636581421, + "step": 6363 + }, + { + "epoch": 1.4663594470046082, + "grad_norm": 0.9612262339874744, + "learning_rate": 3.6496270766759927e-07, + "loss": 0.7694044709205627, + "step": 6364 + }, + { + "epoch": 1.4665898617511521, + "grad_norm": 1.753828188679532, + "learning_rate": 3.6466845080998043e-07, + "loss": 0.7701553106307983, + "step": 6365 + }, + { + "epoch": 1.4668202764976959, + "grad_norm": 1.0670832455899337, + "learning_rate": 3.643742861698952e-07, + "loss": 0.6718326807022095, + "step": 6366 + }, + { + "epoch": 1.4670506912442396, + "grad_norm": 1.1220075290963027, + "learning_rate": 3.6408021379004086e-07, + "loss": 0.7099052667617798, 
+ "step": 6367 + }, + { + "epoch": 1.4672811059907835, + "grad_norm": 1.0614563823752192, + "learning_rate": 3.6378623371310126e-07, + "loss": 0.8650654554367065, + "step": 6368 + }, + { + "epoch": 1.4675115207373273, + "grad_norm": 1.18691798498221, + "learning_rate": 3.6349234598174794e-07, + "loss": 0.7920950055122375, + "step": 6369 + }, + { + "epoch": 1.467741935483871, + "grad_norm": 1.3672164620265899, + "learning_rate": 3.63198550638638e-07, + "loss": 0.7927969098091125, + "step": 6370 + }, + { + "epoch": 1.4679723502304147, + "grad_norm": 1.6817643007938734, + "learning_rate": 3.6290484772641514e-07, + "loss": 0.9403868913650513, + "step": 6371 + }, + { + "epoch": 1.4682027649769585, + "grad_norm": 1.188245842937741, + "learning_rate": 3.626112372877106e-07, + "loss": 0.9157334566116333, + "step": 6372 + }, + { + "epoch": 1.4684331797235024, + "grad_norm": 1.0918511661649737, + "learning_rate": 3.6231771936514067e-07, + "loss": 0.7742066979408264, + "step": 6373 + }, + { + "epoch": 1.4686635944700461, + "grad_norm": 1.0472722321327697, + "learning_rate": 3.6202429400131006e-07, + "loss": 0.69399094581604, + "step": 6374 + }, + { + "epoch": 1.4688940092165899, + "grad_norm": 1.243240675298042, + "learning_rate": 3.6173096123880854e-07, + "loss": 0.874832272529602, + "step": 6375 + }, + { + "epoch": 1.4691244239631336, + "grad_norm": 0.9504044447465768, + "learning_rate": 3.6143772112021275e-07, + "loss": 0.6685272455215454, + "step": 6376 + }, + { + "epoch": 1.4693548387096773, + "grad_norm": 1.2588614059189167, + "learning_rate": 3.611445736880867e-07, + "loss": 0.7422738671302795, + "step": 6377 + }, + { + "epoch": 1.4695852534562213, + "grad_norm": 1.1563672807518934, + "learning_rate": 3.6085151898498e-07, + "loss": 0.8208622932434082, + "step": 6378 + }, + { + "epoch": 1.469815668202765, + "grad_norm": 1.278791922768039, + "learning_rate": 3.605585570534293e-07, + "loss": 0.8001033663749695, + "step": 6379 + }, + { + "epoch": 1.4700460829493087, + 
"grad_norm": 1.4073194030234843, + "learning_rate": 3.6026568793595744e-07, + "loss": 0.789332926273346, + "step": 6380 + }, + { + "epoch": 1.4702764976958527, + "grad_norm": 1.1542499539799642, + "learning_rate": 3.599729116750742e-07, + "loss": 0.8071820139884949, + "step": 6381 + }, + { + "epoch": 1.4705069124423962, + "grad_norm": 1.3369229588575535, + "learning_rate": 3.5968022831327506e-07, + "loss": 0.8028534054756165, + "step": 6382 + }, + { + "epoch": 1.4707373271889401, + "grad_norm": 1.0119395143433376, + "learning_rate": 3.593876378930435e-07, + "loss": 0.6888329982757568, + "step": 6383 + }, + { + "epoch": 1.4709677419354839, + "grad_norm": 1.285773441215651, + "learning_rate": 3.590951404568483e-07, + "loss": 0.8176132440567017, + "step": 6384 + }, + { + "epoch": 1.4711981566820276, + "grad_norm": 0.9429108192029542, + "learning_rate": 3.588027360471446e-07, + "loss": 0.6715027689933777, + "step": 6385 + }, + { + "epoch": 1.4714285714285715, + "grad_norm": 1.2177133807456715, + "learning_rate": 3.585104247063753e-07, + "loss": 0.8622937798500061, + "step": 6386 + }, + { + "epoch": 1.4716589861751153, + "grad_norm": 1.252482813795077, + "learning_rate": 3.5821820647696864e-07, + "loss": 0.7244299650192261, + "step": 6387 + }, + { + "epoch": 1.471889400921659, + "grad_norm": 1.2422776234152886, + "learning_rate": 3.579260814013393e-07, + "loss": 0.8130464553833008, + "step": 6388 + }, + { + "epoch": 1.4721198156682027, + "grad_norm": 1.739841773852821, + "learning_rate": 3.576340495218897e-07, + "loss": 0.8563692569732666, + "step": 6389 + }, + { + "epoch": 1.4723502304147464, + "grad_norm": 1.1474783445098509, + "learning_rate": 3.573421108810073e-07, + "loss": 0.8315908908843994, + "step": 6390 + }, + { + "epoch": 1.4725806451612904, + "grad_norm": 1.0916407928923948, + "learning_rate": 3.5705026552106645e-07, + "loss": 0.653038740158081, + "step": 6391 + }, + { + "epoch": 1.4728110599078341, + "grad_norm": 1.250110377436999, + "learning_rate": 
3.5675851348442876e-07, + "loss": 0.7511966228485107, + "step": 6392 + }, + { + "epoch": 1.4730414746543778, + "grad_norm": 1.226967151246929, + "learning_rate": 3.564668548134413e-07, + "loss": 0.8675990104675293, + "step": 6393 + }, + { + "epoch": 1.4732718894009218, + "grad_norm": 1.2481066388566375, + "learning_rate": 3.5617528955043765e-07, + "loss": 0.7574094533920288, + "step": 6394 + }, + { + "epoch": 1.4735023041474653, + "grad_norm": 1.3612516426224104, + "learning_rate": 3.5588381773773866e-07, + "loss": 0.7004787921905518, + "step": 6395 + }, + { + "epoch": 1.4737327188940093, + "grad_norm": 1.193988835000252, + "learning_rate": 3.555924394176508e-07, + "loss": 0.680101215839386, + "step": 6396 + }, + { + "epoch": 1.473963133640553, + "grad_norm": 1.2956197944669767, + "learning_rate": 3.55301154632467e-07, + "loss": 0.8340710401535034, + "step": 6397 + }, + { + "epoch": 1.4741935483870967, + "grad_norm": 1.2156451361937963, + "learning_rate": 3.5500996342446756e-07, + "loss": 0.8307079076766968, + "step": 6398 + }, + { + "epoch": 1.4744239631336407, + "grad_norm": 1.3824459968937755, + "learning_rate": 3.547188658359179e-07, + "loss": 0.9614958167076111, + "step": 6399 + }, + { + "epoch": 1.4746543778801844, + "grad_norm": 1.2140973914551956, + "learning_rate": 3.544278619090707e-07, + "loss": 0.782494068145752, + "step": 6400 + }, + { + "epoch": 1.4748847926267281, + "grad_norm": 1.372883571978596, + "learning_rate": 3.5413695168616474e-07, + "loss": 0.7474460601806641, + "step": 6401 + }, + { + "epoch": 1.4751152073732718, + "grad_norm": 1.0929029713656226, + "learning_rate": 3.5384613520942484e-07, + "loss": 0.7182635068893433, + "step": 6402 + }, + { + "epoch": 1.4753456221198156, + "grad_norm": 1.1562679128127753, + "learning_rate": 3.5355541252106336e-07, + "loss": 0.8116436004638672, + "step": 6403 + }, + { + "epoch": 1.4755760368663595, + "grad_norm": 1.1320096436261353, + "learning_rate": 3.5326478366327806e-07, + "loss": 0.8007283210754395, + 
"step": 6404 + }, + { + "epoch": 1.4758064516129032, + "grad_norm": 1.060451283065696, + "learning_rate": 3.5297424867825276e-07, + "loss": 0.7707732915878296, + "step": 6405 + }, + { + "epoch": 1.476036866359447, + "grad_norm": 1.319974893721661, + "learning_rate": 3.5268380760815917e-07, + "loss": 0.8031977415084839, + "step": 6406 + }, + { + "epoch": 1.4762672811059907, + "grad_norm": 1.0847497024921582, + "learning_rate": 3.5239346049515397e-07, + "loss": 0.7113008499145508, + "step": 6407 + }, + { + "epoch": 1.4764976958525344, + "grad_norm": 1.490354792200027, + "learning_rate": 3.521032073813802e-07, + "loss": 0.8069616556167603, + "step": 6408 + }, + { + "epoch": 1.4767281105990784, + "grad_norm": 1.6536617293382079, + "learning_rate": 3.518130483089686e-07, + "loss": 0.9780417680740356, + "step": 6409 + }, + { + "epoch": 1.476958525345622, + "grad_norm": 1.0393285063529043, + "learning_rate": 3.515229833200351e-07, + "loss": 0.765299379825592, + "step": 6410 + }, + { + "epoch": 1.4771889400921658, + "grad_norm": 0.9792702634570369, + "learning_rate": 3.512330124566816e-07, + "loss": 0.7279179096221924, + "step": 6411 + }, + { + "epoch": 1.4774193548387098, + "grad_norm": 1.3765526641198769, + "learning_rate": 3.509431357609978e-07, + "loss": 0.8429825901985168, + "step": 6412 + }, + { + "epoch": 1.4776497695852535, + "grad_norm": 1.2876523066268597, + "learning_rate": 3.506533532750586e-07, + "loss": 0.741936206817627, + "step": 6413 + }, + { + "epoch": 1.4778801843317972, + "grad_norm": 1.0841845353527741, + "learning_rate": 3.5036366504092527e-07, + "loss": 0.6841387748718262, + "step": 6414 + }, + { + "epoch": 1.478110599078341, + "grad_norm": 1.1361546476433346, + "learning_rate": 3.5007407110064626e-07, + "loss": 0.7136961221694946, + "step": 6415 + }, + { + "epoch": 1.4783410138248847, + "grad_norm": 1.1942730912918724, + "learning_rate": 3.497845714962554e-07, + "loss": 0.8483344912528992, + "step": 6416 + }, + { + "epoch": 1.4785714285714286, + 
"grad_norm": 1.1525838724707749, + "learning_rate": 3.4949516626977294e-07, + "loss": 0.7060235738754272, + "step": 6417 + }, + { + "epoch": 1.4788018433179724, + "grad_norm": 1.2546190088001288, + "learning_rate": 3.4920585546320625e-07, + "loss": 0.7351587414741516, + "step": 6418 + }, + { + "epoch": 1.479032258064516, + "grad_norm": 1.4082190266306274, + "learning_rate": 3.489166391185482e-07, + "loss": 0.7445269823074341, + "step": 6419 + }, + { + "epoch": 1.4792626728110598, + "grad_norm": 1.2308828080413103, + "learning_rate": 3.4862751727777796e-07, + "loss": 0.795128583908081, + "step": 6420 + }, + { + "epoch": 1.4794930875576036, + "grad_norm": 1.3455737723646244, + "learning_rate": 3.4833848998286133e-07, + "loss": 0.7916193008422852, + "step": 6421 + }, + { + "epoch": 1.4797235023041475, + "grad_norm": 1.2062461099240058, + "learning_rate": 3.480495572757497e-07, + "loss": 0.8279474973678589, + "step": 6422 + }, + { + "epoch": 1.4799539170506912, + "grad_norm": 1.3615355231577309, + "learning_rate": 3.477607191983822e-07, + "loss": 0.9339898824691772, + "step": 6423 + }, + { + "epoch": 1.480184331797235, + "grad_norm": 1.2958649175302657, + "learning_rate": 3.4747197579268296e-07, + "loss": 0.8579660654067993, + "step": 6424 + }, + { + "epoch": 1.480414746543779, + "grad_norm": 1.1935735021965341, + "learning_rate": 3.471833271005622e-07, + "loss": 0.7637878060340881, + "step": 6425 + }, + { + "epoch": 1.4806451612903226, + "grad_norm": 1.2997741786350927, + "learning_rate": 3.4689477316391756e-07, + "loss": 0.8600465059280396, + "step": 6426 + }, + { + "epoch": 1.4808755760368664, + "grad_norm": 0.9725758019670567, + "learning_rate": 3.46606314024632e-07, + "loss": 0.6576759815216064, + "step": 6427 + }, + { + "epoch": 1.48110599078341, + "grad_norm": 1.1289750059608772, + "learning_rate": 3.463179497245747e-07, + "loss": 0.7556706666946411, + "step": 6428 + }, + { + "epoch": 1.4813364055299538, + "grad_norm": 1.3449392913610907, + "learning_rate": 
3.4602968030560196e-07, + "loss": 0.8826701641082764, + "step": 6429 + }, + { + "epoch": 1.4815668202764978, + "grad_norm": 1.1499087478485694, + "learning_rate": 3.457415058095554e-07, + "loss": 0.7352213263511658, + "step": 6430 + }, + { + "epoch": 1.4817972350230415, + "grad_norm": 1.4434298728988502, + "learning_rate": 3.454534262782628e-07, + "loss": 0.8108851909637451, + "step": 6431 + }, + { + "epoch": 1.4820276497695852, + "grad_norm": 1.3070168078927469, + "learning_rate": 3.4516544175353914e-07, + "loss": 0.8595583438873291, + "step": 6432 + }, + { + "epoch": 1.482258064516129, + "grad_norm": 1.1496814595283131, + "learning_rate": 3.448775522771847e-07, + "loss": 0.7194280028343201, + "step": 6433 + }, + { + "epoch": 1.4824884792626727, + "grad_norm": 1.2788780172510947, + "learning_rate": 3.445897578909861e-07, + "loss": 0.8966056108474731, + "step": 6434 + }, + { + "epoch": 1.4827188940092166, + "grad_norm": 1.4168806857520198, + "learning_rate": 3.443020586367167e-07, + "loss": 0.8089771270751953, + "step": 6435 + }, + { + "epoch": 1.4829493087557604, + "grad_norm": 1.3086078413537297, + "learning_rate": 3.4401445455613555e-07, + "loss": 0.7835644483566284, + "step": 6436 + }, + { + "epoch": 1.483179723502304, + "grad_norm": 1.242850049469479, + "learning_rate": 3.4372694569098746e-07, + "loss": 0.7285257577896118, + "step": 6437 + }, + { + "epoch": 1.483410138248848, + "grad_norm": 1.4884020116718253, + "learning_rate": 3.434395320830048e-07, + "loss": 0.9108592867851257, + "step": 6438 + }, + { + "epoch": 1.4836405529953918, + "grad_norm": 1.265305751937672, + "learning_rate": 3.431522137739049e-07, + "loss": 0.7154395580291748, + "step": 6439 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 1.0883673646660943, + "learning_rate": 3.428649908053917e-07, + "loss": 0.6483602523803711, + "step": 6440 + }, + { + "epoch": 1.4841013824884792, + "grad_norm": 1.457129029114168, + "learning_rate": 3.425778632191551e-07, + "loss": 0.8090662956237793, + 
"step": 6441 + }, + { + "epoch": 1.484331797235023, + "grad_norm": 1.428702771444548, + "learning_rate": 3.422908310568712e-07, + "loss": 0.7884642481803894, + "step": 6442 + }, + { + "epoch": 1.484562211981567, + "grad_norm": 1.2738553778883674, + "learning_rate": 3.4200389436020225e-07, + "loss": 0.8628194332122803, + "step": 6443 + }, + { + "epoch": 1.4847926267281106, + "grad_norm": 1.1838310809928603, + "learning_rate": 3.4171705317079723e-07, + "loss": 0.8192269802093506, + "step": 6444 + }, + { + "epoch": 1.4850230414746544, + "grad_norm": 1.316668872684636, + "learning_rate": 3.4143030753029054e-07, + "loss": 0.7768012285232544, + "step": 6445 + }, + { + "epoch": 1.485253456221198, + "grad_norm": 1.2324282268735118, + "learning_rate": 3.411436574803026e-07, + "loss": 0.7420791387557983, + "step": 6446 + }, + { + "epoch": 1.4854838709677418, + "grad_norm": 1.3102449774544425, + "learning_rate": 3.4085710306244086e-07, + "loss": 0.823938250541687, + "step": 6447 + }, + { + "epoch": 1.4857142857142858, + "grad_norm": 1.1672900255965821, + "learning_rate": 3.405706443182976e-07, + "loss": 0.7215089201927185, + "step": 6448 + }, + { + "epoch": 1.4859447004608295, + "grad_norm": 1.138949819615918, + "learning_rate": 3.4028428128945286e-07, + "loss": 0.8301436901092529, + "step": 6449 + }, + { + "epoch": 1.4861751152073732, + "grad_norm": 1.1171858572091258, + "learning_rate": 3.399980140174712e-07, + "loss": 0.6727990508079529, + "step": 6450 + }, + { + "epoch": 1.4864055299539172, + "grad_norm": 1.0969379356045603, + "learning_rate": 3.397118425439038e-07, + "loss": 0.8364754319190979, + "step": 6451 + }, + { + "epoch": 1.486635944700461, + "grad_norm": 1.2714499604529865, + "learning_rate": 3.394257669102887e-07, + "loss": 0.7241604328155518, + "step": 6452 + }, + { + "epoch": 1.4868663594470046, + "grad_norm": 1.429435383993002, + "learning_rate": 3.3913978715814897e-07, + "loss": 0.7762489914894104, + "step": 6453 + }, + { + "epoch": 1.4870967741935484, + 
"grad_norm": 1.3862601382620485, + "learning_rate": 3.38853903328994e-07, + "loss": 0.9278200268745422, + "step": 6454 + }, + { + "epoch": 1.487327188940092, + "grad_norm": 0.9454491284474441, + "learning_rate": 3.3856811546431994e-07, + "loss": 0.693070113658905, + "step": 6455 + }, + { + "epoch": 1.487557603686636, + "grad_norm": 1.4631261008304832, + "learning_rate": 3.382824236056084e-07, + "loss": 0.8541949987411499, + "step": 6456 + }, + { + "epoch": 1.4877880184331798, + "grad_norm": 1.1080747331787868, + "learning_rate": 3.379968277943267e-07, + "loss": 0.7638850212097168, + "step": 6457 + }, + { + "epoch": 1.4880184331797235, + "grad_norm": 1.5396868765343736, + "learning_rate": 3.377113280719295e-07, + "loss": 0.8240739107131958, + "step": 6458 + }, + { + "epoch": 1.4882488479262672, + "grad_norm": 1.037738997106509, + "learning_rate": 3.374259244798562e-07, + "loss": 0.7360633015632629, + "step": 6459 + }, + { + "epoch": 1.488479262672811, + "grad_norm": 1.1287418173516828, + "learning_rate": 3.371406170595328e-07, + "loss": 0.8626362085342407, + "step": 6460 + }, + { + "epoch": 1.488709677419355, + "grad_norm": 1.553133844655672, + "learning_rate": 3.368554058523713e-07, + "loss": 0.8499895334243774, + "step": 6461 + }, + { + "epoch": 1.4889400921658986, + "grad_norm": 1.1568237777707882, + "learning_rate": 3.3657029089976985e-07, + "loss": 0.8335039615631104, + "step": 6462 + }, + { + "epoch": 1.4891705069124423, + "grad_norm": 1.1957026633378731, + "learning_rate": 3.3628527224311196e-07, + "loss": 0.8154790997505188, + "step": 6463 + }, + { + "epoch": 1.4894009216589863, + "grad_norm": 1.2851436413791164, + "learning_rate": 3.3600034992376856e-07, + "loss": 0.7952951192855835, + "step": 6464 + }, + { + "epoch": 1.48963133640553, + "grad_norm": 1.5993164682006433, + "learning_rate": 3.3571552398309535e-07, + "loss": 0.7227598428726196, + "step": 6465 + }, + { + "epoch": 1.4898617511520738, + "grad_norm": 1.1773028491207966, + "learning_rate": 
3.3543079446243404e-07, + "loss": 0.6703250408172607, + "step": 6466 + }, + { + "epoch": 1.4900921658986175, + "grad_norm": 1.152932493736184, + "learning_rate": 3.351461614031136e-07, + "loss": 0.7468122243881226, + "step": 6467 + }, + { + "epoch": 1.4903225806451612, + "grad_norm": 1.2933114629854674, + "learning_rate": 3.348616248464475e-07, + "loss": 0.8649178743362427, + "step": 6468 + }, + { + "epoch": 1.4905529953917052, + "grad_norm": 1.013990280281903, + "learning_rate": 3.345771848337359e-07, + "loss": 0.8229554295539856, + "step": 6469 + }, + { + "epoch": 1.4907834101382489, + "grad_norm": 1.3471402030282535, + "learning_rate": 3.342928414062652e-07, + "loss": 0.7275597453117371, + "step": 6470 + }, + { + "epoch": 1.4910138248847926, + "grad_norm": 1.095192106330462, + "learning_rate": 3.3400859460530737e-07, + "loss": 0.657899796962738, + "step": 6471 + }, + { + "epoch": 1.4912442396313363, + "grad_norm": 1.0853913135805695, + "learning_rate": 3.3372444447212e-07, + "loss": 0.7579425573348999, + "step": 6472 + }, + { + "epoch": 1.49147465437788, + "grad_norm": 1.1304988993649205, + "learning_rate": 3.334403910479479e-07, + "loss": 0.8707751631736755, + "step": 6473 + }, + { + "epoch": 1.491705069124424, + "grad_norm": 1.3454806591137698, + "learning_rate": 3.331564343740201e-07, + "loss": 0.7923752665519714, + "step": 6474 + }, + { + "epoch": 1.4919354838709677, + "grad_norm": 1.2646674876263875, + "learning_rate": 3.328725744915536e-07, + "loss": 0.8308948278427124, + "step": 6475 + }, + { + "epoch": 1.4921658986175115, + "grad_norm": 1.4029553470676885, + "learning_rate": 3.3258881144174967e-07, + "loss": 0.8984559774398804, + "step": 6476 + }, + { + "epoch": 1.4923963133640554, + "grad_norm": 1.2358798089346714, + "learning_rate": 3.3230514526579614e-07, + "loss": 0.9279792308807373, + "step": 6477 + }, + { + "epoch": 1.4926267281105992, + "grad_norm": 1.4094728162225774, + "learning_rate": 3.3202157600486655e-07, + "loss": 0.7934520244598389, + 
"step": 6478 + }, + { + "epoch": 1.4928571428571429, + "grad_norm": 1.658388461731414, + "learning_rate": 3.3173810370012136e-07, + "loss": 0.8463613390922546, + "step": 6479 + }, + { + "epoch": 1.4930875576036866, + "grad_norm": 1.339159678666659, + "learning_rate": 3.314547283927057e-07, + "loss": 0.8087350130081177, + "step": 6480 + }, + { + "epoch": 1.4933179723502303, + "grad_norm": 1.2350842201271304, + "learning_rate": 3.3117145012375113e-07, + "loss": 0.7711254358291626, + "step": 6481 + }, + { + "epoch": 1.4935483870967743, + "grad_norm": 1.2753839749074636, + "learning_rate": 3.3088826893437526e-07, + "loss": 0.7140679359436035, + "step": 6482 + }, + { + "epoch": 1.493778801843318, + "grad_norm": 1.1506161777222865, + "learning_rate": 3.3060518486568103e-07, + "loss": 0.7074463367462158, + "step": 6483 + }, + { + "epoch": 1.4940092165898617, + "grad_norm": 0.8291232249474376, + "learning_rate": 3.3032219795875827e-07, + "loss": 0.7560559511184692, + "step": 6484 + }, + { + "epoch": 1.4942396313364055, + "grad_norm": 1.4344445687170468, + "learning_rate": 3.3003930825468194e-07, + "loss": 0.7699435353279114, + "step": 6485 + }, + { + "epoch": 1.4944700460829492, + "grad_norm": 1.277197987117764, + "learning_rate": 3.297565157945129e-07, + "loss": 0.817488431930542, + "step": 6486 + }, + { + "epoch": 1.4947004608294931, + "grad_norm": 1.1511534488778172, + "learning_rate": 3.294738206192985e-07, + "loss": 0.7534141540527344, + "step": 6487 + }, + { + "epoch": 1.4949308755760369, + "grad_norm": 1.1924480850963226, + "learning_rate": 3.291912227700715e-07, + "loss": 0.7423536777496338, + "step": 6488 + }, + { + "epoch": 1.4951612903225806, + "grad_norm": 0.952322784205302, + "learning_rate": 3.2890872228785003e-07, + "loss": 0.7181985378265381, + "step": 6489 + }, + { + "epoch": 1.4953917050691246, + "grad_norm": 1.270224090305602, + "learning_rate": 3.286263192136396e-07, + "loss": 0.7143938541412354, + "step": 6490 + }, + { + "epoch": 1.4956221198156683, + 
"grad_norm": 1.3995714023195414, + "learning_rate": 3.2834401358843e-07, + "loss": 0.8247631788253784, + "step": 6491 + }, + { + "epoch": 1.495852534562212, + "grad_norm": 1.1449759372564834, + "learning_rate": 3.280618054531974e-07, + "loss": 0.8627001047134399, + "step": 6492 + }, + { + "epoch": 1.4960829493087557, + "grad_norm": 1.3482725665599868, + "learning_rate": 3.2777969484890456e-07, + "loss": 0.813239574432373, + "step": 6493 + }, + { + "epoch": 1.4963133640552995, + "grad_norm": 0.9200346218481302, + "learning_rate": 3.2749768181649904e-07, + "loss": 0.6633884310722351, + "step": 6494 + }, + { + "epoch": 1.4965437788018434, + "grad_norm": 1.4278232440541767, + "learning_rate": 3.272157663969144e-07, + "loss": 0.7760038375854492, + "step": 6495 + }, + { + "epoch": 1.4967741935483871, + "grad_norm": 1.3200918095184475, + "learning_rate": 3.2693394863107105e-07, + "loss": 0.9352993369102478, + "step": 6496 + }, + { + "epoch": 1.4970046082949309, + "grad_norm": 1.2344539392280847, + "learning_rate": 3.2665222855987397e-07, + "loss": 0.7011485695838928, + "step": 6497 + }, + { + "epoch": 1.4972350230414746, + "grad_norm": 1.2183950494067446, + "learning_rate": 3.263706062242142e-07, + "loss": 0.9008398056030273, + "step": 6498 + }, + { + "epoch": 1.4974654377880183, + "grad_norm": 1.194608222128912, + "learning_rate": 3.260890816649694e-07, + "loss": 0.768037736415863, + "step": 6499 + }, + { + "epoch": 1.4976958525345623, + "grad_norm": 0.9220148240054391, + "learning_rate": 3.258076549230024e-07, + "loss": 0.7603639364242554, + "step": 6500 + }, + { + "epoch": 1.497926267281106, + "grad_norm": 1.3821459764557307, + "learning_rate": 3.2552632603916177e-07, + "loss": 0.7984024286270142, + "step": 6501 + }, + { + "epoch": 1.4981566820276497, + "grad_norm": 1.415424035035242, + "learning_rate": 3.2524509505428187e-07, + "loss": 0.8466978073120117, + "step": 6502 + }, + { + "epoch": 1.4983870967741937, + "grad_norm": 1.3670825801142161, + "learning_rate": 
3.24963962009183e-07, + "loss": 0.7964911460876465, + "step": 6503 + }, + { + "epoch": 1.4986175115207372, + "grad_norm": 1.3123478568754847, + "learning_rate": 3.246829269446716e-07, + "loss": 0.7551665306091309, + "step": 6504 + }, + { + "epoch": 1.4988479262672811, + "grad_norm": 1.3193018902055227, + "learning_rate": 3.2440198990153945e-07, + "loss": 0.6468057632446289, + "step": 6505 + }, + { + "epoch": 1.4990783410138249, + "grad_norm": 1.2139801652485203, + "learning_rate": 3.241211509205638e-07, + "loss": 0.7739330530166626, + "step": 6506 + }, + { + "epoch": 1.4993087557603686, + "grad_norm": 1.3659144717848737, + "learning_rate": 3.238404100425085e-07, + "loss": 0.8205568790435791, + "step": 6507 + }, + { + "epoch": 1.4995391705069125, + "grad_norm": 0.958982052367848, + "learning_rate": 3.235597673081227e-07, + "loss": 0.667822003364563, + "step": 6508 + }, + { + "epoch": 1.4997695852534563, + "grad_norm": 1.2374356667574686, + "learning_rate": 3.232792227581409e-07, + "loss": 0.7829990386962891, + "step": 6509 + }, + { + "epoch": 1.5, + "grad_norm": 1.1404525757399535, + "learning_rate": 3.229987764332843e-07, + "loss": 0.768509566783905, + "step": 6510 + }, + { + "epoch": 1.5002304147465437, + "grad_norm": 1.3651547247057954, + "learning_rate": 3.227184283742591e-07, + "loss": 0.8448585867881775, + "step": 6511 + }, + { + "epoch": 1.5004608294930875, + "grad_norm": 1.2722097281432705, + "learning_rate": 3.2243817862175705e-07, + "loss": 0.6929391622543335, + "step": 6512 + }, + { + "epoch": 1.5006912442396314, + "grad_norm": 0.8983294061831201, + "learning_rate": 3.221580272164567e-07, + "loss": 0.6453005075454712, + "step": 6513 + }, + { + "epoch": 1.5009216589861751, + "grad_norm": 1.135934251126359, + "learning_rate": 3.2187797419902143e-07, + "loss": 0.7870811820030212, + "step": 6514 + }, + { + "epoch": 1.5011520737327189, + "grad_norm": 1.264885386654941, + "learning_rate": 3.2159801961010013e-07, + "loss": 0.7032002210617065, + "step": 6515 + }, 
+ { + "epoch": 1.5013824884792628, + "grad_norm": 1.5122369312915371, + "learning_rate": 3.213181634903285e-07, + "loss": 0.8018448352813721, + "step": 6516 + }, + { + "epoch": 1.5016129032258063, + "grad_norm": 1.0930874016239036, + "learning_rate": 3.2103840588032707e-07, + "loss": 0.7066134810447693, + "step": 6517 + }, + { + "epoch": 1.5018433179723503, + "grad_norm": 1.049874936950677, + "learning_rate": 3.207587468207018e-07, + "loss": 0.6835265159606934, + "step": 6518 + }, + { + "epoch": 1.502073732718894, + "grad_norm": 1.1994114231897615, + "learning_rate": 3.204791863520455e-07, + "loss": 0.6679749488830566, + "step": 6519 + }, + { + "epoch": 1.5023041474654377, + "grad_norm": 1.1780261658003046, + "learning_rate": 3.201997245149358e-07, + "loss": 0.781232476234436, + "step": 6520 + }, + { + "epoch": 1.5025345622119817, + "grad_norm": 1.156188659495686, + "learning_rate": 3.1992036134993616e-07, + "loss": 0.7853572368621826, + "step": 6521 + }, + { + "epoch": 1.5027649769585254, + "grad_norm": 1.3156565650023675, + "learning_rate": 3.1964109689759576e-07, + "loss": 0.8220832943916321, + "step": 6522 + }, + { + "epoch": 1.5029953917050691, + "grad_norm": 1.0874952614272322, + "learning_rate": 3.193619311984491e-07, + "loss": 0.8046013116836548, + "step": 6523 + }, + { + "epoch": 1.5032258064516129, + "grad_norm": 1.1481673715256613, + "learning_rate": 3.190828642930174e-07, + "loss": 0.7123414874076843, + "step": 6524 + }, + { + "epoch": 1.5034562211981566, + "grad_norm": 1.2507360463805697, + "learning_rate": 3.188038962218066e-07, + "loss": 0.7913625240325928, + "step": 6525 + }, + { + "epoch": 1.5036866359447005, + "grad_norm": 1.2264479129016654, + "learning_rate": 3.185250270253081e-07, + "loss": 0.7837327718734741, + "step": 6526 + }, + { + "epoch": 1.5039170506912443, + "grad_norm": 1.3223188543102071, + "learning_rate": 3.182462567440002e-07, + "loss": 0.7799992561340332, + "step": 6527 + }, + { + "epoch": 1.504147465437788, + "grad_norm": 
1.2906027927929307, + "learning_rate": 3.1796758541834545e-07, + "loss": 0.8591268062591553, + "step": 6528 + }, + { + "epoch": 1.504377880184332, + "grad_norm": 1.1175058933428492, + "learning_rate": 3.176890130887926e-07, + "loss": 0.6886378526687622, + "step": 6529 + }, + { + "epoch": 1.5046082949308754, + "grad_norm": 1.4969255628781877, + "learning_rate": 3.1741053979577647e-07, + "loss": 0.8641641139984131, + "step": 6530 + }, + { + "epoch": 1.5048387096774194, + "grad_norm": 1.3022265823882768, + "learning_rate": 3.1713216557971687e-07, + "loss": 0.8215552568435669, + "step": 6531 + }, + { + "epoch": 1.5050691244239631, + "grad_norm": 1.332125606212464, + "learning_rate": 3.1685389048101906e-07, + "loss": 0.8506371974945068, + "step": 6532 + }, + { + "epoch": 1.5052995391705069, + "grad_norm": 1.371517957091787, + "learning_rate": 3.1657571454007515e-07, + "loss": 0.740912675857544, + "step": 6533 + }, + { + "epoch": 1.5055299539170508, + "grad_norm": 1.0380741302125553, + "learning_rate": 3.162976377972614e-07, + "loss": 0.6458308696746826, + "step": 6534 + }, + { + "epoch": 1.5057603686635943, + "grad_norm": 1.0737980819278299, + "learning_rate": 3.1601966029294013e-07, + "loss": 0.7368316650390625, + "step": 6535 + }, + { + "epoch": 1.5059907834101383, + "grad_norm": 1.1008143995933475, + "learning_rate": 3.1574178206746003e-07, + "loss": 0.6648637056350708, + "step": 6536 + }, + { + "epoch": 1.506221198156682, + "grad_norm": 1.2751679142768328, + "learning_rate": 3.154640031611544e-07, + "loss": 0.706688404083252, + "step": 6537 + }, + { + "epoch": 1.5064516129032257, + "grad_norm": 1.0597131508477158, + "learning_rate": 3.1518632361434263e-07, + "loss": 0.722059965133667, + "step": 6538 + }, + { + "epoch": 1.5066820276497697, + "grad_norm": 1.1420297201861054, + "learning_rate": 3.14908743467329e-07, + "loss": 0.7098807096481323, + "step": 6539 + }, + { + "epoch": 1.5069124423963134, + "grad_norm": 1.1123804283277692, + "learning_rate": 
3.1463126276040454e-07, + "loss": 0.7131781578063965, + "step": 6540 + }, + { + "epoch": 1.5071428571428571, + "grad_norm": 0.757735402153, + "learning_rate": 3.143538815338451e-07, + "loss": 0.7292109727859497, + "step": 6541 + }, + { + "epoch": 1.507373271889401, + "grad_norm": 1.1145586582073062, + "learning_rate": 3.1407659982791204e-07, + "loss": 0.7305347919464111, + "step": 6542 + }, + { + "epoch": 1.5076036866359446, + "grad_norm": 1.3246030999705258, + "learning_rate": 3.1379941768285247e-07, + "loss": 0.8072094321250916, + "step": 6543 + }, + { + "epoch": 1.5078341013824885, + "grad_norm": 1.2831968996332677, + "learning_rate": 3.135223351388987e-07, + "loss": 0.8772450685501099, + "step": 6544 + }, + { + "epoch": 1.5080645161290323, + "grad_norm": 1.1816139196453221, + "learning_rate": 3.1324535223626957e-07, + "loss": 0.8463687896728516, + "step": 6545 + }, + { + "epoch": 1.508294930875576, + "grad_norm": 1.1937564350019036, + "learning_rate": 3.1296846901516806e-07, + "loss": 0.6764696836471558, + "step": 6546 + }, + { + "epoch": 1.50852534562212, + "grad_norm": 1.198918569491841, + "learning_rate": 3.126916855157841e-07, + "loss": 0.8395411968231201, + "step": 6547 + }, + { + "epoch": 1.5087557603686634, + "grad_norm": 1.0607235882989698, + "learning_rate": 3.1241500177829195e-07, + "loss": 0.8227219581604004, + "step": 6548 + }, + { + "epoch": 1.5089861751152074, + "grad_norm": 1.1677688606359355, + "learning_rate": 3.121384178428519e-07, + "loss": 0.7079675197601318, + "step": 6549 + }, + { + "epoch": 1.5092165898617511, + "grad_norm": 1.2218836381096956, + "learning_rate": 3.1186193374961014e-07, + "loss": 0.7792578935623169, + "step": 6550 + }, + { + "epoch": 1.5094470046082948, + "grad_norm": 1.403777710630671, + "learning_rate": 3.1158554953869776e-07, + "loss": 0.7821195125579834, + "step": 6551 + }, + { + "epoch": 1.5096774193548388, + "grad_norm": 1.0979873084769438, + "learning_rate": 3.1130926525023114e-07, + "loss": 0.6640183329582214, + 
"step": 6552 + }, + { + "epoch": 1.5099078341013825, + "grad_norm": 1.4504991573195685, + "learning_rate": 3.110330809243134e-07, + "loss": 0.8087342977523804, + "step": 6553 + }, + { + "epoch": 1.5101382488479262, + "grad_norm": 1.283455986462282, + "learning_rate": 3.1075699660103184e-07, + "loss": 0.7716038227081299, + "step": 6554 + }, + { + "epoch": 1.5103686635944702, + "grad_norm": 1.3208739442859028, + "learning_rate": 3.1048101232045943e-07, + "loss": 0.8408910632133484, + "step": 6555 + }, + { + "epoch": 1.5105990783410137, + "grad_norm": 1.3122071557376964, + "learning_rate": 3.1020512812265564e-07, + "loss": 0.8799750804901123, + "step": 6556 + }, + { + "epoch": 1.5108294930875577, + "grad_norm": 1.0380590960697713, + "learning_rate": 3.0992934404766426e-07, + "loss": 0.6534945368766785, + "step": 6557 + }, + { + "epoch": 1.5110599078341014, + "grad_norm": 1.2848519038217197, + "learning_rate": 3.0965366013551507e-07, + "loss": 0.8293032646179199, + "step": 6558 + }, + { + "epoch": 1.511290322580645, + "grad_norm": 1.2211391336842954, + "learning_rate": 3.0937807642622295e-07, + "loss": 0.7486997842788696, + "step": 6559 + }, + { + "epoch": 1.511520737327189, + "grad_norm": 0.9417803138002018, + "learning_rate": 3.0910259295978914e-07, + "loss": 0.7431119680404663, + "step": 6560 + }, + { + "epoch": 1.5117511520737326, + "grad_norm": 1.255174714268565, + "learning_rate": 3.0882720977619927e-07, + "loss": 0.945859432220459, + "step": 6561 + }, + { + "epoch": 1.5119815668202765, + "grad_norm": 1.3772354918121292, + "learning_rate": 3.0855192691542487e-07, + "loss": 0.7894721031188965, + "step": 6562 + }, + { + "epoch": 1.5122119815668202, + "grad_norm": 1.59783124946433, + "learning_rate": 3.082767444174229e-07, + "loss": 0.8593465089797974, + "step": 6563 + }, + { + "epoch": 1.512442396313364, + "grad_norm": 1.1763179818399963, + "learning_rate": 3.080016623221355e-07, + "loss": 0.8297405242919922, + "step": 6564 + }, + { + "epoch": 1.512672811059908, + 
"grad_norm": 1.2146736952008175, + "learning_rate": 3.07726680669491e-07, + "loss": 0.7372928857803345, + "step": 6565 + }, + { + "epoch": 1.5129032258064516, + "grad_norm": 0.987456734373793, + "learning_rate": 3.0745179949940235e-07, + "loss": 0.7031347751617432, + "step": 6566 + }, + { + "epoch": 1.5131336405529954, + "grad_norm": 0.9788253167457012, + "learning_rate": 3.071770188517679e-07, + "loss": 0.7086467742919922, + "step": 6567 + }, + { + "epoch": 1.5133640552995393, + "grad_norm": 1.2854493361240282, + "learning_rate": 3.069023387664723e-07, + "loss": 0.9091345071792603, + "step": 6568 + }, + { + "epoch": 1.5135944700460828, + "grad_norm": 1.3979394006170445, + "learning_rate": 3.066277592833847e-07, + "loss": 0.7470624446868896, + "step": 6569 + }, + { + "epoch": 1.5138248847926268, + "grad_norm": 1.2458050386964743, + "learning_rate": 3.0635328044235965e-07, + "loss": 0.75694739818573, + "step": 6570 + }, + { + "epoch": 1.5140552995391705, + "grad_norm": 1.1257752667184633, + "learning_rate": 3.0607890228323796e-07, + "loss": 0.7832024693489075, + "step": 6571 + }, + { + "epoch": 1.5142857142857142, + "grad_norm": 1.4206979397737705, + "learning_rate": 3.0580462484584455e-07, + "loss": 0.6777220368385315, + "step": 6572 + }, + { + "epoch": 1.5145161290322582, + "grad_norm": 1.1010797667803915, + "learning_rate": 3.055304481699913e-07, + "loss": 0.7748236060142517, + "step": 6573 + }, + { + "epoch": 1.5147465437788017, + "grad_norm": 1.1639246159957346, + "learning_rate": 3.052563722954741e-07, + "loss": 0.7495633363723755, + "step": 6574 + }, + { + "epoch": 1.5149769585253456, + "grad_norm": 1.1319897669216112, + "learning_rate": 3.049823972620744e-07, + "loss": 0.8011484742164612, + "step": 6575 + }, + { + "epoch": 1.5152073732718894, + "grad_norm": 1.3878273723563577, + "learning_rate": 3.0470852310956e-07, + "loss": 0.7480140924453735, + "step": 6576 + }, + { + "epoch": 1.515437788018433, + "grad_norm": 1.1963673851290149, + "learning_rate": 
3.0443474987768305e-07, + "loss": 0.6561319828033447, + "step": 6577 + }, + { + "epoch": 1.515668202764977, + "grad_norm": 1.1887729560806304, + "learning_rate": 3.041610776061813e-07, + "loss": 0.7437188029289246, + "step": 6578 + }, + { + "epoch": 1.5158986175115208, + "grad_norm": 1.2420532978964127, + "learning_rate": 3.0388750633477766e-07, + "loss": 0.7429096698760986, + "step": 6579 + }, + { + "epoch": 1.5161290322580645, + "grad_norm": 1.3505114972693866, + "learning_rate": 3.0361403610318125e-07, + "loss": 0.859411358833313, + "step": 6580 + }, + { + "epoch": 1.5163594470046085, + "grad_norm": 0.9758931256825946, + "learning_rate": 3.0334066695108565e-07, + "loss": 0.7636305093765259, + "step": 6581 + }, + { + "epoch": 1.516589861751152, + "grad_norm": 1.1796162666849943, + "learning_rate": 3.030673989181699e-07, + "loss": 0.8331989049911499, + "step": 6582 + }, + { + "epoch": 1.516820276497696, + "grad_norm": 1.0763217337155384, + "learning_rate": 3.0279423204409857e-07, + "loss": 0.770574688911438, + "step": 6583 + }, + { + "epoch": 1.5170506912442396, + "grad_norm": 1.3524367915089308, + "learning_rate": 3.025211663685213e-07, + "loss": 0.7470898628234863, + "step": 6584 + }, + { + "epoch": 1.5172811059907834, + "grad_norm": 1.2515745730030696, + "learning_rate": 3.022482019310736e-07, + "loss": 0.7907510995864868, + "step": 6585 + }, + { + "epoch": 1.5175115207373273, + "grad_norm": 1.1087989572536945, + "learning_rate": 3.019753387713757e-07, + "loss": 0.751417338848114, + "step": 6586 + }, + { + "epoch": 1.5177419354838708, + "grad_norm": 1.3862652872284045, + "learning_rate": 3.01702576929033e-07, + "loss": 0.8987867832183838, + "step": 6587 + }, + { + "epoch": 1.5179723502304148, + "grad_norm": 1.2098170472034613, + "learning_rate": 3.0142991644363714e-07, + "loss": 0.7618268728256226, + "step": 6588 + }, + { + "epoch": 1.5182027649769585, + "grad_norm": 1.4029958928912587, + "learning_rate": 3.011573573547641e-07, + "loss": 0.9358207583427429, + 
"step": 6589 + }, + { + "epoch": 1.5184331797235022, + "grad_norm": 1.4434031985489326, + "learning_rate": 3.008848997019753e-07, + "loss": 0.6549144387245178, + "step": 6590 + }, + { + "epoch": 1.5186635944700462, + "grad_norm": 1.293720092884626, + "learning_rate": 3.00612543524818e-07, + "loss": 0.8642100095748901, + "step": 6591 + }, + { + "epoch": 1.51889400921659, + "grad_norm": 1.2852982676947153, + "learning_rate": 3.003402888628241e-07, + "loss": 0.7348824143409729, + "step": 6592 + }, + { + "epoch": 1.5191244239631336, + "grad_norm": 1.0897732641421132, + "learning_rate": 3.000681357555108e-07, + "loss": 0.8737039566040039, + "step": 6593 + }, + { + "epoch": 1.5193548387096776, + "grad_norm": 1.3095413820866733, + "learning_rate": 2.9979608424238134e-07, + "loss": 0.749860405921936, + "step": 6594 + }, + { + "epoch": 1.519585253456221, + "grad_norm": 1.4291988493830527, + "learning_rate": 2.99524134362923e-07, + "loss": 0.7583779096603394, + "step": 6595 + }, + { + "epoch": 1.519815668202765, + "grad_norm": 1.1886499728868618, + "learning_rate": 2.992522861566095e-07, + "loss": 0.7096224427223206, + "step": 6596 + }, + { + "epoch": 1.5200460829493088, + "grad_norm": 1.3265073494412316, + "learning_rate": 2.9898053966289904e-07, + "loss": 0.7813585996627808, + "step": 6597 + }, + { + "epoch": 1.5202764976958525, + "grad_norm": 1.3753919073529044, + "learning_rate": 2.9870889492123517e-07, + "loss": 0.7744605541229248, + "step": 6598 + }, + { + "epoch": 1.5205069124423964, + "grad_norm": 1.4661404938087315, + "learning_rate": 2.984373519710469e-07, + "loss": 0.8398552536964417, + "step": 6599 + }, + { + "epoch": 1.52073732718894, + "grad_norm": 1.1837780856173943, + "learning_rate": 2.981659108517478e-07, + "loss": 0.6853294372558594, + "step": 6600 + }, + { + "epoch": 1.520967741935484, + "grad_norm": 0.9892560165373243, + "learning_rate": 2.97894571602738e-07, + "loss": 0.7673987150192261, + "step": 6601 + }, + { + "epoch": 1.5211981566820276, + 
"grad_norm": 1.0638042713840496, + "learning_rate": 2.976233342634017e-07, + "loss": 0.7000377774238586, + "step": 6602 + }, + { + "epoch": 1.5214285714285714, + "grad_norm": 1.2089273111808856, + "learning_rate": 2.9735219887310857e-07, + "loss": 0.8429346680641174, + "step": 6603 + }, + { + "epoch": 1.5216589861751153, + "grad_norm": 1.4255685153178952, + "learning_rate": 2.970811654712133e-07, + "loss": 0.9118648767471313, + "step": 6604 + }, + { + "epoch": 1.521889400921659, + "grad_norm": 1.0974145188834663, + "learning_rate": 2.9681023409705666e-07, + "loss": 0.7745784521102905, + "step": 6605 + }, + { + "epoch": 1.5221198156682028, + "grad_norm": 1.234720575381531, + "learning_rate": 2.9653940478996367e-07, + "loss": 0.8481245040893555, + "step": 6606 + }, + { + "epoch": 1.5223502304147467, + "grad_norm": 1.1446582960275502, + "learning_rate": 2.9626867758924436e-07, + "loss": 0.8643463850021362, + "step": 6607 + }, + { + "epoch": 1.5225806451612902, + "grad_norm": 1.6406368897457513, + "learning_rate": 2.959980525341953e-07, + "loss": 0.9524952173233032, + "step": 6608 + }, + { + "epoch": 1.5228110599078342, + "grad_norm": 1.067119300713527, + "learning_rate": 2.9572752966409686e-07, + "loss": 0.7153829336166382, + "step": 6609 + }, + { + "epoch": 1.523041474654378, + "grad_norm": 1.1739681134356785, + "learning_rate": 2.954571090182149e-07, + "loss": 0.8332774639129639, + "step": 6610 + }, + { + "epoch": 1.5232718894009216, + "grad_norm": 1.3773090684366749, + "learning_rate": 2.9518679063580123e-07, + "loss": 0.7511743307113647, + "step": 6611 + }, + { + "epoch": 1.5235023041474656, + "grad_norm": 1.2327774867248482, + "learning_rate": 2.9491657455609175e-07, + "loss": 0.715233325958252, + "step": 6612 + }, + { + "epoch": 1.523732718894009, + "grad_norm": 1.139323635074032, + "learning_rate": 2.946464608183078e-07, + "loss": 0.7386246919631958, + "step": 6613 + }, + { + "epoch": 1.523963133640553, + "grad_norm": 1.1904592003911236, + "learning_rate": 
2.943764494616565e-07, + "loss": 0.8337790369987488, + "step": 6614 + }, + { + "epoch": 1.5241935483870968, + "grad_norm": 1.394927398157402, + "learning_rate": 2.941065405253296e-07, + "loss": 0.8447855710983276, + "step": 6615 + }, + { + "epoch": 1.5244239631336405, + "grad_norm": 1.1307960049130217, + "learning_rate": 2.938367340485035e-07, + "loss": 0.7430610060691833, + "step": 6616 + }, + { + "epoch": 1.5246543778801844, + "grad_norm": 1.134552871583557, + "learning_rate": 2.9356703007034087e-07, + "loss": 0.7740806937217712, + "step": 6617 + }, + { + "epoch": 1.5248847926267282, + "grad_norm": 1.2516085920875086, + "learning_rate": 2.9329742862998875e-07, + "loss": 0.7824152708053589, + "step": 6618 + }, + { + "epoch": 1.5251152073732719, + "grad_norm": 1.0852675062610386, + "learning_rate": 2.930279297665792e-07, + "loss": 0.9222463965415955, + "step": 6619 + }, + { + "epoch": 1.5253456221198156, + "grad_norm": 1.8096931577931101, + "learning_rate": 2.927585335192294e-07, + "loss": 0.9548497200012207, + "step": 6620 + }, + { + "epoch": 1.5255760368663593, + "grad_norm": 1.497275795232007, + "learning_rate": 2.9248923992704255e-07, + "loss": 0.9007906913757324, + "step": 6621 + }, + { + "epoch": 1.5258064516129033, + "grad_norm": 1.0647051889661132, + "learning_rate": 2.9222004902910593e-07, + "loss": 0.6932169198989868, + "step": 6622 + }, + { + "epoch": 1.526036866359447, + "grad_norm": 0.9763599663388729, + "learning_rate": 2.919509608644922e-07, + "loss": 0.7327853441238403, + "step": 6623 + }, + { + "epoch": 1.5262672811059907, + "grad_norm": 1.423305414970627, + "learning_rate": 2.916819754722588e-07, + "loss": 0.617963433265686, + "step": 6624 + }, + { + "epoch": 1.5264976958525347, + "grad_norm": 1.3790687935494703, + "learning_rate": 2.914130928914493e-07, + "loss": 1.0567349195480347, + "step": 6625 + }, + { + "epoch": 1.5267281105990782, + "grad_norm": 1.243824261339929, + "learning_rate": 2.9114431316109145e-07, + "loss": 0.7362378835678101, + 
"step": 6626 + }, + { + "epoch": 1.5269585253456222, + "grad_norm": 1.1636178458595106, + "learning_rate": 2.9087563632019774e-07, + "loss": 0.6879991888999939, + "step": 6627 + }, + { + "epoch": 1.5271889400921659, + "grad_norm": 1.2540530060828472, + "learning_rate": 2.9060706240776686e-07, + "loss": 0.7804177403450012, + "step": 6628 + }, + { + "epoch": 1.5274193548387096, + "grad_norm": 1.2450061818881997, + "learning_rate": 2.9033859146278197e-07, + "loss": 0.7459548711776733, + "step": 6629 + }, + { + "epoch": 1.5276497695852536, + "grad_norm": 1.1214229491247267, + "learning_rate": 2.900702235242106e-07, + "loss": 0.7392233610153198, + "step": 6630 + }, + { + "epoch": 1.5278801843317973, + "grad_norm": 1.0862664338119448, + "learning_rate": 2.8980195863100675e-07, + "loss": 0.6956135034561157, + "step": 6631 + }, + { + "epoch": 1.528110599078341, + "grad_norm": 1.1232709572579735, + "learning_rate": 2.8953379682210856e-07, + "loss": 0.7042561769485474, + "step": 6632 + }, + { + "epoch": 1.5283410138248847, + "grad_norm": 1.070241779197473, + "learning_rate": 2.8926573813643884e-07, + "loss": 0.7114298343658447, + "step": 6633 + }, + { + "epoch": 1.5285714285714285, + "grad_norm": 1.0297537166419386, + "learning_rate": 2.8899778261290664e-07, + "loss": 0.862826943397522, + "step": 6634 + }, + { + "epoch": 1.5288018433179724, + "grad_norm": 1.3240716498057261, + "learning_rate": 2.8872993029040506e-07, + "loss": 0.8229889869689941, + "step": 6635 + }, + { + "epoch": 1.5290322580645161, + "grad_norm": 1.2292174291080764, + "learning_rate": 2.884621812078122e-07, + "loss": 0.8058778047561646, + "step": 6636 + }, + { + "epoch": 1.5292626728110599, + "grad_norm": 1.2782782809475366, + "learning_rate": 2.881945354039921e-07, + "loss": 0.8150385618209839, + "step": 6637 + }, + { + "epoch": 1.5294930875576038, + "grad_norm": 1.1137449533588037, + "learning_rate": 2.8792699291779276e-07, + "loss": 0.7067136168479919, + "step": 6638 + }, + { + "epoch": 
1.5297235023041473, + "grad_norm": 1.2793329729310776, + "learning_rate": 2.8765955378804784e-07, + "loss": 0.7725155353546143, + "step": 6639 + }, + { + "epoch": 1.5299539170506913, + "grad_norm": 1.0584861581127705, + "learning_rate": 2.873922180535754e-07, + "loss": 0.5956720113754272, + "step": 6640 + }, + { + "epoch": 1.530184331797235, + "grad_norm": 1.1955034677005214, + "learning_rate": 2.8712498575317934e-07, + "loss": 0.6506170630455017, + "step": 6641 + }, + { + "epoch": 1.5304147465437787, + "grad_norm": 1.0781697188392338, + "learning_rate": 2.86857856925648e-07, + "loss": 0.7860926985740662, + "step": 6642 + }, + { + "epoch": 1.5306451612903227, + "grad_norm": 1.1840723689685375, + "learning_rate": 2.8659083160975464e-07, + "loss": 0.7003993391990662, + "step": 6643 + }, + { + "epoch": 1.5308755760368664, + "grad_norm": 1.1562706768971642, + "learning_rate": 2.8632390984425746e-07, + "loss": 0.6887079477310181, + "step": 6644 + }, + { + "epoch": 1.5311059907834101, + "grad_norm": 1.243117329825752, + "learning_rate": 2.860570916678998e-07, + "loss": 0.788282036781311, + "step": 6645 + }, + { + "epoch": 1.5313364055299539, + "grad_norm": 1.273283187040626, + "learning_rate": 2.8579037711941043e-07, + "loss": 0.771350085735321, + "step": 6646 + }, + { + "epoch": 1.5315668202764976, + "grad_norm": 1.1000030346921834, + "learning_rate": 2.855237662375021e-07, + "loss": 0.6418509483337402, + "step": 6647 + }, + { + "epoch": 1.5317972350230415, + "grad_norm": 1.022873677691871, + "learning_rate": 2.852572590608735e-07, + "loss": 0.6606692671775818, + "step": 6648 + }, + { + "epoch": 1.5320276497695853, + "grad_norm": 1.4727879897773712, + "learning_rate": 2.849908556282076e-07, + "loss": 0.8623934984207153, + "step": 6649 + }, + { + "epoch": 1.532258064516129, + "grad_norm": 1.1678986803146219, + "learning_rate": 2.8472455597817215e-07, + "loss": 0.848737359046936, + "step": 6650 + }, + { + "epoch": 1.532488479262673, + "grad_norm": 1.2265451299303025, + 
"learning_rate": 2.844583601494207e-07, + "loss": 0.7156505584716797, + "step": 6651 + }, + { + "epoch": 1.5327188940092165, + "grad_norm": 1.157360063816448, + "learning_rate": 2.8419226818059116e-07, + "loss": 0.598319411277771, + "step": 6652 + }, + { + "epoch": 1.5329493087557604, + "grad_norm": 1.0128877845083564, + "learning_rate": 2.8392628011030585e-07, + "loss": 0.6320680379867554, + "step": 6653 + }, + { + "epoch": 1.5331797235023041, + "grad_norm": 1.2437383042471344, + "learning_rate": 2.836603959771734e-07, + "loss": 0.8770536184310913, + "step": 6654 + }, + { + "epoch": 1.5334101382488479, + "grad_norm": 1.3327586940769975, + "learning_rate": 2.833946158197862e-07, + "loss": 0.896265983581543, + "step": 6655 + }, + { + "epoch": 1.5336405529953918, + "grad_norm": 1.1058301341236145, + "learning_rate": 2.8312893967672145e-07, + "loss": 0.7194868326187134, + "step": 6656 + }, + { + "epoch": 1.5338709677419353, + "grad_norm": 1.1479450761132848, + "learning_rate": 2.828633675865425e-07, + "loss": 0.7993383407592773, + "step": 6657 + }, + { + "epoch": 1.5341013824884793, + "grad_norm": 1.3252275312162691, + "learning_rate": 2.8259789958779635e-07, + "loss": 0.6808127760887146, + "step": 6658 + }, + { + "epoch": 1.534331797235023, + "grad_norm": 1.3083456260381565, + "learning_rate": 2.823325357190153e-07, + "loss": 0.7348822355270386, + "step": 6659 + }, + { + "epoch": 1.5345622119815667, + "grad_norm": 1.4520629186425333, + "learning_rate": 2.820672760187166e-07, + "loss": 0.7729920744895935, + "step": 6660 + }, + { + "epoch": 1.5347926267281107, + "grad_norm": 1.1927593175103235, + "learning_rate": 2.818021205254021e-07, + "loss": 0.803922176361084, + "step": 6661 + }, + { + "epoch": 1.5350230414746544, + "grad_norm": 1.1316086785563555, + "learning_rate": 2.815370692775594e-07, + "loss": 0.7931007146835327, + "step": 6662 + }, + { + "epoch": 1.5352534562211981, + "grad_norm": 0.9381855495475373, + "learning_rate": 2.8127212231365995e-07, + "loss": 
0.7990511655807495, + "step": 6663 + }, + { + "epoch": 1.535483870967742, + "grad_norm": 1.1449374360466444, + "learning_rate": 2.8100727967216043e-07, + "loss": 0.8163471817970276, + "step": 6664 + }, + { + "epoch": 1.5357142857142856, + "grad_norm": 1.126530672311672, + "learning_rate": 2.8074254139150225e-07, + "loss": 0.7628358602523804, + "step": 6665 + }, + { + "epoch": 1.5359447004608295, + "grad_norm": 1.216707261403855, + "learning_rate": 2.8047790751011216e-07, + "loss": 0.8008173704147339, + "step": 6666 + }, + { + "epoch": 1.5361751152073733, + "grad_norm": 1.4385072008960633, + "learning_rate": 2.802133780664013e-07, + "loss": 0.9139487743377686, + "step": 6667 + }, + { + "epoch": 1.536405529953917, + "grad_norm": 1.479452922561271, + "learning_rate": 2.7994895309876555e-07, + "loss": 0.9436901211738586, + "step": 6668 + }, + { + "epoch": 1.536635944700461, + "grad_norm": 1.1137684825301204, + "learning_rate": 2.7968463264558617e-07, + "loss": 0.8072221875190735, + "step": 6669 + }, + { + "epoch": 1.5368663594470044, + "grad_norm": 1.4031563621096825, + "learning_rate": 2.7942041674522866e-07, + "loss": 0.7434822916984558, + "step": 6670 + }, + { + "epoch": 1.5370967741935484, + "grad_norm": 1.1245525381043615, + "learning_rate": 2.7915630543604394e-07, + "loss": 0.6729850769042969, + "step": 6671 + }, + { + "epoch": 1.5373271889400921, + "grad_norm": 1.2279789151687839, + "learning_rate": 2.7889229875636723e-07, + "loss": 0.8752315044403076, + "step": 6672 + }, + { + "epoch": 1.5375576036866359, + "grad_norm": 1.2125823370266373, + "learning_rate": 2.786283967445184e-07, + "loss": 0.8519413471221924, + "step": 6673 + }, + { + "epoch": 1.5377880184331798, + "grad_norm": 1.2674824603159123, + "learning_rate": 2.783645994388032e-07, + "loss": 0.8868448734283447, + "step": 6674 + }, + { + "epoch": 1.5380184331797235, + "grad_norm": 1.2984993367707722, + "learning_rate": 2.78100906877511e-07, + "loss": 0.9223456978797913, + "step": 6675 + }, + { + "epoch": 
1.5382488479262673, + "grad_norm": 1.0080180068423799, + "learning_rate": 2.7783731909891616e-07, + "loss": 0.799191951751709, + "step": 6676 + }, + { + "epoch": 1.5384792626728112, + "grad_norm": 1.1987572506109172, + "learning_rate": 2.775738361412788e-07, + "loss": 0.7092995643615723, + "step": 6677 + }, + { + "epoch": 1.5387096774193547, + "grad_norm": 1.2206610409098804, + "learning_rate": 2.7731045804284283e-07, + "loss": 0.674687385559082, + "step": 6678 + }, + { + "epoch": 1.5389400921658987, + "grad_norm": 1.4910052625734944, + "learning_rate": 2.77047184841837e-07, + "loss": 0.7366930246353149, + "step": 6679 + }, + { + "epoch": 1.5391705069124424, + "grad_norm": 1.169385374165895, + "learning_rate": 2.767840165764753e-07, + "loss": 0.838137149810791, + "step": 6680 + }, + { + "epoch": 1.5394009216589861, + "grad_norm": 1.2120746756764942, + "learning_rate": 2.765209532849558e-07, + "loss": 0.7507175803184509, + "step": 6681 + }, + { + "epoch": 1.53963133640553, + "grad_norm": 1.2981666739842812, + "learning_rate": 2.7625799500546267e-07, + "loss": 0.8157602548599243, + "step": 6682 + }, + { + "epoch": 1.5398617511520736, + "grad_norm": 1.2345607869860449, + "learning_rate": 2.7599514177616333e-07, + "loss": 0.7779219150543213, + "step": 6683 + }, + { + "epoch": 1.5400921658986175, + "grad_norm": 1.186692939443946, + "learning_rate": 2.757323936352106e-07, + "loss": 0.8261638879776001, + "step": 6684 + }, + { + "epoch": 1.5403225806451613, + "grad_norm": 0.8917527422638705, + "learning_rate": 2.7546975062074197e-07, + "loss": 0.6139177680015564, + "step": 6685 + }, + { + "epoch": 1.540552995391705, + "grad_norm": 1.0945474995666544, + "learning_rate": 2.752072127708802e-07, + "loss": 0.744202733039856, + "step": 6686 + }, + { + "epoch": 1.540783410138249, + "grad_norm": 1.279582503351568, + "learning_rate": 2.749447801237319e-07, + "loss": 0.7685158848762512, + "step": 6687 + }, + { + "epoch": 1.5410138248847927, + "grad_norm": 1.4134776465364736, + 
"learning_rate": 2.7468245271738865e-07, + "loss": 0.7483633756637573, + "step": 6688 + }, + { + "epoch": 1.5412442396313364, + "grad_norm": 1.4452963556936742, + "learning_rate": 2.7442023058992746e-07, + "loss": 0.8967286348342896, + "step": 6689 + }, + { + "epoch": 1.5414746543778803, + "grad_norm": 3.4447797406152922, + "learning_rate": 2.7415811377940933e-07, + "loss": 0.8035085201263428, + "step": 6690 + }, + { + "epoch": 1.5417050691244238, + "grad_norm": 1.2535208224880003, + "learning_rate": 2.738961023238798e-07, + "loss": 0.8504149913787842, + "step": 6691 + }, + { + "epoch": 1.5419354838709678, + "grad_norm": 1.408249398601243, + "learning_rate": 2.736341962613701e-07, + "loss": 0.7612431049346924, + "step": 6692 + }, + { + "epoch": 1.5421658986175115, + "grad_norm": 1.3117649202054886, + "learning_rate": 2.733723956298951e-07, + "loss": 0.6974390745162964, + "step": 6693 + }, + { + "epoch": 1.5423963133640552, + "grad_norm": 1.10015572050179, + "learning_rate": 2.7311070046745476e-07, + "loss": 0.7946817874908447, + "step": 6694 + }, + { + "epoch": 1.5426267281105992, + "grad_norm": 1.3598767034128523, + "learning_rate": 2.728491108120342e-07, + "loss": 0.7801793813705444, + "step": 6695 + }, + { + "epoch": 1.5428571428571427, + "grad_norm": 1.0989233619042245, + "learning_rate": 2.725876267016023e-07, + "loss": 0.720335066318512, + "step": 6696 + }, + { + "epoch": 1.5430875576036867, + "grad_norm": 0.9331707903973574, + "learning_rate": 2.7232624817411376e-07, + "loss": 0.6820393800735474, + "step": 6697 + }, + { + "epoch": 1.5433179723502304, + "grad_norm": 1.2636082158419006, + "learning_rate": 2.7206497526750694e-07, + "loss": 0.8217613697052002, + "step": 6698 + }, + { + "epoch": 1.543548387096774, + "grad_norm": 1.2388683954169015, + "learning_rate": 2.7180380801970525e-07, + "loss": 0.7600520849227905, + "step": 6699 + }, + { + "epoch": 1.543778801843318, + "grad_norm": 1.2564669684453122, + "learning_rate": 2.7154274646861687e-07, + "loss": 
0.9402344226837158, + "step": 6700 + }, + { + "epoch": 1.5440092165898618, + "grad_norm": 1.0720415723340906, + "learning_rate": 2.7128179065213417e-07, + "loss": 0.7470760345458984, + "step": 6701 + }, + { + "epoch": 1.5442396313364055, + "grad_norm": 1.0091593723711232, + "learning_rate": 2.710209406081353e-07, + "loss": 0.6915948390960693, + "step": 6702 + }, + { + "epoch": 1.5444700460829495, + "grad_norm": 1.1829806437851378, + "learning_rate": 2.707601963744817e-07, + "loss": 0.7554904222488403, + "step": 6703 + }, + { + "epoch": 1.544700460829493, + "grad_norm": 0.9892324198221251, + "learning_rate": 2.7049955798902026e-07, + "loss": 0.8197575807571411, + "step": 6704 + }, + { + "epoch": 1.544930875576037, + "grad_norm": 1.3144339350992138, + "learning_rate": 2.702390254895819e-07, + "loss": 0.7106794118881226, + "step": 6705 + }, + { + "epoch": 1.5451612903225806, + "grad_norm": 1.1715761852419602, + "learning_rate": 2.699785989139832e-07, + "loss": 0.6320512294769287, + "step": 6706 + }, + { + "epoch": 1.5453917050691244, + "grad_norm": 1.2156391686389374, + "learning_rate": 2.697182783000246e-07, + "loss": 0.8327566385269165, + "step": 6707 + }, + { + "epoch": 1.5456221198156683, + "grad_norm": 1.2605126330062313, + "learning_rate": 2.6945806368549063e-07, + "loss": 0.8732178211212158, + "step": 6708 + }, + { + "epoch": 1.5458525345622118, + "grad_norm": 1.3881676599881438, + "learning_rate": 2.69197955108152e-07, + "loss": 0.8709380626678467, + "step": 6709 + }, + { + "epoch": 1.5460829493087558, + "grad_norm": 1.2029107229444744, + "learning_rate": 2.689379526057628e-07, + "loss": 0.7821739912033081, + "step": 6710 + }, + { + "epoch": 1.5463133640552995, + "grad_norm": 1.2268892680878298, + "learning_rate": 2.686780562160615e-07, + "loss": 0.8658162355422974, + "step": 6711 + }, + { + "epoch": 1.5465437788018432, + "grad_norm": 0.9914521746084854, + "learning_rate": 2.6841826597677274e-07, + "loss": 0.6354731321334839, + "step": 6712 + }, + { + "epoch": 
1.5467741935483872, + "grad_norm": 1.132983970089502, + "learning_rate": 2.68158581925604e-07, + "loss": 0.8000082969665527, + "step": 6713 + }, + { + "epoch": 1.547004608294931, + "grad_norm": 1.0140012222754493, + "learning_rate": 2.6789900410024804e-07, + "loss": 0.7998030185699463, + "step": 6714 + }, + { + "epoch": 1.5472350230414746, + "grad_norm": 1.2207312006862205, + "learning_rate": 2.676395325383827e-07, + "loss": 0.861609935760498, + "step": 6715 + }, + { + "epoch": 1.5474654377880186, + "grad_norm": 1.2739007648131329, + "learning_rate": 2.6738016727766976e-07, + "loss": 0.8119577765464783, + "step": 6716 + }, + { + "epoch": 1.547695852534562, + "grad_norm": 1.1272023201701244, + "learning_rate": 2.671209083557553e-07, + "loss": 0.7704594135284424, + "step": 6717 + }, + { + "epoch": 1.547926267281106, + "grad_norm": 1.1924986504981143, + "learning_rate": 2.6686175581027114e-07, + "loss": 0.7577236890792847, + "step": 6718 + }, + { + "epoch": 1.5481566820276498, + "grad_norm": 1.438095427566863, + "learning_rate": 2.666027096788326e-07, + "loss": 0.8362265825271606, + "step": 6719 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 1.3282450269784174, + "learning_rate": 2.6634376999903984e-07, + "loss": 0.7604315280914307, + "step": 6720 + }, + { + "epoch": 1.5486175115207375, + "grad_norm": 1.0996855935996066, + "learning_rate": 2.6608493680847757e-07, + "loss": 0.7181323766708374, + "step": 6721 + }, + { + "epoch": 1.548847926267281, + "grad_norm": 1.408245929611007, + "learning_rate": 2.6582621014471495e-07, + "loss": 0.8613896369934082, + "step": 6722 + }, + { + "epoch": 1.549078341013825, + "grad_norm": 1.1355853758662044, + "learning_rate": 2.6556759004530616e-07, + "loss": 0.6254151463508606, + "step": 6723 + }, + { + "epoch": 1.5493087557603686, + "grad_norm": 1.1737642272227355, + "learning_rate": 2.6530907654778957e-07, + "loss": 0.7960973381996155, + "step": 6724 + }, + { + "epoch": 1.5495391705069124, + "grad_norm": 1.1419390810119388, + 
"learning_rate": 2.6505066968968747e-07, + "loss": 0.7899094820022583, + "step": 6725 + }, + { + "epoch": 1.5497695852534563, + "grad_norm": 0.9820941780775652, + "learning_rate": 2.647923695085081e-07, + "loss": 0.6578950881958008, + "step": 6726 + }, + { + "epoch": 1.55, + "grad_norm": 1.3013325638388529, + "learning_rate": 2.64534176041743e-07, + "loss": 0.737798810005188, + "step": 6727 + }, + { + "epoch": 1.5502304147465438, + "grad_norm": 0.9487414790323747, + "learning_rate": 2.642760893268684e-07, + "loss": 0.7809627056121826, + "step": 6728 + }, + { + "epoch": 1.5504608294930877, + "grad_norm": 0.9991258167716155, + "learning_rate": 2.640181094013456e-07, + "loss": 0.6693655252456665, + "step": 6729 + }, + { + "epoch": 1.5506912442396312, + "grad_norm": 0.8705752911958233, + "learning_rate": 2.6376023630262003e-07, + "loss": 0.7264609932899475, + "step": 6730 + }, + { + "epoch": 1.5509216589861752, + "grad_norm": 1.0975251127061347, + "learning_rate": 2.635024700681211e-07, + "loss": 0.7585712671279907, + "step": 6731 + }, + { + "epoch": 1.551152073732719, + "grad_norm": 1.520332751892112, + "learning_rate": 2.6324481073526404e-07, + "loss": 0.7335324287414551, + "step": 6732 + }, + { + "epoch": 1.5513824884792626, + "grad_norm": 1.1271215778218124, + "learning_rate": 2.629872583414473e-07, + "loss": 0.835372805595398, + "step": 6733 + }, + { + "epoch": 1.5516129032258066, + "grad_norm": 1.231737661164668, + "learning_rate": 2.6272981292405405e-07, + "loss": 0.8069926500320435, + "step": 6734 + }, + { + "epoch": 1.55184331797235, + "grad_norm": 1.2110282300687614, + "learning_rate": 2.6247247452045285e-07, + "loss": 0.7548434138298035, + "step": 6735 + }, + { + "epoch": 1.552073732718894, + "grad_norm": 1.281837931597139, + "learning_rate": 2.6221524316799546e-07, + "loss": 0.6907505989074707, + "step": 6736 + }, + { + "epoch": 1.5523041474654378, + "grad_norm": 1.2384070012918627, + "learning_rate": 2.619581189040185e-07, + "loss": 0.8544988632202148, + 
"step": 6737 + }, + { + "epoch": 1.5525345622119815, + "grad_norm": 1.024260684065218, + "learning_rate": 2.6170110176584404e-07, + "loss": 0.7176710367202759, + "step": 6738 + }, + { + "epoch": 1.5527649769585254, + "grad_norm": 1.1771656195687117, + "learning_rate": 2.6144419179077715e-07, + "loss": 0.7160323858261108, + "step": 6739 + }, + { + "epoch": 1.5529953917050692, + "grad_norm": 1.2619778254885654, + "learning_rate": 2.6118738901610806e-07, + "loss": 0.7749248743057251, + "step": 6740 + }, + { + "epoch": 1.553225806451613, + "grad_norm": 1.3014936029444653, + "learning_rate": 2.6093069347911145e-07, + "loss": 0.7701436281204224, + "step": 6741 + }, + { + "epoch": 1.5534562211981566, + "grad_norm": 1.2206842608778186, + "learning_rate": 2.606741052170459e-07, + "loss": 0.6725181341171265, + "step": 6742 + }, + { + "epoch": 1.5536866359447004, + "grad_norm": 1.0193653205430255, + "learning_rate": 2.6041762426715563e-07, + "loss": 0.7730624675750732, + "step": 6743 + }, + { + "epoch": 1.5539170506912443, + "grad_norm": 0.9417911057706564, + "learning_rate": 2.601612506666682e-07, + "loss": 0.7083867788314819, + "step": 6744 + }, + { + "epoch": 1.554147465437788, + "grad_norm": 1.1436343405561136, + "learning_rate": 2.599049844527953e-07, + "loss": 0.7680408954620361, + "step": 6745 + }, + { + "epoch": 1.5543778801843318, + "grad_norm": 0.9401611092461176, + "learning_rate": 2.596488256627346e-07, + "loss": 0.7145194411277771, + "step": 6746 + }, + { + "epoch": 1.5546082949308757, + "grad_norm": 1.6305632532659482, + "learning_rate": 2.593927743336667e-07, + "loss": 0.8626812696456909, + "step": 6747 + }, + { + "epoch": 1.5548387096774192, + "grad_norm": 1.1326626029703477, + "learning_rate": 2.591368305027569e-07, + "loss": 0.775201678276062, + "step": 6748 + }, + { + "epoch": 1.5550691244239632, + "grad_norm": 1.1775115850016065, + "learning_rate": 2.588809942071557e-07, + "loss": 0.9363858699798584, + "step": 6749 + }, + { + "epoch": 1.555299539170507, + 
"grad_norm": 1.0406152793499837, + "learning_rate": 2.5862526548399697e-07, + "loss": 0.8079385757446289, + "step": 6750 + }, + { + "epoch": 1.5555299539170506, + "grad_norm": 1.2405408742249928, + "learning_rate": 2.5836964437039934e-07, + "loss": 0.8635082840919495, + "step": 6751 + }, + { + "epoch": 1.5557603686635946, + "grad_norm": 1.072904507718934, + "learning_rate": 2.581141309034662e-07, + "loss": 0.7840827703475952, + "step": 6752 + }, + { + "epoch": 1.5559907834101383, + "grad_norm": 1.202200191511419, + "learning_rate": 2.5785872512028497e-07, + "loss": 0.7833336591720581, + "step": 6753 + }, + { + "epoch": 1.556221198156682, + "grad_norm": 1.2301348726534915, + "learning_rate": 2.576034270579269e-07, + "loss": 0.7340226173400879, + "step": 6754 + }, + { + "epoch": 1.5564516129032258, + "grad_norm": 0.9782804135142905, + "learning_rate": 2.5734823675344895e-07, + "loss": 0.6423541307449341, + "step": 6755 + }, + { + "epoch": 1.5566820276497695, + "grad_norm": 1.1992594758940591, + "learning_rate": 2.570931542438913e-07, + "loss": 0.7772454619407654, + "step": 6756 + }, + { + "epoch": 1.5569124423963134, + "grad_norm": 1.192101331643462, + "learning_rate": 2.568381795662785e-07, + "loss": 0.8113390803337097, + "step": 6757 + }, + { + "epoch": 1.5571428571428572, + "grad_norm": 1.1257023205339645, + "learning_rate": 2.5658331275762045e-07, + "loss": 0.6688467264175415, + "step": 6758 + }, + { + "epoch": 1.557373271889401, + "grad_norm": 1.0966214019602503, + "learning_rate": 2.5632855385491037e-07, + "loss": 0.8140766620635986, + "step": 6759 + }, + { + "epoch": 1.5576036866359448, + "grad_norm": 1.0260387911312179, + "learning_rate": 2.560739028951262e-07, + "loss": 0.7661154270172119, + "step": 6760 + }, + { + "epoch": 1.5578341013824883, + "grad_norm": 1.2298722431512563, + "learning_rate": 2.558193599152302e-07, + "loss": 0.6781749725341797, + "step": 6761 + }, + { + "epoch": 1.5580645161290323, + "grad_norm": 1.62266115954538, + "learning_rate": 
2.5556492495216865e-07, + "loss": 0.8885331749916077, + "step": 6762 + }, + { + "epoch": 1.558294930875576, + "grad_norm": 1.3197551931331304, + "learning_rate": 2.55310598042873e-07, + "loss": 0.799277663230896, + "step": 6763 + }, + { + "epoch": 1.5585253456221198, + "grad_norm": 1.205426943239231, + "learning_rate": 2.550563792242583e-07, + "loss": 0.8288404941558838, + "step": 6764 + }, + { + "epoch": 1.5587557603686637, + "grad_norm": 1.1206026594489704, + "learning_rate": 2.5480226853322397e-07, + "loss": 0.9452340602874756, + "step": 6765 + }, + { + "epoch": 1.5589861751152074, + "grad_norm": 1.068059951967386, + "learning_rate": 2.5454826600665347e-07, + "loss": 0.6716231107711792, + "step": 6766 + }, + { + "epoch": 1.5592165898617512, + "grad_norm": 0.9885922984637816, + "learning_rate": 2.542943716814157e-07, + "loss": 0.90239417552948, + "step": 6767 + }, + { + "epoch": 1.5594470046082949, + "grad_norm": 1.306788685526263, + "learning_rate": 2.5404058559436225e-07, + "loss": 0.7895521521568298, + "step": 6768 + }, + { + "epoch": 1.5596774193548386, + "grad_norm": 1.1707304874415911, + "learning_rate": 2.537869077823307e-07, + "loss": 0.8097352385520935, + "step": 6769 + }, + { + "epoch": 1.5599078341013826, + "grad_norm": 1.2075274904697726, + "learning_rate": 2.535333382821415e-07, + "loss": 0.7599455118179321, + "step": 6770 + }, + { + "epoch": 1.5601382488479263, + "grad_norm": 1.3869678105449568, + "learning_rate": 2.5327987713059986e-07, + "loss": 0.8735921382904053, + "step": 6771 + }, + { + "epoch": 1.56036866359447, + "grad_norm": 1.3359870563601237, + "learning_rate": 2.530265243644958e-07, + "loss": 0.7263825535774231, + "step": 6772 + }, + { + "epoch": 1.560599078341014, + "grad_norm": 1.2240386443766704, + "learning_rate": 2.5277328002060296e-07, + "loss": 0.8642966747283936, + "step": 6773 + }, + { + "epoch": 1.5608294930875575, + "grad_norm": 0.9860249164323385, + "learning_rate": 2.525201441356789e-07, + "loss": 0.6928948163986206, + 
"step": 6774 + }, + { + "epoch": 1.5610599078341014, + "grad_norm": 1.0977742625281808, + "learning_rate": 2.522671167464667e-07, + "loss": 0.7841427326202393, + "step": 6775 + }, + { + "epoch": 1.5612903225806452, + "grad_norm": 1.102415991736206, + "learning_rate": 2.5201419788969267e-07, + "loss": 0.6539766192436218, + "step": 6776 + }, + { + "epoch": 1.5615207373271889, + "grad_norm": 1.3087388284847004, + "learning_rate": 2.5176138760206734e-07, + "loss": 0.7817956805229187, + "step": 6777 + }, + { + "epoch": 1.5617511520737328, + "grad_norm": 1.2176256508295467, + "learning_rate": 2.5150868592028626e-07, + "loss": 0.7847198843955994, + "step": 6778 + }, + { + "epoch": 1.5619815668202763, + "grad_norm": 1.1618742898915668, + "learning_rate": 2.5125609288102856e-07, + "loss": 0.8248952627182007, + "step": 6779 + }, + { + "epoch": 1.5622119815668203, + "grad_norm": 1.315546210832164, + "learning_rate": 2.510036085209578e-07, + "loss": 0.8099820613861084, + "step": 6780 + }, + { + "epoch": 1.562442396313364, + "grad_norm": 1.0605499181430498, + "learning_rate": 2.5075123287672173e-07, + "loss": 0.7764754295349121, + "step": 6781 + }, + { + "epoch": 1.5626728110599077, + "grad_norm": 1.376636441531992, + "learning_rate": 2.5049896598495234e-07, + "loss": 0.8055214285850525, + "step": 6782 + }, + { + "epoch": 1.5629032258064517, + "grad_norm": 1.2086842805836235, + "learning_rate": 2.502468078822656e-07, + "loss": 0.7536123991012573, + "step": 6783 + }, + { + "epoch": 1.5631336405529954, + "grad_norm": 1.3330366284043236, + "learning_rate": 2.499947586052623e-07, + "loss": 0.8212461471557617, + "step": 6784 + }, + { + "epoch": 1.5633640552995391, + "grad_norm": 1.087165735027238, + "learning_rate": 2.49742818190527e-07, + "loss": 0.7297977209091187, + "step": 6785 + }, + { + "epoch": 1.563594470046083, + "grad_norm": 1.3633339944793545, + "learning_rate": 2.494909866746282e-07, + "loss": 0.752082109451294, + "step": 6786 + }, + { + "epoch": 1.5638248847926266, + 
"grad_norm": 1.231077416550479, + "learning_rate": 2.4923926409411934e-07, + "loss": 0.9181928634643555, + "step": 6787 + }, + { + "epoch": 1.5640552995391706, + "grad_norm": 1.263799738870316, + "learning_rate": 2.489876504855374e-07, + "loss": 0.8607058525085449, + "step": 6788 + }, + { + "epoch": 1.5642857142857143, + "grad_norm": 1.4864085600196295, + "learning_rate": 2.4873614588540347e-07, + "loss": 0.9659625887870789, + "step": 6789 + }, + { + "epoch": 1.564516129032258, + "grad_norm": 1.7132447669994355, + "learning_rate": 2.4848475033022377e-07, + "loss": 0.8357822299003601, + "step": 6790 + }, + { + "epoch": 1.564746543778802, + "grad_norm": 1.4493565138453182, + "learning_rate": 2.482334638564877e-07, + "loss": 0.7871281504631042, + "step": 6791 + }, + { + "epoch": 1.5649769585253455, + "grad_norm": 0.9644716518923556, + "learning_rate": 2.4798228650066874e-07, + "loss": 0.7221591472625732, + "step": 6792 + }, + { + "epoch": 1.5652073732718894, + "grad_norm": 1.217051022182652, + "learning_rate": 2.4773121829922586e-07, + "loss": 0.7399123907089233, + "step": 6793 + }, + { + "epoch": 1.5654377880184331, + "grad_norm": 1.6036073035934815, + "learning_rate": 2.474802592886003e-07, + "loss": 0.8159279227256775, + "step": 6794 + }, + { + "epoch": 1.5656682027649769, + "grad_norm": 1.208678395846015, + "learning_rate": 2.472294095052192e-07, + "loss": 0.8222753405570984, + "step": 6795 + }, + { + "epoch": 1.5658986175115208, + "grad_norm": 1.0411919729384558, + "learning_rate": 2.469786689854928e-07, + "loss": 0.6586673259735107, + "step": 6796 + }, + { + "epoch": 1.5661290322580645, + "grad_norm": 1.0728597460775429, + "learning_rate": 2.467280377658154e-07, + "loss": 0.8361790180206299, + "step": 6797 + }, + { + "epoch": 1.5663594470046083, + "grad_norm": 1.2928413385952742, + "learning_rate": 2.464775158825665e-07, + "loss": 0.7669099569320679, + "step": 6798 + }, + { + "epoch": 1.5665898617511522, + "grad_norm": 1.331214255352709, + "learning_rate": 
2.462271033721086e-07, + "loss": 0.7876452207565308, + "step": 6799 + }, + { + "epoch": 1.5668202764976957, + "grad_norm": 1.2617656160077577, + "learning_rate": 2.459768002707887e-07, + "loss": 0.7932916879653931, + "step": 6800 + }, + { + "epoch": 1.5670506912442397, + "grad_norm": 1.1101874723309544, + "learning_rate": 2.457266066149382e-07, + "loss": 0.734020471572876, + "step": 6801 + }, + { + "epoch": 1.5672811059907834, + "grad_norm": 1.2001011742733312, + "learning_rate": 2.4547652244087216e-07, + "loss": 0.6975284814834595, + "step": 6802 + }, + { + "epoch": 1.5675115207373271, + "grad_norm": 1.213830843525294, + "learning_rate": 2.452265477848896e-07, + "loss": 0.7214465737342834, + "step": 6803 + }, + { + "epoch": 1.567741935483871, + "grad_norm": 1.1586033079782525, + "learning_rate": 2.4497668268327485e-07, + "loss": 0.8645110130310059, + "step": 6804 + }, + { + "epoch": 1.5679723502304146, + "grad_norm": 1.0991857687698348, + "learning_rate": 2.4472692717229504e-07, + "loss": 0.7389887571334839, + "step": 6805 + }, + { + "epoch": 1.5682027649769585, + "grad_norm": 1.206958266137894, + "learning_rate": 2.4447728128820165e-07, + "loss": 0.8462876081466675, + "step": 6806 + }, + { + "epoch": 1.5684331797235023, + "grad_norm": 1.2507487710365972, + "learning_rate": 2.44227745067231e-07, + "loss": 0.824936032295227, + "step": 6807 + }, + { + "epoch": 1.568663594470046, + "grad_norm": 1.2566804457387248, + "learning_rate": 2.439783185456027e-07, + "loss": 0.8516823053359985, + "step": 6808 + }, + { + "epoch": 1.56889400921659, + "grad_norm": 1.065798809017728, + "learning_rate": 2.4372900175952015e-07, + "loss": 0.6154674291610718, + "step": 6809 + }, + { + "epoch": 1.5691244239631337, + "grad_norm": 1.2816681742105784, + "learning_rate": 2.434797947451722e-07, + "loss": 0.7769260406494141, + "step": 6810 + }, + { + "epoch": 1.5693548387096774, + "grad_norm": 1.2232245245328917, + "learning_rate": 2.432306975387306e-07, + "loss": 0.9525332450866699, + 
"step": 6811 + }, + { + "epoch": 1.5695852534562214, + "grad_norm": 1.3409057347397177, + "learning_rate": 2.429817101763511e-07, + "loss": 0.7537581920623779, + "step": 6812 + }, + { + "epoch": 1.5698156682027649, + "grad_norm": 1.3548012775304474, + "learning_rate": 2.427328326941744e-07, + "loss": 0.814711332321167, + "step": 6813 + }, + { + "epoch": 1.5700460829493088, + "grad_norm": 1.3820372699413255, + "learning_rate": 2.4248406512832466e-07, + "loss": 0.708736777305603, + "step": 6814 + }, + { + "epoch": 1.5702764976958525, + "grad_norm": 1.1061554332755352, + "learning_rate": 2.422354075149098e-07, + "loss": 0.6757712960243225, + "step": 6815 + }, + { + "epoch": 1.5705069124423963, + "grad_norm": 1.0865188505414496, + "learning_rate": 2.4198685989002257e-07, + "loss": 0.736266553401947, + "step": 6816 + }, + { + "epoch": 1.5707373271889402, + "grad_norm": 1.1180343138508952, + "learning_rate": 2.417384222897392e-07, + "loss": 0.7423173189163208, + "step": 6817 + }, + { + "epoch": 1.5709677419354837, + "grad_norm": 1.2076049425001651, + "learning_rate": 2.414900947501197e-07, + "loss": 0.7260550260543823, + "step": 6818 + }, + { + "epoch": 1.5711981566820277, + "grad_norm": 1.241277027009942, + "learning_rate": 2.4124187730720915e-07, + "loss": 0.7125939130783081, + "step": 6819 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 1.1330555560067848, + "learning_rate": 2.409937699970356e-07, + "loss": 0.7429558634757996, + "step": 6820 + }, + { + "epoch": 1.5716589861751151, + "grad_norm": 1.1709438494600335, + "learning_rate": 2.407457728556115e-07, + "loss": 0.7166736721992493, + "step": 6821 + }, + { + "epoch": 1.571889400921659, + "grad_norm": 1.1783418664080478, + "learning_rate": 2.4049788591893336e-07, + "loss": 0.7438491582870483, + "step": 6822 + }, + { + "epoch": 1.5721198156682028, + "grad_norm": 1.3579191422740273, + "learning_rate": 2.402501092229814e-07, + "loss": 0.8031798601150513, + "step": 6823 + }, + { + "epoch": 1.5723502304147465, + 
"grad_norm": 1.3256875261480106, + "learning_rate": 2.400024428037206e-07, + "loss": 0.7067087888717651, + "step": 6824 + }, + { + "epoch": 1.5725806451612905, + "grad_norm": 1.1524386121511956, + "learning_rate": 2.3975488669709906e-07, + "loss": 0.7147783041000366, + "step": 6825 + }, + { + "epoch": 1.572811059907834, + "grad_norm": 1.2529979656124484, + "learning_rate": 2.395074409390491e-07, + "loss": 0.8534795641899109, + "step": 6826 + }, + { + "epoch": 1.573041474654378, + "grad_norm": 1.0527069171574706, + "learning_rate": 2.392601055654875e-07, + "loss": 0.7630984783172607, + "step": 6827 + }, + { + "epoch": 1.5732718894009217, + "grad_norm": 1.3268090351372508, + "learning_rate": 2.390128806123145e-07, + "loss": 0.9395428895950317, + "step": 6828 + }, + { + "epoch": 1.5735023041474654, + "grad_norm": 1.1905263432335205, + "learning_rate": 2.3876576611541423e-07, + "loss": 0.7086023092269897, + "step": 6829 + }, + { + "epoch": 1.5737327188940093, + "grad_norm": 1.0320188306367468, + "learning_rate": 2.385187621106555e-07, + "loss": 0.6937201619148254, + "step": 6830 + }, + { + "epoch": 1.5739631336405528, + "grad_norm": 1.1238131407833931, + "learning_rate": 2.3827186863389037e-07, + "loss": 0.7339247465133667, + "step": 6831 + }, + { + "epoch": 1.5741935483870968, + "grad_norm": 0.9948868064813976, + "learning_rate": 2.3802508572095493e-07, + "loss": 0.8453131318092346, + "step": 6832 + }, + { + "epoch": 1.5744239631336405, + "grad_norm": 1.2870129222879585, + "learning_rate": 2.377784134076698e-07, + "loss": 0.7303619384765625, + "step": 6833 + }, + { + "epoch": 1.5746543778801843, + "grad_norm": 1.1663952236638828, + "learning_rate": 2.3753185172983893e-07, + "loss": 0.9635858535766602, + "step": 6834 + }, + { + "epoch": 1.5748847926267282, + "grad_norm": 0.9711435467160289, + "learning_rate": 2.3728540072324998e-07, + "loss": 0.7174761295318604, + "step": 6835 + }, + { + "epoch": 1.575115207373272, + "grad_norm": 1.0168865512931398, + "learning_rate": 
2.3703906042367584e-07, + "loss": 0.7375633716583252, + "step": 6836 + }, + { + "epoch": 1.5753456221198157, + "grad_norm": 1.0569071581049987, + "learning_rate": 2.3679283086687206e-07, + "loss": 0.8202652931213379, + "step": 6837 + }, + { + "epoch": 1.5755760368663596, + "grad_norm": 1.4428887155533328, + "learning_rate": 2.3654671208857823e-07, + "loss": 0.8448499441146851, + "step": 6838 + }, + { + "epoch": 1.5758064516129031, + "grad_norm": 1.3297185542360797, + "learning_rate": 2.3630070412451864e-07, + "loss": 0.7840893268585205, + "step": 6839 + }, + { + "epoch": 1.576036866359447, + "grad_norm": 1.1930310177318706, + "learning_rate": 2.3605480701040092e-07, + "loss": 0.8036940693855286, + "step": 6840 + }, + { + "epoch": 1.5762672811059908, + "grad_norm": 1.2730513650169084, + "learning_rate": 2.3580902078191666e-07, + "loss": 0.8333625793457031, + "step": 6841 + }, + { + "epoch": 1.5764976958525345, + "grad_norm": 1.3288439351572012, + "learning_rate": 2.3556334547474133e-07, + "loss": 0.804919958114624, + "step": 6842 + }, + { + "epoch": 1.5767281105990785, + "grad_norm": 1.2605928054638793, + "learning_rate": 2.3531778112453416e-07, + "loss": 0.752541720867157, + "step": 6843 + }, + { + "epoch": 1.576958525345622, + "grad_norm": 1.0378289852617786, + "learning_rate": 2.3507232776693896e-07, + "loss": 0.647051215171814, + "step": 6844 + }, + { + "epoch": 1.577188940092166, + "grad_norm": 1.1139826400416593, + "learning_rate": 2.3482698543758285e-07, + "loss": 0.7546517848968506, + "step": 6845 + }, + { + "epoch": 1.5774193548387097, + "grad_norm": 1.0118514872509952, + "learning_rate": 2.345817541720766e-07, + "loss": 0.8773425817489624, + "step": 6846 + }, + { + "epoch": 1.5776497695852534, + "grad_norm": 1.1485612061840695, + "learning_rate": 2.3433663400601567e-07, + "loss": 0.9538160562515259, + "step": 6847 + }, + { + "epoch": 1.5778801843317973, + "grad_norm": 1.0298677066929223, + "learning_rate": 2.340916249749787e-07, + "loss": 
0.6275157332420349, + "step": 6848 + }, + { + "epoch": 1.578110599078341, + "grad_norm": 1.1889533964841936, + "learning_rate": 2.3384672711452812e-07, + "loss": 0.7729284167289734, + "step": 6849 + }, + { + "epoch": 1.5783410138248848, + "grad_norm": 1.4210079123943715, + "learning_rate": 2.3360194046021108e-07, + "loss": 0.8361644148826599, + "step": 6850 + }, + { + "epoch": 1.5785714285714287, + "grad_norm": 1.2305172757518368, + "learning_rate": 2.3335726504755793e-07, + "loss": 0.6782940626144409, + "step": 6851 + }, + { + "epoch": 1.5788018433179722, + "grad_norm": 1.3612688278959233, + "learning_rate": 2.3311270091208256e-07, + "loss": 0.8036615252494812, + "step": 6852 + }, + { + "epoch": 1.5790322580645162, + "grad_norm": 1.1729176601878941, + "learning_rate": 2.3286824808928362e-07, + "loss": 0.8450125455856323, + "step": 6853 + }, + { + "epoch": 1.57926267281106, + "grad_norm": 1.2162582175159786, + "learning_rate": 2.3262390661464303e-07, + "loss": 0.6546198725700378, + "step": 6854 + }, + { + "epoch": 1.5794930875576036, + "grad_norm": 1.4056383803669428, + "learning_rate": 2.3237967652362612e-07, + "loss": 0.8201385140419006, + "step": 6855 + }, + { + "epoch": 1.5797235023041476, + "grad_norm": 1.3504561324932176, + "learning_rate": 2.3213555785168336e-07, + "loss": 0.8753508925437927, + "step": 6856 + }, + { + "epoch": 1.579953917050691, + "grad_norm": 1.2672866740553073, + "learning_rate": 2.3189155063424782e-07, + "loss": 0.5884093642234802, + "step": 6857 + }, + { + "epoch": 1.580184331797235, + "grad_norm": 1.0135145180947078, + "learning_rate": 2.3164765490673654e-07, + "loss": 0.6494029760360718, + "step": 6858 + }, + { + "epoch": 1.5804147465437788, + "grad_norm": 1.1478304397345402, + "learning_rate": 2.3140387070455126e-07, + "loss": 0.7407097220420837, + "step": 6859 + }, + { + "epoch": 1.5806451612903225, + "grad_norm": 1.3351942864944542, + "learning_rate": 2.3116019806307673e-07, + "loss": 0.8934177160263062, + "step": 6860 + }, + { + 
"epoch": 1.5808755760368665, + "grad_norm": 1.0696222163552975, + "learning_rate": 2.309166370176816e-07, + "loss": 0.7487956881523132, + "step": 6861 + }, + { + "epoch": 1.5811059907834102, + "grad_norm": 1.316829236490256, + "learning_rate": 2.3067318760371845e-07, + "loss": 0.7744357585906982, + "step": 6862 + }, + { + "epoch": 1.581336405529954, + "grad_norm": 1.3202738468289819, + "learning_rate": 2.304298498565237e-07, + "loss": 0.8871743679046631, + "step": 6863 + }, + { + "epoch": 1.5815668202764976, + "grad_norm": 1.3064493000042272, + "learning_rate": 2.3018662381141717e-07, + "loss": 0.7865666151046753, + "step": 6864 + }, + { + "epoch": 1.5817972350230414, + "grad_norm": 1.4125222114326161, + "learning_rate": 2.2994350950370334e-07, + "loss": 0.8416531682014465, + "step": 6865 + }, + { + "epoch": 1.5820276497695853, + "grad_norm": 1.5275008378701445, + "learning_rate": 2.2970050696866972e-07, + "loss": 0.8443950414657593, + "step": 6866 + }, + { + "epoch": 1.582258064516129, + "grad_norm": 1.4005476364990852, + "learning_rate": 2.2945761624158756e-07, + "loss": 0.7770054340362549, + "step": 6867 + }, + { + "epoch": 1.5824884792626728, + "grad_norm": 1.2159355438440163, + "learning_rate": 2.2921483735771252e-07, + "loss": 0.7263047695159912, + "step": 6868 + }, + { + "epoch": 1.5827188940092167, + "grad_norm": 1.3958985609002883, + "learning_rate": 2.2897217035228312e-07, + "loss": 0.8288376927375793, + "step": 6869 + }, + { + "epoch": 1.5829493087557602, + "grad_norm": 1.2630380344196672, + "learning_rate": 2.2872961526052292e-07, + "loss": 0.8325462937355042, + "step": 6870 + }, + { + "epoch": 1.5831797235023042, + "grad_norm": 1.317005879944655, + "learning_rate": 2.284871721176379e-07, + "loss": 0.7412815093994141, + "step": 6871 + }, + { + "epoch": 1.583410138248848, + "grad_norm": 1.2813974132427688, + "learning_rate": 2.2824484095881823e-07, + "loss": 0.8958117961883545, + "step": 6872 + }, + { + "epoch": 1.5836405529953916, + "grad_norm": 
1.228628782021168, + "learning_rate": 2.2800262181923858e-07, + "loss": 0.8374444246292114, + "step": 6873 + }, + { + "epoch": 1.5838709677419356, + "grad_norm": 1.2394995315660131, + "learning_rate": 2.2776051473405634e-07, + "loss": 0.7900353670120239, + "step": 6874 + }, + { + "epoch": 1.5841013824884793, + "grad_norm": 1.129671125708823, + "learning_rate": 2.2751851973841285e-07, + "loss": 0.7420408725738525, + "step": 6875 + }, + { + "epoch": 1.584331797235023, + "grad_norm": 1.3245275433928243, + "learning_rate": 2.2727663686743382e-07, + "loss": 0.8902314305305481, + "step": 6876 + }, + { + "epoch": 1.5845622119815668, + "grad_norm": 1.2122656586799572, + "learning_rate": 2.27034866156228e-07, + "loss": 0.739869236946106, + "step": 6877 + }, + { + "epoch": 1.5847926267281105, + "grad_norm": 1.169654737499052, + "learning_rate": 2.2679320763988775e-07, + "loss": 0.8340646624565125, + "step": 6878 + }, + { + "epoch": 1.5850230414746544, + "grad_norm": 1.3076425110312813, + "learning_rate": 2.2655166135349013e-07, + "loss": 0.7501030564308167, + "step": 6879 + }, + { + "epoch": 1.5852534562211982, + "grad_norm": 1.0619799072208593, + "learning_rate": 2.2631022733209504e-07, + "loss": 0.722623348236084, + "step": 6880 + }, + { + "epoch": 1.585483870967742, + "grad_norm": 1.4046404033814042, + "learning_rate": 2.260689056107461e-07, + "loss": 0.8319696187973022, + "step": 6881 + }, + { + "epoch": 1.5857142857142859, + "grad_norm": 1.2824383261655956, + "learning_rate": 2.2582769622447107e-07, + "loss": 0.85502028465271, + "step": 6882 + }, + { + "epoch": 1.5859447004608294, + "grad_norm": 1.444500113904039, + "learning_rate": 2.2558659920828095e-07, + "loss": 0.7942626476287842, + "step": 6883 + }, + { + "epoch": 1.5861751152073733, + "grad_norm": 0.9346347634599198, + "learning_rate": 2.253456145971705e-07, + "loss": 0.6731030941009521, + "step": 6884 + }, + { + "epoch": 1.586405529953917, + "grad_norm": 1.2567565363582325, + "learning_rate": 
2.2510474242611887e-07, + "loss": 0.8479423522949219, + "step": 6885 + }, + { + "epoch": 1.5866359447004608, + "grad_norm": 1.0824322707106273, + "learning_rate": 2.2486398273008812e-07, + "loss": 0.7398810386657715, + "step": 6886 + }, + { + "epoch": 1.5868663594470047, + "grad_norm": 1.4531636253389437, + "learning_rate": 2.246233355440238e-07, + "loss": 0.8422881364822388, + "step": 6887 + }, + { + "epoch": 1.5870967741935482, + "grad_norm": 1.4298247398214885, + "learning_rate": 2.2438280090285612e-07, + "loss": 0.8307279944419861, + "step": 6888 + }, + { + "epoch": 1.5873271889400922, + "grad_norm": 1.3280924437525041, + "learning_rate": 2.2414237884149821e-07, + "loss": 0.8329004049301147, + "step": 6889 + }, + { + "epoch": 1.587557603686636, + "grad_norm": 1.196093026387475, + "learning_rate": 2.2390206939484645e-07, + "loss": 0.801641583442688, + "step": 6890 + }, + { + "epoch": 1.5877880184331796, + "grad_norm": 1.359543687074451, + "learning_rate": 2.2366187259778235e-07, + "loss": 0.9850986003875732, + "step": 6891 + }, + { + "epoch": 1.5880184331797236, + "grad_norm": 1.2770195506897435, + "learning_rate": 2.2342178848516935e-07, + "loss": 0.7169715166091919, + "step": 6892 + }, + { + "epoch": 1.5882488479262673, + "grad_norm": 1.1258655345605515, + "learning_rate": 2.2318181709185603e-07, + "loss": 0.7509033679962158, + "step": 6893 + }, + { + "epoch": 1.588479262672811, + "grad_norm": 1.2429319924869415, + "learning_rate": 2.2294195845267348e-07, + "loss": 0.6974655985832214, + "step": 6894 + }, + { + "epoch": 1.588709677419355, + "grad_norm": 1.1949954122245936, + "learning_rate": 2.227022126024367e-07, + "loss": 0.7388278245925903, + "step": 6895 + }, + { + "epoch": 1.5889400921658985, + "grad_norm": 1.1219112420315915, + "learning_rate": 2.2246257957594506e-07, + "loss": 0.6479122638702393, + "step": 6896 + }, + { + "epoch": 1.5891705069124424, + "grad_norm": 1.2556673774557678, + "learning_rate": 2.222230594079807e-07, + "loss": 0.759338915348053, 
+ "step": 6897 + }, + { + "epoch": 1.5894009216589862, + "grad_norm": 1.1747779352742982, + "learning_rate": 2.2198365213330937e-07, + "loss": 0.7299938201904297, + "step": 6898 + }, + { + "epoch": 1.58963133640553, + "grad_norm": 1.2072520940330866, + "learning_rate": 2.2174435778668122e-07, + "loss": 0.707555890083313, + "step": 6899 + }, + { + "epoch": 1.5898617511520738, + "grad_norm": 1.3083069601374675, + "learning_rate": 2.2150517640282918e-07, + "loss": 0.8311065435409546, + "step": 6900 + }, + { + "epoch": 1.5900921658986173, + "grad_norm": 1.1585381591481734, + "learning_rate": 2.2126610801647028e-07, + "loss": 0.6494649648666382, + "step": 6901 + }, + { + "epoch": 1.5903225806451613, + "grad_norm": 1.006735116508423, + "learning_rate": 2.2102715266230486e-07, + "loss": 0.6563294529914856, + "step": 6902 + }, + { + "epoch": 1.590552995391705, + "grad_norm": 1.0365958828861261, + "learning_rate": 2.207883103750171e-07, + "loss": 0.7426891326904297, + "step": 6903 + }, + { + "epoch": 1.5907834101382488, + "grad_norm": 0.9164747480191582, + "learning_rate": 2.2054958118927413e-07, + "loss": 0.7074661254882812, + "step": 6904 + }, + { + "epoch": 1.5910138248847927, + "grad_norm": 1.4657092079572216, + "learning_rate": 2.203109651397279e-07, + "loss": 0.8407880663871765, + "step": 6905 + }, + { + "epoch": 1.5912442396313364, + "grad_norm": 1.014884431152031, + "learning_rate": 2.2007246226101296e-07, + "loss": 0.7228440642356873, + "step": 6906 + }, + { + "epoch": 1.5914746543778802, + "grad_norm": 1.1100543617790197, + "learning_rate": 2.1983407258774733e-07, + "loss": 0.6988812685012817, + "step": 6907 + }, + { + "epoch": 1.591705069124424, + "grad_norm": 1.3237351414434337, + "learning_rate": 2.195957961545335e-07, + "loss": 0.793757438659668, + "step": 6908 + }, + { + "epoch": 1.5919354838709676, + "grad_norm": 1.2485526093365642, + "learning_rate": 2.1935763299595678e-07, + "loss": 0.8621397018432617, + "step": 6909 + }, + { + "epoch": 1.5921658986175116, 
+ "grad_norm": 1.2314950700356975, + "learning_rate": 2.1911958314658598e-07, + "loss": 0.7661364078521729, + "step": 6910 + }, + { + "epoch": 1.5923963133640553, + "grad_norm": 1.1937782252155265, + "learning_rate": 2.1888164664097408e-07, + "loss": 0.9322741031646729, + "step": 6911 + }, + { + "epoch": 1.592626728110599, + "grad_norm": 1.1787479005369867, + "learning_rate": 2.1864382351365717e-07, + "loss": 0.8411989212036133, + "step": 6912 + }, + { + "epoch": 1.592857142857143, + "grad_norm": 1.515351393614885, + "learning_rate": 2.1840611379915464e-07, + "loss": 0.8212479948997498, + "step": 6913 + }, + { + "epoch": 1.5930875576036865, + "grad_norm": 1.5032885798825617, + "learning_rate": 2.181685175319702e-07, + "loss": 0.7875508069992065, + "step": 6914 + }, + { + "epoch": 1.5933179723502304, + "grad_norm": 1.178669163135756, + "learning_rate": 2.1793103474659047e-07, + "loss": 0.8389852046966553, + "step": 6915 + }, + { + "epoch": 1.5935483870967742, + "grad_norm": 1.388906101423199, + "learning_rate": 2.1769366547748546e-07, + "loss": 0.8223046660423279, + "step": 6916 + }, + { + "epoch": 1.5937788018433179, + "grad_norm": 1.0682255683615596, + "learning_rate": 2.1745640975910962e-07, + "loss": 0.8427159786224365, + "step": 6917 + }, + { + "epoch": 1.5940092165898618, + "grad_norm": 1.2770752550871127, + "learning_rate": 2.172192676258996e-07, + "loss": 0.7448060512542725, + "step": 6918 + }, + { + "epoch": 1.5942396313364056, + "grad_norm": 1.1028377529507616, + "learning_rate": 2.1698223911227686e-07, + "loss": 0.7122288346290588, + "step": 6919 + }, + { + "epoch": 1.5944700460829493, + "grad_norm": 1.3801420957349657, + "learning_rate": 2.1674532425264548e-07, + "loss": 0.7712994813919067, + "step": 6920 + }, + { + "epoch": 1.5947004608294932, + "grad_norm": 1.4967308024498271, + "learning_rate": 2.1650852308139355e-07, + "loss": 0.9656664729118347, + "step": 6921 + }, + { + "epoch": 1.5949308755760367, + "grad_norm": 1.3725078407101703, + 
"learning_rate": 2.162718356328922e-07, + "loss": 0.748894214630127, + "step": 6922 + }, + { + "epoch": 1.5951612903225807, + "grad_norm": 1.0191925895935576, + "learning_rate": 2.1603526194149635e-07, + "loss": 0.6875454187393188, + "step": 6923 + }, + { + "epoch": 1.5953917050691244, + "grad_norm": 1.099493651981713, + "learning_rate": 2.1579880204154412e-07, + "loss": 0.8258690237998962, + "step": 6924 + }, + { + "epoch": 1.5956221198156681, + "grad_norm": 1.2228052738114181, + "learning_rate": 2.15562455967358e-07, + "loss": 0.7647902965545654, + "step": 6925 + }, + { + "epoch": 1.595852534562212, + "grad_norm": 1.0716326843288577, + "learning_rate": 2.1532622375324284e-07, + "loss": 0.7004281282424927, + "step": 6926 + }, + { + "epoch": 1.5960829493087556, + "grad_norm": 1.1015601686618846, + "learning_rate": 2.1509010543348726e-07, + "loss": 0.7500345706939697, + "step": 6927 + }, + { + "epoch": 1.5963133640552996, + "grad_norm": 1.1261005927566234, + "learning_rate": 2.148541010423641e-07, + "loss": 0.7300195693969727, + "step": 6928 + }, + { + "epoch": 1.5965437788018433, + "grad_norm": 1.0927883255058508, + "learning_rate": 2.1461821061412876e-07, + "loss": 0.7592284679412842, + "step": 6929 + }, + { + "epoch": 1.596774193548387, + "grad_norm": 1.265065855875648, + "learning_rate": 2.1438243418302016e-07, + "loss": 0.7179796099662781, + "step": 6930 + }, + { + "epoch": 1.597004608294931, + "grad_norm": 1.167267121775029, + "learning_rate": 2.1414677178326157e-07, + "loss": 0.8829631805419922, + "step": 6931 + }, + { + "epoch": 1.5972350230414747, + "grad_norm": 1.4030936435750112, + "learning_rate": 2.1391122344905865e-07, + "loss": 0.8661972880363464, + "step": 6932 + }, + { + "epoch": 1.5974654377880184, + "grad_norm": 1.4609293147197595, + "learning_rate": 2.136757892146007e-07, + "loss": 0.7774989604949951, + "step": 6933 + }, + { + "epoch": 1.5976958525345624, + "grad_norm": 1.2556066222087972, + "learning_rate": 2.1344046911406132e-07, + "loss": 
0.7343888878822327, + "step": 6934 + }, + { + "epoch": 1.5979262672811059, + "grad_norm": 1.1442684268001395, + "learning_rate": 2.132052631815966e-07, + "loss": 0.7810107469558716, + "step": 6935 + }, + { + "epoch": 1.5981566820276498, + "grad_norm": 1.343676205067389, + "learning_rate": 2.12970171451346e-07, + "loss": 0.7585299611091614, + "step": 6936 + }, + { + "epoch": 1.5983870967741935, + "grad_norm": 1.2827689520736418, + "learning_rate": 2.1273519395743344e-07, + "loss": 0.886371910572052, + "step": 6937 + }, + { + "epoch": 1.5986175115207373, + "grad_norm": 0.9767702062550015, + "learning_rate": 2.1250033073396523e-07, + "loss": 0.6986823081970215, + "step": 6938 + }, + { + "epoch": 1.5988479262672812, + "grad_norm": 1.2062052682782651, + "learning_rate": 2.122655818150312e-07, + "loss": 0.7524189352989197, + "step": 6939 + }, + { + "epoch": 1.5990783410138247, + "grad_norm": 1.1473232496595593, + "learning_rate": 2.120309472347055e-07, + "loss": 0.7699365615844727, + "step": 6940 + }, + { + "epoch": 1.5993087557603687, + "grad_norm": 1.188421090787615, + "learning_rate": 2.1179642702704458e-07, + "loss": 0.8112696409225464, + "step": 6941 + }, + { + "epoch": 1.5995391705069124, + "grad_norm": 1.377266755106213, + "learning_rate": 2.115620212260889e-07, + "loss": 0.7067416906356812, + "step": 6942 + }, + { + "epoch": 1.5997695852534561, + "grad_norm": 0.80841875970131, + "learning_rate": 2.1132772986586211e-07, + "loss": 0.787110447883606, + "step": 6943 + }, + { + "epoch": 1.6, + "grad_norm": 1.282613261539406, + "learning_rate": 2.11093552980371e-07, + "loss": 0.7356789112091064, + "step": 6944 + }, + { + "epoch": 1.6002304147465438, + "grad_norm": 1.2747758780049527, + "learning_rate": 2.1085949060360653e-07, + "loss": 0.8057125806808472, + "step": 6945 + }, + { + "epoch": 1.6004608294930875, + "grad_norm": 1.1828340962550294, + "learning_rate": 2.1062554276954225e-07, + "loss": 0.7169399261474609, + "step": 6946 + }, + { + "epoch": 1.6006912442396315, 
+ "grad_norm": 1.2018304676070681, + "learning_rate": 2.1039170951213526e-07, + "loss": 0.7219180464744568, + "step": 6947 + }, + { + "epoch": 1.600921658986175, + "grad_norm": 1.2736335133966967, + "learning_rate": 2.101579908653266e-07, + "loss": 0.7530789375305176, + "step": 6948 + }, + { + "epoch": 1.601152073732719, + "grad_norm": 1.2374620271602483, + "learning_rate": 2.0992438686303993e-07, + "loss": 0.8192377090454102, + "step": 6949 + }, + { + "epoch": 1.6013824884792627, + "grad_norm": 1.0987195977670645, + "learning_rate": 2.0969089753918223e-07, + "loss": 0.6904648542404175, + "step": 6950 + }, + { + "epoch": 1.6016129032258064, + "grad_norm": 1.2558183684284059, + "learning_rate": 2.0945752292764495e-07, + "loss": 0.7289770245552063, + "step": 6951 + }, + { + "epoch": 1.6018433179723504, + "grad_norm": 1.260283902447682, + "learning_rate": 2.0922426306230157e-07, + "loss": 0.8467620611190796, + "step": 6952 + }, + { + "epoch": 1.6020737327188939, + "grad_norm": 1.3650999598924758, + "learning_rate": 2.089911179770093e-07, + "loss": 0.7835153937339783, + "step": 6953 + }, + { + "epoch": 1.6023041474654378, + "grad_norm": 0.847985634813149, + "learning_rate": 2.0875808770560933e-07, + "loss": 0.6696668267250061, + "step": 6954 + }, + { + "epoch": 1.6025345622119815, + "grad_norm": 1.441689312728025, + "learning_rate": 2.0852517228192556e-07, + "loss": 0.8451364636421204, + "step": 6955 + }, + { + "epoch": 1.6027649769585253, + "grad_norm": 1.2628900414882365, + "learning_rate": 2.0829237173976487e-07, + "loss": 0.7917240858078003, + "step": 6956 + }, + { + "epoch": 1.6029953917050692, + "grad_norm": 1.2514606025933794, + "learning_rate": 2.0805968611291867e-07, + "loss": 0.791597843170166, + "step": 6957 + }, + { + "epoch": 1.603225806451613, + "grad_norm": 1.2854657657217543, + "learning_rate": 2.0782711543516063e-07, + "loss": 0.7571247816085815, + "step": 6958 + }, + { + "epoch": 1.6034562211981567, + "grad_norm": 1.1996866839711877, + 
"learning_rate": 2.075946597402478e-07, + "loss": 0.9196302890777588, + "step": 6959 + }, + { + "epoch": 1.6036866359447006, + "grad_norm": 0.9955085341059975, + "learning_rate": 2.0736231906192136e-07, + "loss": 0.7106618881225586, + "step": 6960 + }, + { + "epoch": 1.6039170506912441, + "grad_norm": 0.9090693582601959, + "learning_rate": 2.071300934339051e-07, + "loss": 0.8923465013504028, + "step": 6961 + }, + { + "epoch": 1.604147465437788, + "grad_norm": 1.1524730844586952, + "learning_rate": 2.0689798288990601e-07, + "loss": 0.6929241418838501, + "step": 6962 + }, + { + "epoch": 1.6043778801843318, + "grad_norm": 1.4736872345919192, + "learning_rate": 2.0666598746361487e-07, + "loss": 0.935944676399231, + "step": 6963 + }, + { + "epoch": 1.6046082949308755, + "grad_norm": 1.3002916307222088, + "learning_rate": 2.0643410718870536e-07, + "loss": 0.7442188262939453, + "step": 6964 + }, + { + "epoch": 1.6048387096774195, + "grad_norm": 1.13007905720726, + "learning_rate": 2.0620234209883446e-07, + "loss": 0.7340278625488281, + "step": 6965 + }, + { + "epoch": 1.605069124423963, + "grad_norm": 1.1841454047560163, + "learning_rate": 2.0597069222764297e-07, + "loss": 0.7436190247535706, + "step": 6966 + }, + { + "epoch": 1.605299539170507, + "grad_norm": 1.1998918795301519, + "learning_rate": 2.0573915760875406e-07, + "loss": 0.9109283685684204, + "step": 6967 + }, + { + "epoch": 1.6055299539170507, + "grad_norm": 1.362187790875206, + "learning_rate": 2.0550773827577518e-07, + "loss": 0.86224365234375, + "step": 6968 + }, + { + "epoch": 1.6057603686635944, + "grad_norm": 1.0973288140018649, + "learning_rate": 2.0527643426229636e-07, + "loss": 0.6873685121536255, + "step": 6969 + }, + { + "epoch": 1.6059907834101383, + "grad_norm": 1.2862613183491987, + "learning_rate": 2.0504524560189074e-07, + "loss": 0.7634609937667847, + "step": 6970 + }, + { + "epoch": 1.606221198156682, + "grad_norm": 1.642442078921259, + "learning_rate": 2.0481417232811572e-07, + "loss": 
0.7940595149993896, + "step": 6971 + }, + { + "epoch": 1.6064516129032258, + "grad_norm": 1.0579671129687211, + "learning_rate": 2.0458321447451078e-07, + "loss": 0.7109687924385071, + "step": 6972 + }, + { + "epoch": 1.6066820276497698, + "grad_norm": 1.3780414286693414, + "learning_rate": 2.04352372074599e-07, + "loss": 0.9476398825645447, + "step": 6973 + }, + { + "epoch": 1.6069124423963133, + "grad_norm": 1.3106188238946987, + "learning_rate": 2.0412164516188747e-07, + "loss": 0.7563579678535461, + "step": 6974 + }, + { + "epoch": 1.6071428571428572, + "grad_norm": 1.1912217950342037, + "learning_rate": 2.0389103376986538e-07, + "loss": 0.7928751707077026, + "step": 6975 + }, + { + "epoch": 1.607373271889401, + "grad_norm": 1.1927610489358789, + "learning_rate": 2.0366053793200565e-07, + "loss": 0.776961624622345, + "step": 6976 + }, + { + "epoch": 1.6076036866359447, + "grad_norm": 1.1830668942381175, + "learning_rate": 2.0343015768176496e-07, + "loss": 0.6511167883872986, + "step": 6977 + }, + { + "epoch": 1.6078341013824886, + "grad_norm": 1.3541662729221868, + "learning_rate": 2.0319989305258235e-07, + "loss": 0.6487337350845337, + "step": 6978 + }, + { + "epoch": 1.6080645161290321, + "grad_norm": 1.5271951763204938, + "learning_rate": 2.0296974407788004e-07, + "loss": 0.921454131603241, + "step": 6979 + }, + { + "epoch": 1.608294930875576, + "grad_norm": 1.0476613319531645, + "learning_rate": 2.0273971079106467e-07, + "loss": 0.8145809769630432, + "step": 6980 + }, + { + "epoch": 1.6085253456221198, + "grad_norm": 0.9495439447317249, + "learning_rate": 2.0250979322552474e-07, + "loss": 0.6655904054641724, + "step": 6981 + }, + { + "epoch": 1.6087557603686635, + "grad_norm": 1.1486957458539049, + "learning_rate": 2.0227999141463258e-07, + "loss": 0.777961254119873, + "step": 6982 + }, + { + "epoch": 1.6089861751152075, + "grad_norm": 1.3274428663782127, + "learning_rate": 2.0205030539174361e-07, + "loss": 0.6543164253234863, + "step": 6983 + }, + { + 
"epoch": 1.6092165898617512, + "grad_norm": 1.233780092778412, + "learning_rate": 2.018207351901966e-07, + "loss": 0.7842000722885132, + "step": 6984 + }, + { + "epoch": 1.609447004608295, + "grad_norm": 0.999384175284256, + "learning_rate": 2.0159128084331278e-07, + "loss": 0.7264418005943298, + "step": 6985 + }, + { + "epoch": 1.6096774193548387, + "grad_norm": 1.313414021265448, + "learning_rate": 2.0136194238439795e-07, + "loss": 0.8722596168518066, + "step": 6986 + }, + { + "epoch": 1.6099078341013824, + "grad_norm": 1.3518278161266697, + "learning_rate": 2.0113271984673997e-07, + "loss": 0.8162735104560852, + "step": 6987 + }, + { + "epoch": 1.6101382488479263, + "grad_norm": 1.212757185466248, + "learning_rate": 2.0090361326360982e-07, + "loss": 0.6962481737136841, + "step": 6988 + }, + { + "epoch": 1.61036866359447, + "grad_norm": 1.133716172506403, + "learning_rate": 2.0067462266826264e-07, + "loss": 0.8186852931976318, + "step": 6989 + }, + { + "epoch": 1.6105990783410138, + "grad_norm": 1.505728867210405, + "learning_rate": 2.0044574809393543e-07, + "loss": 0.8935987949371338, + "step": 6990 + }, + { + "epoch": 1.6108294930875577, + "grad_norm": 1.2824355796337807, + "learning_rate": 2.002169895738498e-07, + "loss": 0.9152865409851074, + "step": 6991 + }, + { + "epoch": 1.6110599078341012, + "grad_norm": 1.521529078332145, + "learning_rate": 1.9998834714120928e-07, + "loss": 0.8042874336242676, + "step": 6992 + }, + { + "epoch": 1.6112903225806452, + "grad_norm": 1.3198117612600044, + "learning_rate": 1.9975982082920083e-07, + "loss": 0.9621129035949707, + "step": 6993 + }, + { + "epoch": 1.611520737327189, + "grad_norm": 1.1154614331355635, + "learning_rate": 1.9953141067099533e-07, + "loss": 0.8296995162963867, + "step": 6994 + }, + { + "epoch": 1.6117511520737327, + "grad_norm": 1.0827522335122797, + "learning_rate": 1.9930311669974587e-07, + "loss": 0.8129373788833618, + "step": 6995 + }, + { + "epoch": 1.6119815668202766, + "grad_norm": 
1.359695561767368, + "learning_rate": 1.9907493894858874e-07, + "loss": 0.7450911998748779, + "step": 6996 + }, + { + "epoch": 1.6122119815668203, + "grad_norm": 1.2367503665171555, + "learning_rate": 1.9884687745064422e-07, + "loss": 0.798037052154541, + "step": 6997 + }, + { + "epoch": 1.612442396313364, + "grad_norm": 1.218969884225304, + "learning_rate": 1.9861893223901494e-07, + "loss": 0.8118857145309448, + "step": 6998 + }, + { + "epoch": 1.6126728110599078, + "grad_norm": 1.2176008366956401, + "learning_rate": 1.9839110334678632e-07, + "loss": 0.7954392433166504, + "step": 6999 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 1.2233633618619175, + "learning_rate": 1.9816339080702825e-07, + "loss": 0.8055616617202759, + "step": 7000 + }, + { + "epoch": 1.6131336405529955, + "grad_norm": 1.503254744382692, + "learning_rate": 1.979357946527924e-07, + "loss": 0.8949761986732483, + "step": 7001 + }, + { + "epoch": 1.6133640552995392, + "grad_norm": 1.376056206509758, + "learning_rate": 1.9770831491711427e-07, + "loss": 0.8327617645263672, + "step": 7002 + }, + { + "epoch": 1.613594470046083, + "grad_norm": 1.2867855951178133, + "learning_rate": 1.9748095163301215e-07, + "loss": 0.7593148946762085, + "step": 7003 + }, + { + "epoch": 1.6138248847926269, + "grad_norm": 1.2449007241812073, + "learning_rate": 1.9725370483348737e-07, + "loss": 0.7639665603637695, + "step": 7004 + }, + { + "epoch": 1.6140552995391704, + "grad_norm": 1.2839981076373308, + "learning_rate": 1.9702657455152448e-07, + "loss": 0.8561587929725647, + "step": 7005 + }, + { + "epoch": 1.6142857142857143, + "grad_norm": 1.4345782240891563, + "learning_rate": 1.9679956082009154e-07, + "loss": 0.835313081741333, + "step": 7006 + }, + { + "epoch": 1.614516129032258, + "grad_norm": 1.680229749258956, + "learning_rate": 1.9657266367213898e-07, + "loss": 0.831456184387207, + "step": 7007 + }, + { + "epoch": 1.6147465437788018, + "grad_norm": 1.1797102347566437, + "learning_rate": 
1.963458831406005e-07, + "loss": 0.699436604976654, + "step": 7008 + }, + { + "epoch": 1.6149769585253457, + "grad_norm": 1.2382287230628872, + "learning_rate": 1.9611921925839337e-07, + "loss": 0.7821902632713318, + "step": 7009 + }, + { + "epoch": 1.6152073732718892, + "grad_norm": 1.035873020643515, + "learning_rate": 1.9589267205841742e-07, + "loss": 0.7491241097450256, + "step": 7010 + }, + { + "epoch": 1.6154377880184332, + "grad_norm": 1.3212550422299536, + "learning_rate": 1.956662415735554e-07, + "loss": 0.7299652099609375, + "step": 7011 + }, + { + "epoch": 1.615668202764977, + "grad_norm": 1.2121144450441814, + "learning_rate": 1.9543992783667385e-07, + "loss": 0.692190408706665, + "step": 7012 + }, + { + "epoch": 1.6158986175115206, + "grad_norm": 1.5397188528974992, + "learning_rate": 1.9521373088062166e-07, + "loss": 0.8727273941040039, + "step": 7013 + }, + { + "epoch": 1.6161290322580646, + "grad_norm": 0.9576172656761047, + "learning_rate": 1.9498765073823077e-07, + "loss": 0.6441171169281006, + "step": 7014 + }, + { + "epoch": 1.6163594470046083, + "grad_norm": 1.202013067822893, + "learning_rate": 1.947616874423169e-07, + "loss": 0.6960387229919434, + "step": 7015 + }, + { + "epoch": 1.616589861751152, + "grad_norm": 1.3238157552069112, + "learning_rate": 1.9453584102567788e-07, + "loss": 0.9231700301170349, + "step": 7016 + }, + { + "epoch": 1.616820276497696, + "grad_norm": 1.5395552640428811, + "learning_rate": 1.9431011152109555e-07, + "loss": 0.6957401037216187, + "step": 7017 + }, + { + "epoch": 1.6170506912442395, + "grad_norm": 1.221595091148929, + "learning_rate": 1.9408449896133384e-07, + "loss": 0.6608580350875854, + "step": 7018 + }, + { + "epoch": 1.6172811059907835, + "grad_norm": 1.386134285673899, + "learning_rate": 1.9385900337913997e-07, + "loss": 0.7322397232055664, + "step": 7019 + }, + { + "epoch": 1.6175115207373272, + "grad_norm": 1.1188269604657235, + "learning_rate": 1.9363362480724488e-07, + "loss": 0.6996288299560547, + 
"step": 7020 + }, + { + "epoch": 1.617741935483871, + "grad_norm": 1.022000935531768, + "learning_rate": 1.9340836327836163e-07, + "loss": 0.7928623557090759, + "step": 7021 + }, + { + "epoch": 1.6179723502304149, + "grad_norm": 0.9992379944358776, + "learning_rate": 1.9318321882518674e-07, + "loss": 0.6275026202201843, + "step": 7022 + }, + { + "epoch": 1.6182027649769584, + "grad_norm": 1.26569218150676, + "learning_rate": 1.9295819148039948e-07, + "loss": 0.6660110950469971, + "step": 7023 + }, + { + "epoch": 1.6184331797235023, + "grad_norm": 1.0401535425644861, + "learning_rate": 1.9273328127666232e-07, + "loss": 0.8129480481147766, + "step": 7024 + }, + { + "epoch": 1.618663594470046, + "grad_norm": 1.146646002030878, + "learning_rate": 1.9250848824662046e-07, + "loss": 0.8070700168609619, + "step": 7025 + }, + { + "epoch": 1.6188940092165898, + "grad_norm": 1.4109951707076815, + "learning_rate": 1.922838124229028e-07, + "loss": 0.8123769760131836, + "step": 7026 + }, + { + "epoch": 1.6191244239631337, + "grad_norm": 0.9906397496222884, + "learning_rate": 1.920592538381205e-07, + "loss": 0.6552244424819946, + "step": 7027 + }, + { + "epoch": 1.6193548387096774, + "grad_norm": 1.0749749429025204, + "learning_rate": 1.9183481252486767e-07, + "loss": 0.8764367699623108, + "step": 7028 + }, + { + "epoch": 1.6195852534562212, + "grad_norm": 1.8347518044142406, + "learning_rate": 1.9161048851572215e-07, + "loss": 0.9075809717178345, + "step": 7029 + }, + { + "epoch": 1.6198156682027651, + "grad_norm": 1.1695152473088226, + "learning_rate": 1.9138628184324412e-07, + "loss": 0.7308327555656433, + "step": 7030 + }, + { + "epoch": 1.6200460829493086, + "grad_norm": 1.4269673355519676, + "learning_rate": 1.9116219253997655e-07, + "loss": 0.838142991065979, + "step": 7031 + }, + { + "epoch": 1.6202764976958526, + "grad_norm": 1.5286648636126694, + "learning_rate": 1.9093822063844623e-07, + "loss": 0.7681041359901428, + "step": 7032 + }, + { + "epoch": 1.6205069124423963, 
+ "grad_norm": 1.1858134701081806, + "learning_rate": 1.907143661711621e-07, + "loss": 0.7179980278015137, + "step": 7033 + }, + { + "epoch": 1.62073732718894, + "grad_norm": 1.2400863874788628, + "learning_rate": 1.9049062917061609e-07, + "loss": 0.8688361644744873, + "step": 7034 + }, + { + "epoch": 1.620967741935484, + "grad_norm": 1.0795907835047491, + "learning_rate": 1.9026700966928388e-07, + "loss": 0.6540178656578064, + "step": 7035 + }, + { + "epoch": 1.6211981566820275, + "grad_norm": 0.9042431894176799, + "learning_rate": 1.900435076996233e-07, + "loss": 0.7834869623184204, + "step": 7036 + }, + { + "epoch": 1.6214285714285714, + "grad_norm": 1.4376571546925008, + "learning_rate": 1.8982012329407505e-07, + "loss": 0.8895971775054932, + "step": 7037 + }, + { + "epoch": 1.6216589861751152, + "grad_norm": 1.1211547009425467, + "learning_rate": 1.8959685648506362e-07, + "loss": 0.6625858545303345, + "step": 7038 + }, + { + "epoch": 1.621889400921659, + "grad_norm": 1.4181930826937483, + "learning_rate": 1.893737073049957e-07, + "loss": 0.651193380355835, + "step": 7039 + }, + { + "epoch": 1.6221198156682028, + "grad_norm": 1.49480203283565, + "learning_rate": 1.8915067578626065e-07, + "loss": 0.8716636896133423, + "step": 7040 + }, + { + "epoch": 1.6223502304147466, + "grad_norm": 1.2037531898880258, + "learning_rate": 1.8892776196123196e-07, + "loss": 0.812637984752655, + "step": 7041 + }, + { + "epoch": 1.6225806451612903, + "grad_norm": 1.4952425500537936, + "learning_rate": 1.887049658622648e-07, + "loss": 0.7803184986114502, + "step": 7042 + }, + { + "epoch": 1.6228110599078343, + "grad_norm": 1.4542796613479354, + "learning_rate": 1.8848228752169793e-07, + "loss": 0.7884814739227295, + "step": 7043 + }, + { + "epoch": 1.6230414746543778, + "grad_norm": 1.3474838088832628, + "learning_rate": 1.8825972697185265e-07, + "loss": 0.7250671982765198, + "step": 7044 + }, + { + "epoch": 1.6232718894009217, + "grad_norm": 1.2055929150487366, + "learning_rate": 
1.880372842450332e-07, + "loss": 0.8078780174255371, + "step": 7045 + }, + { + "epoch": 1.6235023041474654, + "grad_norm": 1.2023825853188168, + "learning_rate": 1.878149593735272e-07, + "loss": 0.8523818254470825, + "step": 7046 + }, + { + "epoch": 1.6237327188940092, + "grad_norm": 1.2683431455334386, + "learning_rate": 1.875927523896047e-07, + "loss": 0.8772249221801758, + "step": 7047 + }, + { + "epoch": 1.6239631336405531, + "grad_norm": 1.0815338842817483, + "learning_rate": 1.8737066332551843e-07, + "loss": 0.7906323671340942, + "step": 7048 + }, + { + "epoch": 1.6241935483870966, + "grad_norm": 1.3048529080567755, + "learning_rate": 1.8714869221350492e-07, + "loss": 0.8010337352752686, + "step": 7049 + }, + { + "epoch": 1.6244239631336406, + "grad_norm": 1.365899691735964, + "learning_rate": 1.8692683908578267e-07, + "loss": 0.8978049755096436, + "step": 7050 + }, + { + "epoch": 1.6246543778801843, + "grad_norm": 1.159165616843268, + "learning_rate": 1.8670510397455297e-07, + "loss": 0.6622864007949829, + "step": 7051 + }, + { + "epoch": 1.624884792626728, + "grad_norm": 1.048079119212609, + "learning_rate": 1.8648348691200112e-07, + "loss": 0.7795406579971313, + "step": 7052 + }, + { + "epoch": 1.625115207373272, + "grad_norm": 1.2605630326093136, + "learning_rate": 1.8626198793029423e-07, + "loss": 0.9152054786682129, + "step": 7053 + }, + { + "epoch": 1.6253456221198157, + "grad_norm": 1.1757865506402991, + "learning_rate": 1.860406070615822e-07, + "loss": 0.719946563243866, + "step": 7054 + }, + { + "epoch": 1.6255760368663594, + "grad_norm": 1.2991129477224903, + "learning_rate": 1.8581934433799884e-07, + "loss": 0.782962441444397, + "step": 7055 + }, + { + "epoch": 1.6258064516129034, + "grad_norm": 1.118392005824248, + "learning_rate": 1.855981997916597e-07, + "loss": 0.8119732737541199, + "step": 7056 + }, + { + "epoch": 1.6260368663594469, + "grad_norm": 1.2362407544063627, + "learning_rate": 1.8537717345466351e-07, + "loss": 0.7585981488227844, + 
"step": 7057 + }, + { + "epoch": 1.6262672811059908, + "grad_norm": 1.158465388331893, + "learning_rate": 1.8515626535909258e-07, + "loss": 0.6846082210540771, + "step": 7058 + }, + { + "epoch": 1.6264976958525346, + "grad_norm": 1.230933966400155, + "learning_rate": 1.8493547553701083e-07, + "loss": 0.7355546951293945, + "step": 7059 + }, + { + "epoch": 1.6267281105990783, + "grad_norm": 1.15836260056471, + "learning_rate": 1.847148040204657e-07, + "loss": 0.6828340291976929, + "step": 7060 + }, + { + "epoch": 1.6269585253456222, + "grad_norm": 1.0499975056987365, + "learning_rate": 1.8449425084148763e-07, + "loss": 0.8513988256454468, + "step": 7061 + }, + { + "epoch": 1.6271889400921657, + "grad_norm": 1.0253802645646743, + "learning_rate": 1.8427381603208947e-07, + "loss": 0.6817762851715088, + "step": 7062 + }, + { + "epoch": 1.6274193548387097, + "grad_norm": 0.9793159138955572, + "learning_rate": 1.8405349962426699e-07, + "loss": 0.7314180731773376, + "step": 7063 + }, + { + "epoch": 1.6276497695852534, + "grad_norm": 1.326821994662743, + "learning_rate": 1.8383330164999898e-07, + "loss": 0.8193466663360596, + "step": 7064 + }, + { + "epoch": 1.6278801843317972, + "grad_norm": 1.2511428182189692, + "learning_rate": 1.8361322214124643e-07, + "loss": 0.7469823360443115, + "step": 7065 + }, + { + "epoch": 1.628110599078341, + "grad_norm": 1.4366505105110272, + "learning_rate": 1.8339326112995423e-07, + "loss": 0.8578816652297974, + "step": 7066 + }, + { + "epoch": 1.6283410138248848, + "grad_norm": 1.4615192025781363, + "learning_rate": 1.8317341864804903e-07, + "loss": 0.8384239077568054, + "step": 7067 + }, + { + "epoch": 1.6285714285714286, + "grad_norm": 1.122194991625306, + "learning_rate": 1.829536947274406e-07, + "loss": 0.8707646131515503, + "step": 7068 + }, + { + "epoch": 1.6288018433179725, + "grad_norm": 1.2319397578647793, + "learning_rate": 1.82734089400022e-07, + "loss": 0.6869943141937256, + "step": 7069 + }, + { + "epoch": 1.629032258064516, + 
"grad_norm": 1.3893487386527597, + "learning_rate": 1.8251460269766848e-07, + "loss": 0.7776129245758057, + "step": 7070 + }, + { + "epoch": 1.62926267281106, + "grad_norm": 1.104887091227765, + "learning_rate": 1.8229523465223785e-07, + "loss": 0.8126854300498962, + "step": 7071 + }, + { + "epoch": 1.6294930875576037, + "grad_norm": 1.0317016664034484, + "learning_rate": 1.8207598529557166e-07, + "loss": 0.6570720672607422, + "step": 7072 + }, + { + "epoch": 1.6297235023041474, + "grad_norm": 0.8859395443506812, + "learning_rate": 1.818568546594934e-07, + "loss": 0.6485599875450134, + "step": 7073 + }, + { + "epoch": 1.6299539170506914, + "grad_norm": 1.206554438869518, + "learning_rate": 1.816378427758093e-07, + "loss": 0.9132766723632812, + "step": 7074 + }, + { + "epoch": 1.6301843317972349, + "grad_norm": 1.4945592359199265, + "learning_rate": 1.8141894967630932e-07, + "loss": 0.8277286291122437, + "step": 7075 + }, + { + "epoch": 1.6304147465437788, + "grad_norm": 1.3670934774676884, + "learning_rate": 1.812001753927651e-07, + "loss": 0.7409358024597168, + "step": 7076 + }, + { + "epoch": 1.6306451612903226, + "grad_norm": 1.2664504423738472, + "learning_rate": 1.809815199569311e-07, + "loss": 0.8233339786529541, + "step": 7077 + }, + { + "epoch": 1.6308755760368663, + "grad_norm": 1.3727275296136565, + "learning_rate": 1.8076298340054563e-07, + "loss": 0.8704487085342407, + "step": 7078 + }, + { + "epoch": 1.6311059907834102, + "grad_norm": 1.503472652590263, + "learning_rate": 1.8054456575532862e-07, + "loss": 0.8845789432525635, + "step": 7079 + }, + { + "epoch": 1.631336405529954, + "grad_norm": 1.0523258046250148, + "learning_rate": 1.8032626705298272e-07, + "loss": 0.7241162061691284, + "step": 7080 + }, + { + "epoch": 1.6315668202764977, + "grad_norm": 1.193290512437584, + "learning_rate": 1.8010808732519433e-07, + "loss": 0.7065681219100952, + "step": 7081 + }, + { + "epoch": 1.6317972350230416, + "grad_norm": 1.281102564788521, + "learning_rate": 
1.7989002660363162e-07, + "loss": 0.6492339372634888, + "step": 7082 + }, + { + "epoch": 1.6320276497695851, + "grad_norm": 0.9673694389198546, + "learning_rate": 1.79672084919946e-07, + "loss": 0.7089248895645142, + "step": 7083 + }, + { + "epoch": 1.632258064516129, + "grad_norm": 1.0367687290608978, + "learning_rate": 1.794542623057712e-07, + "loss": 0.7030316591262817, + "step": 7084 + }, + { + "epoch": 1.6324884792626728, + "grad_norm": 1.1008255373775855, + "learning_rate": 1.792365587927239e-07, + "loss": 0.8626528978347778, + "step": 7085 + }, + { + "epoch": 1.6327188940092165, + "grad_norm": 1.1079176271315754, + "learning_rate": 1.7901897441240333e-07, + "loss": 0.8468672037124634, + "step": 7086 + }, + { + "epoch": 1.6329493087557605, + "grad_norm": 1.4611904004596754, + "learning_rate": 1.7880150919639214e-07, + "loss": 0.8546739816665649, + "step": 7087 + }, + { + "epoch": 1.633179723502304, + "grad_norm": 1.1949871550520017, + "learning_rate": 1.7858416317625468e-07, + "loss": 0.9187895655632019, + "step": 7088 + }, + { + "epoch": 1.633410138248848, + "grad_norm": 1.077248232790752, + "learning_rate": 1.7836693638353827e-07, + "loss": 0.7496293783187866, + "step": 7089 + }, + { + "epoch": 1.6336405529953917, + "grad_norm": 1.0517765508552415, + "learning_rate": 1.7814982884977358e-07, + "loss": 0.682653546333313, + "step": 7090 + }, + { + "epoch": 1.6338709677419354, + "grad_norm": 1.5003665522833143, + "learning_rate": 1.7793284060647295e-07, + "loss": 0.8065551519393921, + "step": 7091 + }, + { + "epoch": 1.6341013824884794, + "grad_norm": 1.134711484772771, + "learning_rate": 1.7771597168513263e-07, + "loss": 0.6605588793754578, + "step": 7092 + }, + { + "epoch": 1.634331797235023, + "grad_norm": 1.0012250391371058, + "learning_rate": 1.7749922211723034e-07, + "loss": 0.7257254123687744, + "step": 7093 + }, + { + "epoch": 1.6345622119815668, + "grad_norm": 1.1831263140816395, + "learning_rate": 1.772825919342269e-07, + "loss": 0.7438890933990479, + 
"step": 7094 + }, + { + "epoch": 1.6347926267281108, + "grad_norm": 1.250595895627981, + "learning_rate": 1.770660811675664e-07, + "loss": 0.8546249866485596, + "step": 7095 + }, + { + "epoch": 1.6350230414746543, + "grad_norm": 1.1835928544530323, + "learning_rate": 1.7684968984867466e-07, + "loss": 0.727516770362854, + "step": 7096 + }, + { + "epoch": 1.6352534562211982, + "grad_norm": 1.36586374940823, + "learning_rate": 1.766334180089606e-07, + "loss": 0.7578408718109131, + "step": 7097 + }, + { + "epoch": 1.635483870967742, + "grad_norm": 1.4255838450352876, + "learning_rate": 1.7641726567981606e-07, + "loss": 0.8253650665283203, + "step": 7098 + }, + { + "epoch": 1.6357142857142857, + "grad_norm": 1.3615057524495244, + "learning_rate": 1.7620123289261523e-07, + "loss": 0.8932347297668457, + "step": 7099 + }, + { + "epoch": 1.6359447004608296, + "grad_norm": 1.0770953977682685, + "learning_rate": 1.7598531967871465e-07, + "loss": 0.6661143898963928, + "step": 7100 + }, + { + "epoch": 1.6361751152073731, + "grad_norm": 1.2408264386151553, + "learning_rate": 1.7576952606945415e-07, + "loss": 0.8413572311401367, + "step": 7101 + }, + { + "epoch": 1.636405529953917, + "grad_norm": 1.2084626250429713, + "learning_rate": 1.7555385209615603e-07, + "loss": 0.713816225528717, + "step": 7102 + }, + { + "epoch": 1.6366359447004608, + "grad_norm": 1.67339389064804, + "learning_rate": 1.7533829779012466e-07, + "loss": 0.8588179349899292, + "step": 7103 + }, + { + "epoch": 1.6368663594470045, + "grad_norm": 1.3521357251955939, + "learning_rate": 1.7512286318264778e-07, + "loss": 0.8666437864303589, + "step": 7104 + }, + { + "epoch": 1.6370967741935485, + "grad_norm": 1.340257158830322, + "learning_rate": 1.7490754830499522e-07, + "loss": 0.9219843745231628, + "step": 7105 + }, + { + "epoch": 1.6373271889400922, + "grad_norm": 1.3285275552241094, + "learning_rate": 1.7469235318841956e-07, + "loss": 0.93767249584198, + "step": 7106 + }, + { + "epoch": 1.637557603686636, + 
"grad_norm": 1.2782247944953928, + "learning_rate": 1.7447727786415644e-07, + "loss": 0.7317457795143127, + "step": 7107 + }, + { + "epoch": 1.6377880184331797, + "grad_norm": 1.1023935137429937, + "learning_rate": 1.7426232236342365e-07, + "loss": 0.850578784942627, + "step": 7108 + }, + { + "epoch": 1.6380184331797234, + "grad_norm": 1.1932749051362488, + "learning_rate": 1.7404748671742143e-07, + "loss": 0.7580707669258118, + "step": 7109 + }, + { + "epoch": 1.6382488479262673, + "grad_norm": 1.4967576950530754, + "learning_rate": 1.738327709573333e-07, + "loss": 0.8393806219100952, + "step": 7110 + }, + { + "epoch": 1.638479262672811, + "grad_norm": 1.0170127852420416, + "learning_rate": 1.7361817511432474e-07, + "loss": 0.6641673445701599, + "step": 7111 + }, + { + "epoch": 1.6387096774193548, + "grad_norm": 1.2746608671167614, + "learning_rate": 1.734036992195438e-07, + "loss": 0.7570137977600098, + "step": 7112 + }, + { + "epoch": 1.6389400921658988, + "grad_norm": 1.1366436885649456, + "learning_rate": 1.7318934330412194e-07, + "loss": 0.78557288646698, + "step": 7113 + }, + { + "epoch": 1.6391705069124423, + "grad_norm": 1.3443988626089514, + "learning_rate": 1.729751073991721e-07, + "loss": 0.8309692740440369, + "step": 7114 + }, + { + "epoch": 1.6394009216589862, + "grad_norm": 1.0791152795033432, + "learning_rate": 1.727609915357908e-07, + "loss": 0.6409872770309448, + "step": 7115 + }, + { + "epoch": 1.63963133640553, + "grad_norm": 1.0106967037974632, + "learning_rate": 1.7254699574505648e-07, + "loss": 0.7916153073310852, + "step": 7116 + }, + { + "epoch": 1.6398617511520737, + "grad_norm": 1.5121844712494004, + "learning_rate": 1.7233312005803015e-07, + "loss": 0.7925357818603516, + "step": 7117 + }, + { + "epoch": 1.6400921658986176, + "grad_norm": 1.5493448906965575, + "learning_rate": 1.7211936450575594e-07, + "loss": 0.9124211668968201, + "step": 7118 + }, + { + "epoch": 1.6403225806451613, + "grad_norm": 1.2418161556418856, + "learning_rate": 
1.7190572911925994e-07, + "loss": 0.8905198574066162, + "step": 7119 + }, + { + "epoch": 1.640552995391705, + "grad_norm": 1.0755844253909046, + "learning_rate": 1.716922139295509e-07, + "loss": 0.8139728307723999, + "step": 7120 + }, + { + "epoch": 1.6407834101382488, + "grad_norm": 1.3621014779170746, + "learning_rate": 1.7147881896762074e-07, + "loss": 0.7607166767120361, + "step": 7121 + }, + { + "epoch": 1.6410138248847925, + "grad_norm": 1.282778120557478, + "learning_rate": 1.7126554426444316e-07, + "loss": 0.806864857673645, + "step": 7122 + }, + { + "epoch": 1.6412442396313365, + "grad_norm": 1.352241351446694, + "learning_rate": 1.710523898509747e-07, + "loss": 0.697334885597229, + "step": 7123 + }, + { + "epoch": 1.6414746543778802, + "grad_norm": 1.4205201103890581, + "learning_rate": 1.7083935575815455e-07, + "loss": 0.7313966751098633, + "step": 7124 + }, + { + "epoch": 1.641705069124424, + "grad_norm": 1.3868798260826238, + "learning_rate": 1.7062644201690413e-07, + "loss": 0.8857930898666382, + "step": 7125 + }, + { + "epoch": 1.6419354838709679, + "grad_norm": 1.0686783154078314, + "learning_rate": 1.7041364865812758e-07, + "loss": 0.7451884746551514, + "step": 7126 + }, + { + "epoch": 1.6421658986175114, + "grad_norm": 1.2220777026134708, + "learning_rate": 1.7020097571271186e-07, + "loss": 0.7023841142654419, + "step": 7127 + }, + { + "epoch": 1.6423963133640553, + "grad_norm": 1.2608302557028366, + "learning_rate": 1.6998842321152607e-07, + "loss": 0.708385705947876, + "step": 7128 + }, + { + "epoch": 1.642626728110599, + "grad_norm": 1.3854146642080662, + "learning_rate": 1.697759911854215e-07, + "loss": 0.7885474562644958, + "step": 7129 + }, + { + "epoch": 1.6428571428571428, + "grad_norm": 1.161295661131579, + "learning_rate": 1.695636796652331e-07, + "loss": 0.7054568529129028, + "step": 7130 + }, + { + "epoch": 1.6430875576036867, + "grad_norm": 1.1652742930387396, + "learning_rate": 1.6935148868177718e-07, + "loss": 0.6899726986885071, + 
"step": 7131 + }, + { + "epoch": 1.6433179723502302, + "grad_norm": 1.4011600897250127, + "learning_rate": 1.6913941826585288e-07, + "loss": 0.8558614253997803, + "step": 7132 + }, + { + "epoch": 1.6435483870967742, + "grad_norm": 1.2947217762783314, + "learning_rate": 1.6892746844824223e-07, + "loss": 0.7741858959197998, + "step": 7133 + }, + { + "epoch": 1.643778801843318, + "grad_norm": 1.130755528536183, + "learning_rate": 1.6871563925970943e-07, + "loss": 0.7332532405853271, + "step": 7134 + }, + { + "epoch": 1.6440092165898617, + "grad_norm": 1.4331915051670545, + "learning_rate": 1.6850393073100078e-07, + "loss": 0.8288085460662842, + "step": 7135 + }, + { + "epoch": 1.6442396313364056, + "grad_norm": 1.493040320153856, + "learning_rate": 1.682923428928461e-07, + "loss": 0.9470697641372681, + "step": 7136 + }, + { + "epoch": 1.6444700460829493, + "grad_norm": 1.1093535752232264, + "learning_rate": 1.6808087577595686e-07, + "loss": 0.7123041749000549, + "step": 7137 + }, + { + "epoch": 1.644700460829493, + "grad_norm": 1.3701909416221987, + "learning_rate": 1.6786952941102694e-07, + "loss": 0.8077690005302429, + "step": 7138 + }, + { + "epoch": 1.644930875576037, + "grad_norm": 1.3400770079054931, + "learning_rate": 1.6765830382873348e-07, + "loss": 0.767215371131897, + "step": 7139 + }, + { + "epoch": 1.6451612903225805, + "grad_norm": 1.3723903093182923, + "learning_rate": 1.6744719905973502e-07, + "loss": 0.7488540410995483, + "step": 7140 + }, + { + "epoch": 1.6453917050691245, + "grad_norm": 1.4546211260208752, + "learning_rate": 1.6723621513467378e-07, + "loss": 0.7841323018074036, + "step": 7141 + }, + { + "epoch": 1.6456221198156682, + "grad_norm": 1.2167195095267902, + "learning_rate": 1.6702535208417346e-07, + "loss": 0.65464186668396, + "step": 7142 + }, + { + "epoch": 1.645852534562212, + "grad_norm": 1.3347329400915569, + "learning_rate": 1.6681460993884056e-07, + "loss": 0.8845036029815674, + "step": 7143 + }, + { + "epoch": 1.6460829493087559, 
+ "grad_norm": 1.3318983430245122, + "learning_rate": 1.6660398872926396e-07, + "loss": 0.6741687655448914, + "step": 7144 + }, + { + "epoch": 1.6463133640552994, + "grad_norm": 1.4438874912830426, + "learning_rate": 1.663934884860152e-07, + "loss": 0.8656717538833618, + "step": 7145 + }, + { + "epoch": 1.6465437788018433, + "grad_norm": 1.3298318800949103, + "learning_rate": 1.6618310923964785e-07, + "loss": 0.7588434219360352, + "step": 7146 + }, + { + "epoch": 1.646774193548387, + "grad_norm": 1.3262924093620256, + "learning_rate": 1.6597285102069846e-07, + "loss": 0.7180176973342896, + "step": 7147 + }, + { + "epoch": 1.6470046082949308, + "grad_norm": 1.2551409816382322, + "learning_rate": 1.6576271385968576e-07, + "loss": 0.8253776431083679, + "step": 7148 + }, + { + "epoch": 1.6472350230414747, + "grad_norm": 1.2281736040805922, + "learning_rate": 1.6555269778711046e-07, + "loss": 0.7200941443443298, + "step": 7149 + }, + { + "epoch": 1.6474654377880185, + "grad_norm": 1.1059198918963296, + "learning_rate": 1.653428028334567e-07, + "loss": 0.7076164484024048, + "step": 7150 + }, + { + "epoch": 1.6476958525345622, + "grad_norm": 1.195055160265343, + "learning_rate": 1.6513302902919003e-07, + "loss": 0.8068090677261353, + "step": 7151 + }, + { + "epoch": 1.6479262672811061, + "grad_norm": 1.3947857709427287, + "learning_rate": 1.6492337640475884e-07, + "loss": 0.9712029099464417, + "step": 7152 + }, + { + "epoch": 1.6481566820276496, + "grad_norm": 1.406808701456467, + "learning_rate": 1.6471384499059438e-07, + "loss": 0.8359737992286682, + "step": 7153 + }, + { + "epoch": 1.6483870967741936, + "grad_norm": 1.0570634795327605, + "learning_rate": 1.645044348171094e-07, + "loss": 0.8066359758377075, + "step": 7154 + }, + { + "epoch": 1.6486175115207373, + "grad_norm": 1.3810484659709985, + "learning_rate": 1.642951459146995e-07, + "loss": 0.8717833757400513, + "step": 7155 + }, + { + "epoch": 1.648847926267281, + "grad_norm": 1.0992736543757442, + 
"learning_rate": 1.6408597831374305e-07, + "loss": 0.7335910201072693, + "step": 7156 + }, + { + "epoch": 1.649078341013825, + "grad_norm": 1.2397456033121492, + "learning_rate": 1.6387693204460028e-07, + "loss": 0.816049337387085, + "step": 7157 + }, + { + "epoch": 1.6493087557603685, + "grad_norm": 1.4068842390673124, + "learning_rate": 1.6366800713761364e-07, + "loss": 0.8060640096664429, + "step": 7158 + }, + { + "epoch": 1.6495391705069125, + "grad_norm": 1.2074799471388065, + "learning_rate": 1.6345920362310894e-07, + "loss": 0.8477619886398315, + "step": 7159 + }, + { + "epoch": 1.6497695852534562, + "grad_norm": 1.332601091577715, + "learning_rate": 1.6325052153139329e-07, + "loss": 0.9793992638587952, + "step": 7160 + }, + { + "epoch": 1.65, + "grad_norm": 1.1909988829986036, + "learning_rate": 1.6304196089275658e-07, + "loss": 0.8020002245903015, + "step": 7161 + }, + { + "epoch": 1.6502304147465439, + "grad_norm": 1.3231428787162685, + "learning_rate": 1.6283352173747146e-07, + "loss": 0.8226429224014282, + "step": 7162 + }, + { + "epoch": 1.6504608294930876, + "grad_norm": 1.2483952861501775, + "learning_rate": 1.6262520409579227e-07, + "loss": 0.7029248476028442, + "step": 7163 + }, + { + "epoch": 1.6506912442396313, + "grad_norm": 1.0969129808942812, + "learning_rate": 1.6241700799795631e-07, + "loss": 0.7234015464782715, + "step": 7164 + }, + { + "epoch": 1.6509216589861753, + "grad_norm": 1.3383637969539028, + "learning_rate": 1.6220893347418285e-07, + "loss": 0.854112982749939, + "step": 7165 + }, + { + "epoch": 1.6511520737327188, + "grad_norm": 1.2277405230752314, + "learning_rate": 1.6200098055467325e-07, + "loss": 0.8098663091659546, + "step": 7166 + }, + { + "epoch": 1.6513824884792627, + "grad_norm": 1.286099874995443, + "learning_rate": 1.617931492696123e-07, + "loss": 0.9032876491546631, + "step": 7167 + }, + { + "epoch": 1.6516129032258065, + "grad_norm": 1.0239384348378415, + "learning_rate": 1.6158543964916606e-07, + "loss": 
0.7048916816711426, + "step": 7168 + }, + { + "epoch": 1.6518433179723502, + "grad_norm": 1.2354879671689736, + "learning_rate": 1.6137785172348307e-07, + "loss": 0.879542350769043, + "step": 7169 + }, + { + "epoch": 1.6520737327188941, + "grad_norm": 1.1499858637392877, + "learning_rate": 1.611703855226949e-07, + "loss": 0.7851279377937317, + "step": 7170 + }, + { + "epoch": 1.6523041474654376, + "grad_norm": 1.3219595195357319, + "learning_rate": 1.6096304107691493e-07, + "loss": 0.779682457447052, + "step": 7171 + }, + { + "epoch": 1.6525345622119816, + "grad_norm": 1.2160096597693908, + "learning_rate": 1.6075581841623854e-07, + "loss": 0.7761027812957764, + "step": 7172 + }, + { + "epoch": 1.6527649769585253, + "grad_norm": 1.2474814185415584, + "learning_rate": 1.605487175707443e-07, + "loss": 0.726230263710022, + "step": 7173 + }, + { + "epoch": 1.652995391705069, + "grad_norm": 1.4211290590725025, + "learning_rate": 1.6034173857049238e-07, + "loss": 0.915956437587738, + "step": 7174 + }, + { + "epoch": 1.653225806451613, + "grad_norm": 1.2631109729400856, + "learning_rate": 1.6013488144552534e-07, + "loss": 0.8435969352722168, + "step": 7175 + }, + { + "epoch": 1.6534562211981567, + "grad_norm": 1.4370024530537882, + "learning_rate": 1.599281462258687e-07, + "loss": 0.7775791883468628, + "step": 7176 + }, + { + "epoch": 1.6536866359447004, + "grad_norm": 1.2504716465033257, + "learning_rate": 1.5972153294152945e-07, + "loss": 0.7578383684158325, + "step": 7177 + }, + { + "epoch": 1.6539170506912444, + "grad_norm": 1.25108951979748, + "learning_rate": 1.5951504162249706e-07, + "loss": 0.8378545045852661, + "step": 7178 + }, + { + "epoch": 1.654147465437788, + "grad_norm": 0.8833465476140244, + "learning_rate": 1.59308672298744e-07, + "loss": 0.7071488499641418, + "step": 7179 + }, + { + "epoch": 1.6543778801843319, + "grad_norm": 1.315489910714214, + "learning_rate": 1.591024250002243e-07, + "loss": 0.7424521446228027, + "step": 7180 + }, + { + "epoch": 
1.6546082949308756, + "grad_norm": 1.2002526550771535, + "learning_rate": 1.5889629975687401e-07, + "loss": 0.6503180265426636, + "step": 7181 + }, + { + "epoch": 1.6548387096774193, + "grad_norm": 1.1861762089682637, + "learning_rate": 1.5869029659861265e-07, + "loss": 0.7589888572692871, + "step": 7182 + }, + { + "epoch": 1.6550691244239633, + "grad_norm": 1.2877948406073703, + "learning_rate": 1.5848441555534109e-07, + "loss": 0.7609498500823975, + "step": 7183 + }, + { + "epoch": 1.6552995391705068, + "grad_norm": 1.1756552735153392, + "learning_rate": 1.582786566569425e-07, + "loss": 0.7813476324081421, + "step": 7184 + }, + { + "epoch": 1.6555299539170507, + "grad_norm": 1.1595327374780875, + "learning_rate": 1.5807301993328258e-07, + "loss": 0.7386292219161987, + "step": 7185 + }, + { + "epoch": 1.6557603686635944, + "grad_norm": 1.4106740697965885, + "learning_rate": 1.5786750541420922e-07, + "loss": 1.0402865409851074, + "step": 7186 + }, + { + "epoch": 1.6559907834101382, + "grad_norm": 1.071897744375966, + "learning_rate": 1.5766211312955246e-07, + "loss": 0.7375132441520691, + "step": 7187 + }, + { + "epoch": 1.6562211981566821, + "grad_norm": 1.3721197645813625, + "learning_rate": 1.574568431091251e-07, + "loss": 0.7903615236282349, + "step": 7188 + }, + { + "epoch": 1.6564516129032258, + "grad_norm": 1.1205445704505106, + "learning_rate": 1.5725169538272132e-07, + "loss": 0.6912896633148193, + "step": 7189 + }, + { + "epoch": 1.6566820276497696, + "grad_norm": 1.2659829320834666, + "learning_rate": 1.570466699801185e-07, + "loss": 0.7181826233863831, + "step": 7190 + }, + { + "epoch": 1.6569124423963135, + "grad_norm": 1.3941328099536103, + "learning_rate": 1.5684176693107566e-07, + "loss": 0.8328898549079895, + "step": 7191 + }, + { + "epoch": 1.657142857142857, + "grad_norm": 1.275566962551196, + "learning_rate": 1.5663698626533384e-07, + "loss": 0.7775120735168457, + "step": 7192 + }, + { + "epoch": 1.657373271889401, + "grad_norm": 
1.3683527646177032, + "learning_rate": 1.564323280126173e-07, + "loss": 0.8412137031555176, + "step": 7193 + }, + { + "epoch": 1.6576036866359447, + "grad_norm": 1.4192183215515342, + "learning_rate": 1.562277922026316e-07, + "loss": 0.7046825885772705, + "step": 7194 + }, + { + "epoch": 1.6578341013824884, + "grad_norm": 1.3386632639806328, + "learning_rate": 1.5602337886506468e-07, + "loss": 0.7107498645782471, + "step": 7195 + }, + { + "epoch": 1.6580645161290324, + "grad_norm": 1.1946522893092928, + "learning_rate": 1.558190880295872e-07, + "loss": 0.640724778175354, + "step": 7196 + }, + { + "epoch": 1.658294930875576, + "grad_norm": 1.3093502483074915, + "learning_rate": 1.556149197258515e-07, + "loss": 0.7856858968734741, + "step": 7197 + }, + { + "epoch": 1.6585253456221198, + "grad_norm": 1.4971129714340625, + "learning_rate": 1.554108739834923e-07, + "loss": 0.7956376075744629, + "step": 7198 + }, + { + "epoch": 1.6587557603686636, + "grad_norm": 1.2753834260169075, + "learning_rate": 1.5520695083212675e-07, + "loss": 0.721325159072876, + "step": 7199 + }, + { + "epoch": 1.6589861751152073, + "grad_norm": 1.060032555829029, + "learning_rate": 1.550031503013539e-07, + "loss": 0.7043335437774658, + "step": 7200 + }, + { + "epoch": 1.6592165898617512, + "grad_norm": 1.2269468216437214, + "learning_rate": 1.5479947242075496e-07, + "loss": 0.7154408693313599, + "step": 7201 + }, + { + "epoch": 1.659447004608295, + "grad_norm": 1.0598234159957265, + "learning_rate": 1.5459591721989397e-07, + "loss": 0.7353748083114624, + "step": 7202 + }, + { + "epoch": 1.6596774193548387, + "grad_norm": 1.1815091781809732, + "learning_rate": 1.5439248472831644e-07, + "loss": 0.7404372692108154, + "step": 7203 + }, + { + "epoch": 1.6599078341013827, + "grad_norm": 1.7521749620198364, + "learning_rate": 1.541891749755503e-07, + "loss": 0.8678613305091858, + "step": 7204 + }, + { + "epoch": 1.6601382488479262, + "grad_norm": 1.2663476960491773, + "learning_rate": 
1.5398598799110562e-07, + "loss": 0.7177796363830566, + "step": 7205 + }, + { + "epoch": 1.66036866359447, + "grad_norm": 1.3475911636796425, + "learning_rate": 1.537829238044749e-07, + "loss": 0.7610895037651062, + "step": 7206 + }, + { + "epoch": 1.6605990783410138, + "grad_norm": 1.355013126121341, + "learning_rate": 1.5357998244513227e-07, + "loss": 0.7340127825737, + "step": 7207 + }, + { + "epoch": 1.6608294930875576, + "grad_norm": 1.2008253519594887, + "learning_rate": 1.5337716394253498e-07, + "loss": 0.7060200572013855, + "step": 7208 + }, + { + "epoch": 1.6610599078341015, + "grad_norm": 1.306554098336219, + "learning_rate": 1.5317446832612147e-07, + "loss": 0.8592087030410767, + "step": 7209 + }, + { + "epoch": 1.661290322580645, + "grad_norm": 1.1630740877062444, + "learning_rate": 1.5297189562531264e-07, + "loss": 0.8687897324562073, + "step": 7210 + }, + { + "epoch": 1.661520737327189, + "grad_norm": 1.346256802747815, + "learning_rate": 1.5276944586951202e-07, + "loss": 0.8158563375473022, + "step": 7211 + }, + { + "epoch": 1.6617511520737327, + "grad_norm": 1.2436624388230366, + "learning_rate": 1.5256711908810482e-07, + "loss": 0.7734059691429138, + "step": 7212 + }, + { + "epoch": 1.6619815668202764, + "grad_norm": 1.4006583359216147, + "learning_rate": 1.5236491531045815e-07, + "loss": 0.8302994966506958, + "step": 7213 + }, + { + "epoch": 1.6622119815668204, + "grad_norm": 1.3250021353738068, + "learning_rate": 1.5216283456592216e-07, + "loss": 0.8474830389022827, + "step": 7214 + }, + { + "epoch": 1.662442396313364, + "grad_norm": 1.130266104375724, + "learning_rate": 1.5196087688382808e-07, + "loss": 0.7903469800949097, + "step": 7215 + }, + { + "epoch": 1.6626728110599078, + "grad_norm": 1.1131412296095682, + "learning_rate": 1.5175904229349035e-07, + "loss": 0.7756912708282471, + "step": 7216 + }, + { + "epoch": 1.6629032258064518, + "grad_norm": 1.4164367883683733, + "learning_rate": 1.5155733082420463e-07, + "loss": 0.7495905756950378, + 
"step": 7217 + }, + { + "epoch": 1.6631336405529953, + "grad_norm": 1.3394708776746769, + "learning_rate": 1.5135574250524897e-07, + "loss": 0.8536649942398071, + "step": 7218 + }, + { + "epoch": 1.6633640552995392, + "grad_norm": 1.3243776315844114, + "learning_rate": 1.5115427736588404e-07, + "loss": 0.7301580905914307, + "step": 7219 + }, + { + "epoch": 1.663594470046083, + "grad_norm": 1.324768351380299, + "learning_rate": 1.5095293543535203e-07, + "loss": 0.7131164073944092, + "step": 7220 + }, + { + "epoch": 1.6638248847926267, + "grad_norm": 1.0897989875613177, + "learning_rate": 1.5075171674287712e-07, + "loss": 0.708457350730896, + "step": 7221 + }, + { + "epoch": 1.6640552995391706, + "grad_norm": 1.402833248483696, + "learning_rate": 1.5055062131766662e-07, + "loss": 0.7509758472442627, + "step": 7222 + }, + { + "epoch": 1.6642857142857141, + "grad_norm": 1.1455053593625757, + "learning_rate": 1.503496491889089e-07, + "loss": 0.8401786088943481, + "step": 7223 + }, + { + "epoch": 1.664516129032258, + "grad_norm": 1.3755379329147759, + "learning_rate": 1.5014880038577482e-07, + "loss": 0.8578320741653442, + "step": 7224 + }, + { + "epoch": 1.6647465437788018, + "grad_norm": 1.0530962657504686, + "learning_rate": 1.4994807493741723e-07, + "loss": 0.6890276670455933, + "step": 7225 + }, + { + "epoch": 1.6649769585253456, + "grad_norm": 1.1705604667481366, + "learning_rate": 1.4974747287297128e-07, + "loss": 0.785246729850769, + "step": 7226 + }, + { + "epoch": 1.6652073732718895, + "grad_norm": 1.1145207566800768, + "learning_rate": 1.4954699422155382e-07, + "loss": 0.7826062440872192, + "step": 7227 + }, + { + "epoch": 1.6654377880184332, + "grad_norm": 1.392497287743248, + "learning_rate": 1.4934663901226452e-07, + "loss": 0.807513952255249, + "step": 7228 + }, + { + "epoch": 1.665668202764977, + "grad_norm": 1.0951466978132682, + "learning_rate": 1.4914640727418448e-07, + "loss": 0.8138872385025024, + "step": 7229 + }, + { + "epoch": 1.6658986175115207, 
+ "grad_norm": 1.0721150835685114, + "learning_rate": 1.489462990363768e-07, + "loss": 0.8465121984481812, + "step": 7230 + }, + { + "epoch": 1.6661290322580644, + "grad_norm": 1.2125852838751665, + "learning_rate": 1.4874631432788743e-07, + "loss": 0.7649251222610474, + "step": 7231 + }, + { + "epoch": 1.6663594470046084, + "grad_norm": 1.242983952838099, + "learning_rate": 1.485464531777436e-07, + "loss": 0.8297271132469177, + "step": 7232 + }, + { + "epoch": 1.666589861751152, + "grad_norm": 1.4592304164798606, + "learning_rate": 1.483467156149546e-07, + "loss": 0.7873194217681885, + "step": 7233 + }, + { + "epoch": 1.6668202764976958, + "grad_norm": 1.1529440121296932, + "learning_rate": 1.4814710166851274e-07, + "loss": 0.6924761533737183, + "step": 7234 + }, + { + "epoch": 1.6670506912442398, + "grad_norm": 0.9776015930659686, + "learning_rate": 1.4794761136739132e-07, + "loss": 0.6600887179374695, + "step": 7235 + }, + { + "epoch": 1.6672811059907833, + "grad_norm": 1.0700715817274216, + "learning_rate": 1.477482447405458e-07, + "loss": 0.6552041172981262, + "step": 7236 + }, + { + "epoch": 1.6675115207373272, + "grad_norm": 1.1844260959064823, + "learning_rate": 1.4754900181691465e-07, + "loss": 0.8609327077865601, + "step": 7237 + }, + { + "epoch": 1.667741935483871, + "grad_norm": 0.9877698580103615, + "learning_rate": 1.4734988262541726e-07, + "loss": 0.6970123052597046, + "step": 7238 + }, + { + "epoch": 1.6679723502304147, + "grad_norm": 1.1422057607025191, + "learning_rate": 1.4715088719495573e-07, + "loss": 0.7859683036804199, + "step": 7239 + }, + { + "epoch": 1.6682027649769586, + "grad_norm": 1.102405207717508, + "learning_rate": 1.4695201555441393e-07, + "loss": 0.7448029518127441, + "step": 7240 + }, + { + "epoch": 1.6684331797235024, + "grad_norm": 1.136418636365662, + "learning_rate": 1.4675326773265762e-07, + "loss": 0.7566728591918945, + "step": 7241 + }, + { + "epoch": 1.668663594470046, + "grad_norm": 1.183347797545015, + "learning_rate": 
1.465546437585351e-07, + "loss": 0.7563366889953613, + "step": 7242 + }, + { + "epoch": 1.6688940092165898, + "grad_norm": 1.2270668729431573, + "learning_rate": 1.4635614366087623e-07, + "loss": 0.8580834865570068, + "step": 7243 + }, + { + "epoch": 1.6691244239631335, + "grad_norm": 1.261588467565845, + "learning_rate": 1.4615776746849306e-07, + "loss": 0.6200178861618042, + "step": 7244 + }, + { + "epoch": 1.6693548387096775, + "grad_norm": 1.12353329539602, + "learning_rate": 1.4595951521017958e-07, + "loss": 0.8052491545677185, + "step": 7245 + }, + { + "epoch": 1.6695852534562212, + "grad_norm": 1.7485044689788691, + "learning_rate": 1.4576138691471186e-07, + "loss": 0.7383530735969543, + "step": 7246 + }, + { + "epoch": 1.669815668202765, + "grad_norm": 1.2061617795996018, + "learning_rate": 1.4556338261084776e-07, + "loss": 0.6735742092132568, + "step": 7247 + }, + { + "epoch": 1.670046082949309, + "grad_norm": 1.1671720957777614, + "learning_rate": 1.453655023273277e-07, + "loss": 0.7570016980171204, + "step": 7248 + }, + { + "epoch": 1.6702764976958524, + "grad_norm": 1.1212050061324152, + "learning_rate": 1.4516774609287364e-07, + "loss": 0.7271980047225952, + "step": 7249 + }, + { + "epoch": 1.6705069124423964, + "grad_norm": 1.3773952001351246, + "learning_rate": 1.449701139361894e-07, + "loss": 0.8567354083061218, + "step": 7250 + }, + { + "epoch": 1.67073732718894, + "grad_norm": 1.4372041287717652, + "learning_rate": 1.447726058859614e-07, + "loss": 0.8675428628921509, + "step": 7251 + }, + { + "epoch": 1.6709677419354838, + "grad_norm": 1.6475511282046704, + "learning_rate": 1.4457522197085748e-07, + "loss": 0.9131098389625549, + "step": 7252 + }, + { + "epoch": 1.6711981566820278, + "grad_norm": 0.9228526790942371, + "learning_rate": 1.4437796221952748e-07, + "loss": 0.7921037673950195, + "step": 7253 + }, + { + "epoch": 1.6714285714285713, + "grad_norm": 1.3314958050470875, + "learning_rate": 1.441808266606037e-07, + "loss": 0.7559863328933716, + 
"step": 7254 + }, + { + "epoch": 1.6716589861751152, + "grad_norm": 1.4253402064070324, + "learning_rate": 1.4398381532269998e-07, + "loss": 0.7433857917785645, + "step": 7255 + }, + { + "epoch": 1.671889400921659, + "grad_norm": 1.340982715064525, + "learning_rate": 1.4378692823441207e-07, + "loss": 0.8171184062957764, + "step": 7256 + }, + { + "epoch": 1.6721198156682027, + "grad_norm": 1.4295893582001031, + "learning_rate": 1.4359016542431824e-07, + "loss": 0.7296291589736938, + "step": 7257 + }, + { + "epoch": 1.6723502304147466, + "grad_norm": 1.1566282275472088, + "learning_rate": 1.4339352692097828e-07, + "loss": 0.7397829294204712, + "step": 7258 + }, + { + "epoch": 1.6725806451612903, + "grad_norm": 1.1030928795639288, + "learning_rate": 1.431970127529335e-07, + "loss": 0.6724194884300232, + "step": 7259 + }, + { + "epoch": 1.672811059907834, + "grad_norm": 1.266832602935082, + "learning_rate": 1.430006229487084e-07, + "loss": 0.7711449861526489, + "step": 7260 + }, + { + "epoch": 1.673041474654378, + "grad_norm": 1.0334522746934713, + "learning_rate": 1.428043575368083e-07, + "loss": 0.7581815719604492, + "step": 7261 + }, + { + "epoch": 1.6732718894009215, + "grad_norm": 1.2775574658714877, + "learning_rate": 1.4260821654572063e-07, + "loss": 0.7092517614364624, + "step": 7262 + }, + { + "epoch": 1.6735023041474655, + "grad_norm": 1.116987885688497, + "learning_rate": 1.4241220000391562e-07, + "loss": 0.646745502948761, + "step": 7263 + }, + { + "epoch": 1.6737327188940092, + "grad_norm": 1.0897996116307995, + "learning_rate": 1.4221630793984453e-07, + "loss": 0.7364122867584229, + "step": 7264 + }, + { + "epoch": 1.673963133640553, + "grad_norm": 1.0366138580080708, + "learning_rate": 1.4202054038194068e-07, + "loss": 0.8186795711517334, + "step": 7265 + }, + { + "epoch": 1.6741935483870969, + "grad_norm": 1.178861697439358, + "learning_rate": 1.4182489735861957e-07, + "loss": 0.7172378301620483, + "step": 7266 + }, + { + "epoch": 1.6744239631336404, + 
"grad_norm": 1.6433299949580555, + "learning_rate": 1.416293788982783e-07, + "loss": 0.8780974745750427, + "step": 7267 + }, + { + "epoch": 1.6746543778801843, + "grad_norm": 1.303060213158533, + "learning_rate": 1.4143398502929672e-07, + "loss": 0.9034930467605591, + "step": 7268 + }, + { + "epoch": 1.674884792626728, + "grad_norm": 1.283952582595571, + "learning_rate": 1.4123871578003543e-07, + "loss": 0.7994415760040283, + "step": 7269 + }, + { + "epoch": 1.6751152073732718, + "grad_norm": 1.2332939563797212, + "learning_rate": 1.410435711788376e-07, + "loss": 0.8327854871749878, + "step": 7270 + }, + { + "epoch": 1.6753456221198157, + "grad_norm": 1.3516689374751454, + "learning_rate": 1.408485512540285e-07, + "loss": 0.7667550444602966, + "step": 7271 + }, + { + "epoch": 1.6755760368663595, + "grad_norm": 1.3721126007283877, + "learning_rate": 1.4065365603391478e-07, + "loss": 0.8073924779891968, + "step": 7272 + }, + { + "epoch": 1.6758064516129032, + "grad_norm": 1.2537292403097655, + "learning_rate": 1.4045888554678497e-07, + "loss": 0.7265589237213135, + "step": 7273 + }, + { + "epoch": 1.6760368663594472, + "grad_norm": 1.4008103355507637, + "learning_rate": 1.402642398209104e-07, + "loss": 0.6912035942077637, + "step": 7274 + }, + { + "epoch": 1.6762672811059907, + "grad_norm": 1.4159985968960598, + "learning_rate": 1.400697188845432e-07, + "loss": 0.917953372001648, + "step": 7275 + }, + { + "epoch": 1.6764976958525346, + "grad_norm": 1.1092123664048492, + "learning_rate": 1.3987532276591774e-07, + "loss": 0.6989340782165527, + "step": 7276 + }, + { + "epoch": 1.6767281105990783, + "grad_norm": 1.0530722269060104, + "learning_rate": 1.396810514932507e-07, + "loss": 0.6648346185684204, + "step": 7277 + }, + { + "epoch": 1.676958525345622, + "grad_norm": 1.152242717428616, + "learning_rate": 1.3948690509474014e-07, + "loss": 0.6462730169296265, + "step": 7278 + }, + { + "epoch": 1.677188940092166, + "grad_norm": 1.0559078213581141, + "learning_rate": 
1.3929288359856584e-07, + "loss": 0.6084051132202148, + "step": 7279 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 1.2568155531692753, + "learning_rate": 1.3909898703289037e-07, + "loss": 0.8593035936355591, + "step": 7280 + }, + { + "epoch": 1.6776497695852535, + "grad_norm": 1.432799112874992, + "learning_rate": 1.389052154258572e-07, + "loss": 0.8064925670623779, + "step": 7281 + }, + { + "epoch": 1.6778801843317972, + "grad_norm": 1.3257643730794528, + "learning_rate": 1.3871156880559186e-07, + "loss": 0.7366064786911011, + "step": 7282 + }, + { + "epoch": 1.678110599078341, + "grad_norm": 1.4541745835743052, + "learning_rate": 1.3851804720020233e-07, + "loss": 0.8090124726295471, + "step": 7283 + }, + { + "epoch": 1.6783410138248849, + "grad_norm": 1.3768572400260246, + "learning_rate": 1.3832465063777787e-07, + "loss": 0.7326936721801758, + "step": 7284 + }, + { + "epoch": 1.6785714285714286, + "grad_norm": 1.1036181265329146, + "learning_rate": 1.3813137914638961e-07, + "loss": 0.7142004370689392, + "step": 7285 + }, + { + "epoch": 1.6788018433179723, + "grad_norm": 1.1850699819171153, + "learning_rate": 1.3793823275409066e-07, + "loss": 0.8358181715011597, + "step": 7286 + }, + { + "epoch": 1.6790322580645163, + "grad_norm": 1.341055264970921, + "learning_rate": 1.3774521148891583e-07, + "loss": 0.7337081432342529, + "step": 7287 + }, + { + "epoch": 1.6792626728110598, + "grad_norm": 1.079298746666331, + "learning_rate": 1.3755231537888222e-07, + "loss": 0.8029334545135498, + "step": 7288 + }, + { + "epoch": 1.6794930875576037, + "grad_norm": 1.1362422930327392, + "learning_rate": 1.373595444519884e-07, + "loss": 0.8132611513137817, + "step": 7289 + }, + { + "epoch": 1.6797235023041475, + "grad_norm": 1.2850987320352512, + "learning_rate": 1.3716689873621446e-07, + "loss": 0.7377278804779053, + "step": 7290 + }, + { + "epoch": 1.6799539170506912, + "grad_norm": 1.5545938019119256, + "learning_rate": 1.3697437825952307e-07, + "loss": 
0.788368284702301, + "step": 7291 + }, + { + "epoch": 1.6801843317972351, + "grad_norm": 1.3811107908360538, + "learning_rate": 1.3678198304985822e-07, + "loss": 0.8288586139678955, + "step": 7292 + }, + { + "epoch": 1.6804147465437786, + "grad_norm": 1.2973962244733976, + "learning_rate": 1.3658971313514567e-07, + "loss": 0.8534054160118103, + "step": 7293 + }, + { + "epoch": 1.6806451612903226, + "grad_norm": 1.261356018830994, + "learning_rate": 1.363975685432933e-07, + "loss": 0.8730596303939819, + "step": 7294 + }, + { + "epoch": 1.6808755760368663, + "grad_norm": 1.2262296688166254, + "learning_rate": 1.3620554930219076e-07, + "loss": 0.6891343593597412, + "step": 7295 + }, + { + "epoch": 1.68110599078341, + "grad_norm": 1.4944659665191207, + "learning_rate": 1.360136554397089e-07, + "loss": 0.8575270175933838, + "step": 7296 + }, + { + "epoch": 1.681336405529954, + "grad_norm": 1.1221716147697696, + "learning_rate": 1.3582188698370134e-07, + "loss": 0.82694011926651, + "step": 7297 + }, + { + "epoch": 1.6815668202764977, + "grad_norm": 1.1921152491764102, + "learning_rate": 1.3563024396200296e-07, + "loss": 0.6468113660812378, + "step": 7298 + }, + { + "epoch": 1.6817972350230415, + "grad_norm": 1.1634380991195066, + "learning_rate": 1.3543872640243016e-07, + "loss": 0.6818577647209167, + "step": 7299 + }, + { + "epoch": 1.6820276497695854, + "grad_norm": 1.262155726089824, + "learning_rate": 1.352473343327819e-07, + "loss": 0.7630767822265625, + "step": 7300 + }, + { + "epoch": 1.682258064516129, + "grad_norm": 1.3348546512512276, + "learning_rate": 1.3505606778083832e-07, + "loss": 0.9019678831100464, + "step": 7301 + }, + { + "epoch": 1.6824884792626729, + "grad_norm": 1.1302876731614566, + "learning_rate": 1.3486492677436123e-07, + "loss": 0.821324348449707, + "step": 7302 + }, + { + "epoch": 1.6827188940092166, + "grad_norm": 1.1997119452659193, + "learning_rate": 1.3467391134109495e-07, + "loss": 0.796151876449585, + "step": 7303 + }, + { + "epoch": 
1.6829493087557603, + "grad_norm": 1.298615109914031, + "learning_rate": 1.3448302150876488e-07, + "loss": 0.8020445108413696, + "step": 7304 + }, + { + "epoch": 1.6831797235023043, + "grad_norm": 0.9490183941784253, + "learning_rate": 1.3429225730507843e-07, + "loss": 0.7215749025344849, + "step": 7305 + }, + { + "epoch": 1.6834101382488478, + "grad_norm": 1.2708231250445967, + "learning_rate": 1.3410161875772474e-07, + "loss": 0.920941174030304, + "step": 7306 + }, + { + "epoch": 1.6836405529953917, + "grad_norm": 1.4523260098562263, + "learning_rate": 1.3391110589437494e-07, + "loss": 0.8979494571685791, + "step": 7307 + }, + { + "epoch": 1.6838709677419355, + "grad_norm": 1.3126261706157987, + "learning_rate": 1.337207187426812e-07, + "loss": 0.9125145673751831, + "step": 7308 + }, + { + "epoch": 1.6841013824884792, + "grad_norm": 1.1179697975279568, + "learning_rate": 1.3353045733027858e-07, + "loss": 0.8205714225769043, + "step": 7309 + }, + { + "epoch": 1.6843317972350231, + "grad_norm": 1.0993805126125902, + "learning_rate": 1.3334032168478305e-07, + "loss": 0.6914113759994507, + "step": 7310 + }, + { + "epoch": 1.6845622119815669, + "grad_norm": 1.3165472089957067, + "learning_rate": 1.3315031183379233e-07, + "loss": 0.7355014085769653, + "step": 7311 + }, + { + "epoch": 1.6847926267281106, + "grad_norm": 1.3581792517836289, + "learning_rate": 1.3296042780488637e-07, + "loss": 0.7564182281494141, + "step": 7312 + }, + { + "epoch": 1.6850230414746545, + "grad_norm": 1.197316556809727, + "learning_rate": 1.3277066962562643e-07, + "loss": 0.8091372847557068, + "step": 7313 + }, + { + "epoch": 1.685253456221198, + "grad_norm": 1.131878643977171, + "learning_rate": 1.3258103732355586e-07, + "loss": 0.7457877993583679, + "step": 7314 + }, + { + "epoch": 1.685483870967742, + "grad_norm": 1.2462081986852567, + "learning_rate": 1.3239153092619948e-07, + "loss": 0.861819863319397, + "step": 7315 + }, + { + "epoch": 1.6857142857142857, + "grad_norm": 
1.2291218741883772, + "learning_rate": 1.3220215046106353e-07, + "loss": 0.7698357105255127, + "step": 7316 + }, + { + "epoch": 1.6859447004608294, + "grad_norm": 1.2862793081172317, + "learning_rate": 1.320128959556369e-07, + "loss": 0.7889456152915955, + "step": 7317 + }, + { + "epoch": 1.6861751152073734, + "grad_norm": 1.0926817497008894, + "learning_rate": 1.3182376743738932e-07, + "loss": 0.6467938423156738, + "step": 7318 + }, + { + "epoch": 1.686405529953917, + "grad_norm": 0.962046315570081, + "learning_rate": 1.3163476493377245e-07, + "loss": 0.7202441692352295, + "step": 7319 + }, + { + "epoch": 1.6866359447004609, + "grad_norm": 1.2860571238613498, + "learning_rate": 1.3144588847222004e-07, + "loss": 0.7464008331298828, + "step": 7320 + }, + { + "epoch": 1.6868663594470046, + "grad_norm": 1.3323127704795366, + "learning_rate": 1.3125713808014704e-07, + "loss": 0.8924611806869507, + "step": 7321 + }, + { + "epoch": 1.6870967741935483, + "grad_norm": 1.5027995023789942, + "learning_rate": 1.3106851378495044e-07, + "loss": 0.6943146586418152, + "step": 7322 + }, + { + "epoch": 1.6873271889400923, + "grad_norm": 1.336362656918588, + "learning_rate": 1.308800156140085e-07, + "loss": 0.7335963249206543, + "step": 7323 + }, + { + "epoch": 1.687557603686636, + "grad_norm": 1.1540515039280186, + "learning_rate": 1.30691643594682e-07, + "loss": 0.6900516748428345, + "step": 7324 + }, + { + "epoch": 1.6877880184331797, + "grad_norm": 1.0161083273097216, + "learning_rate": 1.3050339775431262e-07, + "loss": 0.7230286598205566, + "step": 7325 + }, + { + "epoch": 1.6880184331797237, + "grad_norm": 1.3577939883495977, + "learning_rate": 1.3031527812022403e-07, + "loss": 0.8069840669631958, + "step": 7326 + }, + { + "epoch": 1.6882488479262672, + "grad_norm": 1.1850570268151976, + "learning_rate": 1.3012728471972134e-07, + "loss": 0.7598710060119629, + "step": 7327 + }, + { + "epoch": 1.6884792626728111, + "grad_norm": 1.1081098309526143, + "learning_rate": 
1.2993941758009164e-07, + "loss": 0.6817609071731567, + "step": 7328 + }, + { + "epoch": 1.6887096774193548, + "grad_norm": 1.1578322948538884, + "learning_rate": 1.2975167672860387e-07, + "loss": 0.6958975791931152, + "step": 7329 + }, + { + "epoch": 1.6889400921658986, + "grad_norm": 1.3026010781309694, + "learning_rate": 1.2956406219250814e-07, + "loss": 0.8270853757858276, + "step": 7330 + }, + { + "epoch": 1.6891705069124425, + "grad_norm": 1.2716142402347783, + "learning_rate": 1.2937657399903623e-07, + "loss": 0.8045610189437866, + "step": 7331 + }, + { + "epoch": 1.689400921658986, + "grad_norm": 1.3670021400758372, + "learning_rate": 1.2918921217540224e-07, + "loss": 0.6685627698898315, + "step": 7332 + }, + { + "epoch": 1.68963133640553, + "grad_norm": 1.481483528763015, + "learning_rate": 1.2900197674880142e-07, + "loss": 0.8157398700714111, + "step": 7333 + }, + { + "epoch": 1.6898617511520737, + "grad_norm": 1.1922253618562, + "learning_rate": 1.2881486774641025e-07, + "loss": 0.6142218112945557, + "step": 7334 + }, + { + "epoch": 1.6900921658986174, + "grad_norm": 1.2611165552955415, + "learning_rate": 1.2862788519538815e-07, + "loss": 0.7849327921867371, + "step": 7335 + }, + { + "epoch": 1.6903225806451614, + "grad_norm": 1.3074701765125263, + "learning_rate": 1.2844102912287457e-07, + "loss": 0.8035926818847656, + "step": 7336 + }, + { + "epoch": 1.6905529953917051, + "grad_norm": 1.26449405816571, + "learning_rate": 1.2825429955599209e-07, + "loss": 0.8456575870513916, + "step": 7337 + }, + { + "epoch": 1.6907834101382488, + "grad_norm": 1.0994096629111347, + "learning_rate": 1.2806769652184402e-07, + "loss": 0.7436026334762573, + "step": 7338 + }, + { + "epoch": 1.6910138248847926, + "grad_norm": 1.3946687886072922, + "learning_rate": 1.2788122004751522e-07, + "loss": 0.8315454721450806, + "step": 7339 + }, + { + "epoch": 1.6912442396313363, + "grad_norm": 1.1032652805797263, + "learning_rate": 1.2769487016007307e-07, + "loss": 
0.7425665855407715, + "step": 7340 + }, + { + "epoch": 1.6914746543778802, + "grad_norm": 1.210532059455236, + "learning_rate": 1.2750864688656572e-07, + "loss": 0.7899731993675232, + "step": 7341 + }, + { + "epoch": 1.691705069124424, + "grad_norm": 1.2339006903630358, + "learning_rate": 1.2732255025402327e-07, + "loss": 0.7637509703636169, + "step": 7342 + }, + { + "epoch": 1.6919354838709677, + "grad_norm": 1.2301886439270189, + "learning_rate": 1.2713658028945717e-07, + "loss": 0.793779730796814, + "step": 7343 + }, + { + "epoch": 1.6921658986175117, + "grad_norm": 1.2351914671209905, + "learning_rate": 1.2695073701986103e-07, + "loss": 0.7248083353042603, + "step": 7344 + }, + { + "epoch": 1.6923963133640552, + "grad_norm": 1.4318296651769333, + "learning_rate": 1.2676502047220973e-07, + "loss": 0.7506270408630371, + "step": 7345 + }, + { + "epoch": 1.692626728110599, + "grad_norm": 1.248314789497465, + "learning_rate": 1.2657943067345965e-07, + "loss": 0.7921839952468872, + "step": 7346 + }, + { + "epoch": 1.6928571428571428, + "grad_norm": 0.9630256947791611, + "learning_rate": 1.263939676505491e-07, + "loss": 0.7627893686294556, + "step": 7347 + }, + { + "epoch": 1.6930875576036866, + "grad_norm": 1.039168896728356, + "learning_rate": 1.262086314303973e-07, + "loss": 0.788955807685852, + "step": 7348 + }, + { + "epoch": 1.6933179723502305, + "grad_norm": 1.0370858136190912, + "learning_rate": 1.2602342203990612e-07, + "loss": 0.5527241826057434, + "step": 7349 + }, + { + "epoch": 1.6935483870967742, + "grad_norm": 1.344465363325951, + "learning_rate": 1.2583833950595825e-07, + "loss": 0.7324573397636414, + "step": 7350 + }, + { + "epoch": 1.693778801843318, + "grad_norm": 1.0731663336898336, + "learning_rate": 1.256533838554179e-07, + "loss": 0.6588207483291626, + "step": 7351 + }, + { + "epoch": 1.6940092165898617, + "grad_norm": 1.417078203000081, + "learning_rate": 1.2546855511513165e-07, + "loss": 0.7597184181213379, + "step": 7352 + }, + { + "epoch": 
1.6942396313364054, + "grad_norm": 1.1748568881342167, + "learning_rate": 1.2528385331192692e-07, + "loss": 0.7487671375274658, + "step": 7353 + }, + { + "epoch": 1.6944700460829494, + "grad_norm": 1.0203340332958148, + "learning_rate": 1.250992784726126e-07, + "loss": 0.757739245891571, + "step": 7354 + }, + { + "epoch": 1.694700460829493, + "grad_norm": 1.314521719717035, + "learning_rate": 1.249148306239801e-07, + "loss": 0.616966724395752, + "step": 7355 + }, + { + "epoch": 1.6949308755760368, + "grad_norm": 1.506626916778979, + "learning_rate": 1.2473050979280142e-07, + "loss": 0.9415719509124756, + "step": 7356 + }, + { + "epoch": 1.6951612903225808, + "grad_norm": 1.0903568482188648, + "learning_rate": 1.2454631600583044e-07, + "loss": 0.7731447815895081, + "step": 7357 + }, + { + "epoch": 1.6953917050691243, + "grad_norm": 1.2821570786422227, + "learning_rate": 1.2436224928980276e-07, + "loss": 0.800236701965332, + "step": 7358 + }, + { + "epoch": 1.6956221198156682, + "grad_norm": 1.2900334463062004, + "learning_rate": 1.241783096714356e-07, + "loss": 0.8113845586776733, + "step": 7359 + }, + { + "epoch": 1.695852534562212, + "grad_norm": 1.2157051726485628, + "learning_rate": 1.2399449717742706e-07, + "loss": 0.748763382434845, + "step": 7360 + }, + { + "epoch": 1.6960829493087557, + "grad_norm": 1.3769466349570898, + "learning_rate": 1.2381081183445774e-07, + "loss": 0.8595450520515442, + "step": 7361 + }, + { + "epoch": 1.6963133640552996, + "grad_norm": 1.240341465296028, + "learning_rate": 1.2362725366918913e-07, + "loss": 0.7800960540771484, + "step": 7362 + }, + { + "epoch": 1.6965437788018434, + "grad_norm": 1.1951306648014712, + "learning_rate": 1.2344382270826438e-07, + "loss": 0.6549400687217712, + "step": 7363 + }, + { + "epoch": 1.696774193548387, + "grad_norm": 1.1182982438102955, + "learning_rate": 1.2326051897830858e-07, + "loss": 0.7839380502700806, + "step": 7364 + }, + { + "epoch": 1.6970046082949308, + "grad_norm": 1.2576690972053175, + 
"learning_rate": 1.230773425059277e-07, + "loss": 0.8436654806137085, + "step": 7365 + }, + { + "epoch": 1.6972350230414746, + "grad_norm": 0.8415515075804344, + "learning_rate": 1.2289429331770974e-07, + "loss": 0.6517987251281738, + "step": 7366 + }, + { + "epoch": 1.6974654377880185, + "grad_norm": 1.073572916121381, + "learning_rate": 1.2271137144022392e-07, + "loss": 0.7108355760574341, + "step": 7367 + }, + { + "epoch": 1.6976958525345622, + "grad_norm": 1.138464806776697, + "learning_rate": 1.2252857690002094e-07, + "loss": 0.7801471948623657, + "step": 7368 + }, + { + "epoch": 1.697926267281106, + "grad_norm": 0.9980466100193536, + "learning_rate": 1.2234590972363358e-07, + "loss": 0.8240209221839905, + "step": 7369 + }, + { + "epoch": 1.69815668202765, + "grad_norm": 1.5026485017018454, + "learning_rate": 1.2216336993757558e-07, + "loss": 0.8119853138923645, + "step": 7370 + }, + { + "epoch": 1.6983870967741934, + "grad_norm": 0.9448426506131885, + "learning_rate": 1.2198095756834216e-07, + "loss": 0.7685642838478088, + "step": 7371 + }, + { + "epoch": 1.6986175115207374, + "grad_norm": 1.1884615399125027, + "learning_rate": 1.217986726424106e-07, + "loss": 0.7820984125137329, + "step": 7372 + }, + { + "epoch": 1.698847926267281, + "grad_norm": 1.4933868054084445, + "learning_rate": 1.2161651518623916e-07, + "loss": 0.8051085472106934, + "step": 7373 + }, + { + "epoch": 1.6990783410138248, + "grad_norm": 1.16418962691877, + "learning_rate": 1.2143448522626742e-07, + "loss": 0.828999400138855, + "step": 7374 + }, + { + "epoch": 1.6993087557603688, + "grad_norm": 1.513005376638313, + "learning_rate": 1.2125258278891738e-07, + "loss": 0.8215579986572266, + "step": 7375 + }, + { + "epoch": 1.6995391705069123, + "grad_norm": 1.2614405602995598, + "learning_rate": 1.2107080790059156e-07, + "loss": 0.9362014532089233, + "step": 7376 + }, + { + "epoch": 1.6997695852534562, + "grad_norm": 1.014310262155135, + "learning_rate": 1.2088916058767428e-07, + "loss": 
0.7789602279663086, + "step": 7377 + }, + { + "epoch": 1.7, + "grad_norm": 1.322797235291574, + "learning_rate": 1.2070764087653163e-07, + "loss": 0.8371152877807617, + "step": 7378 + }, + { + "epoch": 1.7002304147465437, + "grad_norm": 1.2225532720655308, + "learning_rate": 1.2052624879351103e-07, + "loss": 0.64423668384552, + "step": 7379 + }, + { + "epoch": 1.7004608294930876, + "grad_norm": 1.3442813905677369, + "learning_rate": 1.203449843649409e-07, + "loss": 0.7635257244110107, + "step": 7380 + }, + { + "epoch": 1.7006912442396314, + "grad_norm": 1.15010903043395, + "learning_rate": 1.2016384761713194e-07, + "loss": 0.7859230041503906, + "step": 7381 + }, + { + "epoch": 1.700921658986175, + "grad_norm": 1.0218637195871514, + "learning_rate": 1.199828385763757e-07, + "loss": 0.7066336870193481, + "step": 7382 + }, + { + "epoch": 1.701152073732719, + "grad_norm": 1.1069799499148123, + "learning_rate": 1.198019572689455e-07, + "loss": 0.7190531492233276, + "step": 7383 + }, + { + "epoch": 1.7013824884792625, + "grad_norm": 1.520158585759741, + "learning_rate": 1.1962120372109586e-07, + "loss": 0.7389136552810669, + "step": 7384 + }, + { + "epoch": 1.7016129032258065, + "grad_norm": 1.5406735409523549, + "learning_rate": 1.1944057795906316e-07, + "loss": 0.774425745010376, + "step": 7385 + }, + { + "epoch": 1.7018433179723502, + "grad_norm": 1.0093305285556118, + "learning_rate": 1.1926008000906484e-07, + "loss": 0.7566725015640259, + "step": 7386 + }, + { + "epoch": 1.702073732718894, + "grad_norm": 1.153413777620863, + "learning_rate": 1.1907970989729987e-07, + "loss": 0.6891475915908813, + "step": 7387 + }, + { + "epoch": 1.702304147465438, + "grad_norm": 1.08541401133235, + "learning_rate": 1.1889946764994873e-07, + "loss": 0.6188378930091858, + "step": 7388 + }, + { + "epoch": 1.7025345622119814, + "grad_norm": 1.1534210847497282, + "learning_rate": 1.1871935329317362e-07, + "loss": 0.703027069568634, + "step": 7389 + }, + { + "epoch": 1.7027649769585254, + 
"grad_norm": 1.2738888238498793, + "learning_rate": 1.1853936685311772e-07, + "loss": 0.9253139495849609, + "step": 7390 + }, + { + "epoch": 1.702995391705069, + "grad_norm": 1.015934424294919, + "learning_rate": 1.1835950835590569e-07, + "loss": 0.6504430770874023, + "step": 7391 + }, + { + "epoch": 1.7032258064516128, + "grad_norm": 1.0145240040509695, + "learning_rate": 1.18179777827644e-07, + "loss": 0.6656354665756226, + "step": 7392 + }, + { + "epoch": 1.7034562211981568, + "grad_norm": 1.451290987899464, + "learning_rate": 1.1800017529442019e-07, + "loss": 0.8534063100814819, + "step": 7393 + }, + { + "epoch": 1.7036866359447005, + "grad_norm": 1.1896366783409809, + "learning_rate": 1.178207007823031e-07, + "loss": 0.8315893411636353, + "step": 7394 + }, + { + "epoch": 1.7039170506912442, + "grad_norm": 1.1636407894423468, + "learning_rate": 1.1764135431734367e-07, + "loss": 0.8161677718162537, + "step": 7395 + }, + { + "epoch": 1.7041474654377882, + "grad_norm": 1.418011015190517, + "learning_rate": 1.1746213592557352e-07, + "loss": 0.7942687273025513, + "step": 7396 + }, + { + "epoch": 1.7043778801843317, + "grad_norm": 0.9938387819486493, + "learning_rate": 1.1728304563300584e-07, + "loss": 0.8056384325027466, + "step": 7397 + }, + { + "epoch": 1.7046082949308756, + "grad_norm": 1.3626759695428086, + "learning_rate": 1.1710408346563583e-07, + "loss": 0.8535007238388062, + "step": 7398 + }, + { + "epoch": 1.7048387096774194, + "grad_norm": 1.1491077351100174, + "learning_rate": 1.1692524944943916e-07, + "loss": 0.7729576826095581, + "step": 7399 + }, + { + "epoch": 1.705069124423963, + "grad_norm": 1.2729586784281095, + "learning_rate": 1.1674654361037328e-07, + "loss": 0.7755489349365234, + "step": 7400 + }, + { + "epoch": 1.705299539170507, + "grad_norm": 1.7008944920024607, + "learning_rate": 1.1656796597437757e-07, + "loss": 0.8752193450927734, + "step": 7401 + }, + { + "epoch": 1.7055299539170505, + "grad_norm": 1.0505715773863387, + "learning_rate": 
1.1638951656737217e-07, + "loss": 0.7135917544364929, + "step": 7402 + }, + { + "epoch": 1.7057603686635945, + "grad_norm": 1.1807276735663779, + "learning_rate": 1.1621119541525859e-07, + "loss": 0.7378124594688416, + "step": 7403 + }, + { + "epoch": 1.7059907834101382, + "grad_norm": 1.1699041912496186, + "learning_rate": 1.1603300254391978e-07, + "loss": 0.637479305267334, + "step": 7404 + }, + { + "epoch": 1.706221198156682, + "grad_norm": 0.9107859734790176, + "learning_rate": 1.1585493797922075e-07, + "loss": 0.6162394881248474, + "step": 7405 + }, + { + "epoch": 1.706451612903226, + "grad_norm": 1.0832025296305532, + "learning_rate": 1.1567700174700701e-07, + "loss": 0.7836494445800781, + "step": 7406 + }, + { + "epoch": 1.7066820276497696, + "grad_norm": 1.3117851793296085, + "learning_rate": 1.154991938731057e-07, + "loss": 0.6297281980514526, + "step": 7407 + }, + { + "epoch": 1.7069124423963133, + "grad_norm": 0.9987358693502671, + "learning_rate": 1.1532151438332549e-07, + "loss": 0.7190115451812744, + "step": 7408 + }, + { + "epoch": 1.7071428571428573, + "grad_norm": 1.353324439932077, + "learning_rate": 1.151439633034561e-07, + "loss": 0.7578086853027344, + "step": 7409 + }, + { + "epoch": 1.7073732718894008, + "grad_norm": 0.986158496671175, + "learning_rate": 1.1496654065926925e-07, + "loss": 0.7347216010093689, + "step": 7410 + }, + { + "epoch": 1.7076036866359448, + "grad_norm": 1.2279759650694806, + "learning_rate": 1.1478924647651711e-07, + "loss": 0.7940168380737305, + "step": 7411 + }, + { + "epoch": 1.7078341013824885, + "grad_norm": 1.2336717780625897, + "learning_rate": 1.1461208078093431e-07, + "loss": 0.7625843286514282, + "step": 7412 + }, + { + "epoch": 1.7080645161290322, + "grad_norm": 1.5771280074431184, + "learning_rate": 1.1443504359823585e-07, + "loss": 0.7603492736816406, + "step": 7413 + }, + { + "epoch": 1.7082949308755762, + "grad_norm": 1.1263740749103024, + "learning_rate": 1.1425813495411817e-07, + "loss": 
0.8746018409729004, + "step": 7414 + }, + { + "epoch": 1.7085253456221197, + "grad_norm": 1.2947959548271089, + "learning_rate": 1.1408135487425996e-07, + "loss": 0.72724449634552, + "step": 7415 + }, + { + "epoch": 1.7087557603686636, + "grad_norm": 0.794129708213959, + "learning_rate": 1.1390470338432023e-07, + "loss": 0.6874721646308899, + "step": 7416 + }, + { + "epoch": 1.7089861751152073, + "grad_norm": 0.9673124457868691, + "learning_rate": 1.1372818050993959e-07, + "loss": 0.7129265666007996, + "step": 7417 + }, + { + "epoch": 1.709216589861751, + "grad_norm": 1.3811139782005308, + "learning_rate": 1.1355178627674045e-07, + "loss": 0.7505607008934021, + "step": 7418 + }, + { + "epoch": 1.709447004608295, + "grad_norm": 1.1149863565678992, + "learning_rate": 1.1337552071032608e-07, + "loss": 0.7497769594192505, + "step": 7419 + }, + { + "epoch": 1.7096774193548387, + "grad_norm": 1.342673457996757, + "learning_rate": 1.1319938383628092e-07, + "loss": 0.792352020740509, + "step": 7420 + }, + { + "epoch": 1.7099078341013825, + "grad_norm": 1.1720516000619245, + "learning_rate": 1.1302337568017139e-07, + "loss": 0.780627965927124, + "step": 7421 + }, + { + "epoch": 1.7101382488479264, + "grad_norm": 1.2702279678670012, + "learning_rate": 1.1284749626754464e-07, + "loss": 0.7024368047714233, + "step": 7422 + }, + { + "epoch": 1.71036866359447, + "grad_norm": 1.2880158142162281, + "learning_rate": 1.1267174562392945e-07, + "loss": 0.756782591342926, + "step": 7423 + }, + { + "epoch": 1.7105990783410139, + "grad_norm": 1.2881350167706749, + "learning_rate": 1.1249612377483552e-07, + "loss": 0.8585456609725952, + "step": 7424 + }, + { + "epoch": 1.7108294930875576, + "grad_norm": 1.2079330064248406, + "learning_rate": 1.1232063074575449e-07, + "loss": 0.8610610961914062, + "step": 7425 + }, + { + "epoch": 1.7110599078341013, + "grad_norm": 1.2629835504337044, + "learning_rate": 1.1214526656215872e-07, + "loss": 0.7493829131126404, + "step": 7426 + }, + { + "epoch": 
1.7112903225806453, + "grad_norm": 1.1677189056932475, + "learning_rate": 1.1197003124950222e-07, + "loss": 0.7479410171508789, + "step": 7427 + }, + { + "epoch": 1.7115207373271888, + "grad_norm": 1.2024881147733253, + "learning_rate": 1.1179492483322006e-07, + "loss": 0.8056051135063171, + "step": 7428 + }, + { + "epoch": 1.7117511520737327, + "grad_norm": 1.2393004464149642, + "learning_rate": 1.1161994733872848e-07, + "loss": 0.8448202610015869, + "step": 7429 + }, + { + "epoch": 1.7119815668202765, + "grad_norm": 1.3170634810384778, + "learning_rate": 1.1144509879142571e-07, + "loss": 0.7783033847808838, + "step": 7430 + }, + { + "epoch": 1.7122119815668202, + "grad_norm": 1.2589188548838177, + "learning_rate": 1.1127037921669058e-07, + "loss": 0.6591838598251343, + "step": 7431 + }, + { + "epoch": 1.7124423963133641, + "grad_norm": 1.4141951291447457, + "learning_rate": 1.1109578863988322e-07, + "loss": 0.8508287668228149, + "step": 7432 + }, + { + "epoch": 1.7126728110599079, + "grad_norm": 1.0110596601133535, + "learning_rate": 1.1092132708634549e-07, + "loss": 0.7981588840484619, + "step": 7433 + }, + { + "epoch": 1.7129032258064516, + "grad_norm": 1.1560054105611206, + "learning_rate": 1.1074699458140025e-07, + "loss": 0.7754761576652527, + "step": 7434 + }, + { + "epoch": 1.7131336405529956, + "grad_norm": 1.4234254723014017, + "learning_rate": 1.1057279115035124e-07, + "loss": 0.8487040996551514, + "step": 7435 + }, + { + "epoch": 1.713364055299539, + "grad_norm": 1.2105987237993454, + "learning_rate": 1.1039871681848433e-07, + "loss": 0.8175803422927856, + "step": 7436 + }, + { + "epoch": 1.713594470046083, + "grad_norm": 1.0010434545431337, + "learning_rate": 1.1022477161106591e-07, + "loss": 0.8361574411392212, + "step": 7437 + }, + { + "epoch": 1.7138248847926267, + "grad_norm": 1.1841110354603608, + "learning_rate": 1.1005095555334409e-07, + "loss": 0.6253053545951843, + "step": 7438 + }, + { + "epoch": 1.7140552995391705, + "grad_norm": 
1.5361244402123166, + "learning_rate": 1.0987726867054792e-07, + "loss": 0.8035168647766113, + "step": 7439 + }, + { + "epoch": 1.7142857142857144, + "grad_norm": 1.0148513511065955, + "learning_rate": 1.0970371098788767e-07, + "loss": 0.7352867722511292, + "step": 7440 + }, + { + "epoch": 1.714516129032258, + "grad_norm": 1.1469128257526675, + "learning_rate": 1.0953028253055541e-07, + "loss": 0.7540202140808105, + "step": 7441 + }, + { + "epoch": 1.7147465437788019, + "grad_norm": 1.2653522382652087, + "learning_rate": 1.0935698332372379e-07, + "loss": 0.7883191108703613, + "step": 7442 + }, + { + "epoch": 1.7149769585253456, + "grad_norm": 1.2745739855530656, + "learning_rate": 1.0918381339254701e-07, + "loss": 0.7581819295883179, + "step": 7443 + }, + { + "epoch": 1.7152073732718893, + "grad_norm": 1.1705192956080483, + "learning_rate": 1.090107727621603e-07, + "loss": 0.8066321611404419, + "step": 7444 + }, + { + "epoch": 1.7154377880184333, + "grad_norm": 1.1820593590096908, + "learning_rate": 1.0883786145768037e-07, + "loss": 0.7427937984466553, + "step": 7445 + }, + { + "epoch": 1.715668202764977, + "grad_norm": 1.3132499515834741, + "learning_rate": 1.0866507950420523e-07, + "loss": 0.7736409902572632, + "step": 7446 + }, + { + "epoch": 1.7158986175115207, + "grad_norm": 1.1930714060597967, + "learning_rate": 1.0849242692681382e-07, + "loss": 0.7253416776657104, + "step": 7447 + }, + { + "epoch": 1.7161290322580647, + "grad_norm": 0.9521960056037656, + "learning_rate": 1.0831990375056643e-07, + "loss": 0.7933270931243896, + "step": 7448 + }, + { + "epoch": 1.7163594470046082, + "grad_norm": 1.407227257578247, + "learning_rate": 1.0814751000050437e-07, + "loss": 0.7946739196777344, + "step": 7449 + }, + { + "epoch": 1.7165898617511521, + "grad_norm": 1.2776015375287177, + "learning_rate": 1.0797524570165073e-07, + "loss": 0.7798205614089966, + "step": 7450 + }, + { + "epoch": 1.7168202764976959, + "grad_norm": 1.2558469001082564, + "learning_rate": 
1.078031108790094e-07, + "loss": 0.616565465927124, + "step": 7451 + }, + { + "epoch": 1.7170506912442396, + "grad_norm": 1.2221718815584264, + "learning_rate": 1.0763110555756516e-07, + "loss": 0.8406517505645752, + "step": 7452 + }, + { + "epoch": 1.7172811059907835, + "grad_norm": 1.3773523411720476, + "learning_rate": 1.0745922976228483e-07, + "loss": 0.8827311992645264, + "step": 7453 + }, + { + "epoch": 1.717511520737327, + "grad_norm": 1.2403910104019171, + "learning_rate": 1.0728748351811567e-07, + "loss": 0.585588812828064, + "step": 7454 + }, + { + "epoch": 1.717741935483871, + "grad_norm": 0.9381679846122704, + "learning_rate": 1.0711586684998631e-07, + "loss": 0.6305320858955383, + "step": 7455 + }, + { + "epoch": 1.7179723502304147, + "grad_norm": 1.0634674542520166, + "learning_rate": 1.0694437978280701e-07, + "loss": 0.7982319593429565, + "step": 7456 + }, + { + "epoch": 1.7182027649769585, + "grad_norm": 1.3468349324058282, + "learning_rate": 1.0677302234146879e-07, + "loss": 0.7792943716049194, + "step": 7457 + }, + { + "epoch": 1.7184331797235024, + "grad_norm": 1.308217346349807, + "learning_rate": 1.0660179455084372e-07, + "loss": 0.7019332051277161, + "step": 7458 + }, + { + "epoch": 1.7186635944700461, + "grad_norm": 1.2330257329830192, + "learning_rate": 1.0643069643578562e-07, + "loss": 0.8088894486427307, + "step": 7459 + }, + { + "epoch": 1.7188940092165899, + "grad_norm": 1.5573400915532798, + "learning_rate": 1.0625972802112882e-07, + "loss": 0.799231767654419, + "step": 7460 + }, + { + "epoch": 1.7191244239631336, + "grad_norm": 0.950308854182165, + "learning_rate": 1.0608888933168958e-07, + "loss": 0.7265694737434387, + "step": 7461 + }, + { + "epoch": 1.7193548387096773, + "grad_norm": 1.1717288459308963, + "learning_rate": 1.0591818039226464e-07, + "loss": 0.8566714525222778, + "step": 7462 + }, + { + "epoch": 1.7195852534562213, + "grad_norm": 1.2255123057406947, + "learning_rate": 1.0574760122763216e-07, + "loss": 
0.811874508857727, + "step": 7463 + }, + { + "epoch": 1.719815668202765, + "grad_norm": 1.0493349652228454, + "learning_rate": 1.0557715186255156e-07, + "loss": 0.7990631461143494, + "step": 7464 + }, + { + "epoch": 1.7200460829493087, + "grad_norm": 1.3183681626099089, + "learning_rate": 1.0540683232176307e-07, + "loss": 0.8108334541320801, + "step": 7465 + }, + { + "epoch": 1.7202764976958527, + "grad_norm": 1.8420274096120763, + "learning_rate": 1.0523664262998888e-07, + "loss": 0.8927996158599854, + "step": 7466 + }, + { + "epoch": 1.7205069124423962, + "grad_norm": 1.1733285346989661, + "learning_rate": 1.0506658281193138e-07, + "loss": 0.7277737855911255, + "step": 7467 + }, + { + "epoch": 1.7207373271889401, + "grad_norm": 1.0503912207473127, + "learning_rate": 1.0489665289227467e-07, + "loss": 0.7229233980178833, + "step": 7468 + }, + { + "epoch": 1.7209677419354839, + "grad_norm": 1.298634428768958, + "learning_rate": 1.0472685289568373e-07, + "loss": 0.7211846709251404, + "step": 7469 + }, + { + "epoch": 1.7211981566820276, + "grad_norm": 1.1862135261022106, + "learning_rate": 1.0455718284680504e-07, + "loss": 0.8239504098892212, + "step": 7470 + }, + { + "epoch": 1.7214285714285715, + "grad_norm": 1.2304377847970827, + "learning_rate": 1.0438764277026579e-07, + "loss": 0.7492972612380981, + "step": 7471 + }, + { + "epoch": 1.7216589861751153, + "grad_norm": 1.3060072891774943, + "learning_rate": 1.0421823269067442e-07, + "loss": 0.7658303380012512, + "step": 7472 + }, + { + "epoch": 1.721889400921659, + "grad_norm": 1.0618950256674606, + "learning_rate": 1.0404895263262092e-07, + "loss": 0.708244800567627, + "step": 7473 + }, + { + "epoch": 1.7221198156682027, + "grad_norm": 1.1946101503339825, + "learning_rate": 1.0387980262067575e-07, + "loss": 0.7575969696044922, + "step": 7474 + }, + { + "epoch": 1.7223502304147464, + "grad_norm": 1.3899740319803422, + "learning_rate": 1.0371078267939082e-07, + "loss": 0.7321910262107849, + "step": 7475 + }, + { + 
"epoch": 1.7225806451612904, + "grad_norm": 1.3828231848460977, + "learning_rate": 1.035418928332995e-07, + "loss": 0.7812562584877014, + "step": 7476 + }, + { + "epoch": 1.7228110599078341, + "grad_norm": 1.3136112254743646, + "learning_rate": 1.0337313310691565e-07, + "loss": 0.7272104620933533, + "step": 7477 + }, + { + "epoch": 1.7230414746543778, + "grad_norm": 1.1508289944716614, + "learning_rate": 1.032045035247343e-07, + "loss": 0.7006442546844482, + "step": 7478 + }, + { + "epoch": 1.7232718894009218, + "grad_norm": 1.138231534813956, + "learning_rate": 1.0303600411123226e-07, + "loss": 0.7082154750823975, + "step": 7479 + }, + { + "epoch": 1.7235023041474653, + "grad_norm": 1.4157478972732351, + "learning_rate": 1.0286763489086681e-07, + "loss": 0.7204899191856384, + "step": 7480 + }, + { + "epoch": 1.7237327188940093, + "grad_norm": 1.1954797848768004, + "learning_rate": 1.026993958880763e-07, + "loss": 0.9119626879692078, + "step": 7481 + }, + { + "epoch": 1.723963133640553, + "grad_norm": 1.0923155592461768, + "learning_rate": 1.0253128712728088e-07, + "loss": 0.5961707830429077, + "step": 7482 + }, + { + "epoch": 1.7241935483870967, + "grad_norm": 1.1032837677908203, + "learning_rate": 1.023633086328809e-07, + "loss": 0.7469611167907715, + "step": 7483 + }, + { + "epoch": 1.7244239631336407, + "grad_norm": 1.2394445599695993, + "learning_rate": 1.0219546042925841e-07, + "loss": 0.8353795409202576, + "step": 7484 + }, + { + "epoch": 1.7246543778801844, + "grad_norm": 1.120589163159477, + "learning_rate": 1.0202774254077618e-07, + "loss": 0.6587873101234436, + "step": 7485 + }, + { + "epoch": 1.7248847926267281, + "grad_norm": 1.2182162589741892, + "learning_rate": 1.0186015499177847e-07, + "loss": 0.8595654964447021, + "step": 7486 + }, + { + "epoch": 1.7251152073732718, + "grad_norm": 1.0966229129393803, + "learning_rate": 1.0169269780659028e-07, + "loss": 0.7683298587799072, + "step": 7487 + }, + { + "epoch": 1.7253456221198156, + "grad_norm": 
1.372358134101511, + "learning_rate": 1.0152537100951786e-07, + "loss": 0.888152003288269, + "step": 7488 + }, + { + "epoch": 1.7255760368663595, + "grad_norm": 1.1162191205168919, + "learning_rate": 1.013581746248482e-07, + "loss": 0.7835309505462646, + "step": 7489 + }, + { + "epoch": 1.7258064516129032, + "grad_norm": 1.4079534093347241, + "learning_rate": 1.0119110867684999e-07, + "loss": 0.9744646549224854, + "step": 7490 + }, + { + "epoch": 1.726036866359447, + "grad_norm": 1.109483043922066, + "learning_rate": 1.0102417318977251e-07, + "loss": 0.6842091083526611, + "step": 7491 + }, + { + "epoch": 1.726267281105991, + "grad_norm": 1.2357910065520838, + "learning_rate": 1.0085736818784607e-07, + "loss": 0.7435774207115173, + "step": 7492 + }, + { + "epoch": 1.7264976958525344, + "grad_norm": 1.3316804792215136, + "learning_rate": 1.0069069369528249e-07, + "loss": 0.8430237770080566, + "step": 7493 + }, + { + "epoch": 1.7267281105990784, + "grad_norm": 1.1766330255379311, + "learning_rate": 1.0052414973627421e-07, + "loss": 0.8203141689300537, + "step": 7494 + }, + { + "epoch": 1.726958525345622, + "grad_norm": 1.291685708783942, + "learning_rate": 1.0035773633499456e-07, + "loss": 0.7491584420204163, + "step": 7495 + }, + { + "epoch": 1.7271889400921658, + "grad_norm": 0.9475128549493947, + "learning_rate": 1.0019145351559876e-07, + "loss": 0.6738899946212769, + "step": 7496 + }, + { + "epoch": 1.7274193548387098, + "grad_norm": 1.4107090522911332, + "learning_rate": 1.0002530130222231e-07, + "loss": 0.8628265857696533, + "step": 7497 + }, + { + "epoch": 1.7276497695852533, + "grad_norm": 1.5650622568616335, + "learning_rate": 9.985927971898178e-08, + "loss": 1.0158125162124634, + "step": 7498 + }, + { + "epoch": 1.7278801843317972, + "grad_norm": 1.2981782537446935, + "learning_rate": 9.969338878997535e-08, + "loss": 0.7269070148468018, + "step": 7499 + }, + { + "epoch": 1.728110599078341, + "grad_norm": 1.3106792244331589, + "learning_rate": 
9.952762853928165e-08, + "loss": 0.8769187927246094, + "step": 7500 + }, + { + "epoch": 1.7283410138248847, + "grad_norm": 1.325563750244826, + "learning_rate": 9.936199899096042e-08, + "loss": 0.7841119170188904, + "step": 7501 + }, + { + "epoch": 1.7285714285714286, + "grad_norm": 1.7907234255256992, + "learning_rate": 9.91965001690529e-08, + "loss": 0.9209425449371338, + "step": 7502 + }, + { + "epoch": 1.7288018433179724, + "grad_norm": 1.110414701934764, + "learning_rate": 9.903113209758096e-08, + "loss": 0.7795250415802002, + "step": 7503 + }, + { + "epoch": 1.729032258064516, + "grad_norm": 1.2158163264490913, + "learning_rate": 9.886589480054741e-08, + "loss": 0.7131094932556152, + "step": 7504 + }, + { + "epoch": 1.72926267281106, + "grad_norm": 1.167789931248441, + "learning_rate": 9.870078830193629e-08, + "loss": 0.8090137839317322, + "step": 7505 + }, + { + "epoch": 1.7294930875576036, + "grad_norm": 1.124104241227004, + "learning_rate": 9.853581262571231e-08, + "loss": 0.7797958850860596, + "step": 7506 + }, + { + "epoch": 1.7297235023041475, + "grad_norm": 1.3470491669984355, + "learning_rate": 9.83709677958221e-08, + "loss": 0.6927989721298218, + "step": 7507 + }, + { + "epoch": 1.7299539170506912, + "grad_norm": 1.152565458620573, + "learning_rate": 9.820625383619219e-08, + "loss": 0.8009092807769775, + "step": 7508 + }, + { + "epoch": 1.730184331797235, + "grad_norm": 1.0970285369996284, + "learning_rate": 9.804167077073056e-08, + "loss": 0.761864423751831, + "step": 7509 + }, + { + "epoch": 1.730414746543779, + "grad_norm": 1.5795757660336223, + "learning_rate": 9.787721862332654e-08, + "loss": 0.7459509372711182, + "step": 7510 + }, + { + "epoch": 1.7306451612903224, + "grad_norm": 1.0401744024243509, + "learning_rate": 9.771289741785005e-08, + "loss": 0.8216449022293091, + "step": 7511 + }, + { + "epoch": 1.7308755760368664, + "grad_norm": 1.3924364017238642, + "learning_rate": 9.754870717815177e-08, + "loss": 0.7860604524612427, + "step": 7512 
+ }, + { + "epoch": 1.73110599078341, + "grad_norm": 1.146706612325942, + "learning_rate": 9.738464792806422e-08, + "loss": 0.7727769613265991, + "step": 7513 + }, + { + "epoch": 1.7313364055299538, + "grad_norm": 1.2690787911964316, + "learning_rate": 9.722071969140011e-08, + "loss": 0.874458909034729, + "step": 7514 + }, + { + "epoch": 1.7315668202764978, + "grad_norm": 1.1530798069952481, + "learning_rate": 9.705692249195319e-08, + "loss": 0.840191125869751, + "step": 7515 + }, + { + "epoch": 1.7317972350230415, + "grad_norm": 1.1387350117516357, + "learning_rate": 9.689325635349877e-08, + "loss": 0.7169238924980164, + "step": 7516 + }, + { + "epoch": 1.7320276497695852, + "grad_norm": 1.2478630540284088, + "learning_rate": 9.672972129979273e-08, + "loss": 0.7554492950439453, + "step": 7517 + }, + { + "epoch": 1.7322580645161292, + "grad_norm": 1.2166706454141942, + "learning_rate": 9.656631735457154e-08, + "loss": 0.5734076499938965, + "step": 7518 + }, + { + "epoch": 1.7324884792626727, + "grad_norm": 1.5466370383298045, + "learning_rate": 9.640304454155369e-08, + "loss": 0.7867637872695923, + "step": 7519 + }, + { + "epoch": 1.7327188940092166, + "grad_norm": 1.2704443586099365, + "learning_rate": 9.623990288443773e-08, + "loss": 0.7330230474472046, + "step": 7520 + }, + { + "epoch": 1.7329493087557604, + "grad_norm": 1.1352922714992866, + "learning_rate": 9.607689240690319e-08, + "loss": 0.7880058288574219, + "step": 7521 + }, + { + "epoch": 1.733179723502304, + "grad_norm": 1.0605191939295662, + "learning_rate": 9.591401313261139e-08, + "loss": 0.796575665473938, + "step": 7522 + }, + { + "epoch": 1.733410138248848, + "grad_norm": 1.4376273040997398, + "learning_rate": 9.575126508520359e-08, + "loss": 0.8101698160171509, + "step": 7523 + }, + { + "epoch": 1.7336405529953915, + "grad_norm": 1.0868433692155355, + "learning_rate": 9.55886482883026e-08, + "loss": 0.7811597585678101, + "step": 7524 + }, + { + "epoch": 1.7338709677419355, + "grad_norm": 
1.1754841201094306, + "learning_rate": 9.542616276551208e-08, + "loss": 0.7680011987686157, + "step": 7525 + }, + { + "epoch": 1.7341013824884792, + "grad_norm": 1.3670730603232781, + "learning_rate": 9.526380854041638e-08, + "loss": 0.8018794059753418, + "step": 7526 + }, + { + "epoch": 1.734331797235023, + "grad_norm": 1.1232468645544793, + "learning_rate": 9.510158563658133e-08, + "loss": 0.7770500183105469, + "step": 7527 + }, + { + "epoch": 1.734562211981567, + "grad_norm": 1.1848169541071576, + "learning_rate": 9.493949407755309e-08, + "loss": 0.7622300982475281, + "step": 7528 + }, + { + "epoch": 1.7347926267281106, + "grad_norm": 1.5281654640943847, + "learning_rate": 9.477753388685928e-08, + "loss": 0.831570029258728, + "step": 7529 + }, + { + "epoch": 1.7350230414746544, + "grad_norm": 1.1599086861943149, + "learning_rate": 9.461570508800776e-08, + "loss": 0.7987254858016968, + "step": 7530 + }, + { + "epoch": 1.7352534562211983, + "grad_norm": 1.2752040500202788, + "learning_rate": 9.44540077044883e-08, + "loss": 0.8219848275184631, + "step": 7531 + }, + { + "epoch": 1.7354838709677418, + "grad_norm": 1.298736989691398, + "learning_rate": 9.429244175977092e-08, + "loss": 0.8273369073867798, + "step": 7532 + }, + { + "epoch": 1.7357142857142858, + "grad_norm": 1.2555474610105797, + "learning_rate": 9.413100727730628e-08, + "loss": 0.8241056203842163, + "step": 7533 + }, + { + "epoch": 1.7359447004608295, + "grad_norm": 1.4118150886368108, + "learning_rate": 9.396970428052697e-08, + "loss": 0.6880715489387512, + "step": 7534 + }, + { + "epoch": 1.7361751152073732, + "grad_norm": 1.092011806345561, + "learning_rate": 9.380853279284551e-08, + "loss": 0.7355446815490723, + "step": 7535 + }, + { + "epoch": 1.7364055299539172, + "grad_norm": 1.2700711725839655, + "learning_rate": 9.364749283765604e-08, + "loss": 0.8835841417312622, + "step": 7536 + }, + { + "epoch": 1.7366359447004607, + "grad_norm": 1.1984936737610834, + "learning_rate": 9.348658443833313e-08, 
+ "loss": 0.80763840675354, + "step": 7537 + }, + { + "epoch": 1.7368663594470046, + "grad_norm": 1.2855970061631397, + "learning_rate": 9.332580761823227e-08, + "loss": 0.7473145723342896, + "step": 7538 + }, + { + "epoch": 1.7370967741935484, + "grad_norm": 1.2970951445867331, + "learning_rate": 9.316516240069028e-08, + "loss": 0.6618188619613647, + "step": 7539 + }, + { + "epoch": 1.737327188940092, + "grad_norm": 1.3396426049949766, + "learning_rate": 9.300464880902447e-08, + "loss": 0.7432928085327148, + "step": 7540 + }, + { + "epoch": 1.737557603686636, + "grad_norm": 1.1659381023507147, + "learning_rate": 9.284426686653302e-08, + "loss": 0.7915963530540466, + "step": 7541 + }, + { + "epoch": 1.7377880184331798, + "grad_norm": 1.1552275821682043, + "learning_rate": 9.26840165964955e-08, + "loss": 0.6428440809249878, + "step": 7542 + }, + { + "epoch": 1.7380184331797235, + "grad_norm": 1.1399241166482426, + "learning_rate": 9.252389802217187e-08, + "loss": 0.7142912149429321, + "step": 7543 + }, + { + "epoch": 1.7382488479262674, + "grad_norm": 1.316337246157137, + "learning_rate": 9.236391116680309e-08, + "loss": 0.878044605255127, + "step": 7544 + }, + { + "epoch": 1.738479262672811, + "grad_norm": 1.089416476430598, + "learning_rate": 9.220405605361103e-08, + "loss": 0.6861810684204102, + "step": 7545 + }, + { + "epoch": 1.738709677419355, + "grad_norm": 1.3890455529154517, + "learning_rate": 9.204433270579825e-08, + "loss": 0.7638171911239624, + "step": 7546 + }, + { + "epoch": 1.7389400921658986, + "grad_norm": 1.1532660265349828, + "learning_rate": 9.188474114654876e-08, + "loss": 0.7149873971939087, + "step": 7547 + }, + { + "epoch": 1.7391705069124423, + "grad_norm": 1.1783502444227563, + "learning_rate": 9.172528139902703e-08, + "loss": 0.7249442338943481, + "step": 7548 + }, + { + "epoch": 1.7394009216589863, + "grad_norm": 1.178650320628679, + "learning_rate": 9.156595348637819e-08, + "loss": 0.6846513748168945, + "step": 7549 + }, + { + "epoch": 
1.7396313364055298, + "grad_norm": 1.4706201914955974, + "learning_rate": 9.140675743172843e-08, + "loss": 0.9332281351089478, + "step": 7550 + }, + { + "epoch": 1.7398617511520738, + "grad_norm": 1.1835891939139382, + "learning_rate": 9.124769325818526e-08, + "loss": 0.6878118515014648, + "step": 7551 + }, + { + "epoch": 1.7400921658986175, + "grad_norm": 1.077038469987993, + "learning_rate": 9.108876098883633e-08, + "loss": 0.7695426344871521, + "step": 7552 + }, + { + "epoch": 1.7403225806451612, + "grad_norm": 1.3278288479360603, + "learning_rate": 9.09299606467503e-08, + "loss": 0.7983303666114807, + "step": 7553 + }, + { + "epoch": 1.7405529953917052, + "grad_norm": 1.4656214059917094, + "learning_rate": 9.077129225497726e-08, + "loss": 0.8158761262893677, + "step": 7554 + }, + { + "epoch": 1.7407834101382489, + "grad_norm": 1.1519947124673093, + "learning_rate": 9.061275583654748e-08, + "loss": 0.8064214587211609, + "step": 7555 + }, + { + "epoch": 1.7410138248847926, + "grad_norm": 1.2545881332280804, + "learning_rate": 9.045435141447211e-08, + "loss": 0.9058080911636353, + "step": 7556 + }, + { + "epoch": 1.7412442396313366, + "grad_norm": 1.213639501339424, + "learning_rate": 9.029607901174374e-08, + "loss": 0.7392270565032959, + "step": 7557 + }, + { + "epoch": 1.74147465437788, + "grad_norm": 1.0453486445607982, + "learning_rate": 9.013793865133501e-08, + "loss": 0.7114729881286621, + "step": 7558 + }, + { + "epoch": 1.741705069124424, + "grad_norm": 1.2302263811033798, + "learning_rate": 8.997993035620022e-08, + "loss": 0.8675493597984314, + "step": 7559 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.9934561818451934, + "learning_rate": 8.98220541492738e-08, + "loss": 0.8103020191192627, + "step": 7560 + }, + { + "epoch": 1.7421658986175115, + "grad_norm": 1.2538115734834285, + "learning_rate": 8.966431005347109e-08, + "loss": 0.7339279651641846, + "step": 7561 + }, + { + "epoch": 1.7423963133640554, + "grad_norm": 1.3510829475373114, + 
"learning_rate": 8.950669809168887e-08, + "loss": 0.6971707344055176, + "step": 7562 + }, + { + "epoch": 1.742626728110599, + "grad_norm": 1.105458403928542, + "learning_rate": 8.934921828680408e-08, + "loss": 0.8633124232292175, + "step": 7563 + }, + { + "epoch": 1.7428571428571429, + "grad_norm": 1.3082830118219664, + "learning_rate": 8.919187066167466e-08, + "loss": 0.7704664468765259, + "step": 7564 + }, + { + "epoch": 1.7430875576036866, + "grad_norm": 1.1782653714880955, + "learning_rate": 8.903465523913955e-08, + "loss": 0.7063533067703247, + "step": 7565 + }, + { + "epoch": 1.7433179723502303, + "grad_norm": 1.1177210535700517, + "learning_rate": 8.887757204201817e-08, + "loss": 0.7094486951828003, + "step": 7566 + }, + { + "epoch": 1.7435483870967743, + "grad_norm": 1.4575572123890834, + "learning_rate": 8.872062109311096e-08, + "loss": 0.8743780255317688, + "step": 7567 + }, + { + "epoch": 1.743778801843318, + "grad_norm": 1.5827740898240907, + "learning_rate": 8.856380241519935e-08, + "loss": 0.7282687425613403, + "step": 7568 + }, + { + "epoch": 1.7440092165898617, + "grad_norm": 1.105316538989134, + "learning_rate": 8.840711603104523e-08, + "loss": 0.7507487535476685, + "step": 7569 + }, + { + "epoch": 1.7442396313364057, + "grad_norm": 1.2820028807325874, + "learning_rate": 8.82505619633912e-08, + "loss": 0.807691216468811, + "step": 7570 + }, + { + "epoch": 1.7444700460829492, + "grad_norm": 1.3537034886290398, + "learning_rate": 8.809414023496142e-08, + "loss": 0.8650702238082886, + "step": 7571 + }, + { + "epoch": 1.7447004608294931, + "grad_norm": 0.9602033366804331, + "learning_rate": 8.793785086845984e-08, + "loss": 0.6872273683547974, + "step": 7572 + }, + { + "epoch": 1.7449308755760369, + "grad_norm": 1.0979215212634434, + "learning_rate": 8.778169388657163e-08, + "loss": 0.7242698669433594, + "step": 7573 + }, + { + "epoch": 1.7451612903225806, + "grad_norm": 1.0962988735603825, + "learning_rate": 8.762566931196313e-08, + "loss": 
0.741705060005188, + "step": 7574 + }, + { + "epoch": 1.7453917050691246, + "grad_norm": 1.06231801843056, + "learning_rate": 8.746977716728099e-08, + "loss": 0.7293061017990112, + "step": 7575 + }, + { + "epoch": 1.745622119815668, + "grad_norm": 1.0145801945512316, + "learning_rate": 8.731401747515244e-08, + "loss": 0.8385475277900696, + "step": 7576 + }, + { + "epoch": 1.745852534562212, + "grad_norm": 1.4891647422185605, + "learning_rate": 8.715839025818617e-08, + "loss": 0.8484489917755127, + "step": 7577 + }, + { + "epoch": 1.7460829493087557, + "grad_norm": 1.1930293813449155, + "learning_rate": 8.7002895538971e-08, + "loss": 0.6511530876159668, + "step": 7578 + }, + { + "epoch": 1.7463133640552995, + "grad_norm": 1.4360732745608953, + "learning_rate": 8.684753334007688e-08, + "loss": 0.8274673223495483, + "step": 7579 + }, + { + "epoch": 1.7465437788018434, + "grad_norm": 1.081237944644138, + "learning_rate": 8.669230368405456e-08, + "loss": 0.7367755174636841, + "step": 7580 + }, + { + "epoch": 1.7467741935483871, + "grad_norm": 1.2748877435171337, + "learning_rate": 8.653720659343522e-08, + "loss": 0.80199134349823, + "step": 7581 + }, + { + "epoch": 1.7470046082949309, + "grad_norm": 1.1988639104811598, + "learning_rate": 8.638224209073097e-08, + "loss": 0.7782701253890991, + "step": 7582 + }, + { + "epoch": 1.7472350230414746, + "grad_norm": 1.3660035419508034, + "learning_rate": 8.622741019843504e-08, + "loss": 0.7613752484321594, + "step": 7583 + }, + { + "epoch": 1.7474654377880183, + "grad_norm": 1.3599194483251544, + "learning_rate": 8.60727109390208e-08, + "loss": 0.8213690519332886, + "step": 7584 + }, + { + "epoch": 1.7476958525345623, + "grad_norm": 1.1411507368613496, + "learning_rate": 8.59181443349426e-08, + "loss": 0.7064045667648315, + "step": 7585 + }, + { + "epoch": 1.747926267281106, + "grad_norm": 1.1189241999598565, + "learning_rate": 8.576371040863573e-08, + "loss": 0.6686617136001587, + "step": 7586 + }, + { + "epoch": 
1.7481566820276497, + "grad_norm": 1.0194951619872286, + "learning_rate": 8.560940918251592e-08, + "loss": 0.7520097494125366, + "step": 7587 + }, + { + "epoch": 1.7483870967741937, + "grad_norm": 1.0822685191965165, + "learning_rate": 8.545524067897991e-08, + "loss": 0.8176038265228271, + "step": 7588 + }, + { + "epoch": 1.7486175115207372, + "grad_norm": 1.3408318725531652, + "learning_rate": 8.530120492040505e-08, + "loss": 0.6680614948272705, + "step": 7589 + }, + { + "epoch": 1.7488479262672811, + "grad_norm": 1.3621846138568519, + "learning_rate": 8.514730192914921e-08, + "loss": 0.7421592473983765, + "step": 7590 + }, + { + "epoch": 1.7490783410138249, + "grad_norm": 1.2822263575200588, + "learning_rate": 8.499353172755164e-08, + "loss": 0.8869342803955078, + "step": 7591 + }, + { + "epoch": 1.7493087557603686, + "grad_norm": 1.1206823186662898, + "learning_rate": 8.48398943379316e-08, + "loss": 0.6850584745407104, + "step": 7592 + }, + { + "epoch": 1.7495391705069125, + "grad_norm": 1.0932592535391596, + "learning_rate": 8.468638978258914e-08, + "loss": 0.7433363199234009, + "step": 7593 + }, + { + "epoch": 1.7497695852534563, + "grad_norm": 1.0269953798613225, + "learning_rate": 8.453301808380564e-08, + "loss": 0.7744357585906982, + "step": 7594 + }, + { + "epoch": 1.75, + "grad_norm": 1.382126107142446, + "learning_rate": 8.437977926384277e-08, + "loss": 0.8236217498779297, + "step": 7595 + }, + { + "epoch": 1.7502304147465437, + "grad_norm": 1.3329245666066865, + "learning_rate": 8.422667334494249e-08, + "loss": 0.8552603721618652, + "step": 7596 + }, + { + "epoch": 1.7504608294930875, + "grad_norm": 1.4100651978644374, + "learning_rate": 8.407370034932859e-08, + "loss": 0.7755998373031616, + "step": 7597 + }, + { + "epoch": 1.7506912442396314, + "grad_norm": 1.3033243035055457, + "learning_rate": 8.392086029920442e-08, + "loss": 0.8105130195617676, + "step": 7598 + }, + { + "epoch": 1.7509216589861751, + "grad_norm": 1.290928258750675, + 
"learning_rate": 8.376815321675457e-08, + "loss": 0.8787405490875244, + "step": 7599 + }, + { + "epoch": 1.7511520737327189, + "grad_norm": 1.1296910155342912, + "learning_rate": 8.361557912414441e-08, + "loss": 0.6107788681983948, + "step": 7600 + }, + { + "epoch": 1.7513824884792628, + "grad_norm": 0.9941949428855014, + "learning_rate": 8.34631380435199e-08, + "loss": 0.6825795769691467, + "step": 7601 + }, + { + "epoch": 1.7516129032258063, + "grad_norm": 1.5141115638242784, + "learning_rate": 8.331082999700734e-08, + "loss": 0.7069272994995117, + "step": 7602 + }, + { + "epoch": 1.7518433179723503, + "grad_norm": 1.5687921139560086, + "learning_rate": 8.315865500671449e-08, + "loss": 0.7784801721572876, + "step": 7603 + }, + { + "epoch": 1.752073732718894, + "grad_norm": 1.0771300382051838, + "learning_rate": 8.300661309472912e-08, + "loss": 0.7653795480728149, + "step": 7604 + }, + { + "epoch": 1.7523041474654377, + "grad_norm": 1.5582480598587298, + "learning_rate": 8.285470428311991e-08, + "loss": 0.7386122941970825, + "step": 7605 + }, + { + "epoch": 1.7525345622119817, + "grad_norm": 0.9515219540238303, + "learning_rate": 8.270292859393613e-08, + "loss": 0.7828700542449951, + "step": 7606 + }, + { + "epoch": 1.7527649769585254, + "grad_norm": 1.5500733851956912, + "learning_rate": 8.255128604920792e-08, + "loss": 0.8955565094947815, + "step": 7607 + }, + { + "epoch": 1.7529953917050691, + "grad_norm": 1.2505809950313513, + "learning_rate": 8.2399776670946e-08, + "loss": 0.9071576595306396, + "step": 7608 + }, + { + "epoch": 1.7532258064516129, + "grad_norm": 1.3402860152327503, + "learning_rate": 8.22484004811419e-08, + "loss": 0.752417802810669, + "step": 7609 + }, + { + "epoch": 1.7534562211981566, + "grad_norm": 1.367440429282924, + "learning_rate": 8.209715750176727e-08, + "loss": 0.8611370325088501, + "step": 7610 + }, + { + "epoch": 1.7536866359447005, + "grad_norm": 1.232351895452084, + "learning_rate": 8.19460477547752e-08, + "loss": 
0.745223879814148, + "step": 7611 + }, + { + "epoch": 1.7539170506912443, + "grad_norm": 1.0415704016806513, + "learning_rate": 8.179507126209906e-08, + "loss": 0.7799668908119202, + "step": 7612 + }, + { + "epoch": 1.754147465437788, + "grad_norm": 1.3761849870920217, + "learning_rate": 8.164422804565263e-08, + "loss": 0.8177207708358765, + "step": 7613 + }, + { + "epoch": 1.754377880184332, + "grad_norm": 1.2017347256018391, + "learning_rate": 8.149351812733085e-08, + "loss": 0.7111436128616333, + "step": 7614 + }, + { + "epoch": 1.7546082949308754, + "grad_norm": 1.2253776843179969, + "learning_rate": 8.1342941529009e-08, + "loss": 0.6840728521347046, + "step": 7615 + }, + { + "epoch": 1.7548387096774194, + "grad_norm": 1.02983629791633, + "learning_rate": 8.119249827254281e-08, + "loss": 0.6115491986274719, + "step": 7616 + }, + { + "epoch": 1.7550691244239631, + "grad_norm": 1.3870391302655596, + "learning_rate": 8.104218837976939e-08, + "loss": 0.7149351239204407, + "step": 7617 + }, + { + "epoch": 1.7552995391705069, + "grad_norm": 1.2174150358988711, + "learning_rate": 8.089201187250571e-08, + "loss": 0.688147783279419, + "step": 7618 + }, + { + "epoch": 1.7555299539170508, + "grad_norm": 1.2630937737290178, + "learning_rate": 8.074196877254969e-08, + "loss": 0.8092058300971985, + "step": 7619 + }, + { + "epoch": 1.7557603686635943, + "grad_norm": 0.8375696110242734, + "learning_rate": 8.05920591016801e-08, + "loss": 0.7375935912132263, + "step": 7620 + }, + { + "epoch": 1.7559907834101383, + "grad_norm": 1.1868565460321117, + "learning_rate": 8.044228288165599e-08, + "loss": 0.6793934106826782, + "step": 7621 + }, + { + "epoch": 1.756221198156682, + "grad_norm": 1.2102446264436708, + "learning_rate": 8.0292640134217e-08, + "loss": 0.7395757436752319, + "step": 7622 + }, + { + "epoch": 1.7564516129032257, + "grad_norm": 0.9259939168277553, + "learning_rate": 8.014313088108394e-08, + "loss": 0.546409010887146, + "step": 7623 + }, + { + "epoch": 
1.7566820276497697, + "grad_norm": 1.4575552468425101, + "learning_rate": 7.999375514395778e-08, + "loss": 0.7790534496307373, + "step": 7624 + }, + { + "epoch": 1.7569124423963134, + "grad_norm": 1.0896798964233478, + "learning_rate": 7.984451294452e-08, + "loss": 0.7398231625556946, + "step": 7625 + }, + { + "epoch": 1.7571428571428571, + "grad_norm": 1.2623646343227142, + "learning_rate": 7.969540430443311e-08, + "loss": 0.7414441108703613, + "step": 7626 + }, + { + "epoch": 1.757373271889401, + "grad_norm": 1.1312110923091452, + "learning_rate": 7.954642924533994e-08, + "loss": 0.7548750638961792, + "step": 7627 + }, + { + "epoch": 1.7576036866359446, + "grad_norm": 0.957909042850816, + "learning_rate": 7.939758778886385e-08, + "loss": 0.7546773552894592, + "step": 7628 + }, + { + "epoch": 1.7578341013824885, + "grad_norm": 1.1252175485529645, + "learning_rate": 7.924887995660945e-08, + "loss": 0.7373867630958557, + "step": 7629 + }, + { + "epoch": 1.7580645161290323, + "grad_norm": 0.9815120449405607, + "learning_rate": 7.910030577016113e-08, + "loss": 0.7271026968955994, + "step": 7630 + }, + { + "epoch": 1.758294930875576, + "grad_norm": 1.3179911972781693, + "learning_rate": 7.89518652510841e-08, + "loss": 0.8723413944244385, + "step": 7631 + }, + { + "epoch": 1.75852534562212, + "grad_norm": 1.3060473211580457, + "learning_rate": 7.880355842092468e-08, + "loss": 0.8282548189163208, + "step": 7632 + }, + { + "epoch": 1.7587557603686634, + "grad_norm": 1.1089249458958528, + "learning_rate": 7.865538530120918e-08, + "loss": 0.7436991930007935, + "step": 7633 + }, + { + "epoch": 1.7589861751152074, + "grad_norm": 1.0884201833829175, + "learning_rate": 7.850734591344488e-08, + "loss": 0.7750650644302368, + "step": 7634 + }, + { + "epoch": 1.7592165898617511, + "grad_norm": 1.1544057740235625, + "learning_rate": 7.835944027911957e-08, + "loss": 0.6824958324432373, + "step": 7635 + }, + { + "epoch": 1.7594470046082948, + "grad_norm": 1.1607504467923393, + 
"learning_rate": 7.821166841970107e-08, + "loss": 0.8500322103500366, + "step": 7636 + }, + { + "epoch": 1.7596774193548388, + "grad_norm": 1.3527797330475602, + "learning_rate": 7.806403035663889e-08, + "loss": 0.7111128568649292, + "step": 7637 + }, + { + "epoch": 1.7599078341013825, + "grad_norm": 1.1877365592337052, + "learning_rate": 7.791652611136212e-08, + "loss": 0.7320532202720642, + "step": 7638 + }, + { + "epoch": 1.7601382488479262, + "grad_norm": 1.2292449607917257, + "learning_rate": 7.776915570528076e-08, + "loss": 0.8439149856567383, + "step": 7639 + }, + { + "epoch": 1.7603686635944702, + "grad_norm": 1.0358127598823044, + "learning_rate": 7.762191915978578e-08, + "loss": 0.6489125490188599, + "step": 7640 + }, + { + "epoch": 1.7605990783410137, + "grad_norm": 1.0601387517448153, + "learning_rate": 7.74748164962482e-08, + "loss": 0.8118616342544556, + "step": 7641 + }, + { + "epoch": 1.7608294930875577, + "grad_norm": 1.0728808336951121, + "learning_rate": 7.732784773601953e-08, + "loss": 0.7528102397918701, + "step": 7642 + }, + { + "epoch": 1.7610599078341014, + "grad_norm": 1.0747138773482348, + "learning_rate": 7.718101290043244e-08, + "loss": 0.7856849431991577, + "step": 7643 + }, + { + "epoch": 1.761290322580645, + "grad_norm": 1.3272410706900097, + "learning_rate": 7.703431201079969e-08, + "loss": 0.8001973628997803, + "step": 7644 + }, + { + "epoch": 1.761520737327189, + "grad_norm": 1.2721768499077395, + "learning_rate": 7.688774508841478e-08, + "loss": 0.8332471251487732, + "step": 7645 + }, + { + "epoch": 1.7617511520737326, + "grad_norm": 1.2453092502270793, + "learning_rate": 7.67413121545516e-08, + "loss": 0.7534940242767334, + "step": 7646 + }, + { + "epoch": 1.7619815668202765, + "grad_norm": 1.1709183629535718, + "learning_rate": 7.65950132304647e-08, + "loss": 0.737503170967102, + "step": 7647 + }, + { + "epoch": 1.7622119815668202, + "grad_norm": 1.0899482570396566, + "learning_rate": 7.644884833738896e-08, + "loss": 
0.7407201528549194, + "step": 7648 + }, + { + "epoch": 1.762442396313364, + "grad_norm": 1.0059187913201149, + "learning_rate": 7.630281749654055e-08, + "loss": 0.7589377164840698, + "step": 7649 + }, + { + "epoch": 1.762672811059908, + "grad_norm": 1.2624576054967966, + "learning_rate": 7.615692072911528e-08, + "loss": 0.6586496829986572, + "step": 7650 + }, + { + "epoch": 1.7629032258064516, + "grad_norm": 1.1572434060771926, + "learning_rate": 7.601115805628977e-08, + "loss": 0.705591082572937, + "step": 7651 + }, + { + "epoch": 1.7631336405529954, + "grad_norm": 1.2006722437863475, + "learning_rate": 7.586552949922176e-08, + "loss": 0.7889619469642639, + "step": 7652 + }, + { + "epoch": 1.7633640552995393, + "grad_norm": 1.0348577197525213, + "learning_rate": 7.572003507904868e-08, + "loss": 0.6912282705307007, + "step": 7653 + }, + { + "epoch": 1.7635944700460828, + "grad_norm": 1.1101374555344716, + "learning_rate": 7.557467481688873e-08, + "loss": 0.7374964952468872, + "step": 7654 + }, + { + "epoch": 1.7638248847926268, + "grad_norm": 1.1479262514291408, + "learning_rate": 7.542944873384105e-08, + "loss": 0.7302298545837402, + "step": 7655 + }, + { + "epoch": 1.7640552995391705, + "grad_norm": 1.2653276061660264, + "learning_rate": 7.5284356850985e-08, + "loss": 0.8323671817779541, + "step": 7656 + }, + { + "epoch": 1.7642857142857142, + "grad_norm": 1.0548505840987745, + "learning_rate": 7.513939918938028e-08, + "loss": 0.6654655933380127, + "step": 7657 + }, + { + "epoch": 1.7645161290322582, + "grad_norm": 1.1231001283574193, + "learning_rate": 7.499457577006751e-08, + "loss": 0.6371186375617981, + "step": 7658 + }, + { + "epoch": 1.7647465437788017, + "grad_norm": 1.3299088323872645, + "learning_rate": 7.484988661406733e-08, + "loss": 0.7761695384979248, + "step": 7659 + }, + { + "epoch": 1.7649769585253456, + "grad_norm": 1.1268786347378037, + "learning_rate": 7.470533174238158e-08, + "loss": 0.779335618019104, + "step": 7660 + }, + { + "epoch": 
1.7652073732718894, + "grad_norm": 1.26329747548588, + "learning_rate": 7.456091117599195e-08, + "loss": 0.7642731666564941, + "step": 7661 + }, + { + "epoch": 1.765437788018433, + "grad_norm": 1.417392503393573, + "learning_rate": 7.441662493586076e-08, + "loss": 0.7490801215171814, + "step": 7662 + }, + { + "epoch": 1.765668202764977, + "grad_norm": 1.6109060172749883, + "learning_rate": 7.427247304293139e-08, + "loss": 0.9480686187744141, + "step": 7663 + }, + { + "epoch": 1.7658986175115208, + "grad_norm": 1.243245001745715, + "learning_rate": 7.412845551812707e-08, + "loss": 0.6208070516586304, + "step": 7664 + }, + { + "epoch": 1.7661290322580645, + "grad_norm": 1.2606477635417679, + "learning_rate": 7.398457238235167e-08, + "loss": 0.7782050371170044, + "step": 7665 + }, + { + "epoch": 1.7663594470046085, + "grad_norm": 1.1494295384377444, + "learning_rate": 7.38408236564897e-08, + "loss": 0.6725378632545471, + "step": 7666 + }, + { + "epoch": 1.766589861751152, + "grad_norm": 1.4030647180836417, + "learning_rate": 7.369720936140611e-08, + "loss": 0.8247120380401611, + "step": 7667 + }, + { + "epoch": 1.766820276497696, + "grad_norm": 1.2966757041323174, + "learning_rate": 7.355372951794614e-08, + "loss": 0.7866288423538208, + "step": 7668 + }, + { + "epoch": 1.7670506912442396, + "grad_norm": 1.5029385474750363, + "learning_rate": 7.341038414693613e-08, + "loss": 0.8096400499343872, + "step": 7669 + }, + { + "epoch": 1.7672811059907834, + "grad_norm": 1.5152361583075085, + "learning_rate": 7.326717326918208e-08, + "loss": 0.7799873352050781, + "step": 7670 + }, + { + "epoch": 1.7675115207373273, + "grad_norm": 1.0568101452951337, + "learning_rate": 7.312409690547095e-08, + "loss": 0.809285044670105, + "step": 7671 + }, + { + "epoch": 1.7677419354838708, + "grad_norm": 1.351048640166805, + "learning_rate": 7.298115507657021e-08, + "loss": 0.874248743057251, + "step": 7672 + }, + { + "epoch": 1.7679723502304148, + "grad_norm": 1.1594085684678137, + 
"learning_rate": 7.283834780322761e-08, + "loss": 0.7418022155761719, + "step": 7673 + }, + { + "epoch": 1.7682027649769585, + "grad_norm": 1.2895302232300179, + "learning_rate": 7.269567510617126e-08, + "loss": 0.720660388469696, + "step": 7674 + }, + { + "epoch": 1.7684331797235022, + "grad_norm": 1.241628438381412, + "learning_rate": 7.255313700611032e-08, + "loss": 0.7655429840087891, + "step": 7675 + }, + { + "epoch": 1.7686635944700462, + "grad_norm": 1.125747625986026, + "learning_rate": 7.241073352373361e-08, + "loss": 0.7303705215454102, + "step": 7676 + }, + { + "epoch": 1.76889400921659, + "grad_norm": 1.1695690935051566, + "learning_rate": 7.226846467971093e-08, + "loss": 0.7997909188270569, + "step": 7677 + }, + { + "epoch": 1.7691244239631336, + "grad_norm": 1.261135372954414, + "learning_rate": 7.212633049469264e-08, + "loss": 0.6546763181686401, + "step": 7678 + }, + { + "epoch": 1.7693548387096776, + "grad_norm": 0.9669222373383191, + "learning_rate": 7.1984330989309e-08, + "loss": 0.6374444961547852, + "step": 7679 + }, + { + "epoch": 1.769585253456221, + "grad_norm": 1.2966171484977755, + "learning_rate": 7.184246618417111e-08, + "loss": 0.7092937231063843, + "step": 7680 + }, + { + "epoch": 1.769815668202765, + "grad_norm": 1.3237517845156634, + "learning_rate": 7.17007360998706e-08, + "loss": 0.7702305316925049, + "step": 7681 + }, + { + "epoch": 1.7700460829493088, + "grad_norm": 0.978090031115468, + "learning_rate": 7.155914075697933e-08, + "loss": 0.7763724327087402, + "step": 7682 + }, + { + "epoch": 1.7702764976958525, + "grad_norm": 0.9935287090208255, + "learning_rate": 7.141768017604966e-08, + "loss": 0.6409577131271362, + "step": 7683 + }, + { + "epoch": 1.7705069124423964, + "grad_norm": 1.2265488041489598, + "learning_rate": 7.127635437761459e-08, + "loss": 0.7500795125961304, + "step": 7684 + }, + { + "epoch": 1.77073732718894, + "grad_norm": 1.405023681248552, + "learning_rate": 7.113516338218717e-08, + "loss": 0.7312004566192627, 
+ "step": 7685 + }, + { + "epoch": 1.770967741935484, + "grad_norm": 0.910138776962328, + "learning_rate": 7.099410721026112e-08, + "loss": 0.823514997959137, + "step": 7686 + }, + { + "epoch": 1.7711981566820276, + "grad_norm": 1.4146285511420962, + "learning_rate": 7.085318588231048e-08, + "loss": 0.9504063129425049, + "step": 7687 + }, + { + "epoch": 1.7714285714285714, + "grad_norm": 0.8614868773221174, + "learning_rate": 7.071239941878981e-08, + "loss": 0.7850733399391174, + "step": 7688 + }, + { + "epoch": 1.7716589861751153, + "grad_norm": 1.356738665999072, + "learning_rate": 7.057174784013431e-08, + "loss": 0.9447094798088074, + "step": 7689 + }, + { + "epoch": 1.771889400921659, + "grad_norm": 1.134179637006652, + "learning_rate": 7.04312311667592e-08, + "loss": 0.6675062775611877, + "step": 7690 + }, + { + "epoch": 1.7721198156682028, + "grad_norm": 0.9395193655643466, + "learning_rate": 7.029084941906005e-08, + "loss": 0.6875232458114624, + "step": 7691 + }, + { + "epoch": 1.7723502304147467, + "grad_norm": 1.3573723926231736, + "learning_rate": 7.015060261741357e-08, + "loss": 0.7847919464111328, + "step": 7692 + }, + { + "epoch": 1.7725806451612902, + "grad_norm": 1.300014614678359, + "learning_rate": 7.001049078217613e-08, + "loss": 0.7924584150314331, + "step": 7693 + }, + { + "epoch": 1.7728110599078342, + "grad_norm": 1.4499718780004744, + "learning_rate": 6.987051393368471e-08, + "loss": 0.8802344799041748, + "step": 7694 + }, + { + "epoch": 1.773041474654378, + "grad_norm": 1.425988233405148, + "learning_rate": 6.973067209225692e-08, + "loss": 0.7038631439208984, + "step": 7695 + }, + { + "epoch": 1.7732718894009216, + "grad_norm": 1.1226859696380713, + "learning_rate": 6.959096527819064e-08, + "loss": 0.9016700387001038, + "step": 7696 + }, + { + "epoch": 1.7735023041474656, + "grad_norm": 1.1967072079572705, + "learning_rate": 6.945139351176387e-08, + "loss": 0.7678165435791016, + "step": 7697 + }, + { + "epoch": 1.773732718894009, + 
"grad_norm": 1.1001980127511188, + "learning_rate": 6.931195681323565e-08, + "loss": 0.6612143516540527, + "step": 7698 + }, + { + "epoch": 1.773963133640553, + "grad_norm": 1.3968871696274494, + "learning_rate": 6.917265520284476e-08, + "loss": 0.840233325958252, + "step": 7699 + }, + { + "epoch": 1.7741935483870968, + "grad_norm": 1.3698339080168875, + "learning_rate": 6.90334887008106e-08, + "loss": 0.7913506031036377, + "step": 7700 + }, + { + "epoch": 1.7744239631336405, + "grad_norm": 1.3434994536689218, + "learning_rate": 6.889445732733323e-08, + "loss": 0.7523634433746338, + "step": 7701 + }, + { + "epoch": 1.7746543778801844, + "grad_norm": 1.1357027982798495, + "learning_rate": 6.875556110259273e-08, + "loss": 0.7009792327880859, + "step": 7702 + }, + { + "epoch": 1.7748847926267282, + "grad_norm": 0.9926018792518734, + "learning_rate": 6.861680004674963e-08, + "loss": 0.6533738970756531, + "step": 7703 + }, + { + "epoch": 1.7751152073732719, + "grad_norm": 1.0969556014291875, + "learning_rate": 6.847817417994517e-08, + "loss": 0.860493540763855, + "step": 7704 + }, + { + "epoch": 1.7753456221198156, + "grad_norm": 1.3425565367947665, + "learning_rate": 6.833968352230057e-08, + "loss": 0.810010552406311, + "step": 7705 + }, + { + "epoch": 1.7755760368663593, + "grad_norm": 1.2400741621258158, + "learning_rate": 6.820132809391743e-08, + "loss": 0.8443198204040527, + "step": 7706 + }, + { + "epoch": 1.7758064516129033, + "grad_norm": 1.1086679828690398, + "learning_rate": 6.806310791487813e-08, + "loss": 0.758772611618042, + "step": 7707 + }, + { + "epoch": 1.776036866359447, + "grad_norm": 1.2474164003496853, + "learning_rate": 6.792502300524472e-08, + "loss": 0.8438040614128113, + "step": 7708 + }, + { + "epoch": 1.7762672811059907, + "grad_norm": 1.154420265010753, + "learning_rate": 6.778707338506051e-08, + "loss": 0.7727431058883667, + "step": 7709 + }, + { + "epoch": 1.7764976958525347, + "grad_norm": 1.6420516256349273, + "learning_rate": 
6.764925907434849e-08, + "loss": 0.8118282556533813, + "step": 7710 + }, + { + "epoch": 1.7767281105990782, + "grad_norm": 1.22888062854885, + "learning_rate": 6.75115800931122e-08, + "loss": 0.7667281627655029, + "step": 7711 + }, + { + "epoch": 1.7769585253456222, + "grad_norm": 1.2558357954388057, + "learning_rate": 6.737403646133566e-08, + "loss": 0.7824913263320923, + "step": 7712 + }, + { + "epoch": 1.7771889400921659, + "grad_norm": 1.176254722115087, + "learning_rate": 6.723662819898312e-08, + "loss": 0.7318419218063354, + "step": 7713 + }, + { + "epoch": 1.7774193548387096, + "grad_norm": 1.2059569400095187, + "learning_rate": 6.709935532599897e-08, + "loss": 0.7060009241104126, + "step": 7714 + }, + { + "epoch": 1.7776497695852536, + "grad_norm": 1.3093811884607869, + "learning_rate": 6.69622178623086e-08, + "loss": 0.7367588877677917, + "step": 7715 + }, + { + "epoch": 1.7778801843317973, + "grad_norm": 1.3618967587860527, + "learning_rate": 6.682521582781708e-08, + "loss": 0.7340742349624634, + "step": 7716 + }, + { + "epoch": 1.778110599078341, + "grad_norm": 1.257394780772999, + "learning_rate": 6.668834924240995e-08, + "loss": 0.6655991077423096, + "step": 7717 + }, + { + "epoch": 1.7783410138248847, + "grad_norm": 1.3379718118337083, + "learning_rate": 6.655161812595367e-08, + "loss": 0.7562434673309326, + "step": 7718 + }, + { + "epoch": 1.7785714285714285, + "grad_norm": 1.2416548769934193, + "learning_rate": 6.641502249829423e-08, + "loss": 0.8078730702400208, + "step": 7719 + }, + { + "epoch": 1.7788018433179724, + "grad_norm": 1.1920319583326109, + "learning_rate": 6.627856237925811e-08, + "loss": 0.6285899877548218, + "step": 7720 + }, + { + "epoch": 1.7790322580645161, + "grad_norm": 1.1055337731409536, + "learning_rate": 6.61422377886528e-08, + "loss": 0.6633951663970947, + "step": 7721 + }, + { + "epoch": 1.7792626728110599, + "grad_norm": 1.0697990396462347, + "learning_rate": 6.600604874626548e-08, + "loss": 0.7273050546646118, + "step": 
7722 + }, + { + "epoch": 1.7794930875576038, + "grad_norm": 1.2680575632659172, + "learning_rate": 6.586999527186354e-08, + "loss": 0.6665729284286499, + "step": 7723 + }, + { + "epoch": 1.7797235023041473, + "grad_norm": 1.275935674563519, + "learning_rate": 6.573407738519531e-08, + "loss": 0.7332675457000732, + "step": 7724 + }, + { + "epoch": 1.7799539170506913, + "grad_norm": 1.0778234517601935, + "learning_rate": 6.559829510598892e-08, + "loss": 0.7439071536064148, + "step": 7725 + }, + { + "epoch": 1.780184331797235, + "grad_norm": 1.3635129938987167, + "learning_rate": 6.546264845395299e-08, + "loss": 0.7104752063751221, + "step": 7726 + }, + { + "epoch": 1.7804147465437787, + "grad_norm": 1.2639306988819587, + "learning_rate": 6.53271374487765e-08, + "loss": 0.7792220115661621, + "step": 7727 + }, + { + "epoch": 1.7806451612903227, + "grad_norm": 1.0938522733418012, + "learning_rate": 6.519176211012867e-08, + "loss": 0.6379693746566772, + "step": 7728 + }, + { + "epoch": 1.7808755760368664, + "grad_norm": 1.3289044633653213, + "learning_rate": 6.505652245765881e-08, + "loss": 0.7737444639205933, + "step": 7729 + }, + { + "epoch": 1.7811059907834101, + "grad_norm": 1.1550683939038542, + "learning_rate": 6.49214185109973e-08, + "loss": 0.7681130170822144, + "step": 7730 + }, + { + "epoch": 1.7813364055299539, + "grad_norm": 1.4083081227680676, + "learning_rate": 6.478645028975372e-08, + "loss": 0.8718420267105103, + "step": 7731 + }, + { + "epoch": 1.7815668202764976, + "grad_norm": 1.1823677205039174, + "learning_rate": 6.465161781351914e-08, + "loss": 0.7557366490364075, + "step": 7732 + }, + { + "epoch": 1.7817972350230415, + "grad_norm": 1.1999869902911706, + "learning_rate": 6.45169211018638e-08, + "loss": 0.6794936656951904, + "step": 7733 + }, + { + "epoch": 1.7820276497695853, + "grad_norm": 1.2764239528790797, + "learning_rate": 6.438236017433895e-08, + "loss": 0.8390437364578247, + "step": 7734 + }, + { + "epoch": 1.782258064516129, + "grad_norm": 
1.134383511808464, + "learning_rate": 6.424793505047599e-08, + "loss": 0.8024254441261292, + "step": 7735 + }, + { + "epoch": 1.782488479262673, + "grad_norm": 0.8536836629483899, + "learning_rate": 6.411364574978651e-08, + "loss": 0.6382162570953369, + "step": 7736 + }, + { + "epoch": 1.7827188940092165, + "grad_norm": 1.1757601346145792, + "learning_rate": 6.397949229176225e-08, + "loss": 0.6832011938095093, + "step": 7737 + }, + { + "epoch": 1.7829493087557604, + "grad_norm": 1.653357486541517, + "learning_rate": 6.384547469587564e-08, + "loss": 0.9003958702087402, + "step": 7738 + }, + { + "epoch": 1.7831797235023041, + "grad_norm": 1.1523951728047304, + "learning_rate": 6.371159298157913e-08, + "loss": 0.7030328512191772, + "step": 7739 + }, + { + "epoch": 1.7834101382488479, + "grad_norm": 1.2390057793357907, + "learning_rate": 6.357784716830528e-08, + "loss": 0.8153259754180908, + "step": 7740 + }, + { + "epoch": 1.7836405529953918, + "grad_norm": 1.4244568607420958, + "learning_rate": 6.344423727546744e-08, + "loss": 0.8229082226753235, + "step": 7741 + }, + { + "epoch": 1.7838709677419353, + "grad_norm": 1.3055755817113595, + "learning_rate": 6.331076332245866e-08, + "loss": 0.7306294441223145, + "step": 7742 + }, + { + "epoch": 1.7841013824884793, + "grad_norm": 1.1085692686400792, + "learning_rate": 6.317742532865233e-08, + "loss": 0.7613078951835632, + "step": 7743 + }, + { + "epoch": 1.784331797235023, + "grad_norm": 1.4694829399841158, + "learning_rate": 6.304422331340275e-08, + "loss": 0.9164611101150513, + "step": 7744 + }, + { + "epoch": 1.7845622119815667, + "grad_norm": 1.4076564642652605, + "learning_rate": 6.29111572960439e-08, + "loss": 0.8770956993103027, + "step": 7745 + }, + { + "epoch": 1.7847926267281107, + "grad_norm": 1.3274833988945276, + "learning_rate": 6.277822729588989e-08, + "loss": 0.7482821941375732, + "step": 7746 + }, + { + "epoch": 1.7850230414746544, + "grad_norm": 1.3149565308569835, + "learning_rate": 
6.264543333223549e-08, + "loss": 0.7850298881530762, + "step": 7747 + }, + { + "epoch": 1.7852534562211981, + "grad_norm": 1.0844733877563915, + "learning_rate": 6.251277542435552e-08, + "loss": 0.5781385898590088, + "step": 7748 + }, + { + "epoch": 1.785483870967742, + "grad_norm": 1.2619844590894689, + "learning_rate": 6.238025359150501e-08, + "loss": 0.8217513561248779, + "step": 7749 + }, + { + "epoch": 1.7857142857142856, + "grad_norm": 1.2512912228822737, + "learning_rate": 6.224786785291969e-08, + "loss": 0.8500482439994812, + "step": 7750 + }, + { + "epoch": 1.7859447004608295, + "grad_norm": 1.458025138254964, + "learning_rate": 6.211561822781474e-08, + "loss": 0.8146470785140991, + "step": 7751 + }, + { + "epoch": 1.7861751152073733, + "grad_norm": 1.248354775738917, + "learning_rate": 6.198350473538616e-08, + "loss": 0.7351702451705933, + "step": 7752 + }, + { + "epoch": 1.786405529953917, + "grad_norm": 1.2620887228989164, + "learning_rate": 6.185152739481026e-08, + "loss": 0.7993056774139404, + "step": 7753 + }, + { + "epoch": 1.786635944700461, + "grad_norm": 1.4031526672609798, + "learning_rate": 6.171968622524315e-08, + "loss": 0.8570160865783691, + "step": 7754 + }, + { + "epoch": 1.7868663594470044, + "grad_norm": 1.1948359150749444, + "learning_rate": 6.158798124582143e-08, + "loss": 0.6200212836265564, + "step": 7755 + }, + { + "epoch": 1.7870967741935484, + "grad_norm": 1.2592084852014216, + "learning_rate": 6.145641247566202e-08, + "loss": 0.8196465373039246, + "step": 7756 + }, + { + "epoch": 1.7873271889400921, + "grad_norm": 0.9917037331823602, + "learning_rate": 6.132497993386165e-08, + "loss": 0.7038032412528992, + "step": 7757 + }, + { + "epoch": 1.7875576036866359, + "grad_norm": 1.2428262727857045, + "learning_rate": 6.119368363949806e-08, + "loss": 0.7222307324409485, + "step": 7758 + }, + { + "epoch": 1.7877880184331798, + "grad_norm": 1.2991738769607613, + "learning_rate": 6.106252361162834e-08, + "loss": 0.8457501530647278, + 
"step": 7759 + }, + { + "epoch": 1.7880184331797235, + "grad_norm": 1.5487287329891364, + "learning_rate": 6.093149986929025e-08, + "loss": 0.7543236017227173, + "step": 7760 + }, + { + "epoch": 1.7882488479262673, + "grad_norm": 1.294614145507911, + "learning_rate": 6.080061243150191e-08, + "loss": 0.5728875398635864, + "step": 7761 + }, + { + "epoch": 1.7884792626728112, + "grad_norm": 1.3902935059609232, + "learning_rate": 6.066986131726138e-08, + "loss": 0.6864895820617676, + "step": 7762 + }, + { + "epoch": 1.7887096774193547, + "grad_norm": 1.1640824452811938, + "learning_rate": 6.053924654554687e-08, + "loss": 0.8580472469329834, + "step": 7763 + }, + { + "epoch": 1.7889400921658987, + "grad_norm": 1.358237067906671, + "learning_rate": 6.040876813531714e-08, + "loss": 0.7670924663543701, + "step": 7764 + }, + { + "epoch": 1.7891705069124424, + "grad_norm": 1.2558108988688055, + "learning_rate": 6.027842610551082e-08, + "loss": 0.6558287739753723, + "step": 7765 + }, + { + "epoch": 1.7894009216589861, + "grad_norm": 1.2875975662335684, + "learning_rate": 6.014822047504697e-08, + "loss": 0.8186839818954468, + "step": 7766 + }, + { + "epoch": 1.78963133640553, + "grad_norm": 1.2720662525098447, + "learning_rate": 6.001815126282462e-08, + "loss": 0.7862167358398438, + "step": 7767 + }, + { + "epoch": 1.7898617511520736, + "grad_norm": 1.1119662378593531, + "learning_rate": 5.98882184877233e-08, + "loss": 0.8594048023223877, + "step": 7768 + }, + { + "epoch": 1.7900921658986175, + "grad_norm": 1.3277176558233812, + "learning_rate": 5.975842216860238e-08, + "loss": 0.804019033908844, + "step": 7769 + }, + { + "epoch": 1.7903225806451613, + "grad_norm": 1.1244948347974122, + "learning_rate": 5.962876232430192e-08, + "loss": 0.7404098510742188, + "step": 7770 + }, + { + "epoch": 1.790552995391705, + "grad_norm": 1.3595838567399194, + "learning_rate": 5.949923897364173e-08, + "loss": 0.7726024389266968, + "step": 7771 + }, + { + "epoch": 1.790783410138249, + 
"grad_norm": 1.5060671287860161, + "learning_rate": 5.936985213542178e-08, + "loss": 0.8225048184394836, + "step": 7772 + }, + { + "epoch": 1.7910138248847927, + "grad_norm": 1.4217281972238225, + "learning_rate": 5.924060182842272e-08, + "loss": 0.8485706448554993, + "step": 7773 + }, + { + "epoch": 1.7912442396313364, + "grad_norm": 1.189460803975086, + "learning_rate": 5.9111488071404867e-08, + "loss": 0.6580322980880737, + "step": 7774 + }, + { + "epoch": 1.7914746543778803, + "grad_norm": 1.1783786831629417, + "learning_rate": 5.898251088310879e-08, + "loss": 0.7486656904220581, + "step": 7775 + }, + { + "epoch": 1.7917050691244238, + "grad_norm": 1.5948072851449393, + "learning_rate": 5.885367028225574e-08, + "loss": 0.9068334102630615, + "step": 7776 + }, + { + "epoch": 1.7919354838709678, + "grad_norm": 1.1107745619546634, + "learning_rate": 5.872496628754653e-08, + "loss": 0.7091449499130249, + "step": 7777 + }, + { + "epoch": 1.7921658986175115, + "grad_norm": 1.3473785107334575, + "learning_rate": 5.8596398917662107e-08, + "loss": 0.7248316407203674, + "step": 7778 + }, + { + "epoch": 1.7923963133640552, + "grad_norm": 1.2057819957098448, + "learning_rate": 5.8467968191264315e-08, + "loss": 0.7740335464477539, + "step": 7779 + }, + { + "epoch": 1.7926267281105992, + "grad_norm": 1.267573304949112, + "learning_rate": 5.833967412699448e-08, + "loss": 0.7810479402542114, + "step": 7780 + }, + { + "epoch": 1.7928571428571427, + "grad_norm": 1.004282792701847, + "learning_rate": 5.821151674347435e-08, + "loss": 0.7072443962097168, + "step": 7781 + }, + { + "epoch": 1.7930875576036867, + "grad_norm": 1.1829190770666373, + "learning_rate": 5.808349605930585e-08, + "loss": 0.8218289613723755, + "step": 7782 + }, + { + "epoch": 1.7933179723502304, + "grad_norm": 1.393265214120735, + "learning_rate": 5.795561209307087e-08, + "loss": 0.8928433656692505, + "step": 7783 + }, + { + "epoch": 1.793548387096774, + "grad_norm": 1.455083354855402, + "learning_rate": 
5.7827864863331796e-08, + "loss": 0.765188455581665, + "step": 7784 + }, + { + "epoch": 1.793778801843318, + "grad_norm": 1.0118039506572176, + "learning_rate": 5.7700254388630795e-08, + "loss": 0.7149494886398315, + "step": 7785 + }, + { + "epoch": 1.7940092165898618, + "grad_norm": 1.6638445812749356, + "learning_rate": 5.75727806874905e-08, + "loss": 0.8144164085388184, + "step": 7786 + }, + { + "epoch": 1.7942396313364055, + "grad_norm": 1.1101501647130416, + "learning_rate": 5.744544377841354e-08, + "loss": 0.7549517154693604, + "step": 7787 + }, + { + "epoch": 1.7944700460829495, + "grad_norm": 1.1805002478026116, + "learning_rate": 5.731824367988258e-08, + "loss": 0.7820652723312378, + "step": 7788 + }, + { + "epoch": 1.794700460829493, + "grad_norm": 1.2187125462499315, + "learning_rate": 5.719118041036042e-08, + "loss": 0.8253183364868164, + "step": 7789 + }, + { + "epoch": 1.794930875576037, + "grad_norm": 1.3044045265020685, + "learning_rate": 5.70642539882904e-08, + "loss": 0.8177148103713989, + "step": 7790 + }, + { + "epoch": 1.7951612903225806, + "grad_norm": 1.2453642288062106, + "learning_rate": 5.69374644320958e-08, + "loss": 0.722260594367981, + "step": 7791 + }, + { + "epoch": 1.7953917050691244, + "grad_norm": 1.3322495120015716, + "learning_rate": 5.6810811760179434e-08, + "loss": 0.8128643035888672, + "step": 7792 + }, + { + "epoch": 1.7956221198156683, + "grad_norm": 1.2461980802133077, + "learning_rate": 5.6684295990925394e-08, + "loss": 0.8267233371734619, + "step": 7793 + }, + { + "epoch": 1.7958525345622118, + "grad_norm": 1.1467604985666775, + "learning_rate": 5.655791714269697e-08, + "loss": 0.8385082483291626, + "step": 7794 + }, + { + "epoch": 1.7960829493087558, + "grad_norm": 1.2035138425735283, + "learning_rate": 5.643167523383785e-08, + "loss": 0.8705167770385742, + "step": 7795 + }, + { + "epoch": 1.7963133640552995, + "grad_norm": 1.263928906996047, + "learning_rate": 5.6305570282672024e-08, + "loss": 0.7628496885299683, + 
"step": 7796 + }, + { + "epoch": 1.7965437788018432, + "grad_norm": 1.2993701262886028, + "learning_rate": 5.61796023075034e-08, + "loss": 0.8246536254882812, + "step": 7797 + }, + { + "epoch": 1.7967741935483872, + "grad_norm": 1.2920173759654132, + "learning_rate": 5.6053771326615815e-08, + "loss": 0.7103257179260254, + "step": 7798 + }, + { + "epoch": 1.797004608294931, + "grad_norm": 1.318695367926756, + "learning_rate": 5.5928077358273984e-08, + "loss": 0.614989161491394, + "step": 7799 + }, + { + "epoch": 1.7972350230414746, + "grad_norm": 1.6404840895868877, + "learning_rate": 5.5802520420721866e-08, + "loss": 0.9876137971878052, + "step": 7800 + }, + { + "epoch": 1.7974654377880186, + "grad_norm": 1.2467848598458215, + "learning_rate": 5.5677100532183775e-08, + "loss": 0.7023773193359375, + "step": 7801 + }, + { + "epoch": 1.797695852534562, + "grad_norm": 1.1844278512776936, + "learning_rate": 5.555181771086459e-08, + "loss": 0.6680843830108643, + "step": 7802 + }, + { + "epoch": 1.797926267281106, + "grad_norm": 1.0826933828880965, + "learning_rate": 5.542667197494877e-08, + "loss": 0.7221776843070984, + "step": 7803 + }, + { + "epoch": 1.7981566820276498, + "grad_norm": 1.0071738664190577, + "learning_rate": 5.5301663342601e-08, + "loss": 0.7473262548446655, + "step": 7804 + }, + { + "epoch": 1.7983870967741935, + "grad_norm": 1.2499370802188474, + "learning_rate": 5.517679183196622e-08, + "loss": 0.8690468072891235, + "step": 7805 + }, + { + "epoch": 1.7986175115207375, + "grad_norm": 1.0933317196070476, + "learning_rate": 5.505205746116937e-08, + "loss": 0.8353981971740723, + "step": 7806 + }, + { + "epoch": 1.798847926267281, + "grad_norm": 1.177111485427447, + "learning_rate": 5.4927460248315405e-08, + "loss": 0.7691711187362671, + "step": 7807 + }, + { + "epoch": 1.799078341013825, + "grad_norm": 1.034283547212154, + "learning_rate": 5.480300021148953e-08, + "loss": 0.6732556819915771, + "step": 7808 + }, + { + "epoch": 1.7993087557603686, + 
"grad_norm": 1.1520777556370354, + "learning_rate": 5.467867736875664e-08, + "loss": 0.7273567914962769, + "step": 7809 + }, + { + "epoch": 1.7995391705069124, + "grad_norm": 1.201774068977123, + "learning_rate": 5.455449173816251e-08, + "loss": 0.7951864004135132, + "step": 7810 + }, + { + "epoch": 1.7997695852534563, + "grad_norm": 1.4133736179333027, + "learning_rate": 5.4430443337732276e-08, + "loss": 0.7073169350624084, + "step": 7811 + }, + { + "epoch": 1.8, + "grad_norm": 1.0101637387022209, + "learning_rate": 5.430653218547132e-08, + "loss": 0.682072639465332, + "step": 7812 + }, + { + "epoch": 1.8002304147465438, + "grad_norm": 0.9949453624163476, + "learning_rate": 5.4182758299365364e-08, + "loss": 0.7512049674987793, + "step": 7813 + }, + { + "epoch": 1.8004608294930877, + "grad_norm": 1.2218170088515747, + "learning_rate": 5.405912169738003e-08, + "loss": 0.7470980882644653, + "step": 7814 + }, + { + "epoch": 1.8006912442396312, + "grad_norm": 1.1792295753175266, + "learning_rate": 5.3935622397460634e-08, + "loss": 0.792417049407959, + "step": 7815 + }, + { + "epoch": 1.8009216589861752, + "grad_norm": 1.4508025797803343, + "learning_rate": 5.3812260417533505e-08, + "loss": 0.8600934743881226, + "step": 7816 + }, + { + "epoch": 1.801152073732719, + "grad_norm": 1.2411035382017865, + "learning_rate": 5.36890357755041e-08, + "loss": 0.6931058168411255, + "step": 7817 + }, + { + "epoch": 1.8013824884792626, + "grad_norm": 1.1047587345616248, + "learning_rate": 5.3565948489258216e-08, + "loss": 0.7382420897483826, + "step": 7818 + }, + { + "epoch": 1.8016129032258066, + "grad_norm": 1.5724454012098283, + "learning_rate": 5.344299857666224e-08, + "loss": 0.6811971068382263, + "step": 7819 + }, + { + "epoch": 1.80184331797235, + "grad_norm": 1.3142032735909368, + "learning_rate": 5.332018605556188e-08, + "loss": 0.8551425933837891, + "step": 7820 + }, + { + "epoch": 1.802073732718894, + "grad_norm": 1.298840655183536, + "learning_rate": 5.319751094378322e-08, 
+ "loss": 0.7907109260559082, + "step": 7821 + }, + { + "epoch": 1.8023041474654378, + "grad_norm": 1.462185741805911, + "learning_rate": 5.3074973259132464e-08, + "loss": 0.6995817422866821, + "step": 7822 + }, + { + "epoch": 1.8025345622119815, + "grad_norm": 1.2098230160416081, + "learning_rate": 5.295257301939582e-08, + "loss": 0.9157558679580688, + "step": 7823 + }, + { + "epoch": 1.8027649769585254, + "grad_norm": 1.3503599705143554, + "learning_rate": 5.283031024233942e-08, + "loss": 0.8181086778640747, + "step": 7824 + }, + { + "epoch": 1.8029953917050692, + "grad_norm": 1.061101797749781, + "learning_rate": 5.270818494570961e-08, + "loss": 0.7170151472091675, + "step": 7825 + }, + { + "epoch": 1.803225806451613, + "grad_norm": 1.3415396727620215, + "learning_rate": 5.258619714723278e-08, + "loss": 0.7548947334289551, + "step": 7826 + }, + { + "epoch": 1.8034562211981566, + "grad_norm": 1.309211881034751, + "learning_rate": 5.2464346864615204e-08, + "loss": 0.7482869625091553, + "step": 7827 + }, + { + "epoch": 1.8036866359447004, + "grad_norm": 1.2839346666214595, + "learning_rate": 5.234263411554329e-08, + "loss": 0.6984925270080566, + "step": 7828 + }, + { + "epoch": 1.8039170506912443, + "grad_norm": 1.4972180990250632, + "learning_rate": 5.222105891768347e-08, + "loss": 0.910038948059082, + "step": 7829 + }, + { + "epoch": 1.804147465437788, + "grad_norm": 1.4071380742837927, + "learning_rate": 5.2099621288682174e-08, + "loss": 0.8936711549758911, + "step": 7830 + }, + { + "epoch": 1.8043778801843318, + "grad_norm": 1.2841490446822148, + "learning_rate": 5.197832124616608e-08, + "loss": 0.7376326322555542, + "step": 7831 + }, + { + "epoch": 1.8046082949308757, + "grad_norm": 1.6922079171273652, + "learning_rate": 5.1857158807741554e-08, + "loss": 0.8373547792434692, + "step": 7832 + }, + { + "epoch": 1.8048387096774192, + "grad_norm": 1.1938115721747944, + "learning_rate": 5.17361339909953e-08, + "loss": 0.7018512487411499, + "step": 7833 + }, + { + 
"epoch": 1.8050691244239632, + "grad_norm": 1.0051532014919082, + "learning_rate": 5.161524681349394e-08, + "loss": 0.6111225485801697, + "step": 7834 + }, + { + "epoch": 1.805299539170507, + "grad_norm": 1.1643316930206133, + "learning_rate": 5.149449729278388e-08, + "loss": 0.6961934566497803, + "step": 7835 + }, + { + "epoch": 1.8055299539170506, + "grad_norm": 1.1662486414151942, + "learning_rate": 5.137388544639198e-08, + "loss": 0.677324116230011, + "step": 7836 + }, + { + "epoch": 1.8057603686635946, + "grad_norm": 1.1241341054985654, + "learning_rate": 5.125341129182481e-08, + "loss": 0.7124897837638855, + "step": 7837 + }, + { + "epoch": 1.8059907834101383, + "grad_norm": 1.1858041195501718, + "learning_rate": 5.1133074846568815e-08, + "loss": 0.7474578619003296, + "step": 7838 + }, + { + "epoch": 1.806221198156682, + "grad_norm": 1.0832413753523613, + "learning_rate": 5.101287612809102e-08, + "loss": 0.699856162071228, + "step": 7839 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 1.2510053638983376, + "learning_rate": 5.089281515383803e-08, + "loss": 0.6548302173614502, + "step": 7840 + }, + { + "epoch": 1.8066820276497695, + "grad_norm": 1.4067864996197734, + "learning_rate": 5.077289194123624e-08, + "loss": 0.8376108407974243, + "step": 7841 + }, + { + "epoch": 1.8069124423963134, + "grad_norm": 1.4168917230935398, + "learning_rate": 5.065310650769283e-08, + "loss": 0.741931140422821, + "step": 7842 + }, + { + "epoch": 1.8071428571428572, + "grad_norm": 1.0130617353418785, + "learning_rate": 5.053345887059413e-08, + "loss": 0.7253270149230957, + "step": 7843 + }, + { + "epoch": 1.807373271889401, + "grad_norm": 1.452385981822963, + "learning_rate": 5.0413949047306894e-08, + "loss": 0.8248677849769592, + "step": 7844 + }, + { + "epoch": 1.8076036866359448, + "grad_norm": 1.2182337218961132, + "learning_rate": 5.0294577055177925e-08, + "loss": 0.7571253776550293, + "step": 7845 + }, + { + "epoch": 1.8078341013824883, + "grad_norm": 
1.3374870147899762, + "learning_rate": 5.017534291153391e-08, + "loss": 0.8256274461746216, + "step": 7846 + }, + { + "epoch": 1.8080645161290323, + "grad_norm": 1.0202351482491858, + "learning_rate": 5.0056246633681356e-08, + "loss": 0.8609060049057007, + "step": 7847 + }, + { + "epoch": 1.808294930875576, + "grad_norm": 1.0533455142790622, + "learning_rate": 4.9937288238907196e-08, + "loss": 0.7005047798156738, + "step": 7848 + }, + { + "epoch": 1.8085253456221198, + "grad_norm": 1.508707208071474, + "learning_rate": 4.981846774447784e-08, + "loss": 0.8640049695968628, + "step": 7849 + }, + { + "epoch": 1.8087557603686637, + "grad_norm": 1.2891784390675838, + "learning_rate": 4.969978516763984e-08, + "loss": 0.8385862112045288, + "step": 7850 + }, + { + "epoch": 1.8089861751152074, + "grad_norm": 1.4569260681358536, + "learning_rate": 4.9581240525620184e-08, + "loss": 0.845676064491272, + "step": 7851 + }, + { + "epoch": 1.8092165898617512, + "grad_norm": 1.1553749249891685, + "learning_rate": 4.9462833835625327e-08, + "loss": 0.7638444304466248, + "step": 7852 + }, + { + "epoch": 1.8094470046082949, + "grad_norm": 1.3732641737808478, + "learning_rate": 4.934456511484153e-08, + "loss": 0.813924252986908, + "step": 7853 + }, + { + "epoch": 1.8096774193548386, + "grad_norm": 1.1884602060780909, + "learning_rate": 4.9226434380435835e-08, + "loss": 0.8773660659790039, + "step": 7854 + }, + { + "epoch": 1.8099078341013826, + "grad_norm": 1.1706588526128812, + "learning_rate": 4.91084416495543e-08, + "loss": 0.6703497171401978, + "step": 7855 + }, + { + "epoch": 1.8101382488479263, + "grad_norm": 1.149648913375334, + "learning_rate": 4.8990586939323896e-08, + "loss": 0.760738730430603, + "step": 7856 + }, + { + "epoch": 1.81036866359447, + "grad_norm": 1.0947557024146697, + "learning_rate": 4.887287026685072e-08, + "loss": 0.6466494798660278, + "step": 7857 + }, + { + "epoch": 1.810599078341014, + "grad_norm": 1.2590537115606641, + "learning_rate": 
4.8755291649221206e-08, + "loss": 0.7416050434112549, + "step": 7858 + }, + { + "epoch": 1.8108294930875575, + "grad_norm": 1.200153480710325, + "learning_rate": 4.863785110350205e-08, + "loss": 0.6841444373130798, + "step": 7859 + }, + { + "epoch": 1.8110599078341014, + "grad_norm": 0.9841555488440165, + "learning_rate": 4.8520548646739265e-08, + "loss": 0.7818359732627869, + "step": 7860 + }, + { + "epoch": 1.8112903225806452, + "grad_norm": 1.287409595467329, + "learning_rate": 4.840338429595914e-08, + "loss": 0.8802354335784912, + "step": 7861 + }, + { + "epoch": 1.8115207373271889, + "grad_norm": 1.3745378964034318, + "learning_rate": 4.8286358068168055e-08, + "loss": 0.8105144500732422, + "step": 7862 + }, + { + "epoch": 1.8117511520737328, + "grad_norm": 1.2395900259386188, + "learning_rate": 4.816946998035232e-08, + "loss": 0.6613968014717102, + "step": 7863 + }, + { + "epoch": 1.8119815668202763, + "grad_norm": 1.191330407638407, + "learning_rate": 4.80527200494778e-08, + "loss": 0.7945050001144409, + "step": 7864 + }, + { + "epoch": 1.8122119815668203, + "grad_norm": 1.1914418584370128, + "learning_rate": 4.793610829249084e-08, + "loss": 0.7268643379211426, + "step": 7865 + }, + { + "epoch": 1.812442396313364, + "grad_norm": 1.0567405581917244, + "learning_rate": 4.781963472631745e-08, + "loss": 0.7416445016860962, + "step": 7866 + }, + { + "epoch": 1.8126728110599077, + "grad_norm": 1.246941647908604, + "learning_rate": 4.770329936786355e-08, + "loss": 0.8536533117294312, + "step": 7867 + }, + { + "epoch": 1.8129032258064517, + "grad_norm": 1.2429467538833636, + "learning_rate": 4.7587102234015074e-08, + "loss": 0.8258422017097473, + "step": 7868 + }, + { + "epoch": 1.8131336405529954, + "grad_norm": 1.189598318299626, + "learning_rate": 4.7471043341637874e-08, + "loss": 0.6976941823959351, + "step": 7869 + }, + { + "epoch": 1.8133640552995391, + "grad_norm": 1.222534835599988, + "learning_rate": 4.735512270757758e-08, + "loss": 0.8213087916374207, + 
"step": 7870 + }, + { + "epoch": 1.813594470046083, + "grad_norm": 1.1756801964544004, + "learning_rate": 4.723934034866028e-08, + "loss": 0.8012057542800903, + "step": 7871 + }, + { + "epoch": 1.8138248847926266, + "grad_norm": 1.0419940327131916, + "learning_rate": 4.7123696281691436e-08, + "loss": 0.7802866697311401, + "step": 7872 + }, + { + "epoch": 1.8140552995391706, + "grad_norm": 1.1630887083640626, + "learning_rate": 4.700819052345639e-08, + "loss": 0.8024426698684692, + "step": 7873 + }, + { + "epoch": 1.8142857142857143, + "grad_norm": 0.9709635675133196, + "learning_rate": 4.689282309072107e-08, + "loss": 0.6383114457130432, + "step": 7874 + }, + { + "epoch": 1.814516129032258, + "grad_norm": 1.2768186922012608, + "learning_rate": 4.677759400023085e-08, + "loss": 0.7226015329360962, + "step": 7875 + }, + { + "epoch": 1.814746543778802, + "grad_norm": 1.0424513670531574, + "learning_rate": 4.6662503268710684e-08, + "loss": 0.8390164971351624, + "step": 7876 + }, + { + "epoch": 1.8149769585253455, + "grad_norm": 1.0443665370850939, + "learning_rate": 4.654755091286633e-08, + "loss": 0.8120134472846985, + "step": 7877 + }, + { + "epoch": 1.8152073732718894, + "grad_norm": 1.305111160234168, + "learning_rate": 4.6432736949382656e-08, + "loss": 0.6554470062255859, + "step": 7878 + }, + { + "epoch": 1.8154377880184331, + "grad_norm": 1.1780234915455678, + "learning_rate": 4.631806139492478e-08, + "loss": 0.7268370985984802, + "step": 7879 + }, + { + "epoch": 1.8156682027649769, + "grad_norm": 1.4051894182356444, + "learning_rate": 4.620352426613794e-08, + "loss": 0.7991992831230164, + "step": 7880 + }, + { + "epoch": 1.8158986175115208, + "grad_norm": 1.1268859101296151, + "learning_rate": 4.608912557964673e-08, + "loss": 0.7695842981338501, + "step": 7881 + }, + { + "epoch": 1.8161290322580645, + "grad_norm": 1.9896156470888766, + "learning_rate": 4.59748653520563e-08, + "loss": 0.8633268475532532, + "step": 7882 + }, + { + "epoch": 1.8163594470046083, + 
"grad_norm": 1.1364981478494263, + "learning_rate": 4.586074359995118e-08, + "loss": 0.7018440961837769, + "step": 7883 + }, + { + "epoch": 1.8165898617511522, + "grad_norm": 1.1022691462384118, + "learning_rate": 4.574676033989589e-08, + "loss": 0.7304259538650513, + "step": 7884 + }, + { + "epoch": 1.8168202764976957, + "grad_norm": 1.2520833867580832, + "learning_rate": 4.563291558843518e-08, + "loss": 0.7408654689788818, + "step": 7885 + }, + { + "epoch": 1.8170506912442397, + "grad_norm": 0.8583590816187824, + "learning_rate": 4.55192093620933e-08, + "loss": 0.6378169059753418, + "step": 7886 + }, + { + "epoch": 1.8172811059907834, + "grad_norm": 1.2929203847720665, + "learning_rate": 4.540564167737471e-08, + "loss": 0.8854331374168396, + "step": 7887 + }, + { + "epoch": 1.8175115207373271, + "grad_norm": 1.3325768500609418, + "learning_rate": 4.529221255076343e-08, + "loss": 0.6948372721672058, + "step": 7888 + }, + { + "epoch": 1.817741935483871, + "grad_norm": 1.0169430034347062, + "learning_rate": 4.517892199872364e-08, + "loss": 0.8199236392974854, + "step": 7889 + }, + { + "epoch": 1.8179723502304146, + "grad_norm": 1.2358305635738154, + "learning_rate": 4.506577003769918e-08, + "loss": 0.6967995762825012, + "step": 7890 + }, + { + "epoch": 1.8182027649769585, + "grad_norm": 1.5521492896589208, + "learning_rate": 4.495275668411425e-08, + "loss": 0.848435640335083, + "step": 7891 + }, + { + "epoch": 1.8184331797235023, + "grad_norm": 1.0482582355280439, + "learning_rate": 4.483988195437227e-08, + "loss": 0.7085731029510498, + "step": 7892 + }, + { + "epoch": 1.818663594470046, + "grad_norm": 1.540410469929121, + "learning_rate": 4.472714586485682e-08, + "loss": 0.7400653958320618, + "step": 7893 + }, + { + "epoch": 1.81889400921659, + "grad_norm": 1.3011192141788026, + "learning_rate": 4.461454843193169e-08, + "loss": 0.7636830806732178, + "step": 7894 + }, + { + "epoch": 1.8191244239631337, + "grad_norm": 0.9509851989309867, + "learning_rate": 
4.4502089671940135e-08, + "loss": 0.6902754306793213, + "step": 7895 + }, + { + "epoch": 1.8193548387096774, + "grad_norm": 1.4497717090666749, + "learning_rate": 4.438976960120522e-08, + "loss": 0.8397349119186401, + "step": 7896 + }, + { + "epoch": 1.8195852534562214, + "grad_norm": 1.1317263019718502, + "learning_rate": 4.4277588236030226e-08, + "loss": 0.7505836486816406, + "step": 7897 + }, + { + "epoch": 1.8198156682027649, + "grad_norm": 1.4213425196027163, + "learning_rate": 4.416554559269814e-08, + "loss": 0.9310287833213806, + "step": 7898 + }, + { + "epoch": 1.8200460829493088, + "grad_norm": 1.0910777164101302, + "learning_rate": 4.405364168747161e-08, + "loss": 0.724685549736023, + "step": 7899 + }, + { + "epoch": 1.8202764976958525, + "grad_norm": 0.99356469827684, + "learning_rate": 4.394187653659365e-08, + "loss": 0.6554735898971558, + "step": 7900 + }, + { + "epoch": 1.8205069124423963, + "grad_norm": 1.5629584518265682, + "learning_rate": 4.383025015628661e-08, + "loss": 0.7494597434997559, + "step": 7901 + }, + { + "epoch": 1.8207373271889402, + "grad_norm": 1.3596683636243805, + "learning_rate": 4.371876256275287e-08, + "loss": 0.817386269569397, + "step": 7902 + }, + { + "epoch": 1.8209677419354837, + "grad_norm": 1.2645292088995888, + "learning_rate": 4.3607413772174806e-08, + "loss": 0.8668064475059509, + "step": 7903 + }, + { + "epoch": 1.8211981566820277, + "grad_norm": 1.2001673372629817, + "learning_rate": 4.34962038007145e-08, + "loss": 0.7400633096694946, + "step": 7904 + }, + { + "epoch": 1.8214285714285714, + "grad_norm": 1.018878326746976, + "learning_rate": 4.3385132664514046e-08, + "loss": 0.7273544073104858, + "step": 7905 + }, + { + "epoch": 1.8216589861751151, + "grad_norm": 1.149057253315942, + "learning_rate": 4.3274200379695315e-08, + "loss": 0.7133193016052246, + "step": 7906 + }, + { + "epoch": 1.821889400921659, + "grad_norm": 1.2433089389356335, + "learning_rate": 4.316340696235976e-08, + "loss": 0.9390736222267151, + 
"step": 7907 + }, + { + "epoch": 1.8221198156682028, + "grad_norm": 1.1318410882734156, + "learning_rate": 4.3052752428588966e-08, + "loss": 0.7065613269805908, + "step": 7908 + }, + { + "epoch": 1.8223502304147465, + "grad_norm": 1.2803518971044316, + "learning_rate": 4.294223679444442e-08, + "loss": 0.813999354839325, + "step": 7909 + }, + { + "epoch": 1.8225806451612905, + "grad_norm": 1.616827704611462, + "learning_rate": 4.2831860075966955e-08, + "loss": 0.9234256148338318, + "step": 7910 + }, + { + "epoch": 1.822811059907834, + "grad_norm": 1.4124883659201861, + "learning_rate": 4.272162228917808e-08, + "loss": 0.8630207777023315, + "step": 7911 + }, + { + "epoch": 1.823041474654378, + "grad_norm": 1.382424983437882, + "learning_rate": 4.2611523450078456e-08, + "loss": 0.7827208042144775, + "step": 7912 + }, + { + "epoch": 1.8232718894009217, + "grad_norm": 1.3479238410287269, + "learning_rate": 4.250156357464873e-08, + "loss": 0.884107232093811, + "step": 7913 + }, + { + "epoch": 1.8235023041474654, + "grad_norm": 1.3064700630797408, + "learning_rate": 4.2391742678849484e-08, + "loss": 0.8615697026252747, + "step": 7914 + }, + { + "epoch": 1.8237327188940093, + "grad_norm": 1.4410161390206035, + "learning_rate": 4.2282060778621174e-08, + "loss": 0.8001279830932617, + "step": 7915 + }, + { + "epoch": 1.8239631336405528, + "grad_norm": 1.1016373373524035, + "learning_rate": 4.217251788988374e-08, + "loss": 0.7183214426040649, + "step": 7916 + }, + { + "epoch": 1.8241935483870968, + "grad_norm": 1.2680472029966925, + "learning_rate": 4.206311402853746e-08, + "loss": 0.7751119136810303, + "step": 7917 + }, + { + "epoch": 1.8244239631336405, + "grad_norm": 1.287058032235602, + "learning_rate": 4.195384921046208e-08, + "loss": 0.8073426485061646, + "step": 7918 + }, + { + "epoch": 1.8246543778801843, + "grad_norm": 1.053407718143569, + "learning_rate": 4.1844723451517017e-08, + "loss": 0.7918455600738525, + "step": 7919 + }, + { + "epoch": 1.8248847926267282, + 
"grad_norm": 1.1789390806182918, + "learning_rate": 4.1735736767542054e-08, + "loss": 0.8070017099380493, + "step": 7920 + }, + { + "epoch": 1.825115207373272, + "grad_norm": 1.1456133687492283, + "learning_rate": 4.1626889174356306e-08, + "loss": 0.7202159762382507, + "step": 7921 + }, + { + "epoch": 1.8253456221198157, + "grad_norm": 1.304718816677761, + "learning_rate": 4.15181806877587e-08, + "loss": 0.8412283658981323, + "step": 7922 + }, + { + "epoch": 1.8255760368663596, + "grad_norm": 1.079962569087528, + "learning_rate": 4.140961132352849e-08, + "loss": 0.6230478286743164, + "step": 7923 + }, + { + "epoch": 1.8258064516129031, + "grad_norm": 1.184647211526077, + "learning_rate": 4.1301181097424196e-08, + "loss": 0.6475099921226501, + "step": 7924 + }, + { + "epoch": 1.826036866359447, + "grad_norm": 1.1526955390848261, + "learning_rate": 4.1192890025184223e-08, + "loss": 0.6277462244033813, + "step": 7925 + }, + { + "epoch": 1.8262672811059908, + "grad_norm": 1.048650750687635, + "learning_rate": 4.1084738122527e-08, + "loss": 0.784058690071106, + "step": 7926 + }, + { + "epoch": 1.8264976958525345, + "grad_norm": 1.2758998200943634, + "learning_rate": 4.097672540515063e-08, + "loss": 0.7214534282684326, + "step": 7927 + }, + { + "epoch": 1.8267281105990785, + "grad_norm": 1.3299220547069754, + "learning_rate": 4.086885188873302e-08, + "loss": 0.7504015564918518, + "step": 7928 + }, + { + "epoch": 1.826958525345622, + "grad_norm": 1.3115105618474625, + "learning_rate": 4.076111758893175e-08, + "loss": 0.8837840557098389, + "step": 7929 + }, + { + "epoch": 1.827188940092166, + "grad_norm": 0.9756920709009218, + "learning_rate": 4.065352252138443e-08, + "loss": 0.6903706789016724, + "step": 7930 + }, + { + "epoch": 1.8274193548387097, + "grad_norm": 1.0882078909648618, + "learning_rate": 4.054606670170824e-08, + "loss": 0.6120485067367554, + "step": 7931 + }, + { + "epoch": 1.8276497695852534, + "grad_norm": 1.3933670864132435, + "learning_rate": 
4.043875014550047e-08, + "loss": 0.9566253423690796, + "step": 7932 + }, + { + "epoch": 1.8278801843317973, + "grad_norm": 1.143561158140067, + "learning_rate": 4.033157286833766e-08, + "loss": 0.7702776193618774, + "step": 7933 + }, + { + "epoch": 1.828110599078341, + "grad_norm": 1.3861853644171394, + "learning_rate": 4.0224534885776706e-08, + "loss": 0.7326529026031494, + "step": 7934 + }, + { + "epoch": 1.8283410138248848, + "grad_norm": 1.199651876611857, + "learning_rate": 4.011763621335395e-08, + "loss": 0.8161343336105347, + "step": 7935 + }, + { + "epoch": 1.8285714285714287, + "grad_norm": 1.2385311136965618, + "learning_rate": 4.001087686658544e-08, + "loss": 0.7167537212371826, + "step": 7936 + }, + { + "epoch": 1.8288018433179722, + "grad_norm": 1.5866479195226006, + "learning_rate": 3.9904256860967433e-08, + "loss": 0.9195249080657959, + "step": 7937 + }, + { + "epoch": 1.8290322580645162, + "grad_norm": 1.4492337682663832, + "learning_rate": 3.979777621197544e-08, + "loss": 0.9483609199523926, + "step": 7938 + }, + { + "epoch": 1.82926267281106, + "grad_norm": 1.1520857488925356, + "learning_rate": 3.96914349350651e-08, + "loss": 0.6521364450454712, + "step": 7939 + }, + { + "epoch": 1.8294930875576036, + "grad_norm": 1.1394847291425385, + "learning_rate": 3.958523304567174e-08, + "loss": 0.714328408241272, + "step": 7940 + }, + { + "epoch": 1.8297235023041476, + "grad_norm": 1.2749952242619191, + "learning_rate": 3.9479170559210464e-08, + "loss": 0.705136775970459, + "step": 7941 + }, + { + "epoch": 1.829953917050691, + "grad_norm": 1.2310686937076982, + "learning_rate": 3.937324749107584e-08, + "loss": 0.9096843004226685, + "step": 7942 + }, + { + "epoch": 1.830184331797235, + "grad_norm": 1.1347026880501985, + "learning_rate": 3.9267463856642704e-08, + "loss": 0.7797929048538208, + "step": 7943 + }, + { + "epoch": 1.8304147465437788, + "grad_norm": 1.1418375010830168, + "learning_rate": 3.9161819671265414e-08, + "loss": 0.739689290523529, + 
"step": 7944 + }, + { + "epoch": 1.8306451612903225, + "grad_norm": 1.2414926332489717, + "learning_rate": 3.905631495027795e-08, + "loss": 0.7297589778900146, + "step": 7945 + }, + { + "epoch": 1.8308755760368665, + "grad_norm": 1.1411747974433366, + "learning_rate": 3.895094970899426e-08, + "loss": 0.6632317900657654, + "step": 7946 + }, + { + "epoch": 1.8311059907834102, + "grad_norm": 1.1035263718417188, + "learning_rate": 3.884572396270802e-08, + "loss": 0.8075754642486572, + "step": 7947 + }, + { + "epoch": 1.831336405529954, + "grad_norm": 1.1206981689667126, + "learning_rate": 3.874063772669256e-08, + "loss": 0.879385232925415, + "step": 7948 + }, + { + "epoch": 1.8315668202764976, + "grad_norm": 1.1296410172019098, + "learning_rate": 3.86356910162009e-08, + "loss": 0.7182341814041138, + "step": 7949 + }, + { + "epoch": 1.8317972350230414, + "grad_norm": 1.3256415462362086, + "learning_rate": 3.853088384646608e-08, + "loss": 0.8980770111083984, + "step": 7950 + }, + { + "epoch": 1.8320276497695853, + "grad_norm": 1.2399263879902838, + "learning_rate": 3.8426216232700483e-08, + "loss": 0.7798547744750977, + "step": 7951 + }, + { + "epoch": 1.832258064516129, + "grad_norm": 1.30590072600508, + "learning_rate": 3.832168819009685e-08, + "loss": 0.7545509934425354, + "step": 7952 + }, + { + "epoch": 1.8324884792626728, + "grad_norm": 1.4626138945450415, + "learning_rate": 3.821729973382681e-08, + "loss": 0.7394163608551025, + "step": 7953 + }, + { + "epoch": 1.8327188940092167, + "grad_norm": 1.095086275435991, + "learning_rate": 3.811305087904271e-08, + "loss": 0.7771584987640381, + "step": 7954 + }, + { + "epoch": 1.8329493087557602, + "grad_norm": 1.0772465088176202, + "learning_rate": 3.800894164087587e-08, + "loss": 0.6490596532821655, + "step": 7955 + }, + { + "epoch": 1.8331797235023042, + "grad_norm": 1.6261572682115344, + "learning_rate": 3.7904972034437546e-08, + "loss": 0.8465416431427002, + "step": 7956 + }, + { + "epoch": 1.833410138248848, + 
"grad_norm": 1.1256653812684285, + "learning_rate": 3.780114207481899e-08, + "loss": 0.6769351363182068, + "step": 7957 + }, + { + "epoch": 1.8336405529953916, + "grad_norm": 1.1157448396752008, + "learning_rate": 3.769745177709094e-08, + "loss": 0.8187215328216553, + "step": 7958 + }, + { + "epoch": 1.8338709677419356, + "grad_norm": 0.9478307441179703, + "learning_rate": 3.759390115630356e-08, + "loss": 0.7524763345718384, + "step": 7959 + }, + { + "epoch": 1.8341013824884793, + "grad_norm": 1.3846707864730958, + "learning_rate": 3.749049022748762e-08, + "loss": 0.8019517064094543, + "step": 7960 + }, + { + "epoch": 1.834331797235023, + "grad_norm": 1.2301171101661803, + "learning_rate": 3.738721900565278e-08, + "loss": 0.7732158899307251, + "step": 7961 + }, + { + "epoch": 1.8345622119815668, + "grad_norm": 1.1624945144679932, + "learning_rate": 3.728408750578871e-08, + "loss": 0.7152917385101318, + "step": 7962 + }, + { + "epoch": 1.8347926267281105, + "grad_norm": 1.2249354034345745, + "learning_rate": 3.7181095742864876e-08, + "loss": 0.7117735147476196, + "step": 7963 + }, + { + "epoch": 1.8350230414746544, + "grad_norm": 1.1387667941982393, + "learning_rate": 3.7078243731830436e-08, + "loss": 0.7651360034942627, + "step": 7964 + }, + { + "epoch": 1.8352534562211982, + "grad_norm": 1.103224145154883, + "learning_rate": 3.697553148761412e-08, + "loss": 0.6686996817588806, + "step": 7965 + }, + { + "epoch": 1.835483870967742, + "grad_norm": 1.4148867918515446, + "learning_rate": 3.687295902512455e-08, + "loss": 0.8654145002365112, + "step": 7966 + }, + { + "epoch": 1.8357142857142859, + "grad_norm": 1.2014603088046913, + "learning_rate": 3.6770526359250046e-08, + "loss": 0.7883874177932739, + "step": 7967 + }, + { + "epoch": 1.8359447004608294, + "grad_norm": 1.3036366063511584, + "learning_rate": 3.666823350485848e-08, + "loss": 0.7270755767822266, + "step": 7968 + }, + { + "epoch": 1.8361751152073733, + "grad_norm": 1.2757403346821974, + "learning_rate": 
3.656608047679744e-08, + "loss": 0.654710054397583, + "step": 7969 + }, + { + "epoch": 1.836405529953917, + "grad_norm": 1.3173622827867584, + "learning_rate": 3.6464067289894485e-08, + "loss": 0.688032329082489, + "step": 7970 + }, + { + "epoch": 1.8366359447004608, + "grad_norm": 1.610615012564481, + "learning_rate": 3.6362193958956457e-08, + "loss": 0.901115894317627, + "step": 7971 + }, + { + "epoch": 1.8368663594470047, + "grad_norm": 1.116601972108686, + "learning_rate": 3.6260460498770404e-08, + "loss": 0.7335774302482605, + "step": 7972 + }, + { + "epoch": 1.8370967741935482, + "grad_norm": 1.386903572934919, + "learning_rate": 3.615886692410275e-08, + "loss": 0.8056570291519165, + "step": 7973 + }, + { + "epoch": 1.8373271889400922, + "grad_norm": 1.0398578754417405, + "learning_rate": 3.6057413249699356e-08, + "loss": 0.82081538438797, + "step": 7974 + }, + { + "epoch": 1.837557603686636, + "grad_norm": 1.2589683870881863, + "learning_rate": 3.595609949028655e-08, + "loss": 0.7741475105285645, + "step": 7975 + }, + { + "epoch": 1.8377880184331796, + "grad_norm": 1.4550225731476647, + "learning_rate": 3.5854925660569693e-08, + "loss": 0.9020792245864868, + "step": 7976 + }, + { + "epoch": 1.8380184331797236, + "grad_norm": 1.395018589671643, + "learning_rate": 3.57538917752338e-08, + "loss": 0.759677529335022, + "step": 7977 + }, + { + "epoch": 1.8382488479262673, + "grad_norm": 1.2528132061795532, + "learning_rate": 3.565299784894427e-08, + "loss": 0.6658498644828796, + "step": 7978 + }, + { + "epoch": 1.838479262672811, + "grad_norm": 1.156561409904186, + "learning_rate": 3.5552243896345254e-08, + "loss": 0.8359798192977905, + "step": 7979 + }, + { + "epoch": 1.838709677419355, + "grad_norm": 0.9586985661683237, + "learning_rate": 3.545162993206141e-08, + "loss": 0.656216025352478, + "step": 7980 + }, + { + "epoch": 1.8389400921658985, + "grad_norm": 1.1907827843907386, + "learning_rate": 3.53511559706966e-08, + "loss": 0.7783077359199524, + "step": 7981 
+ }, + { + "epoch": 1.8391705069124424, + "grad_norm": 1.315887741405374, + "learning_rate": 3.525082202683427e-08, + "loss": 0.7726818919181824, + "step": 7982 + }, + { + "epoch": 1.8394009216589862, + "grad_norm": 1.203190333477806, + "learning_rate": 3.5150628115038213e-08, + "loss": 0.6797339916229248, + "step": 7983 + }, + { + "epoch": 1.83963133640553, + "grad_norm": 1.6491537372199485, + "learning_rate": 3.505057424985114e-08, + "loss": 0.818444013595581, + "step": 7984 + }, + { + "epoch": 1.8398617511520738, + "grad_norm": 1.2385444618355612, + "learning_rate": 3.495066044579564e-08, + "loss": 0.716003954410553, + "step": 7985 + }, + { + "epoch": 1.8400921658986173, + "grad_norm": 1.1184726381698433, + "learning_rate": 3.485088671737435e-08, + "loss": 0.8214380741119385, + "step": 7986 + }, + { + "epoch": 1.8403225806451613, + "grad_norm": 1.2891166927609845, + "learning_rate": 3.475125307906923e-08, + "loss": 0.8004239797592163, + "step": 7987 + }, + { + "epoch": 1.840552995391705, + "grad_norm": 1.0064244623457703, + "learning_rate": 3.465175954534183e-08, + "loss": 0.724868655204773, + "step": 7988 + }, + { + "epoch": 1.8407834101382488, + "grad_norm": 1.2194713737299876, + "learning_rate": 3.455240613063359e-08, + "loss": 0.6774435043334961, + "step": 7989 + }, + { + "epoch": 1.8410138248847927, + "grad_norm": 1.2000954990034474, + "learning_rate": 3.445319284936543e-08, + "loss": 0.7618406414985657, + "step": 7990 + }, + { + "epoch": 1.8412442396313364, + "grad_norm": 1.2446761227229344, + "learning_rate": 3.4354119715938154e-08, + "loss": 0.8176794648170471, + "step": 7991 + }, + { + "epoch": 1.8414746543778802, + "grad_norm": 1.3311989323291133, + "learning_rate": 3.4255186744732045e-08, + "loss": 0.7540123462677002, + "step": 7992 + }, + { + "epoch": 1.841705069124424, + "grad_norm": 0.8317940065053944, + "learning_rate": 3.4156393950107164e-08, + "loss": 0.6888976097106934, + "step": 7993 + }, + { + "epoch": 1.8419354838709676, + "grad_norm": 
0.9229557772464766, + "learning_rate": 3.405774134640294e-08, + "loss": 0.6719028949737549, + "step": 7994 + }, + { + "epoch": 1.8421658986175116, + "grad_norm": 1.2216480626353798, + "learning_rate": 3.3959228947938903e-08, + "loss": 0.817806601524353, + "step": 7995 + }, + { + "epoch": 1.8423963133640553, + "grad_norm": 1.176727717908757, + "learning_rate": 3.3860856769013955e-08, + "loss": 0.6681252717971802, + "step": 7996 + }, + { + "epoch": 1.842626728110599, + "grad_norm": 1.261442308873967, + "learning_rate": 3.3762624823906574e-08, + "loss": 0.7965174317359924, + "step": 7997 + }, + { + "epoch": 1.842857142857143, + "grad_norm": 1.163849986057629, + "learning_rate": 3.366453312687512e-08, + "loss": 0.714171826839447, + "step": 7998 + }, + { + "epoch": 1.8430875576036865, + "grad_norm": 1.2077995913515678, + "learning_rate": 3.356658169215743e-08, + "loss": 0.7489287853240967, + "step": 7999 + }, + { + "epoch": 1.8433179723502304, + "grad_norm": 1.270011813451473, + "learning_rate": 3.34687705339709e-08, + "loss": 0.790866494178772, + "step": 8000 + }, + { + "epoch": 1.8435483870967742, + "grad_norm": 0.9665221846950844, + "learning_rate": 3.337109966651297e-08, + "loss": 0.8208349943161011, + "step": 8001 + }, + { + "epoch": 1.8437788018433179, + "grad_norm": 1.1715709525124653, + "learning_rate": 3.3273569103960174e-08, + "loss": 0.7974207401275635, + "step": 8002 + }, + { + "epoch": 1.8440092165898618, + "grad_norm": 1.1483232930238036, + "learning_rate": 3.317617886046908e-08, + "loss": 0.751643180847168, + "step": 8003 + }, + { + "epoch": 1.8442396313364056, + "grad_norm": 1.3210448516681466, + "learning_rate": 3.3078928950175724e-08, + "loss": 0.9231137037277222, + "step": 8004 + }, + { + "epoch": 1.8444700460829493, + "grad_norm": 1.1496984894908708, + "learning_rate": 3.2981819387195683e-08, + "loss": 0.7975907325744629, + "step": 8005 + }, + { + "epoch": 1.8447004608294932, + "grad_norm": 1.1807761173209448, + "learning_rate": 
3.288485018562448e-08, + "loss": 0.7467124462127686, + "step": 8006 + }, + { + "epoch": 1.8449308755760367, + "grad_norm": 1.1558703241619663, + "learning_rate": 3.278802135953706e-08, + "loss": 0.7983080148696899, + "step": 8007 + }, + { + "epoch": 1.8451612903225807, + "grad_norm": 1.2273424689042212, + "learning_rate": 3.269133292298787e-08, + "loss": 0.7991635799407959, + "step": 8008 + }, + { + "epoch": 1.8453917050691244, + "grad_norm": 1.3284825495150037, + "learning_rate": 3.259478489001111e-08, + "loss": 0.9309900403022766, + "step": 8009 + }, + { + "epoch": 1.8456221198156681, + "grad_norm": 1.4898197506974649, + "learning_rate": 3.249837727462068e-08, + "loss": 0.7667444944381714, + "step": 8010 + }, + { + "epoch": 1.845852534562212, + "grad_norm": 1.0693184262343387, + "learning_rate": 3.2402110090809955e-08, + "loss": 0.722775936126709, + "step": 8011 + }, + { + "epoch": 1.8460829493087556, + "grad_norm": 1.2061345728793884, + "learning_rate": 3.230598335255208e-08, + "loss": 0.7049660682678223, + "step": 8012 + }, + { + "epoch": 1.8463133640552996, + "grad_norm": 1.2538545243397632, + "learning_rate": 3.220999707379957e-08, + "loss": 0.7543717622756958, + "step": 8013 + }, + { + "epoch": 1.8465437788018433, + "grad_norm": 1.0254969440317054, + "learning_rate": 3.2114151268484825e-08, + "loss": 0.705594539642334, + "step": 8014 + }, + { + "epoch": 1.846774193548387, + "grad_norm": 1.3381301652737214, + "learning_rate": 3.201844595051972e-08, + "loss": 0.8663946390151978, + "step": 8015 + }, + { + "epoch": 1.847004608294931, + "grad_norm": 1.2931743474180666, + "learning_rate": 3.192288113379582e-08, + "loss": 0.6990827918052673, + "step": 8016 + }, + { + "epoch": 1.8472350230414747, + "grad_norm": 1.3047302382268444, + "learning_rate": 3.182745683218391e-08, + "loss": 0.8494592905044556, + "step": 8017 + }, + { + "epoch": 1.8474654377880184, + "grad_norm": 1.1964557388323078, + "learning_rate": 3.173217305953524e-08, + "loss": 0.7689815163612366, + 
"step": 8018 + }, + { + "epoch": 1.8476958525345624, + "grad_norm": 1.0869127948311592, + "learning_rate": 3.163702982967964e-08, + "loss": 0.7961923480033875, + "step": 8019 + }, + { + "epoch": 1.8479262672811059, + "grad_norm": 1.1859545141002084, + "learning_rate": 3.154202715642729e-08, + "loss": 0.7290681600570679, + "step": 8020 + }, + { + "epoch": 1.8481566820276498, + "grad_norm": 1.2696204436408378, + "learning_rate": 3.1447165053567594e-08, + "loss": 0.7486605048179626, + "step": 8021 + }, + { + "epoch": 1.8483870967741935, + "grad_norm": 1.2409295752272667, + "learning_rate": 3.135244353486977e-08, + "loss": 0.8263967633247375, + "step": 8022 + }, + { + "epoch": 1.8486175115207373, + "grad_norm": 1.3436046094044156, + "learning_rate": 3.1257862614082254e-08, + "loss": 0.7462657690048218, + "step": 8023 + }, + { + "epoch": 1.8488479262672812, + "grad_norm": 1.7105756282592546, + "learning_rate": 3.116342230493374e-08, + "loss": 0.9305819272994995, + "step": 8024 + }, + { + "epoch": 1.8490783410138247, + "grad_norm": 1.1597494849443377, + "learning_rate": 3.1069122621131925e-08, + "loss": 0.7202557325363159, + "step": 8025 + }, + { + "epoch": 1.8493087557603687, + "grad_norm": 1.0985806176068067, + "learning_rate": 3.097496357636409e-08, + "loss": 0.723913311958313, + "step": 8026 + }, + { + "epoch": 1.8495391705069124, + "grad_norm": 1.427360065972912, + "learning_rate": 3.088094518429751e-08, + "loss": 0.7067763805389404, + "step": 8027 + }, + { + "epoch": 1.8497695852534561, + "grad_norm": 1.3110685780585822, + "learning_rate": 3.078706745857884e-08, + "loss": 0.7853527665138245, + "step": 8028 + }, + { + "epoch": 1.85, + "grad_norm": 1.228901367807535, + "learning_rate": 3.0693330412834285e-08, + "loss": 0.7183133363723755, + "step": 8029 + }, + { + "epoch": 1.8502304147465438, + "grad_norm": 1.1077136741228983, + "learning_rate": 3.0599734060669626e-08, + "loss": 0.8041096925735474, + "step": 8030 + }, + { + "epoch": 1.8504608294930875, + "grad_norm": 
1.0495776729925357, + "learning_rate": 3.050627841567022e-08, + "loss": 0.7259166240692139, + "step": 8031 + }, + { + "epoch": 1.8506912442396315, + "grad_norm": 1.5016516908972768, + "learning_rate": 3.041296349140099e-08, + "loss": 0.8844292163848877, + "step": 8032 + }, + { + "epoch": 1.850921658986175, + "grad_norm": 1.2846098007302502, + "learning_rate": 3.031978930140666e-08, + "loss": 0.7566810846328735, + "step": 8033 + }, + { + "epoch": 1.851152073732719, + "grad_norm": 1.4566612706299762, + "learning_rate": 3.0226755859211085e-08, + "loss": 0.8365379571914673, + "step": 8034 + }, + { + "epoch": 1.8513824884792627, + "grad_norm": 1.03909937329538, + "learning_rate": 3.013386317831823e-08, + "loss": 0.6786175966262817, + "step": 8035 + }, + { + "epoch": 1.8516129032258064, + "grad_norm": 0.8445952555360507, + "learning_rate": 3.0041111272211206e-08, + "loss": 0.5450198650360107, + "step": 8036 + }, + { + "epoch": 1.8518433179723504, + "grad_norm": 1.3789732970427235, + "learning_rate": 2.994850015435269e-08, + "loss": 0.8792393207550049, + "step": 8037 + }, + { + "epoch": 1.8520737327188939, + "grad_norm": 1.1270074296152806, + "learning_rate": 2.985602983818525e-08, + "loss": 0.8463287353515625, + "step": 8038 + }, + { + "epoch": 1.8523041474654378, + "grad_norm": 1.2927452986312467, + "learning_rate": 2.9763700337130827e-08, + "loss": 0.77659010887146, + "step": 8039 + }, + { + "epoch": 1.8525345622119815, + "grad_norm": 0.8652026295993711, + "learning_rate": 2.9671511664590698e-08, + "loss": 0.6180428266525269, + "step": 8040 + }, + { + "epoch": 1.8527649769585253, + "grad_norm": 1.2049419514211082, + "learning_rate": 2.9579463833946273e-08, + "loss": 0.7886658906936646, + "step": 8041 + }, + { + "epoch": 1.8529953917050692, + "grad_norm": 1.35078980115234, + "learning_rate": 2.9487556858557972e-08, + "loss": 0.8371871709823608, + "step": 8042 + }, + { + "epoch": 1.853225806451613, + "grad_norm": 1.1555875449847217, + "learning_rate": 
2.9395790751765904e-08, + "loss": 0.7082366347312927, + "step": 8043 + }, + { + "epoch": 1.8534562211981567, + "grad_norm": 1.2745414422252506, + "learning_rate": 2.930416552689008e-08, + "loss": 0.7866584062576294, + "step": 8044 + }, + { + "epoch": 1.8536866359447006, + "grad_norm": 1.229235509048025, + "learning_rate": 2.9212681197229527e-08, + "loss": 0.8789514303207397, + "step": 8045 + }, + { + "epoch": 1.8539170506912441, + "grad_norm": 1.0208282620264577, + "learning_rate": 2.9121337776063072e-08, + "loss": 0.7041239738464355, + "step": 8046 + }, + { + "epoch": 1.854147465437788, + "grad_norm": 1.3204473756112607, + "learning_rate": 2.9030135276649215e-08, + "loss": 0.8290516138076782, + "step": 8047 + }, + { + "epoch": 1.8543778801843318, + "grad_norm": 1.2424965520320617, + "learning_rate": 2.8939073712225813e-08, + "loss": 0.8532444834709167, + "step": 8048 + }, + { + "epoch": 1.8546082949308755, + "grad_norm": 1.375111764710695, + "learning_rate": 2.8848153096010407e-08, + "loss": 0.8635869026184082, + "step": 8049 + }, + { + "epoch": 1.8548387096774195, + "grad_norm": 1.3481674122248803, + "learning_rate": 2.8757373441199885e-08, + "loss": 0.723747730255127, + "step": 8050 + }, + { + "epoch": 1.855069124423963, + "grad_norm": 1.3399875040651272, + "learning_rate": 2.8666734760970925e-08, + "loss": 0.893456220626831, + "step": 8051 + }, + { + "epoch": 1.855299539170507, + "grad_norm": 1.2732338285848108, + "learning_rate": 2.8576237068479335e-08, + "loss": 0.6871381998062134, + "step": 8052 + }, + { + "epoch": 1.8555299539170507, + "grad_norm": 1.0534516506243037, + "learning_rate": 2.848588037686106e-08, + "loss": 0.7820594906806946, + "step": 8053 + }, + { + "epoch": 1.8557603686635944, + "grad_norm": 1.0873243123362593, + "learning_rate": 2.839566469923105e-08, + "loss": 0.7783479690551758, + "step": 8054 + }, + { + "epoch": 1.8559907834101383, + "grad_norm": 1.25602911336094, + "learning_rate": 2.8305590048684268e-08, + "loss": 0.7612866163253784, + 
"step": 8055 + }, + { + "epoch": 1.856221198156682, + "grad_norm": 1.0752346215773687, + "learning_rate": 2.82156564382946e-08, + "loss": 0.7483590841293335, + "step": 8056 + }, + { + "epoch": 1.8564516129032258, + "grad_norm": 1.0547692532993052, + "learning_rate": 2.812586388111582e-08, + "loss": 0.7553579807281494, + "step": 8057 + }, + { + "epoch": 1.8566820276497698, + "grad_norm": 1.0828193353243305, + "learning_rate": 2.80362123901815e-08, + "loss": 0.8895602226257324, + "step": 8058 + }, + { + "epoch": 1.8569124423963133, + "grad_norm": 1.1481937931103232, + "learning_rate": 2.794670197850424e-08, + "loss": 0.7974053621292114, + "step": 8059 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 1.0112292806236838, + "learning_rate": 2.7857332659076193e-08, + "loss": 0.7730135917663574, + "step": 8060 + }, + { + "epoch": 1.857373271889401, + "grad_norm": 1.115608079627536, + "learning_rate": 2.7768104444869434e-08, + "loss": 0.7258738279342651, + "step": 8061 + }, + { + "epoch": 1.8576036866359447, + "grad_norm": 1.3030363105586589, + "learning_rate": 2.7679017348835264e-08, + "loss": 0.7068890333175659, + "step": 8062 + }, + { + "epoch": 1.8578341013824886, + "grad_norm": 1.3041822573340287, + "learning_rate": 2.7590071383904568e-08, + "loss": 0.8741557002067566, + "step": 8063 + }, + { + "epoch": 1.8580645161290321, + "grad_norm": 1.3236368529143523, + "learning_rate": 2.750126656298768e-08, + "loss": 0.8723797798156738, + "step": 8064 + }, + { + "epoch": 1.858294930875576, + "grad_norm": 1.2019235064586495, + "learning_rate": 2.7412602898974514e-08, + "loss": 0.8510957956314087, + "step": 8065 + }, + { + "epoch": 1.8585253456221198, + "grad_norm": 0.8996466342772348, + "learning_rate": 2.732408040473444e-08, + "loss": 0.6875216960906982, + "step": 8066 + }, + { + "epoch": 1.8587557603686635, + "grad_norm": 1.235948717542994, + "learning_rate": 2.7235699093116515e-08, + "loss": 0.8057721257209778, + "step": 8067 + }, + { + "epoch": 1.8589861751152075, + 
"grad_norm": 1.1066694710477807, + "learning_rate": 2.7147458976949145e-08, + "loss": 0.7547335624694824, + "step": 8068 + }, + { + "epoch": 1.8592165898617512, + "grad_norm": 1.2565080056809024, + "learning_rate": 2.7059360069040193e-08, + "loss": 0.8301708102226257, + "step": 8069 + }, + { + "epoch": 1.859447004608295, + "grad_norm": 1.354839024861171, + "learning_rate": 2.69714023821771e-08, + "loss": 0.8313431143760681, + "step": 8070 + }, + { + "epoch": 1.8596774193548387, + "grad_norm": 1.2482736529337517, + "learning_rate": 2.6883585929126872e-08, + "loss": 0.6631792783737183, + "step": 8071 + }, + { + "epoch": 1.8599078341013824, + "grad_norm": 1.342165180678223, + "learning_rate": 2.679591072263576e-08, + "loss": 0.7643609046936035, + "step": 8072 + }, + { + "epoch": 1.8601382488479263, + "grad_norm": 1.5670037508761703, + "learning_rate": 2.670837677543003e-08, + "loss": 0.8543407917022705, + "step": 8073 + }, + { + "epoch": 1.86036866359447, + "grad_norm": 1.0908415634382522, + "learning_rate": 2.662098410021485e-08, + "loss": 0.8051489591598511, + "step": 8074 + }, + { + "epoch": 1.8605990783410138, + "grad_norm": 1.1493604797084143, + "learning_rate": 2.653373270967518e-08, + "loss": 0.7065767645835876, + "step": 8075 + }, + { + "epoch": 1.8608294930875577, + "grad_norm": 0.9852441728403762, + "learning_rate": 2.6446622616475566e-08, + "loss": 0.672603189945221, + "step": 8076 + }, + { + "epoch": 1.8610599078341012, + "grad_norm": 1.2739019796547877, + "learning_rate": 2.6359653833259776e-08, + "loss": 0.7201080918312073, + "step": 8077 + }, + { + "epoch": 1.8612903225806452, + "grad_norm": 1.156933357533599, + "learning_rate": 2.627282637265149e-08, + "loss": 0.7147494554519653, + "step": 8078 + }, + { + "epoch": 1.861520737327189, + "grad_norm": 1.3793116889121875, + "learning_rate": 2.6186140247253297e-08, + "loss": 0.7051082253456116, + "step": 8079 + }, + { + "epoch": 1.8617511520737327, + "grad_norm": 1.2253670327071573, + "learning_rate": 
2.6099595469647683e-08, + "loss": 0.5786069631576538, + "step": 8080 + }, + { + "epoch": 1.8619815668202766, + "grad_norm": 1.2391603364729231, + "learning_rate": 2.6013192052396493e-08, + "loss": 0.8880232572555542, + "step": 8081 + }, + { + "epoch": 1.8622119815668203, + "grad_norm": 1.3577487615179598, + "learning_rate": 2.5926930008041137e-08, + "loss": 0.9295729398727417, + "step": 8082 + }, + { + "epoch": 1.862442396313364, + "grad_norm": 1.1507407274303025, + "learning_rate": 2.5840809349102378e-08, + "loss": 0.6963248252868652, + "step": 8083 + }, + { + "epoch": 1.8626728110599078, + "grad_norm": 1.2547838683138512, + "learning_rate": 2.5754830088080548e-08, + "loss": 0.8788298964500427, + "step": 8084 + }, + { + "epoch": 1.8629032258064515, + "grad_norm": 1.3540782368440085, + "learning_rate": 2.5668992237455334e-08, + "loss": 0.7454242706298828, + "step": 8085 + }, + { + "epoch": 1.8631336405529955, + "grad_norm": 1.1950812039913048, + "learning_rate": 2.558329580968599e-08, + "loss": 0.7659780383110046, + "step": 8086 + }, + { + "epoch": 1.8633640552995392, + "grad_norm": 1.5016734977487585, + "learning_rate": 2.5497740817211456e-08, + "loss": 0.8799881935119629, + "step": 8087 + }, + { + "epoch": 1.863594470046083, + "grad_norm": 0.9825172132169212, + "learning_rate": 2.5412327272449684e-08, + "loss": 0.7319198846817017, + "step": 8088 + }, + { + "epoch": 1.8638248847926269, + "grad_norm": 1.0689400870779366, + "learning_rate": 2.532705518779854e-08, + "loss": 0.6450645923614502, + "step": 8089 + }, + { + "epoch": 1.8640552995391704, + "grad_norm": 1.1783740361717576, + "learning_rate": 2.52419245756349e-08, + "loss": 0.7213672399520874, + "step": 8090 + }, + { + "epoch": 1.8642857142857143, + "grad_norm": 1.3483335750734096, + "learning_rate": 2.515693544831554e-08, + "loss": 0.790163516998291, + "step": 8091 + }, + { + "epoch": 1.864516129032258, + "grad_norm": 1.2871905619529331, + "learning_rate": 2.507208781817638e-08, + "loss": 0.8324074745178223, 
+ "step": 8092 + }, + { + "epoch": 1.8647465437788018, + "grad_norm": 1.4095960145667545, + "learning_rate": 2.4987381697533227e-08, + "loss": 0.879224419593811, + "step": 8093 + }, + { + "epoch": 1.8649769585253457, + "grad_norm": 1.4121148041878757, + "learning_rate": 2.4902817098680807e-08, + "loss": 0.8668204545974731, + "step": 8094 + }, + { + "epoch": 1.8652073732718892, + "grad_norm": 1.1605042845973315, + "learning_rate": 2.481839403389341e-08, + "loss": 0.6737711429595947, + "step": 8095 + }, + { + "epoch": 1.8654377880184332, + "grad_norm": 1.3482506919608122, + "learning_rate": 2.4734112515425343e-08, + "loss": 0.8948237299919128, + "step": 8096 + }, + { + "epoch": 1.865668202764977, + "grad_norm": 1.2927456093148797, + "learning_rate": 2.4649972555509823e-08, + "loss": 0.6866592168807983, + "step": 8097 + }, + { + "epoch": 1.8658986175115206, + "grad_norm": 1.2040358944727056, + "learning_rate": 2.4565974166359416e-08, + "loss": 0.8852076530456543, + "step": 8098 + }, + { + "epoch": 1.8661290322580646, + "grad_norm": 1.1474664367024714, + "learning_rate": 2.44821173601667e-08, + "loss": 0.7402448654174805, + "step": 8099 + }, + { + "epoch": 1.8663594470046083, + "grad_norm": 1.299234544884085, + "learning_rate": 2.439840214910316e-08, + "loss": 0.8536320924758911, + "step": 8100 + }, + { + "epoch": 1.866589861751152, + "grad_norm": 1.1550631938568499, + "learning_rate": 2.4314828545319965e-08, + "loss": 0.6408628225326538, + "step": 8101 + }, + { + "epoch": 1.866820276497696, + "grad_norm": 1.188548223378954, + "learning_rate": 2.4231396560947858e-08, + "loss": 0.9578930735588074, + "step": 8102 + }, + { + "epoch": 1.8670506912442395, + "grad_norm": 1.8289817367376688, + "learning_rate": 2.4148106208096708e-08, + "loss": 0.7606109976768494, + "step": 8103 + }, + { + "epoch": 1.8672811059907835, + "grad_norm": 0.9826738512020193, + "learning_rate": 2.4064957498856177e-08, + "loss": 0.7446529865264893, + "step": 8104 + }, + { + "epoch": 
1.8675115207373272, + "grad_norm": 1.0744366993530696, + "learning_rate": 2.398195044529505e-08, + "loss": 0.6086497902870178, + "step": 8105 + }, + { + "epoch": 1.867741935483871, + "grad_norm": 1.5561440229209103, + "learning_rate": 2.389908505946181e-08, + "loss": 0.9348995685577393, + "step": 8106 + }, + { + "epoch": 1.8679723502304149, + "grad_norm": 1.1497120508700005, + "learning_rate": 2.381636135338405e-08, + "loss": 0.6817007660865784, + "step": 8107 + }, + { + "epoch": 1.8682027649769584, + "grad_norm": 1.0815805532535518, + "learning_rate": 2.373377933906917e-08, + "loss": 0.7228778600692749, + "step": 8108 + }, + { + "epoch": 1.8684331797235023, + "grad_norm": 1.2824972753864794, + "learning_rate": 2.3651339028503913e-08, + "loss": 0.6974154114723206, + "step": 8109 + }, + { + "epoch": 1.868663594470046, + "grad_norm": 1.2746687740486187, + "learning_rate": 2.3569040433654264e-08, + "loss": 0.8025680780410767, + "step": 8110 + }, + { + "epoch": 1.8688940092165898, + "grad_norm": 1.0439186994105132, + "learning_rate": 2.3486883566465777e-08, + "loss": 0.7570391893386841, + "step": 8111 + }, + { + "epoch": 1.8691244239631337, + "grad_norm": 1.1353343636911755, + "learning_rate": 2.3404868438863246e-08, + "loss": 0.7982438802719116, + "step": 8112 + }, + { + "epoch": 1.8693548387096774, + "grad_norm": 0.948053216671403, + "learning_rate": 2.3322995062751372e-08, + "loss": 0.6615588665008545, + "step": 8113 + }, + { + "epoch": 1.8695852534562212, + "grad_norm": 1.1794145616088556, + "learning_rate": 2.324126345001376e-08, + "loss": 0.7748852968215942, + "step": 8114 + }, + { + "epoch": 1.8698156682027651, + "grad_norm": 1.146675047414541, + "learning_rate": 2.3159673612513587e-08, + "loss": 0.7238468527793884, + "step": 8115 + }, + { + "epoch": 1.8700460829493086, + "grad_norm": 1.2843830020573481, + "learning_rate": 2.3078225562093822e-08, + "loss": 0.8146705627441406, + "step": 8116 + }, + { + "epoch": 1.8702764976958526, + "grad_norm": 
1.0747488287412188, + "learning_rate": 2.2996919310576235e-08, + "loss": 0.8393594026565552, + "step": 8117 + }, + { + "epoch": 1.8705069124423963, + "grad_norm": 1.6346887094004536, + "learning_rate": 2.2915754869762384e-08, + "loss": 0.9619652032852173, + "step": 8118 + }, + { + "epoch": 1.87073732718894, + "grad_norm": 1.6641290836048537, + "learning_rate": 2.2834732251433286e-08, + "loss": 0.8301321268081665, + "step": 8119 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.2687107297135523, + "learning_rate": 2.2753851467349206e-08, + "loss": 0.8236079812049866, + "step": 8120 + }, + { + "epoch": 1.8711981566820275, + "grad_norm": 1.430457986003777, + "learning_rate": 2.267311252924975e-08, + "loss": 0.9007565379142761, + "step": 8121 + }, + { + "epoch": 1.8714285714285714, + "grad_norm": 1.1827948115854126, + "learning_rate": 2.2592515448854432e-08, + "loss": 0.7430707216262817, + "step": 8122 + }, + { + "epoch": 1.8716589861751152, + "grad_norm": 1.17432989990484, + "learning_rate": 2.2512060237861452e-08, + "loss": 0.7562465667724609, + "step": 8123 + }, + { + "epoch": 1.871889400921659, + "grad_norm": 1.1839994711227122, + "learning_rate": 2.24317469079488e-08, + "loss": 0.7736096978187561, + "step": 8124 + }, + { + "epoch": 1.8721198156682028, + "grad_norm": 1.1809968020267403, + "learning_rate": 2.2351575470774153e-08, + "loss": 0.7652724981307983, + "step": 8125 + }, + { + "epoch": 1.8723502304147466, + "grad_norm": 1.4664554269524215, + "learning_rate": 2.2271545937973978e-08, + "loss": 0.8034792542457581, + "step": 8126 + }, + { + "epoch": 1.8725806451612903, + "grad_norm": 1.2107856133228136, + "learning_rate": 2.219165832116454e-08, + "loss": 0.6158101558685303, + "step": 8127 + }, + { + "epoch": 1.8728110599078343, + "grad_norm": 1.1984460742665393, + "learning_rate": 2.2111912631941564e-08, + "loss": 0.6514682769775391, + "step": 8128 + }, + { + "epoch": 1.8730414746543778, + "grad_norm": 1.1090676234846621, + "learning_rate": 
2.203230888187979e-08, + "loss": 0.833041787147522, + "step": 8129 + }, + { + "epoch": 1.8732718894009217, + "grad_norm": 1.3944148742352294, + "learning_rate": 2.1952847082533864e-08, + "loss": 0.8033208250999451, + "step": 8130 + }, + { + "epoch": 1.8735023041474654, + "grad_norm": 1.2067904980609332, + "learning_rate": 2.187352724543734e-08, + "loss": 0.742051362991333, + "step": 8131 + }, + { + "epoch": 1.8737327188940092, + "grad_norm": 1.2058964422107643, + "learning_rate": 2.1794349382103337e-08, + "loss": 0.7411169409751892, + "step": 8132 + }, + { + "epoch": 1.8739631336405531, + "grad_norm": 1.3201479261882787, + "learning_rate": 2.171531350402467e-08, + "loss": 0.7517165541648865, + "step": 8133 + }, + { + "epoch": 1.8741935483870966, + "grad_norm": 1.2371172479380752, + "learning_rate": 2.1636419622673263e-08, + "loss": 0.8010021448135376, + "step": 8134 + }, + { + "epoch": 1.8744239631336406, + "grad_norm": 1.2501522956166489, + "learning_rate": 2.1557667749500187e-08, + "loss": 0.7265241742134094, + "step": 8135 + }, + { + "epoch": 1.8746543778801843, + "grad_norm": 1.191380870353666, + "learning_rate": 2.1479057895936403e-08, + "loss": 0.6809227466583252, + "step": 8136 + }, + { + "epoch": 1.874884792626728, + "grad_norm": 1.2737037893770147, + "learning_rate": 2.140059007339201e-08, + "loss": 0.8235769271850586, + "step": 8137 + }, + { + "epoch": 1.875115207373272, + "grad_norm": 1.1356268338575812, + "learning_rate": 2.132226429325634e-08, + "loss": 0.7556289434432983, + "step": 8138 + }, + { + "epoch": 1.8753456221198157, + "grad_norm": 1.257264783564694, + "learning_rate": 2.1244080566898638e-08, + "loss": 0.7765048742294312, + "step": 8139 + }, + { + "epoch": 1.8755760368663594, + "grad_norm": 1.1776465139256578, + "learning_rate": 2.1166038905666816e-08, + "loss": 0.7637666463851929, + "step": 8140 + }, + { + "epoch": 1.8758064516129034, + "grad_norm": 1.2471130614608452, + "learning_rate": 2.10881393208886e-08, + "loss": 0.8413453698158264, + 
"step": 8141 + }, + { + "epoch": 1.8760368663594469, + "grad_norm": 1.443351972543058, + "learning_rate": 2.101038182387105e-08, + "loss": 0.7937475442886353, + "step": 8142 + }, + { + "epoch": 1.8762672811059908, + "grad_norm": 1.1772607773578063, + "learning_rate": 2.0932766425900585e-08, + "loss": 0.7654982805252075, + "step": 8143 + }, + { + "epoch": 1.8764976958525346, + "grad_norm": 1.53397176108589, + "learning_rate": 2.0855293138242968e-08, + "loss": 0.8950663805007935, + "step": 8144 + }, + { + "epoch": 1.8767281105990783, + "grad_norm": 1.250929142335872, + "learning_rate": 2.077796197214332e-08, + "loss": 0.6405420303344727, + "step": 8145 + }, + { + "epoch": 1.8769585253456222, + "grad_norm": 1.085136655013558, + "learning_rate": 2.0700772938826217e-08, + "loss": 0.7724314332008362, + "step": 8146 + }, + { + "epoch": 1.8771889400921657, + "grad_norm": 1.09160242748488, + "learning_rate": 2.0623726049495472e-08, + "loss": 0.7929061651229858, + "step": 8147 + }, + { + "epoch": 1.8774193548387097, + "grad_norm": 1.0975195498555617, + "learning_rate": 2.0546821315334363e-08, + "loss": 0.7207096815109253, + "step": 8148 + }, + { + "epoch": 1.8776497695852534, + "grad_norm": 1.347240880442127, + "learning_rate": 2.0470058747505513e-08, + "loss": 0.9234127402305603, + "step": 8149 + }, + { + "epoch": 1.8778801843317972, + "grad_norm": 1.2189429089634525, + "learning_rate": 2.0393438357150906e-08, + "loss": 0.9006322026252747, + "step": 8150 + }, + { + "epoch": 1.878110599078341, + "grad_norm": 0.9863507376975118, + "learning_rate": 2.0316960155391972e-08, + "loss": 0.6289799809455872, + "step": 8151 + }, + { + "epoch": 1.8783410138248848, + "grad_norm": 1.117182475586666, + "learning_rate": 2.0240624153329168e-08, + "loss": 0.8551793098449707, + "step": 8152 + }, + { + "epoch": 1.8785714285714286, + "grad_norm": 1.1253834649892556, + "learning_rate": 2.016443036204285e-08, + "loss": 0.8065170645713806, + "step": 8153 + }, + { + "epoch": 1.8788018433179725, + 
"grad_norm": 1.0124272640628642, + "learning_rate": 2.0088378792592286e-08, + "loss": 0.6361274719238281, + "step": 8154 + }, + { + "epoch": 1.879032258064516, + "grad_norm": 1.3966308966349001, + "learning_rate": 2.0012469456016312e-08, + "loss": 0.8539700508117676, + "step": 8155 + }, + { + "epoch": 1.87926267281106, + "grad_norm": 1.380681857214056, + "learning_rate": 1.9936702363333115e-08, + "loss": 0.7424989938735962, + "step": 8156 + }, + { + "epoch": 1.8794930875576037, + "grad_norm": 1.0795560964001287, + "learning_rate": 1.9861077525540116e-08, + "loss": 0.5831520557403564, + "step": 8157 + }, + { + "epoch": 1.8797235023041474, + "grad_norm": 1.3034651332513367, + "learning_rate": 1.9785594953614093e-08, + "loss": 0.8080646991729736, + "step": 8158 + }, + { + "epoch": 1.8799539170506914, + "grad_norm": 1.3028494466110516, + "learning_rate": 1.9710254658511392e-08, + "loss": 0.8008537292480469, + "step": 8159 + }, + { + "epoch": 1.8801843317972349, + "grad_norm": 0.7838996508063781, + "learning_rate": 1.9635056651167492e-08, + "loss": 0.7317294478416443, + "step": 8160 + }, + { + "epoch": 1.8804147465437788, + "grad_norm": 1.240068145392807, + "learning_rate": 1.956000094249721e-08, + "loss": 0.803238034248352, + "step": 8161 + }, + { + "epoch": 1.8806451612903226, + "grad_norm": 1.1592302203633778, + "learning_rate": 1.948508754339506e-08, + "loss": 0.7202219367027283, + "step": 8162 + }, + { + "epoch": 1.8808755760368663, + "grad_norm": 1.3406292816176746, + "learning_rate": 1.9410316464734233e-08, + "loss": 0.7691160440444946, + "step": 8163 + }, + { + "epoch": 1.8811059907834102, + "grad_norm": 1.0898220168427848, + "learning_rate": 1.933568771736782e-08, + "loss": 0.7092962265014648, + "step": 8164 + }, + { + "epoch": 1.881336405529954, + "grad_norm": 1.3165421464208054, + "learning_rate": 1.9261201312128274e-08, + "loss": 0.819804310798645, + "step": 8165 + }, + { + "epoch": 1.8815668202764977, + "grad_norm": 1.2278633726487793, + "learning_rate": 
1.918685725982694e-08, + "loss": 0.9127538204193115, + "step": 8166 + }, + { + "epoch": 1.8817972350230416, + "grad_norm": 1.198181344272901, + "learning_rate": 1.9112655571254855e-08, + "loss": 0.8023328185081482, + "step": 8167 + }, + { + "epoch": 1.8820276497695851, + "grad_norm": 1.1150363141436184, + "learning_rate": 1.903859625718218e-08, + "loss": 0.723065972328186, + "step": 8168 + }, + { + "epoch": 1.882258064516129, + "grad_norm": 1.329775802249569, + "learning_rate": 1.896467932835877e-08, + "loss": 0.7838670611381531, + "step": 8169 + }, + { + "epoch": 1.8824884792626728, + "grad_norm": 1.0221481880663403, + "learning_rate": 1.8890904795513475e-08, + "loss": 0.6029871702194214, + "step": 8170 + }, + { + "epoch": 1.8827188940092165, + "grad_norm": 1.1179619592038208, + "learning_rate": 1.8817272669354512e-08, + "loss": 0.7622933387756348, + "step": 8171 + }, + { + "epoch": 1.8829493087557605, + "grad_norm": 1.3471730261003036, + "learning_rate": 1.8743782960569444e-08, + "loss": 0.7702913284301758, + "step": 8172 + }, + { + "epoch": 1.883179723502304, + "grad_norm": 1.1115192812221177, + "learning_rate": 1.867043567982518e-08, + "loss": 0.6385080814361572, + "step": 8173 + }, + { + "epoch": 1.883410138248848, + "grad_norm": 1.1957117872616694, + "learning_rate": 1.8597230837768208e-08, + "loss": 0.6886409521102905, + "step": 8174 + }, + { + "epoch": 1.8836405529953917, + "grad_norm": 1.2615274538141057, + "learning_rate": 1.8524168445023803e-08, + "loss": 0.7697125673294067, + "step": 8175 + }, + { + "epoch": 1.8838709677419354, + "grad_norm": 1.2703572064059772, + "learning_rate": 1.8451248512197148e-08, + "loss": 0.7942332029342651, + "step": 8176 + }, + { + "epoch": 1.8841013824884794, + "grad_norm": 1.2486681210000266, + "learning_rate": 1.8378471049872445e-08, + "loss": 0.7751410007476807, + "step": 8177 + }, + { + "epoch": 1.884331797235023, + "grad_norm": 1.4135289386452112, + "learning_rate": 1.8305836068613023e-08, + "loss": 0.8650992512702942, 
+ "step": 8178 + }, + { + "epoch": 1.8845622119815668, + "grad_norm": 1.255590367160678, + "learning_rate": 1.8233343578962e-08, + "loss": 0.7084495425224304, + "step": 8179 + }, + { + "epoch": 1.8847926267281108, + "grad_norm": 1.2065933395861381, + "learning_rate": 1.8160993591441408e-08, + "loss": 0.7428494691848755, + "step": 8180 + }, + { + "epoch": 1.8850230414746543, + "grad_norm": 1.2721568643853003, + "learning_rate": 1.8088786116552844e-08, + "loss": 0.7431809902191162, + "step": 8181 + }, + { + "epoch": 1.8852534562211982, + "grad_norm": 1.5234831289492186, + "learning_rate": 1.801672116477715e-08, + "loss": 0.8312518000602722, + "step": 8182 + }, + { + "epoch": 1.885483870967742, + "grad_norm": 1.412977003038852, + "learning_rate": 1.7944798746574285e-08, + "loss": 0.8574832081794739, + "step": 8183 + }, + { + "epoch": 1.8857142857142857, + "grad_norm": 1.209006694724365, + "learning_rate": 1.7873018872383793e-08, + "loss": 0.7716966867446899, + "step": 8184 + }, + { + "epoch": 1.8859447004608296, + "grad_norm": 1.1984291768693995, + "learning_rate": 1.780138155262456e-08, + "loss": 0.8536000847816467, + "step": 8185 + }, + { + "epoch": 1.8861751152073731, + "grad_norm": 1.4411910829910872, + "learning_rate": 1.7729886797694606e-08, + "loss": 0.6559889316558838, + "step": 8186 + }, + { + "epoch": 1.886405529953917, + "grad_norm": 1.4146541158068258, + "learning_rate": 1.7658534617971065e-08, + "loss": 0.7371512651443481, + "step": 8187 + }, + { + "epoch": 1.8866359447004608, + "grad_norm": 1.5920989952321163, + "learning_rate": 1.7587325023810773e-08, + "loss": 0.8092008829116821, + "step": 8188 + }, + { + "epoch": 1.8868663594470045, + "grad_norm": 1.1485577131831675, + "learning_rate": 1.751625802554979e-08, + "loss": 0.7793067693710327, + "step": 8189 + }, + { + "epoch": 1.8870967741935485, + "grad_norm": 1.3107398360408737, + "learning_rate": 1.7445333633503312e-08, + "loss": 0.8102752566337585, + "step": 8190 + }, + { + "epoch": 1.8873271889400922, 
+ "grad_norm": 0.9411355693415201, + "learning_rate": 1.737455185796588e-08, + "loss": 0.7141490578651428, + "step": 8191 + }, + { + "epoch": 1.887557603686636, + "grad_norm": 1.3771499753857814, + "learning_rate": 1.7303912709211497e-08, + "loss": 0.8010870218276978, + "step": 8192 + }, + { + "epoch": 1.8877880184331797, + "grad_norm": 1.0040229371574219, + "learning_rate": 1.723341619749319e-08, + "loss": 0.7945431470870972, + "step": 8193 + }, + { + "epoch": 1.8880184331797234, + "grad_norm": 1.5084700431378903, + "learning_rate": 1.7163062333043544e-08, + "loss": 0.765398383140564, + "step": 8194 + }, + { + "epoch": 1.8882488479262673, + "grad_norm": 1.141763186710756, + "learning_rate": 1.709285112607428e-08, + "loss": 0.8645910024642944, + "step": 8195 + }, + { + "epoch": 1.888479262672811, + "grad_norm": 1.4294051802947438, + "learning_rate": 1.7022782586776363e-08, + "loss": 0.7650351524353027, + "step": 8196 + }, + { + "epoch": 1.8887096774193548, + "grad_norm": 1.148441042244908, + "learning_rate": 1.695285672532043e-08, + "loss": 0.8059902191162109, + "step": 8197 + }, + { + "epoch": 1.8889400921658988, + "grad_norm": 1.3019488561633756, + "learning_rate": 1.688307355185592e-08, + "loss": 0.8389305472373962, + "step": 8198 + }, + { + "epoch": 1.8891705069124423, + "grad_norm": 1.3363862822981094, + "learning_rate": 1.681343307651173e-08, + "loss": 0.755578875541687, + "step": 8199 + }, + { + "epoch": 1.8894009216589862, + "grad_norm": 1.2754809499843205, + "learning_rate": 1.6743935309396218e-08, + "loss": 0.822825014591217, + "step": 8200 + }, + { + "epoch": 1.88963133640553, + "grad_norm": 1.2571266177044025, + "learning_rate": 1.667458026059676e-08, + "loss": 0.8229342699050903, + "step": 8201 + }, + { + "epoch": 1.8898617511520737, + "grad_norm": 1.3086181916191966, + "learning_rate": 1.6605367940180303e-08, + "loss": 0.7142254114151001, + "step": 8202 + }, + { + "epoch": 1.8900921658986176, + "grad_norm": 1.1722391698259569, + "learning_rate": 
1.6536298358192812e-08, + "loss": 0.8904600739479065, + "step": 8203 + }, + { + "epoch": 1.8903225806451613, + "grad_norm": 1.151403763105922, + "learning_rate": 1.6467371524659603e-08, + "loss": 0.8758517503738403, + "step": 8204 + }, + { + "epoch": 1.890552995391705, + "grad_norm": 1.3083947750625244, + "learning_rate": 1.6398587449585555e-08, + "loss": 0.7609111666679382, + "step": 8205 + }, + { + "epoch": 1.8907834101382488, + "grad_norm": 0.9406449994318669, + "learning_rate": 1.6329946142954353e-08, + "loss": 0.8177064657211304, + "step": 8206 + }, + { + "epoch": 1.8910138248847925, + "grad_norm": 1.1366142550146048, + "learning_rate": 1.626144761472925e-08, + "loss": 0.6342105865478516, + "step": 8207 + }, + { + "epoch": 1.8912442396313365, + "grad_norm": 0.8903675484312013, + "learning_rate": 1.6193091874852627e-08, + "loss": 0.6025499105453491, + "step": 8208 + }, + { + "epoch": 1.8914746543778802, + "grad_norm": 1.3017839387858507, + "learning_rate": 1.6124878933246543e-08, + "loss": 0.78373783826828, + "step": 8209 + }, + { + "epoch": 1.891705069124424, + "grad_norm": 1.336095893979754, + "learning_rate": 1.605680879981164e-08, + "loss": 0.8072086572647095, + "step": 8210 + }, + { + "epoch": 1.8919354838709679, + "grad_norm": 1.5597980072939257, + "learning_rate": 1.5988881484428453e-08, + "loss": 0.9057372212409973, + "step": 8211 + }, + { + "epoch": 1.8921658986175114, + "grad_norm": 1.2099616448625954, + "learning_rate": 1.592109699695643e-08, + "loss": 0.8235929012298584, + "step": 8212 + }, + { + "epoch": 1.8923963133640553, + "grad_norm": 1.2417707847492958, + "learning_rate": 1.5853455347234366e-08, + "loss": 0.6610825061798096, + "step": 8213 + }, + { + "epoch": 1.892626728110599, + "grad_norm": 1.4158986087253451, + "learning_rate": 1.5785956545080415e-08, + "loss": 0.7152366638183594, + "step": 8214 + }, + { + "epoch": 1.8928571428571428, + "grad_norm": 1.330885873092923, + "learning_rate": 1.5718600600292066e-08, + "loss": 0.7971903085708618, 
+ "step": 8215 + }, + { + "epoch": 1.8930875576036867, + "grad_norm": 1.226467557812747, + "learning_rate": 1.565138752264572e-08, + "loss": 0.7639449238777161, + "step": 8216 + }, + { + "epoch": 1.8933179723502302, + "grad_norm": 1.0517976072639703, + "learning_rate": 1.5584317321897356e-08, + "loss": 0.6396117806434631, + "step": 8217 + }, + { + "epoch": 1.8935483870967742, + "grad_norm": 1.328962567982178, + "learning_rate": 1.5517390007782183e-08, + "loss": 0.790566086769104, + "step": 8218 + }, + { + "epoch": 1.893778801843318, + "grad_norm": 1.6769404862380202, + "learning_rate": 1.5450605590014544e-08, + "loss": 0.7948310971260071, + "step": 8219 + }, + { + "epoch": 1.8940092165898617, + "grad_norm": 1.2378052027269906, + "learning_rate": 1.5383964078288124e-08, + "loss": 0.9425654411315918, + "step": 8220 + }, + { + "epoch": 1.8942396313364056, + "grad_norm": 1.2441112834124675, + "learning_rate": 1.531746548227586e-08, + "loss": 0.8001678586006165, + "step": 8221 + }, + { + "epoch": 1.8944700460829493, + "grad_norm": 0.9072642646135723, + "learning_rate": 1.5251109811629915e-08, + "loss": 0.6636781692504883, + "step": 8222 + }, + { + "epoch": 1.894700460829493, + "grad_norm": 1.0313464437335311, + "learning_rate": 1.5184897075981807e-08, + "loss": 0.7884416580200195, + "step": 8223 + }, + { + "epoch": 1.894930875576037, + "grad_norm": 1.0907885139753422, + "learning_rate": 1.511882728494218e-08, + "loss": 0.6888208389282227, + "step": 8224 + }, + { + "epoch": 1.8951612903225805, + "grad_norm": 1.3461823033287323, + "learning_rate": 1.5052900448100815e-08, + "loss": 0.7253614664077759, + "step": 8225 + }, + { + "epoch": 1.8953917050691245, + "grad_norm": 1.2272377599078015, + "learning_rate": 1.498711657502716e-08, + "loss": 0.7865983843803406, + "step": 8226 + }, + { + "epoch": 1.8956221198156682, + "grad_norm": 1.4908955714231082, + "learning_rate": 1.492147567526947e-08, + "loss": 0.8778063654899597, + "step": 8227 + }, + { + "epoch": 1.895852534562212, 
+ "grad_norm": 1.2263224402103408, + "learning_rate": 1.4855977758355675e-08, + "loss": 0.7812581062316895, + "step": 8228 + }, + { + "epoch": 1.8960829493087559, + "grad_norm": 1.2890011409819144, + "learning_rate": 1.4790622833792287e-08, + "loss": 0.7160226106643677, + "step": 8229 + }, + { + "epoch": 1.8963133640552994, + "grad_norm": 1.1613199880989007, + "learning_rate": 1.472541091106594e-08, + "loss": 0.8187412619590759, + "step": 8230 + }, + { + "epoch": 1.8965437788018433, + "grad_norm": 1.1653251647412382, + "learning_rate": 1.4660341999641834e-08, + "loss": 0.7517846822738647, + "step": 8231 + }, + { + "epoch": 1.896774193548387, + "grad_norm": 1.3673338656755198, + "learning_rate": 1.4595416108964753e-08, + "loss": 0.9230127334594727, + "step": 8232 + }, + { + "epoch": 1.8970046082949308, + "grad_norm": 1.228175308993719, + "learning_rate": 1.4530633248458269e-08, + "loss": 0.6803582906723022, + "step": 8233 + }, + { + "epoch": 1.8972350230414747, + "grad_norm": 1.2890219242119376, + "learning_rate": 1.4465993427525968e-08, + "loss": 0.8444511294364929, + "step": 8234 + }, + { + "epoch": 1.8974654377880185, + "grad_norm": 1.4479761110450609, + "learning_rate": 1.4401496655550016e-08, + "loss": 0.7622519731521606, + "step": 8235 + }, + { + "epoch": 1.8976958525345622, + "grad_norm": 1.20875065982799, + "learning_rate": 1.4337142941892033e-08, + "loss": 0.687129020690918, + "step": 8236 + }, + { + "epoch": 1.8979262672811061, + "grad_norm": 1.1827775538431895, + "learning_rate": 1.4272932295892992e-08, + "loss": 0.6421219110488892, + "step": 8237 + }, + { + "epoch": 1.8981566820276496, + "grad_norm": 1.2669401147896007, + "learning_rate": 1.4208864726872772e-08, + "loss": 0.7829388380050659, + "step": 8238 + }, + { + "epoch": 1.8983870967741936, + "grad_norm": 1.3482974956529734, + "learning_rate": 1.4144940244130821e-08, + "loss": 0.7754424810409546, + "step": 8239 + }, + { + "epoch": 1.8986175115207373, + "grad_norm": 1.1130898544931584, + 
"learning_rate": 1.4081158856945719e-08, + "loss": 0.6544859409332275, + "step": 8240 + }, + { + "epoch": 1.898847926267281, + "grad_norm": 1.0822240775455856, + "learning_rate": 1.4017520574575282e-08, + "loss": 0.8020427227020264, + "step": 8241 + }, + { + "epoch": 1.899078341013825, + "grad_norm": 1.1350657169907092, + "learning_rate": 1.3954025406256343e-08, + "loss": 0.7343212366104126, + "step": 8242 + }, + { + "epoch": 1.8993087557603685, + "grad_norm": 1.2792336145941459, + "learning_rate": 1.3890673361205418e-08, + "loss": 0.7643232345581055, + "step": 8243 + }, + { + "epoch": 1.8995391705069125, + "grad_norm": 1.212662168320899, + "learning_rate": 1.3827464448617709e-08, + "loss": 0.7806165814399719, + "step": 8244 + }, + { + "epoch": 1.8997695852534562, + "grad_norm": 1.6104194734157218, + "learning_rate": 1.3764398677667988e-08, + "loss": 0.8533280491828918, + "step": 8245 + }, + { + "epoch": 1.9, + "grad_norm": 1.1289941083869026, + "learning_rate": 1.3701476057510264e-08, + "loss": 0.773565411567688, + "step": 8246 + }, + { + "epoch": 1.9002304147465439, + "grad_norm": 1.1091300492504157, + "learning_rate": 1.3638696597277677e-08, + "loss": 0.7752503752708435, + "step": 8247 + }, + { + "epoch": 1.9004608294930876, + "grad_norm": 0.9880656776459645, + "learning_rate": 1.3576060306082383e-08, + "loss": 0.7466747760772705, + "step": 8248 + }, + { + "epoch": 1.9006912442396313, + "grad_norm": 1.2177337280417093, + "learning_rate": 1.3513567193016106e-08, + "loss": 0.8103033304214478, + "step": 8249 + }, + { + "epoch": 1.9009216589861753, + "grad_norm": 1.0248826665714235, + "learning_rate": 1.3451217267149595e-08, + "loss": 0.6501287817955017, + "step": 8250 + }, + { + "epoch": 1.9011520737327188, + "grad_norm": 1.210107770730306, + "learning_rate": 1.3389010537532941e-08, + "loss": 0.7329230308532715, + "step": 8251 + }, + { + "epoch": 1.9013824884792627, + "grad_norm": 1.3978474783131303, + "learning_rate": 1.3326947013195255e-08, + "loss": 
0.8413917422294617, + "step": 8252 + }, + { + "epoch": 1.9016129032258065, + "grad_norm": 1.4081927433558092, + "learning_rate": 1.3265026703144999e-08, + "loss": 0.7283090353012085, + "step": 8253 + }, + { + "epoch": 1.9018433179723502, + "grad_norm": 1.2553133709092965, + "learning_rate": 1.3203249616369872e-08, + "loss": 0.8378126621246338, + "step": 8254 + }, + { + "epoch": 1.9020737327188941, + "grad_norm": 1.099276496142028, + "learning_rate": 1.3141615761836811e-08, + "loss": 0.7675777673721313, + "step": 8255 + }, + { + "epoch": 1.9023041474654376, + "grad_norm": 1.6916159414604328, + "learning_rate": 1.308012514849155e-08, + "loss": 0.6448104381561279, + "step": 8256 + }, + { + "epoch": 1.9025345622119816, + "grad_norm": 1.3264486635424506, + "learning_rate": 1.3018777785259838e-08, + "loss": 0.8024395704269409, + "step": 8257 + }, + { + "epoch": 1.9027649769585253, + "grad_norm": 1.1900370575281645, + "learning_rate": 1.2957573681045887e-08, + "loss": 0.8159325122833252, + "step": 8258 + }, + { + "epoch": 1.902995391705069, + "grad_norm": 1.1100937535082447, + "learning_rate": 1.2896512844733365e-08, + "loss": 0.7916233539581299, + "step": 8259 + }, + { + "epoch": 1.903225806451613, + "grad_norm": 1.2408177778484295, + "learning_rate": 1.2835595285185296e-08, + "loss": 0.798140823841095, + "step": 8260 + }, + { + "epoch": 1.9034562211981567, + "grad_norm": 1.2142666252173266, + "learning_rate": 1.277482101124383e-08, + "loss": 0.7881651520729065, + "step": 8261 + }, + { + "epoch": 1.9036866359447004, + "grad_norm": 1.3615775077613546, + "learning_rate": 1.2714190031730021e-08, + "loss": 0.7023189663887024, + "step": 8262 + }, + { + "epoch": 1.9039170506912444, + "grad_norm": 1.2537620544817238, + "learning_rate": 1.2653702355444606e-08, + "loss": 0.8286309242248535, + "step": 8263 + }, + { + "epoch": 1.904147465437788, + "grad_norm": 1.4181409914325045, + "learning_rate": 1.259335799116723e-08, + "loss": 0.7626973986625671, + "step": 8264 + }, + { + 
"epoch": 1.9043778801843319, + "grad_norm": 1.7640804361655256, + "learning_rate": 1.2533156947656665e-08, + "loss": 1.0350267887115479, + "step": 8265 + }, + { + "epoch": 1.9046082949308756, + "grad_norm": 1.0808972871053977, + "learning_rate": 1.2473099233651251e-08, + "loss": 0.6378228664398193, + "step": 8266 + }, + { + "epoch": 1.9048387096774193, + "grad_norm": 1.1012549826430145, + "learning_rate": 1.2413184857868241e-08, + "loss": 0.8265732526779175, + "step": 8267 + }, + { + "epoch": 1.9050691244239633, + "grad_norm": 1.102740322591124, + "learning_rate": 1.23534138290039e-08, + "loss": 0.8545348644256592, + "step": 8268 + }, + { + "epoch": 1.9052995391705068, + "grad_norm": 1.1667419775790697, + "learning_rate": 1.2293786155734176e-08, + "loss": 0.660080075263977, + "step": 8269 + }, + { + "epoch": 1.9055299539170507, + "grad_norm": 1.4258566183231558, + "learning_rate": 1.2234301846713813e-08, + "loss": 0.8409689664840698, + "step": 8270 + }, + { + "epoch": 1.9057603686635944, + "grad_norm": 1.3639053971310304, + "learning_rate": 1.2174960910576904e-08, + "loss": 0.8026434183120728, + "step": 8271 + }, + { + "epoch": 1.9059907834101382, + "grad_norm": 1.1477802786886386, + "learning_rate": 1.2115763355936671e-08, + "loss": 0.8315812945365906, + "step": 8272 + }, + { + "epoch": 1.9062211981566821, + "grad_norm": 1.1488868543504023, + "learning_rate": 1.2056709191385572e-08, + "loss": 0.7373194694519043, + "step": 8273 + }, + { + "epoch": 1.9064516129032258, + "grad_norm": 1.28219548502893, + "learning_rate": 1.1997798425495309e-08, + "loss": 0.7502317428588867, + "step": 8274 + }, + { + "epoch": 1.9066820276497696, + "grad_norm": 1.1940555150789485, + "learning_rate": 1.1939031066816707e-08, + "loss": 0.8208760023117065, + "step": 8275 + }, + { + "epoch": 1.9069124423963135, + "grad_norm": 1.2690336009694645, + "learning_rate": 1.188040712387961e-08, + "loss": 0.7584094405174255, + "step": 8276 + }, + { + "epoch": 1.907142857142857, + "grad_norm": 
1.3136164329476003, + "learning_rate": 1.1821926605193433e-08, + "loss": 0.7776647210121155, + "step": 8277 + }, + { + "epoch": 1.907373271889401, + "grad_norm": 1.0778088332238458, + "learning_rate": 1.1763589519246387e-08, + "loss": 0.7739659547805786, + "step": 8278 + }, + { + "epoch": 1.9076036866359447, + "grad_norm": 1.3752880267959628, + "learning_rate": 1.170539587450603e-08, + "loss": 0.7276068925857544, + "step": 8279 + }, + { + "epoch": 1.9078341013824884, + "grad_norm": 1.1782987713077362, + "learning_rate": 1.1647345679419163e-08, + "loss": 0.624208927154541, + "step": 8280 + }, + { + "epoch": 1.9080645161290324, + "grad_norm": 1.0744404873031923, + "learning_rate": 1.1589438942411712e-08, + "loss": 0.7865229845046997, + "step": 8281 + }, + { + "epoch": 1.908294930875576, + "grad_norm": 1.1655122856650737, + "learning_rate": 1.1531675671888619e-08, + "loss": 0.8290715217590332, + "step": 8282 + }, + { + "epoch": 1.9085253456221198, + "grad_norm": 1.4733922787626827, + "learning_rate": 1.1474055876234289e-08, + "loss": 0.8750064969062805, + "step": 8283 + }, + { + "epoch": 1.9087557603686636, + "grad_norm": 1.0358743027064434, + "learning_rate": 1.1416579563812146e-08, + "loss": 0.7946900129318237, + "step": 8284 + }, + { + "epoch": 1.9089861751152073, + "grad_norm": 1.1260650941834194, + "learning_rate": 1.1359246742964623e-08, + "loss": 0.6673855781555176, + "step": 8285 + }, + { + "epoch": 1.9092165898617512, + "grad_norm": 1.5734371068415847, + "learning_rate": 1.1302057422013734e-08, + "loss": 0.8423609137535095, + "step": 8286 + }, + { + "epoch": 1.909447004608295, + "grad_norm": 1.1774099615686673, + "learning_rate": 1.124501160926039e-08, + "loss": 0.7583299279212952, + "step": 8287 + }, + { + "epoch": 1.9096774193548387, + "grad_norm": 1.3632188021099019, + "learning_rate": 1.1188109312984639e-08, + "loss": 0.8489730358123779, + "step": 8288 + }, + { + "epoch": 1.9099078341013827, + "grad_norm": 1.268317857067217, + "learning_rate": 
1.1131350541445871e-08, + "loss": 0.7460636496543884, + "step": 8289 + }, + { + "epoch": 1.9101382488479262, + "grad_norm": 1.1951667787690143, + "learning_rate": 1.1074735302882387e-08, + "loss": 0.7310905456542969, + "step": 8290 + }, + { + "epoch": 1.91036866359447, + "grad_norm": 1.1692661015812214, + "learning_rate": 1.1018263605511946e-08, + "loss": 0.8411405086517334, + "step": 8291 + }, + { + "epoch": 1.9105990783410138, + "grad_norm": 1.12451343736832, + "learning_rate": 1.0961935457531323e-08, + "loss": 0.7980802059173584, + "step": 8292 + }, + { + "epoch": 1.9108294930875576, + "grad_norm": 1.2914760603674136, + "learning_rate": 1.0905750867116426e-08, + "loss": 0.779492974281311, + "step": 8293 + }, + { + "epoch": 1.9110599078341015, + "grad_norm": 1.0940139924335759, + "learning_rate": 1.0849709842422283e-08, + "loss": 0.7893733978271484, + "step": 8294 + }, + { + "epoch": 1.911290322580645, + "grad_norm": 1.367510888792546, + "learning_rate": 1.07938123915835e-08, + "loss": 0.8281872272491455, + "step": 8295 + }, + { + "epoch": 1.911520737327189, + "grad_norm": 1.3626141199750628, + "learning_rate": 1.0738058522713144e-08, + "loss": 0.721331775188446, + "step": 8296 + }, + { + "epoch": 1.9117511520737327, + "grad_norm": 0.9302233955509024, + "learning_rate": 1.0682448243904073e-08, + "loss": 0.6043491363525391, + "step": 8297 + }, + { + "epoch": 1.9119815668202764, + "grad_norm": 1.002380139729753, + "learning_rate": 1.0626981563227943e-08, + "loss": 0.7737481594085693, + "step": 8298 + }, + { + "epoch": 1.9122119815668204, + "grad_norm": 1.20563258082351, + "learning_rate": 1.0571658488735536e-08, + "loss": 0.771499514579773, + "step": 8299 + }, + { + "epoch": 1.912442396313364, + "grad_norm": 1.1334287395884057, + "learning_rate": 1.0516479028457204e-08, + "loss": 0.6711971759796143, + "step": 8300 + }, + { + "epoch": 1.9126728110599078, + "grad_norm": 1.1514161835446617, + "learning_rate": 1.0461443190402097e-08, + "loss": 0.691685140132904, + 
"step": 8301 + }, + { + "epoch": 1.9129032258064518, + "grad_norm": 1.0627327279898275, + "learning_rate": 1.0406550982558382e-08, + "loss": 0.7339159250259399, + "step": 8302 + }, + { + "epoch": 1.9131336405529953, + "grad_norm": 1.098827920572517, + "learning_rate": 1.0351802412893796e-08, + "loss": 0.7832008600234985, + "step": 8303 + }, + { + "epoch": 1.9133640552995392, + "grad_norm": 1.8976948304927823, + "learning_rate": 1.0297197489355092e-08, + "loss": 0.862671971321106, + "step": 8304 + }, + { + "epoch": 1.913594470046083, + "grad_norm": 1.2340137918284608, + "learning_rate": 1.0242736219867821e-08, + "loss": 0.6442357897758484, + "step": 8305 + }, + { + "epoch": 1.9138248847926267, + "grad_norm": 1.3262423414476558, + "learning_rate": 1.0188418612337102e-08, + "loss": 0.8777452707290649, + "step": 8306 + }, + { + "epoch": 1.9140552995391706, + "grad_norm": 1.2308393583128812, + "learning_rate": 1.0134244674647186e-08, + "loss": 0.7672470808029175, + "step": 8307 + }, + { + "epoch": 1.9142857142857141, + "grad_norm": 0.9277990008899878, + "learning_rate": 1.0080214414661226e-08, + "loss": 0.7338177561759949, + "step": 8308 + }, + { + "epoch": 1.914516129032258, + "grad_norm": 1.3815065909330264, + "learning_rate": 1.0026327840221727e-08, + "loss": 0.7546414136886597, + "step": 8309 + }, + { + "epoch": 1.9147465437788018, + "grad_norm": 1.0116807626508924, + "learning_rate": 9.972584959149988e-09, + "loss": 0.621455192565918, + "step": 8310 + }, + { + "epoch": 1.9149769585253456, + "grad_norm": 1.0385626369203964, + "learning_rate": 9.918985779247102e-09, + "loss": 0.7403131723403931, + "step": 8311 + }, + { + "epoch": 1.9152073732718895, + "grad_norm": 1.1027069898803628, + "learning_rate": 9.865530308292624e-09, + "loss": 0.7924279570579529, + "step": 8312 + }, + { + "epoch": 1.9154377880184332, + "grad_norm": 1.1362295208393791, + "learning_rate": 9.81221855404568e-09, + "loss": 0.8831228017807007, + "step": 8313 + }, + { + "epoch": 1.915668202764977, + 
"grad_norm": 1.1281945792188444, + "learning_rate": 9.759050524244417e-09, + "loss": 0.6786219477653503, + "step": 8314 + }, + { + "epoch": 1.9158986175115207, + "grad_norm": 1.2807157366480393, + "learning_rate": 9.70602622660599e-09, + "loss": 0.7311046123504639, + "step": 8315 + }, + { + "epoch": 1.9161290322580644, + "grad_norm": 1.3847340573145779, + "learning_rate": 9.653145668826912e-09, + "loss": 0.8914301991462708, + "step": 8316 + }, + { + "epoch": 1.9163594470046084, + "grad_norm": 1.4027670914288322, + "learning_rate": 9.600408858582709e-09, + "loss": 0.8144292831420898, + "step": 8317 + }, + { + "epoch": 1.916589861751152, + "grad_norm": 1.1077379444431534, + "learning_rate": 9.547815803528036e-09, + "loss": 0.6670823097229004, + "step": 8318 + }, + { + "epoch": 1.9168202764976958, + "grad_norm": 1.2434106495167774, + "learning_rate": 9.495366511296676e-09, + "loss": 0.6801552772521973, + "step": 8319 + }, + { + "epoch": 1.9170506912442398, + "grad_norm": 1.0098918722618904, + "learning_rate": 9.44306098950165e-09, + "loss": 0.8144240379333496, + "step": 8320 + }, + { + "epoch": 1.9172811059907833, + "grad_norm": 1.0515221920732627, + "learning_rate": 9.390899245734995e-09, + "loss": 0.6352888345718384, + "step": 8321 + }, + { + "epoch": 1.9175115207373272, + "grad_norm": 1.2296941092807456, + "learning_rate": 9.33888128756788e-09, + "loss": 0.7513711452484131, + "step": 8322 + }, + { + "epoch": 1.917741935483871, + "grad_norm": 1.4377668264686976, + "learning_rate": 9.287007122550705e-09, + "loss": 0.7699171304702759, + "step": 8323 + }, + { + "epoch": 1.9179723502304147, + "grad_norm": 1.591632209718944, + "learning_rate": 9.235276758212895e-09, + "loss": 0.8321002721786499, + "step": 8324 + }, + { + "epoch": 1.9182027649769586, + "grad_norm": 1.0453744404830132, + "learning_rate": 9.183690202062999e-09, + "loss": 0.6815298795700073, + "step": 8325 + }, + { + "epoch": 1.9184331797235024, + "grad_norm": 1.0030633247337575, + "learning_rate": 
9.132247461588915e-09, + "loss": 0.7135178446769714, + "step": 8326 + }, + { + "epoch": 1.918663594470046, + "grad_norm": 1.3123190228023687, + "learning_rate": 9.080948544257338e-09, + "loss": 0.8452005982398987, + "step": 8327 + }, + { + "epoch": 1.9188940092165898, + "grad_norm": 1.1270879003396566, + "learning_rate": 9.029793457514312e-09, + "loss": 0.7449440956115723, + "step": 8328 + }, + { + "epoch": 1.9191244239631335, + "grad_norm": 1.2310904327231214, + "learning_rate": 8.978782208784897e-09, + "loss": 0.8172955513000488, + "step": 8329 + }, + { + "epoch": 1.9193548387096775, + "grad_norm": 1.0097624251077932, + "learning_rate": 8.92791480547317e-09, + "loss": 0.6682305335998535, + "step": 8330 + }, + { + "epoch": 1.9195852534562212, + "grad_norm": 1.1974701853493588, + "learning_rate": 8.877191254962779e-09, + "loss": 0.6874973773956299, + "step": 8331 + }, + { + "epoch": 1.919815668202765, + "grad_norm": 1.1728345166861331, + "learning_rate": 8.826611564615949e-09, + "loss": 0.8371694684028625, + "step": 8332 + }, + { + "epoch": 1.920046082949309, + "grad_norm": 1.1837626119929445, + "learning_rate": 8.77617574177425e-09, + "loss": 0.7147493362426758, + "step": 8333 + }, + { + "epoch": 1.9202764976958524, + "grad_norm": 1.2783488550083906, + "learning_rate": 8.725883793758382e-09, + "loss": 0.7444115877151489, + "step": 8334 + }, + { + "epoch": 1.9205069124423964, + "grad_norm": 1.3799268170287549, + "learning_rate": 8.675735727868283e-09, + "loss": 0.7772307395935059, + "step": 8335 + }, + { + "epoch": 1.92073732718894, + "grad_norm": 1.2730237375907167, + "learning_rate": 8.625731551382798e-09, + "loss": 0.702937126159668, + "step": 8336 + }, + { + "epoch": 1.9209677419354838, + "grad_norm": 1.316574939310684, + "learning_rate": 8.575871271559898e-09, + "loss": 0.7404709458351135, + "step": 8337 + }, + { + "epoch": 1.9211981566820278, + "grad_norm": 1.4216605594412726, + "learning_rate": 8.526154895636906e-09, + "loss": 0.7142058610916138, + "step": 
8338 + }, + { + "epoch": 1.9214285714285713, + "grad_norm": 1.381037068322115, + "learning_rate": 8.476582430830048e-09, + "loss": 0.8950545191764832, + "step": 8339 + }, + { + "epoch": 1.9216589861751152, + "grad_norm": 1.2364573338693037, + "learning_rate": 8.42715388433446e-09, + "loss": 0.6939054131507874, + "step": 8340 + }, + { + "epoch": 1.921889400921659, + "grad_norm": 1.3248307922164142, + "learning_rate": 8.377869263324954e-09, + "loss": 0.7916324138641357, + "step": 8341 + }, + { + "epoch": 1.9221198156682027, + "grad_norm": 1.3092539218499513, + "learning_rate": 8.328728574954924e-09, + "loss": 0.8059754371643066, + "step": 8342 + }, + { + "epoch": 1.9223502304147466, + "grad_norm": 1.1195879983393067, + "learning_rate": 8.279731826357105e-09, + "loss": 0.650648295879364, + "step": 8343 + }, + { + "epoch": 1.9225806451612903, + "grad_norm": 0.9135397053997126, + "learning_rate": 8.230879024643478e-09, + "loss": 0.6912552118301392, + "step": 8344 + }, + { + "epoch": 1.922811059907834, + "grad_norm": 0.8588678436998939, + "learning_rate": 8.182170176904702e-09, + "loss": 0.7430927753448486, + "step": 8345 + }, + { + "epoch": 1.923041474654378, + "grad_norm": 1.1000327691208154, + "learning_rate": 8.133605290210898e-09, + "loss": 0.7550772428512573, + "step": 8346 + }, + { + "epoch": 1.9232718894009215, + "grad_norm": 1.1138393113278757, + "learning_rate": 8.08518437161132e-09, + "loss": 0.7235819101333618, + "step": 8347 + }, + { + "epoch": 1.9235023041474655, + "grad_norm": 1.085631464611088, + "learning_rate": 8.036907428134121e-09, + "loss": 0.790582537651062, + "step": 8348 + }, + { + "epoch": 1.9237327188940092, + "grad_norm": 1.2928878399763604, + "learning_rate": 7.988774466786585e-09, + "loss": 0.7350871562957764, + "step": 8349 + }, + { + "epoch": 1.923963133640553, + "grad_norm": 1.3980478677422172, + "learning_rate": 7.940785494555124e-09, + "loss": 0.86177659034729, + "step": 8350 + }, + { + "epoch": 1.9241935483870969, + "grad_norm": 
1.196963381013611, + "learning_rate": 7.892940518405499e-09, + "loss": 0.8039232492446899, + "step": 8351 + }, + { + "epoch": 1.9244239631336404, + "grad_norm": 1.231295549355971, + "learning_rate": 7.845239545282046e-09, + "loss": 0.7130967378616333, + "step": 8352 + }, + { + "epoch": 1.9246543778801843, + "grad_norm": 1.0830506625128473, + "learning_rate": 7.797682582108667e-09, + "loss": 0.7297911047935486, + "step": 8353 + }, + { + "epoch": 1.924884792626728, + "grad_norm": 1.2576048144274934, + "learning_rate": 7.750269635788065e-09, + "loss": 0.7302875518798828, + "step": 8354 + }, + { + "epoch": 1.9251152073732718, + "grad_norm": 1.1228331103171292, + "learning_rate": 7.703000713202401e-09, + "loss": 0.7976555824279785, + "step": 8355 + }, + { + "epoch": 1.9253456221198157, + "grad_norm": 1.1181213613597878, + "learning_rate": 7.65587582121252e-09, + "loss": 0.6747829914093018, + "step": 8356 + }, + { + "epoch": 1.9255760368663595, + "grad_norm": 1.3086474559444063, + "learning_rate": 7.608894966658509e-09, + "loss": 0.7217142581939697, + "step": 8357 + }, + { + "epoch": 1.9258064516129032, + "grad_norm": 1.3893709396765357, + "learning_rate": 7.562058156359685e-09, + "loss": 0.8635888695716858, + "step": 8358 + }, + { + "epoch": 1.9260368663594472, + "grad_norm": 1.3318330118319255, + "learning_rate": 7.515365397114282e-09, + "loss": 0.8435994386672974, + "step": 8359 + }, + { + "epoch": 1.9262672811059907, + "grad_norm": 1.4490671236886896, + "learning_rate": 7.468816695699653e-09, + "loss": 0.8632286787033081, + "step": 8360 + }, + { + "epoch": 1.9264976958525346, + "grad_norm": 1.501498499241499, + "learning_rate": 7.422412058872396e-09, + "loss": 0.7916556596755981, + "step": 8361 + }, + { + "epoch": 1.9267281105990783, + "grad_norm": 1.1808854932681303, + "learning_rate": 7.376151493368121e-09, + "loss": 0.8307663202285767, + "step": 8362 + }, + { + "epoch": 1.926958525345622, + "grad_norm": 1.4156996026964064, + "learning_rate": 7.330035005901236e-09, 
+ "loss": 0.9020388126373291, + "step": 8363 + }, + { + "epoch": 1.927188940092166, + "grad_norm": 1.222606934693838, + "learning_rate": 7.28406260316572e-09, + "loss": 0.7926114797592163, + "step": 8364 + }, + { + "epoch": 1.9274193548387095, + "grad_norm": 1.0417046174216056, + "learning_rate": 7.2382342918343446e-09, + "loss": 0.7609784603118896, + "step": 8365 + }, + { + "epoch": 1.9276497695852535, + "grad_norm": 1.3729827404737949, + "learning_rate": 7.192550078559012e-09, + "loss": 0.6010490655899048, + "step": 8366 + }, + { + "epoch": 1.9278801843317972, + "grad_norm": 1.495271329234438, + "learning_rate": 7.147009969970641e-09, + "loss": 0.8219606876373291, + "step": 8367 + }, + { + "epoch": 1.928110599078341, + "grad_norm": 1.207499145814505, + "learning_rate": 7.101613972679499e-09, + "loss": 0.8688151836395264, + "step": 8368 + }, + { + "epoch": 1.9283410138248849, + "grad_norm": 1.0608698410629562, + "learning_rate": 7.0563620932747595e-09, + "loss": 0.7654411792755127, + "step": 8369 + }, + { + "epoch": 1.9285714285714286, + "grad_norm": 1.0982841652537483, + "learning_rate": 7.01125433832439e-09, + "loss": 0.6878413558006287, + "step": 8370 + }, + { + "epoch": 1.9288018433179723, + "grad_norm": 1.0662803206592244, + "learning_rate": 6.966290714375933e-09, + "loss": 0.6703332662582397, + "step": 8371 + }, + { + "epoch": 1.9290322580645163, + "grad_norm": 1.1405585467491617, + "learning_rate": 6.921471227955833e-09, + "loss": 0.752200722694397, + "step": 8372 + }, + { + "epoch": 1.9292626728110598, + "grad_norm": 1.1122335677850106, + "learning_rate": 6.8767958855695526e-09, + "loss": 0.8107069730758667, + "step": 8373 + }, + { + "epoch": 1.9294930875576037, + "grad_norm": 1.4102834771954489, + "learning_rate": 6.832264693701573e-09, + "loss": 0.8816967010498047, + "step": 8374 + }, + { + "epoch": 1.9297235023041475, + "grad_norm": 1.2593635712728732, + "learning_rate": 6.78787765881561e-09, + "loss": 0.7889697551727295, + "step": 8375 + }, + { + 
"epoch": 1.9299539170506912, + "grad_norm": 1.2377942170623384, + "learning_rate": 6.743634787354291e-09, + "loss": 0.7218060493469238, + "step": 8376 + }, + { + "epoch": 1.9301843317972351, + "grad_norm": 1.2786458190631131, + "learning_rate": 6.699536085739588e-09, + "loss": 0.8061347007751465, + "step": 8377 + }, + { + "epoch": 1.9304147465437786, + "grad_norm": 1.0571211016932303, + "learning_rate": 6.655581560372159e-09, + "loss": 0.7320632934570312, + "step": 8378 + }, + { + "epoch": 1.9306451612903226, + "grad_norm": 1.2201688729332103, + "learning_rate": 6.611771217632123e-09, + "loss": 0.7039695978164673, + "step": 8379 + }, + { + "epoch": 1.9308755760368663, + "grad_norm": 1.0152325785443144, + "learning_rate": 6.568105063878393e-09, + "loss": 0.7056317925453186, + "step": 8380 + }, + { + "epoch": 1.93110599078341, + "grad_norm": 1.3442992098354511, + "learning_rate": 6.524583105449122e-09, + "loss": 0.9265607595443726, + "step": 8381 + }, + { + "epoch": 1.931336405529954, + "grad_norm": 0.9980232024455323, + "learning_rate": 6.481205348661367e-09, + "loss": 0.7249365448951721, + "step": 8382 + }, + { + "epoch": 1.9315668202764977, + "grad_norm": 1.0217670095742197, + "learning_rate": 6.4379717998114256e-09, + "loss": 0.8216372728347778, + "step": 8383 + }, + { + "epoch": 1.9317972350230415, + "grad_norm": 1.0731967820570871, + "learning_rate": 6.394882465174611e-09, + "loss": 0.6750606894493103, + "step": 8384 + }, + { + "epoch": 1.9320276497695854, + "grad_norm": 1.1382732221343326, + "learning_rate": 6.351937351005143e-09, + "loss": 0.8265045285224915, + "step": 8385 + }, + { + "epoch": 1.932258064516129, + "grad_norm": 1.2033626019579449, + "learning_rate": 6.309136463536591e-09, + "loss": 0.5992317795753479, + "step": 8386 + }, + { + "epoch": 1.9324884792626729, + "grad_norm": 1.026760102298627, + "learning_rate": 6.266479808981428e-09, + "loss": 0.6586567163467407, + "step": 8387 + }, + { + "epoch": 1.9327188940092166, + "grad_norm": 
1.1335080912138158, + "learning_rate": 6.223967393531259e-09, + "loss": 0.7496415376663208, + "step": 8388 + }, + { + "epoch": 1.9329493087557603, + "grad_norm": 1.2743344602397095, + "learning_rate": 6.181599223356593e-09, + "loss": 0.8637027740478516, + "step": 8389 + }, + { + "epoch": 1.9331797235023043, + "grad_norm": 1.3348493633535858, + "learning_rate": 6.139375304607064e-09, + "loss": 0.6925984621047974, + "step": 8390 + }, + { + "epoch": 1.9334101382488478, + "grad_norm": 1.3338549311969345, + "learning_rate": 6.0972956434115485e-09, + "loss": 0.8345432877540588, + "step": 8391 + }, + { + "epoch": 1.9336405529953917, + "grad_norm": 1.211546505819517, + "learning_rate": 6.055360245877938e-09, + "loss": 0.797752857208252, + "step": 8392 + }, + { + "epoch": 1.9338709677419355, + "grad_norm": 1.025513773253857, + "learning_rate": 6.013569118092809e-09, + "loss": 0.7460094690322876, + "step": 8393 + }, + { + "epoch": 1.9341013824884792, + "grad_norm": 1.0501792229397418, + "learning_rate": 5.97192226612242e-09, + "loss": 0.7695547342300415, + "step": 8394 + }, + { + "epoch": 1.9343317972350231, + "grad_norm": 1.3341559418127071, + "learning_rate": 5.9304196960113795e-09, + "loss": 0.8372104167938232, + "step": 8395 + }, + { + "epoch": 1.9345622119815669, + "grad_norm": 1.174939684239835, + "learning_rate": 5.889061413784091e-09, + "loss": 0.7647950053215027, + "step": 8396 + }, + { + "epoch": 1.9347926267281106, + "grad_norm": 1.0568987578487792, + "learning_rate": 5.84784742544353e-09, + "loss": 0.6958519220352173, + "step": 8397 + }, + { + "epoch": 1.9350230414746545, + "grad_norm": 1.1905008025272417, + "learning_rate": 5.806777736971691e-09, + "loss": 0.8488763570785522, + "step": 8398 + }, + { + "epoch": 1.935253456221198, + "grad_norm": 1.1975357379056275, + "learning_rate": 5.765852354330025e-09, + "loss": 0.6448318958282471, + "step": 8399 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 1.288117894635522, + "learning_rate": 5.725071283458671e-09, 
+ "loss": 0.7449144124984741, + "step": 8400 + }, + { + "epoch": 1.9357142857142857, + "grad_norm": 1.2060473887345362, + "learning_rate": 5.684434530277005e-09, + "loss": 0.8339489102363586, + "step": 8401 + }, + { + "epoch": 1.9359447004608294, + "grad_norm": 1.355663998015665, + "learning_rate": 5.643942100683308e-09, + "loss": 0.7758409380912781, + "step": 8402 + }, + { + "epoch": 1.9361751152073734, + "grad_norm": 1.2457476365021507, + "learning_rate": 5.60359400055499e-09, + "loss": 0.8604291081428528, + "step": 8403 + }, + { + "epoch": 1.936405529953917, + "grad_norm": 0.9800977546704353, + "learning_rate": 5.5633902357487e-09, + "loss": 0.7379741668701172, + "step": 8404 + }, + { + "epoch": 1.9366359447004609, + "grad_norm": 1.0501931597758303, + "learning_rate": 5.52333081209988e-09, + "loss": 0.6943101286888123, + "step": 8405 + }, + { + "epoch": 1.9368663594470046, + "grad_norm": 1.193280273833338, + "learning_rate": 5.483415735422992e-09, + "loss": 0.7397646903991699, + "step": 8406 + }, + { + "epoch": 1.9370967741935483, + "grad_norm": 1.1298510822998358, + "learning_rate": 5.443645011511844e-09, + "loss": 0.7566234469413757, + "step": 8407 + }, + { + "epoch": 1.9373271889400923, + "grad_norm": 1.322820355956732, + "learning_rate": 5.40401864613893e-09, + "loss": 0.6345827579498291, + "step": 8408 + }, + { + "epoch": 1.937557603686636, + "grad_norm": 1.6653451978671274, + "learning_rate": 5.3645366450560944e-09, + "loss": 0.7259831428527832, + "step": 8409 + }, + { + "epoch": 1.9377880184331797, + "grad_norm": 1.347964952979272, + "learning_rate": 5.325199013993975e-09, + "loss": 0.7897600531578064, + "step": 8410 + }, + { + "epoch": 1.9380184331797237, + "grad_norm": 1.3016062068490681, + "learning_rate": 5.286005758662448e-09, + "loss": 0.8421739339828491, + "step": 8411 + }, + { + "epoch": 1.9382488479262672, + "grad_norm": 1.3347958532899202, + "learning_rate": 5.2469568847504085e-09, + "loss": 0.7652501463890076, + "step": 8412 + }, + { + "epoch": 
1.9384792626728111, + "grad_norm": 1.3105993577298032, + "learning_rate": 5.2080523979256556e-09, + "loss": 0.6397069096565247, + "step": 8413 + }, + { + "epoch": 1.9387096774193548, + "grad_norm": 1.2689574006754154, + "learning_rate": 5.169292303835116e-09, + "loss": 0.840052604675293, + "step": 8414 + }, + { + "epoch": 1.9389400921658986, + "grad_norm": 1.344062608291919, + "learning_rate": 5.130676608104845e-09, + "loss": 0.8453920483589172, + "step": 8415 + }, + { + "epoch": 1.9391705069124425, + "grad_norm": 1.3358429095342716, + "learning_rate": 5.092205316339915e-09, + "loss": 0.8301386833190918, + "step": 8416 + }, + { + "epoch": 1.939400921658986, + "grad_norm": 1.0570862677742232, + "learning_rate": 5.0538784341241924e-09, + "loss": 0.6682429313659668, + "step": 8417 + }, + { + "epoch": 1.93963133640553, + "grad_norm": 1.4370850274204425, + "learning_rate": 5.0156959670208945e-09, + "loss": 0.7881286144256592, + "step": 8418 + }, + { + "epoch": 1.9398617511520737, + "grad_norm": 1.1170749783406635, + "learning_rate": 4.9776579205721424e-09, + "loss": 0.7413277626037598, + "step": 8419 + }, + { + "epoch": 1.9400921658986174, + "grad_norm": 1.2672048797390025, + "learning_rate": 4.939764300299187e-09, + "loss": 0.6718757152557373, + "step": 8420 + }, + { + "epoch": 1.9403225806451614, + "grad_norm": 1.1707673461814823, + "learning_rate": 4.9020151117019625e-09, + "loss": 0.8595068454742432, + "step": 8421 + }, + { + "epoch": 1.9405529953917051, + "grad_norm": 1.0350774696905816, + "learning_rate": 4.864410360260085e-09, + "loss": 0.6985205411911011, + "step": 8422 + }, + { + "epoch": 1.9407834101382488, + "grad_norm": 1.222465370246094, + "learning_rate": 4.826950051431522e-09, + "loss": 0.7148889303207397, + "step": 8423 + }, + { + "epoch": 1.9410138248847926, + "grad_norm": 1.320040251210183, + "learning_rate": 4.789634190653813e-09, + "loss": 0.8109019994735718, + "step": 8424 + }, + { + "epoch": 1.9412442396313363, + "grad_norm": 1.4762486891336946, + 
"learning_rate": 4.752462783343292e-09, + "loss": 0.8268437385559082, + "step": 8425 + }, + { + "epoch": 1.9414746543778802, + "grad_norm": 0.9708535634361853, + "learning_rate": 4.715435834895088e-09, + "loss": 0.7300432920455933, + "step": 8426 + }, + { + "epoch": 1.941705069124424, + "grad_norm": 1.3017508085468754, + "learning_rate": 4.6785533506839005e-09, + "loss": 0.848440408706665, + "step": 8427 + }, + { + "epoch": 1.9419354838709677, + "grad_norm": 1.0873655680994063, + "learning_rate": 4.6418153360630044e-09, + "loss": 0.7526305913925171, + "step": 8428 + }, + { + "epoch": 1.9421658986175117, + "grad_norm": 1.1186105868292944, + "learning_rate": 4.605221796365022e-09, + "loss": 0.6987402439117432, + "step": 8429 + }, + { + "epoch": 1.9423963133640552, + "grad_norm": 1.5889483697201847, + "learning_rate": 4.568772736901261e-09, + "loss": 0.7944519519805908, + "step": 8430 + }, + { + "epoch": 1.942626728110599, + "grad_norm": 1.0443704220390153, + "learning_rate": 4.532468162962378e-09, + "loss": 0.7206175327301025, + "step": 8431 + }, + { + "epoch": 1.9428571428571428, + "grad_norm": 1.332362884391146, + "learning_rate": 4.4963080798179345e-09, + "loss": 0.6892992854118347, + "step": 8432 + }, + { + "epoch": 1.9430875576036866, + "grad_norm": 1.0826330060160456, + "learning_rate": 4.460292492716511e-09, + "loss": 0.696158766746521, + "step": 8433 + }, + { + "epoch": 1.9433179723502305, + "grad_norm": 0.9789941295444919, + "learning_rate": 4.424421406885704e-09, + "loss": 0.8007163405418396, + "step": 8434 + }, + { + "epoch": 1.9435483870967742, + "grad_norm": 1.1286085842961833, + "learning_rate": 4.3886948275320135e-09, + "loss": 0.7969222068786621, + "step": 8435 + }, + { + "epoch": 1.943778801843318, + "grad_norm": 1.2183409512094359, + "learning_rate": 4.353112759841404e-09, + "loss": 0.7752852439880371, + "step": 8436 + }, + { + "epoch": 1.9440092165898617, + "grad_norm": 1.1860536416754315, + "learning_rate": 4.317675208978411e-09, + "loss": 
0.7788258790969849, + "step": 8437 + }, + { + "epoch": 1.9442396313364054, + "grad_norm": 1.1863849018136006, + "learning_rate": 4.2823821800866964e-09, + "loss": 0.838456392288208, + "step": 8438 + }, + { + "epoch": 1.9444700460829494, + "grad_norm": 1.0569456831140607, + "learning_rate": 4.2472336782890525e-09, + "loss": 0.7503675222396851, + "step": 8439 + }, + { + "epoch": 1.944700460829493, + "grad_norm": 0.9808278818485672, + "learning_rate": 4.212229708687287e-09, + "loss": 0.810901403427124, + "step": 8440 + }, + { + "epoch": 1.9449308755760368, + "grad_norm": 1.0050063922171069, + "learning_rate": 4.1773702763621135e-09, + "loss": 0.7551805973052979, + "step": 8441 + }, + { + "epoch": 1.9451612903225808, + "grad_norm": 1.2275039222333026, + "learning_rate": 4.142655386373373e-09, + "loss": 0.9387043714523315, + "step": 8442 + }, + { + "epoch": 1.9453917050691243, + "grad_norm": 1.034577232879954, + "learning_rate": 4.1080850437598124e-09, + "loss": 0.7508292198181152, + "step": 8443 + }, + { + "epoch": 1.9456221198156682, + "grad_norm": 0.9799945991508818, + "learning_rate": 4.073659253539308e-09, + "loss": 0.737107515335083, + "step": 8444 + }, + { + "epoch": 1.945852534562212, + "grad_norm": 1.477967097078984, + "learning_rate": 4.03937802070875e-09, + "loss": 0.86794114112854, + "step": 8445 + }, + { + "epoch": 1.9460829493087557, + "grad_norm": 0.9207750837260967, + "learning_rate": 4.005241350243937e-09, + "loss": 0.7629859447479248, + "step": 8446 + }, + { + "epoch": 1.9463133640552996, + "grad_norm": 1.4180879805115079, + "learning_rate": 3.971249247099906e-09, + "loss": 0.7455410957336426, + "step": 8447 + }, + { + "epoch": 1.9465437788018434, + "grad_norm": 1.1941620926103322, + "learning_rate": 3.937401716210376e-09, + "loss": 0.8322222828865051, + "step": 8448 + }, + { + "epoch": 1.946774193548387, + "grad_norm": 1.510433091637528, + "learning_rate": 3.903698762488528e-09, + "loss": 0.7961260676383972, + "step": 8449 + }, + { + "epoch": 
1.9470046082949308, + "grad_norm": 1.2160569883363423, + "learning_rate": 3.870140390826005e-09, + "loss": 0.8144096732139587, + "step": 8450 + }, + { + "epoch": 1.9472350230414746, + "grad_norm": 1.2123613138822447, + "learning_rate": 3.8367266060939095e-09, + "loss": 0.7973348498344421, + "step": 8451 + }, + { + "epoch": 1.9474654377880185, + "grad_norm": 1.4038735969349747, + "learning_rate": 3.803457413142253e-09, + "loss": 0.8311715126037598, + "step": 8452 + }, + { + "epoch": 1.9476958525345622, + "grad_norm": 0.9815978065709688, + "learning_rate": 3.770332816799948e-09, + "loss": 0.7851812839508057, + "step": 8453 + }, + { + "epoch": 1.947926267281106, + "grad_norm": 1.3820548975058524, + "learning_rate": 3.737352821875039e-09, + "loss": 0.8721193075180054, + "step": 8454 + }, + { + "epoch": 1.94815668202765, + "grad_norm": 1.2337347998012935, + "learning_rate": 3.704517433154364e-09, + "loss": 0.8594118356704712, + "step": 8455 + }, + { + "epoch": 1.9483870967741934, + "grad_norm": 0.9620755666197012, + "learning_rate": 3.671826655404109e-09, + "loss": 0.6526527404785156, + "step": 8456 + }, + { + "epoch": 1.9486175115207374, + "grad_norm": 0.9198704876253201, + "learning_rate": 3.639280493369368e-09, + "loss": 0.7577145099639893, + "step": 8457 + }, + { + "epoch": 1.948847926267281, + "grad_norm": 1.4898349304718468, + "learning_rate": 3.6068789517739173e-09, + "loss": 0.9176833629608154, + "step": 8458 + }, + { + "epoch": 1.9490783410138248, + "grad_norm": 1.5070373914502264, + "learning_rate": 3.5746220353209956e-09, + "loss": 0.8947671055793762, + "step": 8459 + }, + { + "epoch": 1.9493087557603688, + "grad_norm": 1.2654885409411176, + "learning_rate": 3.542509748692524e-09, + "loss": 0.8791666030883789, + "step": 8460 + }, + { + "epoch": 1.9495391705069123, + "grad_norm": 0.9247331783476281, + "learning_rate": 3.5105420965496626e-09, + "loss": 0.7431247234344482, + "step": 8461 + }, + { + "epoch": 1.9497695852534562, + "grad_norm": 1.3437504272827105, 
+ "learning_rate": 3.4787190835324775e-09, + "loss": 0.7998695373535156, + "step": 8462 + }, + { + "epoch": 1.95, + "grad_norm": 1.359553043789141, + "learning_rate": 3.447040714259941e-09, + "loss": 0.8120161294937134, + "step": 8463 + }, + { + "epoch": 1.9502304147465437, + "grad_norm": 1.063781533705899, + "learning_rate": 3.415506993330153e-09, + "loss": 0.8062546849250793, + "step": 8464 + }, + { + "epoch": 1.9504608294930876, + "grad_norm": 1.3290963135655427, + "learning_rate": 3.384117925320229e-09, + "loss": 0.8100919723510742, + "step": 8465 + }, + { + "epoch": 1.9506912442396314, + "grad_norm": 1.410960677080016, + "learning_rate": 3.352873514786303e-09, + "loss": 0.7376535534858704, + "step": 8466 + }, + { + "epoch": 1.950921658986175, + "grad_norm": 1.1333962819853984, + "learning_rate": 3.321773766263303e-09, + "loss": 0.7534361481666565, + "step": 8467 + }, + { + "epoch": 1.951152073732719, + "grad_norm": 0.956942860373484, + "learning_rate": 3.290818684265506e-09, + "loss": 0.6914925575256348, + "step": 8468 + }, + { + "epoch": 1.9513824884792625, + "grad_norm": 1.40322423242457, + "learning_rate": 3.2600082732858746e-09, + "loss": 0.837024450302124, + "step": 8469 + }, + { + "epoch": 1.9516129032258065, + "grad_norm": 1.3077639635125993, + "learning_rate": 3.229342537796609e-09, + "loss": 0.7960337400436401, + "step": 8470 + }, + { + "epoch": 1.9518433179723502, + "grad_norm": 1.1044299774108808, + "learning_rate": 3.1988214822485928e-09, + "loss": 0.6611788868904114, + "step": 8471 + }, + { + "epoch": 1.952073732718894, + "grad_norm": 1.2652589643459276, + "learning_rate": 3.16844511107206e-09, + "loss": 0.8798158168792725, + "step": 8472 + }, + { + "epoch": 1.952304147465438, + "grad_norm": 1.3477135835069336, + "learning_rate": 3.1382134286761506e-09, + "loss": 0.790015459060669, + "step": 8473 + }, + { + "epoch": 1.9525345622119814, + "grad_norm": 1.062422263250462, + "learning_rate": 3.1081264394489103e-09, + "loss": 0.7676407098770142, + 
"step": 8474 + }, + { + "epoch": 1.9527649769585254, + "grad_norm": 1.1707572290080033, + "learning_rate": 3.07818414775729e-09, + "loss": 0.8213051557540894, + "step": 8475 + }, + { + "epoch": 1.952995391705069, + "grad_norm": 1.328203051872804, + "learning_rate": 3.048386557947591e-09, + "loss": 0.8909401893615723, + "step": 8476 + }, + { + "epoch": 1.9532258064516128, + "grad_norm": 1.2206551189591073, + "learning_rate": 3.0187336743446867e-09, + "loss": 0.838227391242981, + "step": 8477 + }, + { + "epoch": 1.9534562211981568, + "grad_norm": 1.1958685930192579, + "learning_rate": 2.9892255012528013e-09, + "loss": 0.7297696471214294, + "step": 8478 + }, + { + "epoch": 1.9536866359447005, + "grad_norm": 1.508389266534061, + "learning_rate": 2.9598620429550636e-09, + "loss": 1.0060585737228394, + "step": 8479 + }, + { + "epoch": 1.9539170506912442, + "grad_norm": 1.1858328009290373, + "learning_rate": 2.9306433037132873e-09, + "loss": 0.7812967300415039, + "step": 8480 + }, + { + "epoch": 1.9541474654377882, + "grad_norm": 1.196629989025656, + "learning_rate": 2.901569287768746e-09, + "loss": 0.7349315881729126, + "step": 8481 + }, + { + "epoch": 1.9543778801843317, + "grad_norm": 1.1580071941270487, + "learning_rate": 2.8726399993415085e-09, + "loss": 0.7083498239517212, + "step": 8482 + }, + { + "epoch": 1.9546082949308756, + "grad_norm": 1.3308451395414542, + "learning_rate": 2.8438554426304386e-09, + "loss": 0.7969732880592346, + "step": 8483 + }, + { + "epoch": 1.9548387096774194, + "grad_norm": 1.405840014033905, + "learning_rate": 2.815215621813749e-09, + "loss": 0.7701122164726257, + "step": 8484 + }, + { + "epoch": 1.955069124423963, + "grad_norm": 1.0487330945577633, + "learning_rate": 2.7867205410484485e-09, + "loss": 0.7323017120361328, + "step": 8485 + }, + { + "epoch": 1.955299539170507, + "grad_norm": 0.9842598310766136, + "learning_rate": 2.7583702044704504e-09, + "loss": 0.8357248306274414, + "step": 8486 + }, + { + "epoch": 1.9555299539170505, + 
"grad_norm": 1.4806137218761686, + "learning_rate": 2.7301646161947966e-09, + "loss": 0.8164674043655396, + "step": 8487 + }, + { + "epoch": 1.9557603686635945, + "grad_norm": 1.2641967325925645, + "learning_rate": 2.7021037803156566e-09, + "loss": 0.7972782850265503, + "step": 8488 + }, + { + "epoch": 1.9559907834101382, + "grad_norm": 1.2417679147004388, + "learning_rate": 2.6741877009058835e-09, + "loss": 0.864342987537384, + "step": 8489 + }, + { + "epoch": 1.956221198156682, + "grad_norm": 1.1067561191492752, + "learning_rate": 2.646416382017458e-09, + "loss": 0.7428402900695801, + "step": 8490 + }, + { + "epoch": 1.956451612903226, + "grad_norm": 1.3211414352422526, + "learning_rate": 2.618789827681378e-09, + "loss": 0.7164437770843506, + "step": 8491 + }, + { + "epoch": 1.9566820276497696, + "grad_norm": 1.153189225005644, + "learning_rate": 2.5913080419075473e-09, + "loss": 0.6997767686843872, + "step": 8492 + }, + { + "epoch": 1.9569124423963133, + "grad_norm": 1.2481992001614755, + "learning_rate": 2.563971028684886e-09, + "loss": 0.6399234533309937, + "step": 8493 + }, + { + "epoch": 1.9571428571428573, + "grad_norm": 1.1639751659112805, + "learning_rate": 2.536778791981553e-09, + "loss": 0.7642914056777954, + "step": 8494 + }, + { + "epoch": 1.9573732718894008, + "grad_norm": 1.218382512158835, + "learning_rate": 2.5097313357442806e-09, + "loss": 0.8284746408462524, + "step": 8495 + }, + { + "epoch": 1.9576036866359448, + "grad_norm": 1.2221524988832009, + "learning_rate": 2.4828286638989282e-09, + "loss": 0.6680238246917725, + "step": 8496 + }, + { + "epoch": 1.9578341013824885, + "grad_norm": 1.2965002342798193, + "learning_rate": 2.4560707803504834e-09, + "loss": 0.7621040344238281, + "step": 8497 + }, + { + "epoch": 1.9580645161290322, + "grad_norm": 1.2947556724815892, + "learning_rate": 2.4294576889827278e-09, + "loss": 0.7326159477233887, + "step": 8498 + }, + { + "epoch": 1.9582949308755762, + "grad_norm": 1.0656455780738308, + "learning_rate": 
2.4029893936586833e-09, + "loss": 0.6496877670288086, + "step": 8499 + }, + { + "epoch": 1.9585253456221197, + "grad_norm": 1.241192579535759, + "learning_rate": 2.376665898220054e-09, + "loss": 0.665170431137085, + "step": 8500 + } + ], + "logging_steps": 1, + "max_steps": 8680, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7099400653340672.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-8500/training_args.bin b/checkpoint-8500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7eb191dd44f853b2edd49aafea231852c267845 --- /dev/null +++ b/checkpoint-8500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f95b396ac9a3c4ab0d50e403be4c8c0fd191fd2a0aac0b5d95c7c3b72c8501b +size 6968 diff --git a/checkpoint-8500/zero_to_fp32.py b/checkpoint-8500/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-8500/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = 
sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, 
ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensor instead of torch tensor, which is more memory efficient. + Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model``: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-8600/README.md b/checkpoint-8600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-8600/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-8600/adapter_config.json b/checkpoint-8600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..07855d838b18d52ab3ab7a1ec1a852f57cf14fd8 --- /dev/null +++ b/checkpoint-8600/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.0.mlp.down_proj", + "v_proj", + "layers.10.mlp.gate_proj", + "layers.1.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.3.mlp.gate_proj", + "layers.5.mlp.down_proj", + "layers.8.mlp.up_proj", + "layers.4.mlp.down_proj", + "layers.1.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.25.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.23.mlp.down_proj", + "layers.15.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.26.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.9.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.12.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.16.mlp.gate_proj", + "layers.25.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.0.mlp.up_proj", + "layers.15.mlp.gate_proj", + "layers.9.mlp.gate_proj", + "layers.22.mlp.gate_proj", + "layers.24.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.14.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "k_proj", + "layers.27.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.19.mlp.up_proj", + "q_proj", + "layers.17.mlp.gate_proj", + "layers.0.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.27.mlp.up_proj", + "layers.2.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.6.mlp.up_proj", + "layers.21.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.10.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.2.mlp.up_proj", + "o_proj", + "layers.16.mlp.down_proj", + "layers.4.mlp.gate_proj", + "layers.20.mlp.gate_proj", + "layers.13.mlp.down_proj", + 
"layers.13.mlp.gate_proj", + "layers.22.mlp.up_proj", + "layers.11.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.23.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.17.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.6.mlp.gate_proj", + "layers.16.mlp.up_proj", + "layers.18.mlp.up_proj", + "layers.7.mlp.gate_proj", + "layers.8.mlp.down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-8600/adapter_model.safetensors b/checkpoint-8600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31d26b491832cd2ebfdfae7ac8bcb2a70e3764b0 --- /dev/null +++ b/checkpoint-8600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65856f6a3e95bbaa551081de7fa8e5b32bc11b86014e49d57b89ac2c87e0da80 +size 40428088 diff --git a/checkpoint-8600/chat_template.jinja b/checkpoint-8600/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-8600/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% 
set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-8600/global_step8600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-8600/global_step8600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b85e173dd98bbdb213fbe0ad01d9c42a333ac841 --- /dev/null +++ b/checkpoint-8600/global_step8600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c61a17589f5071d3d780721527415e3dbc4db3db9946af4ccce8bd1f42e707f +size 242224880 diff --git a/checkpoint-8600/global_step8600/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-8600/global_step8600/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a429a1063f3e169437082971e83c69a9ada88fd --- /dev/null +++ b/checkpoint-8600/global_step8600/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f075d95c92c7749bf6e51b7cbbd76a4398c551ca0d0b0eaa79d6910254aeea +size 460630 diff --git a/checkpoint-8600/latest b/checkpoint-8600/latest new file mode 100644 index 0000000000000000000000000000000000000000..bd2f68de4142a438c6f7e9678de46c517960b066 --- /dev/null +++ b/checkpoint-8600/latest @@ -0,0 +1 @@ +global_step8600 \ No newline at end of file diff --git a/checkpoint-8600/processor_config.json b/checkpoint-8600/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-8600/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-8600/rng_state.pth b/checkpoint-8600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fcf11c9b78de2c2c55fdfc44daef09cd9181c14 --- /dev/null +++ b/checkpoint-8600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc398a73e46bca50defc25b4467441315246a33383a5d6c80985d238e57127f +size 14244 diff --git a/checkpoint-8600/scheduler.pt b/checkpoint-8600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..81c75c2a8af102d7e28061a1bb5ea30b5eef469b --- /dev/null +++ b/checkpoint-8600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2372218038ba481d909db6d272af574353762cb9466cd7254ad15f32a8ba8e0 +size 
1000 diff --git a/checkpoint-8600/tokenizer.json b/checkpoint-8600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-8600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-8600/tokenizer_config.json b/checkpoint-8600/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-8600/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-8600/trainer_state.json b/checkpoint-8600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f3cd6f1aae7324f0fd9f15f676f3c3e01f445178 --- /dev/null +++ b/checkpoint-8600/trainer_state.json @@ -0,0 +1,60234 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9815668202764978, + "eval_steps": 500, + "global_step": 8600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002304147465437788, + "grad_norm": 
0.3584135221139379, + "learning_rate": 0.0, + "loss": 1.1575632095336914, + "step": 1 + }, + { + "epoch": 0.0004608294930875576, + "grad_norm": 0.3035367055626511, + "learning_rate": 4.6082949308755755e-09, + "loss": 0.9973502159118652, + "step": 2 + }, + { + "epoch": 0.0006912442396313364, + "grad_norm": 0.39685233086299543, + "learning_rate": 9.216589861751151e-09, + "loss": 1.0778999328613281, + "step": 3 + }, + { + "epoch": 0.0009216589861751152, + "grad_norm": 0.4029042979509503, + "learning_rate": 1.3824884792626728e-08, + "loss": 1.1912263631820679, + "step": 4 + }, + { + "epoch": 0.001152073732718894, + "grad_norm": 0.3943812894307851, + "learning_rate": 1.8433179723502302e-08, + "loss": 1.136031150817871, + "step": 5 + }, + { + "epoch": 0.0013824884792626728, + "grad_norm": 0.472718552613566, + "learning_rate": 2.304147465437788e-08, + "loss": 1.1647956371307373, + "step": 6 + }, + { + "epoch": 0.0016129032258064516, + "grad_norm": 0.4378363913681294, + "learning_rate": 2.7649769585253456e-08, + "loss": 1.144924283027649, + "step": 7 + }, + { + "epoch": 0.0018433179723502304, + "grad_norm": 0.412264706125121, + "learning_rate": 3.225806451612903e-08, + "loss": 1.1821019649505615, + "step": 8 + }, + { + "epoch": 0.0020737327188940094, + "grad_norm": 0.35864626774735575, + "learning_rate": 3.6866359447004604e-08, + "loss": 1.0586045980453491, + "step": 9 + }, + { + "epoch": 0.002304147465437788, + "grad_norm": 0.497058147699291, + "learning_rate": 4.1474654377880186e-08, + "loss": 1.2029818296432495, + "step": 10 + }, + { + "epoch": 0.002534562211981567, + "grad_norm": 0.465265464928516, + "learning_rate": 4.608294930875576e-08, + "loss": 1.1411634683609009, + "step": 11 + }, + { + "epoch": 0.0027649769585253456, + "grad_norm": 0.4356529753705429, + "learning_rate": 5.069124423963134e-08, + "loss": 1.2719087600708008, + "step": 12 + }, + { + "epoch": 0.0029953917050691246, + "grad_norm": 0.4469831586732583, + "learning_rate": 5.529953917050691e-08, + "loss": 
1.1132495403289795, + "step": 13 + }, + { + "epoch": 0.0032258064516129032, + "grad_norm": 0.3918942421249174, + "learning_rate": 5.990783410138249e-08, + "loss": 1.1900808811187744, + "step": 14 + }, + { + "epoch": 0.0034562211981566822, + "grad_norm": 0.33446734054876004, + "learning_rate": 6.451612903225806e-08, + "loss": 1.2273608446121216, + "step": 15 + }, + { + "epoch": 0.003686635944700461, + "grad_norm": 0.4610551419026991, + "learning_rate": 6.912442396313364e-08, + "loss": 1.2130601406097412, + "step": 16 + }, + { + "epoch": 0.00391705069124424, + "grad_norm": 0.4765520188128542, + "learning_rate": 7.373271889400921e-08, + "loss": 1.0534124374389648, + "step": 17 + }, + { + "epoch": 0.004147465437788019, + "grad_norm": 0.4247458361448018, + "learning_rate": 7.834101382488478e-08, + "loss": 1.1796221733093262, + "step": 18 + }, + { + "epoch": 0.004377880184331797, + "grad_norm": 0.42651087679972033, + "learning_rate": 8.294930875576037e-08, + "loss": 1.118175745010376, + "step": 19 + }, + { + "epoch": 0.004608294930875576, + "grad_norm": 0.37538111415149067, + "learning_rate": 8.755760368663594e-08, + "loss": 1.140963077545166, + "step": 20 + }, + { + "epoch": 0.004838709677419355, + "grad_norm": 0.39373769301837386, + "learning_rate": 9.216589861751152e-08, + "loss": 1.107339859008789, + "step": 21 + }, + { + "epoch": 0.005069124423963134, + "grad_norm": 0.5053900590341595, + "learning_rate": 9.677419354838709e-08, + "loss": 1.171803593635559, + "step": 22 + }, + { + "epoch": 0.005299539170506912, + "grad_norm": 0.32897537004851696, + "learning_rate": 1.0138248847926267e-07, + "loss": 0.9935251474380493, + "step": 23 + }, + { + "epoch": 0.005529953917050691, + "grad_norm": 0.4355535799950001, + "learning_rate": 1.0599078341013824e-07, + "loss": 1.0870952606201172, + "step": 24 + }, + { + "epoch": 0.00576036866359447, + "grad_norm": 0.5215895570336967, + "learning_rate": 1.1059907834101383e-07, + "loss": 1.1520278453826904, + "step": 25 + }, + { + 
"epoch": 0.005990783410138249, + "grad_norm": 0.4878994851998504, + "learning_rate": 1.152073732718894e-07, + "loss": 1.3603750467300415, + "step": 26 + }, + { + "epoch": 0.006221198156682027, + "grad_norm": 0.3985371704289713, + "learning_rate": 1.1981566820276498e-07, + "loss": 1.230550765991211, + "step": 27 + }, + { + "epoch": 0.0064516129032258064, + "grad_norm": 0.4105556408349015, + "learning_rate": 1.2442396313364054e-07, + "loss": 1.267604112625122, + "step": 28 + }, + { + "epoch": 0.0066820276497695855, + "grad_norm": 0.3604672745500653, + "learning_rate": 1.2903225806451611e-07, + "loss": 1.344348669052124, + "step": 29 + }, + { + "epoch": 0.0069124423963133645, + "grad_norm": 0.42234881975895605, + "learning_rate": 1.336405529953917e-07, + "loss": 1.2794291973114014, + "step": 30 + }, + { + "epoch": 0.007142857142857143, + "grad_norm": 0.39749887698930225, + "learning_rate": 1.3824884792626728e-07, + "loss": 1.2841103076934814, + "step": 31 + }, + { + "epoch": 0.007373271889400922, + "grad_norm": 0.34204310388035036, + "learning_rate": 1.4285714285714285e-07, + "loss": 1.1505224704742432, + "step": 32 + }, + { + "epoch": 0.007603686635944701, + "grad_norm": 0.36676388907062357, + "learning_rate": 1.4746543778801842e-07, + "loss": 0.9800833463668823, + "step": 33 + }, + { + "epoch": 0.00783410138248848, + "grad_norm": 0.4216809539302965, + "learning_rate": 1.52073732718894e-07, + "loss": 1.3712589740753174, + "step": 34 + }, + { + "epoch": 0.008064516129032258, + "grad_norm": 0.46644559931224167, + "learning_rate": 1.5668202764976955e-07, + "loss": 1.2274689674377441, + "step": 35 + }, + { + "epoch": 0.008294930875576038, + "grad_norm": 0.41359150478695417, + "learning_rate": 1.6129032258064515e-07, + "loss": 1.0673755407333374, + "step": 36 + }, + { + "epoch": 0.008525345622119816, + "grad_norm": 0.534062363030203, + "learning_rate": 1.6589861751152074e-07, + "loss": 1.242164134979248, + "step": 37 + }, + { + "epoch": 0.008755760368663594, + 
"grad_norm": 0.48756247774131056, + "learning_rate": 1.705069124423963e-07, + "loss": 1.190554141998291, + "step": 38 + }, + { + "epoch": 0.008986175115207374, + "grad_norm": 0.35848282094721656, + "learning_rate": 1.7511520737327188e-07, + "loss": 1.3119773864746094, + "step": 39 + }, + { + "epoch": 0.009216589861751152, + "grad_norm": 0.4466769921356875, + "learning_rate": 1.7972350230414745e-07, + "loss": 1.2532517910003662, + "step": 40 + }, + { + "epoch": 0.00944700460829493, + "grad_norm": 0.4271763580587928, + "learning_rate": 1.8433179723502305e-07, + "loss": 1.307154655456543, + "step": 41 + }, + { + "epoch": 0.00967741935483871, + "grad_norm": 0.432221455567464, + "learning_rate": 1.889400921658986e-07, + "loss": 1.1899281740188599, + "step": 42 + }, + { + "epoch": 0.009907834101382488, + "grad_norm": 0.48501644393966153, + "learning_rate": 1.9354838709677418e-07, + "loss": 1.1928249597549438, + "step": 43 + }, + { + "epoch": 0.010138248847926268, + "grad_norm": 0.35170632131851265, + "learning_rate": 1.9815668202764975e-07, + "loss": 1.1663157939910889, + "step": 44 + }, + { + "epoch": 0.010368663594470046, + "grad_norm": 0.43449129429745276, + "learning_rate": 2.0276497695852535e-07, + "loss": 1.1806118488311768, + "step": 45 + }, + { + "epoch": 0.010599078341013824, + "grad_norm": 0.39933118678172597, + "learning_rate": 2.073732718894009e-07, + "loss": 1.1704952716827393, + "step": 46 + }, + { + "epoch": 0.010829493087557604, + "grad_norm": 0.46071358975984034, + "learning_rate": 2.1198156682027649e-07, + "loss": 1.2124149799346924, + "step": 47 + }, + { + "epoch": 0.011059907834101382, + "grad_norm": 0.325920139351066, + "learning_rate": 2.1658986175115208e-07, + "loss": 1.041813850402832, + "step": 48 + }, + { + "epoch": 0.01129032258064516, + "grad_norm": 0.4189805583015969, + "learning_rate": 2.2119815668202765e-07, + "loss": 1.255402684211731, + "step": 49 + }, + { + "epoch": 0.01152073732718894, + "grad_norm": 0.369986826532368, + 
"learning_rate": 2.2580645161290322e-07, + "loss": 1.1115221977233887, + "step": 50 + }, + { + "epoch": 0.011751152073732719, + "grad_norm": 0.501835295036206, + "learning_rate": 2.304147465437788e-07, + "loss": 1.4048426151275635, + "step": 51 + }, + { + "epoch": 0.011981566820276499, + "grad_norm": 0.38759638044019523, + "learning_rate": 2.3502304147465438e-07, + "loss": 1.1690936088562012, + "step": 52 + }, + { + "epoch": 0.012211981566820277, + "grad_norm": 0.43771993971927803, + "learning_rate": 2.3963133640552995e-07, + "loss": 1.164888620376587, + "step": 53 + }, + { + "epoch": 0.012442396313364055, + "grad_norm": 0.5047093250847474, + "learning_rate": 2.442396313364055e-07, + "loss": 1.004424810409546, + "step": 54 + }, + { + "epoch": 0.012672811059907835, + "grad_norm": 0.371768250028493, + "learning_rate": 2.488479262672811e-07, + "loss": 0.8810856342315674, + "step": 55 + }, + { + "epoch": 0.012903225806451613, + "grad_norm": 0.41437582347111235, + "learning_rate": 2.534562211981567e-07, + "loss": 1.300262451171875, + "step": 56 + }, + { + "epoch": 0.013133640552995391, + "grad_norm": 0.44923919860912964, + "learning_rate": 2.5806451612903223e-07, + "loss": 1.3624285459518433, + "step": 57 + }, + { + "epoch": 0.013364055299539171, + "grad_norm": 0.37916325568511644, + "learning_rate": 2.6267281105990777e-07, + "loss": 1.2133375406265259, + "step": 58 + }, + { + "epoch": 0.013594470046082949, + "grad_norm": 0.3665676434937369, + "learning_rate": 2.672811059907834e-07, + "loss": 1.2203283309936523, + "step": 59 + }, + { + "epoch": 0.013824884792626729, + "grad_norm": 0.4314731168039537, + "learning_rate": 2.7188940092165896e-07, + "loss": 1.291412353515625, + "step": 60 + }, + { + "epoch": 0.014055299539170507, + "grad_norm": 0.46787898249820037, + "learning_rate": 2.7649769585253456e-07, + "loss": 1.1596577167510986, + "step": 61 + }, + { + "epoch": 0.014285714285714285, + "grad_norm": 0.34850075759056304, + "learning_rate": 2.8110599078341015e-07, + 
"loss": 0.9789823889732361, + "step": 62 + }, + { + "epoch": 0.014516129032258065, + "grad_norm": 0.46810420323672, + "learning_rate": 2.857142857142857e-07, + "loss": 1.220383882522583, + "step": 63 + }, + { + "epoch": 0.014746543778801843, + "grad_norm": 0.36577992953429955, + "learning_rate": 2.903225806451613e-07, + "loss": 1.0961871147155762, + "step": 64 + }, + { + "epoch": 0.014976958525345621, + "grad_norm": 0.4155727286496237, + "learning_rate": 2.9493087557603683e-07, + "loss": 1.2281936407089233, + "step": 65 + }, + { + "epoch": 0.015207373271889401, + "grad_norm": 0.48770399467414544, + "learning_rate": 2.9953917050691243e-07, + "loss": 1.279728889465332, + "step": 66 + }, + { + "epoch": 0.01543778801843318, + "grad_norm": 0.3697109399388579, + "learning_rate": 3.04147465437788e-07, + "loss": 1.0932798385620117, + "step": 67 + }, + { + "epoch": 0.01566820276497696, + "grad_norm": 0.4768828309013543, + "learning_rate": 3.0875576036866356e-07, + "loss": 1.1612955331802368, + "step": 68 + }, + { + "epoch": 0.015898617511520736, + "grad_norm": 0.335260500319883, + "learning_rate": 3.133640552995391e-07, + "loss": 1.193152666091919, + "step": 69 + }, + { + "epoch": 0.016129032258064516, + "grad_norm": 0.3754577001974335, + "learning_rate": 3.1797235023041476e-07, + "loss": 1.3303695917129517, + "step": 70 + }, + { + "epoch": 0.016359447004608296, + "grad_norm": 0.5384978005623245, + "learning_rate": 3.225806451612903e-07, + "loss": 1.3735731840133667, + "step": 71 + }, + { + "epoch": 0.016589861751152075, + "grad_norm": 0.44147085813841874, + "learning_rate": 3.271889400921659e-07, + "loss": 1.162925124168396, + "step": 72 + }, + { + "epoch": 0.016820276497695852, + "grad_norm": 0.46260262466297236, + "learning_rate": 3.317972350230415e-07, + "loss": 1.3879203796386719, + "step": 73 + }, + { + "epoch": 0.017050691244239632, + "grad_norm": 0.33864035083037825, + "learning_rate": 3.3640552995391703e-07, + "loss": 1.2721638679504395, + "step": 74 + }, + { + 
"epoch": 0.01728110599078341, + "grad_norm": 0.5797449954735189, + "learning_rate": 3.410138248847926e-07, + "loss": 1.3997783660888672, + "step": 75 + }, + { + "epoch": 0.017511520737327188, + "grad_norm": 0.3824734589731608, + "learning_rate": 3.4562211981566817e-07, + "loss": 1.1099059581756592, + "step": 76 + }, + { + "epoch": 0.017741935483870968, + "grad_norm": 0.6286343528066216, + "learning_rate": 3.5023041474654376e-07, + "loss": 1.341759204864502, + "step": 77 + }, + { + "epoch": 0.017972350230414748, + "grad_norm": 0.41058458963409694, + "learning_rate": 3.5483870967741936e-07, + "loss": 1.343479871749878, + "step": 78 + }, + { + "epoch": 0.018202764976958524, + "grad_norm": 0.41653629518149576, + "learning_rate": 3.594470046082949e-07, + "loss": 1.2225772142410278, + "step": 79 + }, + { + "epoch": 0.018433179723502304, + "grad_norm": 0.37871730557010347, + "learning_rate": 3.6405529953917044e-07, + "loss": 1.1934573650360107, + "step": 80 + }, + { + "epoch": 0.018663594470046084, + "grad_norm": 0.36930989407616927, + "learning_rate": 3.686635944700461e-07, + "loss": 1.099440336227417, + "step": 81 + }, + { + "epoch": 0.01889400921658986, + "grad_norm": 0.4445938548359885, + "learning_rate": 3.7327188940092163e-07, + "loss": 1.0864269733428955, + "step": 82 + }, + { + "epoch": 0.01912442396313364, + "grad_norm": 0.4183127094774659, + "learning_rate": 3.778801843317972e-07, + "loss": 1.0706703662872314, + "step": 83 + }, + { + "epoch": 0.01935483870967742, + "grad_norm": 0.3377183372891763, + "learning_rate": 3.824884792626728e-07, + "loss": 1.1675662994384766, + "step": 84 + }, + { + "epoch": 0.019585253456221197, + "grad_norm": 0.4219766455348787, + "learning_rate": 3.8709677419354837e-07, + "loss": 1.3294553756713867, + "step": 85 + }, + { + "epoch": 0.019815668202764977, + "grad_norm": 0.39357768126078463, + "learning_rate": 3.9170506912442396e-07, + "loss": 1.050878882408142, + "step": 86 + }, + { + "epoch": 0.020046082949308756, + "grad_norm": 
0.5263429396452582, + "learning_rate": 3.963133640552995e-07, + "loss": 1.3243739604949951, + "step": 87 + }, + { + "epoch": 0.020276497695852536, + "grad_norm": 0.4373425676890139, + "learning_rate": 4.009216589861751e-07, + "loss": 1.1350429058074951, + "step": 88 + }, + { + "epoch": 0.020506912442396313, + "grad_norm": 0.39555461421299365, + "learning_rate": 4.055299539170507e-07, + "loss": 1.24526047706604, + "step": 89 + }, + { + "epoch": 0.020737327188940093, + "grad_norm": 0.5372699223271491, + "learning_rate": 4.1013824884792624e-07, + "loss": 1.3459908962249756, + "step": 90 + }, + { + "epoch": 0.020967741935483872, + "grad_norm": 0.45711998906450413, + "learning_rate": 4.147465437788018e-07, + "loss": 1.2129223346710205, + "step": 91 + }, + { + "epoch": 0.02119815668202765, + "grad_norm": 0.396171288478396, + "learning_rate": 4.1935483870967743e-07, + "loss": 1.0522969961166382, + "step": 92 + }, + { + "epoch": 0.02142857142857143, + "grad_norm": 0.4102245507283394, + "learning_rate": 4.2396313364055297e-07, + "loss": 1.3128937482833862, + "step": 93 + }, + { + "epoch": 0.02165898617511521, + "grad_norm": 0.4498995421630644, + "learning_rate": 4.285714285714285e-07, + "loss": 1.3582855463027954, + "step": 94 + }, + { + "epoch": 0.021889400921658985, + "grad_norm": 0.401280081593378, + "learning_rate": 4.3317972350230416e-07, + "loss": 1.3959028720855713, + "step": 95 + }, + { + "epoch": 0.022119815668202765, + "grad_norm": 0.34811166324547105, + "learning_rate": 4.377880184331797e-07, + "loss": 1.149501085281372, + "step": 96 + }, + { + "epoch": 0.022350230414746545, + "grad_norm": 0.48133121679013907, + "learning_rate": 4.423963133640553e-07, + "loss": 1.024135708808899, + "step": 97 + }, + { + "epoch": 0.02258064516129032, + "grad_norm": 0.42298775317954185, + "learning_rate": 4.4700460829493084e-07, + "loss": 0.9255483150482178, + "step": 98 + }, + { + "epoch": 0.0228110599078341, + "grad_norm": 0.4429779269301727, + "learning_rate": 
4.5161290322580644e-07, + "loss": 1.1694722175598145, + "step": 99 + }, + { + "epoch": 0.02304147465437788, + "grad_norm": 0.5257102588195529, + "learning_rate": 4.5622119815668203e-07, + "loss": 1.1588457822799683, + "step": 100 + }, + { + "epoch": 0.023271889400921657, + "grad_norm": 0.37478821324150746, + "learning_rate": 4.608294930875576e-07, + "loss": 1.172672986984253, + "step": 101 + }, + { + "epoch": 0.023502304147465437, + "grad_norm": 0.5416446977134604, + "learning_rate": 4.654377880184331e-07, + "loss": 1.092405915260315, + "step": 102 + }, + { + "epoch": 0.023732718894009217, + "grad_norm": 0.40304171727239163, + "learning_rate": 4.7004608294930877e-07, + "loss": 1.11540687084198, + "step": 103 + }, + { + "epoch": 0.023963133640552997, + "grad_norm": 0.46185115643683655, + "learning_rate": 4.746543778801843e-07, + "loss": 1.1380189657211304, + "step": 104 + }, + { + "epoch": 0.024193548387096774, + "grad_norm": 0.4705857339336588, + "learning_rate": 4.792626728110599e-07, + "loss": 1.1031086444854736, + "step": 105 + }, + { + "epoch": 0.024423963133640553, + "grad_norm": 0.38094574356569405, + "learning_rate": 4.838709677419355e-07, + "loss": 1.1988024711608887, + "step": 106 + }, + { + "epoch": 0.024654377880184333, + "grad_norm": 0.48794686062473364, + "learning_rate": 4.88479262672811e-07, + "loss": 1.0814614295959473, + "step": 107 + }, + { + "epoch": 0.02488479262672811, + "grad_norm": 0.41304010922593737, + "learning_rate": 4.930875576036866e-07, + "loss": 1.0541695356369019, + "step": 108 + }, + { + "epoch": 0.02511520737327189, + "grad_norm": 0.4262047073398665, + "learning_rate": 4.976958525345622e-07, + "loss": 1.2281692028045654, + "step": 109 + }, + { + "epoch": 0.02534562211981567, + "grad_norm": 0.4617413170072456, + "learning_rate": 5.023041474654378e-07, + "loss": 1.2542369365692139, + "step": 110 + }, + { + "epoch": 0.025576036866359446, + "grad_norm": 0.46571699511286535, + "learning_rate": 5.069124423963134e-07, + "loss": 
1.36039137840271, + "step": 111 + }, + { + "epoch": 0.025806451612903226, + "grad_norm": 0.3893860976585314, + "learning_rate": 5.11520737327189e-07, + "loss": 1.1092976331710815, + "step": 112 + }, + { + "epoch": 0.026036866359447006, + "grad_norm": 0.4636216593448083, + "learning_rate": 5.161290322580645e-07, + "loss": 1.0634076595306396, + "step": 113 + }, + { + "epoch": 0.026267281105990782, + "grad_norm": 0.3440530135190564, + "learning_rate": 5.2073732718894e-07, + "loss": 1.0024809837341309, + "step": 114 + }, + { + "epoch": 0.026497695852534562, + "grad_norm": 0.4346835070660911, + "learning_rate": 5.253456221198155e-07, + "loss": 1.1691724061965942, + "step": 115 + }, + { + "epoch": 0.026728110599078342, + "grad_norm": 0.46992230717269323, + "learning_rate": 5.299539170506912e-07, + "loss": 1.2053219079971313, + "step": 116 + }, + { + "epoch": 0.02695852534562212, + "grad_norm": 0.3668719861525143, + "learning_rate": 5.345622119815668e-07, + "loss": 1.119420051574707, + "step": 117 + }, + { + "epoch": 0.027188940092165898, + "grad_norm": 0.44063509410116297, + "learning_rate": 5.391705069124423e-07, + "loss": 1.1640167236328125, + "step": 118 + }, + { + "epoch": 0.027419354838709678, + "grad_norm": 0.41158620514350025, + "learning_rate": 5.437788018433179e-07, + "loss": 1.180116057395935, + "step": 119 + }, + { + "epoch": 0.027649769585253458, + "grad_norm": 0.4684655855415561, + "learning_rate": 5.483870967741935e-07, + "loss": 1.0726159811019897, + "step": 120 + }, + { + "epoch": 0.027880184331797234, + "grad_norm": 0.44443528947779826, + "learning_rate": 5.529953917050691e-07, + "loss": 1.03219473361969, + "step": 121 + }, + { + "epoch": 0.028110599078341014, + "grad_norm": 0.4615930748718386, + "learning_rate": 5.576036866359447e-07, + "loss": 1.1545735597610474, + "step": 122 + }, + { + "epoch": 0.028341013824884794, + "grad_norm": 0.4154044637047318, + "learning_rate": 5.622119815668203e-07, + "loss": 1.2409746646881104, + "step": 123 + }, + { + 
"epoch": 0.02857142857142857, + "grad_norm": 0.48642203067509454, + "learning_rate": 5.668202764976958e-07, + "loss": 1.2717409133911133, + "step": 124 + }, + { + "epoch": 0.02880184331797235, + "grad_norm": 0.5633308049530943, + "learning_rate": 5.714285714285714e-07, + "loss": 1.523846983909607, + "step": 125 + }, + { + "epoch": 0.02903225806451613, + "grad_norm": 0.47068700261388136, + "learning_rate": 5.760368663594469e-07, + "loss": 1.3386890888214111, + "step": 126 + }, + { + "epoch": 0.029262672811059907, + "grad_norm": 0.5199142981609907, + "learning_rate": 5.806451612903226e-07, + "loss": 1.3080404996871948, + "step": 127 + }, + { + "epoch": 0.029493087557603687, + "grad_norm": 0.530224330517059, + "learning_rate": 5.852534562211982e-07, + "loss": 1.3194537162780762, + "step": 128 + }, + { + "epoch": 0.029723502304147466, + "grad_norm": 0.49119251759787413, + "learning_rate": 5.898617511520737e-07, + "loss": 1.0546228885650635, + "step": 129 + }, + { + "epoch": 0.029953917050691243, + "grad_norm": 0.44238233872112126, + "learning_rate": 5.944700460829493e-07, + "loss": 1.3160395622253418, + "step": 130 + }, + { + "epoch": 0.030184331797235023, + "grad_norm": 0.5551864793339897, + "learning_rate": 5.990783410138249e-07, + "loss": 1.3497555255889893, + "step": 131 + }, + { + "epoch": 0.030414746543778803, + "grad_norm": 0.41383181378393813, + "learning_rate": 6.036866359447004e-07, + "loss": 1.0863350629806519, + "step": 132 + }, + { + "epoch": 0.03064516129032258, + "grad_norm": 0.4913368059485873, + "learning_rate": 6.08294930875576e-07, + "loss": 1.1640913486480713, + "step": 133 + }, + { + "epoch": 0.03087557603686636, + "grad_norm": 0.4309615007654084, + "learning_rate": 6.129032258064516e-07, + "loss": 1.398510217666626, + "step": 134 + }, + { + "epoch": 0.03110599078341014, + "grad_norm": 0.46249423735581563, + "learning_rate": 6.175115207373271e-07, + "loss": 1.3015594482421875, + "step": 135 + }, + { + "epoch": 0.03133640552995392, + "grad_norm": 
0.5511951371835903, + "learning_rate": 6.221198156682027e-07, + "loss": 1.2786016464233398, + "step": 136 + }, + { + "epoch": 0.031566820276497695, + "grad_norm": 0.35056112177409643, + "learning_rate": 6.267281105990782e-07, + "loss": 1.0863161087036133, + "step": 137 + }, + { + "epoch": 0.03179723502304147, + "grad_norm": 0.49469780540978775, + "learning_rate": 6.313364055299539e-07, + "loss": 1.1590030193328857, + "step": 138 + }, + { + "epoch": 0.032027649769585255, + "grad_norm": 0.4498097850802204, + "learning_rate": 6.359447004608295e-07, + "loss": 1.2473185062408447, + "step": 139 + }, + { + "epoch": 0.03225806451612903, + "grad_norm": 0.46996183926649465, + "learning_rate": 6.40552995391705e-07, + "loss": 1.1982496976852417, + "step": 140 + }, + { + "epoch": 0.03248847926267281, + "grad_norm": 0.39627654459475076, + "learning_rate": 6.451612903225806e-07, + "loss": 1.078690528869629, + "step": 141 + }, + { + "epoch": 0.03271889400921659, + "grad_norm": 0.4831308537053794, + "learning_rate": 6.497695852534562e-07, + "loss": 1.1540311574935913, + "step": 142 + }, + { + "epoch": 0.03294930875576037, + "grad_norm": 0.4510531995801552, + "learning_rate": 6.543778801843318e-07, + "loss": 1.319035530090332, + "step": 143 + }, + { + "epoch": 0.03317972350230415, + "grad_norm": 0.46683155201608206, + "learning_rate": 6.589861751152074e-07, + "loss": 1.199448585510254, + "step": 144 + }, + { + "epoch": 0.03341013824884793, + "grad_norm": 0.526397133846452, + "learning_rate": 6.63594470046083e-07, + "loss": 1.212646484375, + "step": 145 + }, + { + "epoch": 0.033640552995391704, + "grad_norm": 0.6339080221663279, + "learning_rate": 6.682027649769585e-07, + "loss": 1.2833064794540405, + "step": 146 + }, + { + "epoch": 0.03387096774193549, + "grad_norm": 0.6111094782416204, + "learning_rate": 6.728110599078341e-07, + "loss": 1.2852118015289307, + "step": 147 + }, + { + "epoch": 0.034101382488479264, + "grad_norm": 0.36790627555446376, + "learning_rate": 
6.774193548387096e-07, + "loss": 1.0287699699401855, + "step": 148 + }, + { + "epoch": 0.03433179723502304, + "grad_norm": 0.4705970251054534, + "learning_rate": 6.820276497695853e-07, + "loss": 1.2580914497375488, + "step": 149 + }, + { + "epoch": 0.03456221198156682, + "grad_norm": 0.4446865658925291, + "learning_rate": 6.866359447004608e-07, + "loss": 1.0557801723480225, + "step": 150 + }, + { + "epoch": 0.0347926267281106, + "grad_norm": 0.4962737867323335, + "learning_rate": 6.912442396313363e-07, + "loss": 1.1820557117462158, + "step": 151 + }, + { + "epoch": 0.035023041474654376, + "grad_norm": 0.4496579463689646, + "learning_rate": 6.958525345622119e-07, + "loss": 1.2777981758117676, + "step": 152 + }, + { + "epoch": 0.03525345622119816, + "grad_norm": 0.4664315599937052, + "learning_rate": 7.004608294930875e-07, + "loss": 1.1465356349945068, + "step": 153 + }, + { + "epoch": 0.035483870967741936, + "grad_norm": 0.5245233624695497, + "learning_rate": 7.05069124423963e-07, + "loss": 1.3553744554519653, + "step": 154 + }, + { + "epoch": 0.03571428571428571, + "grad_norm": 0.5474513239817841, + "learning_rate": 7.096774193548387e-07, + "loss": 1.176223874092102, + "step": 155 + }, + { + "epoch": 0.035944700460829496, + "grad_norm": 0.4022708922904972, + "learning_rate": 7.142857142857143e-07, + "loss": 1.1771761178970337, + "step": 156 + }, + { + "epoch": 0.03617511520737327, + "grad_norm": 0.5000685120319052, + "learning_rate": 7.188940092165898e-07, + "loss": 1.1598860025405884, + "step": 157 + }, + { + "epoch": 0.03640552995391705, + "grad_norm": 0.4955460688514832, + "learning_rate": 7.235023041474654e-07, + "loss": 1.0689195394515991, + "step": 158 + }, + { + "epoch": 0.03663594470046083, + "grad_norm": 0.5324202700222229, + "learning_rate": 7.281105990783409e-07, + "loss": 1.1444990634918213, + "step": 159 + }, + { + "epoch": 0.03686635944700461, + "grad_norm": 0.441885052912425, + "learning_rate": 7.327188940092166e-07, + "loss": 1.2261321544647217, + 
"step": 160 + }, + { + "epoch": 0.037096774193548385, + "grad_norm": 0.47946473640002796, + "learning_rate": 7.373271889400922e-07, + "loss": 0.9325876235961914, + "step": 161 + }, + { + "epoch": 0.03732718894009217, + "grad_norm": 0.46688477365444836, + "learning_rate": 7.419354838709677e-07, + "loss": 1.071167230606079, + "step": 162 + }, + { + "epoch": 0.037557603686635944, + "grad_norm": 0.5188018198616766, + "learning_rate": 7.465437788018433e-07, + "loss": 1.1856298446655273, + "step": 163 + }, + { + "epoch": 0.03778801843317972, + "grad_norm": 0.5279511073474723, + "learning_rate": 7.511520737327189e-07, + "loss": 1.13883376121521, + "step": 164 + }, + { + "epoch": 0.038018433179723504, + "grad_norm": 0.4671725091927055, + "learning_rate": 7.557603686635944e-07, + "loss": 1.2896685600280762, + "step": 165 + }, + { + "epoch": 0.03824884792626728, + "grad_norm": 0.6286776240106037, + "learning_rate": 7.603686635944701e-07, + "loss": 1.3122754096984863, + "step": 166 + }, + { + "epoch": 0.03847926267281106, + "grad_norm": 0.5120060171404104, + "learning_rate": 7.649769585253457e-07, + "loss": 1.165675163269043, + "step": 167 + }, + { + "epoch": 0.03870967741935484, + "grad_norm": 0.5132036652169082, + "learning_rate": 7.695852534562211e-07, + "loss": 1.1348214149475098, + "step": 168 + }, + { + "epoch": 0.03894009216589862, + "grad_norm": 0.5816469452243797, + "learning_rate": 7.741935483870967e-07, + "loss": 1.287818431854248, + "step": 169 + }, + { + "epoch": 0.03917050691244239, + "grad_norm": 0.4886112893618036, + "learning_rate": 7.788018433179722e-07, + "loss": 1.0723031759262085, + "step": 170 + }, + { + "epoch": 0.03940092165898618, + "grad_norm": 0.5572220637370465, + "learning_rate": 7.834101382488479e-07, + "loss": 1.29054594039917, + "step": 171 + }, + { + "epoch": 0.03963133640552995, + "grad_norm": 0.4996602061858042, + "learning_rate": 7.880184331797235e-07, + "loss": 1.201147198677063, + "step": 172 + }, + { + "epoch": 0.03986175115207373, + 
"grad_norm": 0.47488604971715725, + "learning_rate": 7.92626728110599e-07, + "loss": 1.2529574632644653, + "step": 173 + }, + { + "epoch": 0.04009216589861751, + "grad_norm": 0.5420947446150967, + "learning_rate": 7.972350230414746e-07, + "loss": 1.3255105018615723, + "step": 174 + }, + { + "epoch": 0.04032258064516129, + "grad_norm": 0.5367164884336, + "learning_rate": 8.018433179723502e-07, + "loss": 1.3167433738708496, + "step": 175 + }, + { + "epoch": 0.04055299539170507, + "grad_norm": 0.5124027812324866, + "learning_rate": 8.064516129032257e-07, + "loss": 1.4780502319335938, + "step": 176 + }, + { + "epoch": 0.04078341013824885, + "grad_norm": 0.49049200777499574, + "learning_rate": 8.110599078341014e-07, + "loss": 1.3096996545791626, + "step": 177 + }, + { + "epoch": 0.041013824884792625, + "grad_norm": 0.5684690759624818, + "learning_rate": 8.15668202764977e-07, + "loss": 1.3124895095825195, + "step": 178 + }, + { + "epoch": 0.04124423963133641, + "grad_norm": 0.5746940747619091, + "learning_rate": 8.202764976958525e-07, + "loss": 1.2589681148529053, + "step": 179 + }, + { + "epoch": 0.041474654377880185, + "grad_norm": 0.5351550863930432, + "learning_rate": 8.248847926267281e-07, + "loss": 1.0576659440994263, + "step": 180 + }, + { + "epoch": 0.04170506912442396, + "grad_norm": 0.5804930108989373, + "learning_rate": 8.294930875576036e-07, + "loss": 1.2647404670715332, + "step": 181 + }, + { + "epoch": 0.041935483870967745, + "grad_norm": 0.5527713530674592, + "learning_rate": 8.341013824884793e-07, + "loss": 1.072542428970337, + "step": 182 + }, + { + "epoch": 0.04216589861751152, + "grad_norm": 0.636913740412271, + "learning_rate": 8.387096774193549e-07, + "loss": 1.2417643070220947, + "step": 183 + }, + { + "epoch": 0.0423963133640553, + "grad_norm": 0.4636179655744076, + "learning_rate": 8.433179723502303e-07, + "loss": 1.2490241527557373, + "step": 184 + }, + { + "epoch": 0.04262672811059908, + "grad_norm": 0.5714553493227277, + "learning_rate": 
8.479262672811059e-07, + "loss": 1.1169328689575195, + "step": 185 + }, + { + "epoch": 0.04285714285714286, + "grad_norm": 0.5893436962226742, + "learning_rate": 8.525345622119815e-07, + "loss": 1.1799774169921875, + "step": 186 + }, + { + "epoch": 0.043087557603686634, + "grad_norm": 0.4840759402042485, + "learning_rate": 8.57142857142857e-07, + "loss": 0.9655753374099731, + "step": 187 + }, + { + "epoch": 0.04331797235023042, + "grad_norm": 0.5473512318665162, + "learning_rate": 8.617511520737327e-07, + "loss": 1.2863562107086182, + "step": 188 + }, + { + "epoch": 0.043548387096774194, + "grad_norm": 0.5971573505450626, + "learning_rate": 8.663594470046083e-07, + "loss": 1.056877613067627, + "step": 189 + }, + { + "epoch": 0.04377880184331797, + "grad_norm": 0.5903656134268881, + "learning_rate": 8.709677419354838e-07, + "loss": 1.2128019332885742, + "step": 190 + }, + { + "epoch": 0.044009216589861753, + "grad_norm": 0.5042165136835149, + "learning_rate": 8.755760368663594e-07, + "loss": 1.1397441625595093, + "step": 191 + }, + { + "epoch": 0.04423963133640553, + "grad_norm": 0.5007324461761941, + "learning_rate": 8.801843317972349e-07, + "loss": 1.062232255935669, + "step": 192 + }, + { + "epoch": 0.044470046082949306, + "grad_norm": 0.5077694656116347, + "learning_rate": 8.847926267281106e-07, + "loss": 1.0102736949920654, + "step": 193 + }, + { + "epoch": 0.04470046082949309, + "grad_norm": 0.5039275409209952, + "learning_rate": 8.894009216589862e-07, + "loss": 1.155517339706421, + "step": 194 + }, + { + "epoch": 0.044930875576036866, + "grad_norm": 0.4568536555143312, + "learning_rate": 8.940092165898617e-07, + "loss": 1.042372703552246, + "step": 195 + }, + { + "epoch": 0.04516129032258064, + "grad_norm": 0.6118356615587064, + "learning_rate": 8.986175115207373e-07, + "loss": 1.1158320903778076, + "step": 196 + }, + { + "epoch": 0.045391705069124426, + "grad_norm": 0.6547758969058546, + "learning_rate": 9.032258064516129e-07, + "loss": 1.4693050384521484, + 
"step": 197 + }, + { + "epoch": 0.0456221198156682, + "grad_norm": 0.5189200191294998, + "learning_rate": 9.078341013824884e-07, + "loss": 1.0990574359893799, + "step": 198 + }, + { + "epoch": 0.04585253456221198, + "grad_norm": 0.5123720508165549, + "learning_rate": 9.124423963133641e-07, + "loss": 1.0259861946105957, + "step": 199 + }, + { + "epoch": 0.04608294930875576, + "grad_norm": 0.4638504791285932, + "learning_rate": 9.170506912442397e-07, + "loss": 1.2708477973937988, + "step": 200 + }, + { + "epoch": 0.04631336405529954, + "grad_norm": 0.426472351706666, + "learning_rate": 9.216589861751152e-07, + "loss": 1.052978754043579, + "step": 201 + }, + { + "epoch": 0.046543778801843315, + "grad_norm": 0.5548008737632977, + "learning_rate": 9.262672811059907e-07, + "loss": 1.3405938148498535, + "step": 202 + }, + { + "epoch": 0.0467741935483871, + "grad_norm": 0.4311530218247671, + "learning_rate": 9.308755760368662e-07, + "loss": 0.9464558362960815, + "step": 203 + }, + { + "epoch": 0.047004608294930875, + "grad_norm": 0.6377195135282403, + "learning_rate": 9.354838709677418e-07, + "loss": 1.3019077777862549, + "step": 204 + }, + { + "epoch": 0.04723502304147465, + "grad_norm": 0.6029329005096047, + "learning_rate": 9.400921658986175e-07, + "loss": 1.146841049194336, + "step": 205 + }, + { + "epoch": 0.047465437788018434, + "grad_norm": 0.6136536598800337, + "learning_rate": 9.44700460829493e-07, + "loss": 1.106084942817688, + "step": 206 + }, + { + "epoch": 0.04769585253456221, + "grad_norm": 0.6661299934206126, + "learning_rate": 9.493087557603686e-07, + "loss": 1.2930629253387451, + "step": 207 + }, + { + "epoch": 0.047926267281105994, + "grad_norm": 0.5555271013101563, + "learning_rate": 9.539170506912442e-07, + "loss": 1.1637842655181885, + "step": 208 + }, + { + "epoch": 0.04815668202764977, + "grad_norm": 0.444081897230925, + "learning_rate": 9.585253456221198e-07, + "loss": 1.1753308773040771, + "step": 209 + }, + { + "epoch": 0.04838709677419355, + 
"grad_norm": 0.5362299776231612, + "learning_rate": 9.631336405529954e-07, + "loss": 1.2304046154022217, + "step": 210 + }, + { + "epoch": 0.04861751152073733, + "grad_norm": 0.6898819231347578, + "learning_rate": 9.67741935483871e-07, + "loss": 1.4326789379119873, + "step": 211 + }, + { + "epoch": 0.04884792626728111, + "grad_norm": 0.614044501232848, + "learning_rate": 9.723502304147466e-07, + "loss": 1.0759861469268799, + "step": 212 + }, + { + "epoch": 0.04907834101382488, + "grad_norm": 0.5971609176488232, + "learning_rate": 9.76958525345622e-07, + "loss": 1.1514811515808105, + "step": 213 + }, + { + "epoch": 0.04930875576036867, + "grad_norm": 0.49252816443356506, + "learning_rate": 9.815668202764976e-07, + "loss": 1.1618578433990479, + "step": 214 + }, + { + "epoch": 0.04953917050691244, + "grad_norm": 0.5677669382006955, + "learning_rate": 9.861751152073732e-07, + "loss": 1.0321345329284668, + "step": 215 + }, + { + "epoch": 0.04976958525345622, + "grad_norm": 0.4551655972629908, + "learning_rate": 9.907834101382488e-07, + "loss": 1.0391438007354736, + "step": 216 + }, + { + "epoch": 0.05, + "grad_norm": 0.6188957189455181, + "learning_rate": 9.953917050691244e-07, + "loss": 1.080418586730957, + "step": 217 + }, + { + "epoch": 0.05023041474654378, + "grad_norm": 0.6531841586974683, + "learning_rate": 1e-06, + "loss": 1.2095223665237427, + "step": 218 + }, + { + "epoch": 0.050460829493087556, + "grad_norm": 0.5036313537560552, + "learning_rate": 1.0046082949308756e-06, + "loss": 1.1144485473632812, + "step": 219 + }, + { + "epoch": 0.05069124423963134, + "grad_norm": 0.6466646674884302, + "learning_rate": 1.0092165898617511e-06, + "loss": 1.2560818195343018, + "step": 220 + }, + { + "epoch": 0.050921658986175115, + "grad_norm": 0.586777516357483, + "learning_rate": 1.0138248847926267e-06, + "loss": 1.1043426990509033, + "step": 221 + }, + { + "epoch": 0.05115207373271889, + "grad_norm": 0.41448570454396455, + "learning_rate": 1.0184331797235021e-06, + 
"loss": 1.0725831985473633, + "step": 222 + }, + { + "epoch": 0.051382488479262675, + "grad_norm": 0.5713867853647446, + "learning_rate": 1.023041474654378e-06, + "loss": 0.9764004349708557, + "step": 223 + }, + { + "epoch": 0.05161290322580645, + "grad_norm": 0.6662412690615445, + "learning_rate": 1.0276497695852535e-06, + "loss": 1.2172776460647583, + "step": 224 + }, + { + "epoch": 0.05184331797235023, + "grad_norm": 0.610800258000843, + "learning_rate": 1.032258064516129e-06, + "loss": 1.1065070629119873, + "step": 225 + }, + { + "epoch": 0.05207373271889401, + "grad_norm": 0.5057724484519791, + "learning_rate": 1.0368663594470047e-06, + "loss": 1.0840628147125244, + "step": 226 + }, + { + "epoch": 0.05230414746543779, + "grad_norm": 0.5250793281243177, + "learning_rate": 1.04147465437788e-06, + "loss": 1.109276294708252, + "step": 227 + }, + { + "epoch": 0.052534562211981564, + "grad_norm": 0.7348582040933043, + "learning_rate": 1.0460829493087557e-06, + "loss": 1.186352252960205, + "step": 228 + }, + { + "epoch": 0.05276497695852535, + "grad_norm": 0.48569306871313883, + "learning_rate": 1.050691244239631e-06, + "loss": 1.1605256795883179, + "step": 229 + }, + { + "epoch": 0.052995391705069124, + "grad_norm": 0.6312799860168967, + "learning_rate": 1.0552995391705069e-06, + "loss": 1.0269646644592285, + "step": 230 + }, + { + "epoch": 0.0532258064516129, + "grad_norm": 0.6446173917231129, + "learning_rate": 1.0599078341013825e-06, + "loss": 0.9595874547958374, + "step": 231 + }, + { + "epoch": 0.053456221198156684, + "grad_norm": 0.6010998567907583, + "learning_rate": 1.0645161290322579e-06, + "loss": 1.1606154441833496, + "step": 232 + }, + { + "epoch": 0.05368663594470046, + "grad_norm": 0.6379425251609956, + "learning_rate": 1.0691244239631337e-06, + "loss": 0.9920428991317749, + "step": 233 + }, + { + "epoch": 0.05391705069124424, + "grad_norm": 0.6346840342097714, + "learning_rate": 1.073732718894009e-06, + "loss": 1.2124650478363037, + "step": 234 + }, + 
{ + "epoch": 0.05414746543778802, + "grad_norm": 0.5761223431136224, + "learning_rate": 1.0783410138248847e-06, + "loss": 1.2237420082092285, + "step": 235 + }, + { + "epoch": 0.054377880184331796, + "grad_norm": 0.5178799666370111, + "learning_rate": 1.0829493087557605e-06, + "loss": 1.1484715938568115, + "step": 236 + }, + { + "epoch": 0.05460829493087557, + "grad_norm": 0.5910590598999479, + "learning_rate": 1.0875576036866358e-06, + "loss": 1.2143291234970093, + "step": 237 + }, + { + "epoch": 0.054838709677419356, + "grad_norm": 0.568116947952991, + "learning_rate": 1.0921658986175114e-06, + "loss": 1.1995420455932617, + "step": 238 + }, + { + "epoch": 0.05506912442396313, + "grad_norm": 0.6128333972066793, + "learning_rate": 1.096774193548387e-06, + "loss": 1.2577292919158936, + "step": 239 + }, + { + "epoch": 0.055299539170506916, + "grad_norm": 0.6177738975799152, + "learning_rate": 1.1013824884792626e-06, + "loss": 1.2170629501342773, + "step": 240 + }, + { + "epoch": 0.05552995391705069, + "grad_norm": 0.3580107479174479, + "learning_rate": 1.1059907834101382e-06, + "loss": 0.8318669199943542, + "step": 241 + }, + { + "epoch": 0.05576036866359447, + "grad_norm": 0.4976235536822315, + "learning_rate": 1.1105990783410138e-06, + "loss": 1.0760166645050049, + "step": 242 + }, + { + "epoch": 0.05599078341013825, + "grad_norm": 0.7197455436310494, + "learning_rate": 1.1152073732718894e-06, + "loss": 1.2437031269073486, + "step": 243 + }, + { + "epoch": 0.05622119815668203, + "grad_norm": 0.5957655407019126, + "learning_rate": 1.1198156682027648e-06, + "loss": 1.1680852174758911, + "step": 244 + }, + { + "epoch": 0.056451612903225805, + "grad_norm": 0.6708075502500678, + "learning_rate": 1.1244239631336406e-06, + "loss": 1.051478385925293, + "step": 245 + }, + { + "epoch": 0.05668202764976959, + "grad_norm": 0.547285271256248, + "learning_rate": 1.1290322580645162e-06, + "loss": 1.1433100700378418, + "step": 246 + }, + { + "epoch": 0.056912442396313365, + 
"grad_norm": 0.6428413238154085, + "learning_rate": 1.1336405529953916e-06, + "loss": 0.9521546363830566, + "step": 247 + }, + { + "epoch": 0.05714285714285714, + "grad_norm": 0.6790518899839243, + "learning_rate": 1.1382488479262674e-06, + "loss": 1.226189136505127, + "step": 248 + }, + { + "epoch": 0.057373271889400924, + "grad_norm": 0.7178538920010674, + "learning_rate": 1.1428571428571428e-06, + "loss": 1.108027696609497, + "step": 249 + }, + { + "epoch": 0.0576036866359447, + "grad_norm": 0.4608432366288286, + "learning_rate": 1.1474654377880184e-06, + "loss": 1.042288064956665, + "step": 250 + }, + { + "epoch": 0.05783410138248848, + "grad_norm": 0.8171244559521852, + "learning_rate": 1.1520737327188938e-06, + "loss": 1.193603754043579, + "step": 251 + }, + { + "epoch": 0.05806451612903226, + "grad_norm": 0.6766522772283506, + "learning_rate": 1.1566820276497696e-06, + "loss": 1.193584680557251, + "step": 252 + }, + { + "epoch": 0.05829493087557604, + "grad_norm": 0.5714710938556213, + "learning_rate": 1.1612903225806452e-06, + "loss": 1.2318934202194214, + "step": 253 + }, + { + "epoch": 0.05852534562211981, + "grad_norm": 0.6443899979691422, + "learning_rate": 1.1658986175115205e-06, + "loss": 1.1626521348953247, + "step": 254 + }, + { + "epoch": 0.0587557603686636, + "grad_norm": 0.6336855527034527, + "learning_rate": 1.1705069124423963e-06, + "loss": 1.2402286529541016, + "step": 255 + }, + { + "epoch": 0.05898617511520737, + "grad_norm": 0.599628545600123, + "learning_rate": 1.1751152073732717e-06, + "loss": 1.190323829650879, + "step": 256 + }, + { + "epoch": 0.05921658986175115, + "grad_norm": 0.655955321737197, + "learning_rate": 1.1797235023041473e-06, + "loss": 1.121636986732483, + "step": 257 + }, + { + "epoch": 0.05944700460829493, + "grad_norm": 0.5349922437861245, + "learning_rate": 1.1843317972350231e-06, + "loss": 1.099304437637329, + "step": 258 + }, + { + "epoch": 0.05967741935483871, + "grad_norm": 0.5611568770807159, + "learning_rate": 
1.1889400921658985e-06, + "loss": 1.1730690002441406, + "step": 259 + }, + { + "epoch": 0.059907834101382486, + "grad_norm": 0.5874751551203973, + "learning_rate": 1.1935483870967741e-06, + "loss": 1.1450574398040771, + "step": 260 + }, + { + "epoch": 0.06013824884792627, + "grad_norm": 0.6634311667010621, + "learning_rate": 1.1981566820276497e-06, + "loss": 1.1435421705245972, + "step": 261 + }, + { + "epoch": 0.060368663594470046, + "grad_norm": 0.6113712565981082, + "learning_rate": 1.2027649769585253e-06, + "loss": 1.2153000831604004, + "step": 262 + }, + { + "epoch": 0.06059907834101382, + "grad_norm": 0.4715675476477507, + "learning_rate": 1.207373271889401e-06, + "loss": 1.0380406379699707, + "step": 263 + }, + { + "epoch": 0.060829493087557605, + "grad_norm": 0.5396758253019809, + "learning_rate": 1.2119815668202765e-06, + "loss": 1.1639207601547241, + "step": 264 + }, + { + "epoch": 0.06105990783410138, + "grad_norm": 0.7193765184254299, + "learning_rate": 1.216589861751152e-06, + "loss": 1.1862819194793701, + "step": 265 + }, + { + "epoch": 0.06129032258064516, + "grad_norm": 0.5621136552568688, + "learning_rate": 1.2211981566820275e-06, + "loss": 1.2122020721435547, + "step": 266 + }, + { + "epoch": 0.06152073732718894, + "grad_norm": 0.506518590231947, + "learning_rate": 1.2258064516129033e-06, + "loss": 1.1201646327972412, + "step": 267 + }, + { + "epoch": 0.06175115207373272, + "grad_norm": 0.6015371724768855, + "learning_rate": 1.2304147465437787e-06, + "loss": 0.9520926475524902, + "step": 268 + }, + { + "epoch": 0.061981566820276494, + "grad_norm": 0.6815507447701216, + "learning_rate": 1.2350230414746543e-06, + "loss": 1.0426976680755615, + "step": 269 + }, + { + "epoch": 0.06221198156682028, + "grad_norm": 0.5129880337213574, + "learning_rate": 1.23963133640553e-06, + "loss": 0.934493899345398, + "step": 270 + }, + { + "epoch": 0.062442396313364054, + "grad_norm": 0.5416312735509534, + "learning_rate": 1.2442396313364054e-06, + "loss": 
1.23980712890625, + "step": 271 + }, + { + "epoch": 0.06267281105990784, + "grad_norm": 0.5947336924258313, + "learning_rate": 1.248847926267281e-06, + "loss": 1.094742774963379, + "step": 272 + }, + { + "epoch": 0.06290322580645161, + "grad_norm": 0.5496219212827214, + "learning_rate": 1.2534562211981564e-06, + "loss": 1.0271551609039307, + "step": 273 + }, + { + "epoch": 0.06313364055299539, + "grad_norm": 0.43924704821878574, + "learning_rate": 1.2580645161290322e-06, + "loss": 1.159210205078125, + "step": 274 + }, + { + "epoch": 0.06336405529953917, + "grad_norm": 0.6336734571964621, + "learning_rate": 1.2626728110599078e-06, + "loss": 1.127510666847229, + "step": 275 + }, + { + "epoch": 0.06359447004608294, + "grad_norm": 0.564136508309977, + "learning_rate": 1.2672811059907832e-06, + "loss": 1.1371517181396484, + "step": 276 + }, + { + "epoch": 0.06382488479262673, + "grad_norm": 0.5092569849346139, + "learning_rate": 1.271889400921659e-06, + "loss": 1.0296730995178223, + "step": 277 + }, + { + "epoch": 0.06405529953917051, + "grad_norm": 0.47819096787751125, + "learning_rate": 1.2764976958525344e-06, + "loss": 1.036975383758545, + "step": 278 + }, + { + "epoch": 0.06428571428571428, + "grad_norm": 0.5933788958917384, + "learning_rate": 1.28110599078341e-06, + "loss": 1.2120393514633179, + "step": 279 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 0.5094532117085869, + "learning_rate": 1.2857142857142858e-06, + "loss": 1.0084068775177002, + "step": 280 + }, + { + "epoch": 0.06474654377880185, + "grad_norm": 0.5556672645421422, + "learning_rate": 1.2903225806451612e-06, + "loss": 1.2005786895751953, + "step": 281 + }, + { + "epoch": 0.06497695852534562, + "grad_norm": 0.5273275990471241, + "learning_rate": 1.2949308755760368e-06, + "loss": 1.1506783962249756, + "step": 282 + }, + { + "epoch": 0.0652073732718894, + "grad_norm": 0.6565311834699108, + "learning_rate": 1.2995391705069124e-06, + "loss": 1.1219947338104248, + "step": 283 + }, + { + "epoch": 
0.06543778801843318, + "grad_norm": 0.5392805741788703, + "learning_rate": 1.304147465437788e-06, + "loss": 1.2041170597076416, + "step": 284 + }, + { + "epoch": 0.06566820276497695, + "grad_norm": 0.4958618059812673, + "learning_rate": 1.3087557603686636e-06, + "loss": 1.0903037786483765, + "step": 285 + }, + { + "epoch": 0.06589861751152074, + "grad_norm": 0.5739593792710319, + "learning_rate": 1.3133640552995392e-06, + "loss": 1.2140064239501953, + "step": 286 + }, + { + "epoch": 0.06612903225806452, + "grad_norm": 0.6611408054194472, + "learning_rate": 1.3179723502304148e-06, + "loss": 1.3026092052459717, + "step": 287 + }, + { + "epoch": 0.0663594470046083, + "grad_norm": 0.5994162091601994, + "learning_rate": 1.3225806451612901e-06, + "loss": 1.0937910079956055, + "step": 288 + }, + { + "epoch": 0.06658986175115207, + "grad_norm": 0.5087892316212932, + "learning_rate": 1.327188940092166e-06, + "loss": 1.1768109798431396, + "step": 289 + }, + { + "epoch": 0.06682027649769585, + "grad_norm": 0.6601843016778813, + "learning_rate": 1.3317972350230413e-06, + "loss": 1.0796440839767456, + "step": 290 + }, + { + "epoch": 0.06705069124423964, + "grad_norm": 0.5059222364831474, + "learning_rate": 1.336405529953917e-06, + "loss": 0.9972932934761047, + "step": 291 + }, + { + "epoch": 0.06728110599078341, + "grad_norm": 0.5571474335328804, + "learning_rate": 1.3410138248847927e-06, + "loss": 0.9860717058181763, + "step": 292 + }, + { + "epoch": 0.06751152073732719, + "grad_norm": 0.5418320654969337, + "learning_rate": 1.3456221198156681e-06, + "loss": 1.045119047164917, + "step": 293 + }, + { + "epoch": 0.06774193548387097, + "grad_norm": 0.5469511174229076, + "learning_rate": 1.3502304147465437e-06, + "loss": 1.2740920782089233, + "step": 294 + }, + { + "epoch": 0.06797235023041474, + "grad_norm": 0.5280888059979016, + "learning_rate": 1.354838709677419e-06, + "loss": 1.0860114097595215, + "step": 295 + }, + { + "epoch": 0.06820276497695853, + "grad_norm": 
0.6361673375880608, + "learning_rate": 1.359447004608295e-06, + "loss": 1.111539602279663, + "step": 296 + }, + { + "epoch": 0.06843317972350231, + "grad_norm": 0.6640553054344481, + "learning_rate": 1.3640552995391705e-06, + "loss": 1.1628870964050293, + "step": 297 + }, + { + "epoch": 0.06866359447004608, + "grad_norm": 0.5665129055040568, + "learning_rate": 1.3686635944700459e-06, + "loss": 1.042768955230713, + "step": 298 + }, + { + "epoch": 0.06889400921658986, + "grad_norm": 0.43340931133190164, + "learning_rate": 1.3732718894009217e-06, + "loss": 0.9970331192016602, + "step": 299 + }, + { + "epoch": 0.06912442396313365, + "grad_norm": 0.5645710736996077, + "learning_rate": 1.377880184331797e-06, + "loss": 1.1270179748535156, + "step": 300 + }, + { + "epoch": 0.06935483870967742, + "grad_norm": 0.5065704773498506, + "learning_rate": 1.3824884792626727e-06, + "loss": 0.9505646824836731, + "step": 301 + }, + { + "epoch": 0.0695852534562212, + "grad_norm": 0.5178052985950043, + "learning_rate": 1.3870967741935485e-06, + "loss": 1.0997588634490967, + "step": 302 + }, + { + "epoch": 0.06981566820276498, + "grad_norm": 0.46976885146719827, + "learning_rate": 1.3917050691244239e-06, + "loss": 1.1512106657028198, + "step": 303 + }, + { + "epoch": 0.07004608294930875, + "grad_norm": 0.5368431131511487, + "learning_rate": 1.3963133640552995e-06, + "loss": 1.1340759992599487, + "step": 304 + }, + { + "epoch": 0.07027649769585254, + "grad_norm": 0.6153911846871725, + "learning_rate": 1.400921658986175e-06, + "loss": 1.187511682510376, + "step": 305 + }, + { + "epoch": 0.07050691244239632, + "grad_norm": 0.511555535336468, + "learning_rate": 1.4055299539170507e-06, + "loss": 1.0711122751235962, + "step": 306 + }, + { + "epoch": 0.07073732718894009, + "grad_norm": 0.48287298633713555, + "learning_rate": 1.410138248847926e-06, + "loss": 0.9636896848678589, + "step": 307 + }, + { + "epoch": 0.07096774193548387, + "grad_norm": 0.5910127759130634, + "learning_rate": 
1.4147465437788018e-06, + "loss": 1.0506833791732788, + "step": 308 + }, + { + "epoch": 0.07119815668202766, + "grad_norm": 0.46621570534633416, + "learning_rate": 1.4193548387096774e-06, + "loss": 1.1076349020004272, + "step": 309 + }, + { + "epoch": 0.07142857142857142, + "grad_norm": 0.5023143786431462, + "learning_rate": 1.4239631336405528e-06, + "loss": 1.0878944396972656, + "step": 310 + }, + { + "epoch": 0.07165898617511521, + "grad_norm": 0.5894127846415432, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.0808600187301636, + "step": 311 + }, + { + "epoch": 0.07188940092165899, + "grad_norm": 0.6608655757057322, + "learning_rate": 1.433179723502304e-06, + "loss": 1.2117588520050049, + "step": 312 + }, + { + "epoch": 0.07211981566820276, + "grad_norm": 0.49227698344069387, + "learning_rate": 1.4377880184331796e-06, + "loss": 1.0899101495742798, + "step": 313 + }, + { + "epoch": 0.07235023041474654, + "grad_norm": 0.4429228185732454, + "learning_rate": 1.4423963133640554e-06, + "loss": 0.9540426135063171, + "step": 314 + }, + { + "epoch": 0.07258064516129033, + "grad_norm": 0.6262415135725191, + "learning_rate": 1.4470046082949308e-06, + "loss": 1.1170068979263306, + "step": 315 + }, + { + "epoch": 0.0728110599078341, + "grad_norm": 0.5398534028349639, + "learning_rate": 1.4516129032258064e-06, + "loss": 1.2030160427093506, + "step": 316 + }, + { + "epoch": 0.07304147465437788, + "grad_norm": 0.5750696017486863, + "learning_rate": 1.4562211981566818e-06, + "loss": 1.1599903106689453, + "step": 317 + }, + { + "epoch": 0.07327188940092166, + "grad_norm": 0.4940370201046436, + "learning_rate": 1.4608294930875576e-06, + "loss": 1.0777950286865234, + "step": 318 + }, + { + "epoch": 0.07350230414746543, + "grad_norm": 0.5527232601625117, + "learning_rate": 1.4654377880184332e-06, + "loss": 1.1250553131103516, + "step": 319 + }, + { + "epoch": 0.07373271889400922, + "grad_norm": 0.4905671836592669, + "learning_rate": 1.4700460829493086e-06, + "loss": 
1.10176420211792, + "step": 320 + }, + { + "epoch": 0.073963133640553, + "grad_norm": 0.5457078879226115, + "learning_rate": 1.4746543778801844e-06, + "loss": 1.111799716949463, + "step": 321 + }, + { + "epoch": 0.07419354838709677, + "grad_norm": 0.4195800331952007, + "learning_rate": 1.4792626728110598e-06, + "loss": 1.1555054187774658, + "step": 322 + }, + { + "epoch": 0.07442396313364055, + "grad_norm": 0.46236670595596, + "learning_rate": 1.4838709677419353e-06, + "loss": 1.0977535247802734, + "step": 323 + }, + { + "epoch": 0.07465437788018434, + "grad_norm": 0.5097860724223924, + "learning_rate": 1.4884792626728112e-06, + "loss": 0.9058012962341309, + "step": 324 + }, + { + "epoch": 0.0748847926267281, + "grad_norm": 0.5077577953430894, + "learning_rate": 1.4930875576036865e-06, + "loss": 1.1147960424423218, + "step": 325 + }, + { + "epoch": 0.07511520737327189, + "grad_norm": 0.44169448790763116, + "learning_rate": 1.4976958525345621e-06, + "loss": 1.1315648555755615, + "step": 326 + }, + { + "epoch": 0.07534562211981567, + "grad_norm": 0.5088086763700569, + "learning_rate": 1.5023041474654377e-06, + "loss": 0.9134868383407593, + "step": 327 + }, + { + "epoch": 0.07557603686635944, + "grad_norm": 0.44118138965972864, + "learning_rate": 1.5069124423963133e-06, + "loss": 1.017493724822998, + "step": 328 + }, + { + "epoch": 0.07580645161290323, + "grad_norm": 0.5038134502792564, + "learning_rate": 1.5115207373271887e-06, + "loss": 1.220658540725708, + "step": 329 + }, + { + "epoch": 0.07603686635944701, + "grad_norm": 0.49196264739665124, + "learning_rate": 1.5161290322580645e-06, + "loss": 1.2254307270050049, + "step": 330 + }, + { + "epoch": 0.07626728110599078, + "grad_norm": 0.6442066774537618, + "learning_rate": 1.5207373271889401e-06, + "loss": 1.2653989791870117, + "step": 331 + }, + { + "epoch": 0.07649769585253456, + "grad_norm": 0.5214989033274106, + "learning_rate": 1.5253456221198155e-06, + "loss": 1.199981451034546, + "step": 332 + }, + { + 
"epoch": 0.07672811059907834, + "grad_norm": 0.5987857165424706, + "learning_rate": 1.5299539170506913e-06, + "loss": 1.1141018867492676, + "step": 333 + }, + { + "epoch": 0.07695852534562211, + "grad_norm": 0.5942646354683767, + "learning_rate": 1.5345622119815667e-06, + "loss": 1.2139991521835327, + "step": 334 + }, + { + "epoch": 0.0771889400921659, + "grad_norm": 0.46506211352562865, + "learning_rate": 1.5391705069124423e-06, + "loss": 1.0647475719451904, + "step": 335 + }, + { + "epoch": 0.07741935483870968, + "grad_norm": 0.41334086285294086, + "learning_rate": 1.543778801843318e-06, + "loss": 0.9740357398986816, + "step": 336 + }, + { + "epoch": 0.07764976958525345, + "grad_norm": 0.3730662683323707, + "learning_rate": 1.5483870967741935e-06, + "loss": 0.877153754234314, + "step": 337 + }, + { + "epoch": 0.07788018433179723, + "grad_norm": 0.5608914234538745, + "learning_rate": 1.552995391705069e-06, + "loss": 1.2472789287567139, + "step": 338 + }, + { + "epoch": 0.07811059907834102, + "grad_norm": 0.49369711494641005, + "learning_rate": 1.5576036866359445e-06, + "loss": 1.1873078346252441, + "step": 339 + }, + { + "epoch": 0.07834101382488479, + "grad_norm": 0.47054639516827856, + "learning_rate": 1.5622119815668203e-06, + "loss": 1.0728449821472168, + "step": 340 + }, + { + "epoch": 0.07857142857142857, + "grad_norm": 0.5084311239727589, + "learning_rate": 1.5668202764976959e-06, + "loss": 0.9974904656410217, + "step": 341 + }, + { + "epoch": 0.07880184331797235, + "grad_norm": 0.5100945633220172, + "learning_rate": 1.5714285714285712e-06, + "loss": 1.0591039657592773, + "step": 342 + }, + { + "epoch": 0.07903225806451612, + "grad_norm": 0.5927330256525677, + "learning_rate": 1.576036866359447e-06, + "loss": 1.04117751121521, + "step": 343 + }, + { + "epoch": 0.0792626728110599, + "grad_norm": 0.40984725482311923, + "learning_rate": 1.5806451612903224e-06, + "loss": 0.934100866317749, + "step": 344 + }, + { + "epoch": 0.07949308755760369, + "grad_norm": 
0.4545690285130126, + "learning_rate": 1.585253456221198e-06, + "loss": 1.0333890914916992, + "step": 345 + }, + { + "epoch": 0.07972350230414746, + "grad_norm": 0.4848318013907446, + "learning_rate": 1.5898617511520738e-06, + "loss": 1.1762741804122925, + "step": 346 + }, + { + "epoch": 0.07995391705069124, + "grad_norm": 0.4082821303075448, + "learning_rate": 1.5944700460829492e-06, + "loss": 1.081842303276062, + "step": 347 + }, + { + "epoch": 0.08018433179723503, + "grad_norm": 0.48343962912190763, + "learning_rate": 1.5990783410138248e-06, + "loss": 1.140712022781372, + "step": 348 + }, + { + "epoch": 0.0804147465437788, + "grad_norm": 0.3519464028715392, + "learning_rate": 1.6036866359447004e-06, + "loss": 1.0155198574066162, + "step": 349 + }, + { + "epoch": 0.08064516129032258, + "grad_norm": 0.4719922940268299, + "learning_rate": 1.608294930875576e-06, + "loss": 1.0673280954360962, + "step": 350 + }, + { + "epoch": 0.08087557603686636, + "grad_norm": 0.44336917730159625, + "learning_rate": 1.6129032258064514e-06, + "loss": 1.1061692237854004, + "step": 351 + }, + { + "epoch": 0.08110599078341015, + "grad_norm": 0.6227306591455409, + "learning_rate": 1.6175115207373272e-06, + "loss": 1.0120354890823364, + "step": 352 + }, + { + "epoch": 0.08133640552995391, + "grad_norm": 0.5343939607764295, + "learning_rate": 1.6221198156682028e-06, + "loss": 1.1260986328125, + "step": 353 + }, + { + "epoch": 0.0815668202764977, + "grad_norm": 0.514597043189326, + "learning_rate": 1.6267281105990782e-06, + "loss": 1.0376214981079102, + "step": 354 + }, + { + "epoch": 0.08179723502304148, + "grad_norm": 0.41314458702115897, + "learning_rate": 1.631336405529954e-06, + "loss": 1.0802130699157715, + "step": 355 + }, + { + "epoch": 0.08202764976958525, + "grad_norm": 0.5113844239661658, + "learning_rate": 1.6359447004608294e-06, + "loss": 1.217378854751587, + "step": 356 + }, + { + "epoch": 0.08225806451612903, + "grad_norm": 0.3681604891354872, + "learning_rate": 
1.640552995391705e-06, + "loss": 0.9107617139816284, + "step": 357 + }, + { + "epoch": 0.08248847926267282, + "grad_norm": 0.4567828094638987, + "learning_rate": 1.6451612903225808e-06, + "loss": 1.089385986328125, + "step": 358 + }, + { + "epoch": 0.08271889400921659, + "grad_norm": 0.42382031863730735, + "learning_rate": 1.6497695852534561e-06, + "loss": 1.1420392990112305, + "step": 359 + }, + { + "epoch": 0.08294930875576037, + "grad_norm": 0.4385300551654332, + "learning_rate": 1.6543778801843317e-06, + "loss": 0.9308648705482483, + "step": 360 + }, + { + "epoch": 0.08317972350230415, + "grad_norm": 0.4691608891369802, + "learning_rate": 1.6589861751152071e-06, + "loss": 0.9463413953781128, + "step": 361 + }, + { + "epoch": 0.08341013824884792, + "grad_norm": 0.4312953553305326, + "learning_rate": 1.663594470046083e-06, + "loss": 1.0364834070205688, + "step": 362 + }, + { + "epoch": 0.0836405529953917, + "grad_norm": 0.4601141894995307, + "learning_rate": 1.6682027649769585e-06, + "loss": 0.9992797374725342, + "step": 363 + }, + { + "epoch": 0.08387096774193549, + "grad_norm": 0.4110829230093509, + "learning_rate": 1.672811059907834e-06, + "loss": 0.9862687587738037, + "step": 364 + }, + { + "epoch": 0.08410138248847926, + "grad_norm": 0.538237007116734, + "learning_rate": 1.6774193548387097e-06, + "loss": 1.0882744789123535, + "step": 365 + }, + { + "epoch": 0.08433179723502304, + "grad_norm": 0.38129891099780466, + "learning_rate": 1.682027649769585e-06, + "loss": 0.9217149615287781, + "step": 366 + }, + { + "epoch": 0.08456221198156683, + "grad_norm": 0.47566005804946043, + "learning_rate": 1.6866359447004607e-06, + "loss": 1.0384632349014282, + "step": 367 + }, + { + "epoch": 0.0847926267281106, + "grad_norm": 0.41334228678230484, + "learning_rate": 1.6912442396313363e-06, + "loss": 0.8760565519332886, + "step": 368 + }, + { + "epoch": 0.08502304147465438, + "grad_norm": 0.38194490761666694, + "learning_rate": 1.6958525345622119e-06, + "loss": 
0.9868614077568054, + "step": 369 + }, + { + "epoch": 0.08525345622119816, + "grad_norm": 0.41853356164893474, + "learning_rate": 1.7004608294930875e-06, + "loss": 1.0386936664581299, + "step": 370 + }, + { + "epoch": 0.08548387096774193, + "grad_norm": 0.4969082634490474, + "learning_rate": 1.705069124423963e-06, + "loss": 1.2201364040374756, + "step": 371 + }, + { + "epoch": 0.08571428571428572, + "grad_norm": 0.45684500070085005, + "learning_rate": 1.7096774193548387e-06, + "loss": 0.9892920255661011, + "step": 372 + }, + { + "epoch": 0.0859447004608295, + "grad_norm": 0.3411435628885881, + "learning_rate": 1.714285714285714e-06, + "loss": 0.9379667639732361, + "step": 373 + }, + { + "epoch": 0.08617511520737327, + "grad_norm": 0.4493279942699278, + "learning_rate": 1.7188940092165899e-06, + "loss": 1.0150624513626099, + "step": 374 + }, + { + "epoch": 0.08640552995391705, + "grad_norm": 0.3873317793310882, + "learning_rate": 1.7235023041474655e-06, + "loss": 0.8724589943885803, + "step": 375 + }, + { + "epoch": 0.08663594470046083, + "grad_norm": 0.451020649692039, + "learning_rate": 1.7281105990783408e-06, + "loss": 1.005715012550354, + "step": 376 + }, + { + "epoch": 0.0868663594470046, + "grad_norm": 0.40515583321904614, + "learning_rate": 1.7327188940092167e-06, + "loss": 1.0238345861434937, + "step": 377 + }, + { + "epoch": 0.08709677419354839, + "grad_norm": 0.5713304603163627, + "learning_rate": 1.737327188940092e-06, + "loss": 1.061020851135254, + "step": 378 + }, + { + "epoch": 0.08732718894009217, + "grad_norm": 0.31543151666242697, + "learning_rate": 1.7419354838709676e-06, + "loss": 0.8607133626937866, + "step": 379 + }, + { + "epoch": 0.08755760368663594, + "grad_norm": 0.396586403800106, + "learning_rate": 1.7465437788018434e-06, + "loss": 0.9070740938186646, + "step": 380 + }, + { + "epoch": 0.08778801843317972, + "grad_norm": 0.4114853593210784, + "learning_rate": 1.7511520737327188e-06, + "loss": 0.993092954158783, + "step": 381 + }, + { + 
"epoch": 0.08801843317972351, + "grad_norm": 0.5030976624666732, + "learning_rate": 1.7557603686635944e-06, + "loss": 1.1119567155838013, + "step": 382 + }, + { + "epoch": 0.08824884792626728, + "grad_norm": 0.3947649464982104, + "learning_rate": 1.7603686635944698e-06, + "loss": 1.030786395072937, + "step": 383 + }, + { + "epoch": 0.08847926267281106, + "grad_norm": 0.413233744996873, + "learning_rate": 1.7649769585253456e-06, + "loss": 1.0578559637069702, + "step": 384 + }, + { + "epoch": 0.08870967741935484, + "grad_norm": 0.5116874225270758, + "learning_rate": 1.7695852534562212e-06, + "loss": 1.1282391548156738, + "step": 385 + }, + { + "epoch": 0.08894009216589861, + "grad_norm": 0.36883704269137796, + "learning_rate": 1.7741935483870966e-06, + "loss": 0.7838784456253052, + "step": 386 + }, + { + "epoch": 0.0891705069124424, + "grad_norm": 0.4028833159886203, + "learning_rate": 1.7788018433179724e-06, + "loss": 0.9244300127029419, + "step": 387 + }, + { + "epoch": 0.08940092165898618, + "grad_norm": 0.37786066556159736, + "learning_rate": 1.7834101382488478e-06, + "loss": 0.916866660118103, + "step": 388 + }, + { + "epoch": 0.08963133640552995, + "grad_norm": 0.3466207390337416, + "learning_rate": 1.7880184331797234e-06, + "loss": 0.9918155670166016, + "step": 389 + }, + { + "epoch": 0.08986175115207373, + "grad_norm": 0.49146787648511026, + "learning_rate": 1.792626728110599e-06, + "loss": 0.9879001379013062, + "step": 390 + }, + { + "epoch": 0.09009216589861752, + "grad_norm": 0.4467876721911936, + "learning_rate": 1.7972350230414746e-06, + "loss": 1.0252082347869873, + "step": 391 + }, + { + "epoch": 0.09032258064516129, + "grad_norm": 0.4519090202816701, + "learning_rate": 1.8018433179723502e-06, + "loss": 1.0376901626586914, + "step": 392 + }, + { + "epoch": 0.09055299539170507, + "grad_norm": 0.4158305964101772, + "learning_rate": 1.8064516129032258e-06, + "loss": 1.0237072706222534, + "step": 393 + }, + { + "epoch": 0.09078341013824885, + "grad_norm": 
0.3903660894092682, + "learning_rate": 1.8110599078341013e-06, + "loss": 1.004181146621704, + "step": 394 + }, + { + "epoch": 0.09101382488479262, + "grad_norm": 0.4844697096481946, + "learning_rate": 1.8156682027649767e-06, + "loss": 1.1162958145141602, + "step": 395 + }, + { + "epoch": 0.0912442396313364, + "grad_norm": 0.43484007077470926, + "learning_rate": 1.8202764976958525e-06, + "loss": 0.9634548425674438, + "step": 396 + }, + { + "epoch": 0.09147465437788019, + "grad_norm": 0.34256483920586434, + "learning_rate": 1.8248847926267281e-06, + "loss": 0.9306463599205017, + "step": 397 + }, + { + "epoch": 0.09170506912442396, + "grad_norm": 0.4291772843094685, + "learning_rate": 1.8294930875576035e-06, + "loss": 1.0243630409240723, + "step": 398 + }, + { + "epoch": 0.09193548387096774, + "grad_norm": 0.37150575195192614, + "learning_rate": 1.8341013824884793e-06, + "loss": 0.9261370897293091, + "step": 399 + }, + { + "epoch": 0.09216589861751152, + "grad_norm": 0.41574639263883495, + "learning_rate": 1.8387096774193547e-06, + "loss": 0.9929264783859253, + "step": 400 + }, + { + "epoch": 0.0923963133640553, + "grad_norm": 0.4086620199652483, + "learning_rate": 1.8433179723502303e-06, + "loss": 1.0245590209960938, + "step": 401 + }, + { + "epoch": 0.09262672811059908, + "grad_norm": 0.4485366734014856, + "learning_rate": 1.8479262672811061e-06, + "loss": 0.9801148176193237, + "step": 402 + }, + { + "epoch": 0.09285714285714286, + "grad_norm": 0.48045286204627596, + "learning_rate": 1.8525345622119815e-06, + "loss": 1.181383728981018, + "step": 403 + }, + { + "epoch": 0.09308755760368663, + "grad_norm": 0.41845043157279344, + "learning_rate": 1.857142857142857e-06, + "loss": 0.9493411779403687, + "step": 404 + }, + { + "epoch": 0.09331797235023041, + "grad_norm": 0.4897744794150158, + "learning_rate": 1.8617511520737325e-06, + "loss": 1.1096491813659668, + "step": 405 + }, + { + "epoch": 0.0935483870967742, + "grad_norm": 0.4480175053230346, + "learning_rate": 
1.8663594470046083e-06, + "loss": 1.1019275188446045, + "step": 406 + }, + { + "epoch": 0.09377880184331797, + "grad_norm": 0.3732577959232657, + "learning_rate": 1.8709677419354837e-06, + "loss": 0.973988950252533, + "step": 407 + }, + { + "epoch": 0.09400921658986175, + "grad_norm": 0.4400203989690802, + "learning_rate": 1.8755760368663593e-06, + "loss": 1.1670622825622559, + "step": 408 + }, + { + "epoch": 0.09423963133640553, + "grad_norm": 0.3329146322312322, + "learning_rate": 1.880184331797235e-06, + "loss": 0.8550488948822021, + "step": 409 + }, + { + "epoch": 0.0944700460829493, + "grad_norm": 0.4080056832475701, + "learning_rate": 1.8847926267281104e-06, + "loss": 1.0501651763916016, + "step": 410 + }, + { + "epoch": 0.09470046082949309, + "grad_norm": 0.4667020783139675, + "learning_rate": 1.889400921658986e-06, + "loss": 1.1323202848434448, + "step": 411 + }, + { + "epoch": 0.09493087557603687, + "grad_norm": 0.4438011539128225, + "learning_rate": 1.8940092165898616e-06, + "loss": 1.168154001235962, + "step": 412 + }, + { + "epoch": 0.09516129032258064, + "grad_norm": 0.5043395094497101, + "learning_rate": 1.8986175115207372e-06, + "loss": 1.0667431354522705, + "step": 413 + }, + { + "epoch": 0.09539170506912442, + "grad_norm": 0.42921175733784445, + "learning_rate": 1.9032258064516128e-06, + "loss": 1.1447162628173828, + "step": 414 + }, + { + "epoch": 0.0956221198156682, + "grad_norm": 0.42501454608228506, + "learning_rate": 1.9078341013824884e-06, + "loss": 0.9403433799743652, + "step": 415 + }, + { + "epoch": 0.09585253456221199, + "grad_norm": 0.4016688989337606, + "learning_rate": 1.912442396313364e-06, + "loss": 0.9837527275085449, + "step": 416 + }, + { + "epoch": 0.09608294930875576, + "grad_norm": 0.422068085350648, + "learning_rate": 1.9170506912442396e-06, + "loss": 1.071333408355713, + "step": 417 + }, + { + "epoch": 0.09631336405529954, + "grad_norm": 0.5124388054628781, + "learning_rate": 1.921658986175115e-06, + "loss": 
1.0156168937683105, + "step": 418 + }, + { + "epoch": 0.09654377880184332, + "grad_norm": 0.4338501331744671, + "learning_rate": 1.926267281105991e-06, + "loss": 0.9705266952514648, + "step": 419 + }, + { + "epoch": 0.0967741935483871, + "grad_norm": 0.407144156286867, + "learning_rate": 1.930875576036866e-06, + "loss": 1.0570204257965088, + "step": 420 + }, + { + "epoch": 0.09700460829493088, + "grad_norm": 0.43729360857600713, + "learning_rate": 1.935483870967742e-06, + "loss": 1.141861915588379, + "step": 421 + }, + { + "epoch": 0.09723502304147466, + "grad_norm": 0.4507835554387818, + "learning_rate": 1.9400921658986174e-06, + "loss": 0.9849745631217957, + "step": 422 + }, + { + "epoch": 0.09746543778801843, + "grad_norm": 0.4932195036683519, + "learning_rate": 1.944700460829493e-06, + "loss": 1.0279912948608398, + "step": 423 + }, + { + "epoch": 0.09769585253456221, + "grad_norm": 0.4014365475110759, + "learning_rate": 1.9493087557603686e-06, + "loss": 1.0707788467407227, + "step": 424 + }, + { + "epoch": 0.097926267281106, + "grad_norm": 0.37856248369077095, + "learning_rate": 1.953917050691244e-06, + "loss": 0.9391129016876221, + "step": 425 + }, + { + "epoch": 0.09815668202764977, + "grad_norm": 0.3604046417791118, + "learning_rate": 1.9585253456221198e-06, + "loss": 0.9792884588241577, + "step": 426 + }, + { + "epoch": 0.09838709677419355, + "grad_norm": 0.42091691400517506, + "learning_rate": 1.963133640552995e-06, + "loss": 1.0111792087554932, + "step": 427 + }, + { + "epoch": 0.09861751152073733, + "grad_norm": 0.2951881364083913, + "learning_rate": 1.967741935483871e-06, + "loss": 1.0020272731781006, + "step": 428 + }, + { + "epoch": 0.0988479262672811, + "grad_norm": 0.42473763380817414, + "learning_rate": 1.9723502304147463e-06, + "loss": 1.1002991199493408, + "step": 429 + }, + { + "epoch": 0.09907834101382489, + "grad_norm": 0.3977328364337887, + "learning_rate": 1.976958525345622e-06, + "loss": 0.9656131267547607, + "step": 430 + }, + { + "epoch": 
0.09930875576036867, + "grad_norm": 0.4163794190517341, + "learning_rate": 1.9815668202764975e-06, + "loss": 1.1845166683197021, + "step": 431 + }, + { + "epoch": 0.09953917050691244, + "grad_norm": 0.4102761511182145, + "learning_rate": 1.9861751152073733e-06, + "loss": 0.8743879795074463, + "step": 432 + }, + { + "epoch": 0.09976958525345622, + "grad_norm": 0.48299006340600875, + "learning_rate": 1.9907834101382487e-06, + "loss": 1.0800082683563232, + "step": 433 + }, + { + "epoch": 0.1, + "grad_norm": 0.39412754669182365, + "learning_rate": 1.995391705069124e-06, + "loss": 1.0410808324813843, + "step": 434 + }, + { + "epoch": 0.10023041474654378, + "grad_norm": 0.4817128357084655, + "learning_rate": 2e-06, + "loss": 1.0214624404907227, + "step": 435 + }, + { + "epoch": 0.10046082949308756, + "grad_norm": 0.4738161753055533, + "learning_rate": 1.9999999274256618e-06, + "loss": 1.0304028987884521, + "step": 436 + }, + { + "epoch": 0.10069124423963134, + "grad_norm": 0.3946923205513698, + "learning_rate": 1.9999997097026583e-06, + "loss": 1.0457626581192017, + "step": 437 + }, + { + "epoch": 0.10092165898617511, + "grad_norm": 0.43567215904100204, + "learning_rate": 1.9999993468310205e-06, + "loss": 0.9837691187858582, + "step": 438 + }, + { + "epoch": 0.1011520737327189, + "grad_norm": 0.5216317957588074, + "learning_rate": 1.9999988388108013e-06, + "loss": 1.0819612741470337, + "step": 439 + }, + { + "epoch": 0.10138248847926268, + "grad_norm": 0.31182314858852395, + "learning_rate": 1.9999981856420743e-06, + "loss": 1.0417449474334717, + "step": 440 + }, + { + "epoch": 0.10161290322580645, + "grad_norm": 0.5477105048499294, + "learning_rate": 1.999997387324935e-06, + "loss": 1.0501068830490112, + "step": 441 + }, + { + "epoch": 0.10184331797235023, + "grad_norm": 0.4106183150059033, + "learning_rate": 1.999996443859498e-06, + "loss": 1.0635120868682861, + "step": 442 + }, + { + "epoch": 0.10207373271889401, + "grad_norm": 0.4873224989082174, + "learning_rate": 
1.999995355245902e-06, + "loss": 0.9732234477996826, + "step": 443 + }, + { + "epoch": 0.10230414746543778, + "grad_norm": 0.3718846857755592, + "learning_rate": 1.9999941214843034e-06, + "loss": 0.9493811130523682, + "step": 444 + }, + { + "epoch": 0.10253456221198157, + "grad_norm": 0.5595191439491263, + "learning_rate": 1.9999927425748817e-06, + "loss": 1.1455141305923462, + "step": 445 + }, + { + "epoch": 0.10276497695852535, + "grad_norm": 0.4237177518607636, + "learning_rate": 1.9999912185178374e-06, + "loss": 0.9341592788696289, + "step": 446 + }, + { + "epoch": 0.10299539170506912, + "grad_norm": 0.3913224265375377, + "learning_rate": 1.9999895493133916e-06, + "loss": 0.9535608291625977, + "step": 447 + }, + { + "epoch": 0.1032258064516129, + "grad_norm": 0.4687207319213409, + "learning_rate": 1.999987734961787e-06, + "loss": 1.1977221965789795, + "step": 448 + }, + { + "epoch": 0.10345622119815669, + "grad_norm": 0.45995634872516833, + "learning_rate": 1.999985775463286e-06, + "loss": 1.1658375263214111, + "step": 449 + }, + { + "epoch": 0.10368663594470046, + "grad_norm": 0.47830181543951694, + "learning_rate": 1.9999836708181734e-06, + "loss": 1.1171612739562988, + "step": 450 + }, + { + "epoch": 0.10391705069124424, + "grad_norm": 0.3823354001067843, + "learning_rate": 1.999981421026755e-06, + "loss": 1.0864373445510864, + "step": 451 + }, + { + "epoch": 0.10414746543778802, + "grad_norm": 0.43518989690984766, + "learning_rate": 1.999979026089357e-06, + "loss": 1.1211299896240234, + "step": 452 + }, + { + "epoch": 0.10437788018433179, + "grad_norm": 0.45163820634554874, + "learning_rate": 1.9999764860063277e-06, + "loss": 1.071751594543457, + "step": 453 + }, + { + "epoch": 0.10460829493087558, + "grad_norm": 0.3749468590501543, + "learning_rate": 1.9999738007780347e-06, + "loss": 1.0377576351165771, + "step": 454 + }, + { + "epoch": 0.10483870967741936, + "grad_norm": 0.42625340690366553, + "learning_rate": 1.9999709704048685e-06, + "loss": 
0.9658410549163818, + "step": 455 + }, + { + "epoch": 0.10506912442396313, + "grad_norm": 0.4022888050751363, + "learning_rate": 1.9999679948872395e-06, + "loss": 0.9070194959640503, + "step": 456 + }, + { + "epoch": 0.10529953917050691, + "grad_norm": 0.5570523464378584, + "learning_rate": 1.9999648742255803e-06, + "loss": 1.2197664976119995, + "step": 457 + }, + { + "epoch": 0.1055299539170507, + "grad_norm": 0.3961372853294897, + "learning_rate": 1.9999616084203426e-06, + "loss": 0.9032889604568481, + "step": 458 + }, + { + "epoch": 0.10576036866359446, + "grad_norm": 0.39060467678942784, + "learning_rate": 1.9999581974720017e-06, + "loss": 0.9458762407302856, + "step": 459 + }, + { + "epoch": 0.10599078341013825, + "grad_norm": 0.5068153216782157, + "learning_rate": 1.9999546413810526e-06, + "loss": 1.0024757385253906, + "step": 460 + }, + { + "epoch": 0.10622119815668203, + "grad_norm": 0.38148764403186025, + "learning_rate": 1.9999509401480108e-06, + "loss": 0.9499050378799438, + "step": 461 + }, + { + "epoch": 0.1064516129032258, + "grad_norm": 0.4354491299812492, + "learning_rate": 1.9999470937734132e-06, + "loss": 1.0764188766479492, + "step": 462 + }, + { + "epoch": 0.10668202764976958, + "grad_norm": 0.42800401210878014, + "learning_rate": 1.9999431022578194e-06, + "loss": 0.9858300089836121, + "step": 463 + }, + { + "epoch": 0.10691244239631337, + "grad_norm": 0.41132718920336847, + "learning_rate": 1.999938965601808e-06, + "loss": 0.8965580463409424, + "step": 464 + }, + { + "epoch": 0.10714285714285714, + "grad_norm": 0.39699129711694964, + "learning_rate": 1.9999346838059788e-06, + "loss": 0.8860410451889038, + "step": 465 + }, + { + "epoch": 0.10737327188940092, + "grad_norm": 0.48300723462768347, + "learning_rate": 1.9999302568709546e-06, + "loss": 1.0621274709701538, + "step": 466 + }, + { + "epoch": 0.1076036866359447, + "grad_norm": 0.45149909069714367, + "learning_rate": 1.9999256847973774e-06, + "loss": 0.8894643783569336, + "step": 467 + }, + 
{ + "epoch": 0.10783410138248847, + "grad_norm": 0.3529913357119793, + "learning_rate": 1.999920967585911e-06, + "loss": 0.98856520652771, + "step": 468 + }, + { + "epoch": 0.10806451612903226, + "grad_norm": 0.3260735960256147, + "learning_rate": 1.999916105237239e-06, + "loss": 0.7885239124298096, + "step": 469 + }, + { + "epoch": 0.10829493087557604, + "grad_norm": 0.4477697599226733, + "learning_rate": 1.9999110977520687e-06, + "loss": 1.0274477005004883, + "step": 470 + }, + { + "epoch": 0.10852534562211981, + "grad_norm": 0.3938409891368368, + "learning_rate": 1.999905945131126e-06, + "loss": 0.8672109842300415, + "step": 471 + }, + { + "epoch": 0.10875576036866359, + "grad_norm": 0.37173415889586336, + "learning_rate": 1.9999006473751594e-06, + "loss": 0.852576732635498, + "step": 472 + }, + { + "epoch": 0.10898617511520738, + "grad_norm": 0.3670138423827908, + "learning_rate": 1.9998952044849375e-06, + "loss": 0.9553557634353638, + "step": 473 + }, + { + "epoch": 0.10921658986175115, + "grad_norm": 0.4402707979796638, + "learning_rate": 1.99988961646125e-06, + "loss": 1.1375620365142822, + "step": 474 + }, + { + "epoch": 0.10944700460829493, + "grad_norm": 0.4045716386517098, + "learning_rate": 1.9998838833049083e-06, + "loss": 0.9653681516647339, + "step": 475 + }, + { + "epoch": 0.10967741935483871, + "grad_norm": 0.3653559897200667, + "learning_rate": 1.999878005016745e-06, + "loss": 1.1139185428619385, + "step": 476 + }, + { + "epoch": 0.10990783410138248, + "grad_norm": 0.37459420946595523, + "learning_rate": 1.9998719815976127e-06, + "loss": 0.8375418186187744, + "step": 477 + }, + { + "epoch": 0.11013824884792627, + "grad_norm": 0.33053822521695836, + "learning_rate": 1.999865813048386e-06, + "loss": 1.0005979537963867, + "step": 478 + }, + { + "epoch": 0.11036866359447005, + "grad_norm": 0.39083306344420843, + "learning_rate": 1.99985949936996e-06, + "loss": 0.8499772548675537, + "step": 479 + }, + { + "epoch": 0.11059907834101383, + "grad_norm": 
0.3575835338316839, + "learning_rate": 1.999853040563252e-06, + "loss": 0.9805284738540649, + "step": 480 + }, + { + "epoch": 0.1108294930875576, + "grad_norm": 0.43340835059987204, + "learning_rate": 1.9998464366291983e-06, + "loss": 0.9462177753448486, + "step": 481 + }, + { + "epoch": 0.11105990783410138, + "grad_norm": 0.44706726559657484, + "learning_rate": 1.999839687568758e-06, + "loss": 1.1023187637329102, + "step": 482 + }, + { + "epoch": 0.11129032258064517, + "grad_norm": 0.3754824087757579, + "learning_rate": 1.9998327933829103e-06, + "loss": 0.9361279010772705, + "step": 483 + }, + { + "epoch": 0.11152073732718894, + "grad_norm": 0.38419186899738067, + "learning_rate": 1.9998257540726567e-06, + "loss": 0.9811379909515381, + "step": 484 + }, + { + "epoch": 0.11175115207373272, + "grad_norm": 0.4030421476721474, + "learning_rate": 1.9998185696390184e-06, + "loss": 1.0246069431304932, + "step": 485 + }, + { + "epoch": 0.1119815668202765, + "grad_norm": 0.4555360249805513, + "learning_rate": 1.9998112400830385e-06, + "loss": 1.0614899396896362, + "step": 486 + }, + { + "epoch": 0.11221198156682027, + "grad_norm": 0.4347652169333907, + "learning_rate": 1.9998037654057803e-06, + "loss": 1.02305269241333, + "step": 487 + }, + { + "epoch": 0.11244239631336406, + "grad_norm": 0.43672158413630835, + "learning_rate": 1.999796145608329e-06, + "loss": 1.044907808303833, + "step": 488 + }, + { + "epoch": 0.11267281105990784, + "grad_norm": 0.4917956866782855, + "learning_rate": 1.999788380691791e-06, + "loss": 0.9669852256774902, + "step": 489 + }, + { + "epoch": 0.11290322580645161, + "grad_norm": 0.3857920087478492, + "learning_rate": 1.9997804706572933e-06, + "loss": 1.0235236883163452, + "step": 490 + }, + { + "epoch": 0.1131336405529954, + "grad_norm": 0.4541175977583441, + "learning_rate": 1.9997724155059835e-06, + "loss": 0.8982692360877991, + "step": 491 + }, + { + "epoch": 0.11336405529953918, + "grad_norm": 0.481910238333043, + "learning_rate": 
1.9997642152390312e-06, + "loss": 0.8390282988548279, + "step": 492 + }, + { + "epoch": 0.11359447004608295, + "grad_norm": 0.39882686276748835, + "learning_rate": 1.9997558698576266e-06, + "loss": 0.8938695192337036, + "step": 493 + }, + { + "epoch": 0.11382488479262673, + "grad_norm": 0.5064684870077569, + "learning_rate": 1.9997473793629813e-06, + "loss": 0.9747422933578491, + "step": 494 + }, + { + "epoch": 0.11405529953917051, + "grad_norm": 0.443509358045386, + "learning_rate": 1.999738743756327e-06, + "loss": 1.050918698310852, + "step": 495 + }, + { + "epoch": 0.11428571428571428, + "grad_norm": 0.5368423996158629, + "learning_rate": 1.9997299630389174e-06, + "loss": 0.9169312715530396, + "step": 496 + }, + { + "epoch": 0.11451612903225807, + "grad_norm": 0.452695866401899, + "learning_rate": 1.9997210372120272e-06, + "loss": 1.0258065462112427, + "step": 497 + }, + { + "epoch": 0.11474654377880185, + "grad_norm": 0.3831239007423439, + "learning_rate": 1.9997119662769523e-06, + "loss": 1.066356897354126, + "step": 498 + }, + { + "epoch": 0.11497695852534562, + "grad_norm": 0.4319474855040805, + "learning_rate": 1.9997027502350086e-06, + "loss": 1.0336101055145264, + "step": 499 + }, + { + "epoch": 0.1152073732718894, + "grad_norm": 0.36856882435983085, + "learning_rate": 1.9996933890875342e-06, + "loss": 1.0434989929199219, + "step": 500 + }, + { + "epoch": 0.11543778801843319, + "grad_norm": 0.4366750071509639, + "learning_rate": 1.9996838828358876e-06, + "loss": 1.0081424713134766, + "step": 501 + }, + { + "epoch": 0.11566820276497695, + "grad_norm": 0.4424253641379215, + "learning_rate": 1.999674231481449e-06, + "loss": 1.0998575687408447, + "step": 502 + }, + { + "epoch": 0.11589861751152074, + "grad_norm": 0.43915567985422416, + "learning_rate": 1.9996644350256193e-06, + "loss": 1.0325868129730225, + "step": 503 + }, + { + "epoch": 0.11612903225806452, + "grad_norm": 0.39758687932867864, + "learning_rate": 1.99965449346982e-06, + "loss": 
1.0520741939544678, + "step": 504 + }, + { + "epoch": 0.11635944700460829, + "grad_norm": 0.4373332869451062, + "learning_rate": 1.9996444068154943e-06, + "loss": 0.9355484247207642, + "step": 505 + }, + { + "epoch": 0.11658986175115207, + "grad_norm": 0.478944942365821, + "learning_rate": 1.9996341750641067e-06, + "loss": 1.2088062763214111, + "step": 506 + }, + { + "epoch": 0.11682027649769586, + "grad_norm": 0.45703939880277317, + "learning_rate": 1.9996237982171416e-06, + "loss": 1.007477045059204, + "step": 507 + }, + { + "epoch": 0.11705069124423963, + "grad_norm": 0.516029780444843, + "learning_rate": 1.9996132762761054e-06, + "loss": 0.9528911113739014, + "step": 508 + }, + { + "epoch": 0.11728110599078341, + "grad_norm": 0.44144049831872473, + "learning_rate": 1.9996026092425258e-06, + "loss": 1.0906065702438354, + "step": 509 + }, + { + "epoch": 0.1175115207373272, + "grad_norm": 0.45635386377861326, + "learning_rate": 1.9995917971179507e-06, + "loss": 1.1328812837600708, + "step": 510 + }, + { + "epoch": 0.11774193548387096, + "grad_norm": 0.5010986511700435, + "learning_rate": 1.9995808399039493e-06, + "loss": 1.1367099285125732, + "step": 511 + }, + { + "epoch": 0.11797235023041475, + "grad_norm": 0.5738525299064665, + "learning_rate": 1.999569737602112e-06, + "loss": 1.22605562210083, + "step": 512 + }, + { + "epoch": 0.11820276497695853, + "grad_norm": 0.40700112362856533, + "learning_rate": 1.9995584902140514e-06, + "loss": 0.8814148306846619, + "step": 513 + }, + { + "epoch": 0.1184331797235023, + "grad_norm": 0.4018062947026822, + "learning_rate": 1.9995470977413988e-06, + "loss": 0.916766881942749, + "step": 514 + }, + { + "epoch": 0.11866359447004608, + "grad_norm": 0.3907370494982875, + "learning_rate": 1.999535560185808e-06, + "loss": 0.8088599443435669, + "step": 515 + }, + { + "epoch": 0.11889400921658987, + "grad_norm": 0.5585215819507526, + "learning_rate": 1.9995238775489538e-06, + "loss": 1.0029397010803223, + "step": 516 + }, + { + 
"epoch": 0.11912442396313364, + "grad_norm": 0.47103060321263474, + "learning_rate": 1.9995120498325322e-06, + "loss": 1.157515287399292, + "step": 517 + }, + { + "epoch": 0.11935483870967742, + "grad_norm": 0.43934234876750516, + "learning_rate": 1.99950007703826e-06, + "loss": 0.989453911781311, + "step": 518 + }, + { + "epoch": 0.1195852534562212, + "grad_norm": 0.501533126043576, + "learning_rate": 1.999487959167874e-06, + "loss": 0.9791898727416992, + "step": 519 + }, + { + "epoch": 0.11981566820276497, + "grad_norm": 0.3947583681206324, + "learning_rate": 1.9994756962231343e-06, + "loss": 0.9994203448295593, + "step": 520 + }, + { + "epoch": 0.12004608294930876, + "grad_norm": 0.4064680989752179, + "learning_rate": 1.999463288205821e-06, + "loss": 0.9096299409866333, + "step": 521 + }, + { + "epoch": 0.12027649769585254, + "grad_norm": 0.5675118509929592, + "learning_rate": 1.999450735117734e-06, + "loss": 0.9956046342849731, + "step": 522 + }, + { + "epoch": 0.12050691244239631, + "grad_norm": 0.40854646192247485, + "learning_rate": 1.9994380369606956e-06, + "loss": 1.0336079597473145, + "step": 523 + }, + { + "epoch": 0.12073732718894009, + "grad_norm": 0.4028964743045085, + "learning_rate": 1.99942519373655e-06, + "loss": 0.8828116655349731, + "step": 524 + }, + { + "epoch": 0.12096774193548387, + "grad_norm": 0.4113573248244064, + "learning_rate": 1.9994122054471597e-06, + "loss": 0.8733093738555908, + "step": 525 + }, + { + "epoch": 0.12119815668202764, + "grad_norm": 0.4633889976755098, + "learning_rate": 1.9993990720944114e-06, + "loss": 1.0312494039535522, + "step": 526 + }, + { + "epoch": 0.12142857142857143, + "grad_norm": 0.39342421435973574, + "learning_rate": 1.9993857936802105e-06, + "loss": 0.9229701161384583, + "step": 527 + }, + { + "epoch": 0.12165898617511521, + "grad_norm": 0.4629141668744642, + "learning_rate": 1.9993723702064853e-06, + "loss": 0.8980100154876709, + "step": 528 + }, + { + "epoch": 0.12188940092165898, + "grad_norm": 
0.42208035145091816, + "learning_rate": 1.999358801675183e-06, + "loss": 0.939933180809021, + "step": 529 + }, + { + "epoch": 0.12211981566820276, + "grad_norm": 0.3966309171286601, + "learning_rate": 1.9993450880882733e-06, + "loss": 1.0014444589614868, + "step": 530 + }, + { + "epoch": 0.12235023041474655, + "grad_norm": 0.4166874579150977, + "learning_rate": 1.9993312294477477e-06, + "loss": 0.9995889663696289, + "step": 531 + }, + { + "epoch": 0.12258064516129032, + "grad_norm": 0.37598019229960666, + "learning_rate": 1.9993172257556167e-06, + "loss": 1.0010197162628174, + "step": 532 + }, + { + "epoch": 0.1228110599078341, + "grad_norm": 0.3629842057209114, + "learning_rate": 1.9993030770139135e-06, + "loss": 0.972966194152832, + "step": 533 + }, + { + "epoch": 0.12304147465437788, + "grad_norm": 0.4160633061352588, + "learning_rate": 1.9992887832246917e-06, + "loss": 0.8033444881439209, + "step": 534 + }, + { + "epoch": 0.12327188940092165, + "grad_norm": 0.3895553967201257, + "learning_rate": 1.9992743443900254e-06, + "loss": 0.7532742619514465, + "step": 535 + }, + { + "epoch": 0.12350230414746544, + "grad_norm": 0.46964696388446997, + "learning_rate": 1.9992597605120113e-06, + "loss": 1.058760643005371, + "step": 536 + }, + { + "epoch": 0.12373271889400922, + "grad_norm": 0.37591416731208094, + "learning_rate": 1.9992450315927658e-06, + "loss": 0.8559634685516357, + "step": 537 + }, + { + "epoch": 0.12396313364055299, + "grad_norm": 0.4216079229956694, + "learning_rate": 1.9992301576344267e-06, + "loss": 1.053638219833374, + "step": 538 + }, + { + "epoch": 0.12419354838709677, + "grad_norm": 0.5423293655738015, + "learning_rate": 1.9992151386391528e-06, + "loss": 0.8841970562934875, + "step": 539 + }, + { + "epoch": 0.12442396313364056, + "grad_norm": 0.5667972752402203, + "learning_rate": 1.9991999746091247e-06, + "loss": 0.9355173110961914, + "step": 540 + }, + { + "epoch": 0.12465437788018432, + "grad_norm": 0.43323548094659586, + "learning_rate": 
1.999184665546543e-06, + "loss": 0.9978284239768982, + "step": 541 + }, + { + "epoch": 0.12488479262672811, + "grad_norm": 0.4166718713190779, + "learning_rate": 1.99916921145363e-06, + "loss": 0.8855264782905579, + "step": 542 + }, + { + "epoch": 0.1251152073732719, + "grad_norm": 0.5314416958418489, + "learning_rate": 1.9991536123326283e-06, + "loss": 0.885519802570343, + "step": 543 + }, + { + "epoch": 0.12534562211981568, + "grad_norm": 0.4381118612604031, + "learning_rate": 1.9991378681858024e-06, + "loss": 0.9772528409957886, + "step": 544 + }, + { + "epoch": 0.12557603686635946, + "grad_norm": 0.46876887659201405, + "learning_rate": 1.999121979015438e-06, + "loss": 0.8817745447158813, + "step": 545 + }, + { + "epoch": 0.12580645161290321, + "grad_norm": 0.36530562318650095, + "learning_rate": 1.9991059448238404e-06, + "loss": 0.9374080896377563, + "step": 546 + }, + { + "epoch": 0.126036866359447, + "grad_norm": 0.3669313811039727, + "learning_rate": 1.9990897656133383e-06, + "loss": 0.9174116253852844, + "step": 547 + }, + { + "epoch": 0.12626728110599078, + "grad_norm": 0.401361126928626, + "learning_rate": 1.999073441386279e-06, + "loss": 0.9514039158821106, + "step": 548 + }, + { + "epoch": 0.12649769585253456, + "grad_norm": 0.4665811721686224, + "learning_rate": 1.999056972145032e-06, + "loss": 1.10535728931427, + "step": 549 + }, + { + "epoch": 0.12672811059907835, + "grad_norm": 0.4609610092344924, + "learning_rate": 1.999040357891989e-06, + "loss": 1.0641597509384155, + "step": 550 + }, + { + "epoch": 0.12695852534562213, + "grad_norm": 0.39409304359090785, + "learning_rate": 1.99902359862956e-06, + "loss": 0.9596017599105835, + "step": 551 + }, + { + "epoch": 0.1271889400921659, + "grad_norm": 0.4899166130843387, + "learning_rate": 1.9990066943601777e-06, + "loss": 1.083927869796753, + "step": 552 + }, + { + "epoch": 0.12741935483870967, + "grad_norm": 0.42007806110658624, + "learning_rate": 1.998989645086297e-06, + "loss": 0.9146738052368164, + 
"step": 553 + }, + { + "epoch": 0.12764976958525345, + "grad_norm": 0.41224202627344914, + "learning_rate": 1.998972450810391e-06, + "loss": 0.9038050770759583, + "step": 554 + }, + { + "epoch": 0.12788018433179724, + "grad_norm": 0.45759233489952406, + "learning_rate": 1.9989551115349574e-06, + "loss": 0.973220705986023, + "step": 555 + }, + { + "epoch": 0.12811059907834102, + "grad_norm": 0.424280511041039, + "learning_rate": 1.998937627262511e-06, + "loss": 0.8804281949996948, + "step": 556 + }, + { + "epoch": 0.1283410138248848, + "grad_norm": 0.47603807991909786, + "learning_rate": 1.9989199979955903e-06, + "loss": 1.100919485092163, + "step": 557 + }, + { + "epoch": 0.12857142857142856, + "grad_norm": 0.5871199693144976, + "learning_rate": 1.998902223736755e-06, + "loss": 1.1152353286743164, + "step": 558 + }, + { + "epoch": 0.12880184331797234, + "grad_norm": 0.4236469989661471, + "learning_rate": 1.9988843044885837e-06, + "loss": 1.0721793174743652, + "step": 559 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 0.4234271408586371, + "learning_rate": 1.9988662402536783e-06, + "loss": 0.9035133123397827, + "step": 560 + }, + { + "epoch": 0.1292626728110599, + "grad_norm": 0.4210053632602843, + "learning_rate": 1.9988480310346603e-06, + "loss": 1.0053937435150146, + "step": 561 + }, + { + "epoch": 0.1294930875576037, + "grad_norm": 0.5230478085674195, + "learning_rate": 1.9988296768341728e-06, + "loss": 0.8536228537559509, + "step": 562 + }, + { + "epoch": 0.12972350230414748, + "grad_norm": 0.41493118398063783, + "learning_rate": 1.9988111776548797e-06, + "loss": 0.9673396348953247, + "step": 563 + }, + { + "epoch": 0.12995391705069123, + "grad_norm": 0.36295429679681995, + "learning_rate": 1.998792533499467e-06, + "loss": 0.9402456879615784, + "step": 564 + }, + { + "epoch": 0.13018433179723501, + "grad_norm": 0.3983153697524455, + "learning_rate": 1.99877374437064e-06, + "loss": 0.8900678157806396, + "step": 565 + }, + { + "epoch": 
0.1304147465437788, + "grad_norm": 0.47587146443270817, + "learning_rate": 1.9987548102711264e-06, + "loss": 0.9112892150878906, + "step": 566 + }, + { + "epoch": 0.13064516129032258, + "grad_norm": 0.3969666466780631, + "learning_rate": 1.9987357312036743e-06, + "loss": 0.763452410697937, + "step": 567 + }, + { + "epoch": 0.13087557603686636, + "grad_norm": 0.44573355289133143, + "learning_rate": 1.9987165071710527e-06, + "loss": 1.0410873889923096, + "step": 568 + }, + { + "epoch": 0.13110599078341015, + "grad_norm": 0.389372329442145, + "learning_rate": 1.9986971381760524e-06, + "loss": 1.029583215713501, + "step": 569 + }, + { + "epoch": 0.1313364055299539, + "grad_norm": 0.46351745512727555, + "learning_rate": 1.9986776242214845e-06, + "loss": 0.994928777217865, + "step": 570 + }, + { + "epoch": 0.1315668202764977, + "grad_norm": 0.49139035828687805, + "learning_rate": 1.9986579653101817e-06, + "loss": 1.001985788345337, + "step": 571 + }, + { + "epoch": 0.13179723502304147, + "grad_norm": 0.45831221525956994, + "learning_rate": 1.998638161444997e-06, + "loss": 0.9813050031661987, + "step": 572 + }, + { + "epoch": 0.13202764976958525, + "grad_norm": 0.45157644768988, + "learning_rate": 1.9986182126288053e-06, + "loss": 0.8678451180458069, + "step": 573 + }, + { + "epoch": 0.13225806451612904, + "grad_norm": 0.42446769343835317, + "learning_rate": 1.998598118864502e-06, + "loss": 1.0393729209899902, + "step": 574 + }, + { + "epoch": 0.13248847926267282, + "grad_norm": 0.39102315770569207, + "learning_rate": 1.998577880155004e-06, + "loss": 0.9631935358047485, + "step": 575 + }, + { + "epoch": 0.1327188940092166, + "grad_norm": 0.37655183711017187, + "learning_rate": 1.9985574965032483e-06, + "loss": 0.8480437994003296, + "step": 576 + }, + { + "epoch": 0.13294930875576036, + "grad_norm": 0.432103661547375, + "learning_rate": 1.998536967912194e-06, + "loss": 1.0450071096420288, + "step": 577 + }, + { + "epoch": 0.13317972350230414, + "grad_norm": 
0.5144084645376303, + "learning_rate": 1.9985162943848207e-06, + "loss": 0.9374763369560242, + "step": 578 + }, + { + "epoch": 0.13341013824884793, + "grad_norm": 0.45444537143479036, + "learning_rate": 1.9984954759241293e-06, + "loss": 0.9405182600021362, + "step": 579 + }, + { + "epoch": 0.1336405529953917, + "grad_norm": 0.42824704739155545, + "learning_rate": 1.998474512533141e-06, + "loss": 0.8406375646591187, + "step": 580 + }, + { + "epoch": 0.1338709677419355, + "grad_norm": 0.48847321743709643, + "learning_rate": 1.9984534042148994e-06, + "loss": 0.9323312044143677, + "step": 581 + }, + { + "epoch": 0.13410138248847928, + "grad_norm": 0.43641992007971325, + "learning_rate": 1.998432150972468e-06, + "loss": 1.0704214572906494, + "step": 582 + }, + { + "epoch": 0.13433179723502303, + "grad_norm": 0.38681502715760335, + "learning_rate": 1.9984107528089315e-06, + "loss": 0.8636025190353394, + "step": 583 + }, + { + "epoch": 0.13456221198156681, + "grad_norm": 0.4361205380771583, + "learning_rate": 1.998389209727396e-06, + "loss": 0.9616303443908691, + "step": 584 + }, + { + "epoch": 0.1347926267281106, + "grad_norm": 0.4406937724242653, + "learning_rate": 1.998367521730988e-06, + "loss": 1.0457193851470947, + "step": 585 + }, + { + "epoch": 0.13502304147465438, + "grad_norm": 0.4060450620979949, + "learning_rate": 1.9983456888228566e-06, + "loss": 1.0460572242736816, + "step": 586 + }, + { + "epoch": 0.13525345622119817, + "grad_norm": 0.3771944294411782, + "learning_rate": 1.9983237110061695e-06, + "loss": 0.9693883657455444, + "step": 587 + }, + { + "epoch": 0.13548387096774195, + "grad_norm": 0.4355709352067949, + "learning_rate": 1.9983015882841175e-06, + "loss": 0.8823472857475281, + "step": 588 + }, + { + "epoch": 0.1357142857142857, + "grad_norm": 0.5606637533068962, + "learning_rate": 1.998279320659912e-06, + "loss": 1.1602983474731445, + "step": 589 + }, + { + "epoch": 0.1359447004608295, + "grad_norm": 0.40130101265364443, + "learning_rate": 
1.9982569081367843e-06, + "loss": 0.8191353678703308, + "step": 590 + }, + { + "epoch": 0.13617511520737327, + "grad_norm": 0.40863931644700857, + "learning_rate": 1.9982343507179876e-06, + "loss": 1.141557216644287, + "step": 591 + }, + { + "epoch": 0.13640552995391705, + "grad_norm": 0.4712969186607289, + "learning_rate": 1.998211648406797e-06, + "loss": 0.9688570499420166, + "step": 592 + }, + { + "epoch": 0.13663594470046084, + "grad_norm": 0.4543844570436241, + "learning_rate": 1.9981888012065068e-06, + "loss": 1.0218561887741089, + "step": 593 + }, + { + "epoch": 0.13686635944700462, + "grad_norm": 0.5219271265911207, + "learning_rate": 1.9981658091204334e-06, + "loss": 0.9531952142715454, + "step": 594 + }, + { + "epoch": 0.13709677419354838, + "grad_norm": 0.5314208269690397, + "learning_rate": 1.9981426721519143e-06, + "loss": 1.1421492099761963, + "step": 595 + }, + { + "epoch": 0.13732718894009216, + "grad_norm": 0.3970137466851754, + "learning_rate": 1.9981193903043074e-06, + "loss": 0.8173041343688965, + "step": 596 + }, + { + "epoch": 0.13755760368663594, + "grad_norm": 0.43200053855682263, + "learning_rate": 1.998095963580993e-06, + "loss": 0.8842465877532959, + "step": 597 + }, + { + "epoch": 0.13778801843317973, + "grad_norm": 0.6492506358781442, + "learning_rate": 1.9980723919853703e-06, + "loss": 0.8547788858413696, + "step": 598 + }, + { + "epoch": 0.1380184331797235, + "grad_norm": 0.5287255050220753, + "learning_rate": 1.998048675520861e-06, + "loss": 1.0085712671279907, + "step": 599 + }, + { + "epoch": 0.1382488479262673, + "grad_norm": 0.5226769291219134, + "learning_rate": 1.9980248141909083e-06, + "loss": 0.9276378750801086, + "step": 600 + }, + { + "epoch": 0.13847926267281105, + "grad_norm": 0.44292446989501455, + "learning_rate": 1.998000807998975e-06, + "loss": 0.9236693382263184, + "step": 601 + }, + { + "epoch": 0.13870967741935483, + "grad_norm": 0.43514287150953085, + "learning_rate": 1.9979766569485454e-06, + "loss": 
1.0353924036026, + "step": 602 + }, + { + "epoch": 0.13894009216589862, + "grad_norm": 0.3831256791535214, + "learning_rate": 1.9979523610431246e-06, + "loss": 0.8456567525863647, + "step": 603 + }, + { + "epoch": 0.1391705069124424, + "grad_norm": 0.46736174894260846, + "learning_rate": 1.997927920286241e-06, + "loss": 0.997468888759613, + "step": 604 + }, + { + "epoch": 0.13940092165898618, + "grad_norm": 0.38558572890255066, + "learning_rate": 1.9979033346814397e-06, + "loss": 0.8962260484695435, + "step": 605 + }, + { + "epoch": 0.13963133640552997, + "grad_norm": 0.4829548009529998, + "learning_rate": 1.997878604232291e-06, + "loss": 0.8586266040802002, + "step": 606 + }, + { + "epoch": 0.13986175115207372, + "grad_norm": 0.4776734917637134, + "learning_rate": 1.9978537289423837e-06, + "loss": 0.9639670848846436, + "step": 607 + }, + { + "epoch": 0.1400921658986175, + "grad_norm": 0.4115822234384495, + "learning_rate": 1.9978287088153286e-06, + "loss": 1.005727767944336, + "step": 608 + }, + { + "epoch": 0.1403225806451613, + "grad_norm": 0.44858527541471366, + "learning_rate": 1.9978035438547575e-06, + "loss": 1.148871898651123, + "step": 609 + }, + { + "epoch": 0.14055299539170507, + "grad_norm": 0.4357664217922314, + "learning_rate": 1.9977782340643226e-06, + "loss": 1.0459539890289307, + "step": 610 + }, + { + "epoch": 0.14078341013824885, + "grad_norm": 0.43229915305128663, + "learning_rate": 1.9977527794476985e-06, + "loss": 0.92689448595047, + "step": 611 + }, + { + "epoch": 0.14101382488479264, + "grad_norm": 0.5514025110097415, + "learning_rate": 1.997727180008579e-06, + "loss": 0.9735790491104126, + "step": 612 + }, + { + "epoch": 0.1412442396313364, + "grad_norm": 0.5114055458545007, + "learning_rate": 1.99770143575068e-06, + "loss": 0.8882870674133301, + "step": 613 + }, + { + "epoch": 0.14147465437788018, + "grad_norm": 0.47604166837238787, + "learning_rate": 1.9976755466777386e-06, + "loss": 0.9229795932769775, + "step": 614 + }, + { + "epoch": 
0.14170506912442396, + "grad_norm": 0.39391923738635765, + "learning_rate": 1.997649512793512e-06, + "loss": 0.9097769260406494, + "step": 615 + }, + { + "epoch": 0.14193548387096774, + "grad_norm": 0.429877903042447, + "learning_rate": 1.9976233341017798e-06, + "loss": 0.7751711010932922, + "step": 616 + }, + { + "epoch": 0.14216589861751153, + "grad_norm": 0.4585028421290768, + "learning_rate": 1.9975970106063414e-06, + "loss": 0.9071080684661865, + "step": 617 + }, + { + "epoch": 0.1423963133640553, + "grad_norm": 0.372835081071011, + "learning_rate": 1.997570542311017e-06, + "loss": 0.8444115519523621, + "step": 618 + }, + { + "epoch": 0.14262672811059907, + "grad_norm": 0.46125503087530084, + "learning_rate": 1.9975439292196496e-06, + "loss": 0.9159516096115112, + "step": 619 + }, + { + "epoch": 0.14285714285714285, + "grad_norm": 0.45879242474243875, + "learning_rate": 1.997517171336101e-06, + "loss": 0.9697242975234985, + "step": 620 + }, + { + "epoch": 0.14308755760368663, + "grad_norm": 0.4605305033840643, + "learning_rate": 1.9974902686642557e-06, + "loss": 0.9894170761108398, + "step": 621 + }, + { + "epoch": 0.14331797235023042, + "grad_norm": 0.48517122287493847, + "learning_rate": 1.9974632212080184e-06, + "loss": 1.0364127159118652, + "step": 622 + }, + { + "epoch": 0.1435483870967742, + "grad_norm": 0.39169164845291754, + "learning_rate": 1.997436028971315e-06, + "loss": 0.8980219960212708, + "step": 623 + }, + { + "epoch": 0.14377880184331798, + "grad_norm": 0.4857070397144096, + "learning_rate": 1.9974086919580925e-06, + "loss": 1.0293703079223633, + "step": 624 + }, + { + "epoch": 0.14400921658986174, + "grad_norm": 0.46693238253454916, + "learning_rate": 1.9973812101723186e-06, + "loss": 1.006148099899292, + "step": 625 + }, + { + "epoch": 0.14423963133640552, + "grad_norm": 0.5525790188158035, + "learning_rate": 1.9973535836179825e-06, + "loss": 0.9489799737930298, + "step": 626 + }, + { + "epoch": 0.1444700460829493, + "grad_norm": 
0.3704152285915921, + "learning_rate": 1.997325812299094e-06, + "loss": 0.7601498961448669, + "step": 627 + }, + { + "epoch": 0.1447004608294931, + "grad_norm": 0.6225002321802279, + "learning_rate": 1.9972978962196843e-06, + "loss": 0.8345643281936646, + "step": 628 + }, + { + "epoch": 0.14493087557603687, + "grad_norm": 0.48694459235316484, + "learning_rate": 1.9972698353838053e-06, + "loss": 0.8705894947052002, + "step": 629 + }, + { + "epoch": 0.14516129032258066, + "grad_norm": 0.42033173985472694, + "learning_rate": 1.9972416297955294e-06, + "loss": 0.9515185356140137, + "step": 630 + }, + { + "epoch": 0.1453917050691244, + "grad_norm": 0.38157222553103914, + "learning_rate": 1.9972132794589514e-06, + "loss": 0.7616517543792725, + "step": 631 + }, + { + "epoch": 0.1456221198156682, + "grad_norm": 0.47593248323635307, + "learning_rate": 1.9971847843781862e-06, + "loss": 0.8870444297790527, + "step": 632 + }, + { + "epoch": 0.14585253456221198, + "grad_norm": 0.45987330163099194, + "learning_rate": 1.9971561445573696e-06, + "loss": 0.8709393739700317, + "step": 633 + }, + { + "epoch": 0.14608294930875576, + "grad_norm": 0.35616612587319196, + "learning_rate": 1.997127360000658e-06, + "loss": 0.865444540977478, + "step": 634 + }, + { + "epoch": 0.14631336405529954, + "grad_norm": 0.4431578416665891, + "learning_rate": 1.997098430712231e-06, + "loss": 0.9560728073120117, + "step": 635 + }, + { + "epoch": 0.14654377880184333, + "grad_norm": 0.4107966614124612, + "learning_rate": 1.9970693566962866e-06, + "loss": 0.7579058408737183, + "step": 636 + }, + { + "epoch": 0.14677419354838708, + "grad_norm": 0.4609569973718347, + "learning_rate": 1.997040137957045e-06, + "loss": 1.0709021091461182, + "step": 637 + }, + { + "epoch": 0.14700460829493087, + "grad_norm": 0.5029329480654331, + "learning_rate": 1.9970107744987474e-06, + "loss": 0.9911563396453857, + "step": 638 + }, + { + "epoch": 0.14723502304147465, + "grad_norm": 0.45338591583748106, + "learning_rate": 
1.996981266325655e-06, + "loss": 0.9673472046852112, + "step": 639 + }, + { + "epoch": 0.14746543778801843, + "grad_norm": 0.3918341582647882, + "learning_rate": 1.9969516134420523e-06, + "loss": 0.7728441953659058, + "step": 640 + }, + { + "epoch": 0.14769585253456222, + "grad_norm": 0.532382418423259, + "learning_rate": 1.9969218158522426e-06, + "loss": 1.0198101997375488, + "step": 641 + }, + { + "epoch": 0.147926267281106, + "grad_norm": 0.45259693038053805, + "learning_rate": 1.996891873560551e-06, + "loss": 0.9710760116577148, + "step": 642 + }, + { + "epoch": 0.14815668202764978, + "grad_norm": 0.41281494255735757, + "learning_rate": 1.9968617865713237e-06, + "loss": 0.9956847429275513, + "step": 643 + }, + { + "epoch": 0.14838709677419354, + "grad_norm": 0.40081796016292187, + "learning_rate": 1.996831554888928e-06, + "loss": 1.0974771976470947, + "step": 644 + }, + { + "epoch": 0.14861751152073732, + "grad_norm": 0.5353172634899142, + "learning_rate": 1.9968011785177513e-06, + "loss": 0.914455771446228, + "step": 645 + }, + { + "epoch": 0.1488479262672811, + "grad_norm": 0.5511418094652546, + "learning_rate": 1.9967706574622033e-06, + "loss": 1.1308314800262451, + "step": 646 + }, + { + "epoch": 0.1490783410138249, + "grad_norm": 0.5114786055194052, + "learning_rate": 1.9967399917267142e-06, + "loss": 0.981814444065094, + "step": 647 + }, + { + "epoch": 0.14930875576036867, + "grad_norm": 0.431645238473459, + "learning_rate": 1.9967091813157345e-06, + "loss": 0.874076247215271, + "step": 648 + }, + { + "epoch": 0.14953917050691246, + "grad_norm": 0.39621973386547166, + "learning_rate": 1.9966782262337365e-06, + "loss": 0.8496171832084656, + "step": 649 + }, + { + "epoch": 0.1497695852534562, + "grad_norm": 0.49468581823361646, + "learning_rate": 1.9966471264852136e-06, + "loss": 0.9395674467086792, + "step": 650 + }, + { + "epoch": 0.15, + "grad_norm": 0.4120224768195847, + "learning_rate": 1.99661588207468e-06, + "loss": 0.8363018035888672, + "step": 651 
+ }, + { + "epoch": 0.15023041474654378, + "grad_norm": 0.4552124844336583, + "learning_rate": 1.9965844930066696e-06, + "loss": 1.0035831928253174, + "step": 652 + }, + { + "epoch": 0.15046082949308756, + "grad_norm": 0.3910663219458386, + "learning_rate": 1.99655295928574e-06, + "loss": 1.0316795110702515, + "step": 653 + }, + { + "epoch": 0.15069124423963134, + "grad_norm": 0.4287067909796643, + "learning_rate": 1.9965212809164676e-06, + "loss": 0.9545150995254517, + "step": 654 + }, + { + "epoch": 0.15092165898617513, + "grad_norm": 0.4577708396372056, + "learning_rate": 1.99648945790345e-06, + "loss": 0.993801474571228, + "step": 655 + }, + { + "epoch": 0.15115207373271888, + "grad_norm": 0.4032410507172632, + "learning_rate": 1.9964574902513075e-06, + "loss": 0.8666588664054871, + "step": 656 + }, + { + "epoch": 0.15138248847926267, + "grad_norm": 0.48179220104835324, + "learning_rate": 1.9964253779646787e-06, + "loss": 0.9507651925086975, + "step": 657 + }, + { + "epoch": 0.15161290322580645, + "grad_norm": 0.4899406622119438, + "learning_rate": 1.996393121048226e-06, + "loss": 0.8700851202011108, + "step": 658 + }, + { + "epoch": 0.15184331797235023, + "grad_norm": 0.40256613113119405, + "learning_rate": 1.9963607195066307e-06, + "loss": 0.9966975450515747, + "step": 659 + }, + { + "epoch": 0.15207373271889402, + "grad_norm": 0.44964674216674483, + "learning_rate": 1.9963281733445957e-06, + "loss": 0.9552028179168701, + "step": 660 + }, + { + "epoch": 0.1523041474654378, + "grad_norm": 0.47921018457871023, + "learning_rate": 1.9962954825668456e-06, + "loss": 1.0182740688323975, + "step": 661 + }, + { + "epoch": 0.15253456221198156, + "grad_norm": 0.5096203598929419, + "learning_rate": 1.996262647178125e-06, + "loss": 1.0001778602600098, + "step": 662 + }, + { + "epoch": 0.15276497695852534, + "grad_norm": 0.44730944505165277, + "learning_rate": 1.9962296671832e-06, + "loss": 0.9902865886688232, + "step": 663 + }, + { + "epoch": 0.15299539170506912, + 
"grad_norm": 0.44977913840647327, + "learning_rate": 1.9961965425868575e-06, + "loss": 0.9272845983505249, + "step": 664 + }, + { + "epoch": 0.1532258064516129, + "grad_norm": 0.5011405916103636, + "learning_rate": 1.996163273393906e-06, + "loss": 0.9705777168273926, + "step": 665 + }, + { + "epoch": 0.1534562211981567, + "grad_norm": 0.5035568947424544, + "learning_rate": 1.9961298596091736e-06, + "loss": 0.9472209215164185, + "step": 666 + }, + { + "epoch": 0.15368663594470047, + "grad_norm": 0.3982786140702462, + "learning_rate": 1.9960963012375113e-06, + "loss": 0.9734043478965759, + "step": 667 + }, + { + "epoch": 0.15391705069124423, + "grad_norm": 0.516464356110248, + "learning_rate": 1.9960625982837894e-06, + "loss": 0.8765468001365662, + "step": 668 + }, + { + "epoch": 0.154147465437788, + "grad_norm": 0.6158206412964224, + "learning_rate": 1.9960287507529e-06, + "loss": 1.0224063396453857, + "step": 669 + }, + { + "epoch": 0.1543778801843318, + "grad_norm": 0.4417623345727513, + "learning_rate": 1.995994758649756e-06, + "loss": 0.983299970626831, + "step": 670 + }, + { + "epoch": 0.15460829493087558, + "grad_norm": 0.4680475004359934, + "learning_rate": 1.9959606219792914e-06, + "loss": 1.0524147748947144, + "step": 671 + }, + { + "epoch": 0.15483870967741936, + "grad_norm": 0.45758073401288785, + "learning_rate": 1.9959263407464606e-06, + "loss": 1.1012977361679077, + "step": 672 + }, + { + "epoch": 0.15506912442396314, + "grad_norm": 0.6298296463565526, + "learning_rate": 1.99589191495624e-06, + "loss": 0.8494049310684204, + "step": 673 + }, + { + "epoch": 0.1552995391705069, + "grad_norm": 0.4795860182347848, + "learning_rate": 1.9958573446136263e-06, + "loss": 0.8677045106887817, + "step": 674 + }, + { + "epoch": 0.15552995391705068, + "grad_norm": 0.5514260857685808, + "learning_rate": 1.995822629723638e-06, + "loss": 1.1034941673278809, + "step": 675 + }, + { + "epoch": 0.15576036866359447, + "grad_norm": 0.3959041890885462, + "learning_rate": 
1.9957877702913128e-06, + "loss": 0.8428820371627808, + "step": 676 + }, + { + "epoch": 0.15599078341013825, + "grad_norm": 0.5435721054179383, + "learning_rate": 1.9957527663217107e-06, + "loss": 0.8584408760070801, + "step": 677 + }, + { + "epoch": 0.15622119815668203, + "grad_norm": 0.47044010204436115, + "learning_rate": 1.995717617819913e-06, + "loss": 0.8089514970779419, + "step": 678 + }, + { + "epoch": 0.15645161290322582, + "grad_norm": 0.48360065475233177, + "learning_rate": 1.9956823247910217e-06, + "loss": 0.8459775447845459, + "step": 679 + }, + { + "epoch": 0.15668202764976957, + "grad_norm": 0.441023419118176, + "learning_rate": 1.9956468872401583e-06, + "loss": 1.0583066940307617, + "step": 680 + }, + { + "epoch": 0.15691244239631336, + "grad_norm": 0.4427871322496545, + "learning_rate": 1.995611305172468e-06, + "loss": 0.9396135807037354, + "step": 681 + }, + { + "epoch": 0.15714285714285714, + "grad_norm": 0.4888169944824013, + "learning_rate": 1.995575578593114e-06, + "loss": 1.0143593549728394, + "step": 682 + }, + { + "epoch": 0.15737327188940092, + "grad_norm": 0.44801312951365924, + "learning_rate": 1.9955397075072833e-06, + "loss": 0.8822500109672546, + "step": 683 + }, + { + "epoch": 0.1576036866359447, + "grad_norm": 0.4936771776275296, + "learning_rate": 1.995503691920182e-06, + "loss": 0.8841962218284607, + "step": 684 + }, + { + "epoch": 0.1578341013824885, + "grad_norm": 0.4240877666200064, + "learning_rate": 1.9954675318370374e-06, + "loss": 0.8537080883979797, + "step": 685 + }, + { + "epoch": 0.15806451612903225, + "grad_norm": 0.5056113314098377, + "learning_rate": 1.9954312272630985e-06, + "loss": 1.0292394161224365, + "step": 686 + }, + { + "epoch": 0.15829493087557603, + "grad_norm": 0.5106923922410934, + "learning_rate": 1.995394778203635e-06, + "loss": 0.8741706013679504, + "step": 687 + }, + { + "epoch": 0.1585253456221198, + "grad_norm": 0.47911475912836377, + "learning_rate": 1.995358184663937e-06, + "loss": 
0.9429572820663452, + "step": 688 + }, + { + "epoch": 0.1587557603686636, + "grad_norm": 0.5562334593954328, + "learning_rate": 1.995321446649316e-06, + "loss": 0.9522494077682495, + "step": 689 + }, + { + "epoch": 0.15898617511520738, + "grad_norm": 0.5394048021515351, + "learning_rate": 1.9952845641651046e-06, + "loss": 0.9743782877922058, + "step": 690 + }, + { + "epoch": 0.15921658986175116, + "grad_norm": 0.4663620909245047, + "learning_rate": 1.995247537216657e-06, + "loss": 0.926364541053772, + "step": 691 + }, + { + "epoch": 0.15944700460829492, + "grad_norm": 0.4595450639525932, + "learning_rate": 1.995210365809346e-06, + "loss": 0.8355565071105957, + "step": 692 + }, + { + "epoch": 0.1596774193548387, + "grad_norm": 0.44548737988500176, + "learning_rate": 1.9951730499485684e-06, + "loss": 0.9200692772865295, + "step": 693 + }, + { + "epoch": 0.15990783410138248, + "grad_norm": 0.36513232613054547, + "learning_rate": 1.99513558963974e-06, + "loss": 0.7571361064910889, + "step": 694 + }, + { + "epoch": 0.16013824884792627, + "grad_norm": 0.48187866859107054, + "learning_rate": 1.995097984888298e-06, + "loss": 0.935307502746582, + "step": 695 + }, + { + "epoch": 0.16036866359447005, + "grad_norm": 0.5833897193983939, + "learning_rate": 1.995060235699701e-06, + "loss": 1.1118557453155518, + "step": 696 + }, + { + "epoch": 0.16059907834101383, + "grad_norm": 0.3866866326578979, + "learning_rate": 1.995022342079428e-06, + "loss": 0.8024749755859375, + "step": 697 + }, + { + "epoch": 0.1608294930875576, + "grad_norm": 0.44217187311148026, + "learning_rate": 1.994984304032979e-06, + "loss": 0.9018943309783936, + "step": 698 + }, + { + "epoch": 0.16105990783410137, + "grad_norm": 0.4729402911259197, + "learning_rate": 1.9949461215658757e-06, + "loss": 0.8571128249168396, + "step": 699 + }, + { + "epoch": 0.16129032258064516, + "grad_norm": 0.4822593475964477, + "learning_rate": 1.99490779468366e-06, + "loss": 0.9707971215248108, + "step": 700 + }, + { + "epoch": 
0.16152073732718894, + "grad_norm": 0.4341551988253619, + "learning_rate": 1.994869323391895e-06, + "loss": 0.8157618045806885, + "step": 701 + }, + { + "epoch": 0.16175115207373272, + "grad_norm": 0.4620050649733586, + "learning_rate": 1.994830707696165e-06, + "loss": 0.9009906053543091, + "step": 702 + }, + { + "epoch": 0.1619815668202765, + "grad_norm": 0.5270647594020066, + "learning_rate": 1.9947919476020745e-06, + "loss": 1.0093860626220703, + "step": 703 + }, + { + "epoch": 0.1622119815668203, + "grad_norm": 0.4233068308539462, + "learning_rate": 1.9947530431152494e-06, + "loss": 1.018160343170166, + "step": 704 + }, + { + "epoch": 0.16244239631336405, + "grad_norm": 0.5753809013533212, + "learning_rate": 1.9947139942413378e-06, + "loss": 0.9755370616912842, + "step": 705 + }, + { + "epoch": 0.16267281105990783, + "grad_norm": 0.490686071812002, + "learning_rate": 1.994674800986006e-06, + "loss": 0.9406822919845581, + "step": 706 + }, + { + "epoch": 0.1629032258064516, + "grad_norm": 0.4856505350445516, + "learning_rate": 1.994635463354944e-06, + "loss": 0.9128296971321106, + "step": 707 + }, + { + "epoch": 0.1631336405529954, + "grad_norm": 0.42889971607025285, + "learning_rate": 1.994595981353861e-06, + "loss": 0.929735541343689, + "step": 708 + }, + { + "epoch": 0.16336405529953918, + "grad_norm": 0.5176054911036664, + "learning_rate": 1.994556354988488e-06, + "loss": 0.9021023511886597, + "step": 709 + }, + { + "epoch": 0.16359447004608296, + "grad_norm": 0.46567553841056064, + "learning_rate": 1.994516584264577e-06, + "loss": 0.9187623262405396, + "step": 710 + }, + { + "epoch": 0.16382488479262672, + "grad_norm": 0.4564071002670219, + "learning_rate": 1.9944766691879e-06, + "loss": 0.8283985257148743, + "step": 711 + }, + { + "epoch": 0.1640552995391705, + "grad_norm": 0.5448909609220928, + "learning_rate": 1.994436609764251e-06, + "loss": 1.0592901706695557, + "step": 712 + }, + { + "epoch": 0.16428571428571428, + "grad_norm": 0.5512946720093808, + 
"learning_rate": 1.9943964059994446e-06, + "loss": 0.98726487159729, + "step": 713 + }, + { + "epoch": 0.16451612903225807, + "grad_norm": 0.5060774432164115, + "learning_rate": 1.9943560578993165e-06, + "loss": 0.8761749267578125, + "step": 714 + }, + { + "epoch": 0.16474654377880185, + "grad_norm": 0.4759569802502017, + "learning_rate": 1.9943155654697227e-06, + "loss": 0.878170371055603, + "step": 715 + }, + { + "epoch": 0.16497695852534563, + "grad_norm": 0.5212205127966931, + "learning_rate": 1.9942749287165414e-06, + "loss": 0.9444767236709595, + "step": 716 + }, + { + "epoch": 0.1652073732718894, + "grad_norm": 0.436107073640643, + "learning_rate": 1.9942341476456697e-06, + "loss": 0.8270057439804077, + "step": 717 + }, + { + "epoch": 0.16543778801843317, + "grad_norm": 0.36828111446023454, + "learning_rate": 1.9941932222630284e-06, + "loss": 0.825955867767334, + "step": 718 + }, + { + "epoch": 0.16566820276497696, + "grad_norm": 0.4748059596727922, + "learning_rate": 1.9941521525745564e-06, + "loss": 0.9384286403656006, + "step": 719 + }, + { + "epoch": 0.16589861751152074, + "grad_norm": 0.5968010950850139, + "learning_rate": 1.994110938586216e-06, + "loss": 0.9627010226249695, + "step": 720 + }, + { + "epoch": 0.16612903225806452, + "grad_norm": 0.40665371786149496, + "learning_rate": 1.9940695803039886e-06, + "loss": 0.8436836004257202, + "step": 721 + }, + { + "epoch": 0.1663594470046083, + "grad_norm": 0.48219849106464674, + "learning_rate": 1.994028077733878e-06, + "loss": 1.0689928531646729, + "step": 722 + }, + { + "epoch": 0.16658986175115206, + "grad_norm": 0.4600242469407339, + "learning_rate": 1.993986430881907e-06, + "loss": 0.911309003829956, + "step": 723 + }, + { + "epoch": 0.16682027649769585, + "grad_norm": 0.5404195969690949, + "learning_rate": 1.993944639754122e-06, + "loss": 0.9897152185440063, + "step": 724 + }, + { + "epoch": 0.16705069124423963, + "grad_norm": 0.48212503869308937, + "learning_rate": 1.9939027043565883e-06, + "loss": 
1.0230367183685303, + "step": 725 + }, + { + "epoch": 0.1672811059907834, + "grad_norm": 0.4398728967426152, + "learning_rate": 1.993860624695393e-06, + "loss": 0.8067069053649902, + "step": 726 + }, + { + "epoch": 0.1675115207373272, + "grad_norm": 0.5835576425821721, + "learning_rate": 1.9938184007766434e-06, + "loss": 0.9784343242645264, + "step": 727 + }, + { + "epoch": 0.16774193548387098, + "grad_norm": 0.5139557651921927, + "learning_rate": 1.9937760326064686e-06, + "loss": 0.8617877960205078, + "step": 728 + }, + { + "epoch": 0.16797235023041474, + "grad_norm": 0.5276605551773887, + "learning_rate": 1.9937335201910183e-06, + "loss": 1.0390141010284424, + "step": 729 + }, + { + "epoch": 0.16820276497695852, + "grad_norm": 0.5007165894606777, + "learning_rate": 1.9936908635364633e-06, + "loss": 1.0478965044021606, + "step": 730 + }, + { + "epoch": 0.1684331797235023, + "grad_norm": 0.46789644745982956, + "learning_rate": 1.9936480626489944e-06, + "loss": 0.8396252393722534, + "step": 731 + }, + { + "epoch": 0.16866359447004609, + "grad_norm": 0.4366381763655398, + "learning_rate": 1.9936051175348256e-06, + "loss": 0.8690099120140076, + "step": 732 + }, + { + "epoch": 0.16889400921658987, + "grad_norm": 0.44373038767323764, + "learning_rate": 1.993562028200189e-06, + "loss": 0.944722056388855, + "step": 733 + }, + { + "epoch": 0.16912442396313365, + "grad_norm": 0.4480067961897654, + "learning_rate": 1.9935187946513385e-06, + "loss": 0.7134733200073242, + "step": 734 + }, + { + "epoch": 0.1693548387096774, + "grad_norm": 0.44081731431481436, + "learning_rate": 1.993475416894551e-06, + "loss": 0.8102486729621887, + "step": 735 + }, + { + "epoch": 0.1695852534562212, + "grad_norm": 0.5621249368486638, + "learning_rate": 1.9934318949361215e-06, + "loss": 0.924787163734436, + "step": 736 + }, + { + "epoch": 0.16981566820276497, + "grad_norm": 0.4621168425652111, + "learning_rate": 1.993388228782368e-06, + "loss": 0.9595087766647339, + "step": 737 + }, + { + 
"epoch": 0.17004608294930876, + "grad_norm": 0.4164356485660062, + "learning_rate": 1.993344418439628e-06, + "loss": 0.9949792623519897, + "step": 738 + }, + { + "epoch": 0.17027649769585254, + "grad_norm": 0.6359964400004778, + "learning_rate": 1.9933004639142604e-06, + "loss": 1.0905860662460327, + "step": 739 + }, + { + "epoch": 0.17050691244239632, + "grad_norm": 0.39800173884382345, + "learning_rate": 1.9932563652126455e-06, + "loss": 0.9638324975967407, + "step": 740 + }, + { + "epoch": 0.17073732718894008, + "grad_norm": 0.4909114039853375, + "learning_rate": 1.9932121223411844e-06, + "loss": 0.9434946179389954, + "step": 741 + }, + { + "epoch": 0.17096774193548386, + "grad_norm": 0.49072837958490606, + "learning_rate": 1.9931677353062983e-06, + "loss": 0.9050095081329346, + "step": 742 + }, + { + "epoch": 0.17119815668202765, + "grad_norm": 0.509303736181324, + "learning_rate": 1.9931232041144303e-06, + "loss": 1.0698316097259521, + "step": 743 + }, + { + "epoch": 0.17142857142857143, + "grad_norm": 0.393391743712663, + "learning_rate": 1.993078528772044e-06, + "loss": 0.7938296794891357, + "step": 744 + }, + { + "epoch": 0.1716589861751152, + "grad_norm": 0.46597408496400117, + "learning_rate": 1.993033709285624e-06, + "loss": 0.8485043048858643, + "step": 745 + }, + { + "epoch": 0.171889400921659, + "grad_norm": 0.4736797887475262, + "learning_rate": 1.9929887456616754e-06, + "loss": 0.8605694770812988, + "step": 746 + }, + { + "epoch": 0.17211981566820275, + "grad_norm": 0.40523028160004354, + "learning_rate": 1.9929436379067253e-06, + "loss": 0.7101563215255737, + "step": 747 + }, + { + "epoch": 0.17235023041474654, + "grad_norm": 0.4519555914654837, + "learning_rate": 1.9928983860273205e-06, + "loss": 1.093912959098816, + "step": 748 + }, + { + "epoch": 0.17258064516129032, + "grad_norm": 0.4930830686705908, + "learning_rate": 1.9928529900300294e-06, + "loss": 0.8099753856658936, + "step": 749 + }, + { + "epoch": 0.1728110599078341, + "grad_norm": 
0.3752662958180716, + "learning_rate": 1.992807449921441e-06, + "loss": 0.7816359400749207, + "step": 750 + }, + { + "epoch": 0.17304147465437789, + "grad_norm": 0.5180432792159949, + "learning_rate": 1.9927617657081656e-06, + "loss": 0.8887455463409424, + "step": 751 + }, + { + "epoch": 0.17327188940092167, + "grad_norm": 0.6260862232080928, + "learning_rate": 1.992715937396834e-06, + "loss": 1.0926017761230469, + "step": 752 + }, + { + "epoch": 0.17350230414746542, + "grad_norm": 0.5546410088380269, + "learning_rate": 1.9926699649940985e-06, + "loss": 0.7657707929611206, + "step": 753 + }, + { + "epoch": 0.1737327188940092, + "grad_norm": 0.5766197712214459, + "learning_rate": 1.992623848506632e-06, + "loss": 0.9350340366363525, + "step": 754 + }, + { + "epoch": 0.173963133640553, + "grad_norm": 0.5011774306610247, + "learning_rate": 1.9925775879411276e-06, + "loss": 0.883575439453125, + "step": 755 + }, + { + "epoch": 0.17419354838709677, + "grad_norm": 0.3678933943457833, + "learning_rate": 1.9925311833043e-06, + "loss": 0.814304769039154, + "step": 756 + }, + { + "epoch": 0.17442396313364056, + "grad_norm": 0.5857143887476359, + "learning_rate": 1.992484634602886e-06, + "loss": 0.9263690710067749, + "step": 757 + }, + { + "epoch": 0.17465437788018434, + "grad_norm": 0.49862680540203774, + "learning_rate": 1.9924379418436402e-06, + "loss": 1.0321627855300903, + "step": 758 + }, + { + "epoch": 0.1748847926267281, + "grad_norm": 0.5062063825952041, + "learning_rate": 1.9923911050333413e-06, + "loss": 0.969459056854248, + "step": 759 + }, + { + "epoch": 0.17511520737327188, + "grad_norm": 0.4554436665394103, + "learning_rate": 1.9923441241787874e-06, + "loss": 0.9926396012306213, + "step": 760 + }, + { + "epoch": 0.17534562211981566, + "grad_norm": 0.43315077691547155, + "learning_rate": 1.9922969992867975e-06, + "loss": 0.776180624961853, + "step": 761 + }, + { + "epoch": 0.17557603686635945, + "grad_norm": 0.5350913373105377, + "learning_rate": 
1.992249730364212e-06, + "loss": 0.9413800239562988, + "step": 762 + }, + { + "epoch": 0.17580645161290323, + "grad_norm": 0.48045178893419493, + "learning_rate": 1.9922023174178913e-06, + "loss": 0.8365576267242432, + "step": 763 + }, + { + "epoch": 0.17603686635944701, + "grad_norm": 0.47752363664412967, + "learning_rate": 1.992154760454718e-06, + "loss": 1.023102879524231, + "step": 764 + }, + { + "epoch": 0.17626728110599077, + "grad_norm": 0.6035875388891613, + "learning_rate": 1.9921070594815944e-06, + "loss": 1.079930067062378, + "step": 765 + }, + { + "epoch": 0.17649769585253455, + "grad_norm": 0.44885698296531085, + "learning_rate": 1.9920592145054445e-06, + "loss": 0.8974392414093018, + "step": 766 + }, + { + "epoch": 0.17672811059907834, + "grad_norm": 0.5363940338283703, + "learning_rate": 1.9920112255332133e-06, + "loss": 0.9509298205375671, + "step": 767 + }, + { + "epoch": 0.17695852534562212, + "grad_norm": 0.3960858930926947, + "learning_rate": 1.991963092571866e-06, + "loss": 0.938835620880127, + "step": 768 + }, + { + "epoch": 0.1771889400921659, + "grad_norm": 0.3409332869225393, + "learning_rate": 1.9919148156283888e-06, + "loss": 0.7918044328689575, + "step": 769 + }, + { + "epoch": 0.1774193548387097, + "grad_norm": 0.46985590284048473, + "learning_rate": 1.9918663947097893e-06, + "loss": 0.8235958814620972, + "step": 770 + }, + { + "epoch": 0.17764976958525347, + "grad_norm": 0.4734643903674827, + "learning_rate": 1.9918178298230953e-06, + "loss": 0.9079158902168274, + "step": 771 + }, + { + "epoch": 0.17788018433179723, + "grad_norm": 0.5764167010482935, + "learning_rate": 1.9917691209753563e-06, + "loss": 0.8548607230186462, + "step": 772 + }, + { + "epoch": 0.178110599078341, + "grad_norm": 0.47446352682333093, + "learning_rate": 1.9917202681736428e-06, + "loss": 0.8327757120132446, + "step": 773 + }, + { + "epoch": 0.1783410138248848, + "grad_norm": 0.5415533792438672, + "learning_rate": 1.991671271425045e-06, + "loss": 
1.0511503219604492, + "step": 774 + }, + { + "epoch": 0.17857142857142858, + "grad_norm": 0.4310425860855909, + "learning_rate": 1.991622130736675e-06, + "loss": 0.9168857932090759, + "step": 775 + }, + { + "epoch": 0.17880184331797236, + "grad_norm": 0.44391822434593214, + "learning_rate": 1.9915728461156654e-06, + "loss": 0.8740782737731934, + "step": 776 + }, + { + "epoch": 0.17903225806451614, + "grad_norm": 0.5841506637592749, + "learning_rate": 1.99152341756917e-06, + "loss": 0.9706588983535767, + "step": 777 + }, + { + "epoch": 0.1792626728110599, + "grad_norm": 0.5492923015057676, + "learning_rate": 1.9914738451043627e-06, + "loss": 1.144281268119812, + "step": 778 + }, + { + "epoch": 0.17949308755760368, + "grad_norm": 0.4170516305027483, + "learning_rate": 1.9914241287284403e-06, + "loss": 0.973777174949646, + "step": 779 + }, + { + "epoch": 0.17972350230414746, + "grad_norm": 0.4502683719091688, + "learning_rate": 1.991374268448617e-06, + "loss": 0.9002145528793335, + "step": 780 + }, + { + "epoch": 0.17995391705069125, + "grad_norm": 0.5526460425242373, + "learning_rate": 1.9913242642721316e-06, + "loss": 0.9234670400619507, + "step": 781 + }, + { + "epoch": 0.18018433179723503, + "grad_norm": 0.4959743401985291, + "learning_rate": 1.9912741162062415e-06, + "loss": 0.9552402496337891, + "step": 782 + }, + { + "epoch": 0.18041474654377881, + "grad_norm": 0.5510111451188886, + "learning_rate": 1.9912238242582257e-06, + "loss": 1.0485708713531494, + "step": 783 + }, + { + "epoch": 0.18064516129032257, + "grad_norm": 0.5447745918227888, + "learning_rate": 1.991173388435384e-06, + "loss": 0.9852809906005859, + "step": 784 + }, + { + "epoch": 0.18087557603686635, + "grad_norm": 0.4726322734582533, + "learning_rate": 1.991122808745037e-06, + "loss": 0.7824808359146118, + "step": 785 + }, + { + "epoch": 0.18110599078341014, + "grad_norm": 0.6534462420793078, + "learning_rate": 1.9910720851945268e-06, + "loss": 1.0380492210388184, + "step": 786 + }, + { + 
"epoch": 0.18133640552995392, + "grad_norm": 0.48532232647089923, + "learning_rate": 1.991021217791215e-06, + "loss": 0.9808282256126404, + "step": 787 + }, + { + "epoch": 0.1815668202764977, + "grad_norm": 0.4791928008108061, + "learning_rate": 1.9909702065424854e-06, + "loss": 0.8636116981506348, + "step": 788 + }, + { + "epoch": 0.1817972350230415, + "grad_norm": 0.45783287516468024, + "learning_rate": 1.9909190514557427e-06, + "loss": 0.8179407715797424, + "step": 789 + }, + { + "epoch": 0.18202764976958524, + "grad_norm": 0.4760021295113364, + "learning_rate": 1.990867752538411e-06, + "loss": 0.9424594044685364, + "step": 790 + }, + { + "epoch": 0.18225806451612903, + "grad_norm": 0.5558557995369799, + "learning_rate": 1.9908163097979366e-06, + "loss": 0.9429298043251038, + "step": 791 + }, + { + "epoch": 0.1824884792626728, + "grad_norm": 0.5944732273868478, + "learning_rate": 1.990764723241787e-06, + "loss": 0.9671716690063477, + "step": 792 + }, + { + "epoch": 0.1827188940092166, + "grad_norm": 0.6041148299127167, + "learning_rate": 1.9907129928774494e-06, + "loss": 1.0063345432281494, + "step": 793 + }, + { + "epoch": 0.18294930875576038, + "grad_norm": 0.4817475331580677, + "learning_rate": 1.990661118712432e-06, + "loss": 0.9932061433792114, + "step": 794 + }, + { + "epoch": 0.18317972350230416, + "grad_norm": 0.4648544131499562, + "learning_rate": 1.990609100754265e-06, + "loss": 0.859153151512146, + "step": 795 + }, + { + "epoch": 0.18341013824884791, + "grad_norm": 0.4738825500961963, + "learning_rate": 1.9905569390104984e-06, + "loss": 0.9328111410140991, + "step": 796 + }, + { + "epoch": 0.1836405529953917, + "grad_norm": 0.542624486663781, + "learning_rate": 1.9905046334887033e-06, + "loss": 0.9970628619194031, + "step": 797 + }, + { + "epoch": 0.18387096774193548, + "grad_norm": 0.41971271798029636, + "learning_rate": 1.990452184196472e-06, + "loss": 1.0347282886505127, + "step": 798 + }, + { + "epoch": 0.18410138248847926, + "grad_norm": 
0.4270967132251902, + "learning_rate": 1.990399591141417e-06, + "loss": 0.9167106747627258, + "step": 799 + }, + { + "epoch": 0.18433179723502305, + "grad_norm": 0.5046236893106074, + "learning_rate": 1.990346854331173e-06, + "loss": 0.8895610570907593, + "step": 800 + }, + { + "epoch": 0.18456221198156683, + "grad_norm": 0.5237845429219861, + "learning_rate": 1.990293973773394e-06, + "loss": 0.8525041341781616, + "step": 801 + }, + { + "epoch": 0.1847926267281106, + "grad_norm": 0.4894836264572075, + "learning_rate": 1.9902409494757553e-06, + "loss": 0.8184069395065308, + "step": 802 + }, + { + "epoch": 0.18502304147465437, + "grad_norm": 0.430895578738413, + "learning_rate": 1.9901877814459544e-06, + "loss": 0.8342509269714355, + "step": 803 + }, + { + "epoch": 0.18525345622119815, + "grad_norm": 0.49779999067704434, + "learning_rate": 1.9901344696917072e-06, + "loss": 0.9254395365715027, + "step": 804 + }, + { + "epoch": 0.18548387096774194, + "grad_norm": 0.5124892914660328, + "learning_rate": 1.990081014220753e-06, + "loss": 0.9537396430969238, + "step": 805 + }, + { + "epoch": 0.18571428571428572, + "grad_norm": 0.47100696643896606, + "learning_rate": 1.99002741504085e-06, + "loss": 0.871498167514801, + "step": 806 + }, + { + "epoch": 0.1859447004608295, + "grad_norm": 0.43363760401100476, + "learning_rate": 1.9899736721597786e-06, + "loss": 0.879954993724823, + "step": 807 + }, + { + "epoch": 0.18617511520737326, + "grad_norm": 0.5651525829110051, + "learning_rate": 1.9899197855853386e-06, + "loss": 0.9238240718841553, + "step": 808 + }, + { + "epoch": 0.18640552995391704, + "grad_norm": 0.43185548411741037, + "learning_rate": 1.9898657553253527e-06, + "loss": 0.7939119935035706, + "step": 809 + }, + { + "epoch": 0.18663594470046083, + "grad_norm": 0.42423118388289394, + "learning_rate": 1.989811581387663e-06, + "loss": 0.8536086082458496, + "step": 810 + }, + { + "epoch": 0.1868663594470046, + "grad_norm": 0.7488569193689159, + "learning_rate": 
1.9897572637801322e-06, + "loss": 0.8272225856781006, + "step": 811 + }, + { + "epoch": 0.1870967741935484, + "grad_norm": 0.5639808995976617, + "learning_rate": 1.989702802510645e-06, + "loss": 0.9187904596328735, + "step": 812 + }, + { + "epoch": 0.18732718894009218, + "grad_norm": 0.5096509814307604, + "learning_rate": 1.989648197587106e-06, + "loss": 0.905516505241394, + "step": 813 + }, + { + "epoch": 0.18755760368663593, + "grad_norm": 0.46349746061643887, + "learning_rate": 1.9895934490174415e-06, + "loss": 0.7548567056655884, + "step": 814 + }, + { + "epoch": 0.18778801843317972, + "grad_norm": 0.5916446556749395, + "learning_rate": 1.9895385568095978e-06, + "loss": 0.8242576122283936, + "step": 815 + }, + { + "epoch": 0.1880184331797235, + "grad_norm": 0.47871736963615374, + "learning_rate": 1.9894835209715427e-06, + "loss": 0.9861007928848267, + "step": 816 + }, + { + "epoch": 0.18824884792626728, + "grad_norm": 0.5325996448618295, + "learning_rate": 1.989428341511264e-06, + "loss": 0.9705426096916199, + "step": 817 + }, + { + "epoch": 0.18847926267281107, + "grad_norm": 0.5222036147665577, + "learning_rate": 1.9893730184367722e-06, + "loss": 0.9773565530776978, + "step": 818 + }, + { + "epoch": 0.18870967741935485, + "grad_norm": 0.42837248272258044, + "learning_rate": 1.989317551756096e-06, + "loss": 0.7929856777191162, + "step": 819 + }, + { + "epoch": 0.1889400921658986, + "grad_norm": 0.48925051722314383, + "learning_rate": 1.9892619414772866e-06, + "loss": 0.9749126434326172, + "step": 820 + }, + { + "epoch": 0.1891705069124424, + "grad_norm": 0.49968815355517815, + "learning_rate": 1.9892061876084166e-06, + "loss": 0.9945374727249146, + "step": 821 + }, + { + "epoch": 0.18940092165898617, + "grad_norm": 0.3942389156154952, + "learning_rate": 1.9891502901575776e-06, + "loss": 0.8016892075538635, + "step": 822 + }, + { + "epoch": 0.18963133640552995, + "grad_norm": 0.5604199160430772, + "learning_rate": 1.9890942491328837e-06, + "loss": 
0.9389557838439941, + "step": 823 + }, + { + "epoch": 0.18986175115207374, + "grad_norm": 0.38179956879765936, + "learning_rate": 1.9890380645424686e-06, + "loss": 0.724082887172699, + "step": 824 + }, + { + "epoch": 0.19009216589861752, + "grad_norm": 0.5409880819899738, + "learning_rate": 1.988981736394488e-06, + "loss": 0.8877915143966675, + "step": 825 + }, + { + "epoch": 0.19032258064516128, + "grad_norm": 0.6992705135248997, + "learning_rate": 1.9889252646971177e-06, + "loss": 1.207446813583374, + "step": 826 + }, + { + "epoch": 0.19055299539170506, + "grad_norm": 0.5040994233955279, + "learning_rate": 1.9888686494585542e-06, + "loss": 0.9155057668685913, + "step": 827 + }, + { + "epoch": 0.19078341013824884, + "grad_norm": 0.5532998867192596, + "learning_rate": 1.9888118906870154e-06, + "loss": 1.005772352218628, + "step": 828 + }, + { + "epoch": 0.19101382488479263, + "grad_norm": 0.42790166152469256, + "learning_rate": 1.9887549883907394e-06, + "loss": 0.9060605764389038, + "step": 829 + }, + { + "epoch": 0.1912442396313364, + "grad_norm": 0.5177028577691919, + "learning_rate": 1.988697942577986e-06, + "loss": 0.7652161717414856, + "step": 830 + }, + { + "epoch": 0.1914746543778802, + "grad_norm": 0.5981838434161031, + "learning_rate": 1.9886407532570354e-06, + "loss": 1.0191380977630615, + "step": 831 + }, + { + "epoch": 0.19170506912442398, + "grad_norm": 0.4987711114148914, + "learning_rate": 1.9885834204361876e-06, + "loss": 0.9497933387756348, + "step": 832 + }, + { + "epoch": 0.19193548387096773, + "grad_norm": 0.462035144334916, + "learning_rate": 1.9885259441237657e-06, + "loss": 0.7728058099746704, + "step": 833 + }, + { + "epoch": 0.19216589861751152, + "grad_norm": 0.517810203206895, + "learning_rate": 1.9884683243281113e-06, + "loss": 0.8961999416351318, + "step": 834 + }, + { + "epoch": 0.1923963133640553, + "grad_norm": 0.49386963761649333, + "learning_rate": 1.9884105610575885e-06, + "loss": 0.9218904972076416, + "step": 835 + }, + { + 
"epoch": 0.19262672811059908, + "grad_norm": 0.49785428541631027, + "learning_rate": 1.9883526543205807e-06, + "loss": 0.8411329984664917, + "step": 836 + }, + { + "epoch": 0.19285714285714287, + "grad_norm": 0.42947794662366, + "learning_rate": 1.988294604125494e-06, + "loss": 0.9536285400390625, + "step": 837 + }, + { + "epoch": 0.19308755760368665, + "grad_norm": 0.589338261376726, + "learning_rate": 1.9882364104807535e-06, + "loss": 0.9404321908950806, + "step": 838 + }, + { + "epoch": 0.1933179723502304, + "grad_norm": 0.6889982860652113, + "learning_rate": 1.9881780733948066e-06, + "loss": 1.2520880699157715, + "step": 839 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 0.5071547317768794, + "learning_rate": 1.9881195928761205e-06, + "loss": 0.8961449861526489, + "step": 840 + }, + { + "epoch": 0.19377880184331797, + "grad_norm": 0.5612915327251169, + "learning_rate": 1.9880609689331833e-06, + "loss": 0.8844394683837891, + "step": 841 + }, + { + "epoch": 0.19400921658986175, + "grad_norm": 0.6383643268501873, + "learning_rate": 1.9880022015745044e-06, + "loss": 1.1305835247039795, + "step": 842 + }, + { + "epoch": 0.19423963133640554, + "grad_norm": 0.5396685716999928, + "learning_rate": 1.9879432908086143e-06, + "loss": 0.9980956315994263, + "step": 843 + }, + { + "epoch": 0.19447004608294932, + "grad_norm": 0.46511386172638836, + "learning_rate": 1.987884236644063e-06, + "loss": 0.7613730430603027, + "step": 844 + }, + { + "epoch": 0.19470046082949308, + "grad_norm": 0.6010725617242704, + "learning_rate": 1.987825039089423e-06, + "loss": 0.9742579460144043, + "step": 845 + }, + { + "epoch": 0.19493087557603686, + "grad_norm": 0.4022001131058661, + "learning_rate": 1.9877656981532864e-06, + "loss": 0.7118766903877258, + "step": 846 + }, + { + "epoch": 0.19516129032258064, + "grad_norm": 0.48902949112989696, + "learning_rate": 1.9877062138442657e-06, + "loss": 0.8657095432281494, + "step": 847 + }, + { + "epoch": 0.19539170506912443, + "grad_norm": 
0.42720754806325495, + "learning_rate": 1.987646586170996e-06, + "loss": 0.8543902039527893, + "step": 848 + }, + { + "epoch": 0.1956221198156682, + "grad_norm": 0.4842820004763047, + "learning_rate": 1.9875868151421317e-06, + "loss": 0.8896970748901367, + "step": 849 + }, + { + "epoch": 0.195852534562212, + "grad_norm": 0.5225855938017534, + "learning_rate": 1.9875269007663486e-06, + "loss": 0.8662775754928589, + "step": 850 + }, + { + "epoch": 0.19608294930875575, + "grad_norm": 0.48460338230512107, + "learning_rate": 1.9874668430523434e-06, + "loss": 0.8241516351699829, + "step": 851 + }, + { + "epoch": 0.19631336405529953, + "grad_norm": 0.5278134062893883, + "learning_rate": 1.987406642008833e-06, + "loss": 0.973886251449585, + "step": 852 + }, + { + "epoch": 0.19654377880184332, + "grad_norm": 0.48464213201098744, + "learning_rate": 1.9873462976445554e-06, + "loss": 0.8133533000946045, + "step": 853 + }, + { + "epoch": 0.1967741935483871, + "grad_norm": 0.6657370368562822, + "learning_rate": 1.9872858099682697e-06, + "loss": 1.120869755744934, + "step": 854 + }, + { + "epoch": 0.19700460829493088, + "grad_norm": 0.47886128108046017, + "learning_rate": 1.9872251789887562e-06, + "loss": 0.9376444816589355, + "step": 855 + }, + { + "epoch": 0.19723502304147467, + "grad_norm": 0.4627008078705538, + "learning_rate": 1.9871644047148148e-06, + "loss": 0.8763699531555176, + "step": 856 + }, + { + "epoch": 0.19746543778801842, + "grad_norm": 0.5436736732062664, + "learning_rate": 1.9871034871552667e-06, + "loss": 0.7993260622024536, + "step": 857 + }, + { + "epoch": 0.1976958525345622, + "grad_norm": 0.5225344117964711, + "learning_rate": 1.9870424263189542e-06, + "loss": 1.0312654972076416, + "step": 858 + }, + { + "epoch": 0.197926267281106, + "grad_norm": 0.6040828842975151, + "learning_rate": 1.98698122221474e-06, + "loss": 1.0784629583358765, + "step": 859 + }, + { + "epoch": 0.19815668202764977, + "grad_norm": 0.5681257026488339, + "learning_rate": 
1.9869198748515085e-06, + "loss": 1.136039137840271, + "step": 860 + }, + { + "epoch": 0.19838709677419356, + "grad_norm": 0.5123381612546825, + "learning_rate": 1.986858384238163e-06, + "loss": 0.834873378276825, + "step": 861 + }, + { + "epoch": 0.19861751152073734, + "grad_norm": 0.5505167057841309, + "learning_rate": 1.98679675038363e-06, + "loss": 0.9705442190170288, + "step": 862 + }, + { + "epoch": 0.1988479262672811, + "grad_norm": 0.6567761197272963, + "learning_rate": 1.9867349732968547e-06, + "loss": 0.9343886375427246, + "step": 863 + }, + { + "epoch": 0.19907834101382488, + "grad_norm": 0.49387008808397015, + "learning_rate": 1.986673052986805e-06, + "loss": 0.9140456914901733, + "step": 864 + }, + { + "epoch": 0.19930875576036866, + "grad_norm": 0.5850607327811402, + "learning_rate": 1.986610989462467e-06, + "loss": 0.9121139049530029, + "step": 865 + }, + { + "epoch": 0.19953917050691244, + "grad_norm": 0.4775789448856378, + "learning_rate": 1.9865487827328505e-06, + "loss": 0.7333672642707825, + "step": 866 + }, + { + "epoch": 0.19976958525345623, + "grad_norm": 0.5039450613377916, + "learning_rate": 1.986486432806984e-06, + "loss": 0.8405989408493042, + "step": 867 + }, + { + "epoch": 0.2, + "grad_norm": 0.47371690470710304, + "learning_rate": 1.9864239396939176e-06, + "loss": 0.8693375587463379, + "step": 868 + }, + { + "epoch": 0.20023041474654377, + "grad_norm": 0.5727654616233698, + "learning_rate": 1.9863613034027223e-06, + "loss": 1.0137104988098145, + "step": 869 + }, + { + "epoch": 0.20046082949308755, + "grad_norm": 0.5382771457657299, + "learning_rate": 1.9862985239424895e-06, + "loss": 1.0283832550048828, + "step": 870 + }, + { + "epoch": 0.20069124423963133, + "grad_norm": 0.6200501422886965, + "learning_rate": 1.9862356013223316e-06, + "loss": 1.117444634437561, + "step": 871 + }, + { + "epoch": 0.20092165898617512, + "grad_norm": 0.6309070895129882, + "learning_rate": 1.986172535551382e-06, + "loss": 0.8861427307128906, + "step": 872 
+ }, + { + "epoch": 0.2011520737327189, + "grad_norm": 0.5017852774763055, + "learning_rate": 1.9861093266387946e-06, + "loss": 1.0273747444152832, + "step": 873 + }, + { + "epoch": 0.20138248847926268, + "grad_norm": 0.5141875246573869, + "learning_rate": 1.9860459745937437e-06, + "loss": 0.918023943901062, + "step": 874 + }, + { + "epoch": 0.20161290322580644, + "grad_norm": 0.5278755996885149, + "learning_rate": 1.9859824794254246e-06, + "loss": 0.8983356952667236, + "step": 875 + }, + { + "epoch": 0.20184331797235022, + "grad_norm": 0.5803540160351622, + "learning_rate": 1.985918841143054e-06, + "loss": 1.0180974006652832, + "step": 876 + }, + { + "epoch": 0.202073732718894, + "grad_norm": 0.48253787858386377, + "learning_rate": 1.985855059755869e-06, + "loss": 0.9656573534011841, + "step": 877 + }, + { + "epoch": 0.2023041474654378, + "grad_norm": 0.5015537059540116, + "learning_rate": 1.9857911352731273e-06, + "loss": 0.8522181510925293, + "step": 878 + }, + { + "epoch": 0.20253456221198157, + "grad_norm": 0.4883752495192941, + "learning_rate": 1.985727067704107e-06, + "loss": 0.9180892705917358, + "step": 879 + }, + { + "epoch": 0.20276497695852536, + "grad_norm": 0.5817140345419661, + "learning_rate": 1.985662857058108e-06, + "loss": 0.9979432821273804, + "step": 880 + }, + { + "epoch": 0.2029953917050691, + "grad_norm": 0.5608420179715049, + "learning_rate": 1.98559850334445e-06, + "loss": 0.8916480541229248, + "step": 881 + }, + { + "epoch": 0.2032258064516129, + "grad_norm": 0.41973060059994494, + "learning_rate": 1.9855340065724738e-06, + "loss": 0.8755770921707153, + "step": 882 + }, + { + "epoch": 0.20345622119815668, + "grad_norm": 0.5596516763963291, + "learning_rate": 1.9854693667515418e-06, + "loss": 1.0200350284576416, + "step": 883 + }, + { + "epoch": 0.20368663594470046, + "grad_norm": 0.5199867730002389, + "learning_rate": 1.9854045838910353e-06, + "loss": 0.928024172782898, + "step": 884 + }, + { + "epoch": 0.20391705069124424, + "grad_norm": 
0.5756725941645391, + "learning_rate": 1.9853396580003582e-06, + "loss": 0.8617212176322937, + "step": 885 + }, + { + "epoch": 0.20414746543778803, + "grad_norm": 0.5415263717139983, + "learning_rate": 1.985274589088934e-06, + "loss": 0.9383209943771362, + "step": 886 + }, + { + "epoch": 0.20437788018433178, + "grad_norm": 0.48094986017269503, + "learning_rate": 1.985209377166208e-06, + "loss": 0.7217687368392944, + "step": 887 + }, + { + "epoch": 0.20460829493087557, + "grad_norm": 0.612593081169746, + "learning_rate": 1.9851440222416446e-06, + "loss": 1.0717028379440308, + "step": 888 + }, + { + "epoch": 0.20483870967741935, + "grad_norm": 0.6063882651782059, + "learning_rate": 1.9850785243247303e-06, + "loss": 1.0137064456939697, + "step": 889 + }, + { + "epoch": 0.20506912442396313, + "grad_norm": 0.5244411173844509, + "learning_rate": 1.985012883424973e-06, + "loss": 0.8569058179855347, + "step": 890 + }, + { + "epoch": 0.20529953917050692, + "grad_norm": 0.6524290996376207, + "learning_rate": 1.9849470995518993e-06, + "loss": 0.9398901462554932, + "step": 891 + }, + { + "epoch": 0.2055299539170507, + "grad_norm": 0.3752296846015947, + "learning_rate": 1.9848811727150577e-06, + "loss": 0.731800377368927, + "step": 892 + }, + { + "epoch": 0.20576036866359446, + "grad_norm": 0.5142990565199794, + "learning_rate": 1.984815102924018e-06, + "loss": 0.8543055653572083, + "step": 893 + }, + { + "epoch": 0.20599078341013824, + "grad_norm": 0.5278314343821748, + "learning_rate": 1.98474889018837e-06, + "loss": 0.9112114906311035, + "step": 894 + }, + { + "epoch": 0.20622119815668202, + "grad_norm": 0.50708997202126, + "learning_rate": 1.984682534517724e-06, + "loss": 0.8272690773010254, + "step": 895 + }, + { + "epoch": 0.2064516129032258, + "grad_norm": 0.5912295968473946, + "learning_rate": 1.984616035921712e-06, + "loss": 0.9680918455123901, + "step": 896 + }, + { + "epoch": 0.2066820276497696, + "grad_norm": 0.6089139321115737, + "learning_rate": 
1.984549394409985e-06, + "loss": 0.815123438835144, + "step": 897 + }, + { + "epoch": 0.20691244239631337, + "grad_norm": 0.4952276433479721, + "learning_rate": 1.984482609992218e-06, + "loss": 0.8035521507263184, + "step": 898 + }, + { + "epoch": 0.20714285714285716, + "grad_norm": 0.548354244530079, + "learning_rate": 1.9844156826781027e-06, + "loss": 0.9000132083892822, + "step": 899 + }, + { + "epoch": 0.2073732718894009, + "grad_norm": 0.6652515011666116, + "learning_rate": 1.9843486124773543e-06, + "loss": 1.06328547000885, + "step": 900 + }, + { + "epoch": 0.2076036866359447, + "grad_norm": 0.4596762245312169, + "learning_rate": 1.9842813993997083e-06, + "loss": 0.9028425216674805, + "step": 901 + }, + { + "epoch": 0.20783410138248848, + "grad_norm": 0.5779573613376965, + "learning_rate": 1.9842140434549196e-06, + "loss": 0.7786350250244141, + "step": 902 + }, + { + "epoch": 0.20806451612903226, + "grad_norm": 0.5102795361356062, + "learning_rate": 1.9841465446527656e-06, + "loss": 0.8041539788246155, + "step": 903 + }, + { + "epoch": 0.20829493087557605, + "grad_norm": 0.4348300351835264, + "learning_rate": 1.9840789030030434e-06, + "loss": 0.8380184173583984, + "step": 904 + }, + { + "epoch": 0.20852534562211983, + "grad_norm": 0.7151525379978475, + "learning_rate": 1.984011118515572e-06, + "loss": 0.8191432952880859, + "step": 905 + }, + { + "epoch": 0.20875576036866358, + "grad_norm": 0.5006646807997585, + "learning_rate": 1.9839431912001885e-06, + "loss": 0.8236384391784668, + "step": 906 + }, + { + "epoch": 0.20898617511520737, + "grad_norm": 0.4959155947407375, + "learning_rate": 1.9838751210667534e-06, + "loss": 0.8218076825141907, + "step": 907 + }, + { + "epoch": 0.20921658986175115, + "grad_norm": 0.5127899266702147, + "learning_rate": 1.983806908125147e-06, + "loss": 0.9140353202819824, + "step": 908 + }, + { + "epoch": 0.20944700460829493, + "grad_norm": 0.5063732794644019, + "learning_rate": 1.9837385523852706e-06, + "loss": 0.9179826974868774, 
+ "step": 909 + }, + { + "epoch": 0.20967741935483872, + "grad_norm": 0.5385574519868781, + "learning_rate": 1.9836700538570456e-06, + "loss": 0.8888909816741943, + "step": 910 + }, + { + "epoch": 0.2099078341013825, + "grad_norm": 0.5208969379705799, + "learning_rate": 1.9836014125504143e-06, + "loss": 0.8951253890991211, + "step": 911 + }, + { + "epoch": 0.21013824884792626, + "grad_norm": 0.6093988535410455, + "learning_rate": 1.98353262847534e-06, + "loss": 1.084958553314209, + "step": 912 + }, + { + "epoch": 0.21036866359447004, + "grad_norm": 0.5061127496745415, + "learning_rate": 1.983463701641807e-06, + "loss": 0.8590713739395142, + "step": 913 + }, + { + "epoch": 0.21059907834101382, + "grad_norm": 0.6396228440899432, + "learning_rate": 1.9833946320598195e-06, + "loss": 1.0393706560134888, + "step": 914 + }, + { + "epoch": 0.2108294930875576, + "grad_norm": 0.49567487165870866, + "learning_rate": 1.983325419739403e-06, + "loss": 0.9403085708618164, + "step": 915 + }, + { + "epoch": 0.2110599078341014, + "grad_norm": 0.49912224081019996, + "learning_rate": 1.9832560646906038e-06, + "loss": 0.8431342244148254, + "step": 916 + }, + { + "epoch": 0.21129032258064517, + "grad_norm": 0.5558843704958377, + "learning_rate": 1.9831865669234884e-06, + "loss": 0.9024044871330261, + "step": 917 + }, + { + "epoch": 0.21152073732718893, + "grad_norm": 0.44775113902692637, + "learning_rate": 1.9831169264481443e-06, + "loss": 0.747347354888916, + "step": 918 + }, + { + "epoch": 0.2117511520737327, + "grad_norm": 0.46715914917156914, + "learning_rate": 1.9830471432746796e-06, + "loss": 0.8266197443008423, + "step": 919 + }, + { + "epoch": 0.2119815668202765, + "grad_norm": 0.5566270603086758, + "learning_rate": 1.9829772174132235e-06, + "loss": 0.8633416295051575, + "step": 920 + }, + { + "epoch": 0.21221198156682028, + "grad_norm": 0.5228096908540074, + "learning_rate": 1.9829071488739256e-06, + "loss": 1.0290095806121826, + "step": 921 + }, + { + "epoch": 
0.21244239631336406, + "grad_norm": 0.667274912811163, + "learning_rate": 1.9828369376669566e-06, + "loss": 0.8193448781967163, + "step": 922 + }, + { + "epoch": 0.21267281105990785, + "grad_norm": 0.5677549533509479, + "learning_rate": 1.982766583802507e-06, + "loss": 0.8828415870666504, + "step": 923 + }, + { + "epoch": 0.2129032258064516, + "grad_norm": 0.597806988660978, + "learning_rate": 1.9826960872907885e-06, + "loss": 0.8806191682815552, + "step": 924 + }, + { + "epoch": 0.21313364055299538, + "grad_norm": 0.40902701240404726, + "learning_rate": 1.982625448142034e-06, + "loss": 0.8441533446311951, + "step": 925 + }, + { + "epoch": 0.21336405529953917, + "grad_norm": 0.5142754504345473, + "learning_rate": 1.9825546663664963e-06, + "loss": 0.9084080457687378, + "step": 926 + }, + { + "epoch": 0.21359447004608295, + "grad_norm": 0.7318607240255686, + "learning_rate": 1.98248374197445e-06, + "loss": 0.9005601406097412, + "step": 927 + }, + { + "epoch": 0.21382488479262673, + "grad_norm": 0.48930991442842664, + "learning_rate": 1.9824126749761893e-06, + "loss": 1.0415414571762085, + "step": 928 + }, + { + "epoch": 0.21405529953917052, + "grad_norm": 0.4380456409582823, + "learning_rate": 1.982341465382029e-06, + "loss": 0.8130594491958618, + "step": 929 + }, + { + "epoch": 0.21428571428571427, + "grad_norm": 0.4623167832467728, + "learning_rate": 1.9822701132023053e-06, + "loss": 0.9178205728530884, + "step": 930 + }, + { + "epoch": 0.21451612903225806, + "grad_norm": 0.5894382821211327, + "learning_rate": 1.9821986184473754e-06, + "loss": 0.9927947521209717, + "step": 931 + }, + { + "epoch": 0.21474654377880184, + "grad_norm": 0.5621440238225328, + "learning_rate": 1.982126981127616e-06, + "loss": 0.9172670841217041, + "step": 932 + }, + { + "epoch": 0.21497695852534562, + "grad_norm": 0.5805773191302366, + "learning_rate": 1.9820552012534255e-06, + "loss": 0.9513058066368103, + "step": 933 + }, + { + "epoch": 0.2152073732718894, + "grad_norm": 
0.6596090379041671, + "learning_rate": 1.9819832788352227e-06, + "loss": 1.014827013015747, + "step": 934 + }, + { + "epoch": 0.2154377880184332, + "grad_norm": 0.5483468550441934, + "learning_rate": 1.9819112138834473e-06, + "loss": 1.0225746631622314, + "step": 935 + }, + { + "epoch": 0.21566820276497695, + "grad_norm": 0.46659867801168237, + "learning_rate": 1.9818390064085584e-06, + "loss": 0.8804227113723755, + "step": 936 + }, + { + "epoch": 0.21589861751152073, + "grad_norm": 0.42738644934381204, + "learning_rate": 1.9817666564210376e-06, + "loss": 0.7215760350227356, + "step": 937 + }, + { + "epoch": 0.2161290322580645, + "grad_norm": 0.6620668522422565, + "learning_rate": 1.981694163931387e-06, + "loss": 0.9978986978530884, + "step": 938 + }, + { + "epoch": 0.2163594470046083, + "grad_norm": 0.5846107454293807, + "learning_rate": 1.981621528950128e-06, + "loss": 0.8646233081817627, + "step": 939 + }, + { + "epoch": 0.21658986175115208, + "grad_norm": 0.44150430663795637, + "learning_rate": 1.981548751487803e-06, + "loss": 0.9619132876396179, + "step": 940 + }, + { + "epoch": 0.21682027649769586, + "grad_norm": 0.543839377462045, + "learning_rate": 1.981475831554976e-06, + "loss": 0.9209504127502441, + "step": 941 + }, + { + "epoch": 0.21705069124423962, + "grad_norm": 0.563351483363654, + "learning_rate": 1.9814027691622318e-06, + "loss": 0.7629299163818359, + "step": 942 + }, + { + "epoch": 0.2172811059907834, + "grad_norm": 0.4885334834965844, + "learning_rate": 1.9813295643201747e-06, + "loss": 0.8702583312988281, + "step": 943 + }, + { + "epoch": 0.21751152073732719, + "grad_norm": 0.5579102568918498, + "learning_rate": 1.9812562170394305e-06, + "loss": 0.9571657180786133, + "step": 944 + }, + { + "epoch": 0.21774193548387097, + "grad_norm": 0.43227127189367615, + "learning_rate": 1.9811827273306456e-06, + "loss": 0.7271617650985718, + "step": 945 + }, + { + "epoch": 0.21797235023041475, + "grad_norm": 0.46137899963900864, + "learning_rate": 
1.9811090952044865e-06, + "loss": 0.8189597725868225, + "step": 946 + }, + { + "epoch": 0.21820276497695854, + "grad_norm": 0.49142212284435566, + "learning_rate": 1.981035320671641e-06, + "loss": 0.7933987379074097, + "step": 947 + }, + { + "epoch": 0.2184331797235023, + "grad_norm": 0.48207328184354004, + "learning_rate": 1.9809614037428174e-06, + "loss": 0.9687645435333252, + "step": 948 + }, + { + "epoch": 0.21866359447004607, + "grad_norm": 0.5647695490676888, + "learning_rate": 1.980887344428745e-06, + "loss": 0.8293745517730713, + "step": 949 + }, + { + "epoch": 0.21889400921658986, + "grad_norm": 0.6489579503887147, + "learning_rate": 1.9808131427401727e-06, + "loss": 1.0447471141815186, + "step": 950 + }, + { + "epoch": 0.21912442396313364, + "grad_norm": 0.48010625791746325, + "learning_rate": 1.9807387986878715e-06, + "loss": 0.8916672468185425, + "step": 951 + }, + { + "epoch": 0.21935483870967742, + "grad_norm": 0.5436399520986829, + "learning_rate": 1.980664312282632e-06, + "loss": 0.8380981683731079, + "step": 952 + }, + { + "epoch": 0.2195852534562212, + "grad_norm": 0.4634469099281989, + "learning_rate": 1.9805896835352656e-06, + "loss": 0.887790322303772, + "step": 953 + }, + { + "epoch": 0.21981566820276496, + "grad_norm": 0.5184548533508342, + "learning_rate": 1.9805149124566048e-06, + "loss": 0.8353140950202942, + "step": 954 + }, + { + "epoch": 0.22004608294930875, + "grad_norm": 0.7177333773715296, + "learning_rate": 1.9804399990575026e-06, + "loss": 1.0337531566619873, + "step": 955 + }, + { + "epoch": 0.22027649769585253, + "grad_norm": 0.4262367777660272, + "learning_rate": 1.9803649433488324e-06, + "loss": 0.8845529556274414, + "step": 956 + }, + { + "epoch": 0.2205069124423963, + "grad_norm": 0.4271901286679727, + "learning_rate": 1.9802897453414884e-06, + "loss": 0.7408445477485657, + "step": 957 + }, + { + "epoch": 0.2207373271889401, + "grad_norm": 0.5478873632644168, + "learning_rate": 1.980214405046386e-06, + "loss": 
0.873178243637085, + "step": 958 + }, + { + "epoch": 0.22096774193548388, + "grad_norm": 0.556535747180833, + "learning_rate": 1.98013892247446e-06, + "loss": 1.0207639932632446, + "step": 959 + }, + { + "epoch": 0.22119815668202766, + "grad_norm": 0.5890989419509002, + "learning_rate": 1.980063297636667e-06, + "loss": 0.8626997470855713, + "step": 960 + }, + { + "epoch": 0.22142857142857142, + "grad_norm": 0.5912616927968722, + "learning_rate": 1.9799875305439836e-06, + "loss": 0.8961347341537476, + "step": 961 + }, + { + "epoch": 0.2216589861751152, + "grad_norm": 0.495639914718092, + "learning_rate": 1.9799116212074075e-06, + "loss": 0.8115944862365723, + "step": 962 + }, + { + "epoch": 0.22188940092165899, + "grad_norm": 0.5281413221179645, + "learning_rate": 1.979835569637957e-06, + "loss": 0.8274029493331909, + "step": 963 + }, + { + "epoch": 0.22211981566820277, + "grad_norm": 0.5782364794204825, + "learning_rate": 1.9797593758466706e-06, + "loss": 1.020345687866211, + "step": 964 + }, + { + "epoch": 0.22235023041474655, + "grad_norm": 0.586333023609623, + "learning_rate": 1.979683039844608e-06, + "loss": 0.8164723515510559, + "step": 965 + }, + { + "epoch": 0.22258064516129034, + "grad_norm": 0.48956655235723145, + "learning_rate": 1.979606561642849e-06, + "loss": 0.832849383354187, + "step": 966 + }, + { + "epoch": 0.2228110599078341, + "grad_norm": 0.5810232623043905, + "learning_rate": 1.9795299412524945e-06, + "loss": 0.9765876531600952, + "step": 967 + }, + { + "epoch": 0.22304147465437787, + "grad_norm": 0.5610292572060406, + "learning_rate": 1.9794531786846657e-06, + "loss": 0.9280411005020142, + "step": 968 + }, + { + "epoch": 0.22327188940092166, + "grad_norm": 0.6528516733941818, + "learning_rate": 1.9793762739505042e-06, + "loss": 1.122058629989624, + "step": 969 + }, + { + "epoch": 0.22350230414746544, + "grad_norm": 0.4582570301724996, + "learning_rate": 1.9792992270611737e-06, + "loss": 0.824627161026001, + "step": 970 + }, + { + "epoch": 
0.22373271889400922, + "grad_norm": 0.750391550156154, + "learning_rate": 1.9792220380278565e-06, + "loss": 1.0583840608596802, + "step": 971 + }, + { + "epoch": 0.223963133640553, + "grad_norm": 0.5277817422831291, + "learning_rate": 1.979144706861757e-06, + "loss": 1.053803563117981, + "step": 972 + }, + { + "epoch": 0.22419354838709676, + "grad_norm": 0.5197675200798639, + "learning_rate": 1.9790672335740993e-06, + "loss": 0.8572183847427368, + "step": 973 + }, + { + "epoch": 0.22442396313364055, + "grad_norm": 0.5956201422774761, + "learning_rate": 1.978989618176129e-06, + "loss": 0.7955416440963745, + "step": 974 + }, + { + "epoch": 0.22465437788018433, + "grad_norm": 0.6931203377433601, + "learning_rate": 1.9789118606791113e-06, + "loss": 0.9455063343048096, + "step": 975 + }, + { + "epoch": 0.2248847926267281, + "grad_norm": 0.5553738972507489, + "learning_rate": 1.978833961094333e-06, + "loss": 0.788895845413208, + "step": 976 + }, + { + "epoch": 0.2251152073732719, + "grad_norm": 0.4854852275390097, + "learning_rate": 1.9787559194331014e-06, + "loss": 0.8344719409942627, + "step": 977 + }, + { + "epoch": 0.22534562211981568, + "grad_norm": 0.5098723288351352, + "learning_rate": 1.9786777357067436e-06, + "loss": 0.85140061378479, + "step": 978 + }, + { + "epoch": 0.22557603686635944, + "grad_norm": 0.43945689098482754, + "learning_rate": 1.978599409926608e-06, + "loss": 0.8511399030685425, + "step": 979 + }, + { + "epoch": 0.22580645161290322, + "grad_norm": 0.4893125980217, + "learning_rate": 1.9785209421040636e-06, + "loss": 0.9243351221084595, + "step": 980 + }, + { + "epoch": 0.226036866359447, + "grad_norm": 0.5349074342918002, + "learning_rate": 1.9784423322504996e-06, + "loss": 0.9043580293655396, + "step": 981 + }, + { + "epoch": 0.2262672811059908, + "grad_norm": 0.654146848198394, + "learning_rate": 1.978363580377327e-06, + "loss": 0.854049563407898, + "step": 982 + }, + { + "epoch": 0.22649769585253457, + "grad_norm": 0.43507484708504635, + 
"learning_rate": 1.9782846864959754e-06, + "loss": 0.7785296440124512, + "step": 983 + }, + { + "epoch": 0.22672811059907835, + "grad_norm": 0.5830354059161934, + "learning_rate": 1.9782056506178965e-06, + "loss": 0.8464720845222473, + "step": 984 + }, + { + "epoch": 0.2269585253456221, + "grad_norm": 0.5249975809892665, + "learning_rate": 1.9781264727545624e-06, + "loss": 0.8519179821014404, + "step": 985 + }, + { + "epoch": 0.2271889400921659, + "grad_norm": 0.6176158235785483, + "learning_rate": 1.978047152917466e-06, + "loss": 0.956415057182312, + "step": 986 + }, + { + "epoch": 0.22741935483870968, + "grad_norm": 0.5046722242039021, + "learning_rate": 1.97796769111812e-06, + "loss": 1.028620719909668, + "step": 987 + }, + { + "epoch": 0.22764976958525346, + "grad_norm": 0.4889451789926323, + "learning_rate": 1.9778880873680585e-06, + "loss": 0.8707184195518494, + "step": 988 + }, + { + "epoch": 0.22788018433179724, + "grad_norm": 0.5212071576326044, + "learning_rate": 1.9778083416788355e-06, + "loss": 0.9842795729637146, + "step": 989 + }, + { + "epoch": 0.22811059907834103, + "grad_norm": 0.5963522406410062, + "learning_rate": 1.977728454062026e-06, + "loss": 0.8827522993087769, + "step": 990 + }, + { + "epoch": 0.22834101382488478, + "grad_norm": 0.5285989804764033, + "learning_rate": 1.9776484245292256e-06, + "loss": 0.8608568906784058, + "step": 991 + }, + { + "epoch": 0.22857142857142856, + "grad_norm": 0.7428648265675979, + "learning_rate": 1.977568253092051e-06, + "loss": 0.8512595891952515, + "step": 992 + }, + { + "epoch": 0.22880184331797235, + "grad_norm": 0.520235896024025, + "learning_rate": 1.9774879397621383e-06, + "loss": 0.7335344552993774, + "step": 993 + }, + { + "epoch": 0.22903225806451613, + "grad_norm": 0.6711607827981731, + "learning_rate": 1.9774074845511457e-06, + "loss": 1.0301114320755005, + "step": 994 + }, + { + "epoch": 0.22926267281105991, + "grad_norm": 0.515409965463074, + "learning_rate": 1.97732688747075e-06, + "loss": 
0.9011565446853638, + "step": 995 + }, + { + "epoch": 0.2294930875576037, + "grad_norm": 0.5657170632178228, + "learning_rate": 1.9772461485326507e-06, + "loss": 0.8644282221794128, + "step": 996 + }, + { + "epoch": 0.22972350230414745, + "grad_norm": 0.49795498598042737, + "learning_rate": 1.9771652677485664e-06, + "loss": 0.8107467889785767, + "step": 997 + }, + { + "epoch": 0.22995391705069124, + "grad_norm": 0.5832229133316258, + "learning_rate": 1.9770842451302373e-06, + "loss": 1.0090508460998535, + "step": 998 + }, + { + "epoch": 0.23018433179723502, + "grad_norm": 0.4910768822506593, + "learning_rate": 1.977003080689424e-06, + "loss": 0.8153292536735535, + "step": 999 + }, + { + "epoch": 0.2304147465437788, + "grad_norm": 0.6502643477323704, + "learning_rate": 1.976921774437906e-06, + "loss": 0.8446916341781616, + "step": 1000 + }, + { + "epoch": 0.2306451612903226, + "grad_norm": 0.5179047651030808, + "learning_rate": 1.9768403263874865e-06, + "loss": 0.759350597858429, + "step": 1001 + }, + { + "epoch": 0.23087557603686637, + "grad_norm": 0.5414654559095757, + "learning_rate": 1.9767587365499862e-06, + "loss": 0.9181695580482483, + "step": 1002 + }, + { + "epoch": 0.23110599078341013, + "grad_norm": 0.4755050115257823, + "learning_rate": 1.976677004937249e-06, + "loss": 0.8450978994369507, + "step": 1003 + }, + { + "epoch": 0.2313364055299539, + "grad_norm": 0.5616575268963485, + "learning_rate": 1.9765951315611365e-06, + "loss": 0.775252640247345, + "step": 1004 + }, + { + "epoch": 0.2315668202764977, + "grad_norm": 0.5248180263396327, + "learning_rate": 1.976513116433534e-06, + "loss": 0.8682440519332886, + "step": 1005 + }, + { + "epoch": 0.23179723502304148, + "grad_norm": 0.6093284414229693, + "learning_rate": 1.9764309595663457e-06, + "loss": 1.0701451301574707, + "step": 1006 + }, + { + "epoch": 0.23202764976958526, + "grad_norm": 0.5747684398408948, + "learning_rate": 1.976348660971496e-06, + "loss": 0.9381946921348572, + "step": 1007 + }, + { + 
"epoch": 0.23225806451612904, + "grad_norm": 0.5225356801303237, + "learning_rate": 1.976266220660931e-06, + "loss": 0.7836539149284363, + "step": 1008 + }, + { + "epoch": 0.2324884792626728, + "grad_norm": 0.5379097818020191, + "learning_rate": 1.9761836386466156e-06, + "loss": 0.9271948337554932, + "step": 1009 + }, + { + "epoch": 0.23271889400921658, + "grad_norm": 0.514797473753123, + "learning_rate": 1.976100914940538e-06, + "loss": 0.8268035650253296, + "step": 1010 + }, + { + "epoch": 0.23294930875576036, + "grad_norm": 0.5105764513310544, + "learning_rate": 1.976018049554705e-06, + "loss": 0.8266786336898804, + "step": 1011 + }, + { + "epoch": 0.23317972350230415, + "grad_norm": 0.6250953922330988, + "learning_rate": 1.9759350425011435e-06, + "loss": 0.9437457323074341, + "step": 1012 + }, + { + "epoch": 0.23341013824884793, + "grad_norm": 0.5629533372281755, + "learning_rate": 1.9758518937919033e-06, + "loss": 0.9078803062438965, + "step": 1013 + }, + { + "epoch": 0.23364055299539171, + "grad_norm": 0.5994095472581402, + "learning_rate": 1.975768603439052e-06, + "loss": 0.9873687624931335, + "step": 1014 + }, + { + "epoch": 0.23387096774193547, + "grad_norm": 0.5010269853722422, + "learning_rate": 1.97568517145468e-06, + "loss": 0.9450196027755737, + "step": 1015 + }, + { + "epoch": 0.23410138248847925, + "grad_norm": 0.5173338079683222, + "learning_rate": 1.975601597850897e-06, + "loss": 0.8804495334625244, + "step": 1016 + }, + { + "epoch": 0.23433179723502304, + "grad_norm": 0.5286639294307074, + "learning_rate": 1.9755178826398333e-06, + "loss": 0.9646104574203491, + "step": 1017 + }, + { + "epoch": 0.23456221198156682, + "grad_norm": 0.5917923655178416, + "learning_rate": 1.9754340258336403e-06, + "loss": 0.9829385280609131, + "step": 1018 + }, + { + "epoch": 0.2347926267281106, + "grad_norm": 0.5022802882731887, + "learning_rate": 1.97535002744449e-06, + "loss": 0.8433707356452942, + "step": 1019 + }, + { + "epoch": 0.2350230414746544, + "grad_norm": 
0.5984717862988072, + "learning_rate": 1.9752658874845744e-06, + "loss": 0.9892767071723938, + "step": 1020 + }, + { + "epoch": 0.23525345622119814, + "grad_norm": 0.5038568694461213, + "learning_rate": 1.9751816059661065e-06, + "loss": 0.8367536664009094, + "step": 1021 + }, + { + "epoch": 0.23548387096774193, + "grad_norm": 0.6009503951092086, + "learning_rate": 1.9750971829013194e-06, + "loss": 0.8947298526763916, + "step": 1022 + }, + { + "epoch": 0.2357142857142857, + "grad_norm": 0.4955473883987944, + "learning_rate": 1.975012618302467e-06, + "loss": 0.9218910336494446, + "step": 1023 + }, + { + "epoch": 0.2359447004608295, + "grad_norm": 0.46527028147066757, + "learning_rate": 1.9749279121818236e-06, + "loss": 0.8744943141937256, + "step": 1024 + }, + { + "epoch": 0.23617511520737328, + "grad_norm": 0.5457797851350515, + "learning_rate": 1.9748430645516845e-06, + "loss": 0.9023007154464722, + "step": 1025 + }, + { + "epoch": 0.23640552995391706, + "grad_norm": 0.5361296427556177, + "learning_rate": 1.974758075424365e-06, + "loss": 0.8475106954574585, + "step": 1026 + }, + { + "epoch": 0.23663594470046084, + "grad_norm": 0.5535275060374267, + "learning_rate": 1.9746729448122013e-06, + "loss": 0.8594635725021362, + "step": 1027 + }, + { + "epoch": 0.2368663594470046, + "grad_norm": 0.6574105474773485, + "learning_rate": 1.97458767272755e-06, + "loss": 0.9601756930351257, + "step": 1028 + }, + { + "epoch": 0.23709677419354838, + "grad_norm": 0.5454698959338334, + "learning_rate": 1.9745022591827886e-06, + "loss": 0.9281105399131775, + "step": 1029 + }, + { + "epoch": 0.23732718894009217, + "grad_norm": 0.4631930883062957, + "learning_rate": 1.9744167041903136e-06, + "loss": 0.8240020275115967, + "step": 1030 + }, + { + "epoch": 0.23755760368663595, + "grad_norm": 0.5116113956014486, + "learning_rate": 1.9743310077625446e-06, + "loss": 0.807030200958252, + "step": 1031 + }, + { + "epoch": 0.23778801843317973, + "grad_norm": 0.5399356518827937, + "learning_rate": 
1.9742451699119194e-06, + "loss": 0.8044267892837524, + "step": 1032 + }, + { + "epoch": 0.23801843317972352, + "grad_norm": 0.5022311335968053, + "learning_rate": 1.9741591906508975e-06, + "loss": 0.9198760390281677, + "step": 1033 + }, + { + "epoch": 0.23824884792626727, + "grad_norm": 0.6382005412114766, + "learning_rate": 1.974073069991959e-06, + "loss": 0.7951973676681519, + "step": 1034 + }, + { + "epoch": 0.23847926267281105, + "grad_norm": 0.5488288386867366, + "learning_rate": 1.9739868079476035e-06, + "loss": 0.8366928100585938, + "step": 1035 + }, + { + "epoch": 0.23870967741935484, + "grad_norm": 0.5327938531465227, + "learning_rate": 1.9739004045303524e-06, + "loss": 0.9644484519958496, + "step": 1036 + }, + { + "epoch": 0.23894009216589862, + "grad_norm": 0.47502000880743445, + "learning_rate": 1.9738138597527464e-06, + "loss": 0.8332105875015259, + "step": 1037 + }, + { + "epoch": 0.2391705069124424, + "grad_norm": 0.4812648524584188, + "learning_rate": 1.9737271736273482e-06, + "loss": 0.8923197388648987, + "step": 1038 + }, + { + "epoch": 0.2394009216589862, + "grad_norm": 0.48693803999160823, + "learning_rate": 1.97364034616674e-06, + "loss": 0.861129879951477, + "step": 1039 + }, + { + "epoch": 0.23963133640552994, + "grad_norm": 0.49858003070315154, + "learning_rate": 1.973553377383524e-06, + "loss": 0.8042281270027161, + "step": 1040 + }, + { + "epoch": 0.23986175115207373, + "grad_norm": 0.603264823916037, + "learning_rate": 1.9734662672903247e-06, + "loss": 1.0315792560577393, + "step": 1041 + }, + { + "epoch": 0.2400921658986175, + "grad_norm": 0.524902457294173, + "learning_rate": 1.973379015899785e-06, + "loss": 0.8165839910507202, + "step": 1042 + }, + { + "epoch": 0.2403225806451613, + "grad_norm": 0.5868579839473654, + "learning_rate": 1.97329162322457e-06, + "loss": 1.0002663135528564, + "step": 1043 + }, + { + "epoch": 0.24055299539170508, + "grad_norm": 0.579630177733921, + "learning_rate": 1.9732040892773642e-06, + "loss": 
0.9340938925743103, + "step": 1044 + }, + { + "epoch": 0.24078341013824886, + "grad_norm": 0.40394518210500746, + "learning_rate": 1.973116414070873e-06, + "loss": 0.7457709312438965, + "step": 1045 + }, + { + "epoch": 0.24101382488479262, + "grad_norm": 0.5468265646556031, + "learning_rate": 1.9730285976178227e-06, + "loss": 0.846583366394043, + "step": 1046 + }, + { + "epoch": 0.2412442396313364, + "grad_norm": 0.597351972991794, + "learning_rate": 1.9729406399309594e-06, + "loss": 0.9701514840126038, + "step": 1047 + }, + { + "epoch": 0.24147465437788018, + "grad_norm": 0.430042606733588, + "learning_rate": 1.9728525410230506e-06, + "loss": 0.7943054437637329, + "step": 1048 + }, + { + "epoch": 0.24170506912442397, + "grad_norm": 0.690774172762037, + "learning_rate": 1.972764300906883e-06, + "loss": 0.8885551691055298, + "step": 1049 + }, + { + "epoch": 0.24193548387096775, + "grad_norm": 0.522936671850185, + "learning_rate": 1.9726759195952653e-06, + "loss": 0.8258899450302124, + "step": 1050 + }, + { + "epoch": 0.24216589861751153, + "grad_norm": 0.586622666679495, + "learning_rate": 1.9725873971010255e-06, + "loss": 1.0085303783416748, + "step": 1051 + }, + { + "epoch": 0.2423963133640553, + "grad_norm": 0.49596210148454095, + "learning_rate": 1.9724987334370124e-06, + "loss": 0.814777135848999, + "step": 1052 + }, + { + "epoch": 0.24262672811059907, + "grad_norm": 0.5592433145931486, + "learning_rate": 1.9724099286160953e-06, + "loss": 0.8328995704650879, + "step": 1053 + }, + { + "epoch": 0.24285714285714285, + "grad_norm": 0.5857793622474846, + "learning_rate": 1.9723209826511645e-06, + "loss": 0.8699138164520264, + "step": 1054 + }, + { + "epoch": 0.24308755760368664, + "grad_norm": 0.5678867062742812, + "learning_rate": 1.9722318955551303e-06, + "loss": 0.8298562169075012, + "step": 1055 + }, + { + "epoch": 0.24331797235023042, + "grad_norm": 0.5976489688453608, + "learning_rate": 1.9721426673409236e-06, + "loss": 0.9470195770263672, + "step": 1056 + }, 
+ { + "epoch": 0.2435483870967742, + "grad_norm": 0.48875505327809854, + "learning_rate": 1.9720532980214955e-06, + "loss": 0.7733730673789978, + "step": 1057 + }, + { + "epoch": 0.24377880184331796, + "grad_norm": 0.46823524678841166, + "learning_rate": 1.9719637876098184e-06, + "loss": 0.7761770486831665, + "step": 1058 + }, + { + "epoch": 0.24400921658986174, + "grad_norm": 0.445725356281168, + "learning_rate": 1.971874136118884e-06, + "loss": 0.9270585775375366, + "step": 1059 + }, + { + "epoch": 0.24423963133640553, + "grad_norm": 0.42406381632115403, + "learning_rate": 1.971784343561705e-06, + "loss": 0.906977653503418, + "step": 1060 + }, + { + "epoch": 0.2444700460829493, + "grad_norm": 0.6412884076264423, + "learning_rate": 1.971694409951316e-06, + "loss": 0.9668625593185425, + "step": 1061 + }, + { + "epoch": 0.2447004608294931, + "grad_norm": 0.49415949875048953, + "learning_rate": 1.971604335300769e-06, + "loss": 0.8215349316596985, + "step": 1062 + }, + { + "epoch": 0.24493087557603688, + "grad_norm": 0.5322070043492434, + "learning_rate": 1.971514119623139e-06, + "loss": 0.8351551294326782, + "step": 1063 + }, + { + "epoch": 0.24516129032258063, + "grad_norm": 0.47999809865085763, + "learning_rate": 1.9714237629315206e-06, + "loss": 0.8778517246246338, + "step": 1064 + }, + { + "epoch": 0.24539170506912442, + "grad_norm": 0.5396014898113735, + "learning_rate": 1.9713332652390293e-06, + "loss": 0.9415761232376099, + "step": 1065 + }, + { + "epoch": 0.2456221198156682, + "grad_norm": 0.5420605598116663, + "learning_rate": 1.9712426265588e-06, + "loss": 0.9040292501449585, + "step": 1066 + }, + { + "epoch": 0.24585253456221198, + "grad_norm": 0.6005715295467339, + "learning_rate": 1.9711518469039894e-06, + "loss": 0.8886675834655762, + "step": 1067 + }, + { + "epoch": 0.24608294930875577, + "grad_norm": 0.6273079636247865, + "learning_rate": 1.971060926287774e-06, + "loss": 0.8439750671386719, + "step": 1068 + }, + { + "epoch": 0.24631336405529955, + 
"grad_norm": 0.5872743245126388, + "learning_rate": 1.9709698647233507e-06, + "loss": 0.8698763251304626, + "step": 1069 + }, + { + "epoch": 0.2465437788018433, + "grad_norm": 0.5858508124188764, + "learning_rate": 1.970878662223937e-06, + "loss": 0.7866508364677429, + "step": 1070 + }, + { + "epoch": 0.2467741935483871, + "grad_norm": 0.46529709331014274, + "learning_rate": 1.97078731880277e-06, + "loss": 0.8652541637420654, + "step": 1071 + }, + { + "epoch": 0.24700460829493087, + "grad_norm": 0.4617144249036463, + "learning_rate": 1.97069583447311e-06, + "loss": 0.8614386320114136, + "step": 1072 + }, + { + "epoch": 0.24723502304147466, + "grad_norm": 0.5647954006429063, + "learning_rate": 1.970604209248234e-06, + "loss": 0.9367830753326416, + "step": 1073 + }, + { + "epoch": 0.24746543778801844, + "grad_norm": 0.5744177103855904, + "learning_rate": 1.9705124431414417e-06, + "loss": 0.8851934671401978, + "step": 1074 + }, + { + "epoch": 0.24769585253456222, + "grad_norm": 0.49563724633359013, + "learning_rate": 1.9704205361660534e-06, + "loss": 0.9619653224945068, + "step": 1075 + }, + { + "epoch": 0.24792626728110598, + "grad_norm": 0.5649060756387019, + "learning_rate": 1.9703284883354094e-06, + "loss": 0.8826392889022827, + "step": 1076 + }, + { + "epoch": 0.24815668202764976, + "grad_norm": 0.6563751938003036, + "learning_rate": 1.970236299662869e-06, + "loss": 0.9075444340705872, + "step": 1077 + }, + { + "epoch": 0.24838709677419354, + "grad_norm": 0.5796370649143662, + "learning_rate": 1.9701439701618147e-06, + "loss": 1.048058032989502, + "step": 1078 + }, + { + "epoch": 0.24861751152073733, + "grad_norm": 0.5313768074192232, + "learning_rate": 1.970051499845647e-06, + "loss": 0.8460798263549805, + "step": 1079 + }, + { + "epoch": 0.2488479262672811, + "grad_norm": 0.7193266180122563, + "learning_rate": 1.9699588887277886e-06, + "loss": 0.9410982131958008, + "step": 1080 + }, + { + "epoch": 0.2490783410138249, + "grad_norm": 0.5102129399153178, + 
"learning_rate": 1.9698661368216816e-06, + "loss": 0.8247401714324951, + "step": 1081 + }, + { + "epoch": 0.24930875576036865, + "grad_norm": 0.5269386839997043, + "learning_rate": 1.969773244140789e-06, + "loss": 0.8543484210968018, + "step": 1082 + }, + { + "epoch": 0.24953917050691243, + "grad_norm": 0.6681776129080308, + "learning_rate": 1.9696802106985933e-06, + "loss": 0.9339861273765564, + "step": 1083 + }, + { + "epoch": 0.24976958525345622, + "grad_norm": 0.6394378735221973, + "learning_rate": 1.969587036508599e-06, + "loss": 0.8268687725067139, + "step": 1084 + }, + { + "epoch": 0.25, + "grad_norm": 0.5565533707237263, + "learning_rate": 1.96949372158433e-06, + "loss": 0.9990735054016113, + "step": 1085 + }, + { + "epoch": 0.2502304147465438, + "grad_norm": 0.5875792221187977, + "learning_rate": 1.9694002659393305e-06, + "loss": 0.871169924736023, + "step": 1086 + }, + { + "epoch": 0.25046082949308757, + "grad_norm": 0.5066699305192991, + "learning_rate": 1.9693066695871657e-06, + "loss": 0.9275476932525635, + "step": 1087 + }, + { + "epoch": 0.25069124423963135, + "grad_norm": 0.5987932412868929, + "learning_rate": 1.969212932541421e-06, + "loss": 0.802006721496582, + "step": 1088 + }, + { + "epoch": 0.25092165898617513, + "grad_norm": 0.6594060142183631, + "learning_rate": 1.9691190548157023e-06, + "loss": 1.158774495124817, + "step": 1089 + }, + { + "epoch": 0.2511520737327189, + "grad_norm": 0.5926971423347241, + "learning_rate": 1.969025036423636e-06, + "loss": 0.8979278802871704, + "step": 1090 + }, + { + "epoch": 0.2513824884792627, + "grad_norm": 0.48149308442816224, + "learning_rate": 1.968930877378868e-06, + "loss": 0.9486579895019531, + "step": 1091 + }, + { + "epoch": 0.25161290322580643, + "grad_norm": 0.5203236583717573, + "learning_rate": 1.968836577695066e-06, + "loss": 0.8661590814590454, + "step": 1092 + }, + { + "epoch": 0.2518433179723502, + "grad_norm": 0.5636787742284843, + "learning_rate": 1.9687421373859173e-06, + "loss": 
0.9224900007247925, + "step": 1093 + }, + { + "epoch": 0.252073732718894, + "grad_norm": 0.6117977186323622, + "learning_rate": 1.96864755646513e-06, + "loss": 0.9563734531402588, + "step": 1094 + }, + { + "epoch": 0.2523041474654378, + "grad_norm": 0.535175631127211, + "learning_rate": 1.968552834946432e-06, + "loss": 0.7457284927368164, + "step": 1095 + }, + { + "epoch": 0.25253456221198156, + "grad_norm": 0.5387959310508903, + "learning_rate": 1.9684579728435727e-06, + "loss": 0.8763077259063721, + "step": 1096 + }, + { + "epoch": 0.25276497695852534, + "grad_norm": 0.5765732282352442, + "learning_rate": 1.9683629701703203e-06, + "loss": 0.8476013541221619, + "step": 1097 + }, + { + "epoch": 0.25299539170506913, + "grad_norm": 0.6265041816963897, + "learning_rate": 1.9682678269404647e-06, + "loss": 0.9706464409828186, + "step": 1098 + }, + { + "epoch": 0.2532258064516129, + "grad_norm": 0.5592313042434921, + "learning_rate": 1.968172543167816e-06, + "loss": 0.9898370504379272, + "step": 1099 + }, + { + "epoch": 0.2534562211981567, + "grad_norm": 0.5273265970472166, + "learning_rate": 1.9680771188662043e-06, + "loss": 0.9073352813720703, + "step": 1100 + }, + { + "epoch": 0.2536866359447005, + "grad_norm": 0.5101975110861352, + "learning_rate": 1.9679815540494805e-06, + "loss": 0.698054850101471, + "step": 1101 + }, + { + "epoch": 0.25391705069124426, + "grad_norm": 0.5334723333803978, + "learning_rate": 1.967885848731515e-06, + "loss": 0.8755865097045898, + "step": 1102 + }, + { + "epoch": 0.25414746543778804, + "grad_norm": 0.7353231676630018, + "learning_rate": 1.9677900029262004e-06, + "loss": 0.8884447813034058, + "step": 1103 + }, + { + "epoch": 0.2543778801843318, + "grad_norm": 0.48855032311862734, + "learning_rate": 1.967694016647448e-06, + "loss": 0.738738477230072, + "step": 1104 + }, + { + "epoch": 0.25460829493087556, + "grad_norm": 0.5363150933196312, + "learning_rate": 1.96759788990919e-06, + "loss": 0.8024383783340454, + "step": 1105 + }, + { + 
"epoch": 0.25483870967741934, + "grad_norm": 0.703802110686274, + "learning_rate": 1.967501622725379e-06, + "loss": 0.8780910968780518, + "step": 1106 + }, + { + "epoch": 0.2550691244239631, + "grad_norm": 0.47799328608287317, + "learning_rate": 1.967405215109989e-06, + "loss": 0.8709204196929932, + "step": 1107 + }, + { + "epoch": 0.2552995391705069, + "grad_norm": 0.5771096865101828, + "learning_rate": 1.9673086670770122e-06, + "loss": 0.8838910460472107, + "step": 1108 + }, + { + "epoch": 0.2555299539170507, + "grad_norm": 0.6122299943883392, + "learning_rate": 1.967211978640463e-06, + "loss": 0.9310617446899414, + "step": 1109 + }, + { + "epoch": 0.2557603686635945, + "grad_norm": 0.5172180782022067, + "learning_rate": 1.9671151498143756e-06, + "loss": 0.8453254699707031, + "step": 1110 + }, + { + "epoch": 0.25599078341013826, + "grad_norm": 0.6724028308795985, + "learning_rate": 1.967018180612804e-06, + "loss": 1.0201973915100098, + "step": 1111 + }, + { + "epoch": 0.25622119815668204, + "grad_norm": 0.5304279166188671, + "learning_rate": 1.9669210710498242e-06, + "loss": 0.84140944480896, + "step": 1112 + }, + { + "epoch": 0.2564516129032258, + "grad_norm": 0.5850181467371437, + "learning_rate": 1.9668238211395308e-06, + "loss": 0.9012273550033569, + "step": 1113 + }, + { + "epoch": 0.2566820276497696, + "grad_norm": 0.5516270166899023, + "learning_rate": 1.9667264308960394e-06, + "loss": 0.820103645324707, + "step": 1114 + }, + { + "epoch": 0.2569124423963134, + "grad_norm": 0.7253674338479518, + "learning_rate": 1.9666289003334868e-06, + "loss": 1.0709048509597778, + "step": 1115 + }, + { + "epoch": 0.2571428571428571, + "grad_norm": 0.6606805333344365, + "learning_rate": 1.966531229466029e-06, + "loss": 0.9408602714538574, + "step": 1116 + }, + { + "epoch": 0.2573732718894009, + "grad_norm": 0.7074764796406602, + "learning_rate": 1.9664334183078425e-06, + "loss": 0.967316210269928, + "step": 1117 + }, + { + "epoch": 0.2576036866359447, + "grad_norm": 
0.7069704403267734, + "learning_rate": 1.9663354668731248e-06, + "loss": 0.9483754634857178, + "step": 1118 + }, + { + "epoch": 0.25783410138248847, + "grad_norm": 0.7072881911304519, + "learning_rate": 1.966237375176093e-06, + "loss": 0.7978509664535522, + "step": 1119 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.5719987288484106, + "learning_rate": 1.9661391432309862e-06, + "loss": 0.8720531463623047, + "step": 1120 + }, + { + "epoch": 0.25829493087557603, + "grad_norm": 0.6673697559796071, + "learning_rate": 1.966040771052061e-06, + "loss": 0.7984024286270142, + "step": 1121 + }, + { + "epoch": 0.2585253456221198, + "grad_norm": 0.5693036626081565, + "learning_rate": 1.965942258653597e-06, + "loss": 0.9255385398864746, + "step": 1122 + }, + { + "epoch": 0.2587557603686636, + "grad_norm": 0.5886763980683305, + "learning_rate": 1.9658436060498927e-06, + "loss": 0.9028007984161377, + "step": 1123 + }, + { + "epoch": 0.2589861751152074, + "grad_norm": 0.5256574840125579, + "learning_rate": 1.9657448132552677e-06, + "loss": 0.8773014545440674, + "step": 1124 + }, + { + "epoch": 0.25921658986175117, + "grad_norm": 0.5356122505196939, + "learning_rate": 1.9656458802840617e-06, + "loss": 0.9280908107757568, + "step": 1125 + }, + { + "epoch": 0.25944700460829495, + "grad_norm": 0.6473213250874083, + "learning_rate": 1.9655468071506344e-06, + "loss": 0.820783793926239, + "step": 1126 + }, + { + "epoch": 0.25967741935483873, + "grad_norm": 0.490374992394704, + "learning_rate": 1.9654475938693663e-06, + "loss": 0.7832465171813965, + "step": 1127 + }, + { + "epoch": 0.25990783410138246, + "grad_norm": 0.6097626342555662, + "learning_rate": 1.965348240454658e-06, + "loss": 0.8824669122695923, + "step": 1128 + }, + { + "epoch": 0.26013824884792625, + "grad_norm": 0.5472888524636408, + "learning_rate": 1.9652487469209305e-06, + "loss": 0.8782131671905518, + "step": 1129 + }, + { + "epoch": 0.26036866359447003, + "grad_norm": 0.6689126051687625, + "learning_rate": 
1.9651491132826255e-06, + "loss": 0.938920259475708, + "step": 1130 + }, + { + "epoch": 0.2605990783410138, + "grad_norm": 0.5811243675216263, + "learning_rate": 1.965049339554204e-06, + "loss": 0.8733320236206055, + "step": 1131 + }, + { + "epoch": 0.2608294930875576, + "grad_norm": 0.5773916722243296, + "learning_rate": 1.9649494257501485e-06, + "loss": 0.8688358664512634, + "step": 1132 + }, + { + "epoch": 0.2610599078341014, + "grad_norm": 0.5867794198483245, + "learning_rate": 1.9648493718849617e-06, + "loss": 0.9250427484512329, + "step": 1133 + }, + { + "epoch": 0.26129032258064516, + "grad_norm": 0.5093685293336041, + "learning_rate": 1.9647491779731655e-06, + "loss": 0.7890609502792358, + "step": 1134 + }, + { + "epoch": 0.26152073732718895, + "grad_norm": 0.5526465355704269, + "learning_rate": 1.964648844029303e-06, + "loss": 0.83612060546875, + "step": 1135 + }, + { + "epoch": 0.26175115207373273, + "grad_norm": 0.597714005790405, + "learning_rate": 1.9645483700679387e-06, + "loss": 0.7951240539550781, + "step": 1136 + }, + { + "epoch": 0.2619815668202765, + "grad_norm": 0.5785889079746135, + "learning_rate": 1.9644477561036546e-06, + "loss": 0.9746277332305908, + "step": 1137 + }, + { + "epoch": 0.2622119815668203, + "grad_norm": 0.6092572079482067, + "learning_rate": 1.9643470021510556e-06, + "loss": 0.856966495513916, + "step": 1138 + }, + { + "epoch": 0.2624423963133641, + "grad_norm": 0.5158468607686231, + "learning_rate": 1.9642461082247663e-06, + "loss": 0.7419042587280273, + "step": 1139 + }, + { + "epoch": 0.2626728110599078, + "grad_norm": 0.6141847224483623, + "learning_rate": 1.9641450743394304e-06, + "loss": 0.8868693709373474, + "step": 1140 + }, + { + "epoch": 0.2629032258064516, + "grad_norm": 0.6400145867633011, + "learning_rate": 1.9640439005097133e-06, + "loss": 1.0111520290374756, + "step": 1141 + }, + { + "epoch": 0.2631336405529954, + "grad_norm": 0.5946199662941717, + "learning_rate": 1.9639425867503006e-06, + "loss": 
0.9379187226295471, + "step": 1142 + }, + { + "epoch": 0.26336405529953916, + "grad_norm": 0.6188285038344139, + "learning_rate": 1.9638411330758973e-06, + "loss": 0.8451071977615356, + "step": 1143 + }, + { + "epoch": 0.26359447004608294, + "grad_norm": 0.6988429276503174, + "learning_rate": 1.9637395395012295e-06, + "loss": 1.0407288074493408, + "step": 1144 + }, + { + "epoch": 0.2638248847926267, + "grad_norm": 0.7122851693009883, + "learning_rate": 1.9636378060410433e-06, + "loss": 0.9594388008117676, + "step": 1145 + }, + { + "epoch": 0.2640552995391705, + "grad_norm": 0.4400072369022715, + "learning_rate": 1.9635359327101057e-06, + "loss": 0.7940789461135864, + "step": 1146 + }, + { + "epoch": 0.2642857142857143, + "grad_norm": 0.6347840140846547, + "learning_rate": 1.9634339195232025e-06, + "loss": 0.9707269668579102, + "step": 1147 + }, + { + "epoch": 0.2645161290322581, + "grad_norm": 0.6349984514987448, + "learning_rate": 1.9633317664951417e-06, + "loss": 0.9554522037506104, + "step": 1148 + }, + { + "epoch": 0.26474654377880186, + "grad_norm": 0.7144693638673882, + "learning_rate": 1.9632294736407497e-06, + "loss": 1.009516716003418, + "step": 1149 + }, + { + "epoch": 0.26497695852534564, + "grad_norm": 0.5429306162333095, + "learning_rate": 1.9631270409748754e-06, + "loss": 0.8337735533714294, + "step": 1150 + }, + { + "epoch": 0.2652073732718894, + "grad_norm": 0.5901765838606909, + "learning_rate": 1.963024468512386e-06, + "loss": 0.9103367328643799, + "step": 1151 + }, + { + "epoch": 0.2654377880184332, + "grad_norm": 0.3703807183273661, + "learning_rate": 1.9629217562681694e-06, + "loss": 0.7258249521255493, + "step": 1152 + }, + { + "epoch": 0.26566820276497694, + "grad_norm": 0.6322578847379198, + "learning_rate": 1.962818904257135e-06, + "loss": 0.7696776390075684, + "step": 1153 + }, + { + "epoch": 0.2658986175115207, + "grad_norm": 0.5842074670437798, + "learning_rate": 1.962715912494211e-06, + "loss": 0.9027894139289856, + "step": 1154 + }, + 
{ + "epoch": 0.2661290322580645, + "grad_norm": 0.6016444551454023, + "learning_rate": 1.962612780994347e-06, + "loss": 1.0412788391113281, + "step": 1155 + }, + { + "epoch": 0.2663594470046083, + "grad_norm": 0.5483158655152818, + "learning_rate": 1.962509509772512e-06, + "loss": 0.8656542897224426, + "step": 1156 + }, + { + "epoch": 0.26658986175115207, + "grad_norm": 0.56350579921959, + "learning_rate": 1.9624060988436964e-06, + "loss": 0.9541186094284058, + "step": 1157 + }, + { + "epoch": 0.26682027649769585, + "grad_norm": 0.6019903664727945, + "learning_rate": 1.962302548222909e-06, + "loss": 0.7684942483901978, + "step": 1158 + }, + { + "epoch": 0.26705069124423964, + "grad_norm": 0.5978642328134118, + "learning_rate": 1.962198857925181e-06, + "loss": 0.8934941291809082, + "step": 1159 + }, + { + "epoch": 0.2672811059907834, + "grad_norm": 0.8041491872239377, + "learning_rate": 1.962095027965562e-06, + "loss": 0.8674842715263367, + "step": 1160 + }, + { + "epoch": 0.2675115207373272, + "grad_norm": 0.5520577783269698, + "learning_rate": 1.9619910583591237e-06, + "loss": 0.8850778937339783, + "step": 1161 + }, + { + "epoch": 0.267741935483871, + "grad_norm": 0.5547632066870658, + "learning_rate": 1.961886949120957e-06, + "loss": 0.9140915870666504, + "step": 1162 + }, + { + "epoch": 0.26797235023041477, + "grad_norm": 0.5171975434439527, + "learning_rate": 1.9617827002661733e-06, + "loss": 0.7557287812232971, + "step": 1163 + }, + { + "epoch": 0.26820276497695855, + "grad_norm": 0.6409514019909783, + "learning_rate": 1.9616783118099032e-06, + "loss": 0.8780542612075806, + "step": 1164 + }, + { + "epoch": 0.2684331797235023, + "grad_norm": 0.5407478984703894, + "learning_rate": 1.9615737837672995e-06, + "loss": 0.8352043628692627, + "step": 1165 + }, + { + "epoch": 0.26866359447004606, + "grad_norm": 0.5628947650252879, + "learning_rate": 1.961469116153534e-06, + "loss": 0.8119357228279114, + "step": 1166 + }, + { + "epoch": 0.26889400921658985, + 
"grad_norm": 0.5744461460266088, + "learning_rate": 1.9613643089837992e-06, + "loss": 0.8953120708465576, + "step": 1167 + }, + { + "epoch": 0.26912442396313363, + "grad_norm": 0.5867925171054906, + "learning_rate": 1.9612593622733074e-06, + "loss": 0.9078162908554077, + "step": 1168 + }, + { + "epoch": 0.2693548387096774, + "grad_norm": 0.5358654275940312, + "learning_rate": 1.961154276037292e-06, + "loss": 0.9118859767913818, + "step": 1169 + }, + { + "epoch": 0.2695852534562212, + "grad_norm": 0.5501238198976731, + "learning_rate": 1.9610490502910056e-06, + "loss": 0.8456159234046936, + "step": 1170 + }, + { + "epoch": 0.269815668202765, + "grad_norm": 0.6291583788438779, + "learning_rate": 1.9609436850497222e-06, + "loss": 0.7860552072525024, + "step": 1171 + }, + { + "epoch": 0.27004608294930876, + "grad_norm": 0.5078912747038423, + "learning_rate": 1.9608381803287343e-06, + "loss": 0.8121567368507385, + "step": 1172 + }, + { + "epoch": 0.27027649769585255, + "grad_norm": 0.6271384929565738, + "learning_rate": 1.9607325361433574e-06, + "loss": 0.9212384819984436, + "step": 1173 + }, + { + "epoch": 0.27050691244239633, + "grad_norm": 0.5704107274797215, + "learning_rate": 1.960626752508924e-06, + "loss": 0.9528858661651611, + "step": 1174 + }, + { + "epoch": 0.2707373271889401, + "grad_norm": 0.5901390376692353, + "learning_rate": 1.9605208294407894e-06, + "loss": 0.8561227321624756, + "step": 1175 + }, + { + "epoch": 0.2709677419354839, + "grad_norm": 0.5308748660328867, + "learning_rate": 1.960414766954328e-06, + "loss": 0.9333669543266296, + "step": 1176 + }, + { + "epoch": 0.2711981566820276, + "grad_norm": 0.5146250417484006, + "learning_rate": 1.9603085650649345e-06, + "loss": 0.8879388570785522, + "step": 1177 + }, + { + "epoch": 0.2714285714285714, + "grad_norm": 0.6699060572110628, + "learning_rate": 1.9602022237880244e-06, + "loss": 1.0099214315414429, + "step": 1178 + }, + { + "epoch": 0.2716589861751152, + "grad_norm": 0.5456103597772948, + 
"learning_rate": 1.9600957431390324e-06, + "loss": 0.9341822862625122, + "step": 1179 + }, + { + "epoch": 0.271889400921659, + "grad_norm": 0.48145703185786454, + "learning_rate": 1.9599891231334144e-06, + "loss": 0.7616428136825562, + "step": 1180 + }, + { + "epoch": 0.27211981566820276, + "grad_norm": 0.4889684884403523, + "learning_rate": 1.959882363786646e-06, + "loss": 0.8270235061645508, + "step": 1181 + }, + { + "epoch": 0.27235023041474654, + "grad_norm": 0.5354748169041671, + "learning_rate": 1.9597754651142233e-06, + "loss": 0.8715114593505859, + "step": 1182 + }, + { + "epoch": 0.2725806451612903, + "grad_norm": 0.5251650427533354, + "learning_rate": 1.959668427131662e-06, + "loss": 0.6910781860351562, + "step": 1183 + }, + { + "epoch": 0.2728110599078341, + "grad_norm": 0.5425639259870759, + "learning_rate": 1.9595612498544997e-06, + "loss": 0.9158545136451721, + "step": 1184 + }, + { + "epoch": 0.2730414746543779, + "grad_norm": 0.4274378587816055, + "learning_rate": 1.9594539332982917e-06, + "loss": 0.7129944562911987, + "step": 1185 + }, + { + "epoch": 0.2732718894009217, + "grad_norm": 0.5549453334752472, + "learning_rate": 1.9593464774786155e-06, + "loss": 0.9487595558166504, + "step": 1186 + }, + { + "epoch": 0.27350230414746546, + "grad_norm": 0.490496609840347, + "learning_rate": 1.959238882411068e-06, + "loss": 0.9455368518829346, + "step": 1187 + }, + { + "epoch": 0.27373271889400924, + "grad_norm": 0.5638225468967204, + "learning_rate": 1.959131148111267e-06, + "loss": 0.9005390405654907, + "step": 1188 + }, + { + "epoch": 0.27396313364055297, + "grad_norm": 0.6239187759866925, + "learning_rate": 1.9590232745948494e-06, + "loss": 0.91117262840271, + "step": 1189 + }, + { + "epoch": 0.27419354838709675, + "grad_norm": 0.46530917608588857, + "learning_rate": 1.958915261877473e-06, + "loss": 0.7940579652786255, + "step": 1190 + }, + { + "epoch": 0.27442396313364054, + "grad_norm": 0.5621028227805456, + "learning_rate": 1.9588071099748155e-06, + 
"loss": 1.0705196857452393, + "step": 1191 + }, + { + "epoch": 0.2746543778801843, + "grad_norm": 0.7402334674842445, + "learning_rate": 1.9586988189025756e-06, + "loss": 0.9311869740486145, + "step": 1192 + }, + { + "epoch": 0.2748847926267281, + "grad_norm": 0.5809380189675816, + "learning_rate": 1.9585903886764715e-06, + "loss": 0.9400506019592285, + "step": 1193 + }, + { + "epoch": 0.2751152073732719, + "grad_norm": 0.5097271764516258, + "learning_rate": 1.958481819312241e-06, + "loss": 0.8282920122146606, + "step": 1194 + }, + { + "epoch": 0.27534562211981567, + "grad_norm": 0.6446418001070287, + "learning_rate": 1.9583731108256435e-06, + "loss": 0.9111119508743286, + "step": 1195 + }, + { + "epoch": 0.27557603686635945, + "grad_norm": 0.6208204199981331, + "learning_rate": 1.9582642632324576e-06, + "loss": 0.9486548900604248, + "step": 1196 + }, + { + "epoch": 0.27580645161290324, + "grad_norm": 0.634036768829364, + "learning_rate": 1.9581552765484828e-06, + "loss": 0.8452764749526978, + "step": 1197 + }, + { + "epoch": 0.276036866359447, + "grad_norm": 0.6457489846855801, + "learning_rate": 1.958046150789538e-06, + "loss": 0.8636663556098938, + "step": 1198 + }, + { + "epoch": 0.2762672811059908, + "grad_norm": 0.6308230498005049, + "learning_rate": 1.9579368859714623e-06, + "loss": 0.9819158315658569, + "step": 1199 + }, + { + "epoch": 0.2764976958525346, + "grad_norm": 0.6100305190055095, + "learning_rate": 1.957827482110116e-06, + "loss": 0.8010607957839966, + "step": 1200 + }, + { + "epoch": 0.2767281105990783, + "grad_norm": 0.44236661935550003, + "learning_rate": 1.957717939221379e-06, + "loss": 0.7686241865158081, + "step": 1201 + }, + { + "epoch": 0.2769585253456221, + "grad_norm": 0.5324278038856628, + "learning_rate": 1.9576082573211507e-06, + "loss": 0.8548723459243774, + "step": 1202 + }, + { + "epoch": 0.2771889400921659, + "grad_norm": 0.5873649231612361, + "learning_rate": 1.957498436425351e-06, + "loss": 0.7866852283477783, + "step": 1203 + 
}, + { + "epoch": 0.27741935483870966, + "grad_norm": 0.5578610745935356, + "learning_rate": 1.9573884765499215e-06, + "loss": 0.8086235523223877, + "step": 1204 + }, + { + "epoch": 0.27764976958525345, + "grad_norm": 0.6489442522213279, + "learning_rate": 1.9572783777108217e-06, + "loss": 1.0310871601104736, + "step": 1205 + }, + { + "epoch": 0.27788018433179723, + "grad_norm": 0.6639195648959771, + "learning_rate": 1.957168139924033e-06, + "loss": 0.9482970237731934, + "step": 1206 + }, + { + "epoch": 0.278110599078341, + "grad_norm": 0.5595205782283428, + "learning_rate": 1.957057763205556e-06, + "loss": 0.809493899345398, + "step": 1207 + }, + { + "epoch": 0.2783410138248848, + "grad_norm": 0.5835729385419335, + "learning_rate": 1.956947247571411e-06, + "loss": 0.8679298162460327, + "step": 1208 + }, + { + "epoch": 0.2785714285714286, + "grad_norm": 0.5339273489408208, + "learning_rate": 1.95683659303764e-06, + "loss": 0.8870571255683899, + "step": 1209 + }, + { + "epoch": 0.27880184331797236, + "grad_norm": 0.6400258685482293, + "learning_rate": 1.9567257996203046e-06, + "loss": 0.8452431559562683, + "step": 1210 + }, + { + "epoch": 0.27903225806451615, + "grad_norm": 0.585371400581961, + "learning_rate": 1.9566148673354855e-06, + "loss": 0.8376550674438477, + "step": 1211 + }, + { + "epoch": 0.27926267281105993, + "grad_norm": 0.468171015360779, + "learning_rate": 1.9565037961992853e-06, + "loss": 0.7686463594436646, + "step": 1212 + }, + { + "epoch": 0.2794930875576037, + "grad_norm": 0.6305180956441923, + "learning_rate": 1.956392586227825e-06, + "loss": 1.0064536333084106, + "step": 1213 + }, + { + "epoch": 0.27972350230414744, + "grad_norm": 0.5204866621768998, + "learning_rate": 1.956281237437247e-06, + "loss": 0.9087784290313721, + "step": 1214 + }, + { + "epoch": 0.2799539170506912, + "grad_norm": 0.5800831908467822, + "learning_rate": 1.9561697498437133e-06, + "loss": 0.8528383374214172, + "step": 1215 + }, + { + "epoch": 0.280184331797235, + 
"grad_norm": 0.492586251170718, + "learning_rate": 1.9560581234634062e-06, + "loss": 0.8229737281799316, + "step": 1216 + }, + { + "epoch": 0.2804147465437788, + "grad_norm": 0.6543530371868361, + "learning_rate": 1.9559463583125285e-06, + "loss": 0.8957454562187195, + "step": 1217 + }, + { + "epoch": 0.2806451612903226, + "grad_norm": 0.6116476174626837, + "learning_rate": 1.955834454407302e-06, + "loss": 0.8373404741287231, + "step": 1218 + }, + { + "epoch": 0.28087557603686636, + "grad_norm": 0.6339166918490768, + "learning_rate": 1.9557224117639698e-06, + "loss": 0.9117659330368042, + "step": 1219 + }, + { + "epoch": 0.28110599078341014, + "grad_norm": 0.7009847380548185, + "learning_rate": 1.9556102303987946e-06, + "loss": 0.9079498052597046, + "step": 1220 + }, + { + "epoch": 0.2813364055299539, + "grad_norm": 0.6797187898490639, + "learning_rate": 1.9554979103280597e-06, + "loss": 0.8127235174179077, + "step": 1221 + }, + { + "epoch": 0.2815668202764977, + "grad_norm": 0.4430544694455362, + "learning_rate": 1.9553854515680684e-06, + "loss": 0.6790676712989807, + "step": 1222 + }, + { + "epoch": 0.2817972350230415, + "grad_norm": 0.547920786044559, + "learning_rate": 1.955272854135143e-06, + "loss": 0.93434739112854, + "step": 1223 + }, + { + "epoch": 0.2820276497695853, + "grad_norm": 0.5831429716678932, + "learning_rate": 1.9551601180456274e-06, + "loss": 0.8624403476715088, + "step": 1224 + }, + { + "epoch": 0.28225806451612906, + "grad_norm": 0.5942670172250124, + "learning_rate": 1.9550472433158856e-06, + "loss": 0.8871273994445801, + "step": 1225 + }, + { + "epoch": 0.2824884792626728, + "grad_norm": 0.6403907324028919, + "learning_rate": 1.9549342299623007e-06, + "loss": 1.0226445198059082, + "step": 1226 + }, + { + "epoch": 0.28271889400921657, + "grad_norm": 0.5570530371692032, + "learning_rate": 1.9548210780012764e-06, + "loss": 0.9232503771781921, + "step": 1227 + }, + { + "epoch": 0.28294930875576035, + "grad_norm": 0.5562171255847491, + 
"learning_rate": 1.9547077874492367e-06, + "loss": 0.944965124130249, + "step": 1228 + }, + { + "epoch": 0.28317972350230414, + "grad_norm": 0.7815951055502713, + "learning_rate": 1.9545943583226255e-06, + "loss": 0.9491870403289795, + "step": 1229 + }, + { + "epoch": 0.2834101382488479, + "grad_norm": 0.5531880644641158, + "learning_rate": 1.9544807906379065e-06, + "loss": 0.8477638363838196, + "step": 1230 + }, + { + "epoch": 0.2836405529953917, + "grad_norm": 0.6334904267465776, + "learning_rate": 1.9543670844115647e-06, + "loss": 0.9733752012252808, + "step": 1231 + }, + { + "epoch": 0.2838709677419355, + "grad_norm": 0.5077250781055755, + "learning_rate": 1.954253239660104e-06, + "loss": 0.8158911466598511, + "step": 1232 + }, + { + "epoch": 0.28410138248847927, + "grad_norm": 0.47003121688563365, + "learning_rate": 1.9541392564000487e-06, + "loss": 0.8814271092414856, + "step": 1233 + }, + { + "epoch": 0.28433179723502305, + "grad_norm": 0.5974631149552703, + "learning_rate": 1.9540251346479435e-06, + "loss": 0.8366897106170654, + "step": 1234 + }, + { + "epoch": 0.28456221198156684, + "grad_norm": 0.5122641090735244, + "learning_rate": 1.953910874420353e-06, + "loss": 0.8043497800827026, + "step": 1235 + }, + { + "epoch": 0.2847926267281106, + "grad_norm": 0.6923450749153209, + "learning_rate": 1.953796475733862e-06, + "loss": 0.904765248298645, + "step": 1236 + }, + { + "epoch": 0.2850230414746544, + "grad_norm": 0.6316427864189956, + "learning_rate": 1.953681938605075e-06, + "loss": 0.9092245101928711, + "step": 1237 + }, + { + "epoch": 0.28525345622119813, + "grad_norm": 0.44433825637231683, + "learning_rate": 1.953567263050617e-06, + "loss": 0.9119021892547607, + "step": 1238 + }, + { + "epoch": 0.2854838709677419, + "grad_norm": 0.5258256580858013, + "learning_rate": 1.9534524490871336e-06, + "loss": 0.8380709886550903, + "step": 1239 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 0.6731382971935342, + "learning_rate": 1.9533374967312894e-06, + 
"loss": 0.9410983324050903, + "step": 1240 + }, + { + "epoch": 0.2859447004608295, + "grad_norm": 0.5901005556596554, + "learning_rate": 1.953222405999769e-06, + "loss": 0.882665753364563, + "step": 1241 + }, + { + "epoch": 0.28617511520737327, + "grad_norm": 0.600142706864601, + "learning_rate": 1.953107176909279e-06, + "loss": 0.9334039688110352, + "step": 1242 + }, + { + "epoch": 0.28640552995391705, + "grad_norm": 0.649506044390801, + "learning_rate": 1.9529918094765433e-06, + "loss": 0.8743090033531189, + "step": 1243 + }, + { + "epoch": 0.28663594470046083, + "grad_norm": 0.5149777367828677, + "learning_rate": 1.9528763037183086e-06, + "loss": 0.9017846584320068, + "step": 1244 + }, + { + "epoch": 0.2868663594470046, + "grad_norm": 0.6718877038666831, + "learning_rate": 1.95276065965134e-06, + "loss": 0.9412289261817932, + "step": 1245 + }, + { + "epoch": 0.2870967741935484, + "grad_norm": 0.5829455891585096, + "learning_rate": 1.9526448772924222e-06, + "loss": 0.9008835554122925, + "step": 1246 + }, + { + "epoch": 0.2873271889400922, + "grad_norm": 0.5850809594667484, + "learning_rate": 1.9525289566583622e-06, + "loss": 0.803752064704895, + "step": 1247 + }, + { + "epoch": 0.28755760368663597, + "grad_norm": 0.642250740432813, + "learning_rate": 1.952412897765985e-06, + "loss": 0.8354049921035767, + "step": 1248 + }, + { + "epoch": 0.28778801843317975, + "grad_norm": 0.711123311118831, + "learning_rate": 1.9522967006321363e-06, + "loss": 1.047461748123169, + "step": 1249 + }, + { + "epoch": 0.2880184331797235, + "grad_norm": 0.5664585984555107, + "learning_rate": 1.9521803652736826e-06, + "loss": 0.9036056399345398, + "step": 1250 + }, + { + "epoch": 0.28824884792626726, + "grad_norm": 0.6380477461120507, + "learning_rate": 1.952063891707509e-06, + "loss": 0.9534894227981567, + "step": 1251 + }, + { + "epoch": 0.28847926267281104, + "grad_norm": 0.6213868500155985, + "learning_rate": 1.9519472799505217e-06, + "loss": 0.9200841188430786, + "step": 1252 + }, + 
{ + "epoch": 0.2887096774193548, + "grad_norm": 0.6071864938745559, + "learning_rate": 1.9518305300196475e-06, + "loss": 0.8917449712753296, + "step": 1253 + }, + { + "epoch": 0.2889400921658986, + "grad_norm": 0.43859246681042113, + "learning_rate": 1.9517136419318317e-06, + "loss": 0.92131507396698, + "step": 1254 + }, + { + "epoch": 0.2891705069124424, + "grad_norm": 0.5459214675052779, + "learning_rate": 1.951596615704041e-06, + "loss": 0.8862432241439819, + "step": 1255 + }, + { + "epoch": 0.2894009216589862, + "grad_norm": 0.5238034407201325, + "learning_rate": 1.951479451353261e-06, + "loss": 0.7789605855941772, + "step": 1256 + }, + { + "epoch": 0.28963133640552996, + "grad_norm": 0.6480376013887345, + "learning_rate": 1.951362148896498e-06, + "loss": 0.8187062740325928, + "step": 1257 + }, + { + "epoch": 0.28986175115207374, + "grad_norm": 0.651824990199355, + "learning_rate": 1.9512447083507784e-06, + "loss": 1.0575072765350342, + "step": 1258 + }, + { + "epoch": 0.2900921658986175, + "grad_norm": 0.5300946141437952, + "learning_rate": 1.9511271297331493e-06, + "loss": 0.8027279376983643, + "step": 1259 + }, + { + "epoch": 0.2903225806451613, + "grad_norm": 0.549023479491683, + "learning_rate": 1.951009413060676e-06, + "loss": 0.6641743183135986, + "step": 1260 + }, + { + "epoch": 0.2905529953917051, + "grad_norm": 0.4919566770154341, + "learning_rate": 1.950891558350446e-06, + "loss": 0.7937613725662231, + "step": 1261 + }, + { + "epoch": 0.2907834101382488, + "grad_norm": 0.6213972326398296, + "learning_rate": 1.950773565619564e-06, + "loss": 0.9600511193275452, + "step": 1262 + }, + { + "epoch": 0.2910138248847926, + "grad_norm": 0.6514763319649333, + "learning_rate": 1.9506554348851585e-06, + "loss": 0.8275980353355408, + "step": 1263 + }, + { + "epoch": 0.2912442396313364, + "grad_norm": 0.598467260157347, + "learning_rate": 1.950537166164375e-06, + "loss": 0.9008789658546448, + "step": 1264 + }, + { + "epoch": 0.29147465437788017, + "grad_norm": 
0.5520168646542984, + "learning_rate": 1.95041875947438e-06, + "loss": 0.8701465129852295, + "step": 1265 + }, + { + "epoch": 0.29170506912442395, + "grad_norm": 0.5793489097336151, + "learning_rate": 1.95030021483236e-06, + "loss": 0.9313883781433105, + "step": 1266 + }, + { + "epoch": 0.29193548387096774, + "grad_norm": 0.5738973536331494, + "learning_rate": 1.9501815322555222e-06, + "loss": 0.883125901222229, + "step": 1267 + }, + { + "epoch": 0.2921658986175115, + "grad_norm": 0.5430628147775056, + "learning_rate": 1.9500627117610927e-06, + "loss": 0.8856269121170044, + "step": 1268 + }, + { + "epoch": 0.2923963133640553, + "grad_norm": 0.4857560088008075, + "learning_rate": 1.9499437533663184e-06, + "loss": 0.8817840218544006, + "step": 1269 + }, + { + "epoch": 0.2926267281105991, + "grad_norm": 0.7079159031386842, + "learning_rate": 1.949824657088466e-06, + "loss": 0.9911330342292786, + "step": 1270 + }, + { + "epoch": 0.29285714285714287, + "grad_norm": 0.6283382634413396, + "learning_rate": 1.949705422944822e-06, + "loss": 0.8902890682220459, + "step": 1271 + }, + { + "epoch": 0.29308755760368665, + "grad_norm": 0.5381213123876506, + "learning_rate": 1.949586050952693e-06, + "loss": 0.6846401691436768, + "step": 1272 + }, + { + "epoch": 0.29331797235023044, + "grad_norm": 0.6164805880844991, + "learning_rate": 1.9494665411294057e-06, + "loss": 0.9186165928840637, + "step": 1273 + }, + { + "epoch": 0.29354838709677417, + "grad_norm": 0.4648178531483389, + "learning_rate": 1.949346893492307e-06, + "loss": 0.8614095449447632, + "step": 1274 + }, + { + "epoch": 0.29377880184331795, + "grad_norm": 0.6146731068970395, + "learning_rate": 1.9492271080587637e-06, + "loss": 0.7824405431747437, + "step": 1275 + }, + { + "epoch": 0.29400921658986173, + "grad_norm": 0.5415059908334089, + "learning_rate": 1.949107184846162e-06, + "loss": 0.8694697618484497, + "step": 1276 + }, + { + "epoch": 0.2942396313364055, + "grad_norm": 0.6070495052767576, + "learning_rate": 
1.948987123871909e-06, + "loss": 0.8839597105979919, + "step": 1277 + }, + { + "epoch": 0.2944700460829493, + "grad_norm": 0.5155544169686388, + "learning_rate": 1.948866925153431e-06, + "loss": 0.832268238067627, + "step": 1278 + }, + { + "epoch": 0.2947004608294931, + "grad_norm": 0.48264272480740306, + "learning_rate": 1.948746588708175e-06, + "loss": 0.8243123888969421, + "step": 1279 + }, + { + "epoch": 0.29493087557603687, + "grad_norm": 0.7516695382591614, + "learning_rate": 1.948626114553608e-06, + "loss": 0.99314284324646, + "step": 1280 + }, + { + "epoch": 0.29516129032258065, + "grad_norm": 0.6001488755214682, + "learning_rate": 1.948505502707216e-06, + "loss": 0.8853542804718018, + "step": 1281 + }, + { + "epoch": 0.29539170506912443, + "grad_norm": 0.7940640499991963, + "learning_rate": 1.948384753186506e-06, + "loss": 0.9623305797576904, + "step": 1282 + }, + { + "epoch": 0.2956221198156682, + "grad_norm": 0.64774993620639, + "learning_rate": 1.948263866009005e-06, + "loss": 0.8321142792701721, + "step": 1283 + }, + { + "epoch": 0.295852534562212, + "grad_norm": 0.6059595321597901, + "learning_rate": 1.948142841192258e-06, + "loss": 0.8911606669425964, + "step": 1284 + }, + { + "epoch": 0.2960829493087558, + "grad_norm": 0.6228210357050852, + "learning_rate": 1.948021678753834e-06, + "loss": 0.9501996040344238, + "step": 1285 + }, + { + "epoch": 0.29631336405529957, + "grad_norm": 0.5846881548888203, + "learning_rate": 1.947900378711318e-06, + "loss": 0.8555784225463867, + "step": 1286 + }, + { + "epoch": 0.2965437788018433, + "grad_norm": 0.5726752466099971, + "learning_rate": 1.9477789410823163e-06, + "loss": 0.7703878283500671, + "step": 1287 + }, + { + "epoch": 0.2967741935483871, + "grad_norm": 0.5629458043150717, + "learning_rate": 1.947657365884457e-06, + "loss": 1.0072009563446045, + "step": 1288 + }, + { + "epoch": 0.29700460829493086, + "grad_norm": 0.5698014348408978, + "learning_rate": 1.9475356531353847e-06, + "loss": 0.7633493542671204, 
+ "step": 1289 + }, + { + "epoch": 0.29723502304147464, + "grad_norm": 0.5241558601711666, + "learning_rate": 1.9474138028527674e-06, + "loss": 0.88579261302948, + "step": 1290 + }, + { + "epoch": 0.2974654377880184, + "grad_norm": 0.6037880677787516, + "learning_rate": 1.94729181505429e-06, + "loss": 0.8356794118881226, + "step": 1291 + }, + { + "epoch": 0.2976958525345622, + "grad_norm": 0.6197051238228268, + "learning_rate": 1.94716968975766e-06, + "loss": 0.8330395817756653, + "step": 1292 + }, + { + "epoch": 0.297926267281106, + "grad_norm": 0.6667932213948545, + "learning_rate": 1.947047426980604e-06, + "loss": 0.9219698905944824, + "step": 1293 + }, + { + "epoch": 0.2981566820276498, + "grad_norm": 0.5409653154450632, + "learning_rate": 1.9469250267408674e-06, + "loss": 0.880803644657135, + "step": 1294 + }, + { + "epoch": 0.29838709677419356, + "grad_norm": 0.5789679620224094, + "learning_rate": 1.9468024890562165e-06, + "loss": 0.8212012052536011, + "step": 1295 + }, + { + "epoch": 0.29861751152073734, + "grad_norm": 0.6209106243517916, + "learning_rate": 1.946679813944438e-06, + "loss": 1.0118587017059326, + "step": 1296 + }, + { + "epoch": 0.2988479262672811, + "grad_norm": 0.6374046746708436, + "learning_rate": 1.9465570014233377e-06, + "loss": 0.8708915710449219, + "step": 1297 + }, + { + "epoch": 0.2990783410138249, + "grad_norm": 0.6373146041782783, + "learning_rate": 1.9464340515107415e-06, + "loss": 0.9386067986488342, + "step": 1298 + }, + { + "epoch": 0.29930875576036864, + "grad_norm": 0.5346925830356088, + "learning_rate": 1.9463109642244958e-06, + "loss": 0.8672319650650024, + "step": 1299 + }, + { + "epoch": 0.2995391705069124, + "grad_norm": 0.7198371333215221, + "learning_rate": 1.9461877395824662e-06, + "loss": 0.9002958536148071, + "step": 1300 + }, + { + "epoch": 0.2997695852534562, + "grad_norm": 0.6247724220238058, + "learning_rate": 1.946064377602539e-06, + "loss": 0.9206029772758484, + "step": 1301 + }, + { + "epoch": 0.3, + 
"grad_norm": 0.8295443472719992, + "learning_rate": 1.94594087830262e-06, + "loss": 1.0063598155975342, + "step": 1302 + }, + { + "epoch": 0.3002304147465438, + "grad_norm": 0.5149695005553171, + "learning_rate": 1.9458172417006346e-06, + "loss": 0.7616912126541138, + "step": 1303 + }, + { + "epoch": 0.30046082949308756, + "grad_norm": 0.5462398029065331, + "learning_rate": 1.945693467814529e-06, + "loss": 0.8385730385780334, + "step": 1304 + }, + { + "epoch": 0.30069124423963134, + "grad_norm": 0.4854220181479302, + "learning_rate": 1.9455695566622677e-06, + "loss": 0.7032216787338257, + "step": 1305 + }, + { + "epoch": 0.3009216589861751, + "grad_norm": 0.5554776786626977, + "learning_rate": 1.9454455082618373e-06, + "loss": 0.7647181749343872, + "step": 1306 + }, + { + "epoch": 0.3011520737327189, + "grad_norm": 0.7119385935860951, + "learning_rate": 1.945321322631243e-06, + "loss": 0.9918918013572693, + "step": 1307 + }, + { + "epoch": 0.3013824884792627, + "grad_norm": 0.5689741757687454, + "learning_rate": 1.945196999788511e-06, + "loss": 0.838451623916626, + "step": 1308 + }, + { + "epoch": 0.3016129032258065, + "grad_norm": 0.7156229049064139, + "learning_rate": 1.945072539751685e-06, + "loss": 0.9739303588867188, + "step": 1309 + }, + { + "epoch": 0.30184331797235026, + "grad_norm": 0.4850858592361209, + "learning_rate": 1.9449479425388305e-06, + "loss": 0.8233742713928223, + "step": 1310 + }, + { + "epoch": 0.302073732718894, + "grad_norm": 0.666231819455408, + "learning_rate": 1.944823208168034e-06, + "loss": 0.9765088558197021, + "step": 1311 + }, + { + "epoch": 0.30230414746543777, + "grad_norm": 0.5940530240559707, + "learning_rate": 1.944698336657399e-06, + "loss": 0.7614048719406128, + "step": 1312 + }, + { + "epoch": 0.30253456221198155, + "grad_norm": 0.5807403996402337, + "learning_rate": 1.9445733280250512e-06, + "loss": 0.760692834854126, + "step": 1313 + }, + { + "epoch": 0.30276497695852533, + "grad_norm": 0.710580819926471, + 
"learning_rate": 1.944448182289135e-06, + "loss": 0.8484706878662109, + "step": 1314 + }, + { + "epoch": 0.3029953917050691, + "grad_norm": 0.6131916776262658, + "learning_rate": 1.944322899467816e-06, + "loss": 0.8857289552688599, + "step": 1315 + }, + { + "epoch": 0.3032258064516129, + "grad_norm": 0.7120330171482998, + "learning_rate": 1.944197479579278e-06, + "loss": 0.8375179171562195, + "step": 1316 + }, + { + "epoch": 0.3034562211981567, + "grad_norm": 0.5402001956337824, + "learning_rate": 1.9440719226417263e-06, + "loss": 0.8141925930976868, + "step": 1317 + }, + { + "epoch": 0.30368663594470047, + "grad_norm": 0.7607357810019435, + "learning_rate": 1.943946228673384e-06, + "loss": 0.9970111846923828, + "step": 1318 + }, + { + "epoch": 0.30391705069124425, + "grad_norm": 0.5721230302327327, + "learning_rate": 1.9438203976924966e-06, + "loss": 0.9542866349220276, + "step": 1319 + }, + { + "epoch": 0.30414746543778803, + "grad_norm": 0.5904074306009988, + "learning_rate": 1.943694429717328e-06, + "loss": 0.8808399438858032, + "step": 1320 + }, + { + "epoch": 0.3043778801843318, + "grad_norm": 0.5734964183027593, + "learning_rate": 1.9435683247661623e-06, + "loss": 0.8541150093078613, + "step": 1321 + }, + { + "epoch": 0.3046082949308756, + "grad_norm": 0.7749551173384804, + "learning_rate": 1.943442082857303e-06, + "loss": 0.8887044191360474, + "step": 1322 + }, + { + "epoch": 0.30483870967741933, + "grad_norm": 0.6530281616907251, + "learning_rate": 1.9433157040090746e-06, + "loss": 0.8699131011962891, + "step": 1323 + }, + { + "epoch": 0.3050691244239631, + "grad_norm": 0.6811202971751444, + "learning_rate": 1.9431891882398205e-06, + "loss": 0.7096077799797058, + "step": 1324 + }, + { + "epoch": 0.3052995391705069, + "grad_norm": 0.5279135582200482, + "learning_rate": 1.9430625355679045e-06, + "loss": 0.8040453195571899, + "step": 1325 + }, + { + "epoch": 0.3055299539170507, + "grad_norm": 0.5904456084555657, + "learning_rate": 1.9429357460117093e-06, + 
"loss": 0.8275970220565796, + "step": 1326 + }, + { + "epoch": 0.30576036866359446, + "grad_norm": 0.5947614996956965, + "learning_rate": 1.9428088195896393e-06, + "loss": 0.9724141359329224, + "step": 1327 + }, + { + "epoch": 0.30599078341013825, + "grad_norm": 0.6696756628924122, + "learning_rate": 1.9426817563201174e-06, + "loss": 0.9293274879455566, + "step": 1328 + }, + { + "epoch": 0.30622119815668203, + "grad_norm": 0.5976334939970911, + "learning_rate": 1.9425545562215865e-06, + "loss": 0.9454036951065063, + "step": 1329 + }, + { + "epoch": 0.3064516129032258, + "grad_norm": 0.48928245529374687, + "learning_rate": 1.9424272193125094e-06, + "loss": 0.7751365900039673, + "step": 1330 + }, + { + "epoch": 0.3066820276497696, + "grad_norm": 0.5211050083614731, + "learning_rate": 1.942299745611369e-06, + "loss": 0.8444688320159912, + "step": 1331 + }, + { + "epoch": 0.3069124423963134, + "grad_norm": 0.6370602856216532, + "learning_rate": 1.9421721351366684e-06, + "loss": 0.7751414775848389, + "step": 1332 + }, + { + "epoch": 0.30714285714285716, + "grad_norm": 0.6732034032956694, + "learning_rate": 1.9420443879069287e-06, + "loss": 0.912209153175354, + "step": 1333 + }, + { + "epoch": 0.30737327188940095, + "grad_norm": 0.4990267188564962, + "learning_rate": 1.941916503940694e-06, + "loss": 0.8897542357444763, + "step": 1334 + }, + { + "epoch": 0.3076036866359447, + "grad_norm": 0.6319943447022882, + "learning_rate": 1.9417884832565257e-06, + "loss": 0.8562046885490417, + "step": 1335 + }, + { + "epoch": 0.30783410138248846, + "grad_norm": 0.6422557067377674, + "learning_rate": 1.9416603258730055e-06, + "loss": 0.8886401653289795, + "step": 1336 + }, + { + "epoch": 0.30806451612903224, + "grad_norm": 0.5477788033894715, + "learning_rate": 1.9415320318087354e-06, + "loss": 0.7401903867721558, + "step": 1337 + }, + { + "epoch": 0.308294930875576, + "grad_norm": 0.5300494853019255, + "learning_rate": 1.941403601082338e-06, + "loss": 0.761360764503479, + "step": 
1338 + }, + { + "epoch": 0.3085253456221198, + "grad_norm": 0.5079078108497779, + "learning_rate": 1.9412750337124537e-06, + "loss": 0.9223028421401978, + "step": 1339 + }, + { + "epoch": 0.3087557603686636, + "grad_norm": 0.6370349934611669, + "learning_rate": 1.9411463297177446e-06, + "loss": 0.9287113547325134, + "step": 1340 + }, + { + "epoch": 0.3089861751152074, + "grad_norm": 0.6186566628026451, + "learning_rate": 1.941017489116891e-06, + "loss": 0.8548502922058105, + "step": 1341 + }, + { + "epoch": 0.30921658986175116, + "grad_norm": 0.6058300330585435, + "learning_rate": 1.9408885119285953e-06, + "loss": 0.8885709643363953, + "step": 1342 + }, + { + "epoch": 0.30944700460829494, + "grad_norm": 0.6807261513363189, + "learning_rate": 1.940759398171577e-06, + "loss": 0.8970856666564941, + "step": 1343 + }, + { + "epoch": 0.3096774193548387, + "grad_norm": 0.5664251996297385, + "learning_rate": 1.9406301478645783e-06, + "loss": 0.847138524055481, + "step": 1344 + }, + { + "epoch": 0.3099078341013825, + "grad_norm": 0.5354847877369665, + "learning_rate": 1.9405007610263584e-06, + "loss": 0.7892216444015503, + "step": 1345 + }, + { + "epoch": 0.3101382488479263, + "grad_norm": 0.6610649492142503, + "learning_rate": 1.940371237675698e-06, + "loss": 0.8869141340255737, + "step": 1346 + }, + { + "epoch": 0.3103686635944701, + "grad_norm": 0.6628677961578048, + "learning_rate": 1.9402415778313976e-06, + "loss": 0.8405635356903076, + "step": 1347 + }, + { + "epoch": 0.3105990783410138, + "grad_norm": 0.6646875425992601, + "learning_rate": 1.9401117815122768e-06, + "loss": 0.914352536201477, + "step": 1348 + }, + { + "epoch": 0.3108294930875576, + "grad_norm": 0.5494930695935469, + "learning_rate": 1.9399818487371756e-06, + "loss": 0.9059416055679321, + "step": 1349 + }, + { + "epoch": 0.31105990783410137, + "grad_norm": 0.7196818748440269, + "learning_rate": 1.939851779524953e-06, + "loss": 1.0513644218444824, + "step": 1350 + }, + { + "epoch": 0.31129032258064515, 
+ "grad_norm": 0.6538820317800585, + "learning_rate": 1.9397215738944893e-06, + "loss": 0.8673127889633179, + "step": 1351 + }, + { + "epoch": 0.31152073732718893, + "grad_norm": 0.5818727011440767, + "learning_rate": 1.9395912318646827e-06, + "loss": 0.7907043695449829, + "step": 1352 + }, + { + "epoch": 0.3117511520737327, + "grad_norm": 0.6065295506543811, + "learning_rate": 1.9394607534544527e-06, + "loss": 0.8127990961074829, + "step": 1353 + }, + { + "epoch": 0.3119815668202765, + "grad_norm": 0.598693369273182, + "learning_rate": 1.9393301386827387e-06, + "loss": 0.877837061882019, + "step": 1354 + }, + { + "epoch": 0.3122119815668203, + "grad_norm": 0.734976387219602, + "learning_rate": 1.939199387568498e-06, + "loss": 0.8518592715263367, + "step": 1355 + }, + { + "epoch": 0.31244239631336407, + "grad_norm": 0.5473192670176156, + "learning_rate": 1.9390685001307093e-06, + "loss": 0.7151869535446167, + "step": 1356 + }, + { + "epoch": 0.31267281105990785, + "grad_norm": 0.5581062201544951, + "learning_rate": 1.9389374763883716e-06, + "loss": 0.8325271606445312, + "step": 1357 + }, + { + "epoch": 0.31290322580645163, + "grad_norm": 0.6052904551524719, + "learning_rate": 1.938806316360502e-06, + "loss": 0.6875327825546265, + "step": 1358 + }, + { + "epoch": 0.3131336405529954, + "grad_norm": 0.5481097616797531, + "learning_rate": 1.9386750200661382e-06, + "loss": 0.8944000005722046, + "step": 1359 + }, + { + "epoch": 0.31336405529953915, + "grad_norm": 0.4954617799257055, + "learning_rate": 1.938543587524338e-06, + "loss": 0.8544747829437256, + "step": 1360 + }, + { + "epoch": 0.31359447004608293, + "grad_norm": 0.6938391730058001, + "learning_rate": 1.9384120187541788e-06, + "loss": 0.9216448068618774, + "step": 1361 + }, + { + "epoch": 0.3138248847926267, + "grad_norm": 0.6834174981389874, + "learning_rate": 1.938280313774757e-06, + "loss": 0.9239494800567627, + "step": 1362 + }, + { + "epoch": 0.3140552995391705, + "grad_norm": 0.6816430700209432, + 
"learning_rate": 1.9381484726051896e-06, + "loss": 0.8903297185897827, + "step": 1363 + }, + { + "epoch": 0.3142857142857143, + "grad_norm": 0.46405748253250195, + "learning_rate": 1.9380164952646137e-06, + "loss": 0.7019625306129456, + "step": 1364 + }, + { + "epoch": 0.31451612903225806, + "grad_norm": 0.6844663370999042, + "learning_rate": 1.9378843817721854e-06, + "loss": 0.8668909072875977, + "step": 1365 + }, + { + "epoch": 0.31474654377880185, + "grad_norm": 0.6454844689755169, + "learning_rate": 1.9377521321470804e-06, + "loss": 0.8124282360076904, + "step": 1366 + }, + { + "epoch": 0.31497695852534563, + "grad_norm": 0.5251795296125459, + "learning_rate": 1.937619746408495e-06, + "loss": 0.7543507814407349, + "step": 1367 + }, + { + "epoch": 0.3152073732718894, + "grad_norm": 0.6140420297919054, + "learning_rate": 1.9374872245756448e-06, + "loss": 0.8355506062507629, + "step": 1368 + }, + { + "epoch": 0.3154377880184332, + "grad_norm": 0.5898778959170111, + "learning_rate": 1.937354566667765e-06, + "loss": 0.9642060399055481, + "step": 1369 + }, + { + "epoch": 0.315668202764977, + "grad_norm": 0.5556038505388771, + "learning_rate": 1.93722177270411e-06, + "loss": 0.9044197201728821, + "step": 1370 + }, + { + "epoch": 0.31589861751152076, + "grad_norm": 0.4271939145273097, + "learning_rate": 1.937088842703956e-06, + "loss": 0.8077869415283203, + "step": 1371 + }, + { + "epoch": 0.3161290322580645, + "grad_norm": 0.6032982707731396, + "learning_rate": 1.9369557766865968e-06, + "loss": 0.8319793939590454, + "step": 1372 + }, + { + "epoch": 0.3163594470046083, + "grad_norm": 0.6304953638761566, + "learning_rate": 1.9368225746713475e-06, + "loss": 0.8233131170272827, + "step": 1373 + }, + { + "epoch": 0.31658986175115206, + "grad_norm": 0.6631214954178034, + "learning_rate": 1.936689236677541e-06, + "loss": 0.7898514270782471, + "step": 1374 + }, + { + "epoch": 0.31682027649769584, + "grad_norm": 0.6121849479571054, + "learning_rate": 1.9365557627245326e-06, + 
"loss": 0.9243249893188477, + "step": 1375 + }, + { + "epoch": 0.3170506912442396, + "grad_norm": 0.5673475924264754, + "learning_rate": 1.9364221528316946e-06, + "loss": 0.8153131008148193, + "step": 1376 + }, + { + "epoch": 0.3172811059907834, + "grad_norm": 0.6767166003638188, + "learning_rate": 1.936288407018421e-06, + "loss": 0.9203826189041138, + "step": 1377 + }, + { + "epoch": 0.3175115207373272, + "grad_norm": 0.6187562743125278, + "learning_rate": 1.936154525304124e-06, + "loss": 0.902605414390564, + "step": 1378 + }, + { + "epoch": 0.317741935483871, + "grad_norm": 0.6256929156852202, + "learning_rate": 1.936020507708238e-06, + "loss": 0.9504558444023132, + "step": 1379 + }, + { + "epoch": 0.31797235023041476, + "grad_norm": 0.6737932441495208, + "learning_rate": 1.9358863542502133e-06, + "loss": 0.8068373203277588, + "step": 1380 + }, + { + "epoch": 0.31820276497695854, + "grad_norm": 0.6309381884158767, + "learning_rate": 1.935752064949524e-06, + "loss": 1.00711989402771, + "step": 1381 + }, + { + "epoch": 0.3184331797235023, + "grad_norm": 0.6297604875594859, + "learning_rate": 1.935617639825661e-06, + "loss": 0.8271746039390564, + "step": 1382 + }, + { + "epoch": 0.3186635944700461, + "grad_norm": 0.658739150286029, + "learning_rate": 1.9354830788981363e-06, + "loss": 0.8478754758834839, + "step": 1383 + }, + { + "epoch": 0.31889400921658984, + "grad_norm": 0.6165108812612344, + "learning_rate": 1.935348382186481e-06, + "loss": 0.9240723252296448, + "step": 1384 + }, + { + "epoch": 0.3191244239631336, + "grad_norm": 0.6446571506984649, + "learning_rate": 1.935213549710246e-06, + "loss": 0.9275645613670349, + "step": 1385 + }, + { + "epoch": 0.3193548387096774, + "grad_norm": 0.6060948743586713, + "learning_rate": 1.9350785814890027e-06, + "loss": 0.9838275909423828, + "step": 1386 + }, + { + "epoch": 0.3195852534562212, + "grad_norm": 0.5765714017880346, + "learning_rate": 1.934943477542341e-06, + "loss": 0.9259177446365356, + "step": 1387 + }, + { + 
"epoch": 0.31981566820276497, + "grad_norm": 0.6051365106169855, + "learning_rate": 1.9348082378898714e-06, + "loss": 0.9252835512161255, + "step": 1388 + }, + { + "epoch": 0.32004608294930875, + "grad_norm": 0.5670107070091258, + "learning_rate": 1.9346728625512235e-06, + "loss": 0.8929460048675537, + "step": 1389 + }, + { + "epoch": 0.32027649769585254, + "grad_norm": 0.5325931239107909, + "learning_rate": 1.934537351546047e-06, + "loss": 0.8909564018249512, + "step": 1390 + }, + { + "epoch": 0.3205069124423963, + "grad_norm": 0.6295332947946368, + "learning_rate": 1.934401704894011e-06, + "loss": 0.8745983839035034, + "step": 1391 + }, + { + "epoch": 0.3207373271889401, + "grad_norm": 0.5987888846505133, + "learning_rate": 1.934265922614805e-06, + "loss": 0.8622266054153442, + "step": 1392 + }, + { + "epoch": 0.3209677419354839, + "grad_norm": 0.5587707056179402, + "learning_rate": 1.9341300047281365e-06, + "loss": 0.6796590089797974, + "step": 1393 + }, + { + "epoch": 0.32119815668202767, + "grad_norm": 0.6156409956015295, + "learning_rate": 1.9339939512537344e-06, + "loss": 0.9012733697891235, + "step": 1394 + }, + { + "epoch": 0.32142857142857145, + "grad_norm": 0.5898128750933246, + "learning_rate": 1.933857762211347e-06, + "loss": 0.9196282625198364, + "step": 1395 + }, + { + "epoch": 0.3216589861751152, + "grad_norm": 0.716981638669288, + "learning_rate": 1.9337214376207417e-06, + "loss": 0.7717788219451904, + "step": 1396 + }, + { + "epoch": 0.32188940092165896, + "grad_norm": 0.6574432706431985, + "learning_rate": 1.9335849775017057e-06, + "loss": 0.8516619801521301, + "step": 1397 + }, + { + "epoch": 0.32211981566820275, + "grad_norm": 0.6319036543472709, + "learning_rate": 1.933448381874046e-06, + "loss": 0.8089120388031006, + "step": 1398 + }, + { + "epoch": 0.32235023041474653, + "grad_norm": 0.7117992019263996, + "learning_rate": 1.9333116507575895e-06, + "loss": 0.8940925598144531, + "step": 1399 + }, + { + "epoch": 0.3225806451612903, + 
"grad_norm": 1.1103495530975782, + "learning_rate": 1.9331747841721827e-06, + "loss": 1.0240859985351562, + "step": 1400 + }, + { + "epoch": 0.3228110599078341, + "grad_norm": 0.6110124319562482, + "learning_rate": 1.9330377821376916e-06, + "loss": 0.742689847946167, + "step": 1401 + }, + { + "epoch": 0.3230414746543779, + "grad_norm": 0.6830153635526487, + "learning_rate": 1.932900644674001e-06, + "loss": 0.9843875169754028, + "step": 1402 + }, + { + "epoch": 0.32327188940092166, + "grad_norm": 0.6043326796009376, + "learning_rate": 1.932763371801017e-06, + "loss": 0.7289329767227173, + "step": 1403 + }, + { + "epoch": 0.32350230414746545, + "grad_norm": 0.676828647698979, + "learning_rate": 1.9326259635386644e-06, + "loss": 0.7706295251846313, + "step": 1404 + }, + { + "epoch": 0.32373271889400923, + "grad_norm": 0.526047650367784, + "learning_rate": 1.932488419906888e-06, + "loss": 0.87788325548172, + "step": 1405 + }, + { + "epoch": 0.323963133640553, + "grad_norm": 0.5971998478662486, + "learning_rate": 1.9323507409256515e-06, + "loss": 0.863690972328186, + "step": 1406 + }, + { + "epoch": 0.3241935483870968, + "grad_norm": 0.700825296208237, + "learning_rate": 1.9322129266149396e-06, + "loss": 0.9333875179290771, + "step": 1407 + }, + { + "epoch": 0.3244239631336406, + "grad_norm": 0.6642455421211582, + "learning_rate": 1.9320749769947555e-06, + "loss": 0.9170523881912231, + "step": 1408 + }, + { + "epoch": 0.3246543778801843, + "grad_norm": 0.7524235771818621, + "learning_rate": 1.931936892085122e-06, + "loss": 0.9337698221206665, + "step": 1409 + }, + { + "epoch": 0.3248847926267281, + "grad_norm": 0.5832115844679703, + "learning_rate": 1.9317986719060824e-06, + "loss": 0.8436682224273682, + "step": 1410 + }, + { + "epoch": 0.3251152073732719, + "grad_norm": 0.5569674571153642, + "learning_rate": 1.9316603164776996e-06, + "loss": 0.6652755737304688, + "step": 1411 + }, + { + "epoch": 0.32534562211981566, + "grad_norm": 0.5895248621851672, + "learning_rate": 
1.931521825820055e-06, + "loss": 0.7966932654380798, + "step": 1412 + }, + { + "epoch": 0.32557603686635944, + "grad_norm": 0.7207375493085693, + "learning_rate": 1.93138319995325e-06, + "loss": 0.9791682958602905, + "step": 1413 + }, + { + "epoch": 0.3258064516129032, + "grad_norm": 0.6505701538481653, + "learning_rate": 1.931244438897407e-06, + "loss": 0.7403467297554016, + "step": 1414 + }, + { + "epoch": 0.326036866359447, + "grad_norm": 0.5881243698924259, + "learning_rate": 1.931105542672667e-06, + "loss": 0.7758523225784302, + "step": 1415 + }, + { + "epoch": 0.3262672811059908, + "grad_norm": 0.6866613437755184, + "learning_rate": 1.9309665112991894e-06, + "loss": 0.8444551229476929, + "step": 1416 + }, + { + "epoch": 0.3264976958525346, + "grad_norm": 0.6987387290897759, + "learning_rate": 1.9308273447971553e-06, + "loss": 0.8796061277389526, + "step": 1417 + }, + { + "epoch": 0.32672811059907836, + "grad_norm": 0.6235742967720523, + "learning_rate": 1.9306880431867643e-06, + "loss": 0.8386640548706055, + "step": 1418 + }, + { + "epoch": 0.32695852534562214, + "grad_norm": 0.669578268248941, + "learning_rate": 1.930548606488236e-06, + "loss": 0.9229142665863037, + "step": 1419 + }, + { + "epoch": 0.3271889400921659, + "grad_norm": 0.6307605261613933, + "learning_rate": 1.9304090347218094e-06, + "loss": 0.9938615560531616, + "step": 1420 + }, + { + "epoch": 0.32741935483870965, + "grad_norm": 0.6526253572614591, + "learning_rate": 1.930269327907743e-06, + "loss": 0.7946186661720276, + "step": 1421 + }, + { + "epoch": 0.32764976958525344, + "grad_norm": 0.6717401804422498, + "learning_rate": 1.930129486066315e-06, + "loss": 0.9456713199615479, + "step": 1422 + }, + { + "epoch": 0.3278801843317972, + "grad_norm": 0.5156577436912951, + "learning_rate": 1.929989509217824e-06, + "loss": 0.844656765460968, + "step": 1423 + }, + { + "epoch": 0.328110599078341, + "grad_norm": 0.5219846430026822, + "learning_rate": 1.9298493973825862e-06, + "loss": 
0.7534950971603394, + "step": 1424 + }, + { + "epoch": 0.3283410138248848, + "grad_norm": 0.7328149629860281, + "learning_rate": 1.92970915058094e-06, + "loss": 0.934429407119751, + "step": 1425 + }, + { + "epoch": 0.32857142857142857, + "grad_norm": 0.6913075282966522, + "learning_rate": 1.929568768833241e-06, + "loss": 0.9491959810256958, + "step": 1426 + }, + { + "epoch": 0.32880184331797235, + "grad_norm": 0.6938433783461605, + "learning_rate": 1.9294282521598657e-06, + "loss": 0.9739001989364624, + "step": 1427 + }, + { + "epoch": 0.32903225806451614, + "grad_norm": 0.7260904191446513, + "learning_rate": 1.92928760058121e-06, + "loss": 0.8159639835357666, + "step": 1428 + }, + { + "epoch": 0.3292626728110599, + "grad_norm": 0.6287238530590293, + "learning_rate": 1.9291468141176894e-06, + "loss": 0.8752772808074951, + "step": 1429 + }, + { + "epoch": 0.3294930875576037, + "grad_norm": 0.6480201898337635, + "learning_rate": 1.929005892789739e-06, + "loss": 0.8543882369995117, + "step": 1430 + }, + { + "epoch": 0.3297235023041475, + "grad_norm": 0.7294679881265868, + "learning_rate": 1.928864836617813e-06, + "loss": 0.8837493658065796, + "step": 1431 + }, + { + "epoch": 0.32995391705069127, + "grad_norm": 0.7638461032292205, + "learning_rate": 1.9287236456223854e-06, + "loss": 0.9320387840270996, + "step": 1432 + }, + { + "epoch": 0.330184331797235, + "grad_norm": 0.5042343025936808, + "learning_rate": 1.92858231982395e-06, + "loss": 0.8272919654846191, + "step": 1433 + }, + { + "epoch": 0.3304147465437788, + "grad_norm": 0.6965906133224807, + "learning_rate": 1.9284408592430207e-06, + "loss": 0.9415527582168579, + "step": 1434 + }, + { + "epoch": 0.33064516129032256, + "grad_norm": 0.7215035047368656, + "learning_rate": 1.928299263900129e-06, + "loss": 0.91558837890625, + "step": 1435 + }, + { + "epoch": 0.33087557603686635, + "grad_norm": 0.5956823050741555, + "learning_rate": 1.9281575338158287e-06, + "loss": 0.9333036541938782, + "step": 1436 + }, + { + 
"epoch": 0.33110599078341013, + "grad_norm": 0.6051938214219355, + "learning_rate": 1.928015669010691e-06, + "loss": 0.7823847532272339, + "step": 1437 + }, + { + "epoch": 0.3313364055299539, + "grad_norm": 0.7462826372754077, + "learning_rate": 1.9278736695053075e-06, + "loss": 0.8436610102653503, + "step": 1438 + }, + { + "epoch": 0.3315668202764977, + "grad_norm": 0.7254037554281902, + "learning_rate": 1.927731535320289e-06, + "loss": 0.8658925890922546, + "step": 1439 + }, + { + "epoch": 0.3317972350230415, + "grad_norm": 0.6229809292573231, + "learning_rate": 1.9275892664762665e-06, + "loss": 0.8510075807571411, + "step": 1440 + }, + { + "epoch": 0.33202764976958526, + "grad_norm": 0.6349856559462502, + "learning_rate": 1.9274468629938897e-06, + "loss": 0.8002004623413086, + "step": 1441 + }, + { + "epoch": 0.33225806451612905, + "grad_norm": 0.6766111098462606, + "learning_rate": 1.9273043248938287e-06, + "loss": 1.0030219554901123, + "step": 1442 + }, + { + "epoch": 0.33248847926267283, + "grad_norm": 0.6313930076569801, + "learning_rate": 1.9271616521967723e-06, + "loss": 0.8415981531143188, + "step": 1443 + }, + { + "epoch": 0.3327188940092166, + "grad_norm": 0.5599899399531522, + "learning_rate": 1.9270188449234295e-06, + "loss": 0.7704254388809204, + "step": 1444 + }, + { + "epoch": 0.33294930875576034, + "grad_norm": 0.5742869826690059, + "learning_rate": 1.9268759030945294e-06, + "loss": 0.8350723385810852, + "step": 1445 + }, + { + "epoch": 0.3331797235023041, + "grad_norm": 0.7177949171518314, + "learning_rate": 1.926732826730818e-06, + "loss": 0.8729690313339233, + "step": 1446 + }, + { + "epoch": 0.3334101382488479, + "grad_norm": 0.64691268148931, + "learning_rate": 1.926589615853064e-06, + "loss": 0.7758746147155762, + "step": 1447 + }, + { + "epoch": 0.3336405529953917, + "grad_norm": 0.6330035443782508, + "learning_rate": 1.926446270482054e-06, + "loss": 0.7895134687423706, + "step": 1448 + }, + { + "epoch": 0.3338709677419355, + "grad_norm": 
0.5710370240153678, + "learning_rate": 1.9263027906385936e-06, + "loss": 1.0239053964614868, + "step": 1449 + }, + { + "epoch": 0.33410138248847926, + "grad_norm": 0.6423159813237256, + "learning_rate": 1.9261591763435104e-06, + "loss": 0.9294595122337341, + "step": 1450 + }, + { + "epoch": 0.33433179723502304, + "grad_norm": 0.690830605411519, + "learning_rate": 1.9260154276176484e-06, + "loss": 0.9786148071289062, + "step": 1451 + }, + { + "epoch": 0.3345622119815668, + "grad_norm": 0.5115027993477321, + "learning_rate": 1.925871544481873e-06, + "loss": 0.8513587117195129, + "step": 1452 + }, + { + "epoch": 0.3347926267281106, + "grad_norm": 0.4974492616751121, + "learning_rate": 1.9257275269570686e-06, + "loss": 0.7737371921539307, + "step": 1453 + }, + { + "epoch": 0.3350230414746544, + "grad_norm": 0.6186615203368176, + "learning_rate": 1.9255833750641392e-06, + "loss": 0.8567382097244263, + "step": 1454 + }, + { + "epoch": 0.3352534562211982, + "grad_norm": 0.5498745898568592, + "learning_rate": 1.9254390888240078e-06, + "loss": 0.893741250038147, + "step": 1455 + }, + { + "epoch": 0.33548387096774196, + "grad_norm": 0.5996544133152318, + "learning_rate": 1.9252946682576184e-06, + "loss": 0.9558119773864746, + "step": 1456 + }, + { + "epoch": 0.3357142857142857, + "grad_norm": 0.6629164295929078, + "learning_rate": 1.9251501133859323e-06, + "loss": 0.7055593729019165, + "step": 1457 + }, + { + "epoch": 0.33594470046082947, + "grad_norm": 0.652213418545905, + "learning_rate": 1.9250054242299326e-06, + "loss": 0.8409907817840576, + "step": 1458 + }, + { + "epoch": 0.33617511520737325, + "grad_norm": 0.5648924790833157, + "learning_rate": 1.9248606008106196e-06, + "loss": 0.9459772109985352, + "step": 1459 + }, + { + "epoch": 0.33640552995391704, + "grad_norm": 0.6285611694534835, + "learning_rate": 1.924715643149015e-06, + "loss": 0.7848879098892212, + "step": 1460 + }, + { + "epoch": 0.3366359447004608, + "grad_norm": 0.8030718131506138, + "learning_rate": 
1.924570551266159e-06, + "loss": 1.0365980863571167, + "step": 1461 + }, + { + "epoch": 0.3368663594470046, + "grad_norm": 0.6014174038703485, + "learning_rate": 1.924425325183111e-06, + "loss": 0.7331318855285645, + "step": 1462 + }, + { + "epoch": 0.3370967741935484, + "grad_norm": 0.6427865459032713, + "learning_rate": 1.9242799649209515e-06, + "loss": 0.8536237478256226, + "step": 1463 + }, + { + "epoch": 0.33732718894009217, + "grad_norm": 0.6525839289073214, + "learning_rate": 1.9241344705007784e-06, + "loss": 0.9296326637268066, + "step": 1464 + }, + { + "epoch": 0.33755760368663595, + "grad_norm": 0.887947392639257, + "learning_rate": 1.92398884194371e-06, + "loss": 0.9084932804107666, + "step": 1465 + }, + { + "epoch": 0.33778801843317974, + "grad_norm": 0.5270165853452017, + "learning_rate": 1.9238430792708847e-06, + "loss": 0.7426833510398865, + "step": 1466 + }, + { + "epoch": 0.3380184331797235, + "grad_norm": 0.5410658114261949, + "learning_rate": 1.9236971825034595e-06, + "loss": 0.7655431032180786, + "step": 1467 + }, + { + "epoch": 0.3382488479262673, + "grad_norm": 0.8331011387344854, + "learning_rate": 1.923551151662611e-06, + "loss": 0.9463646411895752, + "step": 1468 + }, + { + "epoch": 0.3384792626728111, + "grad_norm": 0.5486811314665706, + "learning_rate": 1.9234049867695355e-06, + "loss": 0.75661301612854, + "step": 1469 + }, + { + "epoch": 0.3387096774193548, + "grad_norm": 0.6386489226368193, + "learning_rate": 1.9232586878454486e-06, + "loss": 0.7411723136901855, + "step": 1470 + }, + { + "epoch": 0.3389400921658986, + "grad_norm": 0.6921074075590697, + "learning_rate": 1.9231122549115854e-06, + "loss": 0.9537360072135925, + "step": 1471 + }, + { + "epoch": 0.3391705069124424, + "grad_norm": 0.6895160542670777, + "learning_rate": 1.9229656879892004e-06, + "loss": 0.9527197480201721, + "step": 1472 + }, + { + "epoch": 0.33940092165898617, + "grad_norm": 0.7025720730409266, + "learning_rate": 1.9228189870995674e-06, + "loss": 
0.9083822965621948, + "step": 1473 + }, + { + "epoch": 0.33963133640552995, + "grad_norm": 0.5301970222083436, + "learning_rate": 1.9226721522639804e-06, + "loss": 0.8546823263168335, + "step": 1474 + }, + { + "epoch": 0.33986175115207373, + "grad_norm": 0.6709689097402769, + "learning_rate": 1.922525183503752e-06, + "loss": 0.7429832816123962, + "step": 1475 + }, + { + "epoch": 0.3400921658986175, + "grad_norm": 0.62032231336291, + "learning_rate": 1.922378080840214e-06, + "loss": 0.8805499076843262, + "step": 1476 + }, + { + "epoch": 0.3403225806451613, + "grad_norm": 0.681736765273056, + "learning_rate": 1.9222308442947193e-06, + "loss": 1.0177074670791626, + "step": 1477 + }, + { + "epoch": 0.3405529953917051, + "grad_norm": 0.5202393927717802, + "learning_rate": 1.922083473888638e-06, + "loss": 0.778317391872406, + "step": 1478 + }, + { + "epoch": 0.34078341013824887, + "grad_norm": 0.5628134051805, + "learning_rate": 1.921935969643361e-06, + "loss": 0.8461896181106567, + "step": 1479 + }, + { + "epoch": 0.34101382488479265, + "grad_norm": 0.5553667327802273, + "learning_rate": 1.921788331580299e-06, + "loss": 0.8028895258903503, + "step": 1480 + }, + { + "epoch": 0.34124423963133643, + "grad_norm": 0.5368047903298083, + "learning_rate": 1.9216405597208803e-06, + "loss": 0.9071121215820312, + "step": 1481 + }, + { + "epoch": 0.34147465437788016, + "grad_norm": 0.6427007304701287, + "learning_rate": 1.921492654086555e-06, + "loss": 0.7715062499046326, + "step": 1482 + }, + { + "epoch": 0.34170506912442394, + "grad_norm": 0.5552851307839923, + "learning_rate": 1.9213446146987907e-06, + "loss": 0.8446664810180664, + "step": 1483 + }, + { + "epoch": 0.3419354838709677, + "grad_norm": 0.712846002939772, + "learning_rate": 1.9211964415790754e-06, + "loss": 0.9835283756256104, + "step": 1484 + }, + { + "epoch": 0.3421658986175115, + "grad_norm": 0.8210412746012221, + "learning_rate": 1.921048134748916e-06, + "loss": 1.0630817413330078, + "step": 1485 + }, + { + 
"epoch": 0.3423963133640553, + "grad_norm": 0.6748930312757173, + "learning_rate": 1.920899694229839e-06, + "loss": 0.8514837622642517, + "step": 1486 + }, + { + "epoch": 0.3426267281105991, + "grad_norm": 0.6222560657794074, + "learning_rate": 1.920751120043391e-06, + "loss": 0.7302432060241699, + "step": 1487 + }, + { + "epoch": 0.34285714285714286, + "grad_norm": 0.7079869651359869, + "learning_rate": 1.920602412211136e-06, + "loss": 0.778337836265564, + "step": 1488 + }, + { + "epoch": 0.34308755760368664, + "grad_norm": 0.6890026561089317, + "learning_rate": 1.92045357075466e-06, + "loss": 0.815348207950592, + "step": 1489 + }, + { + "epoch": 0.3433179723502304, + "grad_norm": 0.5476065495891982, + "learning_rate": 1.920304595695567e-06, + "loss": 0.7844003438949585, + "step": 1490 + }, + { + "epoch": 0.3435483870967742, + "grad_norm": 0.6758218109549144, + "learning_rate": 1.92015548705548e-06, + "loss": 0.9513435363769531, + "step": 1491 + }, + { + "epoch": 0.343778801843318, + "grad_norm": 0.6450445262879821, + "learning_rate": 1.9200062448560424e-06, + "loss": 0.7506752610206604, + "step": 1492 + }, + { + "epoch": 0.3440092165898618, + "grad_norm": 0.6233205865485715, + "learning_rate": 1.919856869118916e-06, + "loss": 0.739554762840271, + "step": 1493 + }, + { + "epoch": 0.3442396313364055, + "grad_norm": 0.7436551378630792, + "learning_rate": 1.9197073598657826e-06, + "loss": 0.8167033791542053, + "step": 1494 + }, + { + "epoch": 0.3444700460829493, + "grad_norm": 0.6904439986569212, + "learning_rate": 1.919557717118344e-06, + "loss": 0.9308677911758423, + "step": 1495 + }, + { + "epoch": 0.34470046082949307, + "grad_norm": 0.6340340245140523, + "learning_rate": 1.9194079408983197e-06, + "loss": 0.8601467609405518, + "step": 1496 + }, + { + "epoch": 0.34493087557603686, + "grad_norm": 0.5645119744435318, + "learning_rate": 1.91925803122745e-06, + "loss": 0.8062653541564941, + "step": 1497 + }, + { + "epoch": 0.34516129032258064, + "grad_norm": 
0.6267130901098985, + "learning_rate": 1.9191079881274943e-06, + "loss": 0.8910555839538574, + "step": 1498 + }, + { + "epoch": 0.3453917050691244, + "grad_norm": 0.6398235864437706, + "learning_rate": 1.9189578116202307e-06, + "loss": 0.8604668378829956, + "step": 1499 + }, + { + "epoch": 0.3456221198156682, + "grad_norm": 0.660935387898433, + "learning_rate": 1.918807501727457e-06, + "loss": 0.7255126237869263, + "step": 1500 + }, + { + "epoch": 0.345852534562212, + "grad_norm": 0.6873891579533423, + "learning_rate": 1.9186570584709912e-06, + "loss": 0.998108983039856, + "step": 1501 + }, + { + "epoch": 0.34608294930875577, + "grad_norm": 0.6220147185177797, + "learning_rate": 1.918506481872669e-06, + "loss": 0.7660422325134277, + "step": 1502 + }, + { + "epoch": 0.34631336405529956, + "grad_norm": 0.6579892645247903, + "learning_rate": 1.9183557719543472e-06, + "loss": 0.868739902973175, + "step": 1503 + }, + { + "epoch": 0.34654377880184334, + "grad_norm": 0.5789973673480234, + "learning_rate": 1.918204928737901e-06, + "loss": 0.6630350351333618, + "step": 1504 + }, + { + "epoch": 0.3467741935483871, + "grad_norm": 0.5444610824332694, + "learning_rate": 1.9180539522452247e-06, + "loss": 0.8651586771011353, + "step": 1505 + }, + { + "epoch": 0.34700460829493085, + "grad_norm": 0.5927111235913876, + "learning_rate": 1.9179028424982326e-06, + "loss": 0.8584417700767517, + "step": 1506 + }, + { + "epoch": 0.34723502304147463, + "grad_norm": 0.5575547611441275, + "learning_rate": 1.917751599518858e-06, + "loss": 0.7793893814086914, + "step": 1507 + }, + { + "epoch": 0.3474654377880184, + "grad_norm": 0.768634414143097, + "learning_rate": 1.9176002233290542e-06, + "loss": 0.8499815464019775, + "step": 1508 + }, + { + "epoch": 0.3476958525345622, + "grad_norm": 0.7795460044280101, + "learning_rate": 1.917448713950792e-06, + "loss": 0.7914199829101562, + "step": 1509 + }, + { + "epoch": 0.347926267281106, + "grad_norm": 0.8510793838671106, + "learning_rate": 
1.9172970714060637e-06, + "loss": 0.942331850528717, + "step": 1510 + }, + { + "epoch": 0.34815668202764977, + "grad_norm": 0.621963787262809, + "learning_rate": 1.9171452957168803e-06, + "loss": 0.7780032157897949, + "step": 1511 + }, + { + "epoch": 0.34838709677419355, + "grad_norm": 0.6399045325995384, + "learning_rate": 1.916993386905271e-06, + "loss": 0.8544708490371704, + "step": 1512 + }, + { + "epoch": 0.34861751152073733, + "grad_norm": 0.6890752127070114, + "learning_rate": 1.9168413449932855e-06, + "loss": 0.798173725605011, + "step": 1513 + }, + { + "epoch": 0.3488479262672811, + "grad_norm": 0.7396810139453504, + "learning_rate": 1.9166891700029922e-06, + "loss": 0.9426852464675903, + "step": 1514 + }, + { + "epoch": 0.3490783410138249, + "grad_norm": 0.7455227520654529, + "learning_rate": 1.91653686195648e-06, + "loss": 0.922240138053894, + "step": 1515 + }, + { + "epoch": 0.3493087557603687, + "grad_norm": 0.630161091555718, + "learning_rate": 1.9163844208758556e-06, + "loss": 0.7997978925704956, + "step": 1516 + }, + { + "epoch": 0.34953917050691247, + "grad_norm": 0.7560374253096135, + "learning_rate": 1.9162318467832455e-06, + "loss": 1.0597525835037231, + "step": 1517 + }, + { + "epoch": 0.3497695852534562, + "grad_norm": 0.6669142658812499, + "learning_rate": 1.9160791397007957e-06, + "loss": 0.8211681842803955, + "step": 1518 + }, + { + "epoch": 0.35, + "grad_norm": 0.6134468456903489, + "learning_rate": 1.9159262996506716e-06, + "loss": 0.8078022003173828, + "step": 1519 + }, + { + "epoch": 0.35023041474654376, + "grad_norm": 0.8800559709758627, + "learning_rate": 1.915773326655057e-06, + "loss": 0.9449256658554077, + "step": 1520 + }, + { + "epoch": 0.35046082949308754, + "grad_norm": 0.6806561068219223, + "learning_rate": 1.915620220736157e-06, + "loss": 0.8744012117385864, + "step": 1521 + }, + { + "epoch": 0.35069124423963133, + "grad_norm": 0.501693303726274, + "learning_rate": 1.9154669819161946e-06, + "loss": 0.9503095746040344, + 
"step": 1522 + }, + { + "epoch": 0.3509216589861751, + "grad_norm": 0.7422922368497302, + "learning_rate": 1.9153136102174106e-06, + "loss": 1.055432915687561, + "step": 1523 + }, + { + "epoch": 0.3511520737327189, + "grad_norm": 0.7420134076461076, + "learning_rate": 1.9151601056620684e-06, + "loss": 0.8540226221084595, + "step": 1524 + }, + { + "epoch": 0.3513824884792627, + "grad_norm": 0.6432500784024293, + "learning_rate": 1.915006468272448e-06, + "loss": 0.8846266865730286, + "step": 1525 + }, + { + "epoch": 0.35161290322580646, + "grad_norm": 0.6065038491164693, + "learning_rate": 1.9148526980708507e-06, + "loss": 0.8941656947135925, + "step": 1526 + }, + { + "epoch": 0.35184331797235024, + "grad_norm": 0.657637251938276, + "learning_rate": 1.914698795079595e-06, + "loss": 0.868419885635376, + "step": 1527 + }, + { + "epoch": 0.35207373271889403, + "grad_norm": 0.6471997072963731, + "learning_rate": 1.91454475932102e-06, + "loss": 0.7375580072402954, + "step": 1528 + }, + { + "epoch": 0.3523041474654378, + "grad_norm": 0.5813494020686044, + "learning_rate": 1.9143905908174844e-06, + "loss": 0.9415492415428162, + "step": 1529 + }, + { + "epoch": 0.35253456221198154, + "grad_norm": 0.5845641741459107, + "learning_rate": 1.9142362895913646e-06, + "loss": 0.8395911455154419, + "step": 1530 + }, + { + "epoch": 0.3527649769585253, + "grad_norm": 0.6214793611789142, + "learning_rate": 1.914081855665057e-06, + "loss": 0.831234335899353, + "step": 1531 + }, + { + "epoch": 0.3529953917050691, + "grad_norm": 0.6337865377576076, + "learning_rate": 1.9139272890609794e-06, + "loss": 0.8975566029548645, + "step": 1532 + }, + { + "epoch": 0.3532258064516129, + "grad_norm": 0.629586080319263, + "learning_rate": 1.913772589801565e-06, + "loss": 0.8134264945983887, + "step": 1533 + }, + { + "epoch": 0.3534562211981567, + "grad_norm": 0.6728325426784268, + "learning_rate": 1.913617757909269e-06, + "loss": 0.9507275819778442, + "step": 1534 + }, + { + "epoch": 
0.35368663594470046, + "grad_norm": 0.6431752162471284, + "learning_rate": 1.913462793406565e-06, + "loss": 0.8839038610458374, + "step": 1535 + }, + { + "epoch": 0.35391705069124424, + "grad_norm": 0.5543997844984022, + "learning_rate": 1.9133076963159453e-06, + "loss": 0.8708392381668091, + "step": 1536 + }, + { + "epoch": 0.354147465437788, + "grad_norm": 0.6062385114401656, + "learning_rate": 1.913152466659923e-06, + "loss": 0.7609391212463379, + "step": 1537 + }, + { + "epoch": 0.3543778801843318, + "grad_norm": 0.7180303128257083, + "learning_rate": 1.912997104461029e-06, + "loss": 0.9231283664703369, + "step": 1538 + }, + { + "epoch": 0.3546082949308756, + "grad_norm": 0.6890910539107805, + "learning_rate": 1.912841609741814e-06, + "loss": 1.0297726392745972, + "step": 1539 + }, + { + "epoch": 0.3548387096774194, + "grad_norm": 0.75971130189085, + "learning_rate": 1.9126859825248475e-06, + "loss": 0.8798987865447998, + "step": 1540 + }, + { + "epoch": 0.35506912442396316, + "grad_norm": 0.7030378763019209, + "learning_rate": 1.912530222832719e-06, + "loss": 0.9104069471359253, + "step": 1541 + }, + { + "epoch": 0.35529953917050694, + "grad_norm": 0.6534729730017157, + "learning_rate": 1.9123743306880368e-06, + "loss": 0.7618073225021362, + "step": 1542 + }, + { + "epoch": 0.35552995391705067, + "grad_norm": 0.7461748863693719, + "learning_rate": 1.912218306113428e-06, + "loss": 0.8397510051727295, + "step": 1543 + }, + { + "epoch": 0.35576036866359445, + "grad_norm": 0.7060377086024656, + "learning_rate": 1.91206214913154e-06, + "loss": 0.9884299039840698, + "step": 1544 + }, + { + "epoch": 0.35599078341013823, + "grad_norm": 0.9576081524625122, + "learning_rate": 1.9119058597650385e-06, + "loss": 0.9878349304199219, + "step": 1545 + }, + { + "epoch": 0.356221198156682, + "grad_norm": 0.6493274093007226, + "learning_rate": 1.9117494380366086e-06, + "loss": 0.8790488243103027, + "step": 1546 + }, + { + "epoch": 0.3564516129032258, + "grad_norm": 
0.5310131973918355, + "learning_rate": 1.9115928839689546e-06, + "loss": 0.7390745878219604, + "step": 1547 + }, + { + "epoch": 0.3566820276497696, + "grad_norm": 0.6882029258971281, + "learning_rate": 1.9114361975848004e-06, + "loss": 0.7354288101196289, + "step": 1548 + }, + { + "epoch": 0.35691244239631337, + "grad_norm": 0.7667535594605746, + "learning_rate": 1.911279378906889e-06, + "loss": 0.9234673976898193, + "step": 1549 + }, + { + "epoch": 0.35714285714285715, + "grad_norm": 0.6115013610277281, + "learning_rate": 1.911122427957982e-06, + "loss": 0.8913710117340088, + "step": 1550 + }, + { + "epoch": 0.35737327188940093, + "grad_norm": 0.7050561523779678, + "learning_rate": 1.9109653447608605e-06, + "loss": 0.754358172416687, + "step": 1551 + }, + { + "epoch": 0.3576036866359447, + "grad_norm": 0.784312775933048, + "learning_rate": 1.910808129338325e-06, + "loss": 0.7361906170845032, + "step": 1552 + }, + { + "epoch": 0.3578341013824885, + "grad_norm": 0.7799572736490341, + "learning_rate": 1.9106507817131957e-06, + "loss": 0.8167279362678528, + "step": 1553 + }, + { + "epoch": 0.3580645161290323, + "grad_norm": 0.5335250967831215, + "learning_rate": 1.910493301908311e-06, + "loss": 0.7504739761352539, + "step": 1554 + }, + { + "epoch": 0.358294930875576, + "grad_norm": 0.7032319483863736, + "learning_rate": 1.9103356899465287e-06, + "loss": 0.8452355861663818, + "step": 1555 + }, + { + "epoch": 0.3585253456221198, + "grad_norm": 0.6126249946093243, + "learning_rate": 1.9101779458507263e-06, + "loss": 0.891547679901123, + "step": 1556 + }, + { + "epoch": 0.3587557603686636, + "grad_norm": 0.6935978783962933, + "learning_rate": 1.9100200696438e-06, + "loss": 0.8132680654525757, + "step": 1557 + }, + { + "epoch": 0.35898617511520736, + "grad_norm": 0.6519674133121284, + "learning_rate": 1.9098620613486646e-06, + "loss": 0.799482524394989, + "step": 1558 + }, + { + "epoch": 0.35921658986175115, + "grad_norm": 0.5904521460015955, + "learning_rate": 
1.909703920988256e-06, + "loss": 0.8490267992019653, + "step": 1559 + }, + { + "epoch": 0.35944700460829493, + "grad_norm": 0.6819976276562522, + "learning_rate": 1.9095456485855277e-06, + "loss": 0.8608428239822388, + "step": 1560 + }, + { + "epoch": 0.3596774193548387, + "grad_norm": 0.710056379748393, + "learning_rate": 1.9093872441634526e-06, + "loss": 0.8460499048233032, + "step": 1561 + }, + { + "epoch": 0.3599078341013825, + "grad_norm": 0.7727130217690178, + "learning_rate": 1.9092287077450226e-06, + "loss": 0.9268433451652527, + "step": 1562 + }, + { + "epoch": 0.3601382488479263, + "grad_norm": 0.612809776724531, + "learning_rate": 1.90907003935325e-06, + "loss": 0.7354154586791992, + "step": 1563 + }, + { + "epoch": 0.36036866359447006, + "grad_norm": 0.6941943523357101, + "learning_rate": 1.9089112390111637e-06, + "loss": 0.87982177734375, + "step": 1564 + }, + { + "epoch": 0.36059907834101385, + "grad_norm": 0.7092001355075633, + "learning_rate": 1.9087523067418148e-06, + "loss": 0.994953453540802, + "step": 1565 + }, + { + "epoch": 0.36082949308755763, + "grad_norm": 0.7240785511234525, + "learning_rate": 1.9085932425682715e-06, + "loss": 0.8623256087303162, + "step": 1566 + }, + { + "epoch": 0.36105990783410136, + "grad_norm": 0.7577571727617612, + "learning_rate": 1.908434046513622e-06, + "loss": 0.8752846717834473, + "step": 1567 + }, + { + "epoch": 0.36129032258064514, + "grad_norm": 0.7538020694732109, + "learning_rate": 1.908274718600973e-06, + "loss": 0.9002033472061157, + "step": 1568 + }, + { + "epoch": 0.3615207373271889, + "grad_norm": 0.6751938160957709, + "learning_rate": 1.908115258853451e-06, + "loss": 0.7290444374084473, + "step": 1569 + }, + { + "epoch": 0.3617511520737327, + "grad_norm": 0.5739449847646289, + "learning_rate": 1.9079556672942016e-06, + "loss": 0.6833889484405518, + "step": 1570 + }, + { + "epoch": 0.3619815668202765, + "grad_norm": 0.7271514059808825, + "learning_rate": 1.907795943946389e-06, + "loss": 
1.0033842325210571, + "step": 1571 + }, + { + "epoch": 0.3622119815668203, + "grad_norm": 0.7261786878454322, + "learning_rate": 1.907636088833197e-06, + "loss": 0.9590950012207031, + "step": 1572 + }, + { + "epoch": 0.36244239631336406, + "grad_norm": 0.6796147019608265, + "learning_rate": 1.907476101977828e-06, + "loss": 0.8812122344970703, + "step": 1573 + }, + { + "epoch": 0.36267281105990784, + "grad_norm": 0.5509770826635522, + "learning_rate": 1.9073159834035045e-06, + "loss": 0.7549433708190918, + "step": 1574 + }, + { + "epoch": 0.3629032258064516, + "grad_norm": 0.8344983468044503, + "learning_rate": 1.9071557331334667e-06, + "loss": 0.9235562086105347, + "step": 1575 + }, + { + "epoch": 0.3631336405529954, + "grad_norm": 0.6317903590715543, + "learning_rate": 1.9069953511909755e-06, + "loss": 0.8468542098999023, + "step": 1576 + }, + { + "epoch": 0.3633640552995392, + "grad_norm": 0.5574642699953357, + "learning_rate": 1.9068348375993096e-06, + "loss": 0.8804000616073608, + "step": 1577 + }, + { + "epoch": 0.363594470046083, + "grad_norm": 0.5912501411899118, + "learning_rate": 1.9066741923817676e-06, + "loss": 0.762598991394043, + "step": 1578 + }, + { + "epoch": 0.3638248847926267, + "grad_norm": 0.7706966706442087, + "learning_rate": 1.9065134155616666e-06, + "loss": 0.8791940212249756, + "step": 1579 + }, + { + "epoch": 0.3640552995391705, + "grad_norm": 0.7168527524200441, + "learning_rate": 1.9063525071623439e-06, + "loss": 0.7041842937469482, + "step": 1580 + }, + { + "epoch": 0.36428571428571427, + "grad_norm": 0.6160916310238944, + "learning_rate": 1.9061914672071543e-06, + "loss": 0.9526468515396118, + "step": 1581 + }, + { + "epoch": 0.36451612903225805, + "grad_norm": 0.7118890640067297, + "learning_rate": 1.906030295719473e-06, + "loss": 0.9388316869735718, + "step": 1582 + }, + { + "epoch": 0.36474654377880183, + "grad_norm": 0.6899284739234433, + "learning_rate": 1.9058689927226936e-06, + "loss": 0.7295777797698975, + "step": 1583 + }, + { 
+ "epoch": 0.3649769585253456, + "grad_norm": 0.773766722090894, + "learning_rate": 1.905707558240229e-06, + "loss": 0.7540932297706604, + "step": 1584 + }, + { + "epoch": 0.3652073732718894, + "grad_norm": 0.7012558071518832, + "learning_rate": 1.9055459922955118e-06, + "loss": 0.9457792639732361, + "step": 1585 + }, + { + "epoch": 0.3654377880184332, + "grad_norm": 0.8248538436303866, + "learning_rate": 1.9053842949119923e-06, + "loss": 0.9121883511543274, + "step": 1586 + }, + { + "epoch": 0.36566820276497697, + "grad_norm": 0.7283384308967912, + "learning_rate": 1.905222466113141e-06, + "loss": 0.8140746355056763, + "step": 1587 + }, + { + "epoch": 0.36589861751152075, + "grad_norm": 0.6419705545105435, + "learning_rate": 1.905060505922447e-06, + "loss": 0.7403484582901001, + "step": 1588 + }, + { + "epoch": 0.36612903225806454, + "grad_norm": 0.581047347336086, + "learning_rate": 1.9048984143634188e-06, + "loss": 0.9040734171867371, + "step": 1589 + }, + { + "epoch": 0.3663594470046083, + "grad_norm": 0.8763582049227886, + "learning_rate": 1.9047361914595834e-06, + "loss": 0.9060958623886108, + "step": 1590 + }, + { + "epoch": 0.36658986175115205, + "grad_norm": 0.563240407907546, + "learning_rate": 1.904573837234488e-06, + "loss": 0.6925936937332153, + "step": 1591 + }, + { + "epoch": 0.36682027649769583, + "grad_norm": 0.6465995527416484, + "learning_rate": 1.9044113517116973e-06, + "loss": 0.8120197057723999, + "step": 1592 + }, + { + "epoch": 0.3670506912442396, + "grad_norm": 0.6544256373051048, + "learning_rate": 1.9042487349147965e-06, + "loss": 0.796414852142334, + "step": 1593 + }, + { + "epoch": 0.3672811059907834, + "grad_norm": 0.5916998574283423, + "learning_rate": 1.9040859868673885e-06, + "loss": 0.8390822410583496, + "step": 1594 + }, + { + "epoch": 0.3675115207373272, + "grad_norm": 0.6567403008386238, + "learning_rate": 1.9039231075930967e-06, + "loss": 0.990093469619751, + "step": 1595 + }, + { + "epoch": 0.36774193548387096, + "grad_norm": 
0.733917290012865, + "learning_rate": 1.9037600971155623e-06, + "loss": 0.8548597097396851, + "step": 1596 + }, + { + "epoch": 0.36797235023041475, + "grad_norm": 0.5429475903618856, + "learning_rate": 1.9035969554584464e-06, + "loss": 0.687299370765686, + "step": 1597 + }, + { + "epoch": 0.36820276497695853, + "grad_norm": 0.9276548262086025, + "learning_rate": 1.9034336826454282e-06, + "loss": 0.7857942581176758, + "step": 1598 + }, + { + "epoch": 0.3684331797235023, + "grad_norm": 0.7345227244712206, + "learning_rate": 1.9032702787002072e-06, + "loss": 0.8836538195610046, + "step": 1599 + }, + { + "epoch": 0.3686635944700461, + "grad_norm": 0.723858907192251, + "learning_rate": 1.9031067436465011e-06, + "loss": 0.8132715225219727, + "step": 1600 + }, + { + "epoch": 0.3688940092165899, + "grad_norm": 0.6649285274594987, + "learning_rate": 1.9029430775080467e-06, + "loss": 0.7632347345352173, + "step": 1601 + }, + { + "epoch": 0.36912442396313366, + "grad_norm": 0.6319858893374919, + "learning_rate": 1.9027792803086e-06, + "loss": 0.8616297841072083, + "step": 1602 + }, + { + "epoch": 0.36935483870967745, + "grad_norm": 0.6067565637769744, + "learning_rate": 1.9026153520719358e-06, + "loss": 0.8418172597885132, + "step": 1603 + }, + { + "epoch": 0.3695852534562212, + "grad_norm": 0.7094320350542224, + "learning_rate": 1.902451292821848e-06, + "loss": 0.7253717184066772, + "step": 1604 + }, + { + "epoch": 0.36981566820276496, + "grad_norm": 0.8059000016280097, + "learning_rate": 1.90228710258215e-06, + "loss": 0.9746035933494568, + "step": 1605 + }, + { + "epoch": 0.37004608294930874, + "grad_norm": 0.5259402340057983, + "learning_rate": 1.9021227813766733e-06, + "loss": 0.7722853422164917, + "step": 1606 + }, + { + "epoch": 0.3702764976958525, + "grad_norm": 0.6925264238716391, + "learning_rate": 1.9019583292292693e-06, + "loss": 0.8278614282608032, + "step": 1607 + }, + { + "epoch": 0.3705069124423963, + "grad_norm": 0.6439238935194896, + "learning_rate": 
1.9017937461638078e-06, + "loss": 0.7433085441589355, + "step": 1608 + }, + { + "epoch": 0.3707373271889401, + "grad_norm": 0.5505689424398915, + "learning_rate": 1.901629032204178e-06, + "loss": 0.9194153547286987, + "step": 1609 + }, + { + "epoch": 0.3709677419354839, + "grad_norm": 0.5866951472740422, + "learning_rate": 1.9014641873742877e-06, + "loss": 0.8502616882324219, + "step": 1610 + }, + { + "epoch": 0.37119815668202766, + "grad_norm": 0.6242266615517361, + "learning_rate": 1.9012992116980637e-06, + "loss": 0.8494570255279541, + "step": 1611 + }, + { + "epoch": 0.37142857142857144, + "grad_norm": 0.7369836132356214, + "learning_rate": 1.9011341051994526e-06, + "loss": 0.8567800521850586, + "step": 1612 + }, + { + "epoch": 0.3716589861751152, + "grad_norm": 0.6246604791910833, + "learning_rate": 1.9009688679024189e-06, + "loss": 0.7739682197570801, + "step": 1613 + }, + { + "epoch": 0.371889400921659, + "grad_norm": 0.754158311495332, + "learning_rate": 1.900803499830947e-06, + "loss": 0.8548814058303833, + "step": 1614 + }, + { + "epoch": 0.3721198156682028, + "grad_norm": 0.5813822362984273, + "learning_rate": 1.9006380010090395e-06, + "loss": 0.7444359064102173, + "step": 1615 + }, + { + "epoch": 0.3723502304147465, + "grad_norm": 1.02732235167255, + "learning_rate": 1.9004723714607183e-06, + "loss": 1.0483827590942383, + "step": 1616 + }, + { + "epoch": 0.3725806451612903, + "grad_norm": 0.7020606936102383, + "learning_rate": 1.9003066112100248e-06, + "loss": 0.7734435200691223, + "step": 1617 + }, + { + "epoch": 0.3728110599078341, + "grad_norm": 0.7388837596699729, + "learning_rate": 1.9001407202810181e-06, + "loss": 0.856806755065918, + "step": 1618 + }, + { + "epoch": 0.37304147465437787, + "grad_norm": 0.6630252498689021, + "learning_rate": 1.8999746986977776e-06, + "loss": 0.8708832263946533, + "step": 1619 + }, + { + "epoch": 0.37327188940092165, + "grad_norm": 0.7833548721469644, + "learning_rate": 1.899808546484401e-06, + "loss": 
0.9295653104782104, + "step": 1620 + }, + { + "epoch": 0.37350230414746544, + "grad_norm": 0.8120612065986471, + "learning_rate": 1.8996422636650054e-06, + "loss": 0.8799598217010498, + "step": 1621 + }, + { + "epoch": 0.3737327188940092, + "grad_norm": 0.6113644757026901, + "learning_rate": 1.8994758502637259e-06, + "loss": 0.8014140725135803, + "step": 1622 + }, + { + "epoch": 0.373963133640553, + "grad_norm": 0.7305462035644114, + "learning_rate": 1.8993093063047174e-06, + "loss": 0.8252615928649902, + "step": 1623 + }, + { + "epoch": 0.3741935483870968, + "grad_norm": 0.5571708900709818, + "learning_rate": 1.899142631812154e-06, + "loss": 0.8617361783981323, + "step": 1624 + }, + { + "epoch": 0.37442396313364057, + "grad_norm": 0.7088005059034134, + "learning_rate": 1.8989758268102274e-06, + "loss": 0.9316745400428772, + "step": 1625 + }, + { + "epoch": 0.37465437788018435, + "grad_norm": 0.5449801119846465, + "learning_rate": 1.89880889132315e-06, + "loss": 0.8195457458496094, + "step": 1626 + }, + { + "epoch": 0.37488479262672814, + "grad_norm": 0.7143201633211917, + "learning_rate": 1.8986418253751516e-06, + "loss": 0.7828787565231323, + "step": 1627 + }, + { + "epoch": 0.37511520737327186, + "grad_norm": 0.6506165386805676, + "learning_rate": 1.898474628990482e-06, + "loss": 0.8130955696105957, + "step": 1628 + }, + { + "epoch": 0.37534562211981565, + "grad_norm": 0.7388682274593752, + "learning_rate": 1.8983073021934097e-06, + "loss": 0.9925695657730103, + "step": 1629 + }, + { + "epoch": 0.37557603686635943, + "grad_norm": 0.7851734301973293, + "learning_rate": 1.8981398450082216e-06, + "loss": 0.8547999858856201, + "step": 1630 + }, + { + "epoch": 0.3758064516129032, + "grad_norm": 0.7016894400602667, + "learning_rate": 1.897972257459224e-06, + "loss": 0.8922954797744751, + "step": 1631 + }, + { + "epoch": 0.376036866359447, + "grad_norm": 0.641235710173759, + "learning_rate": 1.8978045395707415e-06, + "loss": 0.8553646802902222, + "step": 1632 + }, + { 
+ "epoch": 0.3762672811059908, + "grad_norm": 0.6780369843564141, + "learning_rate": 1.897636691367119e-06, + "loss": 0.7854139804840088, + "step": 1633 + }, + { + "epoch": 0.37649769585253456, + "grad_norm": 0.8291834208164379, + "learning_rate": 1.897468712872719e-06, + "loss": 0.8968626260757446, + "step": 1634 + }, + { + "epoch": 0.37672811059907835, + "grad_norm": 0.8135056284613995, + "learning_rate": 1.8973006041119234e-06, + "loss": 0.8898152112960815, + "step": 1635 + }, + { + "epoch": 0.37695852534562213, + "grad_norm": 0.7215595529410248, + "learning_rate": 1.8971323651091332e-06, + "loss": 0.8499374389648438, + "step": 1636 + }, + { + "epoch": 0.3771889400921659, + "grad_norm": 0.5955881573233954, + "learning_rate": 1.8969639958887677e-06, + "loss": 0.7803430557250977, + "step": 1637 + }, + { + "epoch": 0.3774193548387097, + "grad_norm": 0.672225539346555, + "learning_rate": 1.8967954964752657e-06, + "loss": 0.7669799327850342, + "step": 1638 + }, + { + "epoch": 0.3776497695852535, + "grad_norm": 0.7164416850564317, + "learning_rate": 1.8966268668930845e-06, + "loss": 0.9085204601287842, + "step": 1639 + }, + { + "epoch": 0.3778801843317972, + "grad_norm": 0.8492247946008473, + "learning_rate": 1.8964581071667005e-06, + "loss": 0.7793002724647522, + "step": 1640 + }, + { + "epoch": 0.378110599078341, + "grad_norm": 0.6359200183287212, + "learning_rate": 1.896289217320609e-06, + "loss": 0.8649430274963379, + "step": 1641 + }, + { + "epoch": 0.3783410138248848, + "grad_norm": 0.6424804906800053, + "learning_rate": 1.8961201973793243e-06, + "loss": 0.856898844242096, + "step": 1642 + }, + { + "epoch": 0.37857142857142856, + "grad_norm": 0.7702312360726356, + "learning_rate": 1.895951047367379e-06, + "loss": 0.8221957087516785, + "step": 1643 + }, + { + "epoch": 0.37880184331797234, + "grad_norm": 0.7163935487823062, + "learning_rate": 1.8957817673093256e-06, + "loss": 0.8158079385757446, + "step": 1644 + }, + { + "epoch": 0.3790322580645161, + "grad_norm": 
0.8008902981825888, + "learning_rate": 1.8956123572297343e-06, + "loss": 0.7803312540054321, + "step": 1645 + }, + { + "epoch": 0.3792626728110599, + "grad_norm": 0.7902834195938876, + "learning_rate": 1.8954428171531949e-06, + "loss": 1.035685420036316, + "step": 1646 + }, + { + "epoch": 0.3794930875576037, + "grad_norm": 0.6044824314396153, + "learning_rate": 1.8952731471043161e-06, + "loss": 0.6871123313903809, + "step": 1647 + }, + { + "epoch": 0.3797235023041475, + "grad_norm": 0.6400629937897654, + "learning_rate": 1.8951033471077253e-06, + "loss": 0.9651780128479004, + "step": 1648 + }, + { + "epoch": 0.37995391705069126, + "grad_norm": 0.7485926311468839, + "learning_rate": 1.8949334171880687e-06, + "loss": 1.018349528312683, + "step": 1649 + }, + { + "epoch": 0.38018433179723504, + "grad_norm": 0.6571349103626993, + "learning_rate": 1.894763357370011e-06, + "loss": 0.6839278936386108, + "step": 1650 + }, + { + "epoch": 0.3804147465437788, + "grad_norm": 0.6757724586058976, + "learning_rate": 1.894593167678237e-06, + "loss": 0.8442174196243286, + "step": 1651 + }, + { + "epoch": 0.38064516129032255, + "grad_norm": 0.6368918088972565, + "learning_rate": 1.8944228481374484e-06, + "loss": 0.8224585056304932, + "step": 1652 + }, + { + "epoch": 0.38087557603686634, + "grad_norm": 0.6970802562618803, + "learning_rate": 1.8942523987723678e-06, + "loss": 0.8570500612258911, + "step": 1653 + }, + { + "epoch": 0.3811059907834101, + "grad_norm": 0.731718201815575, + "learning_rate": 1.8940818196077354e-06, + "loss": 0.7696554660797119, + "step": 1654 + }, + { + "epoch": 0.3813364055299539, + "grad_norm": 0.7456139352122005, + "learning_rate": 1.8939111106683103e-06, + "loss": 0.822563886642456, + "step": 1655 + }, + { + "epoch": 0.3815668202764977, + "grad_norm": 0.46565320695076334, + "learning_rate": 1.8937402719788711e-06, + "loss": 0.6537219882011414, + "step": 1656 + }, + { + "epoch": 0.38179723502304147, + "grad_norm": 0.8414098679023442, + "learning_rate": 
1.8935693035642145e-06, + "loss": 0.9081932306289673, + "step": 1657 + }, + { + "epoch": 0.38202764976958525, + "grad_norm": 0.5018818977531995, + "learning_rate": 1.8933982054491563e-06, + "loss": 0.6839661598205566, + "step": 1658 + }, + { + "epoch": 0.38225806451612904, + "grad_norm": 0.6964355972832653, + "learning_rate": 1.8932269776585313e-06, + "loss": 0.9187283515930176, + "step": 1659 + }, + { + "epoch": 0.3824884792626728, + "grad_norm": 0.8100260748701062, + "learning_rate": 1.893055620217193e-06, + "loss": 0.9567047357559204, + "step": 1660 + }, + { + "epoch": 0.3827188940092166, + "grad_norm": 0.7345697660292878, + "learning_rate": 1.8928841331500136e-06, + "loss": 0.785561203956604, + "step": 1661 + }, + { + "epoch": 0.3829493087557604, + "grad_norm": 0.882033286363023, + "learning_rate": 1.8927125164818842e-06, + "loss": 0.8986088037490845, + "step": 1662 + }, + { + "epoch": 0.38317972350230417, + "grad_norm": 0.7191553093714457, + "learning_rate": 1.892540770237715e-06, + "loss": 1.0027087926864624, + "step": 1663 + }, + { + "epoch": 0.38341013824884795, + "grad_norm": 0.6970721775230337, + "learning_rate": 1.8923688944424346e-06, + "loss": 0.8502041697502136, + "step": 1664 + }, + { + "epoch": 0.3836405529953917, + "grad_norm": 0.6684142159321271, + "learning_rate": 1.8921968891209907e-06, + "loss": 0.8526991605758667, + "step": 1665 + }, + { + "epoch": 0.38387096774193546, + "grad_norm": 0.7082372977886758, + "learning_rate": 1.8920247542983492e-06, + "loss": 0.8084676265716553, + "step": 1666 + }, + { + "epoch": 0.38410138248847925, + "grad_norm": 0.6206558140284871, + "learning_rate": 1.8918524899994957e-06, + "loss": 0.8922938704490662, + "step": 1667 + }, + { + "epoch": 0.38433179723502303, + "grad_norm": 0.768771022868596, + "learning_rate": 1.8916800962494337e-06, + "loss": 0.7965600490570068, + "step": 1668 + }, + { + "epoch": 0.3845622119815668, + "grad_norm": 0.6752105100256773, + "learning_rate": 1.8915075730731865e-06, + "loss": 
0.9505549073219299, + "step": 1669 + }, + { + "epoch": 0.3847926267281106, + "grad_norm": 0.6897214722687708, + "learning_rate": 1.8913349204957947e-06, + "loss": 0.9459924697875977, + "step": 1670 + }, + { + "epoch": 0.3850230414746544, + "grad_norm": 0.6215985429421047, + "learning_rate": 1.8911621385423195e-06, + "loss": 0.8433674573898315, + "step": 1671 + }, + { + "epoch": 0.38525345622119817, + "grad_norm": 0.7790027974124772, + "learning_rate": 1.8909892272378398e-06, + "loss": 0.8945955038070679, + "step": 1672 + }, + { + "epoch": 0.38548387096774195, + "grad_norm": 0.6828005324330048, + "learning_rate": 1.890816186607453e-06, + "loss": 0.8580358624458313, + "step": 1673 + }, + { + "epoch": 0.38571428571428573, + "grad_norm": 0.6249387555876122, + "learning_rate": 1.8906430166762761e-06, + "loss": 0.7708698511123657, + "step": 1674 + }, + { + "epoch": 0.3859447004608295, + "grad_norm": 0.7418139824839276, + "learning_rate": 1.8904697174694446e-06, + "loss": 0.8647153377532959, + "step": 1675 + }, + { + "epoch": 0.3861751152073733, + "grad_norm": 0.7428074816121766, + "learning_rate": 1.890296289012112e-06, + "loss": 0.9380506277084351, + "step": 1676 + }, + { + "epoch": 0.386405529953917, + "grad_norm": 0.6218965089791644, + "learning_rate": 1.8901227313294519e-06, + "loss": 0.8814103603363037, + "step": 1677 + }, + { + "epoch": 0.3866359447004608, + "grad_norm": 0.7768206335574417, + "learning_rate": 1.8899490444466556e-06, + "loss": 0.9348419904708862, + "step": 1678 + }, + { + "epoch": 0.3868663594470046, + "grad_norm": 0.5956095891599564, + "learning_rate": 1.8897752283889338e-06, + "loss": 0.7502046823501587, + "step": 1679 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 0.567040551050712, + "learning_rate": 1.8896012831815155e-06, + "loss": 0.8499769568443298, + "step": 1680 + }, + { + "epoch": 0.38732718894009216, + "grad_norm": 0.6506272613615357, + "learning_rate": 1.8894272088496487e-06, + "loss": 0.8253993391990662, + "step": 1681 + }, + { 
+ "epoch": 0.38755760368663594, + "grad_norm": 0.7707626449058277, + "learning_rate": 1.8892530054185998e-06, + "loss": 0.8494073152542114, + "step": 1682 + }, + { + "epoch": 0.3877880184331797, + "grad_norm": 0.7608738547672518, + "learning_rate": 1.8890786729136546e-06, + "loss": 0.8836106061935425, + "step": 1683 + }, + { + "epoch": 0.3880184331797235, + "grad_norm": 0.636256009552465, + "learning_rate": 1.8889042113601166e-06, + "loss": 0.8949145078659058, + "step": 1684 + }, + { + "epoch": 0.3882488479262673, + "grad_norm": 0.5966436023392323, + "learning_rate": 1.8887296207833095e-06, + "loss": 0.6210965514183044, + "step": 1685 + }, + { + "epoch": 0.3884792626728111, + "grad_norm": 0.8527942588919344, + "learning_rate": 1.8885549012085744e-06, + "loss": 0.9216527938842773, + "step": 1686 + }, + { + "epoch": 0.38870967741935486, + "grad_norm": 0.6878600463475216, + "learning_rate": 1.8883800526612715e-06, + "loss": 0.9266358613967896, + "step": 1687 + }, + { + "epoch": 0.38894009216589864, + "grad_norm": 0.7261249184769291, + "learning_rate": 1.88820507516678e-06, + "loss": 0.8550606966018677, + "step": 1688 + }, + { + "epoch": 0.38917050691244237, + "grad_norm": 0.702582367534852, + "learning_rate": 1.888029968750498e-06, + "loss": 0.8632181882858276, + "step": 1689 + }, + { + "epoch": 0.38940092165898615, + "grad_norm": 0.8055419508573982, + "learning_rate": 1.8878547334378415e-06, + "loss": 0.8795493841171265, + "step": 1690 + }, + { + "epoch": 0.38963133640552994, + "grad_norm": 0.8491490559655837, + "learning_rate": 1.8876793692542456e-06, + "loss": 0.9750456809997559, + "step": 1691 + }, + { + "epoch": 0.3898617511520737, + "grad_norm": 0.7818793926101317, + "learning_rate": 1.8875038762251645e-06, + "loss": 0.9270161390304565, + "step": 1692 + }, + { + "epoch": 0.3900921658986175, + "grad_norm": 0.7260894881906815, + "learning_rate": 1.8873282543760705e-06, + "loss": 0.8154089450836182, + "step": 1693 + }, + { + "epoch": 0.3903225806451613, + 
"grad_norm": 0.692223503364103, + "learning_rate": 1.887152503732455e-06, + "loss": 0.9245043992996216, + "step": 1694 + }, + { + "epoch": 0.39055299539170507, + "grad_norm": 0.7622355519095229, + "learning_rate": 1.8869766243198284e-06, + "loss": 0.9218056201934814, + "step": 1695 + }, + { + "epoch": 0.39078341013824885, + "grad_norm": 0.5749624768358436, + "learning_rate": 1.8868006161637192e-06, + "loss": 0.7753894329071045, + "step": 1696 + }, + { + "epoch": 0.39101382488479264, + "grad_norm": 0.7181901167791495, + "learning_rate": 1.8866244792896739e-06, + "loss": 0.8455277681350708, + "step": 1697 + }, + { + "epoch": 0.3912442396313364, + "grad_norm": 0.7361657621974459, + "learning_rate": 1.8864482137232596e-06, + "loss": 0.8301571607589722, + "step": 1698 + }, + { + "epoch": 0.3914746543778802, + "grad_norm": 0.5504243602930398, + "learning_rate": 1.8862718194900602e-06, + "loss": 0.9768285155296326, + "step": 1699 + }, + { + "epoch": 0.391705069124424, + "grad_norm": 0.7416616964447972, + "learning_rate": 1.8860952966156798e-06, + "loss": 0.9659395217895508, + "step": 1700 + }, + { + "epoch": 0.3919354838709677, + "grad_norm": 0.731283063502841, + "learning_rate": 1.8859186451257401e-06, + "loss": 0.9975444078445435, + "step": 1701 + }, + { + "epoch": 0.3921658986175115, + "grad_norm": 0.712824030540976, + "learning_rate": 1.8857418650458816e-06, + "loss": 0.9248796701431274, + "step": 1702 + }, + { + "epoch": 0.3923963133640553, + "grad_norm": 0.6864309886370629, + "learning_rate": 1.8855649564017642e-06, + "loss": 0.8792428970336914, + "step": 1703 + }, + { + "epoch": 0.39262672811059907, + "grad_norm": 0.7264626081176593, + "learning_rate": 1.8853879192190657e-06, + "loss": 0.8387417197227478, + "step": 1704 + }, + { + "epoch": 0.39285714285714285, + "grad_norm": 0.707677593822268, + "learning_rate": 1.8852107535234828e-06, + "loss": 0.7020218372344971, + "step": 1705 + }, + { + "epoch": 0.39308755760368663, + "grad_norm": 0.673092322659609, + 
"learning_rate": 1.885033459340731e-06, + "loss": 0.7388321161270142, + "step": 1706 + }, + { + "epoch": 0.3933179723502304, + "grad_norm": 0.7503922468030345, + "learning_rate": 1.8848560366965441e-06, + "loss": 0.7536240220069885, + "step": 1707 + }, + { + "epoch": 0.3935483870967742, + "grad_norm": 0.7237343332600692, + "learning_rate": 1.8846784856166746e-06, + "loss": 0.747667670249939, + "step": 1708 + }, + { + "epoch": 0.393778801843318, + "grad_norm": 0.7263541821971573, + "learning_rate": 1.8845008061268945e-06, + "loss": 0.8068975210189819, + "step": 1709 + }, + { + "epoch": 0.39400921658986177, + "grad_norm": 0.7581453840562968, + "learning_rate": 1.8843229982529932e-06, + "loss": 0.7613410949707031, + "step": 1710 + }, + { + "epoch": 0.39423963133640555, + "grad_norm": 0.6546080156681554, + "learning_rate": 1.8841450620207793e-06, + "loss": 0.8579158782958984, + "step": 1711 + }, + { + "epoch": 0.39447004608294933, + "grad_norm": 0.6400652758844664, + "learning_rate": 1.88396699745608e-06, + "loss": 0.8754673004150391, + "step": 1712 + }, + { + "epoch": 0.39470046082949306, + "grad_norm": 0.7227539443635326, + "learning_rate": 1.8837888045847415e-06, + "loss": 0.7988177537918091, + "step": 1713 + }, + { + "epoch": 0.39493087557603684, + "grad_norm": 0.7533730909693769, + "learning_rate": 1.8836104834326279e-06, + "loss": 0.8658367395401001, + "step": 1714 + }, + { + "epoch": 0.3951612903225806, + "grad_norm": 0.7819630929666835, + "learning_rate": 1.8834320340256223e-06, + "loss": 0.8777489066123962, + "step": 1715 + }, + { + "epoch": 0.3953917050691244, + "grad_norm": 0.6763778401068745, + "learning_rate": 1.8832534563896264e-06, + "loss": 0.9785901308059692, + "step": 1716 + }, + { + "epoch": 0.3956221198156682, + "grad_norm": 0.7796554840537433, + "learning_rate": 1.883074750550561e-06, + "loss": 0.847503125667572, + "step": 1717 + }, + { + "epoch": 0.395852534562212, + "grad_norm": 0.7786503806499795, + "learning_rate": 1.8828959165343643e-06, + 
"loss": 1.0159538984298706, + "step": 1718 + }, + { + "epoch": 0.39608294930875576, + "grad_norm": 0.8472423063084373, + "learning_rate": 1.882716954366994e-06, + "loss": 0.9064888954162598, + "step": 1719 + }, + { + "epoch": 0.39631336405529954, + "grad_norm": 0.7664117713246195, + "learning_rate": 1.8825378640744264e-06, + "loss": 0.956849217414856, + "step": 1720 + }, + { + "epoch": 0.3965437788018433, + "grad_norm": 0.758389558529891, + "learning_rate": 1.882358645682656e-06, + "loss": 0.8983441591262817, + "step": 1721 + }, + { + "epoch": 0.3967741935483871, + "grad_norm": 0.5702990900386659, + "learning_rate": 1.8821792992176967e-06, + "loss": 0.7698956727981567, + "step": 1722 + }, + { + "epoch": 0.3970046082949309, + "grad_norm": 0.8118873070872795, + "learning_rate": 1.8819998247055797e-06, + "loss": 0.9376351833343506, + "step": 1723 + }, + { + "epoch": 0.3972350230414747, + "grad_norm": 0.8486728692509508, + "learning_rate": 1.881820222172356e-06, + "loss": 0.8776079416275024, + "step": 1724 + }, + { + "epoch": 0.39746543778801846, + "grad_norm": 0.9552617438975642, + "learning_rate": 1.8816404916440942e-06, + "loss": 0.9776726961135864, + "step": 1725 + }, + { + "epoch": 0.3976958525345622, + "grad_norm": 0.5841959382882552, + "learning_rate": 1.8814606331468822e-06, + "loss": 0.7699686288833618, + "step": 1726 + }, + { + "epoch": 0.39792626728110597, + "grad_norm": 0.7581748259398383, + "learning_rate": 1.8812806467068265e-06, + "loss": 0.8256866931915283, + "step": 1727 + }, + { + "epoch": 0.39815668202764976, + "grad_norm": 0.6320724280659841, + "learning_rate": 1.881100532350051e-06, + "loss": 0.8493847846984863, + "step": 1728 + }, + { + "epoch": 0.39838709677419354, + "grad_norm": 0.6592895509903398, + "learning_rate": 1.8809202901027002e-06, + "loss": 0.8138688802719116, + "step": 1729 + }, + { + "epoch": 0.3986175115207373, + "grad_norm": 0.7569638843586648, + "learning_rate": 1.880739919990935e-06, + "loss": 0.8637882471084595, + "step": 1730 + 
}, + { + "epoch": 0.3988479262672811, + "grad_norm": 0.5847233582227849, + "learning_rate": 1.880559422040937e-06, + "loss": 0.8988152742385864, + "step": 1731 + }, + { + "epoch": 0.3990783410138249, + "grad_norm": 0.4724369020135308, + "learning_rate": 1.880378796278904e-06, + "loss": 0.8247279524803162, + "step": 1732 + }, + { + "epoch": 0.39930875576036867, + "grad_norm": 0.8071560192562027, + "learning_rate": 1.8801980427310546e-06, + "loss": 0.9699070453643799, + "step": 1733 + }, + { + "epoch": 0.39953917050691246, + "grad_norm": 0.8108307817175047, + "learning_rate": 1.8800171614236241e-06, + "loss": 0.9516465663909912, + "step": 1734 + }, + { + "epoch": 0.39976958525345624, + "grad_norm": 0.655632769560408, + "learning_rate": 1.879836152382868e-06, + "loss": 0.9553602933883667, + "step": 1735 + }, + { + "epoch": 0.4, + "grad_norm": 0.666214042250043, + "learning_rate": 1.879655015635059e-06, + "loss": 0.7805094718933105, + "step": 1736 + }, + { + "epoch": 0.4002304147465438, + "grad_norm": 0.730264537734651, + "learning_rate": 1.8794737512064888e-06, + "loss": 0.9509962797164917, + "step": 1737 + }, + { + "epoch": 0.40046082949308753, + "grad_norm": 0.6755335543884481, + "learning_rate": 1.8792923591234683e-06, + "loss": 0.8663454055786133, + "step": 1738 + }, + { + "epoch": 0.4006912442396313, + "grad_norm": 0.7325230471707477, + "learning_rate": 1.8791108394123257e-06, + "loss": 0.8773336410522461, + "step": 1739 + }, + { + "epoch": 0.4009216589861751, + "grad_norm": 0.6493515009165077, + "learning_rate": 1.8789291920994086e-06, + "loss": 0.7201284766197205, + "step": 1740 + }, + { + "epoch": 0.4011520737327189, + "grad_norm": 0.6665806307840867, + "learning_rate": 1.8787474172110826e-06, + "loss": 0.799161434173584, + "step": 1741 + }, + { + "epoch": 0.40138248847926267, + "grad_norm": 0.8651407328311, + "learning_rate": 1.8785655147737326e-06, + "loss": 0.8987375497817993, + "step": 1742 + }, + { + "epoch": 0.40161290322580645, + "grad_norm": 
0.8706739093465035, + "learning_rate": 1.878383484813761e-06, + "loss": 0.8553296327590942, + "step": 1743 + }, + { + "epoch": 0.40184331797235023, + "grad_norm": 0.6706596266673751, + "learning_rate": 1.8782013273575895e-06, + "loss": 0.8376551270484924, + "step": 1744 + }, + { + "epoch": 0.402073732718894, + "grad_norm": 0.7963067027250083, + "learning_rate": 1.8780190424316578e-06, + "loss": 0.8220775723457336, + "step": 1745 + }, + { + "epoch": 0.4023041474654378, + "grad_norm": 0.7339356821882034, + "learning_rate": 1.8778366300624244e-06, + "loss": 0.8614820241928101, + "step": 1746 + }, + { + "epoch": 0.4025345622119816, + "grad_norm": 0.8065421465945496, + "learning_rate": 1.8776540902763665e-06, + "loss": 0.9434851408004761, + "step": 1747 + }, + { + "epoch": 0.40276497695852537, + "grad_norm": 0.8102544073977809, + "learning_rate": 1.877471423099979e-06, + "loss": 0.8150373101234436, + "step": 1748 + }, + { + "epoch": 0.40299539170506915, + "grad_norm": 0.5910178895755134, + "learning_rate": 1.8772886285597762e-06, + "loss": 0.7660368084907532, + "step": 1749 + }, + { + "epoch": 0.4032258064516129, + "grad_norm": 0.7262631962712356, + "learning_rate": 1.8771057066822903e-06, + "loss": 0.7647032141685486, + "step": 1750 + }, + { + "epoch": 0.40345622119815666, + "grad_norm": 0.6238918567790319, + "learning_rate": 1.8769226574940723e-06, + "loss": 0.6034061908721924, + "step": 1751 + }, + { + "epoch": 0.40368663594470044, + "grad_norm": 0.7344154412243011, + "learning_rate": 1.8767394810216914e-06, + "loss": 1.0062675476074219, + "step": 1752 + }, + { + "epoch": 0.40391705069124423, + "grad_norm": 0.6966552417777933, + "learning_rate": 1.8765561772917354e-06, + "loss": 0.9791489839553833, + "step": 1753 + }, + { + "epoch": 0.404147465437788, + "grad_norm": 0.5825611392130148, + "learning_rate": 1.8763727463308108e-06, + "loss": 0.9054251909255981, + "step": 1754 + }, + { + "epoch": 0.4043778801843318, + "grad_norm": 0.7455727854900284, + "learning_rate": 
1.8761891881655423e-06, + "loss": 0.9156093597412109, + "step": 1755 + }, + { + "epoch": 0.4046082949308756, + "grad_norm": 0.6983601123297067, + "learning_rate": 1.876005502822573e-06, + "loss": 0.7525647878646851, + "step": 1756 + }, + { + "epoch": 0.40483870967741936, + "grad_norm": 0.6156689393045622, + "learning_rate": 1.8758216903285643e-06, + "loss": 0.8321493864059448, + "step": 1757 + }, + { + "epoch": 0.40506912442396314, + "grad_norm": 0.888147060404811, + "learning_rate": 1.8756377507101973e-06, + "loss": 0.9937042593955994, + "step": 1758 + }, + { + "epoch": 0.40529953917050693, + "grad_norm": 0.553604524827559, + "learning_rate": 1.8754536839941694e-06, + "loss": 0.7001460790634155, + "step": 1759 + }, + { + "epoch": 0.4055299539170507, + "grad_norm": 0.7747422377442987, + "learning_rate": 1.8752694902071986e-06, + "loss": 1.0062569379806519, + "step": 1760 + }, + { + "epoch": 0.4057603686635945, + "grad_norm": 0.7145787925683823, + "learning_rate": 1.8750851693760199e-06, + "loss": 0.7414188385009766, + "step": 1761 + }, + { + "epoch": 0.4059907834101382, + "grad_norm": 0.6306403135362045, + "learning_rate": 1.8749007215273873e-06, + "loss": 0.7181771397590637, + "step": 1762 + }, + { + "epoch": 0.406221198156682, + "grad_norm": 0.7763317855361268, + "learning_rate": 1.8747161466880732e-06, + "loss": 0.8797845244407654, + "step": 1763 + }, + { + "epoch": 0.4064516129032258, + "grad_norm": 0.6123636271862207, + "learning_rate": 1.8745314448848684e-06, + "loss": 0.7774960398674011, + "step": 1764 + }, + { + "epoch": 0.4066820276497696, + "grad_norm": 0.9110978120854332, + "learning_rate": 1.874346616144582e-06, + "loss": 0.8499422073364258, + "step": 1765 + }, + { + "epoch": 0.40691244239631336, + "grad_norm": 0.6306854745937814, + "learning_rate": 1.874161660494042e-06, + "loss": 0.7070250511169434, + "step": 1766 + }, + { + "epoch": 0.40714285714285714, + "grad_norm": 0.6762437905211294, + "learning_rate": 1.8739765779600939e-06, + "loss": 
0.8009281158447266, + "step": 1767 + }, + { + "epoch": 0.4073732718894009, + "grad_norm": 0.6084135312041689, + "learning_rate": 1.8737913685696027e-06, + "loss": 0.6866155862808228, + "step": 1768 + }, + { + "epoch": 0.4076036866359447, + "grad_norm": 0.7813040754942882, + "learning_rate": 1.873606032349451e-06, + "loss": 0.8200059533119202, + "step": 1769 + }, + { + "epoch": 0.4078341013824885, + "grad_norm": 0.629385301974861, + "learning_rate": 1.8734205693265404e-06, + "loss": 0.8413814902305603, + "step": 1770 + }, + { + "epoch": 0.4080645161290323, + "grad_norm": 0.776612651465312, + "learning_rate": 1.8732349795277903e-06, + "loss": 0.9935271143913269, + "step": 1771 + }, + { + "epoch": 0.40829493087557606, + "grad_norm": 0.6589503544607032, + "learning_rate": 1.873049262980139e-06, + "loss": 0.8718058466911316, + "step": 1772 + }, + { + "epoch": 0.40852534562211984, + "grad_norm": 0.8620050398467397, + "learning_rate": 1.8728634197105428e-06, + "loss": 0.9009358882904053, + "step": 1773 + }, + { + "epoch": 0.40875576036866357, + "grad_norm": 0.7755306532739165, + "learning_rate": 1.8726774497459768e-06, + "loss": 0.9128156900405884, + "step": 1774 + }, + { + "epoch": 0.40898617511520735, + "grad_norm": 0.6450271750629438, + "learning_rate": 1.8724913531134342e-06, + "loss": 0.8524078130722046, + "step": 1775 + }, + { + "epoch": 0.40921658986175113, + "grad_norm": 0.7569328214438452, + "learning_rate": 1.872305129839927e-06, + "loss": 0.9431420564651489, + "step": 1776 + }, + { + "epoch": 0.4094470046082949, + "grad_norm": 0.6746261931292995, + "learning_rate": 1.8721187799524846e-06, + "loss": 0.7666694521903992, + "step": 1777 + }, + { + "epoch": 0.4096774193548387, + "grad_norm": 0.6448149830483173, + "learning_rate": 1.871932303478156e-06, + "loss": 0.872551679611206, + "step": 1778 + }, + { + "epoch": 0.4099078341013825, + "grad_norm": 0.6320914450645303, + "learning_rate": 1.8717457004440079e-06, + "loss": 0.7596250176429749, + "step": 1779 + }, + { + 
"epoch": 0.41013824884792627, + "grad_norm": 0.9751786230729174, + "learning_rate": 1.8715589708771253e-06, + "loss": 1.0098414421081543, + "step": 1780 + }, + { + "epoch": 0.41036866359447005, + "grad_norm": 0.9695096083628231, + "learning_rate": 1.871372114804612e-06, + "loss": 0.9961523413658142, + "step": 1781 + }, + { + "epoch": 0.41059907834101383, + "grad_norm": 0.8458697864526913, + "learning_rate": 1.8711851322535896e-06, + "loss": 0.9065390825271606, + "step": 1782 + }, + { + "epoch": 0.4108294930875576, + "grad_norm": 0.5445685826440523, + "learning_rate": 1.8709980232511987e-06, + "loss": 0.7906428575515747, + "step": 1783 + }, + { + "epoch": 0.4110599078341014, + "grad_norm": 0.5783797348856774, + "learning_rate": 1.8708107878245976e-06, + "loss": 0.798285722732544, + "step": 1784 + }, + { + "epoch": 0.4112903225806452, + "grad_norm": 0.7492534516122694, + "learning_rate": 1.870623426000964e-06, + "loss": 0.7809790372848511, + "step": 1785 + }, + { + "epoch": 0.4115207373271889, + "grad_norm": 0.8776810150838931, + "learning_rate": 1.8704359378074921e-06, + "loss": 0.8931630849838257, + "step": 1786 + }, + { + "epoch": 0.4117511520737327, + "grad_norm": 0.6321595970525742, + "learning_rate": 1.870248323271396e-06, + "loss": 0.8219889402389526, + "step": 1787 + }, + { + "epoch": 0.4119815668202765, + "grad_norm": 0.9973808347817518, + "learning_rate": 1.8700605824199084e-06, + "loss": 0.8371819257736206, + "step": 1788 + }, + { + "epoch": 0.41221198156682026, + "grad_norm": 0.7869196176383942, + "learning_rate": 1.8698727152802789e-06, + "loss": 0.951171875, + "step": 1789 + }, + { + "epoch": 0.41244239631336405, + "grad_norm": 0.6763081680317143, + "learning_rate": 1.8696847218797763e-06, + "loss": 0.7678385972976685, + "step": 1790 + }, + { + "epoch": 0.41267281105990783, + "grad_norm": 0.567634539573834, + "learning_rate": 1.8694966022456872e-06, + "loss": 0.9296993017196655, + "step": 1791 + }, + { + "epoch": 0.4129032258064516, + "grad_norm": 
0.5450828031444163, + "learning_rate": 1.8693083564053178e-06, + "loss": 0.8991763591766357, + "step": 1792 + }, + { + "epoch": 0.4131336405529954, + "grad_norm": 0.5967294444907658, + "learning_rate": 1.8691199843859913e-06, + "loss": 0.8332901000976562, + "step": 1793 + }, + { + "epoch": 0.4133640552995392, + "grad_norm": 0.7571962190593917, + "learning_rate": 1.8689314862150497e-06, + "loss": 0.7723548412322998, + "step": 1794 + }, + { + "epoch": 0.41359447004608296, + "grad_norm": 0.6588409150246594, + "learning_rate": 1.868742861919853e-06, + "loss": 0.7768993377685547, + "step": 1795 + }, + { + "epoch": 0.41382488479262675, + "grad_norm": 0.43193778142300604, + "learning_rate": 1.86855411152778e-06, + "loss": 0.6058932542800903, + "step": 1796 + }, + { + "epoch": 0.41405529953917053, + "grad_norm": 0.8667574432138021, + "learning_rate": 1.8683652350662274e-06, + "loss": 0.8711605072021484, + "step": 1797 + }, + { + "epoch": 0.4142857142857143, + "grad_norm": 0.8780154463369872, + "learning_rate": 1.8681762325626104e-06, + "loss": 0.9023469090461731, + "step": 1798 + }, + { + "epoch": 0.41451612903225804, + "grad_norm": 0.6070102500189553, + "learning_rate": 1.867987104044363e-06, + "loss": 0.7735910415649414, + "step": 1799 + }, + { + "epoch": 0.4147465437788018, + "grad_norm": 0.6293725885471063, + "learning_rate": 1.8677978495389364e-06, + "loss": 0.6609020829200745, + "step": 1800 + }, + { + "epoch": 0.4149769585253456, + "grad_norm": 0.6485782104038655, + "learning_rate": 1.8676084690738005e-06, + "loss": 0.7823291420936584, + "step": 1801 + }, + { + "epoch": 0.4152073732718894, + "grad_norm": 0.8472581681306268, + "learning_rate": 1.867418962676444e-06, + "loss": 0.9076563715934753, + "step": 1802 + }, + { + "epoch": 0.4154377880184332, + "grad_norm": 0.561807586977654, + "learning_rate": 1.8672293303743735e-06, + "loss": 0.8645772933959961, + "step": 1803 + }, + { + "epoch": 0.41566820276497696, + "grad_norm": 0.6821058596015542, + "learning_rate": 
1.8670395721951135e-06, + "loss": 0.8071421384811401, + "step": 1804 + }, + { + "epoch": 0.41589861751152074, + "grad_norm": 0.7396557376618352, + "learning_rate": 1.8668496881662077e-06, + "loss": 0.8459846377372742, + "step": 1805 + }, + { + "epoch": 0.4161290322580645, + "grad_norm": 0.7167052224732033, + "learning_rate": 1.866659678315217e-06, + "loss": 0.8467865586280823, + "step": 1806 + }, + { + "epoch": 0.4163594470046083, + "grad_norm": 0.8262164291061972, + "learning_rate": 1.8664695426697215e-06, + "loss": 0.8963291645050049, + "step": 1807 + }, + { + "epoch": 0.4165898617511521, + "grad_norm": 0.528766323006704, + "learning_rate": 1.8662792812573188e-06, + "loss": 0.7901826500892639, + "step": 1808 + }, + { + "epoch": 0.4168202764976959, + "grad_norm": 0.8974116604603759, + "learning_rate": 1.8660888941056252e-06, + "loss": 0.807115912437439, + "step": 1809 + }, + { + "epoch": 0.41705069124423966, + "grad_norm": 0.6271237317374816, + "learning_rate": 1.8658983812422753e-06, + "loss": 0.8439537286758423, + "step": 1810 + }, + { + "epoch": 0.4172811059907834, + "grad_norm": 0.8360600380108553, + "learning_rate": 1.8657077426949214e-06, + "loss": 0.6920834183692932, + "step": 1811 + }, + { + "epoch": 0.41751152073732717, + "grad_norm": 0.7603232216568709, + "learning_rate": 1.865516978491235e-06, + "loss": 0.8712124824523926, + "step": 1812 + }, + { + "epoch": 0.41774193548387095, + "grad_norm": 0.718498571919399, + "learning_rate": 1.865326088658905e-06, + "loss": 0.7720927596092224, + "step": 1813 + }, + { + "epoch": 0.41797235023041474, + "grad_norm": 0.6953832780918029, + "learning_rate": 1.8651350732256386e-06, + "loss": 0.8003814220428467, + "step": 1814 + }, + { + "epoch": 0.4182027649769585, + "grad_norm": 0.838076886250554, + "learning_rate": 1.8649439322191616e-06, + "loss": 0.8999850749969482, + "step": 1815 + }, + { + "epoch": 0.4184331797235023, + "grad_norm": 0.584714014216153, + "learning_rate": 1.8647526656672179e-06, + "loss": 
0.6752324104309082, + "step": 1816 + }, + { + "epoch": 0.4186635944700461, + "grad_norm": 0.7365325720475113, + "learning_rate": 1.8645612735975696e-06, + "loss": 0.8521262407302856, + "step": 1817 + }, + { + "epoch": 0.41889400921658987, + "grad_norm": 0.7194058023938104, + "learning_rate": 1.864369756037997e-06, + "loss": 0.8813315629959106, + "step": 1818 + }, + { + "epoch": 0.41912442396313365, + "grad_norm": 0.742428235010686, + "learning_rate": 1.8641781130162986e-06, + "loss": 0.8358273506164551, + "step": 1819 + }, + { + "epoch": 0.41935483870967744, + "grad_norm": 0.591500867449821, + "learning_rate": 1.863986344560291e-06, + "loss": 0.8051023483276367, + "step": 1820 + }, + { + "epoch": 0.4195852534562212, + "grad_norm": 0.7791039105049288, + "learning_rate": 1.863794450697809e-06, + "loss": 0.768791675567627, + "step": 1821 + }, + { + "epoch": 0.419815668202765, + "grad_norm": 0.9369354252226071, + "learning_rate": 1.8636024314567065e-06, + "loss": 0.8420040607452393, + "step": 1822 + }, + { + "epoch": 0.42004608294930873, + "grad_norm": 0.673055652482875, + "learning_rate": 1.8634102868648542e-06, + "loss": 0.7670450806617737, + "step": 1823 + }, + { + "epoch": 0.4202764976958525, + "grad_norm": 0.6699812957272996, + "learning_rate": 1.863218016950142e-06, + "loss": 0.8292283415794373, + "step": 1824 + }, + { + "epoch": 0.4205069124423963, + "grad_norm": 0.6058254395333167, + "learning_rate": 1.8630256217404767e-06, + "loss": 0.8005781769752502, + "step": 1825 + }, + { + "epoch": 0.4207373271889401, + "grad_norm": 0.923190166351158, + "learning_rate": 1.8628331012637854e-06, + "loss": 0.8214897513389587, + "step": 1826 + }, + { + "epoch": 0.42096774193548386, + "grad_norm": 0.6734314204378448, + "learning_rate": 1.8626404555480118e-06, + "loss": 0.7938524484634399, + "step": 1827 + }, + { + "epoch": 0.42119815668202765, + "grad_norm": 0.7824933974022145, + "learning_rate": 1.862447684621118e-06, + "loss": 1.0047048330307007, + "step": 1828 + }, + { + 
"epoch": 0.42142857142857143, + "grad_norm": 0.7060449091561402, + "learning_rate": 1.862254788511084e-06, + "loss": 0.7660601139068604, + "step": 1829 + }, + { + "epoch": 0.4216589861751152, + "grad_norm": 0.7940468118829026, + "learning_rate": 1.8620617672459096e-06, + "loss": 0.8227912783622742, + "step": 1830 + }, + { + "epoch": 0.421889400921659, + "grad_norm": 0.8322274877206185, + "learning_rate": 1.8618686208536106e-06, + "loss": 0.8570956587791443, + "step": 1831 + }, + { + "epoch": 0.4221198156682028, + "grad_norm": 0.6215191834076389, + "learning_rate": 1.8616753493622221e-06, + "loss": 0.7472532987594604, + "step": 1832 + }, + { + "epoch": 0.42235023041474656, + "grad_norm": 0.702673502332975, + "learning_rate": 1.8614819527997976e-06, + "loss": 0.812872052192688, + "step": 1833 + }, + { + "epoch": 0.42258064516129035, + "grad_norm": 0.7168526420375322, + "learning_rate": 1.861288431194408e-06, + "loss": 0.7801386117935181, + "step": 1834 + }, + { + "epoch": 0.4228110599078341, + "grad_norm": 0.8740851917776313, + "learning_rate": 1.8610947845741426e-06, + "loss": 0.7834687829017639, + "step": 1835 + }, + { + "epoch": 0.42304147465437786, + "grad_norm": 0.8009990500080056, + "learning_rate": 1.8609010129671097e-06, + "loss": 0.786865234375, + "step": 1836 + }, + { + "epoch": 0.42327188940092164, + "grad_norm": 0.6559457181196078, + "learning_rate": 1.860707116401434e-06, + "loss": 0.7728738784790039, + "step": 1837 + }, + { + "epoch": 0.4235023041474654, + "grad_norm": 0.6384024302830484, + "learning_rate": 1.8605130949052598e-06, + "loss": 0.6508793830871582, + "step": 1838 + }, + { + "epoch": 0.4237327188940092, + "grad_norm": 0.6544986461362278, + "learning_rate": 1.8603189485067492e-06, + "loss": 0.7949484586715698, + "step": 1839 + }, + { + "epoch": 0.423963133640553, + "grad_norm": 0.7679729608195138, + "learning_rate": 1.8601246772340822e-06, + "loss": 0.7151408195495605, + "step": 1840 + }, + { + "epoch": 0.4241935483870968, + "grad_norm": 
0.6910188883895837, + "learning_rate": 1.859930281115457e-06, + "loss": 0.7678598165512085, + "step": 1841 + }, + { + "epoch": 0.42442396313364056, + "grad_norm": 0.6547923584739629, + "learning_rate": 1.8597357601790895e-06, + "loss": 0.8042058944702148, + "step": 1842 + }, + { + "epoch": 0.42465437788018434, + "grad_norm": 0.6889925049755639, + "learning_rate": 1.859541114453215e-06, + "loss": 0.7328081130981445, + "step": 1843 + }, + { + "epoch": 0.4248847926267281, + "grad_norm": 0.7385850960276812, + "learning_rate": 1.8593463439660853e-06, + "loss": 0.7646626234054565, + "step": 1844 + }, + { + "epoch": 0.4251152073732719, + "grad_norm": 0.7455331415840897, + "learning_rate": 1.8591514487459717e-06, + "loss": 0.8965721726417542, + "step": 1845 + }, + { + "epoch": 0.4253456221198157, + "grad_norm": 0.6783955368622289, + "learning_rate": 1.8589564288211623e-06, + "loss": 0.8892468810081482, + "step": 1846 + }, + { + "epoch": 0.4255760368663594, + "grad_norm": 0.669354336924349, + "learning_rate": 1.8587612842199648e-06, + "loss": 0.8314409255981445, + "step": 1847 + }, + { + "epoch": 0.4258064516129032, + "grad_norm": 0.7299222952808436, + "learning_rate": 1.8585660149707034e-06, + "loss": 0.7713892459869385, + "step": 1848 + }, + { + "epoch": 0.426036866359447, + "grad_norm": 0.7583328231707663, + "learning_rate": 1.8583706211017216e-06, + "loss": 0.9349459409713745, + "step": 1849 + }, + { + "epoch": 0.42626728110599077, + "grad_norm": 0.7309436500165829, + "learning_rate": 1.8581751026413805e-06, + "loss": 0.8438700437545776, + "step": 1850 + }, + { + "epoch": 0.42649769585253455, + "grad_norm": 1.0171962155435006, + "learning_rate": 1.8579794596180594e-06, + "loss": 0.9559776782989502, + "step": 1851 + }, + { + "epoch": 0.42672811059907834, + "grad_norm": 0.6701533748146308, + "learning_rate": 1.8577836920601556e-06, + "loss": 0.7124872803688049, + "step": 1852 + }, + { + "epoch": 0.4269585253456221, + "grad_norm": 0.8613289026694887, + "learning_rate": 
1.8575877999960842e-06, + "loss": 0.7935503125190735, + "step": 1853 + }, + { + "epoch": 0.4271889400921659, + "grad_norm": 0.7107096707504692, + "learning_rate": 1.8573917834542792e-06, + "loss": 0.9145890474319458, + "step": 1854 + }, + { + "epoch": 0.4274193548387097, + "grad_norm": 0.7290504646059204, + "learning_rate": 1.8571956424631918e-06, + "loss": 0.8239228129386902, + "step": 1855 + }, + { + "epoch": 0.42764976958525347, + "grad_norm": 0.6018983094431002, + "learning_rate": 1.8569993770512916e-06, + "loss": 0.8767688274383545, + "step": 1856 + }, + { + "epoch": 0.42788018433179725, + "grad_norm": 0.6742014961339767, + "learning_rate": 1.8568029872470663e-06, + "loss": 0.7860859632492065, + "step": 1857 + }, + { + "epoch": 0.42811059907834104, + "grad_norm": 0.6990668023927343, + "learning_rate": 1.8566064730790218e-06, + "loss": 0.8855729103088379, + "step": 1858 + }, + { + "epoch": 0.4283410138248848, + "grad_norm": 0.8518974155898882, + "learning_rate": 1.8564098345756815e-06, + "loss": 1.023299217224121, + "step": 1859 + }, + { + "epoch": 0.42857142857142855, + "grad_norm": 0.7174059285774532, + "learning_rate": 1.8562130717655878e-06, + "loss": 0.7665202617645264, + "step": 1860 + }, + { + "epoch": 0.42880184331797233, + "grad_norm": 0.7036772811538429, + "learning_rate": 1.8560161846773e-06, + "loss": 0.8456651568412781, + "step": 1861 + }, + { + "epoch": 0.4290322580645161, + "grad_norm": 0.7229483822116546, + "learning_rate": 1.8558191733393964e-06, + "loss": 0.8920061588287354, + "step": 1862 + }, + { + "epoch": 0.4292626728110599, + "grad_norm": 0.8104170426239989, + "learning_rate": 1.8556220377804723e-06, + "loss": 0.8686853051185608, + "step": 1863 + }, + { + "epoch": 0.4294930875576037, + "grad_norm": 0.5832986779631602, + "learning_rate": 1.8554247780291425e-06, + "loss": 0.6976242065429688, + "step": 1864 + }, + { + "epoch": 0.42972350230414746, + "grad_norm": 0.7347161353185314, + "learning_rate": 1.8552273941140387e-06, + "loss": 
0.9612032771110535, + "step": 1865 + }, + { + "epoch": 0.42995391705069125, + "grad_norm": 0.6243829709767468, + "learning_rate": 1.8550298860638108e-06, + "loss": 0.9288003444671631, + "step": 1866 + }, + { + "epoch": 0.43018433179723503, + "grad_norm": 0.6743712494799082, + "learning_rate": 1.8548322539071263e-06, + "loss": 0.8397525548934937, + "step": 1867 + }, + { + "epoch": 0.4304147465437788, + "grad_norm": 0.5881426126037044, + "learning_rate": 1.8546344976726722e-06, + "loss": 0.6311365365982056, + "step": 1868 + }, + { + "epoch": 0.4306451612903226, + "grad_norm": 0.7497017851812813, + "learning_rate": 1.8544366173891523e-06, + "loss": 0.7868270874023438, + "step": 1869 + }, + { + "epoch": 0.4308755760368664, + "grad_norm": 0.6265515804052451, + "learning_rate": 1.8542386130852883e-06, + "loss": 0.9197052717208862, + "step": 1870 + }, + { + "epoch": 0.43110599078341016, + "grad_norm": 0.7018278829983491, + "learning_rate": 1.8540404847898206e-06, + "loss": 0.7875635027885437, + "step": 1871 + }, + { + "epoch": 0.4313364055299539, + "grad_norm": 0.7789284724063816, + "learning_rate": 1.853842232531507e-06, + "loss": 0.9805077910423279, + "step": 1872 + }, + { + "epoch": 0.4315668202764977, + "grad_norm": 0.838470325159009, + "learning_rate": 1.8536438563391236e-06, + "loss": 0.8906866312026978, + "step": 1873 + }, + { + "epoch": 0.43179723502304146, + "grad_norm": 0.73247587866706, + "learning_rate": 1.8534453562414649e-06, + "loss": 0.7506693601608276, + "step": 1874 + }, + { + "epoch": 0.43202764976958524, + "grad_norm": 0.6576915367586517, + "learning_rate": 1.8532467322673422e-06, + "loss": 0.6173181533813477, + "step": 1875 + }, + { + "epoch": 0.432258064516129, + "grad_norm": 0.6907344817423696, + "learning_rate": 1.853047984445586e-06, + "loss": 0.9217972755432129, + "step": 1876 + }, + { + "epoch": 0.4324884792626728, + "grad_norm": 0.8808471726659616, + "learning_rate": 1.8528491128050442e-06, + "loss": 0.8300588130950928, + "step": 1877 + }, + { 
+ "epoch": 0.4327188940092166, + "grad_norm": 0.7869544847637374, + "learning_rate": 1.8526501173745826e-06, + "loss": 0.8109279870986938, + "step": 1878 + }, + { + "epoch": 0.4329493087557604, + "grad_norm": 0.8253705845492948, + "learning_rate": 1.852450998183085e-06, + "loss": 0.9243700504302979, + "step": 1879 + }, + { + "epoch": 0.43317972350230416, + "grad_norm": 0.7291726511705204, + "learning_rate": 1.8522517552594539e-06, + "loss": 0.7983531951904297, + "step": 1880 + }, + { + "epoch": 0.43341013824884794, + "grad_norm": 0.837506072245515, + "learning_rate": 1.8520523886326088e-06, + "loss": 0.9931240081787109, + "step": 1881 + }, + { + "epoch": 0.4336405529953917, + "grad_norm": 0.7782064692415819, + "learning_rate": 1.8518528983314874e-06, + "loss": 0.923255443572998, + "step": 1882 + }, + { + "epoch": 0.4338709677419355, + "grad_norm": 0.5003052765919304, + "learning_rate": 1.8516532843850454e-06, + "loss": 0.8470325469970703, + "step": 1883 + }, + { + "epoch": 0.43410138248847924, + "grad_norm": 0.7497886449083292, + "learning_rate": 1.8514535468222566e-06, + "loss": 0.9175074696540833, + "step": 1884 + }, + { + "epoch": 0.434331797235023, + "grad_norm": 0.7474680310474195, + "learning_rate": 1.8512536856721126e-06, + "loss": 0.8617827892303467, + "step": 1885 + }, + { + "epoch": 0.4345622119815668, + "grad_norm": 0.6779026169933022, + "learning_rate": 1.8510537009636231e-06, + "loss": 0.6787248849868774, + "step": 1886 + }, + { + "epoch": 0.4347926267281106, + "grad_norm": 0.6948062534132075, + "learning_rate": 1.8508535927258157e-06, + "loss": 0.8031569719314575, + "step": 1887 + }, + { + "epoch": 0.43502304147465437, + "grad_norm": 0.8219581995376891, + "learning_rate": 1.8506533609877354e-06, + "loss": 1.0252577066421509, + "step": 1888 + }, + { + "epoch": 0.43525345622119815, + "grad_norm": 0.6297691459816858, + "learning_rate": 1.850453005778446e-06, + "loss": 0.7947444915771484, + "step": 1889 + }, + { + "epoch": 0.43548387096774194, + 
"grad_norm": 0.7974729793994046, + "learning_rate": 1.8502525271270288e-06, + "loss": 0.817523717880249, + "step": 1890 + }, + { + "epoch": 0.4357142857142857, + "grad_norm": 0.905445482286677, + "learning_rate": 1.850051925062583e-06, + "loss": 0.8029658794403076, + "step": 1891 + }, + { + "epoch": 0.4359447004608295, + "grad_norm": 0.7902601112013473, + "learning_rate": 1.8498511996142253e-06, + "loss": 0.871408224105835, + "step": 1892 + }, + { + "epoch": 0.4361751152073733, + "grad_norm": 0.7279346643764769, + "learning_rate": 1.849650350811091e-06, + "loss": 1.0133098363876343, + "step": 1893 + }, + { + "epoch": 0.43640552995391707, + "grad_norm": 0.5859043876213773, + "learning_rate": 1.8494493786823333e-06, + "loss": 0.8320624828338623, + "step": 1894 + }, + { + "epoch": 0.43663594470046085, + "grad_norm": 0.7240549495084485, + "learning_rate": 1.8492482832571225e-06, + "loss": 0.7757631540298462, + "step": 1895 + }, + { + "epoch": 0.4368663594470046, + "grad_norm": 0.7606146142454437, + "learning_rate": 1.8490470645646479e-06, + "loss": 0.8503100872039795, + "step": 1896 + }, + { + "epoch": 0.43709677419354837, + "grad_norm": 0.7560932530175453, + "learning_rate": 1.8488457226341158e-06, + "loss": 0.8145939707756042, + "step": 1897 + }, + { + "epoch": 0.43732718894009215, + "grad_norm": 0.8041258430075643, + "learning_rate": 1.848644257494751e-06, + "loss": 0.831500232219696, + "step": 1898 + }, + { + "epoch": 0.43755760368663593, + "grad_norm": 0.6473340838552745, + "learning_rate": 1.8484426691757956e-06, + "loss": 0.9340692758560181, + "step": 1899 + }, + { + "epoch": 0.4377880184331797, + "grad_norm": 0.7851684163129825, + "learning_rate": 1.8482409577065097e-06, + "loss": 1.011988639831543, + "step": 1900 + }, + { + "epoch": 0.4380184331797235, + "grad_norm": 0.6819650200659566, + "learning_rate": 1.848039123116172e-06, + "loss": 0.8110378980636597, + "step": 1901 + }, + { + "epoch": 0.4382488479262673, + "grad_norm": 0.6310651453357742, + 
"learning_rate": 1.8478371654340779e-06, + "loss": 0.8230330944061279, + "step": 1902 + }, + { + "epoch": 0.43847926267281107, + "grad_norm": 0.8335502206603579, + "learning_rate": 1.8476350846895419e-06, + "loss": 0.875052809715271, + "step": 1903 + }, + { + "epoch": 0.43870967741935485, + "grad_norm": 0.7394371211482306, + "learning_rate": 1.8474328809118953e-06, + "loss": 0.9373071193695068, + "step": 1904 + }, + { + "epoch": 0.43894009216589863, + "grad_norm": 0.7538115820848524, + "learning_rate": 1.847230554130488e-06, + "loss": 0.8341633677482605, + "step": 1905 + }, + { + "epoch": 0.4391705069124424, + "grad_norm": 0.6579829053639499, + "learning_rate": 1.8470281043746873e-06, + "loss": 0.8147767782211304, + "step": 1906 + }, + { + "epoch": 0.4394009216589862, + "grad_norm": 0.6022228592985512, + "learning_rate": 1.8468255316738785e-06, + "loss": 0.740512490272522, + "step": 1907 + }, + { + "epoch": 0.4396313364055299, + "grad_norm": 0.7743265443588842, + "learning_rate": 1.846622836057465e-06, + "loss": 0.7754743099212646, + "step": 1908 + }, + { + "epoch": 0.4398617511520737, + "grad_norm": 0.7535493986684056, + "learning_rate": 1.8464200175548677e-06, + "loss": 0.9131484031677246, + "step": 1909 + }, + { + "epoch": 0.4400921658986175, + "grad_norm": 0.7099012564704421, + "learning_rate": 1.8462170761955252e-06, + "loss": 0.7084713578224182, + "step": 1910 + }, + { + "epoch": 0.4403225806451613, + "grad_norm": 0.7949281739735957, + "learning_rate": 1.8460140120088945e-06, + "loss": 0.8535224199295044, + "step": 1911 + }, + { + "epoch": 0.44055299539170506, + "grad_norm": 0.8579322326008002, + "learning_rate": 1.8458108250244498e-06, + "loss": 0.7661323547363281, + "step": 1912 + }, + { + "epoch": 0.44078341013824884, + "grad_norm": 0.7355189670899542, + "learning_rate": 1.8456075152716837e-06, + "loss": 0.8064024448394775, + "step": 1913 + }, + { + "epoch": 0.4410138248847926, + "grad_norm": 0.7422340222781728, + "learning_rate": 1.8454040827801058e-06, + 
"loss": 0.7858735918998718, + "step": 1914 + }, + { + "epoch": 0.4412442396313364, + "grad_norm": 0.6589873136371734, + "learning_rate": 1.8452005275792448e-06, + "loss": 0.9251735210418701, + "step": 1915 + }, + { + "epoch": 0.4414746543778802, + "grad_norm": 0.718018605876598, + "learning_rate": 1.8449968496986461e-06, + "loss": 0.7237124443054199, + "step": 1916 + }, + { + "epoch": 0.441705069124424, + "grad_norm": 0.7573893032737062, + "learning_rate": 1.8447930491678732e-06, + "loss": 0.8939133882522583, + "step": 1917 + }, + { + "epoch": 0.44193548387096776, + "grad_norm": 0.8373489922925343, + "learning_rate": 1.8445891260165076e-06, + "loss": 0.8815577626228333, + "step": 1918 + }, + { + "epoch": 0.44216589861751154, + "grad_norm": 0.8703539982402225, + "learning_rate": 1.8443850802741485e-06, + "loss": 0.943426787853241, + "step": 1919 + }, + { + "epoch": 0.4423963133640553, + "grad_norm": 0.6998600920537428, + "learning_rate": 1.8441809119704126e-06, + "loss": 0.8001632690429688, + "step": 1920 + }, + { + "epoch": 0.44262672811059905, + "grad_norm": 0.8531362441371287, + "learning_rate": 1.8439766211349352e-06, + "loss": 0.8656308650970459, + "step": 1921 + }, + { + "epoch": 0.44285714285714284, + "grad_norm": 0.7261410922718881, + "learning_rate": 1.8437722077973686e-06, + "loss": 0.9774024486541748, + "step": 1922 + }, + { + "epoch": 0.4430875576036866, + "grad_norm": 0.728823767818971, + "learning_rate": 1.8435676719873827e-06, + "loss": 0.7655738592147827, + "step": 1923 + }, + { + "epoch": 0.4433179723502304, + "grad_norm": 0.6595509202419896, + "learning_rate": 1.8433630137346657e-06, + "loss": 0.6455004811286926, + "step": 1924 + }, + { + "epoch": 0.4435483870967742, + "grad_norm": 0.7214853647491487, + "learning_rate": 1.8431582330689243e-06, + "loss": 0.8221153020858765, + "step": 1925 + }, + { + "epoch": 0.44377880184331797, + "grad_norm": 0.7718374957528886, + "learning_rate": 1.8429533300198816e-06, + "loss": 0.7878339886665344, + "step": 1926 
+ }, + { + "epoch": 0.44400921658986175, + "grad_norm": 0.7666174978175726, + "learning_rate": 1.8427483046172787e-06, + "loss": 0.8292763829231262, + "step": 1927 + }, + { + "epoch": 0.44423963133640554, + "grad_norm": 0.7395800766154846, + "learning_rate": 1.842543156890875e-06, + "loss": 0.7774572372436523, + "step": 1928 + }, + { + "epoch": 0.4444700460829493, + "grad_norm": 0.7419338266362171, + "learning_rate": 1.8423378868704476e-06, + "loss": 0.7327601909637451, + "step": 1929 + }, + { + "epoch": 0.4447004608294931, + "grad_norm": 0.7176112305038147, + "learning_rate": 1.8421324945857909e-06, + "loss": 0.8067511320114136, + "step": 1930 + }, + { + "epoch": 0.4449308755760369, + "grad_norm": 0.780684647138278, + "learning_rate": 1.8419269800667173e-06, + "loss": 0.851010799407959, + "step": 1931 + }, + { + "epoch": 0.44516129032258067, + "grad_norm": 0.7848772154457995, + "learning_rate": 1.8417213433430576e-06, + "loss": 0.8402234315872192, + "step": 1932 + }, + { + "epoch": 0.4453917050691244, + "grad_norm": 0.7848428302916386, + "learning_rate": 1.8415155844446591e-06, + "loss": 0.8857355117797852, + "step": 1933 + }, + { + "epoch": 0.4456221198156682, + "grad_norm": 0.6465222204250215, + "learning_rate": 1.841309703401387e-06, + "loss": 0.7517881393432617, + "step": 1934 + }, + { + "epoch": 0.44585253456221197, + "grad_norm": 0.8220839741097039, + "learning_rate": 1.8411037002431257e-06, + "loss": 0.8583779335021973, + "step": 1935 + }, + { + "epoch": 0.44608294930875575, + "grad_norm": 0.7149579567670102, + "learning_rate": 1.8408975749997758e-06, + "loss": 0.7691524028778076, + "step": 1936 + }, + { + "epoch": 0.44631336405529953, + "grad_norm": 0.6891731440130011, + "learning_rate": 1.8406913277012558e-06, + "loss": 0.9164496660232544, + "step": 1937 + }, + { + "epoch": 0.4465437788018433, + "grad_norm": 0.6382978906826758, + "learning_rate": 1.8404849583775025e-06, + "loss": 0.843226432800293, + "step": 1938 + }, + { + "epoch": 0.4467741935483871, + 
"grad_norm": 0.843769912689158, + "learning_rate": 1.8402784670584706e-06, + "loss": 0.8492633104324341, + "step": 1939 + }, + { + "epoch": 0.4470046082949309, + "grad_norm": 0.7117202181402426, + "learning_rate": 1.8400718537741314e-06, + "loss": 0.8088324069976807, + "step": 1940 + }, + { + "epoch": 0.44723502304147467, + "grad_norm": 0.8584564611753391, + "learning_rate": 1.8398651185544746e-06, + "loss": 0.8879667520523071, + "step": 1941 + }, + { + "epoch": 0.44746543778801845, + "grad_norm": 0.6515549607308898, + "learning_rate": 1.8396582614295078e-06, + "loss": 0.8926588892936707, + "step": 1942 + }, + { + "epoch": 0.44769585253456223, + "grad_norm": 0.6885634929225364, + "learning_rate": 1.8394512824292558e-06, + "loss": 0.8007583618164062, + "step": 1943 + }, + { + "epoch": 0.447926267281106, + "grad_norm": 0.6940540666117992, + "learning_rate": 1.8392441815837613e-06, + "loss": 0.7420827746391296, + "step": 1944 + }, + { + "epoch": 0.44815668202764974, + "grad_norm": 0.6846873323136197, + "learning_rate": 1.839036958923085e-06, + "loss": 0.7653264999389648, + "step": 1945 + }, + { + "epoch": 0.4483870967741935, + "grad_norm": 0.6684685460178057, + "learning_rate": 1.838829614477305e-06, + "loss": 0.886576771736145, + "step": 1946 + }, + { + "epoch": 0.4486175115207373, + "grad_norm": 0.7769567865097903, + "learning_rate": 1.8386221482765168e-06, + "loss": 0.904376745223999, + "step": 1947 + }, + { + "epoch": 0.4488479262672811, + "grad_norm": 0.6833196213451335, + "learning_rate": 1.838414560350834e-06, + "loss": 0.6791579723358154, + "step": 1948 + }, + { + "epoch": 0.4490783410138249, + "grad_norm": 0.8296885335278092, + "learning_rate": 1.838206850730388e-06, + "loss": 0.9402183294296265, + "step": 1949 + }, + { + "epoch": 0.44930875576036866, + "grad_norm": 0.9215175287627321, + "learning_rate": 1.8379990194453265e-06, + "loss": 0.9756022691726685, + "step": 1950 + }, + { + "epoch": 0.44953917050691244, + "grad_norm": 0.9502651388093868, + 
"learning_rate": 1.8377910665258173e-06, + "loss": 0.7311051487922668, + "step": 1951 + }, + { + "epoch": 0.4497695852534562, + "grad_norm": 0.5687721596613555, + "learning_rate": 1.8375829920020438e-06, + "loss": 0.6966956853866577, + "step": 1952 + }, + { + "epoch": 0.45, + "grad_norm": 0.7191813033419734, + "learning_rate": 1.8373747959042076e-06, + "loss": 0.7327426671981812, + "step": 1953 + }, + { + "epoch": 0.4502304147465438, + "grad_norm": 0.8067848664348717, + "learning_rate": 1.8371664782625285e-06, + "loss": 0.8650925755500793, + "step": 1954 + }, + { + "epoch": 0.4504608294930876, + "grad_norm": 0.8028206677205298, + "learning_rate": 1.8369580391072431e-06, + "loss": 0.876739501953125, + "step": 1955 + }, + { + "epoch": 0.45069124423963136, + "grad_norm": 0.7092651204784524, + "learning_rate": 1.8367494784686066e-06, + "loss": 0.7787455320358276, + "step": 1956 + }, + { + "epoch": 0.4509216589861751, + "grad_norm": 0.7762123563340246, + "learning_rate": 1.836540796376891e-06, + "loss": 0.8874029517173767, + "step": 1957 + }, + { + "epoch": 0.4511520737327189, + "grad_norm": 0.7670080315961673, + "learning_rate": 1.8363319928623862e-06, + "loss": 0.8944835662841797, + "step": 1958 + }, + { + "epoch": 0.45138248847926266, + "grad_norm": 0.570293089893543, + "learning_rate": 1.8361230679553996e-06, + "loss": 0.7106739282608032, + "step": 1959 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.7068996407627426, + "learning_rate": 1.835914021686257e-06, + "loss": 0.8668634295463562, + "step": 1960 + }, + { + "epoch": 0.4518433179723502, + "grad_norm": 0.7818076957354034, + "learning_rate": 1.8357048540853003e-06, + "loss": 0.8123712539672852, + "step": 1961 + }, + { + "epoch": 0.452073732718894, + "grad_norm": 0.7369058807274856, + "learning_rate": 1.8354955651828907e-06, + "loss": 0.865728497505188, + "step": 1962 + }, + { + "epoch": 0.4523041474654378, + "grad_norm": 0.7502978391788373, + "learning_rate": 1.8352861550094056e-06, + "loss": 
0.8066651225090027, + "step": 1963 + }, + { + "epoch": 0.4525345622119816, + "grad_norm": 1.2076261262226256, + "learning_rate": 1.835076623595241e-06, + "loss": 1.020591139793396, + "step": 1964 + }, + { + "epoch": 0.45276497695852536, + "grad_norm": 0.7642119123557376, + "learning_rate": 1.83486697097081e-06, + "loss": 0.839346706867218, + "step": 1965 + }, + { + "epoch": 0.45299539170506914, + "grad_norm": 0.663652311830839, + "learning_rate": 1.8346571971665434e-06, + "loss": 0.7707340121269226, + "step": 1966 + }, + { + "epoch": 0.4532258064516129, + "grad_norm": 0.6603686601649886, + "learning_rate": 1.8344473022128897e-06, + "loss": 0.7969534397125244, + "step": 1967 + }, + { + "epoch": 0.4534562211981567, + "grad_norm": 0.8431782882642489, + "learning_rate": 1.8342372861403143e-06, + "loss": 0.9371283650398254, + "step": 1968 + }, + { + "epoch": 0.45368663594470043, + "grad_norm": 0.7102966402282939, + "learning_rate": 1.8340271489793015e-06, + "loss": 0.7915256023406982, + "step": 1969 + }, + { + "epoch": 0.4539170506912442, + "grad_norm": 0.6028172078632871, + "learning_rate": 1.8338168907603522e-06, + "loss": 0.8394884467124939, + "step": 1970 + }, + { + "epoch": 0.454147465437788, + "grad_norm": 0.8133055611447335, + "learning_rate": 1.833606511513985e-06, + "loss": 0.7786067128181458, + "step": 1971 + }, + { + "epoch": 0.4543778801843318, + "grad_norm": 0.905741517676821, + "learning_rate": 1.833396011270736e-06, + "loss": 0.9237443208694458, + "step": 1972 + }, + { + "epoch": 0.45460829493087557, + "grad_norm": 0.9055049100464759, + "learning_rate": 1.8331853900611596e-06, + "loss": 0.7530162334442139, + "step": 1973 + }, + { + "epoch": 0.45483870967741935, + "grad_norm": 0.7172947421019107, + "learning_rate": 1.8329746479158263e-06, + "loss": 0.8349624872207642, + "step": 1974 + }, + { + "epoch": 0.45506912442396313, + "grad_norm": 0.9222448487169791, + "learning_rate": 1.8327637848653259e-06, + "loss": 0.8748637437820435, + "step": 1975 + }, + { + 
"epoch": 0.4552995391705069, + "grad_norm": 0.7416851295200875, + "learning_rate": 1.832552800940265e-06, + "loss": 0.9111478924751282, + "step": 1976 + }, + { + "epoch": 0.4555299539170507, + "grad_norm": 0.6251856024732342, + "learning_rate": 1.8323416961712665e-06, + "loss": 0.8108797073364258, + "step": 1977 + }, + { + "epoch": 0.4557603686635945, + "grad_norm": 0.9459625715160394, + "learning_rate": 1.832130470588973e-06, + "loss": 0.9266520738601685, + "step": 1978 + }, + { + "epoch": 0.45599078341013827, + "grad_norm": 0.7773850051724754, + "learning_rate": 1.831919124224043e-06, + "loss": 0.9092522859573364, + "step": 1979 + }, + { + "epoch": 0.45622119815668205, + "grad_norm": 0.664954530341155, + "learning_rate": 1.8317076571071536e-06, + "loss": 0.8249068260192871, + "step": 1980 + }, + { + "epoch": 0.45645161290322583, + "grad_norm": 0.770896895795481, + "learning_rate": 1.8314960692689992e-06, + "loss": 0.7497084140777588, + "step": 1981 + }, + { + "epoch": 0.45668202764976956, + "grad_norm": 0.7450904317902424, + "learning_rate": 1.8312843607402907e-06, + "loss": 0.7360142469406128, + "step": 1982 + }, + { + "epoch": 0.45691244239631335, + "grad_norm": 0.7224490513690306, + "learning_rate": 1.8310725315517578e-06, + "loss": 0.8443512320518494, + "step": 1983 + }, + { + "epoch": 0.45714285714285713, + "grad_norm": 0.6770718154001021, + "learning_rate": 1.830860581734147e-06, + "loss": 0.7995656728744507, + "step": 1984 + }, + { + "epoch": 0.4573732718894009, + "grad_norm": 0.8305927985197211, + "learning_rate": 1.8306485113182229e-06, + "loss": 0.7396436929702759, + "step": 1985 + }, + { + "epoch": 0.4576036866359447, + "grad_norm": 0.7351757860546534, + "learning_rate": 1.8304363203347668e-06, + "loss": 0.7415385246276855, + "step": 1986 + }, + { + "epoch": 0.4578341013824885, + "grad_norm": 0.8416697439034252, + "learning_rate": 1.8302240088145784e-06, + "loss": 0.9316694736480713, + "step": 1987 + }, + { + "epoch": 0.45806451612903226, + 
"grad_norm": 0.6482250359686991, + "learning_rate": 1.830011576788474e-06, + "loss": 0.7692697048187256, + "step": 1988 + }, + { + "epoch": 0.45829493087557605, + "grad_norm": 0.7546540101557039, + "learning_rate": 1.829799024287288e-06, + "loss": 0.8377524614334106, + "step": 1989 + }, + { + "epoch": 0.45852534562211983, + "grad_norm": 0.800432018333432, + "learning_rate": 1.8295863513418724e-06, + "loss": 0.8005630970001221, + "step": 1990 + }, + { + "epoch": 0.4587557603686636, + "grad_norm": 0.6132717130341248, + "learning_rate": 1.829373557983096e-06, + "loss": 0.8609297275543213, + "step": 1991 + }, + { + "epoch": 0.4589861751152074, + "grad_norm": 0.7611348757483902, + "learning_rate": 1.8291606442418454e-06, + "loss": 0.9111521244049072, + "step": 1992 + }, + { + "epoch": 0.4592165898617512, + "grad_norm": 0.6486046074488622, + "learning_rate": 1.8289476101490254e-06, + "loss": 0.7540388107299805, + "step": 1993 + }, + { + "epoch": 0.4594470046082949, + "grad_norm": 0.7891604292973137, + "learning_rate": 1.8287344557355565e-06, + "loss": 0.9018936157226562, + "step": 1994 + }, + { + "epoch": 0.4596774193548387, + "grad_norm": 0.8558307889574596, + "learning_rate": 1.8285211810323791e-06, + "loss": 0.918912947177887, + "step": 1995 + }, + { + "epoch": 0.4599078341013825, + "grad_norm": 0.6889746928021416, + "learning_rate": 1.8283077860704488e-06, + "loss": 0.7777351140975952, + "step": 1996 + }, + { + "epoch": 0.46013824884792626, + "grad_norm": 0.8546199279018112, + "learning_rate": 1.82809427088074e-06, + "loss": 0.9283437132835388, + "step": 1997 + }, + { + "epoch": 0.46036866359447004, + "grad_norm": 0.7206983576837674, + "learning_rate": 1.8278806354942442e-06, + "loss": 0.7032894492149353, + "step": 1998 + }, + { + "epoch": 0.4605990783410138, + "grad_norm": 0.7084552833839082, + "learning_rate": 1.8276668799419696e-06, + "loss": 0.8392905592918396, + "step": 1999 + }, + { + "epoch": 0.4608294930875576, + "grad_norm": 0.8216520324249929, + 
"learning_rate": 1.8274530042549434e-06, + "loss": 0.8059369325637817, + "step": 2000 + }, + { + "epoch": 0.4610599078341014, + "grad_norm": 0.7022225516164876, + "learning_rate": 1.827239008464209e-06, + "loss": 0.7738519906997681, + "step": 2001 + }, + { + "epoch": 0.4612903225806452, + "grad_norm": 0.894321981759021, + "learning_rate": 1.8270248926008275e-06, + "loss": 0.9189014434814453, + "step": 2002 + }, + { + "epoch": 0.46152073732718896, + "grad_norm": 0.9750927332357222, + "learning_rate": 1.8268106566958782e-06, + "loss": 0.8878552913665771, + "step": 2003 + }, + { + "epoch": 0.46175115207373274, + "grad_norm": 0.7601663032895281, + "learning_rate": 1.826596300780456e-06, + "loss": 0.9786058664321899, + "step": 2004 + }, + { + "epoch": 0.4619815668202765, + "grad_norm": 0.7513085122069586, + "learning_rate": 1.8263818248856754e-06, + "loss": 0.7887653112411499, + "step": 2005 + }, + { + "epoch": 0.46221198156682025, + "grad_norm": 0.7571825247765968, + "learning_rate": 1.8261672290426668e-06, + "loss": 0.8773549795150757, + "step": 2006 + }, + { + "epoch": 0.46244239631336403, + "grad_norm": 0.6543768471355319, + "learning_rate": 1.8259525132825786e-06, + "loss": 0.6929831504821777, + "step": 2007 + }, + { + "epoch": 0.4626728110599078, + "grad_norm": 0.8544099497368944, + "learning_rate": 1.8257376776365765e-06, + "loss": 0.9438232183456421, + "step": 2008 + }, + { + "epoch": 0.4629032258064516, + "grad_norm": 0.6803330432545487, + "learning_rate": 1.8255227221358435e-06, + "loss": 0.7559594511985779, + "step": 2009 + }, + { + "epoch": 0.4631336405529954, + "grad_norm": 0.7347158890455135, + "learning_rate": 1.8253076468115805e-06, + "loss": 0.8990212678909302, + "step": 2010 + }, + { + "epoch": 0.46336405529953917, + "grad_norm": 0.7325838411869188, + "learning_rate": 1.825092451695005e-06, + "loss": 0.8638331890106201, + "step": 2011 + }, + { + "epoch": 0.46359447004608295, + "grad_norm": 0.7537964319175384, + "learning_rate": 1.8248771368173522e-06, 
+ "loss": 0.9262570142745972, + "step": 2012 + }, + { + "epoch": 0.46382488479262673, + "grad_norm": 0.770620841657562, + "learning_rate": 1.8246617022098754e-06, + "loss": 0.7412514090538025, + "step": 2013 + }, + { + "epoch": 0.4640552995391705, + "grad_norm": 0.8304378021605247, + "learning_rate": 1.8244461479038437e-06, + "loss": 0.8680287599563599, + "step": 2014 + }, + { + "epoch": 0.4642857142857143, + "grad_norm": 0.7004084931574237, + "learning_rate": 1.8242304739305457e-06, + "loss": 0.7774302959442139, + "step": 2015 + }, + { + "epoch": 0.4645161290322581, + "grad_norm": 0.8275882534036313, + "learning_rate": 1.824014680321285e-06, + "loss": 0.9278442859649658, + "step": 2016 + }, + { + "epoch": 0.46474654377880187, + "grad_norm": 0.6808747325759799, + "learning_rate": 1.8237987671073846e-06, + "loss": 0.9617106914520264, + "step": 2017 + }, + { + "epoch": 0.4649769585253456, + "grad_norm": 0.682915952128137, + "learning_rate": 1.8235827343201838e-06, + "loss": 0.7983255386352539, + "step": 2018 + }, + { + "epoch": 0.4652073732718894, + "grad_norm": 0.7878897167758285, + "learning_rate": 1.8233665819910393e-06, + "loss": 0.7966747283935547, + "step": 2019 + }, + { + "epoch": 0.46543778801843316, + "grad_norm": 0.893729443286113, + "learning_rate": 1.8231503101513253e-06, + "loss": 0.8977803587913513, + "step": 2020 + }, + { + "epoch": 0.46566820276497695, + "grad_norm": 0.6522874054217892, + "learning_rate": 1.8229339188324334e-06, + "loss": 0.7098231911659241, + "step": 2021 + }, + { + "epoch": 0.46589861751152073, + "grad_norm": 0.6971785978535421, + "learning_rate": 1.822717408065773e-06, + "loss": 0.6402776837348938, + "step": 2022 + }, + { + "epoch": 0.4661290322580645, + "grad_norm": 0.7272467550896602, + "learning_rate": 1.8225007778827698e-06, + "loss": 0.797479510307312, + "step": 2023 + }, + { + "epoch": 0.4663594470046083, + "grad_norm": 0.7464543289112394, + "learning_rate": 1.8222840283148675e-06, + "loss": 0.8205317258834839, + "step": 2024 
+ }, + { + "epoch": 0.4665898617511521, + "grad_norm": 0.755319646803663, + "learning_rate": 1.822067159393527e-06, + "loss": 0.8123108148574829, + "step": 2025 + }, + { + "epoch": 0.46682027649769586, + "grad_norm": 0.7470494916721893, + "learning_rate": 1.8218501711502262e-06, + "loss": 0.9103116989135742, + "step": 2026 + }, + { + "epoch": 0.46705069124423965, + "grad_norm": 0.8399971318490079, + "learning_rate": 1.8216330636164617e-06, + "loss": 0.725040078163147, + "step": 2027 + }, + { + "epoch": 0.46728110599078343, + "grad_norm": 0.8693243601175246, + "learning_rate": 1.8214158368237456e-06, + "loss": 0.8598217964172363, + "step": 2028 + }, + { + "epoch": 0.4675115207373272, + "grad_norm": 0.9587381766929439, + "learning_rate": 1.821198490803608e-06, + "loss": 0.9139465093612671, + "step": 2029 + }, + { + "epoch": 0.46774193548387094, + "grad_norm": 0.7850806397253399, + "learning_rate": 1.8209810255875966e-06, + "loss": 0.8331620097160339, + "step": 2030 + }, + { + "epoch": 0.4679723502304147, + "grad_norm": 0.8908286579751021, + "learning_rate": 1.8207634412072764e-06, + "loss": 0.7901387810707092, + "step": 2031 + }, + { + "epoch": 0.4682027649769585, + "grad_norm": 0.6861413854458724, + "learning_rate": 1.8205457376942288e-06, + "loss": 0.7651060819625854, + "step": 2032 + }, + { + "epoch": 0.4684331797235023, + "grad_norm": 0.7738923235394239, + "learning_rate": 1.820327915080054e-06, + "loss": 0.7382134199142456, + "step": 2033 + }, + { + "epoch": 0.4686635944700461, + "grad_norm": 0.6962774548883505, + "learning_rate": 1.8201099733963682e-06, + "loss": 0.7851507067680359, + "step": 2034 + }, + { + "epoch": 0.46889400921658986, + "grad_norm": 0.8995005169228616, + "learning_rate": 1.8198919126748056e-06, + "loss": 0.9357708692550659, + "step": 2035 + }, + { + "epoch": 0.46912442396313364, + "grad_norm": 0.8238296907521364, + "learning_rate": 1.819673732947017e-06, + "loss": 0.8188502788543701, + "step": 2036 + }, + { + "epoch": 0.4693548387096774, + 
"grad_norm": 1.0258349340262545, + "learning_rate": 1.8194554342446712e-06, + "loss": 0.81590735912323, + "step": 2037 + }, + { + "epoch": 0.4695852534562212, + "grad_norm": 0.811644542087897, + "learning_rate": 1.8192370165994544e-06, + "loss": 0.6879743933677673, + "step": 2038 + }, + { + "epoch": 0.469815668202765, + "grad_norm": 0.8669848845646889, + "learning_rate": 1.8190184800430686e-06, + "loss": 0.9287742376327515, + "step": 2039 + }, + { + "epoch": 0.4700460829493088, + "grad_norm": 0.9807524438459786, + "learning_rate": 1.818799824607235e-06, + "loss": 0.9625484943389893, + "step": 2040 + }, + { + "epoch": 0.47027649769585256, + "grad_norm": 0.8259194997097902, + "learning_rate": 1.8185810503236904e-06, + "loss": 0.8267782926559448, + "step": 2041 + }, + { + "epoch": 0.4705069124423963, + "grad_norm": 0.8404148332122154, + "learning_rate": 1.8183621572241904e-06, + "loss": 0.8827054500579834, + "step": 2042 + }, + { + "epoch": 0.47073732718894007, + "grad_norm": 0.7550183773883651, + "learning_rate": 1.8181431453405067e-06, + "loss": 0.7755721807479858, + "step": 2043 + }, + { + "epoch": 0.47096774193548385, + "grad_norm": 0.9234865066349518, + "learning_rate": 1.8179240147044285e-06, + "loss": 0.8320283889770508, + "step": 2044 + }, + { + "epoch": 0.47119815668202764, + "grad_norm": 0.7077773446032107, + "learning_rate": 1.8177047653477619e-06, + "loss": 0.8737574815750122, + "step": 2045 + }, + { + "epoch": 0.4714285714285714, + "grad_norm": 0.8821209974643925, + "learning_rate": 1.8174853973023317e-06, + "loss": 0.7007719278335571, + "step": 2046 + }, + { + "epoch": 0.4716589861751152, + "grad_norm": 0.822666216900424, + "learning_rate": 1.817265910599978e-06, + "loss": 0.8062577247619629, + "step": 2047 + }, + { + "epoch": 0.471889400921659, + "grad_norm": 0.6775605665320994, + "learning_rate": 1.8170463052725594e-06, + "loss": 0.7059667110443115, + "step": 2048 + }, + { + "epoch": 0.47211981566820277, + "grad_norm": 0.7830423922028903, + 
"learning_rate": 1.816826581351951e-06, + "loss": 0.9025841951370239, + "step": 2049 + }, + { + "epoch": 0.47235023041474655, + "grad_norm": 0.8388278274768075, + "learning_rate": 1.8166067388700458e-06, + "loss": 0.7534186840057373, + "step": 2050 + }, + { + "epoch": 0.47258064516129034, + "grad_norm": 0.7623620329649421, + "learning_rate": 1.8163867778587534e-06, + "loss": 0.9447616338729858, + "step": 2051 + }, + { + "epoch": 0.4728110599078341, + "grad_norm": 0.6423913345578718, + "learning_rate": 1.8161666983500012e-06, + "loss": 0.7092128992080688, + "step": 2052 + }, + { + "epoch": 0.4730414746543779, + "grad_norm": 0.8648864734786782, + "learning_rate": 1.815946500375733e-06, + "loss": 0.8689497113227844, + "step": 2053 + }, + { + "epoch": 0.4732718894009217, + "grad_norm": 0.8941588190294093, + "learning_rate": 1.8157261839679105e-06, + "loss": 0.9298638105392456, + "step": 2054 + }, + { + "epoch": 0.4735023041474654, + "grad_norm": 0.6527064378770876, + "learning_rate": 1.8155057491585125e-06, + "loss": 0.7138030529022217, + "step": 2055 + }, + { + "epoch": 0.4737327188940092, + "grad_norm": 0.6699370139228978, + "learning_rate": 1.815285195979534e-06, + "loss": 0.825221836566925, + "step": 2056 + }, + { + "epoch": 0.473963133640553, + "grad_norm": 0.8559190132682327, + "learning_rate": 1.8150645244629891e-06, + "loss": 0.8643208742141724, + "step": 2057 + }, + { + "epoch": 0.47419354838709676, + "grad_norm": 0.8338353738235549, + "learning_rate": 1.8148437346409073e-06, + "loss": 0.9611828327178955, + "step": 2058 + }, + { + "epoch": 0.47442396313364055, + "grad_norm": 0.8119567978397472, + "learning_rate": 1.8146228265453363e-06, + "loss": 0.8609912991523743, + "step": 2059 + }, + { + "epoch": 0.47465437788018433, + "grad_norm": 0.7540582566966652, + "learning_rate": 1.8144018002083404e-06, + "loss": 0.8277603387832642, + "step": 2060 + }, + { + "epoch": 0.4748847926267281, + "grad_norm": 0.8438703930452028, + "learning_rate": 1.814180655662001e-06, + 
"loss": 0.8601360321044922, + "step": 2061 + }, + { + "epoch": 0.4751152073732719, + "grad_norm": 0.7023202538855939, + "learning_rate": 1.8139593929384178e-06, + "loss": 0.8454653024673462, + "step": 2062 + }, + { + "epoch": 0.4753456221198157, + "grad_norm": 0.8270167900724995, + "learning_rate": 1.8137380120697059e-06, + "loss": 0.870082437992096, + "step": 2063 + }, + { + "epoch": 0.47557603686635946, + "grad_norm": 0.8497953303327396, + "learning_rate": 1.8135165130879988e-06, + "loss": 0.8064073324203491, + "step": 2064 + }, + { + "epoch": 0.47580645161290325, + "grad_norm": 0.5532170457954219, + "learning_rate": 1.813294896025447e-06, + "loss": 0.829608678817749, + "step": 2065 + }, + { + "epoch": 0.47603686635944703, + "grad_norm": 0.7131662100806325, + "learning_rate": 1.8130731609142176e-06, + "loss": 0.8185791969299316, + "step": 2066 + }, + { + "epoch": 0.47626728110599076, + "grad_norm": 0.9405207635689381, + "learning_rate": 1.812851307786495e-06, + "loss": 0.8855293989181519, + "step": 2067 + }, + { + "epoch": 0.47649769585253454, + "grad_norm": 0.6766659884445188, + "learning_rate": 1.8126293366744815e-06, + "loss": 0.7495461106300354, + "step": 2068 + }, + { + "epoch": 0.4767281105990783, + "grad_norm": 0.9706294845402844, + "learning_rate": 1.8124072476103956e-06, + "loss": 0.9435098171234131, + "step": 2069 + }, + { + "epoch": 0.4769585253456221, + "grad_norm": 0.7637936743615437, + "learning_rate": 1.8121850406264727e-06, + "loss": 0.9299448728561401, + "step": 2070 + }, + { + "epoch": 0.4771889400921659, + "grad_norm": 0.9500813357187163, + "learning_rate": 1.8119627157549665e-06, + "loss": 0.9011991024017334, + "step": 2071 + }, + { + "epoch": 0.4774193548387097, + "grad_norm": 0.6847341374863515, + "learning_rate": 1.8117402730281476e-06, + "loss": 0.7326598167419434, + "step": 2072 + }, + { + "epoch": 0.47764976958525346, + "grad_norm": 0.7364560962143368, + "learning_rate": 1.8115177124783024e-06, + "loss": 0.8137445449829102, + "step": 
2073 + }, + { + "epoch": 0.47788018433179724, + "grad_norm": 0.9429635333298672, + "learning_rate": 1.811295034137735e-06, + "loss": 0.8653519153594971, + "step": 2074 + }, + { + "epoch": 0.478110599078341, + "grad_norm": 0.8511205154632088, + "learning_rate": 1.811072238038768e-06, + "loss": 0.9140677452087402, + "step": 2075 + }, + { + "epoch": 0.4783410138248848, + "grad_norm": 0.8012710450337872, + "learning_rate": 1.810849324213739e-06, + "loss": 0.8878934979438782, + "step": 2076 + }, + { + "epoch": 0.4785714285714286, + "grad_norm": 0.6571390792752639, + "learning_rate": 1.8106262926950045e-06, + "loss": 0.8238190412521362, + "step": 2077 + }, + { + "epoch": 0.4788018433179724, + "grad_norm": 0.8097531572330602, + "learning_rate": 1.8104031435149362e-06, + "loss": 0.7722488641738892, + "step": 2078 + }, + { + "epoch": 0.4790322580645161, + "grad_norm": 0.890992078514086, + "learning_rate": 1.8101798767059248e-06, + "loss": 0.9338192939758301, + "step": 2079 + }, + { + "epoch": 0.4792626728110599, + "grad_norm": 0.8000986035452533, + "learning_rate": 1.8099564923003767e-06, + "loss": 0.7342168688774109, + "step": 2080 + }, + { + "epoch": 0.47949308755760367, + "grad_norm": 0.7644530181466097, + "learning_rate": 1.809732990330716e-06, + "loss": 0.8445772528648376, + "step": 2081 + }, + { + "epoch": 0.47972350230414745, + "grad_norm": 0.7291725333905612, + "learning_rate": 1.8095093708293839e-06, + "loss": 0.825678825378418, + "step": 2082 + }, + { + "epoch": 0.47995391705069124, + "grad_norm": 0.8072481370959372, + "learning_rate": 1.8092856338288381e-06, + "loss": 0.7995405197143555, + "step": 2083 + }, + { + "epoch": 0.480184331797235, + "grad_norm": 0.8193777121106555, + "learning_rate": 1.8090617793615536e-06, + "loss": 0.7811745405197144, + "step": 2084 + }, + { + "epoch": 0.4804147465437788, + "grad_norm": 0.7364459454678961, + "learning_rate": 1.8088378074600231e-06, + "loss": 0.842727780342102, + "step": 2085 + }, + { + "epoch": 0.4806451612903226, + 
"grad_norm": 0.7640299868769393, + "learning_rate": 1.808613718156756e-06, + "loss": 0.840941309928894, + "step": 2086 + }, + { + "epoch": 0.48087557603686637, + "grad_norm": 0.7783965916533324, + "learning_rate": 1.808389511484278e-06, + "loss": 0.9024466872215271, + "step": 2087 + }, + { + "epoch": 0.48110599078341015, + "grad_norm": 0.8943218774431004, + "learning_rate": 1.8081651874751325e-06, + "loss": 0.9112771153450012, + "step": 2088 + }, + { + "epoch": 0.48133640552995394, + "grad_norm": 0.6675207900987881, + "learning_rate": 1.8079407461618797e-06, + "loss": 0.834719181060791, + "step": 2089 + }, + { + "epoch": 0.4815668202764977, + "grad_norm": 0.8421358450475633, + "learning_rate": 1.8077161875770971e-06, + "loss": 0.8472555875778198, + "step": 2090 + }, + { + "epoch": 0.48179723502304145, + "grad_norm": 0.7303169649115268, + "learning_rate": 1.8074915117533796e-06, + "loss": 0.8459140062332153, + "step": 2091 + }, + { + "epoch": 0.48202764976958523, + "grad_norm": 0.6945162401362365, + "learning_rate": 1.807266718723338e-06, + "loss": 0.6570066213607788, + "step": 2092 + }, + { + "epoch": 0.482258064516129, + "grad_norm": 0.7314212575092469, + "learning_rate": 1.8070418085196006e-06, + "loss": 0.8897342681884766, + "step": 2093 + }, + { + "epoch": 0.4824884792626728, + "grad_norm": 0.8312385191950623, + "learning_rate": 1.8068167811748132e-06, + "loss": 0.8339060544967651, + "step": 2094 + }, + { + "epoch": 0.4827188940092166, + "grad_norm": 0.7547678583050421, + "learning_rate": 1.8065916367216383e-06, + "loss": 0.7972484827041626, + "step": 2095 + }, + { + "epoch": 0.48294930875576036, + "grad_norm": 0.7424060773179767, + "learning_rate": 1.806366375192755e-06, + "loss": 0.7894760966300964, + "step": 2096 + }, + { + "epoch": 0.48317972350230415, + "grad_norm": 0.7408232706643347, + "learning_rate": 1.8061409966208597e-06, + "loss": 0.713944673538208, + "step": 2097 + }, + { + "epoch": 0.48341013824884793, + "grad_norm": 0.8423029874540192, + 
"learning_rate": 1.8059155010386662e-06, + "loss": 0.7832180261611938, + "step": 2098 + }, + { + "epoch": 0.4836405529953917, + "grad_norm": 0.6563887159918735, + "learning_rate": 1.8056898884789043e-06, + "loss": 0.8873809576034546, + "step": 2099 + }, + { + "epoch": 0.4838709677419355, + "grad_norm": 0.8864132111812594, + "learning_rate": 1.8054641589743218e-06, + "loss": 0.8174929618835449, + "step": 2100 + }, + { + "epoch": 0.4841013824884793, + "grad_norm": 0.6797946394214075, + "learning_rate": 1.805238312557683e-06, + "loss": 0.876921534538269, + "step": 2101 + }, + { + "epoch": 0.48433179723502306, + "grad_norm": 0.7629892942789464, + "learning_rate": 1.8050123492617693e-06, + "loss": 0.9455937147140503, + "step": 2102 + }, + { + "epoch": 0.4845622119815668, + "grad_norm": 0.6880522665173857, + "learning_rate": 1.8047862691193784e-06, + "loss": 0.8146508932113647, + "step": 2103 + }, + { + "epoch": 0.4847926267281106, + "grad_norm": 0.762873599305404, + "learning_rate": 1.8045600721633262e-06, + "loss": 0.8513495326042175, + "step": 2104 + }, + { + "epoch": 0.48502304147465436, + "grad_norm": 0.8329533644475985, + "learning_rate": 1.8043337584264443e-06, + "loss": 0.8430027961730957, + "step": 2105 + }, + { + "epoch": 0.48525345622119814, + "grad_norm": 0.6323595862794837, + "learning_rate": 1.8041073279415826e-06, + "loss": 0.7683960199356079, + "step": 2106 + }, + { + "epoch": 0.4854838709677419, + "grad_norm": 0.6620613064117244, + "learning_rate": 1.8038807807416067e-06, + "loss": 0.7099664211273193, + "step": 2107 + }, + { + "epoch": 0.4857142857142857, + "grad_norm": 0.725415262213876, + "learning_rate": 1.8036541168593994e-06, + "loss": 0.8046330213546753, + "step": 2108 + }, + { + "epoch": 0.4859447004608295, + "grad_norm": 0.7817858416968994, + "learning_rate": 1.803427336327861e-06, + "loss": 0.8387504816055298, + "step": 2109 + }, + { + "epoch": 0.4861751152073733, + "grad_norm": 0.7135784962709865, + "learning_rate": 1.8032004391799085e-06, + 
"loss": 0.883955717086792, + "step": 2110 + }, + { + "epoch": 0.48640552995391706, + "grad_norm": 0.7408960119431725, + "learning_rate": 1.8029734254484756e-06, + "loss": 0.7622070908546448, + "step": 2111 + }, + { + "epoch": 0.48663594470046084, + "grad_norm": 0.7726145388563513, + "learning_rate": 1.802746295166513e-06, + "loss": 0.6625584363937378, + "step": 2112 + }, + { + "epoch": 0.4868663594470046, + "grad_norm": 0.8189497209718242, + "learning_rate": 1.8025190483669878e-06, + "loss": 0.8232327699661255, + "step": 2113 + }, + { + "epoch": 0.4870967741935484, + "grad_norm": 0.8528139298235252, + "learning_rate": 1.8022916850828857e-06, + "loss": 0.9083148241043091, + "step": 2114 + }, + { + "epoch": 0.4873271889400922, + "grad_norm": 0.7392938308731752, + "learning_rate": 1.8020642053472074e-06, + "loss": 0.8248398303985596, + "step": 2115 + }, + { + "epoch": 0.4875576036866359, + "grad_norm": 0.7121240208517446, + "learning_rate": 1.8018366091929717e-06, + "loss": 0.8055423498153687, + "step": 2116 + }, + { + "epoch": 0.4877880184331797, + "grad_norm": 0.778973471543998, + "learning_rate": 1.8016088966532135e-06, + "loss": 0.8716787695884705, + "step": 2117 + }, + { + "epoch": 0.4880184331797235, + "grad_norm": 0.7561230225795058, + "learning_rate": 1.801381067760985e-06, + "loss": 0.8530780673027039, + "step": 2118 + }, + { + "epoch": 0.48824884792626727, + "grad_norm": 0.6774037273322415, + "learning_rate": 1.8011531225493557e-06, + "loss": 0.7958484888076782, + "step": 2119 + }, + { + "epoch": 0.48847926267281105, + "grad_norm": 0.8596146173926187, + "learning_rate": 1.800925061051411e-06, + "loss": 0.8312872648239136, + "step": 2120 + }, + { + "epoch": 0.48870967741935484, + "grad_norm": 0.8135900564482533, + "learning_rate": 1.8006968833002541e-06, + "loss": 0.8097391128540039, + "step": 2121 + }, + { + "epoch": 0.4889400921658986, + "grad_norm": 0.9139337120301166, + "learning_rate": 1.8004685893290046e-06, + "loss": 0.8636112213134766, + "step": 2122 
+ }, + { + "epoch": 0.4891705069124424, + "grad_norm": 0.9088930992891967, + "learning_rate": 1.800240179170799e-06, + "loss": 0.9122721552848816, + "step": 2123 + }, + { + "epoch": 0.4894009216589862, + "grad_norm": 0.914017678688966, + "learning_rate": 1.8000116528587907e-06, + "loss": 0.8172330856323242, + "step": 2124 + }, + { + "epoch": 0.48963133640552997, + "grad_norm": 0.8007018337125341, + "learning_rate": 1.7997830104261502e-06, + "loss": 0.7377575635910034, + "step": 2125 + }, + { + "epoch": 0.48986175115207375, + "grad_norm": 0.9218847107737449, + "learning_rate": 1.7995542519060644e-06, + "loss": 0.7278136014938354, + "step": 2126 + }, + { + "epoch": 0.49009216589861754, + "grad_norm": 0.8808842591031234, + "learning_rate": 1.7993253773317374e-06, + "loss": 0.8977715969085693, + "step": 2127 + }, + { + "epoch": 0.49032258064516127, + "grad_norm": 0.7019593909183576, + "learning_rate": 1.7990963867363902e-06, + "loss": 0.789979100227356, + "step": 2128 + }, + { + "epoch": 0.49055299539170505, + "grad_norm": 0.7069412826082713, + "learning_rate": 1.7988672801532602e-06, + "loss": 0.8304328322410583, + "step": 2129 + }, + { + "epoch": 0.49078341013824883, + "grad_norm": 0.7922910084647693, + "learning_rate": 1.7986380576156019e-06, + "loss": 0.7597516179084778, + "step": 2130 + }, + { + "epoch": 0.4910138248847926, + "grad_norm": 0.6007262757544611, + "learning_rate": 1.7984087191566873e-06, + "loss": 0.661639928817749, + "step": 2131 + }, + { + "epoch": 0.4912442396313364, + "grad_norm": 0.7484873666922557, + "learning_rate": 1.7981792648098035e-06, + "loss": 0.7871333360671997, + "step": 2132 + }, + { + "epoch": 0.4914746543778802, + "grad_norm": 0.7758289248832314, + "learning_rate": 1.7979496946082565e-06, + "loss": 0.8166402578353882, + "step": 2133 + }, + { + "epoch": 0.49170506912442397, + "grad_norm": 0.6906377275927077, + "learning_rate": 1.7977200085853674e-06, + "loss": 0.7112412452697754, + "step": 2134 + }, + { + "epoch": 0.49193548387096775, 
+ "grad_norm": 0.8103572300867555, + "learning_rate": 1.7974902067744752e-06, + "loss": 0.8358132839202881, + "step": 2135 + }, + { + "epoch": 0.49216589861751153, + "grad_norm": 0.7103875590554449, + "learning_rate": 1.7972602892089353e-06, + "loss": 0.8544377088546753, + "step": 2136 + }, + { + "epoch": 0.4923963133640553, + "grad_norm": 0.9004573017295656, + "learning_rate": 1.7970302559221197e-06, + "loss": 1.0105161666870117, + "step": 2137 + }, + { + "epoch": 0.4926267281105991, + "grad_norm": 0.7525179633837843, + "learning_rate": 1.7968001069474176e-06, + "loss": 0.7666197419166565, + "step": 2138 + }, + { + "epoch": 0.4928571428571429, + "grad_norm": 0.9209694432294897, + "learning_rate": 1.7965698423182349e-06, + "loss": 0.9250742197036743, + "step": 2139 + }, + { + "epoch": 0.4930875576036866, + "grad_norm": 0.8066717978287462, + "learning_rate": 1.7963394620679942e-06, + "loss": 0.8269995450973511, + "step": 2140 + }, + { + "epoch": 0.4933179723502304, + "grad_norm": 0.9533305612537857, + "learning_rate": 1.7961089662301346e-06, + "loss": 1.0431339740753174, + "step": 2141 + }, + { + "epoch": 0.4935483870967742, + "grad_norm": 0.7107784117562762, + "learning_rate": 1.7958783548381125e-06, + "loss": 0.7474809288978577, + "step": 2142 + }, + { + "epoch": 0.49377880184331796, + "grad_norm": 0.7729911498332706, + "learning_rate": 1.7956476279254007e-06, + "loss": 0.8850520849227905, + "step": 2143 + }, + { + "epoch": 0.49400921658986174, + "grad_norm": 0.8566824172714074, + "learning_rate": 1.7954167855254893e-06, + "loss": 0.8898880481719971, + "step": 2144 + }, + { + "epoch": 0.4942396313364055, + "grad_norm": 0.886855392770134, + "learning_rate": 1.7951858276718842e-06, + "loss": 0.8718239068984985, + "step": 2145 + }, + { + "epoch": 0.4944700460829493, + "grad_norm": 0.7604278475621951, + "learning_rate": 1.794954754398109e-06, + "loss": 0.8407484292984009, + "step": 2146 + }, + { + "epoch": 0.4947004608294931, + "grad_norm": 0.9582215314216729, + 
"learning_rate": 1.7947235657377036e-06, + "loss": 0.8453764915466309, + "step": 2147 + }, + { + "epoch": 0.4949308755760369, + "grad_norm": 0.6332693049941237, + "learning_rate": 1.794492261724225e-06, + "loss": 0.5795568227767944, + "step": 2148 + }, + { + "epoch": 0.49516129032258066, + "grad_norm": 0.9864343717736791, + "learning_rate": 1.794260842391246e-06, + "loss": 0.8601347208023071, + "step": 2149 + }, + { + "epoch": 0.49539170506912444, + "grad_norm": 0.8909931853274754, + "learning_rate": 1.7940293077723573e-06, + "loss": 0.8328324556350708, + "step": 2150 + }, + { + "epoch": 0.4956221198156682, + "grad_norm": 0.6691517417241877, + "learning_rate": 1.7937976579011655e-06, + "loss": 0.8924463391304016, + "step": 2151 + }, + { + "epoch": 0.49585253456221196, + "grad_norm": 0.7983254161536232, + "learning_rate": 1.7935658928112947e-06, + "loss": 0.9725968837738037, + "step": 2152 + }, + { + "epoch": 0.49608294930875574, + "grad_norm": 0.7649378566504706, + "learning_rate": 1.7933340125363855e-06, + "loss": 0.7814322710037231, + "step": 2153 + }, + { + "epoch": 0.4963133640552995, + "grad_norm": 0.795129549448148, + "learning_rate": 1.793102017110094e-06, + "loss": 0.8022886514663696, + "step": 2154 + }, + { + "epoch": 0.4965437788018433, + "grad_norm": 0.9455352743035539, + "learning_rate": 1.7928699065660951e-06, + "loss": 0.9747333526611328, + "step": 2155 + }, + { + "epoch": 0.4967741935483871, + "grad_norm": 1.0353782305768249, + "learning_rate": 1.7926376809380783e-06, + "loss": 0.9039797782897949, + "step": 2156 + }, + { + "epoch": 0.49700460829493087, + "grad_norm": 1.000992925643121, + "learning_rate": 1.7924053402597518e-06, + "loss": 0.9444677829742432, + "step": 2157 + }, + { + "epoch": 0.49723502304147466, + "grad_norm": 0.7688551400180308, + "learning_rate": 1.7921728845648393e-06, + "loss": 0.8442031741142273, + "step": 2158 + }, + { + "epoch": 0.49746543778801844, + "grad_norm": 0.8590371435800439, + "learning_rate": 1.7919403138870813e-06, 
+ "loss": 0.9410362839698792, + "step": 2159 + }, + { + "epoch": 0.4976958525345622, + "grad_norm": 0.8168398725206235, + "learning_rate": 1.791707628260235e-06, + "loss": 0.8929172158241272, + "step": 2160 + }, + { + "epoch": 0.497926267281106, + "grad_norm": 0.970370102226972, + "learning_rate": 1.7914748277180745e-06, + "loss": 0.9259560108184814, + "step": 2161 + }, + { + "epoch": 0.4981566820276498, + "grad_norm": 0.7778204252845836, + "learning_rate": 1.7912419122943904e-06, + "loss": 0.8201638460159302, + "step": 2162 + }, + { + "epoch": 0.49838709677419357, + "grad_norm": 0.7628075269760098, + "learning_rate": 1.7910088820229907e-06, + "loss": 0.7554556131362915, + "step": 2163 + }, + { + "epoch": 0.4986175115207373, + "grad_norm": 0.7698860809397133, + "learning_rate": 1.7907757369376984e-06, + "loss": 0.8206801414489746, + "step": 2164 + }, + { + "epoch": 0.4988479262672811, + "grad_norm": 0.7606971261006891, + "learning_rate": 1.7905424770723551e-06, + "loss": 0.765400767326355, + "step": 2165 + }, + { + "epoch": 0.49907834101382487, + "grad_norm": 0.9629614917036793, + "learning_rate": 1.7903091024608177e-06, + "loss": 0.9191527366638184, + "step": 2166 + }, + { + "epoch": 0.49930875576036865, + "grad_norm": 1.0883591834210613, + "learning_rate": 1.7900756131369601e-06, + "loss": 0.8515042662620544, + "step": 2167 + }, + { + "epoch": 0.49953917050691243, + "grad_norm": 0.7623230395498896, + "learning_rate": 1.7898420091346736e-06, + "loss": 0.8509752750396729, + "step": 2168 + }, + { + "epoch": 0.4997695852534562, + "grad_norm": 0.7417934516303272, + "learning_rate": 1.7896082904878647e-06, + "loss": 0.8007084131240845, + "step": 2169 + }, + { + "epoch": 0.5, + "grad_norm": 0.8597818097533757, + "learning_rate": 1.789374457230458e-06, + "loss": 0.8395413756370544, + "step": 2170 + }, + { + "epoch": 0.5002304147465437, + "grad_norm": 0.7232889708808644, + "learning_rate": 1.7891405093963937e-06, + "loss": 0.8624853491783142, + "step": 2171 + }, + { + 
"epoch": 0.5004608294930876, + "grad_norm": 0.6629899968556545, + "learning_rate": 1.788906447019629e-06, + "loss": 0.8141548037528992, + "step": 2172 + }, + { + "epoch": 0.5006912442396313, + "grad_norm": 0.6495144260680482, + "learning_rate": 1.7886722701341382e-06, + "loss": 0.6764500141143799, + "step": 2173 + }, + { + "epoch": 0.5009216589861751, + "grad_norm": 0.6701022764652186, + "learning_rate": 1.7884379787739112e-06, + "loss": 0.710756778717041, + "step": 2174 + }, + { + "epoch": 0.5011520737327189, + "grad_norm": 0.8273999117205362, + "learning_rate": 1.7882035729729555e-06, + "loss": 0.8090574145317078, + "step": 2175 + }, + { + "epoch": 0.5013824884792627, + "grad_norm": 0.6977221855783239, + "learning_rate": 1.7879690527652943e-06, + "loss": 0.7639138102531433, + "step": 2176 + }, + { + "epoch": 0.5016129032258064, + "grad_norm": 0.9185836860641033, + "learning_rate": 1.7877344181849687e-06, + "loss": 0.8093903660774231, + "step": 2177 + }, + { + "epoch": 0.5018433179723503, + "grad_norm": 0.7610855435865236, + "learning_rate": 1.7874996692660348e-06, + "loss": 0.8705824017524719, + "step": 2178 + }, + { + "epoch": 0.502073732718894, + "grad_norm": 0.7815265219501579, + "learning_rate": 1.7872648060425666e-06, + "loss": 0.7365947961807251, + "step": 2179 + }, + { + "epoch": 0.5023041474654378, + "grad_norm": 0.8989287933893153, + "learning_rate": 1.787029828548654e-06, + "loss": 0.9405299425125122, + "step": 2180 + }, + { + "epoch": 0.5025345622119816, + "grad_norm": 0.907417749032586, + "learning_rate": 1.7867947368184036e-06, + "loss": 0.9232017993927002, + "step": 2181 + }, + { + "epoch": 0.5027649769585254, + "grad_norm": 1.0801728154122552, + "learning_rate": 1.7865595308859388e-06, + "loss": 0.9941537380218506, + "step": 2182 + }, + { + "epoch": 0.5029953917050691, + "grad_norm": 0.7341611336832391, + "learning_rate": 1.7863242107853993e-06, + "loss": 0.6981802582740784, + "step": 2183 + }, + { + "epoch": 0.5032258064516129, + "grad_norm": 
0.8346521198909456, + "learning_rate": 1.7860887765509417e-06, + "loss": 0.8155109882354736, + "step": 2184 + }, + { + "epoch": 0.5034562211981567, + "grad_norm": 0.8846374910749497, + "learning_rate": 1.7858532282167385e-06, + "loss": 0.7246255874633789, + "step": 2185 + }, + { + "epoch": 0.5036866359447004, + "grad_norm": 0.7027049895049993, + "learning_rate": 1.7856175658169796e-06, + "loss": 0.7042064666748047, + "step": 2186 + }, + { + "epoch": 0.5039170506912443, + "grad_norm": 0.8633735424450812, + "learning_rate": 1.7853817893858714e-06, + "loss": 0.7522145509719849, + "step": 2187 + }, + { + "epoch": 0.504147465437788, + "grad_norm": 0.8170927084265063, + "learning_rate": 1.7851458989576359e-06, + "loss": 1.0157709121704102, + "step": 2188 + }, + { + "epoch": 0.5043778801843318, + "grad_norm": 0.8537305826863457, + "learning_rate": 1.7849098945665127e-06, + "loss": 0.7096433639526367, + "step": 2189 + }, + { + "epoch": 0.5046082949308756, + "grad_norm": 0.8293401368813538, + "learning_rate": 1.7846737762467572e-06, + "loss": 0.7743037939071655, + "step": 2190 + }, + { + "epoch": 0.5048387096774194, + "grad_norm": 0.802261593558941, + "learning_rate": 1.784437544032642e-06, + "loss": 0.7907241582870483, + "step": 2191 + }, + { + "epoch": 0.5050691244239631, + "grad_norm": 0.9488985791352184, + "learning_rate": 1.7842011979584557e-06, + "loss": 0.8692185878753662, + "step": 2192 + }, + { + "epoch": 0.505299539170507, + "grad_norm": 1.0636987469588612, + "learning_rate": 1.783964738058504e-06, + "loss": 0.9678715467453003, + "step": 2193 + }, + { + "epoch": 0.5055299539170507, + "grad_norm": 0.7713527005281836, + "learning_rate": 1.7837281643671077e-06, + "loss": 0.855170726776123, + "step": 2194 + }, + { + "epoch": 0.5057603686635944, + "grad_norm": 0.7469430705420217, + "learning_rate": 1.7834914769186065e-06, + "loss": 0.8452733755111694, + "step": 2195 + }, + { + "epoch": 0.5059907834101383, + "grad_norm": 0.6866121153572871, + "learning_rate": 
1.7832546757473543e-06, + "loss": 0.7517217397689819, + "step": 2196 + }, + { + "epoch": 0.506221198156682, + "grad_norm": 0.7453227048555126, + "learning_rate": 1.783017760887723e-06, + "loss": 0.6971632838249207, + "step": 2197 + }, + { + "epoch": 0.5064516129032258, + "grad_norm": 0.7964964192157018, + "learning_rate": 1.7827807323741002e-06, + "loss": 0.8638256192207336, + "step": 2198 + }, + { + "epoch": 0.5066820276497696, + "grad_norm": 0.7941877452524988, + "learning_rate": 1.7825435902408903e-06, + "loss": 0.8410143256187439, + "step": 2199 + }, + { + "epoch": 0.5069124423963134, + "grad_norm": 0.7902588767037179, + "learning_rate": 1.7823063345225143e-06, + "loss": 0.8127691745758057, + "step": 2200 + }, + { + "epoch": 0.5071428571428571, + "grad_norm": 0.7618481515663807, + "learning_rate": 1.7820689652534096e-06, + "loss": 0.7351404428482056, + "step": 2201 + }, + { + "epoch": 0.507373271889401, + "grad_norm": 0.6691944306500267, + "learning_rate": 1.7818314824680298e-06, + "loss": 0.7258716821670532, + "step": 2202 + }, + { + "epoch": 0.5076036866359447, + "grad_norm": 1.0029859864492747, + "learning_rate": 1.7815938862008454e-06, + "loss": 0.9509599208831787, + "step": 2203 + }, + { + "epoch": 0.5078341013824885, + "grad_norm": 0.7738532710061052, + "learning_rate": 1.7813561764863429e-06, + "loss": 0.8600929379463196, + "step": 2204 + }, + { + "epoch": 0.5080645161290323, + "grad_norm": 0.9689099485850551, + "learning_rate": 1.7811183533590257e-06, + "loss": 0.8688119649887085, + "step": 2205 + }, + { + "epoch": 0.5082949308755761, + "grad_norm": 0.7599344683888546, + "learning_rate": 1.780880416853414e-06, + "loss": 0.8447986841201782, + "step": 2206 + }, + { + "epoch": 0.5085253456221198, + "grad_norm": 0.6953642388755117, + "learning_rate": 1.7806423670040433e-06, + "loss": 0.8262573480606079, + "step": 2207 + }, + { + "epoch": 0.5087557603686635, + "grad_norm": 0.7640117945069856, + "learning_rate": 1.7804042038454666e-06, + "loss": 
0.9534487724304199, + "step": 2208 + }, + { + "epoch": 0.5089861751152074, + "grad_norm": 0.7513792438385134, + "learning_rate": 1.7801659274122527e-06, + "loss": 0.7712565064430237, + "step": 2209 + }, + { + "epoch": 0.5092165898617511, + "grad_norm": 0.8714588056175714, + "learning_rate": 1.7799275377389873e-06, + "loss": 0.8190760016441345, + "step": 2210 + }, + { + "epoch": 0.509447004608295, + "grad_norm": 0.9379540710774249, + "learning_rate": 1.7796890348602722e-06, + "loss": 0.8647592067718506, + "step": 2211 + }, + { + "epoch": 0.5096774193548387, + "grad_norm": 0.7912467632232041, + "learning_rate": 1.7794504188107257e-06, + "loss": 0.7788198590278625, + "step": 2212 + }, + { + "epoch": 0.5099078341013825, + "grad_norm": 0.7053754197084299, + "learning_rate": 1.779211689624983e-06, + "loss": 0.8610718250274658, + "step": 2213 + }, + { + "epoch": 0.5101382488479262, + "grad_norm": 0.7783569383566119, + "learning_rate": 1.7789728473376952e-06, + "loss": 0.832200825214386, + "step": 2214 + }, + { + "epoch": 0.5103686635944701, + "grad_norm": 0.7823482622118234, + "learning_rate": 1.7787338919835298e-06, + "loss": 0.7325488328933716, + "step": 2215 + }, + { + "epoch": 0.5105990783410138, + "grad_norm": 0.8903627357495159, + "learning_rate": 1.7784948235971707e-06, + "loss": 0.8038203716278076, + "step": 2216 + }, + { + "epoch": 0.5108294930875577, + "grad_norm": 0.6275186054972087, + "learning_rate": 1.7782556422133185e-06, + "loss": 0.7016317248344421, + "step": 2217 + }, + { + "epoch": 0.5110599078341014, + "grad_norm": 0.8951545762278973, + "learning_rate": 1.7780163478666905e-06, + "loss": 0.7964655160903931, + "step": 2218 + }, + { + "epoch": 0.5112903225806451, + "grad_norm": 0.7709224710894249, + "learning_rate": 1.777776940592019e-06, + "loss": 0.6681785583496094, + "step": 2219 + }, + { + "epoch": 0.511520737327189, + "grad_norm": 0.8934880823893885, + "learning_rate": 1.7775374204240547e-06, + "loss": 0.835777759552002, + "step": 2220 + }, + { + 
"epoch": 0.5117511520737327, + "grad_norm": 1.0248178001051076, + "learning_rate": 1.777297787397563e-06, + "loss": 0.9442443251609802, + "step": 2221 + }, + { + "epoch": 0.5119815668202765, + "grad_norm": 1.072158922361294, + "learning_rate": 1.7770580415473267e-06, + "loss": 0.9351231455802917, + "step": 2222 + }, + { + "epoch": 0.5122119815668202, + "grad_norm": 0.878332211622375, + "learning_rate": 1.776818182908144e-06, + "loss": 0.7238374352455139, + "step": 2223 + }, + { + "epoch": 0.5124423963133641, + "grad_norm": 0.7001659306792695, + "learning_rate": 1.7765782115148308e-06, + "loss": 0.8206230998039246, + "step": 2224 + }, + { + "epoch": 0.5126728110599078, + "grad_norm": 0.6546302150578799, + "learning_rate": 1.7763381274022176e-06, + "loss": 0.748784065246582, + "step": 2225 + }, + { + "epoch": 0.5129032258064516, + "grad_norm": 0.7566703422977776, + "learning_rate": 1.7760979306051533e-06, + "loss": 0.7980858087539673, + "step": 2226 + }, + { + "epoch": 0.5131336405529954, + "grad_norm": 0.8877968508757134, + "learning_rate": 1.7758576211585018e-06, + "loss": 0.8631168603897095, + "step": 2227 + }, + { + "epoch": 0.5133640552995392, + "grad_norm": 0.7405217897025548, + "learning_rate": 1.7756171990971441e-06, + "loss": 0.9405999779701233, + "step": 2228 + }, + { + "epoch": 0.5135944700460829, + "grad_norm": 0.8867257371824923, + "learning_rate": 1.7753766644559763e-06, + "loss": 0.9055094718933105, + "step": 2229 + }, + { + "epoch": 0.5138248847926268, + "grad_norm": 0.827493910498757, + "learning_rate": 1.775136017269912e-06, + "loss": 0.7583146691322327, + "step": 2230 + }, + { + "epoch": 0.5140552995391705, + "grad_norm": 0.8689067612775456, + "learning_rate": 1.7748952575738811e-06, + "loss": 0.8728743195533752, + "step": 2231 + }, + { + "epoch": 0.5142857142857142, + "grad_norm": 0.7067707521741841, + "learning_rate": 1.7746543854028295e-06, + "loss": 0.8133460283279419, + "step": 2232 + }, + { + "epoch": 0.5145161290322581, + "grad_norm": 
0.7177694794353267, + "learning_rate": 1.7744134007917194e-06, + "loss": 0.8389721512794495, + "step": 2233 + }, + { + "epoch": 0.5147465437788018, + "grad_norm": 0.9617522193850644, + "learning_rate": 1.774172303775529e-06, + "loss": 0.7016798257827759, + "step": 2234 + }, + { + "epoch": 0.5149769585253456, + "grad_norm": 0.7999711451764379, + "learning_rate": 1.7739310943892538e-06, + "loss": 0.7920540571212769, + "step": 2235 + }, + { + "epoch": 0.5152073732718894, + "grad_norm": 0.6990088891534603, + "learning_rate": 1.7736897726679048e-06, + "loss": 0.900149405002594, + "step": 2236 + }, + { + "epoch": 0.5154377880184332, + "grad_norm": 0.743220745754201, + "learning_rate": 1.7734483386465096e-06, + "loss": 0.8537915349006653, + "step": 2237 + }, + { + "epoch": 0.5156682027649769, + "grad_norm": 0.8134323205434837, + "learning_rate": 1.7732067923601121e-06, + "loss": 0.7418123483657837, + "step": 2238 + }, + { + "epoch": 0.5158986175115208, + "grad_norm": 1.108361921569266, + "learning_rate": 1.7729651338437721e-06, + "loss": 0.8890011310577393, + "step": 2239 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9841321811418366, + "learning_rate": 1.7727233631325663e-06, + "loss": 0.9082813262939453, + "step": 2240 + }, + { + "epoch": 0.5163594470046083, + "grad_norm": 0.9268737545625799, + "learning_rate": 1.7724814802615868e-06, + "loss": 0.8337695598602295, + "step": 2241 + }, + { + "epoch": 0.5165898617511521, + "grad_norm": 1.1037050608526282, + "learning_rate": 1.7722394852659437e-06, + "loss": 0.8990765810012817, + "step": 2242 + }, + { + "epoch": 0.5168202764976959, + "grad_norm": 0.8552834719912825, + "learning_rate": 1.7719973781807614e-06, + "loss": 0.720890998840332, + "step": 2243 + }, + { + "epoch": 0.5170506912442396, + "grad_norm": 0.6406815235154244, + "learning_rate": 1.7717551590411817e-06, + "loss": 0.7966938018798828, + "step": 2244 + }, + { + "epoch": 0.5172811059907834, + "grad_norm": 0.8614270693246835, + "learning_rate": 
1.7715128278823622e-06, + "loss": 0.9290107488632202, + "step": 2245 + }, + { + "epoch": 0.5175115207373272, + "grad_norm": 0.8755598994931274, + "learning_rate": 1.771270384739477e-06, + "loss": 0.8388533592224121, + "step": 2246 + }, + { + "epoch": 0.5177419354838709, + "grad_norm": 0.8200932411512113, + "learning_rate": 1.7710278296477169e-06, + "loss": 0.8845043182373047, + "step": 2247 + }, + { + "epoch": 0.5179723502304148, + "grad_norm": 0.8499976704860752, + "learning_rate": 1.7707851626422875e-06, + "loss": 0.879709780216217, + "step": 2248 + }, + { + "epoch": 0.5182027649769585, + "grad_norm": 0.8407815201465851, + "learning_rate": 1.7705423837584123e-06, + "loss": 0.8215152025222778, + "step": 2249 + }, + { + "epoch": 0.5184331797235023, + "grad_norm": 0.8770027311962882, + "learning_rate": 1.7702994930313305e-06, + "loss": 0.8108627796173096, + "step": 2250 + }, + { + "epoch": 0.5186635944700461, + "grad_norm": 0.9106818329739914, + "learning_rate": 1.7700564904962966e-06, + "loss": 0.8391602039337158, + "step": 2251 + }, + { + "epoch": 0.5188940092165899, + "grad_norm": 0.82724043269172, + "learning_rate": 1.769813376188583e-06, + "loss": 0.8664923906326294, + "step": 2252 + }, + { + "epoch": 0.5191244239631336, + "grad_norm": 0.8478256896643234, + "learning_rate": 1.7695701501434765e-06, + "loss": 0.9670882821083069, + "step": 2253 + }, + { + "epoch": 0.5193548387096775, + "grad_norm": 0.8831524743377538, + "learning_rate": 1.7693268123962816e-06, + "loss": 0.946273684501648, + "step": 2254 + }, + { + "epoch": 0.5195852534562212, + "grad_norm": 0.7643743435262689, + "learning_rate": 1.7690833629823184e-06, + "loss": 0.9691795706748962, + "step": 2255 + }, + { + "epoch": 0.5198156682027649, + "grad_norm": 0.7833370135674333, + "learning_rate": 1.7688398019369232e-06, + "loss": 0.8086103200912476, + "step": 2256 + }, + { + "epoch": 0.5200460829493088, + "grad_norm": 0.8183770044685874, + "learning_rate": 1.7685961292954486e-06, + "loss": 
0.8574277758598328, + "step": 2257 + }, + { + "epoch": 0.5202764976958525, + "grad_norm": 0.7089387180946831, + "learning_rate": 1.7683523450932633e-06, + "loss": 0.7841963171958923, + "step": 2258 + }, + { + "epoch": 0.5205069124423963, + "grad_norm": 0.7629735238937895, + "learning_rate": 1.7681084493657523e-06, + "loss": 0.6972980499267578, + "step": 2259 + }, + { + "epoch": 0.5207373271889401, + "grad_norm": 0.7917333859989639, + "learning_rate": 1.7678644421483163e-06, + "loss": 0.9193723201751709, + "step": 2260 + }, + { + "epoch": 0.5209677419354839, + "grad_norm": 0.9714597630384237, + "learning_rate": 1.7676203234763736e-06, + "loss": 0.7902654409408569, + "step": 2261 + }, + { + "epoch": 0.5211981566820276, + "grad_norm": 0.7983060164629807, + "learning_rate": 1.767376093385357e-06, + "loss": 0.8804734945297241, + "step": 2262 + }, + { + "epoch": 0.5214285714285715, + "grad_norm": 0.9065709846386143, + "learning_rate": 1.7671317519107163e-06, + "loss": 0.7884976863861084, + "step": 2263 + }, + { + "epoch": 0.5216589861751152, + "grad_norm": 0.9252417906886758, + "learning_rate": 1.7668872990879173e-06, + "loss": 0.8233190774917603, + "step": 2264 + }, + { + "epoch": 0.521889400921659, + "grad_norm": 0.7126124532622758, + "learning_rate": 1.766642734952442e-06, + "loss": 0.7985334396362305, + "step": 2265 + }, + { + "epoch": 0.5221198156682028, + "grad_norm": 0.8073440338214538, + "learning_rate": 1.7663980595397887e-06, + "loss": 0.7805646657943726, + "step": 2266 + }, + { + "epoch": 0.5223502304147466, + "grad_norm": 0.9455838488830395, + "learning_rate": 1.7661532728854718e-06, + "loss": 0.8528248071670532, + "step": 2267 + }, + { + "epoch": 0.5225806451612903, + "grad_norm": 0.882590365173732, + "learning_rate": 1.7659083750250215e-06, + "loss": 0.7714066505432129, + "step": 2268 + }, + { + "epoch": 0.522811059907834, + "grad_norm": 0.7632999883965862, + "learning_rate": 1.7656633659939843e-06, + "loss": 0.8250499963760376, + "step": 2269 + }, + { + 
"epoch": 0.5230414746543779, + "grad_norm": 0.6787990523098465, + "learning_rate": 1.7654182458279231e-06, + "loss": 0.7878777384757996, + "step": 2270 + }, + { + "epoch": 0.5232718894009216, + "grad_norm": 0.8263772967033729, + "learning_rate": 1.7651730145624174e-06, + "loss": 0.9080224633216858, + "step": 2271 + }, + { + "epoch": 0.5235023041474655, + "grad_norm": 0.8137376292994275, + "learning_rate": 1.7649276722330607e-06, + "loss": 0.8010937571525574, + "step": 2272 + }, + { + "epoch": 0.5237327188940092, + "grad_norm": 0.8996847055009526, + "learning_rate": 1.7646822188754658e-06, + "loss": 0.903404951095581, + "step": 2273 + }, + { + "epoch": 0.523963133640553, + "grad_norm": 0.928692707021516, + "learning_rate": 1.7644366545252589e-06, + "loss": 0.9009061455726624, + "step": 2274 + }, + { + "epoch": 0.5241935483870968, + "grad_norm": 0.7651260343716183, + "learning_rate": 1.7641909792180834e-06, + "loss": 0.7158697843551636, + "step": 2275 + }, + { + "epoch": 0.5244239631336406, + "grad_norm": 0.8041302440889452, + "learning_rate": 1.763945192989599e-06, + "loss": 0.8101463317871094, + "step": 2276 + }, + { + "epoch": 0.5246543778801843, + "grad_norm": 0.8174455436475604, + "learning_rate": 1.7636992958754812e-06, + "loss": 0.758610725402832, + "step": 2277 + }, + { + "epoch": 0.5248847926267282, + "grad_norm": 0.9651314388158028, + "learning_rate": 1.7634532879114216e-06, + "loss": 0.9469501972198486, + "step": 2278 + }, + { + "epoch": 0.5251152073732719, + "grad_norm": 0.6853415956002341, + "learning_rate": 1.7632071691331281e-06, + "loss": 0.7528036236763, + "step": 2279 + }, + { + "epoch": 0.5253456221198156, + "grad_norm": 0.9124447697867164, + "learning_rate": 1.7629609395763242e-06, + "loss": 0.8519324064254761, + "step": 2280 + }, + { + "epoch": 0.5255760368663595, + "grad_norm": 0.9239480610002251, + "learning_rate": 1.7627145992767498e-06, + "loss": 0.8620004653930664, + "step": 2281 + }, + { + "epoch": 0.5258064516129032, + "grad_norm": 
0.7831738680942184, + "learning_rate": 1.762468148270161e-06, + "loss": 0.8066067695617676, + "step": 2282 + }, + { + "epoch": 0.526036866359447, + "grad_norm": 0.8314773622163678, + "learning_rate": 1.7622215865923301e-06, + "loss": 0.865642786026001, + "step": 2283 + }, + { + "epoch": 0.5262672811059907, + "grad_norm": 0.7269170910166286, + "learning_rate": 1.761974914279045e-06, + "loss": 0.8478001356124878, + "step": 2284 + }, + { + "epoch": 0.5264976958525346, + "grad_norm": 0.8461811606118353, + "learning_rate": 1.7617281313661098e-06, + "loss": 0.7984344363212585, + "step": 2285 + }, + { + "epoch": 0.5267281105990783, + "grad_norm": 0.8489168247147351, + "learning_rate": 1.7614812378893444e-06, + "loss": 0.8480801582336426, + "step": 2286 + }, + { + "epoch": 0.5269585253456222, + "grad_norm": 0.9126795310234661, + "learning_rate": 1.7612342338845859e-06, + "loss": 0.8667479753494263, + "step": 2287 + }, + { + "epoch": 0.5271889400921659, + "grad_norm": 0.9533468835174431, + "learning_rate": 1.7609871193876854e-06, + "loss": 0.8431364297866821, + "step": 2288 + }, + { + "epoch": 0.5274193548387097, + "grad_norm": 0.8628781350943807, + "learning_rate": 1.7607398944345127e-06, + "loss": 0.8544220924377441, + "step": 2289 + }, + { + "epoch": 0.5276497695852534, + "grad_norm": 0.9575259696859837, + "learning_rate": 1.760492559060951e-06, + "loss": 0.9298971891403198, + "step": 2290 + }, + { + "epoch": 0.5278801843317973, + "grad_norm": 0.8854664005974592, + "learning_rate": 1.760245113302901e-06, + "loss": 0.739667534828186, + "step": 2291 + }, + { + "epoch": 0.528110599078341, + "grad_norm": 0.9418693515744256, + "learning_rate": 1.7599975571962796e-06, + "loss": 0.8981268405914307, + "step": 2292 + }, + { + "epoch": 0.5283410138248847, + "grad_norm": 0.8489202000746718, + "learning_rate": 1.7597498907770185e-06, + "loss": 0.8027834892272949, + "step": 2293 + }, + { + "epoch": 0.5285714285714286, + "grad_norm": 0.7244957329263912, + "learning_rate": 
1.7595021140810669e-06, + "loss": 0.7018242478370667, + "step": 2294 + }, + { + "epoch": 0.5288018433179723, + "grad_norm": 0.8699196704594798, + "learning_rate": 1.7592542271443887e-06, + "loss": 0.7655147910118103, + "step": 2295 + }, + { + "epoch": 0.5290322580645161, + "grad_norm": 0.8169123509935803, + "learning_rate": 1.7590062300029644e-06, + "loss": 0.8283153772354126, + "step": 2296 + }, + { + "epoch": 0.5292626728110599, + "grad_norm": 1.0550792201388366, + "learning_rate": 1.7587581226927907e-06, + "loss": 1.0430598258972168, + "step": 2297 + }, + { + "epoch": 0.5294930875576037, + "grad_norm": 0.7609036061197976, + "learning_rate": 1.7585099052498802e-06, + "loss": 0.6683472990989685, + "step": 2298 + }, + { + "epoch": 0.5297235023041474, + "grad_norm": 0.7278178698575015, + "learning_rate": 1.7582615777102609e-06, + "loss": 0.7254939079284668, + "step": 2299 + }, + { + "epoch": 0.5299539170506913, + "grad_norm": 0.7049477325497308, + "learning_rate": 1.7580131401099774e-06, + "loss": 0.7913245558738708, + "step": 2300 + }, + { + "epoch": 0.530184331797235, + "grad_norm": 0.8416230641508338, + "learning_rate": 1.75776459248509e-06, + "loss": 0.7832915782928467, + "step": 2301 + }, + { + "epoch": 0.5304147465437788, + "grad_norm": 0.7722959383546871, + "learning_rate": 1.7575159348716754e-06, + "loss": 0.9754987955093384, + "step": 2302 + }, + { + "epoch": 0.5306451612903226, + "grad_norm": 0.8614799765536667, + "learning_rate": 1.7572671673058254e-06, + "loss": 0.8343901634216309, + "step": 2303 + }, + { + "epoch": 0.5308755760368664, + "grad_norm": 0.862069962418511, + "learning_rate": 1.757018289823649e-06, + "loss": 0.9836198091506958, + "step": 2304 + }, + { + "epoch": 0.5311059907834101, + "grad_norm": 0.7978699236275345, + "learning_rate": 1.7567693024612695e-06, + "loss": 0.8258972764015198, + "step": 2305 + }, + { + "epoch": 0.5313364055299539, + "grad_norm": 0.8169244061103897, + "learning_rate": 1.7565202052548277e-06, + "loss": 
0.8822964429855347, + "step": 2306 + }, + { + "epoch": 0.5315668202764977, + "grad_norm": 0.8094894252842574, + "learning_rate": 1.7562709982404797e-06, + "loss": 0.721222996711731, + "step": 2307 + }, + { + "epoch": 0.5317972350230414, + "grad_norm": 0.7759663122688174, + "learning_rate": 1.7560216814543974e-06, + "loss": 0.7273069620132446, + "step": 2308 + }, + { + "epoch": 0.5320276497695853, + "grad_norm": 0.749740659090673, + "learning_rate": 1.755772254932769e-06, + "loss": 0.8031520843505859, + "step": 2309 + }, + { + "epoch": 0.532258064516129, + "grad_norm": 0.8746676083569236, + "learning_rate": 1.7555227187117982e-06, + "loss": 0.8767163157463074, + "step": 2310 + }, + { + "epoch": 0.5324884792626728, + "grad_norm": 1.052374988916139, + "learning_rate": 1.755273072827705e-06, + "loss": 0.8018463850021362, + "step": 2311 + }, + { + "epoch": 0.5327188940092166, + "grad_norm": 0.9632384627648846, + "learning_rate": 1.7550233173167252e-06, + "loss": 0.8281232118606567, + "step": 2312 + }, + { + "epoch": 0.5329493087557604, + "grad_norm": 0.9472067369973646, + "learning_rate": 1.7547734522151103e-06, + "loss": 0.8802565336227417, + "step": 2313 + }, + { + "epoch": 0.5331797235023041, + "grad_norm": 0.7195582219345643, + "learning_rate": 1.754523477559128e-06, + "loss": 0.8055544495582581, + "step": 2314 + }, + { + "epoch": 0.533410138248848, + "grad_norm": 0.9358658916449707, + "learning_rate": 1.754273393385062e-06, + "loss": 0.8163481950759888, + "step": 2315 + }, + { + "epoch": 0.5336405529953917, + "grad_norm": 0.9365559775291885, + "learning_rate": 1.7540231997292111e-06, + "loss": 0.8308255076408386, + "step": 2316 + }, + { + "epoch": 0.5338709677419354, + "grad_norm": 0.9031429015213124, + "learning_rate": 1.7537728966278913e-06, + "loss": 0.8387685418128967, + "step": 2317 + }, + { + "epoch": 0.5341013824884793, + "grad_norm": 0.7470153179334161, + "learning_rate": 1.7535224841174333e-06, + "loss": 0.8668780326843262, + "step": 2318 + }, + { + 
"epoch": 0.534331797235023, + "grad_norm": 0.7449540611731051, + "learning_rate": 1.7532719622341842e-06, + "loss": 0.8394712209701538, + "step": 2319 + }, + { + "epoch": 0.5345622119815668, + "grad_norm": 0.7539905771593468, + "learning_rate": 1.7530213310145073e-06, + "loss": 0.7755688428878784, + "step": 2320 + }, + { + "epoch": 0.5347926267281106, + "grad_norm": 0.8150738821263226, + "learning_rate": 1.7527705904947805e-06, + "loss": 0.7714632749557495, + "step": 2321 + }, + { + "epoch": 0.5350230414746544, + "grad_norm": 0.807680924946579, + "learning_rate": 1.7525197407113997e-06, + "loss": 0.8810869455337524, + "step": 2322 + }, + { + "epoch": 0.5352534562211981, + "grad_norm": 1.0672299468188131, + "learning_rate": 1.7522687817007742e-06, + "loss": 0.8445242643356323, + "step": 2323 + }, + { + "epoch": 0.535483870967742, + "grad_norm": 1.1338085945775938, + "learning_rate": 1.7520177134993311e-06, + "loss": 0.9602948427200317, + "step": 2324 + }, + { + "epoch": 0.5357142857142857, + "grad_norm": 0.7789379367396811, + "learning_rate": 1.7517665361435126e-06, + "loss": 0.7865237593650818, + "step": 2325 + }, + { + "epoch": 0.5359447004608295, + "grad_norm": 0.8870578602537817, + "learning_rate": 1.7515152496697763e-06, + "loss": 0.8062880039215088, + "step": 2326 + }, + { + "epoch": 0.5361751152073733, + "grad_norm": 0.9742037408160464, + "learning_rate": 1.7512638541145966e-06, + "loss": 0.8386664986610413, + "step": 2327 + }, + { + "epoch": 0.5364055299539171, + "grad_norm": 1.0154937609139327, + "learning_rate": 1.7510123495144629e-06, + "loss": 0.973692774772644, + "step": 2328 + }, + { + "epoch": 0.5366359447004608, + "grad_norm": 0.9023959356834507, + "learning_rate": 1.7507607359058808e-06, + "loss": 0.8250089883804321, + "step": 2329 + }, + { + "epoch": 0.5368663594470046, + "grad_norm": 0.8457870176131529, + "learning_rate": 1.750509013325372e-06, + "loss": 0.8578102588653564, + "step": 2330 + }, + { + "epoch": 0.5370967741935484, + "grad_norm": 
0.8804595958614453, + "learning_rate": 1.7502571818094732e-06, + "loss": 0.916475236415863, + "step": 2331 + }, + { + "epoch": 0.5373271889400921, + "grad_norm": 0.9225430635370255, + "learning_rate": 1.7500052413947377e-06, + "loss": 0.8210046291351318, + "step": 2332 + }, + { + "epoch": 0.537557603686636, + "grad_norm": 0.7091387099201478, + "learning_rate": 1.7497531921177344e-06, + "loss": 0.816267728805542, + "step": 2333 + }, + { + "epoch": 0.5377880184331797, + "grad_norm": 0.9764630645457667, + "learning_rate": 1.7495010340150478e-06, + "loss": 1.0091882944107056, + "step": 2334 + }, + { + "epoch": 0.5380184331797235, + "grad_norm": 0.982812584725329, + "learning_rate": 1.7492487671232783e-06, + "loss": 0.7549277544021606, + "step": 2335 + }, + { + "epoch": 0.5382488479262673, + "grad_norm": 0.8589431412898547, + "learning_rate": 1.7489963914790423e-06, + "loss": 0.9584934711456299, + "step": 2336 + }, + { + "epoch": 0.5384792626728111, + "grad_norm": 0.7167225081500926, + "learning_rate": 1.7487439071189713e-06, + "loss": 0.8189069628715515, + "step": 2337 + }, + { + "epoch": 0.5387096774193548, + "grad_norm": 0.976466384445042, + "learning_rate": 1.7484913140797138e-06, + "loss": 0.7529993057250977, + "step": 2338 + }, + { + "epoch": 0.5389400921658987, + "grad_norm": 0.9894954868399615, + "learning_rate": 1.7482386123979324e-06, + "loss": 0.8611496686935425, + "step": 2339 + }, + { + "epoch": 0.5391705069124424, + "grad_norm": 1.2753256885249857, + "learning_rate": 1.7479858021103074e-06, + "loss": 0.9400241374969482, + "step": 2340 + }, + { + "epoch": 0.5394009216589861, + "grad_norm": 0.7513824016722385, + "learning_rate": 1.7477328832535332e-06, + "loss": 0.6686737537384033, + "step": 2341 + }, + { + "epoch": 0.53963133640553, + "grad_norm": 0.7834119073150019, + "learning_rate": 1.747479855864321e-06, + "loss": 0.864795982837677, + "step": 2342 + }, + { + "epoch": 0.5398617511520737, + "grad_norm": 0.9942068845664563, + "learning_rate": 
1.7472267199793971e-06, + "loss": 0.9579563140869141, + "step": 2343 + }, + { + "epoch": 0.5400921658986175, + "grad_norm": 0.9464284115225821, + "learning_rate": 1.746973475635504e-06, + "loss": 0.7492884397506714, + "step": 2344 + }, + { + "epoch": 0.5403225806451613, + "grad_norm": 1.1301826150440575, + "learning_rate": 1.7467201228694e-06, + "loss": 1.020420789718628, + "step": 2345 + }, + { + "epoch": 0.5405529953917051, + "grad_norm": 0.8996882097606888, + "learning_rate": 1.7464666617178585e-06, + "loss": 0.8277238011360168, + "step": 2346 + }, + { + "epoch": 0.5407834101382488, + "grad_norm": 0.8343415166384458, + "learning_rate": 1.7462130922176694e-06, + "loss": 0.8160337209701538, + "step": 2347 + }, + { + "epoch": 0.5410138248847927, + "grad_norm": 0.940177897473061, + "learning_rate": 1.7459594144056378e-06, + "loss": 0.8742454648017883, + "step": 2348 + }, + { + "epoch": 0.5412442396313364, + "grad_norm": 0.8263630155636004, + "learning_rate": 1.7457056283185847e-06, + "loss": 0.7987914085388184, + "step": 2349 + }, + { + "epoch": 0.5414746543778802, + "grad_norm": 0.8096196719588583, + "learning_rate": 1.7454517339933467e-06, + "loss": 0.6917734146118164, + "step": 2350 + }, + { + "epoch": 0.541705069124424, + "grad_norm": 0.9860357050478065, + "learning_rate": 1.7451977314667763e-06, + "loss": 0.8338258266448975, + "step": 2351 + }, + { + "epoch": 0.5419354838709678, + "grad_norm": 0.6906626367704619, + "learning_rate": 1.7449436207757418e-06, + "loss": 0.8308743238449097, + "step": 2352 + }, + { + "epoch": 0.5421658986175115, + "grad_norm": 0.7126371911422212, + "learning_rate": 1.744689401957127e-06, + "loss": 0.7843145728111267, + "step": 2353 + }, + { + "epoch": 0.5423963133640552, + "grad_norm": 0.6637904176126797, + "learning_rate": 1.7444350750478314e-06, + "loss": 0.9088687896728516, + "step": 2354 + }, + { + "epoch": 0.5426267281105991, + "grad_norm": 1.1601519737508017, + "learning_rate": 1.74418064008477e-06, + "loss": 0.876841127872467, 
+ "step": 2355 + }, + { + "epoch": 0.5428571428571428, + "grad_norm": 0.804702758707697, + "learning_rate": 1.743926097104874e-06, + "loss": 0.7169051170349121, + "step": 2356 + }, + { + "epoch": 0.5430875576036867, + "grad_norm": 0.8414445338031196, + "learning_rate": 1.7436714461450897e-06, + "loss": 0.7979093194007874, + "step": 2357 + }, + { + "epoch": 0.5433179723502304, + "grad_norm": 0.796767744969521, + "learning_rate": 1.7434166872423795e-06, + "loss": 0.9152545928955078, + "step": 2358 + }, + { + "epoch": 0.5435483870967742, + "grad_norm": 0.8612716514728646, + "learning_rate": 1.7431618204337212e-06, + "loss": 0.8968983888626099, + "step": 2359 + }, + { + "epoch": 0.543778801843318, + "grad_norm": 0.7451796864953032, + "learning_rate": 1.7429068457561086e-06, + "loss": 0.7591085433959961, + "step": 2360 + }, + { + "epoch": 0.5440092165898618, + "grad_norm": 0.8434007797764556, + "learning_rate": 1.7426517632465508e-06, + "loss": 0.6931861639022827, + "step": 2361 + }, + { + "epoch": 0.5442396313364055, + "grad_norm": 0.816030716232177, + "learning_rate": 1.7423965729420729e-06, + "loss": 0.7715095281600952, + "step": 2362 + }, + { + "epoch": 0.5444700460829494, + "grad_norm": 0.7333839549943538, + "learning_rate": 1.742141274879715e-06, + "loss": 0.8282119035720825, + "step": 2363 + }, + { + "epoch": 0.5447004608294931, + "grad_norm": 0.8282161479585932, + "learning_rate": 1.7418858690965337e-06, + "loss": 0.7595704197883606, + "step": 2364 + }, + { + "epoch": 0.5449308755760369, + "grad_norm": 0.8861519618227073, + "learning_rate": 1.7416303556296005e-06, + "loss": 0.8738422393798828, + "step": 2365 + }, + { + "epoch": 0.5451612903225806, + "grad_norm": 0.819062403403448, + "learning_rate": 1.741374734516003e-06, + "loss": 0.8399837017059326, + "step": 2366 + }, + { + "epoch": 0.5453917050691244, + "grad_norm": 0.9147252373002325, + "learning_rate": 1.7411190057928442e-06, + "loss": 0.8213151693344116, + "step": 2367 + }, + { + "epoch": 
0.5456221198156682, + "grad_norm": 0.862161359681962, + "learning_rate": 1.740863169497243e-06, + "loss": 0.748835563659668, + "step": 2368 + }, + { + "epoch": 0.5458525345622119, + "grad_norm": 0.6925915187477067, + "learning_rate": 1.7406072256663333e-06, + "loss": 0.9222339391708374, + "step": 2369 + }, + { + "epoch": 0.5460829493087558, + "grad_norm": 0.6352006169320189, + "learning_rate": 1.7403511743372655e-06, + "loss": 0.6543160676956177, + "step": 2370 + }, + { + "epoch": 0.5463133640552995, + "grad_norm": 0.9993386394035012, + "learning_rate": 1.7400950155472046e-06, + "loss": 0.9828567504882812, + "step": 2371 + }, + { + "epoch": 0.5465437788018433, + "grad_norm": 0.9620494284169527, + "learning_rate": 1.739838749333332e-06, + "loss": 0.95346599817276, + "step": 2372 + }, + { + "epoch": 0.5467741935483871, + "grad_norm": 0.4533946729074916, + "learning_rate": 1.7395823757328442e-06, + "loss": 0.626889705657959, + "step": 2373 + }, + { + "epoch": 0.5470046082949309, + "grad_norm": 0.6641652944774505, + "learning_rate": 1.739325894782954e-06, + "loss": 0.8152071833610535, + "step": 2374 + }, + { + "epoch": 0.5472350230414746, + "grad_norm": 0.7149653321076401, + "learning_rate": 1.7390693065208889e-06, + "loss": 0.8244980573654175, + "step": 2375 + }, + { + "epoch": 0.5474654377880185, + "grad_norm": 0.8801604517186058, + "learning_rate": 1.738812610983892e-06, + "loss": 0.8234372138977051, + "step": 2376 + }, + { + "epoch": 0.5476958525345622, + "grad_norm": 0.8626749383303203, + "learning_rate": 1.7385558082092228e-06, + "loss": 0.9334712624549866, + "step": 2377 + }, + { + "epoch": 0.5479262672811059, + "grad_norm": 0.8866496689156442, + "learning_rate": 1.7382988982341557e-06, + "loss": 0.7873882055282593, + "step": 2378 + }, + { + "epoch": 0.5481566820276498, + "grad_norm": 0.7814140858155267, + "learning_rate": 1.7380418810959814e-06, + "loss": 0.7971000671386719, + "step": 2379 + }, + { + "epoch": 0.5483870967741935, + "grad_norm": 
0.7452714019733373, + "learning_rate": 1.7377847568320046e-06, + "loss": 0.8617004156112671, + "step": 2380 + }, + { + "epoch": 0.5486175115207373, + "grad_norm": 0.7316280745753603, + "learning_rate": 1.7375275254795472e-06, + "loss": 0.6798374056816101, + "step": 2381 + }, + { + "epoch": 0.5488479262672811, + "grad_norm": 0.8600424341995414, + "learning_rate": 1.7372701870759459e-06, + "loss": 0.8621633052825928, + "step": 2382 + }, + { + "epoch": 0.5490783410138249, + "grad_norm": 0.78685909041996, + "learning_rate": 1.7370127416585527e-06, + "loss": 0.6533470153808594, + "step": 2383 + }, + { + "epoch": 0.5493087557603686, + "grad_norm": 0.9199843580999427, + "learning_rate": 1.736755189264736e-06, + "loss": 0.8854461908340454, + "step": 2384 + }, + { + "epoch": 0.5495391705069125, + "grad_norm": 1.0020485772603467, + "learning_rate": 1.7364975299318786e-06, + "loss": 0.9461240768432617, + "step": 2385 + }, + { + "epoch": 0.5497695852534562, + "grad_norm": 1.0179837516521926, + "learning_rate": 1.73623976369738e-06, + "loss": 0.8936882019042969, + "step": 2386 + }, + { + "epoch": 0.55, + "grad_norm": 0.7527230779520249, + "learning_rate": 1.7359818905986544e-06, + "loss": 0.8177640438079834, + "step": 2387 + }, + { + "epoch": 0.5502304147465438, + "grad_norm": 0.7539178622826256, + "learning_rate": 1.7357239106731317e-06, + "loss": 0.793328046798706, + "step": 2388 + }, + { + "epoch": 0.5504608294930876, + "grad_norm": 0.8548599569350254, + "learning_rate": 1.7354658239582572e-06, + "loss": 0.8837069272994995, + "step": 2389 + }, + { + "epoch": 0.5506912442396313, + "grad_norm": 0.8764277126116193, + "learning_rate": 1.7352076304914918e-06, + "loss": 0.8801138401031494, + "step": 2390 + }, + { + "epoch": 0.5509216589861751, + "grad_norm": 0.7981260720892804, + "learning_rate": 1.7349493303103123e-06, + "loss": 0.865073025226593, + "step": 2391 + }, + { + "epoch": 0.5511520737327189, + "grad_norm": 0.5938962289027067, + "learning_rate": 1.7346909234522107e-06, + 
"loss": 0.8712339401245117, + "step": 2392 + }, + { + "epoch": 0.5513824884792626, + "grad_norm": 0.6857068624612402, + "learning_rate": 1.7344324099546938e-06, + "loss": 0.7689294815063477, + "step": 2393 + }, + { + "epoch": 0.5516129032258065, + "grad_norm": 0.6784843872797971, + "learning_rate": 1.7341737898552851e-06, + "loss": 0.9228999614715576, + "step": 2394 + }, + { + "epoch": 0.5518433179723502, + "grad_norm": 1.025443261317525, + "learning_rate": 1.7339150631915228e-06, + "loss": 0.9473327398300171, + "step": 2395 + }, + { + "epoch": 0.552073732718894, + "grad_norm": 0.9317831571882359, + "learning_rate": 1.7336562300009604e-06, + "loss": 0.7724621295928955, + "step": 2396 + }, + { + "epoch": 0.5523041474654378, + "grad_norm": 0.7823556125482615, + "learning_rate": 1.7333972903211675e-06, + "loss": 0.8646600246429443, + "step": 2397 + }, + { + "epoch": 0.5525345622119816, + "grad_norm": 0.6673069571562762, + "learning_rate": 1.7331382441897286e-06, + "loss": 0.7143402099609375, + "step": 2398 + }, + { + "epoch": 0.5527649769585253, + "grad_norm": 0.9600129950475998, + "learning_rate": 1.7328790916442446e-06, + "loss": 0.8229624032974243, + "step": 2399 + }, + { + "epoch": 0.5529953917050692, + "grad_norm": 0.8815652742153803, + "learning_rate": 1.7326198327223303e-06, + "loss": 0.7244875431060791, + "step": 2400 + }, + { + "epoch": 0.5532258064516129, + "grad_norm": 0.8586401947703556, + "learning_rate": 1.7323604674616173e-06, + "loss": 0.7797688245773315, + "step": 2401 + }, + { + "epoch": 0.5534562211981566, + "grad_norm": 0.7923271764392044, + "learning_rate": 1.7321009958997519e-06, + "loss": 0.752421498298645, + "step": 2402 + }, + { + "epoch": 0.5536866359447005, + "grad_norm": 0.880725843060538, + "learning_rate": 1.7318414180743962e-06, + "loss": 0.8285892009735107, + "step": 2403 + }, + { + "epoch": 0.5539170506912442, + "grad_norm": 0.7844500606150882, + "learning_rate": 1.7315817340232272e-06, + "loss": 0.8247888088226318, + "step": 2404 + }, 
+ { + "epoch": 0.554147465437788, + "grad_norm": 0.7041289847587934, + "learning_rate": 1.7313219437839384e-06, + "loss": 0.7713418006896973, + "step": 2405 + }, + { + "epoch": 0.5543778801843318, + "grad_norm": 0.8575067968238488, + "learning_rate": 1.7310620473942374e-06, + "loss": 0.8748825788497925, + "step": 2406 + }, + { + "epoch": 0.5546082949308756, + "grad_norm": 0.899949436927101, + "learning_rate": 1.730802044891848e-06, + "loss": 0.9255902767181396, + "step": 2407 + }, + { + "epoch": 0.5548387096774193, + "grad_norm": 0.7968868837370462, + "learning_rate": 1.7305419363145093e-06, + "loss": 0.7226976156234741, + "step": 2408 + }, + { + "epoch": 0.5550691244239632, + "grad_norm": 0.8868777191693532, + "learning_rate": 1.7302817216999754e-06, + "loss": 0.9024704694747925, + "step": 2409 + }, + { + "epoch": 0.5552995391705069, + "grad_norm": 0.8331382998314191, + "learning_rate": 1.7300214010860168e-06, + "loss": 0.7857767343521118, + "step": 2410 + }, + { + "epoch": 0.5555299539170507, + "grad_norm": 0.7111146090264087, + "learning_rate": 1.7297609745104183e-06, + "loss": 0.7280064821243286, + "step": 2411 + }, + { + "epoch": 0.5557603686635945, + "grad_norm": 0.8916895272866717, + "learning_rate": 1.72950044201098e-06, + "loss": 0.8909369111061096, + "step": 2412 + }, + { + "epoch": 0.5559907834101383, + "grad_norm": 0.8724458169518867, + "learning_rate": 1.7292398036255183e-06, + "loss": 0.8543871641159058, + "step": 2413 + }, + { + "epoch": 0.556221198156682, + "grad_norm": 0.7364121573266219, + "learning_rate": 1.7289790593918648e-06, + "loss": 0.6934928894042969, + "step": 2414 + }, + { + "epoch": 0.5564516129032258, + "grad_norm": 0.7288921937743348, + "learning_rate": 1.7287182093478658e-06, + "loss": 0.6323058605194092, + "step": 2415 + }, + { + "epoch": 0.5566820276497696, + "grad_norm": 0.9203399963548066, + "learning_rate": 1.7284572535313833e-06, + "loss": 0.8607437014579773, + "step": 2416 + }, + { + "epoch": 0.5569124423963133, + "grad_norm": 
0.8312318653257402, + "learning_rate": 1.7281961919802948e-06, + "loss": 0.932594358921051, + "step": 2417 + }, + { + "epoch": 0.5571428571428572, + "grad_norm": 0.8132622554262421, + "learning_rate": 1.727935024732493e-06, + "loss": 0.7239062786102295, + "step": 2418 + }, + { + "epoch": 0.5573732718894009, + "grad_norm": 0.770772581447816, + "learning_rate": 1.727673751825886e-06, + "loss": 0.7600498199462891, + "step": 2419 + }, + { + "epoch": 0.5576036866359447, + "grad_norm": 0.9553759629640377, + "learning_rate": 1.7274123732983977e-06, + "loss": 0.6888710260391235, + "step": 2420 + }, + { + "epoch": 0.5578341013824885, + "grad_norm": 0.9472816188704319, + "learning_rate": 1.7271508891879657e-06, + "loss": 0.9768370389938354, + "step": 2421 + }, + { + "epoch": 0.5580645161290323, + "grad_norm": 0.7612474564207412, + "learning_rate": 1.7268892995325453e-06, + "loss": 0.7302272319793701, + "step": 2422 + }, + { + "epoch": 0.558294930875576, + "grad_norm": 0.952809818405442, + "learning_rate": 1.7266276043701052e-06, + "loss": 0.7664496898651123, + "step": 2423 + }, + { + "epoch": 0.5585253456221199, + "grad_norm": 0.7105308716985692, + "learning_rate": 1.72636580373863e-06, + "loss": 0.7672723531723022, + "step": 2424 + }, + { + "epoch": 0.5587557603686636, + "grad_norm": 0.9094827818764729, + "learning_rate": 1.7261038976761203e-06, + "loss": 0.7467625141143799, + "step": 2425 + }, + { + "epoch": 0.5589861751152074, + "grad_norm": 1.0609555724090778, + "learning_rate": 1.7258418862205908e-06, + "loss": 0.899692177772522, + "step": 2426 + }, + { + "epoch": 0.5592165898617512, + "grad_norm": 0.8726314105037919, + "learning_rate": 1.7255797694100724e-06, + "loss": 0.9654138088226318, + "step": 2427 + }, + { + "epoch": 0.5594470046082949, + "grad_norm": 1.0261431779245342, + "learning_rate": 1.725317547282611e-06, + "loss": 0.8487396836280823, + "step": 2428 + }, + { + "epoch": 0.5596774193548387, + "grad_norm": 0.7692614118612008, + "learning_rate": 
1.7250552198762682e-06, + "loss": 0.7785199284553528, + "step": 2429 + }, + { + "epoch": 0.5599078341013825, + "grad_norm": 0.7931069179642137, + "learning_rate": 1.7247927872291198e-06, + "loss": 0.9243934750556946, + "step": 2430 + }, + { + "epoch": 0.5601382488479263, + "grad_norm": 0.6935679959823647, + "learning_rate": 1.724530249379258e-06, + "loss": 0.8674443960189819, + "step": 2431 + }, + { + "epoch": 0.56036866359447, + "grad_norm": 0.7564063858493598, + "learning_rate": 1.7242676063647895e-06, + "loss": 0.8022270202636719, + "step": 2432 + }, + { + "epoch": 0.5605990783410139, + "grad_norm": 0.8222900385869091, + "learning_rate": 1.7240048582238367e-06, + "loss": 0.8696796894073486, + "step": 2433 + }, + { + "epoch": 0.5608294930875576, + "grad_norm": 0.8560234672396506, + "learning_rate": 1.7237420049945374e-06, + "loss": 0.7752439975738525, + "step": 2434 + }, + { + "epoch": 0.5610599078341014, + "grad_norm": 0.9286340475505503, + "learning_rate": 1.723479046715044e-06, + "loss": 0.7660201787948608, + "step": 2435 + }, + { + "epoch": 0.5612903225806452, + "grad_norm": 0.7639410477119124, + "learning_rate": 1.7232159834235249e-06, + "loss": 0.9319918155670166, + "step": 2436 + }, + { + "epoch": 0.561520737327189, + "grad_norm": 0.8121463742755932, + "learning_rate": 1.722952815158163e-06, + "loss": 0.8175421357154846, + "step": 2437 + }, + { + "epoch": 0.5617511520737327, + "grad_norm": 0.5646145066796834, + "learning_rate": 1.7226895419571573e-06, + "loss": 0.6959598064422607, + "step": 2438 + }, + { + "epoch": 0.5619815668202764, + "grad_norm": 0.9804875774075569, + "learning_rate": 1.722426163858721e-06, + "loss": 0.8629111051559448, + "step": 2439 + }, + { + "epoch": 0.5622119815668203, + "grad_norm": 1.1148628556143985, + "learning_rate": 1.7221626809010833e-06, + "loss": 0.8222612142562866, + "step": 2440 + }, + { + "epoch": 0.562442396313364, + "grad_norm": 0.7126052614291007, + "learning_rate": 1.721899093122489e-06, + "loss": 
0.8329352140426636, + "step": 2441 + }, + { + "epoch": 0.5626728110599079, + "grad_norm": 0.7803804718208336, + "learning_rate": 1.7216354005611966e-06, + "loss": 0.8777236938476562, + "step": 2442 + }, + { + "epoch": 0.5629032258064516, + "grad_norm": 0.8601336969746237, + "learning_rate": 1.7213716032554814e-06, + "loss": 0.8487246036529541, + "step": 2443 + }, + { + "epoch": 0.5631336405529954, + "grad_norm": 0.9035051311861264, + "learning_rate": 1.7211077012436327e-06, + "loss": 0.8429645299911499, + "step": 2444 + }, + { + "epoch": 0.5633640552995391, + "grad_norm": 0.9883668092610399, + "learning_rate": 1.720843694563956e-06, + "loss": 0.7683241367340088, + "step": 2445 + }, + { + "epoch": 0.563594470046083, + "grad_norm": 0.839045001132387, + "learning_rate": 1.7205795832547715e-06, + "loss": 0.8468153476715088, + "step": 2446 + }, + { + "epoch": 0.5638248847926267, + "grad_norm": 0.7865527461309724, + "learning_rate": 1.7203153673544136e-06, + "loss": 0.7957276105880737, + "step": 2447 + }, + { + "epoch": 0.5640552995391706, + "grad_norm": 0.7301149604369097, + "learning_rate": 1.7200510469012343e-06, + "loss": 0.703586757183075, + "step": 2448 + }, + { + "epoch": 0.5642857142857143, + "grad_norm": 0.9237896103754119, + "learning_rate": 1.7197866219335988e-06, + "loss": 0.8399583101272583, + "step": 2449 + }, + { + "epoch": 0.5645161290322581, + "grad_norm": 0.9147331037465749, + "learning_rate": 1.7195220924898882e-06, + "loss": 0.8198127746582031, + "step": 2450 + }, + { + "epoch": 0.5647465437788018, + "grad_norm": 0.8751939719560463, + "learning_rate": 1.7192574586084977e-06, + "loss": 0.8345620632171631, + "step": 2451 + }, + { + "epoch": 0.5649769585253456, + "grad_norm": 0.5798955427424709, + "learning_rate": 1.71899272032784e-06, + "loss": 0.7717207670211792, + "step": 2452 + }, + { + "epoch": 0.5652073732718894, + "grad_norm": 1.0279650439820616, + "learning_rate": 1.7187278776863402e-06, + "loss": 0.9178022146224976, + "step": 2453 + }, + { + 
"epoch": 0.5654377880184331, + "grad_norm": 0.8586126622693072, + "learning_rate": 1.7184629307224405e-06, + "loss": 0.802221417427063, + "step": 2454 + }, + { + "epoch": 0.565668202764977, + "grad_norm": 0.9691589621671786, + "learning_rate": 1.718197879474598e-06, + "loss": 0.8785420656204224, + "step": 2455 + }, + { + "epoch": 0.5658986175115207, + "grad_norm": 0.8087978885886937, + "learning_rate": 1.7179327239812835e-06, + "loss": 0.866797924041748, + "step": 2456 + }, + { + "epoch": 0.5661290322580645, + "grad_norm": 0.7850858892434726, + "learning_rate": 1.7176674642809848e-06, + "loss": 0.8483223915100098, + "step": 2457 + }, + { + "epoch": 0.5663594470046083, + "grad_norm": 0.7634922973789945, + "learning_rate": 1.7174021004122038e-06, + "loss": 0.815066933631897, + "step": 2458 + }, + { + "epoch": 0.5665898617511521, + "grad_norm": 0.7286124953848899, + "learning_rate": 1.7171366324134575e-06, + "loss": 0.8584767580032349, + "step": 2459 + }, + { + "epoch": 0.5668202764976958, + "grad_norm": 0.8250445352678845, + "learning_rate": 1.7168710603232783e-06, + "loss": 0.8710953593254089, + "step": 2460 + }, + { + "epoch": 0.5670506912442397, + "grad_norm": 0.9434416859632441, + "learning_rate": 1.7166053841802137e-06, + "loss": 0.8174586892127991, + "step": 2461 + }, + { + "epoch": 0.5672811059907834, + "grad_norm": 0.8270311207697365, + "learning_rate": 1.7163396040228263e-06, + "loss": 0.7240795493125916, + "step": 2462 + }, + { + "epoch": 0.5675115207373271, + "grad_norm": 0.9011815170935621, + "learning_rate": 1.7160737198896938e-06, + "loss": 0.8026313781738281, + "step": 2463 + }, + { + "epoch": 0.567741935483871, + "grad_norm": 0.906377679717593, + "learning_rate": 1.7158077318194088e-06, + "loss": 0.8170863389968872, + "step": 2464 + }, + { + "epoch": 0.5679723502304147, + "grad_norm": 0.7708394273236241, + "learning_rate": 1.7155416398505794e-06, + "loss": 0.7524861097335815, + "step": 2465 + }, + { + "epoch": 0.5682027649769585, + "grad_norm": 
1.053627484653556, + "learning_rate": 1.7152754440218278e-06, + "loss": 0.9895739555358887, + "step": 2466 + }, + { + "epoch": 0.5684331797235023, + "grad_norm": 0.8044893250734789, + "learning_rate": 1.7150091443717924e-06, + "loss": 0.840786874294281, + "step": 2467 + }, + { + "epoch": 0.5686635944700461, + "grad_norm": 0.7235386782272144, + "learning_rate": 1.7147427409391265e-06, + "loss": 0.8896929025650024, + "step": 2468 + }, + { + "epoch": 0.5688940092165898, + "grad_norm": 0.930785639448215, + "learning_rate": 1.714476233762498e-06, + "loss": 0.9940589666366577, + "step": 2469 + }, + { + "epoch": 0.5691244239631337, + "grad_norm": 0.8541894175832414, + "learning_rate": 1.7142096228805896e-06, + "loss": 0.8827046155929565, + "step": 2470 + }, + { + "epoch": 0.5693548387096774, + "grad_norm": 0.8477738552913107, + "learning_rate": 1.7139429083321003e-06, + "loss": 0.8402417302131653, + "step": 2471 + }, + { + "epoch": 0.5695852534562212, + "grad_norm": 1.0681644319875638, + "learning_rate": 1.7136760901557428e-06, + "loss": 0.9298208951950073, + "step": 2472 + }, + { + "epoch": 0.569815668202765, + "grad_norm": 0.799198798955049, + "learning_rate": 1.7134091683902456e-06, + "loss": 0.7272841930389404, + "step": 2473 + }, + { + "epoch": 0.5700460829493088, + "grad_norm": 0.9504491625382946, + "learning_rate": 1.7131421430743522e-06, + "loss": 0.7767274379730225, + "step": 2474 + }, + { + "epoch": 0.5702764976958525, + "grad_norm": 0.8321899881110706, + "learning_rate": 1.7128750142468205e-06, + "loss": 0.8381883502006531, + "step": 2475 + }, + { + "epoch": 0.5705069124423963, + "grad_norm": 0.722993858034587, + "learning_rate": 1.7126077819464247e-06, + "loss": 0.6917109489440918, + "step": 2476 + }, + { + "epoch": 0.5707373271889401, + "grad_norm": 0.8529687693157456, + "learning_rate": 1.712340446211952e-06, + "loss": 0.848122239112854, + "step": 2477 + }, + { + "epoch": 0.5709677419354838, + "grad_norm": 0.8115142651418973, + "learning_rate": 
1.7120730070822074e-06, + "loss": 0.7880194187164307, + "step": 2478 + }, + { + "epoch": 0.5711981566820277, + "grad_norm": 0.7900923038142705, + "learning_rate": 1.7118054645960077e-06, + "loss": 0.8782297372817993, + "step": 2479 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 0.8386744568018749, + "learning_rate": 1.7115378187921876e-06, + "loss": 0.9030005931854248, + "step": 2480 + }, + { + "epoch": 0.5716589861751152, + "grad_norm": 1.0512780177061767, + "learning_rate": 1.7112700697095953e-06, + "loss": 0.9950683116912842, + "step": 2481 + }, + { + "epoch": 0.571889400921659, + "grad_norm": 0.7851257012482162, + "learning_rate": 1.7110022173870933e-06, + "loss": 0.8825187683105469, + "step": 2482 + }, + { + "epoch": 0.5721198156682028, + "grad_norm": 0.7742449968104124, + "learning_rate": 1.710734261863561e-06, + "loss": 0.7918775081634521, + "step": 2483 + }, + { + "epoch": 0.5723502304147465, + "grad_norm": 0.8385191739759446, + "learning_rate": 1.7104662031778916e-06, + "loss": 1.0219467878341675, + "step": 2484 + }, + { + "epoch": 0.5725806451612904, + "grad_norm": 0.7273611559924746, + "learning_rate": 1.7101980413689931e-06, + "loss": 0.7633316516876221, + "step": 2485 + }, + { + "epoch": 0.5728110599078341, + "grad_norm": 0.9207367628977638, + "learning_rate": 1.7099297764757891e-06, + "loss": 0.8972171545028687, + "step": 2486 + }, + { + "epoch": 0.5730414746543778, + "grad_norm": 0.9268590747994748, + "learning_rate": 1.7096614085372183e-06, + "loss": 0.9467268586158752, + "step": 2487 + }, + { + "epoch": 0.5732718894009217, + "grad_norm": 0.6697903314360253, + "learning_rate": 1.709392937592233e-06, + "loss": 0.7688668370246887, + "step": 2488 + }, + { + "epoch": 0.5735023041474654, + "grad_norm": 0.9069250629096394, + "learning_rate": 1.7091243636798022e-06, + "loss": 0.8521163463592529, + "step": 2489 + }, + { + "epoch": 0.5737327188940092, + "grad_norm": 1.1876566208797892, + "learning_rate": 1.7088556868389087e-06, + "loss": 
0.937403678894043, + "step": 2490 + }, + { + "epoch": 0.573963133640553, + "grad_norm": 0.7484200220587712, + "learning_rate": 1.7085869071085507e-06, + "loss": 0.929175853729248, + "step": 2491 + }, + { + "epoch": 0.5741935483870968, + "grad_norm": 0.75868423962596, + "learning_rate": 1.708318024527741e-06, + "loss": 0.8213154673576355, + "step": 2492 + }, + { + "epoch": 0.5744239631336405, + "grad_norm": 0.8570973138589657, + "learning_rate": 1.708049039135508e-06, + "loss": 0.7666962146759033, + "step": 2493 + }, + { + "epoch": 0.5746543778801844, + "grad_norm": 0.944726193523685, + "learning_rate": 1.707779950970894e-06, + "loss": 0.9787846803665161, + "step": 2494 + }, + { + "epoch": 0.5748847926267281, + "grad_norm": 0.9499725243145639, + "learning_rate": 1.7075107600729575e-06, + "loss": 0.9688804149627686, + "step": 2495 + }, + { + "epoch": 0.5751152073732719, + "grad_norm": 0.7169812071362754, + "learning_rate": 1.7072414664807706e-06, + "loss": 0.7186019420623779, + "step": 2496 + }, + { + "epoch": 0.5753456221198157, + "grad_norm": 0.8737696103531859, + "learning_rate": 1.706972070233421e-06, + "loss": 0.814068615436554, + "step": 2497 + }, + { + "epoch": 0.5755760368663595, + "grad_norm": 0.8930538892783126, + "learning_rate": 1.7067025713700111e-06, + "loss": 0.8439940214157104, + "step": 2498 + }, + { + "epoch": 0.5758064516129032, + "grad_norm": 1.0358274070142592, + "learning_rate": 1.706432969929659e-06, + "loss": 1.0199556350708008, + "step": 2499 + }, + { + "epoch": 0.576036866359447, + "grad_norm": 0.8418547467759998, + "learning_rate": 1.7061632659514964e-06, + "loss": 0.9422338008880615, + "step": 2500 + }, + { + "epoch": 0.5762672811059908, + "grad_norm": 0.8692517624840741, + "learning_rate": 1.7058934594746704e-06, + "loss": 0.9307081699371338, + "step": 2501 + }, + { + "epoch": 0.5764976958525345, + "grad_norm": 0.8121605874769848, + "learning_rate": 1.7056235505383433e-06, + "loss": 0.7202768325805664, + "step": 2502 + }, + { + "epoch": 
0.5767281105990784, + "grad_norm": 0.915285295701684, + "learning_rate": 1.7053535391816923e-06, + "loss": 1.0184223651885986, + "step": 2503 + }, + { + "epoch": 0.5769585253456221, + "grad_norm": 0.8238573361353964, + "learning_rate": 1.7050834254439085e-06, + "loss": 0.7957574129104614, + "step": 2504 + }, + { + "epoch": 0.5771889400921659, + "grad_norm": 0.9632097611385487, + "learning_rate": 1.7048132093641989e-06, + "loss": 0.9694541096687317, + "step": 2505 + }, + { + "epoch": 0.5774193548387097, + "grad_norm": 0.7406781740567284, + "learning_rate": 1.704542890981785e-06, + "loss": 0.8427075147628784, + "step": 2506 + }, + { + "epoch": 0.5776497695852535, + "grad_norm": 0.7137957479223747, + "learning_rate": 1.7042724703359032e-06, + "loss": 0.7745763063430786, + "step": 2507 + }, + { + "epoch": 0.5778801843317972, + "grad_norm": 0.8935647722203462, + "learning_rate": 1.7040019474658047e-06, + "loss": 0.8179641962051392, + "step": 2508 + }, + { + "epoch": 0.5781105990783411, + "grad_norm": 0.9010033541227577, + "learning_rate": 1.7037313224107557e-06, + "loss": 0.8118200302124023, + "step": 2509 + }, + { + "epoch": 0.5783410138248848, + "grad_norm": 0.7297456575398072, + "learning_rate": 1.7034605952100364e-06, + "loss": 0.7892665863037109, + "step": 2510 + }, + { + "epoch": 0.5785714285714286, + "grad_norm": 0.736874372872981, + "learning_rate": 1.7031897659029434e-06, + "loss": 0.7442026734352112, + "step": 2511 + }, + { + "epoch": 0.5788018433179724, + "grad_norm": 0.9375581770522491, + "learning_rate": 1.7029188345287865e-06, + "loss": 0.8179585933685303, + "step": 2512 + }, + { + "epoch": 0.5790322580645161, + "grad_norm": 0.8710660194733852, + "learning_rate": 1.7026478011268918e-06, + "loss": 0.7569797039031982, + "step": 2513 + }, + { + "epoch": 0.5792626728110599, + "grad_norm": 0.8952615874674131, + "learning_rate": 1.7023766657365984e-06, + "loss": 0.8464581966400146, + "step": 2514 + }, + { + "epoch": 0.5794930875576036, + "grad_norm": 
0.9645554070219402, + "learning_rate": 1.702105428397262e-06, + "loss": 0.7326645255088806, + "step": 2515 + }, + { + "epoch": 0.5797235023041475, + "grad_norm": 0.8243138835822689, + "learning_rate": 1.7018340891482522e-06, + "loss": 0.7993732690811157, + "step": 2516 + }, + { + "epoch": 0.5799539170506912, + "grad_norm": 0.7406582307230963, + "learning_rate": 1.7015626480289532e-06, + "loss": 0.8124513626098633, + "step": 2517 + }, + { + "epoch": 0.580184331797235, + "grad_norm": 0.7758431888553803, + "learning_rate": 1.701291105078765e-06, + "loss": 0.9075840711593628, + "step": 2518 + }, + { + "epoch": 0.5804147465437788, + "grad_norm": 0.8900052121004013, + "learning_rate": 1.7010194603371009e-06, + "loss": 0.8212069272994995, + "step": 2519 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.8737089153257858, + "learning_rate": 1.7007477138433903e-06, + "loss": 0.7582074999809265, + "step": 2520 + }, + { + "epoch": 0.5808755760368663, + "grad_norm": 0.7402264811343096, + "learning_rate": 1.7004758656370769e-06, + "loss": 0.8917636871337891, + "step": 2521 + }, + { + "epoch": 0.5811059907834102, + "grad_norm": 0.9496944008191128, + "learning_rate": 1.7002039157576186e-06, + "loss": 0.8919704556465149, + "step": 2522 + }, + { + "epoch": 0.5813364055299539, + "grad_norm": 0.8803733592170607, + "learning_rate": 1.699931864244489e-06, + "loss": 0.7474988698959351, + "step": 2523 + }, + { + "epoch": 0.5815668202764976, + "grad_norm": 0.9179665061824968, + "learning_rate": 1.6996597111371758e-06, + "loss": 0.8596241474151611, + "step": 2524 + }, + { + "epoch": 0.5817972350230415, + "grad_norm": 0.8260474861422493, + "learning_rate": 1.699387456475182e-06, + "loss": 0.9316335916519165, + "step": 2525 + }, + { + "epoch": 0.5820276497695852, + "grad_norm": 0.7937616616577486, + "learning_rate": 1.6991151002980248e-06, + "loss": 0.7364813089370728, + "step": 2526 + }, + { + "epoch": 0.582258064516129, + "grad_norm": 0.9072210580359311, + "learning_rate": 
1.698842642645236e-06, + "loss": 0.789472758769989, + "step": 2527 + }, + { + "epoch": 0.5824884792626728, + "grad_norm": 0.9988239379820413, + "learning_rate": 1.6985700835563627e-06, + "loss": 1.024861216545105, + "step": 2528 + }, + { + "epoch": 0.5827188940092166, + "grad_norm": 0.9746619752287254, + "learning_rate": 1.6982974230709667e-06, + "loss": 0.8465025424957275, + "step": 2529 + }, + { + "epoch": 0.5829493087557603, + "grad_norm": 1.0146741583341603, + "learning_rate": 1.6980246612286244e-06, + "loss": 0.7502799034118652, + "step": 2530 + }, + { + "epoch": 0.5831797235023042, + "grad_norm": 0.866831185770848, + "learning_rate": 1.6977517980689264e-06, + "loss": 0.8019870519638062, + "step": 2531 + }, + { + "epoch": 0.5834101382488479, + "grad_norm": 0.783761351839215, + "learning_rate": 1.6974788336314788e-06, + "loss": 0.9048774242401123, + "step": 2532 + }, + { + "epoch": 0.5836405529953917, + "grad_norm": 0.8577409607010705, + "learning_rate": 1.6972057679559018e-06, + "loss": 0.8411067724227905, + "step": 2533 + }, + { + "epoch": 0.5838709677419355, + "grad_norm": 0.7158353942796929, + "learning_rate": 1.6969326010818304e-06, + "loss": 0.7399133443832397, + "step": 2534 + }, + { + "epoch": 0.5841013824884793, + "grad_norm": 0.7309631229110555, + "learning_rate": 1.6966593330489144e-06, + "loss": 0.7553995847702026, + "step": 2535 + }, + { + "epoch": 0.584331797235023, + "grad_norm": 0.7563702103772202, + "learning_rate": 1.6963859638968188e-06, + "loss": 0.8405054807662964, + "step": 2536 + }, + { + "epoch": 0.5845622119815668, + "grad_norm": 0.739785555800379, + "learning_rate": 1.6961124936652223e-06, + "loss": 0.7619640231132507, + "step": 2537 + }, + { + "epoch": 0.5847926267281106, + "grad_norm": 0.6189871014888121, + "learning_rate": 1.6958389223938187e-06, + "loss": 0.7785576581954956, + "step": 2538 + }, + { + "epoch": 0.5850230414746543, + "grad_norm": 1.0593569746028593, + "learning_rate": 1.695565250122317e-06, + "loss": 
0.9230754375457764, + "step": 2539 + }, + { + "epoch": 0.5852534562211982, + "grad_norm": 0.9087046574881754, + "learning_rate": 1.69529147689044e-06, + "loss": 0.798599362373352, + "step": 2540 + }, + { + "epoch": 0.5854838709677419, + "grad_norm": 0.7546263570181881, + "learning_rate": 1.6950176027379253e-06, + "loss": 0.8491491079330444, + "step": 2541 + }, + { + "epoch": 0.5857142857142857, + "grad_norm": 0.9063392015432612, + "learning_rate": 1.694743627704526e-06, + "loss": 0.7906054854393005, + "step": 2542 + }, + { + "epoch": 0.5859447004608295, + "grad_norm": 0.8834118839199732, + "learning_rate": 1.6944695518300084e-06, + "loss": 0.8178746700286865, + "step": 2543 + }, + { + "epoch": 0.5861751152073733, + "grad_norm": 0.9444844508582247, + "learning_rate": 1.6941953751541552e-06, + "loss": 0.867972731590271, + "step": 2544 + }, + { + "epoch": 0.586405529953917, + "grad_norm": 0.8815618278989616, + "learning_rate": 1.6939210977167622e-06, + "loss": 0.8000613451004028, + "step": 2545 + }, + { + "epoch": 0.5866359447004609, + "grad_norm": 0.938056940810552, + "learning_rate": 1.6936467195576403e-06, + "loss": 0.8473562002182007, + "step": 2546 + }, + { + "epoch": 0.5868663594470046, + "grad_norm": 0.960324746454341, + "learning_rate": 1.6933722407166156e-06, + "loss": 0.971686065196991, + "step": 2547 + }, + { + "epoch": 0.5870967741935483, + "grad_norm": 0.718798566737211, + "learning_rate": 1.6930976612335276e-06, + "loss": 0.6679604053497314, + "step": 2548 + }, + { + "epoch": 0.5873271889400922, + "grad_norm": 0.8662288511956259, + "learning_rate": 1.692822981148232e-06, + "loss": 0.81952303647995, + "step": 2549 + }, + { + "epoch": 0.5875576036866359, + "grad_norm": 0.7171085968938, + "learning_rate": 1.6925482005005978e-06, + "loss": 0.8711779713630676, + "step": 2550 + }, + { + "epoch": 0.5877880184331797, + "grad_norm": 0.8419799604008648, + "learning_rate": 1.6922733193305093e-06, + "loss": 0.930451512336731, + "step": 2551 + }, + { + "epoch": 
0.5880184331797235, + "grad_norm": 0.8349862719015169, + "learning_rate": 1.6919983376778647e-06, + "loss": 0.8435598611831665, + "step": 2552 + }, + { + "epoch": 0.5882488479262673, + "grad_norm": 0.8491940209701643, + "learning_rate": 1.6917232555825774e-06, + "loss": 0.8868621587753296, + "step": 2553 + }, + { + "epoch": 0.588479262672811, + "grad_norm": 0.7537041162487105, + "learning_rate": 1.6914480730845752e-06, + "loss": 0.6821786165237427, + "step": 2554 + }, + { + "epoch": 0.5887096774193549, + "grad_norm": 0.8487688242201222, + "learning_rate": 1.691172790223801e-06, + "loss": 0.7241402864456177, + "step": 2555 + }, + { + "epoch": 0.5889400921658986, + "grad_norm": 0.7422220828348832, + "learning_rate": 1.690897407040211e-06, + "loss": 0.7477490305900574, + "step": 2556 + }, + { + "epoch": 0.5891705069124424, + "grad_norm": 0.7636915444427955, + "learning_rate": 1.690621923573777e-06, + "loss": 0.7881484031677246, + "step": 2557 + }, + { + "epoch": 0.5894009216589862, + "grad_norm": 0.959692830610789, + "learning_rate": 1.6903463398644848e-06, + "loss": 0.8292979001998901, + "step": 2558 + }, + { + "epoch": 0.58963133640553, + "grad_norm": 0.711937804642515, + "learning_rate": 1.690070655952336e-06, + "loss": 0.7068917751312256, + "step": 2559 + }, + { + "epoch": 0.5898617511520737, + "grad_norm": 1.1143023950252693, + "learning_rate": 1.6897948718773443e-06, + "loss": 0.8907356262207031, + "step": 2560 + }, + { + "epoch": 0.5900921658986175, + "grad_norm": 0.7930222105996996, + "learning_rate": 1.6895189876795405e-06, + "loss": 0.7762824892997742, + "step": 2561 + }, + { + "epoch": 0.5903225806451613, + "grad_norm": 1.0922797891559575, + "learning_rate": 1.6892430033989685e-06, + "loss": 0.9682759046554565, + "step": 2562 + }, + { + "epoch": 0.590552995391705, + "grad_norm": 0.8231082510824629, + "learning_rate": 1.6889669190756866e-06, + "loss": 0.7594735622406006, + "step": 2563 + }, + { + "epoch": 0.5907834101382489, + "grad_norm": 
0.8117866090414669, + "learning_rate": 1.6886907347497687e-06, + "loss": 0.8161605000495911, + "step": 2564 + }, + { + "epoch": 0.5910138248847926, + "grad_norm": 0.8557086150703954, + "learning_rate": 1.6884144504613023e-06, + "loss": 0.9390331506729126, + "step": 2565 + }, + { + "epoch": 0.5912442396313364, + "grad_norm": 0.9387748138594502, + "learning_rate": 1.68813806625039e-06, + "loss": 0.8895832300186157, + "step": 2566 + }, + { + "epoch": 0.5914746543778802, + "grad_norm": 0.8802161511936953, + "learning_rate": 1.687861582157148e-06, + "loss": 0.7779919505119324, + "step": 2567 + }, + { + "epoch": 0.591705069124424, + "grad_norm": 1.139110447936057, + "learning_rate": 1.687584998221708e-06, + "loss": 0.8974252343177795, + "step": 2568 + }, + { + "epoch": 0.5919354838709677, + "grad_norm": 0.8073269492940187, + "learning_rate": 1.687308314484216e-06, + "loss": 0.8487393856048584, + "step": 2569 + }, + { + "epoch": 0.5921658986175116, + "grad_norm": 0.8310515688854938, + "learning_rate": 1.6870315309848318e-06, + "loss": 0.8356295824050903, + "step": 2570 + }, + { + "epoch": 0.5923963133640553, + "grad_norm": 0.9033360313158958, + "learning_rate": 1.6867546477637307e-06, + "loss": 0.8180248737335205, + "step": 2571 + }, + { + "epoch": 0.5926267281105991, + "grad_norm": 0.6950974205275126, + "learning_rate": 1.6864776648611013e-06, + "loss": 0.8456830978393555, + "step": 2572 + }, + { + "epoch": 0.5928571428571429, + "grad_norm": 0.9039181033590447, + "learning_rate": 1.6862005823171476e-06, + "loss": 0.8378905057907104, + "step": 2573 + }, + { + "epoch": 0.5930875576036866, + "grad_norm": 0.835432630485808, + "learning_rate": 1.685923400172088e-06, + "loss": 0.8060408234596252, + "step": 2574 + }, + { + "epoch": 0.5933179723502304, + "grad_norm": 0.8354491785263655, + "learning_rate": 1.685646118466155e-06, + "loss": 0.7550709247589111, + "step": 2575 + }, + { + "epoch": 0.5935483870967742, + "grad_norm": 0.805260271869055, + "learning_rate": 
1.6853687372395955e-06, + "loss": 0.8475208282470703, + "step": 2576 + }, + { + "epoch": 0.593778801843318, + "grad_norm": 1.0626255995304192, + "learning_rate": 1.6850912565326709e-06, + "loss": 0.8681533336639404, + "step": 2577 + }, + { + "epoch": 0.5940092165898617, + "grad_norm": 0.9000714044087056, + "learning_rate": 1.6848136763856573e-06, + "loss": 0.7756578922271729, + "step": 2578 + }, + { + "epoch": 0.5942396313364056, + "grad_norm": 1.1163759985623336, + "learning_rate": 1.6845359968388456e-06, + "loss": 0.8910564184188843, + "step": 2579 + }, + { + "epoch": 0.5944700460829493, + "grad_norm": 0.7484768523036672, + "learning_rate": 1.6842582179325397e-06, + "loss": 0.7293382883071899, + "step": 2580 + }, + { + "epoch": 0.5947004608294931, + "grad_norm": 0.8208214849988605, + "learning_rate": 1.6839803397070597e-06, + "loss": 0.8497427105903625, + "step": 2581 + }, + { + "epoch": 0.5949308755760369, + "grad_norm": 0.9124854441462121, + "learning_rate": 1.6837023622027386e-06, + "loss": 0.800891637802124, + "step": 2582 + }, + { + "epoch": 0.5951612903225807, + "grad_norm": 0.8887114325795745, + "learning_rate": 1.683424285459925e-06, + "loss": 0.889703631401062, + "step": 2583 + }, + { + "epoch": 0.5953917050691244, + "grad_norm": 0.83139201735135, + "learning_rate": 1.6831461095189808e-06, + "loss": 0.7500913143157959, + "step": 2584 + }, + { + "epoch": 0.5956221198156681, + "grad_norm": 0.8260167845821169, + "learning_rate": 1.6828678344202834e-06, + "loss": 0.8575263023376465, + "step": 2585 + }, + { + "epoch": 0.595852534562212, + "grad_norm": 0.8796083393133354, + "learning_rate": 1.6825894602042238e-06, + "loss": 0.7754372358322144, + "step": 2586 + }, + { + "epoch": 0.5960829493087557, + "grad_norm": 1.0529816523070568, + "learning_rate": 1.6823109869112074e-06, + "loss": 0.8861502408981323, + "step": 2587 + }, + { + "epoch": 0.5963133640552996, + "grad_norm": 0.7738036894554111, + "learning_rate": 1.6820324145816548e-06, + "loss": 
0.725920557975769, + "step": 2588 + }, + { + "epoch": 0.5965437788018433, + "grad_norm": 0.7887605961214393, + "learning_rate": 1.6817537432559998e-06, + "loss": 0.6195499897003174, + "step": 2589 + }, + { + "epoch": 0.5967741935483871, + "grad_norm": 0.8405918169035362, + "learning_rate": 1.6814749729746918e-06, + "loss": 0.8757472038269043, + "step": 2590 + }, + { + "epoch": 0.5970046082949308, + "grad_norm": 0.8710168774832879, + "learning_rate": 1.6811961037781934e-06, + "loss": 0.8024059534072876, + "step": 2591 + }, + { + "epoch": 0.5972350230414747, + "grad_norm": 1.1763814328442668, + "learning_rate": 1.6809171357069825e-06, + "loss": 0.8397082090377808, + "step": 2592 + }, + { + "epoch": 0.5974654377880184, + "grad_norm": 0.8163820389720032, + "learning_rate": 1.6806380688015507e-06, + "loss": 0.7693872451782227, + "step": 2593 + }, + { + "epoch": 0.5976958525345623, + "grad_norm": 0.7668441612993817, + "learning_rate": 1.6803589031024043e-06, + "loss": 0.7918043732643127, + "step": 2594 + }, + { + "epoch": 0.597926267281106, + "grad_norm": 0.7951277033960863, + "learning_rate": 1.680079638650064e-06, + "loss": 0.8046969175338745, + "step": 2595 + }, + { + "epoch": 0.5981566820276498, + "grad_norm": 0.9724191958452253, + "learning_rate": 1.6798002754850643e-06, + "loss": 0.7889789938926697, + "step": 2596 + }, + { + "epoch": 0.5983870967741935, + "grad_norm": 0.8356070849986357, + "learning_rate": 1.6795208136479543e-06, + "loss": 0.874780535697937, + "step": 2597 + }, + { + "epoch": 0.5986175115207373, + "grad_norm": 0.8380940855873632, + "learning_rate": 1.679241253179298e-06, + "loss": 0.8728631734848022, + "step": 2598 + }, + { + "epoch": 0.5988479262672811, + "grad_norm": 0.7909132896338992, + "learning_rate": 1.678961594119673e-06, + "loss": 0.5940345525741577, + "step": 2599 + }, + { + "epoch": 0.5990783410138248, + "grad_norm": 0.7873638428289793, + "learning_rate": 1.6786818365096712e-06, + "loss": 0.8524528741836548, + "step": 2600 + }, + { + 
"epoch": 0.5993087557603687, + "grad_norm": 1.2099119623298256, + "learning_rate": 1.6784019803899e-06, + "loss": 1.0738554000854492, + "step": 2601 + }, + { + "epoch": 0.5995391705069124, + "grad_norm": 0.9987206599474828, + "learning_rate": 1.6781220258009787e-06, + "loss": 0.9146362543106079, + "step": 2602 + }, + { + "epoch": 0.5997695852534562, + "grad_norm": 0.9546196333490053, + "learning_rate": 1.6778419727835434e-06, + "loss": 0.8846019506454468, + "step": 2603 + }, + { + "epoch": 0.6, + "grad_norm": 1.0356705992849526, + "learning_rate": 1.6775618213782427e-06, + "loss": 0.9564694166183472, + "step": 2604 + }, + { + "epoch": 0.6002304147465438, + "grad_norm": 0.8649265876220377, + "learning_rate": 1.6772815716257411e-06, + "loss": 0.7311475276947021, + "step": 2605 + }, + { + "epoch": 0.6004608294930875, + "grad_norm": 0.9996641063184493, + "learning_rate": 1.6770012235667157e-06, + "loss": 0.8198719024658203, + "step": 2606 + }, + { + "epoch": 0.6006912442396314, + "grad_norm": 0.8625199282325245, + "learning_rate": 1.676720777241859e-06, + "loss": 0.7667897939682007, + "step": 2607 + }, + { + "epoch": 0.6009216589861751, + "grad_norm": 0.8068998344787891, + "learning_rate": 1.6764402326918775e-06, + "loss": 0.8438166379928589, + "step": 2608 + }, + { + "epoch": 0.6011520737327188, + "grad_norm": 0.8540979807575545, + "learning_rate": 1.6761595899574913e-06, + "loss": 0.801039457321167, + "step": 2609 + }, + { + "epoch": 0.6013824884792627, + "grad_norm": 0.8234203241271092, + "learning_rate": 1.6758788490794362e-06, + "loss": 0.8063384294509888, + "step": 2610 + }, + { + "epoch": 0.6016129032258064, + "grad_norm": 0.6526013686548677, + "learning_rate": 1.6755980100984609e-06, + "loss": 0.7574378848075867, + "step": 2611 + }, + { + "epoch": 0.6018433179723502, + "grad_norm": 0.9515660687698646, + "learning_rate": 1.6753170730553285e-06, + "loss": 0.7640282511711121, + "step": 2612 + }, + { + "epoch": 0.602073732718894, + "grad_norm": 0.8028588885811085, 
+ "learning_rate": 1.675036037990817e-06, + "loss": 0.8366582989692688, + "step": 2613 + }, + { + "epoch": 0.6023041474654378, + "grad_norm": 0.9790278189412774, + "learning_rate": 1.6747549049457184e-06, + "loss": 0.851488471031189, + "step": 2614 + }, + { + "epoch": 0.6025345622119815, + "grad_norm": 0.8888933014827352, + "learning_rate": 1.6744736739608385e-06, + "loss": 0.6821870803833008, + "step": 2615 + }, + { + "epoch": 0.6027649769585254, + "grad_norm": 0.9884428615602953, + "learning_rate": 1.6741923450769977e-06, + "loss": 0.9263452887535095, + "step": 2616 + }, + { + "epoch": 0.6029953917050691, + "grad_norm": 0.7660541738576696, + "learning_rate": 1.6739109183350303e-06, + "loss": 0.7471155524253845, + "step": 2617 + }, + { + "epoch": 0.603225806451613, + "grad_norm": 0.8463548916487829, + "learning_rate": 1.6736293937757858e-06, + "loss": 0.8859940767288208, + "step": 2618 + }, + { + "epoch": 0.6034562211981567, + "grad_norm": 0.7725702923302962, + "learning_rate": 1.673347771440126e-06, + "loss": 0.8078656792640686, + "step": 2619 + }, + { + "epoch": 0.6036866359447005, + "grad_norm": 0.8796637852565455, + "learning_rate": 1.673066051368929e-06, + "loss": 0.7663185596466064, + "step": 2620 + }, + { + "epoch": 0.6039170506912442, + "grad_norm": 0.7762146466532337, + "learning_rate": 1.6727842336030855e-06, + "loss": 0.7924770712852478, + "step": 2621 + }, + { + "epoch": 0.604147465437788, + "grad_norm": 0.6362525346897695, + "learning_rate": 1.672502318183501e-06, + "loss": 0.7781439423561096, + "step": 2622 + }, + { + "epoch": 0.6043778801843318, + "grad_norm": 0.7824821748809755, + "learning_rate": 1.6722203051510953e-06, + "loss": 0.9342260360717773, + "step": 2623 + }, + { + "epoch": 0.6046082949308755, + "grad_norm": 0.9113412146225311, + "learning_rate": 1.6719381945468024e-06, + "loss": 0.8589230179786682, + "step": 2624 + }, + { + "epoch": 0.6048387096774194, + "grad_norm": 0.9092021688294594, + "learning_rate": 1.67165598641157e-06, + "loss": 
0.8692198991775513, + "step": 2625 + }, + { + "epoch": 0.6050691244239631, + "grad_norm": 0.9811252814075038, + "learning_rate": 1.6713736807863606e-06, + "loss": 0.9220771789550781, + "step": 2626 + }, + { + "epoch": 0.6052995391705069, + "grad_norm": 0.7869789442575379, + "learning_rate": 1.6710912777121497e-06, + "loss": 0.670639157295227, + "step": 2627 + }, + { + "epoch": 0.6055299539170507, + "grad_norm": 0.8458627233906328, + "learning_rate": 1.6708087772299287e-06, + "loss": 0.780914306640625, + "step": 2628 + }, + { + "epoch": 0.6057603686635945, + "grad_norm": 0.7718782555310939, + "learning_rate": 1.6705261793807014e-06, + "loss": 0.836430549621582, + "step": 2629 + }, + { + "epoch": 0.6059907834101382, + "grad_norm": 0.8965474432723056, + "learning_rate": 1.670243484205487e-06, + "loss": 0.84266197681427, + "step": 2630 + }, + { + "epoch": 0.6062211981566821, + "grad_norm": 0.8992013517980091, + "learning_rate": 1.6699606917453184e-06, + "loss": 0.9276752471923828, + "step": 2631 + }, + { + "epoch": 0.6064516129032258, + "grad_norm": 0.8740634897243095, + "learning_rate": 1.6696778020412418e-06, + "loss": 0.8319100141525269, + "step": 2632 + }, + { + "epoch": 0.6066820276497696, + "grad_norm": 0.9778851785690291, + "learning_rate": 1.669394815134319e-06, + "loss": 0.7511987686157227, + "step": 2633 + }, + { + "epoch": 0.6069124423963134, + "grad_norm": 0.9559089829828732, + "learning_rate": 1.6691117310656249e-06, + "loss": 0.7847566604614258, + "step": 2634 + }, + { + "epoch": 0.6071428571428571, + "grad_norm": 0.7352732117136743, + "learning_rate": 1.668828549876249e-06, + "loss": 0.8598428964614868, + "step": 2635 + }, + { + "epoch": 0.6073732718894009, + "grad_norm": 0.9632462301651329, + "learning_rate": 1.6685452716072942e-06, + "loss": 0.8676267266273499, + "step": 2636 + }, + { + "epoch": 0.6076036866359447, + "grad_norm": 0.9796050613045469, + "learning_rate": 1.6682618962998787e-06, + "loss": 0.8139858841896057, + "step": 2637 + }, + { + 
"epoch": 0.6078341013824885, + "grad_norm": 0.9214980939594923, + "learning_rate": 1.6679784239951334e-06, + "loss": 0.878848671913147, + "step": 2638 + }, + { + "epoch": 0.6080645161290322, + "grad_norm": 0.8942413316087445, + "learning_rate": 1.6676948547342038e-06, + "loss": 0.7094229459762573, + "step": 2639 + }, + { + "epoch": 0.6082949308755761, + "grad_norm": 0.7183954232108332, + "learning_rate": 1.6674111885582502e-06, + "loss": 0.7908186912536621, + "step": 2640 + }, + { + "epoch": 0.6085253456221198, + "grad_norm": 0.705517985038791, + "learning_rate": 1.6671274255084465e-06, + "loss": 0.7205992341041565, + "step": 2641 + }, + { + "epoch": 0.6087557603686636, + "grad_norm": 0.937951031991606, + "learning_rate": 1.6668435656259796e-06, + "loss": 0.8098955750465393, + "step": 2642 + }, + { + "epoch": 0.6089861751152074, + "grad_norm": 0.8047793122116887, + "learning_rate": 1.6665596089520522e-06, + "loss": 0.9344205856323242, + "step": 2643 + }, + { + "epoch": 0.6092165898617512, + "grad_norm": 0.73132257965357, + "learning_rate": 1.6662755555278798e-06, + "loss": 0.6149121522903442, + "step": 2644 + }, + { + "epoch": 0.6094470046082949, + "grad_norm": 1.1550816011183633, + "learning_rate": 1.6659914053946929e-06, + "loss": 0.790631115436554, + "step": 2645 + }, + { + "epoch": 0.6096774193548387, + "grad_norm": 0.9832349740984434, + "learning_rate": 1.6657071585937349e-06, + "loss": 0.7789372801780701, + "step": 2646 + }, + { + "epoch": 0.6099078341013825, + "grad_norm": 0.7425679816784971, + "learning_rate": 1.6654228151662641e-06, + "loss": 0.9119753837585449, + "step": 2647 + }, + { + "epoch": 0.6101382488479262, + "grad_norm": 1.0635804319271085, + "learning_rate": 1.6651383751535526e-06, + "loss": 0.827568769454956, + "step": 2648 + }, + { + "epoch": 0.6103686635944701, + "grad_norm": 0.9620609244203838, + "learning_rate": 1.6648538385968865e-06, + "loss": 0.8862377405166626, + "step": 2649 + }, + { + "epoch": 0.6105990783410138, + "grad_norm": 
0.7954209003880245, + "learning_rate": 1.6645692055375658e-06, + "loss": 0.7765665054321289, + "step": 2650 + }, + { + "epoch": 0.6108294930875576, + "grad_norm": 0.7698374340240739, + "learning_rate": 1.6642844760169048e-06, + "loss": 0.7673745155334473, + "step": 2651 + }, + { + "epoch": 0.6110599078341014, + "grad_norm": 1.051257553540871, + "learning_rate": 1.6639996500762313e-06, + "loss": 0.8539090752601624, + "step": 2652 + }, + { + "epoch": 0.6112903225806452, + "grad_norm": 0.8676017636407886, + "learning_rate": 1.663714727756888e-06, + "loss": 0.9146299362182617, + "step": 2653 + }, + { + "epoch": 0.6115207373271889, + "grad_norm": 0.9802646170879412, + "learning_rate": 1.6634297091002304e-06, + "loss": 0.6720675230026245, + "step": 2654 + }, + { + "epoch": 0.6117511520737328, + "grad_norm": 0.9963804792413621, + "learning_rate": 1.6631445941476287e-06, + "loss": 0.876419186592102, + "step": 2655 + }, + { + "epoch": 0.6119815668202765, + "grad_norm": 0.8251901500966289, + "learning_rate": 1.6628593829404673e-06, + "loss": 0.781826376914978, + "step": 2656 + }, + { + "epoch": 0.6122119815668203, + "grad_norm": 1.0156308960299383, + "learning_rate": 1.662574075520144e-06, + "loss": 0.8700725436210632, + "step": 2657 + }, + { + "epoch": 0.6124423963133641, + "grad_norm": 0.8730333366815507, + "learning_rate": 1.6622886719280703e-06, + "loss": 0.7927212715148926, + "step": 2658 + }, + { + "epoch": 0.6126728110599078, + "grad_norm": 0.9472958125063492, + "learning_rate": 1.6620031722056732e-06, + "loss": 0.8402982354164124, + "step": 2659 + }, + { + "epoch": 0.6129032258064516, + "grad_norm": 0.9246784332742947, + "learning_rate": 1.6617175763943916e-06, + "loss": 0.844031572341919, + "step": 2660 + }, + { + "epoch": 0.6131336405529954, + "grad_norm": 1.1749754124811849, + "learning_rate": 1.66143188453568e-06, + "loss": 0.7927590608596802, + "step": 2661 + }, + { + "epoch": 0.6133640552995392, + "grad_norm": 0.7562363270320578, + "learning_rate": 
1.6611460966710057e-06, + "loss": 0.6881238222122192, + "step": 2662 + }, + { + "epoch": 0.6135944700460829, + "grad_norm": 0.7503304726479316, + "learning_rate": 1.6608602128418512e-06, + "loss": 0.8782250881195068, + "step": 2663 + }, + { + "epoch": 0.6138248847926268, + "grad_norm": 0.764429872232153, + "learning_rate": 1.6605742330897112e-06, + "loss": 0.810072124004364, + "step": 2664 + }, + { + "epoch": 0.6140552995391705, + "grad_norm": 0.7959070796498304, + "learning_rate": 1.660288157456096e-06, + "loss": 0.9278649091720581, + "step": 2665 + }, + { + "epoch": 0.6142857142857143, + "grad_norm": 0.8518702716538695, + "learning_rate": 1.6600019859825287e-06, + "loss": 0.7821990251541138, + "step": 2666 + }, + { + "epoch": 0.614516129032258, + "grad_norm": 0.8000150810917545, + "learning_rate": 1.6597157187105474e-06, + "loss": 0.7945138216018677, + "step": 2667 + }, + { + "epoch": 0.6147465437788019, + "grad_norm": 0.9158855636867193, + "learning_rate": 1.659429355681702e-06, + "loss": 0.7796168327331543, + "step": 2668 + }, + { + "epoch": 0.6149769585253456, + "grad_norm": 0.8778480996767207, + "learning_rate": 1.659142896937559e-06, + "loss": 0.8412867784500122, + "step": 2669 + }, + { + "epoch": 0.6152073732718893, + "grad_norm": 0.8776586025383009, + "learning_rate": 1.6588563425196976e-06, + "loss": 0.8507891893386841, + "step": 2670 + }, + { + "epoch": 0.6154377880184332, + "grad_norm": 0.7470530836348557, + "learning_rate": 1.6585696924697097e-06, + "loss": 0.7538737654685974, + "step": 2671 + }, + { + "epoch": 0.6156682027649769, + "grad_norm": 0.7938343055651664, + "learning_rate": 1.6582829468292027e-06, + "loss": 0.7241994142532349, + "step": 2672 + }, + { + "epoch": 0.6158986175115208, + "grad_norm": 0.7740707689038899, + "learning_rate": 1.6579961056397979e-06, + "loss": 0.8282276391983032, + "step": 2673 + }, + { + "epoch": 0.6161290322580645, + "grad_norm": 0.9834275785675608, + "learning_rate": 1.657709168943129e-06, + "loss": 
0.7823094725608826, + "step": 2674 + }, + { + "epoch": 0.6163594470046083, + "grad_norm": 0.7814560466718257, + "learning_rate": 1.6574221367808452e-06, + "loss": 0.7682117819786072, + "step": 2675 + }, + { + "epoch": 0.616589861751152, + "grad_norm": 0.791790817396352, + "learning_rate": 1.6571350091946084e-06, + "loss": 0.7483188509941101, + "step": 2676 + }, + { + "epoch": 0.6168202764976959, + "grad_norm": 0.7904062559480196, + "learning_rate": 1.656847786226095e-06, + "loss": 0.8244579434394836, + "step": 2677 + }, + { + "epoch": 0.6170506912442396, + "grad_norm": 0.935192090002093, + "learning_rate": 1.6565604679169951e-06, + "loss": 0.9741685390472412, + "step": 2678 + }, + { + "epoch": 0.6172811059907835, + "grad_norm": 1.2715516239943523, + "learning_rate": 1.6562730543090122e-06, + "loss": 1.0004706382751465, + "step": 2679 + }, + { + "epoch": 0.6175115207373272, + "grad_norm": 0.7382412100690486, + "learning_rate": 1.6559855454438644e-06, + "loss": 0.6897011399269104, + "step": 2680 + }, + { + "epoch": 0.617741935483871, + "grad_norm": 0.6330897297720288, + "learning_rate": 1.6556979413632833e-06, + "loss": 0.7250478267669678, + "step": 2681 + }, + { + "epoch": 0.6179723502304147, + "grad_norm": 0.9717515360338855, + "learning_rate": 1.6554102421090137e-06, + "loss": 0.850714385509491, + "step": 2682 + }, + { + "epoch": 0.6182027649769585, + "grad_norm": 0.917367886199939, + "learning_rate": 1.6551224477228152e-06, + "loss": 0.8389794230461121, + "step": 2683 + }, + { + "epoch": 0.6184331797235023, + "grad_norm": 0.8244704754842406, + "learning_rate": 1.6548345582464608e-06, + "loss": 0.8004277944564819, + "step": 2684 + }, + { + "epoch": 0.618663594470046, + "grad_norm": 0.9438052955461359, + "learning_rate": 1.654546573721737e-06, + "loss": 0.8439298868179321, + "step": 2685 + }, + { + "epoch": 0.6188940092165899, + "grad_norm": 0.9506767899718855, + "learning_rate": 1.6542584941904448e-06, + "loss": 0.7715939283370972, + "step": 2686 + }, + { + 
"epoch": 0.6191244239631336, + "grad_norm": 0.7277066195828455, + "learning_rate": 1.6539703196943982e-06, + "loss": 0.8521275520324707, + "step": 2687 + }, + { + "epoch": 0.6193548387096774, + "grad_norm": 0.9502964788805838, + "learning_rate": 1.6536820502754249e-06, + "loss": 0.8773370981216431, + "step": 2688 + }, + { + "epoch": 0.6195852534562212, + "grad_norm": 0.8896877670997408, + "learning_rate": 1.653393685975368e-06, + "loss": 0.7613356113433838, + "step": 2689 + }, + { + "epoch": 0.619815668202765, + "grad_norm": 0.7872525626089157, + "learning_rate": 1.6531052268360823e-06, + "loss": 0.7534692287445068, + "step": 2690 + }, + { + "epoch": 0.6200460829493087, + "grad_norm": 0.8888603991720845, + "learning_rate": 1.652816672899438e-06, + "loss": 0.861242413520813, + "step": 2691 + }, + { + "epoch": 0.6202764976958526, + "grad_norm": 1.0955455640383855, + "learning_rate": 1.652528024207317e-06, + "loss": 0.9778954982757568, + "step": 2692 + }, + { + "epoch": 0.6205069124423963, + "grad_norm": 0.8389124431813023, + "learning_rate": 1.6522392808016176e-06, + "loss": 0.7874879240989685, + "step": 2693 + }, + { + "epoch": 0.6207373271889401, + "grad_norm": 1.038077147354541, + "learning_rate": 1.6519504427242503e-06, + "loss": 0.8306739330291748, + "step": 2694 + }, + { + "epoch": 0.6209677419354839, + "grad_norm": 0.890554970207788, + "learning_rate": 1.651661510017139e-06, + "loss": 0.7617331743240356, + "step": 2695 + }, + { + "epoch": 0.6211981566820276, + "grad_norm": 0.8325839299854928, + "learning_rate": 1.6513724827222223e-06, + "loss": 0.8912776708602905, + "step": 2696 + }, + { + "epoch": 0.6214285714285714, + "grad_norm": 0.9626202232237234, + "learning_rate": 1.6510833608814519e-06, + "loss": 0.832025945186615, + "step": 2697 + }, + { + "epoch": 0.6216589861751152, + "grad_norm": 0.8573045739455887, + "learning_rate": 1.6507941445367934e-06, + "loss": 0.7391358613967896, + "step": 2698 + }, + { + "epoch": 0.621889400921659, + "grad_norm": 
0.8417803604945624, + "learning_rate": 1.6505048337302267e-06, + "loss": 0.7968891263008118, + "step": 2699 + }, + { + "epoch": 0.6221198156682027, + "grad_norm": 0.7943584636642551, + "learning_rate": 1.6502154285037446e-06, + "loss": 0.8268226981163025, + "step": 2700 + }, + { + "epoch": 0.6223502304147466, + "grad_norm": 0.8943748659016423, + "learning_rate": 1.6499259288993536e-06, + "loss": 0.8727509379386902, + "step": 2701 + }, + { + "epoch": 0.6225806451612903, + "grad_norm": 0.9781149876582625, + "learning_rate": 1.6496363349590746e-06, + "loss": 0.8419584035873413, + "step": 2702 + }, + { + "epoch": 0.6228110599078341, + "grad_norm": 0.9222004845701074, + "learning_rate": 1.6493466467249415e-06, + "loss": 0.7753620743751526, + "step": 2703 + }, + { + "epoch": 0.6230414746543779, + "grad_norm": 0.8188505837862442, + "learning_rate": 1.6490568642390022e-06, + "loss": 0.7735302448272705, + "step": 2704 + }, + { + "epoch": 0.6232718894009217, + "grad_norm": 0.892742684163995, + "learning_rate": 1.6487669875433183e-06, + "loss": 0.8730747699737549, + "step": 2705 + }, + { + "epoch": 0.6235023041474654, + "grad_norm": 1.081206789540213, + "learning_rate": 1.648477016679965e-06, + "loss": 1.026259183883667, + "step": 2706 + }, + { + "epoch": 0.6237327188940092, + "grad_norm": 1.1700615414540931, + "learning_rate": 1.6481869516910314e-06, + "loss": 1.0710067749023438, + "step": 2707 + }, + { + "epoch": 0.623963133640553, + "grad_norm": 0.8750649396873535, + "learning_rate": 1.6478967926186196e-06, + "loss": 0.8451842069625854, + "step": 2708 + }, + { + "epoch": 0.6241935483870967, + "grad_norm": 1.0025312740636694, + "learning_rate": 1.6476065395048463e-06, + "loss": 0.8114550113677979, + "step": 2709 + }, + { + "epoch": 0.6244239631336406, + "grad_norm": 0.9543936745980088, + "learning_rate": 1.6473161923918408e-06, + "loss": 0.9158897399902344, + "step": 2710 + }, + { + "epoch": 0.6246543778801843, + "grad_norm": 0.9073320322912862, + "learning_rate": 
1.6470257513217471e-06, + "loss": 0.8455985188484192, + "step": 2711 + }, + { + "epoch": 0.6248847926267281, + "grad_norm": 0.9409835862192949, + "learning_rate": 1.6467352163367224e-06, + "loss": 0.7869806885719299, + "step": 2712 + }, + { + "epoch": 0.6251152073732719, + "grad_norm": 0.9720046165998673, + "learning_rate": 1.6464445874789369e-06, + "loss": 0.7813467979431152, + "step": 2713 + }, + { + "epoch": 0.6253456221198157, + "grad_norm": 0.9253768349404401, + "learning_rate": 1.646153864790575e-06, + "loss": 0.7607834339141846, + "step": 2714 + }, + { + "epoch": 0.6255760368663594, + "grad_norm": 0.7655542834849622, + "learning_rate": 1.6458630483138354e-06, + "loss": 0.6316394209861755, + "step": 2715 + }, + { + "epoch": 0.6258064516129033, + "grad_norm": 1.0037920503955002, + "learning_rate": 1.6455721380909293e-06, + "loss": 0.8613089323043823, + "step": 2716 + }, + { + "epoch": 0.626036866359447, + "grad_norm": 0.900314234710346, + "learning_rate": 1.6452811341640823e-06, + "loss": 0.8521597385406494, + "step": 2717 + }, + { + "epoch": 0.6262672811059908, + "grad_norm": 0.863334614503053, + "learning_rate": 1.6449900365755322e-06, + "loss": 0.7649816870689392, + "step": 2718 + }, + { + "epoch": 0.6264976958525346, + "grad_norm": 0.7921235061169694, + "learning_rate": 1.6446988453675327e-06, + "loss": 0.669215738773346, + "step": 2719 + }, + { + "epoch": 0.6267281105990783, + "grad_norm": 1.0085146323707468, + "learning_rate": 1.6444075605823491e-06, + "loss": 0.7795897722244263, + "step": 2720 + }, + { + "epoch": 0.6269585253456221, + "grad_norm": 1.0985096718321175, + "learning_rate": 1.6441161822622612e-06, + "loss": 0.9773029088973999, + "step": 2721 + }, + { + "epoch": 0.6271889400921659, + "grad_norm": 0.88062279724108, + "learning_rate": 1.6438247104495622e-06, + "loss": 0.8313496112823486, + "step": 2722 + }, + { + "epoch": 0.6274193548387097, + "grad_norm": 0.8741823244787398, + "learning_rate": 1.6435331451865589e-06, + "loss": 
0.822803258895874, + "step": 2723 + }, + { + "epoch": 0.6276497695852534, + "grad_norm": 1.1191623839144935, + "learning_rate": 1.643241486515571e-06, + "loss": 0.8933405876159668, + "step": 2724 + }, + { + "epoch": 0.6278801843317973, + "grad_norm": 0.8721873626078817, + "learning_rate": 1.6429497344789334e-06, + "loss": 0.865382194519043, + "step": 2725 + }, + { + "epoch": 0.628110599078341, + "grad_norm": 0.6623424743433429, + "learning_rate": 1.6426578891189929e-06, + "loss": 0.5955609679222107, + "step": 2726 + }, + { + "epoch": 0.6283410138248848, + "grad_norm": 0.9379654908769754, + "learning_rate": 1.6423659504781102e-06, + "loss": 0.7832648754119873, + "step": 2727 + }, + { + "epoch": 0.6285714285714286, + "grad_norm": 0.9904172136436726, + "learning_rate": 1.6420739185986606e-06, + "loss": 0.8939651250839233, + "step": 2728 + }, + { + "epoch": 0.6288018433179724, + "grad_norm": 0.8754504203733118, + "learning_rate": 1.6417817935230316e-06, + "loss": 0.7950553894042969, + "step": 2729 + }, + { + "epoch": 0.6290322580645161, + "grad_norm": 0.7473547756110924, + "learning_rate": 1.6414895752936247e-06, + "loss": 0.7011410593986511, + "step": 2730 + }, + { + "epoch": 0.6292626728110599, + "grad_norm": 0.8298073820867625, + "learning_rate": 1.6411972639528553e-06, + "loss": 0.8745814561843872, + "step": 2731 + }, + { + "epoch": 0.6294930875576037, + "grad_norm": 0.9643129286331958, + "learning_rate": 1.640904859543152e-06, + "loss": 0.9487906694412231, + "step": 2732 + }, + { + "epoch": 0.6297235023041474, + "grad_norm": 1.0003996457820634, + "learning_rate": 1.6406123621069565e-06, + "loss": 0.8493598103523254, + "step": 2733 + }, + { + "epoch": 0.6299539170506913, + "grad_norm": 0.7043952970778223, + "learning_rate": 1.640319771686725e-06, + "loss": 0.8176105618476868, + "step": 2734 + }, + { + "epoch": 0.630184331797235, + "grad_norm": 1.1365398207749948, + "learning_rate": 1.640027088324926e-06, + "loss": 0.8331952691078186, + "step": 2735 + }, + { + 
"epoch": 0.6304147465437788, + "grad_norm": 0.9152153352251905, + "learning_rate": 1.6397343120640428e-06, + "loss": 0.7507727146148682, + "step": 2736 + }, + { + "epoch": 0.6306451612903226, + "grad_norm": 0.8498087936716523, + "learning_rate": 1.6394414429465707e-06, + "loss": 0.7681083679199219, + "step": 2737 + }, + { + "epoch": 0.6308755760368664, + "grad_norm": 1.0207970870125542, + "learning_rate": 1.6391484810150197e-06, + "loss": 0.86592036485672, + "step": 2738 + }, + { + "epoch": 0.6311059907834101, + "grad_norm": 0.7893726077346048, + "learning_rate": 1.6388554263119133e-06, + "loss": 0.6561422348022461, + "step": 2739 + }, + { + "epoch": 0.631336405529954, + "grad_norm": 0.8691518888981297, + "learning_rate": 1.6385622788797871e-06, + "loss": 1.0149214267730713, + "step": 2740 + }, + { + "epoch": 0.6315668202764977, + "grad_norm": 3.1459869291369578, + "learning_rate": 1.6382690387611912e-06, + "loss": 0.8542313575744629, + "step": 2741 + }, + { + "epoch": 0.6317972350230415, + "grad_norm": 0.8459688860048273, + "learning_rate": 1.6379757059986898e-06, + "loss": 0.8561190366744995, + "step": 2742 + }, + { + "epoch": 0.6320276497695853, + "grad_norm": 0.8945733601522768, + "learning_rate": 1.6376822806348591e-06, + "loss": 0.7487457990646362, + "step": 2743 + }, + { + "epoch": 0.632258064516129, + "grad_norm": 0.7710656021686645, + "learning_rate": 1.6373887627122894e-06, + "loss": 0.6169087886810303, + "step": 2744 + }, + { + "epoch": 0.6324884792626728, + "grad_norm": 0.9363459151732765, + "learning_rate": 1.6370951522735848e-06, + "loss": 0.8384301662445068, + "step": 2745 + }, + { + "epoch": 0.6327188940092165, + "grad_norm": 0.8816116065345285, + "learning_rate": 1.636801449361362e-06, + "loss": 0.8009958267211914, + "step": 2746 + }, + { + "epoch": 0.6329493087557604, + "grad_norm": 0.7782605199549586, + "learning_rate": 1.6365076540182518e-06, + "loss": 0.7277840375900269, + "step": 2747 + }, + { + "epoch": 0.6331797235023041, + "grad_norm": 
0.8629211607674182, + "learning_rate": 1.6362137662868988e-06, + "loss": 0.7994974255561829, + "step": 2748 + }, + { + "epoch": 0.633410138248848, + "grad_norm": 0.9972871876044257, + "learning_rate": 1.6359197862099592e-06, + "loss": 0.9940546751022339, + "step": 2749 + }, + { + "epoch": 0.6336405529953917, + "grad_norm": 0.7083636808435892, + "learning_rate": 1.6356257138301048e-06, + "loss": 0.776983916759491, + "step": 2750 + }, + { + "epoch": 0.6338709677419355, + "grad_norm": 1.0813287689618403, + "learning_rate": 1.6353315491900194e-06, + "loss": 0.8218704462051392, + "step": 2751 + }, + { + "epoch": 0.6341013824884792, + "grad_norm": 0.9285197745822434, + "learning_rate": 1.635037292332401e-06, + "loss": 0.8437784910202026, + "step": 2752 + }, + { + "epoch": 0.6343317972350231, + "grad_norm": 0.7951039096878332, + "learning_rate": 1.63474294329996e-06, + "loss": 0.7774004340171814, + "step": 2753 + }, + { + "epoch": 0.6345622119815668, + "grad_norm": 0.7998446978982631, + "learning_rate": 1.634448502135421e-06, + "loss": 0.8480523824691772, + "step": 2754 + }, + { + "epoch": 0.6347926267281107, + "grad_norm": 0.8710356721404071, + "learning_rate": 1.634153968881522e-06, + "loss": 0.838944673538208, + "step": 2755 + }, + { + "epoch": 0.6350230414746544, + "grad_norm": 0.9609360504840417, + "learning_rate": 1.633859343581014e-06, + "loss": 0.7989159822463989, + "step": 2756 + }, + { + "epoch": 0.6352534562211981, + "grad_norm": 0.8906618388597183, + "learning_rate": 1.6335646262766612e-06, + "loss": 0.8122522234916687, + "step": 2757 + }, + { + "epoch": 0.635483870967742, + "grad_norm": 1.0306905026592958, + "learning_rate": 1.6332698170112418e-06, + "loss": 0.7472352981567383, + "step": 2758 + }, + { + "epoch": 0.6357142857142857, + "grad_norm": 0.7470082329854858, + "learning_rate": 1.6329749158275466e-06, + "loss": 0.7160866260528564, + "step": 2759 + }, + { + "epoch": 0.6359447004608295, + "grad_norm": 0.9276359862380839, + "learning_rate": 
1.6326799227683803e-06, + "loss": 0.850339412689209, + "step": 2760 + }, + { + "epoch": 0.6361751152073732, + "grad_norm": 0.8334408182150722, + "learning_rate": 1.632384837876561e-06, + "loss": 0.7683566808700562, + "step": 2761 + }, + { + "epoch": 0.6364055299539171, + "grad_norm": 1.0070287688728312, + "learning_rate": 1.6320896611949197e-06, + "loss": 0.820326030254364, + "step": 2762 + }, + { + "epoch": 0.6366359447004608, + "grad_norm": 0.9088399606663712, + "learning_rate": 1.6317943927663005e-06, + "loss": 0.9319206476211548, + "step": 2763 + }, + { + "epoch": 0.6368663594470046, + "grad_norm": 0.854101738795234, + "learning_rate": 1.6314990326335619e-06, + "loss": 0.8473616242408752, + "step": 2764 + }, + { + "epoch": 0.6370967741935484, + "grad_norm": 0.9083270544798837, + "learning_rate": 1.6312035808395746e-06, + "loss": 0.7515239715576172, + "step": 2765 + }, + { + "epoch": 0.6373271889400922, + "grad_norm": 0.9691327918436982, + "learning_rate": 1.630908037427223e-06, + "loss": 0.8780150413513184, + "step": 2766 + }, + { + "epoch": 0.6375576036866359, + "grad_norm": 0.8183908015853972, + "learning_rate": 1.6306124024394051e-06, + "loss": 0.7502909898757935, + "step": 2767 + }, + { + "epoch": 0.6377880184331797, + "grad_norm": 1.0244030314506845, + "learning_rate": 1.630316675919032e-06, + "loss": 0.8440920114517212, + "step": 2768 + }, + { + "epoch": 0.6380184331797235, + "grad_norm": 0.9479398820781787, + "learning_rate": 1.6300208579090275e-06, + "loss": 0.7769831418991089, + "step": 2769 + }, + { + "epoch": 0.6382488479262672, + "grad_norm": 0.7616107153752498, + "learning_rate": 1.6297249484523297e-06, + "loss": 0.6217764616012573, + "step": 2770 + }, + { + "epoch": 0.6384792626728111, + "grad_norm": 0.7961962297717475, + "learning_rate": 1.6294289475918891e-06, + "loss": 0.8726013898849487, + "step": 2771 + }, + { + "epoch": 0.6387096774193548, + "grad_norm": 0.9993347618775529, + "learning_rate": 1.6291328553706702e-06, + "loss": 
0.9624546766281128, + "step": 2772 + }, + { + "epoch": 0.6389400921658986, + "grad_norm": 0.9073330627878557, + "learning_rate": 1.62883667183165e-06, + "loss": 0.733322024345398, + "step": 2773 + }, + { + "epoch": 0.6391705069124424, + "grad_norm": 0.828990327728417, + "learning_rate": 1.6285403970178197e-06, + "loss": 0.7944040298461914, + "step": 2774 + }, + { + "epoch": 0.6394009216589862, + "grad_norm": 0.945508092850191, + "learning_rate": 1.6282440309721825e-06, + "loss": 0.8006964921951294, + "step": 2775 + }, + { + "epoch": 0.6396313364055299, + "grad_norm": 0.8235251563991838, + "learning_rate": 1.6279475737377562e-06, + "loss": 0.8226393461227417, + "step": 2776 + }, + { + "epoch": 0.6398617511520738, + "grad_norm": 0.9205648176506509, + "learning_rate": 1.6276510253575707e-06, + "loss": 0.8216049671173096, + "step": 2777 + }, + { + "epoch": 0.6400921658986175, + "grad_norm": 1.2879339929003093, + "learning_rate": 1.6273543858746698e-06, + "loss": 0.9556760191917419, + "step": 2778 + }, + { + "epoch": 0.6403225806451613, + "grad_norm": 1.226309717633737, + "learning_rate": 1.6270576553321103e-06, + "loss": 0.9736160039901733, + "step": 2779 + }, + { + "epoch": 0.6405529953917051, + "grad_norm": 0.7107959971647043, + "learning_rate": 1.6267608337729622e-06, + "loss": 0.6930527687072754, + "step": 2780 + }, + { + "epoch": 0.6407834101382488, + "grad_norm": 0.8158686811134676, + "learning_rate": 1.6264639212403089e-06, + "loss": 0.8047456741333008, + "step": 2781 + }, + { + "epoch": 0.6410138248847926, + "grad_norm": 0.8454524938044947, + "learning_rate": 1.6261669177772465e-06, + "loss": 0.7278450727462769, + "step": 2782 + }, + { + "epoch": 0.6412442396313364, + "grad_norm": 0.8520417006771478, + "learning_rate": 1.6258698234268852e-06, + "loss": 0.7768574357032776, + "step": 2783 + }, + { + "epoch": 0.6414746543778802, + "grad_norm": 1.0890287289964238, + "learning_rate": 1.6255726382323475e-06, + "loss": 0.7621645331382751, + "step": 2784 + }, + { + 
"epoch": 0.6417050691244239, + "grad_norm": 0.7437513689171984, + "learning_rate": 1.6252753622367695e-06, + "loss": 0.7566754221916199, + "step": 2785 + }, + { + "epoch": 0.6419354838709678, + "grad_norm": 0.8832427803322862, + "learning_rate": 1.6249779954833005e-06, + "loss": 0.7609840631484985, + "step": 2786 + }, + { + "epoch": 0.6421658986175115, + "grad_norm": 0.7482883809435998, + "learning_rate": 1.6246805380151028e-06, + "loss": 0.7360000610351562, + "step": 2787 + }, + { + "epoch": 0.6423963133640553, + "grad_norm": 1.1130271498528226, + "learning_rate": 1.624382989875352e-06, + "loss": 0.7951081395149231, + "step": 2788 + }, + { + "epoch": 0.6426267281105991, + "grad_norm": 0.7939855049580037, + "learning_rate": 1.6240853511072367e-06, + "loss": 0.7273311614990234, + "step": 2789 + }, + { + "epoch": 0.6428571428571429, + "grad_norm": 1.0416971384804878, + "learning_rate": 1.6237876217539588e-06, + "loss": 0.9270737171173096, + "step": 2790 + }, + { + "epoch": 0.6430875576036866, + "grad_norm": 0.97801359210753, + "learning_rate": 1.6234898018587336e-06, + "loss": 0.7624385356903076, + "step": 2791 + }, + { + "epoch": 0.6433179723502304, + "grad_norm": 0.8529799225121792, + "learning_rate": 1.6231918914647889e-06, + "loss": 0.8266719579696655, + "step": 2792 + }, + { + "epoch": 0.6435483870967742, + "grad_norm": 0.6435153338840431, + "learning_rate": 1.6228938906153663e-06, + "loss": 0.7606902122497559, + "step": 2793 + }, + { + "epoch": 0.6437788018433179, + "grad_norm": 1.022572162531227, + "learning_rate": 1.6225957993537197e-06, + "loss": 0.8239191174507141, + "step": 2794 + }, + { + "epoch": 0.6440092165898618, + "grad_norm": 0.8871272102711673, + "learning_rate": 1.6222976177231174e-06, + "loss": 0.8313608169555664, + "step": 2795 + }, + { + "epoch": 0.6442396313364055, + "grad_norm": 0.7541910127898682, + "learning_rate": 1.6219993457668396e-06, + "loss": 0.7725037932395935, + "step": 2796 + }, + { + "epoch": 0.6444700460829493, + "grad_norm": 
0.8887584465014293, + "learning_rate": 1.6217009835281802e-06, + "loss": 0.8791182041168213, + "step": 2797 + }, + { + "epoch": 0.6447004608294931, + "grad_norm": 0.9285171614449231, + "learning_rate": 1.621402531050446e-06, + "loss": 0.7157453298568726, + "step": 2798 + }, + { + "epoch": 0.6449308755760369, + "grad_norm": 0.9675001114911925, + "learning_rate": 1.621103988376957e-06, + "loss": 0.8248307704925537, + "step": 2799 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8114025469253138, + "learning_rate": 1.6208053555510467e-06, + "loss": 0.7094661593437195, + "step": 2800 + }, + { + "epoch": 0.6453917050691245, + "grad_norm": 0.997320269594231, + "learning_rate": 1.6205066326160605e-06, + "loss": 0.9130781888961792, + "step": 2801 + }, + { + "epoch": 0.6456221198156682, + "grad_norm": 0.8555561883924394, + "learning_rate": 1.620207819615358e-06, + "loss": 0.7140541076660156, + "step": 2802 + }, + { + "epoch": 0.645852534562212, + "grad_norm": 0.8223075667705522, + "learning_rate": 1.6199089165923116e-06, + "loss": 0.8638602495193481, + "step": 2803 + }, + { + "epoch": 0.6460829493087558, + "grad_norm": 0.8487880176317714, + "learning_rate": 1.6196099235903068e-06, + "loss": 0.9055536389350891, + "step": 2804 + }, + { + "epoch": 0.6463133640552995, + "grad_norm": 0.9356547902583738, + "learning_rate": 1.6193108406527416e-06, + "loss": 0.7694590091705322, + "step": 2805 + }, + { + "epoch": 0.6465437788018433, + "grad_norm": 0.9047595380936525, + "learning_rate": 1.619011667823028e-06, + "loss": 0.7512019872665405, + "step": 2806 + }, + { + "epoch": 0.646774193548387, + "grad_norm": 0.8406537006369587, + "learning_rate": 1.6187124051445903e-06, + "loss": 0.6362565159797668, + "step": 2807 + }, + { + "epoch": 0.6470046082949309, + "grad_norm": 1.328031327807814, + "learning_rate": 1.6184130526608656e-06, + "loss": 0.885259747505188, + "step": 2808 + }, + { + "epoch": 0.6472350230414746, + "grad_norm": 0.9445009081248091, + "learning_rate": 
1.6181136104153054e-06, + "loss": 0.7868754863739014, + "step": 2809 + }, + { + "epoch": 0.6474654377880185, + "grad_norm": 0.901923102146858, + "learning_rate": 1.6178140784513729e-06, + "loss": 0.889660120010376, + "step": 2810 + }, + { + "epoch": 0.6476958525345622, + "grad_norm": 0.7380215273328754, + "learning_rate": 1.6175144568125444e-06, + "loss": 0.8460343480110168, + "step": 2811 + }, + { + "epoch": 0.647926267281106, + "grad_norm": 0.9963582050847237, + "learning_rate": 1.6172147455423105e-06, + "loss": 0.8729731440544128, + "step": 2812 + }, + { + "epoch": 0.6481566820276498, + "grad_norm": 0.9500689129739934, + "learning_rate": 1.616914944684173e-06, + "loss": 0.7937173843383789, + "step": 2813 + }, + { + "epoch": 0.6483870967741936, + "grad_norm": 1.068299419221943, + "learning_rate": 1.6166150542816483e-06, + "loss": 0.8764641284942627, + "step": 2814 + }, + { + "epoch": 0.6486175115207373, + "grad_norm": 0.8942547003902331, + "learning_rate": 1.6163150743782645e-06, + "loss": 0.8078420758247375, + "step": 2815 + }, + { + "epoch": 0.6488479262672812, + "grad_norm": 0.9410598977678883, + "learning_rate": 1.6160150050175636e-06, + "loss": 0.9124993085861206, + "step": 2816 + }, + { + "epoch": 0.6490783410138249, + "grad_norm": 0.8852573714623596, + "learning_rate": 1.6157148462431003e-06, + "loss": 0.9584136009216309, + "step": 2817 + }, + { + "epoch": 0.6493087557603686, + "grad_norm": 1.0833527157774228, + "learning_rate": 1.6154145980984422e-06, + "loss": 0.8404672145843506, + "step": 2818 + }, + { + "epoch": 0.6495391705069125, + "grad_norm": 0.9498348014278839, + "learning_rate": 1.6151142606271695e-06, + "loss": 0.7928001880645752, + "step": 2819 + }, + { + "epoch": 0.6497695852534562, + "grad_norm": 0.8444903444994009, + "learning_rate": 1.6148138338728766e-06, + "loss": 0.7877479791641235, + "step": 2820 + }, + { + "epoch": 0.65, + "grad_norm": 0.814898961059689, + "learning_rate": 1.6145133178791695e-06, + "loss": 0.9502429366111755, + "step": 
2821 + }, + { + "epoch": 0.6502304147465438, + "grad_norm": 0.791549779828082, + "learning_rate": 1.6142127126896679e-06, + "loss": 0.7866412401199341, + "step": 2822 + }, + { + "epoch": 0.6504608294930876, + "grad_norm": 0.7841896313928699, + "learning_rate": 1.613912018348004e-06, + "loss": 0.8315345644950867, + "step": 2823 + }, + { + "epoch": 0.6506912442396313, + "grad_norm": 0.6841019539216254, + "learning_rate": 1.6136112348978236e-06, + "loss": 0.9718044400215149, + "step": 2824 + }, + { + "epoch": 0.6509216589861752, + "grad_norm": 0.6502753552916141, + "learning_rate": 1.6133103623827843e-06, + "loss": 0.5874941349029541, + "step": 2825 + }, + { + "epoch": 0.6511520737327189, + "grad_norm": 0.8954999916723304, + "learning_rate": 1.613009400846558e-06, + "loss": 0.9498391151428223, + "step": 2826 + }, + { + "epoch": 0.6513824884792627, + "grad_norm": 0.9527387242959447, + "learning_rate": 1.612708350332829e-06, + "loss": 0.858715295791626, + "step": 2827 + }, + { + "epoch": 0.6516129032258065, + "grad_norm": 0.7771583744459308, + "learning_rate": 1.6124072108852938e-06, + "loss": 0.8618113994598389, + "step": 2828 + }, + { + "epoch": 0.6518433179723502, + "grad_norm": 0.7504136233680345, + "learning_rate": 1.6121059825476628e-06, + "loss": 0.8024446964263916, + "step": 2829 + }, + { + "epoch": 0.652073732718894, + "grad_norm": 0.8461077162414828, + "learning_rate": 1.6118046653636586e-06, + "loss": 0.8021122813224792, + "step": 2830 + }, + { + "epoch": 0.6523041474654377, + "grad_norm": 0.8330044091738112, + "learning_rate": 1.6115032593770176e-06, + "loss": 0.8092107772827148, + "step": 2831 + }, + { + "epoch": 0.6525345622119816, + "grad_norm": 0.8480183578387018, + "learning_rate": 1.6112017646314872e-06, + "loss": 0.9842641353607178, + "step": 2832 + }, + { + "epoch": 0.6527649769585253, + "grad_norm": 0.8051494817524167, + "learning_rate": 1.6109001811708305e-06, + "loss": 0.744353175163269, + "step": 2833 + }, + { + "epoch": 0.6529953917050692, + 
"grad_norm": 1.0610555371871784, + "learning_rate": 1.6105985090388209e-06, + "loss": 0.7089616060256958, + "step": 2834 + }, + { + "epoch": 0.6532258064516129, + "grad_norm": 0.9119028582239228, + "learning_rate": 1.610296748279246e-06, + "loss": 0.9043736457824707, + "step": 2835 + }, + { + "epoch": 0.6534562211981567, + "grad_norm": 1.0078987757698072, + "learning_rate": 1.6099948989359061e-06, + "loss": 0.9170948266983032, + "step": 2836 + }, + { + "epoch": 0.6536866359447004, + "grad_norm": 0.9289963097672949, + "learning_rate": 1.6096929610526145e-06, + "loss": 0.8275802135467529, + "step": 2837 + }, + { + "epoch": 0.6539170506912443, + "grad_norm": 0.9146670757237039, + "learning_rate": 1.6093909346731965e-06, + "loss": 0.9180251955986023, + "step": 2838 + }, + { + "epoch": 0.654147465437788, + "grad_norm": 0.708269208459363, + "learning_rate": 1.6090888198414908e-06, + "loss": 0.8041235208511353, + "step": 2839 + }, + { + "epoch": 0.6543778801843319, + "grad_norm": 0.9431191202102605, + "learning_rate": 1.6087866166013492e-06, + "loss": 0.7833176851272583, + "step": 2840 + }, + { + "epoch": 0.6546082949308756, + "grad_norm": 0.8680924352570318, + "learning_rate": 1.6084843249966364e-06, + "loss": 0.838886022567749, + "step": 2841 + }, + { + "epoch": 0.6548387096774193, + "grad_norm": 0.8317233103954151, + "learning_rate": 1.6081819450712293e-06, + "loss": 0.837687611579895, + "step": 2842 + }, + { + "epoch": 0.6550691244239631, + "grad_norm": 0.8737630969117387, + "learning_rate": 1.607879476869018e-06, + "loss": 0.6572843790054321, + "step": 2843 + }, + { + "epoch": 0.6552995391705069, + "grad_norm": 0.8513917948170456, + "learning_rate": 1.6075769204339053e-06, + "loss": 0.7698653936386108, + "step": 2844 + }, + { + "epoch": 0.6555299539170507, + "grad_norm": 0.9469558820500475, + "learning_rate": 1.607274275809807e-06, + "loss": 0.8639169335365295, + "step": 2845 + }, + { + "epoch": 0.6557603686635944, + "grad_norm": 0.8250799867539951, + 
"learning_rate": 1.6069715430406517e-06, + "loss": 0.837492823600769, + "step": 2846 + }, + { + "epoch": 0.6559907834101383, + "grad_norm": 0.9277000604833184, + "learning_rate": 1.6066687221703803e-06, + "loss": 0.8824087381362915, + "step": 2847 + }, + { + "epoch": 0.656221198156682, + "grad_norm": 0.9304701724719217, + "learning_rate": 1.6063658132429468e-06, + "loss": 0.8161731958389282, + "step": 2848 + }, + { + "epoch": 0.6564516129032258, + "grad_norm": 0.7988044282931124, + "learning_rate": 1.6060628163023183e-06, + "loss": 0.8365877270698547, + "step": 2849 + }, + { + "epoch": 0.6566820276497696, + "grad_norm": 0.8477393490951164, + "learning_rate": 1.6057597313924745e-06, + "loss": 0.877829909324646, + "step": 2850 + }, + { + "epoch": 0.6569124423963134, + "grad_norm": 0.857078285622655, + "learning_rate": 1.6054565585574075e-06, + "loss": 0.756903886795044, + "step": 2851 + }, + { + "epoch": 0.6571428571428571, + "grad_norm": 1.0124401818225557, + "learning_rate": 1.6051532978411223e-06, + "loss": 0.7777276039123535, + "step": 2852 + }, + { + "epoch": 0.6573732718894009, + "grad_norm": 0.9464152715401636, + "learning_rate": 1.6048499492876375e-06, + "loss": 0.9191532135009766, + "step": 2853 + }, + { + "epoch": 0.6576036866359447, + "grad_norm": 0.7885787618366824, + "learning_rate": 1.6045465129409829e-06, + "loss": 0.7693309783935547, + "step": 2854 + }, + { + "epoch": 0.6578341013824884, + "grad_norm": 0.8787314035574895, + "learning_rate": 1.6042429888452024e-06, + "loss": 0.7865023612976074, + "step": 2855 + }, + { + "epoch": 0.6580645161290323, + "grad_norm": 0.8588996745183644, + "learning_rate": 1.6039393770443521e-06, + "loss": 0.844336748123169, + "step": 2856 + }, + { + "epoch": 0.658294930875576, + "grad_norm": 0.9455502994869639, + "learning_rate": 1.6036356775825009e-06, + "loss": 0.9590705633163452, + "step": 2857 + }, + { + "epoch": 0.6585253456221198, + "grad_norm": 0.904582718768817, + "learning_rate": 1.6033318905037297e-06, + "loss": 
0.8687748312950134, + "step": 2858 + }, + { + "epoch": 0.6587557603686636, + "grad_norm": 0.8848681311153475, + "learning_rate": 1.6030280158521336e-06, + "loss": 0.8669745922088623, + "step": 2859 + }, + { + "epoch": 0.6589861751152074, + "grad_norm": 0.8829211466390271, + "learning_rate": 1.6027240536718191e-06, + "loss": 0.6929436922073364, + "step": 2860 + }, + { + "epoch": 0.6592165898617511, + "grad_norm": 0.9047325967091919, + "learning_rate": 1.6024200040069065e-06, + "loss": 0.6965433359146118, + "step": 2861 + }, + { + "epoch": 0.659447004608295, + "grad_norm": 0.9743729570848424, + "learning_rate": 1.6021158669015273e-06, + "loss": 0.780353307723999, + "step": 2862 + }, + { + "epoch": 0.6596774193548387, + "grad_norm": 0.7726382879850381, + "learning_rate": 1.6018116423998277e-06, + "loss": 0.685762882232666, + "step": 2863 + }, + { + "epoch": 0.6599078341013825, + "grad_norm": 0.8607619933867399, + "learning_rate": 1.6015073305459646e-06, + "loss": 0.8249918222427368, + "step": 2864 + }, + { + "epoch": 0.6601382488479263, + "grad_norm": 0.7388237148259402, + "learning_rate": 1.6012029313841086e-06, + "loss": 0.7327184677124023, + "step": 2865 + }, + { + "epoch": 0.66036866359447, + "grad_norm": 0.9554378042614118, + "learning_rate": 1.6008984449584433e-06, + "loss": 0.7785891890525818, + "step": 2866 + }, + { + "epoch": 0.6605990783410138, + "grad_norm": 0.7196967379779726, + "learning_rate": 1.600593871313164e-06, + "loss": 0.7307751178741455, + "step": 2867 + }, + { + "epoch": 0.6608294930875576, + "grad_norm": 1.2601680054093507, + "learning_rate": 1.6002892104924796e-06, + "loss": 0.8802257180213928, + "step": 2868 + }, + { + "epoch": 0.6610599078341014, + "grad_norm": 1.0302753711943056, + "learning_rate": 1.5999844625406106e-06, + "loss": 0.8699140548706055, + "step": 2869 + }, + { + "epoch": 0.6612903225806451, + "grad_norm": 0.8146336951608913, + "learning_rate": 1.5996796275017914e-06, + "loss": 0.6453604102134705, + "step": 2870 + }, + { + 
"epoch": 0.661520737327189, + "grad_norm": 0.807532897551279, + "learning_rate": 1.5993747054202682e-06, + "loss": 0.7319324016571045, + "step": 2871 + }, + { + "epoch": 0.6617511520737327, + "grad_norm": 0.9337023535064233, + "learning_rate": 1.5990696963402998e-06, + "loss": 0.8357574343681335, + "step": 2872 + }, + { + "epoch": 0.6619815668202765, + "grad_norm": 0.854915024221744, + "learning_rate": 1.5987646003061581e-06, + "loss": 0.7647984027862549, + "step": 2873 + }, + { + "epoch": 0.6622119815668203, + "grad_norm": 1.0099884737934117, + "learning_rate": 1.5984594173621274e-06, + "loss": 0.8542075753211975, + "step": 2874 + }, + { + "epoch": 0.6624423963133641, + "grad_norm": 0.9685596460194386, + "learning_rate": 1.5981541475525044e-06, + "loss": 0.7689328193664551, + "step": 2875 + }, + { + "epoch": 0.6626728110599078, + "grad_norm": 0.8183777315007433, + "learning_rate": 1.5978487909215987e-06, + "loss": 0.7459174990653992, + "step": 2876 + }, + { + "epoch": 0.6629032258064517, + "grad_norm": 0.8697380019030229, + "learning_rate": 1.5975433475137329e-06, + "loss": 0.8268495202064514, + "step": 2877 + }, + { + "epoch": 0.6631336405529954, + "grad_norm": 0.9013422410425754, + "learning_rate": 1.5972378173732406e-06, + "loss": 0.8254266977310181, + "step": 2878 + }, + { + "epoch": 0.6633640552995391, + "grad_norm": 1.0427681980244552, + "learning_rate": 1.59693220054447e-06, + "loss": 0.8552727103233337, + "step": 2879 + }, + { + "epoch": 0.663594470046083, + "grad_norm": 0.7469699255899254, + "learning_rate": 1.596626497071781e-06, + "loss": 0.7196269035339355, + "step": 2880 + }, + { + "epoch": 0.6638248847926267, + "grad_norm": 0.9146202447996906, + "learning_rate": 1.5963207069995455e-06, + "loss": 0.815540075302124, + "step": 2881 + }, + { + "epoch": 0.6640552995391705, + "grad_norm": 0.8585411055523222, + "learning_rate": 1.596014830372149e-06, + "loss": 0.8040128350257874, + "step": 2882 + }, + { + "epoch": 0.6642857142857143, + "grad_norm": 
0.8592608746136836, + "learning_rate": 1.5957088672339887e-06, + "loss": 0.7990812659263611, + "step": 2883 + }, + { + "epoch": 0.6645161290322581, + "grad_norm": 0.9139395957334936, + "learning_rate": 1.5954028176294746e-06, + "loss": 0.956179141998291, + "step": 2884 + }, + { + "epoch": 0.6647465437788018, + "grad_norm": 0.9544806325504157, + "learning_rate": 1.5950966816030304e-06, + "loss": 0.7730144262313843, + "step": 2885 + }, + { + "epoch": 0.6649769585253457, + "grad_norm": 1.0230957824823068, + "learning_rate": 1.5947904591990904e-06, + "loss": 0.902834415435791, + "step": 2886 + }, + { + "epoch": 0.6652073732718894, + "grad_norm": 0.8987169052425068, + "learning_rate": 1.5944841504621027e-06, + "loss": 0.7234599590301514, + "step": 2887 + }, + { + "epoch": 0.6654377880184332, + "grad_norm": 0.9849005395145788, + "learning_rate": 1.5941777554365271e-06, + "loss": 1.0267843008041382, + "step": 2888 + }, + { + "epoch": 0.665668202764977, + "grad_norm": 1.1615941669691254, + "learning_rate": 1.5938712741668376e-06, + "loss": 0.7431002855300903, + "step": 2889 + }, + { + "epoch": 0.6658986175115207, + "grad_norm": 0.8013605201375282, + "learning_rate": 1.5935647066975185e-06, + "loss": 0.7843111753463745, + "step": 2890 + }, + { + "epoch": 0.6661290322580645, + "grad_norm": 0.9498522711625995, + "learning_rate": 1.593258053073068e-06, + "loss": 0.8775256872177124, + "step": 2891 + }, + { + "epoch": 0.6663594470046083, + "grad_norm": 0.8363878343517416, + "learning_rate": 1.5929513133379966e-06, + "loss": 0.7861695289611816, + "step": 2892 + }, + { + "epoch": 0.6665898617511521, + "grad_norm": 1.1446598361432248, + "learning_rate": 1.5926444875368267e-06, + "loss": 0.8721977472305298, + "step": 2893 + }, + { + "epoch": 0.6668202764976958, + "grad_norm": 0.7591669830135314, + "learning_rate": 1.5923375757140941e-06, + "loss": 0.648263692855835, + "step": 2894 + }, + { + "epoch": 0.6670506912442397, + "grad_norm": 0.8984763952333247, + "learning_rate": 
1.592030577914347e-06, + "loss": 0.8334729075431824, + "step": 2895 + }, + { + "epoch": 0.6672811059907834, + "grad_norm": 0.7757586607492352, + "learning_rate": 1.591723494182145e-06, + "loss": 0.6105949878692627, + "step": 2896 + }, + { + "epoch": 0.6675115207373272, + "grad_norm": 0.8562379620561761, + "learning_rate": 1.5914163245620608e-06, + "loss": 0.7895448207855225, + "step": 2897 + }, + { + "epoch": 0.667741935483871, + "grad_norm": 0.9487051467126763, + "learning_rate": 1.5911090690986805e-06, + "loss": 0.8728576302528381, + "step": 2898 + }, + { + "epoch": 0.6679723502304148, + "grad_norm": 0.7480056751597441, + "learning_rate": 1.590801727836601e-06, + "loss": 0.7637856006622314, + "step": 2899 + }, + { + "epoch": 0.6682027649769585, + "grad_norm": 1.0125939986027075, + "learning_rate": 1.590494300820433e-06, + "loss": 0.8988397717475891, + "step": 2900 + }, + { + "epoch": 0.6684331797235024, + "grad_norm": 0.9324485554010499, + "learning_rate": 1.590186788094799e-06, + "loss": 0.7486827373504639, + "step": 2901 + }, + { + "epoch": 0.6686635944700461, + "grad_norm": 0.7629631437151, + "learning_rate": 1.589879189704334e-06, + "loss": 0.8212865591049194, + "step": 2902 + }, + { + "epoch": 0.6688940092165898, + "grad_norm": 0.7640149838894683, + "learning_rate": 1.5895715056936853e-06, + "loss": 0.7421284914016724, + "step": 2903 + }, + { + "epoch": 0.6691244239631337, + "grad_norm": 0.8407199034997399, + "learning_rate": 1.5892637361075132e-06, + "loss": 0.8721676468849182, + "step": 2904 + }, + { + "epoch": 0.6693548387096774, + "grad_norm": 0.9214400782360851, + "learning_rate": 1.58895588099049e-06, + "loss": 0.7265836000442505, + "step": 2905 + }, + { + "epoch": 0.6695852534562212, + "grad_norm": 0.959235173078028, + "learning_rate": 1.5886479403873e-06, + "loss": 0.863615870475769, + "step": 2906 + }, + { + "epoch": 0.669815668202765, + "grad_norm": 0.788219849900096, + "learning_rate": 1.588339914342641e-06, + "loss": 0.8362177610397339, + "step": 
2907 + }, + { + "epoch": 0.6700460829493088, + "grad_norm": 1.0142262876785297, + "learning_rate": 1.5880318029012223e-06, + "loss": 0.9076892137527466, + "step": 2908 + }, + { + "epoch": 0.6702764976958525, + "grad_norm": 0.957653217332238, + "learning_rate": 1.5877236061077658e-06, + "loss": 0.9149065017700195, + "step": 2909 + }, + { + "epoch": 0.6705069124423964, + "grad_norm": 0.8820705070600866, + "learning_rate": 1.5874153240070062e-06, + "loss": 0.7761013507843018, + "step": 2910 + }, + { + "epoch": 0.6707373271889401, + "grad_norm": 1.049261864076062, + "learning_rate": 1.5871069566436894e-06, + "loss": 0.8671830892562866, + "step": 2911 + }, + { + "epoch": 0.6709677419354839, + "grad_norm": 0.9461120142941367, + "learning_rate": 1.5867985040625755e-06, + "loss": 0.9433870315551758, + "step": 2912 + }, + { + "epoch": 0.6711981566820276, + "grad_norm": 0.934114103387592, + "learning_rate": 1.5864899663084352e-06, + "loss": 0.8009352684020996, + "step": 2913 + }, + { + "epoch": 0.6714285714285714, + "grad_norm": 0.9285902098427739, + "learning_rate": 1.5861813434260528e-06, + "loss": 0.6813808083534241, + "step": 2914 + }, + { + "epoch": 0.6716589861751152, + "grad_norm": 0.7891360814530397, + "learning_rate": 1.5858726354602248e-06, + "loss": 0.712783932685852, + "step": 2915 + }, + { + "epoch": 0.6718894009216589, + "grad_norm": 0.9971879600214522, + "learning_rate": 1.5855638424557588e-06, + "loss": 0.7871056795120239, + "step": 2916 + }, + { + "epoch": 0.6721198156682028, + "grad_norm": 0.9551471269364743, + "learning_rate": 1.5852549644574766e-06, + "loss": 0.8590981960296631, + "step": 2917 + }, + { + "epoch": 0.6723502304147465, + "grad_norm": 0.9338373296128487, + "learning_rate": 1.584946001510211e-06, + "loss": 0.7952913641929626, + "step": 2918 + }, + { + "epoch": 0.6725806451612903, + "grad_norm": 1.0716689971646949, + "learning_rate": 1.5846369536588078e-06, + "loss": 0.8567384481430054, + "step": 2919 + }, + { + "epoch": 0.6728110599078341, + 
"grad_norm": 1.0797852963412387, + "learning_rate": 1.5843278209481246e-06, + "loss": 0.859541654586792, + "step": 2920 + }, + { + "epoch": 0.6730414746543779, + "grad_norm": 1.1734504357127358, + "learning_rate": 1.5840186034230318e-06, + "loss": 0.7843801975250244, + "step": 2921 + }, + { + "epoch": 0.6732718894009216, + "grad_norm": 0.7736885985619673, + "learning_rate": 1.5837093011284118e-06, + "loss": 0.7448940277099609, + "step": 2922 + }, + { + "epoch": 0.6735023041474655, + "grad_norm": 1.0803788544256392, + "learning_rate": 1.5833999141091593e-06, + "loss": 0.9325242042541504, + "step": 2923 + }, + { + "epoch": 0.6737327188940092, + "grad_norm": 1.2302390941080075, + "learning_rate": 1.5830904424101816e-06, + "loss": 0.8005647659301758, + "step": 2924 + }, + { + "epoch": 0.673963133640553, + "grad_norm": 0.9271295903754758, + "learning_rate": 1.5827808860763984e-06, + "loss": 0.8897464275360107, + "step": 2925 + }, + { + "epoch": 0.6741935483870968, + "grad_norm": 1.0218758099034497, + "learning_rate": 1.5824712451527409e-06, + "loss": 0.8319039344787598, + "step": 2926 + }, + { + "epoch": 0.6744239631336405, + "grad_norm": 1.0734614103347653, + "learning_rate": 1.5821615196841533e-06, + "loss": 0.7638111114501953, + "step": 2927 + }, + { + "epoch": 0.6746543778801843, + "grad_norm": 0.8552316991076688, + "learning_rate": 1.581851709715592e-06, + "loss": 0.7617092132568359, + "step": 2928 + }, + { + "epoch": 0.6748847926267281, + "grad_norm": 1.0119419737078916, + "learning_rate": 1.581541815292025e-06, + "loss": 0.813319742679596, + "step": 2929 + }, + { + "epoch": 0.6751152073732719, + "grad_norm": 0.8324815306646182, + "learning_rate": 1.5812318364584334e-06, + "loss": 0.7495343089103699, + "step": 2930 + }, + { + "epoch": 0.6753456221198156, + "grad_norm": 1.0070331562925772, + "learning_rate": 1.5809217732598103e-06, + "loss": 0.9064745306968689, + "step": 2931 + }, + { + "epoch": 0.6755760368663595, + "grad_norm": 0.77529378116571, + 
"learning_rate": 1.580611625741161e-06, + "loss": 0.699098527431488, + "step": 2932 + }, + { + "epoch": 0.6758064516129032, + "grad_norm": 0.9525126023464006, + "learning_rate": 1.5803013939475025e-06, + "loss": 0.9168096780776978, + "step": 2933 + }, + { + "epoch": 0.676036866359447, + "grad_norm": 0.8145178437764095, + "learning_rate": 1.5799910779238652e-06, + "loss": 0.8848644495010376, + "step": 2934 + }, + { + "epoch": 0.6762672811059908, + "grad_norm": 0.8852934324704809, + "learning_rate": 1.5796806777152903e-06, + "loss": 0.7795228958129883, + "step": 2935 + }, + { + "epoch": 0.6764976958525346, + "grad_norm": 0.9901973226971541, + "learning_rate": 1.5793701933668327e-06, + "loss": 0.9287698268890381, + "step": 2936 + }, + { + "epoch": 0.6767281105990783, + "grad_norm": 0.9605403793187631, + "learning_rate": 1.5790596249235587e-06, + "loss": 0.8661396503448486, + "step": 2937 + }, + { + "epoch": 0.6769585253456222, + "grad_norm": 1.0073544692346657, + "learning_rate": 1.5787489724305464e-06, + "loss": 0.7544706463813782, + "step": 2938 + }, + { + "epoch": 0.6771889400921659, + "grad_norm": 1.350397583464208, + "learning_rate": 1.5784382359328872e-06, + "loss": 0.8613651990890503, + "step": 2939 + }, + { + "epoch": 0.6774193548387096, + "grad_norm": 1.0225856960398716, + "learning_rate": 1.5781274154756833e-06, + "loss": 0.8695065975189209, + "step": 2940 + }, + { + "epoch": 0.6776497695852535, + "grad_norm": 1.1450515007973723, + "learning_rate": 1.577816511104051e-06, + "loss": 0.9453287720680237, + "step": 2941 + }, + { + "epoch": 0.6778801843317972, + "grad_norm": 0.7720442193305806, + "learning_rate": 1.577505522863117e-06, + "loss": 0.8599261045455933, + "step": 2942 + }, + { + "epoch": 0.678110599078341, + "grad_norm": 0.8831442525084486, + "learning_rate": 1.5771944507980205e-06, + "loss": 0.8143391609191895, + "step": 2943 + }, + { + "epoch": 0.6783410138248848, + "grad_norm": 0.9328639928073722, + "learning_rate": 1.576883294953914e-06, + "loss": 
0.9558438062667847, + "step": 2944 + }, + { + "epoch": 0.6785714285714286, + "grad_norm": 0.6484366074680237, + "learning_rate": 1.5765720553759605e-06, + "loss": 0.7348268628120422, + "step": 2945 + }, + { + "epoch": 0.6788018433179723, + "grad_norm": 1.0387482604326927, + "learning_rate": 1.5762607321093366e-06, + "loss": 0.9361155033111572, + "step": 2946 + }, + { + "epoch": 0.6790322580645162, + "grad_norm": 0.9855095789147831, + "learning_rate": 1.5759493251992303e-06, + "loss": 0.8094985485076904, + "step": 2947 + }, + { + "epoch": 0.6792626728110599, + "grad_norm": 1.631714554631539, + "learning_rate": 1.575637834690842e-06, + "loss": 0.8746658563613892, + "step": 2948 + }, + { + "epoch": 0.6794930875576037, + "grad_norm": 0.9249217331606766, + "learning_rate": 1.575326260629384e-06, + "loss": 0.7433050870895386, + "step": 2949 + }, + { + "epoch": 0.6797235023041475, + "grad_norm": 0.9856239464338491, + "learning_rate": 1.5750146030600808e-06, + "loss": 0.8621053695678711, + "step": 2950 + }, + { + "epoch": 0.6799539170506912, + "grad_norm": 0.9119478915395727, + "learning_rate": 1.5747028620281695e-06, + "loss": 0.7541971206665039, + "step": 2951 + }, + { + "epoch": 0.680184331797235, + "grad_norm": 1.0099311239329205, + "learning_rate": 1.5743910375788982e-06, + "loss": 0.9817987680435181, + "step": 2952 + }, + { + "epoch": 0.6804147465437788, + "grad_norm": 1.046074262522893, + "learning_rate": 1.5740791297575283e-06, + "loss": 0.7763534188270569, + "step": 2953 + }, + { + "epoch": 0.6806451612903226, + "grad_norm": 1.0303747349913415, + "learning_rate": 1.573767138609333e-06, + "loss": 0.7482337355613708, + "step": 2954 + }, + { + "epoch": 0.6808755760368663, + "grad_norm": 1.0308347032013807, + "learning_rate": 1.5734550641795967e-06, + "loss": 0.7352473735809326, + "step": 2955 + }, + { + "epoch": 0.6811059907834102, + "grad_norm": 0.9086715245515472, + "learning_rate": 1.573142906513617e-06, + "loss": 0.8657293319702148, + "step": 2956 + }, + { + 
"epoch": 0.6813364055299539, + "grad_norm": 0.9597438975913184, + "learning_rate": 1.5728306656567033e-06, + "loss": 0.8035376667976379, + "step": 2957 + }, + { + "epoch": 0.6815668202764977, + "grad_norm": 0.9481340627224691, + "learning_rate": 1.572518341654177e-06, + "loss": 0.8030140399932861, + "step": 2958 + }, + { + "epoch": 0.6817972350230415, + "grad_norm": 0.956950799259568, + "learning_rate": 1.5722059345513711e-06, + "loss": 0.797377347946167, + "step": 2959 + }, + { + "epoch": 0.6820276497695853, + "grad_norm": 0.7086079395333297, + "learning_rate": 1.5718934443936311e-06, + "loss": 0.7041053175926208, + "step": 2960 + }, + { + "epoch": 0.682258064516129, + "grad_norm": 1.0251660128790803, + "learning_rate": 1.571580871226315e-06, + "loss": 0.7911885976791382, + "step": 2961 + }, + { + "epoch": 0.6824884792626729, + "grad_norm": 0.8834527581303466, + "learning_rate": 1.5712682150947922e-06, + "loss": 0.7908599376678467, + "step": 2962 + }, + { + "epoch": 0.6827188940092166, + "grad_norm": 0.8159267525070817, + "learning_rate": 1.5709554760444442e-06, + "loss": 0.860281229019165, + "step": 2963 + }, + { + "epoch": 0.6829493087557603, + "grad_norm": 0.8226887233242035, + "learning_rate": 1.5706426541206645e-06, + "loss": 0.6987707018852234, + "step": 2964 + }, + { + "epoch": 0.6831797235023042, + "grad_norm": 0.8719992040747229, + "learning_rate": 1.5703297493688592e-06, + "loss": 0.7198495864868164, + "step": 2965 + }, + { + "epoch": 0.6834101382488479, + "grad_norm": 1.1775957395401402, + "learning_rate": 1.5700167618344455e-06, + "loss": 0.8232598304748535, + "step": 2966 + }, + { + "epoch": 0.6836405529953917, + "grad_norm": 0.8962037845514019, + "learning_rate": 1.569703691562854e-06, + "loss": 0.8425456285476685, + "step": 2967 + }, + { + "epoch": 0.6838709677419355, + "grad_norm": 0.8746880672166448, + "learning_rate": 1.5693905385995252e-06, + "loss": 0.7758797407150269, + "step": 2968 + }, + { + "epoch": 0.6841013824884793, + "grad_norm": 
0.9739325658587258, + "learning_rate": 1.569077302989914e-06, + "loss": 0.7478910684585571, + "step": 2969 + }, + { + "epoch": 0.684331797235023, + "grad_norm": 0.88099670074057, + "learning_rate": 1.5687639847794854e-06, + "loss": 0.8274309635162354, + "step": 2970 + }, + { + "epoch": 0.6845622119815669, + "grad_norm": 0.9125307567181903, + "learning_rate": 1.5684505840137173e-06, + "loss": 0.6800183653831482, + "step": 2971 + }, + { + "epoch": 0.6847926267281106, + "grad_norm": 1.1416810893109246, + "learning_rate": 1.5681371007380996e-06, + "loss": 0.7768006324768066, + "step": 2972 + }, + { + "epoch": 0.6850230414746544, + "grad_norm": 0.8308804334079786, + "learning_rate": 1.5678235349981338e-06, + "loss": 0.7462732195854187, + "step": 2973 + }, + { + "epoch": 0.6852534562211982, + "grad_norm": 0.935725297382271, + "learning_rate": 1.5675098868393335e-06, + "loss": 0.8461781144142151, + "step": 2974 + }, + { + "epoch": 0.6854838709677419, + "grad_norm": 0.9717984846524689, + "learning_rate": 1.5671961563072244e-06, + "loss": 0.7968491911888123, + "step": 2975 + }, + { + "epoch": 0.6857142857142857, + "grad_norm": 0.9710985084042064, + "learning_rate": 1.5668823434473443e-06, + "loss": 0.805394172668457, + "step": 2976 + }, + { + "epoch": 0.6859447004608294, + "grad_norm": 0.9297793560483373, + "learning_rate": 1.5665684483052424e-06, + "loss": 0.7241736650466919, + "step": 2977 + }, + { + "epoch": 0.6861751152073733, + "grad_norm": 0.9673260038513803, + "learning_rate": 1.5662544709264801e-06, + "loss": 0.7345866560935974, + "step": 2978 + }, + { + "epoch": 0.686405529953917, + "grad_norm": 0.8604134561659843, + "learning_rate": 1.5659404113566312e-06, + "loss": 0.7605085372924805, + "step": 2979 + }, + { + "epoch": 0.6866359447004609, + "grad_norm": 0.9618303204830516, + "learning_rate": 1.5656262696412808e-06, + "loss": 0.8555188179016113, + "step": 2980 + }, + { + "epoch": 0.6868663594470046, + "grad_norm": 0.8604009092225049, + "learning_rate": 
1.5653120458260261e-06, + "loss": 0.7139542698860168, + "step": 2981 + }, + { + "epoch": 0.6870967741935484, + "grad_norm": 0.9290410772154322, + "learning_rate": 1.564997739956476e-06, + "loss": 0.8676587343215942, + "step": 2982 + }, + { + "epoch": 0.6873271889400921, + "grad_norm": 0.9524807718966832, + "learning_rate": 1.5646833520782523e-06, + "loss": 0.8121025562286377, + "step": 2983 + }, + { + "epoch": 0.687557603686636, + "grad_norm": 0.7889521702672326, + "learning_rate": 1.5643688822369873e-06, + "loss": 0.7757136821746826, + "step": 2984 + }, + { + "epoch": 0.6877880184331797, + "grad_norm": 0.8884194014759353, + "learning_rate": 1.5640543304783264e-06, + "loss": 0.8357381820678711, + "step": 2985 + }, + { + "epoch": 0.6880184331797236, + "grad_norm": 0.9725078170053829, + "learning_rate": 1.563739696847926e-06, + "loss": 0.8635811805725098, + "step": 2986 + }, + { + "epoch": 0.6882488479262673, + "grad_norm": 0.9539959391598165, + "learning_rate": 1.563424981391455e-06, + "loss": 0.90900057554245, + "step": 2987 + }, + { + "epoch": 0.688479262672811, + "grad_norm": 1.056070683011334, + "learning_rate": 1.563110184154594e-06, + "loss": 0.9001314043998718, + "step": 2988 + }, + { + "epoch": 0.6887096774193548, + "grad_norm": 0.7893194308475292, + "learning_rate": 1.5627953051830353e-06, + "loss": 0.7482000589370728, + "step": 2989 + }, + { + "epoch": 0.6889400921658986, + "grad_norm": 1.0183435769639337, + "learning_rate": 1.5624803445224829e-06, + "loss": 0.8504235744476318, + "step": 2990 + }, + { + "epoch": 0.6891705069124424, + "grad_norm": 0.9687684393899343, + "learning_rate": 1.5621653022186526e-06, + "loss": 0.7887089252471924, + "step": 2991 + }, + { + "epoch": 0.6894009216589861, + "grad_norm": 0.9412995775666883, + "learning_rate": 1.5618501783172735e-06, + "loss": 0.8745719790458679, + "step": 2992 + }, + { + "epoch": 0.68963133640553, + "grad_norm": 0.8960957701589951, + "learning_rate": 1.5615349728640848e-06, + "loss": 0.8269633054733276, 
+ "step": 2993 + }, + { + "epoch": 0.6898617511520737, + "grad_norm": 0.802430248071724, + "learning_rate": 1.5612196859048382e-06, + "loss": 0.7355072498321533, + "step": 2994 + }, + { + "epoch": 0.6900921658986175, + "grad_norm": 0.9768940563158048, + "learning_rate": 1.5609043174852966e-06, + "loss": 0.857653021812439, + "step": 2995 + }, + { + "epoch": 0.6903225806451613, + "grad_norm": 1.0766498115550724, + "learning_rate": 1.5605888676512365e-06, + "loss": 0.8575785160064697, + "step": 2996 + }, + { + "epoch": 0.6905529953917051, + "grad_norm": 0.8803208034747956, + "learning_rate": 1.560273336448444e-06, + "loss": 0.8631561994552612, + "step": 2997 + }, + { + "epoch": 0.6907834101382488, + "grad_norm": 1.0014936433552548, + "learning_rate": 1.5599577239227185e-06, + "loss": 0.7993800044059753, + "step": 2998 + }, + { + "epoch": 0.6910138248847926, + "grad_norm": 0.8990076202156756, + "learning_rate": 1.5596420301198707e-06, + "loss": 0.7961007356643677, + "step": 2999 + }, + { + "epoch": 0.6912442396313364, + "grad_norm": 1.0216355950582598, + "learning_rate": 1.5593262550857232e-06, + "loss": 0.7536421418190002, + "step": 3000 + }, + { + "epoch": 0.6914746543778801, + "grad_norm": 0.8348839196110558, + "learning_rate": 1.55901039886611e-06, + "loss": 0.70341956615448, + "step": 3001 + }, + { + "epoch": 0.691705069124424, + "grad_norm": 1.0093771985733984, + "learning_rate": 1.5586944615068776e-06, + "loss": 0.8152127265930176, + "step": 3002 + }, + { + "epoch": 0.6919354838709677, + "grad_norm": 0.9332692294841357, + "learning_rate": 1.5583784430538838e-06, + "loss": 0.6728770732879639, + "step": 3003 + }, + { + "epoch": 0.6921658986175115, + "grad_norm": 1.0871891474224546, + "learning_rate": 1.558062343552998e-06, + "loss": 0.8406884670257568, + "step": 3004 + }, + { + "epoch": 0.6923963133640553, + "grad_norm": 0.8920706269230131, + "learning_rate": 1.5577461630501018e-06, + "loss": 0.766754686832428, + "step": 3005 + }, + { + "epoch": 
0.6926267281105991, + "grad_norm": 0.714004026253109, + "learning_rate": 1.5574299015910889e-06, + "loss": 0.7456642389297485, + "step": 3006 + }, + { + "epoch": 0.6928571428571428, + "grad_norm": 0.8290815943958627, + "learning_rate": 1.557113559221863e-06, + "loss": 0.7834097743034363, + "step": 3007 + }, + { + "epoch": 0.6930875576036867, + "grad_norm": 0.91346801287595, + "learning_rate": 1.556797135988342e-06, + "loss": 0.7425946593284607, + "step": 3008 + }, + { + "epoch": 0.6933179723502304, + "grad_norm": 1.0483330104966306, + "learning_rate": 1.5564806319364534e-06, + "loss": 0.7914093732833862, + "step": 3009 + }, + { + "epoch": 0.6935483870967742, + "grad_norm": 0.9665010461345012, + "learning_rate": 1.556164047112138e-06, + "loss": 0.819783091545105, + "step": 3010 + }, + { + "epoch": 0.693778801843318, + "grad_norm": 0.985903986481312, + "learning_rate": 1.5558473815613474e-06, + "loss": 0.7147302627563477, + "step": 3011 + }, + { + "epoch": 0.6940092165898617, + "grad_norm": 1.1240220664371217, + "learning_rate": 1.5555306353300452e-06, + "loss": 0.7247470617294312, + "step": 3012 + }, + { + "epoch": 0.6942396313364055, + "grad_norm": 1.2403633886338306, + "learning_rate": 1.5552138084642067e-06, + "loss": 0.8277294635772705, + "step": 3013 + }, + { + "epoch": 0.6944700460829493, + "grad_norm": 0.9054626931882043, + "learning_rate": 1.554896901009819e-06, + "loss": 0.8014394640922546, + "step": 3014 + }, + { + "epoch": 0.6947004608294931, + "grad_norm": 0.9274937399954835, + "learning_rate": 1.5545799130128808e-06, + "loss": 0.7468869686126709, + "step": 3015 + }, + { + "epoch": 0.6949308755760368, + "grad_norm": 0.8904964499744723, + "learning_rate": 1.554262844519402e-06, + "loss": 0.7854933142662048, + "step": 3016 + }, + { + "epoch": 0.6951612903225807, + "grad_norm": 0.9536718451900233, + "learning_rate": 1.5539456955754053e-06, + "loss": 0.8359543681144714, + "step": 3017 + }, + { + "epoch": 0.6953917050691244, + "grad_norm": 0.8313774511874621, 
+ "learning_rate": 1.5536284662269243e-06, + "loss": 0.7767773866653442, + "step": 3018 + }, + { + "epoch": 0.6956221198156682, + "grad_norm": 0.7370790678700915, + "learning_rate": 1.5533111565200044e-06, + "loss": 0.8388162851333618, + "step": 3019 + }, + { + "epoch": 0.695852534562212, + "grad_norm": 0.9159856551917743, + "learning_rate": 1.5529937665007024e-06, + "loss": 0.7791208028793335, + "step": 3020 + }, + { + "epoch": 0.6960829493087558, + "grad_norm": 0.9740300384215894, + "learning_rate": 1.5526762962150875e-06, + "loss": 0.8662698864936829, + "step": 3021 + }, + { + "epoch": 0.6963133640552995, + "grad_norm": 0.7004253764922403, + "learning_rate": 1.5523587457092394e-06, + "loss": 0.737492024898529, + "step": 3022 + }, + { + "epoch": 0.6965437788018434, + "grad_norm": 1.0408775765092733, + "learning_rate": 1.552041115029251e-06, + "loss": 0.83610999584198, + "step": 3023 + }, + { + "epoch": 0.6967741935483871, + "grad_norm": 1.1134023704947162, + "learning_rate": 1.5517234042212254e-06, + "loss": 0.930977463722229, + "step": 3024 + }, + { + "epoch": 0.6970046082949308, + "grad_norm": 0.8756044667716456, + "learning_rate": 1.551405613331278e-06, + "loss": 0.7587058544158936, + "step": 3025 + }, + { + "epoch": 0.6972350230414747, + "grad_norm": 0.7720525053545241, + "learning_rate": 1.551087742405536e-06, + "loss": 0.7549247741699219, + "step": 3026 + }, + { + "epoch": 0.6974654377880184, + "grad_norm": 0.8108175030001162, + "learning_rate": 1.5507697914901376e-06, + "loss": 0.6906812787055969, + "step": 3027 + }, + { + "epoch": 0.6976958525345622, + "grad_norm": 0.7358502568670926, + "learning_rate": 1.5504517606312332e-06, + "loss": 0.7806124687194824, + "step": 3028 + }, + { + "epoch": 0.697926267281106, + "grad_norm": 0.8191496367359047, + "learning_rate": 1.5501336498749846e-06, + "loss": 0.8091036081314087, + "step": 3029 + }, + { + "epoch": 0.6981566820276498, + "grad_norm": 0.923718506351422, + "learning_rate": 1.5498154592675646e-06, + "loss": 
0.721937894821167, + "step": 3030 + }, + { + "epoch": 0.6983870967741935, + "grad_norm": 0.729194360630959, + "learning_rate": 1.5494971888551587e-06, + "loss": 0.712378740310669, + "step": 3031 + }, + { + "epoch": 0.6986175115207374, + "grad_norm": 0.9809936276606201, + "learning_rate": 1.5491788386839635e-06, + "loss": 0.8106495141983032, + "step": 3032 + }, + { + "epoch": 0.6988479262672811, + "grad_norm": 1.0550994014291641, + "learning_rate": 1.5488604088001866e-06, + "loss": 0.7886521816253662, + "step": 3033 + }, + { + "epoch": 0.6990783410138249, + "grad_norm": 0.9413909460240358, + "learning_rate": 1.5485418992500479e-06, + "loss": 0.7483402490615845, + "step": 3034 + }, + { + "epoch": 0.6993087557603687, + "grad_norm": 0.9735513924670123, + "learning_rate": 1.5482233100797788e-06, + "loss": 0.6236725449562073, + "step": 3035 + }, + { + "epoch": 0.6995391705069124, + "grad_norm": 1.023064942988146, + "learning_rate": 1.5479046413356222e-06, + "loss": 0.9477910995483398, + "step": 3036 + }, + { + "epoch": 0.6997695852534562, + "grad_norm": 1.0993186685690193, + "learning_rate": 1.5475858930638322e-06, + "loss": 0.8921213746070862, + "step": 3037 + }, + { + "epoch": 0.7, + "grad_norm": 0.7179145673247356, + "learning_rate": 1.5472670653106744e-06, + "loss": 0.7460963726043701, + "step": 3038 + }, + { + "epoch": 0.7002304147465438, + "grad_norm": 0.8319225077693166, + "learning_rate": 1.5469481581224271e-06, + "loss": 0.6135849356651306, + "step": 3039 + }, + { + "epoch": 0.7004608294930875, + "grad_norm": 0.8739744675210649, + "learning_rate": 1.546629171545378e-06, + "loss": 0.8039313554763794, + "step": 3040 + }, + { + "epoch": 0.7006912442396314, + "grad_norm": 1.2210857419731846, + "learning_rate": 1.5463101056258289e-06, + "loss": 0.8751651048660278, + "step": 3041 + }, + { + "epoch": 0.7009216589861751, + "grad_norm": 0.9070575590392688, + "learning_rate": 1.545990960410091e-06, + "loss": 0.7600879669189453, + "step": 3042 + }, + { + "epoch": 
0.7011520737327189, + "grad_norm": 0.9983949583794295, + "learning_rate": 1.545671735944488e-06, + "loss": 0.8118841648101807, + "step": 3043 + }, + { + "epoch": 0.7013824884792627, + "grad_norm": 0.7470799565000998, + "learning_rate": 1.5453524322753546e-06, + "loss": 0.7144184112548828, + "step": 3044 + }, + { + "epoch": 0.7016129032258065, + "grad_norm": 1.149288210915265, + "learning_rate": 1.545033049449038e-06, + "loss": 0.9730075001716614, + "step": 3045 + }, + { + "epoch": 0.7018433179723502, + "grad_norm": 0.9334735321523672, + "learning_rate": 1.5447135875118957e-06, + "loss": 0.6930910348892212, + "step": 3046 + }, + { + "epoch": 0.7020737327188941, + "grad_norm": 1.0190518922073715, + "learning_rate": 1.5443940465102973e-06, + "loss": 0.8517031669616699, + "step": 3047 + }, + { + "epoch": 0.7023041474654378, + "grad_norm": 0.9199109424213672, + "learning_rate": 1.5440744264906237e-06, + "loss": 0.7939779758453369, + "step": 3048 + }, + { + "epoch": 0.7025345622119815, + "grad_norm": 1.0310125567194028, + "learning_rate": 1.5437547274992672e-06, + "loss": 0.8946782350540161, + "step": 3049 + }, + { + "epoch": 0.7027649769585254, + "grad_norm": 1.1682685309372194, + "learning_rate": 1.543434949582632e-06, + "loss": 0.9273954033851624, + "step": 3050 + }, + { + "epoch": 0.7029953917050691, + "grad_norm": 0.8496559046178408, + "learning_rate": 1.5431150927871333e-06, + "loss": 0.7731457352638245, + "step": 3051 + }, + { + "epoch": 0.7032258064516129, + "grad_norm": 0.9900519408386056, + "learning_rate": 1.542795157159198e-06, + "loss": 0.7982608079910278, + "step": 3052 + }, + { + "epoch": 0.7034562211981567, + "grad_norm": 1.0252185126476046, + "learning_rate": 1.542475142745264e-06, + "loss": 0.8422989845275879, + "step": 3053 + }, + { + "epoch": 0.7036866359447005, + "grad_norm": 1.1364598749635721, + "learning_rate": 1.542155049591781e-06, + "loss": 0.8344876766204834, + "step": 3054 + }, + { + "epoch": 0.7039170506912442, + "grad_norm": 
1.3240029855230715, + "learning_rate": 1.541834877745211e-06, + "loss": 0.8830629587173462, + "step": 3055 + }, + { + "epoch": 0.7041474654377881, + "grad_norm": 0.8841605120149971, + "learning_rate": 1.5415146272520247e-06, + "loss": 0.823864221572876, + "step": 3056 + }, + { + "epoch": 0.7043778801843318, + "grad_norm": 1.226256029650695, + "learning_rate": 1.5411942981587077e-06, + "loss": 0.8577016592025757, + "step": 3057 + }, + { + "epoch": 0.7046082949308756, + "grad_norm": 0.9938154526101401, + "learning_rate": 1.540873890511755e-06, + "loss": 0.7431750297546387, + "step": 3058 + }, + { + "epoch": 0.7048387096774194, + "grad_norm": 1.3100911793106818, + "learning_rate": 1.5405534043576729e-06, + "loss": 0.8219394683837891, + "step": 3059 + }, + { + "epoch": 0.7050691244239631, + "grad_norm": 0.8179546123014678, + "learning_rate": 1.5402328397429795e-06, + "loss": 0.706437349319458, + "step": 3060 + }, + { + "epoch": 0.7052995391705069, + "grad_norm": 0.9400567182130463, + "learning_rate": 1.5399121967142051e-06, + "loss": 0.8669443130493164, + "step": 3061 + }, + { + "epoch": 0.7055299539170506, + "grad_norm": 0.9808762608140087, + "learning_rate": 1.5395914753178897e-06, + "loss": 0.7995564937591553, + "step": 3062 + }, + { + "epoch": 0.7057603686635945, + "grad_norm": 1.0691077372052262, + "learning_rate": 1.5392706756005862e-06, + "loss": 0.7840889692306519, + "step": 3063 + }, + { + "epoch": 0.7059907834101382, + "grad_norm": 0.9593102373354429, + "learning_rate": 1.5389497976088582e-06, + "loss": 0.8231604695320129, + "step": 3064 + }, + { + "epoch": 0.706221198156682, + "grad_norm": 1.0423471516482703, + "learning_rate": 1.5386288413892801e-06, + "loss": 0.7821571826934814, + "step": 3065 + }, + { + "epoch": 0.7064516129032258, + "grad_norm": 0.9221304357539406, + "learning_rate": 1.538307806988439e-06, + "loss": 0.736830472946167, + "step": 3066 + }, + { + "epoch": 0.7066820276497696, + "grad_norm": 0.8124713959576904, + "learning_rate": 
1.537986694452932e-06, + "loss": 0.7783113718032837, + "step": 3067 + }, + { + "epoch": 0.7069124423963133, + "grad_norm": 0.8679700879266566, + "learning_rate": 1.5376655038293692e-06, + "loss": 0.8000421524047852, + "step": 3068 + }, + { + "epoch": 0.7071428571428572, + "grad_norm": 0.8513728527683974, + "learning_rate": 1.5373442351643696e-06, + "loss": 0.7446980476379395, + "step": 3069 + }, + { + "epoch": 0.7073732718894009, + "grad_norm": 0.8188336762916474, + "learning_rate": 1.537022888504566e-06, + "loss": 0.7018321752548218, + "step": 3070 + }, + { + "epoch": 0.7076036866359448, + "grad_norm": 0.8259052522128728, + "learning_rate": 1.5367014638966008e-06, + "loss": 0.6903716325759888, + "step": 3071 + }, + { + "epoch": 0.7078341013824885, + "grad_norm": 1.0909385113291765, + "learning_rate": 1.5363799613871289e-06, + "loss": 0.9635254144668579, + "step": 3072 + }, + { + "epoch": 0.7080645161290322, + "grad_norm": 0.7335179559352851, + "learning_rate": 1.5360583810228156e-06, + "loss": 0.8612154722213745, + "step": 3073 + }, + { + "epoch": 0.708294930875576, + "grad_norm": 0.9395034612023028, + "learning_rate": 1.5357367228503376e-06, + "loss": 0.8632407784461975, + "step": 3074 + }, + { + "epoch": 0.7085253456221198, + "grad_norm": 0.9383639731759232, + "learning_rate": 1.5354149869163839e-06, + "loss": 0.8117856979370117, + "step": 3075 + }, + { + "epoch": 0.7087557603686636, + "grad_norm": 0.9770895875008837, + "learning_rate": 1.5350931732676538e-06, + "loss": 0.8062559366226196, + "step": 3076 + }, + { + "epoch": 0.7089861751152073, + "grad_norm": 0.9191794034062433, + "learning_rate": 1.5347712819508576e-06, + "loss": 0.7918965816497803, + "step": 3077 + }, + { + "epoch": 0.7092165898617512, + "grad_norm": 0.7897301018455927, + "learning_rate": 1.534449313012718e-06, + "loss": 0.7564986944198608, + "step": 3078 + }, + { + "epoch": 0.7094470046082949, + "grad_norm": 0.774017262501344, + "learning_rate": 1.534127266499968e-06, + "loss": 
0.8261928558349609, + "step": 3079 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.9288792217475005, + "learning_rate": 1.5338051424593524e-06, + "loss": 0.705269455909729, + "step": 3080 + }, + { + "epoch": 0.7099078341013825, + "grad_norm": 0.8500383243043894, + "learning_rate": 1.5334829409376271e-06, + "loss": 0.823144793510437, + "step": 3081 + }, + { + "epoch": 0.7101382488479263, + "grad_norm": 0.7512588375717618, + "learning_rate": 1.5331606619815588e-06, + "loss": 0.7772066593170166, + "step": 3082 + }, + { + "epoch": 0.71036866359447, + "grad_norm": 1.0827682012637947, + "learning_rate": 1.5328383056379265e-06, + "loss": 0.8901097178459167, + "step": 3083 + }, + { + "epoch": 0.7105990783410139, + "grad_norm": 0.9540489638748495, + "learning_rate": 1.5325158719535196e-06, + "loss": 0.8454819917678833, + "step": 3084 + }, + { + "epoch": 0.7108294930875576, + "grad_norm": 0.8879734338037916, + "learning_rate": 1.5321933609751388e-06, + "loss": 0.8444693684577942, + "step": 3085 + }, + { + "epoch": 0.7110599078341013, + "grad_norm": 1.0157021807199436, + "learning_rate": 1.5318707727495964e-06, + "loss": 0.7893826961517334, + "step": 3086 + }, + { + "epoch": 0.7112903225806452, + "grad_norm": 0.9711563338551928, + "learning_rate": 1.531548107323715e-06, + "loss": 0.7536686658859253, + "step": 3087 + }, + { + "epoch": 0.7115207373271889, + "grad_norm": 1.1272305964721914, + "learning_rate": 1.53122536474433e-06, + "loss": 0.8105358481407166, + "step": 3088 + }, + { + "epoch": 0.7117511520737327, + "grad_norm": 0.8430783893005721, + "learning_rate": 1.530902545058286e-06, + "loss": 0.8104212284088135, + "step": 3089 + }, + { + "epoch": 0.7119815668202765, + "grad_norm": 1.1740010494566606, + "learning_rate": 1.5305796483124405e-06, + "loss": 0.7738373279571533, + "step": 3090 + }, + { + "epoch": 0.7122119815668203, + "grad_norm": 0.8346644560955941, + "learning_rate": 1.5302566745536618e-06, + "loss": 0.7583746910095215, + "step": 3091 + }, + { + 
"epoch": 0.712442396313364, + "grad_norm": 1.0290772907257426, + "learning_rate": 1.5299336238288286e-06, + "loss": 0.8370871543884277, + "step": 3092 + }, + { + "epoch": 0.7126728110599079, + "grad_norm": 0.8908237623549358, + "learning_rate": 1.5296104961848314e-06, + "loss": 0.7833988666534424, + "step": 3093 + }, + { + "epoch": 0.7129032258064516, + "grad_norm": 1.135734716262211, + "learning_rate": 1.5292872916685717e-06, + "loss": 0.8024515509605408, + "step": 3094 + }, + { + "epoch": 0.7131336405529954, + "grad_norm": 0.8156588034123838, + "learning_rate": 1.5289640103269623e-06, + "loss": 0.8044738173484802, + "step": 3095 + }, + { + "epoch": 0.7133640552995392, + "grad_norm": 0.846268334708117, + "learning_rate": 1.5286406522069273e-06, + "loss": 0.7783721685409546, + "step": 3096 + }, + { + "epoch": 0.7135944700460829, + "grad_norm": 0.8004616169511741, + "learning_rate": 1.5283172173554014e-06, + "loss": 0.693443238735199, + "step": 3097 + }, + { + "epoch": 0.7138248847926267, + "grad_norm": 0.9862921565687749, + "learning_rate": 1.527993705819331e-06, + "loss": 0.8142237663269043, + "step": 3098 + }, + { + "epoch": 0.7140552995391705, + "grad_norm": 0.9077662799949481, + "learning_rate": 1.5276701176456726e-06, + "loss": 0.790626049041748, + "step": 3099 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 1.0485200242859731, + "learning_rate": 1.5273464528813953e-06, + "loss": 0.9460805654525757, + "step": 3100 + }, + { + "epoch": 0.714516129032258, + "grad_norm": 0.902776913050398, + "learning_rate": 1.5270227115734789e-06, + "loss": 0.6906337738037109, + "step": 3101 + }, + { + "epoch": 0.7147465437788019, + "grad_norm": 0.8514512995363496, + "learning_rate": 1.526698893768913e-06, + "loss": 0.8828556537628174, + "step": 3102 + }, + { + "epoch": 0.7149769585253456, + "grad_norm": 1.0568586756231748, + "learning_rate": 1.5263749995147004e-06, + "loss": 0.8395771980285645, + "step": 3103 + }, + { + "epoch": 0.7152073732718894, + "grad_norm": 
0.814014727084384, + "learning_rate": 1.5260510288578535e-06, + "loss": 0.7103895545005798, + "step": 3104 + }, + { + "epoch": 0.7154377880184332, + "grad_norm": 1.0670304040497072, + "learning_rate": 1.5257269818453956e-06, + "loss": 0.9780298471450806, + "step": 3105 + }, + { + "epoch": 0.715668202764977, + "grad_norm": 0.777700102492748, + "learning_rate": 1.525402858524363e-06, + "loss": 0.8176128268241882, + "step": 3106 + }, + { + "epoch": 0.7158986175115207, + "grad_norm": 0.8127092170976247, + "learning_rate": 1.5250786589418008e-06, + "loss": 0.6766567230224609, + "step": 3107 + }, + { + "epoch": 0.7161290322580646, + "grad_norm": 0.8076252538068988, + "learning_rate": 1.5247543831447662e-06, + "loss": 0.7910950183868408, + "step": 3108 + }, + { + "epoch": 0.7163594470046083, + "grad_norm": 0.76882132080824, + "learning_rate": 1.5244300311803275e-06, + "loss": 0.8444501161575317, + "step": 3109 + }, + { + "epoch": 0.716589861751152, + "grad_norm": 0.9073390489490682, + "learning_rate": 1.5241056030955642e-06, + "loss": 0.7180038690567017, + "step": 3110 + }, + { + "epoch": 0.7168202764976959, + "grad_norm": 0.8535510406326756, + "learning_rate": 1.5237810989375663e-06, + "loss": 0.8563181757926941, + "step": 3111 + }, + { + "epoch": 0.7170506912442396, + "grad_norm": 0.7281554723991874, + "learning_rate": 1.5234565187534353e-06, + "loss": 0.7792840003967285, + "step": 3112 + }, + { + "epoch": 0.7172811059907834, + "grad_norm": 1.2546504724448617, + "learning_rate": 1.5231318625902835e-06, + "loss": 0.8414837121963501, + "step": 3113 + }, + { + "epoch": 0.7175115207373272, + "grad_norm": 0.9151299107605344, + "learning_rate": 1.5228071304952348e-06, + "loss": 0.8549888134002686, + "step": 3114 + }, + { + "epoch": 0.717741935483871, + "grad_norm": 0.8858229770055023, + "learning_rate": 1.5224823225154228e-06, + "loss": 0.7973321676254272, + "step": 3115 + }, + { + "epoch": 0.7179723502304147, + "grad_norm": 0.8923496131316503, + "learning_rate": 
1.5221574386979937e-06, + "loss": 0.7328228950500488, + "step": 3116 + }, + { + "epoch": 0.7182027649769586, + "grad_norm": 0.8315355877258431, + "learning_rate": 1.5218324790901033e-06, + "loss": 0.8953883051872253, + "step": 3117 + }, + { + "epoch": 0.7184331797235023, + "grad_norm": 0.8252416441396693, + "learning_rate": 1.5215074437389195e-06, + "loss": 0.7804527282714844, + "step": 3118 + }, + { + "epoch": 0.7186635944700461, + "grad_norm": 1.0592650685202745, + "learning_rate": 1.5211823326916204e-06, + "loss": 0.7581363320350647, + "step": 3119 + }, + { + "epoch": 0.7188940092165899, + "grad_norm": 0.9812896234713268, + "learning_rate": 1.520857145995396e-06, + "loss": 0.7720214128494263, + "step": 3120 + }, + { + "epoch": 0.7191244239631336, + "grad_norm": 0.8448153689850479, + "learning_rate": 1.5205318836974463e-06, + "loss": 0.7142826914787292, + "step": 3121 + }, + { + "epoch": 0.7193548387096774, + "grad_norm": 1.0627992363231917, + "learning_rate": 1.520206545844983e-06, + "loss": 0.715612530708313, + "step": 3122 + }, + { + "epoch": 0.7195852534562212, + "grad_norm": 1.1048993433011334, + "learning_rate": 1.5198811324852277e-06, + "loss": 0.8851219415664673, + "step": 3123 + }, + { + "epoch": 0.719815668202765, + "grad_norm": 0.9292687584217408, + "learning_rate": 1.5195556436654146e-06, + "loss": 0.981631875038147, + "step": 3124 + }, + { + "epoch": 0.7200460829493087, + "grad_norm": 1.043088312445038, + "learning_rate": 1.5192300794327876e-06, + "loss": 0.8586313724517822, + "step": 3125 + }, + { + "epoch": 0.7202764976958526, + "grad_norm": 1.082548105463139, + "learning_rate": 1.518904439834602e-06, + "loss": 0.8863250017166138, + "step": 3126 + }, + { + "epoch": 0.7205069124423963, + "grad_norm": 0.8136107336174612, + "learning_rate": 1.5185787249181239e-06, + "loss": 0.864910900592804, + "step": 3127 + }, + { + "epoch": 0.7207373271889401, + "grad_norm": 0.9898417106954193, + "learning_rate": 1.5182529347306302e-06, + "loss": 
0.8120951652526855, + "step": 3128 + }, + { + "epoch": 0.7209677419354839, + "grad_norm": 1.008844559262399, + "learning_rate": 1.517927069319409e-06, + "loss": 0.7866026163101196, + "step": 3129 + }, + { + "epoch": 0.7211981566820277, + "grad_norm": 0.9577789377394936, + "learning_rate": 1.5176011287317598e-06, + "loss": 0.8610655069351196, + "step": 3130 + }, + { + "epoch": 0.7214285714285714, + "grad_norm": 0.8861108738387133, + "learning_rate": 1.5172751130149915e-06, + "loss": 0.7463846206665039, + "step": 3131 + }, + { + "epoch": 0.7216589861751153, + "grad_norm": 0.7361410685782023, + "learning_rate": 1.5169490222164254e-06, + "loss": 0.6578936576843262, + "step": 3132 + }, + { + "epoch": 0.721889400921659, + "grad_norm": 0.9361369886672088, + "learning_rate": 1.516622856383393e-06, + "loss": 0.6849668025970459, + "step": 3133 + }, + { + "epoch": 0.7221198156682027, + "grad_norm": 1.0686822202217916, + "learning_rate": 1.5162966155632372e-06, + "loss": 0.9549611806869507, + "step": 3134 + }, + { + "epoch": 0.7223502304147466, + "grad_norm": 0.9063080856885865, + "learning_rate": 1.5159702998033113e-06, + "loss": 0.8005616664886475, + "step": 3135 + }, + { + "epoch": 0.7225806451612903, + "grad_norm": 1.089721709643384, + "learning_rate": 1.5156439091509793e-06, + "loss": 0.8980830311775208, + "step": 3136 + }, + { + "epoch": 0.7228110599078341, + "grad_norm": 1.012161312959267, + "learning_rate": 1.5153174436536166e-06, + "loss": 0.8247464895248413, + "step": 3137 + }, + { + "epoch": 0.7230414746543778, + "grad_norm": 0.9582357561913161, + "learning_rate": 1.5149909033586088e-06, + "loss": 0.818629264831543, + "step": 3138 + }, + { + "epoch": 0.7232718894009217, + "grad_norm": 0.7730251673290138, + "learning_rate": 1.5146642883133532e-06, + "loss": 0.8928704261779785, + "step": 3139 + }, + { + "epoch": 0.7235023041474654, + "grad_norm": 1.199560365249708, + "learning_rate": 1.5143375985652576e-06, + "loss": 0.9330282807350159, + "step": 3140 + }, + { + 
"epoch": 0.7237327188940093, + "grad_norm": 0.9749101527395967, + "learning_rate": 1.5140108341617405e-06, + "loss": 0.7961822748184204, + "step": 3141 + }, + { + "epoch": 0.723963133640553, + "grad_norm": 0.9244859383947029, + "learning_rate": 1.513683995150231e-06, + "loss": 0.8073769807815552, + "step": 3142 + }, + { + "epoch": 0.7241935483870968, + "grad_norm": 1.0469784848396728, + "learning_rate": 1.51335708157817e-06, + "loss": 0.946292519569397, + "step": 3143 + }, + { + "epoch": 0.7244239631336405, + "grad_norm": 0.8214787899217685, + "learning_rate": 1.513030093493008e-06, + "loss": 0.806084156036377, + "step": 3144 + }, + { + "epoch": 0.7246543778801844, + "grad_norm": 0.9086362129225068, + "learning_rate": 1.5127030309422072e-06, + "loss": 0.8804534673690796, + "step": 3145 + }, + { + "epoch": 0.7248847926267281, + "grad_norm": 0.973773267534968, + "learning_rate": 1.51237589397324e-06, + "loss": 0.7489848136901855, + "step": 3146 + }, + { + "epoch": 0.7251152073732718, + "grad_norm": 1.047973105384132, + "learning_rate": 1.5120486826335905e-06, + "loss": 0.875586986541748, + "step": 3147 + }, + { + "epoch": 0.7253456221198157, + "grad_norm": 0.8473382638758681, + "learning_rate": 1.5117213969707522e-06, + "loss": 0.8334758281707764, + "step": 3148 + }, + { + "epoch": 0.7255760368663594, + "grad_norm": 0.8693445792084491, + "learning_rate": 1.5113940370322306e-06, + "loss": 0.8010859489440918, + "step": 3149 + }, + { + "epoch": 0.7258064516129032, + "grad_norm": 0.8638975130346471, + "learning_rate": 1.5110666028655417e-06, + "loss": 0.7907547950744629, + "step": 3150 + }, + { + "epoch": 0.726036866359447, + "grad_norm": 0.9542895726151109, + "learning_rate": 1.5107390945182117e-06, + "loss": 0.8922848105430603, + "step": 3151 + }, + { + "epoch": 0.7262672811059908, + "grad_norm": 0.7865624103758176, + "learning_rate": 1.5104115120377783e-06, + "loss": 0.7418628931045532, + "step": 3152 + }, + { + "epoch": 0.7264976958525345, + "grad_norm": 
1.0285540479216404, + "learning_rate": 1.51008385547179e-06, + "loss": 0.9063338041305542, + "step": 3153 + }, + { + "epoch": 0.7267281105990784, + "grad_norm": 1.0080575916686718, + "learning_rate": 1.5097561248678047e-06, + "loss": 0.8718822002410889, + "step": 3154 + }, + { + "epoch": 0.7269585253456221, + "grad_norm": 1.0055226715830414, + "learning_rate": 1.5094283202733934e-06, + "loss": 0.950742244720459, + "step": 3155 + }, + { + "epoch": 0.727188940092166, + "grad_norm": 1.126636802719941, + "learning_rate": 1.5091004417361353e-06, + "loss": 0.7963443994522095, + "step": 3156 + }, + { + "epoch": 0.7274193548387097, + "grad_norm": 1.0644638923319971, + "learning_rate": 1.5087724893036225e-06, + "loss": 0.8428621888160706, + "step": 3157 + }, + { + "epoch": 0.7276497695852534, + "grad_norm": 1.0421355661787988, + "learning_rate": 1.508444463023456e-06, + "loss": 0.8271539211273193, + "step": 3158 + }, + { + "epoch": 0.7278801843317972, + "grad_norm": 0.7345991655152693, + "learning_rate": 1.508116362943249e-06, + "loss": 0.7899917364120483, + "step": 3159 + }, + { + "epoch": 0.728110599078341, + "grad_norm": 1.1916065857121023, + "learning_rate": 1.5077881891106246e-06, + "loss": 0.8734809160232544, + "step": 3160 + }, + { + "epoch": 0.7283410138248848, + "grad_norm": 1.0138536766133128, + "learning_rate": 1.5074599415732164e-06, + "loss": 0.7740491628646851, + "step": 3161 + }, + { + "epoch": 0.7285714285714285, + "grad_norm": 0.8952462084516831, + "learning_rate": 1.5071316203786698e-06, + "loss": 0.7219515442848206, + "step": 3162 + }, + { + "epoch": 0.7288018433179724, + "grad_norm": 0.7779518912065628, + "learning_rate": 1.50680322557464e-06, + "loss": 0.8122725486755371, + "step": 3163 + }, + { + "epoch": 0.7290322580645161, + "grad_norm": 0.9965727720770509, + "learning_rate": 1.5064747572087923e-06, + "loss": 0.8280072212219238, + "step": 3164 + }, + { + "epoch": 0.7292626728110599, + "grad_norm": 0.9097690003119847, + "learning_rate": 
1.5061462153288047e-06, + "loss": 0.7287842035293579, + "step": 3165 + }, + { + "epoch": 0.7294930875576037, + "grad_norm": 1.0497146109580189, + "learning_rate": 1.5058175999823639e-06, + "loss": 0.8404949903488159, + "step": 3166 + }, + { + "epoch": 0.7297235023041475, + "grad_norm": 0.9887517999095412, + "learning_rate": 1.505488911217168e-06, + "loss": 0.6572415828704834, + "step": 3167 + }, + { + "epoch": 0.7299539170506912, + "grad_norm": 1.0946078663351873, + "learning_rate": 1.5051601490809257e-06, + "loss": 0.8924484848976135, + "step": 3168 + }, + { + "epoch": 0.7301843317972351, + "grad_norm": 1.1648951213224894, + "learning_rate": 1.5048313136213566e-06, + "loss": 0.8701428174972534, + "step": 3169 + }, + { + "epoch": 0.7304147465437788, + "grad_norm": 1.1475520143482136, + "learning_rate": 1.5045024048861906e-06, + "loss": 0.8327716588973999, + "step": 3170 + }, + { + "epoch": 0.7306451612903225, + "grad_norm": 0.9261768702303601, + "learning_rate": 1.5041734229231686e-06, + "loss": 0.8379253149032593, + "step": 3171 + }, + { + "epoch": 0.7308755760368664, + "grad_norm": 0.944084791074753, + "learning_rate": 1.5038443677800413e-06, + "loss": 0.7475664019584656, + "step": 3172 + }, + { + "epoch": 0.7311059907834101, + "grad_norm": 1.2226580752686416, + "learning_rate": 1.5035152395045714e-06, + "loss": 0.9002243280410767, + "step": 3173 + }, + { + "epoch": 0.7313364055299539, + "grad_norm": 0.8355701729873874, + "learning_rate": 1.503186038144531e-06, + "loss": 0.6718685626983643, + "step": 3174 + }, + { + "epoch": 0.7315668202764977, + "grad_norm": 0.8961232238271665, + "learning_rate": 1.5028567637477033e-06, + "loss": 0.6836501359939575, + "step": 3175 + }, + { + "epoch": 0.7317972350230415, + "grad_norm": 0.8859536342600928, + "learning_rate": 1.502527416361882e-06, + "loss": 0.7548954486846924, + "step": 3176 + }, + { + "epoch": 0.7320276497695852, + "grad_norm": 0.9826706955950207, + "learning_rate": 1.5021979960348714e-06, + "loss": 
0.8385212421417236, + "step": 3177 + }, + { + "epoch": 0.7322580645161291, + "grad_norm": 0.8341383572022868, + "learning_rate": 1.5018685028144864e-06, + "loss": 0.8605425357818604, + "step": 3178 + }, + { + "epoch": 0.7324884792626728, + "grad_norm": 0.9464588739740442, + "learning_rate": 1.501538936748553e-06, + "loss": 0.8831393718719482, + "step": 3179 + }, + { + "epoch": 0.7327188940092166, + "grad_norm": 0.8991947067614845, + "learning_rate": 1.5012092978849062e-06, + "loss": 0.6965172290802002, + "step": 3180 + }, + { + "epoch": 0.7329493087557604, + "grad_norm": 1.0090692893685214, + "learning_rate": 1.500879586271394e-06, + "loss": 0.8062859773635864, + "step": 3181 + }, + { + "epoch": 0.7331797235023041, + "grad_norm": 0.7952177607289516, + "learning_rate": 1.5005498019558724e-06, + "loss": 0.8285790681838989, + "step": 3182 + }, + { + "epoch": 0.7334101382488479, + "grad_norm": 0.9848452236152132, + "learning_rate": 1.50021994498621e-06, + "loss": 0.612429141998291, + "step": 3183 + }, + { + "epoch": 0.7336405529953917, + "grad_norm": 0.9156545700522013, + "learning_rate": 1.4998900154102847e-06, + "loss": 0.8271423578262329, + "step": 3184 + }, + { + "epoch": 0.7338709677419355, + "grad_norm": 1.033787601007848, + "learning_rate": 1.499560013275986e-06, + "loss": 0.838964581489563, + "step": 3185 + }, + { + "epoch": 0.7341013824884792, + "grad_norm": 0.973220548768116, + "learning_rate": 1.4992299386312119e-06, + "loss": 0.7902333736419678, + "step": 3186 + }, + { + "epoch": 0.7343317972350231, + "grad_norm": 1.0086369878855088, + "learning_rate": 1.4988997915238735e-06, + "loss": 0.8520635366439819, + "step": 3187 + }, + { + "epoch": 0.7345622119815668, + "grad_norm": 0.9892742658321851, + "learning_rate": 1.4985695720018905e-06, + "loss": 0.8666567206382751, + "step": 3188 + }, + { + "epoch": 0.7347926267281106, + "grad_norm": 0.9672613309802366, + "learning_rate": 1.4982392801131944e-06, + "loss": 0.6930691003799438, + "step": 3189 + }, + { + 
"epoch": 0.7350230414746544, + "grad_norm": 0.7049869743164157, + "learning_rate": 1.4979089159057263e-06, + "loss": 0.7957722544670105, + "step": 3190 + }, + { + "epoch": 0.7352534562211982, + "grad_norm": 1.0247601673009343, + "learning_rate": 1.4975784794274383e-06, + "loss": 0.8966697454452515, + "step": 3191 + }, + { + "epoch": 0.7354838709677419, + "grad_norm": 0.9082832739975722, + "learning_rate": 1.4972479707262926e-06, + "loss": 0.7478537559509277, + "step": 3192 + }, + { + "epoch": 0.7357142857142858, + "grad_norm": 0.9541041339746362, + "learning_rate": 1.4969173898502624e-06, + "loss": 0.8862416744232178, + "step": 3193 + }, + { + "epoch": 0.7359447004608295, + "grad_norm": 0.8171852448254098, + "learning_rate": 1.4965867368473306e-06, + "loss": 0.7910712957382202, + "step": 3194 + }, + { + "epoch": 0.7361751152073732, + "grad_norm": 1.1219879646982642, + "learning_rate": 1.4962560117654916e-06, + "loss": 0.7371944785118103, + "step": 3195 + }, + { + "epoch": 0.7364055299539171, + "grad_norm": 1.097733223938739, + "learning_rate": 1.4959252146527496e-06, + "loss": 0.7966737151145935, + "step": 3196 + }, + { + "epoch": 0.7366359447004608, + "grad_norm": 1.0499505243286467, + "learning_rate": 1.4955943455571188e-06, + "loss": 0.8474653363227844, + "step": 3197 + }, + { + "epoch": 0.7368663594470046, + "grad_norm": 1.1042914253537062, + "learning_rate": 1.4952634045266249e-06, + "loss": 1.0197458267211914, + "step": 3198 + }, + { + "epoch": 0.7370967741935484, + "grad_norm": 1.054872102822339, + "learning_rate": 1.4949323916093036e-06, + "loss": 0.8813979625701904, + "step": 3199 + }, + { + "epoch": 0.7373271889400922, + "grad_norm": 0.9264193586497762, + "learning_rate": 1.4946013068532008e-06, + "loss": 0.9323042631149292, + "step": 3200 + }, + { + "epoch": 0.7375576036866359, + "grad_norm": 1.1184797510334814, + "learning_rate": 1.494270150306373e-06, + "loss": 0.8637902736663818, + "step": 3201 + }, + { + "epoch": 0.7377880184331798, + "grad_norm": 
1.1006860616870338, + "learning_rate": 1.4939389220168875e-06, + "loss": 0.8046854734420776, + "step": 3202 + }, + { + "epoch": 0.7380184331797235, + "grad_norm": 0.9882241685181946, + "learning_rate": 1.4936076220328211e-06, + "loss": 0.7616177201271057, + "step": 3203 + }, + { + "epoch": 0.7382488479262673, + "grad_norm": 1.0795779512267711, + "learning_rate": 1.4932762504022619e-06, + "loss": 0.8548959493637085, + "step": 3204 + }, + { + "epoch": 0.738479262672811, + "grad_norm": 0.7907178615166577, + "learning_rate": 1.492944807173308e-06, + "loss": 0.8062562942504883, + "step": 3205 + }, + { + "epoch": 0.7387096774193549, + "grad_norm": 1.3004819436990922, + "learning_rate": 1.492613292394068e-06, + "loss": 0.8776403069496155, + "step": 3206 + }, + { + "epoch": 0.7389400921658986, + "grad_norm": 1.0654471822316505, + "learning_rate": 1.4922817061126605e-06, + "loss": 0.7528336048126221, + "step": 3207 + }, + { + "epoch": 0.7391705069124423, + "grad_norm": 0.9288011243231857, + "learning_rate": 1.4919500483772152e-06, + "loss": 0.7441881895065308, + "step": 3208 + }, + { + "epoch": 0.7394009216589862, + "grad_norm": 0.9496581250230889, + "learning_rate": 1.4916183192358715e-06, + "loss": 0.8925758004188538, + "step": 3209 + }, + { + "epoch": 0.7396313364055299, + "grad_norm": 0.999519243113449, + "learning_rate": 1.4912865187367798e-06, + "loss": 0.7527008652687073, + "step": 3210 + }, + { + "epoch": 0.7398617511520738, + "grad_norm": 0.8631940848050832, + "learning_rate": 1.4909546469281e-06, + "loss": 0.753572404384613, + "step": 3211 + }, + { + "epoch": 0.7400921658986175, + "grad_norm": 0.938203260102219, + "learning_rate": 1.4906227038580036e-06, + "loss": 0.8884274959564209, + "step": 3212 + }, + { + "epoch": 0.7403225806451613, + "grad_norm": 0.7835821294972823, + "learning_rate": 1.4902906895746707e-06, + "loss": 0.7702244520187378, + "step": 3213 + }, + { + "epoch": 0.740552995391705, + "grad_norm": 1.0140732775513552, + "learning_rate": 
1.4899586041262936e-06, + "loss": 0.8662835359573364, + "step": 3214 + }, + { + "epoch": 0.7407834101382489, + "grad_norm": 1.0357827096613574, + "learning_rate": 1.4896264475610736e-06, + "loss": 0.9819997549057007, + "step": 3215 + }, + { + "epoch": 0.7410138248847926, + "grad_norm": 1.0094197188590162, + "learning_rate": 1.4892942199272232e-06, + "loss": 0.9137614965438843, + "step": 3216 + }, + { + "epoch": 0.7412442396313365, + "grad_norm": 0.8442315992670393, + "learning_rate": 1.488961921272964e-06, + "loss": 0.7554785013198853, + "step": 3217 + }, + { + "epoch": 0.7414746543778802, + "grad_norm": 1.1172745597106868, + "learning_rate": 1.4886295516465296e-06, + "loss": 0.8528940677642822, + "step": 3218 + }, + { + "epoch": 0.7417050691244239, + "grad_norm": 0.9056918439443091, + "learning_rate": 1.4882971110961626e-06, + "loss": 0.7212377786636353, + "step": 3219 + }, + { + "epoch": 0.7419354838709677, + "grad_norm": 0.9349124518247459, + "learning_rate": 1.4879645996701161e-06, + "loss": 0.7767617702484131, + "step": 3220 + }, + { + "epoch": 0.7421658986175115, + "grad_norm": 0.8749389005214587, + "learning_rate": 1.4876320174166542e-06, + "loss": 0.8083292245864868, + "step": 3221 + }, + { + "epoch": 0.7423963133640553, + "grad_norm": 1.14484646357819, + "learning_rate": 1.4872993643840506e-06, + "loss": 0.8652364015579224, + "step": 3222 + }, + { + "epoch": 0.742626728110599, + "grad_norm": 0.9176030431238368, + "learning_rate": 1.486966640620589e-06, + "loss": 0.7455019950866699, + "step": 3223 + }, + { + "epoch": 0.7428571428571429, + "grad_norm": 1.0637469159007076, + "learning_rate": 1.4866338461745644e-06, + "loss": 0.7881917953491211, + "step": 3224 + }, + { + "epoch": 0.7430875576036866, + "grad_norm": 1.0955814961304737, + "learning_rate": 1.4863009810942813e-06, + "loss": 0.8148372173309326, + "step": 3225 + }, + { + "epoch": 0.7433179723502304, + "grad_norm": 0.7991384008669099, + "learning_rate": 1.4859680454280547e-06, + "loss": 
0.6574658751487732, + "step": 3226 + }, + { + "epoch": 0.7435483870967742, + "grad_norm": 0.9231484623709659, + "learning_rate": 1.4856350392242094e-06, + "loss": 0.7831655740737915, + "step": 3227 + }, + { + "epoch": 0.743778801843318, + "grad_norm": 0.8080817272772121, + "learning_rate": 1.485301962531081e-06, + "loss": 0.7406231164932251, + "step": 3228 + }, + { + "epoch": 0.7440092165898617, + "grad_norm": 0.9500561612529754, + "learning_rate": 1.4849688153970154e-06, + "loss": 0.8092324733734131, + "step": 3229 + }, + { + "epoch": 0.7442396313364056, + "grad_norm": 0.969093760928221, + "learning_rate": 1.4846355978703679e-06, + "loss": 0.6662560701370239, + "step": 3230 + }, + { + "epoch": 0.7444700460829493, + "grad_norm": 0.8941354868939383, + "learning_rate": 1.4843023099995052e-06, + "loss": 0.8064731359481812, + "step": 3231 + }, + { + "epoch": 0.744700460829493, + "grad_norm": 1.0463529761361023, + "learning_rate": 1.4839689518328037e-06, + "loss": 0.7424519658088684, + "step": 3232 + }, + { + "epoch": 0.7449308755760369, + "grad_norm": 0.9618875213680247, + "learning_rate": 1.4836355234186489e-06, + "loss": 0.7851438522338867, + "step": 3233 + }, + { + "epoch": 0.7451612903225806, + "grad_norm": 1.2534680382280676, + "learning_rate": 1.4833020248054381e-06, + "loss": 0.896986722946167, + "step": 3234 + }, + { + "epoch": 0.7453917050691244, + "grad_norm": 1.3688846458082455, + "learning_rate": 1.4829684560415787e-06, + "loss": 0.9469928741455078, + "step": 3235 + }, + { + "epoch": 0.7456221198156682, + "grad_norm": 0.8653442286827894, + "learning_rate": 1.4826348171754872e-06, + "loss": 0.7527188062667847, + "step": 3236 + }, + { + "epoch": 0.745852534562212, + "grad_norm": 0.9575212903893582, + "learning_rate": 1.4823011082555907e-06, + "loss": 0.7758080959320068, + "step": 3237 + }, + { + "epoch": 0.7460829493087557, + "grad_norm": 0.9454436343118328, + "learning_rate": 1.481967329330327e-06, + "loss": 0.8359881043434143, + "step": 3238 + }, + { + 
"epoch": 0.7463133640552996, + "grad_norm": 0.7567559878181612, + "learning_rate": 1.4816334804481434e-06, + "loss": 0.6576982736587524, + "step": 3239 + }, + { + "epoch": 0.7465437788018433, + "grad_norm": 1.0012365138594377, + "learning_rate": 1.4812995616574978e-06, + "loss": 0.7919917106628418, + "step": 3240 + }, + { + "epoch": 0.7467741935483871, + "grad_norm": 0.7865137499791297, + "learning_rate": 1.480965573006858e-06, + "loss": 0.7682263851165771, + "step": 3241 + }, + { + "epoch": 0.7470046082949309, + "grad_norm": 1.0123241682054298, + "learning_rate": 1.4806315145447017e-06, + "loss": 0.8573193550109863, + "step": 3242 + }, + { + "epoch": 0.7472350230414746, + "grad_norm": 0.8191884786597581, + "learning_rate": 1.4802973863195174e-06, + "loss": 0.8473606109619141, + "step": 3243 + }, + { + "epoch": 0.7474654377880184, + "grad_norm": 0.8754073951862541, + "learning_rate": 1.4799631883798033e-06, + "loss": 0.8110678195953369, + "step": 3244 + }, + { + "epoch": 0.7476958525345622, + "grad_norm": 1.2161581760732987, + "learning_rate": 1.4796289207740681e-06, + "loss": 0.6624661087989807, + "step": 3245 + }, + { + "epoch": 0.747926267281106, + "grad_norm": 0.7356293873938221, + "learning_rate": 1.47929458355083e-06, + "loss": 0.8145536184310913, + "step": 3246 + }, + { + "epoch": 0.7481566820276497, + "grad_norm": 0.921128997158793, + "learning_rate": 1.4789601767586172e-06, + "loss": 0.7819876074790955, + "step": 3247 + }, + { + "epoch": 0.7483870967741936, + "grad_norm": 0.973465003660405, + "learning_rate": 1.4786257004459692e-06, + "loss": 0.7573810815811157, + "step": 3248 + }, + { + "epoch": 0.7486175115207373, + "grad_norm": 1.061603620628762, + "learning_rate": 1.4782911546614343e-06, + "loss": 0.8149522542953491, + "step": 3249 + }, + { + "epoch": 0.7488479262672811, + "grad_norm": 1.023358335101362, + "learning_rate": 1.4779565394535714e-06, + "loss": 0.9935284852981567, + "step": 3250 + }, + { + "epoch": 0.7490783410138249, + "grad_norm": 
0.8488935416479958, + "learning_rate": 1.4776218548709497e-06, + "loss": 0.8673371076583862, + "step": 3251 + }, + { + "epoch": 0.7493087557603687, + "grad_norm": 1.0304468521950305, + "learning_rate": 1.4772871009621477e-06, + "loss": 0.8569149374961853, + "step": 3252 + }, + { + "epoch": 0.7495391705069124, + "grad_norm": 0.8613722173703313, + "learning_rate": 1.4769522777757551e-06, + "loss": 0.7177854776382446, + "step": 3253 + }, + { + "epoch": 0.7497695852534563, + "grad_norm": 1.0681726446759283, + "learning_rate": 1.4766173853603706e-06, + "loss": 0.8115622997283936, + "step": 3254 + }, + { + "epoch": 0.75, + "grad_norm": 0.782977490159237, + "learning_rate": 1.4762824237646038e-06, + "loss": 0.7209019660949707, + "step": 3255 + }, + { + "epoch": 0.7502304147465437, + "grad_norm": 0.9264325214188774, + "learning_rate": 1.4759473930370736e-06, + "loss": 0.8433470726013184, + "step": 3256 + }, + { + "epoch": 0.7504608294930876, + "grad_norm": 1.0399152705693322, + "learning_rate": 1.4756122932264093e-06, + "loss": 0.853674054145813, + "step": 3257 + }, + { + "epoch": 0.7506912442396313, + "grad_norm": 0.9978956076189626, + "learning_rate": 1.4752771243812503e-06, + "loss": 0.8645769357681274, + "step": 3258 + }, + { + "epoch": 0.7509216589861751, + "grad_norm": 1.4046905803968728, + "learning_rate": 1.474941886550246e-06, + "loss": 0.927452564239502, + "step": 3259 + }, + { + "epoch": 0.7511520737327189, + "grad_norm": 0.8642581213790671, + "learning_rate": 1.4746065797820552e-06, + "loss": 0.7461255788803101, + "step": 3260 + }, + { + "epoch": 0.7513824884792627, + "grad_norm": 0.9230380534710827, + "learning_rate": 1.4742712041253481e-06, + "loss": 0.8737163543701172, + "step": 3261 + }, + { + "epoch": 0.7516129032258064, + "grad_norm": 0.8624828182814519, + "learning_rate": 1.4739357596288036e-06, + "loss": 0.7148758172988892, + "step": 3262 + }, + { + "epoch": 0.7518433179723503, + "grad_norm": 0.8930446588032352, + "learning_rate": 
1.4736002463411108e-06, + "loss": 0.738334596157074, + "step": 3263 + }, + { + "epoch": 0.752073732718894, + "grad_norm": 0.9237791770446419, + "learning_rate": 1.4732646643109692e-06, + "loss": 0.7733340263366699, + "step": 3264 + }, + { + "epoch": 0.7523041474654378, + "grad_norm": 0.8815526032135323, + "learning_rate": 1.4729290135870883e-06, + "loss": 0.7882881164550781, + "step": 3265 + }, + { + "epoch": 0.7525345622119816, + "grad_norm": 1.029688172185613, + "learning_rate": 1.472593294218187e-06, + "loss": 0.7908357381820679, + "step": 3266 + }, + { + "epoch": 0.7527649769585254, + "grad_norm": 1.0791156682188368, + "learning_rate": 1.4722575062529946e-06, + "loss": 0.8818062543869019, + "step": 3267 + }, + { + "epoch": 0.7529953917050691, + "grad_norm": 0.9552677127935061, + "learning_rate": 1.4719216497402504e-06, + "loss": 0.7152599692344666, + "step": 3268 + }, + { + "epoch": 0.7532258064516129, + "grad_norm": 0.8322037056106782, + "learning_rate": 1.4715857247287036e-06, + "loss": 0.8503165245056152, + "step": 3269 + }, + { + "epoch": 0.7534562211981567, + "grad_norm": 0.9223729567181368, + "learning_rate": 1.4712497312671128e-06, + "loss": 0.8382623195648193, + "step": 3270 + }, + { + "epoch": 0.7536866359447004, + "grad_norm": 1.0456882119229616, + "learning_rate": 1.4709136694042479e-06, + "loss": 0.8358533382415771, + "step": 3271 + }, + { + "epoch": 0.7539170506912443, + "grad_norm": 0.850717529465525, + "learning_rate": 1.4705775391888868e-06, + "loss": 0.6735624670982361, + "step": 3272 + }, + { + "epoch": 0.754147465437788, + "grad_norm": 0.8890452669379437, + "learning_rate": 1.470241340669819e-06, + "loss": 0.8343949317932129, + "step": 3273 + }, + { + "epoch": 0.7543778801843318, + "grad_norm": 0.9508610560109901, + "learning_rate": 1.4699050738958434e-06, + "loss": 0.8204318284988403, + "step": 3274 + }, + { + "epoch": 0.7546082949308756, + "grad_norm": 0.9484772286558124, + "learning_rate": 1.4695687389157684e-06, + "loss": 
0.7541854977607727, + "step": 3275 + }, + { + "epoch": 0.7548387096774194, + "grad_norm": 0.8425504123859369, + "learning_rate": 1.4692323357784122e-06, + "loss": 0.8144943714141846, + "step": 3276 + }, + { + "epoch": 0.7550691244239631, + "grad_norm": 0.8699783126306536, + "learning_rate": 1.468895864532604e-06, + "loss": 0.9045677781105042, + "step": 3277 + }, + { + "epoch": 0.755299539170507, + "grad_norm": 1.1586104318366583, + "learning_rate": 1.4685593252271816e-06, + "loss": 0.8818730115890503, + "step": 3278 + }, + { + "epoch": 0.7555299539170507, + "grad_norm": 1.013621065000431, + "learning_rate": 1.4682227179109932e-06, + "loss": 0.8582229614257812, + "step": 3279 + }, + { + "epoch": 0.7557603686635944, + "grad_norm": 1.016541372354986, + "learning_rate": 1.4678860426328977e-06, + "loss": 0.8769974708557129, + "step": 3280 + }, + { + "epoch": 0.7559907834101383, + "grad_norm": 0.8474484944100091, + "learning_rate": 1.467549299441762e-06, + "loss": 0.8034937381744385, + "step": 3281 + }, + { + "epoch": 0.756221198156682, + "grad_norm": 0.9998169463505984, + "learning_rate": 1.4672124883864646e-06, + "loss": 0.9057378768920898, + "step": 3282 + }, + { + "epoch": 0.7564516129032258, + "grad_norm": 0.9160359407680143, + "learning_rate": 1.4668756095158929e-06, + "loss": 0.8039969205856323, + "step": 3283 + }, + { + "epoch": 0.7566820276497696, + "grad_norm": 0.7311572278532684, + "learning_rate": 1.4665386628789448e-06, + "loss": 0.887493908405304, + "step": 3284 + }, + { + "epoch": 0.7569124423963134, + "grad_norm": 0.9749833066021305, + "learning_rate": 1.4662016485245271e-06, + "loss": 0.783561646938324, + "step": 3285 + }, + { + "epoch": 0.7571428571428571, + "grad_norm": 1.1972955361865625, + "learning_rate": 1.4658645665015579e-06, + "loss": 0.7526337504386902, + "step": 3286 + }, + { + "epoch": 0.757373271889401, + "grad_norm": 1.0074911468135093, + "learning_rate": 1.4655274168589633e-06, + "loss": 0.8583099842071533, + "step": 3287 + }, + { + 
"epoch": 0.7576036866359447, + "grad_norm": 0.9193819222275846, + "learning_rate": 1.4651901996456802e-06, + "loss": 0.743253767490387, + "step": 3288 + }, + { + "epoch": 0.7578341013824885, + "grad_norm": 0.9481332173734432, + "learning_rate": 1.4648529149106555e-06, + "loss": 0.8763987421989441, + "step": 3289 + }, + { + "epoch": 0.7580645161290323, + "grad_norm": 0.9531439206540595, + "learning_rate": 1.4645155627028455e-06, + "loss": 0.8388645648956299, + "step": 3290 + }, + { + "epoch": 0.7582949308755761, + "grad_norm": 0.9430549047432926, + "learning_rate": 1.4641781430712167e-06, + "loss": 0.8943589925765991, + "step": 3291 + }, + { + "epoch": 0.7585253456221198, + "grad_norm": 0.897306276129885, + "learning_rate": 1.463840656064745e-06, + "loss": 0.9224259257316589, + "step": 3292 + }, + { + "epoch": 0.7587557603686635, + "grad_norm": 0.7118962108569266, + "learning_rate": 1.463503101732416e-06, + "loss": 0.5836232900619507, + "step": 3293 + }, + { + "epoch": 0.7589861751152074, + "grad_norm": 1.2610309452085111, + "learning_rate": 1.4631654801232255e-06, + "loss": 0.6700382828712463, + "step": 3294 + }, + { + "epoch": 0.7592165898617511, + "grad_norm": 0.9159006934526643, + "learning_rate": 1.4628277912861785e-06, + "loss": 0.7876112461090088, + "step": 3295 + }, + { + "epoch": 0.759447004608295, + "grad_norm": 0.9073380438964382, + "learning_rate": 1.4624900352702905e-06, + "loss": 0.8410799503326416, + "step": 3296 + }, + { + "epoch": 0.7596774193548387, + "grad_norm": 0.931630117662002, + "learning_rate": 1.4621522121245859e-06, + "loss": 0.9615974426269531, + "step": 3297 + }, + { + "epoch": 0.7599078341013825, + "grad_norm": 1.1213393394374043, + "learning_rate": 1.4618143218980996e-06, + "loss": 0.7973389625549316, + "step": 3298 + }, + { + "epoch": 0.7601382488479262, + "grad_norm": 0.7835636014361216, + "learning_rate": 1.461476364639876e-06, + "loss": 0.7734094858169556, + "step": 3299 + }, + { + "epoch": 0.7603686635944701, + "grad_norm": 
0.9681758067915807, + "learning_rate": 1.461138340398969e-06, + "loss": 0.7365939617156982, + "step": 3300 + }, + { + "epoch": 0.7605990783410138, + "grad_norm": 0.9251627601521192, + "learning_rate": 1.4608002492244421e-06, + "loss": 0.822052001953125, + "step": 3301 + }, + { + "epoch": 0.7608294930875577, + "grad_norm": 0.83536047590978, + "learning_rate": 1.460462091165369e-06, + "loss": 0.7220577001571655, + "step": 3302 + }, + { + "epoch": 0.7610599078341014, + "grad_norm": 0.9806834080573716, + "learning_rate": 1.4601238662708332e-06, + "loss": 0.9795923233032227, + "step": 3303 + }, + { + "epoch": 0.7612903225806451, + "grad_norm": 1.0452301496717684, + "learning_rate": 1.4597855745899273e-06, + "loss": 0.804523229598999, + "step": 3304 + }, + { + "epoch": 0.761520737327189, + "grad_norm": 0.936039712838613, + "learning_rate": 1.4594472161717536e-06, + "loss": 0.7630297541618347, + "step": 3305 + }, + { + "epoch": 0.7617511520737327, + "grad_norm": 1.008258749087615, + "learning_rate": 1.4591087910654254e-06, + "loss": 0.7088560461997986, + "step": 3306 + }, + { + "epoch": 0.7619815668202765, + "grad_norm": 0.8612515545716848, + "learning_rate": 1.4587702993200637e-06, + "loss": 0.6627416014671326, + "step": 3307 + }, + { + "epoch": 0.7622119815668202, + "grad_norm": 1.0700034611745908, + "learning_rate": 1.4584317409848001e-06, + "loss": 0.7931111454963684, + "step": 3308 + }, + { + "epoch": 0.7624423963133641, + "grad_norm": 0.918004873184285, + "learning_rate": 1.4580931161087763e-06, + "loss": 0.8107850551605225, + "step": 3309 + }, + { + "epoch": 0.7626728110599078, + "grad_norm": 1.1251596055699022, + "learning_rate": 1.4577544247411431e-06, + "loss": 0.8211404085159302, + "step": 3310 + }, + { + "epoch": 0.7629032258064516, + "grad_norm": 1.1825093837600291, + "learning_rate": 1.457415666931061e-06, + "loss": 0.9861341714859009, + "step": 3311 + }, + { + "epoch": 0.7631336405529954, + "grad_norm": 1.0573079532917569, + "learning_rate": 
1.4570768427277007e-06, + "loss": 0.8963409662246704, + "step": 3312 + }, + { + "epoch": 0.7633640552995392, + "grad_norm": 1.1183054914337, + "learning_rate": 1.4567379521802416e-06, + "loss": 0.7510147094726562, + "step": 3313 + }, + { + "epoch": 0.7635944700460829, + "grad_norm": 1.0312269750408198, + "learning_rate": 1.4563989953378734e-06, + "loss": 0.7761805057525635, + "step": 3314 + }, + { + "epoch": 0.7638248847926268, + "grad_norm": 0.782434581691777, + "learning_rate": 1.4560599722497953e-06, + "loss": 0.6202781200408936, + "step": 3315 + }, + { + "epoch": 0.7640552995391705, + "grad_norm": 0.9114320197488165, + "learning_rate": 1.4557208829652159e-06, + "loss": 0.711891770362854, + "step": 3316 + }, + { + "epoch": 0.7642857142857142, + "grad_norm": 1.0888571874972786, + "learning_rate": 1.4553817275333537e-06, + "loss": 0.8689517974853516, + "step": 3317 + }, + { + "epoch": 0.7645161290322581, + "grad_norm": 0.847547372029402, + "learning_rate": 1.4550425060034365e-06, + "loss": 0.7323688268661499, + "step": 3318 + }, + { + "epoch": 0.7647465437788018, + "grad_norm": 0.954006429800706, + "learning_rate": 1.4547032184247022e-06, + "loss": 0.8934407234191895, + "step": 3319 + }, + { + "epoch": 0.7649769585253456, + "grad_norm": 0.9830574702749578, + "learning_rate": 1.4543638648463975e-06, + "loss": 0.7729885578155518, + "step": 3320 + }, + { + "epoch": 0.7652073732718894, + "grad_norm": 0.9967355019103026, + "learning_rate": 1.454024445317779e-06, + "loss": 0.8962388038635254, + "step": 3321 + }, + { + "epoch": 0.7654377880184332, + "grad_norm": 0.8821073382766633, + "learning_rate": 1.4536849598881137e-06, + "loss": 0.8655213117599487, + "step": 3322 + }, + { + "epoch": 0.7656682027649769, + "grad_norm": 0.8780656658271131, + "learning_rate": 1.453345408606677e-06, + "loss": 0.6471779346466064, + "step": 3323 + }, + { + "epoch": 0.7658986175115208, + "grad_norm": 0.7335596828312507, + "learning_rate": 1.4530057915227545e-06, + "loss": 
0.8665071129798889, + "step": 3324 + }, + { + "epoch": 0.7661290322580645, + "grad_norm": 1.054528188345679, + "learning_rate": 1.4526661086856407e-06, + "loss": 0.9504371285438538, + "step": 3325 + }, + { + "epoch": 0.7663594470046083, + "grad_norm": 1.017396914206461, + "learning_rate": 1.452326360144641e-06, + "loss": 0.8122013807296753, + "step": 3326 + }, + { + "epoch": 0.7665898617511521, + "grad_norm": 1.0019111601549837, + "learning_rate": 1.4519865459490687e-06, + "loss": 0.817001223564148, + "step": 3327 + }, + { + "epoch": 0.7668202764976959, + "grad_norm": 0.9387626004792055, + "learning_rate": 1.4516466661482474e-06, + "loss": 0.732322096824646, + "step": 3328 + }, + { + "epoch": 0.7670506912442396, + "grad_norm": 0.8844021324185192, + "learning_rate": 1.4513067207915106e-06, + "loss": 0.7961580157279968, + "step": 3329 + }, + { + "epoch": 0.7672811059907834, + "grad_norm": 0.9579783239612414, + "learning_rate": 1.4509667099282007e-06, + "loss": 0.7660717368125916, + "step": 3330 + }, + { + "epoch": 0.7675115207373272, + "grad_norm": 0.8487336367256668, + "learning_rate": 1.4506266336076698e-06, + "loss": 0.8279193639755249, + "step": 3331 + }, + { + "epoch": 0.7677419354838709, + "grad_norm": 0.8431407438554851, + "learning_rate": 1.4502864918792796e-06, + "loss": 0.7050153017044067, + "step": 3332 + }, + { + "epoch": 0.7679723502304148, + "grad_norm": 0.9386347952909049, + "learning_rate": 1.4499462847924013e-06, + "loss": 0.8146064877510071, + "step": 3333 + }, + { + "epoch": 0.7682027649769585, + "grad_norm": 0.8248232070769104, + "learning_rate": 1.4496060123964153e-06, + "loss": 0.8300814628601074, + "step": 3334 + }, + { + "epoch": 0.7684331797235023, + "grad_norm": 0.848400587593364, + "learning_rate": 1.4492656747407117e-06, + "loss": 0.8240403532981873, + "step": 3335 + }, + { + "epoch": 0.7686635944700461, + "grad_norm": 1.1661360506901004, + "learning_rate": 1.4489252718746908e-06, + "loss": 0.901625394821167, + "step": 3336 + }, + { + 
"epoch": 0.7688940092165899, + "grad_norm": 0.8620744709914054, + "learning_rate": 1.4485848038477604e-06, + "loss": 0.827139675617218, + "step": 3337 + }, + { + "epoch": 0.7691244239631336, + "grad_norm": 1.111541176491108, + "learning_rate": 1.4482442707093397e-06, + "loss": 0.7032946348190308, + "step": 3338 + }, + { + "epoch": 0.7693548387096775, + "grad_norm": 0.8506038004087974, + "learning_rate": 1.4479036725088564e-06, + "loss": 0.6805816888809204, + "step": 3339 + }, + { + "epoch": 0.7695852534562212, + "grad_norm": 0.8063208135295213, + "learning_rate": 1.447563009295748e-06, + "loss": 0.673591136932373, + "step": 3340 + }, + { + "epoch": 0.7698156682027649, + "grad_norm": 0.8116035277545482, + "learning_rate": 1.4472222811194614e-06, + "loss": 0.6513386964797974, + "step": 3341 + }, + { + "epoch": 0.7700460829493088, + "grad_norm": 0.7654089652768199, + "learning_rate": 1.4468814880294529e-06, + "loss": 0.7367297410964966, + "step": 3342 + }, + { + "epoch": 0.7702764976958525, + "grad_norm": 1.0405555538712603, + "learning_rate": 1.4465406300751878e-06, + "loss": 0.7393670082092285, + "step": 3343 + }, + { + "epoch": 0.7705069124423963, + "grad_norm": 0.7135144631405288, + "learning_rate": 1.4461997073061411e-06, + "loss": 0.7525930404663086, + "step": 3344 + }, + { + "epoch": 0.7707373271889401, + "grad_norm": 0.7583677101512988, + "learning_rate": 1.445858719771798e-06, + "loss": 0.6679942011833191, + "step": 3345 + }, + { + "epoch": 0.7709677419354839, + "grad_norm": 1.0903018310329022, + "learning_rate": 1.4455176675216518e-06, + "loss": 0.8440653085708618, + "step": 3346 + }, + { + "epoch": 0.7711981566820276, + "grad_norm": 0.9929368208299709, + "learning_rate": 1.4451765506052063e-06, + "loss": 0.8765773177146912, + "step": 3347 + }, + { + "epoch": 0.7714285714285715, + "grad_norm": 0.9183070258317377, + "learning_rate": 1.4448353690719732e-06, + "loss": 0.7309157848358154, + "step": 3348 + }, + { + "epoch": 0.7716589861751152, + "grad_norm": 
0.8130162073408548, + "learning_rate": 1.4444941229714758e-06, + "loss": 0.8043340444564819, + "step": 3349 + }, + { + "epoch": 0.771889400921659, + "grad_norm": 0.8488386913998837, + "learning_rate": 1.4441528123532443e-06, + "loss": 0.6528831124305725, + "step": 3350 + }, + { + "epoch": 0.7721198156682028, + "grad_norm": 0.7632405080168834, + "learning_rate": 1.4438114372668202e-06, + "loss": 0.7973155975341797, + "step": 3351 + }, + { + "epoch": 0.7723502304147466, + "grad_norm": 0.8366450624031991, + "learning_rate": 1.443469997761754e-06, + "loss": 0.940142810344696, + "step": 3352 + }, + { + "epoch": 0.7725806451612903, + "grad_norm": 1.0048812991349738, + "learning_rate": 1.443128493887604e-06, + "loss": 0.7936829328536987, + "step": 3353 + }, + { + "epoch": 0.772811059907834, + "grad_norm": 0.8583665989338275, + "learning_rate": 1.44278692569394e-06, + "loss": 0.8369218111038208, + "step": 3354 + }, + { + "epoch": 0.7730414746543779, + "grad_norm": 1.313808566044562, + "learning_rate": 1.4424452932303398e-06, + "loss": 0.9305802583694458, + "step": 3355 + }, + { + "epoch": 0.7732718894009216, + "grad_norm": 0.8862565116465879, + "learning_rate": 1.4421035965463916e-06, + "loss": 0.913454532623291, + "step": 3356 + }, + { + "epoch": 0.7735023041474655, + "grad_norm": 1.0772806984700294, + "learning_rate": 1.4417618356916912e-06, + "loss": 0.8552114963531494, + "step": 3357 + }, + { + "epoch": 0.7737327188940092, + "grad_norm": 1.080720564237515, + "learning_rate": 1.4414200107158452e-06, + "loss": 0.8674488067626953, + "step": 3358 + }, + { + "epoch": 0.773963133640553, + "grad_norm": 1.0999604158561203, + "learning_rate": 1.441078121668469e-06, + "loss": 0.9142898321151733, + "step": 3359 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 1.0964749277789683, + "learning_rate": 1.4407361685991872e-06, + "loss": 0.8258639574050903, + "step": 3360 + }, + { + "epoch": 0.7744239631336406, + "grad_norm": 1.062716295700188, + "learning_rate": 
1.4403941515576343e-06, + "loss": 0.773646354675293, + "step": 3361 + }, + { + "epoch": 0.7746543778801843, + "grad_norm": 1.1397221950146432, + "learning_rate": 1.440052070593453e-06, + "loss": 0.9481985569000244, + "step": 3362 + }, + { + "epoch": 0.7748847926267282, + "grad_norm": 1.0332478363266029, + "learning_rate": 1.4397099257562965e-06, + "loss": 0.7915977239608765, + "step": 3363 + }, + { + "epoch": 0.7751152073732719, + "grad_norm": 1.057946693927254, + "learning_rate": 1.4393677170958261e-06, + "loss": 0.887650191783905, + "step": 3364 + }, + { + "epoch": 0.7753456221198156, + "grad_norm": 0.8250912024788589, + "learning_rate": 1.4390254446617137e-06, + "loss": 0.8516546487808228, + "step": 3365 + }, + { + "epoch": 0.7755760368663595, + "grad_norm": 0.9895329351481195, + "learning_rate": 1.4386831085036386e-06, + "loss": 0.8076090812683105, + "step": 3366 + }, + { + "epoch": 0.7758064516129032, + "grad_norm": 0.9203902257484836, + "learning_rate": 1.4383407086712913e-06, + "loss": 0.7480059862136841, + "step": 3367 + }, + { + "epoch": 0.776036866359447, + "grad_norm": 1.1101542314671893, + "learning_rate": 1.4379982452143704e-06, + "loss": 0.8586190938949585, + "step": 3368 + }, + { + "epoch": 0.7762672811059907, + "grad_norm": 0.9197679868181698, + "learning_rate": 1.4376557181825842e-06, + "loss": 0.7581472396850586, + "step": 3369 + }, + { + "epoch": 0.7764976958525346, + "grad_norm": 1.2064630913320733, + "learning_rate": 1.4373131276256495e-06, + "loss": 0.7482568621635437, + "step": 3370 + }, + { + "epoch": 0.7767281105990783, + "grad_norm": 1.2204489088505164, + "learning_rate": 1.4369704735932935e-06, + "loss": 0.8822590112686157, + "step": 3371 + }, + { + "epoch": 0.7769585253456222, + "grad_norm": 0.9171528830764245, + "learning_rate": 1.4366277561352517e-06, + "loss": 0.7762279510498047, + "step": 3372 + }, + { + "epoch": 0.7771889400921659, + "grad_norm": 0.9649262790570658, + "learning_rate": 1.4362849753012692e-06, + "loss": 
0.8059147596359253, + "step": 3373 + }, + { + "epoch": 0.7774193548387097, + "grad_norm": 1.0529652703364816, + "learning_rate": 1.4359421311411e-06, + "loss": 0.778538703918457, + "step": 3374 + }, + { + "epoch": 0.7776497695852534, + "grad_norm": 1.1587212424703164, + "learning_rate": 1.4355992237045077e-06, + "loss": 0.9422975778579712, + "step": 3375 + }, + { + "epoch": 0.7778801843317973, + "grad_norm": 1.0109308621512796, + "learning_rate": 1.4352562530412645e-06, + "loss": 0.7437118291854858, + "step": 3376 + }, + { + "epoch": 0.778110599078341, + "grad_norm": 0.8961203034935337, + "learning_rate": 1.4349132192011525e-06, + "loss": 0.6935930252075195, + "step": 3377 + }, + { + "epoch": 0.7783410138248847, + "grad_norm": 1.1629979064489353, + "learning_rate": 1.4345701222339628e-06, + "loss": 0.7797117829322815, + "step": 3378 + }, + { + "epoch": 0.7785714285714286, + "grad_norm": 1.0591342199366531, + "learning_rate": 1.434226962189495e-06, + "loss": 0.8795931339263916, + "step": 3379 + }, + { + "epoch": 0.7788018433179723, + "grad_norm": 1.071603440273884, + "learning_rate": 1.433883739117558e-06, + "loss": 0.8936992287635803, + "step": 3380 + }, + { + "epoch": 0.7790322580645161, + "grad_norm": 1.0412928095771106, + "learning_rate": 1.4335404530679708e-06, + "loss": 0.9142701625823975, + "step": 3381 + }, + { + "epoch": 0.7792626728110599, + "grad_norm": 1.0966643259622728, + "learning_rate": 1.4331971040905613e-06, + "loss": 0.8996907472610474, + "step": 3382 + }, + { + "epoch": 0.7794930875576037, + "grad_norm": 1.020250921022328, + "learning_rate": 1.4328536922351654e-06, + "loss": 0.9645330905914307, + "step": 3383 + }, + { + "epoch": 0.7797235023041474, + "grad_norm": 0.7173807290755059, + "learning_rate": 1.4325102175516289e-06, + "loss": 0.5122036933898926, + "step": 3384 + }, + { + "epoch": 0.7799539170506913, + "grad_norm": 0.8487864939918429, + "learning_rate": 1.432166680089807e-06, + "loss": 0.6556990742683411, + "step": 3385 + }, + { + 
"epoch": 0.780184331797235, + "grad_norm": 0.7980125905366343, + "learning_rate": 1.4318230798995634e-06, + "loss": 0.6642920970916748, + "step": 3386 + }, + { + "epoch": 0.7804147465437788, + "grad_norm": 1.1205844690065134, + "learning_rate": 1.4314794170307718e-06, + "loss": 0.9373915195465088, + "step": 3387 + }, + { + "epoch": 0.7806451612903226, + "grad_norm": 1.1583496011366634, + "learning_rate": 1.4311356915333139e-06, + "loss": 0.8295063972473145, + "step": 3388 + }, + { + "epoch": 0.7808755760368664, + "grad_norm": 1.0075666840710995, + "learning_rate": 1.4307919034570809e-06, + "loss": 0.8167035579681396, + "step": 3389 + }, + { + "epoch": 0.7811059907834101, + "grad_norm": 1.045465756545736, + "learning_rate": 1.4304480528519736e-06, + "loss": 0.8444087505340576, + "step": 3390 + }, + { + "epoch": 0.7813364055299539, + "grad_norm": 0.9731986846355507, + "learning_rate": 1.4301041397679012e-06, + "loss": 0.7753941416740417, + "step": 3391 + }, + { + "epoch": 0.7815668202764977, + "grad_norm": 1.0117493931274548, + "learning_rate": 1.4297601642547824e-06, + "loss": 0.7885915040969849, + "step": 3392 + }, + { + "epoch": 0.7817972350230414, + "grad_norm": 0.9902641403084854, + "learning_rate": 1.4294161263625444e-06, + "loss": 0.730733335018158, + "step": 3393 + }, + { + "epoch": 0.7820276497695853, + "grad_norm": 0.8781208509199174, + "learning_rate": 1.4290720261411241e-06, + "loss": 0.8505427837371826, + "step": 3394 + }, + { + "epoch": 0.782258064516129, + "grad_norm": 0.9435888376510791, + "learning_rate": 1.4287278636404676e-06, + "loss": 0.7370787858963013, + "step": 3395 + }, + { + "epoch": 0.7824884792626728, + "grad_norm": 0.8683550268652552, + "learning_rate": 1.428383638910529e-06, + "loss": 0.6776250600814819, + "step": 3396 + }, + { + "epoch": 0.7827188940092166, + "grad_norm": 1.158711583120319, + "learning_rate": 1.4280393520012726e-06, + "loss": 0.8878101706504822, + "step": 3397 + }, + { + "epoch": 0.7829493087557604, + "grad_norm": 
1.0028929146104306, + "learning_rate": 1.427695002962671e-06, + "loss": 0.789238691329956, + "step": 3398 + }, + { + "epoch": 0.7831797235023041, + "grad_norm": 1.0382561381902518, + "learning_rate": 1.4273505918447052e-06, + "loss": 0.772524356842041, + "step": 3399 + }, + { + "epoch": 0.783410138248848, + "grad_norm": 0.8483839499127978, + "learning_rate": 1.4270061186973673e-06, + "loss": 0.682374119758606, + "step": 3400 + }, + { + "epoch": 0.7836405529953917, + "grad_norm": 0.9396222987314208, + "learning_rate": 1.4266615835706566e-06, + "loss": 0.874775767326355, + "step": 3401 + }, + { + "epoch": 0.7838709677419354, + "grad_norm": 1.3780294752863322, + "learning_rate": 1.4263169865145816e-06, + "loss": 0.9141736626625061, + "step": 3402 + }, + { + "epoch": 0.7841013824884793, + "grad_norm": 1.0849695477918648, + "learning_rate": 1.4259723275791603e-06, + "loss": 0.8533145189285278, + "step": 3403 + }, + { + "epoch": 0.784331797235023, + "grad_norm": 0.9340136683520418, + "learning_rate": 1.4256276068144198e-06, + "loss": 0.7920266389846802, + "step": 3404 + }, + { + "epoch": 0.7845622119815668, + "grad_norm": 0.9462841256440514, + "learning_rate": 1.4252828242703957e-06, + "loss": 0.7822731733322144, + "step": 3405 + }, + { + "epoch": 0.7847926267281106, + "grad_norm": 0.9890597976168253, + "learning_rate": 1.4249379799971324e-06, + "loss": 0.7103791832923889, + "step": 3406 + }, + { + "epoch": 0.7850230414746544, + "grad_norm": 1.0298833059227221, + "learning_rate": 1.4245930740446841e-06, + "loss": 0.7857639789581299, + "step": 3407 + }, + { + "epoch": 0.7852534562211981, + "grad_norm": 1.1065594183312877, + "learning_rate": 1.4242481064631134e-06, + "loss": 0.8069730997085571, + "step": 3408 + }, + { + "epoch": 0.785483870967742, + "grad_norm": 1.0472042802008708, + "learning_rate": 1.4239030773024912e-06, + "loss": 0.8758031129837036, + "step": 3409 + }, + { + "epoch": 0.7857142857142857, + "grad_norm": 1.015785019886056, + "learning_rate": 
1.4235579866128983e-06, + "loss": 0.895712673664093, + "step": 3410 + }, + { + "epoch": 0.7859447004608295, + "grad_norm": 0.9442660407745113, + "learning_rate": 1.423212834444425e-06, + "loss": 0.7904561758041382, + "step": 3411 + }, + { + "epoch": 0.7861751152073733, + "grad_norm": 1.0957623852355893, + "learning_rate": 1.4228676208471685e-06, + "loss": 0.9322203993797302, + "step": 3412 + }, + { + "epoch": 0.7864055299539171, + "grad_norm": 0.7668753687506044, + "learning_rate": 1.422522345871237e-06, + "loss": 0.9693628549575806, + "step": 3413 + }, + { + "epoch": 0.7866359447004608, + "grad_norm": 0.8417164970136307, + "learning_rate": 1.4221770095667462e-06, + "loss": 0.6737014651298523, + "step": 3414 + }, + { + "epoch": 0.7868663594470046, + "grad_norm": 1.1466654292657967, + "learning_rate": 1.4218316119838215e-06, + "loss": 0.8682050108909607, + "step": 3415 + }, + { + "epoch": 0.7870967741935484, + "grad_norm": 1.058324160083765, + "learning_rate": 1.4214861531725966e-06, + "loss": 0.7920347452163696, + "step": 3416 + }, + { + "epoch": 0.7873271889400921, + "grad_norm": 1.0147867893383273, + "learning_rate": 1.4211406331832144e-06, + "loss": 0.8330510854721069, + "step": 3417 + }, + { + "epoch": 0.787557603686636, + "grad_norm": 0.8802491842183522, + "learning_rate": 1.4207950520658272e-06, + "loss": 0.8314074873924255, + "step": 3418 + }, + { + "epoch": 0.7877880184331797, + "grad_norm": 1.069355954495663, + "learning_rate": 1.420449409870595e-06, + "loss": 0.7045331001281738, + "step": 3419 + }, + { + "epoch": 0.7880184331797235, + "grad_norm": 0.9484390721895568, + "learning_rate": 1.4201037066476876e-06, + "loss": 0.7825411558151245, + "step": 3420 + }, + { + "epoch": 0.7882488479262673, + "grad_norm": 0.86611108370867, + "learning_rate": 1.4197579424472834e-06, + "loss": 0.6960075497627258, + "step": 3421 + }, + { + "epoch": 0.7884792626728111, + "grad_norm": 1.038692849963906, + "learning_rate": 1.4194121173195694e-06, + "loss": 0.8366748094558716, 
+ "step": 3422 + }, + { + "epoch": 0.7887096774193548, + "grad_norm": 0.8605441828045868, + "learning_rate": 1.4190662313147419e-06, + "loss": 0.8859039545059204, + "step": 3423 + }, + { + "epoch": 0.7889400921658987, + "grad_norm": 1.0572382908005622, + "learning_rate": 1.4187202844830057e-06, + "loss": 0.7098245620727539, + "step": 3424 + }, + { + "epoch": 0.7891705069124424, + "grad_norm": 0.9126448008384304, + "learning_rate": 1.4183742768745743e-06, + "loss": 0.7410455942153931, + "step": 3425 + }, + { + "epoch": 0.7894009216589861, + "grad_norm": 0.8007200450015498, + "learning_rate": 1.4180282085396706e-06, + "loss": 0.7414010763168335, + "step": 3426 + }, + { + "epoch": 0.78963133640553, + "grad_norm": 1.090062212374054, + "learning_rate": 1.417682079528526e-06, + "loss": 0.9043526649475098, + "step": 3427 + }, + { + "epoch": 0.7898617511520737, + "grad_norm": 0.8510201071166715, + "learning_rate": 1.4173358898913804e-06, + "loss": 0.7709499597549438, + "step": 3428 + }, + { + "epoch": 0.7900921658986175, + "grad_norm": 1.0829385459770577, + "learning_rate": 1.416989639678483e-06, + "loss": 0.7499940395355225, + "step": 3429 + }, + { + "epoch": 0.7903225806451613, + "grad_norm": 0.766744185733082, + "learning_rate": 1.4166433289400911e-06, + "loss": 0.7401680946350098, + "step": 3430 + }, + { + "epoch": 0.7905529953917051, + "grad_norm": 0.8802012939982503, + "learning_rate": 1.4162969577264718e-06, + "loss": 1.0132567882537842, + "step": 3431 + }, + { + "epoch": 0.7907834101382488, + "grad_norm": 0.9758763490715631, + "learning_rate": 1.4159505260879004e-06, + "loss": 0.8438389301300049, + "step": 3432 + }, + { + "epoch": 0.7910138248847927, + "grad_norm": 1.2075583274029744, + "learning_rate": 1.4156040340746603e-06, + "loss": 0.9149703979492188, + "step": 3433 + }, + { + "epoch": 0.7912442396313364, + "grad_norm": 1.4960555955584764, + "learning_rate": 1.4152574817370451e-06, + "loss": 0.9141047596931458, + "step": 3434 + }, + { + "epoch": 
0.7914746543778802, + "grad_norm": 0.924125511762228, + "learning_rate": 1.414910869125356e-06, + "loss": 0.6896570324897766, + "step": 3435 + }, + { + "epoch": 0.791705069124424, + "grad_norm": 0.9277571830040596, + "learning_rate": 1.4145641962899035e-06, + "loss": 0.742916464805603, + "step": 3436 + }, + { + "epoch": 0.7919354838709678, + "grad_norm": 1.0041274553911197, + "learning_rate": 1.414217463281007e-06, + "loss": 0.9315029382705688, + "step": 3437 + }, + { + "epoch": 0.7921658986175115, + "grad_norm": 0.9532695013501692, + "learning_rate": 1.4138706701489942e-06, + "loss": 0.7645175457000732, + "step": 3438 + }, + { + "epoch": 0.7923963133640552, + "grad_norm": 1.0166687927137474, + "learning_rate": 1.413523816944201e-06, + "loss": 0.8253934383392334, + "step": 3439 + }, + { + "epoch": 0.7926267281105991, + "grad_norm": 1.055807296618818, + "learning_rate": 1.4131769037169736e-06, + "loss": 0.8650136590003967, + "step": 3440 + }, + { + "epoch": 0.7928571428571428, + "grad_norm": 1.0239985264965783, + "learning_rate": 1.4128299305176654e-06, + "loss": 0.7453975677490234, + "step": 3441 + }, + { + "epoch": 0.7930875576036867, + "grad_norm": 1.1689392671270256, + "learning_rate": 1.4124828973966392e-06, + "loss": 0.9121813774108887, + "step": 3442 + }, + { + "epoch": 0.7933179723502304, + "grad_norm": 1.16007005259146, + "learning_rate": 1.4121358044042667e-06, + "loss": 0.9097952842712402, + "step": 3443 + }, + { + "epoch": 0.7935483870967742, + "grad_norm": 0.9263687778783555, + "learning_rate": 1.4117886515909277e-06, + "loss": 0.7185770273208618, + "step": 3444 + }, + { + "epoch": 0.793778801843318, + "grad_norm": 0.9816189958888628, + "learning_rate": 1.4114414390070111e-06, + "loss": 0.8192715644836426, + "step": 3445 + }, + { + "epoch": 0.7940092165898618, + "grad_norm": 0.8830372557771754, + "learning_rate": 1.4110941667029143e-06, + "loss": 0.7864251136779785, + "step": 3446 + }, + { + "epoch": 0.7942396313364055, + "grad_norm": 
0.9262266668392852, + "learning_rate": 1.4107468347290431e-06, + "loss": 0.7433357834815979, + "step": 3447 + }, + { + "epoch": 0.7944700460829494, + "grad_norm": 0.8826486406616629, + "learning_rate": 1.4103994431358133e-06, + "loss": 0.8196350336074829, + "step": 3448 + }, + { + "epoch": 0.7947004608294931, + "grad_norm": 1.0379031741076927, + "learning_rate": 1.410051991973647e-06, + "loss": 0.7698987126350403, + "step": 3449 + }, + { + "epoch": 0.7949308755760369, + "grad_norm": 1.228700210939763, + "learning_rate": 1.4097044812929776e-06, + "loss": 0.9404128789901733, + "step": 3450 + }, + { + "epoch": 0.7951612903225806, + "grad_norm": 0.9114628140508482, + "learning_rate": 1.4093569111442443e-06, + "loss": 0.827290952205658, + "step": 3451 + }, + { + "epoch": 0.7953917050691244, + "grad_norm": 1.0612294009838623, + "learning_rate": 1.4090092815778976e-06, + "loss": 0.8126389384269714, + "step": 3452 + }, + { + "epoch": 0.7956221198156682, + "grad_norm": 0.9598694992596972, + "learning_rate": 1.4086615926443953e-06, + "loss": 0.7439650297164917, + "step": 3453 + }, + { + "epoch": 0.7958525345622119, + "grad_norm": 0.9952168701899716, + "learning_rate": 1.4083138443942036e-06, + "loss": 0.7505590915679932, + "step": 3454 + }, + { + "epoch": 0.7960829493087558, + "grad_norm": 0.8299073365871691, + "learning_rate": 1.407966036877798e-06, + "loss": 0.7070168256759644, + "step": 3455 + }, + { + "epoch": 0.7963133640552995, + "grad_norm": 0.9422601313607071, + "learning_rate": 1.4076181701456623e-06, + "loss": 0.8271987438201904, + "step": 3456 + }, + { + "epoch": 0.7965437788018433, + "grad_norm": 0.8558890366072001, + "learning_rate": 1.4072702442482886e-06, + "loss": 0.72886061668396, + "step": 3457 + }, + { + "epoch": 0.7967741935483871, + "grad_norm": 1.1355616522222822, + "learning_rate": 1.4069222592361784e-06, + "loss": 0.838603138923645, + "step": 3458 + }, + { + "epoch": 0.7970046082949309, + "grad_norm": 1.1314183210174298, + "learning_rate": 
1.4065742151598408e-06, + "loss": 0.9829634428024292, + "step": 3459 + }, + { + "epoch": 0.7972350230414746, + "grad_norm": 1.0528251173572156, + "learning_rate": 1.406226112069794e-06, + "loss": 0.8269632458686829, + "step": 3460 + }, + { + "epoch": 0.7974654377880185, + "grad_norm": 1.0290510208624037, + "learning_rate": 1.405877950016565e-06, + "loss": 0.7234654426574707, + "step": 3461 + }, + { + "epoch": 0.7976958525345622, + "grad_norm": 0.89079385428478, + "learning_rate": 1.4055297290506887e-06, + "loss": 0.7843908071517944, + "step": 3462 + }, + { + "epoch": 0.7979262672811059, + "grad_norm": 0.8247890912721374, + "learning_rate": 1.4051814492227094e-06, + "loss": 0.7294371128082275, + "step": 3463 + }, + { + "epoch": 0.7981566820276498, + "grad_norm": 1.1727486785997119, + "learning_rate": 1.4048331105831787e-06, + "loss": 0.8805780410766602, + "step": 3464 + }, + { + "epoch": 0.7983870967741935, + "grad_norm": 0.9922079942807702, + "learning_rate": 1.404484713182658e-06, + "loss": 0.6933708190917969, + "step": 3465 + }, + { + "epoch": 0.7986175115207373, + "grad_norm": 1.0638183747733119, + "learning_rate": 1.404136257071717e-06, + "loss": 0.8720458745956421, + "step": 3466 + }, + { + "epoch": 0.7988479262672811, + "grad_norm": 1.1404138575251217, + "learning_rate": 1.403787742300933e-06, + "loss": 0.7675988674163818, + "step": 3467 + }, + { + "epoch": 0.7990783410138249, + "grad_norm": 1.0188982193786602, + "learning_rate": 1.403439168920893e-06, + "loss": 0.7630051374435425, + "step": 3468 + }, + { + "epoch": 0.7993087557603686, + "grad_norm": 0.9607713149142998, + "learning_rate": 1.4030905369821914e-06, + "loss": 0.9195173978805542, + "step": 3469 + }, + { + "epoch": 0.7995391705069125, + "grad_norm": 0.966603725031027, + "learning_rate": 1.402741846535432e-06, + "loss": 0.9347431659698486, + "step": 3470 + }, + { + "epoch": 0.7997695852534562, + "grad_norm": 1.0423944793385256, + "learning_rate": 1.4023930976312271e-06, + "loss": 0.7812551259994507, 
+ "step": 3471 + }, + { + "epoch": 0.8, + "grad_norm": 1.0230073164776583, + "learning_rate": 1.4020442903201963e-06, + "loss": 0.7655330896377563, + "step": 3472 + }, + { + "epoch": 0.8002304147465438, + "grad_norm": 1.2791975931288466, + "learning_rate": 1.4016954246529694e-06, + "loss": 0.7543904185295105, + "step": 3473 + }, + { + "epoch": 0.8004608294930876, + "grad_norm": 0.8246426244987128, + "learning_rate": 1.4013465006801833e-06, + "loss": 0.9343980550765991, + "step": 3474 + }, + { + "epoch": 0.8006912442396313, + "grad_norm": 1.1458439395589735, + "learning_rate": 1.4009975184524838e-06, + "loss": 0.7366182208061218, + "step": 3475 + }, + { + "epoch": 0.8009216589861751, + "grad_norm": 1.0109168818205314, + "learning_rate": 1.4006484780205254e-06, + "loss": 0.7028899192810059, + "step": 3476 + }, + { + "epoch": 0.8011520737327189, + "grad_norm": 1.1092959183189253, + "learning_rate": 1.4002993794349708e-06, + "loss": 0.9259153604507446, + "step": 3477 + }, + { + "epoch": 0.8013824884792626, + "grad_norm": 1.091442085001374, + "learning_rate": 1.3999502227464914e-06, + "loss": 0.7263842225074768, + "step": 3478 + }, + { + "epoch": 0.8016129032258065, + "grad_norm": 0.9964781390280828, + "learning_rate": 1.3996010080057664e-06, + "loss": 0.8177748918533325, + "step": 3479 + }, + { + "epoch": 0.8018433179723502, + "grad_norm": 1.080145531043834, + "learning_rate": 1.3992517352634842e-06, + "loss": 0.8526895046234131, + "step": 3480 + }, + { + "epoch": 0.802073732718894, + "grad_norm": 1.031018616296166, + "learning_rate": 1.398902404570341e-06, + "loss": 0.7914575338363647, + "step": 3481 + }, + { + "epoch": 0.8023041474654378, + "grad_norm": 0.816157508913072, + "learning_rate": 1.398553015977042e-06, + "loss": 0.7546013593673706, + "step": 3482 + }, + { + "epoch": 0.8025345622119816, + "grad_norm": 1.0408293581677805, + "learning_rate": 1.3982035695343005e-06, + "loss": 0.7250038385391235, + "step": 3483 + }, + { + "epoch": 0.8027649769585253, + 
"grad_norm": 1.023275477136697, + "learning_rate": 1.3978540652928376e-06, + "loss": 0.8650141954421997, + "step": 3484 + }, + { + "epoch": 0.8029953917050692, + "grad_norm": 0.9633891302798026, + "learning_rate": 1.3975045033033838e-06, + "loss": 0.8020066022872925, + "step": 3485 + }, + { + "epoch": 0.8032258064516129, + "grad_norm": 0.9146174916063312, + "learning_rate": 1.3971548836166782e-06, + "loss": 0.7376772165298462, + "step": 3486 + }, + { + "epoch": 0.8034562211981566, + "grad_norm": 0.9278800283054291, + "learning_rate": 1.3968052062834665e-06, + "loss": 0.8440769910812378, + "step": 3487 + }, + { + "epoch": 0.8036866359447005, + "grad_norm": 0.8964312010034259, + "learning_rate": 1.3964554713545047e-06, + "loss": 0.7886836528778076, + "step": 3488 + }, + { + "epoch": 0.8039170506912442, + "grad_norm": 0.9177920963823754, + "learning_rate": 1.396105678880556e-06, + "loss": 0.9167575836181641, + "step": 3489 + }, + { + "epoch": 0.804147465437788, + "grad_norm": 0.8367032180339474, + "learning_rate": 1.3957558289123922e-06, + "loss": 0.6761677861213684, + "step": 3490 + }, + { + "epoch": 0.8043778801843318, + "grad_norm": 0.9716984065235628, + "learning_rate": 1.3954059215007938e-06, + "loss": 0.7775592803955078, + "step": 3491 + }, + { + "epoch": 0.8046082949308756, + "grad_norm": 1.00005526663364, + "learning_rate": 1.3950559566965494e-06, + "loss": 0.8127217292785645, + "step": 3492 + }, + { + "epoch": 0.8048387096774193, + "grad_norm": 1.007116682040637, + "learning_rate": 1.394705934550456e-06, + "loss": 0.8134229779243469, + "step": 3493 + }, + { + "epoch": 0.8050691244239632, + "grad_norm": 1.3224030787110577, + "learning_rate": 1.3943558551133186e-06, + "loss": 0.8853167295455933, + "step": 3494 + }, + { + "epoch": 0.8052995391705069, + "grad_norm": 1.0544152264027669, + "learning_rate": 1.3940057184359506e-06, + "loss": 0.8024332523345947, + "step": 3495 + }, + { + "epoch": 0.8055299539170507, + "grad_norm": 0.6779010833647611, + 
"learning_rate": 1.3936555245691745e-06, + "loss": 0.7581099271774292, + "step": 3496 + }, + { + "epoch": 0.8057603686635945, + "grad_norm": 1.0509729333579008, + "learning_rate": 1.3933052735638203e-06, + "loss": 0.979412317276001, + "step": 3497 + }, + { + "epoch": 0.8059907834101383, + "grad_norm": 0.9816833973848147, + "learning_rate": 1.392954965470726e-06, + "loss": 0.7917830944061279, + "step": 3498 + }, + { + "epoch": 0.806221198156682, + "grad_norm": 0.9622725908619084, + "learning_rate": 1.392604600340739e-06, + "loss": 0.8565326929092407, + "step": 3499 + }, + { + "epoch": 0.8064516129032258, + "grad_norm": 1.0170451339424116, + "learning_rate": 1.3922541782247136e-06, + "loss": 0.7276358604431152, + "step": 3500 + }, + { + "epoch": 0.8066820276497696, + "grad_norm": 0.8351645839157906, + "learning_rate": 1.3919036991735138e-06, + "loss": 0.734528660774231, + "step": 3501 + }, + { + "epoch": 0.8069124423963133, + "grad_norm": 1.1746648423168138, + "learning_rate": 1.391553163238011e-06, + "loss": 0.8786039352416992, + "step": 3502 + }, + { + "epoch": 0.8071428571428572, + "grad_norm": 1.1050955424788658, + "learning_rate": 1.3912025704690844e-06, + "loss": 0.9509482383728027, + "step": 3503 + }, + { + "epoch": 0.8073732718894009, + "grad_norm": 0.8741751886687131, + "learning_rate": 1.3908519209176225e-06, + "loss": 0.7188615202903748, + "step": 3504 + }, + { + "epoch": 0.8076036866359447, + "grad_norm": 1.0307846021250762, + "learning_rate": 1.3905012146345221e-06, + "loss": 0.7681115865707397, + "step": 3505 + }, + { + "epoch": 0.8078341013824885, + "grad_norm": 1.0988034793572021, + "learning_rate": 1.3901504516706874e-06, + "loss": 0.8835415840148926, + "step": 3506 + }, + { + "epoch": 0.8080645161290323, + "grad_norm": 1.0724177836810997, + "learning_rate": 1.389799632077031e-06, + "loss": 0.8179003000259399, + "step": 3507 + }, + { + "epoch": 0.808294930875576, + "grad_norm": 1.1244187286361234, + "learning_rate": 1.3894487559044742e-06, + "loss": 
0.9690247774124146, + "step": 3508 + }, + { + "epoch": 0.8085253456221199, + "grad_norm": 0.9601740737567672, + "learning_rate": 1.389097823203946e-06, + "loss": 0.9759812951087952, + "step": 3509 + }, + { + "epoch": 0.8087557603686636, + "grad_norm": 0.8953376224758026, + "learning_rate": 1.3887468340263838e-06, + "loss": 0.6649112105369568, + "step": 3510 + }, + { + "epoch": 0.8089861751152074, + "grad_norm": 0.8803647716437188, + "learning_rate": 1.388395788422733e-06, + "loss": 0.7824583053588867, + "step": 3511 + }, + { + "epoch": 0.8092165898617512, + "grad_norm": 1.0776551292843717, + "learning_rate": 1.3880446864439482e-06, + "loss": 0.8226176500320435, + "step": 3512 + }, + { + "epoch": 0.8094470046082949, + "grad_norm": 1.0775758718001336, + "learning_rate": 1.3876935281409904e-06, + "loss": 0.7708876729011536, + "step": 3513 + }, + { + "epoch": 0.8096774193548387, + "grad_norm": 1.1275141981575327, + "learning_rate": 1.3873423135648303e-06, + "loss": 0.7162825465202332, + "step": 3514 + }, + { + "epoch": 0.8099078341013825, + "grad_norm": 1.1973823780619761, + "learning_rate": 1.3869910427664464e-06, + "loss": 0.815816342830658, + "step": 3515 + }, + { + "epoch": 0.8101382488479263, + "grad_norm": 1.0491570029475803, + "learning_rate": 1.3866397157968248e-06, + "loss": 0.9166251420974731, + "step": 3516 + }, + { + "epoch": 0.81036866359447, + "grad_norm": 1.185963303947227, + "learning_rate": 1.3862883327069606e-06, + "loss": 0.9193897843360901, + "step": 3517 + }, + { + "epoch": 0.8105990783410139, + "grad_norm": 1.1492579516601074, + "learning_rate": 1.3859368935478557e-06, + "loss": 0.9019489288330078, + "step": 3518 + }, + { + "epoch": 0.8108294930875576, + "grad_norm": 1.0706438739080621, + "learning_rate": 1.3855853983705222e-06, + "loss": 0.8616153597831726, + "step": 3519 + }, + { + "epoch": 0.8110599078341014, + "grad_norm": 0.9368530229676858, + "learning_rate": 1.3852338472259782e-06, + "loss": 0.8898462057113647, + "step": 3520 + }, + { + 
"epoch": 0.8112903225806452, + "grad_norm": 0.9891797921278073, + "learning_rate": 1.3848822401652513e-06, + "loss": 0.770263135433197, + "step": 3521 + }, + { + "epoch": 0.811520737327189, + "grad_norm": 0.950594228231774, + "learning_rate": 1.384530577239377e-06, + "loss": 0.7524563074111938, + "step": 3522 + }, + { + "epoch": 0.8117511520737327, + "grad_norm": 0.8975349550091929, + "learning_rate": 1.3841788584993981e-06, + "loss": 0.776715874671936, + "step": 3523 + }, + { + "epoch": 0.8119815668202764, + "grad_norm": 0.6412822466784485, + "learning_rate": 1.3838270839963666e-06, + "loss": 0.7165439128875732, + "step": 3524 + }, + { + "epoch": 0.8122119815668203, + "grad_norm": 1.0082147827954213, + "learning_rate": 1.383475253781342e-06, + "loss": 0.7641004323959351, + "step": 3525 + }, + { + "epoch": 0.812442396313364, + "grad_norm": 0.9278762834298543, + "learning_rate": 1.3831233679053921e-06, + "loss": 0.7493933439254761, + "step": 3526 + }, + { + "epoch": 0.8126728110599079, + "grad_norm": 1.1064599998463516, + "learning_rate": 1.3827714264195924e-06, + "loss": 0.7981607913970947, + "step": 3527 + }, + { + "epoch": 0.8129032258064516, + "grad_norm": 1.2555949352929368, + "learning_rate": 1.3824194293750272e-06, + "loss": 0.9130103588104248, + "step": 3528 + }, + { + "epoch": 0.8131336405529954, + "grad_norm": 1.0192840808161379, + "learning_rate": 1.3820673768227878e-06, + "loss": 0.7208644151687622, + "step": 3529 + }, + { + "epoch": 0.8133640552995391, + "grad_norm": 0.9880323858602741, + "learning_rate": 1.3817152688139745e-06, + "loss": 0.9134006500244141, + "step": 3530 + }, + { + "epoch": 0.813594470046083, + "grad_norm": 0.836575472485664, + "learning_rate": 1.381363105399695e-06, + "loss": 0.7383376359939575, + "step": 3531 + }, + { + "epoch": 0.8138248847926267, + "grad_norm": 1.4743208995655537, + "learning_rate": 1.381010886631066e-06, + "loss": 0.9143035411834717, + "step": 3532 + }, + { + "epoch": 0.8140552995391706, + "grad_norm": 
0.8030889519622723, + "learning_rate": 1.3806586125592107e-06, + "loss": 0.7972506284713745, + "step": 3533 + }, + { + "epoch": 0.8142857142857143, + "grad_norm": 0.9706054308316248, + "learning_rate": 1.380306283235262e-06, + "loss": 0.8999859094619751, + "step": 3534 + }, + { + "epoch": 0.8145161290322581, + "grad_norm": 1.4136312048518, + "learning_rate": 1.37995389871036e-06, + "loss": 0.7759672999382019, + "step": 3535 + }, + { + "epoch": 0.8147465437788018, + "grad_norm": 0.8852561621502252, + "learning_rate": 1.3796014590356522e-06, + "loss": 0.7915023565292358, + "step": 3536 + }, + { + "epoch": 0.8149769585253456, + "grad_norm": 1.0626460640648143, + "learning_rate": 1.3792489642622956e-06, + "loss": 0.8259623050689697, + "step": 3537 + }, + { + "epoch": 0.8152073732718894, + "grad_norm": 0.9193643373115533, + "learning_rate": 1.3788964144414534e-06, + "loss": 0.7786526679992676, + "step": 3538 + }, + { + "epoch": 0.8154377880184331, + "grad_norm": 0.8743120056652736, + "learning_rate": 1.3785438096242987e-06, + "loss": 0.8655314445495605, + "step": 3539 + }, + { + "epoch": 0.815668202764977, + "grad_norm": 1.073925215345039, + "learning_rate": 1.3781911498620108e-06, + "loss": 0.8116016387939453, + "step": 3540 + }, + { + "epoch": 0.8158986175115207, + "grad_norm": 1.07781870851745, + "learning_rate": 1.3778384352057781e-06, + "loss": 0.712907075881958, + "step": 3541 + }, + { + "epoch": 0.8161290322580645, + "grad_norm": 0.9419481549244654, + "learning_rate": 1.377485665706797e-06, + "loss": 0.8271318674087524, + "step": 3542 + }, + { + "epoch": 0.8163594470046083, + "grad_norm": 1.231349694992367, + "learning_rate": 1.3771328414162713e-06, + "loss": 0.9161353707313538, + "step": 3543 + }, + { + "epoch": 0.8165898617511521, + "grad_norm": 1.1900246832578463, + "learning_rate": 1.3767799623854125e-06, + "loss": 0.9555908441543579, + "step": 3544 + }, + { + "epoch": 0.8168202764976958, + "grad_norm": 0.9121338000164769, + "learning_rate": 
1.3764270286654414e-06, + "loss": 0.7863249778747559, + "step": 3545 + }, + { + "epoch": 0.8170506912442397, + "grad_norm": 1.0362996056258458, + "learning_rate": 1.3760740403075853e-06, + "loss": 0.9086883068084717, + "step": 3546 + }, + { + "epoch": 0.8172811059907834, + "grad_norm": 0.9211768991499883, + "learning_rate": 1.37572099736308e-06, + "loss": 0.6231412887573242, + "step": 3547 + }, + { + "epoch": 0.8175115207373271, + "grad_norm": 0.94903309328564, + "learning_rate": 1.3753678998831692e-06, + "loss": 0.8221716284751892, + "step": 3548 + }, + { + "epoch": 0.817741935483871, + "grad_norm": 1.0641797094094223, + "learning_rate": 1.375014747919105e-06, + "loss": 0.8077783584594727, + "step": 3549 + }, + { + "epoch": 0.8179723502304147, + "grad_norm": 1.0675643850007648, + "learning_rate": 1.3746615415221463e-06, + "loss": 0.6882060766220093, + "step": 3550 + }, + { + "epoch": 0.8182027649769585, + "grad_norm": 0.8393670588117293, + "learning_rate": 1.3743082807435614e-06, + "loss": 0.700161337852478, + "step": 3551 + }, + { + "epoch": 0.8184331797235023, + "grad_norm": 0.8856084645963668, + "learning_rate": 1.3739549656346243e-06, + "loss": 0.737981915473938, + "step": 3552 + }, + { + "epoch": 0.8186635944700461, + "grad_norm": 0.8562104816360829, + "learning_rate": 1.3736015962466193e-06, + "loss": 0.8025717735290527, + "step": 3553 + }, + { + "epoch": 0.8188940092165898, + "grad_norm": 1.1233745076434911, + "learning_rate": 1.3732481726308372e-06, + "loss": 0.8855722546577454, + "step": 3554 + }, + { + "epoch": 0.8191244239631337, + "grad_norm": 1.2861487220187957, + "learning_rate": 1.3728946948385768e-06, + "loss": 0.819130539894104, + "step": 3555 + }, + { + "epoch": 0.8193548387096774, + "grad_norm": 1.086213399760416, + "learning_rate": 1.3725411629211454e-06, + "loss": 0.8419625759124756, + "step": 3556 + }, + { + "epoch": 0.8195852534562212, + "grad_norm": 0.8659477904111433, + "learning_rate": 1.3721875769298575e-06, + "loss": 0.8478890657424927, 
+ "step": 3557 + }, + { + "epoch": 0.819815668202765, + "grad_norm": 0.9446742102947047, + "learning_rate": 1.371833936916035e-06, + "loss": 0.8654077053070068, + "step": 3558 + }, + { + "epoch": 0.8200460829493088, + "grad_norm": 1.132873117876266, + "learning_rate": 1.371480242931009e-06, + "loss": 0.8898686170578003, + "step": 3559 + }, + { + "epoch": 0.8202764976958525, + "grad_norm": 1.0419861877874252, + "learning_rate": 1.3711264950261176e-06, + "loss": 0.873773455619812, + "step": 3560 + }, + { + "epoch": 0.8205069124423963, + "grad_norm": 0.8068261635969198, + "learning_rate": 1.3707726932527068e-06, + "loss": 0.6323572397232056, + "step": 3561 + }, + { + "epoch": 0.8207373271889401, + "grad_norm": 1.1038849604905803, + "learning_rate": 1.3704188376621304e-06, + "loss": 0.7018281817436218, + "step": 3562 + }, + { + "epoch": 0.8209677419354838, + "grad_norm": 1.084497532058705, + "learning_rate": 1.37006492830575e-06, + "loss": 0.8052775859832764, + "step": 3563 + }, + { + "epoch": 0.8211981566820277, + "grad_norm": 1.0795040103988192, + "learning_rate": 1.3697109652349352e-06, + "loss": 0.8057233095169067, + "step": 3564 + }, + { + "epoch": 0.8214285714285714, + "grad_norm": 1.1240440402053398, + "learning_rate": 1.3693569485010633e-06, + "loss": 0.8647899627685547, + "step": 3565 + }, + { + "epoch": 0.8216589861751152, + "grad_norm": 0.9167509343069911, + "learning_rate": 1.369002878155519e-06, + "loss": 0.8022265434265137, + "step": 3566 + }, + { + "epoch": 0.821889400921659, + "grad_norm": 1.0569217144551386, + "learning_rate": 1.368648754249696e-06, + "loss": 0.8534140586853027, + "step": 3567 + }, + { + "epoch": 0.8221198156682028, + "grad_norm": 1.1336199597215886, + "learning_rate": 1.3682945768349935e-06, + "loss": 0.905183732509613, + "step": 3568 + }, + { + "epoch": 0.8223502304147465, + "grad_norm": 1.0114816874699049, + "learning_rate": 1.3679403459628215e-06, + "loss": 0.6096831560134888, + "step": 3569 + }, + { + "epoch": 0.8225806451612904, 
+ "grad_norm": 1.0433167842442863, + "learning_rate": 1.367586061684595e-06, + "loss": 0.7220188975334167, + "step": 3570 + }, + { + "epoch": 0.8228110599078341, + "grad_norm": 1.2434665139770538, + "learning_rate": 1.3672317240517386e-06, + "loss": 0.8028903007507324, + "step": 3571 + }, + { + "epoch": 0.8230414746543778, + "grad_norm": 0.8999816334081224, + "learning_rate": 1.3668773331156831e-06, + "loss": 0.8121141791343689, + "step": 3572 + }, + { + "epoch": 0.8232718894009217, + "grad_norm": 0.9985064007808814, + "learning_rate": 1.3665228889278687e-06, + "loss": 0.8259282112121582, + "step": 3573 + }, + { + "epoch": 0.8235023041474654, + "grad_norm": 1.0492496227314838, + "learning_rate": 1.3661683915397423e-06, + "loss": 0.9356029033660889, + "step": 3574 + }, + { + "epoch": 0.8237327188940092, + "grad_norm": 0.9103215470779688, + "learning_rate": 1.3658138410027582e-06, + "loss": 0.738788366317749, + "step": 3575 + }, + { + "epoch": 0.823963133640553, + "grad_norm": 0.9813034370683628, + "learning_rate": 1.3654592373683794e-06, + "loss": 0.7775605320930481, + "step": 3576 + }, + { + "epoch": 0.8241935483870968, + "grad_norm": 1.0650813981062164, + "learning_rate": 1.3651045806880766e-06, + "loss": 0.7645376324653625, + "step": 3577 + }, + { + "epoch": 0.8244239631336405, + "grad_norm": 0.9731809944135928, + "learning_rate": 1.3647498710133272e-06, + "loss": 0.7713958024978638, + "step": 3578 + }, + { + "epoch": 0.8246543778801844, + "grad_norm": 1.148498187573576, + "learning_rate": 1.3643951083956165e-06, + "loss": 0.6920947432518005, + "step": 3579 + }, + { + "epoch": 0.8248847926267281, + "grad_norm": 0.8263814798727009, + "learning_rate": 1.3640402928864382e-06, + "loss": 0.7108405828475952, + "step": 3580 + }, + { + "epoch": 0.8251152073732719, + "grad_norm": 1.0141959867722847, + "learning_rate": 1.3636854245372936e-06, + "loss": 0.7879295945167542, + "step": 3581 + }, + { + "epoch": 0.8253456221198157, + "grad_norm": 0.8796188222287911, + 
"learning_rate": 1.3633305033996909e-06, + "loss": 0.8173119425773621, + "step": 3582 + }, + { + "epoch": 0.8255760368663595, + "grad_norm": 1.230625652029921, + "learning_rate": 1.3629755295251466e-06, + "loss": 0.8530454635620117, + "step": 3583 + }, + { + "epoch": 0.8258064516129032, + "grad_norm": 0.7851178128331011, + "learning_rate": 1.3626205029651846e-06, + "loss": 0.7749553918838501, + "step": 3584 + }, + { + "epoch": 0.826036866359447, + "grad_norm": 0.9879629515788971, + "learning_rate": 1.362265423771337e-06, + "loss": 0.8313847780227661, + "step": 3585 + }, + { + "epoch": 0.8262672811059908, + "grad_norm": 0.9997153587851354, + "learning_rate": 1.3619102919951424e-06, + "loss": 0.7285455465316772, + "step": 3586 + }, + { + "epoch": 0.8264976958525345, + "grad_norm": 1.053529475482116, + "learning_rate": 1.361555107688148e-06, + "loss": 0.8084003925323486, + "step": 3587 + }, + { + "epoch": 0.8267281105990784, + "grad_norm": 1.1979034262658517, + "learning_rate": 1.3611998709019088e-06, + "loss": 0.8506543040275574, + "step": 3588 + }, + { + "epoch": 0.8269585253456221, + "grad_norm": 1.150137696376644, + "learning_rate": 1.3608445816879864e-06, + "loss": 0.8320293426513672, + "step": 3589 + }, + { + "epoch": 0.8271889400921659, + "grad_norm": 1.0954200087136678, + "learning_rate": 1.3604892400979501e-06, + "loss": 0.8116205930709839, + "step": 3590 + }, + { + "epoch": 0.8274193548387097, + "grad_norm": 0.988607654244707, + "learning_rate": 1.3601338461833785e-06, + "loss": 0.8317450284957886, + "step": 3591 + }, + { + "epoch": 0.8276497695852535, + "grad_norm": 1.0502248139840338, + "learning_rate": 1.3597783999958553e-06, + "loss": 0.7348642349243164, + "step": 3592 + }, + { + "epoch": 0.8278801843317972, + "grad_norm": 0.8829971344500126, + "learning_rate": 1.359422901586974e-06, + "loss": 0.8087270259857178, + "step": 3593 + }, + { + "epoch": 0.8281105990783411, + "grad_norm": 1.1012699484003496, + "learning_rate": 1.3590673510083345e-06, + "loss": 
0.7964637875556946, + "step": 3594 + }, + { + "epoch": 0.8283410138248848, + "grad_norm": 0.8597833865541051, + "learning_rate": 1.358711748311544e-06, + "loss": 0.6192176342010498, + "step": 3595 + }, + { + "epoch": 0.8285714285714286, + "grad_norm": 1.458647590594062, + "learning_rate": 1.3583560935482182e-06, + "loss": 0.7735739946365356, + "step": 3596 + }, + { + "epoch": 0.8288018433179724, + "grad_norm": 1.209934555151429, + "learning_rate": 1.35800038676998e-06, + "loss": 0.7965315580368042, + "step": 3597 + }, + { + "epoch": 0.8290322580645161, + "grad_norm": 1.0086229436787473, + "learning_rate": 1.3576446280284595e-06, + "loss": 0.6489244699478149, + "step": 3598 + }, + { + "epoch": 0.8292626728110599, + "grad_norm": 1.041271189758682, + "learning_rate": 1.3572888173752946e-06, + "loss": 0.8073695302009583, + "step": 3599 + }, + { + "epoch": 0.8294930875576036, + "grad_norm": 0.7544591630478071, + "learning_rate": 1.3569329548621309e-06, + "loss": 0.7925900816917419, + "step": 3600 + }, + { + "epoch": 0.8297235023041475, + "grad_norm": 1.1274353505725723, + "learning_rate": 1.356577040540621e-06, + "loss": 0.83954918384552, + "step": 3601 + }, + { + "epoch": 0.8299539170506912, + "grad_norm": 0.69092010707332, + "learning_rate": 1.356221074462426e-06, + "loss": 0.6384706497192383, + "step": 3602 + }, + { + "epoch": 0.830184331797235, + "grad_norm": 0.8604009933780791, + "learning_rate": 1.3558650566792136e-06, + "loss": 0.8308184146881104, + "step": 3603 + }, + { + "epoch": 0.8304147465437788, + "grad_norm": 0.9893567222365065, + "learning_rate": 1.3555089872426596e-06, + "loss": 0.7972864508628845, + "step": 3604 + }, + { + "epoch": 0.8306451612903226, + "grad_norm": 1.0575497381629144, + "learning_rate": 1.3551528662044463e-06, + "loss": 0.8038849830627441, + "step": 3605 + }, + { + "epoch": 0.8308755760368663, + "grad_norm": 1.0146034272672162, + "learning_rate": 1.3547966936162646e-06, + "loss": 0.7735980749130249, + "step": 3606 + }, + { + "epoch": 
0.8311059907834102, + "grad_norm": 1.169701687059532, + "learning_rate": 1.354440469529813e-06, + "loss": 0.7717504501342773, + "step": 3607 + }, + { + "epoch": 0.8313364055299539, + "grad_norm": 0.8981514617249363, + "learning_rate": 1.3540841939967962e-06, + "loss": 0.9405615329742432, + "step": 3608 + }, + { + "epoch": 0.8315668202764976, + "grad_norm": 0.9913743440349779, + "learning_rate": 1.3537278670689273e-06, + "loss": 0.7730603814125061, + "step": 3609 + }, + { + "epoch": 0.8317972350230415, + "grad_norm": 1.1958069213876743, + "learning_rate": 1.353371488797927e-06, + "loss": 0.8677463531494141, + "step": 3610 + }, + { + "epoch": 0.8320276497695852, + "grad_norm": 1.0362704574624084, + "learning_rate": 1.3530150592355227e-06, + "loss": 0.8261700868606567, + "step": 3611 + }, + { + "epoch": 0.832258064516129, + "grad_norm": 0.9430749395940993, + "learning_rate": 1.35265857843345e-06, + "loss": 0.6799050569534302, + "step": 3612 + }, + { + "epoch": 0.8324884792626728, + "grad_norm": 1.0479319081515341, + "learning_rate": 1.3523020464434514e-06, + "loss": 0.9117664098739624, + "step": 3613 + }, + { + "epoch": 0.8327188940092166, + "grad_norm": 1.0691436327470698, + "learning_rate": 1.3519454633172771e-06, + "loss": 0.8637168407440186, + "step": 3614 + }, + { + "epoch": 0.8329493087557603, + "grad_norm": 0.8579929983536723, + "learning_rate": 1.3515888291066848e-06, + "loss": 0.8169793486595154, + "step": 3615 + }, + { + "epoch": 0.8331797235023042, + "grad_norm": 0.920659117563804, + "learning_rate": 1.3512321438634392e-06, + "loss": 0.6901019811630249, + "step": 3616 + }, + { + "epoch": 0.8334101382488479, + "grad_norm": 1.350300242304736, + "learning_rate": 1.3508754076393133e-06, + "loss": 0.868461012840271, + "step": 3617 + }, + { + "epoch": 0.8336405529953917, + "grad_norm": 0.9765625383196332, + "learning_rate": 1.3505186204860864e-06, + "loss": 0.7916195392608643, + "step": 3618 + }, + { + "epoch": 0.8338709677419355, + "grad_norm": 
0.9685384546753151, + "learning_rate": 1.3501617824555456e-06, + "loss": 0.7078498601913452, + "step": 3619 + }, + { + "epoch": 0.8341013824884793, + "grad_norm": 1.2242730037688179, + "learning_rate": 1.3498048935994857e-06, + "loss": 0.890669584274292, + "step": 3620 + }, + { + "epoch": 0.834331797235023, + "grad_norm": 0.8358453705503323, + "learning_rate": 1.3494479539697087e-06, + "loss": 0.8162761926651001, + "step": 3621 + }, + { + "epoch": 0.8345622119815668, + "grad_norm": 1.013077112717635, + "learning_rate": 1.3490909636180233e-06, + "loss": 0.7743235230445862, + "step": 3622 + }, + { + "epoch": 0.8347926267281106, + "grad_norm": 1.0099386147746707, + "learning_rate": 1.3487339225962472e-06, + "loss": 0.8297950029373169, + "step": 3623 + }, + { + "epoch": 0.8350230414746543, + "grad_norm": 1.1865830325248257, + "learning_rate": 1.3483768309562035e-06, + "loss": 0.9550352692604065, + "step": 3624 + }, + { + "epoch": 0.8352534562211982, + "grad_norm": 0.9576603479694407, + "learning_rate": 1.3480196887497242e-06, + "loss": 0.7343823909759521, + "step": 3625 + }, + { + "epoch": 0.8354838709677419, + "grad_norm": 1.0312198523972542, + "learning_rate": 1.3476624960286479e-06, + "loss": 0.8942683935165405, + "step": 3626 + }, + { + "epoch": 0.8357142857142857, + "grad_norm": 1.0216203737583824, + "learning_rate": 1.34730525284482e-06, + "loss": 0.778289794921875, + "step": 3627 + }, + { + "epoch": 0.8359447004608295, + "grad_norm": 0.8374039418656565, + "learning_rate": 1.3469479592500951e-06, + "loss": 0.5924088954925537, + "step": 3628 + }, + { + "epoch": 0.8361751152073733, + "grad_norm": 1.6640914693337763, + "learning_rate": 1.3465906152963329e-06, + "loss": 1.0363706350326538, + "step": 3629 + }, + { + "epoch": 0.836405529953917, + "grad_norm": 1.1094517477504633, + "learning_rate": 1.346233221035402e-06, + "loss": 0.7927669286727905, + "step": 3630 + }, + { + "epoch": 0.8366359447004609, + "grad_norm": 1.017803676905956, + "learning_rate": 
1.345875776519177e-06, + "loss": 0.8428707718849182, + "step": 3631 + }, + { + "epoch": 0.8368663594470046, + "grad_norm": 1.0894705086513103, + "learning_rate": 1.345518281799541e-06, + "loss": 0.7975403070449829, + "step": 3632 + }, + { + "epoch": 0.8370967741935483, + "grad_norm": 1.0032068733109394, + "learning_rate": 1.3451607369283842e-06, + "loss": 0.8383880853652954, + "step": 3633 + }, + { + "epoch": 0.8373271889400922, + "grad_norm": 1.007543360201824, + "learning_rate": 1.3448031419576028e-06, + "loss": 0.9033386707305908, + "step": 3634 + }, + { + "epoch": 0.8375576036866359, + "grad_norm": 1.1312406567077748, + "learning_rate": 1.3444454969391021e-06, + "loss": 0.8913514018058777, + "step": 3635 + }, + { + "epoch": 0.8377880184331797, + "grad_norm": 1.4041014769308477, + "learning_rate": 1.3440878019247936e-06, + "loss": 0.9051915407180786, + "step": 3636 + }, + { + "epoch": 0.8380184331797235, + "grad_norm": 0.9777048211867199, + "learning_rate": 1.343730056966596e-06, + "loss": 0.8240993618965149, + "step": 3637 + }, + { + "epoch": 0.8382488479262673, + "grad_norm": 1.1788464491037272, + "learning_rate": 1.3433722621164358e-06, + "loss": 0.8276345133781433, + "step": 3638 + }, + { + "epoch": 0.838479262672811, + "grad_norm": 1.1512835626079758, + "learning_rate": 1.343014417426246e-06, + "loss": 0.8250508904457092, + "step": 3639 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 1.0066201319773938, + "learning_rate": 1.342656522947968e-06, + "loss": 0.7872868180274963, + "step": 3640 + }, + { + "epoch": 0.8389400921658986, + "grad_norm": 0.8473767849665474, + "learning_rate": 1.3422985787335491e-06, + "loss": 0.7634146809577942, + "step": 3641 + }, + { + "epoch": 0.8391705069124424, + "grad_norm": 0.9991956505737468, + "learning_rate": 1.3419405848349448e-06, + "loss": 0.63923180103302, + "step": 3642 + }, + { + "epoch": 0.8394009216589862, + "grad_norm": 0.8936657519523178, + "learning_rate": 1.3415825413041173e-06, + "loss": 0.900942325592041, 
+ "step": 3643 + }, + { + "epoch": 0.83963133640553, + "grad_norm": 0.8086145892134451, + "learning_rate": 1.341224448193036e-06, + "loss": 0.6415199041366577, + "step": 3644 + }, + { + "epoch": 0.8398617511520737, + "grad_norm": 0.7541710851332, + "learning_rate": 1.3408663055536775e-06, + "loss": 0.7750275135040283, + "step": 3645 + }, + { + "epoch": 0.8400921658986175, + "grad_norm": 1.0677810215945565, + "learning_rate": 1.3405081134380264e-06, + "loss": 0.8159983158111572, + "step": 3646 + }, + { + "epoch": 0.8403225806451613, + "grad_norm": 1.0361250834896671, + "learning_rate": 1.3401498718980733e-06, + "loss": 0.6870952844619751, + "step": 3647 + }, + { + "epoch": 0.840552995391705, + "grad_norm": 1.0057736881312165, + "learning_rate": 1.3397915809858168e-06, + "loss": 0.8588749170303345, + "step": 3648 + }, + { + "epoch": 0.8407834101382489, + "grad_norm": 0.8944864050117411, + "learning_rate": 1.3394332407532619e-06, + "loss": 0.6926778554916382, + "step": 3649 + }, + { + "epoch": 0.8410138248847926, + "grad_norm": 0.9996715673645244, + "learning_rate": 1.3390748512524213e-06, + "loss": 0.7165309190750122, + "step": 3650 + }, + { + "epoch": 0.8412442396313364, + "grad_norm": 0.8676606625906299, + "learning_rate": 1.3387164125353149e-06, + "loss": 0.7782741189002991, + "step": 3651 + }, + { + "epoch": 0.8414746543778802, + "grad_norm": 1.2076812224962883, + "learning_rate": 1.3383579246539698e-06, + "loss": 0.9153795838356018, + "step": 3652 + }, + { + "epoch": 0.841705069124424, + "grad_norm": 0.9194313077193984, + "learning_rate": 1.33799938766042e-06, + "loss": 0.8419643044471741, + "step": 3653 + }, + { + "epoch": 0.8419354838709677, + "grad_norm": 0.9325821466469247, + "learning_rate": 1.3376408016067064e-06, + "loss": 0.6927728652954102, + "step": 3654 + }, + { + "epoch": 0.8421658986175116, + "grad_norm": 0.8795285549516815, + "learning_rate": 1.3372821665448774e-06, + "loss": 0.7721414566040039, + "step": 3655 + }, + { + "epoch": 
0.8423963133640553, + "grad_norm": 0.8650877944504008, + "learning_rate": 1.3369234825269887e-06, + "loss": 0.7277967929840088, + "step": 3656 + }, + { + "epoch": 0.8426267281105991, + "grad_norm": 0.8893990009557013, + "learning_rate": 1.336564749605102e-06, + "loss": 0.7764936089515686, + "step": 3657 + }, + { + "epoch": 0.8428571428571429, + "grad_norm": 1.0366422012708214, + "learning_rate": 1.336205967831288e-06, + "loss": 0.7445545196533203, + "step": 3658 + }, + { + "epoch": 0.8430875576036866, + "grad_norm": 0.9883734306246509, + "learning_rate": 1.3358471372576227e-06, + "loss": 0.8359465599060059, + "step": 3659 + }, + { + "epoch": 0.8433179723502304, + "grad_norm": 1.1992732184975974, + "learning_rate": 1.33548825793619e-06, + "loss": 0.8634141683578491, + "step": 3660 + }, + { + "epoch": 0.8435483870967742, + "grad_norm": 0.9932267949840192, + "learning_rate": 1.3351293299190804e-06, + "loss": 0.7365708351135254, + "step": 3661 + }, + { + "epoch": 0.843778801843318, + "grad_norm": 1.0553779905834517, + "learning_rate": 1.3347703532583927e-06, + "loss": 0.7135465145111084, + "step": 3662 + }, + { + "epoch": 0.8440092165898617, + "grad_norm": 0.9366872036776951, + "learning_rate": 1.3344113280062313e-06, + "loss": 0.7411447763442993, + "step": 3663 + }, + { + "epoch": 0.8442396313364056, + "grad_norm": 1.1654296408446096, + "learning_rate": 1.3340522542147081e-06, + "loss": 0.7765100002288818, + "step": 3664 + }, + { + "epoch": 0.8444700460829493, + "grad_norm": 0.9657216098787882, + "learning_rate": 1.3336931319359426e-06, + "loss": 0.7638096809387207, + "step": 3665 + }, + { + "epoch": 0.8447004608294931, + "grad_norm": 0.8148482611092309, + "learning_rate": 1.3333339612220606e-06, + "loss": 0.7114577889442444, + "step": 3666 + }, + { + "epoch": 0.8449308755760369, + "grad_norm": 1.075345107734405, + "learning_rate": 1.3329747421251955e-06, + "loss": 0.8702960014343262, + "step": 3667 + }, + { + "epoch": 0.8451612903225807, + "grad_norm": 
0.8702936794654799, + "learning_rate": 1.3326154746974878e-06, + "loss": 0.7248300313949585, + "step": 3668 + }, + { + "epoch": 0.8453917050691244, + "grad_norm": 1.0810218150457531, + "learning_rate": 1.332256158991084e-06, + "loss": 0.7648389339447021, + "step": 3669 + }, + { + "epoch": 0.8456221198156681, + "grad_norm": 1.1179174327015893, + "learning_rate": 1.3318967950581383e-06, + "loss": 0.7075401544570923, + "step": 3670 + }, + { + "epoch": 0.845852534562212, + "grad_norm": 0.9497106076514022, + "learning_rate": 1.3315373829508122e-06, + "loss": 0.6923220157623291, + "step": 3671 + }, + { + "epoch": 0.8460829493087557, + "grad_norm": 1.100773813694407, + "learning_rate": 1.3311779227212742e-06, + "loss": 0.7522361874580383, + "step": 3672 + }, + { + "epoch": 0.8463133640552996, + "grad_norm": 1.026931960572947, + "learning_rate": 1.3308184144216989e-06, + "loss": 0.7087293863296509, + "step": 3673 + }, + { + "epoch": 0.8465437788018433, + "grad_norm": 0.793322008156401, + "learning_rate": 1.3304588581042688e-06, + "loss": 0.782098650932312, + "step": 3674 + }, + { + "epoch": 0.8467741935483871, + "grad_norm": 1.029621860148689, + "learning_rate": 1.330099253821173e-06, + "loss": 0.7671197652816772, + "step": 3675 + }, + { + "epoch": 0.8470046082949308, + "grad_norm": 0.8604911309489864, + "learning_rate": 1.3297396016246073e-06, + "loss": 0.8098698258399963, + "step": 3676 + }, + { + "epoch": 0.8472350230414747, + "grad_norm": 0.9021265860196932, + "learning_rate": 1.3293799015667751e-06, + "loss": 0.7671023011207581, + "step": 3677 + }, + { + "epoch": 0.8474654377880184, + "grad_norm": 0.9115553667327773, + "learning_rate": 1.3290201536998862e-06, + "loss": 0.7448668479919434, + "step": 3678 + }, + { + "epoch": 0.8476958525345623, + "grad_norm": 1.4463207292378697, + "learning_rate": 1.3286603580761576e-06, + "loss": 0.946117639541626, + "step": 3679 + }, + { + "epoch": 0.847926267281106, + "grad_norm": 0.932975472082494, + "learning_rate": 
1.328300514747813e-06, + "loss": 0.8134163618087769, + "step": 3680 + }, + { + "epoch": 0.8481566820276498, + "grad_norm": 1.0433920810873991, + "learning_rate": 1.327940623767083e-06, + "loss": 0.725477933883667, + "step": 3681 + }, + { + "epoch": 0.8483870967741935, + "grad_norm": 0.9434209059724857, + "learning_rate": 1.3275806851862061e-06, + "loss": 0.8278200626373291, + "step": 3682 + }, + { + "epoch": 0.8486175115207373, + "grad_norm": 1.2837572025692205, + "learning_rate": 1.327220699057426e-06, + "loss": 0.8437181711196899, + "step": 3683 + }, + { + "epoch": 0.8488479262672811, + "grad_norm": 1.0932618965520366, + "learning_rate": 1.326860665432995e-06, + "loss": 0.8921856880187988, + "step": 3684 + }, + { + "epoch": 0.8490783410138248, + "grad_norm": 0.9850919430921788, + "learning_rate": 1.326500584365171e-06, + "loss": 0.7285119295120239, + "step": 3685 + }, + { + "epoch": 0.8493087557603687, + "grad_norm": 1.0119244636074918, + "learning_rate": 1.3261404559062196e-06, + "loss": 0.8968918323516846, + "step": 3686 + }, + { + "epoch": 0.8495391705069124, + "grad_norm": 0.9862869524570133, + "learning_rate": 1.3257802801084123e-06, + "loss": 0.6794285774230957, + "step": 3687 + }, + { + "epoch": 0.8497695852534562, + "grad_norm": 1.1495746754769118, + "learning_rate": 1.3254200570240291e-06, + "loss": 0.869774341583252, + "step": 3688 + }, + { + "epoch": 0.85, + "grad_norm": 1.1620464557259493, + "learning_rate": 1.3250597867053553e-06, + "loss": 0.7862332463264465, + "step": 3689 + }, + { + "epoch": 0.8502304147465438, + "grad_norm": 1.1253065949092746, + "learning_rate": 1.3246994692046835e-06, + "loss": 0.8424299955368042, + "step": 3690 + }, + { + "epoch": 0.8504608294930875, + "grad_norm": 0.7041532260107465, + "learning_rate": 1.3243391045743137e-06, + "loss": 0.6232138276100159, + "step": 3691 + }, + { + "epoch": 0.8506912442396314, + "grad_norm": 0.9563538572085633, + "learning_rate": 1.3239786928665523e-06, + "loss": 0.7108159065246582, + "step": 
3692 + }, + { + "epoch": 0.8509216589861751, + "grad_norm": 1.0262733388108027, + "learning_rate": 1.3236182341337126e-06, + "loss": 0.7282330393791199, + "step": 3693 + }, + { + "epoch": 0.8511520737327188, + "grad_norm": 1.2079736335999256, + "learning_rate": 1.3232577284281147e-06, + "loss": 0.7864304780960083, + "step": 3694 + }, + { + "epoch": 0.8513824884792627, + "grad_norm": 0.9682428596442779, + "learning_rate": 1.3228971758020852e-06, + "loss": 0.7826365232467651, + "step": 3695 + }, + { + "epoch": 0.8516129032258064, + "grad_norm": 1.0308498953586989, + "learning_rate": 1.322536576307958e-06, + "loss": 0.8429988026618958, + "step": 3696 + }, + { + "epoch": 0.8518433179723502, + "grad_norm": 1.106791902142165, + "learning_rate": 1.322175929998074e-06, + "loss": 0.771148145198822, + "step": 3697 + }, + { + "epoch": 0.852073732718894, + "grad_norm": 1.2323556662321768, + "learning_rate": 1.3218152369247804e-06, + "loss": 0.9610496759414673, + "step": 3698 + }, + { + "epoch": 0.8523041474654378, + "grad_norm": 1.0124488299649408, + "learning_rate": 1.321454497140431e-06, + "loss": 0.7286547422409058, + "step": 3699 + }, + { + "epoch": 0.8525345622119815, + "grad_norm": 0.8362780560832063, + "learning_rate": 1.321093710697387e-06, + "loss": 0.7446750402450562, + "step": 3700 + }, + { + "epoch": 0.8527649769585254, + "grad_norm": 0.8774754337310029, + "learning_rate": 1.3207328776480156e-06, + "loss": 0.7211639881134033, + "step": 3701 + }, + { + "epoch": 0.8529953917050691, + "grad_norm": 0.9667628641735269, + "learning_rate": 1.320371998044692e-06, + "loss": 0.765962541103363, + "step": 3702 + }, + { + "epoch": 0.853225806451613, + "grad_norm": 1.0775083181101466, + "learning_rate": 1.3200110719397967e-06, + "loss": 0.9090084433555603, + "step": 3703 + }, + { + "epoch": 0.8534562211981567, + "grad_norm": 0.9604272002153474, + "learning_rate": 1.319650099385718e-06, + "loss": 0.8222901225090027, + "step": 3704 + }, + { + "epoch": 0.8536866359447005, + 
"grad_norm": 1.0297311955715076, + "learning_rate": 1.3192890804348508e-06, + "loss": 0.7929965853691101, + "step": 3705 + }, + { + "epoch": 0.8539170506912442, + "grad_norm": 0.9788103737354025, + "learning_rate": 1.318928015139596e-06, + "loss": 0.89229816198349, + "step": 3706 + }, + { + "epoch": 0.854147465437788, + "grad_norm": 1.1185541946390394, + "learning_rate": 1.3185669035523621e-06, + "loss": 0.8348276615142822, + "step": 3707 + }, + { + "epoch": 0.8543778801843318, + "grad_norm": 1.0960703003892842, + "learning_rate": 1.3182057457255639e-06, + "loss": 0.9006820917129517, + "step": 3708 + }, + { + "epoch": 0.8546082949308755, + "grad_norm": 0.8300224623954644, + "learning_rate": 1.3178445417116233e-06, + "loss": 0.665691614151001, + "step": 3709 + }, + { + "epoch": 0.8548387096774194, + "grad_norm": 0.6677558949928035, + "learning_rate": 1.3174832915629677e-06, + "loss": 0.7073110342025757, + "step": 3710 + }, + { + "epoch": 0.8550691244239631, + "grad_norm": 1.0807205184602706, + "learning_rate": 1.317121995332033e-06, + "loss": 0.7125800848007202, + "step": 3711 + }, + { + "epoch": 0.8552995391705069, + "grad_norm": 1.1504081133401938, + "learning_rate": 1.31676065307126e-06, + "loss": 0.847205638885498, + "step": 3712 + }, + { + "epoch": 0.8555299539170507, + "grad_norm": 1.1272186923536152, + "learning_rate": 1.3163992648330979e-06, + "loss": 0.860866904258728, + "step": 3713 + }, + { + "epoch": 0.8557603686635945, + "grad_norm": 0.9974272492162177, + "learning_rate": 1.3160378306700014e-06, + "loss": 0.811161994934082, + "step": 3714 + }, + { + "epoch": 0.8559907834101382, + "grad_norm": 1.059693566679631, + "learning_rate": 1.3156763506344318e-06, + "loss": 1.0276790857315063, + "step": 3715 + }, + { + "epoch": 0.8562211981566821, + "grad_norm": 0.8617440282777447, + "learning_rate": 1.3153148247788584e-06, + "loss": 0.7462253570556641, + "step": 3716 + }, + { + "epoch": 0.8564516129032258, + "grad_norm": 1.281384523734545, + "learning_rate": 
1.314953253155755e-06, + "loss": 0.9181896448135376, + "step": 3717 + }, + { + "epoch": 0.8566820276497696, + "grad_norm": 0.7940667691684741, + "learning_rate": 1.3145916358176044e-06, + "loss": 0.5943678021430969, + "step": 3718 + }, + { + "epoch": 0.8569124423963134, + "grad_norm": 0.9268739898787507, + "learning_rate": 1.3142299728168942e-06, + "loss": 0.7908656597137451, + "step": 3719 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 1.2242140267734891, + "learning_rate": 1.3138682642061192e-06, + "loss": 0.8716393709182739, + "step": 3720 + }, + { + "epoch": 0.8573732718894009, + "grad_norm": 0.9921811812486295, + "learning_rate": 1.3135065100377814e-06, + "loss": 0.76909339427948, + "step": 3721 + }, + { + "epoch": 0.8576036866359447, + "grad_norm": 1.0272733292998222, + "learning_rate": 1.3131447103643884e-06, + "loss": 0.7896728515625, + "step": 3722 + }, + { + "epoch": 0.8578341013824885, + "grad_norm": 1.0326134494637835, + "learning_rate": 1.3127828652384554e-06, + "loss": 0.8458575010299683, + "step": 3723 + }, + { + "epoch": 0.8580645161290322, + "grad_norm": 0.9849414066001893, + "learning_rate": 1.3124209747125036e-06, + "loss": 0.7419729232788086, + "step": 3724 + }, + { + "epoch": 0.8582949308755761, + "grad_norm": 0.9131603734827297, + "learning_rate": 1.3120590388390608e-06, + "loss": 0.8801093697547913, + "step": 3725 + }, + { + "epoch": 0.8585253456221198, + "grad_norm": 0.7986933302941567, + "learning_rate": 1.3116970576706617e-06, + "loss": 0.6337816715240479, + "step": 3726 + }, + { + "epoch": 0.8587557603686636, + "grad_norm": 1.1352865331161706, + "learning_rate": 1.3113350312598472e-06, + "loss": 0.8099665641784668, + "step": 3727 + }, + { + "epoch": 0.8589861751152074, + "grad_norm": 1.0467011868433627, + "learning_rate": 1.3109729596591651e-06, + "loss": 0.7430413961410522, + "step": 3728 + }, + { + "epoch": 0.8592165898617512, + "grad_norm": 1.0569982664185076, + "learning_rate": 1.3106108429211699e-06, + "loss": 
0.7374905347824097, + "step": 3729 + }, + { + "epoch": 0.8594470046082949, + "grad_norm": 0.7857724004075162, + "learning_rate": 1.3102486810984217e-06, + "loss": 0.71753990650177, + "step": 3730 + }, + { + "epoch": 0.8596774193548387, + "grad_norm": 1.0554970253272185, + "learning_rate": 1.3098864742434885e-06, + "loss": 0.9126461744308472, + "step": 3731 + }, + { + "epoch": 0.8599078341013825, + "grad_norm": 1.1141466235187625, + "learning_rate": 1.3095242224089434e-06, + "loss": 0.846487283706665, + "step": 3732 + }, + { + "epoch": 0.8601382488479262, + "grad_norm": 0.9640305278845377, + "learning_rate": 1.3091619256473671e-06, + "loss": 0.7026070952415466, + "step": 3733 + }, + { + "epoch": 0.8603686635944701, + "grad_norm": 1.2209599470129553, + "learning_rate": 1.3087995840113471e-06, + "loss": 1.0044158697128296, + "step": 3734 + }, + { + "epoch": 0.8605990783410138, + "grad_norm": 1.2732308696122019, + "learning_rate": 1.3084371975534759e-06, + "loss": 0.8061608076095581, + "step": 3735 + }, + { + "epoch": 0.8608294930875576, + "grad_norm": 1.2155874878372677, + "learning_rate": 1.308074766326354e-06, + "loss": 0.9189345836639404, + "step": 3736 + }, + { + "epoch": 0.8610599078341014, + "grad_norm": 3.0839554304770314, + "learning_rate": 1.3077122903825875e-06, + "loss": 0.8183290958404541, + "step": 3737 + }, + { + "epoch": 0.8612903225806452, + "grad_norm": 0.9202037098580877, + "learning_rate": 1.3073497697747893e-06, + "loss": 0.860893726348877, + "step": 3738 + }, + { + "epoch": 0.8615207373271889, + "grad_norm": 0.7717429741205805, + "learning_rate": 1.306987204555579e-06, + "loss": 0.6732957363128662, + "step": 3739 + }, + { + "epoch": 0.8617511520737328, + "grad_norm": 0.9444170667577415, + "learning_rate": 1.3066245947775821e-06, + "loss": 0.7910758256912231, + "step": 3740 + }, + { + "epoch": 0.8619815668202765, + "grad_norm": 1.316217805471382, + "learning_rate": 1.3062619404934317e-06, + "loss": 0.9422181844711304, + "step": 3741 + }, + { + 
"epoch": 0.8622119815668203, + "grad_norm": 0.9698503213179374, + "learning_rate": 1.3058992417557657e-06, + "loss": 0.7731142044067383, + "step": 3742 + }, + { + "epoch": 0.8624423963133641, + "grad_norm": 0.9561313394387324, + "learning_rate": 1.3055364986172296e-06, + "loss": 0.8419089317321777, + "step": 3743 + }, + { + "epoch": 0.8626728110599078, + "grad_norm": 0.8852750785802604, + "learning_rate": 1.3051737111304757e-06, + "loss": 0.7535419464111328, + "step": 3744 + }, + { + "epoch": 0.8629032258064516, + "grad_norm": 0.8636514927767351, + "learning_rate": 1.3048108793481614e-06, + "loss": 0.7744847536087036, + "step": 3745 + }, + { + "epoch": 0.8631336405529954, + "grad_norm": 1.04058809416254, + "learning_rate": 1.3044480033229513e-06, + "loss": 0.7578398585319519, + "step": 3746 + }, + { + "epoch": 0.8633640552995392, + "grad_norm": 1.2334871836764278, + "learning_rate": 1.3040850831075168e-06, + "loss": 0.8767418265342712, + "step": 3747 + }, + { + "epoch": 0.8635944700460829, + "grad_norm": 1.1256734507930313, + "learning_rate": 1.303722118754535e-06, + "loss": 0.7484671473503113, + "step": 3748 + }, + { + "epoch": 0.8638248847926268, + "grad_norm": 0.9064086460386975, + "learning_rate": 1.3033591103166897e-06, + "loss": 0.7231101989746094, + "step": 3749 + }, + { + "epoch": 0.8640552995391705, + "grad_norm": 0.896473034432068, + "learning_rate": 1.3029960578466709e-06, + "loss": 0.7626307606697083, + "step": 3750 + }, + { + "epoch": 0.8642857142857143, + "grad_norm": 1.0608055188685264, + "learning_rate": 1.302632961397176e-06, + "loss": 0.7244704961776733, + "step": 3751 + }, + { + "epoch": 0.864516129032258, + "grad_norm": 1.0368271143877468, + "learning_rate": 1.3022698210209066e-06, + "loss": 0.8575884103775024, + "step": 3752 + }, + { + "epoch": 0.8647465437788019, + "grad_norm": 1.050928094888414, + "learning_rate": 1.3019066367705733e-06, + "loss": 0.7617322206497192, + "step": 3753 + }, + { + "epoch": 0.8649769585253456, + "grad_norm": 
1.0524737157850867, + "learning_rate": 1.3015434086988914e-06, + "loss": 0.7899904251098633, + "step": 3754 + }, + { + "epoch": 0.8652073732718893, + "grad_norm": 0.7826254299372721, + "learning_rate": 1.3011801368585825e-06, + "loss": 0.6405949592590332, + "step": 3755 + }, + { + "epoch": 0.8654377880184332, + "grad_norm": 1.004484214855527, + "learning_rate": 1.300816821302376e-06, + "loss": 0.8473223447799683, + "step": 3756 + }, + { + "epoch": 0.8656682027649769, + "grad_norm": 1.0318183916575985, + "learning_rate": 1.3004534620830059e-06, + "loss": 0.7843037843704224, + "step": 3757 + }, + { + "epoch": 0.8658986175115208, + "grad_norm": 0.8527211236886993, + "learning_rate": 1.3000900592532134e-06, + "loss": 0.7418329119682312, + "step": 3758 + }, + { + "epoch": 0.8661290322580645, + "grad_norm": 1.1686967012789897, + "learning_rate": 1.2997266128657462e-06, + "loss": 0.9007542133331299, + "step": 3759 + }, + { + "epoch": 0.8663594470046083, + "grad_norm": 1.0002999248018631, + "learning_rate": 1.2993631229733582e-06, + "loss": 0.7214536666870117, + "step": 3760 + }, + { + "epoch": 0.866589861751152, + "grad_norm": 1.060698383579802, + "learning_rate": 1.2989995896288085e-06, + "loss": 0.6538300514221191, + "step": 3761 + }, + { + "epoch": 0.8668202764976959, + "grad_norm": 0.8939424364373206, + "learning_rate": 1.2986360128848647e-06, + "loss": 0.8132497668266296, + "step": 3762 + }, + { + "epoch": 0.8670506912442396, + "grad_norm": 1.2692579875098073, + "learning_rate": 1.2982723927942987e-06, + "loss": 0.8940386176109314, + "step": 3763 + }, + { + "epoch": 0.8672811059907835, + "grad_norm": 0.9095968882110219, + "learning_rate": 1.2979087294098904e-06, + "loss": 0.7426153421401978, + "step": 3764 + }, + { + "epoch": 0.8675115207373272, + "grad_norm": 1.2314721218727755, + "learning_rate": 1.2975450227844236e-06, + "loss": 0.8140754103660583, + "step": 3765 + }, + { + "epoch": 0.867741935483871, + "grad_norm": 1.165847048536148, + "learning_rate": 
1.2971812729706907e-06, + "loss": 0.9078278541564941, + "step": 3766 + }, + { + "epoch": 0.8679723502304147, + "grad_norm": 0.8581444329277982, + "learning_rate": 1.29681748002149e-06, + "loss": 0.6632627248764038, + "step": 3767 + }, + { + "epoch": 0.8682027649769585, + "grad_norm": 1.0737542944031577, + "learning_rate": 1.2964536439896245e-06, + "loss": 0.913419246673584, + "step": 3768 + }, + { + "epoch": 0.8684331797235023, + "grad_norm": 0.9232699220030103, + "learning_rate": 1.2960897649279054e-06, + "loss": 0.776391863822937, + "step": 3769 + }, + { + "epoch": 0.868663594470046, + "grad_norm": 0.7836255693570048, + "learning_rate": 1.2957258428891488e-06, + "loss": 0.7171014547348022, + "step": 3770 + }, + { + "epoch": 0.8688940092165899, + "grad_norm": 1.072840063629104, + "learning_rate": 1.2953618779261776e-06, + "loss": 0.8848521709442139, + "step": 3771 + }, + { + "epoch": 0.8691244239631336, + "grad_norm": 0.9374655640180731, + "learning_rate": 1.2949978700918207e-06, + "loss": 0.6794570684432983, + "step": 3772 + }, + { + "epoch": 0.8693548387096774, + "grad_norm": 1.1765914680464367, + "learning_rate": 1.2946338194389137e-06, + "loss": 0.7128770351409912, + "step": 3773 + }, + { + "epoch": 0.8695852534562212, + "grad_norm": 1.0061805151394425, + "learning_rate": 1.2942697260202976e-06, + "loss": 0.7794370651245117, + "step": 3774 + }, + { + "epoch": 0.869815668202765, + "grad_norm": 0.8201503807835805, + "learning_rate": 1.2939055898888203e-06, + "loss": 0.7946528196334839, + "step": 3775 + }, + { + "epoch": 0.8700460829493087, + "grad_norm": 0.8253544658473864, + "learning_rate": 1.2935414110973357e-06, + "loss": 0.7052137851715088, + "step": 3776 + }, + { + "epoch": 0.8702764976958526, + "grad_norm": 1.1148062721900278, + "learning_rate": 1.293177189698704e-06, + "loss": 0.785929799079895, + "step": 3777 + }, + { + "epoch": 0.8705069124423963, + "grad_norm": 1.0434715730493578, + "learning_rate": 1.2928129257457915e-06, + "loss": 
0.7907861471176147, + "step": 3778 + }, + { + "epoch": 0.8707373271889401, + "grad_norm": 1.0141295879138945, + "learning_rate": 1.2924486192914704e-06, + "loss": 0.9145845770835876, + "step": 3779 + }, + { + "epoch": 0.8709677419354839, + "grad_norm": 1.2821040685334846, + "learning_rate": 1.2920842703886191e-06, + "loss": 0.8332167863845825, + "step": 3780 + }, + { + "epoch": 0.8711981566820276, + "grad_norm": 1.1443987508087015, + "learning_rate": 1.2917198790901229e-06, + "loss": 0.9593367576599121, + "step": 3781 + }, + { + "epoch": 0.8714285714285714, + "grad_norm": 1.1001262078147525, + "learning_rate": 1.2913554454488723e-06, + "loss": 0.9269144535064697, + "step": 3782 + }, + { + "epoch": 0.8716589861751152, + "grad_norm": 0.8577227656018163, + "learning_rate": 1.2909909695177645e-06, + "loss": 0.8474053144454956, + "step": 3783 + }, + { + "epoch": 0.871889400921659, + "grad_norm": 1.0482742591675172, + "learning_rate": 1.2906264513497027e-06, + "loss": 0.8098207116127014, + "step": 3784 + }, + { + "epoch": 0.8721198156682027, + "grad_norm": 0.9400670599728106, + "learning_rate": 1.2902618909975962e-06, + "loss": 0.7394517064094543, + "step": 3785 + }, + { + "epoch": 0.8723502304147466, + "grad_norm": 1.199479550356467, + "learning_rate": 1.2898972885143606e-06, + "loss": 0.8667110204696655, + "step": 3786 + }, + { + "epoch": 0.8725806451612903, + "grad_norm": 1.2600204383371998, + "learning_rate": 1.289532643952917e-06, + "loss": 0.826819121837616, + "step": 3787 + }, + { + "epoch": 0.8728110599078341, + "grad_norm": 0.9212030006613351, + "learning_rate": 1.2891679573661937e-06, + "loss": 0.7765695452690125, + "step": 3788 + }, + { + "epoch": 0.8730414746543779, + "grad_norm": 0.8409152224560986, + "learning_rate": 1.2888032288071245e-06, + "loss": 0.7180448770523071, + "step": 3789 + }, + { + "epoch": 0.8732718894009217, + "grad_norm": 0.9734045628890519, + "learning_rate": 1.2884384583286486e-06, + "loss": 0.7619662880897522, + "step": 3790 + }, + { + 
"epoch": 0.8735023041474654, + "grad_norm": 1.0439158459354512, + "learning_rate": 1.2880736459837123e-06, + "loss": 0.8332309126853943, + "step": 3791 + }, + { + "epoch": 0.8737327188940092, + "grad_norm": 1.019583919621154, + "learning_rate": 1.2877087918252676e-06, + "loss": 0.9314864277839661, + "step": 3792 + }, + { + "epoch": 0.873963133640553, + "grad_norm": 1.0252621742811456, + "learning_rate": 1.287343895906273e-06, + "loss": 0.8505650758743286, + "step": 3793 + }, + { + "epoch": 0.8741935483870967, + "grad_norm": 1.1808911521686665, + "learning_rate": 1.286978958279692e-06, + "loss": 0.8086442351341248, + "step": 3794 + }, + { + "epoch": 0.8744239631336406, + "grad_norm": 0.9931096763073582, + "learning_rate": 1.2866139789984951e-06, + "loss": 0.9369934797286987, + "step": 3795 + }, + { + "epoch": 0.8746543778801843, + "grad_norm": 1.0923174237783717, + "learning_rate": 1.2862489581156585e-06, + "loss": 0.6776204705238342, + "step": 3796 + }, + { + "epoch": 0.8748847926267281, + "grad_norm": 1.1437930163109349, + "learning_rate": 1.2858838956841646e-06, + "loss": 0.8742507100105286, + "step": 3797 + }, + { + "epoch": 0.8751152073732719, + "grad_norm": 0.8088256156858264, + "learning_rate": 1.285518791757002e-06, + "loss": 0.6592123508453369, + "step": 3798 + }, + { + "epoch": 0.8753456221198157, + "grad_norm": 1.064419209573929, + "learning_rate": 1.2851536463871646e-06, + "loss": 0.727974534034729, + "step": 3799 + }, + { + "epoch": 0.8755760368663594, + "grad_norm": 1.1114963626056278, + "learning_rate": 1.284788459627653e-06, + "loss": 0.734921395778656, + "step": 3800 + }, + { + "epoch": 0.8758064516129033, + "grad_norm": 1.1341924912712853, + "learning_rate": 1.2844232315314734e-06, + "loss": 0.8848391771316528, + "step": 3801 + }, + { + "epoch": 0.876036866359447, + "grad_norm": 0.9036415522550547, + "learning_rate": 1.284057962151638e-06, + "loss": 0.7014757394790649, + "step": 3802 + }, + { + "epoch": 0.8762672811059908, + "grad_norm": 
1.1253352689452834, + "learning_rate": 1.2836926515411662e-06, + "loss": 0.9037606716156006, + "step": 3803 + }, + { + "epoch": 0.8764976958525346, + "grad_norm": 1.0304179621449525, + "learning_rate": 1.2833272997530808e-06, + "loss": 0.7842103242874146, + "step": 3804 + }, + { + "epoch": 0.8767281105990783, + "grad_norm": 0.8881021582469312, + "learning_rate": 1.282961906840413e-06, + "loss": 0.7233899831771851, + "step": 3805 + }, + { + "epoch": 0.8769585253456221, + "grad_norm": 1.0965629604169354, + "learning_rate": 1.2825964728561995e-06, + "loss": 0.8439977169036865, + "step": 3806 + }, + { + "epoch": 0.8771889400921659, + "grad_norm": 0.9011702646392625, + "learning_rate": 1.2822309978534817e-06, + "loss": 0.6734062433242798, + "step": 3807 + }, + { + "epoch": 0.8774193548387097, + "grad_norm": 0.8611901516189409, + "learning_rate": 1.2818654818853082e-06, + "loss": 0.8132908344268799, + "step": 3808 + }, + { + "epoch": 0.8776497695852534, + "grad_norm": 1.0055540352806662, + "learning_rate": 1.2814999250047334e-06, + "loss": 0.7867386341094971, + "step": 3809 + }, + { + "epoch": 0.8778801843317973, + "grad_norm": 0.9631857828899055, + "learning_rate": 1.2811343272648172e-06, + "loss": 0.7367507219314575, + "step": 3810 + }, + { + "epoch": 0.878110599078341, + "grad_norm": 0.9475758390620135, + "learning_rate": 1.280768688718625e-06, + "loss": 0.8154586553573608, + "step": 3811 + }, + { + "epoch": 0.8783410138248848, + "grad_norm": 1.2471162716233217, + "learning_rate": 1.2804030094192297e-06, + "loss": 0.9962621331214905, + "step": 3812 + }, + { + "epoch": 0.8785714285714286, + "grad_norm": 0.9442759022004834, + "learning_rate": 1.280037289419709e-06, + "loss": 0.8720508813858032, + "step": 3813 + }, + { + "epoch": 0.8788018433179724, + "grad_norm": 0.9970556206238078, + "learning_rate": 1.2796715287731461e-06, + "loss": 0.7211558818817139, + "step": 3814 + }, + { + "epoch": 0.8790322580645161, + "grad_norm": 1.0985560987492957, + "learning_rate": 
1.279305727532631e-06, + "loss": 0.8354029059410095, + "step": 3815 + }, + { + "epoch": 0.8792626728110599, + "grad_norm": 1.2983425606164107, + "learning_rate": 1.2789398857512597e-06, + "loss": 0.9136772155761719, + "step": 3816 + }, + { + "epoch": 0.8794930875576037, + "grad_norm": 1.099731879502331, + "learning_rate": 1.2785740034821328e-06, + "loss": 0.7603391408920288, + "step": 3817 + }, + { + "epoch": 0.8797235023041474, + "grad_norm": 1.0043618459346715, + "learning_rate": 1.2782080807783582e-06, + "loss": 0.8938640356063843, + "step": 3818 + }, + { + "epoch": 0.8799539170506913, + "grad_norm": 0.9668042432935031, + "learning_rate": 1.2778421176930492e-06, + "loss": 0.8041675090789795, + "step": 3819 + }, + { + "epoch": 0.880184331797235, + "grad_norm": 0.858269124078789, + "learning_rate": 1.2774761142793246e-06, + "loss": 0.7128704786300659, + "step": 3820 + }, + { + "epoch": 0.8804147465437788, + "grad_norm": 1.01263470571454, + "learning_rate": 1.277110070590309e-06, + "loss": 0.7927603721618652, + "step": 3821 + }, + { + "epoch": 0.8806451612903226, + "grad_norm": 0.8447601312860044, + "learning_rate": 1.2767439866791342e-06, + "loss": 0.8294891119003296, + "step": 3822 + }, + { + "epoch": 0.8808755760368664, + "grad_norm": 1.0620381421224903, + "learning_rate": 1.2763778625989354e-06, + "loss": 0.8058860301971436, + "step": 3823 + }, + { + "epoch": 0.8811059907834101, + "grad_norm": 1.1264235058600618, + "learning_rate": 1.2760116984028559e-06, + "loss": 0.9073271751403809, + "step": 3824 + }, + { + "epoch": 0.881336405529954, + "grad_norm": 0.9871957246708625, + "learning_rate": 1.2756454941440439e-06, + "loss": 0.755131721496582, + "step": 3825 + }, + { + "epoch": 0.8815668202764977, + "grad_norm": 0.9177831986454672, + "learning_rate": 1.2752792498756532e-06, + "loss": 0.7571133375167847, + "step": 3826 + }, + { + "epoch": 0.8817972350230415, + "grad_norm": 1.0303718222421674, + "learning_rate": 1.2749129656508438e-06, + "loss": 
0.8021755218505859, + "step": 3827 + }, + { + "epoch": 0.8820276497695853, + "grad_norm": 0.9628359079626025, + "learning_rate": 1.2745466415227812e-06, + "loss": 0.7817519903182983, + "step": 3828 + }, + { + "epoch": 0.882258064516129, + "grad_norm": 0.9923984386602839, + "learning_rate": 1.2741802775446375e-06, + "loss": 0.7144416570663452, + "step": 3829 + }, + { + "epoch": 0.8824884792626728, + "grad_norm": 1.1770010674703593, + "learning_rate": 1.2738138737695894e-06, + "loss": 0.8154206275939941, + "step": 3830 + }, + { + "epoch": 0.8827188940092165, + "grad_norm": 1.0860031408073831, + "learning_rate": 1.2734474302508199e-06, + "loss": 0.7478733062744141, + "step": 3831 + }, + { + "epoch": 0.8829493087557604, + "grad_norm": 0.9998255564669785, + "learning_rate": 1.2730809470415177e-06, + "loss": 0.7792314291000366, + "step": 3832 + }, + { + "epoch": 0.8831797235023041, + "grad_norm": 1.1952265957395494, + "learning_rate": 1.2727144241948776e-06, + "loss": 0.8550708293914795, + "step": 3833 + }, + { + "epoch": 0.883410138248848, + "grad_norm": 1.14972903127367, + "learning_rate": 1.2723478617641e-06, + "loss": 0.9415113925933838, + "step": 3834 + }, + { + "epoch": 0.8836405529953917, + "grad_norm": 1.1062517985394071, + "learning_rate": 1.2719812598023909e-06, + "loss": 0.8359560370445251, + "step": 3835 + }, + { + "epoch": 0.8838709677419355, + "grad_norm": 1.2039080793867758, + "learning_rate": 1.2716146183629618e-06, + "loss": 0.9515634775161743, + "step": 3836 + }, + { + "epoch": 0.8841013824884792, + "grad_norm": 1.1195735084656264, + "learning_rate": 1.2712479374990302e-06, + "loss": 0.9433277249336243, + "step": 3837 + }, + { + "epoch": 0.8843317972350231, + "grad_norm": 1.022594144324791, + "learning_rate": 1.27088121726382e-06, + "loss": 0.809203028678894, + "step": 3838 + }, + { + "epoch": 0.8845622119815668, + "grad_norm": 1.0243153152488458, + "learning_rate": 1.2705144577105596e-06, + "loss": 0.8003803491592407, + "step": 3839 + }, + { + "epoch": 
0.8847926267281107, + "grad_norm": 1.0509871208480976, + "learning_rate": 1.2701476588924837e-06, + "loss": 0.8258087038993835, + "step": 3840 + }, + { + "epoch": 0.8850230414746544, + "grad_norm": 0.8336199164135607, + "learning_rate": 1.2697808208628326e-06, + "loss": 0.7337249517440796, + "step": 3841 + }, + { + "epoch": 0.8852534562211981, + "grad_norm": 1.1988508685394492, + "learning_rate": 1.269413943674853e-06, + "loss": 0.6963306665420532, + "step": 3842 + }, + { + "epoch": 0.885483870967742, + "grad_norm": 1.1494175494849699, + "learning_rate": 1.2690470273817955e-06, + "loss": 0.8849321603775024, + "step": 3843 + }, + { + "epoch": 0.8857142857142857, + "grad_norm": 0.9311581320318796, + "learning_rate": 1.2686800720369183e-06, + "loss": 0.804117739200592, + "step": 3844 + }, + { + "epoch": 0.8859447004608295, + "grad_norm": 0.9139368239237865, + "learning_rate": 1.2683130776934848e-06, + "loss": 0.7873985767364502, + "step": 3845 + }, + { + "epoch": 0.8861751152073732, + "grad_norm": 1.0475484077031534, + "learning_rate": 1.2679460444047627e-06, + "loss": 0.7401156425476074, + "step": 3846 + }, + { + "epoch": 0.8864055299539171, + "grad_norm": 1.1867976153376456, + "learning_rate": 1.2675789722240274e-06, + "loss": 0.8216343522071838, + "step": 3847 + }, + { + "epoch": 0.8866359447004608, + "grad_norm": 1.1126927795380483, + "learning_rate": 1.2672118612045583e-06, + "loss": 0.9367883205413818, + "step": 3848 + }, + { + "epoch": 0.8868663594470046, + "grad_norm": 1.333436966015092, + "learning_rate": 1.2668447113996411e-06, + "loss": 0.959208607673645, + "step": 3849 + }, + { + "epoch": 0.8870967741935484, + "grad_norm": 1.019926575329533, + "learning_rate": 1.2664775228625678e-06, + "loss": 0.754011869430542, + "step": 3850 + }, + { + "epoch": 0.8873271889400922, + "grad_norm": 1.0679613059424808, + "learning_rate": 1.2661102956466343e-06, + "loss": 0.7200918793678284, + "step": 3851 + }, + { + "epoch": 0.8875576036866359, + "grad_norm": 
1.1470470713937198, + "learning_rate": 1.2657430298051441e-06, + "loss": 0.7819997072219849, + "step": 3852 + }, + { + "epoch": 0.8877880184331797, + "grad_norm": 0.7442261609023784, + "learning_rate": 1.2653757253914045e-06, + "loss": 0.6145305037498474, + "step": 3853 + }, + { + "epoch": 0.8880184331797235, + "grad_norm": 1.0307629205268725, + "learning_rate": 1.2650083824587298e-06, + "loss": 0.8730908036231995, + "step": 3854 + }, + { + "epoch": 0.8882488479262672, + "grad_norm": 0.8412211397931054, + "learning_rate": 1.2646410010604395e-06, + "loss": 0.7595944404602051, + "step": 3855 + }, + { + "epoch": 0.8884792626728111, + "grad_norm": 1.1742884385001073, + "learning_rate": 1.264273581249858e-06, + "loss": 0.8533104658126831, + "step": 3856 + }, + { + "epoch": 0.8887096774193548, + "grad_norm": 0.9075889816265436, + "learning_rate": 1.263906123080316e-06, + "loss": 0.7239818572998047, + "step": 3857 + }, + { + "epoch": 0.8889400921658986, + "grad_norm": 1.1211735744208717, + "learning_rate": 1.2635386266051498e-06, + "loss": 0.7675650119781494, + "step": 3858 + }, + { + "epoch": 0.8891705069124424, + "grad_norm": 1.03231156560467, + "learning_rate": 1.2631710918777007e-06, + "loss": 0.8886630535125732, + "step": 3859 + }, + { + "epoch": 0.8894009216589862, + "grad_norm": 1.078590523668252, + "learning_rate": 1.2628035189513159e-06, + "loss": 0.798930287361145, + "step": 3860 + }, + { + "epoch": 0.8896313364055299, + "grad_norm": 0.9635414297502106, + "learning_rate": 1.2624359078793484e-06, + "loss": 0.7189278602600098, + "step": 3861 + }, + { + "epoch": 0.8898617511520738, + "grad_norm": 1.0909939790359444, + "learning_rate": 1.2620682587151565e-06, + "loss": 0.8187342882156372, + "step": 3862 + }, + { + "epoch": 0.8900921658986175, + "grad_norm": 1.1174191800105742, + "learning_rate": 1.2617005715121034e-06, + "loss": 0.880839467048645, + "step": 3863 + }, + { + "epoch": 0.8903225806451613, + "grad_norm": 0.9160208180175933, + "learning_rate": 
1.2613328463235586e-06, + "loss": 0.84575355052948, + "step": 3864 + }, + { + "epoch": 0.8905529953917051, + "grad_norm": 0.8361425077510937, + "learning_rate": 1.2609650832028978e-06, + "loss": 0.6823658347129822, + "step": 3865 + }, + { + "epoch": 0.8907834101382488, + "grad_norm": 1.0695425966983703, + "learning_rate": 1.2605972822035e-06, + "loss": 0.8295711278915405, + "step": 3866 + }, + { + "epoch": 0.8910138248847926, + "grad_norm": 1.1932993089448705, + "learning_rate": 1.2602294433787518e-06, + "loss": 0.8684213161468506, + "step": 3867 + }, + { + "epoch": 0.8912442396313364, + "grad_norm": 0.8493371065418897, + "learning_rate": 1.2598615667820447e-06, + "loss": 0.6560889482498169, + "step": 3868 + }, + { + "epoch": 0.8914746543778802, + "grad_norm": 1.0552959260029386, + "learning_rate": 1.259493652466775e-06, + "loss": 0.740487277507782, + "step": 3869 + }, + { + "epoch": 0.8917050691244239, + "grad_norm": 0.9680726179927289, + "learning_rate": 1.2591257004863453e-06, + "loss": 0.8167253732681274, + "step": 3870 + }, + { + "epoch": 0.8919354838709678, + "grad_norm": 0.8741208745575088, + "learning_rate": 1.2587577108941634e-06, + "loss": 0.8521690368652344, + "step": 3871 + }, + { + "epoch": 0.8921658986175115, + "grad_norm": 1.263426910808872, + "learning_rate": 1.2583896837436418e-06, + "loss": 0.8830848932266235, + "step": 3872 + }, + { + "epoch": 0.8923963133640553, + "grad_norm": 0.9234650272103238, + "learning_rate": 1.2580216190881999e-06, + "loss": 0.7080649137496948, + "step": 3873 + }, + { + "epoch": 0.8926267281105991, + "grad_norm": 0.9098984938292525, + "learning_rate": 1.2576535169812614e-06, + "loss": 0.8013911247253418, + "step": 3874 + }, + { + "epoch": 0.8928571428571429, + "grad_norm": 0.9781454154869316, + "learning_rate": 1.2572853774762564e-06, + "loss": 0.8307033777236938, + "step": 3875 + }, + { + "epoch": 0.8930875576036866, + "grad_norm": 1.003074779947638, + "learning_rate": 1.256917200626619e-06, + "loss": 0.7514123916625977, 
+ "step": 3876 + }, + { + "epoch": 0.8933179723502304, + "grad_norm": 1.3024082731165083, + "learning_rate": 1.2565489864857903e-06, + "loss": 0.7608132362365723, + "step": 3877 + }, + { + "epoch": 0.8935483870967742, + "grad_norm": 0.9570998315665514, + "learning_rate": 1.256180735107216e-06, + "loss": 0.8011139631271362, + "step": 3878 + }, + { + "epoch": 0.8937788018433179, + "grad_norm": 1.134653936381734, + "learning_rate": 1.2558124465443467e-06, + "loss": 0.9760414958000183, + "step": 3879 + }, + { + "epoch": 0.8940092165898618, + "grad_norm": 1.0547420638261442, + "learning_rate": 1.2554441208506399e-06, + "loss": 0.7292976379394531, + "step": 3880 + }, + { + "epoch": 0.8942396313364055, + "grad_norm": 1.0683215421992245, + "learning_rate": 1.255075758079557e-06, + "loss": 0.819061279296875, + "step": 3881 + }, + { + "epoch": 0.8944700460829493, + "grad_norm": 1.006803716245281, + "learning_rate": 1.2547073582845652e-06, + "loss": 0.8407306671142578, + "step": 3882 + }, + { + "epoch": 0.8947004608294931, + "grad_norm": 0.8233707920449198, + "learning_rate": 1.2543389215191379e-06, + "loss": 0.7452164888381958, + "step": 3883 + }, + { + "epoch": 0.8949308755760369, + "grad_norm": 1.049978361878961, + "learning_rate": 1.2539704478367525e-06, + "loss": 0.9001756310462952, + "step": 3884 + }, + { + "epoch": 0.8951612903225806, + "grad_norm": 0.8057583780945189, + "learning_rate": 1.253601937290893e-06, + "loss": 0.7006322741508484, + "step": 3885 + }, + { + "epoch": 0.8953917050691245, + "grad_norm": 0.9116907763776896, + "learning_rate": 1.253233389935048e-06, + "loss": 0.8464070558547974, + "step": 3886 + }, + { + "epoch": 0.8956221198156682, + "grad_norm": 0.9768693849406578, + "learning_rate": 1.2528648058227117e-06, + "loss": 0.8153925538063049, + "step": 3887 + }, + { + "epoch": 0.895852534562212, + "grad_norm": 0.9311867207234187, + "learning_rate": 1.2524961850073835e-06, + "loss": 0.7093103528022766, + "step": 3888 + }, + { + "epoch": 
0.8960829493087558, + "grad_norm": 0.8533841155936702, + "learning_rate": 1.2521275275425685e-06, + "loss": 0.676047682762146, + "step": 3889 + }, + { + "epoch": 0.8963133640552995, + "grad_norm": 0.87097687176947, + "learning_rate": 1.2517588334817765e-06, + "loss": 0.6980170011520386, + "step": 3890 + }, + { + "epoch": 0.8965437788018433, + "grad_norm": 0.9291831127411667, + "learning_rate": 1.2513901028785232e-06, + "loss": 0.7343952655792236, + "step": 3891 + }, + { + "epoch": 0.896774193548387, + "grad_norm": 1.0285752510532034, + "learning_rate": 1.251021335786329e-06, + "loss": 0.6836012005805969, + "step": 3892 + }, + { + "epoch": 0.8970046082949309, + "grad_norm": 0.9328635468922583, + "learning_rate": 1.2506525322587204e-06, + "loss": 0.7405731678009033, + "step": 3893 + }, + { + "epoch": 0.8972350230414746, + "grad_norm": 0.9162563014074782, + "learning_rate": 1.2502836923492288e-06, + "loss": 0.7626791596412659, + "step": 3894 + }, + { + "epoch": 0.8974654377880185, + "grad_norm": 0.8530894630449782, + "learning_rate": 1.2499148161113904e-06, + "loss": 0.951126754283905, + "step": 3895 + }, + { + "epoch": 0.8976958525345622, + "grad_norm": 1.0356266230162976, + "learning_rate": 1.249545903598747e-06, + "loss": 0.8248430490493774, + "step": 3896 + }, + { + "epoch": 0.897926267281106, + "grad_norm": 1.0696916510331513, + "learning_rate": 1.2491769548648466e-06, + "loss": 0.9306991100311279, + "step": 3897 + }, + { + "epoch": 0.8981566820276498, + "grad_norm": 1.2546361240375576, + "learning_rate": 1.2488079699632406e-06, + "loss": 0.8529196977615356, + "step": 3898 + }, + { + "epoch": 0.8983870967741936, + "grad_norm": 1.1432122269665714, + "learning_rate": 1.2484389489474873e-06, + "loss": 0.8614317178726196, + "step": 3899 + }, + { + "epoch": 0.8986175115207373, + "grad_norm": 0.8777341649032664, + "learning_rate": 1.2480698918711494e-06, + "loss": 0.723548173904419, + "step": 3900 + }, + { + "epoch": 0.8988479262672812, + "grad_norm": 
0.8559428728446495, + "learning_rate": 1.2477007987877953e-06, + "loss": 0.9424235820770264, + "step": 3901 + }, + { + "epoch": 0.8990783410138249, + "grad_norm": 1.1966583189697881, + "learning_rate": 1.2473316697509982e-06, + "loss": 0.8307658433914185, + "step": 3902 + }, + { + "epoch": 0.8993087557603686, + "grad_norm": 0.9430977683906336, + "learning_rate": 1.2469625048143364e-06, + "loss": 0.7164772748947144, + "step": 3903 + }, + { + "epoch": 0.8995391705069125, + "grad_norm": 1.0578567003352413, + "learning_rate": 1.2465933040313941e-06, + "loss": 0.824491024017334, + "step": 3904 + }, + { + "epoch": 0.8997695852534562, + "grad_norm": 0.9955753469888821, + "learning_rate": 1.24622406745576e-06, + "loss": 0.7468826770782471, + "step": 3905 + }, + { + "epoch": 0.9, + "grad_norm": 1.0419833775918754, + "learning_rate": 1.2458547951410285e-06, + "loss": 0.8049126863479614, + "step": 3906 + }, + { + "epoch": 0.9002304147465438, + "grad_norm": 1.0794114769462158, + "learning_rate": 1.245485487140799e-06, + "loss": 0.658754825592041, + "step": 3907 + }, + { + "epoch": 0.9004608294930876, + "grad_norm": 0.9848364091798514, + "learning_rate": 1.245116143508676e-06, + "loss": 0.6772202849388123, + "step": 3908 + }, + { + "epoch": 0.9006912442396313, + "grad_norm": 0.9291487276824166, + "learning_rate": 1.2447467642982697e-06, + "loss": 0.8160394430160522, + "step": 3909 + }, + { + "epoch": 0.9009216589861752, + "grad_norm": 1.3459000002689838, + "learning_rate": 1.244377349563194e-06, + "loss": 0.8289823532104492, + "step": 3910 + }, + { + "epoch": 0.9011520737327189, + "grad_norm": 1.0130598759262572, + "learning_rate": 1.24400789935707e-06, + "loss": 0.7574084997177124, + "step": 3911 + }, + { + "epoch": 0.9013824884792627, + "grad_norm": 0.9665886404424858, + "learning_rate": 1.2436384137335218e-06, + "loss": 0.8116365671157837, + "step": 3912 + }, + { + "epoch": 0.9016129032258065, + "grad_norm": 1.0860329839978788, + "learning_rate": 1.2432688927461808e-06, + 
"loss": 0.814805805683136, + "step": 3913 + }, + { + "epoch": 0.9018433179723502, + "grad_norm": 0.9783977746996081, + "learning_rate": 1.2428993364486822e-06, + "loss": 0.7947453260421753, + "step": 3914 + }, + { + "epoch": 0.902073732718894, + "grad_norm": 1.1432103627131167, + "learning_rate": 1.2425297448946661e-06, + "loss": 0.939562976360321, + "step": 3915 + }, + { + "epoch": 0.9023041474654377, + "grad_norm": 0.9342812306918719, + "learning_rate": 1.2421601181377787e-06, + "loss": 0.9460225105285645, + "step": 3916 + }, + { + "epoch": 0.9025345622119816, + "grad_norm": 1.1417876456910938, + "learning_rate": 1.241790456231671e-06, + "loss": 0.9183799028396606, + "step": 3917 + }, + { + "epoch": 0.9027649769585253, + "grad_norm": 1.1195959115117728, + "learning_rate": 1.2414207592299984e-06, + "loss": 0.6793398857116699, + "step": 3918 + }, + { + "epoch": 0.9029953917050692, + "grad_norm": 0.9758451113738527, + "learning_rate": 1.2410510271864222e-06, + "loss": 0.7796125411987305, + "step": 3919 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 1.127885346985943, + "learning_rate": 1.2406812601546085e-06, + "loss": 0.8164567351341248, + "step": 3920 + }, + { + "epoch": 0.9034562211981567, + "grad_norm": 1.327729370966401, + "learning_rate": 1.2403114581882288e-06, + "loss": 0.7267879247665405, + "step": 3921 + }, + { + "epoch": 0.9036866359447004, + "grad_norm": 0.9644037075475709, + "learning_rate": 1.2399416213409586e-06, + "loss": 0.7277103066444397, + "step": 3922 + }, + { + "epoch": 0.9039170506912443, + "grad_norm": 1.1653209742127064, + "learning_rate": 1.23957174966648e-06, + "loss": 0.8507979512214661, + "step": 3923 + }, + { + "epoch": 0.904147465437788, + "grad_norm": 1.2024221808183382, + "learning_rate": 1.2392018432184792e-06, + "loss": 0.9431333541870117, + "step": 3924 + }, + { + "epoch": 0.9043778801843319, + "grad_norm": 0.9610849982223711, + "learning_rate": 1.2388319020506473e-06, + "loss": 0.669041633605957, + "step": 3925 + }, + { + 
"epoch": 0.9046082949308756, + "grad_norm": 1.0428863031922808, + "learning_rate": 1.2384619262166808e-06, + "loss": 0.7639964818954468, + "step": 3926 + }, + { + "epoch": 0.9048387096774193, + "grad_norm": 0.9055700075744166, + "learning_rate": 1.2380919157702819e-06, + "loss": 0.7390594482421875, + "step": 3927 + }, + { + "epoch": 0.9050691244239631, + "grad_norm": 1.0183193149474203, + "learning_rate": 1.2377218707651562e-06, + "loss": 0.8320105075836182, + "step": 3928 + }, + { + "epoch": 0.9052995391705069, + "grad_norm": 0.9604555269461571, + "learning_rate": 1.237351791255016e-06, + "loss": 0.6820249557495117, + "step": 3929 + }, + { + "epoch": 0.9055299539170507, + "grad_norm": 1.0758012435150028, + "learning_rate": 1.2369816772935773e-06, + "loss": 0.8548537492752075, + "step": 3930 + }, + { + "epoch": 0.9057603686635944, + "grad_norm": 1.0169473440313737, + "learning_rate": 1.236611528934562e-06, + "loss": 0.7226318120956421, + "step": 3931 + }, + { + "epoch": 0.9059907834101383, + "grad_norm": 1.2196278844047388, + "learning_rate": 1.2362413462316963e-06, + "loss": 0.879987359046936, + "step": 3932 + }, + { + "epoch": 0.906221198156682, + "grad_norm": 0.8628507992206548, + "learning_rate": 1.2358711292387122e-06, + "loss": 0.7919881343841553, + "step": 3933 + }, + { + "epoch": 0.9064516129032258, + "grad_norm": 1.0779297510278616, + "learning_rate": 1.2355008780093456e-06, + "loss": 0.8232694268226624, + "step": 3934 + }, + { + "epoch": 0.9066820276497696, + "grad_norm": 1.249487252121194, + "learning_rate": 1.2351305925973385e-06, + "loss": 0.80347740650177, + "step": 3935 + }, + { + "epoch": 0.9069124423963134, + "grad_norm": 1.2510529509996382, + "learning_rate": 1.234760273056437e-06, + "loss": 0.7818408012390137, + "step": 3936 + }, + { + "epoch": 0.9071428571428571, + "grad_norm": 1.1620371895322128, + "learning_rate": 1.2343899194403931e-06, + "loss": 0.8391210436820984, + "step": 3937 + }, + { + "epoch": 0.9073732718894009, + "grad_norm": 
1.1380529418025975, + "learning_rate": 1.2340195318029622e-06, + "loss": 0.7937500476837158, + "step": 3938 + }, + { + "epoch": 0.9076036866359447, + "grad_norm": 0.973433345758839, + "learning_rate": 1.2336491101979065e-06, + "loss": 0.7158668041229248, + "step": 3939 + }, + { + "epoch": 0.9078341013824884, + "grad_norm": 0.9549803277521113, + "learning_rate": 1.2332786546789915e-06, + "loss": 0.6956034898757935, + "step": 3940 + }, + { + "epoch": 0.9080645161290323, + "grad_norm": 1.035574155623001, + "learning_rate": 1.2329081652999887e-06, + "loss": 0.7252948880195618, + "step": 3941 + }, + { + "epoch": 0.908294930875576, + "grad_norm": 1.2086784459715743, + "learning_rate": 1.2325376421146739e-06, + "loss": 0.7131162881851196, + "step": 3942 + }, + { + "epoch": 0.9085253456221198, + "grad_norm": 0.8781165558243194, + "learning_rate": 1.2321670851768285e-06, + "loss": 0.7383663654327393, + "step": 3943 + }, + { + "epoch": 0.9087557603686636, + "grad_norm": 0.9355062944038273, + "learning_rate": 1.2317964945402374e-06, + "loss": 0.8296892642974854, + "step": 3944 + }, + { + "epoch": 0.9089861751152074, + "grad_norm": 1.1131069336270092, + "learning_rate": 1.2314258702586923e-06, + "loss": 0.8314273357391357, + "step": 3945 + }, + { + "epoch": 0.9092165898617511, + "grad_norm": 0.9647703306046335, + "learning_rate": 1.2310552123859888e-06, + "loss": 0.7264384031295776, + "step": 3946 + }, + { + "epoch": 0.909447004608295, + "grad_norm": 0.7580621867286127, + "learning_rate": 1.230684520975927e-06, + "loss": 0.6757937073707581, + "step": 3947 + }, + { + "epoch": 0.9096774193548387, + "grad_norm": 0.8884108342506404, + "learning_rate": 1.230313796082312e-06, + "loss": 0.8318504691123962, + "step": 3948 + }, + { + "epoch": 0.9099078341013825, + "grad_norm": 0.7767337233620181, + "learning_rate": 1.2299430377589547e-06, + "loss": 0.7043207883834839, + "step": 3949 + }, + { + "epoch": 0.9101382488479263, + "grad_norm": 1.0668368590995472, + "learning_rate": 
1.2295722460596696e-06, + "loss": 0.8499487638473511, + "step": 3950 + }, + { + "epoch": 0.91036866359447, + "grad_norm": 1.1145902688644103, + "learning_rate": 1.2292014210382772e-06, + "loss": 0.8219600319862366, + "step": 3951 + }, + { + "epoch": 0.9105990783410138, + "grad_norm": 1.2329010539695853, + "learning_rate": 1.2288305627486017e-06, + "loss": 0.8136317133903503, + "step": 3952 + }, + { + "epoch": 0.9108294930875576, + "grad_norm": 1.1220482069317936, + "learning_rate": 1.2284596712444735e-06, + "loss": 0.7858958840370178, + "step": 3953 + }, + { + "epoch": 0.9110599078341014, + "grad_norm": 1.182019995516566, + "learning_rate": 1.2280887465797259e-06, + "loss": 0.8108563423156738, + "step": 3954 + }, + { + "epoch": 0.9112903225806451, + "grad_norm": 1.17197106565382, + "learning_rate": 1.2277177888081987e-06, + "loss": 0.8061145544052124, + "step": 3955 + }, + { + "epoch": 0.911520737327189, + "grad_norm": 1.1140830632516712, + "learning_rate": 1.2273467979837361e-06, + "loss": 0.7769665718078613, + "step": 3956 + }, + { + "epoch": 0.9117511520737327, + "grad_norm": 1.5134088570090107, + "learning_rate": 1.2269757741601867e-06, + "loss": 1.0548570156097412, + "step": 3957 + }, + { + "epoch": 0.9119815668202765, + "grad_norm": 0.9732476833800602, + "learning_rate": 1.226604717391404e-06, + "loss": 0.7095952033996582, + "step": 3958 + }, + { + "epoch": 0.9122119815668203, + "grad_norm": 0.8435340807921997, + "learning_rate": 1.226233627731247e-06, + "loss": 0.7330363392829895, + "step": 3959 + }, + { + "epoch": 0.9124423963133641, + "grad_norm": 0.9706068481575616, + "learning_rate": 1.225862505233578e-06, + "loss": 0.7328442931175232, + "step": 3960 + }, + { + "epoch": 0.9126728110599078, + "grad_norm": 1.059740258312267, + "learning_rate": 1.2254913499522656e-06, + "loss": 0.7572993040084839, + "step": 3961 + }, + { + "epoch": 0.9129032258064517, + "grad_norm": 1.0542941153492202, + "learning_rate": 1.2251201619411823e-06, + "loss": 0.7706469297409058, 
+ "step": 3962 + }, + { + "epoch": 0.9131336405529954, + "grad_norm": 1.1436826868313579, + "learning_rate": 1.2247489412542053e-06, + "loss": 0.7830193042755127, + "step": 3963 + }, + { + "epoch": 0.9133640552995391, + "grad_norm": 1.0827904871592715, + "learning_rate": 1.224377687945217e-06, + "loss": 0.8415955901145935, + "step": 3964 + }, + { + "epoch": 0.913594470046083, + "grad_norm": 1.1895924425921953, + "learning_rate": 1.2240064020681044e-06, + "loss": 0.7383062839508057, + "step": 3965 + }, + { + "epoch": 0.9138248847926267, + "grad_norm": 1.1432920832791855, + "learning_rate": 1.2236350836767593e-06, + "loss": 0.7372882962226868, + "step": 3966 + }, + { + "epoch": 0.9140552995391705, + "grad_norm": 1.0941013432151616, + "learning_rate": 1.2232637328250776e-06, + "loss": 0.7914254665374756, + "step": 3967 + }, + { + "epoch": 0.9142857142857143, + "grad_norm": 0.9886213418734634, + "learning_rate": 1.2228923495669605e-06, + "loss": 0.8510675430297852, + "step": 3968 + }, + { + "epoch": 0.9145161290322581, + "grad_norm": 1.045281864627849, + "learning_rate": 1.2225209339563143e-06, + "loss": 0.7391757369041443, + "step": 3969 + }, + { + "epoch": 0.9147465437788018, + "grad_norm": 0.8746728562097662, + "learning_rate": 1.2221494860470491e-06, + "loss": 0.69194495677948, + "step": 3970 + }, + { + "epoch": 0.9149769585253457, + "grad_norm": 1.0907421288179358, + "learning_rate": 1.22177800589308e-06, + "loss": 0.7593865394592285, + "step": 3971 + }, + { + "epoch": 0.9152073732718894, + "grad_norm": 1.037234739347401, + "learning_rate": 1.2214064935483268e-06, + "loss": 0.7831966876983643, + "step": 3972 + }, + { + "epoch": 0.9154377880184332, + "grad_norm": 1.1150279108134162, + "learning_rate": 1.2210349490667145e-06, + "loss": 0.8858723640441895, + "step": 3973 + }, + { + "epoch": 0.915668202764977, + "grad_norm": 1.1381126617682915, + "learning_rate": 1.2206633725021715e-06, + "loss": 0.8645567893981934, + "step": 3974 + }, + { + "epoch": 
0.9158986175115207, + "grad_norm": 0.9188905804582469, + "learning_rate": 1.2202917639086322e-06, + "loss": 0.7619047164916992, + "step": 3975 + }, + { + "epoch": 0.9161290322580645, + "grad_norm": 1.0126992141273314, + "learning_rate": 1.2199201233400355e-06, + "loss": 0.8652681112289429, + "step": 3976 + }, + { + "epoch": 0.9163594470046083, + "grad_norm": 0.9961259698766619, + "learning_rate": 1.2195484508503234e-06, + "loss": 0.6860940456390381, + "step": 3977 + }, + { + "epoch": 0.9165898617511521, + "grad_norm": 0.8860870600955693, + "learning_rate": 1.2191767464934444e-06, + "loss": 0.7372464537620544, + "step": 3978 + }, + { + "epoch": 0.9168202764976958, + "grad_norm": 1.3495413684840594, + "learning_rate": 1.218805010323351e-06, + "loss": 0.8719853162765503, + "step": 3979 + }, + { + "epoch": 0.9170506912442397, + "grad_norm": 0.9968927276513252, + "learning_rate": 1.2184332423940003e-06, + "loss": 0.8203779458999634, + "step": 3980 + }, + { + "epoch": 0.9172811059907834, + "grad_norm": 1.197176686739939, + "learning_rate": 1.218061442759353e-06, + "loss": 0.8648861646652222, + "step": 3981 + }, + { + "epoch": 0.9175115207373272, + "grad_norm": 1.0630748229990676, + "learning_rate": 1.2176896114733766e-06, + "loss": 0.7651659250259399, + "step": 3982 + }, + { + "epoch": 0.917741935483871, + "grad_norm": 1.20459191964974, + "learning_rate": 1.2173177485900408e-06, + "loss": 0.8495512008666992, + "step": 3983 + }, + { + "epoch": 0.9179723502304148, + "grad_norm": 1.3559959351470627, + "learning_rate": 1.2169458541633216e-06, + "loss": 0.7997228503227234, + "step": 3984 + }, + { + "epoch": 0.9182027649769585, + "grad_norm": 0.9870494686008755, + "learning_rate": 1.2165739282471987e-06, + "loss": 0.8353173136711121, + "step": 3985 + }, + { + "epoch": 0.9184331797235024, + "grad_norm": 1.2277323881843956, + "learning_rate": 1.216201970895657e-06, + "loss": 0.9039655327796936, + "step": 3986 + }, + { + "epoch": 0.9186635944700461, + "grad_norm": 
0.9209288499077958, + "learning_rate": 1.2158299821626854e-06, + "loss": 0.8158592581748962, + "step": 3987 + }, + { + "epoch": 0.9188940092165898, + "grad_norm": 1.2007654555954255, + "learning_rate": 1.2154579621022776e-06, + "loss": 0.8443971872329712, + "step": 3988 + }, + { + "epoch": 0.9191244239631337, + "grad_norm": 0.916322848733307, + "learning_rate": 1.2150859107684318e-06, + "loss": 0.7934167385101318, + "step": 3989 + }, + { + "epoch": 0.9193548387096774, + "grad_norm": 1.1576910593833736, + "learning_rate": 1.2147138282151512e-06, + "loss": 0.750052809715271, + "step": 3990 + }, + { + "epoch": 0.9195852534562212, + "grad_norm": 1.0948767691124337, + "learning_rate": 1.2143417144964423e-06, + "loss": 0.813056468963623, + "step": 3991 + }, + { + "epoch": 0.919815668202765, + "grad_norm": 1.1487977592190233, + "learning_rate": 1.2139695696663174e-06, + "loss": 0.9478945732116699, + "step": 3992 + }, + { + "epoch": 0.9200460829493088, + "grad_norm": 0.9711264468634061, + "learning_rate": 1.2135973937787927e-06, + "loss": 0.687637448310852, + "step": 3993 + }, + { + "epoch": 0.9202764976958525, + "grad_norm": 1.071392128639805, + "learning_rate": 1.213225186887889e-06, + "loss": 0.8073818683624268, + "step": 3994 + }, + { + "epoch": 0.9205069124423964, + "grad_norm": 1.1074324196567935, + "learning_rate": 1.2128529490476318e-06, + "loss": 0.6684166789054871, + "step": 3995 + }, + { + "epoch": 0.9207373271889401, + "grad_norm": 1.1910033963986806, + "learning_rate": 1.2124806803120506e-06, + "loss": 0.7897466421127319, + "step": 3996 + }, + { + "epoch": 0.9209677419354839, + "grad_norm": 1.0375797321803883, + "learning_rate": 1.21210838073518e-06, + "loss": 0.832312822341919, + "step": 3997 + }, + { + "epoch": 0.9211981566820276, + "grad_norm": 1.036059468253791, + "learning_rate": 1.2117360503710588e-06, + "loss": 0.9536067247390747, + "step": 3998 + }, + { + "epoch": 0.9214285714285714, + "grad_norm": 1.123926651312402, + "learning_rate": 
1.2113636892737302e-06, + "loss": 0.8959759473800659, + "step": 3999 + }, + { + "epoch": 0.9216589861751152, + "grad_norm": 0.9405530325495998, + "learning_rate": 1.2109912974972422e-06, + "loss": 0.6789166927337646, + "step": 4000 + }, + { + "epoch": 0.9218894009216589, + "grad_norm": 0.9327551909921717, + "learning_rate": 1.2106188750956464e-06, + "loss": 0.7336491346359253, + "step": 4001 + }, + { + "epoch": 0.9221198156682028, + "grad_norm": 0.8000293761487048, + "learning_rate": 1.2102464221229997e-06, + "loss": 0.7838259935379028, + "step": 4002 + }, + { + "epoch": 0.9223502304147465, + "grad_norm": 1.2907858896278495, + "learning_rate": 1.2098739386333631e-06, + "loss": 0.9147623777389526, + "step": 4003 + }, + { + "epoch": 0.9225806451612903, + "grad_norm": 1.3691019040487797, + "learning_rate": 1.2095014246808022e-06, + "loss": 0.7296491265296936, + "step": 4004 + }, + { + "epoch": 0.9228110599078341, + "grad_norm": 1.1028104717001235, + "learning_rate": 1.2091288803193868e-06, + "loss": 0.7898432016372681, + "step": 4005 + }, + { + "epoch": 0.9230414746543779, + "grad_norm": 1.1562470474736035, + "learning_rate": 1.2087563056031914e-06, + "loss": 0.8190659284591675, + "step": 4006 + }, + { + "epoch": 0.9232718894009216, + "grad_norm": 1.4146112766933352, + "learning_rate": 1.2083837005862945e-06, + "loss": 0.8383443355560303, + "step": 4007 + }, + { + "epoch": 0.9235023041474655, + "grad_norm": 0.7251077105825574, + "learning_rate": 1.2080110653227796e-06, + "loss": 0.5987120866775513, + "step": 4008 + }, + { + "epoch": 0.9237327188940092, + "grad_norm": 1.056645940510342, + "learning_rate": 1.2076383998667334e-06, + "loss": 0.8811358213424683, + "step": 4009 + }, + { + "epoch": 0.923963133640553, + "grad_norm": 0.8867108269493398, + "learning_rate": 1.2072657042722486e-06, + "loss": 0.7958807349205017, + "step": 4010 + }, + { + "epoch": 0.9241935483870968, + "grad_norm": 1.1776412427000924, + "learning_rate": 1.2068929785934215e-06, + "loss": 
0.7192457914352417, + "step": 4011 + }, + { + "epoch": 0.9244239631336405, + "grad_norm": 1.0545419352254402, + "learning_rate": 1.2065202228843523e-06, + "loss": 0.6854838132858276, + "step": 4012 + }, + { + "epoch": 0.9246543778801843, + "grad_norm": 1.0759672957343283, + "learning_rate": 1.2061474371991457e-06, + "loss": 0.7334680557250977, + "step": 4013 + }, + { + "epoch": 0.9248847926267281, + "grad_norm": 0.9536076812745731, + "learning_rate": 1.205774621591912e-06, + "loss": 0.7614402770996094, + "step": 4014 + }, + { + "epoch": 0.9251152073732719, + "grad_norm": 1.3871826739545572, + "learning_rate": 1.2054017761167644e-06, + "loss": 0.7502505779266357, + "step": 4015 + }, + { + "epoch": 0.9253456221198156, + "grad_norm": 1.044146949688276, + "learning_rate": 1.2050289008278205e-06, + "loss": 0.7922523021697998, + "step": 4016 + }, + { + "epoch": 0.9255760368663595, + "grad_norm": 1.2025329853302307, + "learning_rate": 1.2046559957792032e-06, + "loss": 0.7534265518188477, + "step": 4017 + }, + { + "epoch": 0.9258064516129032, + "grad_norm": 0.9478426591249515, + "learning_rate": 1.2042830610250395e-06, + "loss": 0.6997093558311462, + "step": 4018 + }, + { + "epoch": 0.926036866359447, + "grad_norm": 1.050086676036124, + "learning_rate": 1.2039100966194594e-06, + "loss": 0.7009599208831787, + "step": 4019 + }, + { + "epoch": 0.9262672811059908, + "grad_norm": 1.108108705874163, + "learning_rate": 1.203537102616599e-06, + "loss": 0.795873761177063, + "step": 4020 + }, + { + "epoch": 0.9264976958525346, + "grad_norm": 1.1836803264586404, + "learning_rate": 1.2031640790705972e-06, + "loss": 0.7860225439071655, + "step": 4021 + }, + { + "epoch": 0.9267281105990783, + "grad_norm": 0.9036535621632875, + "learning_rate": 1.2027910260355989e-06, + "loss": 0.7657063007354736, + "step": 4022 + }, + { + "epoch": 0.9269585253456222, + "grad_norm": 1.0407468417409953, + "learning_rate": 1.2024179435657512e-06, + "loss": 0.782909631729126, + "step": 4023 + }, + { + 
"epoch": 0.9271889400921659, + "grad_norm": 0.8628791908243046, + "learning_rate": 1.202044831715207e-06, + "loss": 0.713431715965271, + "step": 4024 + }, + { + "epoch": 0.9274193548387096, + "grad_norm": 0.9826922843740741, + "learning_rate": 1.201671690538123e-06, + "loss": 0.9126790165901184, + "step": 4025 + }, + { + "epoch": 0.9276497695852535, + "grad_norm": 0.9552497173996132, + "learning_rate": 1.20129852008866e-06, + "loss": 0.8640999794006348, + "step": 4026 + }, + { + "epoch": 0.9278801843317972, + "grad_norm": 1.0290580406520045, + "learning_rate": 1.2009253204209832e-06, + "loss": 0.723473072052002, + "step": 4027 + }, + { + "epoch": 0.928110599078341, + "grad_norm": 0.9995947167655078, + "learning_rate": 1.2005520915892626e-06, + "loss": 0.6764041185379028, + "step": 4028 + }, + { + "epoch": 0.9283410138248848, + "grad_norm": 1.1315388960653066, + "learning_rate": 1.200178833647671e-06, + "loss": 0.8525882959365845, + "step": 4029 + }, + { + "epoch": 0.9285714285714286, + "grad_norm": 1.1279047416289067, + "learning_rate": 1.1998055466503872e-06, + "loss": 0.714957058429718, + "step": 4030 + }, + { + "epoch": 0.9288018433179723, + "grad_norm": 0.9055007840106456, + "learning_rate": 1.1994322306515925e-06, + "loss": 0.8015910387039185, + "step": 4031 + }, + { + "epoch": 0.9290322580645162, + "grad_norm": 1.1314666315910753, + "learning_rate": 1.1990588857054733e-06, + "loss": 1.0306739807128906, + "step": 4032 + }, + { + "epoch": 0.9292626728110599, + "grad_norm": 1.0078215910327748, + "learning_rate": 1.1986855118662205e-06, + "loss": 0.8307464122772217, + "step": 4033 + }, + { + "epoch": 0.9294930875576037, + "grad_norm": 0.9974753472669955, + "learning_rate": 1.1983121091880286e-06, + "loss": 0.8720347881317139, + "step": 4034 + }, + { + "epoch": 0.9297235023041475, + "grad_norm": 1.0249437684832297, + "learning_rate": 1.1979386777250968e-06, + "loss": 0.7716174721717834, + "step": 4035 + }, + { + "epoch": 0.9299539170506912, + "grad_norm": 
0.9533075514678258, + "learning_rate": 1.1975652175316279e-06, + "loss": 0.8968960046768188, + "step": 4036 + }, + { + "epoch": 0.930184331797235, + "grad_norm": 1.0235472692311864, + "learning_rate": 1.197191728661829e-06, + "loss": 0.7472472786903381, + "step": 4037 + }, + { + "epoch": 0.9304147465437788, + "grad_norm": 1.209577738801564, + "learning_rate": 1.196818211169912e-06, + "loss": 0.7969691753387451, + "step": 4038 + }, + { + "epoch": 0.9306451612903226, + "grad_norm": 0.8592343628435503, + "learning_rate": 1.196444665110092e-06, + "loss": 0.6187525987625122, + "step": 4039 + }, + { + "epoch": 0.9308755760368663, + "grad_norm": 1.0503056259771648, + "learning_rate": 1.1960710905365893e-06, + "loss": 0.8715502619743347, + "step": 4040 + }, + { + "epoch": 0.9311059907834102, + "grad_norm": 0.9918268480034713, + "learning_rate": 1.1956974875036273e-06, + "loss": 0.7174774408340454, + "step": 4041 + }, + { + "epoch": 0.9313364055299539, + "grad_norm": 0.8743867275561935, + "learning_rate": 1.1953238560654337e-06, + "loss": 0.6546192169189453, + "step": 4042 + }, + { + "epoch": 0.9315668202764977, + "grad_norm": 1.1024794232135675, + "learning_rate": 1.194950196276241e-06, + "loss": 0.8688700199127197, + "step": 4043 + }, + { + "epoch": 0.9317972350230415, + "grad_norm": 1.0449187982587707, + "learning_rate": 1.1945765081902856e-06, + "loss": 0.7679718732833862, + "step": 4044 + }, + { + "epoch": 0.9320276497695853, + "grad_norm": 0.9426197124643214, + "learning_rate": 1.1942027918618073e-06, + "loss": 0.6335175037384033, + "step": 4045 + }, + { + "epoch": 0.932258064516129, + "grad_norm": 1.0452657366695544, + "learning_rate": 1.1938290473450513e-06, + "loss": 0.785153865814209, + "step": 4046 + }, + { + "epoch": 0.9324884792626729, + "grad_norm": 0.9145063707903602, + "learning_rate": 1.1934552746942653e-06, + "loss": 0.6873019337654114, + "step": 4047 + }, + { + "epoch": 0.9327188940092166, + "grad_norm": 0.9707470479007109, + "learning_rate": 
1.1930814739637025e-06, + "loss": 0.7416094541549683, + "step": 4048 + }, + { + "epoch": 0.9329493087557603, + "grad_norm": 1.2103943548089806, + "learning_rate": 1.1927076452076193e-06, + "loss": 0.7206372618675232, + "step": 4049 + }, + { + "epoch": 0.9331797235023042, + "grad_norm": 1.1043264858931607, + "learning_rate": 1.1923337884802767e-06, + "loss": 0.8352477550506592, + "step": 4050 + }, + { + "epoch": 0.9334101382488479, + "grad_norm": 1.116832001192149, + "learning_rate": 1.191959903835939e-06, + "loss": 0.8243483304977417, + "step": 4051 + }, + { + "epoch": 0.9336405529953917, + "grad_norm": 1.4110893804735163, + "learning_rate": 1.1915859913288756e-06, + "loss": 0.827987790107727, + "step": 4052 + }, + { + "epoch": 0.9338709677419355, + "grad_norm": 1.1514055762505417, + "learning_rate": 1.1912120510133589e-06, + "loss": 0.8624123334884644, + "step": 4053 + }, + { + "epoch": 0.9341013824884793, + "grad_norm": 1.2091942284642192, + "learning_rate": 1.1908380829436667e-06, + "loss": 0.8615037202835083, + "step": 4054 + }, + { + "epoch": 0.934331797235023, + "grad_norm": 1.2500115524653743, + "learning_rate": 1.190464087174079e-06, + "loss": 0.9367121458053589, + "step": 4055 + }, + { + "epoch": 0.9345622119815669, + "grad_norm": 1.4503623207353766, + "learning_rate": 1.190090063758881e-06, + "loss": 0.927996039390564, + "step": 4056 + }, + { + "epoch": 0.9347926267281106, + "grad_norm": 1.0709061746508743, + "learning_rate": 1.1897160127523623e-06, + "loss": 0.841314435005188, + "step": 4057 + }, + { + "epoch": 0.9350230414746544, + "grad_norm": 1.1021939339887863, + "learning_rate": 1.189341934208815e-06, + "loss": 0.864904522895813, + "step": 4058 + }, + { + "epoch": 0.9352534562211982, + "grad_norm": 1.148301781904619, + "learning_rate": 1.188967828182537e-06, + "loss": 0.9505404829978943, + "step": 4059 + }, + { + "epoch": 0.9354838709677419, + "grad_norm": 1.0791372441668663, + "learning_rate": 1.188593694727829e-06, + "loss": 0.7347132563591003, + 
"step": 4060 + }, + { + "epoch": 0.9357142857142857, + "grad_norm": 1.1367351426324537, + "learning_rate": 1.1882195338989958e-06, + "loss": 0.6267231106758118, + "step": 4061 + }, + { + "epoch": 0.9359447004608294, + "grad_norm": 1.0946102482081315, + "learning_rate": 1.1878453457503464e-06, + "loss": 0.8052406907081604, + "step": 4062 + }, + { + "epoch": 0.9361751152073733, + "grad_norm": 1.1032845960202522, + "learning_rate": 1.1874711303361933e-06, + "loss": 0.7928211688995361, + "step": 4063 + }, + { + "epoch": 0.936405529953917, + "grad_norm": 1.1265414942472118, + "learning_rate": 1.1870968877108545e-06, + "loss": 0.8863959312438965, + "step": 4064 + }, + { + "epoch": 0.9366359447004609, + "grad_norm": 1.0592501761240638, + "learning_rate": 1.1867226179286496e-06, + "loss": 0.8749874830245972, + "step": 4065 + }, + { + "epoch": 0.9368663594470046, + "grad_norm": 0.9223254168257967, + "learning_rate": 1.186348321043904e-06, + "loss": 0.7516318559646606, + "step": 4066 + }, + { + "epoch": 0.9370967741935484, + "grad_norm": 1.0863969007807137, + "learning_rate": 1.1859739971109467e-06, + "loss": 0.8435031771659851, + "step": 4067 + }, + { + "epoch": 0.9373271889400921, + "grad_norm": 1.08570563607149, + "learning_rate": 1.1855996461841093e-06, + "loss": 0.8766932487487793, + "step": 4068 + }, + { + "epoch": 0.937557603686636, + "grad_norm": 1.2630999347152494, + "learning_rate": 1.1852252683177293e-06, + "loss": 0.8748513460159302, + "step": 4069 + }, + { + "epoch": 0.9377880184331797, + "grad_norm": 1.2689555695038703, + "learning_rate": 1.184850863566147e-06, + "loss": 0.8917855024337769, + "step": 4070 + }, + { + "epoch": 0.9380184331797236, + "grad_norm": 1.0628114663297852, + "learning_rate": 1.1844764319837064e-06, + "loss": 0.7631640434265137, + "step": 4071 + }, + { + "epoch": 0.9382488479262673, + "grad_norm": 1.0140155614547266, + "learning_rate": 1.1841019736247557e-06, + "loss": 0.8354158401489258, + "step": 4072 + }, + { + "epoch": 
0.938479262672811, + "grad_norm": 0.8561335978546013, + "learning_rate": 1.1837274885436473e-06, + "loss": 0.8122761845588684, + "step": 4073 + }, + { + "epoch": 0.9387096774193548, + "grad_norm": 1.5776279194471237, + "learning_rate": 1.1833529767947374e-06, + "loss": 0.8281430006027222, + "step": 4074 + }, + { + "epoch": 0.9389400921658986, + "grad_norm": 1.3828203317822199, + "learning_rate": 1.1829784384323856e-06, + "loss": 0.8291982412338257, + "step": 4075 + }, + { + "epoch": 0.9391705069124424, + "grad_norm": 1.3096607265096822, + "learning_rate": 1.1826038735109553e-06, + "loss": 0.8951852321624756, + "step": 4076 + }, + { + "epoch": 0.9394009216589861, + "grad_norm": 1.2165058417213606, + "learning_rate": 1.182229282084815e-06, + "loss": 0.7006446123123169, + "step": 4077 + }, + { + "epoch": 0.93963133640553, + "grad_norm": 1.1269330295000342, + "learning_rate": 1.1818546642083353e-06, + "loss": 0.8944047689437866, + "step": 4078 + }, + { + "epoch": 0.9398617511520737, + "grad_norm": 0.9351299115123082, + "learning_rate": 1.1814800199358919e-06, + "loss": 0.8252646923065186, + "step": 4079 + }, + { + "epoch": 0.9400921658986175, + "grad_norm": 1.2255680666736817, + "learning_rate": 1.181105349321864e-06, + "loss": 0.7852828502655029, + "step": 4080 + }, + { + "epoch": 0.9403225806451613, + "grad_norm": 1.0734973037527151, + "learning_rate": 1.1807306524206347e-06, + "loss": 0.7758563160896301, + "step": 4081 + }, + { + "epoch": 0.9405529953917051, + "grad_norm": 1.0672387708424669, + "learning_rate": 1.1803559292865899e-06, + "loss": 0.7297114133834839, + "step": 4082 + }, + { + "epoch": 0.9407834101382488, + "grad_norm": 1.1802096748579922, + "learning_rate": 1.1799811799741209e-06, + "loss": 0.7974321842193604, + "step": 4083 + }, + { + "epoch": 0.9410138248847926, + "grad_norm": 1.2930194654348013, + "learning_rate": 1.179606404537622e-06, + "loss": 0.6406733989715576, + "step": 4084 + }, + { + "epoch": 0.9412442396313364, + "grad_norm": 
0.9862268230007224, + "learning_rate": 1.179231603031491e-06, + "loss": 0.6925486326217651, + "step": 4085 + }, + { + "epoch": 0.9414746543778801, + "grad_norm": 0.9201295652583962, + "learning_rate": 1.17885677551013e-06, + "loss": 0.792647123336792, + "step": 4086 + }, + { + "epoch": 0.941705069124424, + "grad_norm": 1.0460531669846371, + "learning_rate": 1.1784819220279454e-06, + "loss": 0.7499191761016846, + "step": 4087 + }, + { + "epoch": 0.9419354838709677, + "grad_norm": 1.120763335726602, + "learning_rate": 1.1781070426393455e-06, + "loss": 0.8307451009750366, + "step": 4088 + }, + { + "epoch": 0.9421658986175115, + "grad_norm": 1.1015455973526673, + "learning_rate": 1.1777321373987445e-06, + "loss": 0.7859289646148682, + "step": 4089 + }, + { + "epoch": 0.9423963133640553, + "grad_norm": 1.0291702780651948, + "learning_rate": 1.177357206360559e-06, + "loss": 0.761134922504425, + "step": 4090 + }, + { + "epoch": 0.9426267281105991, + "grad_norm": 1.240188832472171, + "learning_rate": 1.1769822495792098e-06, + "loss": 0.8697078227996826, + "step": 4091 + }, + { + "epoch": 0.9428571428571428, + "grad_norm": 1.0395615260234665, + "learning_rate": 1.1766072671091212e-06, + "loss": 0.731541633605957, + "step": 4092 + }, + { + "epoch": 0.9430875576036867, + "grad_norm": 1.1056530512213054, + "learning_rate": 1.1762322590047219e-06, + "loss": 0.7501940727233887, + "step": 4093 + }, + { + "epoch": 0.9433179723502304, + "grad_norm": 1.1531150840189341, + "learning_rate": 1.1758572253204431e-06, + "loss": 0.9448602199554443, + "step": 4094 + }, + { + "epoch": 0.9435483870967742, + "grad_norm": 0.8884441593083074, + "learning_rate": 1.175482166110721e-06, + "loss": 0.7704026699066162, + "step": 4095 + }, + { + "epoch": 0.943778801843318, + "grad_norm": 0.8973060402184874, + "learning_rate": 1.1751070814299947e-06, + "loss": 0.7905057668685913, + "step": 4096 + }, + { + "epoch": 0.9440092165898617, + "grad_norm": 1.238350046583652, + "learning_rate": 
1.1747319713327078e-06, + "loss": 0.8957202434539795, + "step": 4097 + }, + { + "epoch": 0.9442396313364055, + "grad_norm": 0.9896078596502195, + "learning_rate": 1.174356835873306e-06, + "loss": 0.7922521233558655, + "step": 4098 + }, + { + "epoch": 0.9444700460829493, + "grad_norm": 0.9974151293119675, + "learning_rate": 1.1739816751062404e-06, + "loss": 0.6501933336257935, + "step": 4099 + }, + { + "epoch": 0.9447004608294931, + "grad_norm": 0.9673699554437744, + "learning_rate": 1.1736064890859654e-06, + "loss": 0.6743361353874207, + "step": 4100 + }, + { + "epoch": 0.9449308755760368, + "grad_norm": 1.0381670362595088, + "learning_rate": 1.173231277866938e-06, + "loss": 0.920632004737854, + "step": 4101 + }, + { + "epoch": 0.9451612903225807, + "grad_norm": 0.872889135902432, + "learning_rate": 1.1728560415036199e-06, + "loss": 0.7498964071273804, + "step": 4102 + }, + { + "epoch": 0.9453917050691244, + "grad_norm": 0.8444235514312883, + "learning_rate": 1.1724807800504765e-06, + "loss": 0.7665064334869385, + "step": 4103 + }, + { + "epoch": 0.9456221198156682, + "grad_norm": 0.8729439782855682, + "learning_rate": 1.172105493561976e-06, + "loss": 0.75946044921875, + "step": 4104 + }, + { + "epoch": 0.945852534562212, + "grad_norm": 1.016811663523364, + "learning_rate": 1.1717301820925908e-06, + "loss": 0.7701961398124695, + "step": 4105 + }, + { + "epoch": 0.9460829493087558, + "grad_norm": 0.9708618505769702, + "learning_rate": 1.1713548456967974e-06, + "loss": 0.7775348424911499, + "step": 4106 + }, + { + "epoch": 0.9463133640552995, + "grad_norm": 0.8519325609053343, + "learning_rate": 1.1709794844290745e-06, + "loss": 0.8149436712265015, + "step": 4107 + }, + { + "epoch": 0.9465437788018434, + "grad_norm": 0.8519085263981432, + "learning_rate": 1.170604098343906e-06, + "loss": 0.7136009335517883, + "step": 4108 + }, + { + "epoch": 0.9467741935483871, + "grad_norm": 1.2048256186284507, + "learning_rate": 1.1702286874957786e-06, + "loss": 0.7678873538970947, 
+ "step": 4109 + }, + { + "epoch": 0.9470046082949308, + "grad_norm": 0.9842223659547223, + "learning_rate": 1.1698532519391827e-06, + "loss": 0.7506710290908813, + "step": 4110 + }, + { + "epoch": 0.9472350230414747, + "grad_norm": 0.900893049038478, + "learning_rate": 1.1694777917286118e-06, + "loss": 0.6646897792816162, + "step": 4111 + }, + { + "epoch": 0.9474654377880184, + "grad_norm": 1.3857066059132386, + "learning_rate": 1.1691023069185639e-06, + "loss": 0.820647120475769, + "step": 4112 + }, + { + "epoch": 0.9476958525345622, + "grad_norm": 0.9795728799566645, + "learning_rate": 1.1687267975635402e-06, + "loss": 0.872378408908844, + "step": 4113 + }, + { + "epoch": 0.947926267281106, + "grad_norm": 1.0760361173899362, + "learning_rate": 1.168351263718045e-06, + "loss": 0.7920655608177185, + "step": 4114 + }, + { + "epoch": 0.9481566820276498, + "grad_norm": 1.1709025489256302, + "learning_rate": 1.1679757054365866e-06, + "loss": 0.6593836545944214, + "step": 4115 + }, + { + "epoch": 0.9483870967741935, + "grad_norm": 1.0965626572699905, + "learning_rate": 1.1676001227736772e-06, + "loss": 0.7473627328872681, + "step": 4116 + }, + { + "epoch": 0.9486175115207374, + "grad_norm": 1.2027339281506744, + "learning_rate": 1.1672245157838317e-06, + "loss": 0.8001665472984314, + "step": 4117 + }, + { + "epoch": 0.9488479262672811, + "grad_norm": 0.9543944768909415, + "learning_rate": 1.1668488845215689e-06, + "loss": 0.7342571020126343, + "step": 4118 + }, + { + "epoch": 0.9490783410138249, + "grad_norm": 1.2428163281726954, + "learning_rate": 1.1664732290414118e-06, + "loss": 0.7616822719573975, + "step": 4119 + }, + { + "epoch": 0.9493087557603687, + "grad_norm": 1.2486031522636918, + "learning_rate": 1.1660975493978857e-06, + "loss": 0.8885634541511536, + "step": 4120 + }, + { + "epoch": 0.9495391705069124, + "grad_norm": 1.1323168185847523, + "learning_rate": 1.1657218456455205e-06, + "loss": 0.7816281318664551, + "step": 4121 + }, + { + "epoch": 
0.9497695852534562, + "grad_norm": 0.9570364600334796, + "learning_rate": 1.1653461178388485e-06, + "loss": 0.7412079572677612, + "step": 4122 + }, + { + "epoch": 0.95, + "grad_norm": 0.957883425985998, + "learning_rate": 1.1649703660324064e-06, + "loss": 0.8096172213554382, + "step": 4123 + }, + { + "epoch": 0.9502304147465438, + "grad_norm": 1.0359903594582591, + "learning_rate": 1.164594590280734e-06, + "loss": 0.6690856218338013, + "step": 4124 + }, + { + "epoch": 0.9504608294930875, + "grad_norm": 0.9697541149080181, + "learning_rate": 1.1642187906383746e-06, + "loss": 0.7509289979934692, + "step": 4125 + }, + { + "epoch": 0.9506912442396314, + "grad_norm": 0.8506285939807987, + "learning_rate": 1.1638429671598754e-06, + "loss": 0.6643730401992798, + "step": 4126 + }, + { + "epoch": 0.9509216589861751, + "grad_norm": 0.994475544194171, + "learning_rate": 1.1634671198997864e-06, + "loss": 0.8100850582122803, + "step": 4127 + }, + { + "epoch": 0.9511520737327189, + "grad_norm": 1.392121351288023, + "learning_rate": 1.1630912489126612e-06, + "loss": 0.919742226600647, + "step": 4128 + }, + { + "epoch": 0.9513824884792627, + "grad_norm": 1.144319413666889, + "learning_rate": 1.1627153542530571e-06, + "loss": 0.8953771591186523, + "step": 4129 + }, + { + "epoch": 0.9516129032258065, + "grad_norm": 0.9663802093818391, + "learning_rate": 1.162339435975535e-06, + "loss": 0.7401770949363708, + "step": 4130 + }, + { + "epoch": 0.9518433179723502, + "grad_norm": 1.0071840947097435, + "learning_rate": 1.1619634941346585e-06, + "loss": 0.7618032097816467, + "step": 4131 + }, + { + "epoch": 0.9520737327188941, + "grad_norm": 1.3156218418351784, + "learning_rate": 1.1615875287849955e-06, + "loss": 0.9134000539779663, + "step": 4132 + }, + { + "epoch": 0.9523041474654378, + "grad_norm": 0.9617492928251477, + "learning_rate": 1.1612115399811162e-06, + "loss": 0.7555145025253296, + "step": 4133 + }, + { + "epoch": 0.9525345622119815, + "grad_norm": 0.9434517704683025, + 
"learning_rate": 1.1608355277775955e-06, + "loss": 0.9125050902366638, + "step": 4134 + }, + { + "epoch": 0.9527649769585254, + "grad_norm": 0.9082549396493419, + "learning_rate": 1.1604594922290106e-06, + "loss": 0.6575542688369751, + "step": 4135 + }, + { + "epoch": 0.9529953917050691, + "grad_norm": 1.0750997369204898, + "learning_rate": 1.1600834333899431e-06, + "loss": 0.7530527114868164, + "step": 4136 + }, + { + "epoch": 0.9532258064516129, + "grad_norm": 0.9603596342147773, + "learning_rate": 1.159707351314977e-06, + "loss": 0.8818701505661011, + "step": 4137 + }, + { + "epoch": 0.9534562211981567, + "grad_norm": 0.9491169409805379, + "learning_rate": 1.1593312460587003e-06, + "loss": 0.7172919511795044, + "step": 4138 + }, + { + "epoch": 0.9536866359447005, + "grad_norm": 1.1122266085503043, + "learning_rate": 1.1589551176757044e-06, + "loss": 0.8701400756835938, + "step": 4139 + }, + { + "epoch": 0.9539170506912442, + "grad_norm": 1.3285866575691943, + "learning_rate": 1.1585789662205834e-06, + "loss": 0.867475152015686, + "step": 4140 + }, + { + "epoch": 0.9541474654377881, + "grad_norm": 1.1851362026267, + "learning_rate": 1.1582027917479356e-06, + "loss": 0.7809052467346191, + "step": 4141 + }, + { + "epoch": 0.9543778801843318, + "grad_norm": 1.1986202884801196, + "learning_rate": 1.1578265943123619e-06, + "loss": 0.8589099645614624, + "step": 4142 + }, + { + "epoch": 0.9546082949308756, + "grad_norm": 0.893566517908755, + "learning_rate": 1.157450373968467e-06, + "loss": 0.7826642394065857, + "step": 4143 + }, + { + "epoch": 0.9548387096774194, + "grad_norm": 1.3652425128856092, + "learning_rate": 1.1570741307708585e-06, + "loss": 0.9550029635429382, + "step": 4144 + }, + { + "epoch": 0.9550691244239631, + "grad_norm": 1.0826442844044148, + "learning_rate": 1.1566978647741478e-06, + "loss": 0.8607431650161743, + "step": 4145 + }, + { + "epoch": 0.9552995391705069, + "grad_norm": 0.8247649155112424, + "learning_rate": 1.15632157603295e-06, + "loss": 
0.7350449562072754, + "step": 4146 + }, + { + "epoch": 0.9555299539170506, + "grad_norm": 1.033301557916291, + "learning_rate": 1.1559452646018818e-06, + "loss": 0.853142261505127, + "step": 4147 + }, + { + "epoch": 0.9557603686635945, + "grad_norm": 1.0495554531445934, + "learning_rate": 1.1555689305355651e-06, + "loss": 0.7137192487716675, + "step": 4148 + }, + { + "epoch": 0.9559907834101382, + "grad_norm": 1.158813208265862, + "learning_rate": 1.1551925738886244e-06, + "loss": 0.9007513523101807, + "step": 4149 + }, + { + "epoch": 0.956221198156682, + "grad_norm": 1.1071306366128357, + "learning_rate": 1.1548161947156867e-06, + "loss": 0.8499083518981934, + "step": 4150 + }, + { + "epoch": 0.9564516129032258, + "grad_norm": 0.874419574252059, + "learning_rate": 1.1544397930713836e-06, + "loss": 0.8068628311157227, + "step": 4151 + }, + { + "epoch": 0.9566820276497696, + "grad_norm": 1.1729788609256337, + "learning_rate": 1.1540633690103487e-06, + "loss": 0.8357307314872742, + "step": 4152 + }, + { + "epoch": 0.9569124423963133, + "grad_norm": 1.262397502444813, + "learning_rate": 1.1536869225872198e-06, + "loss": 0.7650378942489624, + "step": 4153 + }, + { + "epoch": 0.9571428571428572, + "grad_norm": 0.9933463317010283, + "learning_rate": 1.1533104538566376e-06, + "loss": 0.8717354536056519, + "step": 4154 + }, + { + "epoch": 0.9573732718894009, + "grad_norm": 0.9807638290234347, + "learning_rate": 1.152933962873246e-06, + "loss": 0.6314762830734253, + "step": 4155 + }, + { + "epoch": 0.9576036866359448, + "grad_norm": 1.1279705073097503, + "learning_rate": 1.152557449691692e-06, + "loss": 0.8949059844017029, + "step": 4156 + }, + { + "epoch": 0.9578341013824885, + "grad_norm": 1.137203803563717, + "learning_rate": 1.1521809143666261e-06, + "loss": 0.7862699031829834, + "step": 4157 + }, + { + "epoch": 0.9580645161290322, + "grad_norm": 0.8970512868442762, + "learning_rate": 1.151804356952702e-06, + "loss": 0.7954641580581665, + "step": 4158 + }, + { + 
"epoch": 0.958294930875576, + "grad_norm": 1.0478069911824797, + "learning_rate": 1.1514277775045766e-06, + "loss": 0.7654163241386414, + "step": 4159 + }, + { + "epoch": 0.9585253456221198, + "grad_norm": 1.0321973050954667, + "learning_rate": 1.1510511760769097e-06, + "loss": 0.7050681114196777, + "step": 4160 + }, + { + "epoch": 0.9587557603686636, + "grad_norm": 1.0667493196933242, + "learning_rate": 1.1506745527243646e-06, + "loss": 0.8646515607833862, + "step": 4161 + }, + { + "epoch": 0.9589861751152073, + "grad_norm": 0.9392654190881413, + "learning_rate": 1.1502979075016078e-06, + "loss": 0.7427883148193359, + "step": 4162 + }, + { + "epoch": 0.9592165898617512, + "grad_norm": 1.2506151155745373, + "learning_rate": 1.1499212404633083e-06, + "loss": 0.7800190448760986, + "step": 4163 + }, + { + "epoch": 0.9594470046082949, + "grad_norm": 1.0487739651932841, + "learning_rate": 1.1495445516641394e-06, + "loss": 0.789481520652771, + "step": 4164 + }, + { + "epoch": 0.9596774193548387, + "grad_norm": 0.8332785453272284, + "learning_rate": 1.1491678411587768e-06, + "loss": 0.7975008487701416, + "step": 4165 + }, + { + "epoch": 0.9599078341013825, + "grad_norm": 0.9306560917040928, + "learning_rate": 1.1487911090018994e-06, + "loss": 0.7964596748352051, + "step": 4166 + }, + { + "epoch": 0.9601382488479263, + "grad_norm": 0.8915843631095149, + "learning_rate": 1.1484143552481895e-06, + "loss": 0.7008803486824036, + "step": 4167 + }, + { + "epoch": 0.96036866359447, + "grad_norm": 0.888889684402262, + "learning_rate": 1.1480375799523328e-06, + "loss": 0.708189070224762, + "step": 4168 + }, + { + "epoch": 0.9605990783410139, + "grad_norm": 1.1069917813185677, + "learning_rate": 1.1476607831690167e-06, + "loss": 0.8207682371139526, + "step": 4169 + }, + { + "epoch": 0.9608294930875576, + "grad_norm": 1.200280235865814, + "learning_rate": 1.1472839649529337e-06, + "loss": 0.7682942152023315, + "step": 4170 + }, + { + "epoch": 0.9610599078341013, + "grad_norm": 
1.0122999990692296, + "learning_rate": 1.1469071253587785e-06, + "loss": 0.8435598611831665, + "step": 4171 + }, + { + "epoch": 0.9612903225806452, + "grad_norm": 0.79536207500534, + "learning_rate": 1.1465302644412483e-06, + "loss": 0.7516113519668579, + "step": 4172 + }, + { + "epoch": 0.9615207373271889, + "grad_norm": 0.881539477347835, + "learning_rate": 1.1461533822550442e-06, + "loss": 0.7125411629676819, + "step": 4173 + }, + { + "epoch": 0.9617511520737327, + "grad_norm": 0.9108745928942158, + "learning_rate": 1.14577647885487e-06, + "loss": 0.7560747861862183, + "step": 4174 + }, + { + "epoch": 0.9619815668202765, + "grad_norm": 0.9027443230900505, + "learning_rate": 1.1453995542954332e-06, + "loss": 0.6702673435211182, + "step": 4175 + }, + { + "epoch": 0.9622119815668203, + "grad_norm": 1.1520258504461998, + "learning_rate": 1.1450226086314433e-06, + "loss": 0.8083088397979736, + "step": 4176 + }, + { + "epoch": 0.962442396313364, + "grad_norm": 0.9906259449003554, + "learning_rate": 1.1446456419176135e-06, + "loss": 0.7579925060272217, + "step": 4177 + }, + { + "epoch": 0.9626728110599079, + "grad_norm": 0.9460352601625827, + "learning_rate": 1.1442686542086609e-06, + "loss": 0.713416576385498, + "step": 4178 + }, + { + "epoch": 0.9629032258064516, + "grad_norm": 1.1770844867552515, + "learning_rate": 1.1438916455593035e-06, + "loss": 0.7767639756202698, + "step": 4179 + }, + { + "epoch": 0.9631336405529954, + "grad_norm": 1.0244180953454374, + "learning_rate": 1.1435146160242645e-06, + "loss": 0.7493964433670044, + "step": 4180 + }, + { + "epoch": 0.9633640552995392, + "grad_norm": 1.1249907720020325, + "learning_rate": 1.1431375656582692e-06, + "loss": 0.8789365291595459, + "step": 4181 + }, + { + "epoch": 0.9635944700460829, + "grad_norm": 1.177047767616621, + "learning_rate": 1.1427604945160457e-06, + "loss": 0.7750524878501892, + "step": 4182 + }, + { + "epoch": 0.9638248847926267, + "grad_norm": 1.1195166665130392, + "learning_rate": 
1.142383402652325e-06, + "loss": 0.9330715537071228, + "step": 4183 + }, + { + "epoch": 0.9640552995391705, + "grad_norm": 0.933339002257347, + "learning_rate": 1.142006290121842e-06, + "loss": 0.6845035552978516, + "step": 4184 + }, + { + "epoch": 0.9642857142857143, + "grad_norm": 0.9794843601160967, + "learning_rate": 1.1416291569793343e-06, + "loss": 0.7295390963554382, + "step": 4185 + }, + { + "epoch": 0.964516129032258, + "grad_norm": 1.0666753158619988, + "learning_rate": 1.1412520032795419e-06, + "loss": 0.6869080066680908, + "step": 4186 + }, + { + "epoch": 0.9647465437788019, + "grad_norm": 1.506743316898968, + "learning_rate": 1.140874829077208e-06, + "loss": 1.0916842222213745, + "step": 4187 + }, + { + "epoch": 0.9649769585253456, + "grad_norm": 1.0539994363877199, + "learning_rate": 1.1404976344270793e-06, + "loss": 0.7487984299659729, + "step": 4188 + }, + { + "epoch": 0.9652073732718894, + "grad_norm": 1.024674697115665, + "learning_rate": 1.140120419383905e-06, + "loss": 0.8852604627609253, + "step": 4189 + }, + { + "epoch": 0.9654377880184332, + "grad_norm": 1.065174441144157, + "learning_rate": 1.139743184002437e-06, + "loss": 0.7384698987007141, + "step": 4190 + }, + { + "epoch": 0.965668202764977, + "grad_norm": 1.2009691028192717, + "learning_rate": 1.1393659283374312e-06, + "loss": 0.8033223152160645, + "step": 4191 + }, + { + "epoch": 0.9658986175115207, + "grad_norm": 1.2698866658546557, + "learning_rate": 1.1389886524436453e-06, + "loss": 0.8870355486869812, + "step": 4192 + }, + { + "epoch": 0.9661290322580646, + "grad_norm": 1.1198376045036553, + "learning_rate": 1.1386113563758405e-06, + "loss": 0.869537353515625, + "step": 4193 + }, + { + "epoch": 0.9663594470046083, + "grad_norm": 1.027781409519754, + "learning_rate": 1.1382340401887808e-06, + "loss": 0.8564068675041199, + "step": 4194 + }, + { + "epoch": 0.966589861751152, + "grad_norm": 0.9894593103049535, + "learning_rate": 1.1378567039372332e-06, + "loss": 0.7988623380661011, + 
"step": 4195 + }, + { + "epoch": 0.9668202764976959, + "grad_norm": 1.0843651981255995, + "learning_rate": 1.1374793476759673e-06, + "loss": 0.9405556917190552, + "step": 4196 + }, + { + "epoch": 0.9670506912442396, + "grad_norm": 0.8756334921680484, + "learning_rate": 1.137101971459756e-06, + "loss": 0.6757407188415527, + "step": 4197 + }, + { + "epoch": 0.9672811059907834, + "grad_norm": 1.1855730012050456, + "learning_rate": 1.1367245753433757e-06, + "loss": 0.7521541118621826, + "step": 4198 + }, + { + "epoch": 0.9675115207373272, + "grad_norm": 1.0137943151941313, + "learning_rate": 1.1363471593816037e-06, + "loss": 0.7306162714958191, + "step": 4199 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8912209844157076, + "learning_rate": 1.135969723629222e-06, + "loss": 0.6884766817092896, + "step": 4200 + }, + { + "epoch": 0.9679723502304147, + "grad_norm": 1.2084507323846643, + "learning_rate": 1.1355922681410152e-06, + "loss": 0.8420373201370239, + "step": 4201 + }, + { + "epoch": 0.9682027649769586, + "grad_norm": 0.7638761509020496, + "learning_rate": 1.1352147929717704e-06, + "loss": 0.7252322435379028, + "step": 4202 + }, + { + "epoch": 0.9684331797235023, + "grad_norm": 0.9448982669089191, + "learning_rate": 1.134837298176277e-06, + "loss": 0.6375538110733032, + "step": 4203 + }, + { + "epoch": 0.9686635944700461, + "grad_norm": 1.0629192948024473, + "learning_rate": 1.1344597838093283e-06, + "loss": 0.713671863079071, + "step": 4204 + }, + { + "epoch": 0.9688940092165899, + "grad_norm": 1.0319385361068514, + "learning_rate": 1.1340822499257201e-06, + "loss": 0.8591479063034058, + "step": 4205 + }, + { + "epoch": 0.9691244239631336, + "grad_norm": 1.0671754327237228, + "learning_rate": 1.1337046965802505e-06, + "loss": 0.7638808488845825, + "step": 4206 + }, + { + "epoch": 0.9693548387096774, + "grad_norm": 1.1032489557963816, + "learning_rate": 1.1333271238277215e-06, + "loss": 0.8133253455162048, + "step": 4207 + }, + { + "epoch": 
0.9695852534562212, + "grad_norm": 0.9621754998556686, + "learning_rate": 1.132949531722937e-06, + "loss": 0.6938756704330444, + "step": 4208 + }, + { + "epoch": 0.969815668202765, + "grad_norm": 1.171557608199449, + "learning_rate": 1.132571920320704e-06, + "loss": 0.793639063835144, + "step": 4209 + }, + { + "epoch": 0.9700460829493087, + "grad_norm": 1.066219056403929, + "learning_rate": 1.132194289675832e-06, + "loss": 0.7188536524772644, + "step": 4210 + }, + { + "epoch": 0.9702764976958526, + "grad_norm": 1.2873690827507545, + "learning_rate": 1.1318166398431343e-06, + "loss": 0.8076587319374084, + "step": 4211 + }, + { + "epoch": 0.9705069124423963, + "grad_norm": 1.2434961707112964, + "learning_rate": 1.1314389708774258e-06, + "loss": 0.8390023708343506, + "step": 4212 + }, + { + "epoch": 0.9707373271889401, + "grad_norm": 1.2800250293744322, + "learning_rate": 1.1310612828335243e-06, + "loss": 0.8395706415176392, + "step": 4213 + }, + { + "epoch": 0.9709677419354839, + "grad_norm": 1.1156221851257155, + "learning_rate": 1.1306835757662515e-06, + "loss": 0.9672995805740356, + "step": 4214 + }, + { + "epoch": 0.9711981566820277, + "grad_norm": 1.1859433022618981, + "learning_rate": 1.1303058497304303e-06, + "loss": 0.7716202735900879, + "step": 4215 + }, + { + "epoch": 0.9714285714285714, + "grad_norm": 0.9257750691433206, + "learning_rate": 1.1299281047808876e-06, + "loss": 0.6318329572677612, + "step": 4216 + }, + { + "epoch": 0.9716589861751153, + "grad_norm": 1.1802189065520408, + "learning_rate": 1.1295503409724525e-06, + "loss": 0.8287553787231445, + "step": 4217 + }, + { + "epoch": 0.971889400921659, + "grad_norm": 0.835147088990129, + "learning_rate": 1.129172558359957e-06, + "loss": 0.6903107762336731, + "step": 4218 + }, + { + "epoch": 0.9721198156682027, + "grad_norm": 0.9693907793654548, + "learning_rate": 1.1287947569982355e-06, + "loss": 0.684443473815918, + "step": 4219 + }, + { + "epoch": 0.9723502304147466, + "grad_norm": 1.2152908203730401, 
+ "learning_rate": 1.1284169369421254e-06, + "loss": 0.8566167950630188, + "step": 4220 + }, + { + "epoch": 0.9725806451612903, + "grad_norm": 1.0787740661687364, + "learning_rate": 1.1280390982464673e-06, + "loss": 0.8103536367416382, + "step": 4221 + }, + { + "epoch": 0.9728110599078341, + "grad_norm": 1.115333195517037, + "learning_rate": 1.1276612409661036e-06, + "loss": 0.8027071356773376, + "step": 4222 + }, + { + "epoch": 0.9730414746543778, + "grad_norm": 1.1442493875477038, + "learning_rate": 1.1272833651558796e-06, + "loss": 0.8251115679740906, + "step": 4223 + }, + { + "epoch": 0.9732718894009217, + "grad_norm": 1.1151561398542829, + "learning_rate": 1.1269054708706437e-06, + "loss": 0.6468047499656677, + "step": 4224 + }, + { + "epoch": 0.9735023041474654, + "grad_norm": 1.129830296326307, + "learning_rate": 1.1265275581652465e-06, + "loss": 0.8085706233978271, + "step": 4225 + }, + { + "epoch": 0.9737327188940093, + "grad_norm": 1.139574441171448, + "learning_rate": 1.1261496270945418e-06, + "loss": 0.8396503925323486, + "step": 4226 + }, + { + "epoch": 0.973963133640553, + "grad_norm": 0.9978900351940978, + "learning_rate": 1.1257716777133861e-06, + "loss": 0.7860006093978882, + "step": 4227 + }, + { + "epoch": 0.9741935483870968, + "grad_norm": 1.1484873689809545, + "learning_rate": 1.1253937100766373e-06, + "loss": 0.8630701303482056, + "step": 4228 + }, + { + "epoch": 0.9744239631336405, + "grad_norm": 0.9488769562872501, + "learning_rate": 1.1250157242391577e-06, + "loss": 0.8363114595413208, + "step": 4229 + }, + { + "epoch": 0.9746543778801844, + "grad_norm": 1.1415512207130691, + "learning_rate": 1.1246377202558114e-06, + "loss": 0.7837141156196594, + "step": 4230 + }, + { + "epoch": 0.9748847926267281, + "grad_norm": 1.3474534084840375, + "learning_rate": 1.1242596981814648e-06, + "loss": 0.8283151984214783, + "step": 4231 + }, + { + "epoch": 0.9751152073732718, + "grad_norm": 1.2728043293758005, + "learning_rate": 1.1238816580709878e-06, + 
"loss": 0.9232061505317688, + "step": 4232 + }, + { + "epoch": 0.9753456221198157, + "grad_norm": 1.125514954365521, + "learning_rate": 1.123503599979252e-06, + "loss": 0.8721164464950562, + "step": 4233 + }, + { + "epoch": 0.9755760368663594, + "grad_norm": 1.0382014546922784, + "learning_rate": 1.1231255239611321e-06, + "loss": 0.9398131370544434, + "step": 4234 + }, + { + "epoch": 0.9758064516129032, + "grad_norm": 1.0916134182788353, + "learning_rate": 1.1227474300715054e-06, + "loss": 0.8124324083328247, + "step": 4235 + }, + { + "epoch": 0.976036866359447, + "grad_norm": 0.8607187401974831, + "learning_rate": 1.1223693183652515e-06, + "loss": 0.8532534837722778, + "step": 4236 + }, + { + "epoch": 0.9762672811059908, + "grad_norm": 1.10871517745179, + "learning_rate": 1.1219911888972536e-06, + "loss": 0.7547662258148193, + "step": 4237 + }, + { + "epoch": 0.9764976958525345, + "grad_norm": 1.036940513326952, + "learning_rate": 1.1216130417223956e-06, + "loss": 0.7407231330871582, + "step": 4238 + }, + { + "epoch": 0.9767281105990784, + "grad_norm": 1.0573090435680337, + "learning_rate": 1.1212348768955657e-06, + "loss": 0.8190197944641113, + "step": 4239 + }, + { + "epoch": 0.9769585253456221, + "grad_norm": 1.111465926757279, + "learning_rate": 1.1208566944716542e-06, + "loss": 0.6641337871551514, + "step": 4240 + }, + { + "epoch": 0.977188940092166, + "grad_norm": 1.224342353107687, + "learning_rate": 1.120478494505553e-06, + "loss": 0.8953202962875366, + "step": 4241 + }, + { + "epoch": 0.9774193548387097, + "grad_norm": 0.9676272600083323, + "learning_rate": 1.1201002770521583e-06, + "loss": 0.7803191542625427, + "step": 4242 + }, + { + "epoch": 0.9776497695852534, + "grad_norm": 1.1107043139306134, + "learning_rate": 1.1197220421663674e-06, + "loss": 0.6827100515365601, + "step": 4243 + }, + { + "epoch": 0.9778801843317972, + "grad_norm": 1.2085442462659117, + "learning_rate": 1.1193437899030802e-06, + "loss": 0.8513565063476562, + "step": 4244 + }, + { + 
"epoch": 0.978110599078341, + "grad_norm": 0.9785496460004156, + "learning_rate": 1.1189655203172e-06, + "loss": 0.7196829915046692, + "step": 4245 + }, + { + "epoch": 0.9783410138248848, + "grad_norm": 1.0764048064511267, + "learning_rate": 1.1185872334636319e-06, + "loss": 0.7823485136032104, + "step": 4246 + }, + { + "epoch": 0.9785714285714285, + "grad_norm": 1.0963006166840967, + "learning_rate": 1.1182089293972841e-06, + "loss": 0.7178136110305786, + "step": 4247 + }, + { + "epoch": 0.9788018433179724, + "grad_norm": 1.0782886091125194, + "learning_rate": 1.1178306081730664e-06, + "loss": 0.7746715545654297, + "step": 4248 + }, + { + "epoch": 0.9790322580645161, + "grad_norm": 0.9177757629071243, + "learning_rate": 1.117452269845892e-06, + "loss": 0.8829167485237122, + "step": 4249 + }, + { + "epoch": 0.9792626728110599, + "grad_norm": 0.9096983569344097, + "learning_rate": 1.1170739144706764e-06, + "loss": 0.7592206001281738, + "step": 4250 + }, + { + "epoch": 0.9794930875576037, + "grad_norm": 0.8361017174057647, + "learning_rate": 1.1166955421023368e-06, + "loss": 0.8107382655143738, + "step": 4251 + }, + { + "epoch": 0.9797235023041475, + "grad_norm": 0.9837092835211146, + "learning_rate": 1.116317152795794e-06, + "loss": 0.6807001829147339, + "step": 4252 + }, + { + "epoch": 0.9799539170506912, + "grad_norm": 1.1872199804636603, + "learning_rate": 1.1159387466059705e-06, + "loss": 0.7752517461776733, + "step": 4253 + }, + { + "epoch": 0.9801843317972351, + "grad_norm": 0.8560133871531077, + "learning_rate": 1.115560323587791e-06, + "loss": 0.7484745383262634, + "step": 4254 + }, + { + "epoch": 0.9804147465437788, + "grad_norm": 1.153488759551228, + "learning_rate": 1.1151818837961838e-06, + "loss": 0.877413809299469, + "step": 4255 + }, + { + "epoch": 0.9806451612903225, + "grad_norm": 1.0087457568089837, + "learning_rate": 1.1148034272860785e-06, + "loss": 0.7806656360626221, + "step": 4256 + }, + { + "epoch": 0.9808755760368664, + "grad_norm": 
0.849135201735791, + "learning_rate": 1.1144249541124078e-06, + "loss": 0.6938076019287109, + "step": 4257 + }, + { + "epoch": 0.9811059907834101, + "grad_norm": 1.0559339187336096, + "learning_rate": 1.1140464643301064e-06, + "loss": 0.8832957148551941, + "step": 4258 + }, + { + "epoch": 0.9813364055299539, + "grad_norm": 1.1632523287766907, + "learning_rate": 1.1136679579941117e-06, + "loss": 0.7794016003608704, + "step": 4259 + }, + { + "epoch": 0.9815668202764977, + "grad_norm": 0.9689102084269609, + "learning_rate": 1.1132894351593636e-06, + "loss": 0.6877585053443909, + "step": 4260 + }, + { + "epoch": 0.9817972350230415, + "grad_norm": 1.0902109747190951, + "learning_rate": 1.1129108958808037e-06, + "loss": 0.8268473148345947, + "step": 4261 + }, + { + "epoch": 0.9820276497695852, + "grad_norm": 1.0260596307079526, + "learning_rate": 1.112532340213377e-06, + "loss": 0.6717547178268433, + "step": 4262 + }, + { + "epoch": 0.9822580645161291, + "grad_norm": 1.0646130416760407, + "learning_rate": 1.11215376821203e-06, + "loss": 0.849999725818634, + "step": 4263 + }, + { + "epoch": 0.9824884792626728, + "grad_norm": 1.005034332417578, + "learning_rate": 1.1117751799317118e-06, + "loss": 0.6562552452087402, + "step": 4264 + }, + { + "epoch": 0.9827188940092166, + "grad_norm": 1.0885536317886024, + "learning_rate": 1.1113965754273743e-06, + "loss": 0.7734784483909607, + "step": 4265 + }, + { + "epoch": 0.9829493087557604, + "grad_norm": 1.0527283904271951, + "learning_rate": 1.1110179547539717e-06, + "loss": 0.7580564022064209, + "step": 4266 + }, + { + "epoch": 0.9831797235023041, + "grad_norm": 1.121984331535499, + "learning_rate": 1.1106393179664595e-06, + "loss": 0.9207481145858765, + "step": 4267 + }, + { + "epoch": 0.9834101382488479, + "grad_norm": 1.1182241685665208, + "learning_rate": 1.1102606651197968e-06, + "loss": 0.8987482786178589, + "step": 4268 + }, + { + "epoch": 0.9836405529953917, + "grad_norm": 0.8558732255272679, + "learning_rate": 
1.1098819962689445e-06, + "loss": 0.7486778497695923, + "step": 4269 + }, + { + "epoch": 0.9838709677419355, + "grad_norm": 0.9905311956335509, + "learning_rate": 1.1095033114688662e-06, + "loss": 0.7387109994888306, + "step": 4270 + }, + { + "epoch": 0.9841013824884792, + "grad_norm": 0.913366940312768, + "learning_rate": 1.109124610774527e-06, + "loss": 0.7337637543678284, + "step": 4271 + }, + { + "epoch": 0.9843317972350231, + "grad_norm": 1.1127819698251733, + "learning_rate": 1.1087458942408952e-06, + "loss": 0.7419463396072388, + "step": 4272 + }, + { + "epoch": 0.9845622119815668, + "grad_norm": 1.0024132905496845, + "learning_rate": 1.1083671619229407e-06, + "loss": 0.7525068521499634, + "step": 4273 + }, + { + "epoch": 0.9847926267281106, + "grad_norm": 1.2794306882440036, + "learning_rate": 1.107988413875636e-06, + "loss": 0.8593931198120117, + "step": 4274 + }, + { + "epoch": 0.9850230414746544, + "grad_norm": 1.1058497522784536, + "learning_rate": 1.107609650153956e-06, + "loss": 0.9123519659042358, + "step": 4275 + }, + { + "epoch": 0.9852534562211982, + "grad_norm": 1.0134863035075283, + "learning_rate": 1.107230870812878e-06, + "loss": 0.7099615335464478, + "step": 4276 + }, + { + "epoch": 0.9854838709677419, + "grad_norm": 1.0305482113277953, + "learning_rate": 1.1068520759073807e-06, + "loss": 0.9525141716003418, + "step": 4277 + }, + { + "epoch": 0.9857142857142858, + "grad_norm": 1.078520213597711, + "learning_rate": 1.106473265492446e-06, + "loss": 0.8360154628753662, + "step": 4278 + }, + { + "epoch": 0.9859447004608295, + "grad_norm": 0.835665323629814, + "learning_rate": 1.106094439623058e-06, + "loss": 0.7788960933685303, + "step": 4279 + }, + { + "epoch": 0.9861751152073732, + "grad_norm": 1.4332707697001132, + "learning_rate": 1.1057155983542024e-06, + "loss": 0.76897132396698, + "step": 4280 + }, + { + "epoch": 0.9864055299539171, + "grad_norm": 1.2788839563876278, + "learning_rate": 1.1053367417408678e-06, + "loss": 0.8062764406204224, 
+ "step": 4281 + }, + { + "epoch": 0.9866359447004608, + "grad_norm": 1.0759322336892816, + "learning_rate": 1.1049578698380446e-06, + "loss": 0.6796555519104004, + "step": 4282 + }, + { + "epoch": 0.9868663594470046, + "grad_norm": 1.2156156083740777, + "learning_rate": 1.1045789827007256e-06, + "loss": 0.8495693206787109, + "step": 4283 + }, + { + "epoch": 0.9870967741935484, + "grad_norm": 1.1065961656311563, + "learning_rate": 1.1042000803839054e-06, + "loss": 0.9202588200569153, + "step": 4284 + }, + { + "epoch": 0.9873271889400922, + "grad_norm": 1.0492103887070696, + "learning_rate": 1.1038211629425815e-06, + "loss": 0.8204039335250854, + "step": 4285 + }, + { + "epoch": 0.9875576036866359, + "grad_norm": 1.3424135227199923, + "learning_rate": 1.1034422304317534e-06, + "loss": 0.921082615852356, + "step": 4286 + }, + { + "epoch": 0.9877880184331798, + "grad_norm": 1.1158968493314756, + "learning_rate": 1.1030632829064225e-06, + "loss": 0.8114739656448364, + "step": 4287 + }, + { + "epoch": 0.9880184331797235, + "grad_norm": 1.160400130956272, + "learning_rate": 1.1026843204215924e-06, + "loss": 0.7394933700561523, + "step": 4288 + }, + { + "epoch": 0.9882488479262673, + "grad_norm": 1.102093260654992, + "learning_rate": 1.1023053430322692e-06, + "loss": 0.9515210390090942, + "step": 4289 + }, + { + "epoch": 0.988479262672811, + "grad_norm": 1.0914130901392678, + "learning_rate": 1.1019263507934611e-06, + "loss": 0.6729186773300171, + "step": 4290 + }, + { + "epoch": 0.9887096774193549, + "grad_norm": 0.9547635126100301, + "learning_rate": 1.1015473437601776e-06, + "loss": 0.6455283164978027, + "step": 4291 + }, + { + "epoch": 0.9889400921658986, + "grad_norm": 1.1259220869244864, + "learning_rate": 1.1011683219874322e-06, + "loss": 0.8071424961090088, + "step": 4292 + }, + { + "epoch": 0.9891705069124423, + "grad_norm": 0.8980294635582122, + "learning_rate": 1.1007892855302385e-06, + "loss": 0.7287160754203796, + "step": 4293 + }, + { + "epoch": 
0.9894009216589862, + "grad_norm": 0.956104694967055, + "learning_rate": 1.1004102344436135e-06, + "loss": 0.7916513681411743, + "step": 4294 + }, + { + "epoch": 0.9896313364055299, + "grad_norm": 0.948939194234829, + "learning_rate": 1.1000311687825757e-06, + "loss": 0.8075610399246216, + "step": 4295 + }, + { + "epoch": 0.9898617511520738, + "grad_norm": 0.8467724433306772, + "learning_rate": 1.0996520886021465e-06, + "loss": 0.6144437193870544, + "step": 4296 + }, + { + "epoch": 0.9900921658986175, + "grad_norm": 1.1816936561057356, + "learning_rate": 1.0992729939573482e-06, + "loss": 0.830337643623352, + "step": 4297 + }, + { + "epoch": 0.9903225806451613, + "grad_norm": 1.1631921516982922, + "learning_rate": 1.0988938849032063e-06, + "loss": 0.7104393243789673, + "step": 4298 + }, + { + "epoch": 0.990552995391705, + "grad_norm": 1.0166827801425276, + "learning_rate": 1.0985147614947484e-06, + "loss": 0.746238112449646, + "step": 4299 + }, + { + "epoch": 0.9907834101382489, + "grad_norm": 0.8744941548736713, + "learning_rate": 1.0981356237870027e-06, + "loss": 0.7309597730636597, + "step": 4300 + }, + { + "epoch": 0.9910138248847926, + "grad_norm": 1.1787483382236952, + "learning_rate": 1.0977564718350013e-06, + "loss": 0.799136757850647, + "step": 4301 + }, + { + "epoch": 0.9912442396313365, + "grad_norm": 1.146252036070138, + "learning_rate": 1.0973773056937776e-06, + "loss": 0.7477747201919556, + "step": 4302 + }, + { + "epoch": 0.9914746543778802, + "grad_norm": 1.1466743668258872, + "learning_rate": 1.0969981254183668e-06, + "loss": 0.8051053285598755, + "step": 4303 + }, + { + "epoch": 0.9917050691244239, + "grad_norm": 0.9910519080633017, + "learning_rate": 1.0966189310638063e-06, + "loss": 0.8023163080215454, + "step": 4304 + }, + { + "epoch": 0.9919354838709677, + "grad_norm": 0.9483313078672773, + "learning_rate": 1.096239722685136e-06, + "loss": 0.6804348230361938, + "step": 4305 + }, + { + "epoch": 0.9921658986175115, + "grad_norm": 
1.119857177527024, + "learning_rate": 1.0958605003373976e-06, + "loss": 0.8276509046554565, + "step": 4306 + }, + { + "epoch": 0.9923963133640553, + "grad_norm": 1.2511674827094457, + "learning_rate": 1.095481264075634e-06, + "loss": 0.9733830690383911, + "step": 4307 + }, + { + "epoch": 0.992626728110599, + "grad_norm": 1.070745120202566, + "learning_rate": 1.0951020139548917e-06, + "loss": 0.824803352355957, + "step": 4308 + }, + { + "epoch": 0.9928571428571429, + "grad_norm": 1.100108017822232, + "learning_rate": 1.094722750030218e-06, + "loss": 0.8144090175628662, + "step": 4309 + }, + { + "epoch": 0.9930875576036866, + "grad_norm": 1.1329325704330306, + "learning_rate": 1.0943434723566623e-06, + "loss": 0.8394016027450562, + "step": 4310 + }, + { + "epoch": 0.9933179723502304, + "grad_norm": 1.0464489724076296, + "learning_rate": 1.0939641809892766e-06, + "loss": 0.7688177824020386, + "step": 4311 + }, + { + "epoch": 0.9935483870967742, + "grad_norm": 1.0599291427198123, + "learning_rate": 1.0935848759831144e-06, + "loss": 0.8157391548156738, + "step": 4312 + }, + { + "epoch": 0.993778801843318, + "grad_norm": 1.0072726544693649, + "learning_rate": 1.0932055573932316e-06, + "loss": 0.7618423700332642, + "step": 4313 + }, + { + "epoch": 0.9940092165898617, + "grad_norm": 0.8996295977906229, + "learning_rate": 1.0928262252746848e-06, + "loss": 0.7404567003250122, + "step": 4314 + }, + { + "epoch": 0.9942396313364056, + "grad_norm": 0.8729845318677907, + "learning_rate": 1.092446879682535e-06, + "loss": 0.6825613975524902, + "step": 4315 + }, + { + "epoch": 0.9944700460829493, + "grad_norm": 0.886318283085954, + "learning_rate": 1.0920675206718428e-06, + "loss": 0.6607732772827148, + "step": 4316 + }, + { + "epoch": 0.994700460829493, + "grad_norm": 1.1703494407740602, + "learning_rate": 1.0916881482976716e-06, + "loss": 0.715195894241333, + "step": 4317 + }, + { + "epoch": 0.9949308755760369, + "grad_norm": 1.0266525014281969, + "learning_rate": 
1.0913087626150872e-06, + "loss": 0.7593914270401001, + "step": 4318 + }, + { + "epoch": 0.9951612903225806, + "grad_norm": 0.9546142286310197, + "learning_rate": 1.090929363679157e-06, + "loss": 0.8368399143218994, + "step": 4319 + }, + { + "epoch": 0.9953917050691244, + "grad_norm": 1.0080836713071024, + "learning_rate": 1.0905499515449499e-06, + "loss": 0.7799170613288879, + "step": 4320 + }, + { + "epoch": 0.9956221198156682, + "grad_norm": 1.0450181436512773, + "learning_rate": 1.0901705262675372e-06, + "loss": 0.8194636702537537, + "step": 4321 + }, + { + "epoch": 0.995852534562212, + "grad_norm": 0.7482572391575254, + "learning_rate": 1.0897910879019917e-06, + "loss": 0.7150344848632812, + "step": 4322 + }, + { + "epoch": 0.9960829493087557, + "grad_norm": 1.0624528328831144, + "learning_rate": 1.089411636503389e-06, + "loss": 0.737568736076355, + "step": 4323 + }, + { + "epoch": 0.9963133640552996, + "grad_norm": 0.9578129661977193, + "learning_rate": 1.0890321721268056e-06, + "loss": 0.7037359476089478, + "step": 4324 + }, + { + "epoch": 0.9965437788018433, + "grad_norm": 1.1660806477651886, + "learning_rate": 1.0886526948273206e-06, + "loss": 0.7664542198181152, + "step": 4325 + }, + { + "epoch": 0.9967741935483871, + "grad_norm": 1.1927624722703807, + "learning_rate": 1.0882732046600138e-06, + "loss": 0.7700943946838379, + "step": 4326 + }, + { + "epoch": 0.9970046082949309, + "grad_norm": 0.9828460552540413, + "learning_rate": 1.0878937016799683e-06, + "loss": 0.7634885311126709, + "step": 4327 + }, + { + "epoch": 0.9972350230414746, + "grad_norm": 0.9138031795649807, + "learning_rate": 1.0875141859422685e-06, + "loss": 0.6784960031509399, + "step": 4328 + }, + { + "epoch": 0.9974654377880184, + "grad_norm": 0.9227707667287056, + "learning_rate": 1.0871346575020002e-06, + "loss": 0.7224948406219482, + "step": 4329 + }, + { + "epoch": 0.9976958525345622, + "grad_norm": 1.140456315375248, + "learning_rate": 1.086755116414252e-06, + "loss": 
0.7886664867401123, + "step": 4330 + }, + { + "epoch": 0.997926267281106, + "grad_norm": 0.8735584486255558, + "learning_rate": 1.0863755627341133e-06, + "loss": 0.7871295809745789, + "step": 4331 + }, + { + "epoch": 0.9981566820276497, + "grad_norm": 0.9703663985745814, + "learning_rate": 1.085995996516676e-06, + "loss": 0.700717568397522, + "step": 4332 + }, + { + "epoch": 0.9983870967741936, + "grad_norm": 1.0137806073331785, + "learning_rate": 1.085616417817034e-06, + "loss": 0.9090461730957031, + "step": 4333 + }, + { + "epoch": 0.9986175115207373, + "grad_norm": 0.8161279565195018, + "learning_rate": 1.0852368266902818e-06, + "loss": 0.7697109580039978, + "step": 4334 + }, + { + "epoch": 0.9988479262672811, + "grad_norm": 1.1335275167371797, + "learning_rate": 1.0848572231915177e-06, + "loss": 0.8135972023010254, + "step": 4335 + }, + { + "epoch": 0.9990783410138249, + "grad_norm": 0.9620227504979613, + "learning_rate": 1.0844776073758392e-06, + "loss": 0.803811252117157, + "step": 4336 + }, + { + "epoch": 0.9993087557603687, + "grad_norm": 1.1159399325844028, + "learning_rate": 1.0840979792983482e-06, + "loss": 0.874006986618042, + "step": 4337 + }, + { + "epoch": 0.9995391705069124, + "grad_norm": 1.0695664725891423, + "learning_rate": 1.0837183390141472e-06, + "loss": 0.7424730062484741, + "step": 4338 + }, + { + "epoch": 0.9997695852534563, + "grad_norm": 1.0413618177070603, + "learning_rate": 1.0833386865783393e-06, + "loss": 0.8219665884971619, + "step": 4339 + }, + { + "epoch": 1.0, + "grad_norm": 1.2200287736254531, + "learning_rate": 1.0829590220460319e-06, + "loss": 0.7065195441246033, + "step": 4340 + }, + { + "epoch": 1.0002304147465437, + "grad_norm": 1.4255251627812264, + "learning_rate": 1.0825793454723324e-06, + "loss": 0.7988346219062805, + "step": 4341 + }, + { + "epoch": 1.0004608294930875, + "grad_norm": 0.9544404961531333, + "learning_rate": 1.08219965691235e-06, + "loss": 0.6731617450714111, + "step": 4342 + }, + { + "epoch": 
1.0006912442396314, + "grad_norm": 1.0713203032897287, + "learning_rate": 1.0818199564211964e-06, + "loss": 0.8058687448501587, + "step": 4343 + }, + { + "epoch": 1.0009216589861751, + "grad_norm": 1.2330384736552804, + "learning_rate": 1.081440244053984e-06, + "loss": 0.8351448178291321, + "step": 4344 + }, + { + "epoch": 1.0011520737327189, + "grad_norm": 0.9578484310628987, + "learning_rate": 1.0810605198658286e-06, + "loss": 0.8619185090065002, + "step": 4345 + }, + { + "epoch": 1.0013824884792626, + "grad_norm": 1.030004028036847, + "learning_rate": 1.0806807839118455e-06, + "loss": 0.7600966691970825, + "step": 4346 + }, + { + "epoch": 1.0016129032258065, + "grad_norm": 1.103182000242006, + "learning_rate": 1.0803010362471536e-06, + "loss": 0.8123422265052795, + "step": 4347 + }, + { + "epoch": 1.0018433179723503, + "grad_norm": 1.0359331933938025, + "learning_rate": 1.0799212769268727e-06, + "loss": 0.8277603983879089, + "step": 4348 + }, + { + "epoch": 1.002073732718894, + "grad_norm": 0.7466130076646643, + "learning_rate": 1.079541506006124e-06, + "loss": 0.6666774153709412, + "step": 4349 + }, + { + "epoch": 1.0023041474654377, + "grad_norm": 1.0582236596847403, + "learning_rate": 1.0791617235400313e-06, + "loss": 0.8483254909515381, + "step": 4350 + }, + { + "epoch": 1.0025345622119817, + "grad_norm": 0.9094409000603249, + "learning_rate": 1.0787819295837193e-06, + "loss": 0.6585661172866821, + "step": 4351 + }, + { + "epoch": 1.0027649769585254, + "grad_norm": 1.0274936512349702, + "learning_rate": 1.0784021241923142e-06, + "loss": 0.7591124773025513, + "step": 4352 + }, + { + "epoch": 1.0029953917050691, + "grad_norm": 1.0201165998262116, + "learning_rate": 1.078022307420945e-06, + "loss": 0.7305805683135986, + "step": 4353 + }, + { + "epoch": 1.0032258064516129, + "grad_norm": 0.8894858318623733, + "learning_rate": 1.0776424793247407e-06, + "loss": 0.6558996438980103, + "step": 4354 + }, + { + "epoch": 1.0034562211981566, + "grad_norm": 
1.313034349644303, + "learning_rate": 1.0772626399588336e-06, + "loss": 0.6837360262870789, + "step": 4355 + }, + { + "epoch": 1.0036866359447005, + "grad_norm": 0.9187212026563307, + "learning_rate": 1.0768827893783562e-06, + "loss": 0.778124988079071, + "step": 4356 + }, + { + "epoch": 1.0039170506912443, + "grad_norm": 1.0828207561971888, + "learning_rate": 1.0765029276384438e-06, + "loss": 0.7676408886909485, + "step": 4357 + }, + { + "epoch": 1.004147465437788, + "grad_norm": 1.1604376015370672, + "learning_rate": 1.0761230547942333e-06, + "loss": 0.854246973991394, + "step": 4358 + }, + { + "epoch": 1.0043778801843317, + "grad_norm": 0.9177073619188721, + "learning_rate": 1.0757431709008615e-06, + "loss": 0.716766893863678, + "step": 4359 + }, + { + "epoch": 1.0046082949308757, + "grad_norm": 0.9439720321299626, + "learning_rate": 1.075363276013469e-06, + "loss": 0.6827799081802368, + "step": 4360 + }, + { + "epoch": 1.0048387096774194, + "grad_norm": 0.9539231430903122, + "learning_rate": 1.074983370187197e-06, + "loss": 0.7977348566055298, + "step": 4361 + }, + { + "epoch": 1.0050691244239631, + "grad_norm": 1.1227456227969494, + "learning_rate": 1.0746034534771878e-06, + "loss": 0.6958035826683044, + "step": 4362 + }, + { + "epoch": 1.0052995391705069, + "grad_norm": 0.9288361874867539, + "learning_rate": 1.0742235259385861e-06, + "loss": 0.8407979607582092, + "step": 4363 + }, + { + "epoch": 1.0055299539170508, + "grad_norm": 0.8466973629768922, + "learning_rate": 1.073843587626538e-06, + "loss": 0.8180495500564575, + "step": 4364 + }, + { + "epoch": 1.0057603686635945, + "grad_norm": 0.9973113541484702, + "learning_rate": 1.0734636385961907e-06, + "loss": 0.7551306486129761, + "step": 4365 + }, + { + "epoch": 1.0059907834101383, + "grad_norm": 1.1054013447474482, + "learning_rate": 1.0730836789026936e-06, + "loss": 0.6598455309867859, + "step": 4366 + }, + { + "epoch": 1.006221198156682, + "grad_norm": 0.9578758202335947, + "learning_rate": 
1.0727037086011971e-06, + "loss": 0.9186126589775085, + "step": 4367 + }, + { + "epoch": 1.0064516129032257, + "grad_norm": 1.0208878451508383, + "learning_rate": 1.0723237277468538e-06, + "loss": 0.8491259813308716, + "step": 4368 + }, + { + "epoch": 1.0066820276497697, + "grad_norm": 1.0678483382751343, + "learning_rate": 1.071943736394817e-06, + "loss": 0.6938691139221191, + "step": 4369 + }, + { + "epoch": 1.0069124423963134, + "grad_norm": 1.1084737690479445, + "learning_rate": 1.0715637346002423e-06, + "loss": 0.801313579082489, + "step": 4370 + }, + { + "epoch": 1.0071428571428571, + "grad_norm": 0.983698557868892, + "learning_rate": 1.071183722418286e-06, + "loss": 0.7663706541061401, + "step": 4371 + }, + { + "epoch": 1.0073732718894008, + "grad_norm": 0.8508185045615759, + "learning_rate": 1.070803699904107e-06, + "loss": 0.7434467077255249, + "step": 4372 + }, + { + "epoch": 1.0076036866359448, + "grad_norm": 1.331303605136832, + "learning_rate": 1.0704236671128643e-06, + "loss": 0.8366774320602417, + "step": 4373 + }, + { + "epoch": 1.0078341013824885, + "grad_norm": 1.276875198714222, + "learning_rate": 1.07004362409972e-06, + "loss": 0.7027710676193237, + "step": 4374 + }, + { + "epoch": 1.0080645161290323, + "grad_norm": 1.1122995966371962, + "learning_rate": 1.0696635709198357e-06, + "loss": 0.7965548038482666, + "step": 4375 + }, + { + "epoch": 1.008294930875576, + "grad_norm": 1.0387807228424288, + "learning_rate": 1.0692835076283768e-06, + "loss": 0.8058432340621948, + "step": 4376 + }, + { + "epoch": 1.0085253456221197, + "grad_norm": 1.1870264013217662, + "learning_rate": 1.0689034342805085e-06, + "loss": 0.9056248068809509, + "step": 4377 + }, + { + "epoch": 1.0087557603686637, + "grad_norm": 1.0069765876574615, + "learning_rate": 1.0685233509313979e-06, + "loss": 0.8407673835754395, + "step": 4378 + }, + { + "epoch": 1.0089861751152074, + "grad_norm": 1.3133023777292065, + "learning_rate": 1.0681432576362133e-06, + "loss": 0.9138794541358948, 
+ "step": 4379 + }, + { + "epoch": 1.0092165898617511, + "grad_norm": 1.3361237624577444, + "learning_rate": 1.067763154450125e-06, + "loss": 0.6640630960464478, + "step": 4380 + }, + { + "epoch": 1.0094470046082948, + "grad_norm": 1.4646712113013267, + "learning_rate": 1.0673830414283051e-06, + "loss": 0.9387146234512329, + "step": 4381 + }, + { + "epoch": 1.0096774193548388, + "grad_norm": 1.0228212242769696, + "learning_rate": 1.067002918625926e-06, + "loss": 0.7288271188735962, + "step": 4382 + }, + { + "epoch": 1.0099078341013825, + "grad_norm": 1.1693551967727813, + "learning_rate": 1.0666227860981613e-06, + "loss": 0.7886035442352295, + "step": 4383 + }, + { + "epoch": 1.0101382488479262, + "grad_norm": 1.056596025284508, + "learning_rate": 1.066242643900188e-06, + "loss": 0.6929852962493896, + "step": 4384 + }, + { + "epoch": 1.01036866359447, + "grad_norm": 0.9057033157053335, + "learning_rate": 1.065862492087182e-06, + "loss": 0.7709990739822388, + "step": 4385 + }, + { + "epoch": 1.010599078341014, + "grad_norm": 1.0362803754904506, + "learning_rate": 1.065482330714323e-06, + "loss": 0.811382532119751, + "step": 4386 + }, + { + "epoch": 1.0108294930875577, + "grad_norm": 1.2204693151649666, + "learning_rate": 1.0651021598367905e-06, + "loss": 0.8274353742599487, + "step": 4387 + }, + { + "epoch": 1.0110599078341014, + "grad_norm": 0.9995911348883496, + "learning_rate": 1.0647219795097651e-06, + "loss": 0.7449204921722412, + "step": 4388 + }, + { + "epoch": 1.011290322580645, + "grad_norm": 0.906861932756066, + "learning_rate": 1.0643417897884303e-06, + "loss": 0.675945520401001, + "step": 4389 + }, + { + "epoch": 1.0115207373271888, + "grad_norm": 1.183632210098949, + "learning_rate": 1.06396159072797e-06, + "loss": 0.7329400777816772, + "step": 4390 + }, + { + "epoch": 1.0117511520737328, + "grad_norm": 0.9566645616399831, + "learning_rate": 1.0635813823835692e-06, + "loss": 0.7809139490127563, + "step": 4391 + }, + { + "epoch": 1.0119815668202765, + 
"grad_norm": 1.0167427862718812, + "learning_rate": 1.0632011648104155e-06, + "loss": 0.799081563949585, + "step": 4392 + }, + { + "epoch": 1.0122119815668202, + "grad_norm": 1.0484890321007356, + "learning_rate": 1.062820938063696e-06, + "loss": 0.7738279104232788, + "step": 4393 + }, + { + "epoch": 1.012442396313364, + "grad_norm": 0.9791695127555486, + "learning_rate": 1.0624407021986007e-06, + "loss": 0.895797610282898, + "step": 4394 + }, + { + "epoch": 1.012672811059908, + "grad_norm": 0.9476041908693101, + "learning_rate": 1.0620604572703198e-06, + "loss": 0.6887848973274231, + "step": 4395 + }, + { + "epoch": 1.0129032258064516, + "grad_norm": 1.0915270783702586, + "learning_rate": 1.0616802033340457e-06, + "loss": 0.9540888071060181, + "step": 4396 + }, + { + "epoch": 1.0131336405529954, + "grad_norm": 1.3368596619746418, + "learning_rate": 1.0612999404449721e-06, + "loss": 0.9047783017158508, + "step": 4397 + }, + { + "epoch": 1.013364055299539, + "grad_norm": 0.924946076870977, + "learning_rate": 1.0609196686582931e-06, + "loss": 0.7030448913574219, + "step": 4398 + }, + { + "epoch": 1.013594470046083, + "grad_norm": 0.9501232585433265, + "learning_rate": 1.0605393880292046e-06, + "loss": 0.8097348213195801, + "step": 4399 + }, + { + "epoch": 1.0138248847926268, + "grad_norm": 1.0163791343408108, + "learning_rate": 1.0601590986129045e-06, + "loss": 0.7446185350418091, + "step": 4400 + }, + { + "epoch": 1.0140552995391705, + "grad_norm": 1.0548185515811, + "learning_rate": 1.0597788004645908e-06, + "loss": 0.7450964450836182, + "step": 4401 + }, + { + "epoch": 1.0142857142857142, + "grad_norm": 1.1891450532947472, + "learning_rate": 1.0593984936394632e-06, + "loss": 0.8326355218887329, + "step": 4402 + }, + { + "epoch": 1.014516129032258, + "grad_norm": 1.0194370020803867, + "learning_rate": 1.0590181781927227e-06, + "loss": 0.7013953924179077, + "step": 4403 + }, + { + "epoch": 1.014746543778802, + "grad_norm": 1.2634402455639506, + "learning_rate": 
1.0586378541795723e-06, + "loss": 0.7806364297866821, + "step": 4404 + }, + { + "epoch": 1.0149769585253456, + "grad_norm": 1.2061797737844093, + "learning_rate": 1.0582575216552146e-06, + "loss": 0.8207389116287231, + "step": 4405 + }, + { + "epoch": 1.0152073732718894, + "grad_norm": 1.123863770924685, + "learning_rate": 1.0578771806748545e-06, + "loss": 0.8042873740196228, + "step": 4406 + }, + { + "epoch": 1.015437788018433, + "grad_norm": 0.9837741196260199, + "learning_rate": 1.057496831293699e-06, + "loss": 0.7225071787834167, + "step": 4407 + }, + { + "epoch": 1.015668202764977, + "grad_norm": 0.8165867352878113, + "learning_rate": 1.0571164735669538e-06, + "loss": 0.7783743143081665, + "step": 4408 + }, + { + "epoch": 1.0158986175115208, + "grad_norm": 1.1050702802288892, + "learning_rate": 1.0567361075498286e-06, + "loss": 0.7455039024353027, + "step": 4409 + }, + { + "epoch": 1.0161290322580645, + "grad_norm": 1.0331220241961572, + "learning_rate": 1.0563557332975322e-06, + "loss": 0.7819615602493286, + "step": 4410 + }, + { + "epoch": 1.0163594470046082, + "grad_norm": 1.052305833495017, + "learning_rate": 1.0559753508652758e-06, + "loss": 0.6466404795646667, + "step": 4411 + }, + { + "epoch": 1.0165898617511522, + "grad_norm": 0.9503687927611121, + "learning_rate": 1.0555949603082715e-06, + "loss": 0.8728539943695068, + "step": 4412 + }, + { + "epoch": 1.016820276497696, + "grad_norm": 0.9080353373358744, + "learning_rate": 1.055214561681732e-06, + "loss": 0.6082659959793091, + "step": 4413 + }, + { + "epoch": 1.0170506912442396, + "grad_norm": 1.1401384988886654, + "learning_rate": 1.054834155040872e-06, + "loss": 0.8429103493690491, + "step": 4414 + }, + { + "epoch": 1.0172811059907834, + "grad_norm": 0.9060045457810262, + "learning_rate": 1.0544537404409073e-06, + "loss": 0.7953135967254639, + "step": 4415 + }, + { + "epoch": 1.017511520737327, + "grad_norm": 0.6713482182574511, + "learning_rate": 1.0540733179370542e-06, + "loss": 
0.7243527173995972, + "step": 4416 + }, + { + "epoch": 1.017741935483871, + "grad_norm": 1.4572192259453962, + "learning_rate": 1.0536928875845303e-06, + "loss": 0.6882613897323608, + "step": 4417 + }, + { + "epoch": 1.0179723502304148, + "grad_norm": 0.9719982264568039, + "learning_rate": 1.053312449438555e-06, + "loss": 0.9157286882400513, + "step": 4418 + }, + { + "epoch": 1.0182027649769585, + "grad_norm": 1.1196456434566004, + "learning_rate": 1.0529320035543482e-06, + "loss": 0.7224643230438232, + "step": 4419 + }, + { + "epoch": 1.0184331797235022, + "grad_norm": 1.4712628070157254, + "learning_rate": 1.0525515499871311e-06, + "loss": 0.874829888343811, + "step": 4420 + }, + { + "epoch": 1.0186635944700462, + "grad_norm": 0.9184049522457163, + "learning_rate": 1.0521710887921262e-06, + "loss": 0.6911267042160034, + "step": 4421 + }, + { + "epoch": 1.01889400921659, + "grad_norm": 1.1423796554253005, + "learning_rate": 1.051790620024557e-06, + "loss": 0.9065574407577515, + "step": 4422 + }, + { + "epoch": 1.0191244239631336, + "grad_norm": 1.225714416603257, + "learning_rate": 1.0514101437396474e-06, + "loss": 0.7671108245849609, + "step": 4423 + }, + { + "epoch": 1.0193548387096774, + "grad_norm": 1.3506661037387142, + "learning_rate": 1.051029659992624e-06, + "loss": 0.8706510066986084, + "step": 4424 + }, + { + "epoch": 1.019585253456221, + "grad_norm": 1.4185673299670827, + "learning_rate": 1.0506491688387128e-06, + "loss": 0.741087794303894, + "step": 4425 + }, + { + "epoch": 1.019815668202765, + "grad_norm": 1.0122076007105019, + "learning_rate": 1.0502686703331419e-06, + "loss": 0.8045330047607422, + "step": 4426 + }, + { + "epoch": 1.0200460829493088, + "grad_norm": 1.1768435258548835, + "learning_rate": 1.0498881645311398e-06, + "loss": 0.8464969992637634, + "step": 4427 + }, + { + "epoch": 1.0202764976958525, + "grad_norm": 1.1260966872974236, + "learning_rate": 1.0495076514879367e-06, + "loss": 0.7660650610923767, + "step": 4428 + }, + { + "epoch": 
1.0205069124423962, + "grad_norm": 1.0026539513539563, + "learning_rate": 1.0491271312587636e-06, + "loss": 0.8565669059753418, + "step": 4429 + }, + { + "epoch": 1.0207373271889402, + "grad_norm": 1.306851956145893, + "learning_rate": 1.0487466038988525e-06, + "loss": 0.8884295225143433, + "step": 4430 + }, + { + "epoch": 1.020967741935484, + "grad_norm": 1.0672501887857282, + "learning_rate": 1.0483660694634361e-06, + "loss": 0.7300036549568176, + "step": 4431 + }, + { + "epoch": 1.0211981566820276, + "grad_norm": 1.261937486377886, + "learning_rate": 1.0479855280077493e-06, + "loss": 0.7879898548126221, + "step": 4432 + }, + { + "epoch": 1.0214285714285714, + "grad_norm": 1.5182696761272942, + "learning_rate": 1.0476049795870263e-06, + "loss": 0.9811698198318481, + "step": 4433 + }, + { + "epoch": 1.0216589861751153, + "grad_norm": 1.1962738461411733, + "learning_rate": 1.0472244242565034e-06, + "loss": 0.7706241607666016, + "step": 4434 + }, + { + "epoch": 1.021889400921659, + "grad_norm": 1.289215010975763, + "learning_rate": 1.046843862071418e-06, + "loss": 0.761093020439148, + "step": 4435 + }, + { + "epoch": 1.0221198156682028, + "grad_norm": 1.2142929670752842, + "learning_rate": 1.046463293087008e-06, + "loss": 0.8306092619895935, + "step": 4436 + }, + { + "epoch": 1.0223502304147465, + "grad_norm": 1.0820298518439184, + "learning_rate": 1.0460827173585125e-06, + "loss": 0.9669788479804993, + "step": 4437 + }, + { + "epoch": 1.0225806451612902, + "grad_norm": 1.173748576404213, + "learning_rate": 1.0457021349411715e-06, + "loss": 0.8461639285087585, + "step": 4438 + }, + { + "epoch": 1.0228110599078342, + "grad_norm": 1.0738697424760002, + "learning_rate": 1.0453215458902262e-06, + "loss": 0.7230383157730103, + "step": 4439 + }, + { + "epoch": 1.023041474654378, + "grad_norm": 1.195555915731222, + "learning_rate": 1.0449409502609186e-06, + "loss": 0.7506514191627502, + "step": 4440 + }, + { + "epoch": 1.0232718894009216, + "grad_norm": 1.2468090783946124, 
+ "learning_rate": 1.0445603481084914e-06, + "loss": 0.7530048489570618, + "step": 4441 + }, + { + "epoch": 1.0235023041474653, + "grad_norm": 1.1659142578592716, + "learning_rate": 1.044179739488189e-06, + "loss": 0.8402249813079834, + "step": 4442 + }, + { + "epoch": 1.0237327188940093, + "grad_norm": 0.9379480482149454, + "learning_rate": 1.0437991244552557e-06, + "loss": 0.7661963701248169, + "step": 4443 + }, + { + "epoch": 1.023963133640553, + "grad_norm": 1.484925993605904, + "learning_rate": 1.043418503064937e-06, + "loss": 0.7982668876647949, + "step": 4444 + }, + { + "epoch": 1.0241935483870968, + "grad_norm": 1.5153078123946815, + "learning_rate": 1.0430378753724807e-06, + "loss": 0.899538516998291, + "step": 4445 + }, + { + "epoch": 1.0244239631336405, + "grad_norm": 1.0283178313705175, + "learning_rate": 1.0426572414331337e-06, + "loss": 0.8027441501617432, + "step": 4446 + }, + { + "epoch": 1.0246543778801844, + "grad_norm": 1.0275551729897887, + "learning_rate": 1.0422766013021442e-06, + "loss": 0.8575221300125122, + "step": 4447 + }, + { + "epoch": 1.0248847926267282, + "grad_norm": 1.0529216327738424, + "learning_rate": 1.0418959550347622e-06, + "loss": 0.7001699209213257, + "step": 4448 + }, + { + "epoch": 1.0251152073732719, + "grad_norm": 1.344629476023339, + "learning_rate": 1.041515302686238e-06, + "loss": 0.9296507835388184, + "step": 4449 + }, + { + "epoch": 1.0253456221198156, + "grad_norm": 1.1736142719382505, + "learning_rate": 1.0411346443118222e-06, + "loss": 0.8214550018310547, + "step": 4450 + }, + { + "epoch": 1.0255760368663593, + "grad_norm": 1.111485424859677, + "learning_rate": 1.0407539799667673e-06, + "loss": 0.7598673701286316, + "step": 4451 + }, + { + "epoch": 1.0258064516129033, + "grad_norm": 1.1453890077051856, + "learning_rate": 1.0403733097063265e-06, + "loss": 0.8222990036010742, + "step": 4452 + }, + { + "epoch": 1.026036866359447, + "grad_norm": 0.8681765527907143, + "learning_rate": 1.039992633585753e-06, + "loss": 
0.7860872745513916, + "step": 4453 + }, + { + "epoch": 1.0262672811059907, + "grad_norm": 0.7352315377021262, + "learning_rate": 1.0396119516603018e-06, + "loss": 0.6602796912193298, + "step": 4454 + }, + { + "epoch": 1.0264976958525345, + "grad_norm": 0.7865024675454858, + "learning_rate": 1.0392312639852278e-06, + "loss": 0.554654598236084, + "step": 4455 + }, + { + "epoch": 1.0267281105990784, + "grad_norm": 0.997694873166315, + "learning_rate": 1.0388505706157885e-06, + "loss": 0.7977210879325867, + "step": 4456 + }, + { + "epoch": 1.0269585253456222, + "grad_norm": 0.9315155505189272, + "learning_rate": 1.0384698716072398e-06, + "loss": 0.8770938515663147, + "step": 4457 + }, + { + "epoch": 1.0271889400921659, + "grad_norm": 1.1958306146081352, + "learning_rate": 1.0380891670148403e-06, + "loss": 0.710452675819397, + "step": 4458 + }, + { + "epoch": 1.0274193548387096, + "grad_norm": 1.0231453414790668, + "learning_rate": 1.0377084568938485e-06, + "loss": 0.8876768946647644, + "step": 4459 + }, + { + "epoch": 1.0276497695852536, + "grad_norm": 1.1707146109643827, + "learning_rate": 1.0373277412995241e-06, + "loss": 0.7770971059799194, + "step": 4460 + }, + { + "epoch": 1.0278801843317973, + "grad_norm": 1.2438301523835749, + "learning_rate": 1.0369470202871275e-06, + "loss": 0.9199050068855286, + "step": 4461 + }, + { + "epoch": 1.028110599078341, + "grad_norm": 1.225766455591599, + "learning_rate": 1.0365662939119199e-06, + "loss": 0.7931548357009888, + "step": 4462 + }, + { + "epoch": 1.0283410138248847, + "grad_norm": 0.9403888957806107, + "learning_rate": 1.0361855622291636e-06, + "loss": 0.7484941482543945, + "step": 4463 + }, + { + "epoch": 1.0285714285714285, + "grad_norm": 1.1077517121943607, + "learning_rate": 1.03580482529412e-06, + "loss": 0.7639475464820862, + "step": 4464 + }, + { + "epoch": 1.0288018433179724, + "grad_norm": 0.9266455289292281, + "learning_rate": 1.035424083162054e-06, + "loss": 0.7705268859863281, + "step": 4465 + }, + { + 
"epoch": 1.0290322580645161, + "grad_norm": 1.0602296301972336, + "learning_rate": 1.0350433358882288e-06, + "loss": 0.7714117169380188, + "step": 4466 + }, + { + "epoch": 1.0292626728110599, + "grad_norm": 0.9812855436464868, + "learning_rate": 1.0346625835279102e-06, + "loss": 0.851073145866394, + "step": 4467 + }, + { + "epoch": 1.0294930875576036, + "grad_norm": 0.9352903997309275, + "learning_rate": 1.0342818261363631e-06, + "loss": 0.8001583218574524, + "step": 4468 + }, + { + "epoch": 1.0297235023041476, + "grad_norm": 1.1158901092617035, + "learning_rate": 1.0339010637688547e-06, + "loss": 0.8352588415145874, + "step": 4469 + }, + { + "epoch": 1.0299539170506913, + "grad_norm": 0.91245372061127, + "learning_rate": 1.0335202964806515e-06, + "loss": 0.8136032223701477, + "step": 4470 + }, + { + "epoch": 1.030184331797235, + "grad_norm": 1.1248571903620148, + "learning_rate": 1.0331395243270215e-06, + "loss": 0.8041108846664429, + "step": 4471 + }, + { + "epoch": 1.0304147465437787, + "grad_norm": 0.9370378251466553, + "learning_rate": 1.032758747363234e-06, + "loss": 0.6961067914962769, + "step": 4472 + }, + { + "epoch": 1.0306451612903227, + "grad_norm": 0.8328897533850071, + "learning_rate": 1.0323779656445572e-06, + "loss": 0.8063983917236328, + "step": 4473 + }, + { + "epoch": 1.0308755760368664, + "grad_norm": 1.01915176563276, + "learning_rate": 1.0319971792262618e-06, + "loss": 0.706061601638794, + "step": 4474 + }, + { + "epoch": 1.0311059907834101, + "grad_norm": 1.1193687254143303, + "learning_rate": 1.0316163881636181e-06, + "loss": 0.8510581254959106, + "step": 4475 + }, + { + "epoch": 1.0313364055299539, + "grad_norm": 0.8459775762451333, + "learning_rate": 1.0312355925118975e-06, + "loss": 0.7169028520584106, + "step": 4476 + }, + { + "epoch": 1.0315668202764976, + "grad_norm": 0.8345675502163972, + "learning_rate": 1.0308547923263718e-06, + "loss": 0.7513360977172852, + "step": 4477 + }, + { + "epoch": 1.0317972350230415, + "grad_norm": 
1.1826641384928935, + "learning_rate": 1.030473987662314e-06, + "loss": 0.7408783435821533, + "step": 4478 + }, + { + "epoch": 1.0320276497695853, + "grad_norm": 1.2135549739175484, + "learning_rate": 1.0300931785749974e-06, + "loss": 0.8177747130393982, + "step": 4479 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 1.074036475926982, + "learning_rate": 1.0297123651196954e-06, + "loss": 0.7530791759490967, + "step": 4480 + }, + { + "epoch": 1.0324884792626727, + "grad_norm": 1.2947307404575235, + "learning_rate": 1.0293315473516832e-06, + "loss": 0.7958859205245972, + "step": 4481 + }, + { + "epoch": 1.0327188940092167, + "grad_norm": 1.2482360288136136, + "learning_rate": 1.0289507253262357e-06, + "loss": 0.8719943761825562, + "step": 4482 + }, + { + "epoch": 1.0329493087557604, + "grad_norm": 1.0347953021678673, + "learning_rate": 1.028569899098629e-06, + "loss": 0.7584139108657837, + "step": 4483 + }, + { + "epoch": 1.0331797235023041, + "grad_norm": 1.1621251755994506, + "learning_rate": 1.0281890687241387e-06, + "loss": 0.852983832359314, + "step": 4484 + }, + { + "epoch": 1.0334101382488479, + "grad_norm": 0.995758429643109, + "learning_rate": 1.027808234258043e-06, + "loss": 0.7455692291259766, + "step": 4485 + }, + { + "epoch": 1.0336405529953918, + "grad_norm": 0.9126434588001895, + "learning_rate": 1.0274273957556185e-06, + "loss": 0.7078343629837036, + "step": 4486 + }, + { + "epoch": 1.0338709677419355, + "grad_norm": 1.056440353383354, + "learning_rate": 1.027046553272144e-06, + "loss": 0.7580842971801758, + "step": 4487 + }, + { + "epoch": 1.0341013824884793, + "grad_norm": 0.9071452550966383, + "learning_rate": 1.026665706862898e-06, + "loss": 0.7271389961242676, + "step": 4488 + }, + { + "epoch": 1.034331797235023, + "grad_norm": 1.3819767756673818, + "learning_rate": 1.0262848565831599e-06, + "loss": 0.8271546363830566, + "step": 4489 + }, + { + "epoch": 1.0345622119815667, + "grad_norm": 1.1533046933911033, + "learning_rate": 
1.0259040024882098e-06, + "loss": 0.6799920201301575, + "step": 4490 + }, + { + "epoch": 1.0347926267281107, + "grad_norm": 0.7837273040397605, + "learning_rate": 1.0255231446333277e-06, + "loss": 0.6962645053863525, + "step": 4491 + }, + { + "epoch": 1.0350230414746544, + "grad_norm": 1.2060107344479347, + "learning_rate": 1.0251422830737955e-06, + "loss": 0.8722797632217407, + "step": 4492 + }, + { + "epoch": 1.0352534562211981, + "grad_norm": 1.0328841633467782, + "learning_rate": 1.024761417864894e-06, + "loss": 0.8054880499839783, + "step": 4493 + }, + { + "epoch": 1.0354838709677419, + "grad_norm": 0.9178345615112383, + "learning_rate": 1.0243805490619053e-06, + "loss": 0.8196548223495483, + "step": 4494 + }, + { + "epoch": 1.0357142857142858, + "grad_norm": 1.5010413914558958, + "learning_rate": 1.0239996767201122e-06, + "loss": 0.8197275400161743, + "step": 4495 + }, + { + "epoch": 1.0359447004608295, + "grad_norm": 1.1223467429515472, + "learning_rate": 1.0236188008947978e-06, + "loss": 0.7704858779907227, + "step": 4496 + }, + { + "epoch": 1.0361751152073733, + "grad_norm": 1.2288506828429187, + "learning_rate": 1.0232379216412459e-06, + "loss": 0.8296232223510742, + "step": 4497 + }, + { + "epoch": 1.036405529953917, + "grad_norm": 1.1910482399414777, + "learning_rate": 1.0228570390147404e-06, + "loss": 0.6546601057052612, + "step": 4498 + }, + { + "epoch": 1.036635944700461, + "grad_norm": 1.0493042801064925, + "learning_rate": 1.0224761530705656e-06, + "loss": 0.808987021446228, + "step": 4499 + }, + { + "epoch": 1.0368663594470047, + "grad_norm": 1.0198435860671902, + "learning_rate": 1.0220952638640073e-06, + "loss": 0.862627387046814, + "step": 4500 + }, + { + "epoch": 1.0370967741935484, + "grad_norm": 0.9314966888515314, + "learning_rate": 1.0217143714503507e-06, + "loss": 0.781114935874939, + "step": 4501 + }, + { + "epoch": 1.0373271889400921, + "grad_norm": 1.1732597442137338, + "learning_rate": 1.0213334758848814e-06, + "loss": 
0.7186112403869629, + "step": 4502 + }, + { + "epoch": 1.0375576036866359, + "grad_norm": 0.9870711221115687, + "learning_rate": 1.0209525772228868e-06, + "loss": 0.8112529516220093, + "step": 4503 + }, + { + "epoch": 1.0377880184331798, + "grad_norm": 1.1558866878107408, + "learning_rate": 1.020571675519653e-06, + "loss": 0.7364751100540161, + "step": 4504 + }, + { + "epoch": 1.0380184331797235, + "grad_norm": 1.296821231113786, + "learning_rate": 1.0201907708304681e-06, + "loss": 0.7015886902809143, + "step": 4505 + }, + { + "epoch": 1.0382488479262673, + "grad_norm": 0.8755063657778166, + "learning_rate": 1.0198098632106197e-06, + "loss": 0.7018470168113708, + "step": 4506 + }, + { + "epoch": 1.038479262672811, + "grad_norm": 0.9958013421397902, + "learning_rate": 1.0194289527153953e-06, + "loss": 0.820391058921814, + "step": 4507 + }, + { + "epoch": 1.038709677419355, + "grad_norm": 1.2026544914516983, + "learning_rate": 1.0190480394000844e-06, + "loss": 0.8341129422187805, + "step": 4508 + }, + { + "epoch": 1.0389400921658987, + "grad_norm": 0.8606365913019236, + "learning_rate": 1.0186671233199757e-06, + "loss": 0.7345695495605469, + "step": 4509 + }, + { + "epoch": 1.0391705069124424, + "grad_norm": 1.375974242893794, + "learning_rate": 1.0182862045303589e-06, + "loss": 0.8899500370025635, + "step": 4510 + }, + { + "epoch": 1.0394009216589861, + "grad_norm": 1.001562990779633, + "learning_rate": 1.0179052830865238e-06, + "loss": 0.8158663511276245, + "step": 4511 + }, + { + "epoch": 1.0396313364055298, + "grad_norm": 1.1574048409080129, + "learning_rate": 1.0175243590437604e-06, + "loss": 0.734848141670227, + "step": 4512 + }, + { + "epoch": 1.0398617511520738, + "grad_norm": 1.062511127484639, + "learning_rate": 1.0171434324573596e-06, + "loss": 0.7920876741409302, + "step": 4513 + }, + { + "epoch": 1.0400921658986175, + "grad_norm": 1.2131341489328324, + "learning_rate": 1.0167625033826122e-06, + "loss": 0.9224791526794434, + "step": 4514 + }, + { + 
"epoch": 1.0403225806451613, + "grad_norm": 1.152494191321953, + "learning_rate": 1.0163815718748096e-06, + "loss": 0.7086025476455688, + "step": 4515 + }, + { + "epoch": 1.040552995391705, + "grad_norm": 1.0223491213154539, + "learning_rate": 1.0160006379892434e-06, + "loss": 0.7657936811447144, + "step": 4516 + }, + { + "epoch": 1.040783410138249, + "grad_norm": 1.11296257844156, + "learning_rate": 1.0156197017812058e-06, + "loss": 0.786298394203186, + "step": 4517 + }, + { + "epoch": 1.0410138248847927, + "grad_norm": 1.1998728834800867, + "learning_rate": 1.0152387633059895e-06, + "loss": 0.8667294979095459, + "step": 4518 + }, + { + "epoch": 1.0412442396313364, + "grad_norm": 1.0233425185279803, + "learning_rate": 1.0148578226188866e-06, + "loss": 0.8479517102241516, + "step": 4519 + }, + { + "epoch": 1.0414746543778801, + "grad_norm": 0.8930216519245627, + "learning_rate": 1.0144768797751904e-06, + "loss": 0.6430692076683044, + "step": 4520 + }, + { + "epoch": 1.041705069124424, + "grad_norm": 1.122852329570553, + "learning_rate": 1.0140959348301946e-06, + "loss": 0.874313473701477, + "step": 4521 + }, + { + "epoch": 1.0419354838709678, + "grad_norm": 1.101097598838231, + "learning_rate": 1.013714987839192e-06, + "loss": 0.8439676761627197, + "step": 4522 + }, + { + "epoch": 1.0421658986175115, + "grad_norm": 1.2477053670484948, + "learning_rate": 1.0133340388574774e-06, + "loss": 0.7480089664459229, + "step": 4523 + }, + { + "epoch": 1.0423963133640552, + "grad_norm": 1.3143250159570112, + "learning_rate": 1.012953087940345e-06, + "loss": 0.8786139488220215, + "step": 4524 + }, + { + "epoch": 1.042626728110599, + "grad_norm": 1.1897211165926171, + "learning_rate": 1.0125721351430885e-06, + "loss": 0.8333299160003662, + "step": 4525 + }, + { + "epoch": 1.042857142857143, + "grad_norm": 1.055645356383861, + "learning_rate": 1.0121911805210032e-06, + "loss": 0.8201998472213745, + "step": 4526 + }, + { + "epoch": 1.0430875576036867, + "grad_norm": 
1.160199033506195, + "learning_rate": 1.0118102241293847e-06, + "loss": 0.7793110609054565, + "step": 4527 + }, + { + "epoch": 1.0433179723502304, + "grad_norm": 1.045720270383819, + "learning_rate": 1.0114292660235272e-06, + "loss": 0.7148817777633667, + "step": 4528 + }, + { + "epoch": 1.043548387096774, + "grad_norm": 1.0726942336798908, + "learning_rate": 1.011048306258727e-06, + "loss": 0.7945176362991333, + "step": 4529 + }, + { + "epoch": 1.043778801843318, + "grad_norm": 1.0532791972453868, + "learning_rate": 1.01066734489028e-06, + "loss": 0.7246826887130737, + "step": 4530 + }, + { + "epoch": 1.0440092165898618, + "grad_norm": 1.230297656368, + "learning_rate": 1.0102863819734822e-06, + "loss": 0.7342358827590942, + "step": 4531 + }, + { + "epoch": 1.0442396313364055, + "grad_norm": 1.1072867148521375, + "learning_rate": 1.0099054175636292e-06, + "loss": 0.6837234497070312, + "step": 4532 + }, + { + "epoch": 1.0444700460829492, + "grad_norm": 0.8847188010063922, + "learning_rate": 1.0095244517160184e-06, + "loss": 0.6941408514976501, + "step": 4533 + }, + { + "epoch": 1.0447004608294932, + "grad_norm": 0.9992175314765978, + "learning_rate": 1.009143484485946e-06, + "loss": 0.7835201025009155, + "step": 4534 + }, + { + "epoch": 1.044930875576037, + "grad_norm": 1.1533173348493126, + "learning_rate": 1.0087625159287086e-06, + "loss": 0.7887566089630127, + "step": 4535 + }, + { + "epoch": 1.0451612903225806, + "grad_norm": 0.9980831932241371, + "learning_rate": 1.0083815460996036e-06, + "loss": 0.7106727361679077, + "step": 4536 + }, + { + "epoch": 1.0453917050691244, + "grad_norm": 1.1003103489016812, + "learning_rate": 1.0080005750539287e-06, + "loss": 0.8316382169723511, + "step": 4537 + }, + { + "epoch": 1.045622119815668, + "grad_norm": 1.278017855977623, + "learning_rate": 1.0076196028469805e-06, + "loss": 0.7535592317581177, + "step": 4538 + }, + { + "epoch": 1.045852534562212, + "grad_norm": 1.2167524484109087, + "learning_rate": 
1.0072386295340571e-06, + "loss": 0.9255459308624268, + "step": 4539 + }, + { + "epoch": 1.0460829493087558, + "grad_norm": 0.9884104383515986, + "learning_rate": 1.0068576551704561e-06, + "loss": 0.7415009140968323, + "step": 4540 + }, + { + "epoch": 1.0463133640552995, + "grad_norm": 0.9221193872044946, + "learning_rate": 1.0064766798114758e-06, + "loss": 0.673210620880127, + "step": 4541 + }, + { + "epoch": 1.0465437788018432, + "grad_norm": 1.2907861596502346, + "learning_rate": 1.006095703512414e-06, + "loss": 0.7063118815422058, + "step": 4542 + }, + { + "epoch": 1.0467741935483872, + "grad_norm": 1.0344490200256125, + "learning_rate": 1.005714726328569e-06, + "loss": 0.73606276512146, + "step": 4543 + }, + { + "epoch": 1.047004608294931, + "grad_norm": 1.1024687809140408, + "learning_rate": 1.005333748315239e-06, + "loss": 0.6723713874816895, + "step": 4544 + }, + { + "epoch": 1.0472350230414746, + "grad_norm": 1.0566239460690536, + "learning_rate": 1.0049527695277223e-06, + "loss": 0.643845796585083, + "step": 4545 + }, + { + "epoch": 1.0474654377880184, + "grad_norm": 1.1196128686458957, + "learning_rate": 1.0045717900213175e-06, + "loss": 0.8820847272872925, + "step": 4546 + }, + { + "epoch": 1.047695852534562, + "grad_norm": 1.177142500227169, + "learning_rate": 1.0041908098513239e-06, + "loss": 0.6555176973342896, + "step": 4547 + }, + { + "epoch": 1.047926267281106, + "grad_norm": 1.4046987769414077, + "learning_rate": 1.0038098290730394e-06, + "loss": 0.8142974376678467, + "step": 4548 + }, + { + "epoch": 1.0481566820276498, + "grad_norm": 1.3843242800793498, + "learning_rate": 1.0034288477417634e-06, + "loss": 0.8107532262802124, + "step": 4549 + }, + { + "epoch": 1.0483870967741935, + "grad_norm": 1.093115680939654, + "learning_rate": 1.0030478659127947e-06, + "loss": 0.7078464031219482, + "step": 4550 + }, + { + "epoch": 1.0486175115207372, + "grad_norm": 1.3647000829373368, + "learning_rate": 1.0026668836414322e-06, + "loss": 0.9168295860290527, + 
"step": 4551 + }, + { + "epoch": 1.0488479262672812, + "grad_norm": 0.7154125463388302, + "learning_rate": 1.0022859009829752e-06, + "loss": 0.7384864091873169, + "step": 4552 + }, + { + "epoch": 1.049078341013825, + "grad_norm": 0.9459016715465385, + "learning_rate": 1.0019049179927229e-06, + "loss": 0.6092562675476074, + "step": 4553 + }, + { + "epoch": 1.0493087557603686, + "grad_norm": 1.159695075830992, + "learning_rate": 1.001523934725974e-06, + "loss": 0.713464617729187, + "step": 4554 + }, + { + "epoch": 1.0495391705069124, + "grad_norm": 0.9471368467961162, + "learning_rate": 1.001142951238028e-06, + "loss": 0.7514123916625977, + "step": 4555 + }, + { + "epoch": 1.0497695852534563, + "grad_norm": 1.1414214053095963, + "learning_rate": 1.000761967584184e-06, + "loss": 0.8092095851898193, + "step": 4556 + }, + { + "epoch": 1.05, + "grad_norm": 0.830509770117895, + "learning_rate": 1.000380983819742e-06, + "loss": 0.7609254717826843, + "step": 4557 + }, + { + "epoch": 1.0502304147465438, + "grad_norm": 0.8874333429433436, + "learning_rate": 1e-06, + "loss": 0.8363404273986816, + "step": 4558 + }, + { + "epoch": 1.0504608294930875, + "grad_norm": 1.1983399653767088, + "learning_rate": 9.996190161802584e-07, + "loss": 0.8139501810073853, + "step": 4559 + }, + { + "epoch": 1.0506912442396312, + "grad_norm": 0.8984420952696672, + "learning_rate": 9.992380324158157e-07, + "loss": 0.8064978122711182, + "step": 4560 + }, + { + "epoch": 1.0509216589861752, + "grad_norm": 0.9258651657418774, + "learning_rate": 9.988570487619721e-07, + "loss": 0.7162975072860718, + "step": 4561 + }, + { + "epoch": 1.051152073732719, + "grad_norm": 1.2196516767947119, + "learning_rate": 9.984760652740261e-07, + "loss": 0.9298074245452881, + "step": 4562 + }, + { + "epoch": 1.0513824884792626, + "grad_norm": 1.0770268299074148, + "learning_rate": 9.980950820072773e-07, + "loss": 0.6929144859313965, + "step": 4563 + }, + { + "epoch": 1.0516129032258064, + "grad_norm": 0.919564091111097, + 
"learning_rate": 9.97714099017025e-07, + "loss": 0.6516381502151489, + "step": 4564 + }, + { + "epoch": 1.0518433179723503, + "grad_norm": 1.091105354713726, + "learning_rate": 9.97333116358568e-07, + "loss": 0.864730715751648, + "step": 4565 + }, + { + "epoch": 1.052073732718894, + "grad_norm": 0.9113453911026408, + "learning_rate": 9.969521340872052e-07, + "loss": 0.7911246418952942, + "step": 4566 + }, + { + "epoch": 1.0523041474654378, + "grad_norm": 1.032556518691269, + "learning_rate": 9.965711522582367e-07, + "loss": 0.7766593098640442, + "step": 4567 + }, + { + "epoch": 1.0525345622119815, + "grad_norm": 1.1309615036566574, + "learning_rate": 9.961901709269607e-07, + "loss": 0.7703378200531006, + "step": 4568 + }, + { + "epoch": 1.0527649769585254, + "grad_norm": 0.9296180823184125, + "learning_rate": 9.958091901486762e-07, + "loss": 0.7068926692008972, + "step": 4569 + }, + { + "epoch": 1.0529953917050692, + "grad_norm": 1.0589255494911889, + "learning_rate": 9.954282099786824e-07, + "loss": 0.740556538105011, + "step": 4570 + }, + { + "epoch": 1.053225806451613, + "grad_norm": 1.1264720214776667, + "learning_rate": 9.950472304722778e-07, + "loss": 0.798403263092041, + "step": 4571 + }, + { + "epoch": 1.0534562211981566, + "grad_norm": 0.9551633921802427, + "learning_rate": 9.94666251684761e-07, + "loss": 0.6945887804031372, + "step": 4572 + }, + { + "epoch": 1.0536866359447004, + "grad_norm": 1.0978186377940822, + "learning_rate": 9.942852736714312e-07, + "loss": 0.8257915377616882, + "step": 4573 + }, + { + "epoch": 1.0539170506912443, + "grad_norm": 1.108870855150134, + "learning_rate": 9.939042964875859e-07, + "loss": 0.751315712928772, + "step": 4574 + }, + { + "epoch": 1.054147465437788, + "grad_norm": 0.8929134755319279, + "learning_rate": 9.935233201885241e-07, + "loss": 0.6607721447944641, + "step": 4575 + }, + { + "epoch": 1.0543778801843318, + "grad_norm": 1.1623094406064765, + "learning_rate": 9.931423448295438e-07, + "loss": 
0.9135023355484009, + "step": 4576 + }, + { + "epoch": 1.0546082949308755, + "grad_norm": 1.1079901137426853, + "learning_rate": 9.927613704659428e-07, + "loss": 0.8238483667373657, + "step": 4577 + }, + { + "epoch": 1.0548387096774194, + "grad_norm": 1.0927838633299076, + "learning_rate": 9.923803971530196e-07, + "loss": 0.7657001614570618, + "step": 4578 + }, + { + "epoch": 1.0550691244239632, + "grad_norm": 1.0858899027259339, + "learning_rate": 9.919994249460717e-07, + "loss": 0.6360250115394592, + "step": 4579 + }, + { + "epoch": 1.055299539170507, + "grad_norm": 3.1983788784304843, + "learning_rate": 9.916184539003963e-07, + "loss": 0.6958763003349304, + "step": 4580 + }, + { + "epoch": 1.0555299539170506, + "grad_norm": 1.0079237517587447, + "learning_rate": 9.912374840712915e-07, + "loss": 0.7093038558959961, + "step": 4581 + }, + { + "epoch": 1.0557603686635946, + "grad_norm": 1.0680215254508902, + "learning_rate": 9.908565155140544e-07, + "loss": 0.7641304731369019, + "step": 4582 + }, + { + "epoch": 1.0559907834101383, + "grad_norm": 0.8923201066182703, + "learning_rate": 9.904755482839817e-07, + "loss": 0.7976446151733398, + "step": 4583 + }, + { + "epoch": 1.056221198156682, + "grad_norm": 1.0963737907088362, + "learning_rate": 9.900945824363707e-07, + "loss": 0.8407114744186401, + "step": 4584 + }, + { + "epoch": 1.0564516129032258, + "grad_norm": 1.0695401976763876, + "learning_rate": 9.897136180265181e-07, + "loss": 0.7988634705543518, + "step": 4585 + }, + { + "epoch": 1.0566820276497695, + "grad_norm": 1.072342293651018, + "learning_rate": 9.893326551097198e-07, + "loss": 0.7847359776496887, + "step": 4586 + }, + { + "epoch": 1.0569124423963134, + "grad_norm": 1.0629893453410204, + "learning_rate": 9.889516937412728e-07, + "loss": 0.8458963632583618, + "step": 4587 + }, + { + "epoch": 1.0571428571428572, + "grad_norm": 1.1301054626559641, + "learning_rate": 9.88570733976473e-07, + "loss": 0.8479788899421692, + "step": 4588 + }, + { + "epoch": 
1.057373271889401, + "grad_norm": 1.180492999769349, + "learning_rate": 9.881897758706154e-07, + "loss": 0.7467283010482788, + "step": 4589 + }, + { + "epoch": 1.0576036866359446, + "grad_norm": 1.1676226241505752, + "learning_rate": 9.878088194789967e-07, + "loss": 0.9400098323822021, + "step": 4590 + }, + { + "epoch": 1.0578341013824886, + "grad_norm": 1.2151292863225376, + "learning_rate": 9.874278648569118e-07, + "loss": 0.8901257514953613, + "step": 4591 + }, + { + "epoch": 1.0580645161290323, + "grad_norm": 1.2956773767909102, + "learning_rate": 9.870469120596552e-07, + "loss": 0.840053379535675, + "step": 4592 + }, + { + "epoch": 1.058294930875576, + "grad_norm": 0.9938952111506293, + "learning_rate": 9.866659611425225e-07, + "loss": 0.6825235486030579, + "step": 4593 + }, + { + "epoch": 1.0585253456221198, + "grad_norm": 1.2521534530730631, + "learning_rate": 9.86285012160808e-07, + "loss": 0.7783857583999634, + "step": 4594 + }, + { + "epoch": 1.0587557603686637, + "grad_norm": 1.0517032997656734, + "learning_rate": 9.859040651698055e-07, + "loss": 0.7901174426078796, + "step": 4595 + }, + { + "epoch": 1.0589861751152074, + "grad_norm": 1.2211963787816231, + "learning_rate": 9.855231202248097e-07, + "loss": 0.9475124478340149, + "step": 4596 + }, + { + "epoch": 1.0592165898617512, + "grad_norm": 1.1872676544788658, + "learning_rate": 9.851421773811133e-07, + "loss": 0.8582692742347717, + "step": 4597 + }, + { + "epoch": 1.0594470046082949, + "grad_norm": 1.1723948726757356, + "learning_rate": 9.847612366940106e-07, + "loss": 0.7885586023330688, + "step": 4598 + }, + { + "epoch": 1.0596774193548386, + "grad_norm": 1.17635061110199, + "learning_rate": 9.843802982187943e-07, + "loss": 0.7981748580932617, + "step": 4599 + }, + { + "epoch": 1.0599078341013826, + "grad_norm": 0.9066343519689628, + "learning_rate": 9.839993620107563e-07, + "loss": 0.7060403823852539, + "step": 4600 + }, + { + "epoch": 1.0601382488479263, + "grad_norm": 1.2126688495293467, + 
"learning_rate": 9.836184281251905e-07, + "loss": 0.7902223467826843, + "step": 4601 + }, + { + "epoch": 1.06036866359447, + "grad_norm": 0.9972491115312556, + "learning_rate": 9.83237496617388e-07, + "loss": 0.7074719071388245, + "step": 4602 + }, + { + "epoch": 1.0605990783410137, + "grad_norm": 0.9455936494800175, + "learning_rate": 9.828565675426405e-07, + "loss": 0.7180163264274597, + "step": 4603 + }, + { + "epoch": 1.0608294930875577, + "grad_norm": 0.8990997781996365, + "learning_rate": 9.824756409562397e-07, + "loss": 0.7040787935256958, + "step": 4604 + }, + { + "epoch": 1.0610599078341014, + "grad_norm": 1.0311368456712493, + "learning_rate": 9.820947169134765e-07, + "loss": 0.8387063145637512, + "step": 4605 + }, + { + "epoch": 1.0612903225806452, + "grad_norm": 1.0692817612993422, + "learning_rate": 9.81713795469641e-07, + "loss": 0.8587188124656677, + "step": 4606 + }, + { + "epoch": 1.0615207373271889, + "grad_norm": 1.0418289468184643, + "learning_rate": 9.813328766800242e-07, + "loss": 0.729094386100769, + "step": 4607 + }, + { + "epoch": 1.0617511520737328, + "grad_norm": 1.1884134090864242, + "learning_rate": 9.809519605999158e-07, + "loss": 1.0576609373092651, + "step": 4608 + }, + { + "epoch": 1.0619815668202766, + "grad_norm": 1.1124938149620707, + "learning_rate": 9.805710472846044e-07, + "loss": 0.7605572938919067, + "step": 4609 + }, + { + "epoch": 1.0622119815668203, + "grad_norm": 0.9566684121068049, + "learning_rate": 9.801901367893807e-07, + "loss": 0.722477912902832, + "step": 4610 + }, + { + "epoch": 1.062442396313364, + "grad_norm": 0.9185071862681494, + "learning_rate": 9.79809229169532e-07, + "loss": 0.7335925698280334, + "step": 4611 + }, + { + "epoch": 1.0626728110599077, + "grad_norm": 1.0494538531790283, + "learning_rate": 9.794283244803466e-07, + "loss": 0.8116357922554016, + "step": 4612 + }, + { + "epoch": 1.0629032258064517, + "grad_norm": 1.0519905027101895, + "learning_rate": 9.79047422777113e-07, + "loss": 
0.8004311323165894, + "step": 4613 + }, + { + "epoch": 1.0631336405529954, + "grad_norm": 0.9803128568921189, + "learning_rate": 9.786665241151185e-07, + "loss": 0.8198168277740479, + "step": 4614 + }, + { + "epoch": 1.0633640552995391, + "grad_norm": 0.9841178854805237, + "learning_rate": 9.782856285496494e-07, + "loss": 0.7031205892562866, + "step": 4615 + }, + { + "epoch": 1.0635944700460829, + "grad_norm": 1.055262322588535, + "learning_rate": 9.779047361359928e-07, + "loss": 0.7303737998008728, + "step": 4616 + }, + { + "epoch": 1.0638248847926268, + "grad_norm": 1.1694198331033647, + "learning_rate": 9.775238469294345e-07, + "loss": 0.8775424957275391, + "step": 4617 + }, + { + "epoch": 1.0640552995391706, + "grad_norm": 0.9013154484602001, + "learning_rate": 9.771429609852597e-07, + "loss": 0.7463759183883667, + "step": 4618 + }, + { + "epoch": 1.0642857142857143, + "grad_norm": 0.8792691967623277, + "learning_rate": 9.767620783587542e-07, + "loss": 0.7200205326080322, + "step": 4619 + }, + { + "epoch": 1.064516129032258, + "grad_norm": 0.9102194522316246, + "learning_rate": 9.763811991052019e-07, + "loss": 0.8255786299705505, + "step": 4620 + }, + { + "epoch": 1.064746543778802, + "grad_norm": 1.2552865619465912, + "learning_rate": 9.760003232798877e-07, + "loss": 0.7975195050239563, + "step": 4621 + }, + { + "epoch": 1.0649769585253457, + "grad_norm": 0.9993977940644363, + "learning_rate": 9.756194509380948e-07, + "loss": 0.6993064880371094, + "step": 4622 + }, + { + "epoch": 1.0652073732718894, + "grad_norm": 1.314757658160511, + "learning_rate": 9.752385821351062e-07, + "loss": 0.818634033203125, + "step": 4623 + }, + { + "epoch": 1.0654377880184331, + "grad_norm": 1.0949894149977886, + "learning_rate": 9.748577169262046e-07, + "loss": 0.707933783531189, + "step": 4624 + }, + { + "epoch": 1.0656682027649769, + "grad_norm": 1.1439419332653986, + "learning_rate": 9.744768553666723e-07, + "loss": 0.8133440017700195, + "step": 4625 + }, + { + "epoch": 
1.0658986175115208, + "grad_norm": 1.1394394770433072, + "learning_rate": 9.740959975117901e-07, + "loss": 0.8818857669830322, + "step": 4626 + }, + { + "epoch": 1.0661290322580645, + "grad_norm": 0.9617616601353652, + "learning_rate": 9.737151434168402e-07, + "loss": 0.6057544946670532, + "step": 4627 + }, + { + "epoch": 1.0663594470046083, + "grad_norm": 1.047486055121172, + "learning_rate": 9.733342931371023e-07, + "loss": 0.7560185194015503, + "step": 4628 + }, + { + "epoch": 1.066589861751152, + "grad_norm": 1.233360971442642, + "learning_rate": 9.72953446727856e-07, + "loss": 0.8196524381637573, + "step": 4629 + }, + { + "epoch": 1.066820276497696, + "grad_norm": 1.031309795003994, + "learning_rate": 9.725726042443814e-07, + "loss": 0.8695862889289856, + "step": 4630 + }, + { + "epoch": 1.0670506912442397, + "grad_norm": 0.9769847065094724, + "learning_rate": 9.721917657419573e-07, + "loss": 0.7753207683563232, + "step": 4631 + }, + { + "epoch": 1.0672811059907834, + "grad_norm": 1.0908524037443617, + "learning_rate": 9.718109312758612e-07, + "loss": 0.8245481252670288, + "step": 4632 + }, + { + "epoch": 1.0675115207373271, + "grad_norm": 1.201628166799481, + "learning_rate": 9.71430100901371e-07, + "loss": 0.8654806613922119, + "step": 4633 + }, + { + "epoch": 1.067741935483871, + "grad_norm": 1.22982718965067, + "learning_rate": 9.710492746737642e-07, + "loss": 0.8667370080947876, + "step": 4634 + }, + { + "epoch": 1.0679723502304148, + "grad_norm": 1.2635323967888392, + "learning_rate": 9.706684526483167e-07, + "loss": 0.7786421775817871, + "step": 4635 + }, + { + "epoch": 1.0682027649769585, + "grad_norm": 1.037203898616246, + "learning_rate": 9.702876348803045e-07, + "loss": 0.7788090705871582, + "step": 4636 + }, + { + "epoch": 1.0684331797235023, + "grad_norm": 1.1815160856137523, + "learning_rate": 9.69906821425003e-07, + "loss": 0.812332034111023, + "step": 4637 + }, + { + "epoch": 1.068663594470046, + "grad_norm": 1.2578908038434822, + 
"learning_rate": 9.69526012337686e-07, + "loss": 0.7884202599525452, + "step": 4638 + }, + { + "epoch": 1.06889400921659, + "grad_norm": 1.0539526708204177, + "learning_rate": 9.69145207673628e-07, + "loss": 0.725990891456604, + "step": 4639 + }, + { + "epoch": 1.0691244239631337, + "grad_norm": 1.01343921612526, + "learning_rate": 9.687644074881028e-07, + "loss": 0.7277272343635559, + "step": 4640 + }, + { + "epoch": 1.0693548387096774, + "grad_norm": 1.0871506025213427, + "learning_rate": 9.683836118363818e-07, + "loss": 0.8081945180892944, + "step": 4641 + }, + { + "epoch": 1.0695852534562211, + "grad_norm": 1.1050642405984226, + "learning_rate": 9.680028207737383e-07, + "loss": 0.8633503913879395, + "step": 4642 + }, + { + "epoch": 1.069815668202765, + "grad_norm": 0.9415461517108813, + "learning_rate": 9.67622034355443e-07, + "loss": 0.7873313426971436, + "step": 4643 + }, + { + "epoch": 1.0700460829493088, + "grad_norm": 1.269353126640295, + "learning_rate": 9.67241252636766e-07, + "loss": 0.7927644848823547, + "step": 4644 + }, + { + "epoch": 1.0702764976958525, + "grad_norm": 1.395156348091843, + "learning_rate": 9.668604756729784e-07, + "loss": 0.9458138942718506, + "step": 4645 + }, + { + "epoch": 1.0705069124423963, + "grad_norm": 1.2621680271291411, + "learning_rate": 9.664797035193484e-07, + "loss": 0.7471280097961426, + "step": 4646 + }, + { + "epoch": 1.07073732718894, + "grad_norm": 1.0373772164844823, + "learning_rate": 9.660989362311455e-07, + "loss": 0.7666789293289185, + "step": 4647 + }, + { + "epoch": 1.070967741935484, + "grad_norm": 0.8355654249705468, + "learning_rate": 9.65718173863637e-07, + "loss": 0.7846331000328064, + "step": 4648 + }, + { + "epoch": 1.0711981566820277, + "grad_norm": 1.1393955111251446, + "learning_rate": 9.653374164720897e-07, + "loss": 0.7790371179580688, + "step": 4649 + }, + { + "epoch": 1.0714285714285714, + "grad_norm": 1.110758470727215, + "learning_rate": 9.64956664111771e-07, + "loss": 0.9056169986724854, + 
"step": 4650 + }, + { + "epoch": 1.0716589861751151, + "grad_norm": 0.84240400487228, + "learning_rate": 9.645759168379461e-07, + "loss": 0.6839256286621094, + "step": 4651 + }, + { + "epoch": 1.071889400921659, + "grad_norm": 1.377334701305697, + "learning_rate": 9.641951747058799e-07, + "loss": 0.7071784138679504, + "step": 4652 + }, + { + "epoch": 1.0721198156682028, + "grad_norm": 1.1683127374870803, + "learning_rate": 9.638144377708366e-07, + "loss": 0.8166929483413696, + "step": 4653 + }, + { + "epoch": 1.0723502304147465, + "grad_norm": 1.239204160701412, + "learning_rate": 9.6343370608808e-07, + "loss": 0.8013010621070862, + "step": 4654 + }, + { + "epoch": 1.0725806451612903, + "grad_norm": 1.0825444957318084, + "learning_rate": 9.630529797128722e-07, + "loss": 0.8157169818878174, + "step": 4655 + }, + { + "epoch": 1.072811059907834, + "grad_norm": 1.0890180382455945, + "learning_rate": 9.626722587004758e-07, + "loss": 0.6467397212982178, + "step": 4656 + }, + { + "epoch": 1.073041474654378, + "grad_norm": 0.840613071204114, + "learning_rate": 9.622915431061519e-07, + "loss": 0.6623806953430176, + "step": 4657 + }, + { + "epoch": 1.0732718894009217, + "grad_norm": 0.9242647901691624, + "learning_rate": 9.619108329851596e-07, + "loss": 0.8333703279495239, + "step": 4658 + }, + { + "epoch": 1.0735023041474654, + "grad_norm": 1.1552752606597634, + "learning_rate": 9.615301283927603e-07, + "loss": 0.8798840045928955, + "step": 4659 + }, + { + "epoch": 1.0737327188940091, + "grad_norm": 1.1547075721097313, + "learning_rate": 9.611494293842119e-07, + "loss": 0.8712242841720581, + "step": 4660 + }, + { + "epoch": 1.073963133640553, + "grad_norm": 1.030127804248938, + "learning_rate": 9.60768736014772e-07, + "loss": 0.720801591873169, + "step": 4661 + }, + { + "epoch": 1.0741935483870968, + "grad_norm": 1.0305643381766019, + "learning_rate": 9.603880483396983e-07, + "loss": 0.7974982857704163, + "step": 4662 + }, + { + "epoch": 1.0744239631336405, + "grad_norm": 
1.1569753217458012, + "learning_rate": 9.600073664142471e-07, + "loss": 0.7656542062759399, + "step": 4663 + }, + { + "epoch": 1.0746543778801843, + "grad_norm": 1.2831377014983525, + "learning_rate": 9.596266902936737e-07, + "loss": 0.8274385333061218, + "step": 4664 + }, + { + "epoch": 1.0748847926267282, + "grad_norm": 1.1261587516242995, + "learning_rate": 9.592460200332328e-07, + "loss": 0.6508798599243164, + "step": 4665 + }, + { + "epoch": 1.075115207373272, + "grad_norm": 0.8712727383997491, + "learning_rate": 9.588653556881781e-07, + "loss": 0.6393407583236694, + "step": 4666 + }, + { + "epoch": 1.0753456221198157, + "grad_norm": 0.8300127743505744, + "learning_rate": 9.58484697313762e-07, + "loss": 0.7857781052589417, + "step": 4667 + }, + { + "epoch": 1.0755760368663594, + "grad_norm": 1.0591582120645788, + "learning_rate": 9.58104044965238e-07, + "loss": 0.7433615922927856, + "step": 4668 + }, + { + "epoch": 1.0758064516129031, + "grad_norm": 0.9252765779736452, + "learning_rate": 9.57723398697856e-07, + "loss": 0.6694349646568298, + "step": 4669 + }, + { + "epoch": 1.076036866359447, + "grad_norm": 1.06633744555344, + "learning_rate": 9.573427585668664e-07, + "loss": 0.7849506735801697, + "step": 4670 + }, + { + "epoch": 1.0762672811059908, + "grad_norm": 0.948086558097784, + "learning_rate": 9.569621246275194e-07, + "loss": 0.5924462080001831, + "step": 4671 + }, + { + "epoch": 1.0764976958525345, + "grad_norm": 1.0764379613448063, + "learning_rate": 9.565814969350628e-07, + "loss": 0.7679359316825867, + "step": 4672 + }, + { + "epoch": 1.0767281105990782, + "grad_norm": 0.8770076747846444, + "learning_rate": 9.562008755447444e-07, + "loss": 0.803286612033844, + "step": 4673 + }, + { + "epoch": 1.0769585253456222, + "grad_norm": 0.9139287879253918, + "learning_rate": 9.558202605118112e-07, + "loss": 0.6302975416183472, + "step": 4674 + }, + { + "epoch": 1.077188940092166, + "grad_norm": 1.1929014758233443, + "learning_rate": 9.554396518915085e-07, + 
"loss": 0.7441667914390564, + "step": 4675 + }, + { + "epoch": 1.0774193548387097, + "grad_norm": 1.1469726623234646, + "learning_rate": 9.550590497390815e-07, + "loss": 0.805221438407898, + "step": 4676 + }, + { + "epoch": 1.0776497695852534, + "grad_norm": 1.1540692428304171, + "learning_rate": 9.54678454109774e-07, + "loss": 0.9557743072509766, + "step": 4677 + }, + { + "epoch": 1.0778801843317973, + "grad_norm": 1.0781366924036009, + "learning_rate": 9.542978650588284e-07, + "loss": 0.7361980080604553, + "step": 4678 + }, + { + "epoch": 1.078110599078341, + "grad_norm": 1.2143012487351885, + "learning_rate": 9.539172826414876e-07, + "loss": 0.7474843263626099, + "step": 4679 + }, + { + "epoch": 1.0783410138248848, + "grad_norm": 1.0143818885553835, + "learning_rate": 9.535367069129923e-07, + "loss": 0.595927357673645, + "step": 4680 + }, + { + "epoch": 1.0785714285714285, + "grad_norm": 1.1128254146821686, + "learning_rate": 9.531561379285818e-07, + "loss": 0.894598126411438, + "step": 4681 + }, + { + "epoch": 1.0788018433179722, + "grad_norm": 1.3233034879697116, + "learning_rate": 9.527755757434966e-07, + "loss": 0.915902853012085, + "step": 4682 + }, + { + "epoch": 1.0790322580645162, + "grad_norm": 1.3436084997047495, + "learning_rate": 9.523950204129739e-07, + "loss": 0.8670432567596436, + "step": 4683 + }, + { + "epoch": 1.07926267281106, + "grad_norm": 1.119487791223308, + "learning_rate": 9.520144719922508e-07, + "loss": 0.7829893231391907, + "step": 4684 + }, + { + "epoch": 1.0794930875576036, + "grad_norm": 1.1633745895382166, + "learning_rate": 9.516339305365638e-07, + "loss": 0.6584970951080322, + "step": 4685 + }, + { + "epoch": 1.0797235023041474, + "grad_norm": 1.0240703451548752, + "learning_rate": 9.512533961011478e-07, + "loss": 0.7853457927703857, + "step": 4686 + }, + { + "epoch": 1.0799539170506913, + "grad_norm": 0.8755927642296618, + "learning_rate": 9.508728687412364e-07, + "loss": 0.7890632152557373, + "step": 4687 + }, + { + "epoch": 
1.080184331797235, + "grad_norm": 1.1475809434863895, + "learning_rate": 9.504923485120634e-07, + "loss": 0.8281408548355103, + "step": 4688 + }, + { + "epoch": 1.0804147465437788, + "grad_norm": 0.9222741947208914, + "learning_rate": 9.501118354688605e-07, + "loss": 0.7878601551055908, + "step": 4689 + }, + { + "epoch": 1.0806451612903225, + "grad_norm": 1.3827368592572105, + "learning_rate": 9.497313296668582e-07, + "loss": 0.8332592844963074, + "step": 4690 + }, + { + "epoch": 1.0808755760368665, + "grad_norm": 1.0564274993228098, + "learning_rate": 9.493508311612874e-07, + "loss": 0.7680759429931641, + "step": 4691 + }, + { + "epoch": 1.0811059907834102, + "grad_norm": 0.9446139934289677, + "learning_rate": 9.489703400073762e-07, + "loss": 0.6368690729141235, + "step": 4692 + }, + { + "epoch": 1.081336405529954, + "grad_norm": 1.1588361552017052, + "learning_rate": 9.485898562603525e-07, + "loss": 0.7018477916717529, + "step": 4693 + }, + { + "epoch": 1.0815668202764976, + "grad_norm": 1.057066552712669, + "learning_rate": 9.482093799754432e-07, + "loss": 0.8494987487792969, + "step": 4694 + }, + { + "epoch": 1.0817972350230414, + "grad_norm": 1.0119994692546468, + "learning_rate": 9.478289112078736e-07, + "loss": 0.8146306276321411, + "step": 4695 + }, + { + "epoch": 1.0820276497695853, + "grad_norm": 1.054771760893497, + "learning_rate": 9.474484500128689e-07, + "loss": 0.7832612991333008, + "step": 4696 + }, + { + "epoch": 1.082258064516129, + "grad_norm": 1.0487197763357414, + "learning_rate": 9.470679964456519e-07, + "loss": 0.8569360971450806, + "step": 4697 + }, + { + "epoch": 1.0824884792626728, + "grad_norm": 1.1432115985173055, + "learning_rate": 9.466875505614449e-07, + "loss": 0.8145112991333008, + "step": 4698 + }, + { + "epoch": 1.0827188940092165, + "grad_norm": 1.0578814317560323, + "learning_rate": 9.463071124154697e-07, + "loss": 0.6632689237594604, + "step": 4699 + }, + { + "epoch": 1.0829493087557605, + "grad_norm": 1.1233922356996344, + 
"learning_rate": 9.459266820629461e-07, + "loss": 0.6299769878387451, + "step": 4700 + }, + { + "epoch": 1.0831797235023042, + "grad_norm": 1.0275349813599226, + "learning_rate": 9.455462595590925e-07, + "loss": 0.7722063064575195, + "step": 4701 + }, + { + "epoch": 1.083410138248848, + "grad_norm": 1.2023285008908922, + "learning_rate": 9.451658449591278e-07, + "loss": 0.8219027519226074, + "step": 4702 + }, + { + "epoch": 1.0836405529953916, + "grad_norm": 1.1618110682341312, + "learning_rate": 9.44785438318268e-07, + "loss": 0.9078400731086731, + "step": 4703 + }, + { + "epoch": 1.0838709677419356, + "grad_norm": 1.087404948952653, + "learning_rate": 9.444050396917286e-07, + "loss": 0.8062041997909546, + "step": 4704 + }, + { + "epoch": 1.0841013824884793, + "grad_norm": 0.9599318157385525, + "learning_rate": 9.440246491347242e-07, + "loss": 0.6379001140594482, + "step": 4705 + }, + { + "epoch": 1.084331797235023, + "grad_norm": 1.179840039843376, + "learning_rate": 9.436442667024679e-07, + "loss": 0.919986367225647, + "step": 4706 + }, + { + "epoch": 1.0845622119815668, + "grad_norm": 1.025427308273649, + "learning_rate": 9.432638924501715e-07, + "loss": 0.6534138917922974, + "step": 4707 + }, + { + "epoch": 1.0847926267281105, + "grad_norm": 1.1537368190719173, + "learning_rate": 9.428835264330462e-07, + "loss": 0.8340045809745789, + "step": 4708 + }, + { + "epoch": 1.0850230414746544, + "grad_norm": 1.2598648406656967, + "learning_rate": 9.425031687063014e-07, + "loss": 0.8347625732421875, + "step": 4709 + }, + { + "epoch": 1.0852534562211982, + "grad_norm": 1.080310831214647, + "learning_rate": 9.421228193251452e-07, + "loss": 0.807063639163971, + "step": 4710 + }, + { + "epoch": 1.085483870967742, + "grad_norm": 0.8480154931503633, + "learning_rate": 9.417424783447855e-07, + "loss": 0.7375985383987427, + "step": 4711 + }, + { + "epoch": 1.0857142857142856, + "grad_norm": 0.9219258926876724, + "learning_rate": 9.413621458204281e-07, + "loss": 
0.5723168849945068, + "step": 4712 + }, + { + "epoch": 1.0859447004608296, + "grad_norm": 1.20469026899904, + "learning_rate": 9.409818218072772e-07, + "loss": 0.8272668123245239, + "step": 4713 + }, + { + "epoch": 1.0861751152073733, + "grad_norm": 1.0744380351617728, + "learning_rate": 9.406015063605368e-07, + "loss": 0.6400803327560425, + "step": 4714 + }, + { + "epoch": 1.086405529953917, + "grad_norm": 0.9959690478635643, + "learning_rate": 9.402211995354095e-07, + "loss": 0.6829795837402344, + "step": 4715 + }, + { + "epoch": 1.0866359447004608, + "grad_norm": 1.0434747079590168, + "learning_rate": 9.398409013870954e-07, + "loss": 0.8509865999221802, + "step": 4716 + }, + { + "epoch": 1.0868663594470047, + "grad_norm": 1.0730582514021882, + "learning_rate": 9.394606119707954e-07, + "loss": 0.895818829536438, + "step": 4717 + }, + { + "epoch": 1.0870967741935484, + "grad_norm": 1.2584943519033869, + "learning_rate": 9.390803313417072e-07, + "loss": 0.8534268140792847, + "step": 4718 + }, + { + "epoch": 1.0873271889400922, + "grad_norm": 1.0910485662903118, + "learning_rate": 9.38700059555028e-07, + "loss": 0.8603401184082031, + "step": 4719 + }, + { + "epoch": 1.087557603686636, + "grad_norm": 1.1060380385520165, + "learning_rate": 9.383197966659542e-07, + "loss": 0.8810417652130127, + "step": 4720 + }, + { + "epoch": 1.0877880184331796, + "grad_norm": 1.078874247367276, + "learning_rate": 9.3793954272968e-07, + "loss": 0.7144299149513245, + "step": 4721 + }, + { + "epoch": 1.0880184331797236, + "grad_norm": 1.3140311568193026, + "learning_rate": 9.375592978013994e-07, + "loss": 0.8780069351196289, + "step": 4722 + }, + { + "epoch": 1.0882488479262673, + "grad_norm": 1.1329108063995987, + "learning_rate": 9.371790619363041e-07, + "loss": 0.7976780533790588, + "step": 4723 + }, + { + "epoch": 1.088479262672811, + "grad_norm": 1.0979402846559465, + "learning_rate": 9.367988351895846e-07, + "loss": 0.9183385372161865, + "step": 4724 + }, + { + "epoch": 
1.0887096774193548, + "grad_norm": 1.0551038276717553, + "learning_rate": 9.364186176164306e-07, + "loss": 0.7891188859939575, + "step": 4725 + }, + { + "epoch": 1.0889400921658987, + "grad_norm": 0.9930223107211231, + "learning_rate": 9.360384092720301e-07, + "loss": 0.7586535215377808, + "step": 4726 + }, + { + "epoch": 1.0891705069124424, + "grad_norm": 1.1542507976324667, + "learning_rate": 9.356582102115696e-07, + "loss": 0.7915316224098206, + "step": 4727 + }, + { + "epoch": 1.0894009216589862, + "grad_norm": 0.901378484170352, + "learning_rate": 9.352780204902349e-07, + "loss": 0.6608257293701172, + "step": 4728 + }, + { + "epoch": 1.08963133640553, + "grad_norm": 1.1982692712799377, + "learning_rate": 9.3489784016321e-07, + "loss": 0.8375273942947388, + "step": 4729 + }, + { + "epoch": 1.0898617511520738, + "grad_norm": 1.43591815259741, + "learning_rate": 9.345176692856768e-07, + "loss": 0.7629055976867676, + "step": 4730 + }, + { + "epoch": 1.0900921658986176, + "grad_norm": 1.3741081876453818, + "learning_rate": 9.341375079128177e-07, + "loss": 0.8037875890731812, + "step": 4731 + }, + { + "epoch": 1.0903225806451613, + "grad_norm": 1.1252370555828741, + "learning_rate": 9.337573560998123e-07, + "loss": 0.8843437433242798, + "step": 4732 + }, + { + "epoch": 1.090552995391705, + "grad_norm": 1.058447534132799, + "learning_rate": 9.333772139018387e-07, + "loss": 0.7164910435676575, + "step": 4733 + }, + { + "epoch": 1.0907834101382488, + "grad_norm": 1.144703504042011, + "learning_rate": 9.329970813740742e-07, + "loss": 0.8076978921890259, + "step": 4734 + }, + { + "epoch": 1.0910138248847927, + "grad_norm": 1.091507904535434, + "learning_rate": 9.326169585716949e-07, + "loss": 0.7265340089797974, + "step": 4735 + }, + { + "epoch": 1.0912442396313364, + "grad_norm": 0.9010611551057135, + "learning_rate": 9.322368455498747e-07, + "loss": 0.7438681125640869, + "step": 4736 + }, + { + "epoch": 1.0914746543778802, + "grad_norm": 1.455573835192626, + 
"learning_rate": 9.318567423637868e-07, + "loss": 0.8760604858398438, + "step": 4737 + }, + { + "epoch": 1.0917050691244239, + "grad_norm": 1.064698472707054, + "learning_rate": 9.314766490686026e-07, + "loss": 0.7216911315917969, + "step": 4738 + }, + { + "epoch": 1.0919354838709678, + "grad_norm": 1.207051606070953, + "learning_rate": 9.310965657194916e-07, + "loss": 0.8003707528114319, + "step": 4739 + }, + { + "epoch": 1.0921658986175116, + "grad_norm": 0.9484074376515712, + "learning_rate": 9.307164923716233e-07, + "loss": 0.6496548652648926, + "step": 4740 + }, + { + "epoch": 1.0923963133640553, + "grad_norm": 1.0304975730869472, + "learning_rate": 9.303364290801644e-07, + "loss": 0.7659108638763428, + "step": 4741 + }, + { + "epoch": 1.092626728110599, + "grad_norm": 1.016478094690519, + "learning_rate": 9.299563759002802e-07, + "loss": 0.7799512147903442, + "step": 4742 + }, + { + "epoch": 1.092857142857143, + "grad_norm": 0.9921566283768914, + "learning_rate": 9.295763328871357e-07, + "loss": 0.7675691246986389, + "step": 4743 + }, + { + "epoch": 1.0930875576036867, + "grad_norm": 1.0513054078420998, + "learning_rate": 9.291963000958931e-07, + "loss": 0.677080512046814, + "step": 4744 + }, + { + "epoch": 1.0933179723502304, + "grad_norm": 1.0842277521538888, + "learning_rate": 9.28816277581714e-07, + "loss": 0.7885928153991699, + "step": 4745 + }, + { + "epoch": 1.0935483870967742, + "grad_norm": 1.07543209238493, + "learning_rate": 9.28436265399758e-07, + "loss": 0.6568010449409485, + "step": 4746 + }, + { + "epoch": 1.0937788018433179, + "grad_norm": 1.076830779801181, + "learning_rate": 9.280562636051827e-07, + "loss": 0.9438225030899048, + "step": 4747 + }, + { + "epoch": 1.0940092165898618, + "grad_norm": 1.0420094595322553, + "learning_rate": 9.276762722531461e-07, + "loss": 0.8119498491287231, + "step": 4748 + }, + { + "epoch": 1.0942396313364056, + "grad_norm": 0.8228863679585698, + "learning_rate": 9.272962913988029e-07, + "loss": 
0.7570452690124512, + "step": 4749 + }, + { + "epoch": 1.0944700460829493, + "grad_norm": 1.0990726312613297, + "learning_rate": 9.269163210973063e-07, + "loss": 0.7541190385818481, + "step": 4750 + }, + { + "epoch": 1.094700460829493, + "grad_norm": 1.015570437282189, + "learning_rate": 9.265363614038093e-07, + "loss": 0.6481921672821045, + "step": 4751 + }, + { + "epoch": 1.094930875576037, + "grad_norm": 1.1173263478947815, + "learning_rate": 9.261564123734623e-07, + "loss": 0.7997267246246338, + "step": 4752 + }, + { + "epoch": 1.0951612903225807, + "grad_norm": 1.4388540160892265, + "learning_rate": 9.25776474061414e-07, + "loss": 0.9093008637428284, + "step": 4753 + }, + { + "epoch": 1.0953917050691244, + "grad_norm": 1.3909093606880625, + "learning_rate": 9.253965465228122e-07, + "loss": 0.7609673142433167, + "step": 4754 + }, + { + "epoch": 1.0956221198156681, + "grad_norm": 1.311027419629587, + "learning_rate": 9.250166298128032e-07, + "loss": 0.8338878154754639, + "step": 4755 + }, + { + "epoch": 1.095852534562212, + "grad_norm": 1.1912490488387477, + "learning_rate": 9.246367239865308e-07, + "loss": 0.7503781318664551, + "step": 4756 + }, + { + "epoch": 1.0960829493087558, + "grad_norm": 1.0417471668794835, + "learning_rate": 9.242568290991384e-07, + "loss": 0.7630816698074341, + "step": 4757 + }, + { + "epoch": 1.0963133640552996, + "grad_norm": 1.4287601409586015, + "learning_rate": 9.238769452057671e-07, + "loss": 0.8026378154754639, + "step": 4758 + }, + { + "epoch": 1.0965437788018433, + "grad_norm": 1.0309152969100308, + "learning_rate": 9.234970723615558e-07, + "loss": 0.8256090879440308, + "step": 4759 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 1.1197681925892131, + "learning_rate": 9.231172106216437e-07, + "loss": 0.7331836223602295, + "step": 4760 + }, + { + "epoch": 1.097004608294931, + "grad_norm": 1.1300301361381715, + "learning_rate": 9.227373600411667e-07, + "loss": 0.886203944683075, + "step": 4761 + }, + { + "epoch": 
1.0972350230414747, + "grad_norm": 1.113695044174903, + "learning_rate": 9.223575206752592e-07, + "loss": 0.7802814245223999, + "step": 4762 + }, + { + "epoch": 1.0974654377880184, + "grad_norm": 1.3075634566953063, + "learning_rate": 9.219776925790552e-07, + "loss": 0.9682798385620117, + "step": 4763 + }, + { + "epoch": 1.0976958525345621, + "grad_norm": 1.1689607681364365, + "learning_rate": 9.215978758076858e-07, + "loss": 0.8733793497085571, + "step": 4764 + }, + { + "epoch": 1.097926267281106, + "grad_norm": 1.0890238577837303, + "learning_rate": 9.212180704162809e-07, + "loss": 0.8403818607330322, + "step": 4765 + }, + { + "epoch": 1.0981566820276498, + "grad_norm": 1.0898706001284595, + "learning_rate": 9.208382764599688e-07, + "loss": 0.7957059144973755, + "step": 4766 + }, + { + "epoch": 1.0983870967741935, + "grad_norm": 1.290224136897281, + "learning_rate": 9.204584939938761e-07, + "loss": 0.8943477272987366, + "step": 4767 + }, + { + "epoch": 1.0986175115207373, + "grad_norm": 1.0710230295284595, + "learning_rate": 9.200787230731273e-07, + "loss": 0.7084406018257141, + "step": 4768 + }, + { + "epoch": 1.098847926267281, + "grad_norm": 1.190836398847277, + "learning_rate": 9.196989637528465e-07, + "loss": 0.8374637365341187, + "step": 4769 + }, + { + "epoch": 1.099078341013825, + "grad_norm": 1.3757022429132086, + "learning_rate": 9.193192160881543e-07, + "loss": 0.6963578462600708, + "step": 4770 + }, + { + "epoch": 1.0993087557603687, + "grad_norm": 0.9887346096468936, + "learning_rate": 9.189394801341716e-07, + "loss": 0.6732540130615234, + "step": 4771 + }, + { + "epoch": 1.0995391705069124, + "grad_norm": 1.092710990198668, + "learning_rate": 9.185597559460159e-07, + "loss": 0.7104849219322205, + "step": 4772 + }, + { + "epoch": 1.0997695852534561, + "grad_norm": 1.3885045688613133, + "learning_rate": 9.181800435788037e-07, + "loss": 0.8461153507232666, + "step": 4773 + }, + { + "epoch": 1.1, + "grad_norm": 1.0447899457724443, + "learning_rate": 
9.178003430876502e-07, + "loss": 0.7120847105979919, + "step": 4774 + }, + { + "epoch": 1.1002304147465438, + "grad_norm": 1.0881207229188647, + "learning_rate": 9.174206545276677e-07, + "loss": 0.8108617067337036, + "step": 4775 + }, + { + "epoch": 1.1004608294930875, + "grad_norm": 0.9153115264713604, + "learning_rate": 9.170409779539678e-07, + "loss": 0.7019558548927307, + "step": 4776 + }, + { + "epoch": 1.1006912442396313, + "grad_norm": 0.9272452690627847, + "learning_rate": 9.166613134216605e-07, + "loss": 0.7563629150390625, + "step": 4777 + }, + { + "epoch": 1.100921658986175, + "grad_norm": 0.9795708897837844, + "learning_rate": 9.162816609858533e-07, + "loss": 0.777009129524231, + "step": 4778 + }, + { + "epoch": 1.101152073732719, + "grad_norm": 1.143317572483065, + "learning_rate": 9.159020207016516e-07, + "loss": 0.812334418296814, + "step": 4779 + }, + { + "epoch": 1.1013824884792627, + "grad_norm": 0.8685579046345627, + "learning_rate": 9.155223926241608e-07, + "loss": 0.609114408493042, + "step": 4780 + }, + { + "epoch": 1.1016129032258064, + "grad_norm": 1.1689773804888128, + "learning_rate": 9.151427768084828e-07, + "loss": 0.8277549147605896, + "step": 4781 + }, + { + "epoch": 1.1018433179723501, + "grad_norm": 1.2556834532396843, + "learning_rate": 9.147631733097179e-07, + "loss": 0.8649400472640991, + "step": 4782 + }, + { + "epoch": 1.102073732718894, + "grad_norm": 0.8878271909604711, + "learning_rate": 9.14383582182966e-07, + "loss": 0.7894293665885925, + "step": 4783 + }, + { + "epoch": 1.1023041474654378, + "grad_norm": 1.3844953995401048, + "learning_rate": 9.14004003483324e-07, + "loss": 0.9121778011322021, + "step": 4784 + }, + { + "epoch": 1.1025345622119815, + "grad_norm": 1.0899535734318635, + "learning_rate": 9.136244372658867e-07, + "loss": 0.7162299156188965, + "step": 4785 + }, + { + "epoch": 1.1027649769585253, + "grad_norm": 1.1193596859001855, + "learning_rate": 9.132448835857482e-07, + "loss": 0.7059808969497681, + "step": 
4786 + }, + { + "epoch": 1.1029953917050692, + "grad_norm": 1.2034226051758443, + "learning_rate": 9.128653424979999e-07, + "loss": 0.8172405958175659, + "step": 4787 + }, + { + "epoch": 1.103225806451613, + "grad_norm": 0.876114016677297, + "learning_rate": 9.124858140577316e-07, + "loss": 0.7672706842422485, + "step": 4788 + }, + { + "epoch": 1.1034562211981567, + "grad_norm": 1.2578760464526295, + "learning_rate": 9.121062983200318e-07, + "loss": 0.7054900527000427, + "step": 4789 + }, + { + "epoch": 1.1036866359447004, + "grad_norm": 1.0063162295686867, + "learning_rate": 9.117267953399865e-07, + "loss": 0.888538122177124, + "step": 4790 + }, + { + "epoch": 1.1039170506912441, + "grad_norm": 1.1758406583219614, + "learning_rate": 9.113473051726796e-07, + "loss": 0.7918668985366821, + "step": 4791 + }, + { + "epoch": 1.104147465437788, + "grad_norm": 1.220328177578168, + "learning_rate": 9.109678278731942e-07, + "loss": 0.7385697960853577, + "step": 4792 + }, + { + "epoch": 1.1043778801843318, + "grad_norm": 1.0627777124669568, + "learning_rate": 9.105883634966107e-07, + "loss": 0.6394056081771851, + "step": 4793 + }, + { + "epoch": 1.1046082949308755, + "grad_norm": 1.2147960582385422, + "learning_rate": 9.102089120980081e-07, + "loss": 0.8372077941894531, + "step": 4794 + }, + { + "epoch": 1.1048387096774193, + "grad_norm": 1.0764884273918471, + "learning_rate": 9.098294737324628e-07, + "loss": 0.6944066286087036, + "step": 4795 + }, + { + "epoch": 1.1050691244239632, + "grad_norm": 1.3210680270500303, + "learning_rate": 9.0945004845505e-07, + "loss": 0.8480994701385498, + "step": 4796 + }, + { + "epoch": 1.105299539170507, + "grad_norm": 1.3778825395187644, + "learning_rate": 9.090706363208431e-07, + "loss": 0.837437629699707, + "step": 4797 + }, + { + "epoch": 1.1055299539170507, + "grad_norm": 1.2126670676110476, + "learning_rate": 9.086912373849128e-07, + "loss": 0.8610002398490906, + "step": 4798 + }, + { + "epoch": 1.1057603686635944, + "grad_norm": 
1.1204211704902753, + "learning_rate": 9.083118517023281e-07, + "loss": 0.7323784828186035, + "step": 4799 + }, + { + "epoch": 1.1059907834101383, + "grad_norm": 1.394483021595883, + "learning_rate": 9.079324793281573e-07, + "loss": 0.7838932871818542, + "step": 4800 + }, + { + "epoch": 1.106221198156682, + "grad_norm": 1.1333807320340106, + "learning_rate": 9.075531203174651e-07, + "loss": 0.7655705213546753, + "step": 4801 + }, + { + "epoch": 1.1064516129032258, + "grad_norm": 1.199812107745982, + "learning_rate": 9.071737747253148e-07, + "loss": 0.8320151567459106, + "step": 4802 + }, + { + "epoch": 1.1066820276497695, + "grad_norm": 1.0428789095876687, + "learning_rate": 9.067944426067687e-07, + "loss": 0.7434612512588501, + "step": 4803 + }, + { + "epoch": 1.1069124423963133, + "grad_norm": 1.348302596081637, + "learning_rate": 9.064151240168857e-07, + "loss": 0.8351321220397949, + "step": 4804 + }, + { + "epoch": 1.1071428571428572, + "grad_norm": 0.9731377071478325, + "learning_rate": 9.060358190107233e-07, + "loss": 0.6648053526878357, + "step": 4805 + }, + { + "epoch": 1.107373271889401, + "grad_norm": 1.236779616553706, + "learning_rate": 9.056565276433377e-07, + "loss": 0.7507585287094116, + "step": 4806 + }, + { + "epoch": 1.1076036866359447, + "grad_norm": 1.0866303306873377, + "learning_rate": 9.052772499697823e-07, + "loss": 0.7638635635375977, + "step": 4807 + }, + { + "epoch": 1.1078341013824884, + "grad_norm": 1.3204341922490346, + "learning_rate": 9.048979860451081e-07, + "loss": 0.8066626191139221, + "step": 4808 + }, + { + "epoch": 1.1080645161290323, + "grad_norm": 0.9459322006964221, + "learning_rate": 9.045187359243659e-07, + "loss": 0.7090466022491455, + "step": 4809 + }, + { + "epoch": 1.108294930875576, + "grad_norm": 1.1112578831827626, + "learning_rate": 9.041394996626027e-07, + "loss": 0.7071142792701721, + "step": 4810 + }, + { + "epoch": 1.1085253456221198, + "grad_norm": 1.0134445673972028, + "learning_rate": 9.037602773148638e-07, 
+ "loss": 0.7103942036628723, + "step": 4811 + }, + { + "epoch": 1.1087557603686635, + "grad_norm": 1.1348721368793189, + "learning_rate": 9.033810689361936e-07, + "loss": 0.8408492207527161, + "step": 4812 + }, + { + "epoch": 1.1089861751152075, + "grad_norm": 0.9439878571651674, + "learning_rate": 9.030018745816335e-07, + "loss": 0.7621495723724365, + "step": 4813 + }, + { + "epoch": 1.1092165898617512, + "grad_norm": 1.152461687801826, + "learning_rate": 9.026226943062225e-07, + "loss": 0.7105196714401245, + "step": 4814 + }, + { + "epoch": 1.109447004608295, + "grad_norm": 1.079152769158689, + "learning_rate": 9.022435281649986e-07, + "loss": 0.8733636140823364, + "step": 4815 + }, + { + "epoch": 1.1096774193548387, + "grad_norm": 1.223534472251507, + "learning_rate": 9.018643762129974e-07, + "loss": 0.9097845554351807, + "step": 4816 + }, + { + "epoch": 1.1099078341013824, + "grad_norm": 1.2220607424054495, + "learning_rate": 9.014852385052519e-07, + "loss": 0.8743059635162354, + "step": 4817 + }, + { + "epoch": 1.1101382488479263, + "grad_norm": 1.0404677289419784, + "learning_rate": 9.011061150967937e-07, + "loss": 0.7898736000061035, + "step": 4818 + }, + { + "epoch": 1.11036866359447, + "grad_norm": 1.1698125073586854, + "learning_rate": 9.007270060426516e-07, + "loss": 0.871254563331604, + "step": 4819 + }, + { + "epoch": 1.1105990783410138, + "grad_norm": 1.323286168379092, + "learning_rate": 9.003479113978536e-07, + "loss": 0.6833579540252686, + "step": 4820 + }, + { + "epoch": 1.1108294930875575, + "grad_norm": 1.285642784687423, + "learning_rate": 8.999688312174243e-07, + "loss": 0.8289071321487427, + "step": 4821 + }, + { + "epoch": 1.1110599078341015, + "grad_norm": 1.1884737282905606, + "learning_rate": 8.995897655563864e-07, + "loss": 0.6798583269119263, + "step": 4822 + }, + { + "epoch": 1.1112903225806452, + "grad_norm": 1.1108358813410262, + "learning_rate": 8.992107144697614e-07, + "loss": 0.6518250703811646, + "step": 4823 + }, + { + "epoch": 
1.111520737327189, + "grad_norm": 1.3596600109698966, + "learning_rate": 8.988316780125679e-07, + "loss": 0.9316667318344116, + "step": 4824 + }, + { + "epoch": 1.1117511520737327, + "grad_norm": 0.9951654747842746, + "learning_rate": 8.98452656239822e-07, + "loss": 0.755483865737915, + "step": 4825 + }, + { + "epoch": 1.1119815668202766, + "grad_norm": 1.0146600815927005, + "learning_rate": 8.980736492065391e-07, + "loss": 0.7892755270004272, + "step": 4826 + }, + { + "epoch": 1.1122119815668203, + "grad_norm": 0.9930161298314518, + "learning_rate": 8.976946569677308e-07, + "loss": 0.703255295753479, + "step": 4827 + }, + { + "epoch": 1.112442396313364, + "grad_norm": 1.1559327578235137, + "learning_rate": 8.973156795784073e-07, + "loss": 0.7885171175003052, + "step": 4828 + }, + { + "epoch": 1.1126728110599078, + "grad_norm": 1.1407519814570228, + "learning_rate": 8.969367170935776e-07, + "loss": 0.8035199642181396, + "step": 4829 + }, + { + "epoch": 1.1129032258064515, + "grad_norm": 1.0245821351407076, + "learning_rate": 8.965577695682467e-07, + "loss": 0.8272112607955933, + "step": 4830 + }, + { + "epoch": 1.1131336405529955, + "grad_norm": 1.1104598721433627, + "learning_rate": 8.961788370574182e-07, + "loss": 0.8734478950500488, + "step": 4831 + }, + { + "epoch": 1.1133640552995392, + "grad_norm": 1.2722110058519596, + "learning_rate": 8.957999196160946e-07, + "loss": 0.7487469911575317, + "step": 4832 + }, + { + "epoch": 1.113594470046083, + "grad_norm": 1.3783344397611896, + "learning_rate": 8.954210172992748e-07, + "loss": 0.9193693399429321, + "step": 4833 + }, + { + "epoch": 1.1138248847926266, + "grad_norm": 1.4522583636726432, + "learning_rate": 8.950421301619555e-07, + "loss": 0.8228428959846497, + "step": 4834 + }, + { + "epoch": 1.1140552995391706, + "grad_norm": 0.9646412535671615, + "learning_rate": 8.946632582591324e-07, + "loss": 0.7419015169143677, + "step": 4835 + }, + { + "epoch": 1.1142857142857143, + "grad_norm": 1.1957500872812925, + 
"learning_rate": 8.942844016457975e-07, + "loss": 0.827411949634552, + "step": 4836 + }, + { + "epoch": 1.114516129032258, + "grad_norm": 0.9975223373000859, + "learning_rate": 8.93905560376942e-07, + "loss": 0.7066754102706909, + "step": 4837 + }, + { + "epoch": 1.1147465437788018, + "grad_norm": 1.2336329306802043, + "learning_rate": 8.93526734507554e-07, + "loss": 0.7201621532440186, + "step": 4838 + }, + { + "epoch": 1.1149769585253457, + "grad_norm": 0.8521980282185057, + "learning_rate": 8.931479240926196e-07, + "loss": 0.6363521814346313, + "step": 4839 + }, + { + "epoch": 1.1152073732718895, + "grad_norm": 1.0065898101647581, + "learning_rate": 8.927691291871223e-07, + "loss": 0.8232909440994263, + "step": 4840 + }, + { + "epoch": 1.1154377880184332, + "grad_norm": 1.0354249430711853, + "learning_rate": 8.923903498460441e-07, + "loss": 0.7006033658981323, + "step": 4841 + }, + { + "epoch": 1.115668202764977, + "grad_norm": 1.1957171429651339, + "learning_rate": 8.920115861243638e-07, + "loss": 0.6982721090316772, + "step": 4842 + }, + { + "epoch": 1.1158986175115206, + "grad_norm": 1.039109039901578, + "learning_rate": 8.916328380770593e-07, + "loss": 0.7735922336578369, + "step": 4843 + }, + { + "epoch": 1.1161290322580646, + "grad_norm": 1.189307260310029, + "learning_rate": 8.912541057591049e-07, + "loss": 0.7430423498153687, + "step": 4844 + }, + { + "epoch": 1.1163594470046083, + "grad_norm": 1.0189703427385546, + "learning_rate": 8.908753892254729e-07, + "loss": 0.7783932685852051, + "step": 4845 + }, + { + "epoch": 1.116589861751152, + "grad_norm": 0.895546986970967, + "learning_rate": 8.904966885311339e-07, + "loss": 0.726211428642273, + "step": 4846 + }, + { + "epoch": 1.1168202764976958, + "grad_norm": 1.0042101088511581, + "learning_rate": 8.901180037310555e-07, + "loss": 0.664351761341095, + "step": 4847 + }, + { + "epoch": 1.1170506912442397, + "grad_norm": 1.192545271664204, + "learning_rate": 8.897393348802031e-07, + "loss": 
0.8246554136276245, + "step": 4848 + }, + { + "epoch": 1.1172811059907835, + "grad_norm": 1.3113785088290244, + "learning_rate": 8.893606820335405e-07, + "loss": 0.9435447454452515, + "step": 4849 + }, + { + "epoch": 1.1175115207373272, + "grad_norm": 1.1196400925650334, + "learning_rate": 8.889820452460286e-07, + "loss": 0.8471171855926514, + "step": 4850 + }, + { + "epoch": 1.117741935483871, + "grad_norm": 0.9950597161448561, + "learning_rate": 8.886034245726254e-07, + "loss": 0.6038233041763306, + "step": 4851 + }, + { + "epoch": 1.1179723502304149, + "grad_norm": 1.1171540360532777, + "learning_rate": 8.882248200682881e-07, + "loss": 0.8186997771263123, + "step": 4852 + }, + { + "epoch": 1.1182027649769586, + "grad_norm": 1.2436642718372632, + "learning_rate": 8.878462317879702e-07, + "loss": 0.789948582649231, + "step": 4853 + }, + { + "epoch": 1.1184331797235023, + "grad_norm": 1.0789321556804603, + "learning_rate": 8.87467659786623e-07, + "loss": 0.7543652057647705, + "step": 4854 + }, + { + "epoch": 1.118663594470046, + "grad_norm": 1.0717127208024606, + "learning_rate": 8.870891041191963e-07, + "loss": 0.5985269546508789, + "step": 4855 + }, + { + "epoch": 1.1188940092165898, + "grad_norm": 1.109115113465042, + "learning_rate": 8.867105648406364e-07, + "loss": 0.7676643133163452, + "step": 4856 + }, + { + "epoch": 1.1191244239631337, + "grad_norm": 1.0078052507528568, + "learning_rate": 8.863320420058881e-07, + "loss": 0.7317303419113159, + "step": 4857 + }, + { + "epoch": 1.1193548387096774, + "grad_norm": 1.117240479042085, + "learning_rate": 8.859535356698936e-07, + "loss": 0.8357843160629272, + "step": 4858 + }, + { + "epoch": 1.1195852534562212, + "grad_norm": 1.2827717071860176, + "learning_rate": 8.855750458875923e-07, + "loss": 0.7149945497512817, + "step": 4859 + }, + { + "epoch": 1.119815668202765, + "grad_norm": 1.1258754685876486, + "learning_rate": 8.851965727139214e-07, + "loss": 0.7059169411659241, + "step": 4860 + }, + { + "epoch": 
1.1200460829493089, + "grad_norm": 1.0779991100813224, + "learning_rate": 8.848181162038163e-07, + "loss": 0.7530190944671631, + "step": 4861 + }, + { + "epoch": 1.1202764976958526, + "grad_norm": 1.12578616970897, + "learning_rate": 8.844396764122092e-07, + "loss": 0.808814287185669, + "step": 4862 + }, + { + "epoch": 1.1205069124423963, + "grad_norm": 1.174668121226261, + "learning_rate": 8.840612533940295e-07, + "loss": 0.7205604910850525, + "step": 4863 + }, + { + "epoch": 1.12073732718894, + "grad_norm": 1.0284636891818573, + "learning_rate": 8.83682847204206e-07, + "loss": 0.7493274211883545, + "step": 4864 + }, + { + "epoch": 1.120967741935484, + "grad_norm": 1.1974475439930412, + "learning_rate": 8.833044578976631e-07, + "loss": 0.8115849494934082, + "step": 4865 + }, + { + "epoch": 1.1211981566820277, + "grad_norm": 1.2224514970634248, + "learning_rate": 8.829260855293237e-07, + "loss": 0.8188419342041016, + "step": 4866 + }, + { + "epoch": 1.1214285714285714, + "grad_norm": 1.372584236180193, + "learning_rate": 8.82547730154108e-07, + "loss": 0.6152349710464478, + "step": 4867 + }, + { + "epoch": 1.1216589861751152, + "grad_norm": 0.9364210771252817, + "learning_rate": 8.821693918269333e-07, + "loss": 0.7629969120025635, + "step": 4868 + }, + { + "epoch": 1.121889400921659, + "grad_norm": 1.0637191210851928, + "learning_rate": 8.81791070602716e-07, + "loss": 0.7063733339309692, + "step": 4869 + }, + { + "epoch": 1.1221198156682028, + "grad_norm": 1.2221996591019166, + "learning_rate": 8.814127665363682e-07, + "loss": 0.729676365852356, + "step": 4870 + }, + { + "epoch": 1.1223502304147466, + "grad_norm": 1.2363948838699006, + "learning_rate": 8.810344796827999e-07, + "loss": 0.8188877105712891, + "step": 4871 + }, + { + "epoch": 1.1225806451612903, + "grad_norm": 1.4364824515163135, + "learning_rate": 8.806562100969199e-07, + "loss": 0.70793217420578, + "step": 4872 + }, + { + "epoch": 1.122811059907834, + "grad_norm": 1.2471671753090219, + 
"learning_rate": 8.802779578336329e-07, + "loss": 0.8086484670639038, + "step": 4873 + }, + { + "epoch": 1.123041474654378, + "grad_norm": 1.209058465827679, + "learning_rate": 8.798997229478417e-07, + "loss": 0.8954081535339355, + "step": 4874 + }, + { + "epoch": 1.1232718894009217, + "grad_norm": 1.0352094557860352, + "learning_rate": 8.795215054944469e-07, + "loss": 0.6615205407142639, + "step": 4875 + }, + { + "epoch": 1.1235023041474654, + "grad_norm": 1.3182700744777898, + "learning_rate": 8.79143305528346e-07, + "loss": 0.6851116418838501, + "step": 4876 + }, + { + "epoch": 1.1237327188940092, + "grad_norm": 0.9311237252586447, + "learning_rate": 8.787651231044342e-07, + "loss": 0.7594672441482544, + "step": 4877 + }, + { + "epoch": 1.123963133640553, + "grad_norm": 1.2505187148095604, + "learning_rate": 8.783869582776044e-07, + "loss": 0.7170572280883789, + "step": 4878 + }, + { + "epoch": 1.1241935483870968, + "grad_norm": 1.1244851690255748, + "learning_rate": 8.780088111027467e-07, + "loss": 0.9139137864112854, + "step": 4879 + }, + { + "epoch": 1.1244239631336406, + "grad_norm": 1.2468380143920514, + "learning_rate": 8.776306816347482e-07, + "loss": 0.8716791868209839, + "step": 4880 + }, + { + "epoch": 1.1246543778801843, + "grad_norm": 1.5043743610246187, + "learning_rate": 8.772525699284946e-07, + "loss": 0.840330958366394, + "step": 4881 + }, + { + "epoch": 1.124884792626728, + "grad_norm": 1.28802116274467, + "learning_rate": 8.768744760388681e-07, + "loss": 0.7713445425033569, + "step": 4882 + }, + { + "epoch": 1.125115207373272, + "grad_norm": 1.2058132743835892, + "learning_rate": 8.764964000207479e-07, + "loss": 0.8964767456054688, + "step": 4883 + }, + { + "epoch": 1.1253456221198157, + "grad_norm": 1.12361515551762, + "learning_rate": 8.761183419290121e-07, + "loss": 0.8038421869277954, + "step": 4884 + }, + { + "epoch": 1.1255760368663594, + "grad_norm": 0.7722654284456119, + "learning_rate": 8.757403018185351e-07, + "loss": 
0.6601011753082275, + "step": 4885 + }, + { + "epoch": 1.1258064516129032, + "grad_norm": 0.8011265369746955, + "learning_rate": 8.753622797441885e-07, + "loss": 0.8226664066314697, + "step": 4886 + }, + { + "epoch": 1.1260368663594469, + "grad_norm": 1.0633366554284305, + "learning_rate": 8.749842757608422e-07, + "loss": 0.7062248587608337, + "step": 4887 + }, + { + "epoch": 1.1262672811059908, + "grad_norm": 1.318395948514478, + "learning_rate": 8.746062899233628e-07, + "loss": 0.8642051815986633, + "step": 4888 + }, + { + "epoch": 1.1264976958525346, + "grad_norm": 1.2332349128972684, + "learning_rate": 8.74228322286614e-07, + "loss": 0.8194048404693604, + "step": 4889 + }, + { + "epoch": 1.1267281105990783, + "grad_norm": 1.121678775220638, + "learning_rate": 8.738503729054583e-07, + "loss": 0.6957820653915405, + "step": 4890 + }, + { + "epoch": 1.1269585253456222, + "grad_norm": 0.9775692035561586, + "learning_rate": 8.734724418347537e-07, + "loss": 0.8107770681381226, + "step": 4891 + }, + { + "epoch": 1.127188940092166, + "grad_norm": 1.1508754542191086, + "learning_rate": 8.730945291293563e-07, + "loss": 0.7727551460266113, + "step": 4892 + }, + { + "epoch": 1.1274193548387097, + "grad_norm": 1.1347047929449647, + "learning_rate": 8.727166348441207e-07, + "loss": 0.7389936447143555, + "step": 4893 + }, + { + "epoch": 1.1276497695852534, + "grad_norm": 1.2733389095695957, + "learning_rate": 8.723387590338964e-07, + "loss": 0.7666463851928711, + "step": 4894 + }, + { + "epoch": 1.1278801843317972, + "grad_norm": 1.1990629153183452, + "learning_rate": 8.719609017535328e-07, + "loss": 0.7795453071594238, + "step": 4895 + }, + { + "epoch": 1.128110599078341, + "grad_norm": 1.1062968437903737, + "learning_rate": 8.715830630578746e-07, + "loss": 0.8560752272605896, + "step": 4896 + }, + { + "epoch": 1.1283410138248848, + "grad_norm": 1.2251043883259816, + "learning_rate": 8.712052430017645e-07, + "loss": 0.7574455738067627, + "step": 4897 + }, + { + "epoch": 
1.1285714285714286, + "grad_norm": 1.3025894471719623, + "learning_rate": 8.708274416400432e-07, + "loss": 0.8017276525497437, + "step": 4898 + }, + { + "epoch": 1.1288018433179723, + "grad_norm": 0.9942840399227726, + "learning_rate": 8.704496590275477e-07, + "loss": 0.7046157121658325, + "step": 4899 + }, + { + "epoch": 1.129032258064516, + "grad_norm": 1.187705347283351, + "learning_rate": 8.700718952191124e-07, + "loss": 0.7352035641670227, + "step": 4900 + }, + { + "epoch": 1.12926267281106, + "grad_norm": 0.9471130432852718, + "learning_rate": 8.696941502695698e-07, + "loss": 0.6444690227508545, + "step": 4901 + }, + { + "epoch": 1.1294930875576037, + "grad_norm": 1.0628821586759927, + "learning_rate": 8.69316424233749e-07, + "loss": 0.7909440994262695, + "step": 4902 + }, + { + "epoch": 1.1297235023041474, + "grad_norm": 0.9483928902743061, + "learning_rate": 8.689387171664756e-07, + "loss": 0.646790087223053, + "step": 4903 + }, + { + "epoch": 1.1299539170506911, + "grad_norm": 1.2796319408131067, + "learning_rate": 8.685610291225744e-07, + "loss": 0.786831796169281, + "step": 4904 + }, + { + "epoch": 1.130184331797235, + "grad_norm": 1.143272972798168, + "learning_rate": 8.681833601568657e-07, + "loss": 0.8004348278045654, + "step": 4905 + }, + { + "epoch": 1.1304147465437788, + "grad_norm": 0.996600703731369, + "learning_rate": 8.678057103241677e-07, + "loss": 0.6846532821655273, + "step": 4906 + }, + { + "epoch": 1.1306451612903226, + "grad_norm": 1.299426572962062, + "learning_rate": 8.67428079679296e-07, + "loss": 0.7555707693099976, + "step": 4907 + }, + { + "epoch": 1.1308755760368663, + "grad_norm": 1.3809719247833205, + "learning_rate": 8.67050468277063e-07, + "loss": 0.852725625038147, + "step": 4908 + }, + { + "epoch": 1.1311059907834102, + "grad_norm": 0.9844151846464619, + "learning_rate": 8.666728761722782e-07, + "loss": 0.6990044713020325, + "step": 4909 + }, + { + "epoch": 1.131336405529954, + "grad_norm": 1.223366973696945, + 
"learning_rate": 8.662953034197493e-07, + "loss": 0.8050999641418457, + "step": 4910 + }, + { + "epoch": 1.1315668202764977, + "grad_norm": 1.3085197840977536, + "learning_rate": 8.659177500742802e-07, + "loss": 0.8169291019439697, + "step": 4911 + }, + { + "epoch": 1.1317972350230414, + "grad_norm": 1.081294035300873, + "learning_rate": 8.655402161906716e-07, + "loss": 0.7814679145812988, + "step": 4912 + }, + { + "epoch": 1.1320276497695851, + "grad_norm": 1.237970773045493, + "learning_rate": 8.651627018237231e-07, + "loss": 0.6734834313392639, + "step": 4913 + }, + { + "epoch": 1.132258064516129, + "grad_norm": 1.1143770605215586, + "learning_rate": 8.647852070282299e-07, + "loss": 0.8765416145324707, + "step": 4914 + }, + { + "epoch": 1.1324884792626728, + "grad_norm": 1.3797966848789986, + "learning_rate": 8.644077318589847e-07, + "loss": 1.0023764371871948, + "step": 4915 + }, + { + "epoch": 1.1327188940092165, + "grad_norm": 1.0387287080137257, + "learning_rate": 8.64030276370778e-07, + "loss": 0.7561393976211548, + "step": 4916 + }, + { + "epoch": 1.1329493087557603, + "grad_norm": 1.123376400728965, + "learning_rate": 8.636528406183961e-07, + "loss": 0.8252062797546387, + "step": 4917 + }, + { + "epoch": 1.1331797235023042, + "grad_norm": 1.3939443114820729, + "learning_rate": 8.632754246566246e-07, + "loss": 0.7598097324371338, + "step": 4918 + }, + { + "epoch": 1.133410138248848, + "grad_norm": 0.8823184534346743, + "learning_rate": 8.628980285402438e-07, + "loss": 0.6113640069961548, + "step": 4919 + }, + { + "epoch": 1.1336405529953917, + "grad_norm": 1.096652563873467, + "learning_rate": 8.625206523240325e-07, + "loss": 0.7457853555679321, + "step": 4920 + }, + { + "epoch": 1.1338709677419354, + "grad_norm": 1.0304826450193199, + "learning_rate": 8.62143296062767e-07, + "loss": 0.7334161996841431, + "step": 4921 + }, + { + "epoch": 1.1341013824884794, + "grad_norm": 1.1383631487720753, + "learning_rate": 8.617659598112195e-07, + "loss": 
0.7446962594985962, + "step": 4922 + }, + { + "epoch": 1.134331797235023, + "grad_norm": 0.9360514056176105, + "learning_rate": 8.613886436241594e-07, + "loss": 0.7074497938156128, + "step": 4923 + }, + { + "epoch": 1.1345622119815668, + "grad_norm": 0.9945384740922374, + "learning_rate": 8.610113475563547e-07, + "loss": 0.6728851795196533, + "step": 4924 + }, + { + "epoch": 1.1347926267281105, + "grad_norm": 1.0533766436674836, + "learning_rate": 8.606340716625689e-07, + "loss": 0.7732793092727661, + "step": 4925 + }, + { + "epoch": 1.1350230414746543, + "grad_norm": 1.2301857240081557, + "learning_rate": 8.60256815997563e-07, + "loss": 0.7514671683311462, + "step": 4926 + }, + { + "epoch": 1.1352534562211982, + "grad_norm": 1.2507291163181513, + "learning_rate": 8.598795806160952e-07, + "loss": 0.7824795842170715, + "step": 4927 + }, + { + "epoch": 1.135483870967742, + "grad_norm": 1.1585997268920079, + "learning_rate": 8.59502365572921e-07, + "loss": 0.789236307144165, + "step": 4928 + }, + { + "epoch": 1.1357142857142857, + "grad_norm": 1.1796078109098491, + "learning_rate": 8.591251709227919e-07, + "loss": 0.7005175948143005, + "step": 4929 + }, + { + "epoch": 1.1359447004608294, + "grad_norm": 1.2299124062921447, + "learning_rate": 8.587479967204582e-07, + "loss": 0.7851300239562988, + "step": 4930 + }, + { + "epoch": 1.1361751152073734, + "grad_norm": 1.5129438725714193, + "learning_rate": 8.583708430206658e-07, + "loss": 0.8901405334472656, + "step": 4931 + }, + { + "epoch": 1.136405529953917, + "grad_norm": 1.1049343524856345, + "learning_rate": 8.579937098781576e-07, + "loss": 0.8118528127670288, + "step": 4932 + }, + { + "epoch": 1.1366359447004608, + "grad_norm": 1.0631974751851168, + "learning_rate": 8.57616597347675e-07, + "loss": 0.6500028371810913, + "step": 4933 + }, + { + "epoch": 1.1368663594470045, + "grad_norm": 1.057066415615051, + "learning_rate": 8.572395054839547e-07, + "loss": 0.7752922773361206, + "step": 4934 + }, + { + "epoch": 
1.1370967741935485, + "grad_norm": 1.124364781444334, + "learning_rate": 8.568624343417309e-07, + "loss": 0.7346245050430298, + "step": 4935 + }, + { + "epoch": 1.1373271889400922, + "grad_norm": 1.4547001781507483, + "learning_rate": 8.564853839757356e-07, + "loss": 0.9249104261398315, + "step": 4936 + }, + { + "epoch": 1.137557603686636, + "grad_norm": 1.0350864816884677, + "learning_rate": 8.561083544406965e-07, + "loss": 0.7407078742980957, + "step": 4937 + }, + { + "epoch": 1.1377880184331797, + "grad_norm": 1.197156559440129, + "learning_rate": 8.557313457913393e-07, + "loss": 0.7615865468978882, + "step": 4938 + }, + { + "epoch": 1.1380184331797234, + "grad_norm": 1.2125718427071739, + "learning_rate": 8.553543580823866e-07, + "loss": 0.757561445236206, + "step": 4939 + }, + { + "epoch": 1.1382488479262673, + "grad_norm": 1.1468001082336654, + "learning_rate": 8.549773913685572e-07, + "loss": 0.7130411863327026, + "step": 4940 + }, + { + "epoch": 1.138479262672811, + "grad_norm": 1.1282357144069963, + "learning_rate": 8.54600445704567e-07, + "loss": 0.7507551312446594, + "step": 4941 + }, + { + "epoch": 1.1387096774193548, + "grad_norm": 1.0556143227749322, + "learning_rate": 8.542235211451301e-07, + "loss": 0.896443247795105, + "step": 4942 + }, + { + "epoch": 1.1389400921658985, + "grad_norm": 1.145222677509159, + "learning_rate": 8.538466177449557e-07, + "loss": 0.7530815601348877, + "step": 4943 + }, + { + "epoch": 1.1391705069124425, + "grad_norm": 1.2481258172783056, + "learning_rate": 8.534697355587517e-07, + "loss": 0.8730431795120239, + "step": 4944 + }, + { + "epoch": 1.1394009216589862, + "grad_norm": 1.3010516024158107, + "learning_rate": 8.530928746412216e-07, + "loss": 0.6452720165252686, + "step": 4945 + }, + { + "epoch": 1.13963133640553, + "grad_norm": 1.1712957128451178, + "learning_rate": 8.527160350470661e-07, + "loss": 0.7679018974304199, + "step": 4946 + }, + { + "epoch": 1.1398617511520737, + "grad_norm": 1.402874429077297, + 
"learning_rate": 8.523392168309832e-07, + "loss": 0.8186824321746826, + "step": 4947 + }, + { + "epoch": 1.1400921658986176, + "grad_norm": 1.1669467278440648, + "learning_rate": 8.519624200476676e-07, + "loss": 0.666642427444458, + "step": 4948 + }, + { + "epoch": 1.1403225806451613, + "grad_norm": 1.0160881327834055, + "learning_rate": 8.515856447518104e-07, + "loss": 0.7478682994842529, + "step": 4949 + }, + { + "epoch": 1.140552995391705, + "grad_norm": 1.2340329971083113, + "learning_rate": 8.512088909981007e-07, + "loss": 0.7527793645858765, + "step": 4950 + }, + { + "epoch": 1.1407834101382488, + "grad_norm": 1.136863530366948, + "learning_rate": 8.508321588412235e-07, + "loss": 0.7614094018936157, + "step": 4951 + }, + { + "epoch": 1.1410138248847925, + "grad_norm": 1.2371366016065355, + "learning_rate": 8.504554483358605e-07, + "loss": 0.8294994831085205, + "step": 4952 + }, + { + "epoch": 1.1412442396313365, + "grad_norm": 1.4759487382386114, + "learning_rate": 8.500787595366919e-07, + "loss": 0.8900095224380493, + "step": 4953 + }, + { + "epoch": 1.1414746543778802, + "grad_norm": 1.0721192735972314, + "learning_rate": 8.497020924983926e-07, + "loss": 0.8403744697570801, + "step": 4954 + }, + { + "epoch": 1.141705069124424, + "grad_norm": 1.0449510164412683, + "learning_rate": 8.493254472756355e-07, + "loss": 0.7046208381652832, + "step": 4955 + }, + { + "epoch": 1.1419354838709677, + "grad_norm": 1.3018714779233174, + "learning_rate": 8.489488239230904e-07, + "loss": 0.8226789832115173, + "step": 4956 + }, + { + "epoch": 1.1421658986175116, + "grad_norm": 1.058902427650911, + "learning_rate": 8.485722224954236e-07, + "loss": 0.7248969674110413, + "step": 4957 + }, + { + "epoch": 1.1423963133640553, + "grad_norm": 1.1327549620980084, + "learning_rate": 8.481956430472979e-07, + "loss": 0.8116840124130249, + "step": 4958 + }, + { + "epoch": 1.142626728110599, + "grad_norm": 1.062622286893391, + "learning_rate": 8.478190856333739e-07, + "loss": 
0.7534138560295105, + "step": 4959 + }, + { + "epoch": 1.1428571428571428, + "grad_norm": 1.3427980825750856, + "learning_rate": 8.474425503083082e-07, + "loss": 0.8945306539535522, + "step": 4960 + }, + { + "epoch": 1.1430875576036867, + "grad_norm": 1.1592346473165394, + "learning_rate": 8.47066037126754e-07, + "loss": 0.7554503083229065, + "step": 4961 + }, + { + "epoch": 1.1433179723502305, + "grad_norm": 1.4596388821753403, + "learning_rate": 8.466895461433625e-07, + "loss": 0.832726776599884, + "step": 4962 + }, + { + "epoch": 1.1435483870967742, + "grad_norm": 1.250046955776058, + "learning_rate": 8.463130774127804e-07, + "loss": 0.8312773704528809, + "step": 4963 + }, + { + "epoch": 1.143778801843318, + "grad_norm": 0.9153601791246997, + "learning_rate": 8.459366309896512e-07, + "loss": 0.6484537124633789, + "step": 4964 + }, + { + "epoch": 1.1440092165898617, + "grad_norm": 1.2863432770713337, + "learning_rate": 8.455602069286165e-07, + "loss": 0.9216604828834534, + "step": 4965 + }, + { + "epoch": 1.1442396313364056, + "grad_norm": 1.134985678431753, + "learning_rate": 8.451838052843131e-07, + "loss": 0.6213096380233765, + "step": 4966 + }, + { + "epoch": 1.1444700460829493, + "grad_norm": 0.9562822723791001, + "learning_rate": 8.448074261113756e-07, + "loss": 0.6873677968978882, + "step": 4967 + }, + { + "epoch": 1.144700460829493, + "grad_norm": 1.215560824144924, + "learning_rate": 8.444310694644348e-07, + "loss": 0.7883448600769043, + "step": 4968 + }, + { + "epoch": 1.1449308755760368, + "grad_norm": 1.1944176371651494, + "learning_rate": 8.440547353981178e-07, + "loss": 0.724172830581665, + "step": 4969 + }, + { + "epoch": 1.1451612903225807, + "grad_norm": 1.0792006702141475, + "learning_rate": 8.4367842396705e-07, + "loss": 0.7115252017974854, + "step": 4970 + }, + { + "epoch": 1.1453917050691245, + "grad_norm": 1.0823773323138404, + "learning_rate": 8.433021352258521e-07, + "loss": 0.7165110111236572, + "step": 4971 + }, + { + "epoch": 
1.1456221198156682, + "grad_norm": 1.0874360604645514, + "learning_rate": 8.429258692291413e-07, + "loss": 0.7563315629959106, + "step": 4972 + }, + { + "epoch": 1.145852534562212, + "grad_norm": 1.1334099478279698, + "learning_rate": 8.425496260315331e-07, + "loss": 0.7528449892997742, + "step": 4973 + }, + { + "epoch": 1.1460829493087559, + "grad_norm": 1.1141426795021205, + "learning_rate": 8.421734056876383e-07, + "loss": 0.7976171970367432, + "step": 4974 + }, + { + "epoch": 1.1463133640552996, + "grad_norm": 1.020985144100356, + "learning_rate": 8.417972082520644e-07, + "loss": 0.7498095035552979, + "step": 4975 + }, + { + "epoch": 1.1465437788018433, + "grad_norm": 1.3446642320448154, + "learning_rate": 8.414210337794165e-07, + "loss": 0.9568856954574585, + "step": 4976 + }, + { + "epoch": 1.146774193548387, + "grad_norm": 0.9499457055768262, + "learning_rate": 8.410448823242957e-07, + "loss": 0.6402908563613892, + "step": 4977 + }, + { + "epoch": 1.1470046082949308, + "grad_norm": 1.1759709167305108, + "learning_rate": 8.406687539412995e-07, + "loss": 0.8224657773971558, + "step": 4978 + }, + { + "epoch": 1.1472350230414747, + "grad_norm": 1.2886598107348421, + "learning_rate": 8.402926486850229e-07, + "loss": 0.7804544568061829, + "step": 4979 + }, + { + "epoch": 1.1474654377880185, + "grad_norm": 1.1861127295236977, + "learning_rate": 8.39916566610057e-07, + "loss": 0.7920527458190918, + "step": 4980 + }, + { + "epoch": 1.1476958525345622, + "grad_norm": 1.1244888328051699, + "learning_rate": 8.395405077709891e-07, + "loss": 0.7672078609466553, + "step": 4981 + }, + { + "epoch": 1.147926267281106, + "grad_norm": 1.2427545332028853, + "learning_rate": 8.391644722224047e-07, + "loss": 0.6997950077056885, + "step": 4982 + }, + { + "epoch": 1.1481566820276499, + "grad_norm": 1.057637628401912, + "learning_rate": 8.38788460018884e-07, + "loss": 0.7754349708557129, + "step": 4983 + }, + { + "epoch": 1.1483870967741936, + "grad_norm": 1.1458978330134115, + 
"learning_rate": 8.384124712150046e-07, + "loss": 0.706238329410553, + "step": 4984 + }, + { + "epoch": 1.1486175115207373, + "grad_norm": 0.8874927618348325, + "learning_rate": 8.380365058653415e-07, + "loss": 0.7115224599838257, + "step": 4985 + }, + { + "epoch": 1.148847926267281, + "grad_norm": 1.349182229007694, + "learning_rate": 8.376605640244652e-07, + "loss": 0.9026098847389221, + "step": 4986 + }, + { + "epoch": 1.149078341013825, + "grad_norm": 1.359066441839043, + "learning_rate": 8.372846457469428e-07, + "loss": 0.9123632311820984, + "step": 4987 + }, + { + "epoch": 1.1493087557603687, + "grad_norm": 1.1389830084868187, + "learning_rate": 8.369087510873389e-07, + "loss": 0.8365681171417236, + "step": 4988 + }, + { + "epoch": 1.1495391705069125, + "grad_norm": 1.1572327597453433, + "learning_rate": 8.36532880100214e-07, + "loss": 0.7506389617919922, + "step": 4989 + }, + { + "epoch": 1.1497695852534562, + "grad_norm": 1.1932866122784214, + "learning_rate": 8.361570328401246e-07, + "loss": 0.7736936807632446, + "step": 4990 + }, + { + "epoch": 1.15, + "grad_norm": 1.0939095427412457, + "learning_rate": 8.357812093616254e-07, + "loss": 0.7364238500595093, + "step": 4991 + }, + { + "epoch": 1.1502304147465439, + "grad_norm": 1.154457809524142, + "learning_rate": 8.354054097192659e-07, + "loss": 0.8588067293167114, + "step": 4992 + }, + { + "epoch": 1.1504608294930876, + "grad_norm": 1.0040260335609983, + "learning_rate": 8.350296339675938e-07, + "loss": 0.777319073677063, + "step": 4993 + }, + { + "epoch": 1.1506912442396313, + "grad_norm": 1.2472613338245313, + "learning_rate": 8.346538821611517e-07, + "loss": 0.6695454716682434, + "step": 4994 + }, + { + "epoch": 1.150921658986175, + "grad_norm": 1.1333204343634593, + "learning_rate": 8.342781543544796e-07, + "loss": 0.7785383462905884, + "step": 4995 + }, + { + "epoch": 1.1511520737327188, + "grad_norm": 1.2063502081148214, + "learning_rate": 8.339024506021143e-07, + "loss": 0.7386239767074585, + 
"step": 4996 + }, + { + "epoch": 1.1513824884792627, + "grad_norm": 1.015973129089863, + "learning_rate": 8.335267709585884e-07, + "loss": 0.8044750690460205, + "step": 4997 + }, + { + "epoch": 1.1516129032258065, + "grad_norm": 0.991689333823338, + "learning_rate": 8.331511154784307e-07, + "loss": 0.6925652623176575, + "step": 4998 + }, + { + "epoch": 1.1518433179723502, + "grad_norm": 1.1362021503644928, + "learning_rate": 8.327754842161684e-07, + "loss": 0.7906935214996338, + "step": 4999 + }, + { + "epoch": 1.1520737327188941, + "grad_norm": 1.0865966340855062, + "learning_rate": 8.323998772263231e-07, + "loss": 0.7131960988044739, + "step": 5000 + }, + { + "epoch": 1.1523041474654379, + "grad_norm": 1.0459163670419733, + "learning_rate": 8.320242945634132e-07, + "loss": 0.8412370085716248, + "step": 5001 + }, + { + "epoch": 1.1525345622119816, + "grad_norm": 1.219248495471204, + "learning_rate": 8.316487362819551e-07, + "loss": 0.7800952792167664, + "step": 5002 + }, + { + "epoch": 1.1527649769585253, + "grad_norm": 1.2269188284281454, + "learning_rate": 8.312732024364602e-07, + "loss": 0.8620247840881348, + "step": 5003 + }, + { + "epoch": 1.152995391705069, + "grad_norm": 1.1576962368399284, + "learning_rate": 8.30897693081436e-07, + "loss": 0.7551721334457397, + "step": 5004 + }, + { + "epoch": 1.153225806451613, + "grad_norm": 1.1081098689134552, + "learning_rate": 8.305222082713882e-07, + "loss": 0.8510593175888062, + "step": 5005 + }, + { + "epoch": 1.1534562211981567, + "grad_norm": 1.0356186889640762, + "learning_rate": 8.301467480608176e-07, + "loss": 0.6503845453262329, + "step": 5006 + }, + { + "epoch": 1.1536866359447004, + "grad_norm": 1.1593829978588668, + "learning_rate": 8.297713125042212e-07, + "loss": 0.7729237079620361, + "step": 5007 + }, + { + "epoch": 1.1539170506912442, + "grad_norm": 1.0812796919286354, + "learning_rate": 8.293959016560939e-07, + "loss": 0.77802574634552, + "step": 5008 + }, + { + "epoch": 1.154147465437788, + 
"grad_norm": 0.9915519400035699, + "learning_rate": 8.290205155709256e-07, + "loss": 0.7977825999259949, + "step": 5009 + }, + { + "epoch": 1.1543778801843319, + "grad_norm": 1.1128731733324948, + "learning_rate": 8.286451543032027e-07, + "loss": 0.7479745149612427, + "step": 5010 + }, + { + "epoch": 1.1546082949308756, + "grad_norm": 1.0554376798438097, + "learning_rate": 8.282698179074092e-07, + "loss": 0.7631532549858093, + "step": 5011 + }, + { + "epoch": 1.1548387096774193, + "grad_norm": 1.1424098237872247, + "learning_rate": 8.278945064380243e-07, + "loss": 0.7437061071395874, + "step": 5012 + }, + { + "epoch": 1.1550691244239633, + "grad_norm": 1.2208599961881346, + "learning_rate": 8.275192199495236e-07, + "loss": 0.9334282875061035, + "step": 5013 + }, + { + "epoch": 1.155299539170507, + "grad_norm": 1.1846438304674103, + "learning_rate": 8.2714395849638e-07, + "loss": 0.7119227647781372, + "step": 5014 + }, + { + "epoch": 1.1555299539170507, + "grad_norm": 1.202224273678675, + "learning_rate": 8.267687221330619e-07, + "loss": 0.8335816860198975, + "step": 5015 + }, + { + "epoch": 1.1557603686635944, + "grad_norm": 1.290989413518125, + "learning_rate": 8.263935109140347e-07, + "loss": 0.6130940914154053, + "step": 5016 + }, + { + "epoch": 1.1559907834101382, + "grad_norm": 1.1118999574659398, + "learning_rate": 8.260183248937595e-07, + "loss": 0.8223903179168701, + "step": 5017 + }, + { + "epoch": 1.1562211981566821, + "grad_norm": 1.1042026567968168, + "learning_rate": 8.256431641266938e-07, + "loss": 0.8024790287017822, + "step": 5018 + }, + { + "epoch": 1.1564516129032258, + "grad_norm": 1.2308316211864536, + "learning_rate": 8.252680286672924e-07, + "loss": 0.7425345182418823, + "step": 5019 + }, + { + "epoch": 1.1566820276497696, + "grad_norm": 0.9907420981370885, + "learning_rate": 8.248929185700053e-07, + "loss": 0.7729727029800415, + "step": 5020 + }, + { + "epoch": 1.1569124423963133, + "grad_norm": 1.096476255015683, + "learning_rate": 
8.245178338892788e-07, + "loss": 0.8451874256134033, + "step": 5021 + }, + { + "epoch": 1.157142857142857, + "grad_norm": 1.1584589365926052, + "learning_rate": 8.241427746795569e-07, + "loss": 0.8666542768478394, + "step": 5022 + }, + { + "epoch": 1.157373271889401, + "grad_norm": 1.2897904410488261, + "learning_rate": 8.237677409952784e-07, + "loss": 0.740352988243103, + "step": 5023 + }, + { + "epoch": 1.1576036866359447, + "grad_norm": 0.9937724952342799, + "learning_rate": 8.233927328908788e-07, + "loss": 0.6325985193252563, + "step": 5024 + }, + { + "epoch": 1.1578341013824884, + "grad_norm": 1.0099472902179978, + "learning_rate": 8.230177504207901e-07, + "loss": 0.8075892925262451, + "step": 5025 + }, + { + "epoch": 1.1580645161290322, + "grad_norm": 1.0459718249244707, + "learning_rate": 8.22642793639441e-07, + "loss": 0.7176432609558105, + "step": 5026 + }, + { + "epoch": 1.1582949308755761, + "grad_norm": 1.1804726429614583, + "learning_rate": 8.222678626012554e-07, + "loss": 0.7734829187393188, + "step": 5027 + }, + { + "epoch": 1.1585253456221198, + "grad_norm": 1.3220222245590558, + "learning_rate": 8.218929573606544e-07, + "loss": 0.8642655611038208, + "step": 5028 + }, + { + "epoch": 1.1587557603686636, + "grad_norm": 1.0337487495481472, + "learning_rate": 8.215180779720548e-07, + "loss": 0.7788450121879578, + "step": 5029 + }, + { + "epoch": 1.1589861751152073, + "grad_norm": 0.9361659768144168, + "learning_rate": 8.211432244898696e-07, + "loss": 0.7470313310623169, + "step": 5030 + }, + { + "epoch": 1.1592165898617512, + "grad_norm": 0.9907043815397547, + "learning_rate": 8.207683969685091e-07, + "loss": 0.7691675424575806, + "step": 5031 + }, + { + "epoch": 1.159447004608295, + "grad_norm": 0.9920310393320094, + "learning_rate": 8.203935954623783e-07, + "loss": 0.7060209512710571, + "step": 5032 + }, + { + "epoch": 1.1596774193548387, + "grad_norm": 1.189958639239752, + "learning_rate": 8.20018820025879e-07, + "loss": 0.7617488503456116, + "step": 
5033 + }, + { + "epoch": 1.1599078341013824, + "grad_norm": 1.2174023482004634, + "learning_rate": 8.196440707134102e-07, + "loss": 0.7016350626945496, + "step": 5034 + }, + { + "epoch": 1.1601382488479262, + "grad_norm": 1.3407340114210469, + "learning_rate": 8.192693475793657e-07, + "loss": 0.8375445604324341, + "step": 5035 + }, + { + "epoch": 1.16036866359447, + "grad_norm": 1.2333127293881232, + "learning_rate": 8.188946506781359e-07, + "loss": 0.8903663158416748, + "step": 5036 + }, + { + "epoch": 1.1605990783410138, + "grad_norm": 1.1046448662682735, + "learning_rate": 8.18519980064108e-07, + "loss": 0.7613073587417603, + "step": 5037 + }, + { + "epoch": 1.1608294930875576, + "grad_norm": 1.2358045096315418, + "learning_rate": 8.181453357916649e-07, + "loss": 0.7443521022796631, + "step": 5038 + }, + { + "epoch": 1.1610599078341013, + "grad_norm": 1.0132222940739166, + "learning_rate": 8.17770717915185e-07, + "loss": 0.7986443042755127, + "step": 5039 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 1.1475221794766963, + "learning_rate": 8.173961264890447e-07, + "loss": 0.7128815650939941, + "step": 5040 + }, + { + "epoch": 1.161520737327189, + "grad_norm": 2.1353174029488593, + "learning_rate": 8.170215615676144e-07, + "loss": 0.7189117074012756, + "step": 5041 + }, + { + "epoch": 1.1617511520737327, + "grad_norm": 1.0970239097626442, + "learning_rate": 8.166470232052626e-07, + "loss": 0.8358731269836426, + "step": 5042 + }, + { + "epoch": 1.1619815668202764, + "grad_norm": 1.3103703595946257, + "learning_rate": 8.162725114563527e-07, + "loss": 0.7734829187393188, + "step": 5043 + }, + { + "epoch": 1.1622119815668204, + "grad_norm": 1.0836793655881298, + "learning_rate": 8.158980263752443e-07, + "loss": 0.842268705368042, + "step": 5044 + }, + { + "epoch": 1.162442396313364, + "grad_norm": 1.0953254817646525, + "learning_rate": 8.155235680162937e-07, + "loss": 0.7973036766052246, + "step": 5045 + }, + { + "epoch": 1.1626728110599078, + "grad_norm": 
1.1431491680692596, + "learning_rate": 8.151491364338532e-07, + "loss": 0.743615984916687, + "step": 5046 + }, + { + "epoch": 1.1629032258064516, + "grad_norm": 1.2354800674331334, + "learning_rate": 8.147747316822705e-07, + "loss": 0.799458384513855, + "step": 5047 + }, + { + "epoch": 1.1631336405529953, + "grad_norm": 1.4365906916451476, + "learning_rate": 8.144003538158907e-07, + "loss": 0.8368128538131714, + "step": 5048 + }, + { + "epoch": 1.1633640552995392, + "grad_norm": 1.0543438991079201, + "learning_rate": 8.140260028890537e-07, + "loss": 0.8543322086334229, + "step": 5049 + }, + { + "epoch": 1.163594470046083, + "grad_norm": 1.4010693577495907, + "learning_rate": 8.136516789560957e-07, + "loss": 0.9586522579193115, + "step": 5050 + }, + { + "epoch": 1.1638248847926267, + "grad_norm": 1.0831898931931903, + "learning_rate": 8.132773820713505e-07, + "loss": 0.7781316041946411, + "step": 5051 + }, + { + "epoch": 1.1640552995391704, + "grad_norm": 1.1820241176000723, + "learning_rate": 8.129031122891459e-07, + "loss": 0.7726340293884277, + "step": 5052 + }, + { + "epoch": 1.1642857142857144, + "grad_norm": 1.2561245635498344, + "learning_rate": 8.125288696638064e-07, + "loss": 0.886093258857727, + "step": 5053 + }, + { + "epoch": 1.164516129032258, + "grad_norm": 1.1568232893052595, + "learning_rate": 8.121546542496538e-07, + "loss": 0.7896960973739624, + "step": 5054 + }, + { + "epoch": 1.1647465437788018, + "grad_norm": 1.066019166680275, + "learning_rate": 8.117804661010045e-07, + "loss": 0.8272452354431152, + "step": 5055 + }, + { + "epoch": 1.1649769585253456, + "grad_norm": 1.216096321256879, + "learning_rate": 8.11406305272171e-07, + "loss": 0.8452264070510864, + "step": 5056 + }, + { + "epoch": 1.1652073732718895, + "grad_norm": 1.1423033593169452, + "learning_rate": 8.11032171817463e-07, + "loss": 0.7973369359970093, + "step": 5057 + }, + { + "epoch": 1.1654377880184332, + "grad_norm": 0.9573952961126706, + "learning_rate": 8.10658065791185e-07, + 
"loss": 0.8045153617858887, + "step": 5058 + }, + { + "epoch": 1.165668202764977, + "grad_norm": 1.2070626820317865, + "learning_rate": 8.102839872476378e-07, + "loss": 0.8921254873275757, + "step": 5059 + }, + { + "epoch": 1.1658986175115207, + "grad_norm": 1.1196640968944265, + "learning_rate": 8.099099362411191e-07, + "loss": 0.7633669376373291, + "step": 5060 + }, + { + "epoch": 1.1661290322580644, + "grad_norm": 1.4676357149183228, + "learning_rate": 8.095359128259214e-07, + "loss": 0.9303205013275146, + "step": 5061 + }, + { + "epoch": 1.1663594470046084, + "grad_norm": 1.1532839170590041, + "learning_rate": 8.091619170563335e-07, + "loss": 0.867104709148407, + "step": 5062 + }, + { + "epoch": 1.166589861751152, + "grad_norm": 1.2071495700843942, + "learning_rate": 8.087879489866409e-07, + "loss": 0.8136844038963318, + "step": 5063 + }, + { + "epoch": 1.1668202764976958, + "grad_norm": 1.5482117252744063, + "learning_rate": 8.084140086711246e-07, + "loss": 0.9016939997673035, + "step": 5064 + }, + { + "epoch": 1.1670506912442395, + "grad_norm": 1.5795186850129557, + "learning_rate": 8.080400961640608e-07, + "loss": 0.8621236085891724, + "step": 5065 + }, + { + "epoch": 1.1672811059907835, + "grad_norm": 1.336449231038986, + "learning_rate": 8.076662115197234e-07, + "loss": 0.856648862361908, + "step": 5066 + }, + { + "epoch": 1.1675115207373272, + "grad_norm": 1.3107118910408024, + "learning_rate": 8.072923547923805e-07, + "loss": 0.7752784490585327, + "step": 5067 + }, + { + "epoch": 1.167741935483871, + "grad_norm": 1.3093385224686542, + "learning_rate": 8.069185260362974e-07, + "loss": 0.8573904037475586, + "step": 5068 + }, + { + "epoch": 1.1679723502304147, + "grad_norm": 1.1636599679682322, + "learning_rate": 8.065447253057347e-07, + "loss": 0.724372148513794, + "step": 5069 + }, + { + "epoch": 1.1682027649769586, + "grad_norm": 1.146758460237727, + "learning_rate": 8.061709526549486e-07, + "loss": 0.7428436875343323, + "step": 5070 + }, + { + "epoch": 
1.1684331797235024, + "grad_norm": 1.273017047999111, + "learning_rate": 8.057972081381925e-07, + "loss": 0.8888595104217529, + "step": 5071 + }, + { + "epoch": 1.168663594470046, + "grad_norm": 0.9497262022662447, + "learning_rate": 8.054234918097146e-07, + "loss": 0.5753290057182312, + "step": 5072 + }, + { + "epoch": 1.1688940092165898, + "grad_norm": 1.037170746248572, + "learning_rate": 8.050498037237589e-07, + "loss": 0.6724086999893188, + "step": 5073 + }, + { + "epoch": 1.1691244239631335, + "grad_norm": 1.1504888789916348, + "learning_rate": 8.046761439345664e-07, + "loss": 0.7410751581192017, + "step": 5074 + }, + { + "epoch": 1.1693548387096775, + "grad_norm": 1.2658920818717738, + "learning_rate": 8.043025124963731e-07, + "loss": 0.8522979021072388, + "step": 5075 + }, + { + "epoch": 1.1695852534562212, + "grad_norm": 0.9918624551952729, + "learning_rate": 8.039289094634109e-07, + "loss": 0.6243441700935364, + "step": 5076 + }, + { + "epoch": 1.169815668202765, + "grad_norm": 1.113826210544245, + "learning_rate": 8.03555334889908e-07, + "loss": 0.9332150220870972, + "step": 5077 + }, + { + "epoch": 1.1700460829493087, + "grad_norm": 1.17170377289517, + "learning_rate": 8.031817888300883e-07, + "loss": 0.7620645761489868, + "step": 5078 + }, + { + "epoch": 1.1702764976958526, + "grad_norm": 1.2693395517069683, + "learning_rate": 8.028082713381708e-07, + "loss": 0.6983245015144348, + "step": 5079 + }, + { + "epoch": 1.1705069124423964, + "grad_norm": 1.049572082944252, + "learning_rate": 8.024347824683723e-07, + "loss": 0.6220129728317261, + "step": 5080 + }, + { + "epoch": 1.17073732718894, + "grad_norm": 1.0906919021349344, + "learning_rate": 8.020613222749034e-07, + "loss": 0.7363810539245605, + "step": 5081 + }, + { + "epoch": 1.1709677419354838, + "grad_norm": 1.1450127350480972, + "learning_rate": 8.016878908119713e-07, + "loss": 0.6864198446273804, + "step": 5082 + }, + { + "epoch": 1.1711981566820278, + "grad_norm": 1.061738817269073, + 
"learning_rate": 8.013144881337795e-07, + "loss": 0.758607029914856, + "step": 5083 + }, + { + "epoch": 1.1714285714285715, + "grad_norm": 1.038630253415404, + "learning_rate": 8.009411142945269e-07, + "loss": 0.7519336938858032, + "step": 5084 + }, + { + "epoch": 1.1716589861751152, + "grad_norm": 1.132431622302542, + "learning_rate": 8.005677693484076e-07, + "loss": 0.7681798934936523, + "step": 5085 + }, + { + "epoch": 1.171889400921659, + "grad_norm": 1.1022208744006678, + "learning_rate": 8.00194453349613e-07, + "loss": 0.6808522939682007, + "step": 5086 + }, + { + "epoch": 1.1721198156682027, + "grad_norm": 1.039877694159321, + "learning_rate": 7.99821166352329e-07, + "loss": 0.7373358607292175, + "step": 5087 + }, + { + "epoch": 1.1723502304147466, + "grad_norm": 1.0199898679930943, + "learning_rate": 7.994479084107374e-07, + "loss": 0.7272510528564453, + "step": 5088 + }, + { + "epoch": 1.1725806451612903, + "grad_norm": 1.2473385255320408, + "learning_rate": 7.990746795790166e-07, + "loss": 0.845584511756897, + "step": 5089 + }, + { + "epoch": 1.172811059907834, + "grad_norm": 1.188342902392479, + "learning_rate": 7.987014799113397e-07, + "loss": 0.7751157283782959, + "step": 5090 + }, + { + "epoch": 1.1730414746543778, + "grad_norm": 1.1193246813934836, + "learning_rate": 7.98328309461877e-07, + "loss": 0.679701566696167, + "step": 5091 + }, + { + "epoch": 1.1732718894009218, + "grad_norm": 1.1116687434739936, + "learning_rate": 7.979551682847932e-07, + "loss": 0.7630679607391357, + "step": 5092 + }, + { + "epoch": 1.1735023041474655, + "grad_norm": 1.0309555153446328, + "learning_rate": 7.975820564342487e-07, + "loss": 0.700912594795227, + "step": 5093 + }, + { + "epoch": 1.1737327188940092, + "grad_norm": 1.097867809116453, + "learning_rate": 7.972089739644012e-07, + "loss": 0.6789706945419312, + "step": 5094 + }, + { + "epoch": 1.173963133640553, + "grad_norm": 1.411041629986285, + "learning_rate": 7.968359209294027e-07, + "loss": 0.6744855642318726, + 
"step": 5095 + }, + { + "epoch": 1.1741935483870969, + "grad_norm": 1.060959542495881, + "learning_rate": 7.964628973834011e-07, + "loss": 0.7551798820495605, + "step": 5096 + }, + { + "epoch": 1.1744239631336406, + "grad_norm": 0.9743982939550204, + "learning_rate": 7.960899033805407e-07, + "loss": 0.711478054523468, + "step": 5097 + }, + { + "epoch": 1.1746543778801843, + "grad_norm": 1.1281696794434548, + "learning_rate": 7.95716938974961e-07, + "loss": 0.7464019060134888, + "step": 5098 + }, + { + "epoch": 1.174884792626728, + "grad_norm": 1.2269121334355921, + "learning_rate": 7.953440042207966e-07, + "loss": 0.7667930126190186, + "step": 5099 + }, + { + "epoch": 1.1751152073732718, + "grad_norm": 0.9314104563097803, + "learning_rate": 7.949710991721796e-07, + "loss": 0.7574796676635742, + "step": 5100 + }, + { + "epoch": 1.1753456221198157, + "grad_norm": 0.9285474016256665, + "learning_rate": 7.945982238832361e-07, + "loss": 0.6627304553985596, + "step": 5101 + }, + { + "epoch": 1.1755760368663595, + "grad_norm": 1.2503590742658475, + "learning_rate": 7.942253784080879e-07, + "loss": 0.6803916692733765, + "step": 5102 + }, + { + "epoch": 1.1758064516129032, + "grad_norm": 1.1622603764445048, + "learning_rate": 7.938525628008541e-07, + "loss": 0.7107337713241577, + "step": 5103 + }, + { + "epoch": 1.176036866359447, + "grad_norm": 1.0411872319848583, + "learning_rate": 7.934797771156481e-07, + "loss": 0.7669517993927002, + "step": 5104 + }, + { + "epoch": 1.1762672811059907, + "grad_norm": 1.185214338142044, + "learning_rate": 7.931070214065787e-07, + "loss": 0.7431854605674744, + "step": 5105 + }, + { + "epoch": 1.1764976958525346, + "grad_norm": 1.121798206744332, + "learning_rate": 7.927342957277512e-07, + "loss": 0.7778047323226929, + "step": 5106 + }, + { + "epoch": 1.1767281105990783, + "grad_norm": 1.1095356364162186, + "learning_rate": 7.923616001332666e-07, + "loss": 0.7759886980056763, + "step": 5107 + }, + { + "epoch": 1.176958525345622, + 
"grad_norm": 1.236811676128496, + "learning_rate": 7.919889346772206e-07, + "loss": 0.8010379076004028, + "step": 5108 + }, + { + "epoch": 1.177188940092166, + "grad_norm": 1.06629818182004, + "learning_rate": 7.916162994137055e-07, + "loss": 0.6671626567840576, + "step": 5109 + }, + { + "epoch": 1.1774193548387097, + "grad_norm": 1.3043487682811514, + "learning_rate": 7.912436943968088e-07, + "loss": 0.7521620988845825, + "step": 5110 + }, + { + "epoch": 1.1776497695852535, + "grad_norm": 1.0243889894502596, + "learning_rate": 7.908711196806131e-07, + "loss": 0.7626729011535645, + "step": 5111 + }, + { + "epoch": 1.1778801843317972, + "grad_norm": 1.2636422633100723, + "learning_rate": 7.904985753191979e-07, + "loss": 0.8247047066688538, + "step": 5112 + }, + { + "epoch": 1.178110599078341, + "grad_norm": 0.9958902943746148, + "learning_rate": 7.901260613666372e-07, + "loss": 0.6851831078529358, + "step": 5113 + }, + { + "epoch": 1.1783410138248849, + "grad_norm": 1.114469339271613, + "learning_rate": 7.897535778770003e-07, + "loss": 0.7752102613449097, + "step": 5114 + }, + { + "epoch": 1.1785714285714286, + "grad_norm": 1.0998339013097813, + "learning_rate": 7.893811249043537e-07, + "loss": 0.8885148167610168, + "step": 5115 + }, + { + "epoch": 1.1788018433179723, + "grad_norm": 1.3062040351627935, + "learning_rate": 7.890087025027579e-07, + "loss": 0.7530373334884644, + "step": 5116 + }, + { + "epoch": 1.179032258064516, + "grad_norm": 1.0400370692656624, + "learning_rate": 7.886363107262697e-07, + "loss": 0.7795672416687012, + "step": 5117 + }, + { + "epoch": 1.1792626728110598, + "grad_norm": 1.0719443222612952, + "learning_rate": 7.882639496289413e-07, + "loss": 0.7563966512680054, + "step": 5118 + }, + { + "epoch": 1.1794930875576037, + "grad_norm": 0.9799024359449507, + "learning_rate": 7.878916192648198e-07, + "loss": 0.7218793630599976, + "step": 5119 + }, + { + "epoch": 1.1797235023041475, + "grad_norm": 1.3292879414667447, + "learning_rate": 
7.875193196879494e-07, + "loss": 0.8213250637054443, + "step": 5120 + }, + { + "epoch": 1.1799539170506912, + "grad_norm": 1.118163280715499, + "learning_rate": 7.871470509523685e-07, + "loss": 0.8134827613830566, + "step": 5121 + }, + { + "epoch": 1.1801843317972351, + "grad_norm": 0.9613119464109229, + "learning_rate": 7.867748131121109e-07, + "loss": 0.6135407090187073, + "step": 5122 + }, + { + "epoch": 1.1804147465437789, + "grad_norm": 1.2999694720426915, + "learning_rate": 7.864026062212073e-07, + "loss": 0.8110366463661194, + "step": 5123 + }, + { + "epoch": 1.1806451612903226, + "grad_norm": 0.9962674732824631, + "learning_rate": 7.860304303336827e-07, + "loss": 0.6723964214324951, + "step": 5124 + }, + { + "epoch": 1.1808755760368663, + "grad_norm": 1.2942490465484493, + "learning_rate": 7.856582855035577e-07, + "loss": 0.8308886885643005, + "step": 5125 + }, + { + "epoch": 1.18110599078341, + "grad_norm": 1.023999175845692, + "learning_rate": 7.852861717848488e-07, + "loss": 0.7960010766983032, + "step": 5126 + }, + { + "epoch": 1.181336405529954, + "grad_norm": 1.2456351777125307, + "learning_rate": 7.84914089231568e-07, + "loss": 0.7931640148162842, + "step": 5127 + }, + { + "epoch": 1.1815668202764977, + "grad_norm": 1.2288164842517166, + "learning_rate": 7.845420378977222e-07, + "loss": 0.762995719909668, + "step": 5128 + }, + { + "epoch": 1.1817972350230415, + "grad_norm": 1.373671152705427, + "learning_rate": 7.841700178373146e-07, + "loss": 0.9416301250457764, + "step": 5129 + }, + { + "epoch": 1.1820276497695852, + "grad_norm": 1.0032147289786453, + "learning_rate": 7.837980291043431e-07, + "loss": 0.7666923999786377, + "step": 5130 + }, + { + "epoch": 1.182258064516129, + "grad_norm": 1.1123898953678502, + "learning_rate": 7.834260717528012e-07, + "loss": 0.7668861150741577, + "step": 5131 + }, + { + "epoch": 1.1824884792626729, + "grad_norm": 1.1236616956881595, + "learning_rate": 7.830541458366786e-07, + "loss": 0.7576566934585571, + "step": 
5132 + }, + { + "epoch": 1.1827188940092166, + "grad_norm": 1.0432406760791426, + "learning_rate": 7.826822514099595e-07, + "loss": 0.6288204193115234, + "step": 5133 + }, + { + "epoch": 1.1829493087557603, + "grad_norm": 1.2747953745069134, + "learning_rate": 7.823103885266236e-07, + "loss": 0.8332630395889282, + "step": 5134 + }, + { + "epoch": 1.1831797235023043, + "grad_norm": 1.3987532245853456, + "learning_rate": 7.819385572406469e-07, + "loss": 0.9294546246528625, + "step": 5135 + }, + { + "epoch": 1.183410138248848, + "grad_norm": 0.9911973140133253, + "learning_rate": 7.81566757606e-07, + "loss": 0.637617826461792, + "step": 5136 + }, + { + "epoch": 1.1836405529953917, + "grad_norm": 1.2295561738436023, + "learning_rate": 7.81194989676649e-07, + "loss": 0.7614878416061401, + "step": 5137 + }, + { + "epoch": 1.1838709677419355, + "grad_norm": 1.2939539056978149, + "learning_rate": 7.808232535065556e-07, + "loss": 0.8612164258956909, + "step": 5138 + }, + { + "epoch": 1.1841013824884792, + "grad_norm": 1.0758125620247463, + "learning_rate": 7.804515491496765e-07, + "loss": 0.7530151605606079, + "step": 5139 + }, + { + "epoch": 1.1843317972350231, + "grad_norm": 0.9883281570065391, + "learning_rate": 7.800798766599648e-07, + "loss": 0.7739782929420471, + "step": 5140 + }, + { + "epoch": 1.1845622119815669, + "grad_norm": 1.0835226521428547, + "learning_rate": 7.797082360913678e-07, + "loss": 0.7992277145385742, + "step": 5141 + }, + { + "epoch": 1.1847926267281106, + "grad_norm": 1.2343955942215838, + "learning_rate": 7.793366274978284e-07, + "loss": 0.8744574785232544, + "step": 5142 + }, + { + "epoch": 1.1850230414746543, + "grad_norm": 0.9992165946111031, + "learning_rate": 7.789650509332857e-07, + "loss": 0.7522493600845337, + "step": 5143 + }, + { + "epoch": 1.185253456221198, + "grad_norm": 1.1095107175779666, + "learning_rate": 7.785935064516733e-07, + "loss": 0.8811007142066956, + "step": 5144 + }, + { + "epoch": 1.185483870967742, + "grad_norm": 
0.9512882648642599, + "learning_rate": 7.782219941069201e-07, + "loss": 0.8141417503356934, + "step": 5145 + }, + { + "epoch": 1.1857142857142857, + "grad_norm": 1.3048397777053706, + "learning_rate": 7.778505139529509e-07, + "loss": 0.9473680257797241, + "step": 5146 + }, + { + "epoch": 1.1859447004608294, + "grad_norm": 1.1561666933094623, + "learning_rate": 7.774790660436857e-07, + "loss": 0.740132212638855, + "step": 5147 + }, + { + "epoch": 1.1861751152073732, + "grad_norm": 1.1265716565789026, + "learning_rate": 7.771076504330392e-07, + "loss": 0.7904594540596008, + "step": 5148 + }, + { + "epoch": 1.1864055299539171, + "grad_norm": 1.1481555737803508, + "learning_rate": 7.767362671749224e-07, + "loss": 0.8085094690322876, + "step": 5149 + }, + { + "epoch": 1.1866359447004609, + "grad_norm": 1.3362082879917547, + "learning_rate": 7.76364916323241e-07, + "loss": 0.6954756379127502, + "step": 5150 + }, + { + "epoch": 1.1868663594470046, + "grad_norm": 1.175085216674836, + "learning_rate": 7.759935979318953e-07, + "loss": 0.8575167059898376, + "step": 5151 + }, + { + "epoch": 1.1870967741935483, + "grad_norm": 0.9330545417113619, + "learning_rate": 7.756223120547829e-07, + "loss": 0.6125110387802124, + "step": 5152 + }, + { + "epoch": 1.1873271889400923, + "grad_norm": 1.1387987197615417, + "learning_rate": 7.752510587457949e-07, + "loss": 0.7737400531768799, + "step": 5153 + }, + { + "epoch": 1.187557603686636, + "grad_norm": 0.9473095115528148, + "learning_rate": 7.748798380588177e-07, + "loss": 0.7300955653190613, + "step": 5154 + }, + { + "epoch": 1.1877880184331797, + "grad_norm": 0.9479432315278626, + "learning_rate": 7.745086500477343e-07, + "loss": 0.7974356412887573, + "step": 5155 + }, + { + "epoch": 1.1880184331797234, + "grad_norm": 1.120213603018525, + "learning_rate": 7.74137494766422e-07, + "loss": 0.8158693313598633, + "step": 5156 + }, + { + "epoch": 1.1882488479262672, + "grad_norm": 0.9086968377624679, + "learning_rate": 7.737663722687531e-07, 
+ "loss": 0.6656177639961243, + "step": 5157 + }, + { + "epoch": 1.1884792626728111, + "grad_norm": 1.284345958176322, + "learning_rate": 7.733952826085958e-07, + "loss": 0.7796640992164612, + "step": 5158 + }, + { + "epoch": 1.1887096774193548, + "grad_norm": 1.1079992534891525, + "learning_rate": 7.730242258398135e-07, + "loss": 0.9224779009819031, + "step": 5159 + }, + { + "epoch": 1.1889400921658986, + "grad_norm": 1.2013047291849663, + "learning_rate": 7.726532020162639e-07, + "loss": 0.7105277180671692, + "step": 5160 + }, + { + "epoch": 1.1891705069124423, + "grad_norm": 0.9139263319393289, + "learning_rate": 7.722822111918012e-07, + "loss": 0.5793930292129517, + "step": 5161 + }, + { + "epoch": 1.1894009216589863, + "grad_norm": 0.9419478266668957, + "learning_rate": 7.719112534202743e-07, + "loss": 0.7319367527961731, + "step": 5162 + }, + { + "epoch": 1.18963133640553, + "grad_norm": 1.182614737199728, + "learning_rate": 7.715403287555266e-07, + "loss": 0.7517954111099243, + "step": 5163 + }, + { + "epoch": 1.1898617511520737, + "grad_norm": 1.1800441614309307, + "learning_rate": 7.711694372513981e-07, + "loss": 0.8633241057395935, + "step": 5164 + }, + { + "epoch": 1.1900921658986174, + "grad_norm": 1.280920610105802, + "learning_rate": 7.707985789617227e-07, + "loss": 0.6453210115432739, + "step": 5165 + }, + { + "epoch": 1.1903225806451614, + "grad_norm": 1.1209224749220659, + "learning_rate": 7.704277539403303e-07, + "loss": 0.7609909772872925, + "step": 5166 + }, + { + "epoch": 1.1905529953917051, + "grad_norm": 1.1829891287159422, + "learning_rate": 7.700569622410453e-07, + "loss": 0.7419755458831787, + "step": 5167 + }, + { + "epoch": 1.1907834101382488, + "grad_norm": 1.0759571852853795, + "learning_rate": 7.696862039176879e-07, + "loss": 0.849078357219696, + "step": 5168 + }, + { + "epoch": 1.1910138248847926, + "grad_norm": 1.3077976619104341, + "learning_rate": 7.693154790240732e-07, + "loss": 0.8147921562194824, + "step": 5169 + }, + { + 
"epoch": 1.1912442396313363, + "grad_norm": 1.1349568865686221, + "learning_rate": 7.689447876140114e-07, + "loss": 0.7660118937492371, + "step": 5170 + }, + { + "epoch": 1.1914746543778802, + "grad_norm": 0.9919046297525586, + "learning_rate": 7.685741297413075e-07, + "loss": 0.7775185108184814, + "step": 5171 + }, + { + "epoch": 1.191705069124424, + "grad_norm": 1.0634336005518812, + "learning_rate": 7.682035054597624e-07, + "loss": 0.7184321880340576, + "step": 5172 + }, + { + "epoch": 1.1919354838709677, + "grad_norm": 0.9191067866194278, + "learning_rate": 7.678329148231719e-07, + "loss": 0.7108585834503174, + "step": 5173 + }, + { + "epoch": 1.1921658986175114, + "grad_norm": 1.169972531551494, + "learning_rate": 7.674623578853259e-07, + "loss": 0.7252670526504517, + "step": 5174 + }, + { + "epoch": 1.1923963133640554, + "grad_norm": 1.0227424567448893, + "learning_rate": 7.670918347000113e-07, + "loss": 0.818352460861206, + "step": 5175 + }, + { + "epoch": 1.192626728110599, + "grad_norm": 0.8768631462521176, + "learning_rate": 7.667213453210086e-07, + "loss": 0.6538013815879822, + "step": 5176 + }, + { + "epoch": 1.1928571428571428, + "grad_norm": 1.1216359209528128, + "learning_rate": 7.663508898020935e-07, + "loss": 0.7058148384094238, + "step": 5177 + }, + { + "epoch": 1.1930875576036866, + "grad_norm": 1.0528263608484594, + "learning_rate": 7.659804681970377e-07, + "loss": 0.7003160715103149, + "step": 5178 + }, + { + "epoch": 1.1933179723502305, + "grad_norm": 1.2339709506043992, + "learning_rate": 7.656100805596072e-07, + "loss": 0.84567791223526, + "step": 5179 + }, + { + "epoch": 1.1935483870967742, + "grad_norm": 1.239861543806107, + "learning_rate": 7.652397269435626e-07, + "loss": 0.7994743585586548, + "step": 5180 + }, + { + "epoch": 1.193778801843318, + "grad_norm": 1.3106444419652792, + "learning_rate": 7.648694074026615e-07, + "loss": 0.8177791833877563, + "step": 5181 + }, + { + "epoch": 1.1940092165898617, + "grad_norm": 1.362939104353802, 
+ "learning_rate": 7.644991219906545e-07, + "loss": 0.6663975715637207, + "step": 5182 + }, + { + "epoch": 1.1942396313364054, + "grad_norm": 1.1422405746222943, + "learning_rate": 7.641288707612878e-07, + "loss": 0.8275883197784424, + "step": 5183 + }, + { + "epoch": 1.1944700460829494, + "grad_norm": 1.1201157873973466, + "learning_rate": 7.637586537683036e-07, + "loss": 0.7710767388343811, + "step": 5184 + }, + { + "epoch": 1.194700460829493, + "grad_norm": 1.1629669577400157, + "learning_rate": 7.633884710654382e-07, + "loss": 0.7628582715988159, + "step": 5185 + }, + { + "epoch": 1.1949308755760368, + "grad_norm": 1.3793540006541976, + "learning_rate": 7.630183227064227e-07, + "loss": 0.7002676725387573, + "step": 5186 + }, + { + "epoch": 1.1951612903225806, + "grad_norm": 0.9948455527839576, + "learning_rate": 7.626482087449841e-07, + "loss": 0.8272073268890381, + "step": 5187 + }, + { + "epoch": 1.1953917050691245, + "grad_norm": 1.0711227380559258, + "learning_rate": 7.622781292348435e-07, + "loss": 0.7881417274475098, + "step": 5188 + }, + { + "epoch": 1.1956221198156682, + "grad_norm": 1.0728428578693516, + "learning_rate": 7.61908084229718e-07, + "loss": 0.797294020652771, + "step": 5189 + }, + { + "epoch": 1.195852534562212, + "grad_norm": 1.0264450399364256, + "learning_rate": 7.615380737833191e-07, + "loss": 0.7752290964126587, + "step": 5190 + }, + { + "epoch": 1.1960829493087557, + "grad_norm": 1.0830464595218987, + "learning_rate": 7.611680979493525e-07, + "loss": 0.7299143075942993, + "step": 5191 + }, + { + "epoch": 1.1963133640552996, + "grad_norm": 1.4839567137751186, + "learning_rate": 7.60798156781521e-07, + "loss": 0.6749997138977051, + "step": 5192 + }, + { + "epoch": 1.1965437788018434, + "grad_norm": 1.2717197322235172, + "learning_rate": 7.6042825033352e-07, + "loss": 0.7933796048164368, + "step": 5193 + }, + { + "epoch": 1.196774193548387, + "grad_norm": 1.1254669600910374, + "learning_rate": 7.600583786590411e-07, + "loss": 
0.7214919328689575, + "step": 5194 + }, + { + "epoch": 1.1970046082949308, + "grad_norm": 1.0000165841598083, + "learning_rate": 7.596885418117713e-07, + "loss": 0.7804256081581116, + "step": 5195 + }, + { + "epoch": 1.1972350230414746, + "grad_norm": 1.2738023107912249, + "learning_rate": 7.593187398453915e-07, + "loss": 0.7615138292312622, + "step": 5196 + }, + { + "epoch": 1.1974654377880185, + "grad_norm": 1.0493977127227612, + "learning_rate": 7.589489728135778e-07, + "loss": 0.8473657369613647, + "step": 5197 + }, + { + "epoch": 1.1976958525345622, + "grad_norm": 1.2204301678409606, + "learning_rate": 7.585792407700018e-07, + "loss": 0.7302027940750122, + "step": 5198 + }, + { + "epoch": 1.197926267281106, + "grad_norm": 1.123276567811957, + "learning_rate": 7.582095437683294e-07, + "loss": 0.7631692886352539, + "step": 5199 + }, + { + "epoch": 1.1981566820276497, + "grad_norm": 1.339389807954867, + "learning_rate": 7.578398818622211e-07, + "loss": 0.7982754707336426, + "step": 5200 + }, + { + "epoch": 1.1983870967741936, + "grad_norm": 1.3949436336418501, + "learning_rate": 7.574702551053339e-07, + "loss": 0.8445635437965393, + "step": 5201 + }, + { + "epoch": 1.1986175115207374, + "grad_norm": 1.267881130363425, + "learning_rate": 7.571006635513182e-07, + "loss": 0.8486276268959045, + "step": 5202 + }, + { + "epoch": 1.198847926267281, + "grad_norm": 1.2841422228776138, + "learning_rate": 7.567311072538191e-07, + "loss": 0.8433184623718262, + "step": 5203 + }, + { + "epoch": 1.1990783410138248, + "grad_norm": 1.5895945882971518, + "learning_rate": 7.56361586266478e-07, + "loss": 0.9772260189056396, + "step": 5204 + }, + { + "epoch": 1.1993087557603688, + "grad_norm": 1.1927959868338558, + "learning_rate": 7.559921006429304e-07, + "loss": 0.8349692821502686, + "step": 5205 + }, + { + "epoch": 1.1995391705069125, + "grad_norm": 1.070076083870323, + "learning_rate": 7.556226504368059e-07, + "loss": 0.7454575300216675, + "step": 5206 + }, + { + "epoch": 
1.1997695852534562, + "grad_norm": 0.882927792535501, + "learning_rate": 7.552532357017303e-07, + "loss": 0.6680991649627686, + "step": 5207 + }, + { + "epoch": 1.2, + "grad_norm": 1.1844993546767875, + "learning_rate": 7.54883856491324e-07, + "loss": 0.6528318524360657, + "step": 5208 + }, + { + "epoch": 1.2002304147465437, + "grad_norm": 1.0482736751922475, + "learning_rate": 7.545145128592008e-07, + "loss": 0.7711834907531738, + "step": 5209 + }, + { + "epoch": 1.2004608294930876, + "grad_norm": 1.022603342926927, + "learning_rate": 7.541452048589714e-07, + "loss": 0.6378746628761292, + "step": 5210 + }, + { + "epoch": 1.2006912442396314, + "grad_norm": 0.9309859008896244, + "learning_rate": 7.537759325442402e-07, + "loss": 0.7489340305328369, + "step": 5211 + }, + { + "epoch": 1.200921658986175, + "grad_norm": 1.0825673838806515, + "learning_rate": 7.53406695968606e-07, + "loss": 0.7869534492492676, + "step": 5212 + }, + { + "epoch": 1.2011520737327188, + "grad_norm": 1.1316888770375757, + "learning_rate": 7.530374951856637e-07, + "loss": 0.7252482175827026, + "step": 5213 + }, + { + "epoch": 1.2013824884792628, + "grad_norm": 1.1337087819491523, + "learning_rate": 7.526683302490018e-07, + "loss": 0.763259768486023, + "step": 5214 + }, + { + "epoch": 1.2016129032258065, + "grad_norm": 1.405277715760194, + "learning_rate": 7.522992012122046e-07, + "loss": 0.8135688304901123, + "step": 5215 + }, + { + "epoch": 1.2018433179723502, + "grad_norm": 1.5589534049714566, + "learning_rate": 7.519301081288504e-07, + "loss": 0.9282290935516357, + "step": 5216 + }, + { + "epoch": 1.202073732718894, + "grad_norm": 1.2621340712897178, + "learning_rate": 7.515610510525125e-07, + "loss": 0.7968727946281433, + "step": 5217 + }, + { + "epoch": 1.202304147465438, + "grad_norm": 1.4154309582650375, + "learning_rate": 7.511920300367594e-07, + "loss": 0.9495606422424316, + "step": 5218 + }, + { + "epoch": 1.2025345622119816, + "grad_norm": 1.120709992771365, + "learning_rate": 
7.508230451351537e-07, + "loss": 0.6790425181388855, + "step": 5219 + }, + { + "epoch": 1.2027649769585254, + "grad_norm": 1.1216778132469425, + "learning_rate": 7.504540964012527e-07, + "loss": 0.7269036173820496, + "step": 5220 + }, + { + "epoch": 1.202995391705069, + "grad_norm": 1.4394573291388193, + "learning_rate": 7.500851838886097e-07, + "loss": 0.820799708366394, + "step": 5221 + }, + { + "epoch": 1.2032258064516128, + "grad_norm": 1.1080457725700354, + "learning_rate": 7.497163076507715e-07, + "loss": 0.7693401575088501, + "step": 5222 + }, + { + "epoch": 1.2034562211981568, + "grad_norm": 1.1611837511561531, + "learning_rate": 7.493474677412793e-07, + "loss": 0.7687606811523438, + "step": 5223 + }, + { + "epoch": 1.2036866359447005, + "grad_norm": 0.9784122136232752, + "learning_rate": 7.489786642136709e-07, + "loss": 0.6858488321304321, + "step": 5224 + }, + { + "epoch": 1.2039170506912442, + "grad_norm": 0.8776412008252917, + "learning_rate": 7.486098971214769e-07, + "loss": 0.7575044631958008, + "step": 5225 + }, + { + "epoch": 1.204147465437788, + "grad_norm": 0.8129887936087057, + "learning_rate": 7.482411665182236e-07, + "loss": 0.6799627542495728, + "step": 5226 + }, + { + "epoch": 1.2043778801843317, + "grad_norm": 1.4994332488998736, + "learning_rate": 7.478724724574317e-07, + "loss": 0.8882759809494019, + "step": 5227 + }, + { + "epoch": 1.2046082949308756, + "grad_norm": 1.10750930167245, + "learning_rate": 7.475038149926165e-07, + "loss": 0.7835016250610352, + "step": 5228 + }, + { + "epoch": 1.2048387096774194, + "grad_norm": 1.3325922049902164, + "learning_rate": 7.471351941772883e-07, + "loss": 0.9264512062072754, + "step": 5229 + }, + { + "epoch": 1.205069124423963, + "grad_norm": 1.225862576818596, + "learning_rate": 7.467666100649521e-07, + "loss": 0.8094228506088257, + "step": 5230 + }, + { + "epoch": 1.205299539170507, + "grad_norm": 1.167425367358343, + "learning_rate": 7.463980627091073e-07, + "loss": 0.7782102823257446, + "step": 
5231 + }, + { + "epoch": 1.2055299539170508, + "grad_norm": 1.2892161969383955, + "learning_rate": 7.460295521632474e-07, + "loss": 0.7946768999099731, + "step": 5232 + }, + { + "epoch": 1.2057603686635945, + "grad_norm": 1.2538288509415036, + "learning_rate": 7.456610784808624e-07, + "loss": 0.7571625709533691, + "step": 5233 + }, + { + "epoch": 1.2059907834101382, + "grad_norm": 1.3786667467707436, + "learning_rate": 7.45292641715435e-07, + "loss": 0.9760236144065857, + "step": 5234 + }, + { + "epoch": 1.206221198156682, + "grad_norm": 1.0717694328508904, + "learning_rate": 7.449242419204431e-07, + "loss": 0.6370055675506592, + "step": 5235 + }, + { + "epoch": 1.206451612903226, + "grad_norm": 1.226412390848778, + "learning_rate": 7.445558791493603e-07, + "loss": 0.7991320490837097, + "step": 5236 + }, + { + "epoch": 1.2066820276497696, + "grad_norm": 1.0607083796487833, + "learning_rate": 7.441875534556531e-07, + "loss": 0.8840054273605347, + "step": 5237 + }, + { + "epoch": 1.2069124423963133, + "grad_norm": 1.0615184698087237, + "learning_rate": 7.438192648927841e-07, + "loss": 0.8634533882141113, + "step": 5238 + }, + { + "epoch": 1.207142857142857, + "grad_norm": 0.9816687263450602, + "learning_rate": 7.434510135142098e-07, + "loss": 0.7081723213195801, + "step": 5239 + }, + { + "epoch": 1.2073732718894008, + "grad_norm": 1.1398058732045784, + "learning_rate": 7.430827993733808e-07, + "loss": 0.7160249352455139, + "step": 5240 + }, + { + "epoch": 1.2076036866359448, + "grad_norm": 0.8011837684152103, + "learning_rate": 7.427146225237438e-07, + "loss": 0.5323421955108643, + "step": 5241 + }, + { + "epoch": 1.2078341013824885, + "grad_norm": 1.0448270993907307, + "learning_rate": 7.423464830187386e-07, + "loss": 0.6439197063446045, + "step": 5242 + }, + { + "epoch": 1.2080645161290322, + "grad_norm": 1.2861588666790074, + "learning_rate": 7.419783809117999e-07, + "loss": 0.8268016576766968, + "step": 5243 + }, + { + "epoch": 1.2082949308755762, + "grad_norm": 
1.0010661947708184, + "learning_rate": 7.416103162563582e-07, + "loss": 0.8115339279174805, + "step": 5244 + }, + { + "epoch": 1.2085253456221199, + "grad_norm": 1.05524382659239, + "learning_rate": 7.41242289105837e-07, + "loss": 0.8677197694778442, + "step": 5245 + }, + { + "epoch": 1.2087557603686636, + "grad_norm": 1.3337261104998102, + "learning_rate": 7.408742995136547e-07, + "loss": 0.7942948937416077, + "step": 5246 + }, + { + "epoch": 1.2089861751152073, + "grad_norm": 1.4261507552200647, + "learning_rate": 7.405063475332249e-07, + "loss": 0.8457766771316528, + "step": 5247 + }, + { + "epoch": 1.209216589861751, + "grad_norm": 1.2992145711475631, + "learning_rate": 7.401384332179552e-07, + "loss": 0.8463923931121826, + "step": 5248 + }, + { + "epoch": 1.209447004608295, + "grad_norm": 1.2576660242210724, + "learning_rate": 7.397705566212479e-07, + "loss": 0.9192875623703003, + "step": 5249 + }, + { + "epoch": 1.2096774193548387, + "grad_norm": 1.257257688865163, + "learning_rate": 7.394027177964999e-07, + "loss": 0.7461347579956055, + "step": 5250 + }, + { + "epoch": 1.2099078341013825, + "grad_norm": 1.150791607540225, + "learning_rate": 7.390349167971025e-07, + "loss": 0.6953321695327759, + "step": 5251 + }, + { + "epoch": 1.2101382488479262, + "grad_norm": 1.0284326235023098, + "learning_rate": 7.38667153676441e-07, + "loss": 0.7226089835166931, + "step": 5252 + }, + { + "epoch": 1.21036866359447, + "grad_norm": 0.8781484717910895, + "learning_rate": 7.382994284878967e-07, + "loss": 0.6746406555175781, + "step": 5253 + }, + { + "epoch": 1.2105990783410139, + "grad_norm": 1.109396083619457, + "learning_rate": 7.379317412848438e-07, + "loss": 0.7600215673446655, + "step": 5254 + }, + { + "epoch": 1.2108294930875576, + "grad_norm": 1.0821310147954002, + "learning_rate": 7.375640921206514e-07, + "loss": 0.7530734539031982, + "step": 5255 + }, + { + "epoch": 1.2110599078341013, + "grad_norm": 1.0572444642243028, + "learning_rate": 7.371964810486839e-07, + 
"loss": 0.8103033304214478, + "step": 5256 + }, + { + "epoch": 1.2112903225806453, + "grad_norm": 1.5370115848017, + "learning_rate": 7.368289081222994e-07, + "loss": 0.8916831016540527, + "step": 5257 + }, + { + "epoch": 1.211520737327189, + "grad_norm": 0.9972990737801745, + "learning_rate": 7.364613733948501e-07, + "loss": 0.6728129386901855, + "step": 5258 + }, + { + "epoch": 1.2117511520737327, + "grad_norm": 1.2459715050980873, + "learning_rate": 7.360938769196841e-07, + "loss": 0.8609380722045898, + "step": 5259 + }, + { + "epoch": 1.2119815668202765, + "grad_norm": 1.2704694196315967, + "learning_rate": 7.357264187501422e-07, + "loss": 0.9370373487472534, + "step": 5260 + }, + { + "epoch": 1.2122119815668202, + "grad_norm": 1.1080973982930933, + "learning_rate": 7.353589989395604e-07, + "loss": 0.6812434196472168, + "step": 5261 + }, + { + "epoch": 1.2124423963133641, + "grad_norm": 1.1917998982451765, + "learning_rate": 7.349916175412701e-07, + "loss": 0.7661731243133545, + "step": 5262 + }, + { + "epoch": 1.2126728110599079, + "grad_norm": 1.175052294784061, + "learning_rate": 7.346242746085951e-07, + "loss": 0.7306643128395081, + "step": 5263 + }, + { + "epoch": 1.2129032258064516, + "grad_norm": 1.2065862060559862, + "learning_rate": 7.34256970194856e-07, + "loss": 0.7189076542854309, + "step": 5264 + }, + { + "epoch": 1.2131336405529953, + "grad_norm": 0.8932044441494517, + "learning_rate": 7.338897043533656e-07, + "loss": 0.6935977935791016, + "step": 5265 + }, + { + "epoch": 1.213364055299539, + "grad_norm": 1.1224428177486496, + "learning_rate": 7.335224771374323e-07, + "loss": 0.8451323509216309, + "step": 5266 + }, + { + "epoch": 1.213594470046083, + "grad_norm": 1.1211043364668347, + "learning_rate": 7.331552886003589e-07, + "loss": 0.7936843037605286, + "step": 5267 + }, + { + "epoch": 1.2138248847926267, + "grad_norm": 1.1507587511456696, + "learning_rate": 7.327881387954418e-07, + "loss": 0.7989950776100159, + "step": 5268 + }, + { + "epoch": 
1.2140552995391705, + "grad_norm": 1.1166217189865624, + "learning_rate": 7.324210277759726e-07, + "loss": 0.7579236030578613, + "step": 5269 + }, + { + "epoch": 1.2142857142857142, + "grad_norm": 1.1276787851795544, + "learning_rate": 7.320539555952372e-07, + "loss": 0.7101268768310547, + "step": 5270 + }, + { + "epoch": 1.2145161290322581, + "grad_norm": 1.0342829920040018, + "learning_rate": 7.316869223065155e-07, + "loss": 0.7920513153076172, + "step": 5271 + }, + { + "epoch": 1.2147465437788019, + "grad_norm": 1.4357028015234437, + "learning_rate": 7.313199279630814e-07, + "loss": 0.9241428375244141, + "step": 5272 + }, + { + "epoch": 1.2149769585253456, + "grad_norm": 1.1653282891915406, + "learning_rate": 7.309529726182044e-07, + "loss": 0.8278338313102722, + "step": 5273 + }, + { + "epoch": 1.2152073732718893, + "grad_norm": 0.9443953324177181, + "learning_rate": 7.305860563251473e-07, + "loss": 0.8230598568916321, + "step": 5274 + }, + { + "epoch": 1.2154377880184333, + "grad_norm": 0.9783962526324749, + "learning_rate": 7.302191791371672e-07, + "loss": 0.7791799902915955, + "step": 5275 + }, + { + "epoch": 1.215668202764977, + "grad_norm": 1.1070826926760935, + "learning_rate": 7.298523411075163e-07, + "loss": 0.705475926399231, + "step": 5276 + }, + { + "epoch": 1.2158986175115207, + "grad_norm": 1.2064718691511076, + "learning_rate": 7.294855422894406e-07, + "loss": 0.8078421354293823, + "step": 5277 + }, + { + "epoch": 1.2161290322580645, + "grad_norm": 1.2182160993977798, + "learning_rate": 7.2911878273618e-07, + "loss": 0.8115853667259216, + "step": 5278 + }, + { + "epoch": 1.2163594470046082, + "grad_norm": 1.0596504935928797, + "learning_rate": 7.287520625009698e-07, + "loss": 0.6917247772216797, + "step": 5279 + }, + { + "epoch": 1.2165898617511521, + "grad_norm": 1.0522660082790807, + "learning_rate": 7.283853816370386e-07, + "loss": 0.7131551504135132, + "step": 5280 + }, + { + "epoch": 1.2168202764976959, + "grad_norm": 0.9495683492221387, + 
"learning_rate": 7.280187401976093e-07, + "loss": 0.713994562625885, + "step": 5281 + }, + { + "epoch": 1.2170506912442396, + "grad_norm": 1.0845439765546743, + "learning_rate": 7.276521382359001e-07, + "loss": 0.7123454809188843, + "step": 5282 + }, + { + "epoch": 1.2172811059907833, + "grad_norm": 1.395671188469518, + "learning_rate": 7.272855758051226e-07, + "loss": 0.7805770635604858, + "step": 5283 + }, + { + "epoch": 1.2175115207373273, + "grad_norm": 0.9191020761831104, + "learning_rate": 7.269190529584823e-07, + "loss": 0.756670355796814, + "step": 5284 + }, + { + "epoch": 1.217741935483871, + "grad_norm": 0.9614002237797926, + "learning_rate": 7.265525697491804e-07, + "loss": 0.5992655754089355, + "step": 5285 + }, + { + "epoch": 1.2179723502304147, + "grad_norm": 1.1857893348181308, + "learning_rate": 7.26186126230411e-07, + "loss": 0.7552722692489624, + "step": 5286 + }, + { + "epoch": 1.2182027649769585, + "grad_norm": 1.3153742960319537, + "learning_rate": 7.258197224553627e-07, + "loss": 0.7189064025878906, + "step": 5287 + }, + { + "epoch": 1.2184331797235024, + "grad_norm": 1.115820306372996, + "learning_rate": 7.254533584772188e-07, + "loss": 0.8277319669723511, + "step": 5288 + }, + { + "epoch": 1.2186635944700461, + "grad_norm": 1.0584826489222536, + "learning_rate": 7.250870343491561e-07, + "loss": 0.6655987501144409, + "step": 5289 + }, + { + "epoch": 1.2188940092165899, + "grad_norm": 1.3888484350972408, + "learning_rate": 7.247207501243469e-07, + "loss": 0.8654178380966187, + "step": 5290 + }, + { + "epoch": 1.2191244239631336, + "grad_norm": 1.1781514985004269, + "learning_rate": 7.243545058559564e-07, + "loss": 0.9148486852645874, + "step": 5291 + }, + { + "epoch": 1.2193548387096773, + "grad_norm": 1.0525236851594717, + "learning_rate": 7.239883015971439e-07, + "loss": 0.8003618717193604, + "step": 5292 + }, + { + "epoch": 1.2195852534562213, + "grad_norm": 1.1614945814905475, + "learning_rate": 7.236221374010647e-07, + "loss": 
0.7290889024734497, + "step": 5293 + }, + { + "epoch": 1.219815668202765, + "grad_norm": 0.963434252776205, + "learning_rate": 7.232560133208663e-07, + "loss": 0.5989147424697876, + "step": 5294 + }, + { + "epoch": 1.2200460829493087, + "grad_norm": 0.8766403983792901, + "learning_rate": 7.228899294096907e-07, + "loss": 0.8424522876739502, + "step": 5295 + }, + { + "epoch": 1.2202764976958524, + "grad_norm": 1.1686896205403536, + "learning_rate": 7.225238857206754e-07, + "loss": 0.7753746509552002, + "step": 5296 + }, + { + "epoch": 1.2205069124423964, + "grad_norm": 1.1424848742103464, + "learning_rate": 7.221578823069508e-07, + "loss": 0.693191647529602, + "step": 5297 + }, + { + "epoch": 1.2207373271889401, + "grad_norm": 1.177332636609729, + "learning_rate": 7.217919192216417e-07, + "loss": 0.7561964988708496, + "step": 5298 + }, + { + "epoch": 1.2209677419354839, + "grad_norm": 0.9927977088932712, + "learning_rate": 7.214259965178673e-07, + "loss": 0.7721199989318848, + "step": 5299 + }, + { + "epoch": 1.2211981566820276, + "grad_norm": 1.39798744468456, + "learning_rate": 7.210601142487407e-07, + "loss": 0.8100659251213074, + "step": 5300 + }, + { + "epoch": 1.2214285714285715, + "grad_norm": 1.0570396078634527, + "learning_rate": 7.206942724673688e-07, + "loss": 0.6753256916999817, + "step": 5301 + }, + { + "epoch": 1.2216589861751153, + "grad_norm": 1.1020954128293505, + "learning_rate": 7.20328471226854e-07, + "loss": 0.7534425854682922, + "step": 5302 + }, + { + "epoch": 1.221889400921659, + "grad_norm": 1.5962153366210945, + "learning_rate": 7.199627105802913e-07, + "loss": 0.8275027275085449, + "step": 5303 + }, + { + "epoch": 1.2221198156682027, + "grad_norm": 1.1431238814592317, + "learning_rate": 7.195969905807702e-07, + "loss": 0.728579580783844, + "step": 5304 + }, + { + "epoch": 1.2223502304147464, + "grad_norm": 1.1008777946014818, + "learning_rate": 7.192313112813749e-07, + "loss": 0.8221413493156433, + "step": 5305 + }, + { + "epoch": 
1.2225806451612904, + "grad_norm": 1.0255386420970887, + "learning_rate": 7.188656727351832e-07, + "loss": 0.7819123268127441, + "step": 5306 + }, + { + "epoch": 1.2228110599078341, + "grad_norm": 1.1141595278176613, + "learning_rate": 7.185000749952666e-07, + "loss": 0.7474294900894165, + "step": 5307 + }, + { + "epoch": 1.2230414746543778, + "grad_norm": 1.4333018176649106, + "learning_rate": 7.181345181146919e-07, + "loss": 0.8072259426116943, + "step": 5308 + }, + { + "epoch": 1.2232718894009216, + "grad_norm": 1.3449246489382425, + "learning_rate": 7.177690021465184e-07, + "loss": 0.8718069791793823, + "step": 5309 + }, + { + "epoch": 1.2235023041474655, + "grad_norm": 1.1090181258933243, + "learning_rate": 7.174035271438006e-07, + "loss": 0.8374875783920288, + "step": 5310 + }, + { + "epoch": 1.2237327188940093, + "grad_norm": 1.2085386756305507, + "learning_rate": 7.170380931595869e-07, + "loss": 0.6669566631317139, + "step": 5311 + }, + { + "epoch": 1.223963133640553, + "grad_norm": 1.1706882886588135, + "learning_rate": 7.16672700246919e-07, + "loss": 0.8735665678977966, + "step": 5312 + }, + { + "epoch": 1.2241935483870967, + "grad_norm": 1.1826163019402958, + "learning_rate": 7.16307348458834e-07, + "loss": 0.8312361240386963, + "step": 5313 + }, + { + "epoch": 1.2244239631336407, + "grad_norm": 1.1102424714986416, + "learning_rate": 7.159420378483619e-07, + "loss": 0.7927724123001099, + "step": 5314 + }, + { + "epoch": 1.2246543778801844, + "grad_norm": 1.0527049283172933, + "learning_rate": 7.155767684685264e-07, + "loss": 0.7641698122024536, + "step": 5315 + }, + { + "epoch": 1.2248847926267281, + "grad_norm": 1.0508850668326304, + "learning_rate": 7.15211540372347e-07, + "loss": 0.7490028142929077, + "step": 5316 + }, + { + "epoch": 1.2251152073732718, + "grad_norm": 1.0604993776512237, + "learning_rate": 7.148463536128354e-07, + "loss": 0.7194815874099731, + "step": 5317 + }, + { + "epoch": 1.2253456221198156, + "grad_norm": 1.2779756064695784, + 
"learning_rate": 7.144812082429979e-07, + "loss": 0.8328256607055664, + "step": 5318 + }, + { + "epoch": 1.2255760368663595, + "grad_norm": 1.1539197608232337, + "learning_rate": 7.141161043158352e-07, + "loss": 0.9124876260757446, + "step": 5319 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 1.346989410896588, + "learning_rate": 7.137510418843416e-07, + "loss": 0.8183319568634033, + "step": 5320 + }, + { + "epoch": 1.226036866359447, + "grad_norm": 1.0902088619882297, + "learning_rate": 7.133860210015048e-07, + "loss": 0.8423885107040405, + "step": 5321 + }, + { + "epoch": 1.2262672811059907, + "grad_norm": 1.064962271727849, + "learning_rate": 7.130210417203082e-07, + "loss": 0.8175387382507324, + "step": 5322 + }, + { + "epoch": 1.2264976958525347, + "grad_norm": 1.0111617635250245, + "learning_rate": 7.126561040937274e-07, + "loss": 0.8415048718452454, + "step": 5323 + }, + { + "epoch": 1.2267281105990784, + "grad_norm": 1.4241774929740556, + "learning_rate": 7.122912081747321e-07, + "loss": 0.6891156435012817, + "step": 5324 + }, + { + "epoch": 1.226958525345622, + "grad_norm": 1.1236132104045742, + "learning_rate": 7.119263540162876e-07, + "loss": 0.667617678642273, + "step": 5325 + }, + { + "epoch": 1.2271889400921658, + "grad_norm": 1.21591291521647, + "learning_rate": 7.115615416713517e-07, + "loss": 0.7752082347869873, + "step": 5326 + }, + { + "epoch": 1.2274193548387098, + "grad_norm": 1.0094697644265302, + "learning_rate": 7.111967711928757e-07, + "loss": 0.6582639813423157, + "step": 5327 + }, + { + "epoch": 1.2276497695852535, + "grad_norm": 0.9823209869062589, + "learning_rate": 7.108320426338063e-07, + "loss": 0.6996462345123291, + "step": 5328 + }, + { + "epoch": 1.2278801843317972, + "grad_norm": 1.1364634127826816, + "learning_rate": 7.104673560470828e-07, + "loss": 0.7132028341293335, + "step": 5329 + }, + { + "epoch": 1.228110599078341, + "grad_norm": 1.1959075580849723, + "learning_rate": 7.101027114856395e-07, + "loss": 
0.7344096899032593, + "step": 5330 + }, + { + "epoch": 1.2283410138248847, + "grad_norm": 1.2810764573761082, + "learning_rate": 7.097381090024039e-07, + "loss": 0.7805585861206055, + "step": 5331 + }, + { + "epoch": 1.2285714285714286, + "grad_norm": 1.2310137220528714, + "learning_rate": 7.093735486502976e-07, + "loss": 0.6785855889320374, + "step": 5332 + }, + { + "epoch": 1.2288018433179724, + "grad_norm": 1.3226389203047557, + "learning_rate": 7.090090304822355e-07, + "loss": 0.7465041875839233, + "step": 5333 + }, + { + "epoch": 1.229032258064516, + "grad_norm": 1.0465247410006058, + "learning_rate": 7.086445545511278e-07, + "loss": 0.7400432825088501, + "step": 5334 + }, + { + "epoch": 1.2292626728110598, + "grad_norm": 0.9732969942350592, + "learning_rate": 7.082801209098774e-07, + "loss": 0.8567768335342407, + "step": 5335 + }, + { + "epoch": 1.2294930875576038, + "grad_norm": 1.133102602749406, + "learning_rate": 7.079157296113807e-07, + "loss": 0.7451025247573853, + "step": 5336 + }, + { + "epoch": 1.2297235023041475, + "grad_norm": 1.2953309888801026, + "learning_rate": 7.075513807085299e-07, + "loss": 0.7178194522857666, + "step": 5337 + }, + { + "epoch": 1.2299539170506912, + "grad_norm": 1.114794382407599, + "learning_rate": 7.071870742542086e-07, + "loss": 0.7538058161735535, + "step": 5338 + }, + { + "epoch": 1.230184331797235, + "grad_norm": 1.2706015052011863, + "learning_rate": 7.068228103012959e-07, + "loss": 0.7853896021842957, + "step": 5339 + }, + { + "epoch": 1.230414746543779, + "grad_norm": 1.6145088717882257, + "learning_rate": 7.064585889026644e-07, + "loss": 0.9359887838363647, + "step": 5340 + }, + { + "epoch": 1.2306451612903226, + "grad_norm": 1.2876289498435494, + "learning_rate": 7.060944101111797e-07, + "loss": 0.8590530753135681, + "step": 5341 + }, + { + "epoch": 1.2308755760368664, + "grad_norm": 1.0245387562303532, + "learning_rate": 7.057302739797025e-07, + "loss": 0.7047204971313477, + "step": 5342 + }, + { + "epoch": 
1.23110599078341, + "grad_norm": 1.3069544437359595, + "learning_rate": 7.053661805610867e-07, + "loss": 0.8826072216033936, + "step": 5343 + }, + { + "epoch": 1.2313364055299538, + "grad_norm": 1.2593962984780245, + "learning_rate": 7.050021299081792e-07, + "loss": 0.9394192695617676, + "step": 5344 + }, + { + "epoch": 1.2315668202764978, + "grad_norm": 1.1109567819341923, + "learning_rate": 7.046381220738224e-07, + "loss": 0.7814885377883911, + "step": 5345 + }, + { + "epoch": 1.2317972350230415, + "grad_norm": 1.1819250736895568, + "learning_rate": 7.042741571108512e-07, + "loss": 0.781699538230896, + "step": 5346 + }, + { + "epoch": 1.2320276497695852, + "grad_norm": 1.1116588757864085, + "learning_rate": 7.039102350720946e-07, + "loss": 0.6554632186889648, + "step": 5347 + }, + { + "epoch": 1.232258064516129, + "grad_norm": 0.9564548780258206, + "learning_rate": 7.035463560103753e-07, + "loss": 0.6449903249740601, + "step": 5348 + }, + { + "epoch": 1.2324884792626727, + "grad_norm": 1.3130676696714008, + "learning_rate": 7.031825199785101e-07, + "loss": 0.8222958445549011, + "step": 5349 + }, + { + "epoch": 1.2327188940092166, + "grad_norm": 1.073654969776922, + "learning_rate": 7.02818727029309e-07, + "loss": 0.8315533399581909, + "step": 5350 + }, + { + "epoch": 1.2329493087557604, + "grad_norm": 0.9980466179862664, + "learning_rate": 7.024549772155764e-07, + "loss": 0.8065732717514038, + "step": 5351 + }, + { + "epoch": 1.233179723502304, + "grad_norm": 1.3823215182318742, + "learning_rate": 7.020912705901101e-07, + "loss": 0.7607216835021973, + "step": 5352 + }, + { + "epoch": 1.233410138248848, + "grad_norm": 1.3000097773568569, + "learning_rate": 7.01727607205701e-07, + "loss": 0.877311110496521, + "step": 5353 + }, + { + "epoch": 1.2336405529953918, + "grad_norm": 1.1855641794195606, + "learning_rate": 7.013639871151354e-07, + "loss": 0.7352526187896729, + "step": 5354 + }, + { + "epoch": 1.2338709677419355, + "grad_norm": 1.1123782494693044, + 
"learning_rate": 7.010004103711915e-07, + "loss": 0.7676074504852295, + "step": 5355 + }, + { + "epoch": 1.2341013824884792, + "grad_norm": 1.1035546011135826, + "learning_rate": 7.00636877026642e-07, + "loss": 0.7802003622055054, + "step": 5356 + }, + { + "epoch": 1.234331797235023, + "grad_norm": 1.0576568317960378, + "learning_rate": 7.002733871342537e-07, + "loss": 0.747033953666687, + "step": 5357 + }, + { + "epoch": 1.234562211981567, + "grad_norm": 1.1565555542506367, + "learning_rate": 6.999099407467865e-07, + "loss": 0.8086956739425659, + "step": 5358 + }, + { + "epoch": 1.2347926267281106, + "grad_norm": 1.450692015608809, + "learning_rate": 6.995465379169941e-07, + "loss": 0.9362099170684814, + "step": 5359 + }, + { + "epoch": 1.2350230414746544, + "grad_norm": 1.0699993470783844, + "learning_rate": 6.991831786976241e-07, + "loss": 0.6784812211990356, + "step": 5360 + }, + { + "epoch": 1.235253456221198, + "grad_norm": 1.0206889971672557, + "learning_rate": 6.988198631414171e-07, + "loss": 0.7733708620071411, + "step": 5361 + }, + { + "epoch": 1.2354838709677418, + "grad_norm": 1.1745502344238163, + "learning_rate": 6.984565913011087e-07, + "loss": 0.8747115135192871, + "step": 5362 + }, + { + "epoch": 1.2357142857142858, + "grad_norm": 1.0659966645754941, + "learning_rate": 6.980933632294268e-07, + "loss": 0.6947430372238159, + "step": 5363 + }, + { + "epoch": 1.2359447004608295, + "grad_norm": 1.206089262306805, + "learning_rate": 6.97730178979093e-07, + "loss": 0.7128404378890991, + "step": 5364 + }, + { + "epoch": 1.2361751152073732, + "grad_norm": 1.1120167642627505, + "learning_rate": 6.973670386028242e-07, + "loss": 0.7190830707550049, + "step": 5365 + }, + { + "epoch": 1.2364055299539172, + "grad_norm": 1.1367562157166997, + "learning_rate": 6.970039421533291e-07, + "loss": 0.7625770568847656, + "step": 5366 + }, + { + "epoch": 1.236635944700461, + "grad_norm": 1.109720416461976, + "learning_rate": 6.966408896833104e-07, + "loss": 
0.7942707538604736, + "step": 5367 + }, + { + "epoch": 1.2368663594470046, + "grad_norm": 1.2413354296268997, + "learning_rate": 6.962778812454652e-07, + "loss": 0.8329455852508545, + "step": 5368 + }, + { + "epoch": 1.2370967741935484, + "grad_norm": 0.8823115581397621, + "learning_rate": 6.959149168924833e-07, + "loss": 0.6034290790557861, + "step": 5369 + }, + { + "epoch": 1.237327188940092, + "grad_norm": 1.1119487486974622, + "learning_rate": 6.955519966770486e-07, + "loss": 0.8424680233001709, + "step": 5370 + }, + { + "epoch": 1.237557603686636, + "grad_norm": 1.4443979353165184, + "learning_rate": 6.951891206518388e-07, + "loss": 0.8670322895050049, + "step": 5371 + }, + { + "epoch": 1.2377880184331798, + "grad_norm": 1.2577295715670245, + "learning_rate": 6.948262888695244e-07, + "loss": 0.7283621430397034, + "step": 5372 + }, + { + "epoch": 1.2380184331797235, + "grad_norm": 1.1772858057268798, + "learning_rate": 6.9446350138277e-07, + "loss": 0.7990118265151978, + "step": 5373 + }, + { + "epoch": 1.2382488479262672, + "grad_norm": 1.3359682917878526, + "learning_rate": 6.941007582442342e-07, + "loss": 0.945558488368988, + "step": 5374 + }, + { + "epoch": 1.238479262672811, + "grad_norm": 1.186182272846314, + "learning_rate": 6.937380595065685e-07, + "loss": 0.6905936002731323, + "step": 5375 + }, + { + "epoch": 1.238709677419355, + "grad_norm": 1.1665515184197677, + "learning_rate": 6.933754052224176e-07, + "loss": 0.7757662534713745, + "step": 5376 + }, + { + "epoch": 1.2389400921658986, + "grad_norm": 1.1107589407670702, + "learning_rate": 6.930127954444209e-07, + "loss": 0.63062584400177, + "step": 5377 + }, + { + "epoch": 1.2391705069124423, + "grad_norm": 1.2453155093106256, + "learning_rate": 6.926502302252109e-07, + "loss": 0.7341021299362183, + "step": 5378 + }, + { + "epoch": 1.2394009216589863, + "grad_norm": 0.9019761448377311, + "learning_rate": 6.922877096174127e-07, + "loss": 0.572767972946167, + "step": 5379 + }, + { + "epoch": 
1.23963133640553, + "grad_norm": 1.274761976544521, + "learning_rate": 6.919252336736463e-07, + "loss": 0.630276083946228, + "step": 5380 + }, + { + "epoch": 1.2398617511520738, + "grad_norm": 1.0769631455551745, + "learning_rate": 6.915628024465244e-07, + "loss": 0.668334424495697, + "step": 5381 + }, + { + "epoch": 1.2400921658986175, + "grad_norm": 0.9444198657704267, + "learning_rate": 6.912004159886529e-07, + "loss": 0.6766513586044312, + "step": 5382 + }, + { + "epoch": 1.2403225806451612, + "grad_norm": 1.3884668691330446, + "learning_rate": 6.908380743526328e-07, + "loss": 0.7016473412513733, + "step": 5383 + }, + { + "epoch": 1.2405529953917052, + "grad_norm": 1.378738366714881, + "learning_rate": 6.904757775910568e-07, + "loss": 0.8837979435920715, + "step": 5384 + }, + { + "epoch": 1.2407834101382489, + "grad_norm": 0.9305030195638431, + "learning_rate": 6.901135257565116e-07, + "loss": 0.7187714576721191, + "step": 5385 + }, + { + "epoch": 1.2410138248847926, + "grad_norm": 1.0935814864632027, + "learning_rate": 6.897513189015782e-07, + "loss": 0.8227157592773438, + "step": 5386 + }, + { + "epoch": 1.2412442396313363, + "grad_norm": 1.278600897043475, + "learning_rate": 6.893891570788301e-07, + "loss": 0.8812209367752075, + "step": 5387 + }, + { + "epoch": 1.24147465437788, + "grad_norm": 1.0426681195674332, + "learning_rate": 6.890270403408348e-07, + "loss": 0.6702297925949097, + "step": 5388 + }, + { + "epoch": 1.241705069124424, + "grad_norm": 1.1718249382850798, + "learning_rate": 6.886649687401529e-07, + "loss": 0.646358847618103, + "step": 5389 + }, + { + "epoch": 1.2419354838709677, + "grad_norm": 1.1131010301922042, + "learning_rate": 6.883029423293383e-07, + "loss": 0.6514080762863159, + "step": 5390 + }, + { + "epoch": 1.2421658986175115, + "grad_norm": 1.0826812738863971, + "learning_rate": 6.879409611609393e-07, + "loss": 0.6938437819480896, + "step": 5391 + }, + { + "epoch": 1.2423963133640552, + "grad_norm": 1.3710627721954263, + 
"learning_rate": 6.875790252874967e-07, + "loss": 0.8601399064064026, + "step": 5392 + }, + { + "epoch": 1.2426267281105992, + "grad_norm": 1.1590300352526421, + "learning_rate": 6.872171347615445e-07, + "loss": 0.6641080379486084, + "step": 5393 + }, + { + "epoch": 1.2428571428571429, + "grad_norm": 1.0046628491787142, + "learning_rate": 6.868552896356117e-07, + "loss": 0.7109012603759766, + "step": 5394 + }, + { + "epoch": 1.2430875576036866, + "grad_norm": 1.261042767669179, + "learning_rate": 6.864934899622191e-07, + "loss": 0.8558728694915771, + "step": 5395 + }, + { + "epoch": 1.2433179723502303, + "grad_norm": 1.1243133400823155, + "learning_rate": 6.861317357938807e-07, + "loss": 0.6119382977485657, + "step": 5396 + }, + { + "epoch": 1.2435483870967743, + "grad_norm": 1.2850449121793286, + "learning_rate": 6.857700271831059e-07, + "loss": 0.7527587413787842, + "step": 5397 + }, + { + "epoch": 1.243778801843318, + "grad_norm": 1.3104214277299573, + "learning_rate": 6.854083641823957e-07, + "loss": 0.8082761168479919, + "step": 5398 + }, + { + "epoch": 1.2440092165898617, + "grad_norm": 1.0664271007055484, + "learning_rate": 6.850467468442447e-07, + "loss": 0.7289307117462158, + "step": 5399 + }, + { + "epoch": 1.2442396313364055, + "grad_norm": 1.2684124709337747, + "learning_rate": 6.846851752211418e-07, + "loss": 0.8824148178100586, + "step": 5400 + }, + { + "epoch": 1.2444700460829492, + "grad_norm": 1.2011621536911168, + "learning_rate": 6.843236493655682e-07, + "loss": 0.7046724557876587, + "step": 5401 + }, + { + "epoch": 1.2447004608294931, + "grad_norm": 1.0456601321771188, + "learning_rate": 6.839621693299987e-07, + "loss": 0.8192921876907349, + "step": 5402 + }, + { + "epoch": 1.2449308755760369, + "grad_norm": 1.1031705508374716, + "learning_rate": 6.83600735166902e-07, + "loss": 0.7651070356369019, + "step": 5403 + }, + { + "epoch": 1.2451612903225806, + "grad_norm": 1.10155120943284, + "learning_rate": 6.832393469287401e-07, + "loss": 
0.7689340114593506, + "step": 5404 + }, + { + "epoch": 1.2453917050691243, + "grad_norm": 1.438313566898243, + "learning_rate": 6.828780046679671e-07, + "loss": 0.9214832782745361, + "step": 5405 + }, + { + "epoch": 1.2456221198156683, + "grad_norm": 1.1160237214981186, + "learning_rate": 6.825167084370322e-07, + "loss": 0.7210682034492493, + "step": 5406 + }, + { + "epoch": 1.245852534562212, + "grad_norm": 1.1608936823977416, + "learning_rate": 6.82155458288377e-07, + "loss": 0.871317446231842, + "step": 5407 + }, + { + "epoch": 1.2460829493087557, + "grad_norm": 1.2750147741770517, + "learning_rate": 6.817942542744359e-07, + "loss": 0.7669065594673157, + "step": 5408 + }, + { + "epoch": 1.2463133640552995, + "grad_norm": 1.0693548196930358, + "learning_rate": 6.814330964476379e-07, + "loss": 0.7317448854446411, + "step": 5409 + }, + { + "epoch": 1.2465437788018434, + "grad_norm": 1.2936969678285373, + "learning_rate": 6.810719848604036e-07, + "loss": 0.7873220443725586, + "step": 5410 + }, + { + "epoch": 1.2467741935483871, + "grad_norm": 1.2973675980536, + "learning_rate": 6.807109195651492e-07, + "loss": 0.713294267654419, + "step": 5411 + }, + { + "epoch": 1.2470046082949309, + "grad_norm": 1.2551238151306954, + "learning_rate": 6.803499006142819e-07, + "loss": 0.7592979669570923, + "step": 5412 + }, + { + "epoch": 1.2472350230414746, + "grad_norm": 1.3113983649465133, + "learning_rate": 6.79988928060203e-07, + "loss": 0.7805737257003784, + "step": 5413 + }, + { + "epoch": 1.2474654377880183, + "grad_norm": 0.8180058983934718, + "learning_rate": 6.79628001955308e-07, + "loss": 0.7706440687179565, + "step": 5414 + }, + { + "epoch": 1.2476958525345623, + "grad_norm": 1.3696824329137627, + "learning_rate": 6.792671223519844e-07, + "loss": 0.772534966468811, + "step": 5415 + }, + { + "epoch": 1.247926267281106, + "grad_norm": 1.2283026355612159, + "learning_rate": 6.789062893026129e-07, + "loss": 0.7939096093177795, + "step": 5416 + }, + { + "epoch": 
1.2481566820276497, + "grad_norm": 1.263037130888269, + "learning_rate": 6.78545502859569e-07, + "loss": 0.7062902450561523, + "step": 5417 + }, + { + "epoch": 1.2483870967741935, + "grad_norm": 1.042353004558378, + "learning_rate": 6.781847630752197e-07, + "loss": 0.8296496868133545, + "step": 5418 + }, + { + "epoch": 1.2486175115207374, + "grad_norm": 1.4186103660131706, + "learning_rate": 6.778240700019258e-07, + "loss": 0.926125168800354, + "step": 5419 + }, + { + "epoch": 1.2488479262672811, + "grad_norm": 1.1816532525816696, + "learning_rate": 6.774634236920419e-07, + "loss": 0.7301739454269409, + "step": 5420 + }, + { + "epoch": 1.2490783410138249, + "grad_norm": 1.366957713339659, + "learning_rate": 6.771028241979151e-07, + "loss": 0.7313426733016968, + "step": 5421 + }, + { + "epoch": 1.2493087557603686, + "grad_norm": 0.9539446793763906, + "learning_rate": 6.767422715718853e-07, + "loss": 0.7193025946617126, + "step": 5422 + }, + { + "epoch": 1.2495391705069125, + "grad_norm": 1.1735826178809459, + "learning_rate": 6.763817658662874e-07, + "loss": 0.6544638872146606, + "step": 5423 + }, + { + "epoch": 1.2497695852534563, + "grad_norm": 1.1828661707349362, + "learning_rate": 6.760213071334478e-07, + "loss": 0.8402822613716125, + "step": 5424 + }, + { + "epoch": 1.25, + "grad_norm": 1.1854670368859663, + "learning_rate": 6.756608954256861e-07, + "loss": 0.6840100288391113, + "step": 5425 + }, + { + "epoch": 1.2502304147465437, + "grad_norm": 1.1842873946027908, + "learning_rate": 6.753005307953165e-07, + "loss": 0.7315107583999634, + "step": 5426 + }, + { + "epoch": 1.2504608294930875, + "grad_norm": 0.9743094512393712, + "learning_rate": 6.74940213294645e-07, + "loss": 0.6369785070419312, + "step": 5427 + }, + { + "epoch": 1.2506912442396314, + "grad_norm": 1.0769824502789231, + "learning_rate": 6.745799429759711e-07, + "loss": 0.7700424790382385, + "step": 5428 + }, + { + "epoch": 1.2509216589861751, + "grad_norm": 1.2719323162039158, + "learning_rate": 
6.742197198915877e-07, + "loss": 0.7436221241950989, + "step": 5429 + }, + { + "epoch": 1.2511520737327189, + "grad_norm": 1.235326047289827, + "learning_rate": 6.738595440937809e-07, + "loss": 0.8028342723846436, + "step": 5430 + }, + { + "epoch": 1.2513824884792628, + "grad_norm": 1.1651221420823998, + "learning_rate": 6.734994156348288e-07, + "loss": 0.7705515623092651, + "step": 5431 + }, + { + "epoch": 1.2516129032258063, + "grad_norm": 1.509633589240068, + "learning_rate": 6.73139334567005e-07, + "loss": 0.7110899686813354, + "step": 5432 + }, + { + "epoch": 1.2518433179723503, + "grad_norm": 1.0701201128505256, + "learning_rate": 6.727793009425739e-07, + "loss": 0.7495337128639221, + "step": 5433 + }, + { + "epoch": 1.252073732718894, + "grad_norm": 1.1393040143384143, + "learning_rate": 6.724193148137938e-07, + "loss": 0.7735337018966675, + "step": 5434 + }, + { + "epoch": 1.2523041474654377, + "grad_norm": 1.5709409365174263, + "learning_rate": 6.720593762329167e-07, + "loss": 0.8655617237091064, + "step": 5435 + }, + { + "epoch": 1.2525345622119817, + "grad_norm": 1.0969772466203969, + "learning_rate": 6.716994852521871e-07, + "loss": 0.7989616394042969, + "step": 5436 + }, + { + "epoch": 1.2527649769585254, + "grad_norm": 1.2186152186967236, + "learning_rate": 6.713396419238424e-07, + "loss": 0.8090296983718872, + "step": 5437 + }, + { + "epoch": 1.2529953917050691, + "grad_norm": 1.175751705980128, + "learning_rate": 6.709798463001138e-07, + "loss": 0.7150726318359375, + "step": 5438 + }, + { + "epoch": 1.2532258064516129, + "grad_norm": 1.1350361891486582, + "learning_rate": 6.706200984332249e-07, + "loss": 0.7136287689208984, + "step": 5439 + }, + { + "epoch": 1.2534562211981566, + "grad_norm": 1.2991395376590593, + "learning_rate": 6.702603983753927e-07, + "loss": 0.8538687229156494, + "step": 5440 + }, + { + "epoch": 1.2536866359447005, + "grad_norm": 1.5253402941485412, + "learning_rate": 6.699007461788272e-07, + "loss": 0.7960666418075562, + 
"step": 5441 + }, + { + "epoch": 1.2539170506912443, + "grad_norm": 0.9539757778238315, + "learning_rate": 6.695411418957309e-07, + "loss": 0.7462595701217651, + "step": 5442 + }, + { + "epoch": 1.254147465437788, + "grad_norm": 1.482445221768143, + "learning_rate": 6.691815855783009e-07, + "loss": 0.795913577079773, + "step": 5443 + }, + { + "epoch": 1.2543778801843317, + "grad_norm": 1.071717267875031, + "learning_rate": 6.688220772787258e-07, + "loss": 0.7589330077171326, + "step": 5444 + }, + { + "epoch": 1.2546082949308754, + "grad_norm": 1.4795497320121442, + "learning_rate": 6.684626170491874e-07, + "loss": 0.7719615697860718, + "step": 5445 + }, + { + "epoch": 1.2548387096774194, + "grad_norm": 1.06581311441289, + "learning_rate": 6.681032049418616e-07, + "loss": 0.8516664505004883, + "step": 5446 + }, + { + "epoch": 1.2550691244239631, + "grad_norm": 1.466555451116343, + "learning_rate": 6.677438410089163e-07, + "loss": 0.8597210049629211, + "step": 5447 + }, + { + "epoch": 1.2552995391705069, + "grad_norm": 1.2172979010742704, + "learning_rate": 6.673845253025124e-07, + "loss": 0.7101171016693115, + "step": 5448 + }, + { + "epoch": 1.2555299539170508, + "grad_norm": 1.105900547055049, + "learning_rate": 6.670252578748044e-07, + "loss": 0.6946178078651428, + "step": 5449 + }, + { + "epoch": 1.2557603686635945, + "grad_norm": 1.687580161954866, + "learning_rate": 6.666660387779395e-07, + "loss": 0.9912126660346985, + "step": 5450 + }, + { + "epoch": 1.2559907834101383, + "grad_norm": 1.087382323913162, + "learning_rate": 6.663068680640573e-07, + "loss": 0.6495379209518433, + "step": 5451 + }, + { + "epoch": 1.256221198156682, + "grad_norm": 1.0213661473677353, + "learning_rate": 6.65947745785292e-07, + "loss": 0.6276426315307617, + "step": 5452 + }, + { + "epoch": 1.2564516129032257, + "grad_norm": 1.082562870265783, + "learning_rate": 6.655886719937691e-07, + "loss": 0.7273461818695068, + "step": 5453 + }, + { + "epoch": 1.2566820276497697, + "grad_norm": 
1.258671733492057, + "learning_rate": 6.652296467416073e-07, + "loss": 0.8248249292373657, + "step": 5454 + }, + { + "epoch": 1.2569124423963134, + "grad_norm": 1.2124691152915896, + "learning_rate": 6.648706700809196e-07, + "loss": 0.8709753751754761, + "step": 5455 + }, + { + "epoch": 1.2571428571428571, + "grad_norm": 1.4025604957471465, + "learning_rate": 6.645117420638105e-07, + "loss": 0.8207283020019531, + "step": 5456 + }, + { + "epoch": 1.2573732718894008, + "grad_norm": 1.0867491150840567, + "learning_rate": 6.641528627423774e-07, + "loss": 0.8222801685333252, + "step": 5457 + }, + { + "epoch": 1.2576036866359446, + "grad_norm": 1.0891862457945214, + "learning_rate": 6.637940321687121e-07, + "loss": 0.7684904336929321, + "step": 5458 + }, + { + "epoch": 1.2578341013824885, + "grad_norm": 1.106565522930133, + "learning_rate": 6.634352503948979e-07, + "loss": 0.7930517196655273, + "step": 5459 + }, + { + "epoch": 1.2580645161290323, + "grad_norm": 1.255727738748605, + "learning_rate": 6.630765174730116e-07, + "loss": 0.7414563298225403, + "step": 5460 + }, + { + "epoch": 1.258294930875576, + "grad_norm": 1.0415923536335177, + "learning_rate": 6.627178334551227e-07, + "loss": 0.7959232926368713, + "step": 5461 + }, + { + "epoch": 1.25852534562212, + "grad_norm": 1.2823788828450395, + "learning_rate": 6.623591983932935e-07, + "loss": 0.6722866296768188, + "step": 5462 + }, + { + "epoch": 1.2587557603686637, + "grad_norm": 1.0428819037253236, + "learning_rate": 6.620006123395799e-07, + "loss": 0.7688727378845215, + "step": 5463 + }, + { + "epoch": 1.2589861751152074, + "grad_norm": 1.1454091886933473, + "learning_rate": 6.616420753460301e-07, + "loss": 0.7543724179267883, + "step": 5464 + }, + { + "epoch": 1.2592165898617511, + "grad_norm": 1.3156243556780545, + "learning_rate": 6.612835874646847e-07, + "loss": 0.7097430229187012, + "step": 5465 + }, + { + "epoch": 1.2594470046082948, + "grad_norm": 1.1699591097632744, + "learning_rate": 6.609251487475786e-07, 
+ "loss": 0.8640443682670593, + "step": 5466 + }, + { + "epoch": 1.2596774193548388, + "grad_norm": 1.4552439697890553, + "learning_rate": 6.605667592467384e-07, + "loss": 0.7872523069381714, + "step": 5467 + }, + { + "epoch": 1.2599078341013825, + "grad_norm": 1.3601390048962447, + "learning_rate": 6.602084190141835e-07, + "loss": 0.8647557497024536, + "step": 5468 + }, + { + "epoch": 1.2601382488479262, + "grad_norm": 0.9953963267515464, + "learning_rate": 6.598501281019268e-07, + "loss": 0.7323553562164307, + "step": 5469 + }, + { + "epoch": 1.26036866359447, + "grad_norm": 1.2478057023441294, + "learning_rate": 6.594918865619739e-07, + "loss": 0.8214852809906006, + "step": 5470 + }, + { + "epoch": 1.2605990783410137, + "grad_norm": 1.1743890995374524, + "learning_rate": 6.591336944463223e-07, + "loss": 0.8011265397071838, + "step": 5471 + }, + { + "epoch": 1.2608294930875577, + "grad_norm": 0.9651307194588488, + "learning_rate": 6.587755518069642e-07, + "loss": 0.798862636089325, + "step": 5472 + }, + { + "epoch": 1.2610599078341014, + "grad_norm": 1.1888872240865054, + "learning_rate": 6.58417458695883e-07, + "loss": 0.7231202721595764, + "step": 5473 + }, + { + "epoch": 1.261290322580645, + "grad_norm": 1.25713690411949, + "learning_rate": 6.580594151650551e-07, + "loss": 0.8816685676574707, + "step": 5474 + }, + { + "epoch": 1.261520737327189, + "grad_norm": 1.0218552259688816, + "learning_rate": 6.577014212664509e-07, + "loss": 0.6343427300453186, + "step": 5475 + }, + { + "epoch": 1.2617511520737328, + "grad_norm": 1.2062270864209526, + "learning_rate": 6.573434770520321e-07, + "loss": 0.7785895466804504, + "step": 5476 + }, + { + "epoch": 1.2619815668202765, + "grad_norm": 1.2086458816060426, + "learning_rate": 6.569855825737536e-07, + "loss": 0.7408698797225952, + "step": 5477 + }, + { + "epoch": 1.2622119815668202, + "grad_norm": 1.2755490666336102, + "learning_rate": 6.566277378835643e-07, + "loss": 0.8481286764144897, + "step": 5478 + }, + { + 
"epoch": 1.262442396313364, + "grad_norm": 1.0772225233745287, + "learning_rate": 6.56269943033404e-07, + "loss": 0.8221831917762756, + "step": 5479 + }, + { + "epoch": 1.262672811059908, + "grad_norm": 1.1202704150930312, + "learning_rate": 6.559121980752065e-07, + "loss": 0.805405855178833, + "step": 5480 + }, + { + "epoch": 1.2629032258064516, + "grad_norm": 1.4925713527432443, + "learning_rate": 6.55554503060898e-07, + "loss": 0.8643565773963928, + "step": 5481 + }, + { + "epoch": 1.2631336405529954, + "grad_norm": 1.038997236699539, + "learning_rate": 6.551968580423973e-07, + "loss": 0.7087225914001465, + "step": 5482 + }, + { + "epoch": 1.263364055299539, + "grad_norm": 1.3080505612178328, + "learning_rate": 6.54839263071616e-07, + "loss": 0.8401756882667542, + "step": 5483 + }, + { + "epoch": 1.2635944700460828, + "grad_norm": 0.974231759030553, + "learning_rate": 6.544817182004589e-07, + "loss": 0.76345294713974, + "step": 5484 + }, + { + "epoch": 1.2638248847926268, + "grad_norm": 0.9975788463971886, + "learning_rate": 6.541242234808228e-07, + "loss": 0.7177271842956543, + "step": 5485 + }, + { + "epoch": 1.2640552995391705, + "grad_norm": 1.0524467641617976, + "learning_rate": 6.537667789645981e-07, + "loss": 0.7436186075210571, + "step": 5486 + }, + { + "epoch": 1.2642857142857142, + "grad_norm": 1.025347292021162, + "learning_rate": 6.53409384703667e-07, + "loss": 0.6526673436164856, + "step": 5487 + }, + { + "epoch": 1.2645161290322582, + "grad_norm": 1.4422505610217646, + "learning_rate": 6.530520407499049e-07, + "loss": 0.879219651222229, + "step": 5488 + }, + { + "epoch": 1.264746543778802, + "grad_norm": 1.1643268817299548, + "learning_rate": 6.526947471551798e-07, + "loss": 0.7005003690719604, + "step": 5489 + }, + { + "epoch": 1.2649769585253456, + "grad_norm": 1.276974659887974, + "learning_rate": 6.523375039713525e-07, + "loss": 0.716349720954895, + "step": 5490 + }, + { + "epoch": 1.2652073732718894, + "grad_norm": 1.307490301718017, + 
"learning_rate": 6.519803112502758e-07, + "loss": 0.8524413704872131, + "step": 5491 + }, + { + "epoch": 1.265437788018433, + "grad_norm": 1.3886244481055607, + "learning_rate": 6.516231690437966e-07, + "loss": 0.8032857179641724, + "step": 5492 + }, + { + "epoch": 1.265668202764977, + "grad_norm": 1.3026581508138244, + "learning_rate": 6.512660774037531e-07, + "loss": 0.8912144899368286, + "step": 5493 + }, + { + "epoch": 1.2658986175115208, + "grad_norm": 1.1001846572449894, + "learning_rate": 6.509090363819764e-07, + "loss": 0.6526974439620972, + "step": 5494 + }, + { + "epoch": 1.2661290322580645, + "grad_norm": 1.1539964772442708, + "learning_rate": 6.505520460302916e-07, + "loss": 0.7436610460281372, + "step": 5495 + }, + { + "epoch": 1.2663594470046082, + "grad_norm": 1.0590907210895066, + "learning_rate": 6.501951064005145e-07, + "loss": 0.7112951874732971, + "step": 5496 + }, + { + "epoch": 1.266589861751152, + "grad_norm": 1.136772271419419, + "learning_rate": 6.498382175444545e-07, + "loss": 0.6908622980117798, + "step": 5497 + }, + { + "epoch": 1.266820276497696, + "grad_norm": 1.2936126009346398, + "learning_rate": 6.494813795139137e-07, + "loss": 0.8169400691986084, + "step": 5498 + }, + { + "epoch": 1.2670506912442396, + "grad_norm": 1.1611805763062155, + "learning_rate": 6.491245923606868e-07, + "loss": 0.7577871084213257, + "step": 5499 + }, + { + "epoch": 1.2672811059907834, + "grad_norm": 1.2166617406598321, + "learning_rate": 6.487678561365606e-07, + "loss": 0.7470887303352356, + "step": 5500 + }, + { + "epoch": 1.2675115207373273, + "grad_norm": 1.2499100792685887, + "learning_rate": 6.484111708933153e-07, + "loss": 0.7862193584442139, + "step": 5501 + }, + { + "epoch": 1.267741935483871, + "grad_norm": 1.0856856438170979, + "learning_rate": 6.48054536682723e-07, + "loss": 0.6809444427490234, + "step": 5502 + }, + { + "epoch": 1.2679723502304148, + "grad_norm": 1.1883483456973896, + "learning_rate": 6.476979535565486e-07, + "loss": 
0.7560738921165466, + "step": 5503 + }, + { + "epoch": 1.2682027649769585, + "grad_norm": 1.060654462751894, + "learning_rate": 6.473414215665501e-07, + "loss": 0.6961003541946411, + "step": 5504 + }, + { + "epoch": 1.2684331797235022, + "grad_norm": 1.1318601167609275, + "learning_rate": 6.469849407644775e-07, + "loss": 0.762688159942627, + "step": 5505 + }, + { + "epoch": 1.2686635944700462, + "grad_norm": 1.3318780914664468, + "learning_rate": 6.46628511202073e-07, + "loss": 0.8735007047653198, + "step": 5506 + }, + { + "epoch": 1.26889400921659, + "grad_norm": 1.2498993266864264, + "learning_rate": 6.462721329310727e-07, + "loss": 0.7127432823181152, + "step": 5507 + }, + { + "epoch": 1.2691244239631336, + "grad_norm": 1.1810894491038926, + "learning_rate": 6.45915806003204e-07, + "loss": 0.7720422744750977, + "step": 5508 + }, + { + "epoch": 1.2693548387096774, + "grad_norm": 1.3742393921911886, + "learning_rate": 6.455595304701871e-07, + "loss": 0.8046890497207642, + "step": 5509 + }, + { + "epoch": 1.269585253456221, + "grad_norm": 1.433035812490825, + "learning_rate": 6.452033063837354e-07, + "loss": 0.8218742609024048, + "step": 5510 + }, + { + "epoch": 1.269815668202765, + "grad_norm": 1.3642640568886157, + "learning_rate": 6.448471337955536e-07, + "loss": 0.912622332572937, + "step": 5511 + }, + { + "epoch": 1.2700460829493088, + "grad_norm": 1.3101181049427244, + "learning_rate": 6.444910127573407e-07, + "loss": 0.7940733432769775, + "step": 5512 + }, + { + "epoch": 1.2702764976958525, + "grad_norm": 1.0982469100789136, + "learning_rate": 6.441349433207864e-07, + "loss": 0.7085565328598022, + "step": 5513 + }, + { + "epoch": 1.2705069124423964, + "grad_norm": 1.241687978637031, + "learning_rate": 6.437789255375739e-07, + "loss": 0.9316935539245605, + "step": 5514 + }, + { + "epoch": 1.2707373271889402, + "grad_norm": 0.9697190322352798, + "learning_rate": 6.43422959459379e-07, + "loss": 0.7412574291229248, + "step": 5515 + }, + { + "epoch": 
1.270967741935484, + "grad_norm": 0.9713506680995111, + "learning_rate": 6.430670451378695e-07, + "loss": 0.7476450204849243, + "step": 5516 + }, + { + "epoch": 1.2711981566820276, + "grad_norm": 1.1272976564667934, + "learning_rate": 6.427111826247056e-07, + "loss": 0.8530189990997314, + "step": 5517 + }, + { + "epoch": 1.2714285714285714, + "grad_norm": 1.3163108639601895, + "learning_rate": 6.423553719715406e-07, + "loss": 0.8193017840385437, + "step": 5518 + }, + { + "epoch": 1.2716589861751153, + "grad_norm": 1.002275086425174, + "learning_rate": 6.419996132300203e-07, + "loss": 0.7444974780082703, + "step": 5519 + }, + { + "epoch": 1.271889400921659, + "grad_norm": 1.0214749663440856, + "learning_rate": 6.416439064517818e-07, + "loss": 0.7422837018966675, + "step": 5520 + }, + { + "epoch": 1.2721198156682028, + "grad_norm": 1.2499390785362547, + "learning_rate": 6.412882516884562e-07, + "loss": 1.0155640840530396, + "step": 5521 + }, + { + "epoch": 1.2723502304147465, + "grad_norm": 1.489615968336023, + "learning_rate": 6.409326489916658e-07, + "loss": 0.8097087144851685, + "step": 5522 + }, + { + "epoch": 1.2725806451612902, + "grad_norm": 1.293861875643454, + "learning_rate": 6.405770984130257e-07, + "loss": 0.8545565009117126, + "step": 5523 + }, + { + "epoch": 1.2728110599078342, + "grad_norm": 0.9914622760341439, + "learning_rate": 6.402216000041445e-07, + "loss": 0.6765652298927307, + "step": 5524 + }, + { + "epoch": 1.273041474654378, + "grad_norm": 1.103390848542702, + "learning_rate": 6.398661538166217e-07, + "loss": 0.7964426875114441, + "step": 5525 + }, + { + "epoch": 1.2732718894009216, + "grad_norm": 1.2196724846653912, + "learning_rate": 6.395107599020495e-07, + "loss": 0.7449651956558228, + "step": 5526 + }, + { + "epoch": 1.2735023041474656, + "grad_norm": 1.5614043870867116, + "learning_rate": 6.391554183120138e-07, + "loss": 0.8639888167381287, + "step": 5527 + }, + { + "epoch": 1.2737327188940093, + "grad_norm": 1.046130673497984, + 
"learning_rate": 6.388001290980914e-07, + "loss": 0.7668901681900024, + "step": 5528 + }, + { + "epoch": 1.273963133640553, + "grad_norm": 1.082923428749424, + "learning_rate": 6.384448923118517e-07, + "loss": 0.6461849212646484, + "step": 5529 + }, + { + "epoch": 1.2741935483870968, + "grad_norm": 1.1539877219125736, + "learning_rate": 6.380897080048576e-07, + "loss": 0.7045707702636719, + "step": 5530 + }, + { + "epoch": 1.2744239631336405, + "grad_norm": 1.1893221959186644, + "learning_rate": 6.377345762286632e-07, + "loss": 0.8303793668746948, + "step": 5531 + }, + { + "epoch": 1.2746543778801844, + "grad_norm": 1.112799220738114, + "learning_rate": 6.373794970348152e-07, + "loss": 0.808259129524231, + "step": 5532 + }, + { + "epoch": 1.2748847926267282, + "grad_norm": 1.527249581557179, + "learning_rate": 6.370244704748535e-07, + "loss": 0.8224689960479736, + "step": 5533 + }, + { + "epoch": 1.2751152073732719, + "grad_norm": 1.4408900318423565, + "learning_rate": 6.366694966003089e-07, + "loss": 0.8559266328811646, + "step": 5534 + }, + { + "epoch": 1.2753456221198156, + "grad_norm": 1.3225808297843282, + "learning_rate": 6.363145754627063e-07, + "loss": 0.7972407341003418, + "step": 5535 + }, + { + "epoch": 1.2755760368663593, + "grad_norm": 0.9700139233174567, + "learning_rate": 6.359597071135618e-07, + "loss": 0.7750328779220581, + "step": 5536 + }, + { + "epoch": 1.2758064516129033, + "grad_norm": 1.3472908531853058, + "learning_rate": 6.356048916043836e-07, + "loss": 0.807072639465332, + "step": 5537 + }, + { + "epoch": 1.276036866359447, + "grad_norm": 1.2153299361350896, + "learning_rate": 6.35250128986673e-07, + "loss": 0.8459323048591614, + "step": 5538 + }, + { + "epoch": 1.2762672811059907, + "grad_norm": 1.1921452547723677, + "learning_rate": 6.348954193119233e-07, + "loss": 0.7874447107315063, + "step": 5539 + }, + { + "epoch": 1.2764976958525347, + "grad_norm": 1.243785118643696, + "learning_rate": 6.345407626316202e-07, + "loss": 
0.8817394971847534, + "step": 5540 + }, + { + "epoch": 1.2767281105990782, + "grad_norm": 1.0210963009280363, + "learning_rate": 6.341861589972417e-07, + "loss": 0.7936382293701172, + "step": 5541 + }, + { + "epoch": 1.2769585253456222, + "grad_norm": 1.1288567171733945, + "learning_rate": 6.33831608460258e-07, + "loss": 0.7301348447799683, + "step": 5542 + }, + { + "epoch": 1.2771889400921659, + "grad_norm": 0.9930019172389213, + "learning_rate": 6.334771110721311e-07, + "loss": 0.6546784043312073, + "step": 5543 + }, + { + "epoch": 1.2774193548387096, + "grad_norm": 1.1320345708885517, + "learning_rate": 6.331226668843168e-07, + "loss": 0.798918604850769, + "step": 5544 + }, + { + "epoch": 1.2776497695852536, + "grad_norm": 1.0677491026042323, + "learning_rate": 6.327682759482618e-07, + "loss": 0.6275264620780945, + "step": 5545 + }, + { + "epoch": 1.2778801843317973, + "grad_norm": 1.1056891749814017, + "learning_rate": 6.324139383154048e-07, + "loss": 0.6870732307434082, + "step": 5546 + }, + { + "epoch": 1.278110599078341, + "grad_norm": 1.113302907194177, + "learning_rate": 6.320596540371785e-07, + "loss": 0.8280556201934814, + "step": 5547 + }, + { + "epoch": 1.2783410138248847, + "grad_norm": 1.0958194382001605, + "learning_rate": 6.317054231650063e-07, + "loss": 0.8053648471832275, + "step": 5548 + }, + { + "epoch": 1.2785714285714285, + "grad_norm": 1.1500355966221105, + "learning_rate": 6.313512457503043e-07, + "loss": 0.7628893852233887, + "step": 5549 + }, + { + "epoch": 1.2788018433179724, + "grad_norm": 1.1770420137500979, + "learning_rate": 6.30997121844481e-07, + "loss": 0.8075753450393677, + "step": 5550 + }, + { + "epoch": 1.2790322580645161, + "grad_norm": 1.1420933628102303, + "learning_rate": 6.306430514989371e-07, + "loss": 0.7883275747299194, + "step": 5551 + }, + { + "epoch": 1.2792626728110599, + "grad_norm": 1.238710939895555, + "learning_rate": 6.302890347650648e-07, + "loss": 0.7438768744468689, + "step": 5552 + }, + { + "epoch": 
1.2794930875576038, + "grad_norm": 1.261177122589368, + "learning_rate": 6.299350716942501e-07, + "loss": 0.7756023406982422, + "step": 5553 + }, + { + "epoch": 1.2797235023041473, + "grad_norm": 1.0915753285175969, + "learning_rate": 6.295811623378698e-07, + "loss": 0.7128444910049438, + "step": 5554 + }, + { + "epoch": 1.2799539170506913, + "grad_norm": 0.9707581386208312, + "learning_rate": 6.292273067472931e-07, + "loss": 0.7611228823661804, + "step": 5555 + }, + { + "epoch": 1.280184331797235, + "grad_norm": 1.0553125250063393, + "learning_rate": 6.288735049738822e-07, + "loss": 0.7803670167922974, + "step": 5556 + }, + { + "epoch": 1.2804147465437787, + "grad_norm": 1.0703973986821036, + "learning_rate": 6.28519757068991e-07, + "loss": 0.958204448223114, + "step": 5557 + }, + { + "epoch": 1.2806451612903227, + "grad_norm": 1.1879640741186497, + "learning_rate": 6.28166063083965e-07, + "loss": 0.7220249772071838, + "step": 5558 + }, + { + "epoch": 1.2808755760368664, + "grad_norm": 1.4250311227945265, + "learning_rate": 6.278124230701427e-07, + "loss": 0.7396695613861084, + "step": 5559 + }, + { + "epoch": 1.2811059907834101, + "grad_norm": 1.1549531480718158, + "learning_rate": 6.274588370788545e-07, + "loss": 0.819474458694458, + "step": 5560 + }, + { + "epoch": 1.2813364055299539, + "grad_norm": 1.0583859146786307, + "learning_rate": 6.271053051614231e-07, + "loss": 0.6997617483139038, + "step": 5561 + }, + { + "epoch": 1.2815668202764976, + "grad_norm": 1.1462805534929357, + "learning_rate": 6.26751827369163e-07, + "loss": 0.7526183128356934, + "step": 5562 + }, + { + "epoch": 1.2817972350230415, + "grad_norm": 1.3576714493720627, + "learning_rate": 6.263984037533805e-07, + "loss": 0.7185813188552856, + "step": 5563 + }, + { + "epoch": 1.2820276497695853, + "grad_norm": 0.9722151716418193, + "learning_rate": 6.260450343653757e-07, + "loss": 0.7739845514297485, + "step": 5564 + }, + { + "epoch": 1.282258064516129, + "grad_norm": 1.0387058407540612, + 
"learning_rate": 6.25691719256439e-07, + "loss": 0.698557436466217, + "step": 5565 + }, + { + "epoch": 1.2824884792626727, + "grad_norm": 1.1402265972621366, + "learning_rate": 6.253384584778534e-07, + "loss": 0.6946271657943726, + "step": 5566 + }, + { + "epoch": 1.2827188940092165, + "grad_norm": 1.2349626326096388, + "learning_rate": 6.24985252080895e-07, + "loss": 0.7746025323867798, + "step": 5567 + }, + { + "epoch": 1.2829493087557604, + "grad_norm": 1.050385772264468, + "learning_rate": 6.246321001168306e-07, + "loss": 0.8759660720825195, + "step": 5568 + }, + { + "epoch": 1.2831797235023041, + "grad_norm": 1.1535965526965875, + "learning_rate": 6.2427900263692e-07, + "loss": 0.741111159324646, + "step": 5569 + }, + { + "epoch": 1.2834101382488479, + "grad_norm": 1.2619269860039752, + "learning_rate": 6.239259596924149e-07, + "loss": 0.8580630421638489, + "step": 5570 + }, + { + "epoch": 1.2836405529953918, + "grad_norm": 1.0890841483076914, + "learning_rate": 6.235729713345588e-07, + "loss": 0.7139618992805481, + "step": 5571 + }, + { + "epoch": 1.2838709677419355, + "grad_norm": 1.1260979019373678, + "learning_rate": 6.232200376145873e-07, + "loss": 0.8300976753234863, + "step": 5572 + }, + { + "epoch": 1.2841013824884793, + "grad_norm": 1.091655687939806, + "learning_rate": 6.228671585837288e-07, + "loss": 0.7193114757537842, + "step": 5573 + }, + { + "epoch": 1.284331797235023, + "grad_norm": 1.289214780103651, + "learning_rate": 6.225143342932031e-07, + "loss": 0.8802851438522339, + "step": 5574 + }, + { + "epoch": 1.2845622119815667, + "grad_norm": 1.069264068692084, + "learning_rate": 6.221615647942217e-07, + "loss": 0.749543309211731, + "step": 5575 + }, + { + "epoch": 1.2847926267281107, + "grad_norm": 1.1044047193035296, + "learning_rate": 6.218088501379892e-07, + "loss": 0.703508734703064, + "step": 5576 + }, + { + "epoch": 1.2850230414746544, + "grad_norm": 1.4722305319077136, + "learning_rate": 6.214561903757017e-07, + "loss": 
0.7519023418426514, + "step": 5577 + }, + { + "epoch": 1.2852534562211981, + "grad_norm": 1.4130549197431626, + "learning_rate": 6.211035855585466e-07, + "loss": 0.9525241851806641, + "step": 5578 + }, + { + "epoch": 1.2854838709677419, + "grad_norm": 1.3149636986285136, + "learning_rate": 6.207510357377046e-07, + "loss": 0.8288872241973877, + "step": 5579 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.3691241647074333, + "learning_rate": 6.203985409643478e-07, + "loss": 0.8531112670898438, + "step": 5580 + }, + { + "epoch": 1.2859447004608295, + "grad_norm": 1.121519108666965, + "learning_rate": 6.200461012896401e-07, + "loss": 0.7106495499610901, + "step": 5581 + }, + { + "epoch": 1.2861751152073733, + "grad_norm": 1.426451214846877, + "learning_rate": 6.19693716764738e-07, + "loss": 0.714931845664978, + "step": 5582 + }, + { + "epoch": 1.286405529953917, + "grad_norm": 1.3296169647206766, + "learning_rate": 6.19341387440789e-07, + "loss": 0.8281360268592834, + "step": 5583 + }, + { + "epoch": 1.286635944700461, + "grad_norm": 1.4833656768811476, + "learning_rate": 6.189891133689342e-07, + "loss": 0.9155910611152649, + "step": 5584 + }, + { + "epoch": 1.2868663594470047, + "grad_norm": 1.3432683189972507, + "learning_rate": 6.186368946003051e-07, + "loss": 0.7573060989379883, + "step": 5585 + }, + { + "epoch": 1.2870967741935484, + "grad_norm": 1.2055594370265132, + "learning_rate": 6.182847311860255e-07, + "loss": 0.6994235515594482, + "step": 5586 + }, + { + "epoch": 1.2873271889400921, + "grad_norm": 1.0775806715124838, + "learning_rate": 6.179326231772123e-07, + "loss": 0.771092414855957, + "step": 5587 + }, + { + "epoch": 1.2875576036866359, + "grad_norm": 1.269208775599209, + "learning_rate": 6.17580570624973e-07, + "loss": 0.7470684051513672, + "step": 5588 + }, + { + "epoch": 1.2877880184331798, + "grad_norm": 1.5425254092924614, + "learning_rate": 6.172285735804075e-07, + "loss": 0.918886125087738, + "step": 5589 + }, + { + "epoch": 
1.2880184331797235, + "grad_norm": 1.0377944178544696, + "learning_rate": 6.16876632094608e-07, + "loss": 0.7232617139816284, + "step": 5590 + }, + { + "epoch": 1.2882488479262673, + "grad_norm": 1.1703799662994099, + "learning_rate": 6.16524746218658e-07, + "loss": 0.7367006540298462, + "step": 5591 + }, + { + "epoch": 1.288479262672811, + "grad_norm": 1.1904508940632728, + "learning_rate": 6.161729160036333e-07, + "loss": 0.8783999681472778, + "step": 5592 + }, + { + "epoch": 1.2887096774193547, + "grad_norm": 1.1869935665885074, + "learning_rate": 6.158211415006019e-07, + "loss": 0.8266523480415344, + "step": 5593 + }, + { + "epoch": 1.2889400921658987, + "grad_norm": 1.1675308279856504, + "learning_rate": 6.154694227606234e-07, + "loss": 0.8528730869293213, + "step": 5594 + }, + { + "epoch": 1.2891705069124424, + "grad_norm": 1.3182250244296418, + "learning_rate": 6.151177598347485e-07, + "loss": 0.7586283683776855, + "step": 5595 + }, + { + "epoch": 1.2894009216589861, + "grad_norm": 1.4182043487427547, + "learning_rate": 6.147661527740217e-07, + "loss": 0.8671954870223999, + "step": 5596 + }, + { + "epoch": 1.28963133640553, + "grad_norm": 1.081063839615246, + "learning_rate": 6.14414601629478e-07, + "loss": 0.7354376316070557, + "step": 5597 + }, + { + "epoch": 1.2898617511520738, + "grad_norm": 1.051384434692424, + "learning_rate": 6.140631064521443e-07, + "loss": 0.8515663146972656, + "step": 5598 + }, + { + "epoch": 1.2900921658986175, + "grad_norm": 1.3608023513745535, + "learning_rate": 6.137116672930395e-07, + "loss": 0.9068351984024048, + "step": 5599 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 1.4956373283031226, + "learning_rate": 6.133602842031752e-07, + "loss": 0.7260826230049133, + "step": 5600 + }, + { + "epoch": 1.290552995391705, + "grad_norm": 1.1400144341772105, + "learning_rate": 6.130089572335535e-07, + "loss": 0.7162504196166992, + "step": 5601 + }, + { + "epoch": 1.290783410138249, + "grad_norm": 1.2203621133034757, + 
"learning_rate": 6.126576864351695e-07, + "loss": 0.7625414133071899, + "step": 5602 + }, + { + "epoch": 1.2910138248847927, + "grad_norm": 1.0985405517526388, + "learning_rate": 6.123064718590099e-07, + "loss": 0.787274956703186, + "step": 5603 + }, + { + "epoch": 1.2912442396313364, + "grad_norm": 1.0173148522997915, + "learning_rate": 6.119553135560519e-07, + "loss": 0.6539326310157776, + "step": 5604 + }, + { + "epoch": 1.2914746543778801, + "grad_norm": 1.0405810111847797, + "learning_rate": 6.11604211577267e-07, + "loss": 0.8481189012527466, + "step": 5605 + }, + { + "epoch": 1.2917050691244238, + "grad_norm": 1.1908108884253377, + "learning_rate": 6.112531659736164e-07, + "loss": 0.794892430305481, + "step": 5606 + }, + { + "epoch": 1.2919354838709678, + "grad_norm": 1.0728869697567227, + "learning_rate": 6.10902176796054e-07, + "loss": 0.6738630533218384, + "step": 5607 + }, + { + "epoch": 1.2921658986175115, + "grad_norm": 1.2190379429225964, + "learning_rate": 6.105512440955258e-07, + "loss": 0.7220937609672546, + "step": 5608 + }, + { + "epoch": 1.2923963133640552, + "grad_norm": 0.9117229942004119, + "learning_rate": 6.102003679229688e-07, + "loss": 0.6831785440444946, + "step": 5609 + }, + { + "epoch": 1.2926267281105992, + "grad_norm": 1.0925904509799125, + "learning_rate": 6.098495483293125e-07, + "loss": 0.7033277750015259, + "step": 5610 + }, + { + "epoch": 1.292857142857143, + "grad_norm": 0.9024231402190447, + "learning_rate": 6.094987853654779e-07, + "loss": 0.7063429355621338, + "step": 5611 + }, + { + "epoch": 1.2930875576036867, + "grad_norm": 1.1531814321684226, + "learning_rate": 6.091480790823771e-07, + "loss": 0.7791472673416138, + "step": 5612 + }, + { + "epoch": 1.2933179723502304, + "grad_norm": 1.3904591821034944, + "learning_rate": 6.087974295309157e-07, + "loss": 0.8674220442771912, + "step": 5613 + }, + { + "epoch": 1.293548387096774, + "grad_norm": 1.0513898416349883, + "learning_rate": 6.084468367619895e-07, + "loss": 
0.7878479957580566, + "step": 5614 + }, + { + "epoch": 1.293778801843318, + "grad_norm": 0.9253694996288483, + "learning_rate": 6.080963008264861e-07, + "loss": 0.7019612789154053, + "step": 5615 + }, + { + "epoch": 1.2940092165898618, + "grad_norm": 1.1163623788947772, + "learning_rate": 6.077458217752863e-07, + "loss": 0.68759685754776, + "step": 5616 + }, + { + "epoch": 1.2942396313364055, + "grad_norm": 1.1326420080908837, + "learning_rate": 6.073953996592612e-07, + "loss": 0.851733922958374, + "step": 5617 + }, + { + "epoch": 1.2944700460829492, + "grad_norm": 1.1539848484030915, + "learning_rate": 6.070450345292739e-07, + "loss": 0.699798047542572, + "step": 5618 + }, + { + "epoch": 1.294700460829493, + "grad_norm": 1.3439745934739915, + "learning_rate": 6.066947264361798e-07, + "loss": 0.8625125885009766, + "step": 5619 + }, + { + "epoch": 1.294930875576037, + "grad_norm": 1.2395704270447963, + "learning_rate": 6.063444754308253e-07, + "loss": 0.759062647819519, + "step": 5620 + }, + { + "epoch": 1.2951612903225806, + "grad_norm": 1.1349706072725887, + "learning_rate": 6.059942815640491e-07, + "loss": 0.7549973726272583, + "step": 5621 + }, + { + "epoch": 1.2953917050691244, + "grad_norm": 1.2217826699562653, + "learning_rate": 6.056441448866816e-07, + "loss": 0.8142743110656738, + "step": 5622 + }, + { + "epoch": 1.2956221198156683, + "grad_norm": 1.0818175637274867, + "learning_rate": 6.052940654495442e-07, + "loss": 0.7881144881248474, + "step": 5623 + }, + { + "epoch": 1.295852534562212, + "grad_norm": 1.2201407031885296, + "learning_rate": 6.049440433034505e-07, + "loss": 0.7922053933143616, + "step": 5624 + }, + { + "epoch": 1.2960829493087558, + "grad_norm": 1.1955381878542082, + "learning_rate": 6.045940784992061e-07, + "loss": 0.6808311939239502, + "step": 5625 + }, + { + "epoch": 1.2963133640552995, + "grad_norm": 1.203534246478074, + "learning_rate": 6.04244171087608e-07, + "loss": 0.933373749256134, + "step": 5626 + }, + { + "epoch": 
1.2965437788018432, + "grad_norm": 1.3722573775025653, + "learning_rate": 6.038943211194439e-07, + "loss": 0.8077404499053955, + "step": 5627 + }, + { + "epoch": 1.2967741935483872, + "grad_norm": 1.2263754202708472, + "learning_rate": 6.035445286454953e-07, + "loss": 0.7920867204666138, + "step": 5628 + }, + { + "epoch": 1.297004608294931, + "grad_norm": 1.1574994086499075, + "learning_rate": 6.031947937165335e-07, + "loss": 0.5872117280960083, + "step": 5629 + }, + { + "epoch": 1.2972350230414746, + "grad_norm": 1.2959093642025599, + "learning_rate": 6.02845116383322e-07, + "loss": 0.8593505620956421, + "step": 5630 + }, + { + "epoch": 1.2974654377880184, + "grad_norm": 1.4149025135483138, + "learning_rate": 6.02495496696616e-07, + "loss": 0.8352359533309937, + "step": 5631 + }, + { + "epoch": 1.297695852534562, + "grad_norm": 1.1724909355958724, + "learning_rate": 6.021459347071623e-07, + "loss": 0.7316182255744934, + "step": 5632 + }, + { + "epoch": 1.297926267281106, + "grad_norm": 1.1972298924235394, + "learning_rate": 6.017964304656997e-07, + "loss": 0.7294400334358215, + "step": 5633 + }, + { + "epoch": 1.2981566820276498, + "grad_norm": 1.0769002788322786, + "learning_rate": 6.014469840229581e-07, + "loss": 0.6595947742462158, + "step": 5634 + }, + { + "epoch": 1.2983870967741935, + "grad_norm": 1.308087510592029, + "learning_rate": 6.010975954296587e-07, + "loss": 0.7849195003509521, + "step": 5635 + }, + { + "epoch": 1.2986175115207375, + "grad_norm": 1.0709465804551583, + "learning_rate": 6.007482647365159e-07, + "loss": 0.6915944218635559, + "step": 5636 + }, + { + "epoch": 1.2988479262672812, + "grad_norm": 1.1595852934519908, + "learning_rate": 6.003989919942338e-07, + "loss": 0.6821994781494141, + "step": 5637 + }, + { + "epoch": 1.299078341013825, + "grad_norm": 1.0472078656298618, + "learning_rate": 6.000497772535087e-07, + "loss": 0.7333718538284302, + "step": 5638 + }, + { + "epoch": 1.2993087557603686, + "grad_norm": 1.0656731272596272, + 
"learning_rate": 5.997006205650292e-07, + "loss": 0.8069280385971069, + "step": 5639 + }, + { + "epoch": 1.2995391705069124, + "grad_norm": 1.0655856429852437, + "learning_rate": 5.993515219794745e-07, + "loss": 0.6989297866821289, + "step": 5640 + }, + { + "epoch": 1.2997695852534563, + "grad_norm": 1.187477589278957, + "learning_rate": 5.990024815475161e-07, + "loss": 0.7784403562545776, + "step": 5641 + }, + { + "epoch": 1.3, + "grad_norm": 1.2512602653388225, + "learning_rate": 5.986534993198168e-07, + "loss": 0.6554181575775146, + "step": 5642 + }, + { + "epoch": 1.3002304147465438, + "grad_norm": 1.298436931300319, + "learning_rate": 5.983045753470307e-07, + "loss": 0.7647836208343506, + "step": 5643 + }, + { + "epoch": 1.3004608294930875, + "grad_norm": 0.9269247679622435, + "learning_rate": 5.979557096798033e-07, + "loss": 0.7787084579467773, + "step": 5644 + }, + { + "epoch": 1.3006912442396312, + "grad_norm": 1.0646184845326898, + "learning_rate": 5.97606902368773e-07, + "loss": 0.6367940902709961, + "step": 5645 + }, + { + "epoch": 1.3009216589861752, + "grad_norm": 1.0481428990706296, + "learning_rate": 5.972581534645679e-07, + "loss": 0.7650243043899536, + "step": 5646 + }, + { + "epoch": 1.301152073732719, + "grad_norm": 0.9452672150266047, + "learning_rate": 5.969094630178084e-07, + "loss": 0.6506018042564392, + "step": 5647 + }, + { + "epoch": 1.3013824884792626, + "grad_norm": 1.4764262273840163, + "learning_rate": 5.965608310791071e-07, + "loss": 0.7351242303848267, + "step": 5648 + }, + { + "epoch": 1.3016129032258066, + "grad_norm": 1.2210251097969258, + "learning_rate": 5.96212257699067e-07, + "loss": 0.7327077984809875, + "step": 5649 + }, + { + "epoch": 1.3018433179723503, + "grad_norm": 1.0681197005600311, + "learning_rate": 5.958637429282831e-07, + "loss": 0.6448171138763428, + "step": 5650 + }, + { + "epoch": 1.302073732718894, + "grad_norm": 1.18574113940407, + "learning_rate": 5.955152868173418e-07, + "loss": 0.8347861766815186, + 
"step": 5651 + }, + { + "epoch": 1.3023041474654378, + "grad_norm": 1.2733315501094051, + "learning_rate": 5.951668894168215e-07, + "loss": 0.736280620098114, + "step": 5652 + }, + { + "epoch": 1.3025345622119815, + "grad_norm": 1.2627292373923777, + "learning_rate": 5.948185507772908e-07, + "loss": 0.8677594661712646, + "step": 5653 + }, + { + "epoch": 1.3027649769585254, + "grad_norm": 1.1729788728933164, + "learning_rate": 5.944702709493113e-07, + "loss": 0.6598676443099976, + "step": 5654 + }, + { + "epoch": 1.3029953917050692, + "grad_norm": 1.1072155159392119, + "learning_rate": 5.941220499834352e-07, + "loss": 0.7795349359512329, + "step": 5655 + }, + { + "epoch": 1.303225806451613, + "grad_norm": 1.1312979891837796, + "learning_rate": 5.937738879302058e-07, + "loss": 0.6929318904876709, + "step": 5656 + }, + { + "epoch": 1.3034562211981566, + "grad_norm": 1.19931324162024, + "learning_rate": 5.934257848401593e-07, + "loss": 0.859328031539917, + "step": 5657 + }, + { + "epoch": 1.3036866359447004, + "grad_norm": 1.435339518052459, + "learning_rate": 5.930777407638216e-07, + "loss": 1.0015549659729004, + "step": 5658 + }, + { + "epoch": 1.3039170506912443, + "grad_norm": 1.0471647927751007, + "learning_rate": 5.927297557517115e-07, + "loss": 0.6775785088539124, + "step": 5659 + }, + { + "epoch": 1.304147465437788, + "grad_norm": 1.0488503999959857, + "learning_rate": 5.923818298543378e-07, + "loss": 0.7228262424468994, + "step": 5660 + }, + { + "epoch": 1.3043778801843318, + "grad_norm": 0.9177755631443217, + "learning_rate": 5.92033963122202e-07, + "loss": 0.6139897108078003, + "step": 5661 + }, + { + "epoch": 1.3046082949308757, + "grad_norm": 1.062819188029367, + "learning_rate": 5.916861556057965e-07, + "loss": 0.7336323261260986, + "step": 5662 + }, + { + "epoch": 1.3048387096774192, + "grad_norm": 1.1985877666304134, + "learning_rate": 5.913384073556049e-07, + "loss": 0.9223559498786926, + "step": 5663 + }, + { + "epoch": 1.3050691244239632, + 
"grad_norm": 1.1960311086176088, + "learning_rate": 5.909907184221023e-07, + "loss": 0.7230484485626221, + "step": 5664 + }, + { + "epoch": 1.305299539170507, + "grad_norm": 1.1557586988240278, + "learning_rate": 5.906430888557556e-07, + "loss": 0.753510594367981, + "step": 5665 + }, + { + "epoch": 1.3055299539170506, + "grad_norm": 1.2167084005991546, + "learning_rate": 5.902955187070229e-07, + "loss": 0.8960593938827515, + "step": 5666 + }, + { + "epoch": 1.3057603686635946, + "grad_norm": 0.9226031223011045, + "learning_rate": 5.899480080263527e-07, + "loss": 0.6865993738174438, + "step": 5667 + }, + { + "epoch": 1.3059907834101383, + "grad_norm": 1.2350884878154553, + "learning_rate": 5.896005568641868e-07, + "loss": 0.7748720645904541, + "step": 5668 + }, + { + "epoch": 1.306221198156682, + "grad_norm": 1.437104451012044, + "learning_rate": 5.892531652709567e-07, + "loss": 0.834233283996582, + "step": 5669 + }, + { + "epoch": 1.3064516129032258, + "grad_norm": 1.2209490689427414, + "learning_rate": 5.889058332970858e-07, + "loss": 0.8398417234420776, + "step": 5670 + }, + { + "epoch": 1.3066820276497695, + "grad_norm": 0.8546573405192346, + "learning_rate": 5.885585609929891e-07, + "loss": 0.6889529228210449, + "step": 5671 + }, + { + "epoch": 1.3069124423963134, + "grad_norm": 1.1935289122089947, + "learning_rate": 5.882113484090725e-07, + "loss": 0.6625782251358032, + "step": 5672 + }, + { + "epoch": 1.3071428571428572, + "grad_norm": 1.2286244905882078, + "learning_rate": 5.878641955957334e-07, + "loss": 0.7774407267570496, + "step": 5673 + }, + { + "epoch": 1.307373271889401, + "grad_norm": 1.066003573867245, + "learning_rate": 5.875171026033608e-07, + "loss": 0.7799595594406128, + "step": 5674 + }, + { + "epoch": 1.3076036866359446, + "grad_norm": 1.2859461118878832, + "learning_rate": 5.87170069482335e-07, + "loss": 0.800041913986206, + "step": 5675 + }, + { + "epoch": 1.3078341013824883, + "grad_norm": 1.2986825545894243, + "learning_rate": 
5.868230962830265e-07, + "loss": 0.7478667497634888, + "step": 5676 + }, + { + "epoch": 1.3080645161290323, + "grad_norm": 0.9705514903251621, + "learning_rate": 5.86476183055799e-07, + "loss": 0.7538981437683105, + "step": 5677 + }, + { + "epoch": 1.308294930875576, + "grad_norm": 1.4195819337110585, + "learning_rate": 5.861293298510061e-07, + "loss": 0.7556810975074768, + "step": 5678 + }, + { + "epoch": 1.3085253456221198, + "grad_norm": 0.9225289666667563, + "learning_rate": 5.85782536718993e-07, + "loss": 0.670037031173706, + "step": 5679 + }, + { + "epoch": 1.3087557603686637, + "grad_norm": 1.1667524105558311, + "learning_rate": 5.854358037100964e-07, + "loss": 0.6238662600517273, + "step": 5680 + }, + { + "epoch": 1.3089861751152074, + "grad_norm": 1.1817165911107195, + "learning_rate": 5.85089130874644e-07, + "loss": 0.7972823977470398, + "step": 5681 + }, + { + "epoch": 1.3092165898617512, + "grad_norm": 1.0746427307389195, + "learning_rate": 5.847425182629549e-07, + "loss": 0.7332338094711304, + "step": 5682 + }, + { + "epoch": 1.3094470046082949, + "grad_norm": 1.2496997052714673, + "learning_rate": 5.843959659253398e-07, + "loss": 0.8186966180801392, + "step": 5683 + }, + { + "epoch": 1.3096774193548386, + "grad_norm": 1.2708999919485935, + "learning_rate": 5.840494739120996e-07, + "loss": 0.8207032680511475, + "step": 5684 + }, + { + "epoch": 1.3099078341013826, + "grad_norm": 1.4960688490449285, + "learning_rate": 5.83703042273528e-07, + "loss": 0.848265528678894, + "step": 5685 + }, + { + "epoch": 1.3101382488479263, + "grad_norm": 1.0212687278019523, + "learning_rate": 5.833566710599088e-07, + "loss": 0.7766404151916504, + "step": 5686 + }, + { + "epoch": 1.31036866359447, + "grad_norm": 1.2185059104564926, + "learning_rate": 5.830103603215168e-07, + "loss": 0.7570784687995911, + "step": 5687 + }, + { + "epoch": 1.3105990783410137, + "grad_norm": 1.1006353524996257, + "learning_rate": 5.826641101086194e-07, + "loss": 0.7551493644714355, + "step": 
5688 + }, + { + "epoch": 1.3108294930875575, + "grad_norm": 1.3664942507199704, + "learning_rate": 5.823179204714739e-07, + "loss": 0.8589804172515869, + "step": 5689 + }, + { + "epoch": 1.3110599078341014, + "grad_norm": 1.2869604696659869, + "learning_rate": 5.819717914603288e-07, + "loss": 0.8252761960029602, + "step": 5690 + }, + { + "epoch": 1.3112903225806452, + "grad_norm": 1.0886628872971145, + "learning_rate": 5.816257231254254e-07, + "loss": 0.7784370183944702, + "step": 5691 + }, + { + "epoch": 1.3115207373271889, + "grad_norm": 1.1343775846575583, + "learning_rate": 5.812797155169942e-07, + "loss": 0.8040215969085693, + "step": 5692 + }, + { + "epoch": 1.3117511520737328, + "grad_norm": 1.013609351306971, + "learning_rate": 5.809337686852582e-07, + "loss": 0.8355100154876709, + "step": 5693 + }, + { + "epoch": 1.3119815668202766, + "grad_norm": 1.466649672488184, + "learning_rate": 5.805878826804303e-07, + "loss": 0.8233312368392944, + "step": 5694 + }, + { + "epoch": 1.3122119815668203, + "grad_norm": 1.1563119764352225, + "learning_rate": 5.802420575527165e-07, + "loss": 0.7756507992744446, + "step": 5695 + }, + { + "epoch": 1.312442396313364, + "grad_norm": 1.1867005828091945, + "learning_rate": 5.798962933523124e-07, + "loss": 0.7503829002380371, + "step": 5696 + }, + { + "epoch": 1.3126728110599077, + "grad_norm": 1.506327103479739, + "learning_rate": 5.795505901294051e-07, + "loss": 0.749663770198822, + "step": 5697 + }, + { + "epoch": 1.3129032258064517, + "grad_norm": 1.440884605575443, + "learning_rate": 5.792049479341732e-07, + "loss": 0.9003115296363831, + "step": 5698 + }, + { + "epoch": 1.3131336405529954, + "grad_norm": 1.059615932759845, + "learning_rate": 5.788593668167854e-07, + "loss": 0.655732274055481, + "step": 5699 + }, + { + "epoch": 1.3133640552995391, + "grad_norm": 0.9900775273356892, + "learning_rate": 5.785138468274036e-07, + "loss": 0.7318822145462036, + "step": 5700 + }, + { + "epoch": 1.3135944700460829, + "grad_norm": 
0.9099775921199348, + "learning_rate": 5.781683880161788e-07, + "loss": 0.6512752771377563, + "step": 5701 + }, + { + "epoch": 1.3138248847926266, + "grad_norm": 1.1289875219473309, + "learning_rate": 5.778229904332537e-07, + "loss": 0.7232785820960999, + "step": 5702 + }, + { + "epoch": 1.3140552995391706, + "grad_norm": 1.2645196269426846, + "learning_rate": 5.77477654128763e-07, + "loss": 0.837032675743103, + "step": 5703 + }, + { + "epoch": 1.3142857142857143, + "grad_norm": 1.4984544841183642, + "learning_rate": 5.771323791528315e-07, + "loss": 0.926714301109314, + "step": 5704 + }, + { + "epoch": 1.314516129032258, + "grad_norm": 1.1221666474084682, + "learning_rate": 5.76787165555575e-07, + "loss": 0.7228986620903015, + "step": 5705 + }, + { + "epoch": 1.314746543778802, + "grad_norm": 1.3618848390091767, + "learning_rate": 5.764420133871015e-07, + "loss": 0.8330450057983398, + "step": 5706 + }, + { + "epoch": 1.3149769585253457, + "grad_norm": 1.2680150111326054, + "learning_rate": 5.760969226975088e-07, + "loss": 0.793700098991394, + "step": 5707 + }, + { + "epoch": 1.3152073732718894, + "grad_norm": 1.2897950240071954, + "learning_rate": 5.757518935368868e-07, + "loss": 0.8797321319580078, + "step": 5708 + }, + { + "epoch": 1.3154377880184331, + "grad_norm": 1.1147531221594877, + "learning_rate": 5.754069259553159e-07, + "loss": 0.8772039413452148, + "step": 5709 + }, + { + "epoch": 1.3156682027649769, + "grad_norm": 0.820739065285044, + "learning_rate": 5.750620200028672e-07, + "loss": 0.5998358726501465, + "step": 5710 + }, + { + "epoch": 1.3158986175115208, + "grad_norm": 1.7932534766511148, + "learning_rate": 5.747171757296041e-07, + "loss": 0.7694767713546753, + "step": 5711 + }, + { + "epoch": 1.3161290322580645, + "grad_norm": 1.2782062967169578, + "learning_rate": 5.7437239318558e-07, + "loss": 0.8526760339736938, + "step": 5712 + }, + { + "epoch": 1.3163594470046083, + "grad_norm": 1.199230266468518, + "learning_rate": 5.740276724208396e-07, + 
"loss": 0.8407987356185913, + "step": 5713 + }, + { + "epoch": 1.316589861751152, + "grad_norm": 1.289466266523787, + "learning_rate": 5.736830134854183e-07, + "loss": 0.9731476306915283, + "step": 5714 + }, + { + "epoch": 1.3168202764976957, + "grad_norm": 1.134122607422213, + "learning_rate": 5.733384164293434e-07, + "loss": 0.7230468988418579, + "step": 5715 + }, + { + "epoch": 1.3170506912442397, + "grad_norm": 1.2031868742095575, + "learning_rate": 5.729938813026327e-07, + "loss": 0.8260238766670227, + "step": 5716 + }, + { + "epoch": 1.3172811059907834, + "grad_norm": 1.0909604007760305, + "learning_rate": 5.726494081552948e-07, + "loss": 0.7616437673568726, + "step": 5717 + }, + { + "epoch": 1.3175115207373271, + "grad_norm": 1.1614064666034054, + "learning_rate": 5.723049970373295e-07, + "loss": 0.7628509998321533, + "step": 5718 + }, + { + "epoch": 1.317741935483871, + "grad_norm": 1.2522299219195512, + "learning_rate": 5.719606479987273e-07, + "loss": 0.744842529296875, + "step": 5719 + }, + { + "epoch": 1.3179723502304148, + "grad_norm": 0.9975745357037148, + "learning_rate": 5.716163610894708e-07, + "loss": 0.7228065133094788, + "step": 5720 + }, + { + "epoch": 1.3182027649769585, + "grad_norm": 1.5461378865588107, + "learning_rate": 5.712721363595325e-07, + "loss": 0.8764907121658325, + "step": 5721 + }, + { + "epoch": 1.3184331797235023, + "grad_norm": 1.0737882176659082, + "learning_rate": 5.709279738588757e-07, + "loss": 0.7966248393058777, + "step": 5722 + }, + { + "epoch": 1.318663594470046, + "grad_norm": 1.4239755183906653, + "learning_rate": 5.705838736374558e-07, + "loss": 0.8983157873153687, + "step": 5723 + }, + { + "epoch": 1.31889400921659, + "grad_norm": 1.1693207378088453, + "learning_rate": 5.70239835745218e-07, + "loss": 0.7349347472190857, + "step": 5724 + }, + { + "epoch": 1.3191244239631337, + "grad_norm": 1.4511397115268243, + "learning_rate": 5.698958602320988e-07, + "loss": 0.9297066926956177, + "step": 5725 + }, + { + "epoch": 
1.3193548387096774, + "grad_norm": 1.0721204261694746, + "learning_rate": 5.695519471480266e-07, + "loss": 0.7106038331985474, + "step": 5726 + }, + { + "epoch": 1.3195852534562211, + "grad_norm": 1.3074916303787611, + "learning_rate": 5.692080965429193e-07, + "loss": 0.8759022951126099, + "step": 5727 + }, + { + "epoch": 1.3198156682027649, + "grad_norm": 1.2039841953988952, + "learning_rate": 5.688643084666862e-07, + "loss": 0.8337300419807434, + "step": 5728 + }, + { + "epoch": 1.3200460829493088, + "grad_norm": 1.2975435530580146, + "learning_rate": 5.685205829692283e-07, + "loss": 0.8543391227722168, + "step": 5729 + }, + { + "epoch": 1.3202764976958525, + "grad_norm": 0.9960252179140261, + "learning_rate": 5.681769201004366e-07, + "loss": 0.7497329711914062, + "step": 5730 + }, + { + "epoch": 1.3205069124423963, + "grad_norm": 1.0615580947761494, + "learning_rate": 5.678333199101929e-07, + "loss": 0.8190964460372925, + "step": 5731 + }, + { + "epoch": 1.3207373271889402, + "grad_norm": 1.1486652227224357, + "learning_rate": 5.674897824483711e-07, + "loss": 0.8233011960983276, + "step": 5732 + }, + { + "epoch": 1.320967741935484, + "grad_norm": 1.2086113696285639, + "learning_rate": 5.671463077648348e-07, + "loss": 0.75257408618927, + "step": 5733 + }, + { + "epoch": 1.3211981566820277, + "grad_norm": 1.0357997575051858, + "learning_rate": 5.668028959094386e-07, + "loss": 0.6468796133995056, + "step": 5734 + }, + { + "epoch": 1.3214285714285714, + "grad_norm": 0.869693175338726, + "learning_rate": 5.664595469320288e-07, + "loss": 0.6756174564361572, + "step": 5735 + }, + { + "epoch": 1.3216589861751151, + "grad_norm": 1.2928038093451135, + "learning_rate": 5.661162608824419e-07, + "loss": 0.9040344953536987, + "step": 5736 + }, + { + "epoch": 1.321889400921659, + "grad_norm": 1.013287726627938, + "learning_rate": 5.657730378105055e-07, + "loss": 0.8082150816917419, + "step": 5737 + }, + { + "epoch": 1.3221198156682028, + "grad_norm": 1.2602760490074278, + 
"learning_rate": 5.654298777660375e-07, + "loss": 0.8760210275650024, + "step": 5738 + }, + { + "epoch": 1.3223502304147465, + "grad_norm": 1.4464070872810626, + "learning_rate": 5.650867807988473e-07, + "loss": 0.6980990171432495, + "step": 5739 + }, + { + "epoch": 1.3225806451612903, + "grad_norm": 0.927469939331727, + "learning_rate": 5.647437469587355e-07, + "loss": 0.6552839279174805, + "step": 5740 + }, + { + "epoch": 1.322811059907834, + "grad_norm": 0.9934566913252004, + "learning_rate": 5.644007762954925e-07, + "loss": 0.8304816484451294, + "step": 5741 + }, + { + "epoch": 1.323041474654378, + "grad_norm": 1.1691146043820817, + "learning_rate": 5.640578688589e-07, + "loss": 0.7977567315101624, + "step": 5742 + }, + { + "epoch": 1.3232718894009217, + "grad_norm": 1.4376891352576404, + "learning_rate": 5.637150246987308e-07, + "loss": 0.7656992673873901, + "step": 5743 + }, + { + "epoch": 1.3235023041474654, + "grad_norm": 1.1120822444951537, + "learning_rate": 5.633722438647483e-07, + "loss": 0.921256422996521, + "step": 5744 + }, + { + "epoch": 1.3237327188940093, + "grad_norm": 1.2718785752085355, + "learning_rate": 5.630295264067063e-07, + "loss": 0.8012785315513611, + "step": 5745 + }, + { + "epoch": 1.323963133640553, + "grad_norm": 1.2403067439539972, + "learning_rate": 5.626868723743504e-07, + "loss": 0.613241970539093, + "step": 5746 + }, + { + "epoch": 1.3241935483870968, + "grad_norm": 1.34086331204533, + "learning_rate": 5.623442818174161e-07, + "loss": 0.7134846448898315, + "step": 5747 + }, + { + "epoch": 1.3244239631336405, + "grad_norm": 1.3127547947642921, + "learning_rate": 5.620017547856295e-07, + "loss": 0.8963242173194885, + "step": 5748 + }, + { + "epoch": 1.3246543778801843, + "grad_norm": 1.3476788930677732, + "learning_rate": 5.616592913287087e-07, + "loss": 0.8401378393173218, + "step": 5749 + }, + { + "epoch": 1.3248847926267282, + "grad_norm": 1.0346861015576712, + "learning_rate": 5.613168914963615e-07, + "loss": 
0.6455308198928833, + "step": 5750 + }, + { + "epoch": 1.325115207373272, + "grad_norm": 1.105933895384034, + "learning_rate": 5.609745553382863e-07, + "loss": 0.6920031905174255, + "step": 5751 + }, + { + "epoch": 1.3253456221198157, + "grad_norm": 1.1001754091297298, + "learning_rate": 5.606322829041737e-07, + "loss": 0.9099706411361694, + "step": 5752 + }, + { + "epoch": 1.3255760368663594, + "grad_norm": 1.3286482905641974, + "learning_rate": 5.602900742437036e-07, + "loss": 0.8034265637397766, + "step": 5753 + }, + { + "epoch": 1.3258064516129031, + "grad_norm": 0.9956708814709011, + "learning_rate": 5.599479294065471e-07, + "loss": 0.7216918468475342, + "step": 5754 + }, + { + "epoch": 1.326036866359447, + "grad_norm": 1.1406371859334326, + "learning_rate": 5.596058484423655e-07, + "loss": 0.7428277730941772, + "step": 5755 + }, + { + "epoch": 1.3262672811059908, + "grad_norm": 1.3052741120899958, + "learning_rate": 5.592638314008127e-07, + "loss": 0.7636011838912964, + "step": 5756 + }, + { + "epoch": 1.3264976958525345, + "grad_norm": 1.3474656843000283, + "learning_rate": 5.589218783315311e-07, + "loss": 0.7765215635299683, + "step": 5757 + }, + { + "epoch": 1.3267281105990785, + "grad_norm": 1.0612768168901736, + "learning_rate": 5.585799892841551e-07, + "loss": 0.6524033546447754, + "step": 5758 + }, + { + "epoch": 1.3269585253456222, + "grad_norm": 1.134076107561282, + "learning_rate": 5.582381643083087e-07, + "loss": 0.8105186223983765, + "step": 5759 + }, + { + "epoch": 1.327188940092166, + "grad_norm": 1.2647095323418043, + "learning_rate": 5.578964034536084e-07, + "loss": 0.7654449939727783, + "step": 5760 + }, + { + "epoch": 1.3274193548387097, + "grad_norm": 1.0086580295069412, + "learning_rate": 5.5755470676966e-07, + "loss": 0.6545592546463013, + "step": 5761 + }, + { + "epoch": 1.3276497695852534, + "grad_norm": 1.1744400728961766, + "learning_rate": 5.572130743060597e-07, + "loss": 0.7116275429725647, + "step": 5762 + }, + { + "epoch": 
1.3278801843317973, + "grad_norm": 1.246651079531453, + "learning_rate": 5.568715061123959e-07, + "loss": 0.8396822214126587, + "step": 5763 + }, + { + "epoch": 1.328110599078341, + "grad_norm": 1.2492632037634621, + "learning_rate": 5.565300022382464e-07, + "loss": 0.6729685664176941, + "step": 5764 + }, + { + "epoch": 1.3283410138248848, + "grad_norm": 1.1356361065972511, + "learning_rate": 5.561885627331795e-07, + "loss": 0.6891340017318726, + "step": 5765 + }, + { + "epoch": 1.3285714285714285, + "grad_norm": 1.1361488307123824, + "learning_rate": 5.558471876467556e-07, + "loss": 0.7232956886291504, + "step": 5766 + }, + { + "epoch": 1.3288018433179722, + "grad_norm": 1.3213229777584583, + "learning_rate": 5.555058770285246e-07, + "loss": 0.7800660133361816, + "step": 5767 + }, + { + "epoch": 1.3290322580645162, + "grad_norm": 1.106817082140827, + "learning_rate": 5.551646309280266e-07, + "loss": 0.6794005036354065, + "step": 5768 + }, + { + "epoch": 1.32926267281106, + "grad_norm": 1.223898149625345, + "learning_rate": 5.548234493947939e-07, + "loss": 0.7739551067352295, + "step": 5769 + }, + { + "epoch": 1.3294930875576036, + "grad_norm": 1.0605861943491384, + "learning_rate": 5.544823324783482e-07, + "loss": 0.759978711605072, + "step": 5770 + }, + { + "epoch": 1.3297235023041476, + "grad_norm": 0.9593168779270222, + "learning_rate": 5.541412802282017e-07, + "loss": 0.7563333511352539, + "step": 5771 + }, + { + "epoch": 1.3299539170506913, + "grad_norm": 1.2126531853296405, + "learning_rate": 5.538002926938587e-07, + "loss": 0.6705852746963501, + "step": 5772 + }, + { + "epoch": 1.330184331797235, + "grad_norm": 1.4207541622240418, + "learning_rate": 5.534593699248124e-07, + "loss": 0.8343281745910645, + "step": 5773 + }, + { + "epoch": 1.3304147465437788, + "grad_norm": 1.4106880574063376, + "learning_rate": 5.531185119705474e-07, + "loss": 0.7158486843109131, + "step": 5774 + }, + { + "epoch": 1.3306451612903225, + "grad_norm": 1.5132468140839932, + 
"learning_rate": 5.527777188805385e-07, + "loss": 0.8888766765594482, + "step": 5775 + }, + { + "epoch": 1.3308755760368665, + "grad_norm": 1.0939731159249404, + "learning_rate": 5.524369907042519e-07, + "loss": 0.873813271522522, + "step": 5776 + }, + { + "epoch": 1.3311059907834102, + "grad_norm": 1.1685988919933143, + "learning_rate": 5.520963274911437e-07, + "loss": 0.7654919624328613, + "step": 5777 + }, + { + "epoch": 1.331336405529954, + "grad_norm": 0.8790821291361985, + "learning_rate": 5.517557292906606e-07, + "loss": 0.6976190805435181, + "step": 5778 + }, + { + "epoch": 1.3315668202764976, + "grad_norm": 1.0920428500423505, + "learning_rate": 5.5141519615224e-07, + "loss": 0.8356388807296753, + "step": 5779 + }, + { + "epoch": 1.3317972350230414, + "grad_norm": 1.1881219631842852, + "learning_rate": 5.510747281253094e-07, + "loss": 0.719998836517334, + "step": 5780 + }, + { + "epoch": 1.3320276497695853, + "grad_norm": 1.4093262324111957, + "learning_rate": 5.507343252592882e-07, + "loss": 0.8432124853134155, + "step": 5781 + }, + { + "epoch": 1.332258064516129, + "grad_norm": 1.2484869478133402, + "learning_rate": 5.503939876035845e-07, + "loss": 0.8426402807235718, + "step": 5782 + }, + { + "epoch": 1.3324884792626728, + "grad_norm": 1.1843136616988323, + "learning_rate": 5.500537152075986e-07, + "loss": 0.8133292198181152, + "step": 5783 + }, + { + "epoch": 1.3327188940092167, + "grad_norm": 1.2203561788081314, + "learning_rate": 5.497135081207205e-07, + "loss": 0.8097467422485352, + "step": 5784 + }, + { + "epoch": 1.3329493087557602, + "grad_norm": 0.9964838781032684, + "learning_rate": 5.493733663923299e-07, + "loss": 0.6943382024765015, + "step": 5785 + }, + { + "epoch": 1.3331797235023042, + "grad_norm": 0.8958647048569846, + "learning_rate": 5.490332900717993e-07, + "loss": 0.5896245837211609, + "step": 5786 + }, + { + "epoch": 1.333410138248848, + "grad_norm": 1.2066217319918868, + "learning_rate": 5.486932792084895e-07, + "loss": 
0.6837725639343262, + "step": 5787 + }, + { + "epoch": 1.3336405529953916, + "grad_norm": 1.3459237431541746, + "learning_rate": 5.483533338517523e-07, + "loss": 0.8371915221214294, + "step": 5788 + }, + { + "epoch": 1.3338709677419356, + "grad_norm": 1.1649657355507903, + "learning_rate": 5.480134540509313e-07, + "loss": 0.8001077175140381, + "step": 5789 + }, + { + "epoch": 1.3341013824884793, + "grad_norm": 1.4458040399946648, + "learning_rate": 5.476736398553591e-07, + "loss": 0.9070717096328735, + "step": 5790 + }, + { + "epoch": 1.334331797235023, + "grad_norm": 1.256932465914866, + "learning_rate": 5.473338913143589e-07, + "loss": 0.9061849117279053, + "step": 5791 + }, + { + "epoch": 1.3345622119815668, + "grad_norm": 1.2993559451282939, + "learning_rate": 5.469942084772454e-07, + "loss": 0.8465786576271057, + "step": 5792 + }, + { + "epoch": 1.3347926267281105, + "grad_norm": 1.2333173266054418, + "learning_rate": 5.466545913933229e-07, + "loss": 0.8221259117126465, + "step": 5793 + }, + { + "epoch": 1.3350230414746544, + "grad_norm": 1.1214356414954587, + "learning_rate": 5.463150401118864e-07, + "loss": 0.594088077545166, + "step": 5794 + }, + { + "epoch": 1.3352534562211982, + "grad_norm": 1.0900215939620008, + "learning_rate": 5.459755546822207e-07, + "loss": 0.6983529925346375, + "step": 5795 + }, + { + "epoch": 1.335483870967742, + "grad_norm": 1.3561793320800521, + "learning_rate": 5.456361351536027e-07, + "loss": 0.7720709443092346, + "step": 5796 + }, + { + "epoch": 1.3357142857142856, + "grad_norm": 1.1798730390657586, + "learning_rate": 5.45296781575298e-07, + "loss": 0.8087977766990662, + "step": 5797 + }, + { + "epoch": 1.3359447004608294, + "grad_norm": 1.123982224882924, + "learning_rate": 5.449574939965636e-07, + "loss": 0.6808000802993774, + "step": 5798 + }, + { + "epoch": 1.3361751152073733, + "grad_norm": 1.0634688756756818, + "learning_rate": 5.446182724666466e-07, + "loss": 0.7222881317138672, + "step": 5799 + }, + { + "epoch": 
1.336405529953917, + "grad_norm": 1.1919279054292256, + "learning_rate": 5.44279117034784e-07, + "loss": 0.872687578201294, + "step": 5800 + }, + { + "epoch": 1.3366359447004608, + "grad_norm": 1.3648460603559815, + "learning_rate": 5.439400277502048e-07, + "loss": 0.7728114128112793, + "step": 5801 + }, + { + "epoch": 1.3368663594470047, + "grad_norm": 1.0445795610107291, + "learning_rate": 5.436010046621267e-07, + "loss": 0.807528018951416, + "step": 5802 + }, + { + "epoch": 1.3370967741935484, + "grad_norm": 1.151575262421327, + "learning_rate": 5.432620478197583e-07, + "loss": 0.6997063159942627, + "step": 5803 + }, + { + "epoch": 1.3373271889400922, + "grad_norm": 1.309125931504039, + "learning_rate": 5.429231572722995e-07, + "loss": 0.797568678855896, + "step": 5804 + }, + { + "epoch": 1.337557603686636, + "grad_norm": 1.0057472643211554, + "learning_rate": 5.425843330689386e-07, + "loss": 0.6412359476089478, + "step": 5805 + }, + { + "epoch": 1.3377880184331796, + "grad_norm": 1.1290308654729904, + "learning_rate": 5.422455752588569e-07, + "loss": 0.8605507612228394, + "step": 5806 + }, + { + "epoch": 1.3380184331797236, + "grad_norm": 1.0459165137008808, + "learning_rate": 5.419068838912238e-07, + "loss": 0.856192946434021, + "step": 5807 + }, + { + "epoch": 1.3382488479262673, + "grad_norm": 1.1354202830657318, + "learning_rate": 5.415682590151998e-07, + "loss": 0.8614650368690491, + "step": 5808 + }, + { + "epoch": 1.338479262672811, + "grad_norm": 1.6619444336877072, + "learning_rate": 5.412297006799365e-07, + "loss": 0.9675840139389038, + "step": 5809 + }, + { + "epoch": 1.3387096774193548, + "grad_norm": 1.0659647985310448, + "learning_rate": 5.408912089345747e-07, + "loss": 0.7333405017852783, + "step": 5810 + }, + { + "epoch": 1.3389400921658985, + "grad_norm": 1.3540380425785927, + "learning_rate": 5.405527838282457e-07, + "loss": 0.8271909952163696, + "step": 5811 + }, + { + "epoch": 1.3391705069124424, + "grad_norm": 1.3562249096622705, + 
"learning_rate": 5.402144254100724e-07, + "loss": 0.8036069869995117, + "step": 5812 + }, + { + "epoch": 1.3394009216589862, + "grad_norm": 1.3975514954701582, + "learning_rate": 5.398761337291667e-07, + "loss": 0.855912446975708, + "step": 5813 + }, + { + "epoch": 1.33963133640553, + "grad_norm": 1.2830545749549949, + "learning_rate": 5.395379088346309e-07, + "loss": 0.8198536038398743, + "step": 5814 + }, + { + "epoch": 1.3398617511520738, + "grad_norm": 1.2130249913700057, + "learning_rate": 5.391997507755581e-07, + "loss": 0.8931646347045898, + "step": 5815 + }, + { + "epoch": 1.3400921658986176, + "grad_norm": 0.9981529734228639, + "learning_rate": 5.388616596010312e-07, + "loss": 0.7073954343795776, + "step": 5816 + }, + { + "epoch": 1.3403225806451613, + "grad_norm": 1.2450035085912274, + "learning_rate": 5.385236353601241e-07, + "loss": 0.7758424282073975, + "step": 5817 + }, + { + "epoch": 1.340552995391705, + "grad_norm": 1.1707291956273551, + "learning_rate": 5.381856781019005e-07, + "loss": 0.6805497407913208, + "step": 5818 + }, + { + "epoch": 1.3407834101382488, + "grad_norm": 1.251540768140409, + "learning_rate": 5.378477878754144e-07, + "loss": 0.8956538438796997, + "step": 5819 + }, + { + "epoch": 1.3410138248847927, + "grad_norm": 1.0594639846580987, + "learning_rate": 5.375099647297096e-07, + "loss": 0.7819657921791077, + "step": 5820 + }, + { + "epoch": 1.3412442396313364, + "grad_norm": 1.0523114055014655, + "learning_rate": 5.371722087138217e-07, + "loss": 0.5764007568359375, + "step": 5821 + }, + { + "epoch": 1.3414746543778802, + "grad_norm": 1.1661401559135987, + "learning_rate": 5.368345198767749e-07, + "loss": 0.697022557258606, + "step": 5822 + }, + { + "epoch": 1.3417050691244239, + "grad_norm": 1.3065346909259405, + "learning_rate": 5.364968982675839e-07, + "loss": 0.7773014307022095, + "step": 5823 + }, + { + "epoch": 1.3419354838709676, + "grad_norm": 1.3340944249973796, + "learning_rate": 5.361593439352551e-07, + "loss": 
0.7395004034042358, + "step": 5824 + }, + { + "epoch": 1.3421658986175116, + "grad_norm": 1.0762295080363014, + "learning_rate": 5.358218569287834e-07, + "loss": 0.7989716529846191, + "step": 5825 + }, + { + "epoch": 1.3423963133640553, + "grad_norm": 1.280549478612159, + "learning_rate": 5.354844372971543e-07, + "loss": 0.8894884586334229, + "step": 5826 + }, + { + "epoch": 1.342626728110599, + "grad_norm": 1.5586577225053506, + "learning_rate": 5.351470850893446e-07, + "loss": 0.8415021300315857, + "step": 5827 + }, + { + "epoch": 1.342857142857143, + "grad_norm": 1.4272313895508615, + "learning_rate": 5.3480980035432e-07, + "loss": 0.9963078498840332, + "step": 5828 + }, + { + "epoch": 1.3430875576036867, + "grad_norm": 1.1680739887228044, + "learning_rate": 5.344725831410368e-07, + "loss": 0.8489943742752075, + "step": 5829 + }, + { + "epoch": 1.3433179723502304, + "grad_norm": 0.9897462108554296, + "learning_rate": 5.341354334984422e-07, + "loss": 0.6949954032897949, + "step": 5830 + }, + { + "epoch": 1.3435483870967742, + "grad_norm": 1.1225897948987795, + "learning_rate": 5.337983514754722e-07, + "loss": 0.878408670425415, + "step": 5831 + }, + { + "epoch": 1.3437788018433179, + "grad_norm": 1.2067617593706235, + "learning_rate": 5.334613371210549e-07, + "loss": 0.722877025604248, + "step": 5832 + }, + { + "epoch": 1.3440092165898618, + "grad_norm": 1.04123853110292, + "learning_rate": 5.331243904841068e-07, + "loss": 0.670013427734375, + "step": 5833 + }, + { + "epoch": 1.3442396313364056, + "grad_norm": 1.0789084686611892, + "learning_rate": 5.327875116135354e-07, + "loss": 0.8336968421936035, + "step": 5834 + }, + { + "epoch": 1.3444700460829493, + "grad_norm": 1.2348261826059375, + "learning_rate": 5.324507005582381e-07, + "loss": 0.7917020916938782, + "step": 5835 + }, + { + "epoch": 1.344700460829493, + "grad_norm": 1.288528901659057, + "learning_rate": 5.321139573671024e-07, + "loss": 0.7479217052459717, + "step": 5836 + }, + { + "epoch": 
1.3449308755760367, + "grad_norm": 1.206901718846971, + "learning_rate": 5.317772820890068e-07, + "loss": 0.8059084415435791, + "step": 5837 + }, + { + "epoch": 1.3451612903225807, + "grad_norm": 1.0687058344207596, + "learning_rate": 5.314406747728186e-07, + "loss": 0.6853187680244446, + "step": 5838 + }, + { + "epoch": 1.3453917050691244, + "grad_norm": 1.2007310227541288, + "learning_rate": 5.311041354673964e-07, + "loss": 0.7769491672515869, + "step": 5839 + }, + { + "epoch": 1.3456221198156681, + "grad_norm": 1.007121872066712, + "learning_rate": 5.307676642215877e-07, + "loss": 0.6669384241104126, + "step": 5840 + }, + { + "epoch": 1.345852534562212, + "grad_norm": 1.091111253411437, + "learning_rate": 5.304312610842319e-07, + "loss": 0.7884945869445801, + "step": 5841 + }, + { + "epoch": 1.3460829493087558, + "grad_norm": 1.2799296704263758, + "learning_rate": 5.300949261041567e-07, + "loss": 0.8030047416687012, + "step": 5842 + }, + { + "epoch": 1.3463133640552996, + "grad_norm": 1.293856241707333, + "learning_rate": 5.297586593301806e-07, + "loss": 0.7792675495147705, + "step": 5843 + }, + { + "epoch": 1.3465437788018433, + "grad_norm": 1.450964712660266, + "learning_rate": 5.29422460811113e-07, + "loss": 0.8699119091033936, + "step": 5844 + }, + { + "epoch": 1.346774193548387, + "grad_norm": 1.1164478098944863, + "learning_rate": 5.290863305957523e-07, + "loss": 0.8075394630432129, + "step": 5845 + }, + { + "epoch": 1.347004608294931, + "grad_norm": 1.2025668698948455, + "learning_rate": 5.287502687328868e-07, + "loss": 0.7875077128410339, + "step": 5846 + }, + { + "epoch": 1.3472350230414747, + "grad_norm": 1.2743475952279586, + "learning_rate": 5.284142752712965e-07, + "loss": 0.6799413561820984, + "step": 5847 + }, + { + "epoch": 1.3474654377880184, + "grad_norm": 1.3570475044053845, + "learning_rate": 5.280783502597496e-07, + "loss": 0.914801299571991, + "step": 5848 + }, + { + "epoch": 1.3476958525345621, + "grad_norm": 1.4096481978785727, + 
"learning_rate": 5.277424937470052e-07, + "loss": 0.8591992855072021, + "step": 5849 + }, + { + "epoch": 1.3479262672811059, + "grad_norm": 1.1971358109064123, + "learning_rate": 5.27406705781813e-07, + "loss": 0.7830478549003601, + "step": 5850 + }, + { + "epoch": 1.3481566820276498, + "grad_norm": 1.397466179292115, + "learning_rate": 5.270709864129119e-07, + "loss": 0.8365499973297119, + "step": 5851 + }, + { + "epoch": 1.3483870967741935, + "grad_norm": 1.5417932199175834, + "learning_rate": 5.267353356890305e-07, + "loss": 0.8342669010162354, + "step": 5852 + }, + { + "epoch": 1.3486175115207373, + "grad_norm": 1.0532947941417055, + "learning_rate": 5.263997536588891e-07, + "loss": 0.7802393436431885, + "step": 5853 + }, + { + "epoch": 1.3488479262672812, + "grad_norm": 1.2005511445865484, + "learning_rate": 5.260642403711964e-07, + "loss": 0.8245328068733215, + "step": 5854 + }, + { + "epoch": 1.349078341013825, + "grad_norm": 1.043405656704728, + "learning_rate": 5.257287958746519e-07, + "loss": 0.7209265232086182, + "step": 5855 + }, + { + "epoch": 1.3493087557603687, + "grad_norm": 1.254105643009189, + "learning_rate": 5.253934202179444e-07, + "loss": 0.9258058071136475, + "step": 5856 + }, + { + "epoch": 1.3495391705069124, + "grad_norm": 1.3493584028342165, + "learning_rate": 5.25058113449754e-07, + "loss": 0.6889467835426331, + "step": 5857 + }, + { + "epoch": 1.3497695852534561, + "grad_norm": 1.113027412487739, + "learning_rate": 5.247228756187498e-07, + "loss": 0.8810057640075684, + "step": 5858 + }, + { + "epoch": 1.35, + "grad_norm": 1.140989478824924, + "learning_rate": 5.243877067735909e-07, + "loss": 0.7236393690109253, + "step": 5859 + }, + { + "epoch": 1.3502304147465438, + "grad_norm": 1.1712872152312954, + "learning_rate": 5.240526069629264e-07, + "loss": 0.8287979364395142, + "step": 5860 + }, + { + "epoch": 1.3504608294930875, + "grad_norm": 0.9764543402246563, + "learning_rate": 5.237175762353964e-07, + "loss": 0.8268846869468689, + 
"step": 5861 + }, + { + "epoch": 1.3506912442396313, + "grad_norm": 1.08770217121451, + "learning_rate": 5.233826146396296e-07, + "loss": 0.7995575666427612, + "step": 5862 + }, + { + "epoch": 1.350921658986175, + "grad_norm": 1.185939350431103, + "learning_rate": 5.230477222242449e-07, + "loss": 0.7379493713378906, + "step": 5863 + }, + { + "epoch": 1.351152073732719, + "grad_norm": 1.1532350043824988, + "learning_rate": 5.227128990378524e-07, + "loss": 0.729906439781189, + "step": 5864 + }, + { + "epoch": 1.3513824884792627, + "grad_norm": 1.3775772205538213, + "learning_rate": 5.223781451290506e-07, + "loss": 0.8356789350509644, + "step": 5865 + }, + { + "epoch": 1.3516129032258064, + "grad_norm": 1.4707388081384496, + "learning_rate": 5.220434605464285e-07, + "loss": 0.8130582571029663, + "step": 5866 + }, + { + "epoch": 1.3518433179723504, + "grad_norm": 1.3840431554185126, + "learning_rate": 5.217088453385658e-07, + "loss": 0.7686447501182556, + "step": 5867 + }, + { + "epoch": 1.352073732718894, + "grad_norm": 1.4824685151456765, + "learning_rate": 5.213742995540309e-07, + "loss": 0.7945844531059265, + "step": 5868 + }, + { + "epoch": 1.3523041474654378, + "grad_norm": 0.9715413572597766, + "learning_rate": 5.210398232413824e-07, + "loss": 0.8082837462425232, + "step": 5869 + }, + { + "epoch": 1.3525345622119815, + "grad_norm": 1.2398246007417328, + "learning_rate": 5.2070541644917e-07, + "loss": 0.7826153039932251, + "step": 5870 + }, + { + "epoch": 1.3527649769585253, + "grad_norm": 1.2471684178108737, + "learning_rate": 5.203710792259318e-07, + "loss": 0.6853276491165161, + "step": 5871 + }, + { + "epoch": 1.3529953917050692, + "grad_norm": 1.2891891865978977, + "learning_rate": 5.200368116201962e-07, + "loss": 0.8354780673980713, + "step": 5872 + }, + { + "epoch": 1.353225806451613, + "grad_norm": 1.1178862343459024, + "learning_rate": 5.197026136804823e-07, + "loss": 0.7857648134231567, + "step": 5873 + }, + { + "epoch": 1.3534562211981567, + 
"grad_norm": 0.9168225851850988, + "learning_rate": 5.193684854552982e-07, + "loss": 0.663504958152771, + "step": 5874 + }, + { + "epoch": 1.3536866359447004, + "grad_norm": 1.329771615602396, + "learning_rate": 5.190344269931423e-07, + "loss": 0.8192203044891357, + "step": 5875 + }, + { + "epoch": 1.3539170506912441, + "grad_norm": 1.4861685476717017, + "learning_rate": 5.187004383425024e-07, + "loss": 0.801753044128418, + "step": 5876 + }, + { + "epoch": 1.354147465437788, + "grad_norm": 1.3551621393598028, + "learning_rate": 5.183665195518566e-07, + "loss": 0.9427206516265869, + "step": 5877 + }, + { + "epoch": 1.3543778801843318, + "grad_norm": 1.1121835630605517, + "learning_rate": 5.18032670669673e-07, + "loss": 0.7801729440689087, + "step": 5878 + }, + { + "epoch": 1.3546082949308755, + "grad_norm": 1.3936797390586833, + "learning_rate": 5.176988917444094e-07, + "loss": 0.8224533796310425, + "step": 5879 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.9505008459531469, + "learning_rate": 5.173651828245127e-07, + "loss": 0.7800098657608032, + "step": 5880 + }, + { + "epoch": 1.3550691244239632, + "grad_norm": 0.9654380749861797, + "learning_rate": 5.170315439584212e-07, + "loss": 0.7612746953964233, + "step": 5881 + }, + { + "epoch": 1.355299539170507, + "grad_norm": 1.191616140078335, + "learning_rate": 5.166979751945617e-07, + "loss": 0.8027492761611938, + "step": 5882 + }, + { + "epoch": 1.3555299539170507, + "grad_norm": 1.167147993456773, + "learning_rate": 5.163644765813508e-07, + "loss": 0.7509280443191528, + "step": 5883 + }, + { + "epoch": 1.3557603686635944, + "grad_norm": 1.2102231125675782, + "learning_rate": 5.160310481671966e-07, + "loss": 0.7663145661354065, + "step": 5884 + }, + { + "epoch": 1.3559907834101383, + "grad_norm": 1.246862901799125, + "learning_rate": 5.156976900004948e-07, + "loss": 0.7598870396614075, + "step": 5885 + }, + { + "epoch": 1.356221198156682, + "grad_norm": 1.127184650819857, + "learning_rate": 
5.153644021296317e-07, + "loss": 0.7923038005828857, + "step": 5886 + }, + { + "epoch": 1.3564516129032258, + "grad_norm": 1.2664053097126295, + "learning_rate": 5.150311846029846e-07, + "loss": 0.8711799383163452, + "step": 5887 + }, + { + "epoch": 1.3566820276497695, + "grad_norm": 1.294570667250746, + "learning_rate": 5.146980374689191e-07, + "loss": 0.7852096557617188, + "step": 5888 + }, + { + "epoch": 1.3569124423963133, + "grad_norm": 1.1426360408928755, + "learning_rate": 5.143649607757905e-07, + "loss": 0.7259876132011414, + "step": 5889 + }, + { + "epoch": 1.3571428571428572, + "grad_norm": 0.9810253925795782, + "learning_rate": 5.140319545719454e-07, + "loss": 0.7612321376800537, + "step": 5890 + }, + { + "epoch": 1.357373271889401, + "grad_norm": 1.2919477789807814, + "learning_rate": 5.136990189057187e-07, + "loss": 0.7881298661231995, + "step": 5891 + }, + { + "epoch": 1.3576036866359447, + "grad_norm": 1.0310706760740191, + "learning_rate": 5.133661538254353e-07, + "loss": 0.6956340074539185, + "step": 5892 + }, + { + "epoch": 1.3578341013824886, + "grad_norm": 1.0277045355993415, + "learning_rate": 5.130333593794107e-07, + "loss": 0.7800698280334473, + "step": 5893 + }, + { + "epoch": 1.3580645161290323, + "grad_norm": 1.0373100274796343, + "learning_rate": 5.127006356159496e-07, + "loss": 0.6920318603515625, + "step": 5894 + }, + { + "epoch": 1.358294930875576, + "grad_norm": 0.9870224446835288, + "learning_rate": 5.123679825833458e-07, + "loss": 0.6972872018814087, + "step": 5895 + }, + { + "epoch": 1.3585253456221198, + "grad_norm": 1.1473583592012562, + "learning_rate": 5.12035400329884e-07, + "loss": 0.8820276260375977, + "step": 5896 + }, + { + "epoch": 1.3587557603686635, + "grad_norm": 1.1566218274104645, + "learning_rate": 5.117028889038375e-07, + "loss": 0.8834109306335449, + "step": 5897 + }, + { + "epoch": 1.3589861751152075, + "grad_norm": 1.1393544418506285, + "learning_rate": 5.113704483534704e-07, + "loss": 0.6981096267700195, + 
"step": 5898 + }, + { + "epoch": 1.3592165898617512, + "grad_norm": 1.329102048560067, + "learning_rate": 5.11038078727036e-07, + "loss": 0.7617249488830566, + "step": 5899 + }, + { + "epoch": 1.359447004608295, + "grad_norm": 1.7116569149164136, + "learning_rate": 5.107057800727773e-07, + "loss": 0.8373798131942749, + "step": 5900 + }, + { + "epoch": 1.3596774193548387, + "grad_norm": 1.3064563550321244, + "learning_rate": 5.103735524389264e-07, + "loss": 0.7176666855812073, + "step": 5901 + }, + { + "epoch": 1.3599078341013824, + "grad_norm": 0.9003342699900779, + "learning_rate": 5.100413958737067e-07, + "loss": 0.7872966527938843, + "step": 5902 + }, + { + "epoch": 1.3601382488479263, + "grad_norm": 1.1723157653802474, + "learning_rate": 5.097093104253295e-07, + "loss": 0.6668897271156311, + "step": 5903 + }, + { + "epoch": 1.36036866359447, + "grad_norm": 1.2119302484042467, + "learning_rate": 5.093772961419967e-07, + "loss": 0.8413408994674683, + "step": 5904 + }, + { + "epoch": 1.3605990783410138, + "grad_norm": 0.9857990015136971, + "learning_rate": 5.090453530719e-07, + "loss": 0.632825493812561, + "step": 5905 + }, + { + "epoch": 1.3608294930875577, + "grad_norm": 1.2378128322555926, + "learning_rate": 5.087134812632201e-07, + "loss": 0.737346887588501, + "step": 5906 + }, + { + "epoch": 1.3610599078341012, + "grad_norm": 1.2614434601380542, + "learning_rate": 5.083816807641283e-07, + "loss": 1.00008225440979, + "step": 5907 + }, + { + "epoch": 1.3612903225806452, + "grad_norm": 1.2168755174090398, + "learning_rate": 5.08049951622785e-07, + "loss": 0.7844079732894897, + "step": 5908 + }, + { + "epoch": 1.361520737327189, + "grad_norm": 1.0532681425474226, + "learning_rate": 5.077182938873393e-07, + "loss": 0.8615080118179321, + "step": 5909 + }, + { + "epoch": 1.3617511520737327, + "grad_norm": 1.279562028421048, + "learning_rate": 5.073867076059321e-07, + "loss": 0.6930621862411499, + "step": 5910 + }, + { + "epoch": 1.3619815668202766, + "grad_norm": 
1.4077453728560791, + "learning_rate": 5.07055192826692e-07, + "loss": 0.7020307183265686, + "step": 5911 + }, + { + "epoch": 1.3622119815668203, + "grad_norm": 1.4403791813866107, + "learning_rate": 5.067237495977379e-07, + "loss": 0.7281042337417603, + "step": 5912 + }, + { + "epoch": 1.362442396313364, + "grad_norm": 1.014203177200115, + "learning_rate": 5.063923779671789e-07, + "loss": 0.8092719316482544, + "step": 5913 + }, + { + "epoch": 1.3626728110599078, + "grad_norm": 1.2597384594296865, + "learning_rate": 5.060610779831125e-07, + "loss": 0.7323317527770996, + "step": 5914 + }, + { + "epoch": 1.3629032258064515, + "grad_norm": 0.9804861205409557, + "learning_rate": 5.05729849693627e-07, + "loss": 0.7370069622993469, + "step": 5915 + }, + { + "epoch": 1.3631336405529955, + "grad_norm": 1.1355071333670705, + "learning_rate": 5.053986931467994e-07, + "loss": 0.7175320386886597, + "step": 5916 + }, + { + "epoch": 1.3633640552995392, + "grad_norm": 1.2834592450306632, + "learning_rate": 5.050676083906964e-07, + "loss": 0.8643501996994019, + "step": 5917 + }, + { + "epoch": 1.363594470046083, + "grad_norm": 1.2479698704612106, + "learning_rate": 5.047365954733752e-07, + "loss": 0.9110950827598572, + "step": 5918 + }, + { + "epoch": 1.3638248847926266, + "grad_norm": 1.6104451195946936, + "learning_rate": 5.044056544428814e-07, + "loss": 0.9242197275161743, + "step": 5919 + }, + { + "epoch": 1.3640552995391704, + "grad_norm": 1.2769108446030992, + "learning_rate": 5.040747853472509e-07, + "loss": 0.9218860864639282, + "step": 5920 + }, + { + "epoch": 1.3642857142857143, + "grad_norm": 1.3302527755174611, + "learning_rate": 5.037439882345084e-07, + "loss": 0.970054030418396, + "step": 5921 + }, + { + "epoch": 1.364516129032258, + "grad_norm": 1.0075132364725619, + "learning_rate": 5.034132631526695e-07, + "loss": 0.7707182168960571, + "step": 5922 + }, + { + "epoch": 1.3647465437788018, + "grad_norm": 1.1036594577594991, + "learning_rate": 5.03082610149738e-07, + 
"loss": 0.7673811912536621, + "step": 5923 + }, + { + "epoch": 1.3649769585253457, + "grad_norm": 1.2758650519526258, + "learning_rate": 5.027520292737073e-07, + "loss": 0.7387198209762573, + "step": 5924 + }, + { + "epoch": 1.3652073732718895, + "grad_norm": 1.139448521744241, + "learning_rate": 5.024215205725619e-07, + "loss": 0.7803019881248474, + "step": 5925 + }, + { + "epoch": 1.3654377880184332, + "grad_norm": 1.3985269621197394, + "learning_rate": 5.020910840942738e-07, + "loss": 0.8753018379211426, + "step": 5926 + }, + { + "epoch": 1.365668202764977, + "grad_norm": 1.0358625157915384, + "learning_rate": 5.017607198868055e-07, + "loss": 0.7917389869689941, + "step": 5927 + }, + { + "epoch": 1.3658986175115206, + "grad_norm": 1.2995608187995562, + "learning_rate": 5.014304279981095e-07, + "loss": 0.8393691182136536, + "step": 5928 + }, + { + "epoch": 1.3661290322580646, + "grad_norm": 1.2671721961788391, + "learning_rate": 5.011002084761264e-07, + "loss": 0.6635205745697021, + "step": 5929 + }, + { + "epoch": 1.3663594470046083, + "grad_norm": 1.2038857805513816, + "learning_rate": 5.007700613687879e-07, + "loss": 0.7058769464492798, + "step": 5930 + }, + { + "epoch": 1.366589861751152, + "grad_norm": 1.1784688857731938, + "learning_rate": 5.004399867240143e-07, + "loss": 0.841168224811554, + "step": 5931 + }, + { + "epoch": 1.3668202764976958, + "grad_norm": 1.3760327619217738, + "learning_rate": 5.001099845897148e-07, + "loss": 0.7385121583938599, + "step": 5932 + }, + { + "epoch": 1.3670506912442395, + "grad_norm": 1.1633525983686732, + "learning_rate": 4.997800550137897e-07, + "loss": 0.6525158882141113, + "step": 5933 + }, + { + "epoch": 1.3672811059907835, + "grad_norm": 1.2331358286597804, + "learning_rate": 4.994501980441274e-07, + "loss": 0.7838844060897827, + "step": 5934 + }, + { + "epoch": 1.3675115207373272, + "grad_norm": 1.4450953979822279, + "learning_rate": 4.991204137286061e-07, + "loss": 0.8831999897956848, + "step": 5935 + }, + { + 
"epoch": 1.367741935483871, + "grad_norm": 1.0408031352355525, + "learning_rate": 4.987907021150938e-07, + "loss": 0.8053784966468811, + "step": 5936 + }, + { + "epoch": 1.3679723502304149, + "grad_norm": 1.1356206370071746, + "learning_rate": 4.984610632514475e-07, + "loss": 0.8093301057815552, + "step": 5937 + }, + { + "epoch": 1.3682027649769586, + "grad_norm": 1.0230530705292329, + "learning_rate": 4.981314971855136e-07, + "loss": 0.7609653472900391, + "step": 5938 + }, + { + "epoch": 1.3684331797235023, + "grad_norm": 1.4109994154981755, + "learning_rate": 4.978020039651288e-07, + "loss": 0.7131600379943848, + "step": 5939 + }, + { + "epoch": 1.368663594470046, + "grad_norm": 1.3192550042799691, + "learning_rate": 4.974725836381184e-07, + "loss": 0.6555063724517822, + "step": 5940 + }, + { + "epoch": 1.3688940092165898, + "grad_norm": 1.1278604970222592, + "learning_rate": 4.971432362522968e-07, + "loss": 0.8349519968032837, + "step": 5941 + }, + { + "epoch": 1.3691244239631337, + "grad_norm": 1.2138732932202303, + "learning_rate": 4.968139618554691e-07, + "loss": 0.7335611581802368, + "step": 5942 + }, + { + "epoch": 1.3693548387096774, + "grad_norm": 1.050807913168598, + "learning_rate": 4.964847604954287e-07, + "loss": 0.8349814414978027, + "step": 5943 + }, + { + "epoch": 1.3695852534562212, + "grad_norm": 1.07716704849378, + "learning_rate": 4.961556322199585e-07, + "loss": 0.6816729307174683, + "step": 5944 + }, + { + "epoch": 1.369815668202765, + "grad_norm": 1.5220059571304148, + "learning_rate": 4.958265770768315e-07, + "loss": 0.847672164440155, + "step": 5945 + }, + { + "epoch": 1.3700460829493086, + "grad_norm": 1.267067930725286, + "learning_rate": 4.954975951138095e-07, + "loss": 0.6674519777297974, + "step": 5946 + }, + { + "epoch": 1.3702764976958526, + "grad_norm": 1.0820409905680344, + "learning_rate": 4.951686863786432e-07, + "loss": 0.7836427092552185, + "step": 5947 + }, + { + "epoch": 1.3705069124423963, + "grad_norm": 1.0577780792239002, 
+ "learning_rate": 4.948398509190742e-07, + "loss": 0.640183687210083, + "step": 5948 + }, + { + "epoch": 1.37073732718894, + "grad_norm": 1.223963669470004, + "learning_rate": 4.945110887828322e-07, + "loss": 0.8438451290130615, + "step": 5949 + }, + { + "epoch": 1.370967741935484, + "grad_norm": 1.5483267377377474, + "learning_rate": 4.94182400017636e-07, + "loss": 0.9311714172363281, + "step": 5950 + }, + { + "epoch": 1.3711981566820277, + "grad_norm": 1.2352509732193302, + "learning_rate": 4.938537846711952e-07, + "loss": 0.7332801818847656, + "step": 5951 + }, + { + "epoch": 1.3714285714285714, + "grad_norm": 1.127354832681604, + "learning_rate": 4.935252427912075e-07, + "loss": 0.7189289331436157, + "step": 5952 + }, + { + "epoch": 1.3716589861751152, + "grad_norm": 1.451594181977691, + "learning_rate": 4.9319677442536e-07, + "loss": 0.827372670173645, + "step": 5953 + }, + { + "epoch": 1.371889400921659, + "grad_norm": 1.2273788913776413, + "learning_rate": 4.9286837962133e-07, + "loss": 0.7607625722885132, + "step": 5954 + }, + { + "epoch": 1.3721198156682028, + "grad_norm": 1.1935199245873378, + "learning_rate": 4.925400584267836e-07, + "loss": 0.9420886635780334, + "step": 5955 + }, + { + "epoch": 1.3723502304147466, + "grad_norm": 1.1557325656206936, + "learning_rate": 4.922118108893757e-07, + "loss": 0.7605317831039429, + "step": 5956 + }, + { + "epoch": 1.3725806451612903, + "grad_norm": 1.059494459687004, + "learning_rate": 4.918836370567513e-07, + "loss": 0.8353599309921265, + "step": 5957 + }, + { + "epoch": 1.372811059907834, + "grad_norm": 1.2571100340874592, + "learning_rate": 4.915555369765439e-07, + "loss": 0.8540027141571045, + "step": 5958 + }, + { + "epoch": 1.3730414746543778, + "grad_norm": 1.027809306304352, + "learning_rate": 4.912275106963778e-07, + "loss": 0.6965712308883667, + "step": 5959 + }, + { + "epoch": 1.3732718894009217, + "grad_norm": 1.0356479101830274, + "learning_rate": 4.908995582638648e-07, + "loss": 0.7460787296295166, 
+ "step": 5960 + }, + { + "epoch": 1.3735023041474654, + "grad_norm": 1.1845566109999182, + "learning_rate": 4.905716797266067e-07, + "loss": 0.8652873039245605, + "step": 5961 + }, + { + "epoch": 1.3737327188940092, + "grad_norm": 1.1300176885770365, + "learning_rate": 4.902438751321952e-07, + "loss": 0.7757953405380249, + "step": 5962 + }, + { + "epoch": 1.3739631336405531, + "grad_norm": 1.2945741727860514, + "learning_rate": 4.899161445282102e-07, + "loss": 0.8842452168464661, + "step": 5963 + }, + { + "epoch": 1.3741935483870968, + "grad_norm": 1.1415902309445607, + "learning_rate": 4.895884879622215e-07, + "loss": 0.7259113788604736, + "step": 5964 + }, + { + "epoch": 1.3744239631336406, + "grad_norm": 1.3855842779268248, + "learning_rate": 4.892609054817883e-07, + "loss": 0.8871402144432068, + "step": 5965 + }, + { + "epoch": 1.3746543778801843, + "grad_norm": 1.3262407740428463, + "learning_rate": 4.889333971344586e-07, + "loss": 0.7564518451690674, + "step": 5966 + }, + { + "epoch": 1.374884792626728, + "grad_norm": 1.2010368462649357, + "learning_rate": 4.886059629677692e-07, + "loss": 0.7886015176773071, + "step": 5967 + }, + { + "epoch": 1.375115207373272, + "grad_norm": 1.199947155848343, + "learning_rate": 4.882786030292479e-07, + "loss": 0.8256035447120667, + "step": 5968 + }, + { + "epoch": 1.3753456221198157, + "grad_norm": 1.3084738837241086, + "learning_rate": 4.879513173664099e-07, + "loss": 0.9351227283477783, + "step": 5969 + }, + { + "epoch": 1.3755760368663594, + "grad_norm": 1.1794682657820328, + "learning_rate": 4.876241060267598e-07, + "loss": 0.7221553921699524, + "step": 5970 + }, + { + "epoch": 1.3758064516129032, + "grad_norm": 1.3959950512058854, + "learning_rate": 4.872969690577928e-07, + "loss": 0.7451514005661011, + "step": 5971 + }, + { + "epoch": 1.3760368663594469, + "grad_norm": 2.704793745814284, + "learning_rate": 4.86969906506992e-07, + "loss": 0.810903787612915, + "step": 5972 + }, + { + "epoch": 1.3762672811059908, + 
"grad_norm": 1.0363767093510534, + "learning_rate": 4.866429184218298e-07, + "loss": 0.6279938817024231, + "step": 5973 + }, + { + "epoch": 1.3764976958525346, + "grad_norm": 1.4075128359986724, + "learning_rate": 4.863160048497688e-07, + "loss": 0.7742956876754761, + "step": 5974 + }, + { + "epoch": 1.3767281105990783, + "grad_norm": 1.0416061346586747, + "learning_rate": 4.859891658382597e-07, + "loss": 0.7423844933509827, + "step": 5975 + }, + { + "epoch": 1.3769585253456222, + "grad_norm": 1.0348526250721313, + "learning_rate": 4.856624014347426e-07, + "loss": 0.8387676477432251, + "step": 5976 + }, + { + "epoch": 1.377188940092166, + "grad_norm": 1.3906652341525882, + "learning_rate": 4.853357116866471e-07, + "loss": 0.7959855794906616, + "step": 5977 + }, + { + "epoch": 1.3774193548387097, + "grad_norm": 1.2781418274310543, + "learning_rate": 4.850090966413913e-07, + "loss": 0.7086259722709656, + "step": 5978 + }, + { + "epoch": 1.3776497695852534, + "grad_norm": 1.113262974989995, + "learning_rate": 4.846825563463838e-07, + "loss": 0.7219396829605103, + "step": 5979 + }, + { + "epoch": 1.3778801843317972, + "grad_norm": 1.2693838975886846, + "learning_rate": 4.84356090849021e-07, + "loss": 0.8383582830429077, + "step": 5980 + }, + { + "epoch": 1.378110599078341, + "grad_norm": 1.2004259850017622, + "learning_rate": 4.840297001966887e-07, + "loss": 0.7624244689941406, + "step": 5981 + }, + { + "epoch": 1.3783410138248848, + "grad_norm": 1.3275243269089372, + "learning_rate": 4.837033844367626e-07, + "loss": 0.7901623249053955, + "step": 5982 + }, + { + "epoch": 1.3785714285714286, + "grad_norm": 1.0665581903589285, + "learning_rate": 4.833771436166068e-07, + "loss": 0.7732094526290894, + "step": 5983 + }, + { + "epoch": 1.3788018433179723, + "grad_norm": 1.221680510593368, + "learning_rate": 4.830509777835744e-07, + "loss": 0.7882228493690491, + "step": 5984 + }, + { + "epoch": 1.379032258064516, + "grad_norm": 1.3954212415484932, + "learning_rate": 
4.827248869850086e-07, + "loss": 0.8601159453392029, + "step": 5985 + }, + { + "epoch": 1.37926267281106, + "grad_norm": 1.442537797357167, + "learning_rate": 4.823988712682406e-07, + "loss": 0.8828538656234741, + "step": 5986 + }, + { + "epoch": 1.3794930875576037, + "grad_norm": 1.2814445672112398, + "learning_rate": 4.820729306805907e-07, + "loss": 0.8586058020591736, + "step": 5987 + }, + { + "epoch": 1.3797235023041474, + "grad_norm": 1.3476469386797916, + "learning_rate": 4.8174706526937e-07, + "loss": 0.8276243209838867, + "step": 5988 + }, + { + "epoch": 1.3799539170506914, + "grad_norm": 1.1504215702512235, + "learning_rate": 4.814212750818764e-07, + "loss": 0.837665855884552, + "step": 5989 + }, + { + "epoch": 1.380184331797235, + "grad_norm": 1.0830851541320008, + "learning_rate": 4.810955601653978e-07, + "loss": 0.7493194341659546, + "step": 5990 + }, + { + "epoch": 1.3804147465437788, + "grad_norm": 0.9470923738615639, + "learning_rate": 4.807699205672123e-07, + "loss": 0.8382525444030762, + "step": 5991 + }, + { + "epoch": 1.3806451612903226, + "grad_norm": 1.302996846441217, + "learning_rate": 4.804443563345854e-07, + "loss": 0.8152645826339722, + "step": 5992 + }, + { + "epoch": 1.3808755760368663, + "grad_norm": 1.1087518210488847, + "learning_rate": 4.801188675147719e-07, + "loss": 0.7168164849281311, + "step": 5993 + }, + { + "epoch": 1.3811059907834102, + "grad_norm": 1.3971974855003246, + "learning_rate": 4.79793454155017e-07, + "loss": 0.883512556552887, + "step": 5994 + }, + { + "epoch": 1.381336405529954, + "grad_norm": 1.1775999496250547, + "learning_rate": 4.794681163025536e-07, + "loss": 0.7258438467979431, + "step": 5995 + }, + { + "epoch": 1.3815668202764977, + "grad_norm": 1.102316858629444, + "learning_rate": 4.79142854004604e-07, + "loss": 0.8408991098403931, + "step": 5996 + }, + { + "epoch": 1.3817972350230414, + "grad_norm": 1.2549882230845555, + "learning_rate": 4.788176673083796e-07, + "loss": 0.6506227254867554, + "step": 5997 
+ }, + { + "epoch": 1.3820276497695851, + "grad_norm": 1.145761304273299, + "learning_rate": 4.784925562610809e-07, + "loss": 0.6971127986907959, + "step": 5998 + }, + { + "epoch": 1.382258064516129, + "grad_norm": 1.3037562977083754, + "learning_rate": 4.781675209098967e-07, + "loss": 0.8399784564971924, + "step": 5999 + }, + { + "epoch": 1.3824884792626728, + "grad_norm": 1.1085204750545832, + "learning_rate": 4.778425613020067e-07, + "loss": 0.6451772451400757, + "step": 6000 + }, + { + "epoch": 1.3827188940092165, + "grad_norm": 1.2906420363235995, + "learning_rate": 4.775176774845774e-07, + "loss": 0.7794390916824341, + "step": 6001 + }, + { + "epoch": 1.3829493087557605, + "grad_norm": 1.2681207047961411, + "learning_rate": 4.771928695047652e-07, + "loss": 0.7743663191795349, + "step": 6002 + }, + { + "epoch": 1.3831797235023042, + "grad_norm": 1.3900227492937691, + "learning_rate": 4.768681374097165e-07, + "loss": 0.7654878497123718, + "step": 6003 + }, + { + "epoch": 1.383410138248848, + "grad_norm": 0.9597367840932265, + "learning_rate": 4.765434812465645e-07, + "loss": 0.634769082069397, + "step": 6004 + }, + { + "epoch": 1.3836405529953917, + "grad_norm": 1.506039076037628, + "learning_rate": 4.762189010624337e-07, + "loss": 0.7941944599151611, + "step": 6005 + }, + { + "epoch": 1.3838709677419354, + "grad_norm": 1.015987334283248, + "learning_rate": 4.75894396904436e-07, + "loss": 0.7437179088592529, + "step": 6006 + }, + { + "epoch": 1.3841013824884794, + "grad_norm": 1.4064808788220893, + "learning_rate": 4.7556996881967236e-07, + "loss": 0.7854535579681396, + "step": 6007 + }, + { + "epoch": 1.384331797235023, + "grad_norm": 1.1454067558015728, + "learning_rate": 4.752456168552339e-07, + "loss": 0.7506910562515259, + "step": 6008 + }, + { + "epoch": 1.3845622119815668, + "grad_norm": 1.3378490743548084, + "learning_rate": 4.749213410581995e-07, + "loss": 0.8967334032058716, + "step": 6009 + }, + { + "epoch": 1.3847926267281105, + "grad_norm": 
0.9073367214802157, + "learning_rate": 4.7459714147563677e-07, + "loss": 0.7053096294403076, + "step": 6010 + }, + { + "epoch": 1.3850230414746543, + "grad_norm": 1.4011875457574152, + "learning_rate": 4.7427301815460396e-07, + "loss": 0.8759415149688721, + "step": 6011 + }, + { + "epoch": 1.3852534562211982, + "grad_norm": 1.2083846258038176, + "learning_rate": 4.739489711421466e-07, + "loss": 0.8827483654022217, + "step": 6012 + }, + { + "epoch": 1.385483870967742, + "grad_norm": 0.9892327750407551, + "learning_rate": 4.736250004852993e-07, + "loss": 0.7268258929252625, + "step": 6013 + }, + { + "epoch": 1.3857142857142857, + "grad_norm": 1.3354283922456354, + "learning_rate": 4.7330110623108665e-07, + "loss": 0.7142586708068848, + "step": 6014 + }, + { + "epoch": 1.3859447004608296, + "grad_norm": 0.9791582073391492, + "learning_rate": 4.7297728842652116e-07, + "loss": 0.7123303413391113, + "step": 6015 + }, + { + "epoch": 1.3861751152073734, + "grad_norm": 1.1089770586845422, + "learning_rate": 4.726535471186047e-07, + "loss": 0.7548067569732666, + "step": 6016 + }, + { + "epoch": 1.386405529953917, + "grad_norm": 1.205868893691031, + "learning_rate": 4.723298823543277e-07, + "loss": 0.7792191505432129, + "step": 6017 + }, + { + "epoch": 1.3866359447004608, + "grad_norm": 1.313401532453458, + "learning_rate": 4.7200629418066975e-07, + "loss": 0.8658785820007324, + "step": 6018 + }, + { + "epoch": 1.3868663594470045, + "grad_norm": 1.20345203638671, + "learning_rate": 4.716827826445987e-07, + "loss": 0.7173904776573181, + "step": 6019 + }, + { + "epoch": 1.3870967741935485, + "grad_norm": 1.0016118220950732, + "learning_rate": 4.7135934779307284e-07, + "loss": 0.6675543785095215, + "step": 6020 + }, + { + "epoch": 1.3873271889400922, + "grad_norm": 1.2559637316001069, + "learning_rate": 4.710359896730378e-07, + "loss": 0.8164724111557007, + "step": 6021 + }, + { + "epoch": 1.387557603686636, + "grad_norm": 1.474439832240672, + "learning_rate": 
4.707127083314283e-07, + "loss": 0.8354332447052002, + "step": 6022 + }, + { + "epoch": 1.3877880184331797, + "grad_norm": 1.1544900465349175, + "learning_rate": 4.7038950381516885e-07, + "loss": 0.8414663672447205, + "step": 6023 + }, + { + "epoch": 1.3880184331797234, + "grad_norm": 1.2150035811173532, + "learning_rate": 4.700663761711717e-07, + "loss": 0.7693418264389038, + "step": 6024 + }, + { + "epoch": 1.3882488479262673, + "grad_norm": 1.0071958767588902, + "learning_rate": 4.697433254463382e-07, + "loss": 0.7809267044067383, + "step": 6025 + }, + { + "epoch": 1.388479262672811, + "grad_norm": 1.203482571104156, + "learning_rate": 4.6942035168755944e-07, + "loss": 0.7455927133560181, + "step": 6026 + }, + { + "epoch": 1.3887096774193548, + "grad_norm": 1.3018105004563159, + "learning_rate": 4.6909745494171383e-07, + "loss": 0.8217881917953491, + "step": 6027 + }, + { + "epoch": 1.3889400921658988, + "grad_norm": 1.3723027057230852, + "learning_rate": 4.687746352556703e-07, + "loss": 0.8138882517814636, + "step": 6028 + }, + { + "epoch": 1.3891705069124423, + "grad_norm": 1.241759909967513, + "learning_rate": 4.6845189267628505e-07, + "loss": 0.8926469087600708, + "step": 6029 + }, + { + "epoch": 1.3894009216589862, + "grad_norm": 1.3027918343739477, + "learning_rate": 4.681292272504036e-07, + "loss": 0.797023355960846, + "step": 6030 + }, + { + "epoch": 1.38963133640553, + "grad_norm": 0.8383796462842409, + "learning_rate": 4.6780663902486104e-07, + "loss": 0.6767498254776001, + "step": 6031 + }, + { + "epoch": 1.3898617511520737, + "grad_norm": 1.2727364252127855, + "learning_rate": 4.674841280464804e-07, + "loss": 0.7514280080795288, + "step": 6032 + }, + { + "epoch": 1.3900921658986176, + "grad_norm": 1.3853363805552346, + "learning_rate": 4.671616943620731e-07, + "loss": 0.8879726529121399, + "step": 6033 + }, + { + "epoch": 1.3903225806451613, + "grad_norm": 0.8270134553121277, + "learning_rate": 4.66839338018441e-07, + "loss": 0.6674140095710754, + 
"step": 6034 + }, + { + "epoch": 1.390552995391705, + "grad_norm": 1.078021820178179, + "learning_rate": 4.6651705906237307e-07, + "loss": 0.9094855785369873, + "step": 6035 + }, + { + "epoch": 1.3907834101382488, + "grad_norm": 1.2561393182724931, + "learning_rate": 4.661948575406478e-07, + "loss": 0.8334506750106812, + "step": 6036 + }, + { + "epoch": 1.3910138248847925, + "grad_norm": 1.040119500616202, + "learning_rate": 4.658727335000323e-07, + "loss": 0.6545997858047485, + "step": 6037 + }, + { + "epoch": 1.3912442396313365, + "grad_norm": 1.1967093206075838, + "learning_rate": 4.6555068698728237e-07, + "loss": 0.7810590267181396, + "step": 6038 + }, + { + "epoch": 1.3914746543778802, + "grad_norm": 1.0756703494881659, + "learning_rate": 4.652287180491424e-07, + "loss": 0.7581864595413208, + "step": 6039 + }, + { + "epoch": 1.391705069124424, + "grad_norm": 1.2754594039466507, + "learning_rate": 4.649068267323465e-07, + "loss": 0.7134817242622375, + "step": 6040 + }, + { + "epoch": 1.3919354838709677, + "grad_norm": 0.9730020123763279, + "learning_rate": 4.645850130836162e-07, + "loss": 0.7050445079803467, + "step": 6041 + }, + { + "epoch": 1.3921658986175114, + "grad_norm": 1.146073776977597, + "learning_rate": 4.642632771496622e-07, + "loss": 0.8510535955429077, + "step": 6042 + }, + { + "epoch": 1.3923963133640553, + "grad_norm": 1.3940656685053847, + "learning_rate": 4.6394161897718454e-07, + "loss": 0.8627035617828369, + "step": 6043 + }, + { + "epoch": 1.392626728110599, + "grad_norm": 1.2671457951329919, + "learning_rate": 4.6362003861287127e-07, + "loss": 0.89891517162323, + "step": 6044 + }, + { + "epoch": 1.3928571428571428, + "grad_norm": 1.3215265337916509, + "learning_rate": 4.6329853610339896e-07, + "loss": 0.7267141342163086, + "step": 6045 + }, + { + "epoch": 1.3930875576036867, + "grad_norm": 1.4814794045534565, + "learning_rate": 4.6297711149543405e-07, + "loss": 0.8021189570426941, + "step": 6046 + }, + { + "epoch": 1.3933179723502305, + 
"grad_norm": 1.0954918085269951, + "learning_rate": 4.6265576483563054e-07, + "loss": 0.7836861610412598, + "step": 6047 + }, + { + "epoch": 1.3935483870967742, + "grad_norm": 1.1158269152355589, + "learning_rate": 4.623344961706309e-07, + "loss": 0.816940188407898, + "step": 6048 + }, + { + "epoch": 1.393778801843318, + "grad_norm": 1.4383712223724088, + "learning_rate": 4.6201330554706773e-07, + "loss": 0.77923583984375, + "step": 6049 + }, + { + "epoch": 1.3940092165898617, + "grad_norm": 1.3116759273395542, + "learning_rate": 4.6169219301156117e-07, + "loss": 0.8017981052398682, + "step": 6050 + }, + { + "epoch": 1.3942396313364056, + "grad_norm": 0.9886522563222937, + "learning_rate": 4.6137115861071973e-07, + "loss": 0.6786847114562988, + "step": 6051 + }, + { + "epoch": 1.3944700460829493, + "grad_norm": 1.1651814302030006, + "learning_rate": 4.61050202391142e-07, + "loss": 0.7802412509918213, + "step": 6052 + }, + { + "epoch": 1.394700460829493, + "grad_norm": 1.1955845105043188, + "learning_rate": 4.6072932439941347e-07, + "loss": 0.7434886693954468, + "step": 6053 + }, + { + "epoch": 1.3949308755760368, + "grad_norm": 1.2231160523968054, + "learning_rate": 4.6040852468211e-07, + "loss": 0.7590811252593994, + "step": 6054 + }, + { + "epoch": 1.3951612903225805, + "grad_norm": 1.5534904257800726, + "learning_rate": 4.600878032857949e-07, + "loss": 0.8952670097351074, + "step": 6055 + }, + { + "epoch": 1.3953917050691245, + "grad_norm": 1.1221688640413483, + "learning_rate": 4.5976716025702036e-07, + "loss": 0.8055328130722046, + "step": 6056 + }, + { + "epoch": 1.3956221198156682, + "grad_norm": 1.2064570897657243, + "learning_rate": 4.5944659564232725e-07, + "loss": 0.8919316530227661, + "step": 6057 + }, + { + "epoch": 1.395852534562212, + "grad_norm": 1.1074605434156857, + "learning_rate": 4.591261094882453e-07, + "loss": 0.701945960521698, + "step": 6058 + }, + { + "epoch": 1.3960829493087559, + "grad_norm": 1.1766452414586335, + "learning_rate": 
4.5880570184129206e-07, + "loss": 0.7457436323165894, + "step": 6059 + }, + { + "epoch": 1.3963133640552996, + "grad_norm": 1.193782401804385, + "learning_rate": 4.5848537274797527e-07, + "loss": 0.8093513250350952, + "step": 6060 + }, + { + "epoch": 1.3965437788018433, + "grad_norm": 1.5454221039375025, + "learning_rate": 4.5816512225478965e-07, + "loss": 0.7098822593688965, + "step": 6061 + }, + { + "epoch": 1.396774193548387, + "grad_norm": 1.2339994165792372, + "learning_rate": 4.578449504082189e-07, + "loss": 0.7423167824745178, + "step": 6062 + }, + { + "epoch": 1.3970046082949308, + "grad_norm": 1.1302042774482615, + "learning_rate": 4.5752485725473624e-07, + "loss": 0.8730076551437378, + "step": 6063 + }, + { + "epoch": 1.3972350230414747, + "grad_norm": 1.124374396794659, + "learning_rate": 4.572048428408024e-07, + "loss": 0.6914420127868652, + "step": 6064 + }, + { + "epoch": 1.3974654377880185, + "grad_norm": 1.3148006815381303, + "learning_rate": 4.5688490721286664e-07, + "loss": 0.8051402568817139, + "step": 6065 + }, + { + "epoch": 1.3976958525345622, + "grad_norm": 1.548390651351193, + "learning_rate": 4.5656505041736803e-07, + "loss": 0.9185452461242676, + "step": 6066 + }, + { + "epoch": 1.397926267281106, + "grad_norm": 1.1772485518113056, + "learning_rate": 4.5624527250073287e-07, + "loss": 0.766645073890686, + "step": 6067 + }, + { + "epoch": 1.3981566820276496, + "grad_norm": 1.3246112666718692, + "learning_rate": 4.559255735093763e-07, + "loss": 0.8005224466323853, + "step": 6068 + }, + { + "epoch": 1.3983870967741936, + "grad_norm": 1.2624209909197728, + "learning_rate": 4.5560595348970275e-07, + "loss": 0.8072810173034668, + "step": 6069 + }, + { + "epoch": 1.3986175115207373, + "grad_norm": 1.2197415999956105, + "learning_rate": 4.552864124881045e-07, + "loss": 0.7537474632263184, + "step": 6070 + }, + { + "epoch": 1.398847926267281, + "grad_norm": 1.3524984308216321, + "learning_rate": 4.549669505509619e-07, + "loss": 0.8396750092506409, + 
"step": 6071 + }, + { + "epoch": 1.399078341013825, + "grad_norm": 1.3095033527266953, + "learning_rate": 4.546475677246453e-07, + "loss": 0.8456804752349854, + "step": 6072 + }, + { + "epoch": 1.3993087557603687, + "grad_norm": 1.212970447769736, + "learning_rate": 4.543282640555123e-07, + "loss": 0.6150076389312744, + "step": 6073 + }, + { + "epoch": 1.3995391705069125, + "grad_norm": 1.1345047277741707, + "learning_rate": 4.540090395899089e-07, + "loss": 0.667172908782959, + "step": 6074 + }, + { + "epoch": 1.3997695852534562, + "grad_norm": 1.1269214154073468, + "learning_rate": 4.5368989437417116e-07, + "loss": 0.7918317914009094, + "step": 6075 + }, + { + "epoch": 1.4, + "grad_norm": 1.070411671989194, + "learning_rate": 4.5337082845462193e-07, + "loss": 0.6800580024719238, + "step": 6076 + }, + { + "epoch": 1.4002304147465439, + "grad_norm": 1.3908779413221009, + "learning_rate": 4.530518418775733e-07, + "loss": 0.9205034971237183, + "step": 6077 + }, + { + "epoch": 1.4004608294930876, + "grad_norm": 0.9376373503434607, + "learning_rate": 4.5273293468932585e-07, + "loss": 0.7228822708129883, + "step": 6078 + }, + { + "epoch": 1.4006912442396313, + "grad_norm": 1.0019153673681407, + "learning_rate": 4.524141069361679e-07, + "loss": 0.6827987432479858, + "step": 6079 + }, + { + "epoch": 1.400921658986175, + "grad_norm": 1.086076018779761, + "learning_rate": 4.520953586643779e-07, + "loss": 0.6272581815719604, + "step": 6080 + }, + { + "epoch": 1.4011520737327188, + "grad_norm": 1.1153873233388363, + "learning_rate": 4.5177668992022125e-07, + "loss": 0.8041881322860718, + "step": 6081 + }, + { + "epoch": 1.4013824884792627, + "grad_norm": 0.986104576594979, + "learning_rate": 4.5145810074995194e-07, + "loss": 0.7284958362579346, + "step": 6082 + }, + { + "epoch": 1.4016129032258065, + "grad_norm": 1.227152604501521, + "learning_rate": 4.511395911998135e-07, + "loss": 0.7653781175613403, + "step": 6083 + }, + { + "epoch": 1.4018433179723502, + "grad_norm": 
1.0466936448387898, + "learning_rate": 4.5082116131603677e-07, + "loss": 0.8037170171737671, + "step": 6084 + }, + { + "epoch": 1.4020737327188941, + "grad_norm": 1.1911735797842866, + "learning_rate": 4.505028111448411e-07, + "loss": 0.783043384552002, + "step": 6085 + }, + { + "epoch": 1.4023041474654379, + "grad_norm": 1.0547410930732963, + "learning_rate": 4.501845407324354e-07, + "loss": 0.6712161302566528, + "step": 6086 + }, + { + "epoch": 1.4025345622119816, + "grad_norm": 1.6406574524985842, + "learning_rate": 4.4986635012501575e-07, + "loss": 0.9537261724472046, + "step": 6087 + }, + { + "epoch": 1.4027649769585253, + "grad_norm": 1.4091085059994304, + "learning_rate": 4.495482393687666e-07, + "loss": 0.8984304666519165, + "step": 6088 + }, + { + "epoch": 1.402995391705069, + "grad_norm": 1.0430973660752654, + "learning_rate": 4.4923020850986224e-07, + "loss": 0.6894555687904358, + "step": 6089 + }, + { + "epoch": 1.403225806451613, + "grad_norm": 1.1542541609725157, + "learning_rate": 4.489122575944639e-07, + "loss": 0.685502290725708, + "step": 6090 + }, + { + "epoch": 1.4034562211981567, + "grad_norm": 1.1082950627991512, + "learning_rate": 4.485943866687216e-07, + "loss": 0.6794239282608032, + "step": 6091 + }, + { + "epoch": 1.4036866359447004, + "grad_norm": 1.0717636346133315, + "learning_rate": 4.482765957787744e-07, + "loss": 0.7647888660430908, + "step": 6092 + }, + { + "epoch": 1.4039170506912442, + "grad_norm": 1.3476206179513355, + "learning_rate": 4.4795888497074896e-07, + "loss": 0.798794150352478, + "step": 6093 + }, + { + "epoch": 1.404147465437788, + "grad_norm": 1.0358789181259667, + "learning_rate": 4.4764125429076026e-07, + "loss": 0.79430091381073, + "step": 6094 + }, + { + "epoch": 1.4043778801843319, + "grad_norm": 1.4040182367122596, + "learning_rate": 4.4732370378491255e-07, + "loss": 0.9089795351028442, + "step": 6095 + }, + { + "epoch": 1.4046082949308756, + "grad_norm": 0.9307801992196251, + "learning_rate": 
4.4700623349929757e-07, + "loss": 0.8270718455314636, + "step": 6096 + }, + { + "epoch": 1.4048387096774193, + "grad_norm": 1.082228260794844, + "learning_rate": 4.466888434799958e-07, + "loss": 0.7550361156463623, + "step": 6097 + }, + { + "epoch": 1.4050691244239633, + "grad_norm": 1.15557625190535, + "learning_rate": 4.463715337730759e-07, + "loss": 0.7406442165374756, + "step": 6098 + }, + { + "epoch": 1.405299539170507, + "grad_norm": 1.4065045960279658, + "learning_rate": 4.460543044245949e-07, + "loss": 0.830552875995636, + "step": 6099 + }, + { + "epoch": 1.4055299539170507, + "grad_norm": 1.4160409051991987, + "learning_rate": 4.45737155480598e-07, + "loss": 0.8961822390556335, + "step": 6100 + }, + { + "epoch": 1.4057603686635944, + "grad_norm": 1.2630678724710616, + "learning_rate": 4.454200869871195e-07, + "loss": 0.6307489275932312, + "step": 6101 + }, + { + "epoch": 1.4059907834101382, + "grad_norm": 1.437795392364305, + "learning_rate": 4.451030989901808e-07, + "loss": 0.8682084083557129, + "step": 6102 + }, + { + "epoch": 1.4062211981566821, + "grad_norm": 1.1897592960029226, + "learning_rate": 4.4478619153579323e-07, + "loss": 0.7157681584358215, + "step": 6103 + }, + { + "epoch": 1.4064516129032258, + "grad_norm": 1.196767224907471, + "learning_rate": 4.4446936466995486e-07, + "loss": 0.7267071008682251, + "step": 6104 + }, + { + "epoch": 1.4066820276497696, + "grad_norm": 1.1191501401801882, + "learning_rate": 4.4415261843865246e-07, + "loss": 0.8435063362121582, + "step": 6105 + }, + { + "epoch": 1.4069124423963133, + "grad_norm": 1.2220260712556485, + "learning_rate": 4.43835952887862e-07, + "loss": 0.8895175457000732, + "step": 6106 + }, + { + "epoch": 1.407142857142857, + "grad_norm": 1.0150052474935476, + "learning_rate": 4.435193680635467e-07, + "loss": 0.7470073699951172, + "step": 6107 + }, + { + "epoch": 1.407373271889401, + "grad_norm": 1.376675993117338, + "learning_rate": 4.432028640116581e-07, + "loss": 0.7993630170822144, + "step": 
6108 + }, + { + "epoch": 1.4076036866359447, + "grad_norm": 1.2675455750766673, + "learning_rate": 4.4288644077813695e-07, + "loss": 0.823069155216217, + "step": 6109 + }, + { + "epoch": 1.4078341013824884, + "grad_norm": 1.374585518914166, + "learning_rate": 4.4257009840891146e-07, + "loss": 0.7665367126464844, + "step": 6110 + }, + { + "epoch": 1.4080645161290324, + "grad_norm": 1.1174810423449963, + "learning_rate": 4.422538369498979e-07, + "loss": 0.7173991799354553, + "step": 6111 + }, + { + "epoch": 1.4082949308755761, + "grad_norm": 0.9476955630635919, + "learning_rate": 4.4193765644700186e-07, + "loss": 0.8288347125053406, + "step": 6112 + }, + { + "epoch": 1.4085253456221198, + "grad_norm": 1.206088367901853, + "learning_rate": 4.4162155694611636e-07, + "loss": 0.8589911460876465, + "step": 6113 + }, + { + "epoch": 1.4087557603686636, + "grad_norm": 1.2884473987369411, + "learning_rate": 4.4130553849312213e-07, + "loss": 0.8783868551254272, + "step": 6114 + }, + { + "epoch": 1.4089861751152073, + "grad_norm": 1.0994332560949611, + "learning_rate": 4.409896011338898e-07, + "loss": 0.7625287771224976, + "step": 6115 + }, + { + "epoch": 1.4092165898617512, + "grad_norm": 1.1571434855502665, + "learning_rate": 4.406737449142769e-07, + "loss": 0.7412571907043457, + "step": 6116 + }, + { + "epoch": 1.409447004608295, + "grad_norm": 0.9525276096114424, + "learning_rate": 4.4035796988012943e-07, + "loss": 0.6248455047607422, + "step": 6117 + }, + { + "epoch": 1.4096774193548387, + "grad_norm": 1.1843810443395109, + "learning_rate": 4.400422760772817e-07, + "loss": 0.7970919609069824, + "step": 6118 + }, + { + "epoch": 1.4099078341013824, + "grad_norm": 1.0403384039115238, + "learning_rate": 4.397266635515563e-07, + "loss": 0.6184223294258118, + "step": 6119 + }, + { + "epoch": 1.4101382488479262, + "grad_norm": 1.07818776364935, + "learning_rate": 4.394111323487637e-07, + "loss": 0.9014843702316284, + "step": 6120 + }, + { + "epoch": 1.41036866359447, + 
"grad_norm": 1.1660248005288976, + "learning_rate": 4.390956825147034e-07, + "loss": 0.8468939661979675, + "step": 6121 + }, + { + "epoch": 1.4105990783410138, + "grad_norm": 1.0810631729189881, + "learning_rate": 4.3878031409516234e-07, + "loss": 0.7832604646682739, + "step": 6122 + }, + { + "epoch": 1.4108294930875576, + "grad_norm": 1.0700225295832282, + "learning_rate": 4.3846502713591527e-07, + "loss": 0.7202898263931274, + "step": 6123 + }, + { + "epoch": 1.4110599078341015, + "grad_norm": 1.1788285042234896, + "learning_rate": 4.3814982168272664e-07, + "loss": 0.6785540580749512, + "step": 6124 + }, + { + "epoch": 1.4112903225806452, + "grad_norm": 1.3040233352486812, + "learning_rate": 4.378346977813474e-07, + "loss": 0.795532763004303, + "step": 6125 + }, + { + "epoch": 1.411520737327189, + "grad_norm": 0.8875056644654742, + "learning_rate": 4.3751965547751735e-07, + "loss": 0.7715259790420532, + "step": 6126 + }, + { + "epoch": 1.4117511520737327, + "grad_norm": 1.4252318364105403, + "learning_rate": 4.37204694816965e-07, + "loss": 0.8657132983207703, + "step": 6127 + }, + { + "epoch": 1.4119815668202764, + "grad_norm": 1.0207817658354317, + "learning_rate": 4.3688981584540586e-07, + "loss": 0.7253363132476807, + "step": 6128 + }, + { + "epoch": 1.4122119815668204, + "grad_norm": 1.2055823367063212, + "learning_rate": 4.365750186085447e-07, + "loss": 0.8511998653411865, + "step": 6129 + }, + { + "epoch": 1.412442396313364, + "grad_norm": 1.3256931814656627, + "learning_rate": 4.3626030315207386e-07, + "loss": 0.7936528921127319, + "step": 6130 + }, + { + "epoch": 1.4126728110599078, + "grad_norm": 1.1878967804503957, + "learning_rate": 4.3594566952167324e-07, + "loss": 0.758521556854248, + "step": 6131 + }, + { + "epoch": 1.4129032258064516, + "grad_norm": 1.242405288398936, + "learning_rate": 4.3563111776301243e-07, + "loss": 0.8202048540115356, + "step": 6132 + }, + { + "epoch": 1.4131336405529953, + "grad_norm": 1.075213759854547, + "learning_rate": 
4.3531664792174773e-07, + "loss": 0.7864067554473877, + "step": 6133 + }, + { + "epoch": 1.4133640552995392, + "grad_norm": 1.472991105564755, + "learning_rate": 4.350022600435236e-07, + "loss": 0.8051233291625977, + "step": 6134 + }, + { + "epoch": 1.413594470046083, + "grad_norm": 1.0811225554895896, + "learning_rate": 4.34687954173974e-07, + "loss": 0.7617348432540894, + "step": 6135 + }, + { + "epoch": 1.4138248847926267, + "grad_norm": 1.299621377240526, + "learning_rate": 4.3437373035871927e-07, + "loss": 0.7899652719497681, + "step": 6136 + }, + { + "epoch": 1.4140552995391706, + "grad_norm": 1.1704157180732915, + "learning_rate": 4.340595886433689e-07, + "loss": 0.8467222452163696, + "step": 6137 + }, + { + "epoch": 1.4142857142857144, + "grad_norm": 1.294364382858993, + "learning_rate": 4.3374552907352003e-07, + "loss": 0.8451426029205322, + "step": 6138 + }, + { + "epoch": 1.414516129032258, + "grad_norm": 1.1053072195052795, + "learning_rate": 4.3343155169475797e-07, + "loss": 0.7140414714813232, + "step": 6139 + }, + { + "epoch": 1.4147465437788018, + "grad_norm": 1.365344165744123, + "learning_rate": 4.331176565526558e-07, + "loss": 0.7680803537368774, + "step": 6140 + }, + { + "epoch": 1.4149769585253456, + "grad_norm": 1.0970331390876962, + "learning_rate": 4.328038436927757e-07, + "loss": 0.7262120246887207, + "step": 6141 + }, + { + "epoch": 1.4152073732718895, + "grad_norm": 1.2176292189863585, + "learning_rate": 4.3249011316066676e-07, + "loss": 0.7788687944412231, + "step": 6142 + }, + { + "epoch": 1.4154377880184332, + "grad_norm": 1.4880584379115793, + "learning_rate": 4.321764650018662e-07, + "loss": 0.7613503336906433, + "step": 6143 + }, + { + "epoch": 1.415668202764977, + "grad_norm": 0.9554644370778598, + "learning_rate": 4.3186289926190056e-07, + "loss": 0.6778309345245361, + "step": 6144 + }, + { + "epoch": 1.4158986175115207, + "grad_norm": 1.5159867718873894, + "learning_rate": 4.315494159862829e-07, + "loss": 0.8626673221588135, + 
"step": 6145 + }, + { + "epoch": 1.4161290322580644, + "grad_norm": 1.194727935560369, + "learning_rate": 4.312360152205147e-07, + "loss": 0.8321051597595215, + "step": 6146 + }, + { + "epoch": 1.4163594470046084, + "grad_norm": 1.146293428483721, + "learning_rate": 4.309226970100861e-07, + "loss": 0.9317119717597961, + "step": 6147 + }, + { + "epoch": 1.416589861751152, + "grad_norm": 1.4669878139895565, + "learning_rate": 4.306094614004748e-07, + "loss": 0.9479870200157166, + "step": 6148 + }, + { + "epoch": 1.4168202764976958, + "grad_norm": 1.0166991353273056, + "learning_rate": 4.3029630843714606e-07, + "loss": 0.8222699165344238, + "step": 6149 + }, + { + "epoch": 1.4170506912442398, + "grad_norm": 1.427356205375722, + "learning_rate": 4.2998323816555427e-07, + "loss": 0.8232519030570984, + "step": 6150 + }, + { + "epoch": 1.4172811059907833, + "grad_norm": 1.156719588287236, + "learning_rate": 4.2967025063114057e-07, + "loss": 0.7423735857009888, + "step": 6151 + }, + { + "epoch": 1.4175115207373272, + "grad_norm": 1.1009896479281802, + "learning_rate": 4.2935734587933527e-07, + "loss": 0.6947557926177979, + "step": 6152 + }, + { + "epoch": 1.417741935483871, + "grad_norm": 1.2980025668504918, + "learning_rate": 4.290445239555558e-07, + "loss": 0.789128303527832, + "step": 6153 + }, + { + "epoch": 1.4179723502304147, + "grad_norm": 1.344185599290992, + "learning_rate": 4.2873178490520745e-07, + "loss": 0.8025885820388794, + "step": 6154 + }, + { + "epoch": 1.4182027649769586, + "grad_norm": 1.3491619317054568, + "learning_rate": 4.284191287736847e-07, + "loss": 0.8139045238494873, + "step": 6155 + }, + { + "epoch": 1.4184331797235024, + "grad_norm": 1.1246209635446252, + "learning_rate": 4.2810655560636864e-07, + "loss": 0.8154167532920837, + "step": 6156 + }, + { + "epoch": 1.418663594470046, + "grad_norm": 1.0954033524128675, + "learning_rate": 4.2779406544862896e-07, + "loss": 0.6383910775184631, + "step": 6157 + }, + { + "epoch": 1.4188940092165898, + 
"grad_norm": 1.217902628448707, + "learning_rate": 4.2748165834582316e-07, + "loss": 0.7008179426193237, + "step": 6158 + }, + { + "epoch": 1.4191244239631335, + "grad_norm": 1.2584275851601723, + "learning_rate": 4.2716933434329684e-07, + "loss": 0.9458012580871582, + "step": 6159 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 1.1170402428175406, + "learning_rate": 4.268570934863829e-07, + "loss": 0.7354133725166321, + "step": 6160 + }, + { + "epoch": 1.4195852534562212, + "grad_norm": 1.050503834766047, + "learning_rate": 4.265449358204034e-07, + "loss": 0.7146268486976624, + "step": 6161 + }, + { + "epoch": 1.419815668202765, + "grad_norm": 1.3602740783757037, + "learning_rate": 4.262328613906674e-07, + "loss": 0.7357315421104431, + "step": 6162 + }, + { + "epoch": 1.4200460829493087, + "grad_norm": 1.5139772991772644, + "learning_rate": 4.2592087024247157e-07, + "loss": 0.8006314039230347, + "step": 6163 + }, + { + "epoch": 1.4202764976958524, + "grad_norm": 1.2194249079603743, + "learning_rate": 4.256089624211018e-07, + "loss": 0.8299369812011719, + "step": 6164 + }, + { + "epoch": 1.4205069124423964, + "grad_norm": 1.3878054713959478, + "learning_rate": 4.252971379718308e-07, + "loss": 0.7018890380859375, + "step": 6165 + }, + { + "epoch": 1.42073732718894, + "grad_norm": 1.0332854509364862, + "learning_rate": 4.24985396939919e-07, + "loss": 0.6501315236091614, + "step": 6166 + }, + { + "epoch": 1.4209677419354838, + "grad_norm": 1.6385767983913562, + "learning_rate": 4.24673739370616e-07, + "loss": 0.8379749059677124, + "step": 6167 + }, + { + "epoch": 1.4211981566820278, + "grad_norm": 1.3590615179836698, + "learning_rate": 4.24362165309158e-07, + "loss": 0.7996747493743896, + "step": 6168 + }, + { + "epoch": 1.4214285714285715, + "grad_norm": 1.2270246479776195, + "learning_rate": 4.240506748007695e-07, + "loss": 0.7258181571960449, + "step": 6169 + }, + { + "epoch": 1.4216589861751152, + "grad_norm": 0.9997463365032918, + "learning_rate": 
4.237392678906633e-07, + "loss": 0.6035803556442261, + "step": 6170 + }, + { + "epoch": 1.421889400921659, + "grad_norm": 1.1041316785012205, + "learning_rate": 4.2342794462403954e-07, + "loss": 0.7668799757957458, + "step": 6171 + }, + { + "epoch": 1.4221198156682027, + "grad_norm": 0.9385556238542058, + "learning_rate": 4.23116705046086e-07, + "loss": 0.7816733121871948, + "step": 6172 + }, + { + "epoch": 1.4223502304147466, + "grad_norm": 1.2003519134278278, + "learning_rate": 4.228055492019793e-07, + "loss": 0.8753983974456787, + "step": 6173 + }, + { + "epoch": 1.4225806451612903, + "grad_norm": 1.1591394093837553, + "learning_rate": 4.224944771368831e-07, + "loss": 0.8319464921951294, + "step": 6174 + }, + { + "epoch": 1.422811059907834, + "grad_norm": 1.1444278460686073, + "learning_rate": 4.2218348889594866e-07, + "loss": 0.6670328378677368, + "step": 6175 + }, + { + "epoch": 1.4230414746543778, + "grad_norm": 0.9949133230999909, + "learning_rate": 4.218725845243163e-07, + "loss": 0.7879645824432373, + "step": 6176 + }, + { + "epoch": 1.4232718894009215, + "grad_norm": 1.1897456513351008, + "learning_rate": 4.2156176406711287e-07, + "loss": 0.709680438041687, + "step": 6177 + }, + { + "epoch": 1.4235023041474655, + "grad_norm": 1.2454467445687987, + "learning_rate": 4.2125102756945364e-07, + "loss": 0.7990894317626953, + "step": 6178 + }, + { + "epoch": 1.4237327188940092, + "grad_norm": 0.899401568311558, + "learning_rate": 4.2094037507644165e-07, + "loss": 0.7283308506011963, + "step": 6179 + }, + { + "epoch": 1.423963133640553, + "grad_norm": 1.1017464258775596, + "learning_rate": 4.2062980663316715e-07, + "loss": 0.8763309717178345, + "step": 6180 + }, + { + "epoch": 1.4241935483870969, + "grad_norm": 1.5313476968397717, + "learning_rate": 4.2031932228470966e-07, + "loss": 0.9370014667510986, + "step": 6181 + }, + { + "epoch": 1.4244239631336406, + "grad_norm": 1.2317913481286529, + "learning_rate": 4.2000892207613526e-07, + "loss": 0.7883036136627197, 
+ "step": 6182 + }, + { + "epoch": 1.4246543778801843, + "grad_norm": 1.0986212570485994, + "learning_rate": 4.196986060524975e-07, + "loss": 0.7021682262420654, + "step": 6183 + }, + { + "epoch": 1.424884792626728, + "grad_norm": 1.6809928588875014, + "learning_rate": 4.193883742588393e-07, + "loss": 0.842636227607727, + "step": 6184 + }, + { + "epoch": 1.4251152073732718, + "grad_norm": 1.3804520546599122, + "learning_rate": 4.190782267401899e-07, + "loss": 0.8003957867622375, + "step": 6185 + }, + { + "epoch": 1.4253456221198157, + "grad_norm": 1.4234115388616575, + "learning_rate": 4.1876816354156655e-07, + "loss": 0.9799495935440063, + "step": 6186 + }, + { + "epoch": 1.4255760368663595, + "grad_norm": 1.4430834747300494, + "learning_rate": 4.184581847079751e-07, + "loss": 0.8726102113723755, + "step": 6187 + }, + { + "epoch": 1.4258064516129032, + "grad_norm": 1.4779961873749974, + "learning_rate": 4.181482902844082e-07, + "loss": 0.8771729469299316, + "step": 6188 + }, + { + "epoch": 1.426036866359447, + "grad_norm": 0.932904262005563, + "learning_rate": 4.1783848031584644e-07, + "loss": 0.5891281962394714, + "step": 6189 + }, + { + "epoch": 1.4262672811059907, + "grad_norm": 1.0356433358815755, + "learning_rate": 4.1752875484725904e-07, + "loss": 0.8133054971694946, + "step": 6190 + }, + { + "epoch": 1.4264976958525346, + "grad_norm": 1.2051464792634443, + "learning_rate": 4.1721911392360164e-07, + "loss": 0.7175684571266174, + "step": 6191 + }, + { + "epoch": 1.4267281105990783, + "grad_norm": 1.2483759508518841, + "learning_rate": 4.16909557589818e-07, + "loss": 0.7112927436828613, + "step": 6192 + }, + { + "epoch": 1.426958525345622, + "grad_norm": 1.3756845434805187, + "learning_rate": 4.166000858908406e-07, + "loss": 0.8564406037330627, + "step": 6193 + }, + { + "epoch": 1.427188940092166, + "grad_norm": 1.2070686503198162, + "learning_rate": 4.162906988715883e-07, + "loss": 0.7630729675292969, + "step": 6194 + }, + { + "epoch": 1.4274193548387097, + 
"grad_norm": 0.971140934311516, + "learning_rate": 4.1598139657696806e-07, + "loss": 0.6810768246650696, + "step": 6195 + }, + { + "epoch": 1.4276497695852535, + "grad_norm": 0.9185719080310675, + "learning_rate": 4.1567217905187535e-07, + "loss": 0.8482312560081482, + "step": 6196 + }, + { + "epoch": 1.4278801843317972, + "grad_norm": 1.4356078879259653, + "learning_rate": 4.1536304634119225e-07, + "loss": 0.845355749130249, + "step": 6197 + }, + { + "epoch": 1.428110599078341, + "grad_norm": 1.3990653285356356, + "learning_rate": 4.1505399848978896e-07, + "loss": 0.8082824349403381, + "step": 6198 + }, + { + "epoch": 1.4283410138248849, + "grad_norm": 1.5497395393382225, + "learning_rate": 4.147450355425235e-07, + "loss": 0.8141404390335083, + "step": 6199 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 1.0209015709753073, + "learning_rate": 4.14436157544241e-07, + "loss": 0.8144549131393433, + "step": 6200 + }, + { + "epoch": 1.4288018433179723, + "grad_norm": 1.2316152605954584, + "learning_rate": 4.141273645397754e-07, + "loss": 0.6554359793663025, + "step": 6201 + }, + { + "epoch": 1.429032258064516, + "grad_norm": 1.2095729612520494, + "learning_rate": 4.138186565739472e-07, + "loss": 0.8035449981689453, + "step": 6202 + }, + { + "epoch": 1.4292626728110598, + "grad_norm": 1.348688453980758, + "learning_rate": 4.1351003369156467e-07, + "loss": 0.7848105430603027, + "step": 6203 + }, + { + "epoch": 1.4294930875576037, + "grad_norm": 1.167048125389705, + "learning_rate": 4.132014959374246e-07, + "loss": 0.7064214944839478, + "step": 6204 + }, + { + "epoch": 1.4297235023041475, + "grad_norm": 1.236002479887974, + "learning_rate": 4.128930433563107e-07, + "loss": 0.7636318802833557, + "step": 6205 + }, + { + "epoch": 1.4299539170506912, + "grad_norm": 1.2440935326289273, + "learning_rate": 4.1258467599299395e-07, + "loss": 0.6839499473571777, + "step": 6206 + }, + { + "epoch": 1.4301843317972351, + "grad_norm": 1.1802386777878584, + "learning_rate": 
4.122763938922341e-07, + "loss": 0.8355294466018677, + "step": 6207 + }, + { + "epoch": 1.4304147465437789, + "grad_norm": 1.1238131581281627, + "learning_rate": 4.1196819709877773e-07, + "loss": 0.7563334107398987, + "step": 6208 + }, + { + "epoch": 1.4306451612903226, + "grad_norm": 1.1336601077663977, + "learning_rate": 4.116600856573588e-07, + "loss": 0.6991991996765137, + "step": 6209 + }, + { + "epoch": 1.4308755760368663, + "grad_norm": 1.2669311049959366, + "learning_rate": 4.113520596126998e-07, + "loss": 0.7249872088432312, + "step": 6210 + }, + { + "epoch": 1.43110599078341, + "grad_norm": 0.9386622429459606, + "learning_rate": 4.110441190095101e-07, + "loss": 0.6570736169815063, + "step": 6211 + }, + { + "epoch": 1.431336405529954, + "grad_norm": 1.0652944602016763, + "learning_rate": 4.107362638924865e-07, + "loss": 0.7137724161148071, + "step": 6212 + }, + { + "epoch": 1.4315668202764977, + "grad_norm": 1.1571956532799377, + "learning_rate": 4.1042849430631453e-07, + "loss": 0.7620561122894287, + "step": 6213 + }, + { + "epoch": 1.4317972350230415, + "grad_norm": 1.118516282963539, + "learning_rate": 4.1012081029566616e-07, + "loss": 0.8186367750167847, + "step": 6214 + }, + { + "epoch": 1.4320276497695852, + "grad_norm": 1.2414517851095686, + "learning_rate": 4.098132119052008e-07, + "loss": 0.8068171739578247, + "step": 6215 + }, + { + "epoch": 1.432258064516129, + "grad_norm": 1.3160335320341774, + "learning_rate": 4.095056991795668e-07, + "loss": 0.8640002012252808, + "step": 6216 + }, + { + "epoch": 1.4324884792626729, + "grad_norm": 1.4376158954775202, + "learning_rate": 4.0919827216339887e-07, + "loss": 0.8886386156082153, + "step": 6217 + }, + { + "epoch": 1.4327188940092166, + "grad_norm": 1.072787779438559, + "learning_rate": 4.0889093090131965e-07, + "loss": 0.6853137016296387, + "step": 6218 + }, + { + "epoch": 1.4329493087557603, + "grad_norm": 1.0751813749856631, + "learning_rate": 4.0858367543793923e-07, + "loss": 0.7423670291900635, + 
"step": 6219 + }, + { + "epoch": 1.4331797235023043, + "grad_norm": 1.2596005033506457, + "learning_rate": 4.0827650581785544e-07, + "loss": 0.7969200611114502, + "step": 6220 + }, + { + "epoch": 1.433410138248848, + "grad_norm": 1.1441853902577663, + "learning_rate": 4.079694220856531e-07, + "loss": 0.8506221771240234, + "step": 6221 + }, + { + "epoch": 1.4336405529953917, + "grad_norm": 1.107985966829949, + "learning_rate": 4.076624242859058e-07, + "loss": 0.6755083799362183, + "step": 6222 + }, + { + "epoch": 1.4338709677419355, + "grad_norm": 1.0751582832116895, + "learning_rate": 4.0735551246317333e-07, + "loss": 0.7734944820404053, + "step": 6223 + }, + { + "epoch": 1.4341013824884792, + "grad_norm": 1.1828392807290495, + "learning_rate": 4.0704868666200345e-07, + "loss": 0.8564216494560242, + "step": 6224 + }, + { + "epoch": 1.4343317972350231, + "grad_norm": 0.8521811929477493, + "learning_rate": 4.067419469269321e-07, + "loss": 0.6858065128326416, + "step": 6225 + }, + { + "epoch": 1.4345622119815669, + "grad_norm": 1.4454169020848073, + "learning_rate": 4.064352933024813e-07, + "loss": 0.684749960899353, + "step": 6226 + }, + { + "epoch": 1.4347926267281106, + "grad_norm": 1.0124943930771644, + "learning_rate": 4.061287258331624e-07, + "loss": 0.7648766040802002, + "step": 6227 + }, + { + "epoch": 1.4350230414746543, + "grad_norm": 1.2226521022766697, + "learning_rate": 4.058222445634727e-07, + "loss": 0.924850583076477, + "step": 6228 + }, + { + "epoch": 1.435253456221198, + "grad_norm": 1.2841804739911125, + "learning_rate": 4.055158495378972e-07, + "loss": 0.906406581401825, + "step": 6229 + }, + { + "epoch": 1.435483870967742, + "grad_norm": 1.1497462597145154, + "learning_rate": 4.052095408009095e-07, + "loss": 0.9169156551361084, + "step": 6230 + }, + { + "epoch": 1.4357142857142857, + "grad_norm": 0.9291011874506654, + "learning_rate": 4.0490331839696967e-07, + "loss": 0.7367587685585022, + "step": 6231 + }, + { + "epoch": 1.4359447004608294, + 
"grad_norm": 0.9837392218179005, + "learning_rate": 4.045971823705249e-07, + "loss": 0.7608749270439148, + "step": 6232 + }, + { + "epoch": 1.4361751152073734, + "grad_norm": 1.006459600101246, + "learning_rate": 4.0429113276601134e-07, + "loss": 0.7008038759231567, + "step": 6233 + }, + { + "epoch": 1.4364055299539171, + "grad_norm": 1.3644950830796674, + "learning_rate": 4.039851696278511e-07, + "loss": 0.8581372499465942, + "step": 6234 + }, + { + "epoch": 1.4366359447004609, + "grad_norm": 1.1117269621825037, + "learning_rate": 4.036792930004542e-07, + "loss": 0.6602354049682617, + "step": 6235 + }, + { + "epoch": 1.4368663594470046, + "grad_norm": 1.1136625894629528, + "learning_rate": 4.0337350292821893e-07, + "loss": 0.8560018539428711, + "step": 6236 + }, + { + "epoch": 1.4370967741935483, + "grad_norm": 1.5699670277885023, + "learning_rate": 4.030677994555298e-07, + "loss": 0.8837640285491943, + "step": 6237 + }, + { + "epoch": 1.4373271889400923, + "grad_norm": 1.1788518631283098, + "learning_rate": 4.027621826267593e-07, + "loss": 0.8214797973632812, + "step": 6238 + }, + { + "epoch": 1.437557603686636, + "grad_norm": 1.091488147712342, + "learning_rate": 4.024566524862675e-07, + "loss": 0.7590944766998291, + "step": 6239 + }, + { + "epoch": 1.4377880184331797, + "grad_norm": 1.5224250495012106, + "learning_rate": 4.021512090784014e-07, + "loss": 0.8792011141777039, + "step": 6240 + }, + { + "epoch": 1.4380184331797234, + "grad_norm": 0.9801567843215049, + "learning_rate": 4.0184585244749556e-07, + "loss": 0.8309401273727417, + "step": 6241 + }, + { + "epoch": 1.4382488479262672, + "grad_norm": 1.2518924977337436, + "learning_rate": 4.015405826378727e-07, + "loss": 0.7474797964096069, + "step": 6242 + }, + { + "epoch": 1.4384792626728111, + "grad_norm": 1.0203221096159534, + "learning_rate": 4.012353996938421e-07, + "loss": 0.7376091480255127, + "step": 6243 + }, + { + "epoch": 1.4387096774193548, + "grad_norm": 1.4049798692682764, + "learning_rate": 
4.0093030365970014e-07, + "loss": 0.7809054851531982, + "step": 6244 + }, + { + "epoch": 1.4389400921658986, + "grad_norm": 1.206100995388555, + "learning_rate": 4.0062529457973194e-07, + "loss": 0.8551669120788574, + "step": 6245 + }, + { + "epoch": 1.4391705069124425, + "grad_norm": 1.3285364918408127, + "learning_rate": 4.0032037249820874e-07, + "loss": 0.7874705791473389, + "step": 6246 + }, + { + "epoch": 1.4394009216589863, + "grad_norm": 1.220500481419073, + "learning_rate": 4.0001553745938923e-07, + "loss": 0.8032190799713135, + "step": 6247 + }, + { + "epoch": 1.43963133640553, + "grad_norm": 1.1833761956090303, + "learning_rate": 3.9971078950752057e-07, + "loss": 0.7600107192993164, + "step": 6248 + }, + { + "epoch": 1.4398617511520737, + "grad_norm": 1.0770488794400255, + "learning_rate": 3.994061286868361e-07, + "loss": 0.7738933563232422, + "step": 6249 + }, + { + "epoch": 1.4400921658986174, + "grad_norm": 1.2036013798832181, + "learning_rate": 3.9910155504155665e-07, + "loss": 0.701007604598999, + "step": 6250 + }, + { + "epoch": 1.4403225806451614, + "grad_norm": 1.2067244620095277, + "learning_rate": 3.9879706861589126e-07, + "loss": 0.8962818384170532, + "step": 6251 + }, + { + "epoch": 1.4405529953917051, + "grad_norm": 1.4532648423769148, + "learning_rate": 3.9849266945403513e-07, + "loss": 0.7636146545410156, + "step": 6252 + }, + { + "epoch": 1.4407834101382488, + "grad_norm": 1.4158432417231142, + "learning_rate": 3.981883576001722e-07, + "loss": 0.8816943168640137, + "step": 6253 + }, + { + "epoch": 1.4410138248847926, + "grad_norm": 1.2321816109724755, + "learning_rate": 3.978841330984725e-07, + "loss": 0.7252858877182007, + "step": 6254 + }, + { + "epoch": 1.4412442396313363, + "grad_norm": 1.1568327683598156, + "learning_rate": 3.975799959930932e-07, + "loss": 0.6720175743103027, + "step": 6255 + }, + { + "epoch": 1.4414746543778802, + "grad_norm": 0.981779637597959, + "learning_rate": 3.972759463281805e-07, + "loss": 0.8000779151916504, 
+ "step": 6256 + }, + { + "epoch": 1.441705069124424, + "grad_norm": 1.2561538909400267, + "learning_rate": 3.9697198414786626e-07, + "loss": 0.7356371283531189, + "step": 6257 + }, + { + "epoch": 1.4419354838709677, + "grad_norm": 1.3228468777834088, + "learning_rate": 3.966681094962703e-07, + "loss": 0.708438515663147, + "step": 6258 + }, + { + "epoch": 1.4421658986175117, + "grad_norm": 1.1635121950639566, + "learning_rate": 3.963643224174994e-07, + "loss": 0.709287166595459, + "step": 6259 + }, + { + "epoch": 1.4423963133640554, + "grad_norm": 1.2638923885979756, + "learning_rate": 3.9606062295564813e-07, + "loss": 0.743755578994751, + "step": 6260 + }, + { + "epoch": 1.442626728110599, + "grad_norm": 1.119467668131696, + "learning_rate": 3.9575701115479744e-07, + "loss": 0.9727948904037476, + "step": 6261 + }, + { + "epoch": 1.4428571428571428, + "grad_norm": 1.165539680123963, + "learning_rate": 3.9545348705901703e-07, + "loss": 0.9070688486099243, + "step": 6262 + }, + { + "epoch": 1.4430875576036866, + "grad_norm": 1.3995169117674358, + "learning_rate": 3.951500507123627e-07, + "loss": 0.8167496919631958, + "step": 6263 + }, + { + "epoch": 1.4433179723502305, + "grad_norm": 1.1204443462300027, + "learning_rate": 3.948467021588775e-07, + "loss": 0.7691773772239685, + "step": 6264 + }, + { + "epoch": 1.4435483870967742, + "grad_norm": 1.2915211655205685, + "learning_rate": 3.945434414425927e-07, + "loss": 0.7638411521911621, + "step": 6265 + }, + { + "epoch": 1.443778801843318, + "grad_norm": 1.0311097608426527, + "learning_rate": 3.942402686075258e-07, + "loss": 0.8138284683227539, + "step": 6266 + }, + { + "epoch": 1.4440092165898617, + "grad_norm": 1.430800234304149, + "learning_rate": 3.939371836976816e-07, + "loss": 0.8404628038406372, + "step": 6267 + }, + { + "epoch": 1.4442396313364054, + "grad_norm": 1.0744818989251388, + "learning_rate": 3.936341867570533e-07, + "loss": 0.7354726791381836, + "step": 6268 + }, + { + "epoch": 1.4444700460829494, + 
"grad_norm": 1.2516347720495873, + "learning_rate": 3.9333127782962003e-07, + "loss": 0.8607511520385742, + "step": 6269 + }, + { + "epoch": 1.444700460829493, + "grad_norm": 1.03787633948696, + "learning_rate": 3.930284569593483e-07, + "loss": 0.7372239232063293, + "step": 6270 + }, + { + "epoch": 1.4449308755760368, + "grad_norm": 1.205690175362699, + "learning_rate": 3.927257241901929e-07, + "loss": 0.8902593851089478, + "step": 6271 + }, + { + "epoch": 1.4451612903225808, + "grad_norm": 1.0978426997676995, + "learning_rate": 3.924230795660947e-07, + "loss": 0.7481765747070312, + "step": 6272 + }, + { + "epoch": 1.4453917050691243, + "grad_norm": 1.1624854693895736, + "learning_rate": 3.9212052313098177e-07, + "loss": 0.6868888139724731, + "step": 6273 + }, + { + "epoch": 1.4456221198156682, + "grad_norm": 1.219538424407328, + "learning_rate": 3.918180549287705e-07, + "loss": 0.6867324709892273, + "step": 6274 + }, + { + "epoch": 1.445852534562212, + "grad_norm": 1.4192898010151693, + "learning_rate": 3.9151567500336323e-07, + "loss": 0.8473105430603027, + "step": 6275 + }, + { + "epoch": 1.4460829493087557, + "grad_norm": 1.2236253801186994, + "learning_rate": 3.912133833986504e-07, + "loss": 0.7629631757736206, + "step": 6276 + }, + { + "epoch": 1.4463133640552996, + "grad_norm": 1.0502703605539807, + "learning_rate": 3.909111801585091e-07, + "loss": 0.9501597881317139, + "step": 6277 + }, + { + "epoch": 1.4465437788018434, + "grad_norm": 1.0568805239624584, + "learning_rate": 3.906090653268037e-07, + "loss": 0.7330536842346191, + "step": 6278 + }, + { + "epoch": 1.446774193548387, + "grad_norm": 1.199243558298224, + "learning_rate": 3.903070389473857e-07, + "loss": 0.907101571559906, + "step": 6279 + }, + { + "epoch": 1.4470046082949308, + "grad_norm": 1.1269939172893009, + "learning_rate": 3.900051010640939e-07, + "loss": 0.8177503347396851, + "step": 6280 + }, + { + "epoch": 1.4472350230414746, + "grad_norm": 1.373102048695832, + "learning_rate": 
3.897032517207538e-07, + "loss": 0.7851059436798096, + "step": 6281 + }, + { + "epoch": 1.4474654377880185, + "grad_norm": 0.8801777971944739, + "learning_rate": 3.8940149096117914e-07, + "loss": 0.7056214809417725, + "step": 6282 + }, + { + "epoch": 1.4476958525345622, + "grad_norm": 1.0831833275731695, + "learning_rate": 3.8909981882916975e-07, + "loss": 0.784143328666687, + "step": 6283 + }, + { + "epoch": 1.447926267281106, + "grad_norm": 1.2368924313085696, + "learning_rate": 3.8879823536851253e-07, + "loss": 0.8157210350036621, + "step": 6284 + }, + { + "epoch": 1.4481566820276497, + "grad_norm": 1.276176943713772, + "learning_rate": 3.884967406229828e-07, + "loss": 0.7329680323600769, + "step": 6285 + }, + { + "epoch": 1.4483870967741934, + "grad_norm": 1.4518343581804805, + "learning_rate": 3.8819533463634145e-07, + "loss": 0.9214208722114563, + "step": 6286 + }, + { + "epoch": 1.4486175115207374, + "grad_norm": 1.835142969551997, + "learning_rate": 3.8789401745233706e-07, + "loss": 0.8118722438812256, + "step": 6287 + }, + { + "epoch": 1.448847926267281, + "grad_norm": 1.0485981202236783, + "learning_rate": 3.8759278911470615e-07, + "loss": 0.7517364025115967, + "step": 6288 + }, + { + "epoch": 1.4490783410138248, + "grad_norm": 1.0879409814064, + "learning_rate": 3.872916496671711e-07, + "loss": 0.8979834318161011, + "step": 6289 + }, + { + "epoch": 1.4493087557603688, + "grad_norm": 1.6674549792368192, + "learning_rate": 3.8699059915344166e-07, + "loss": 0.9159818887710571, + "step": 6290 + }, + { + "epoch": 1.4495391705069125, + "grad_norm": 1.2582380909324238, + "learning_rate": 3.8668963761721563e-07, + "loss": 0.8176029324531555, + "step": 6291 + }, + { + "epoch": 1.4497695852534562, + "grad_norm": 1.3257834277786367, + "learning_rate": 3.8638876510217666e-07, + "loss": 0.7077589631080627, + "step": 6292 + }, + { + "epoch": 1.45, + "grad_norm": 1.0304546829516872, + "learning_rate": 3.8608798165199585e-07, + "loss": 0.8107718825340271, + "step": 6293 
+ }, + { + "epoch": 1.4502304147465437, + "grad_norm": 1.278146889045901, + "learning_rate": 3.8578728731033214e-07, + "loss": 0.9021201133728027, + "step": 6294 + }, + { + "epoch": 1.4504608294930876, + "grad_norm": 1.5907360314325336, + "learning_rate": 3.854866821208306e-07, + "loss": 0.9134507179260254, + "step": 6295 + }, + { + "epoch": 1.4506912442396314, + "grad_norm": 1.2431886164023473, + "learning_rate": 3.8518616612712317e-07, + "loss": 0.9081463813781738, + "step": 6296 + }, + { + "epoch": 1.450921658986175, + "grad_norm": 1.394869861453301, + "learning_rate": 3.848857393728303e-07, + "loss": 0.7892032861709595, + "step": 6297 + }, + { + "epoch": 1.4511520737327188, + "grad_norm": 1.1702087372951315, + "learning_rate": 3.8458540190155796e-07, + "loss": 0.753928542137146, + "step": 6298 + }, + { + "epoch": 1.4513824884792625, + "grad_norm": 1.1800339185606825, + "learning_rate": 3.8428515375689996e-07, + "loss": 0.6316792964935303, + "step": 6299 + }, + { + "epoch": 1.4516129032258065, + "grad_norm": 1.0510746352372813, + "learning_rate": 3.8398499498243665e-07, + "loss": 0.6569210290908813, + "step": 6300 + }, + { + "epoch": 1.4518433179723502, + "grad_norm": 1.2827982624069105, + "learning_rate": 3.836849256217355e-07, + "loss": 0.9082256555557251, + "step": 6301 + }, + { + "epoch": 1.452073732718894, + "grad_norm": 1.2539326790404104, + "learning_rate": 3.833849457183519e-07, + "loss": 0.6533655524253845, + "step": 6302 + }, + { + "epoch": 1.452304147465438, + "grad_norm": 1.1962706885387824, + "learning_rate": 3.830850553158271e-07, + "loss": 0.8181168437004089, + "step": 6303 + }, + { + "epoch": 1.4525345622119816, + "grad_norm": 1.191632474290621, + "learning_rate": 3.827852544576895e-07, + "loss": 0.8258780241012573, + "step": 6304 + }, + { + "epoch": 1.4527649769585254, + "grad_norm": 1.2200843626761786, + "learning_rate": 3.824855431874555e-07, + "loss": 0.7917114496231079, + "step": 6305 + }, + { + "epoch": 1.452995391705069, + "grad_norm": 
1.1119249100754447, + "learning_rate": 3.821859215486274e-07, + "loss": 0.7523643970489502, + "step": 6306 + }, + { + "epoch": 1.4532258064516128, + "grad_norm": 1.173507656799684, + "learning_rate": 3.818863895846945e-07, + "loss": 0.7248106002807617, + "step": 6307 + }, + { + "epoch": 1.4534562211981568, + "grad_norm": 1.0384099625968284, + "learning_rate": 3.815869473391343e-07, + "loss": 0.6663920879364014, + "step": 6308 + }, + { + "epoch": 1.4536866359447005, + "grad_norm": 1.2904533830018654, + "learning_rate": 3.8128759485540995e-07, + "loss": 0.887082576751709, + "step": 6309 + }, + { + "epoch": 1.4539170506912442, + "grad_norm": 1.176731626067417, + "learning_rate": 3.8098833217697193e-07, + "loss": 0.8491328954696655, + "step": 6310 + }, + { + "epoch": 1.454147465437788, + "grad_norm": 0.995531509886264, + "learning_rate": 3.806891593472582e-07, + "loss": 0.6749746799468994, + "step": 6311 + }, + { + "epoch": 1.4543778801843317, + "grad_norm": 1.2359927269681388, + "learning_rate": 3.803900764096932e-07, + "loss": 0.7607502937316895, + "step": 6312 + }, + { + "epoch": 1.4546082949308756, + "grad_norm": 0.9855772687954082, + "learning_rate": 3.8009108340768804e-07, + "loss": 0.6713626980781555, + "step": 6313 + }, + { + "epoch": 1.4548387096774194, + "grad_norm": 1.0335982949651026, + "learning_rate": 3.797921803846419e-07, + "loss": 0.7031810879707336, + "step": 6314 + }, + { + "epoch": 1.455069124423963, + "grad_norm": 1.2499044478276522, + "learning_rate": 3.7949336738393955e-07, + "loss": 0.7233775854110718, + "step": 6315 + }, + { + "epoch": 1.455299539170507, + "grad_norm": 1.1902627494977487, + "learning_rate": 3.791946444489532e-07, + "loss": 0.7446990013122559, + "step": 6316 + }, + { + "epoch": 1.4555299539170508, + "grad_norm": 1.0356528338667375, + "learning_rate": 3.7889601162304273e-07, + "loss": 0.731992244720459, + "step": 6317 + }, + { + "epoch": 1.4557603686635945, + "grad_norm": 0.9012124257356037, + "learning_rate": 
3.785974689495539e-07, + "loss": 0.7167335152626038, + "step": 6318 + }, + { + "epoch": 1.4559907834101382, + "grad_norm": 1.0367746360279544, + "learning_rate": 3.7829901647181993e-07, + "loss": 0.7634297609329224, + "step": 6319 + }, + { + "epoch": 1.456221198156682, + "grad_norm": 1.323601627974345, + "learning_rate": 3.7800065423316066e-07, + "loss": 0.7584050893783569, + "step": 6320 + }, + { + "epoch": 1.456451612903226, + "grad_norm": 1.3168506305563585, + "learning_rate": 3.777023822768829e-07, + "loss": 0.7150899171829224, + "step": 6321 + }, + { + "epoch": 1.4566820276497696, + "grad_norm": 1.3142694869577929, + "learning_rate": 3.7740420064628034e-07, + "loss": 0.7821052670478821, + "step": 6322 + }, + { + "epoch": 1.4569124423963133, + "grad_norm": 1.1890463822517086, + "learning_rate": 3.7710610938463405e-07, + "loss": 0.8678094148635864, + "step": 6323 + }, + { + "epoch": 1.457142857142857, + "grad_norm": 1.0929926711457507, + "learning_rate": 3.7680810853521107e-07, + "loss": 0.6953635215759277, + "step": 6324 + }, + { + "epoch": 1.4573732718894008, + "grad_norm": 1.392687245093679, + "learning_rate": 3.765101981412665e-07, + "loss": 0.765946626663208, + "step": 6325 + }, + { + "epoch": 1.4576036866359448, + "grad_norm": 1.2287803375758581, + "learning_rate": 3.7621237824604137e-07, + "loss": 0.8828680515289307, + "step": 6326 + }, + { + "epoch": 1.4578341013824885, + "grad_norm": 1.4191080683791804, + "learning_rate": 3.7591464889276326e-07, + "loss": 0.8916178345680237, + "step": 6327 + }, + { + "epoch": 1.4580645161290322, + "grad_norm": 1.4414543071479498, + "learning_rate": 3.756170101246481e-07, + "loss": 0.7563039064407349, + "step": 6328 + }, + { + "epoch": 1.4582949308755762, + "grad_norm": 1.1488058177567217, + "learning_rate": 3.7531946198489725e-07, + "loss": 0.8548855781555176, + "step": 6329 + }, + { + "epoch": 1.4585253456221199, + "grad_norm": 1.2471941201918813, + "learning_rate": 3.750220045166993e-07, + "loss": 0.8337546586990356, 
+ "step": 6330 + }, + { + "epoch": 1.4587557603686636, + "grad_norm": 1.2665043024049272, + "learning_rate": 3.7472463776323036e-07, + "loss": 0.8909939527511597, + "step": 6331 + }, + { + "epoch": 1.4589861751152073, + "grad_norm": 0.9459101838544814, + "learning_rate": 3.744273617676524e-07, + "loss": 0.629026472568512, + "step": 6332 + }, + { + "epoch": 1.459216589861751, + "grad_norm": 1.245577103796106, + "learning_rate": 3.7413017657311454e-07, + "loss": 0.7264849543571472, + "step": 6333 + }, + { + "epoch": 1.459447004608295, + "grad_norm": 1.0987416494814488, + "learning_rate": 3.738330822227532e-07, + "loss": 0.808081865310669, + "step": 6334 + }, + { + "epoch": 1.4596774193548387, + "grad_norm": 1.145687515640666, + "learning_rate": 3.7353607875969115e-07, + "loss": 0.6092932820320129, + "step": 6335 + }, + { + "epoch": 1.4599078341013825, + "grad_norm": 1.2636271324745916, + "learning_rate": 3.7323916622703756e-07, + "loss": 0.8700584173202515, + "step": 6336 + }, + { + "epoch": 1.4601382488479262, + "grad_norm": 1.2867446987977476, + "learning_rate": 3.7294234466788954e-07, + "loss": 0.8424433469772339, + "step": 6337 + }, + { + "epoch": 1.46036866359447, + "grad_norm": 1.1929868573019329, + "learning_rate": 3.7264561412533013e-07, + "loss": 0.8587443828582764, + "step": 6338 + }, + { + "epoch": 1.4605990783410139, + "grad_norm": 1.1369944171843958, + "learning_rate": 3.7234897464242934e-07, + "loss": 0.7708064913749695, + "step": 6339 + }, + { + "epoch": 1.4608294930875576, + "grad_norm": 0.9599493655503268, + "learning_rate": 3.7205242626224395e-07, + "loss": 0.8226567506790161, + "step": 6340 + }, + { + "epoch": 1.4610599078341013, + "grad_norm": 1.6926769297162396, + "learning_rate": 3.717559690278176e-07, + "loss": 0.8414342403411865, + "step": 6341 + }, + { + "epoch": 1.4612903225806453, + "grad_norm": 1.136325082903018, + "learning_rate": 3.714596029821804e-07, + "loss": 0.765863299369812, + "step": 6342 + }, + { + "epoch": 1.461520737327189, + 
"grad_norm": 1.2033696575950952, + "learning_rate": 3.7116332816834997e-07, + "loss": 0.7253202199935913, + "step": 6343 + }, + { + "epoch": 1.4617511520737327, + "grad_norm": 1.2614732245354896, + "learning_rate": 3.7086714462933e-07, + "loss": 0.786415696144104, + "step": 6344 + }, + { + "epoch": 1.4619815668202765, + "grad_norm": 1.3398597613096093, + "learning_rate": 3.705710524081108e-07, + "loss": 0.8382824659347534, + "step": 6345 + }, + { + "epoch": 1.4622119815668202, + "grad_norm": 1.1421503229190921, + "learning_rate": 3.702750515476705e-07, + "loss": 0.7953319549560547, + "step": 6346 + }, + { + "epoch": 1.4624423963133641, + "grad_norm": 1.1953524657169348, + "learning_rate": 3.699791420909727e-07, + "loss": 0.7897430658340454, + "step": 6347 + }, + { + "epoch": 1.4626728110599079, + "grad_norm": 1.0462269201726477, + "learning_rate": 3.6968332408096804e-07, + "loss": 0.7276254892349243, + "step": 6348 + }, + { + "epoch": 1.4629032258064516, + "grad_norm": 1.2576670635193097, + "learning_rate": 3.693875975605949e-07, + "loss": 0.7318450212478638, + "step": 6349 + }, + { + "epoch": 1.4631336405529953, + "grad_norm": 1.3298595608160129, + "learning_rate": 3.6909196257277676e-07, + "loss": 0.8438090085983276, + "step": 6350 + }, + { + "epoch": 1.463364055299539, + "grad_norm": 1.1958819221255177, + "learning_rate": 3.6879641916042534e-07, + "loss": 0.7977915406227112, + "step": 6351 + }, + { + "epoch": 1.463594470046083, + "grad_norm": 1.5876789525233332, + "learning_rate": 3.685009673664382e-07, + "loss": 0.8845348358154297, + "step": 6352 + }, + { + "epoch": 1.4638248847926267, + "grad_norm": 1.1089282393569035, + "learning_rate": 3.682056072336992e-07, + "loss": 0.8971320986747742, + "step": 6353 + }, + { + "epoch": 1.4640552995391705, + "grad_norm": 1.1499585685789093, + "learning_rate": 3.679103388050803e-07, + "loss": 0.7015302181243896, + "step": 6354 + }, + { + "epoch": 1.4642857142857144, + "grad_norm": 1.058413373940715, + "learning_rate": 
3.676151621234389e-07, + "loss": 0.5953146815299988, + "step": 6355 + }, + { + "epoch": 1.4645161290322581, + "grad_norm": 0.940762320723037, + "learning_rate": 3.673200772316193e-07, + "loss": 0.5794636011123657, + "step": 6356 + }, + { + "epoch": 1.4647465437788019, + "grad_norm": 1.4093031765021824, + "learning_rate": 3.6702508417245324e-07, + "loss": 0.8272292017936707, + "step": 6357 + }, + { + "epoch": 1.4649769585253456, + "grad_norm": 1.2004626750502272, + "learning_rate": 3.6673018298875826e-07, + "loss": 0.7239755392074585, + "step": 6358 + }, + { + "epoch": 1.4652073732718893, + "grad_norm": 1.0592207409293348, + "learning_rate": 3.6643537372333886e-07, + "loss": 0.8597465753555298, + "step": 6359 + }, + { + "epoch": 1.4654377880184333, + "grad_norm": 1.3768417389873642, + "learning_rate": 3.661406564189862e-07, + "loss": 0.7540475130081177, + "step": 6360 + }, + { + "epoch": 1.465668202764977, + "grad_norm": 1.2300552177842492, + "learning_rate": 3.658460311184782e-07, + "loss": 0.793259859085083, + "step": 6361 + }, + { + "epoch": 1.4658986175115207, + "grad_norm": 1.1933122341650848, + "learning_rate": 3.6555149786457883e-07, + "loss": 0.797966718673706, + "step": 6362 + }, + { + "epoch": 1.4661290322580645, + "grad_norm": 1.082541374270611, + "learning_rate": 3.6525705670004016e-07, + "loss": 0.7466796636581421, + "step": 6363 + }, + { + "epoch": 1.4663594470046082, + "grad_norm": 0.9612262339874744, + "learning_rate": 3.6496270766759927e-07, + "loss": 0.7694044709205627, + "step": 6364 + }, + { + "epoch": 1.4665898617511521, + "grad_norm": 1.753828188679532, + "learning_rate": 3.6466845080998043e-07, + "loss": 0.7701553106307983, + "step": 6365 + }, + { + "epoch": 1.4668202764976959, + "grad_norm": 1.0670832455899337, + "learning_rate": 3.643742861698952e-07, + "loss": 0.6718326807022095, + "step": 6366 + }, + { + "epoch": 1.4670506912442396, + "grad_norm": 1.1220075290963027, + "learning_rate": 3.6408021379004086e-07, + "loss": 0.7099052667617798, 
+ "step": 6367 + }, + { + "epoch": 1.4672811059907835, + "grad_norm": 1.0614563823752192, + "learning_rate": 3.6378623371310126e-07, + "loss": 0.8650654554367065, + "step": 6368 + }, + { + "epoch": 1.4675115207373273, + "grad_norm": 1.18691798498221, + "learning_rate": 3.6349234598174794e-07, + "loss": 0.7920950055122375, + "step": 6369 + }, + { + "epoch": 1.467741935483871, + "grad_norm": 1.3672164620265899, + "learning_rate": 3.63198550638638e-07, + "loss": 0.7927969098091125, + "step": 6370 + }, + { + "epoch": 1.4679723502304147, + "grad_norm": 1.6817643007938734, + "learning_rate": 3.6290484772641514e-07, + "loss": 0.9403868913650513, + "step": 6371 + }, + { + "epoch": 1.4682027649769585, + "grad_norm": 1.188245842937741, + "learning_rate": 3.626112372877106e-07, + "loss": 0.9157334566116333, + "step": 6372 + }, + { + "epoch": 1.4684331797235024, + "grad_norm": 1.0918511661649737, + "learning_rate": 3.6231771936514067e-07, + "loss": 0.7742066979408264, + "step": 6373 + }, + { + "epoch": 1.4686635944700461, + "grad_norm": 1.0472722321327697, + "learning_rate": 3.6202429400131006e-07, + "loss": 0.69399094581604, + "step": 6374 + }, + { + "epoch": 1.4688940092165899, + "grad_norm": 1.243240675298042, + "learning_rate": 3.6173096123880854e-07, + "loss": 0.874832272529602, + "step": 6375 + }, + { + "epoch": 1.4691244239631336, + "grad_norm": 0.9504044447465768, + "learning_rate": 3.6143772112021275e-07, + "loss": 0.6685272455215454, + "step": 6376 + }, + { + "epoch": 1.4693548387096773, + "grad_norm": 1.2588614059189167, + "learning_rate": 3.611445736880867e-07, + "loss": 0.7422738671302795, + "step": 6377 + }, + { + "epoch": 1.4695852534562213, + "grad_norm": 1.1563672807518934, + "learning_rate": 3.6085151898498e-07, + "loss": 0.8208622932434082, + "step": 6378 + }, + { + "epoch": 1.469815668202765, + "grad_norm": 1.278791922768039, + "learning_rate": 3.605585570534293e-07, + "loss": 0.8001033663749695, + "step": 6379 + }, + { + "epoch": 1.4700460829493087, + 
"grad_norm": 1.4073194030234843, + "learning_rate": 3.6026568793595744e-07, + "loss": 0.789332926273346, + "step": 6380 + }, + { + "epoch": 1.4702764976958527, + "grad_norm": 1.1542499539799642, + "learning_rate": 3.599729116750742e-07, + "loss": 0.8071820139884949, + "step": 6381 + }, + { + "epoch": 1.4705069124423962, + "grad_norm": 1.3369229588575535, + "learning_rate": 3.5968022831327506e-07, + "loss": 0.8028534054756165, + "step": 6382 + }, + { + "epoch": 1.4707373271889401, + "grad_norm": 1.0119395143433376, + "learning_rate": 3.593876378930435e-07, + "loss": 0.6888329982757568, + "step": 6383 + }, + { + "epoch": 1.4709677419354839, + "grad_norm": 1.285773441215651, + "learning_rate": 3.590951404568483e-07, + "loss": 0.8176132440567017, + "step": 6384 + }, + { + "epoch": 1.4711981566820276, + "grad_norm": 0.9429108192029542, + "learning_rate": 3.588027360471446e-07, + "loss": 0.6715027689933777, + "step": 6385 + }, + { + "epoch": 1.4714285714285715, + "grad_norm": 1.2177133807456715, + "learning_rate": 3.585104247063753e-07, + "loss": 0.8622937798500061, + "step": 6386 + }, + { + "epoch": 1.4716589861751153, + "grad_norm": 1.252482813795077, + "learning_rate": 3.5821820647696864e-07, + "loss": 0.7244299650192261, + "step": 6387 + }, + { + "epoch": 1.471889400921659, + "grad_norm": 1.2422776234152886, + "learning_rate": 3.579260814013393e-07, + "loss": 0.8130464553833008, + "step": 6388 + }, + { + "epoch": 1.4721198156682027, + "grad_norm": 1.739841773852821, + "learning_rate": 3.576340495218897e-07, + "loss": 0.8563692569732666, + "step": 6389 + }, + { + "epoch": 1.4723502304147464, + "grad_norm": 1.1474783445098509, + "learning_rate": 3.573421108810073e-07, + "loss": 0.8315908908843994, + "step": 6390 + }, + { + "epoch": 1.4725806451612904, + "grad_norm": 1.0916407928923948, + "learning_rate": 3.5705026552106645e-07, + "loss": 0.653038740158081, + "step": 6391 + }, + { + "epoch": 1.4728110599078341, + "grad_norm": 1.250110377436999, + "learning_rate": 
3.5675851348442876e-07, + "loss": 0.7511966228485107, + "step": 6392 + }, + { + "epoch": 1.4730414746543778, + "grad_norm": 1.226967151246929, + "learning_rate": 3.564668548134413e-07, + "loss": 0.8675990104675293, + "step": 6393 + }, + { + "epoch": 1.4732718894009218, + "grad_norm": 1.2481066388566375, + "learning_rate": 3.5617528955043765e-07, + "loss": 0.7574094533920288, + "step": 6394 + }, + { + "epoch": 1.4735023041474653, + "grad_norm": 1.3612516426224104, + "learning_rate": 3.5588381773773866e-07, + "loss": 0.7004787921905518, + "step": 6395 + }, + { + "epoch": 1.4737327188940093, + "grad_norm": 1.193988835000252, + "learning_rate": 3.555924394176508e-07, + "loss": 0.680101215839386, + "step": 6396 + }, + { + "epoch": 1.473963133640553, + "grad_norm": 1.2956197944669767, + "learning_rate": 3.55301154632467e-07, + "loss": 0.8340710401535034, + "step": 6397 + }, + { + "epoch": 1.4741935483870967, + "grad_norm": 1.2156451361937963, + "learning_rate": 3.5500996342446756e-07, + "loss": 0.8307079076766968, + "step": 6398 + }, + { + "epoch": 1.4744239631336407, + "grad_norm": 1.3824459968937755, + "learning_rate": 3.547188658359179e-07, + "loss": 0.9614958167076111, + "step": 6399 + }, + { + "epoch": 1.4746543778801844, + "grad_norm": 1.2140973914551956, + "learning_rate": 3.544278619090707e-07, + "loss": 0.782494068145752, + "step": 6400 + }, + { + "epoch": 1.4748847926267281, + "grad_norm": 1.372883571978596, + "learning_rate": 3.5413695168616474e-07, + "loss": 0.7474460601806641, + "step": 6401 + }, + { + "epoch": 1.4751152073732718, + "grad_norm": 1.0929029713656226, + "learning_rate": 3.5384613520942484e-07, + "loss": 0.7182635068893433, + "step": 6402 + }, + { + "epoch": 1.4753456221198156, + "grad_norm": 1.1562679128127753, + "learning_rate": 3.5355541252106336e-07, + "loss": 0.8116436004638672, + "step": 6403 + }, + { + "epoch": 1.4755760368663595, + "grad_norm": 1.1320096436261353, + "learning_rate": 3.5326478366327806e-07, + "loss": 0.8007283210754395, + 
"step": 6404 + }, + { + "epoch": 1.4758064516129032, + "grad_norm": 1.060451283065696, + "learning_rate": 3.5297424867825276e-07, + "loss": 0.7707732915878296, + "step": 6405 + }, + { + "epoch": 1.476036866359447, + "grad_norm": 1.319974893721661, + "learning_rate": 3.5268380760815917e-07, + "loss": 0.8031977415084839, + "step": 6406 + }, + { + "epoch": 1.4762672811059907, + "grad_norm": 1.0847497024921582, + "learning_rate": 3.5239346049515397e-07, + "loss": 0.7113008499145508, + "step": 6407 + }, + { + "epoch": 1.4764976958525344, + "grad_norm": 1.490354792200027, + "learning_rate": 3.521032073813802e-07, + "loss": 0.8069616556167603, + "step": 6408 + }, + { + "epoch": 1.4767281105990784, + "grad_norm": 1.6536617293382079, + "learning_rate": 3.518130483089686e-07, + "loss": 0.9780417680740356, + "step": 6409 + }, + { + "epoch": 1.476958525345622, + "grad_norm": 1.0393285063529043, + "learning_rate": 3.515229833200351e-07, + "loss": 0.765299379825592, + "step": 6410 + }, + { + "epoch": 1.4771889400921658, + "grad_norm": 0.9792702634570369, + "learning_rate": 3.512330124566816e-07, + "loss": 0.7279179096221924, + "step": 6411 + }, + { + "epoch": 1.4774193548387098, + "grad_norm": 1.3765526641198769, + "learning_rate": 3.509431357609978e-07, + "loss": 0.8429825901985168, + "step": 6412 + }, + { + "epoch": 1.4776497695852535, + "grad_norm": 1.2876523066268597, + "learning_rate": 3.506533532750586e-07, + "loss": 0.741936206817627, + "step": 6413 + }, + { + "epoch": 1.4778801843317972, + "grad_norm": 1.0841845353527741, + "learning_rate": 3.5036366504092527e-07, + "loss": 0.6841387748718262, + "step": 6414 + }, + { + "epoch": 1.478110599078341, + "grad_norm": 1.1361546476433346, + "learning_rate": 3.5007407110064626e-07, + "loss": 0.7136961221694946, + "step": 6415 + }, + { + "epoch": 1.4783410138248847, + "grad_norm": 1.1942730912918724, + "learning_rate": 3.497845714962554e-07, + "loss": 0.8483344912528992, + "step": 6416 + }, + { + "epoch": 1.4785714285714286, + 
"grad_norm": 1.1525838724707749, + "learning_rate": 3.4949516626977294e-07, + "loss": 0.7060235738754272, + "step": 6417 + }, + { + "epoch": 1.4788018433179724, + "grad_norm": 1.2546190088001288, + "learning_rate": 3.4920585546320625e-07, + "loss": 0.7351587414741516, + "step": 6418 + }, + { + "epoch": 1.479032258064516, + "grad_norm": 1.4082190266306274, + "learning_rate": 3.489166391185482e-07, + "loss": 0.7445269823074341, + "step": 6419 + }, + { + "epoch": 1.4792626728110598, + "grad_norm": 1.2308828080413103, + "learning_rate": 3.4862751727777796e-07, + "loss": 0.795128583908081, + "step": 6420 + }, + { + "epoch": 1.4794930875576036, + "grad_norm": 1.3455737723646244, + "learning_rate": 3.4833848998286133e-07, + "loss": 0.7916193008422852, + "step": 6421 + }, + { + "epoch": 1.4797235023041475, + "grad_norm": 1.2062461099240058, + "learning_rate": 3.480495572757497e-07, + "loss": 0.8279474973678589, + "step": 6422 + }, + { + "epoch": 1.4799539170506912, + "grad_norm": 1.3615355231577309, + "learning_rate": 3.477607191983822e-07, + "loss": 0.9339898824691772, + "step": 6423 + }, + { + "epoch": 1.480184331797235, + "grad_norm": 1.2958649175302657, + "learning_rate": 3.4747197579268296e-07, + "loss": 0.8579660654067993, + "step": 6424 + }, + { + "epoch": 1.480414746543779, + "grad_norm": 1.1935735021965341, + "learning_rate": 3.471833271005622e-07, + "loss": 0.7637878060340881, + "step": 6425 + }, + { + "epoch": 1.4806451612903226, + "grad_norm": 1.2997741786350927, + "learning_rate": 3.4689477316391756e-07, + "loss": 0.8600465059280396, + "step": 6426 + }, + { + "epoch": 1.4808755760368664, + "grad_norm": 0.9725758019670567, + "learning_rate": 3.46606314024632e-07, + "loss": 0.6576759815216064, + "step": 6427 + }, + { + "epoch": 1.48110599078341, + "grad_norm": 1.1289750059608772, + "learning_rate": 3.463179497245747e-07, + "loss": 0.7556706666946411, + "step": 6428 + }, + { + "epoch": 1.4813364055299538, + "grad_norm": 1.3449392913610907, + "learning_rate": 
3.4602968030560196e-07, + "loss": 0.8826701641082764, + "step": 6429 + }, + { + "epoch": 1.4815668202764978, + "grad_norm": 1.1499087478485694, + "learning_rate": 3.457415058095554e-07, + "loss": 0.7352213263511658, + "step": 6430 + }, + { + "epoch": 1.4817972350230415, + "grad_norm": 1.4434298728988502, + "learning_rate": 3.454534262782628e-07, + "loss": 0.8108851909637451, + "step": 6431 + }, + { + "epoch": 1.4820276497695852, + "grad_norm": 1.3070168078927469, + "learning_rate": 3.4516544175353914e-07, + "loss": 0.8595583438873291, + "step": 6432 + }, + { + "epoch": 1.482258064516129, + "grad_norm": 1.1496814595283131, + "learning_rate": 3.448775522771847e-07, + "loss": 0.7194280028343201, + "step": 6433 + }, + { + "epoch": 1.4824884792626727, + "grad_norm": 1.2788780172510947, + "learning_rate": 3.445897578909861e-07, + "loss": 0.8966056108474731, + "step": 6434 + }, + { + "epoch": 1.4827188940092166, + "grad_norm": 1.4168806857520198, + "learning_rate": 3.443020586367167e-07, + "loss": 0.8089771270751953, + "step": 6435 + }, + { + "epoch": 1.4829493087557604, + "grad_norm": 1.3086078413537297, + "learning_rate": 3.4401445455613555e-07, + "loss": 0.7835644483566284, + "step": 6436 + }, + { + "epoch": 1.483179723502304, + "grad_norm": 1.242850049469479, + "learning_rate": 3.4372694569098746e-07, + "loss": 0.7285257577896118, + "step": 6437 + }, + { + "epoch": 1.483410138248848, + "grad_norm": 1.4884020116718253, + "learning_rate": 3.434395320830048e-07, + "loss": 0.9108592867851257, + "step": 6438 + }, + { + "epoch": 1.4836405529953918, + "grad_norm": 1.265305751937672, + "learning_rate": 3.431522137739049e-07, + "loss": 0.7154395580291748, + "step": 6439 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 1.0883673646660943, + "learning_rate": 3.428649908053917e-07, + "loss": 0.6483602523803711, + "step": 6440 + }, + { + "epoch": 1.4841013824884792, + "grad_norm": 1.457129029114168, + "learning_rate": 3.425778632191551e-07, + "loss": 0.8090662956237793, + 
"step": 6441 + }, + { + "epoch": 1.484331797235023, + "grad_norm": 1.428702771444548, + "learning_rate": 3.422908310568712e-07, + "loss": 0.7884642481803894, + "step": 6442 + }, + { + "epoch": 1.484562211981567, + "grad_norm": 1.2738553778883674, + "learning_rate": 3.4200389436020225e-07, + "loss": 0.8628194332122803, + "step": 6443 + }, + { + "epoch": 1.4847926267281106, + "grad_norm": 1.1838310809928603, + "learning_rate": 3.4171705317079723e-07, + "loss": 0.8192269802093506, + "step": 6444 + }, + { + "epoch": 1.4850230414746544, + "grad_norm": 1.316668872684636, + "learning_rate": 3.4143030753029054e-07, + "loss": 0.7768012285232544, + "step": 6445 + }, + { + "epoch": 1.485253456221198, + "grad_norm": 1.2324282268735118, + "learning_rate": 3.411436574803026e-07, + "loss": 0.7420791387557983, + "step": 6446 + }, + { + "epoch": 1.4854838709677418, + "grad_norm": 1.3102449774544425, + "learning_rate": 3.4085710306244086e-07, + "loss": 0.823938250541687, + "step": 6447 + }, + { + "epoch": 1.4857142857142858, + "grad_norm": 1.1672900255965821, + "learning_rate": 3.405706443182976e-07, + "loss": 0.7215089201927185, + "step": 6448 + }, + { + "epoch": 1.4859447004608295, + "grad_norm": 1.138949819615918, + "learning_rate": 3.4028428128945286e-07, + "loss": 0.8301436901092529, + "step": 6449 + }, + { + "epoch": 1.4861751152073732, + "grad_norm": 1.1171858572091258, + "learning_rate": 3.399980140174712e-07, + "loss": 0.6727990508079529, + "step": 6450 + }, + { + "epoch": 1.4864055299539172, + "grad_norm": 1.0969379356045603, + "learning_rate": 3.397118425439038e-07, + "loss": 0.8364754319190979, + "step": 6451 + }, + { + "epoch": 1.486635944700461, + "grad_norm": 1.2714499604529865, + "learning_rate": 3.394257669102887e-07, + "loss": 0.7241604328155518, + "step": 6452 + }, + { + "epoch": 1.4868663594470046, + "grad_norm": 1.429435383993002, + "learning_rate": 3.3913978715814897e-07, + "loss": 0.7762489914894104, + "step": 6453 + }, + { + "epoch": 1.4870967741935484, + 
"grad_norm": 1.3862601382620485, + "learning_rate": 3.38853903328994e-07, + "loss": 0.9278200268745422, + "step": 6454 + }, + { + "epoch": 1.487327188940092, + "grad_norm": 0.9454491284474441, + "learning_rate": 3.3856811546431994e-07, + "loss": 0.693070113658905, + "step": 6455 + }, + { + "epoch": 1.487557603686636, + "grad_norm": 1.4631261008304832, + "learning_rate": 3.382824236056084e-07, + "loss": 0.8541949987411499, + "step": 6456 + }, + { + "epoch": 1.4877880184331798, + "grad_norm": 1.1080747331787868, + "learning_rate": 3.379968277943267e-07, + "loss": 0.7638850212097168, + "step": 6457 + }, + { + "epoch": 1.4880184331797235, + "grad_norm": 1.5396868765343736, + "learning_rate": 3.377113280719295e-07, + "loss": 0.8240739107131958, + "step": 6458 + }, + { + "epoch": 1.4882488479262672, + "grad_norm": 1.037738997106509, + "learning_rate": 3.374259244798562e-07, + "loss": 0.7360633015632629, + "step": 6459 + }, + { + "epoch": 1.488479262672811, + "grad_norm": 1.1287418173516828, + "learning_rate": 3.371406170595328e-07, + "loss": 0.8626362085342407, + "step": 6460 + }, + { + "epoch": 1.488709677419355, + "grad_norm": 1.553133844655672, + "learning_rate": 3.368554058523713e-07, + "loss": 0.8499895334243774, + "step": 6461 + }, + { + "epoch": 1.4889400921658986, + "grad_norm": 1.1568237777707882, + "learning_rate": 3.3657029089976985e-07, + "loss": 0.8335039615631104, + "step": 6462 + }, + { + "epoch": 1.4891705069124423, + "grad_norm": 1.1957026633378731, + "learning_rate": 3.3628527224311196e-07, + "loss": 0.8154790997505188, + "step": 6463 + }, + { + "epoch": 1.4894009216589863, + "grad_norm": 1.2851436413791164, + "learning_rate": 3.3600034992376856e-07, + "loss": 0.7952951192855835, + "step": 6464 + }, + { + "epoch": 1.48963133640553, + "grad_norm": 1.5993164682006433, + "learning_rate": 3.3571552398309535e-07, + "loss": 0.7227598428726196, + "step": 6465 + }, + { + "epoch": 1.4898617511520738, + "grad_norm": 1.1773028491207966, + "learning_rate": 
3.3543079446243404e-07, + "loss": 0.6703250408172607, + "step": 6466 + }, + { + "epoch": 1.4900921658986175, + "grad_norm": 1.152932493736184, + "learning_rate": 3.351461614031136e-07, + "loss": 0.7468122243881226, + "step": 6467 + }, + { + "epoch": 1.4903225806451612, + "grad_norm": 1.2933114629854674, + "learning_rate": 3.348616248464475e-07, + "loss": 0.8649178743362427, + "step": 6468 + }, + { + "epoch": 1.4905529953917052, + "grad_norm": 1.013990280281903, + "learning_rate": 3.345771848337359e-07, + "loss": 0.8229554295539856, + "step": 6469 + }, + { + "epoch": 1.4907834101382489, + "grad_norm": 1.3471402030282535, + "learning_rate": 3.342928414062652e-07, + "loss": 0.7275597453117371, + "step": 6470 + }, + { + "epoch": 1.4910138248847926, + "grad_norm": 1.095192106330462, + "learning_rate": 3.3400859460530737e-07, + "loss": 0.657899796962738, + "step": 6471 + }, + { + "epoch": 1.4912442396313363, + "grad_norm": 1.0853913135805695, + "learning_rate": 3.3372444447212e-07, + "loss": 0.7579425573348999, + "step": 6472 + }, + { + "epoch": 1.49147465437788, + "grad_norm": 1.1304988993649205, + "learning_rate": 3.334403910479479e-07, + "loss": 0.8707751631736755, + "step": 6473 + }, + { + "epoch": 1.491705069124424, + "grad_norm": 1.3454806591137698, + "learning_rate": 3.331564343740201e-07, + "loss": 0.7923752665519714, + "step": 6474 + }, + { + "epoch": 1.4919354838709677, + "grad_norm": 1.2646674876263875, + "learning_rate": 3.328725744915536e-07, + "loss": 0.8308948278427124, + "step": 6475 + }, + { + "epoch": 1.4921658986175115, + "grad_norm": 1.4029553470676885, + "learning_rate": 3.3258881144174967e-07, + "loss": 0.8984559774398804, + "step": 6476 + }, + { + "epoch": 1.4923963133640554, + "grad_norm": 1.2358798089346714, + "learning_rate": 3.3230514526579614e-07, + "loss": 0.9279792308807373, + "step": 6477 + }, + { + "epoch": 1.4926267281105992, + "grad_norm": 1.4094728162225774, + "learning_rate": 3.3202157600486655e-07, + "loss": 0.7934520244598389, + 
"step": 6478 + }, + { + "epoch": 1.4928571428571429, + "grad_norm": 1.658388461731414, + "learning_rate": 3.3173810370012136e-07, + "loss": 0.8463613390922546, + "step": 6479 + }, + { + "epoch": 1.4930875576036866, + "grad_norm": 1.339159678666659, + "learning_rate": 3.314547283927057e-07, + "loss": 0.8087350130081177, + "step": 6480 + }, + { + "epoch": 1.4933179723502303, + "grad_norm": 1.2350842201271304, + "learning_rate": 3.3117145012375113e-07, + "loss": 0.7711254358291626, + "step": 6481 + }, + { + "epoch": 1.4935483870967743, + "grad_norm": 1.2753839749074636, + "learning_rate": 3.3088826893437526e-07, + "loss": 0.7140679359436035, + "step": 6482 + }, + { + "epoch": 1.493778801843318, + "grad_norm": 1.1506161777222865, + "learning_rate": 3.3060518486568103e-07, + "loss": 0.7074463367462158, + "step": 6483 + }, + { + "epoch": 1.4940092165898617, + "grad_norm": 0.8291232249474376, + "learning_rate": 3.3032219795875827e-07, + "loss": 0.7560559511184692, + "step": 6484 + }, + { + "epoch": 1.4942396313364055, + "grad_norm": 1.4344445687170468, + "learning_rate": 3.3003930825468194e-07, + "loss": 0.7699435353279114, + "step": 6485 + }, + { + "epoch": 1.4944700460829492, + "grad_norm": 1.277197987117764, + "learning_rate": 3.297565157945129e-07, + "loss": 0.817488431930542, + "step": 6486 + }, + { + "epoch": 1.4947004608294931, + "grad_norm": 1.1511534488778172, + "learning_rate": 3.294738206192985e-07, + "loss": 0.7534141540527344, + "step": 6487 + }, + { + "epoch": 1.4949308755760369, + "grad_norm": 1.1924480850963226, + "learning_rate": 3.291912227700715e-07, + "loss": 0.7423536777496338, + "step": 6488 + }, + { + "epoch": 1.4951612903225806, + "grad_norm": 0.952322784205302, + "learning_rate": 3.2890872228785003e-07, + "loss": 0.7181985378265381, + "step": 6489 + }, + { + "epoch": 1.4953917050691246, + "grad_norm": 1.270224090305602, + "learning_rate": 3.286263192136396e-07, + "loss": 0.7143938541412354, + "step": 6490 + }, + { + "epoch": 1.4956221198156683, + 
"grad_norm": 1.3995714023195414, + "learning_rate": 3.2834401358843e-07, + "loss": 0.8247631788253784, + "step": 6491 + }, + { + "epoch": 1.495852534562212, + "grad_norm": 1.1449759372564834, + "learning_rate": 3.280618054531974e-07, + "loss": 0.8627001047134399, + "step": 6492 + }, + { + "epoch": 1.4960829493087557, + "grad_norm": 1.3482725665599868, + "learning_rate": 3.2777969484890456e-07, + "loss": 0.813239574432373, + "step": 6493 + }, + { + "epoch": 1.4963133640552995, + "grad_norm": 0.9200346218481302, + "learning_rate": 3.2749768181649904e-07, + "loss": 0.6633884310722351, + "step": 6494 + }, + { + "epoch": 1.4965437788018434, + "grad_norm": 1.4278232440541767, + "learning_rate": 3.272157663969144e-07, + "loss": 0.7760038375854492, + "step": 6495 + }, + { + "epoch": 1.4967741935483871, + "grad_norm": 1.3200918095184475, + "learning_rate": 3.2693394863107105e-07, + "loss": 0.9352993369102478, + "step": 6496 + }, + { + "epoch": 1.4970046082949309, + "grad_norm": 1.2344539392280847, + "learning_rate": 3.2665222855987397e-07, + "loss": 0.7011485695838928, + "step": 6497 + }, + { + "epoch": 1.4972350230414746, + "grad_norm": 1.2183950494067446, + "learning_rate": 3.263706062242142e-07, + "loss": 0.9008398056030273, + "step": 6498 + }, + { + "epoch": 1.4974654377880183, + "grad_norm": 1.194608222128912, + "learning_rate": 3.260890816649694e-07, + "loss": 0.768037736415863, + "step": 6499 + }, + { + "epoch": 1.4976958525345623, + "grad_norm": 0.9220148240054391, + "learning_rate": 3.258076549230024e-07, + "loss": 0.7603639364242554, + "step": 6500 + }, + { + "epoch": 1.497926267281106, + "grad_norm": 1.3821459764557307, + "learning_rate": 3.2552632603916177e-07, + "loss": 0.7984024286270142, + "step": 6501 + }, + { + "epoch": 1.4981566820276497, + "grad_norm": 1.415424035035242, + "learning_rate": 3.2524509505428187e-07, + "loss": 0.8466978073120117, + "step": 6502 + }, + { + "epoch": 1.4983870967741937, + "grad_norm": 1.3670825801142161, + "learning_rate": 
3.24963962009183e-07, + "loss": 0.7964911460876465, + "step": 6503 + }, + { + "epoch": 1.4986175115207372, + "grad_norm": 1.3123478568754847, + "learning_rate": 3.246829269446716e-07, + "loss": 0.7551665306091309, + "step": 6504 + }, + { + "epoch": 1.4988479262672811, + "grad_norm": 1.3193018902055227, + "learning_rate": 3.2440198990153945e-07, + "loss": 0.6468057632446289, + "step": 6505 + }, + { + "epoch": 1.4990783410138249, + "grad_norm": 1.2139801652485203, + "learning_rate": 3.241211509205638e-07, + "loss": 0.7739330530166626, + "step": 6506 + }, + { + "epoch": 1.4993087557603686, + "grad_norm": 1.3659144717848737, + "learning_rate": 3.238404100425085e-07, + "loss": 0.8205568790435791, + "step": 6507 + }, + { + "epoch": 1.4995391705069125, + "grad_norm": 0.958982052367848, + "learning_rate": 3.235597673081227e-07, + "loss": 0.667822003364563, + "step": 6508 + }, + { + "epoch": 1.4997695852534563, + "grad_norm": 1.2374356667574686, + "learning_rate": 3.232792227581409e-07, + "loss": 0.7829990386962891, + "step": 6509 + }, + { + "epoch": 1.5, + "grad_norm": 1.1404525757399535, + "learning_rate": 3.229987764332843e-07, + "loss": 0.768509566783905, + "step": 6510 + }, + { + "epoch": 1.5002304147465437, + "grad_norm": 1.3651547247057954, + "learning_rate": 3.227184283742591e-07, + "loss": 0.8448585867881775, + "step": 6511 + }, + { + "epoch": 1.5004608294930875, + "grad_norm": 1.2722097281432705, + "learning_rate": 3.2243817862175705e-07, + "loss": 0.6929391622543335, + "step": 6512 + }, + { + "epoch": 1.5006912442396314, + "grad_norm": 0.8983294061831201, + "learning_rate": 3.221580272164567e-07, + "loss": 0.6453005075454712, + "step": 6513 + }, + { + "epoch": 1.5009216589861751, + "grad_norm": 1.135934251126359, + "learning_rate": 3.2187797419902143e-07, + "loss": 0.7870811820030212, + "step": 6514 + }, + { + "epoch": 1.5011520737327189, + "grad_norm": 1.264885386654941, + "learning_rate": 3.2159801961010013e-07, + "loss": 0.7032002210617065, + "step": 6515 + }, 
+ { + "epoch": 1.5013824884792628, + "grad_norm": 1.5122369312915371, + "learning_rate": 3.213181634903285e-07, + "loss": 0.8018448352813721, + "step": 6516 + }, + { + "epoch": 1.5016129032258063, + "grad_norm": 1.0930874016239036, + "learning_rate": 3.2103840588032707e-07, + "loss": 0.7066134810447693, + "step": 6517 + }, + { + "epoch": 1.5018433179723503, + "grad_norm": 1.049874936950677, + "learning_rate": 3.207587468207018e-07, + "loss": 0.6835265159606934, + "step": 6518 + }, + { + "epoch": 1.502073732718894, + "grad_norm": 1.1994114231897615, + "learning_rate": 3.204791863520455e-07, + "loss": 0.6679749488830566, + "step": 6519 + }, + { + "epoch": 1.5023041474654377, + "grad_norm": 1.1780261658003046, + "learning_rate": 3.201997245149358e-07, + "loss": 0.781232476234436, + "step": 6520 + }, + { + "epoch": 1.5025345622119817, + "grad_norm": 1.156188659495686, + "learning_rate": 3.1992036134993616e-07, + "loss": 0.7853572368621826, + "step": 6521 + }, + { + "epoch": 1.5027649769585254, + "grad_norm": 1.3156565650023675, + "learning_rate": 3.1964109689759576e-07, + "loss": 0.8220832943916321, + "step": 6522 + }, + { + "epoch": 1.5029953917050691, + "grad_norm": 1.0874952614272322, + "learning_rate": 3.193619311984491e-07, + "loss": 0.8046013116836548, + "step": 6523 + }, + { + "epoch": 1.5032258064516129, + "grad_norm": 1.1481673715256613, + "learning_rate": 3.190828642930174e-07, + "loss": 0.7123414874076843, + "step": 6524 + }, + { + "epoch": 1.5034562211981566, + "grad_norm": 1.2507360463805697, + "learning_rate": 3.188038962218066e-07, + "loss": 0.7913625240325928, + "step": 6525 + }, + { + "epoch": 1.5036866359447005, + "grad_norm": 1.2264479129016654, + "learning_rate": 3.185250270253081e-07, + "loss": 0.7837327718734741, + "step": 6526 + }, + { + "epoch": 1.5039170506912443, + "grad_norm": 1.3223188543102071, + "learning_rate": 3.182462567440002e-07, + "loss": 0.7799992561340332, + "step": 6527 + }, + { + "epoch": 1.504147465437788, + "grad_norm": 
1.2906027927929307, + "learning_rate": 3.1796758541834545e-07, + "loss": 0.8591268062591553, + "step": 6528 + }, + { + "epoch": 1.504377880184332, + "grad_norm": 1.1175058933428492, + "learning_rate": 3.176890130887926e-07, + "loss": 0.6886378526687622, + "step": 6529 + }, + { + "epoch": 1.5046082949308754, + "grad_norm": 1.4969255628781877, + "learning_rate": 3.1741053979577647e-07, + "loss": 0.8641641139984131, + "step": 6530 + }, + { + "epoch": 1.5048387096774194, + "grad_norm": 1.3022265823882768, + "learning_rate": 3.1713216557971687e-07, + "loss": 0.8215552568435669, + "step": 6531 + }, + { + "epoch": 1.5050691244239631, + "grad_norm": 1.332125606212464, + "learning_rate": 3.1685389048101906e-07, + "loss": 0.8506371974945068, + "step": 6532 + }, + { + "epoch": 1.5052995391705069, + "grad_norm": 1.371517957091787, + "learning_rate": 3.1657571454007515e-07, + "loss": 0.740912675857544, + "step": 6533 + }, + { + "epoch": 1.5055299539170508, + "grad_norm": 1.0380741302125553, + "learning_rate": 3.162976377972614e-07, + "loss": 0.6458308696746826, + "step": 6534 + }, + { + "epoch": 1.5057603686635943, + "grad_norm": 1.0737980819278299, + "learning_rate": 3.1601966029294013e-07, + "loss": 0.7368316650390625, + "step": 6535 + }, + { + "epoch": 1.5059907834101383, + "grad_norm": 1.1008143995933475, + "learning_rate": 3.1574178206746003e-07, + "loss": 0.6648637056350708, + "step": 6536 + }, + { + "epoch": 1.506221198156682, + "grad_norm": 1.2751679142768328, + "learning_rate": 3.154640031611544e-07, + "loss": 0.706688404083252, + "step": 6537 + }, + { + "epoch": 1.5064516129032257, + "grad_norm": 1.0597131508477158, + "learning_rate": 3.1518632361434263e-07, + "loss": 0.722059965133667, + "step": 6538 + }, + { + "epoch": 1.5066820276497697, + "grad_norm": 1.1420297201861054, + "learning_rate": 3.14908743467329e-07, + "loss": 0.7098807096481323, + "step": 6539 + }, + { + "epoch": 1.5069124423963134, + "grad_norm": 1.1123804283277692, + "learning_rate": 
3.1463126276040454e-07, + "loss": 0.7131781578063965, + "step": 6540 + }, + { + "epoch": 1.5071428571428571, + "grad_norm": 0.757735402153, + "learning_rate": 3.143538815338451e-07, + "loss": 0.7292109727859497, + "step": 6541 + }, + { + "epoch": 1.507373271889401, + "grad_norm": 1.1145586582073062, + "learning_rate": 3.1407659982791204e-07, + "loss": 0.7305347919464111, + "step": 6542 + }, + { + "epoch": 1.5076036866359446, + "grad_norm": 1.3246030999705258, + "learning_rate": 3.1379941768285247e-07, + "loss": 0.8072094321250916, + "step": 6543 + }, + { + "epoch": 1.5078341013824885, + "grad_norm": 1.2831968996332677, + "learning_rate": 3.135223351388987e-07, + "loss": 0.8772450685501099, + "step": 6544 + }, + { + "epoch": 1.5080645161290323, + "grad_norm": 1.1816139196453221, + "learning_rate": 3.1324535223626957e-07, + "loss": 0.8463687896728516, + "step": 6545 + }, + { + "epoch": 1.508294930875576, + "grad_norm": 1.1937564350019036, + "learning_rate": 3.1296846901516806e-07, + "loss": 0.6764696836471558, + "step": 6546 + }, + { + "epoch": 1.50852534562212, + "grad_norm": 1.198918569491841, + "learning_rate": 3.126916855157841e-07, + "loss": 0.8395411968231201, + "step": 6547 + }, + { + "epoch": 1.5087557603686634, + "grad_norm": 1.0607235882989698, + "learning_rate": 3.1241500177829195e-07, + "loss": 0.8227219581604004, + "step": 6548 + }, + { + "epoch": 1.5089861751152074, + "grad_norm": 1.1677688606359355, + "learning_rate": 3.121384178428519e-07, + "loss": 0.7079675197601318, + "step": 6549 + }, + { + "epoch": 1.5092165898617511, + "grad_norm": 1.2218836381096956, + "learning_rate": 3.1186193374961014e-07, + "loss": 0.7792578935623169, + "step": 6550 + }, + { + "epoch": 1.5094470046082948, + "grad_norm": 1.403777710630671, + "learning_rate": 3.1158554953869776e-07, + "loss": 0.7821195125579834, + "step": 6551 + }, + { + "epoch": 1.5096774193548388, + "grad_norm": 1.0979873084769438, + "learning_rate": 3.1130926525023114e-07, + "loss": 0.6640183329582214, + 
"step": 6552 + }, + { + "epoch": 1.5099078341013825, + "grad_norm": 1.4504991573195685, + "learning_rate": 3.110330809243134e-07, + "loss": 0.8087342977523804, + "step": 6553 + }, + { + "epoch": 1.5101382488479262, + "grad_norm": 1.283455986462282, + "learning_rate": 3.1075699660103184e-07, + "loss": 0.7716038227081299, + "step": 6554 + }, + { + "epoch": 1.5103686635944702, + "grad_norm": 1.3208739442859028, + "learning_rate": 3.1048101232045943e-07, + "loss": 0.8408910632133484, + "step": 6555 + }, + { + "epoch": 1.5105990783410137, + "grad_norm": 1.3122071557376964, + "learning_rate": 3.1020512812265564e-07, + "loss": 0.8799750804901123, + "step": 6556 + }, + { + "epoch": 1.5108294930875577, + "grad_norm": 1.0380590960697713, + "learning_rate": 3.0992934404766426e-07, + "loss": 0.6534945368766785, + "step": 6557 + }, + { + "epoch": 1.5110599078341014, + "grad_norm": 1.2848519038217197, + "learning_rate": 3.0965366013551507e-07, + "loss": 0.8293032646179199, + "step": 6558 + }, + { + "epoch": 1.511290322580645, + "grad_norm": 1.2211391336842954, + "learning_rate": 3.0937807642622295e-07, + "loss": 0.7486997842788696, + "step": 6559 + }, + { + "epoch": 1.511520737327189, + "grad_norm": 0.9417803138002018, + "learning_rate": 3.0910259295978914e-07, + "loss": 0.7431119680404663, + "step": 6560 + }, + { + "epoch": 1.5117511520737326, + "grad_norm": 1.255174714268565, + "learning_rate": 3.0882720977619927e-07, + "loss": 0.945859432220459, + "step": 6561 + }, + { + "epoch": 1.5119815668202765, + "grad_norm": 1.3772354918121292, + "learning_rate": 3.0855192691542487e-07, + "loss": 0.7894721031188965, + "step": 6562 + }, + { + "epoch": 1.5122119815668202, + "grad_norm": 1.59783124946433, + "learning_rate": 3.082767444174229e-07, + "loss": 0.8593465089797974, + "step": 6563 + }, + { + "epoch": 1.512442396313364, + "grad_norm": 1.1763179818399963, + "learning_rate": 3.080016623221355e-07, + "loss": 0.8297405242919922, + "step": 6564 + }, + { + "epoch": 1.512672811059908, + 
"grad_norm": 1.2146736952008175, + "learning_rate": 3.07726680669491e-07, + "loss": 0.7372928857803345, + "step": 6565 + }, + { + "epoch": 1.5129032258064516, + "grad_norm": 0.987456734373793, + "learning_rate": 3.0745179949940235e-07, + "loss": 0.7031347751617432, + "step": 6566 + }, + { + "epoch": 1.5131336405529954, + "grad_norm": 0.9788253167457012, + "learning_rate": 3.071770188517679e-07, + "loss": 0.7086467742919922, + "step": 6567 + }, + { + "epoch": 1.5133640552995393, + "grad_norm": 1.2854493361240282, + "learning_rate": 3.069023387664723e-07, + "loss": 0.9091345071792603, + "step": 6568 + }, + { + "epoch": 1.5135944700460828, + "grad_norm": 1.3979394006170445, + "learning_rate": 3.066277592833847e-07, + "loss": 0.7470624446868896, + "step": 6569 + }, + { + "epoch": 1.5138248847926268, + "grad_norm": 1.2458050386964743, + "learning_rate": 3.0635328044235965e-07, + "loss": 0.75694739818573, + "step": 6570 + }, + { + "epoch": 1.5140552995391705, + "grad_norm": 1.1257752667184633, + "learning_rate": 3.0607890228323796e-07, + "loss": 0.7832024693489075, + "step": 6571 + }, + { + "epoch": 1.5142857142857142, + "grad_norm": 1.4206979397737705, + "learning_rate": 3.0580462484584455e-07, + "loss": 0.6777220368385315, + "step": 6572 + }, + { + "epoch": 1.5145161290322582, + "grad_norm": 1.1010797667803915, + "learning_rate": 3.055304481699913e-07, + "loss": 0.7748236060142517, + "step": 6573 + }, + { + "epoch": 1.5147465437788017, + "grad_norm": 1.1639246159957346, + "learning_rate": 3.052563722954741e-07, + "loss": 0.7495633363723755, + "step": 6574 + }, + { + "epoch": 1.5149769585253456, + "grad_norm": 1.1319897669216112, + "learning_rate": 3.049823972620744e-07, + "loss": 0.8011484742164612, + "step": 6575 + }, + { + "epoch": 1.5152073732718894, + "grad_norm": 1.3878273723563577, + "learning_rate": 3.0470852310956e-07, + "loss": 0.7480140924453735, + "step": 6576 + }, + { + "epoch": 1.515437788018433, + "grad_norm": 1.1963673851290149, + "learning_rate": 
3.0443474987768305e-07, + "loss": 0.6561319828033447, + "step": 6577 + }, + { + "epoch": 1.515668202764977, + "grad_norm": 1.1887729560806304, + "learning_rate": 3.041610776061813e-07, + "loss": 0.7437188029289246, + "step": 6578 + }, + { + "epoch": 1.5158986175115208, + "grad_norm": 1.2420532978964127, + "learning_rate": 3.0388750633477766e-07, + "loss": 0.7429096698760986, + "step": 6579 + }, + { + "epoch": 1.5161290322580645, + "grad_norm": 1.3505114972693866, + "learning_rate": 3.0361403610318125e-07, + "loss": 0.859411358833313, + "step": 6580 + }, + { + "epoch": 1.5163594470046085, + "grad_norm": 0.9758931256825946, + "learning_rate": 3.0334066695108565e-07, + "loss": 0.7636305093765259, + "step": 6581 + }, + { + "epoch": 1.516589861751152, + "grad_norm": 1.1796162666849943, + "learning_rate": 3.030673989181699e-07, + "loss": 0.8331989049911499, + "step": 6582 + }, + { + "epoch": 1.516820276497696, + "grad_norm": 1.0763217337155384, + "learning_rate": 3.0279423204409857e-07, + "loss": 0.770574688911438, + "step": 6583 + }, + { + "epoch": 1.5170506912442396, + "grad_norm": 1.3524367915089308, + "learning_rate": 3.025211663685213e-07, + "loss": 0.7470898628234863, + "step": 6584 + }, + { + "epoch": 1.5172811059907834, + "grad_norm": 1.2515745730030696, + "learning_rate": 3.022482019310736e-07, + "loss": 0.7907510995864868, + "step": 6585 + }, + { + "epoch": 1.5175115207373273, + "grad_norm": 1.1087989572536945, + "learning_rate": 3.019753387713757e-07, + "loss": 0.751417338848114, + "step": 6586 + }, + { + "epoch": 1.5177419354838708, + "grad_norm": 1.3862652872284045, + "learning_rate": 3.01702576929033e-07, + "loss": 0.8987867832183838, + "step": 6587 + }, + { + "epoch": 1.5179723502304148, + "grad_norm": 1.2098170472034613, + "learning_rate": 3.0142991644363714e-07, + "loss": 0.7618268728256226, + "step": 6588 + }, + { + "epoch": 1.5182027649769585, + "grad_norm": 1.4029958928912587, + "learning_rate": 3.011573573547641e-07, + "loss": 0.9358207583427429, + 
"step": 6589 + }, + { + "epoch": 1.5184331797235022, + "grad_norm": 1.4434031985489326, + "learning_rate": 3.008848997019753e-07, + "loss": 0.6549144387245178, + "step": 6590 + }, + { + "epoch": 1.5186635944700462, + "grad_norm": 1.293720092884626, + "learning_rate": 3.00612543524818e-07, + "loss": 0.8642100095748901, + "step": 6591 + }, + { + "epoch": 1.51889400921659, + "grad_norm": 1.2852982676947153, + "learning_rate": 3.003402888628241e-07, + "loss": 0.7348824143409729, + "step": 6592 + }, + { + "epoch": 1.5191244239631336, + "grad_norm": 1.0897732641421132, + "learning_rate": 3.000681357555108e-07, + "loss": 0.8737039566040039, + "step": 6593 + }, + { + "epoch": 1.5193548387096776, + "grad_norm": 1.3095413820866733, + "learning_rate": 2.9979608424238134e-07, + "loss": 0.749860405921936, + "step": 6594 + }, + { + "epoch": 1.519585253456221, + "grad_norm": 1.4291988493830527, + "learning_rate": 2.99524134362923e-07, + "loss": 0.7583779096603394, + "step": 6595 + }, + { + "epoch": 1.519815668202765, + "grad_norm": 1.1886499728868618, + "learning_rate": 2.992522861566095e-07, + "loss": 0.7096224427223206, + "step": 6596 + }, + { + "epoch": 1.5200460829493088, + "grad_norm": 1.3265073494412316, + "learning_rate": 2.9898053966289904e-07, + "loss": 0.7813585996627808, + "step": 6597 + }, + { + "epoch": 1.5202764976958525, + "grad_norm": 1.3753919073529044, + "learning_rate": 2.9870889492123517e-07, + "loss": 0.7744605541229248, + "step": 6598 + }, + { + "epoch": 1.5205069124423964, + "grad_norm": 1.4661404938087315, + "learning_rate": 2.984373519710469e-07, + "loss": 0.8398552536964417, + "step": 6599 + }, + { + "epoch": 1.52073732718894, + "grad_norm": 1.1837780856173943, + "learning_rate": 2.981659108517478e-07, + "loss": 0.6853294372558594, + "step": 6600 + }, + { + "epoch": 1.520967741935484, + "grad_norm": 0.9892560165373243, + "learning_rate": 2.97894571602738e-07, + "loss": 0.7673987150192261, + "step": 6601 + }, + { + "epoch": 1.5211981566820276, + 
"grad_norm": 1.0638042713840496, + "learning_rate": 2.976233342634017e-07, + "loss": 0.7000377774238586, + "step": 6602 + }, + { + "epoch": 1.5214285714285714, + "grad_norm": 1.2089273111808856, + "learning_rate": 2.9735219887310857e-07, + "loss": 0.8429346680641174, + "step": 6603 + }, + { + "epoch": 1.5216589861751153, + "grad_norm": 1.4255685153178952, + "learning_rate": 2.970811654712133e-07, + "loss": 0.9118648767471313, + "step": 6604 + }, + { + "epoch": 1.521889400921659, + "grad_norm": 1.0974145188834663, + "learning_rate": 2.9681023409705666e-07, + "loss": 0.7745784521102905, + "step": 6605 + }, + { + "epoch": 1.5221198156682028, + "grad_norm": 1.234720575381531, + "learning_rate": 2.9653940478996367e-07, + "loss": 0.8481245040893555, + "step": 6606 + }, + { + "epoch": 1.5223502304147467, + "grad_norm": 1.1446582960275502, + "learning_rate": 2.9626867758924436e-07, + "loss": 0.8643463850021362, + "step": 6607 + }, + { + "epoch": 1.5225806451612902, + "grad_norm": 1.6406368897457513, + "learning_rate": 2.959980525341953e-07, + "loss": 0.9524952173233032, + "step": 6608 + }, + { + "epoch": 1.5228110599078342, + "grad_norm": 1.067119300713527, + "learning_rate": 2.9572752966409686e-07, + "loss": 0.7153829336166382, + "step": 6609 + }, + { + "epoch": 1.523041474654378, + "grad_norm": 1.1739681134356785, + "learning_rate": 2.954571090182149e-07, + "loss": 0.8332774639129639, + "step": 6610 + }, + { + "epoch": 1.5232718894009216, + "grad_norm": 1.3773090684366749, + "learning_rate": 2.9518679063580123e-07, + "loss": 0.7511743307113647, + "step": 6611 + }, + { + "epoch": 1.5235023041474656, + "grad_norm": 1.2327774867248482, + "learning_rate": 2.9491657455609175e-07, + "loss": 0.715233325958252, + "step": 6612 + }, + { + "epoch": 1.523732718894009, + "grad_norm": 1.139323635074032, + "learning_rate": 2.946464608183078e-07, + "loss": 0.7386246919631958, + "step": 6613 + }, + { + "epoch": 1.523963133640553, + "grad_norm": 1.1904592003911236, + "learning_rate": 
2.943764494616565e-07, + "loss": 0.8337790369987488, + "step": 6614 + }, + { + "epoch": 1.5241935483870968, + "grad_norm": 1.394927398157402, + "learning_rate": 2.941065405253296e-07, + "loss": 0.8447855710983276, + "step": 6615 + }, + { + "epoch": 1.5244239631336405, + "grad_norm": 1.1307960049130217, + "learning_rate": 2.938367340485035e-07, + "loss": 0.7430610060691833, + "step": 6616 + }, + { + "epoch": 1.5246543778801844, + "grad_norm": 1.134552871583557, + "learning_rate": 2.9356703007034087e-07, + "loss": 0.7740806937217712, + "step": 6617 + }, + { + "epoch": 1.5248847926267282, + "grad_norm": 1.2516085920875086, + "learning_rate": 2.9329742862998875e-07, + "loss": 0.7824152708053589, + "step": 6618 + }, + { + "epoch": 1.5251152073732719, + "grad_norm": 1.0852675062610386, + "learning_rate": 2.930279297665792e-07, + "loss": 0.9222463965415955, + "step": 6619 + }, + { + "epoch": 1.5253456221198156, + "grad_norm": 1.8096931577931101, + "learning_rate": 2.927585335192294e-07, + "loss": 0.9548497200012207, + "step": 6620 + }, + { + "epoch": 1.5255760368663593, + "grad_norm": 1.497275795232007, + "learning_rate": 2.9248923992704255e-07, + "loss": 0.9007906913757324, + "step": 6621 + }, + { + "epoch": 1.5258064516129033, + "grad_norm": 1.0647051889661132, + "learning_rate": 2.9222004902910593e-07, + "loss": 0.6932169198989868, + "step": 6622 + }, + { + "epoch": 1.526036866359447, + "grad_norm": 0.9763599663388729, + "learning_rate": 2.919509608644922e-07, + "loss": 0.7327853441238403, + "step": 6623 + }, + { + "epoch": 1.5262672811059907, + "grad_norm": 1.423305414970627, + "learning_rate": 2.916819754722588e-07, + "loss": 0.617963433265686, + "step": 6624 + }, + { + "epoch": 1.5264976958525347, + "grad_norm": 1.3790687935494703, + "learning_rate": 2.914130928914493e-07, + "loss": 1.0567349195480347, + "step": 6625 + }, + { + "epoch": 1.5267281105990782, + "grad_norm": 1.243824261339929, + "learning_rate": 2.9114431316109145e-07, + "loss": 0.7362378835678101, + 
"step": 6626 + }, + { + "epoch": 1.5269585253456222, + "grad_norm": 1.1636178458595106, + "learning_rate": 2.9087563632019774e-07, + "loss": 0.6879991888999939, + "step": 6627 + }, + { + "epoch": 1.5271889400921659, + "grad_norm": 1.2540530060828472, + "learning_rate": 2.9060706240776686e-07, + "loss": 0.7804177403450012, + "step": 6628 + }, + { + "epoch": 1.5274193548387096, + "grad_norm": 1.2450061818881997, + "learning_rate": 2.9033859146278197e-07, + "loss": 0.7459548711776733, + "step": 6629 + }, + { + "epoch": 1.5276497695852536, + "grad_norm": 1.1214229491247267, + "learning_rate": 2.900702235242106e-07, + "loss": 0.7392233610153198, + "step": 6630 + }, + { + "epoch": 1.5278801843317973, + "grad_norm": 1.0862664338119448, + "learning_rate": 2.8980195863100675e-07, + "loss": 0.6956135034561157, + "step": 6631 + }, + { + "epoch": 1.528110599078341, + "grad_norm": 1.1232709572579735, + "learning_rate": 2.8953379682210856e-07, + "loss": 0.7042561769485474, + "step": 6632 + }, + { + "epoch": 1.5283410138248847, + "grad_norm": 1.070241779197473, + "learning_rate": 2.8926573813643884e-07, + "loss": 0.7114298343658447, + "step": 6633 + }, + { + "epoch": 1.5285714285714285, + "grad_norm": 1.0297537166419386, + "learning_rate": 2.8899778261290664e-07, + "loss": 0.862826943397522, + "step": 6634 + }, + { + "epoch": 1.5288018433179724, + "grad_norm": 1.3240716498057261, + "learning_rate": 2.8872993029040506e-07, + "loss": 0.8229889869689941, + "step": 6635 + }, + { + "epoch": 1.5290322580645161, + "grad_norm": 1.2292174291080764, + "learning_rate": 2.884621812078122e-07, + "loss": 0.8058778047561646, + "step": 6636 + }, + { + "epoch": 1.5292626728110599, + "grad_norm": 1.2782782809475366, + "learning_rate": 2.881945354039921e-07, + "loss": 0.8150385618209839, + "step": 6637 + }, + { + "epoch": 1.5294930875576038, + "grad_norm": 1.1137449533588037, + "learning_rate": 2.8792699291779276e-07, + "loss": 0.7067136168479919, + "step": 6638 + }, + { + "epoch": 
1.5297235023041473, + "grad_norm": 1.2793329729310776, + "learning_rate": 2.8765955378804784e-07, + "loss": 0.7725155353546143, + "step": 6639 + }, + { + "epoch": 1.5299539170506913, + "grad_norm": 1.0584861581127705, + "learning_rate": 2.873922180535754e-07, + "loss": 0.5956720113754272, + "step": 6640 + }, + { + "epoch": 1.530184331797235, + "grad_norm": 1.1955034677005214, + "learning_rate": 2.8712498575317934e-07, + "loss": 0.6506170630455017, + "step": 6641 + }, + { + "epoch": 1.5304147465437787, + "grad_norm": 1.0781697188392338, + "learning_rate": 2.86857856925648e-07, + "loss": 0.7860926985740662, + "step": 6642 + }, + { + "epoch": 1.5306451612903227, + "grad_norm": 1.1840723689685375, + "learning_rate": 2.8659083160975464e-07, + "loss": 0.7003993391990662, + "step": 6643 + }, + { + "epoch": 1.5308755760368664, + "grad_norm": 1.1562706768971642, + "learning_rate": 2.8632390984425746e-07, + "loss": 0.6887079477310181, + "step": 6644 + }, + { + "epoch": 1.5311059907834101, + "grad_norm": 1.243117329825752, + "learning_rate": 2.860570916678998e-07, + "loss": 0.788282036781311, + "step": 6645 + }, + { + "epoch": 1.5313364055299539, + "grad_norm": 1.273283187040626, + "learning_rate": 2.8579037711941043e-07, + "loss": 0.771350085735321, + "step": 6646 + }, + { + "epoch": 1.5315668202764976, + "grad_norm": 1.1000030346921834, + "learning_rate": 2.855237662375021e-07, + "loss": 0.6418509483337402, + "step": 6647 + }, + { + "epoch": 1.5317972350230415, + "grad_norm": 1.022873677691871, + "learning_rate": 2.852572590608735e-07, + "loss": 0.6606692671775818, + "step": 6648 + }, + { + "epoch": 1.5320276497695853, + "grad_norm": 1.4727879897773712, + "learning_rate": 2.849908556282076e-07, + "loss": 0.8623934984207153, + "step": 6649 + }, + { + "epoch": 1.532258064516129, + "grad_norm": 1.1678986803146219, + "learning_rate": 2.8472455597817215e-07, + "loss": 0.848737359046936, + "step": 6650 + }, + { + "epoch": 1.532488479262673, + "grad_norm": 1.2265451299303025, + 
"learning_rate": 2.844583601494207e-07, + "loss": 0.7156505584716797, + "step": 6651 + }, + { + "epoch": 1.5327188940092165, + "grad_norm": 1.157360063816448, + "learning_rate": 2.8419226818059116e-07, + "loss": 0.598319411277771, + "step": 6652 + }, + { + "epoch": 1.5329493087557604, + "grad_norm": 1.0128877845083564, + "learning_rate": 2.8392628011030585e-07, + "loss": 0.6320680379867554, + "step": 6653 + }, + { + "epoch": 1.5331797235023041, + "grad_norm": 1.2437383042471344, + "learning_rate": 2.836603959771734e-07, + "loss": 0.8770536184310913, + "step": 6654 + }, + { + "epoch": 1.5334101382488479, + "grad_norm": 1.3327586940769975, + "learning_rate": 2.833946158197862e-07, + "loss": 0.896265983581543, + "step": 6655 + }, + { + "epoch": 1.5336405529953918, + "grad_norm": 1.1058301341236145, + "learning_rate": 2.8312893967672145e-07, + "loss": 0.7194868326187134, + "step": 6656 + }, + { + "epoch": 1.5338709677419353, + "grad_norm": 1.1479450761132848, + "learning_rate": 2.828633675865425e-07, + "loss": 0.7993383407592773, + "step": 6657 + }, + { + "epoch": 1.5341013824884793, + "grad_norm": 1.3252275312162691, + "learning_rate": 2.8259789958779635e-07, + "loss": 0.6808127760887146, + "step": 6658 + }, + { + "epoch": 1.534331797235023, + "grad_norm": 1.3083456260381565, + "learning_rate": 2.823325357190153e-07, + "loss": 0.7348822355270386, + "step": 6659 + }, + { + "epoch": 1.5345622119815667, + "grad_norm": 1.4520629186425333, + "learning_rate": 2.820672760187166e-07, + "loss": 0.7729920744895935, + "step": 6660 + }, + { + "epoch": 1.5347926267281107, + "grad_norm": 1.1927593175103235, + "learning_rate": 2.818021205254021e-07, + "loss": 0.803922176361084, + "step": 6661 + }, + { + "epoch": 1.5350230414746544, + "grad_norm": 1.1316086785563555, + "learning_rate": 2.815370692775594e-07, + "loss": 0.7931007146835327, + "step": 6662 + }, + { + "epoch": 1.5352534562211981, + "grad_norm": 0.9381855495475373, + "learning_rate": 2.8127212231365995e-07, + "loss": 
0.7990511655807495, + "step": 6663 + }, + { + "epoch": 1.535483870967742, + "grad_norm": 1.1449374360466444, + "learning_rate": 2.8100727967216043e-07, + "loss": 0.8163471817970276, + "step": 6664 + }, + { + "epoch": 1.5357142857142856, + "grad_norm": 1.126530672311672, + "learning_rate": 2.8074254139150225e-07, + "loss": 0.7628358602523804, + "step": 6665 + }, + { + "epoch": 1.5359447004608295, + "grad_norm": 1.216707261403855, + "learning_rate": 2.8047790751011216e-07, + "loss": 0.8008173704147339, + "step": 6666 + }, + { + "epoch": 1.5361751152073733, + "grad_norm": 1.4385072008960633, + "learning_rate": 2.802133780664013e-07, + "loss": 0.9139487743377686, + "step": 6667 + }, + { + "epoch": 1.536405529953917, + "grad_norm": 1.479452922561271, + "learning_rate": 2.7994895309876555e-07, + "loss": 0.9436901211738586, + "step": 6668 + }, + { + "epoch": 1.536635944700461, + "grad_norm": 1.1137684825301204, + "learning_rate": 2.7968463264558617e-07, + "loss": 0.8072221875190735, + "step": 6669 + }, + { + "epoch": 1.5368663594470044, + "grad_norm": 1.4031563621096825, + "learning_rate": 2.7942041674522866e-07, + "loss": 0.7434822916984558, + "step": 6670 + }, + { + "epoch": 1.5370967741935484, + "grad_norm": 1.1245525381043615, + "learning_rate": 2.7915630543604394e-07, + "loss": 0.6729850769042969, + "step": 6671 + }, + { + "epoch": 1.5373271889400921, + "grad_norm": 1.2279789151687839, + "learning_rate": 2.7889229875636723e-07, + "loss": 0.8752315044403076, + "step": 6672 + }, + { + "epoch": 1.5375576036866359, + "grad_norm": 1.2125823370266373, + "learning_rate": 2.786283967445184e-07, + "loss": 0.8519413471221924, + "step": 6673 + }, + { + "epoch": 1.5377880184331798, + "grad_norm": 1.2674824603159123, + "learning_rate": 2.783645994388032e-07, + "loss": 0.8868448734283447, + "step": 6674 + }, + { + "epoch": 1.5380184331797235, + "grad_norm": 1.2984993367707722, + "learning_rate": 2.78100906877511e-07, + "loss": 0.9223456978797913, + "step": 6675 + }, + { + "epoch": 
1.5382488479262673, + "grad_norm": 1.0080180068423799, + "learning_rate": 2.7783731909891616e-07, + "loss": 0.799191951751709, + "step": 6676 + }, + { + "epoch": 1.5384792626728112, + "grad_norm": 1.1987572506109172, + "learning_rate": 2.775738361412788e-07, + "loss": 0.7092995643615723, + "step": 6677 + }, + { + "epoch": 1.5387096774193547, + "grad_norm": 1.2206610409098804, + "learning_rate": 2.7731045804284283e-07, + "loss": 0.674687385559082, + "step": 6678 + }, + { + "epoch": 1.5389400921658987, + "grad_norm": 1.4910052625734944, + "learning_rate": 2.77047184841837e-07, + "loss": 0.7366930246353149, + "step": 6679 + }, + { + "epoch": 1.5391705069124424, + "grad_norm": 1.169385374165895, + "learning_rate": 2.767840165764753e-07, + "loss": 0.838137149810791, + "step": 6680 + }, + { + "epoch": 1.5394009216589861, + "grad_norm": 1.2120746756764942, + "learning_rate": 2.765209532849558e-07, + "loss": 0.7507175803184509, + "step": 6681 + }, + { + "epoch": 1.53963133640553, + "grad_norm": 1.2981666739842812, + "learning_rate": 2.7625799500546267e-07, + "loss": 0.8157602548599243, + "step": 6682 + }, + { + "epoch": 1.5398617511520736, + "grad_norm": 1.2345607869860449, + "learning_rate": 2.7599514177616333e-07, + "loss": 0.7779219150543213, + "step": 6683 + }, + { + "epoch": 1.5400921658986175, + "grad_norm": 1.186692939443946, + "learning_rate": 2.757323936352106e-07, + "loss": 0.8261638879776001, + "step": 6684 + }, + { + "epoch": 1.5403225806451613, + "grad_norm": 0.8917527422638705, + "learning_rate": 2.7546975062074197e-07, + "loss": 0.6139177680015564, + "step": 6685 + }, + { + "epoch": 1.540552995391705, + "grad_norm": 1.0945474995666544, + "learning_rate": 2.752072127708802e-07, + "loss": 0.744202733039856, + "step": 6686 + }, + { + "epoch": 1.540783410138249, + "grad_norm": 1.279582503351568, + "learning_rate": 2.749447801237319e-07, + "loss": 0.7685158848762512, + "step": 6687 + }, + { + "epoch": 1.5410138248847927, + "grad_norm": 1.4134776465364736, + 
"learning_rate": 2.7468245271738865e-07, + "loss": 0.7483633756637573, + "step": 6688 + }, + { + "epoch": 1.5412442396313364, + "grad_norm": 1.4452963556936742, + "learning_rate": 2.7442023058992746e-07, + "loss": 0.8967286348342896, + "step": 6689 + }, + { + "epoch": 1.5414746543778803, + "grad_norm": 3.4447797406152922, + "learning_rate": 2.7415811377940933e-07, + "loss": 0.8035085201263428, + "step": 6690 + }, + { + "epoch": 1.5417050691244238, + "grad_norm": 1.2535208224880003, + "learning_rate": 2.738961023238798e-07, + "loss": 0.8504149913787842, + "step": 6691 + }, + { + "epoch": 1.5419354838709678, + "grad_norm": 1.408249398601243, + "learning_rate": 2.736341962613701e-07, + "loss": 0.7612431049346924, + "step": 6692 + }, + { + "epoch": 1.5421658986175115, + "grad_norm": 1.3117649202054886, + "learning_rate": 2.733723956298951e-07, + "loss": 0.6974390745162964, + "step": 6693 + }, + { + "epoch": 1.5423963133640552, + "grad_norm": 1.10015572050179, + "learning_rate": 2.7311070046745476e-07, + "loss": 0.7946817874908447, + "step": 6694 + }, + { + "epoch": 1.5426267281105992, + "grad_norm": 1.3598767034128523, + "learning_rate": 2.728491108120342e-07, + "loss": 0.7801793813705444, + "step": 6695 + }, + { + "epoch": 1.5428571428571427, + "grad_norm": 1.0989233619042245, + "learning_rate": 2.725876267016023e-07, + "loss": 0.720335066318512, + "step": 6696 + }, + { + "epoch": 1.5430875576036867, + "grad_norm": 0.9331707903973574, + "learning_rate": 2.7232624817411376e-07, + "loss": 0.6820393800735474, + "step": 6697 + }, + { + "epoch": 1.5433179723502304, + "grad_norm": 1.2636082158419006, + "learning_rate": 2.7206497526750694e-07, + "loss": 0.8217613697052002, + "step": 6698 + }, + { + "epoch": 1.543548387096774, + "grad_norm": 1.2388683954169015, + "learning_rate": 2.7180380801970525e-07, + "loss": 0.7600520849227905, + "step": 6699 + }, + { + "epoch": 1.543778801843318, + "grad_norm": 1.2564669684453122, + "learning_rate": 2.7154274646861687e-07, + "loss": 
0.9402344226837158, + "step": 6700 + }, + { + "epoch": 1.5440092165898618, + "grad_norm": 1.0720415723340906, + "learning_rate": 2.7128179065213417e-07, + "loss": 0.7470760345458984, + "step": 6701 + }, + { + "epoch": 1.5442396313364055, + "grad_norm": 1.0091593723711232, + "learning_rate": 2.710209406081353e-07, + "loss": 0.6915948390960693, + "step": 6702 + }, + { + "epoch": 1.5444700460829495, + "grad_norm": 1.1829806437851378, + "learning_rate": 2.707601963744817e-07, + "loss": 0.7554904222488403, + "step": 6703 + }, + { + "epoch": 1.544700460829493, + "grad_norm": 0.9892324198221251, + "learning_rate": 2.7049955798902026e-07, + "loss": 0.8197575807571411, + "step": 6704 + }, + { + "epoch": 1.544930875576037, + "grad_norm": 1.3144339350992138, + "learning_rate": 2.702390254895819e-07, + "loss": 0.7106794118881226, + "step": 6705 + }, + { + "epoch": 1.5451612903225806, + "grad_norm": 1.1715761852419602, + "learning_rate": 2.699785989139832e-07, + "loss": 0.6320512294769287, + "step": 6706 + }, + { + "epoch": 1.5453917050691244, + "grad_norm": 1.2156391686389374, + "learning_rate": 2.697182783000246e-07, + "loss": 0.8327566385269165, + "step": 6707 + }, + { + "epoch": 1.5456221198156683, + "grad_norm": 1.2605126330062313, + "learning_rate": 2.6945806368549063e-07, + "loss": 0.8732178211212158, + "step": 6708 + }, + { + "epoch": 1.5458525345622118, + "grad_norm": 1.3881676599881438, + "learning_rate": 2.69197955108152e-07, + "loss": 0.8709380626678467, + "step": 6709 + }, + { + "epoch": 1.5460829493087558, + "grad_norm": 1.2029107229444744, + "learning_rate": 2.689379526057628e-07, + "loss": 0.7821739912033081, + "step": 6710 + }, + { + "epoch": 1.5463133640552995, + "grad_norm": 1.2268892680878298, + "learning_rate": 2.686780562160615e-07, + "loss": 0.8658162355422974, + "step": 6711 + }, + { + "epoch": 1.5465437788018432, + "grad_norm": 0.9914521746084854, + "learning_rate": 2.6841826597677274e-07, + "loss": 0.6354731321334839, + "step": 6712 + }, + { + "epoch": 
1.5467741935483872, + "grad_norm": 1.132983970089502, + "learning_rate": 2.68158581925604e-07, + "loss": 0.8000082969665527, + "step": 6713 + }, + { + "epoch": 1.547004608294931, + "grad_norm": 1.0140012222754493, + "learning_rate": 2.6789900410024804e-07, + "loss": 0.7998030185699463, + "step": 6714 + }, + { + "epoch": 1.5472350230414746, + "grad_norm": 1.2207312006862205, + "learning_rate": 2.676395325383827e-07, + "loss": 0.861609935760498, + "step": 6715 + }, + { + "epoch": 1.5474654377880186, + "grad_norm": 1.2739007648131329, + "learning_rate": 2.6738016727766976e-07, + "loss": 0.8119577765464783, + "step": 6716 + }, + { + "epoch": 1.547695852534562, + "grad_norm": 1.1272023201701244, + "learning_rate": 2.671209083557553e-07, + "loss": 0.7704594135284424, + "step": 6717 + }, + { + "epoch": 1.547926267281106, + "grad_norm": 1.1924986504981143, + "learning_rate": 2.6686175581027114e-07, + "loss": 0.7577236890792847, + "step": 6718 + }, + { + "epoch": 1.5481566820276498, + "grad_norm": 1.438095427566863, + "learning_rate": 2.666027096788326e-07, + "loss": 0.8362265825271606, + "step": 6719 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 1.3282450269784174, + "learning_rate": 2.6634376999903984e-07, + "loss": 0.7604315280914307, + "step": 6720 + }, + { + "epoch": 1.5486175115207375, + "grad_norm": 1.0996855935996066, + "learning_rate": 2.6608493680847757e-07, + "loss": 0.7181323766708374, + "step": 6721 + }, + { + "epoch": 1.548847926267281, + "grad_norm": 1.408245929611007, + "learning_rate": 2.6582621014471495e-07, + "loss": 0.8613896369934082, + "step": 6722 + }, + { + "epoch": 1.549078341013825, + "grad_norm": 1.1355853758662044, + "learning_rate": 2.6556759004530616e-07, + "loss": 0.6254151463508606, + "step": 6723 + }, + { + "epoch": 1.5493087557603686, + "grad_norm": 1.1737642272227355, + "learning_rate": 2.6530907654778957e-07, + "loss": 0.7960973381996155, + "step": 6724 + }, + { + "epoch": 1.5495391705069124, + "grad_norm": 1.1419390810119388, + 
"learning_rate": 2.6505066968968747e-07, + "loss": 0.7899094820022583, + "step": 6725 + }, + { + "epoch": 1.5497695852534563, + "grad_norm": 0.9820941780775652, + "learning_rate": 2.647923695085081e-07, + "loss": 0.6578950881958008, + "step": 6726 + }, + { + "epoch": 1.55, + "grad_norm": 1.3013325638388529, + "learning_rate": 2.64534176041743e-07, + "loss": 0.737798810005188, + "step": 6727 + }, + { + "epoch": 1.5502304147465438, + "grad_norm": 0.9487414790323747, + "learning_rate": 2.642760893268684e-07, + "loss": 0.7809627056121826, + "step": 6728 + }, + { + "epoch": 1.5504608294930877, + "grad_norm": 0.9991258167716155, + "learning_rate": 2.640181094013456e-07, + "loss": 0.6693655252456665, + "step": 6729 + }, + { + "epoch": 1.5506912442396312, + "grad_norm": 0.8705752911958233, + "learning_rate": 2.6376023630262003e-07, + "loss": 0.7264609932899475, + "step": 6730 + }, + { + "epoch": 1.5509216589861752, + "grad_norm": 1.0975251127061347, + "learning_rate": 2.635024700681211e-07, + "loss": 0.7585712671279907, + "step": 6731 + }, + { + "epoch": 1.551152073732719, + "grad_norm": 1.520332751892112, + "learning_rate": 2.6324481073526404e-07, + "loss": 0.7335324287414551, + "step": 6732 + }, + { + "epoch": 1.5513824884792626, + "grad_norm": 1.1271215778218124, + "learning_rate": 2.629872583414473e-07, + "loss": 0.835372805595398, + "step": 6733 + }, + { + "epoch": 1.5516129032258066, + "grad_norm": 1.231737661164668, + "learning_rate": 2.6272981292405405e-07, + "loss": 0.8069926500320435, + "step": 6734 + }, + { + "epoch": 1.55184331797235, + "grad_norm": 1.2110282300687614, + "learning_rate": 2.6247247452045285e-07, + "loss": 0.7548434138298035, + "step": 6735 + }, + { + "epoch": 1.552073732718894, + "grad_norm": 1.281837931597139, + "learning_rate": 2.6221524316799546e-07, + "loss": 0.6907505989074707, + "step": 6736 + }, + { + "epoch": 1.5523041474654378, + "grad_norm": 1.2384070012918627, + "learning_rate": 2.619581189040185e-07, + "loss": 0.8544988632202148, + 
"step": 6737 + }, + { + "epoch": 1.5525345622119815, + "grad_norm": 1.024260684065218, + "learning_rate": 2.6170110176584404e-07, + "loss": 0.7176710367202759, + "step": 6738 + }, + { + "epoch": 1.5527649769585254, + "grad_norm": 1.1771656195687117, + "learning_rate": 2.6144419179077715e-07, + "loss": 0.7160323858261108, + "step": 6739 + }, + { + "epoch": 1.5529953917050692, + "grad_norm": 1.2619778254885654, + "learning_rate": 2.6118738901610806e-07, + "loss": 0.7749248743057251, + "step": 6740 + }, + { + "epoch": 1.553225806451613, + "grad_norm": 1.3014936029444653, + "learning_rate": 2.6093069347911145e-07, + "loss": 0.7701436281204224, + "step": 6741 + }, + { + "epoch": 1.5534562211981566, + "grad_norm": 1.2206842608778186, + "learning_rate": 2.606741052170459e-07, + "loss": 0.6725181341171265, + "step": 6742 + }, + { + "epoch": 1.5536866359447004, + "grad_norm": 1.0193653205430255, + "learning_rate": 2.6041762426715563e-07, + "loss": 0.7730624675750732, + "step": 6743 + }, + { + "epoch": 1.5539170506912443, + "grad_norm": 0.9417911057706564, + "learning_rate": 2.601612506666682e-07, + "loss": 0.7083867788314819, + "step": 6744 + }, + { + "epoch": 1.554147465437788, + "grad_norm": 1.1436343405561136, + "learning_rate": 2.599049844527953e-07, + "loss": 0.7680408954620361, + "step": 6745 + }, + { + "epoch": 1.5543778801843318, + "grad_norm": 0.9401611092461176, + "learning_rate": 2.596488256627346e-07, + "loss": 0.7145194411277771, + "step": 6746 + }, + { + "epoch": 1.5546082949308757, + "grad_norm": 1.6305632532659482, + "learning_rate": 2.593927743336667e-07, + "loss": 0.8626812696456909, + "step": 6747 + }, + { + "epoch": 1.5548387096774192, + "grad_norm": 1.1326626029703477, + "learning_rate": 2.591368305027569e-07, + "loss": 0.775201678276062, + "step": 6748 + }, + { + "epoch": 1.5550691244239632, + "grad_norm": 1.1775115850016065, + "learning_rate": 2.588809942071557e-07, + "loss": 0.9363858699798584, + "step": 6749 + }, + { + "epoch": 1.555299539170507, + 
"grad_norm": 1.0406152793499837, + "learning_rate": 2.5862526548399697e-07, + "loss": 0.8079385757446289, + "step": 6750 + }, + { + "epoch": 1.5555299539170506, + "grad_norm": 1.2405408742249928, + "learning_rate": 2.5836964437039934e-07, + "loss": 0.8635082840919495, + "step": 6751 + }, + { + "epoch": 1.5557603686635946, + "grad_norm": 1.072904507718934, + "learning_rate": 2.581141309034662e-07, + "loss": 0.7840827703475952, + "step": 6752 + }, + { + "epoch": 1.5559907834101383, + "grad_norm": 1.202200191511419, + "learning_rate": 2.5785872512028497e-07, + "loss": 0.7833336591720581, + "step": 6753 + }, + { + "epoch": 1.556221198156682, + "grad_norm": 1.2301348726534915, + "learning_rate": 2.576034270579269e-07, + "loss": 0.7340226173400879, + "step": 6754 + }, + { + "epoch": 1.5564516129032258, + "grad_norm": 0.9782804135142905, + "learning_rate": 2.5734823675344895e-07, + "loss": 0.6423541307449341, + "step": 6755 + }, + { + "epoch": 1.5566820276497695, + "grad_norm": 1.1992594758940591, + "learning_rate": 2.570931542438913e-07, + "loss": 0.7772454619407654, + "step": 6756 + }, + { + "epoch": 1.5569124423963134, + "grad_norm": 1.192101331643462, + "learning_rate": 2.568381795662785e-07, + "loss": 0.8113390803337097, + "step": 6757 + }, + { + "epoch": 1.5571428571428572, + "grad_norm": 1.1257023205339645, + "learning_rate": 2.5658331275762045e-07, + "loss": 0.6688467264175415, + "step": 6758 + }, + { + "epoch": 1.557373271889401, + "grad_norm": 1.0966214019602503, + "learning_rate": 2.5632855385491037e-07, + "loss": 0.8140766620635986, + "step": 6759 + }, + { + "epoch": 1.5576036866359448, + "grad_norm": 1.0260387911312179, + "learning_rate": 2.560739028951262e-07, + "loss": 0.7661154270172119, + "step": 6760 + }, + { + "epoch": 1.5578341013824883, + "grad_norm": 1.2298722431512563, + "learning_rate": 2.558193599152302e-07, + "loss": 0.6781749725341797, + "step": 6761 + }, + { + "epoch": 1.5580645161290323, + "grad_norm": 1.62266115954538, + "learning_rate": 
2.5556492495216865e-07, + "loss": 0.8885331749916077, + "step": 6762 + }, + { + "epoch": 1.558294930875576, + "grad_norm": 1.3197551931331304, + "learning_rate": 2.55310598042873e-07, + "loss": 0.799277663230896, + "step": 6763 + }, + { + "epoch": 1.5585253456221198, + "grad_norm": 1.205426943239231, + "learning_rate": 2.550563792242583e-07, + "loss": 0.8288404941558838, + "step": 6764 + }, + { + "epoch": 1.5587557603686637, + "grad_norm": 1.1206026594489704, + "learning_rate": 2.5480226853322397e-07, + "loss": 0.9452340602874756, + "step": 6765 + }, + { + "epoch": 1.5589861751152074, + "grad_norm": 1.068059951967386, + "learning_rate": 2.5454826600665347e-07, + "loss": 0.6716231107711792, + "step": 6766 + }, + { + "epoch": 1.5592165898617512, + "grad_norm": 0.9885922984637816, + "learning_rate": 2.542943716814157e-07, + "loss": 0.90239417552948, + "step": 6767 + }, + { + "epoch": 1.5594470046082949, + "grad_norm": 1.306788685526263, + "learning_rate": 2.5404058559436225e-07, + "loss": 0.7895521521568298, + "step": 6768 + }, + { + "epoch": 1.5596774193548386, + "grad_norm": 1.1707304874415911, + "learning_rate": 2.537869077823307e-07, + "loss": 0.8097352385520935, + "step": 6769 + }, + { + "epoch": 1.5599078341013826, + "grad_norm": 1.2075274904697726, + "learning_rate": 2.535333382821415e-07, + "loss": 0.7599455118179321, + "step": 6770 + }, + { + "epoch": 1.5601382488479263, + "grad_norm": 1.3869678105449568, + "learning_rate": 2.5327987713059986e-07, + "loss": 0.8735921382904053, + "step": 6771 + }, + { + "epoch": 1.56036866359447, + "grad_norm": 1.3359870563601237, + "learning_rate": 2.530265243644958e-07, + "loss": 0.7263825535774231, + "step": 6772 + }, + { + "epoch": 1.560599078341014, + "grad_norm": 1.2240386443766704, + "learning_rate": 2.5277328002060296e-07, + "loss": 0.8642966747283936, + "step": 6773 + }, + { + "epoch": 1.5608294930875575, + "grad_norm": 0.9860249164323385, + "learning_rate": 2.525201441356789e-07, + "loss": 0.6928948163986206, + 
"step": 6774 + }, + { + "epoch": 1.5610599078341014, + "grad_norm": 1.0977742625281808, + "learning_rate": 2.522671167464667e-07, + "loss": 0.7841427326202393, + "step": 6775 + }, + { + "epoch": 1.5612903225806452, + "grad_norm": 1.102415991736206, + "learning_rate": 2.5201419788969267e-07, + "loss": 0.6539766192436218, + "step": 6776 + }, + { + "epoch": 1.5615207373271889, + "grad_norm": 1.3087388284847004, + "learning_rate": 2.5176138760206734e-07, + "loss": 0.7817956805229187, + "step": 6777 + }, + { + "epoch": 1.5617511520737328, + "grad_norm": 1.2176256508295467, + "learning_rate": 2.5150868592028626e-07, + "loss": 0.7847198843955994, + "step": 6778 + }, + { + "epoch": 1.5619815668202763, + "grad_norm": 1.1618742898915668, + "learning_rate": 2.5125609288102856e-07, + "loss": 0.8248952627182007, + "step": 6779 + }, + { + "epoch": 1.5622119815668203, + "grad_norm": 1.315546210832164, + "learning_rate": 2.510036085209578e-07, + "loss": 0.8099820613861084, + "step": 6780 + }, + { + "epoch": 1.562442396313364, + "grad_norm": 1.0605499181430498, + "learning_rate": 2.5075123287672173e-07, + "loss": 0.7764754295349121, + "step": 6781 + }, + { + "epoch": 1.5626728110599077, + "grad_norm": 1.376636441531992, + "learning_rate": 2.5049896598495234e-07, + "loss": 0.8055214285850525, + "step": 6782 + }, + { + "epoch": 1.5629032258064517, + "grad_norm": 1.2086842805836235, + "learning_rate": 2.502468078822656e-07, + "loss": 0.7536123991012573, + "step": 6783 + }, + { + "epoch": 1.5631336405529954, + "grad_norm": 1.3330366284043236, + "learning_rate": 2.499947586052623e-07, + "loss": 0.8212461471557617, + "step": 6784 + }, + { + "epoch": 1.5633640552995391, + "grad_norm": 1.087165735027238, + "learning_rate": 2.49742818190527e-07, + "loss": 0.7297977209091187, + "step": 6785 + }, + { + "epoch": 1.563594470046083, + "grad_norm": 1.3633339944793545, + "learning_rate": 2.494909866746282e-07, + "loss": 0.752082109451294, + "step": 6786 + }, + { + "epoch": 1.5638248847926266, + 
"grad_norm": 1.231077416550479, + "learning_rate": 2.4923926409411934e-07, + "loss": 0.9181928634643555, + "step": 6787 + }, + { + "epoch": 1.5640552995391706, + "grad_norm": 1.263799738870316, + "learning_rate": 2.489876504855374e-07, + "loss": 0.8607058525085449, + "step": 6788 + }, + { + "epoch": 1.5642857142857143, + "grad_norm": 1.4864085600196295, + "learning_rate": 2.4873614588540347e-07, + "loss": 0.9659625887870789, + "step": 6789 + }, + { + "epoch": 1.564516129032258, + "grad_norm": 1.7132447669994355, + "learning_rate": 2.4848475033022377e-07, + "loss": 0.8357822299003601, + "step": 6790 + }, + { + "epoch": 1.564746543778802, + "grad_norm": 1.4493565138453182, + "learning_rate": 2.482334638564877e-07, + "loss": 0.7871281504631042, + "step": 6791 + }, + { + "epoch": 1.5649769585253455, + "grad_norm": 0.9644716518923556, + "learning_rate": 2.4798228650066874e-07, + "loss": 0.7221591472625732, + "step": 6792 + }, + { + "epoch": 1.5652073732718894, + "grad_norm": 1.217051022182652, + "learning_rate": 2.4773121829922586e-07, + "loss": 0.7399123907089233, + "step": 6793 + }, + { + "epoch": 1.5654377880184331, + "grad_norm": 1.6036073035934815, + "learning_rate": 2.474802592886003e-07, + "loss": 0.8159279227256775, + "step": 6794 + }, + { + "epoch": 1.5656682027649769, + "grad_norm": 1.208678395846015, + "learning_rate": 2.472294095052192e-07, + "loss": 0.8222753405570984, + "step": 6795 + }, + { + "epoch": 1.5658986175115208, + "grad_norm": 1.0411919729384558, + "learning_rate": 2.469786689854928e-07, + "loss": 0.6586673259735107, + "step": 6796 + }, + { + "epoch": 1.5661290322580645, + "grad_norm": 1.0728597460775429, + "learning_rate": 2.467280377658154e-07, + "loss": 0.8361790180206299, + "step": 6797 + }, + { + "epoch": 1.5663594470046083, + "grad_norm": 1.2928413385952742, + "learning_rate": 2.464775158825665e-07, + "loss": 0.7669099569320679, + "step": 6798 + }, + { + "epoch": 1.5665898617511522, + "grad_norm": 1.331214255352709, + "learning_rate": 
2.462271033721086e-07, + "loss": 0.7876452207565308, + "step": 6799 + }, + { + "epoch": 1.5668202764976957, + "grad_norm": 1.2617656160077577, + "learning_rate": 2.459768002707887e-07, + "loss": 0.7932916879653931, + "step": 6800 + }, + { + "epoch": 1.5670506912442397, + "grad_norm": 1.1101874723309544, + "learning_rate": 2.457266066149382e-07, + "loss": 0.734020471572876, + "step": 6801 + }, + { + "epoch": 1.5672811059907834, + "grad_norm": 1.2001011742733312, + "learning_rate": 2.4547652244087216e-07, + "loss": 0.6975284814834595, + "step": 6802 + }, + { + "epoch": 1.5675115207373271, + "grad_norm": 1.213830843525294, + "learning_rate": 2.452265477848896e-07, + "loss": 0.7214465737342834, + "step": 6803 + }, + { + "epoch": 1.567741935483871, + "grad_norm": 1.1586033079782525, + "learning_rate": 2.4497668268327485e-07, + "loss": 0.8645110130310059, + "step": 6804 + }, + { + "epoch": 1.5679723502304146, + "grad_norm": 1.0991857687698348, + "learning_rate": 2.4472692717229504e-07, + "loss": 0.7389887571334839, + "step": 6805 + }, + { + "epoch": 1.5682027649769585, + "grad_norm": 1.206958266137894, + "learning_rate": 2.4447728128820165e-07, + "loss": 0.8462876081466675, + "step": 6806 + }, + { + "epoch": 1.5684331797235023, + "grad_norm": 1.2507487710365972, + "learning_rate": 2.44227745067231e-07, + "loss": 0.824936032295227, + "step": 6807 + }, + { + "epoch": 1.568663594470046, + "grad_norm": 1.2566804457387248, + "learning_rate": 2.439783185456027e-07, + "loss": 0.8516823053359985, + "step": 6808 + }, + { + "epoch": 1.56889400921659, + "grad_norm": 1.065798809017728, + "learning_rate": 2.4372900175952015e-07, + "loss": 0.6154674291610718, + "step": 6809 + }, + { + "epoch": 1.5691244239631337, + "grad_norm": 1.2816681742105784, + "learning_rate": 2.434797947451722e-07, + "loss": 0.7769260406494141, + "step": 6810 + }, + { + "epoch": 1.5693548387096774, + "grad_norm": 1.2232245245328917, + "learning_rate": 2.432306975387306e-07, + "loss": 0.9525332450866699, + 
"step": 6811 + }, + { + "epoch": 1.5695852534562214, + "grad_norm": 1.3409057347397177, + "learning_rate": 2.429817101763511e-07, + "loss": 0.7537581920623779, + "step": 6812 + }, + { + "epoch": 1.5698156682027649, + "grad_norm": 1.3548012775304474, + "learning_rate": 2.427328326941744e-07, + "loss": 0.814711332321167, + "step": 6813 + }, + { + "epoch": 1.5700460829493088, + "grad_norm": 1.3820372699413255, + "learning_rate": 2.4248406512832466e-07, + "loss": 0.708736777305603, + "step": 6814 + }, + { + "epoch": 1.5702764976958525, + "grad_norm": 1.1061554332755352, + "learning_rate": 2.422354075149098e-07, + "loss": 0.6757712960243225, + "step": 6815 + }, + { + "epoch": 1.5705069124423963, + "grad_norm": 1.0865188505414496, + "learning_rate": 2.4198685989002257e-07, + "loss": 0.736266553401947, + "step": 6816 + }, + { + "epoch": 1.5707373271889402, + "grad_norm": 1.1180343138508952, + "learning_rate": 2.417384222897392e-07, + "loss": 0.7423173189163208, + "step": 6817 + }, + { + "epoch": 1.5709677419354837, + "grad_norm": 1.2076049425001651, + "learning_rate": 2.414900947501197e-07, + "loss": 0.7260550260543823, + "step": 6818 + }, + { + "epoch": 1.5711981566820277, + "grad_norm": 1.241277027009942, + "learning_rate": 2.4124187730720915e-07, + "loss": 0.7125939130783081, + "step": 6819 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 1.1330555560067848, + "learning_rate": 2.409937699970356e-07, + "loss": 0.7429558634757996, + "step": 6820 + }, + { + "epoch": 1.5716589861751151, + "grad_norm": 1.1709438494600335, + "learning_rate": 2.407457728556115e-07, + "loss": 0.7166736721992493, + "step": 6821 + }, + { + "epoch": 1.571889400921659, + "grad_norm": 1.1783418664080478, + "learning_rate": 2.4049788591893336e-07, + "loss": 0.7438491582870483, + "step": 6822 + }, + { + "epoch": 1.5721198156682028, + "grad_norm": 1.3579191422740273, + "learning_rate": 2.402501092229814e-07, + "loss": 0.8031798601150513, + "step": 6823 + }, + { + "epoch": 1.5723502304147465, + 
"grad_norm": 1.3256875261480106, + "learning_rate": 2.400024428037206e-07, + "loss": 0.7067087888717651, + "step": 6824 + }, + { + "epoch": 1.5725806451612905, + "grad_norm": 1.1524386121511956, + "learning_rate": 2.3975488669709906e-07, + "loss": 0.7147783041000366, + "step": 6825 + }, + { + "epoch": 1.572811059907834, + "grad_norm": 1.2529979656124484, + "learning_rate": 2.395074409390491e-07, + "loss": 0.8534795641899109, + "step": 6826 + }, + { + "epoch": 1.573041474654378, + "grad_norm": 1.0527069171574706, + "learning_rate": 2.392601055654875e-07, + "loss": 0.7630984783172607, + "step": 6827 + }, + { + "epoch": 1.5732718894009217, + "grad_norm": 1.3268090351372508, + "learning_rate": 2.390128806123145e-07, + "loss": 0.9395428895950317, + "step": 6828 + }, + { + "epoch": 1.5735023041474654, + "grad_norm": 1.1905263432335205, + "learning_rate": 2.3876576611541423e-07, + "loss": 0.7086023092269897, + "step": 6829 + }, + { + "epoch": 1.5737327188940093, + "grad_norm": 1.0320188306367468, + "learning_rate": 2.385187621106555e-07, + "loss": 0.6937201619148254, + "step": 6830 + }, + { + "epoch": 1.5739631336405528, + "grad_norm": 1.1238131407833931, + "learning_rate": 2.3827186863389037e-07, + "loss": 0.7339247465133667, + "step": 6831 + }, + { + "epoch": 1.5741935483870968, + "grad_norm": 0.9948868064813976, + "learning_rate": 2.3802508572095493e-07, + "loss": 0.8453131318092346, + "step": 6832 + }, + { + "epoch": 1.5744239631336405, + "grad_norm": 1.2870129222879585, + "learning_rate": 2.377784134076698e-07, + "loss": 0.7303619384765625, + "step": 6833 + }, + { + "epoch": 1.5746543778801843, + "grad_norm": 1.1663952236638828, + "learning_rate": 2.3753185172983893e-07, + "loss": 0.9635858535766602, + "step": 6834 + }, + { + "epoch": 1.5748847926267282, + "grad_norm": 0.9711435467160289, + "learning_rate": 2.3728540072324998e-07, + "loss": 0.7174761295318604, + "step": 6835 + }, + { + "epoch": 1.575115207373272, + "grad_norm": 1.0168865512931398, + "learning_rate": 
2.3703906042367584e-07, + "loss": 0.7375633716583252, + "step": 6836 + }, + { + "epoch": 1.5753456221198157, + "grad_norm": 1.0569071581049987, + "learning_rate": 2.3679283086687206e-07, + "loss": 0.8202652931213379, + "step": 6837 + }, + { + "epoch": 1.5755760368663596, + "grad_norm": 1.4428887155533328, + "learning_rate": 2.3654671208857823e-07, + "loss": 0.8448499441146851, + "step": 6838 + }, + { + "epoch": 1.5758064516129031, + "grad_norm": 1.3297185542360797, + "learning_rate": 2.3630070412451864e-07, + "loss": 0.7840893268585205, + "step": 6839 + }, + { + "epoch": 1.576036866359447, + "grad_norm": 1.1930310177318706, + "learning_rate": 2.3605480701040092e-07, + "loss": 0.8036940693855286, + "step": 6840 + }, + { + "epoch": 1.5762672811059908, + "grad_norm": 1.2730513650169084, + "learning_rate": 2.3580902078191666e-07, + "loss": 0.8333625793457031, + "step": 6841 + }, + { + "epoch": 1.5764976958525345, + "grad_norm": 1.3288439351572012, + "learning_rate": 2.3556334547474133e-07, + "loss": 0.804919958114624, + "step": 6842 + }, + { + "epoch": 1.5767281105990785, + "grad_norm": 1.2605928054638793, + "learning_rate": 2.3531778112453416e-07, + "loss": 0.752541720867157, + "step": 6843 + }, + { + "epoch": 1.576958525345622, + "grad_norm": 1.0378289852617786, + "learning_rate": 2.3507232776693896e-07, + "loss": 0.647051215171814, + "step": 6844 + }, + { + "epoch": 1.577188940092166, + "grad_norm": 1.1139826400416593, + "learning_rate": 2.3482698543758285e-07, + "loss": 0.7546517848968506, + "step": 6845 + }, + { + "epoch": 1.5774193548387097, + "grad_norm": 1.0118514872509952, + "learning_rate": 2.345817541720766e-07, + "loss": 0.8773425817489624, + "step": 6846 + }, + { + "epoch": 1.5776497695852534, + "grad_norm": 1.1485612061840695, + "learning_rate": 2.3433663400601567e-07, + "loss": 0.9538160562515259, + "step": 6847 + }, + { + "epoch": 1.5778801843317973, + "grad_norm": 1.0298677066929223, + "learning_rate": 2.340916249749787e-07, + "loss": 
0.6275157332420349, + "step": 6848 + }, + { + "epoch": 1.578110599078341, + "grad_norm": 1.1889533964841936, + "learning_rate": 2.3384672711452812e-07, + "loss": 0.7729284167289734, + "step": 6849 + }, + { + "epoch": 1.5783410138248848, + "grad_norm": 1.4210079123943715, + "learning_rate": 2.3360194046021108e-07, + "loss": 0.8361644148826599, + "step": 6850 + }, + { + "epoch": 1.5785714285714287, + "grad_norm": 1.2305172757518368, + "learning_rate": 2.3335726504755793e-07, + "loss": 0.6782940626144409, + "step": 6851 + }, + { + "epoch": 1.5788018433179722, + "grad_norm": 1.3612688278959233, + "learning_rate": 2.3311270091208256e-07, + "loss": 0.8036615252494812, + "step": 6852 + }, + { + "epoch": 1.5790322580645162, + "grad_norm": 1.1729176601878941, + "learning_rate": 2.3286824808928362e-07, + "loss": 0.8450125455856323, + "step": 6853 + }, + { + "epoch": 1.57926267281106, + "grad_norm": 1.2162582175159786, + "learning_rate": 2.3262390661464303e-07, + "loss": 0.6546198725700378, + "step": 6854 + }, + { + "epoch": 1.5794930875576036, + "grad_norm": 1.4056383803669428, + "learning_rate": 2.3237967652362612e-07, + "loss": 0.8201385140419006, + "step": 6855 + }, + { + "epoch": 1.5797235023041476, + "grad_norm": 1.3504561324932176, + "learning_rate": 2.3213555785168336e-07, + "loss": 0.8753508925437927, + "step": 6856 + }, + { + "epoch": 1.579953917050691, + "grad_norm": 1.2672866740553073, + "learning_rate": 2.3189155063424782e-07, + "loss": 0.5884093642234802, + "step": 6857 + }, + { + "epoch": 1.580184331797235, + "grad_norm": 1.0135145180947078, + "learning_rate": 2.3164765490673654e-07, + "loss": 0.6494029760360718, + "step": 6858 + }, + { + "epoch": 1.5804147465437788, + "grad_norm": 1.1478304397345402, + "learning_rate": 2.3140387070455126e-07, + "loss": 0.7407097220420837, + "step": 6859 + }, + { + "epoch": 1.5806451612903225, + "grad_norm": 1.3351942864944542, + "learning_rate": 2.3116019806307673e-07, + "loss": 0.8934177160263062, + "step": 6860 + }, + { + 
"epoch": 1.5808755760368665, + "grad_norm": 1.0696222163552975, + "learning_rate": 2.309166370176816e-07, + "loss": 0.7487956881523132, + "step": 6861 + }, + { + "epoch": 1.5811059907834102, + "grad_norm": 1.316829236490256, + "learning_rate": 2.3067318760371845e-07, + "loss": 0.7744357585906982, + "step": 6862 + }, + { + "epoch": 1.581336405529954, + "grad_norm": 1.3202738468289819, + "learning_rate": 2.304298498565237e-07, + "loss": 0.8871743679046631, + "step": 6863 + }, + { + "epoch": 1.5815668202764976, + "grad_norm": 1.3064493000042272, + "learning_rate": 2.3018662381141717e-07, + "loss": 0.7865666151046753, + "step": 6864 + }, + { + "epoch": 1.5817972350230414, + "grad_norm": 1.4125222114326161, + "learning_rate": 2.2994350950370334e-07, + "loss": 0.8416531682014465, + "step": 6865 + }, + { + "epoch": 1.5820276497695853, + "grad_norm": 1.5275008378701445, + "learning_rate": 2.2970050696866972e-07, + "loss": 0.8443950414657593, + "step": 6866 + }, + { + "epoch": 1.582258064516129, + "grad_norm": 1.4005476364990852, + "learning_rate": 2.2945761624158756e-07, + "loss": 0.7770054340362549, + "step": 6867 + }, + { + "epoch": 1.5824884792626728, + "grad_norm": 1.2159355438440163, + "learning_rate": 2.2921483735771252e-07, + "loss": 0.7263047695159912, + "step": 6868 + }, + { + "epoch": 1.5827188940092167, + "grad_norm": 1.3958985609002883, + "learning_rate": 2.2897217035228312e-07, + "loss": 0.8288376927375793, + "step": 6869 + }, + { + "epoch": 1.5829493087557602, + "grad_norm": 1.2630380344196672, + "learning_rate": 2.2872961526052292e-07, + "loss": 0.8325462937355042, + "step": 6870 + }, + { + "epoch": 1.5831797235023042, + "grad_norm": 1.317005879944655, + "learning_rate": 2.284871721176379e-07, + "loss": 0.7412815093994141, + "step": 6871 + }, + { + "epoch": 1.583410138248848, + "grad_norm": 1.2813974132427688, + "learning_rate": 2.2824484095881823e-07, + "loss": 0.8958117961883545, + "step": 6872 + }, + { + "epoch": 1.5836405529953916, + "grad_norm": 
1.228628782021168, + "learning_rate": 2.2800262181923858e-07, + "loss": 0.8374444246292114, + "step": 6873 + }, + { + "epoch": 1.5838709677419356, + "grad_norm": 1.2394995315660131, + "learning_rate": 2.2776051473405634e-07, + "loss": 0.7900353670120239, + "step": 6874 + }, + { + "epoch": 1.5841013824884793, + "grad_norm": 1.129671125708823, + "learning_rate": 2.2751851973841285e-07, + "loss": 0.7420408725738525, + "step": 6875 + }, + { + "epoch": 1.584331797235023, + "grad_norm": 1.3245275433928243, + "learning_rate": 2.2727663686743382e-07, + "loss": 0.8902314305305481, + "step": 6876 + }, + { + "epoch": 1.5845622119815668, + "grad_norm": 1.2122656586799572, + "learning_rate": 2.27034866156228e-07, + "loss": 0.739869236946106, + "step": 6877 + }, + { + "epoch": 1.5847926267281105, + "grad_norm": 1.169654737499052, + "learning_rate": 2.2679320763988775e-07, + "loss": 0.8340646624565125, + "step": 6878 + }, + { + "epoch": 1.5850230414746544, + "grad_norm": 1.3076425110312813, + "learning_rate": 2.2655166135349013e-07, + "loss": 0.7501030564308167, + "step": 6879 + }, + { + "epoch": 1.5852534562211982, + "grad_norm": 1.0619799072208593, + "learning_rate": 2.2631022733209504e-07, + "loss": 0.722623348236084, + "step": 6880 + }, + { + "epoch": 1.585483870967742, + "grad_norm": 1.4046404033814042, + "learning_rate": 2.260689056107461e-07, + "loss": 0.8319696187973022, + "step": 6881 + }, + { + "epoch": 1.5857142857142859, + "grad_norm": 1.2824383261655956, + "learning_rate": 2.2582769622447107e-07, + "loss": 0.85502028465271, + "step": 6882 + }, + { + "epoch": 1.5859447004608294, + "grad_norm": 1.444500113904039, + "learning_rate": 2.2558659920828095e-07, + "loss": 0.7942626476287842, + "step": 6883 + }, + { + "epoch": 1.5861751152073733, + "grad_norm": 0.9346347634599198, + "learning_rate": 2.253456145971705e-07, + "loss": 0.6731030941009521, + "step": 6884 + }, + { + "epoch": 1.586405529953917, + "grad_norm": 1.2567565363582325, + "learning_rate": 
2.2510474242611887e-07, + "loss": 0.8479423522949219, + "step": 6885 + }, + { + "epoch": 1.5866359447004608, + "grad_norm": 1.0824322707106273, + "learning_rate": 2.2486398273008812e-07, + "loss": 0.7398810386657715, + "step": 6886 + }, + { + "epoch": 1.5868663594470047, + "grad_norm": 1.4531636253389437, + "learning_rate": 2.246233355440238e-07, + "loss": 0.8422881364822388, + "step": 6887 + }, + { + "epoch": 1.5870967741935482, + "grad_norm": 1.4298247398214885, + "learning_rate": 2.2438280090285612e-07, + "loss": 0.8307279944419861, + "step": 6888 + }, + { + "epoch": 1.5873271889400922, + "grad_norm": 1.3280924437525041, + "learning_rate": 2.2414237884149821e-07, + "loss": 0.8329004049301147, + "step": 6889 + }, + { + "epoch": 1.587557603686636, + "grad_norm": 1.196093026387475, + "learning_rate": 2.2390206939484645e-07, + "loss": 0.801641583442688, + "step": 6890 + }, + { + "epoch": 1.5877880184331796, + "grad_norm": 1.359543687074451, + "learning_rate": 2.2366187259778235e-07, + "loss": 0.9850986003875732, + "step": 6891 + }, + { + "epoch": 1.5880184331797236, + "grad_norm": 1.2770195506897435, + "learning_rate": 2.2342178848516935e-07, + "loss": 0.7169715166091919, + "step": 6892 + }, + { + "epoch": 1.5882488479262673, + "grad_norm": 1.1258655345605515, + "learning_rate": 2.2318181709185603e-07, + "loss": 0.7509033679962158, + "step": 6893 + }, + { + "epoch": 1.588479262672811, + "grad_norm": 1.2429319924869415, + "learning_rate": 2.2294195845267348e-07, + "loss": 0.6974655985832214, + "step": 6894 + }, + { + "epoch": 1.588709677419355, + "grad_norm": 1.1949954122245936, + "learning_rate": 2.227022126024367e-07, + "loss": 0.7388278245925903, + "step": 6895 + }, + { + "epoch": 1.5889400921658985, + "grad_norm": 1.1219112420315915, + "learning_rate": 2.2246257957594506e-07, + "loss": 0.6479122638702393, + "step": 6896 + }, + { + "epoch": 1.5891705069124424, + "grad_norm": 1.2556673774557678, + "learning_rate": 2.222230594079807e-07, + "loss": 0.759338915348053, 
+ "step": 6897 + }, + { + "epoch": 1.5894009216589862, + "grad_norm": 1.1747779352742982, + "learning_rate": 2.2198365213330937e-07, + "loss": 0.7299938201904297, + "step": 6898 + }, + { + "epoch": 1.58963133640553, + "grad_norm": 1.2072520940330866, + "learning_rate": 2.2174435778668122e-07, + "loss": 0.707555890083313, + "step": 6899 + }, + { + "epoch": 1.5898617511520738, + "grad_norm": 1.3083069601374675, + "learning_rate": 2.2150517640282918e-07, + "loss": 0.8311065435409546, + "step": 6900 + }, + { + "epoch": 1.5900921658986173, + "grad_norm": 1.1585381591481734, + "learning_rate": 2.2126610801647028e-07, + "loss": 0.6494649648666382, + "step": 6901 + }, + { + "epoch": 1.5903225806451613, + "grad_norm": 1.006735116508423, + "learning_rate": 2.2102715266230486e-07, + "loss": 0.6563294529914856, + "step": 6902 + }, + { + "epoch": 1.590552995391705, + "grad_norm": 1.0365958828861261, + "learning_rate": 2.207883103750171e-07, + "loss": 0.7426891326904297, + "step": 6903 + }, + { + "epoch": 1.5907834101382488, + "grad_norm": 0.9164747480191582, + "learning_rate": 2.2054958118927413e-07, + "loss": 0.7074661254882812, + "step": 6904 + }, + { + "epoch": 1.5910138248847927, + "grad_norm": 1.4657092079572216, + "learning_rate": 2.203109651397279e-07, + "loss": 0.8407880663871765, + "step": 6905 + }, + { + "epoch": 1.5912442396313364, + "grad_norm": 1.014884431152031, + "learning_rate": 2.2007246226101296e-07, + "loss": 0.7228440642356873, + "step": 6906 + }, + { + "epoch": 1.5914746543778802, + "grad_norm": 1.1100543617790197, + "learning_rate": 2.1983407258774733e-07, + "loss": 0.6988812685012817, + "step": 6907 + }, + { + "epoch": 1.591705069124424, + "grad_norm": 1.3237351414434337, + "learning_rate": 2.195957961545335e-07, + "loss": 0.793757438659668, + "step": 6908 + }, + { + "epoch": 1.5919354838709676, + "grad_norm": 1.2485526093365642, + "learning_rate": 2.1935763299595678e-07, + "loss": 0.8621397018432617, + "step": 6909 + }, + { + "epoch": 1.5921658986175116, 
+ "grad_norm": 1.2314950700356975, + "learning_rate": 2.1911958314658598e-07, + "loss": 0.7661364078521729, + "step": 6910 + }, + { + "epoch": 1.5923963133640553, + "grad_norm": 1.1937782252155265, + "learning_rate": 2.1888164664097408e-07, + "loss": 0.9322741031646729, + "step": 6911 + }, + { + "epoch": 1.592626728110599, + "grad_norm": 1.1787479005369867, + "learning_rate": 2.1864382351365717e-07, + "loss": 0.8411989212036133, + "step": 6912 + }, + { + "epoch": 1.592857142857143, + "grad_norm": 1.515351393614885, + "learning_rate": 2.1840611379915464e-07, + "loss": 0.8212479948997498, + "step": 6913 + }, + { + "epoch": 1.5930875576036865, + "grad_norm": 1.5032885798825617, + "learning_rate": 2.181685175319702e-07, + "loss": 0.7875508069992065, + "step": 6914 + }, + { + "epoch": 1.5933179723502304, + "grad_norm": 1.178669163135756, + "learning_rate": 2.1793103474659047e-07, + "loss": 0.8389852046966553, + "step": 6915 + }, + { + "epoch": 1.5935483870967742, + "grad_norm": 1.388906101423199, + "learning_rate": 2.1769366547748546e-07, + "loss": 0.8223046660423279, + "step": 6916 + }, + { + "epoch": 1.5937788018433179, + "grad_norm": 1.0682255683615596, + "learning_rate": 2.1745640975910962e-07, + "loss": 0.8427159786224365, + "step": 6917 + }, + { + "epoch": 1.5940092165898618, + "grad_norm": 1.2770752550871127, + "learning_rate": 2.172192676258996e-07, + "loss": 0.7448060512542725, + "step": 6918 + }, + { + "epoch": 1.5942396313364056, + "grad_norm": 1.1028377529507616, + "learning_rate": 2.1698223911227686e-07, + "loss": 0.7122288346290588, + "step": 6919 + }, + { + "epoch": 1.5944700460829493, + "grad_norm": 1.3801420957349657, + "learning_rate": 2.1674532425264548e-07, + "loss": 0.7712994813919067, + "step": 6920 + }, + { + "epoch": 1.5947004608294932, + "grad_norm": 1.4967308024498271, + "learning_rate": 2.1650852308139355e-07, + "loss": 0.9656664729118347, + "step": 6921 + }, + { + "epoch": 1.5949308755760367, + "grad_norm": 1.3725078407101703, + 
"learning_rate": 2.162718356328922e-07, + "loss": 0.748894214630127, + "step": 6922 + }, + { + "epoch": 1.5951612903225807, + "grad_norm": 1.0191925895935576, + "learning_rate": 2.1603526194149635e-07, + "loss": 0.6875454187393188, + "step": 6923 + }, + { + "epoch": 1.5953917050691244, + "grad_norm": 1.099493651981713, + "learning_rate": 2.1579880204154412e-07, + "loss": 0.8258690237998962, + "step": 6924 + }, + { + "epoch": 1.5956221198156681, + "grad_norm": 1.2228052738114181, + "learning_rate": 2.15562455967358e-07, + "loss": 0.7647902965545654, + "step": 6925 + }, + { + "epoch": 1.595852534562212, + "grad_norm": 1.0716326843288577, + "learning_rate": 2.1532622375324284e-07, + "loss": 0.7004281282424927, + "step": 6926 + }, + { + "epoch": 1.5960829493087556, + "grad_norm": 1.1015601686618846, + "learning_rate": 2.1509010543348726e-07, + "loss": 0.7500345706939697, + "step": 6927 + }, + { + "epoch": 1.5963133640552996, + "grad_norm": 1.1261005927566234, + "learning_rate": 2.148541010423641e-07, + "loss": 0.7300195693969727, + "step": 6928 + }, + { + "epoch": 1.5965437788018433, + "grad_norm": 1.0927883255058508, + "learning_rate": 2.1461821061412876e-07, + "loss": 0.7592284679412842, + "step": 6929 + }, + { + "epoch": 1.596774193548387, + "grad_norm": 1.265065855875648, + "learning_rate": 2.1438243418302016e-07, + "loss": 0.7179796099662781, + "step": 6930 + }, + { + "epoch": 1.597004608294931, + "grad_norm": 1.167267121775029, + "learning_rate": 2.1414677178326157e-07, + "loss": 0.8829631805419922, + "step": 6931 + }, + { + "epoch": 1.5972350230414747, + "grad_norm": 1.4030936435750112, + "learning_rate": 2.1391122344905865e-07, + "loss": 0.8661972880363464, + "step": 6932 + }, + { + "epoch": 1.5974654377880184, + "grad_norm": 1.4609293147197595, + "learning_rate": 2.136757892146007e-07, + "loss": 0.7774989604949951, + "step": 6933 + }, + { + "epoch": 1.5976958525345624, + "grad_norm": 1.2556066222087972, + "learning_rate": 2.1344046911406132e-07, + "loss": 
0.7343888878822327, + "step": 6934 + }, + { + "epoch": 1.5979262672811059, + "grad_norm": 1.1442684268001395, + "learning_rate": 2.132052631815966e-07, + "loss": 0.7810107469558716, + "step": 6935 + }, + { + "epoch": 1.5981566820276498, + "grad_norm": 1.343676205067389, + "learning_rate": 2.12970171451346e-07, + "loss": 0.7585299611091614, + "step": 6936 + }, + { + "epoch": 1.5983870967741935, + "grad_norm": 1.2827689520736418, + "learning_rate": 2.1273519395743344e-07, + "loss": 0.886371910572052, + "step": 6937 + }, + { + "epoch": 1.5986175115207373, + "grad_norm": 0.9767702062550015, + "learning_rate": 2.1250033073396523e-07, + "loss": 0.6986823081970215, + "step": 6938 + }, + { + "epoch": 1.5988479262672812, + "grad_norm": 1.2062052682782651, + "learning_rate": 2.122655818150312e-07, + "loss": 0.7524189352989197, + "step": 6939 + }, + { + "epoch": 1.5990783410138247, + "grad_norm": 1.1473232496595593, + "learning_rate": 2.120309472347055e-07, + "loss": 0.7699365615844727, + "step": 6940 + }, + { + "epoch": 1.5993087557603687, + "grad_norm": 1.188421090787615, + "learning_rate": 2.1179642702704458e-07, + "loss": 0.8112696409225464, + "step": 6941 + }, + { + "epoch": 1.5995391705069124, + "grad_norm": 1.377266755106213, + "learning_rate": 2.115620212260889e-07, + "loss": 0.7067416906356812, + "step": 6942 + }, + { + "epoch": 1.5997695852534561, + "grad_norm": 0.80841875970131, + "learning_rate": 2.1132772986586211e-07, + "loss": 0.787110447883606, + "step": 6943 + }, + { + "epoch": 1.6, + "grad_norm": 1.282613261539406, + "learning_rate": 2.11093552980371e-07, + "loss": 0.7356789112091064, + "step": 6944 + }, + { + "epoch": 1.6002304147465438, + "grad_norm": 1.2747758780049527, + "learning_rate": 2.1085949060360653e-07, + "loss": 0.8057125806808472, + "step": 6945 + }, + { + "epoch": 1.6004608294930875, + "grad_norm": 1.1828340962550294, + "learning_rate": 2.1062554276954225e-07, + "loss": 0.7169399261474609, + "step": 6946 + }, + { + "epoch": 1.6006912442396315, 
+ "grad_norm": 1.2018304676070681, + "learning_rate": 2.1039170951213526e-07, + "loss": 0.7219180464744568, + "step": 6947 + }, + { + "epoch": 1.600921658986175, + "grad_norm": 1.2736335133966967, + "learning_rate": 2.101579908653266e-07, + "loss": 0.7530789375305176, + "step": 6948 + }, + { + "epoch": 1.601152073732719, + "grad_norm": 1.2374620271602483, + "learning_rate": 2.0992438686303993e-07, + "loss": 0.8192377090454102, + "step": 6949 + }, + { + "epoch": 1.6013824884792627, + "grad_norm": 1.0987195977670645, + "learning_rate": 2.0969089753918223e-07, + "loss": 0.6904648542404175, + "step": 6950 + }, + { + "epoch": 1.6016129032258064, + "grad_norm": 1.2558183684284059, + "learning_rate": 2.0945752292764495e-07, + "loss": 0.7289770245552063, + "step": 6951 + }, + { + "epoch": 1.6018433179723504, + "grad_norm": 1.260283902447682, + "learning_rate": 2.0922426306230157e-07, + "loss": 0.8467620611190796, + "step": 6952 + }, + { + "epoch": 1.6020737327188939, + "grad_norm": 1.3650999598924758, + "learning_rate": 2.089911179770093e-07, + "loss": 0.7835153937339783, + "step": 6953 + }, + { + "epoch": 1.6023041474654378, + "grad_norm": 0.847985634813149, + "learning_rate": 2.0875808770560933e-07, + "loss": 0.6696668267250061, + "step": 6954 + }, + { + "epoch": 1.6025345622119815, + "grad_norm": 1.441689312728025, + "learning_rate": 2.0852517228192556e-07, + "loss": 0.8451364636421204, + "step": 6955 + }, + { + "epoch": 1.6027649769585253, + "grad_norm": 1.2628900414882365, + "learning_rate": 2.0829237173976487e-07, + "loss": 0.7917240858078003, + "step": 6956 + }, + { + "epoch": 1.6029953917050692, + "grad_norm": 1.2514606025933794, + "learning_rate": 2.0805968611291867e-07, + "loss": 0.791597843170166, + "step": 6957 + }, + { + "epoch": 1.603225806451613, + "grad_norm": 1.2854657657217543, + "learning_rate": 2.0782711543516063e-07, + "loss": 0.7571247816085815, + "step": 6958 + }, + { + "epoch": 1.6034562211981567, + "grad_norm": 1.1996866839711877, + 
"learning_rate": 2.075946597402478e-07, + "loss": 0.9196302890777588, + "step": 6959 + }, + { + "epoch": 1.6036866359447006, + "grad_norm": 0.9955085341059975, + "learning_rate": 2.0736231906192136e-07, + "loss": 0.7106618881225586, + "step": 6960 + }, + { + "epoch": 1.6039170506912441, + "grad_norm": 0.9090693582601959, + "learning_rate": 2.071300934339051e-07, + "loss": 0.8923465013504028, + "step": 6961 + }, + { + "epoch": 1.604147465437788, + "grad_norm": 1.1524730844586952, + "learning_rate": 2.0689798288990601e-07, + "loss": 0.6929241418838501, + "step": 6962 + }, + { + "epoch": 1.6043778801843318, + "grad_norm": 1.4736872345919192, + "learning_rate": 2.0666598746361487e-07, + "loss": 0.935944676399231, + "step": 6963 + }, + { + "epoch": 1.6046082949308755, + "grad_norm": 1.3002916307222088, + "learning_rate": 2.0643410718870536e-07, + "loss": 0.7442188262939453, + "step": 6964 + }, + { + "epoch": 1.6048387096774195, + "grad_norm": 1.13007905720726, + "learning_rate": 2.0620234209883446e-07, + "loss": 0.7340278625488281, + "step": 6965 + }, + { + "epoch": 1.605069124423963, + "grad_norm": 1.1841454047560163, + "learning_rate": 2.0597069222764297e-07, + "loss": 0.7436190247535706, + "step": 6966 + }, + { + "epoch": 1.605299539170507, + "grad_norm": 1.1998918795301519, + "learning_rate": 2.0573915760875406e-07, + "loss": 0.9109283685684204, + "step": 6967 + }, + { + "epoch": 1.6055299539170507, + "grad_norm": 1.362187790875206, + "learning_rate": 2.0550773827577518e-07, + "loss": 0.86224365234375, + "step": 6968 + }, + { + "epoch": 1.6057603686635944, + "grad_norm": 1.0973288140018649, + "learning_rate": 2.0527643426229636e-07, + "loss": 0.6873685121536255, + "step": 6969 + }, + { + "epoch": 1.6059907834101383, + "grad_norm": 1.2862613183491987, + "learning_rate": 2.0504524560189074e-07, + "loss": 0.7634609937667847, + "step": 6970 + }, + { + "epoch": 1.606221198156682, + "grad_norm": 1.642442078921259, + "learning_rate": 2.0481417232811572e-07, + "loss": 
0.7940595149993896, + "step": 6971 + }, + { + "epoch": 1.6064516129032258, + "grad_norm": 1.0579671129687211, + "learning_rate": 2.0458321447451078e-07, + "loss": 0.7109687924385071, + "step": 6972 + }, + { + "epoch": 1.6066820276497698, + "grad_norm": 1.3780414286693414, + "learning_rate": 2.04352372074599e-07, + "loss": 0.9476398825645447, + "step": 6973 + }, + { + "epoch": 1.6069124423963133, + "grad_norm": 1.3106188238946987, + "learning_rate": 2.0412164516188747e-07, + "loss": 0.7563579678535461, + "step": 6974 + }, + { + "epoch": 1.6071428571428572, + "grad_norm": 1.1912217950342037, + "learning_rate": 2.0389103376986538e-07, + "loss": 0.7928751707077026, + "step": 6975 + }, + { + "epoch": 1.607373271889401, + "grad_norm": 1.1927610489358789, + "learning_rate": 2.0366053793200565e-07, + "loss": 0.776961624622345, + "step": 6976 + }, + { + "epoch": 1.6076036866359447, + "grad_norm": 1.1830668942381175, + "learning_rate": 2.0343015768176496e-07, + "loss": 0.6511167883872986, + "step": 6977 + }, + { + "epoch": 1.6078341013824886, + "grad_norm": 1.3541662729221868, + "learning_rate": 2.0319989305258235e-07, + "loss": 0.6487337350845337, + "step": 6978 + }, + { + "epoch": 1.6080645161290321, + "grad_norm": 1.5271951763204938, + "learning_rate": 2.0296974407788004e-07, + "loss": 0.921454131603241, + "step": 6979 + }, + { + "epoch": 1.608294930875576, + "grad_norm": 1.0476613319531645, + "learning_rate": 2.0273971079106467e-07, + "loss": 0.8145809769630432, + "step": 6980 + }, + { + "epoch": 1.6085253456221198, + "grad_norm": 0.9495439447317249, + "learning_rate": 2.0250979322552474e-07, + "loss": 0.6655904054641724, + "step": 6981 + }, + { + "epoch": 1.6087557603686635, + "grad_norm": 1.1486957458539049, + "learning_rate": 2.0227999141463258e-07, + "loss": 0.777961254119873, + "step": 6982 + }, + { + "epoch": 1.6089861751152075, + "grad_norm": 1.3274428663782127, + "learning_rate": 2.0205030539174361e-07, + "loss": 0.6543164253234863, + "step": 6983 + }, + { + 
"epoch": 1.6092165898617512, + "grad_norm": 1.233780092778412, + "learning_rate": 2.018207351901966e-07, + "loss": 0.7842000722885132, + "step": 6984 + }, + { + "epoch": 1.609447004608295, + "grad_norm": 0.999384175284256, + "learning_rate": 2.0159128084331278e-07, + "loss": 0.7264418005943298, + "step": 6985 + }, + { + "epoch": 1.6096774193548387, + "grad_norm": 1.313414021265448, + "learning_rate": 2.0136194238439795e-07, + "loss": 0.8722596168518066, + "step": 6986 + }, + { + "epoch": 1.6099078341013824, + "grad_norm": 1.3518278161266697, + "learning_rate": 2.0113271984673997e-07, + "loss": 0.8162735104560852, + "step": 6987 + }, + { + "epoch": 1.6101382488479263, + "grad_norm": 1.212757185466248, + "learning_rate": 2.0090361326360982e-07, + "loss": 0.6962481737136841, + "step": 6988 + }, + { + "epoch": 1.61036866359447, + "grad_norm": 1.133716172506403, + "learning_rate": 2.0067462266826264e-07, + "loss": 0.8186852931976318, + "step": 6989 + }, + { + "epoch": 1.6105990783410138, + "grad_norm": 1.505728867210405, + "learning_rate": 2.0044574809393543e-07, + "loss": 0.8935987949371338, + "step": 6990 + }, + { + "epoch": 1.6108294930875577, + "grad_norm": 1.2824355796337807, + "learning_rate": 2.002169895738498e-07, + "loss": 0.9152865409851074, + "step": 6991 + }, + { + "epoch": 1.6110599078341012, + "grad_norm": 1.521529078332145, + "learning_rate": 1.9998834714120928e-07, + "loss": 0.8042874336242676, + "step": 6992 + }, + { + "epoch": 1.6112903225806452, + "grad_norm": 1.3198117612600044, + "learning_rate": 1.9975982082920083e-07, + "loss": 0.9621129035949707, + "step": 6993 + }, + { + "epoch": 1.611520737327189, + "grad_norm": 1.1154614331355635, + "learning_rate": 1.9953141067099533e-07, + "loss": 0.8296995162963867, + "step": 6994 + }, + { + "epoch": 1.6117511520737327, + "grad_norm": 1.0827522335122797, + "learning_rate": 1.9930311669974587e-07, + "loss": 0.8129373788833618, + "step": 6995 + }, + { + "epoch": 1.6119815668202766, + "grad_norm": 
1.359695561767368, + "learning_rate": 1.9907493894858874e-07, + "loss": 0.7450911998748779, + "step": 6996 + }, + { + "epoch": 1.6122119815668203, + "grad_norm": 1.2367503665171555, + "learning_rate": 1.9884687745064422e-07, + "loss": 0.798037052154541, + "step": 6997 + }, + { + "epoch": 1.612442396313364, + "grad_norm": 1.218969884225304, + "learning_rate": 1.9861893223901494e-07, + "loss": 0.8118857145309448, + "step": 6998 + }, + { + "epoch": 1.6126728110599078, + "grad_norm": 1.2176008366956401, + "learning_rate": 1.9839110334678632e-07, + "loss": 0.7954392433166504, + "step": 6999 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 1.2233633618619175, + "learning_rate": 1.9816339080702825e-07, + "loss": 0.8055616617202759, + "step": 7000 + }, + { + "epoch": 1.6131336405529955, + "grad_norm": 1.503254744382692, + "learning_rate": 1.979357946527924e-07, + "loss": 0.8949761986732483, + "step": 7001 + }, + { + "epoch": 1.6133640552995392, + "grad_norm": 1.376056206509758, + "learning_rate": 1.9770831491711427e-07, + "loss": 0.8327617645263672, + "step": 7002 + }, + { + "epoch": 1.613594470046083, + "grad_norm": 1.2867855951178133, + "learning_rate": 1.9748095163301215e-07, + "loss": 0.7593148946762085, + "step": 7003 + }, + { + "epoch": 1.6138248847926269, + "grad_norm": 1.2449007241812073, + "learning_rate": 1.9725370483348737e-07, + "loss": 0.7639665603637695, + "step": 7004 + }, + { + "epoch": 1.6140552995391704, + "grad_norm": 1.2839981076373308, + "learning_rate": 1.9702657455152448e-07, + "loss": 0.8561587929725647, + "step": 7005 + }, + { + "epoch": 1.6142857142857143, + "grad_norm": 1.4345782240891563, + "learning_rate": 1.9679956082009154e-07, + "loss": 0.835313081741333, + "step": 7006 + }, + { + "epoch": 1.614516129032258, + "grad_norm": 1.680229749258956, + "learning_rate": 1.9657266367213898e-07, + "loss": 0.831456184387207, + "step": 7007 + }, + { + "epoch": 1.6147465437788018, + "grad_norm": 1.1797102347566437, + "learning_rate": 
1.963458831406005e-07, + "loss": 0.699436604976654, + "step": 7008 + }, + { + "epoch": 1.6149769585253457, + "grad_norm": 1.2382287230628872, + "learning_rate": 1.9611921925839337e-07, + "loss": 0.7821902632713318, + "step": 7009 + }, + { + "epoch": 1.6152073732718892, + "grad_norm": 1.035873020643515, + "learning_rate": 1.9589267205841742e-07, + "loss": 0.7491241097450256, + "step": 7010 + }, + { + "epoch": 1.6154377880184332, + "grad_norm": 1.3212550422299536, + "learning_rate": 1.956662415735554e-07, + "loss": 0.7299652099609375, + "step": 7011 + }, + { + "epoch": 1.615668202764977, + "grad_norm": 1.2121144450441814, + "learning_rate": 1.9543992783667385e-07, + "loss": 0.692190408706665, + "step": 7012 + }, + { + "epoch": 1.6158986175115206, + "grad_norm": 1.5397188528974992, + "learning_rate": 1.9521373088062166e-07, + "loss": 0.8727273941040039, + "step": 7013 + }, + { + "epoch": 1.6161290322580646, + "grad_norm": 0.9576172656761047, + "learning_rate": 1.9498765073823077e-07, + "loss": 0.6441171169281006, + "step": 7014 + }, + { + "epoch": 1.6163594470046083, + "grad_norm": 1.202013067822893, + "learning_rate": 1.947616874423169e-07, + "loss": 0.6960387229919434, + "step": 7015 + }, + { + "epoch": 1.616589861751152, + "grad_norm": 1.3238157552069112, + "learning_rate": 1.9453584102567788e-07, + "loss": 0.9231700301170349, + "step": 7016 + }, + { + "epoch": 1.616820276497696, + "grad_norm": 1.5395552640428811, + "learning_rate": 1.9431011152109555e-07, + "loss": 0.6957401037216187, + "step": 7017 + }, + { + "epoch": 1.6170506912442395, + "grad_norm": 1.221595091148929, + "learning_rate": 1.9408449896133384e-07, + "loss": 0.6608580350875854, + "step": 7018 + }, + { + "epoch": 1.6172811059907835, + "grad_norm": 1.386134285673899, + "learning_rate": 1.9385900337913997e-07, + "loss": 0.7322397232055664, + "step": 7019 + }, + { + "epoch": 1.6175115207373272, + "grad_norm": 1.1188269604657235, + "learning_rate": 1.9363362480724488e-07, + "loss": 0.6996288299560547, + 
"step": 7020 + }, + { + "epoch": 1.617741935483871, + "grad_norm": 1.022000935531768, + "learning_rate": 1.9340836327836163e-07, + "loss": 0.7928623557090759, + "step": 7021 + }, + { + "epoch": 1.6179723502304149, + "grad_norm": 0.9992379944358776, + "learning_rate": 1.9318321882518674e-07, + "loss": 0.6275026202201843, + "step": 7022 + }, + { + "epoch": 1.6182027649769584, + "grad_norm": 1.26569218150676, + "learning_rate": 1.9295819148039948e-07, + "loss": 0.6660110950469971, + "step": 7023 + }, + { + "epoch": 1.6184331797235023, + "grad_norm": 1.0401535425644861, + "learning_rate": 1.9273328127666232e-07, + "loss": 0.8129480481147766, + "step": 7024 + }, + { + "epoch": 1.618663594470046, + "grad_norm": 1.146646002030878, + "learning_rate": 1.9250848824662046e-07, + "loss": 0.8070700168609619, + "step": 7025 + }, + { + "epoch": 1.6188940092165898, + "grad_norm": 1.4109951707076815, + "learning_rate": 1.922838124229028e-07, + "loss": 0.8123769760131836, + "step": 7026 + }, + { + "epoch": 1.6191244239631337, + "grad_norm": 0.9906397496222884, + "learning_rate": 1.920592538381205e-07, + "loss": 0.6552244424819946, + "step": 7027 + }, + { + "epoch": 1.6193548387096774, + "grad_norm": 1.0749749429025204, + "learning_rate": 1.9183481252486767e-07, + "loss": 0.8764367699623108, + "step": 7028 + }, + { + "epoch": 1.6195852534562212, + "grad_norm": 1.8347518044142406, + "learning_rate": 1.9161048851572215e-07, + "loss": 0.9075809717178345, + "step": 7029 + }, + { + "epoch": 1.6198156682027651, + "grad_norm": 1.1695152473088226, + "learning_rate": 1.9138628184324412e-07, + "loss": 0.7308327555656433, + "step": 7030 + }, + { + "epoch": 1.6200460829493086, + "grad_norm": 1.4269673355519676, + "learning_rate": 1.9116219253997655e-07, + "loss": 0.838142991065979, + "step": 7031 + }, + { + "epoch": 1.6202764976958526, + "grad_norm": 1.5286648636126694, + "learning_rate": 1.9093822063844623e-07, + "loss": 0.7681041359901428, + "step": 7032 + }, + { + "epoch": 1.6205069124423963, 
+ "grad_norm": 1.1858134701081806, + "learning_rate": 1.907143661711621e-07, + "loss": 0.7179980278015137, + "step": 7033 + }, + { + "epoch": 1.62073732718894, + "grad_norm": 1.2400863874788628, + "learning_rate": 1.9049062917061609e-07, + "loss": 0.8688361644744873, + "step": 7034 + }, + { + "epoch": 1.620967741935484, + "grad_norm": 1.0795907835047491, + "learning_rate": 1.9026700966928388e-07, + "loss": 0.6540178656578064, + "step": 7035 + }, + { + "epoch": 1.6211981566820275, + "grad_norm": 0.9042431894176799, + "learning_rate": 1.900435076996233e-07, + "loss": 0.7834869623184204, + "step": 7036 + }, + { + "epoch": 1.6214285714285714, + "grad_norm": 1.4376571546925008, + "learning_rate": 1.8982012329407505e-07, + "loss": 0.8895971775054932, + "step": 7037 + }, + { + "epoch": 1.6216589861751152, + "grad_norm": 1.1211547009425467, + "learning_rate": 1.8959685648506362e-07, + "loss": 0.6625858545303345, + "step": 7038 + }, + { + "epoch": 1.621889400921659, + "grad_norm": 1.4181930826937483, + "learning_rate": 1.893737073049957e-07, + "loss": 0.651193380355835, + "step": 7039 + }, + { + "epoch": 1.6221198156682028, + "grad_norm": 1.49480203283565, + "learning_rate": 1.8915067578626065e-07, + "loss": 0.8716636896133423, + "step": 7040 + }, + { + "epoch": 1.6223502304147466, + "grad_norm": 1.2037531898880258, + "learning_rate": 1.8892776196123196e-07, + "loss": 0.812637984752655, + "step": 7041 + }, + { + "epoch": 1.6225806451612903, + "grad_norm": 1.4952425500537936, + "learning_rate": 1.887049658622648e-07, + "loss": 0.7803184986114502, + "step": 7042 + }, + { + "epoch": 1.6228110599078343, + "grad_norm": 1.4542796613479354, + "learning_rate": 1.8848228752169793e-07, + "loss": 0.7884814739227295, + "step": 7043 + }, + { + "epoch": 1.6230414746543778, + "grad_norm": 1.3474838088832628, + "learning_rate": 1.8825972697185265e-07, + "loss": 0.7250671982765198, + "step": 7044 + }, + { + "epoch": 1.6232718894009217, + "grad_norm": 1.2055929150487366, + "learning_rate": 
1.880372842450332e-07, + "loss": 0.8078780174255371, + "step": 7045 + }, + { + "epoch": 1.6235023041474654, + "grad_norm": 1.2023825853188168, + "learning_rate": 1.878149593735272e-07, + "loss": 0.8523818254470825, + "step": 7046 + }, + { + "epoch": 1.6237327188940092, + "grad_norm": 1.2683431455334386, + "learning_rate": 1.875927523896047e-07, + "loss": 0.8772249221801758, + "step": 7047 + }, + { + "epoch": 1.6239631336405531, + "grad_norm": 1.0815338842817483, + "learning_rate": 1.8737066332551843e-07, + "loss": 0.7906323671340942, + "step": 7048 + }, + { + "epoch": 1.6241935483870966, + "grad_norm": 1.3048529080567755, + "learning_rate": 1.8714869221350492e-07, + "loss": 0.8010337352752686, + "step": 7049 + }, + { + "epoch": 1.6244239631336406, + "grad_norm": 1.365899691735964, + "learning_rate": 1.8692683908578267e-07, + "loss": 0.8978049755096436, + "step": 7050 + }, + { + "epoch": 1.6246543778801843, + "grad_norm": 1.159165616843268, + "learning_rate": 1.8670510397455297e-07, + "loss": 0.6622864007949829, + "step": 7051 + }, + { + "epoch": 1.624884792626728, + "grad_norm": 1.048079119212609, + "learning_rate": 1.8648348691200112e-07, + "loss": 0.7795406579971313, + "step": 7052 + }, + { + "epoch": 1.625115207373272, + "grad_norm": 1.2605630326093136, + "learning_rate": 1.8626198793029423e-07, + "loss": 0.9152054786682129, + "step": 7053 + }, + { + "epoch": 1.6253456221198157, + "grad_norm": 1.1757865506402991, + "learning_rate": 1.860406070615822e-07, + "loss": 0.719946563243866, + "step": 7054 + }, + { + "epoch": 1.6255760368663594, + "grad_norm": 1.2991129477224903, + "learning_rate": 1.8581934433799884e-07, + "loss": 0.782962441444397, + "step": 7055 + }, + { + "epoch": 1.6258064516129034, + "grad_norm": 1.118392005824248, + "learning_rate": 1.855981997916597e-07, + "loss": 0.8119732737541199, + "step": 7056 + }, + { + "epoch": 1.6260368663594469, + "grad_norm": 1.2362407544063627, + "learning_rate": 1.8537717345466351e-07, + "loss": 0.7585981488227844, + 
"step": 7057 + }, + { + "epoch": 1.6262672811059908, + "grad_norm": 1.158465388331893, + "learning_rate": 1.8515626535909258e-07, + "loss": 0.6846082210540771, + "step": 7058 + }, + { + "epoch": 1.6264976958525346, + "grad_norm": 1.230933966400155, + "learning_rate": 1.8493547553701083e-07, + "loss": 0.7355546951293945, + "step": 7059 + }, + { + "epoch": 1.6267281105990783, + "grad_norm": 1.15836260056471, + "learning_rate": 1.847148040204657e-07, + "loss": 0.6828340291976929, + "step": 7060 + }, + { + "epoch": 1.6269585253456222, + "grad_norm": 1.0499975056987365, + "learning_rate": 1.8449425084148763e-07, + "loss": 0.8513988256454468, + "step": 7061 + }, + { + "epoch": 1.6271889400921657, + "grad_norm": 1.0253802645646743, + "learning_rate": 1.8427381603208947e-07, + "loss": 0.6817762851715088, + "step": 7062 + }, + { + "epoch": 1.6274193548387097, + "grad_norm": 0.9793159138955572, + "learning_rate": 1.8405349962426699e-07, + "loss": 0.7314180731773376, + "step": 7063 + }, + { + "epoch": 1.6276497695852534, + "grad_norm": 1.326821994662743, + "learning_rate": 1.8383330164999898e-07, + "loss": 0.8193466663360596, + "step": 7064 + }, + { + "epoch": 1.6278801843317972, + "grad_norm": 1.2511428182189692, + "learning_rate": 1.8361322214124643e-07, + "loss": 0.7469823360443115, + "step": 7065 + }, + { + "epoch": 1.628110599078341, + "grad_norm": 1.4366505105110272, + "learning_rate": 1.8339326112995423e-07, + "loss": 0.8578816652297974, + "step": 7066 + }, + { + "epoch": 1.6283410138248848, + "grad_norm": 1.4615192025781363, + "learning_rate": 1.8317341864804903e-07, + "loss": 0.8384239077568054, + "step": 7067 + }, + { + "epoch": 1.6285714285714286, + "grad_norm": 1.122194991625306, + "learning_rate": 1.829536947274406e-07, + "loss": 0.8707646131515503, + "step": 7068 + }, + { + "epoch": 1.6288018433179725, + "grad_norm": 1.2319397578647793, + "learning_rate": 1.82734089400022e-07, + "loss": 0.6869943141937256, + "step": 7069 + }, + { + "epoch": 1.629032258064516, + 
"grad_norm": 1.3893487386527597, + "learning_rate": 1.8251460269766848e-07, + "loss": 0.7776129245758057, + "step": 7070 + }, + { + "epoch": 1.62926267281106, + "grad_norm": 1.104887091227765, + "learning_rate": 1.8229523465223785e-07, + "loss": 0.8126854300498962, + "step": 7071 + }, + { + "epoch": 1.6294930875576037, + "grad_norm": 1.0317016664034484, + "learning_rate": 1.8207598529557166e-07, + "loss": 0.6570720672607422, + "step": 7072 + }, + { + "epoch": 1.6297235023041474, + "grad_norm": 0.8859395443506812, + "learning_rate": 1.818568546594934e-07, + "loss": 0.6485599875450134, + "step": 7073 + }, + { + "epoch": 1.6299539170506914, + "grad_norm": 1.206554438869518, + "learning_rate": 1.816378427758093e-07, + "loss": 0.9132766723632812, + "step": 7074 + }, + { + "epoch": 1.6301843317972349, + "grad_norm": 1.4945592359199265, + "learning_rate": 1.8141894967630932e-07, + "loss": 0.8277286291122437, + "step": 7075 + }, + { + "epoch": 1.6304147465437788, + "grad_norm": 1.3670934774676884, + "learning_rate": 1.812001753927651e-07, + "loss": 0.7409358024597168, + "step": 7076 + }, + { + "epoch": 1.6306451612903226, + "grad_norm": 1.2664504423738472, + "learning_rate": 1.809815199569311e-07, + "loss": 0.8233339786529541, + "step": 7077 + }, + { + "epoch": 1.6308755760368663, + "grad_norm": 1.3727275296136565, + "learning_rate": 1.8076298340054563e-07, + "loss": 0.8704487085342407, + "step": 7078 + }, + { + "epoch": 1.6311059907834102, + "grad_norm": 1.503472652590263, + "learning_rate": 1.8054456575532862e-07, + "loss": 0.8845789432525635, + "step": 7079 + }, + { + "epoch": 1.631336405529954, + "grad_norm": 1.0523258046250148, + "learning_rate": 1.8032626705298272e-07, + "loss": 0.7241162061691284, + "step": 7080 + }, + { + "epoch": 1.6315668202764977, + "grad_norm": 1.193290512437584, + "learning_rate": 1.8010808732519433e-07, + "loss": 0.7065681219100952, + "step": 7081 + }, + { + "epoch": 1.6317972350230416, + "grad_norm": 1.281102564788521, + "learning_rate": 
1.7989002660363162e-07, + "loss": 0.6492339372634888, + "step": 7082 + }, + { + "epoch": 1.6320276497695851, + "grad_norm": 0.9673694389198546, + "learning_rate": 1.79672084919946e-07, + "loss": 0.7089248895645142, + "step": 7083 + }, + { + "epoch": 1.632258064516129, + "grad_norm": 1.0367687290608978, + "learning_rate": 1.794542623057712e-07, + "loss": 0.7030316591262817, + "step": 7084 + }, + { + "epoch": 1.6324884792626728, + "grad_norm": 1.1008255373775855, + "learning_rate": 1.792365587927239e-07, + "loss": 0.8626528978347778, + "step": 7085 + }, + { + "epoch": 1.6327188940092165, + "grad_norm": 1.1079176271315754, + "learning_rate": 1.7901897441240333e-07, + "loss": 0.8468672037124634, + "step": 7086 + }, + { + "epoch": 1.6329493087557605, + "grad_norm": 1.4611904004596754, + "learning_rate": 1.7880150919639214e-07, + "loss": 0.8546739816665649, + "step": 7087 + }, + { + "epoch": 1.633179723502304, + "grad_norm": 1.1949871550520017, + "learning_rate": 1.7858416317625468e-07, + "loss": 0.9187895655632019, + "step": 7088 + }, + { + "epoch": 1.633410138248848, + "grad_norm": 1.077248232790752, + "learning_rate": 1.7836693638353827e-07, + "loss": 0.7496293783187866, + "step": 7089 + }, + { + "epoch": 1.6336405529953917, + "grad_norm": 1.0517765508552415, + "learning_rate": 1.7814982884977358e-07, + "loss": 0.682653546333313, + "step": 7090 + }, + { + "epoch": 1.6338709677419354, + "grad_norm": 1.5003665522833143, + "learning_rate": 1.7793284060647295e-07, + "loss": 0.8065551519393921, + "step": 7091 + }, + { + "epoch": 1.6341013824884794, + "grad_norm": 1.134711484772771, + "learning_rate": 1.7771597168513263e-07, + "loss": 0.6605588793754578, + "step": 7092 + }, + { + "epoch": 1.634331797235023, + "grad_norm": 1.0012250391371058, + "learning_rate": 1.7749922211723034e-07, + "loss": 0.7257254123687744, + "step": 7093 + }, + { + "epoch": 1.6345622119815668, + "grad_norm": 1.1831263140816395, + "learning_rate": 1.772825919342269e-07, + "loss": 0.7438890933990479, + 
"step": 7094 + }, + { + "epoch": 1.6347926267281108, + "grad_norm": 1.250595895627981, + "learning_rate": 1.770660811675664e-07, + "loss": 0.8546249866485596, + "step": 7095 + }, + { + "epoch": 1.6350230414746543, + "grad_norm": 1.1835928544530323, + "learning_rate": 1.7684968984867466e-07, + "loss": 0.727516770362854, + "step": 7096 + }, + { + "epoch": 1.6352534562211982, + "grad_norm": 1.36586374940823, + "learning_rate": 1.766334180089606e-07, + "loss": 0.7578408718109131, + "step": 7097 + }, + { + "epoch": 1.635483870967742, + "grad_norm": 1.4255838450352876, + "learning_rate": 1.7641726567981606e-07, + "loss": 0.8253650665283203, + "step": 7098 + }, + { + "epoch": 1.6357142857142857, + "grad_norm": 1.3615057524495244, + "learning_rate": 1.7620123289261523e-07, + "loss": 0.8932347297668457, + "step": 7099 + }, + { + "epoch": 1.6359447004608296, + "grad_norm": 1.0770953977682685, + "learning_rate": 1.7598531967871465e-07, + "loss": 0.6661143898963928, + "step": 7100 + }, + { + "epoch": 1.6361751152073731, + "grad_norm": 1.2408264386151553, + "learning_rate": 1.7576952606945415e-07, + "loss": 0.8413572311401367, + "step": 7101 + }, + { + "epoch": 1.636405529953917, + "grad_norm": 1.2084626250429713, + "learning_rate": 1.7555385209615603e-07, + "loss": 0.713816225528717, + "step": 7102 + }, + { + "epoch": 1.6366359447004608, + "grad_norm": 1.67339389064804, + "learning_rate": 1.7533829779012466e-07, + "loss": 0.8588179349899292, + "step": 7103 + }, + { + "epoch": 1.6368663594470045, + "grad_norm": 1.3521357251955939, + "learning_rate": 1.7512286318264778e-07, + "loss": 0.8666437864303589, + "step": 7104 + }, + { + "epoch": 1.6370967741935485, + "grad_norm": 1.340257158830322, + "learning_rate": 1.7490754830499522e-07, + "loss": 0.9219843745231628, + "step": 7105 + }, + { + "epoch": 1.6373271889400922, + "grad_norm": 1.3285275552241094, + "learning_rate": 1.7469235318841956e-07, + "loss": 0.93767249584198, + "step": 7106 + }, + { + "epoch": 1.637557603686636, + 
"grad_norm": 1.2782247944953928, + "learning_rate": 1.7447727786415644e-07, + "loss": 0.7317457795143127, + "step": 7107 + }, + { + "epoch": 1.6377880184331797, + "grad_norm": 1.1023935137429937, + "learning_rate": 1.7426232236342365e-07, + "loss": 0.850578784942627, + "step": 7108 + }, + { + "epoch": 1.6380184331797234, + "grad_norm": 1.1932749051362488, + "learning_rate": 1.7404748671742143e-07, + "loss": 0.7580707669258118, + "step": 7109 + }, + { + "epoch": 1.6382488479262673, + "grad_norm": 1.4967576950530754, + "learning_rate": 1.738327709573333e-07, + "loss": 0.8393806219100952, + "step": 7110 + }, + { + "epoch": 1.638479262672811, + "grad_norm": 1.0170127852420416, + "learning_rate": 1.7361817511432474e-07, + "loss": 0.6641673445701599, + "step": 7111 + }, + { + "epoch": 1.6387096774193548, + "grad_norm": 1.2746608671167614, + "learning_rate": 1.734036992195438e-07, + "loss": 0.7570137977600098, + "step": 7112 + }, + { + "epoch": 1.6389400921658988, + "grad_norm": 1.1366436885649456, + "learning_rate": 1.7318934330412194e-07, + "loss": 0.78557288646698, + "step": 7113 + }, + { + "epoch": 1.6391705069124423, + "grad_norm": 1.3443988626089514, + "learning_rate": 1.729751073991721e-07, + "loss": 0.8309692740440369, + "step": 7114 + }, + { + "epoch": 1.6394009216589862, + "grad_norm": 1.0791152795033432, + "learning_rate": 1.727609915357908e-07, + "loss": 0.6409872770309448, + "step": 7115 + }, + { + "epoch": 1.63963133640553, + "grad_norm": 1.0106967037974632, + "learning_rate": 1.7254699574505648e-07, + "loss": 0.7916153073310852, + "step": 7116 + }, + { + "epoch": 1.6398617511520737, + "grad_norm": 1.5121844712494004, + "learning_rate": 1.7233312005803015e-07, + "loss": 0.7925357818603516, + "step": 7117 + }, + { + "epoch": 1.6400921658986176, + "grad_norm": 1.5493448906965575, + "learning_rate": 1.7211936450575594e-07, + "loss": 0.9124211668968201, + "step": 7118 + }, + { + "epoch": 1.6403225806451613, + "grad_norm": 1.2418161556418856, + "learning_rate": 
1.7190572911925994e-07, + "loss": 0.8905198574066162, + "step": 7119 + }, + { + "epoch": 1.640552995391705, + "grad_norm": 1.0755844253909046, + "learning_rate": 1.716922139295509e-07, + "loss": 0.8139728307723999, + "step": 7120 + }, + { + "epoch": 1.6407834101382488, + "grad_norm": 1.3621014779170746, + "learning_rate": 1.7147881896762074e-07, + "loss": 0.7607166767120361, + "step": 7121 + }, + { + "epoch": 1.6410138248847925, + "grad_norm": 1.282778120557478, + "learning_rate": 1.7126554426444316e-07, + "loss": 0.806864857673645, + "step": 7122 + }, + { + "epoch": 1.6412442396313365, + "grad_norm": 1.352241351446694, + "learning_rate": 1.710523898509747e-07, + "loss": 0.697334885597229, + "step": 7123 + }, + { + "epoch": 1.6414746543778802, + "grad_norm": 1.4205201103890581, + "learning_rate": 1.7083935575815455e-07, + "loss": 0.7313966751098633, + "step": 7124 + }, + { + "epoch": 1.641705069124424, + "grad_norm": 1.3868798260826238, + "learning_rate": 1.7062644201690413e-07, + "loss": 0.8857930898666382, + "step": 7125 + }, + { + "epoch": 1.6419354838709679, + "grad_norm": 1.0686783154078314, + "learning_rate": 1.7041364865812758e-07, + "loss": 0.7451884746551514, + "step": 7126 + }, + { + "epoch": 1.6421658986175114, + "grad_norm": 1.2220777026134708, + "learning_rate": 1.7020097571271186e-07, + "loss": 0.7023841142654419, + "step": 7127 + }, + { + "epoch": 1.6423963133640553, + "grad_norm": 1.2608302557028366, + "learning_rate": 1.6998842321152607e-07, + "loss": 0.708385705947876, + "step": 7128 + }, + { + "epoch": 1.642626728110599, + "grad_norm": 1.3854146642080662, + "learning_rate": 1.697759911854215e-07, + "loss": 0.7885474562644958, + "step": 7129 + }, + { + "epoch": 1.6428571428571428, + "grad_norm": 1.161295661131579, + "learning_rate": 1.695636796652331e-07, + "loss": 0.7054568529129028, + "step": 7130 + }, + { + "epoch": 1.6430875576036867, + "grad_norm": 1.1652742930387396, + "learning_rate": 1.6935148868177718e-07, + "loss": 0.6899726986885071, + 
"step": 7131 + }, + { + "epoch": 1.6433179723502302, + "grad_norm": 1.4011600897250127, + "learning_rate": 1.6913941826585288e-07, + "loss": 0.8558614253997803, + "step": 7132 + }, + { + "epoch": 1.6435483870967742, + "grad_norm": 1.2947217762783314, + "learning_rate": 1.6892746844824223e-07, + "loss": 0.7741858959197998, + "step": 7133 + }, + { + "epoch": 1.643778801843318, + "grad_norm": 1.130755528536183, + "learning_rate": 1.6871563925970943e-07, + "loss": 0.7332532405853271, + "step": 7134 + }, + { + "epoch": 1.6440092165898617, + "grad_norm": 1.4331915051670545, + "learning_rate": 1.6850393073100078e-07, + "loss": 0.8288085460662842, + "step": 7135 + }, + { + "epoch": 1.6442396313364056, + "grad_norm": 1.493040320153856, + "learning_rate": 1.682923428928461e-07, + "loss": 0.9470697641372681, + "step": 7136 + }, + { + "epoch": 1.6444700460829493, + "grad_norm": 1.1093535752232264, + "learning_rate": 1.6808087577595686e-07, + "loss": 0.7123041749000549, + "step": 7137 + }, + { + "epoch": 1.644700460829493, + "grad_norm": 1.3701909416221987, + "learning_rate": 1.6786952941102694e-07, + "loss": 0.8077690005302429, + "step": 7138 + }, + { + "epoch": 1.644930875576037, + "grad_norm": 1.3400770079054931, + "learning_rate": 1.6765830382873348e-07, + "loss": 0.767215371131897, + "step": 7139 + }, + { + "epoch": 1.6451612903225805, + "grad_norm": 1.3723903093182923, + "learning_rate": 1.6744719905973502e-07, + "loss": 0.7488540410995483, + "step": 7140 + }, + { + "epoch": 1.6453917050691245, + "grad_norm": 1.4546211260208752, + "learning_rate": 1.6723621513467378e-07, + "loss": 0.7841323018074036, + "step": 7141 + }, + { + "epoch": 1.6456221198156682, + "grad_norm": 1.2167195095267902, + "learning_rate": 1.6702535208417346e-07, + "loss": 0.65464186668396, + "step": 7142 + }, + { + "epoch": 1.645852534562212, + "grad_norm": 1.3347329400915569, + "learning_rate": 1.6681460993884056e-07, + "loss": 0.8845036029815674, + "step": 7143 + }, + { + "epoch": 1.6460829493087559, 
+ "grad_norm": 1.3318983430245122, + "learning_rate": 1.6660398872926396e-07, + "loss": 0.6741687655448914, + "step": 7144 + }, + { + "epoch": 1.6463133640552994, + "grad_norm": 1.4438874912830426, + "learning_rate": 1.663934884860152e-07, + "loss": 0.8656717538833618, + "step": 7145 + }, + { + "epoch": 1.6465437788018433, + "grad_norm": 1.3298318800949103, + "learning_rate": 1.6618310923964785e-07, + "loss": 0.7588434219360352, + "step": 7146 + }, + { + "epoch": 1.646774193548387, + "grad_norm": 1.3262924093620256, + "learning_rate": 1.6597285102069846e-07, + "loss": 0.7180176973342896, + "step": 7147 + }, + { + "epoch": 1.6470046082949308, + "grad_norm": 1.2551409816382322, + "learning_rate": 1.6576271385968576e-07, + "loss": 0.8253776431083679, + "step": 7148 + }, + { + "epoch": 1.6472350230414747, + "grad_norm": 1.2281736040805922, + "learning_rate": 1.6555269778711046e-07, + "loss": 0.7200941443443298, + "step": 7149 + }, + { + "epoch": 1.6474654377880185, + "grad_norm": 1.1059198918963296, + "learning_rate": 1.653428028334567e-07, + "loss": 0.7076164484024048, + "step": 7150 + }, + { + "epoch": 1.6476958525345622, + "grad_norm": 1.195055160265343, + "learning_rate": 1.6513302902919003e-07, + "loss": 0.8068090677261353, + "step": 7151 + }, + { + "epoch": 1.6479262672811061, + "grad_norm": 1.3947857709427287, + "learning_rate": 1.6492337640475884e-07, + "loss": 0.9712029099464417, + "step": 7152 + }, + { + "epoch": 1.6481566820276496, + "grad_norm": 1.406808701456467, + "learning_rate": 1.6471384499059438e-07, + "loss": 0.8359737992286682, + "step": 7153 + }, + { + "epoch": 1.6483870967741936, + "grad_norm": 1.0570634795327605, + "learning_rate": 1.645044348171094e-07, + "loss": 0.8066359758377075, + "step": 7154 + }, + { + "epoch": 1.6486175115207373, + "grad_norm": 1.3810484659709985, + "learning_rate": 1.642951459146995e-07, + "loss": 0.8717833757400513, + "step": 7155 + }, + { + "epoch": 1.648847926267281, + "grad_norm": 1.0992736543757442, + 
"learning_rate": 1.6408597831374305e-07, + "loss": 0.7335910201072693, + "step": 7156 + }, + { + "epoch": 1.649078341013825, + "grad_norm": 1.2397456033121492, + "learning_rate": 1.6387693204460028e-07, + "loss": 0.816049337387085, + "step": 7157 + }, + { + "epoch": 1.6493087557603685, + "grad_norm": 1.4068842390673124, + "learning_rate": 1.6366800713761364e-07, + "loss": 0.8060640096664429, + "step": 7158 + }, + { + "epoch": 1.6495391705069125, + "grad_norm": 1.2074799471388065, + "learning_rate": 1.6345920362310894e-07, + "loss": 0.8477619886398315, + "step": 7159 + }, + { + "epoch": 1.6497695852534562, + "grad_norm": 1.332601091577715, + "learning_rate": 1.6325052153139329e-07, + "loss": 0.9793992638587952, + "step": 7160 + }, + { + "epoch": 1.65, + "grad_norm": 1.1909988829986036, + "learning_rate": 1.6304196089275658e-07, + "loss": 0.8020002245903015, + "step": 7161 + }, + { + "epoch": 1.6502304147465439, + "grad_norm": 1.3231428787162685, + "learning_rate": 1.6283352173747146e-07, + "loss": 0.8226429224014282, + "step": 7162 + }, + { + "epoch": 1.6504608294930876, + "grad_norm": 1.2483952861501775, + "learning_rate": 1.6262520409579227e-07, + "loss": 0.7029248476028442, + "step": 7163 + }, + { + "epoch": 1.6506912442396313, + "grad_norm": 1.0969129808942812, + "learning_rate": 1.6241700799795631e-07, + "loss": 0.7234015464782715, + "step": 7164 + }, + { + "epoch": 1.6509216589861753, + "grad_norm": 1.3383637969539028, + "learning_rate": 1.6220893347418285e-07, + "loss": 0.854112982749939, + "step": 7165 + }, + { + "epoch": 1.6511520737327188, + "grad_norm": 1.2277405230752314, + "learning_rate": 1.6200098055467325e-07, + "loss": 0.8098663091659546, + "step": 7166 + }, + { + "epoch": 1.6513824884792627, + "grad_norm": 1.286099874995443, + "learning_rate": 1.617931492696123e-07, + "loss": 0.9032876491546631, + "step": 7167 + }, + { + "epoch": 1.6516129032258065, + "grad_norm": 1.0239384348378415, + "learning_rate": 1.6158543964916606e-07, + "loss": 
0.7048916816711426, + "step": 7168 + }, + { + "epoch": 1.6518433179723502, + "grad_norm": 1.2354879671689736, + "learning_rate": 1.6137785172348307e-07, + "loss": 0.879542350769043, + "step": 7169 + }, + { + "epoch": 1.6520737327188941, + "grad_norm": 1.1499858637392877, + "learning_rate": 1.611703855226949e-07, + "loss": 0.7851279377937317, + "step": 7170 + }, + { + "epoch": 1.6523041474654376, + "grad_norm": 1.3219595195357319, + "learning_rate": 1.6096304107691493e-07, + "loss": 0.779682457447052, + "step": 7171 + }, + { + "epoch": 1.6525345622119816, + "grad_norm": 1.2160096597693908, + "learning_rate": 1.6075581841623854e-07, + "loss": 0.7761027812957764, + "step": 7172 + }, + { + "epoch": 1.6527649769585253, + "grad_norm": 1.2474814185415584, + "learning_rate": 1.605487175707443e-07, + "loss": 0.726230263710022, + "step": 7173 + }, + { + "epoch": 1.652995391705069, + "grad_norm": 1.4211290590725025, + "learning_rate": 1.6034173857049238e-07, + "loss": 0.915956437587738, + "step": 7174 + }, + { + "epoch": 1.653225806451613, + "grad_norm": 1.2631109729400856, + "learning_rate": 1.6013488144552534e-07, + "loss": 0.8435969352722168, + "step": 7175 + }, + { + "epoch": 1.6534562211981567, + "grad_norm": 1.4370024530537882, + "learning_rate": 1.599281462258687e-07, + "loss": 0.7775791883468628, + "step": 7176 + }, + { + "epoch": 1.6536866359447004, + "grad_norm": 1.2504716465033257, + "learning_rate": 1.5972153294152945e-07, + "loss": 0.7578383684158325, + "step": 7177 + }, + { + "epoch": 1.6539170506912444, + "grad_norm": 1.25108951979748, + "learning_rate": 1.5951504162249706e-07, + "loss": 0.8378545045852661, + "step": 7178 + }, + { + "epoch": 1.654147465437788, + "grad_norm": 0.8833465476140244, + "learning_rate": 1.59308672298744e-07, + "loss": 0.7071488499641418, + "step": 7179 + }, + { + "epoch": 1.6543778801843319, + "grad_norm": 1.315489910714214, + "learning_rate": 1.591024250002243e-07, + "loss": 0.7424521446228027, + "step": 7180 + }, + { + "epoch": 
1.6546082949308756, + "grad_norm": 1.2002526550771535, + "learning_rate": 1.5889629975687401e-07, + "loss": 0.6503180265426636, + "step": 7181 + }, + { + "epoch": 1.6548387096774193, + "grad_norm": 1.1861762089682637, + "learning_rate": 1.5869029659861265e-07, + "loss": 0.7589888572692871, + "step": 7182 + }, + { + "epoch": 1.6550691244239633, + "grad_norm": 1.2877948406073703, + "learning_rate": 1.5848441555534109e-07, + "loss": 0.7609498500823975, + "step": 7183 + }, + { + "epoch": 1.6552995391705068, + "grad_norm": 1.1756552735153392, + "learning_rate": 1.582786566569425e-07, + "loss": 0.7813476324081421, + "step": 7184 + }, + { + "epoch": 1.6555299539170507, + "grad_norm": 1.1595327374780875, + "learning_rate": 1.5807301993328258e-07, + "loss": 0.7386292219161987, + "step": 7185 + }, + { + "epoch": 1.6557603686635944, + "grad_norm": 1.4106740697965885, + "learning_rate": 1.5786750541420922e-07, + "loss": 1.0402865409851074, + "step": 7186 + }, + { + "epoch": 1.6559907834101382, + "grad_norm": 1.071897744375966, + "learning_rate": 1.5766211312955246e-07, + "loss": 0.7375132441520691, + "step": 7187 + }, + { + "epoch": 1.6562211981566821, + "grad_norm": 1.3721197645813625, + "learning_rate": 1.574568431091251e-07, + "loss": 0.7903615236282349, + "step": 7188 + }, + { + "epoch": 1.6564516129032258, + "grad_norm": 1.1205445704505106, + "learning_rate": 1.5725169538272132e-07, + "loss": 0.6912896633148193, + "step": 7189 + }, + { + "epoch": 1.6566820276497696, + "grad_norm": 1.2659829320834666, + "learning_rate": 1.570466699801185e-07, + "loss": 0.7181826233863831, + "step": 7190 + }, + { + "epoch": 1.6569124423963135, + "grad_norm": 1.3941328099536103, + "learning_rate": 1.5684176693107566e-07, + "loss": 0.8328898549079895, + "step": 7191 + }, + { + "epoch": 1.657142857142857, + "grad_norm": 1.275566962551196, + "learning_rate": 1.5663698626533384e-07, + "loss": 0.7775120735168457, + "step": 7192 + }, + { + "epoch": 1.657373271889401, + "grad_norm": 
1.3683527646177032, + "learning_rate": 1.564323280126173e-07, + "loss": 0.8412137031555176, + "step": 7193 + }, + { + "epoch": 1.6576036866359447, + "grad_norm": 1.4192183215515342, + "learning_rate": 1.562277922026316e-07, + "loss": 0.7046825885772705, + "step": 7194 + }, + { + "epoch": 1.6578341013824884, + "grad_norm": 1.3386632639806328, + "learning_rate": 1.5602337886506468e-07, + "loss": 0.7107498645782471, + "step": 7195 + }, + { + "epoch": 1.6580645161290324, + "grad_norm": 1.1946522893092928, + "learning_rate": 1.558190880295872e-07, + "loss": 0.640724778175354, + "step": 7196 + }, + { + "epoch": 1.658294930875576, + "grad_norm": 1.3093502483074915, + "learning_rate": 1.556149197258515e-07, + "loss": 0.7856858968734741, + "step": 7197 + }, + { + "epoch": 1.6585253456221198, + "grad_norm": 1.4971129714340625, + "learning_rate": 1.554108739834923e-07, + "loss": 0.7956376075744629, + "step": 7198 + }, + { + "epoch": 1.6587557603686636, + "grad_norm": 1.2753834260169075, + "learning_rate": 1.5520695083212675e-07, + "loss": 0.721325159072876, + "step": 7199 + }, + { + "epoch": 1.6589861751152073, + "grad_norm": 1.060032555829029, + "learning_rate": 1.550031503013539e-07, + "loss": 0.7043335437774658, + "step": 7200 + }, + { + "epoch": 1.6592165898617512, + "grad_norm": 1.2269468216437214, + "learning_rate": 1.5479947242075496e-07, + "loss": 0.7154408693313599, + "step": 7201 + }, + { + "epoch": 1.659447004608295, + "grad_norm": 1.0598234159957265, + "learning_rate": 1.5459591721989397e-07, + "loss": 0.7353748083114624, + "step": 7202 + }, + { + "epoch": 1.6596774193548387, + "grad_norm": 1.1815091781809732, + "learning_rate": 1.5439248472831644e-07, + "loss": 0.7404372692108154, + "step": 7203 + }, + { + "epoch": 1.6599078341013827, + "grad_norm": 1.7521749620198364, + "learning_rate": 1.541891749755503e-07, + "loss": 0.8678613305091858, + "step": 7204 + }, + { + "epoch": 1.6601382488479262, + "grad_norm": 1.2663476960491773, + "learning_rate": 
1.5398598799110562e-07, + "loss": 0.7177796363830566, + "step": 7205 + }, + { + "epoch": 1.66036866359447, + "grad_norm": 1.3475911636796425, + "learning_rate": 1.537829238044749e-07, + "loss": 0.7610895037651062, + "step": 7206 + }, + { + "epoch": 1.6605990783410138, + "grad_norm": 1.355013126121341, + "learning_rate": 1.5357998244513227e-07, + "loss": 0.7340127825737, + "step": 7207 + }, + { + "epoch": 1.6608294930875576, + "grad_norm": 1.2008253519594887, + "learning_rate": 1.5337716394253498e-07, + "loss": 0.7060200572013855, + "step": 7208 + }, + { + "epoch": 1.6610599078341015, + "grad_norm": 1.306554098336219, + "learning_rate": 1.5317446832612147e-07, + "loss": 0.8592087030410767, + "step": 7209 + }, + { + "epoch": 1.661290322580645, + "grad_norm": 1.1630740877062444, + "learning_rate": 1.5297189562531264e-07, + "loss": 0.8687897324562073, + "step": 7210 + }, + { + "epoch": 1.661520737327189, + "grad_norm": 1.346256802747815, + "learning_rate": 1.5276944586951202e-07, + "loss": 0.8158563375473022, + "step": 7211 + }, + { + "epoch": 1.6617511520737327, + "grad_norm": 1.2436624388230366, + "learning_rate": 1.5256711908810482e-07, + "loss": 0.7734059691429138, + "step": 7212 + }, + { + "epoch": 1.6619815668202764, + "grad_norm": 1.4006583359216147, + "learning_rate": 1.5236491531045815e-07, + "loss": 0.8302994966506958, + "step": 7213 + }, + { + "epoch": 1.6622119815668204, + "grad_norm": 1.3250021353738068, + "learning_rate": 1.5216283456592216e-07, + "loss": 0.8474830389022827, + "step": 7214 + }, + { + "epoch": 1.662442396313364, + "grad_norm": 1.130266104375724, + "learning_rate": 1.5196087688382808e-07, + "loss": 0.7903469800949097, + "step": 7215 + }, + { + "epoch": 1.6626728110599078, + "grad_norm": 1.1131412296095682, + "learning_rate": 1.5175904229349035e-07, + "loss": 0.7756912708282471, + "step": 7216 + }, + { + "epoch": 1.6629032258064518, + "grad_norm": 1.4164367883683733, + "learning_rate": 1.5155733082420463e-07, + "loss": 0.7495905756950378, + 
"step": 7217 + }, + { + "epoch": 1.6631336405529953, + "grad_norm": 1.3394708776746769, + "learning_rate": 1.5135574250524897e-07, + "loss": 0.8536649942398071, + "step": 7218 + }, + { + "epoch": 1.6633640552995392, + "grad_norm": 1.3243776315844114, + "learning_rate": 1.5115427736588404e-07, + "loss": 0.7301580905914307, + "step": 7219 + }, + { + "epoch": 1.663594470046083, + "grad_norm": 1.324768351380299, + "learning_rate": 1.5095293543535203e-07, + "loss": 0.7131164073944092, + "step": 7220 + }, + { + "epoch": 1.6638248847926267, + "grad_norm": 1.0897989875613177, + "learning_rate": 1.5075171674287712e-07, + "loss": 0.708457350730896, + "step": 7221 + }, + { + "epoch": 1.6640552995391706, + "grad_norm": 1.402833248483696, + "learning_rate": 1.5055062131766662e-07, + "loss": 0.7509758472442627, + "step": 7222 + }, + { + "epoch": 1.6642857142857141, + "grad_norm": 1.1455053593625757, + "learning_rate": 1.503496491889089e-07, + "loss": 0.8401786088943481, + "step": 7223 + }, + { + "epoch": 1.664516129032258, + "grad_norm": 1.3755379329147759, + "learning_rate": 1.5014880038577482e-07, + "loss": 0.8578320741653442, + "step": 7224 + }, + { + "epoch": 1.6647465437788018, + "grad_norm": 1.0530962657504686, + "learning_rate": 1.4994807493741723e-07, + "loss": 0.6890276670455933, + "step": 7225 + }, + { + "epoch": 1.6649769585253456, + "grad_norm": 1.1705604667481366, + "learning_rate": 1.4974747287297128e-07, + "loss": 0.785246729850769, + "step": 7226 + }, + { + "epoch": 1.6652073732718895, + "grad_norm": 1.1145207566800768, + "learning_rate": 1.4954699422155382e-07, + "loss": 0.7826062440872192, + "step": 7227 + }, + { + "epoch": 1.6654377880184332, + "grad_norm": 1.392497287743248, + "learning_rate": 1.4934663901226452e-07, + "loss": 0.807513952255249, + "step": 7228 + }, + { + "epoch": 1.665668202764977, + "grad_norm": 1.0951466978132682, + "learning_rate": 1.4914640727418448e-07, + "loss": 0.8138872385025024, + "step": 7229 + }, + { + "epoch": 1.6658986175115207, 
+ "grad_norm": 1.0721150835685114, + "learning_rate": 1.489462990363768e-07, + "loss": 0.8465121984481812, + "step": 7230 + }, + { + "epoch": 1.6661290322580644, + "grad_norm": 1.2125852838751665, + "learning_rate": 1.4874631432788743e-07, + "loss": 0.7649251222610474, + "step": 7231 + }, + { + "epoch": 1.6663594470046084, + "grad_norm": 1.242983952838099, + "learning_rate": 1.485464531777436e-07, + "loss": 0.8297271132469177, + "step": 7232 + }, + { + "epoch": 1.666589861751152, + "grad_norm": 1.4592304164798606, + "learning_rate": 1.483467156149546e-07, + "loss": 0.7873194217681885, + "step": 7233 + }, + { + "epoch": 1.6668202764976958, + "grad_norm": 1.1529440121296932, + "learning_rate": 1.4814710166851274e-07, + "loss": 0.6924761533737183, + "step": 7234 + }, + { + "epoch": 1.6670506912442398, + "grad_norm": 0.9776015930659686, + "learning_rate": 1.4794761136739132e-07, + "loss": 0.6600887179374695, + "step": 7235 + }, + { + "epoch": 1.6672811059907833, + "grad_norm": 1.0700715817274216, + "learning_rate": 1.477482447405458e-07, + "loss": 0.6552041172981262, + "step": 7236 + }, + { + "epoch": 1.6675115207373272, + "grad_norm": 1.1844260959064823, + "learning_rate": 1.4754900181691465e-07, + "loss": 0.8609327077865601, + "step": 7237 + }, + { + "epoch": 1.667741935483871, + "grad_norm": 0.9877698580103615, + "learning_rate": 1.4734988262541726e-07, + "loss": 0.6970123052597046, + "step": 7238 + }, + { + "epoch": 1.6679723502304147, + "grad_norm": 1.1422057607025191, + "learning_rate": 1.4715088719495573e-07, + "loss": 0.7859683036804199, + "step": 7239 + }, + { + "epoch": 1.6682027649769586, + "grad_norm": 1.102405207717508, + "learning_rate": 1.4695201555441393e-07, + "loss": 0.7448029518127441, + "step": 7240 + }, + { + "epoch": 1.6684331797235024, + "grad_norm": 1.136418636365662, + "learning_rate": 1.4675326773265762e-07, + "loss": 0.7566728591918945, + "step": 7241 + }, + { + "epoch": 1.668663594470046, + "grad_norm": 1.183347797545015, + "learning_rate": 
1.465546437585351e-07, + "loss": 0.7563366889953613, + "step": 7242 + }, + { + "epoch": 1.6688940092165898, + "grad_norm": 1.2270668729431573, + "learning_rate": 1.4635614366087623e-07, + "loss": 0.8580834865570068, + "step": 7243 + }, + { + "epoch": 1.6691244239631335, + "grad_norm": 1.261588467565845, + "learning_rate": 1.4615776746849306e-07, + "loss": 0.6200178861618042, + "step": 7244 + }, + { + "epoch": 1.6693548387096775, + "grad_norm": 1.12353329539602, + "learning_rate": 1.4595951521017958e-07, + "loss": 0.8052491545677185, + "step": 7245 + }, + { + "epoch": 1.6695852534562212, + "grad_norm": 1.7485044689788691, + "learning_rate": 1.4576138691471186e-07, + "loss": 0.7383530735969543, + "step": 7246 + }, + { + "epoch": 1.669815668202765, + "grad_norm": 1.2061617795996018, + "learning_rate": 1.4556338261084776e-07, + "loss": 0.6735742092132568, + "step": 7247 + }, + { + "epoch": 1.670046082949309, + "grad_norm": 1.1671720957777614, + "learning_rate": 1.453655023273277e-07, + "loss": 0.7570016980171204, + "step": 7248 + }, + { + "epoch": 1.6702764976958524, + "grad_norm": 1.1212050061324152, + "learning_rate": 1.4516774609287364e-07, + "loss": 0.7271980047225952, + "step": 7249 + }, + { + "epoch": 1.6705069124423964, + "grad_norm": 1.3773952001351246, + "learning_rate": 1.449701139361894e-07, + "loss": 0.8567354083061218, + "step": 7250 + }, + { + "epoch": 1.67073732718894, + "grad_norm": 1.4372041287717652, + "learning_rate": 1.447726058859614e-07, + "loss": 0.8675428628921509, + "step": 7251 + }, + { + "epoch": 1.6709677419354838, + "grad_norm": 1.6475511282046704, + "learning_rate": 1.4457522197085748e-07, + "loss": 0.9131098389625549, + "step": 7252 + }, + { + "epoch": 1.6711981566820278, + "grad_norm": 0.9228526790942371, + "learning_rate": 1.4437796221952748e-07, + "loss": 0.7921037673950195, + "step": 7253 + }, + { + "epoch": 1.6714285714285713, + "grad_norm": 1.3314958050470875, + "learning_rate": 1.441808266606037e-07, + "loss": 0.7559863328933716, + 
"step": 7254 + }, + { + "epoch": 1.6716589861751152, + "grad_norm": 1.4253402064070324, + "learning_rate": 1.4398381532269998e-07, + "loss": 0.7433857917785645, + "step": 7255 + }, + { + "epoch": 1.671889400921659, + "grad_norm": 1.340982715064525, + "learning_rate": 1.4378692823441207e-07, + "loss": 0.8171184062957764, + "step": 7256 + }, + { + "epoch": 1.6721198156682027, + "grad_norm": 1.4295893582001031, + "learning_rate": 1.4359016542431824e-07, + "loss": 0.7296291589736938, + "step": 7257 + }, + { + "epoch": 1.6723502304147466, + "grad_norm": 1.1566282275472088, + "learning_rate": 1.4339352692097828e-07, + "loss": 0.7397829294204712, + "step": 7258 + }, + { + "epoch": 1.6725806451612903, + "grad_norm": 1.1030928795639288, + "learning_rate": 1.431970127529335e-07, + "loss": 0.6724194884300232, + "step": 7259 + }, + { + "epoch": 1.672811059907834, + "grad_norm": 1.266832602935082, + "learning_rate": 1.430006229487084e-07, + "loss": 0.7711449861526489, + "step": 7260 + }, + { + "epoch": 1.673041474654378, + "grad_norm": 1.0334522746934713, + "learning_rate": 1.428043575368083e-07, + "loss": 0.7581815719604492, + "step": 7261 + }, + { + "epoch": 1.6732718894009215, + "grad_norm": 1.2775574658714877, + "learning_rate": 1.4260821654572063e-07, + "loss": 0.7092517614364624, + "step": 7262 + }, + { + "epoch": 1.6735023041474655, + "grad_norm": 1.116987885688497, + "learning_rate": 1.4241220000391562e-07, + "loss": 0.646745502948761, + "step": 7263 + }, + { + "epoch": 1.6737327188940092, + "grad_norm": 1.0897996116307995, + "learning_rate": 1.4221630793984453e-07, + "loss": 0.7364122867584229, + "step": 7264 + }, + { + "epoch": 1.673963133640553, + "grad_norm": 1.0366138580080708, + "learning_rate": 1.4202054038194068e-07, + "loss": 0.8186795711517334, + "step": 7265 + }, + { + "epoch": 1.6741935483870969, + "grad_norm": 1.178861697439358, + "learning_rate": 1.4182489735861957e-07, + "loss": 0.7172378301620483, + "step": 7266 + }, + { + "epoch": 1.6744239631336404, + 
"grad_norm": 1.6433299949580555, + "learning_rate": 1.416293788982783e-07, + "loss": 0.8780974745750427, + "step": 7267 + }, + { + "epoch": 1.6746543778801843, + "grad_norm": 1.303060213158533, + "learning_rate": 1.4143398502929672e-07, + "loss": 0.9034930467605591, + "step": 7268 + }, + { + "epoch": 1.674884792626728, + "grad_norm": 1.283952582595571, + "learning_rate": 1.4123871578003543e-07, + "loss": 0.7994415760040283, + "step": 7269 + }, + { + "epoch": 1.6751152073732718, + "grad_norm": 1.2332939563797212, + "learning_rate": 1.410435711788376e-07, + "loss": 0.8327854871749878, + "step": 7270 + }, + { + "epoch": 1.6753456221198157, + "grad_norm": 1.3516689374751454, + "learning_rate": 1.408485512540285e-07, + "loss": 0.7667550444602966, + "step": 7271 + }, + { + "epoch": 1.6755760368663595, + "grad_norm": 1.3721126007283877, + "learning_rate": 1.4065365603391478e-07, + "loss": 0.8073924779891968, + "step": 7272 + }, + { + "epoch": 1.6758064516129032, + "grad_norm": 1.2537292403097655, + "learning_rate": 1.4045888554678497e-07, + "loss": 0.7265589237213135, + "step": 7273 + }, + { + "epoch": 1.6760368663594472, + "grad_norm": 1.4008103355507637, + "learning_rate": 1.402642398209104e-07, + "loss": 0.6912035942077637, + "step": 7274 + }, + { + "epoch": 1.6762672811059907, + "grad_norm": 1.4159985968960598, + "learning_rate": 1.400697188845432e-07, + "loss": 0.917953372001648, + "step": 7275 + }, + { + "epoch": 1.6764976958525346, + "grad_norm": 1.1092123664048492, + "learning_rate": 1.3987532276591774e-07, + "loss": 0.6989340782165527, + "step": 7276 + }, + { + "epoch": 1.6767281105990783, + "grad_norm": 1.0530722269060104, + "learning_rate": 1.396810514932507e-07, + "loss": 0.6648346185684204, + "step": 7277 + }, + { + "epoch": 1.676958525345622, + "grad_norm": 1.152242717428616, + "learning_rate": 1.3948690509474014e-07, + "loss": 0.6462730169296265, + "step": 7278 + }, + { + "epoch": 1.677188940092166, + "grad_norm": 1.0559078213581141, + "learning_rate": 
1.3929288359856584e-07, + "loss": 0.6084051132202148, + "step": 7279 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 1.2568155531692753, + "learning_rate": 1.3909898703289037e-07, + "loss": 0.8593035936355591, + "step": 7280 + }, + { + "epoch": 1.6776497695852535, + "grad_norm": 1.432799112874992, + "learning_rate": 1.389052154258572e-07, + "loss": 0.8064925670623779, + "step": 7281 + }, + { + "epoch": 1.6778801843317972, + "grad_norm": 1.3257643730794528, + "learning_rate": 1.3871156880559186e-07, + "loss": 0.7366064786911011, + "step": 7282 + }, + { + "epoch": 1.678110599078341, + "grad_norm": 1.4541745835743052, + "learning_rate": 1.3851804720020233e-07, + "loss": 0.8090124726295471, + "step": 7283 + }, + { + "epoch": 1.6783410138248849, + "grad_norm": 1.3768572400260246, + "learning_rate": 1.3832465063777787e-07, + "loss": 0.7326936721801758, + "step": 7284 + }, + { + "epoch": 1.6785714285714286, + "grad_norm": 1.1036181265329146, + "learning_rate": 1.3813137914638961e-07, + "loss": 0.7142004370689392, + "step": 7285 + }, + { + "epoch": 1.6788018433179723, + "grad_norm": 1.1850699819171153, + "learning_rate": 1.3793823275409066e-07, + "loss": 0.8358181715011597, + "step": 7286 + }, + { + "epoch": 1.6790322580645163, + "grad_norm": 1.341055264970921, + "learning_rate": 1.3774521148891583e-07, + "loss": 0.7337081432342529, + "step": 7287 + }, + { + "epoch": 1.6792626728110598, + "grad_norm": 1.079298746666331, + "learning_rate": 1.3755231537888222e-07, + "loss": 0.8029334545135498, + "step": 7288 + }, + { + "epoch": 1.6794930875576037, + "grad_norm": 1.1362422930327392, + "learning_rate": 1.373595444519884e-07, + "loss": 0.8132611513137817, + "step": 7289 + }, + { + "epoch": 1.6797235023041475, + "grad_norm": 1.2850987320352512, + "learning_rate": 1.3716689873621446e-07, + "loss": 0.7377278804779053, + "step": 7290 + }, + { + "epoch": 1.6799539170506912, + "grad_norm": 1.5545938019119256, + "learning_rate": 1.3697437825952307e-07, + "loss": 
0.788368284702301, + "step": 7291 + }, + { + "epoch": 1.6801843317972351, + "grad_norm": 1.3811107908360538, + "learning_rate": 1.3678198304985822e-07, + "loss": 0.8288586139678955, + "step": 7292 + }, + { + "epoch": 1.6804147465437786, + "grad_norm": 1.2973962244733976, + "learning_rate": 1.3658971313514567e-07, + "loss": 0.8534054160118103, + "step": 7293 + }, + { + "epoch": 1.6806451612903226, + "grad_norm": 1.261356018830994, + "learning_rate": 1.363975685432933e-07, + "loss": 0.8730596303939819, + "step": 7294 + }, + { + "epoch": 1.6808755760368663, + "grad_norm": 1.2262296688166254, + "learning_rate": 1.3620554930219076e-07, + "loss": 0.6891343593597412, + "step": 7295 + }, + { + "epoch": 1.68110599078341, + "grad_norm": 1.4944659665191207, + "learning_rate": 1.360136554397089e-07, + "loss": 0.8575270175933838, + "step": 7296 + }, + { + "epoch": 1.681336405529954, + "grad_norm": 1.1221716147697696, + "learning_rate": 1.3582188698370134e-07, + "loss": 0.82694011926651, + "step": 7297 + }, + { + "epoch": 1.6815668202764977, + "grad_norm": 1.1921152491764102, + "learning_rate": 1.3563024396200296e-07, + "loss": 0.6468113660812378, + "step": 7298 + }, + { + "epoch": 1.6817972350230415, + "grad_norm": 1.1634380991195066, + "learning_rate": 1.3543872640243016e-07, + "loss": 0.6818577647209167, + "step": 7299 + }, + { + "epoch": 1.6820276497695854, + "grad_norm": 1.262155726089824, + "learning_rate": 1.352473343327819e-07, + "loss": 0.7630767822265625, + "step": 7300 + }, + { + "epoch": 1.682258064516129, + "grad_norm": 1.3348546512512276, + "learning_rate": 1.3505606778083832e-07, + "loss": 0.9019678831100464, + "step": 7301 + }, + { + "epoch": 1.6824884792626729, + "grad_norm": 1.1302876731614566, + "learning_rate": 1.3486492677436123e-07, + "loss": 0.821324348449707, + "step": 7302 + }, + { + "epoch": 1.6827188940092166, + "grad_norm": 1.1997119452659193, + "learning_rate": 1.3467391134109495e-07, + "loss": 0.796151876449585, + "step": 7303 + }, + { + "epoch": 
1.6829493087557603, + "grad_norm": 1.298615109914031, + "learning_rate": 1.3448302150876488e-07, + "loss": 0.8020445108413696, + "step": 7304 + }, + { + "epoch": 1.6831797235023043, + "grad_norm": 0.9490183941784253, + "learning_rate": 1.3429225730507843e-07, + "loss": 0.7215749025344849, + "step": 7305 + }, + { + "epoch": 1.6834101382488478, + "grad_norm": 1.2708231250445967, + "learning_rate": 1.3410161875772474e-07, + "loss": 0.920941174030304, + "step": 7306 + }, + { + "epoch": 1.6836405529953917, + "grad_norm": 1.4523260098562263, + "learning_rate": 1.3391110589437494e-07, + "loss": 0.8979494571685791, + "step": 7307 + }, + { + "epoch": 1.6838709677419355, + "grad_norm": 1.3126261706157987, + "learning_rate": 1.337207187426812e-07, + "loss": 0.9125145673751831, + "step": 7308 + }, + { + "epoch": 1.6841013824884792, + "grad_norm": 1.1179697975279568, + "learning_rate": 1.3353045733027858e-07, + "loss": 0.8205714225769043, + "step": 7309 + }, + { + "epoch": 1.6843317972350231, + "grad_norm": 1.0993805126125902, + "learning_rate": 1.3334032168478305e-07, + "loss": 0.6914113759994507, + "step": 7310 + }, + { + "epoch": 1.6845622119815669, + "grad_norm": 1.3165472089957067, + "learning_rate": 1.3315031183379233e-07, + "loss": 0.7355014085769653, + "step": 7311 + }, + { + "epoch": 1.6847926267281106, + "grad_norm": 1.3581792517836289, + "learning_rate": 1.3296042780488637e-07, + "loss": 0.7564182281494141, + "step": 7312 + }, + { + "epoch": 1.6850230414746545, + "grad_norm": 1.197316556809727, + "learning_rate": 1.3277066962562643e-07, + "loss": 0.8091372847557068, + "step": 7313 + }, + { + "epoch": 1.685253456221198, + "grad_norm": 1.131878643977171, + "learning_rate": 1.3258103732355586e-07, + "loss": 0.7457877993583679, + "step": 7314 + }, + { + "epoch": 1.685483870967742, + "grad_norm": 1.2462081986852567, + "learning_rate": 1.3239153092619948e-07, + "loss": 0.861819863319397, + "step": 7315 + }, + { + "epoch": 1.6857142857142857, + "grad_norm": 
1.2291218741883772, + "learning_rate": 1.3220215046106353e-07, + "loss": 0.7698357105255127, + "step": 7316 + }, + { + "epoch": 1.6859447004608294, + "grad_norm": 1.2862793081172317, + "learning_rate": 1.320128959556369e-07, + "loss": 0.7889456152915955, + "step": 7317 + }, + { + "epoch": 1.6861751152073734, + "grad_norm": 1.0926817497008894, + "learning_rate": 1.3182376743738932e-07, + "loss": 0.6467938423156738, + "step": 7318 + }, + { + "epoch": 1.686405529953917, + "grad_norm": 0.962046315570081, + "learning_rate": 1.3163476493377245e-07, + "loss": 0.7202441692352295, + "step": 7319 + }, + { + "epoch": 1.6866359447004609, + "grad_norm": 1.2860571238613498, + "learning_rate": 1.3144588847222004e-07, + "loss": 0.7464008331298828, + "step": 7320 + }, + { + "epoch": 1.6868663594470046, + "grad_norm": 1.3323127704795366, + "learning_rate": 1.3125713808014704e-07, + "loss": 0.8924611806869507, + "step": 7321 + }, + { + "epoch": 1.6870967741935483, + "grad_norm": 1.5027995023789942, + "learning_rate": 1.3106851378495044e-07, + "loss": 0.6943146586418152, + "step": 7322 + }, + { + "epoch": 1.6873271889400923, + "grad_norm": 1.336362656918588, + "learning_rate": 1.308800156140085e-07, + "loss": 0.7335963249206543, + "step": 7323 + }, + { + "epoch": 1.687557603686636, + "grad_norm": 1.1540515039280186, + "learning_rate": 1.30691643594682e-07, + "loss": 0.6900516748428345, + "step": 7324 + }, + { + "epoch": 1.6877880184331797, + "grad_norm": 1.0161083273097216, + "learning_rate": 1.3050339775431262e-07, + "loss": 0.7230286598205566, + "step": 7325 + }, + { + "epoch": 1.6880184331797237, + "grad_norm": 1.3577939883495977, + "learning_rate": 1.3031527812022403e-07, + "loss": 0.8069840669631958, + "step": 7326 + }, + { + "epoch": 1.6882488479262672, + "grad_norm": 1.1850570268151976, + "learning_rate": 1.3012728471972134e-07, + "loss": 0.7598710060119629, + "step": 7327 + }, + { + "epoch": 1.6884792626728111, + "grad_norm": 1.1081098309526143, + "learning_rate": 
1.2993941758009164e-07, + "loss": 0.6817609071731567, + "step": 7328 + }, + { + "epoch": 1.6887096774193548, + "grad_norm": 1.1578322948538884, + "learning_rate": 1.2975167672860387e-07, + "loss": 0.6958975791931152, + "step": 7329 + }, + { + "epoch": 1.6889400921658986, + "grad_norm": 1.3026010781309694, + "learning_rate": 1.2956406219250814e-07, + "loss": 0.8270853757858276, + "step": 7330 + }, + { + "epoch": 1.6891705069124425, + "grad_norm": 1.2716142402347783, + "learning_rate": 1.2937657399903623e-07, + "loss": 0.8045610189437866, + "step": 7331 + }, + { + "epoch": 1.689400921658986, + "grad_norm": 1.3670021400758372, + "learning_rate": 1.2918921217540224e-07, + "loss": 0.6685627698898315, + "step": 7332 + }, + { + "epoch": 1.68963133640553, + "grad_norm": 1.481483528763015, + "learning_rate": 1.2900197674880142e-07, + "loss": 0.8157398700714111, + "step": 7333 + }, + { + "epoch": 1.6898617511520737, + "grad_norm": 1.1922253618562, + "learning_rate": 1.2881486774641025e-07, + "loss": 0.6142218112945557, + "step": 7334 + }, + { + "epoch": 1.6900921658986174, + "grad_norm": 1.2611165552955415, + "learning_rate": 1.2862788519538815e-07, + "loss": 0.7849327921867371, + "step": 7335 + }, + { + "epoch": 1.6903225806451614, + "grad_norm": 1.3074701765125263, + "learning_rate": 1.2844102912287457e-07, + "loss": 0.8035926818847656, + "step": 7336 + }, + { + "epoch": 1.6905529953917051, + "grad_norm": 1.26449405816571, + "learning_rate": 1.2825429955599209e-07, + "loss": 0.8456575870513916, + "step": 7337 + }, + { + "epoch": 1.6907834101382488, + "grad_norm": 1.0994096629111347, + "learning_rate": 1.2806769652184402e-07, + "loss": 0.7436026334762573, + "step": 7338 + }, + { + "epoch": 1.6910138248847926, + "grad_norm": 1.3946687886072922, + "learning_rate": 1.2788122004751522e-07, + "loss": 0.8315454721450806, + "step": 7339 + }, + { + "epoch": 1.6912442396313363, + "grad_norm": 1.1032652805797263, + "learning_rate": 1.2769487016007307e-07, + "loss": 
0.7425665855407715, + "step": 7340 + }, + { + "epoch": 1.6914746543778802, + "grad_norm": 1.210532059455236, + "learning_rate": 1.2750864688656572e-07, + "loss": 0.7899731993675232, + "step": 7341 + }, + { + "epoch": 1.691705069124424, + "grad_norm": 1.2339006903630358, + "learning_rate": 1.2732255025402327e-07, + "loss": 0.7637509703636169, + "step": 7342 + }, + { + "epoch": 1.6919354838709677, + "grad_norm": 1.2301886439270189, + "learning_rate": 1.2713658028945717e-07, + "loss": 0.793779730796814, + "step": 7343 + }, + { + "epoch": 1.6921658986175117, + "grad_norm": 1.2351914671209905, + "learning_rate": 1.2695073701986103e-07, + "loss": 0.7248083353042603, + "step": 7344 + }, + { + "epoch": 1.6923963133640552, + "grad_norm": 1.4318296651769333, + "learning_rate": 1.2676502047220973e-07, + "loss": 0.7506270408630371, + "step": 7345 + }, + { + "epoch": 1.692626728110599, + "grad_norm": 1.248314789497465, + "learning_rate": 1.2657943067345965e-07, + "loss": 0.7921839952468872, + "step": 7346 + }, + { + "epoch": 1.6928571428571428, + "grad_norm": 0.9630256947791611, + "learning_rate": 1.263939676505491e-07, + "loss": 0.7627893686294556, + "step": 7347 + }, + { + "epoch": 1.6930875576036866, + "grad_norm": 1.039168896728356, + "learning_rate": 1.262086314303973e-07, + "loss": 0.788955807685852, + "step": 7348 + }, + { + "epoch": 1.6933179723502305, + "grad_norm": 1.0370858136190912, + "learning_rate": 1.2602342203990612e-07, + "loss": 0.5527241826057434, + "step": 7349 + }, + { + "epoch": 1.6935483870967742, + "grad_norm": 1.344465363325951, + "learning_rate": 1.2583833950595825e-07, + "loss": 0.7324573397636414, + "step": 7350 + }, + { + "epoch": 1.693778801843318, + "grad_norm": 1.0731663336898336, + "learning_rate": 1.256533838554179e-07, + "loss": 0.6588207483291626, + "step": 7351 + }, + { + "epoch": 1.6940092165898617, + "grad_norm": 1.417078203000081, + "learning_rate": 1.2546855511513165e-07, + "loss": 0.7597184181213379, + "step": 7352 + }, + { + "epoch": 
1.6942396313364054, + "grad_norm": 1.1748568881342167, + "learning_rate": 1.2528385331192692e-07, + "loss": 0.7487671375274658, + "step": 7353 + }, + { + "epoch": 1.6944700460829494, + "grad_norm": 1.0203340332958148, + "learning_rate": 1.250992784726126e-07, + "loss": 0.757739245891571, + "step": 7354 + }, + { + "epoch": 1.694700460829493, + "grad_norm": 1.314521719717035, + "learning_rate": 1.249148306239801e-07, + "loss": 0.616966724395752, + "step": 7355 + }, + { + "epoch": 1.6949308755760368, + "grad_norm": 1.506626916778979, + "learning_rate": 1.2473050979280142e-07, + "loss": 0.9415719509124756, + "step": 7356 + }, + { + "epoch": 1.6951612903225808, + "grad_norm": 1.0903568482188648, + "learning_rate": 1.2454631600583044e-07, + "loss": 0.7731447815895081, + "step": 7357 + }, + { + "epoch": 1.6953917050691243, + "grad_norm": 1.2821570786422227, + "learning_rate": 1.2436224928980276e-07, + "loss": 0.800236701965332, + "step": 7358 + }, + { + "epoch": 1.6956221198156682, + "grad_norm": 1.2900334463062004, + "learning_rate": 1.241783096714356e-07, + "loss": 0.8113845586776733, + "step": 7359 + }, + { + "epoch": 1.695852534562212, + "grad_norm": 1.2157051726485628, + "learning_rate": 1.2399449717742706e-07, + "loss": 0.748763382434845, + "step": 7360 + }, + { + "epoch": 1.6960829493087557, + "grad_norm": 1.3769466349570898, + "learning_rate": 1.2381081183445774e-07, + "loss": 0.8595450520515442, + "step": 7361 + }, + { + "epoch": 1.6963133640552996, + "grad_norm": 1.240341465296028, + "learning_rate": 1.2362725366918913e-07, + "loss": 0.7800960540771484, + "step": 7362 + }, + { + "epoch": 1.6965437788018434, + "grad_norm": 1.1951306648014712, + "learning_rate": 1.2344382270826438e-07, + "loss": 0.6549400687217712, + "step": 7363 + }, + { + "epoch": 1.696774193548387, + "grad_norm": 1.1182982438102955, + "learning_rate": 1.2326051897830858e-07, + "loss": 0.7839380502700806, + "step": 7364 + }, + { + "epoch": 1.6970046082949308, + "grad_norm": 1.2576690972053175, + 
"learning_rate": 1.230773425059277e-07, + "loss": 0.8436654806137085, + "step": 7365 + }, + { + "epoch": 1.6972350230414746, + "grad_norm": 0.8415515075804344, + "learning_rate": 1.2289429331770974e-07, + "loss": 0.6517987251281738, + "step": 7366 + }, + { + "epoch": 1.6974654377880185, + "grad_norm": 1.073572916121381, + "learning_rate": 1.2271137144022392e-07, + "loss": 0.7108355760574341, + "step": 7367 + }, + { + "epoch": 1.6976958525345622, + "grad_norm": 1.138464806776697, + "learning_rate": 1.2252857690002094e-07, + "loss": 0.7801471948623657, + "step": 7368 + }, + { + "epoch": 1.697926267281106, + "grad_norm": 0.9980466100193536, + "learning_rate": 1.2234590972363358e-07, + "loss": 0.8240209221839905, + "step": 7369 + }, + { + "epoch": 1.69815668202765, + "grad_norm": 1.5026485017018454, + "learning_rate": 1.2216336993757558e-07, + "loss": 0.8119853138923645, + "step": 7370 + }, + { + "epoch": 1.6983870967741934, + "grad_norm": 0.9448426506131885, + "learning_rate": 1.2198095756834216e-07, + "loss": 0.7685642838478088, + "step": 7371 + }, + { + "epoch": 1.6986175115207374, + "grad_norm": 1.1884615399125027, + "learning_rate": 1.217986726424106e-07, + "loss": 0.7820984125137329, + "step": 7372 + }, + { + "epoch": 1.698847926267281, + "grad_norm": 1.4933868054084445, + "learning_rate": 1.2161651518623916e-07, + "loss": 0.8051085472106934, + "step": 7373 + }, + { + "epoch": 1.6990783410138248, + "grad_norm": 1.16418962691877, + "learning_rate": 1.2143448522626742e-07, + "loss": 0.828999400138855, + "step": 7374 + }, + { + "epoch": 1.6993087557603688, + "grad_norm": 1.513005376638313, + "learning_rate": 1.2125258278891738e-07, + "loss": 0.8215579986572266, + "step": 7375 + }, + { + "epoch": 1.6995391705069123, + "grad_norm": 1.2614405602995598, + "learning_rate": 1.2107080790059156e-07, + "loss": 0.9362014532089233, + "step": 7376 + }, + { + "epoch": 1.6997695852534562, + "grad_norm": 1.014310262155135, + "learning_rate": 1.2088916058767428e-07, + "loss": 
0.7789602279663086, + "step": 7377 + }, + { + "epoch": 1.7, + "grad_norm": 1.322797235291574, + "learning_rate": 1.2070764087653163e-07, + "loss": 0.8371152877807617, + "step": 7378 + }, + { + "epoch": 1.7002304147465437, + "grad_norm": 1.2225532720655308, + "learning_rate": 1.2052624879351103e-07, + "loss": 0.64423668384552, + "step": 7379 + }, + { + "epoch": 1.7004608294930876, + "grad_norm": 1.3442813905677369, + "learning_rate": 1.203449843649409e-07, + "loss": 0.7635257244110107, + "step": 7380 + }, + { + "epoch": 1.7006912442396314, + "grad_norm": 1.15010903043395, + "learning_rate": 1.2016384761713194e-07, + "loss": 0.7859230041503906, + "step": 7381 + }, + { + "epoch": 1.700921658986175, + "grad_norm": 1.0218637195871514, + "learning_rate": 1.199828385763757e-07, + "loss": 0.7066336870193481, + "step": 7382 + }, + { + "epoch": 1.701152073732719, + "grad_norm": 1.1069799499148123, + "learning_rate": 1.198019572689455e-07, + "loss": 0.7190531492233276, + "step": 7383 + }, + { + "epoch": 1.7013824884792625, + "grad_norm": 1.520158585759741, + "learning_rate": 1.1962120372109586e-07, + "loss": 0.7389136552810669, + "step": 7384 + }, + { + "epoch": 1.7016129032258065, + "grad_norm": 1.5406735409523549, + "learning_rate": 1.1944057795906316e-07, + "loss": 0.774425745010376, + "step": 7385 + }, + { + "epoch": 1.7018433179723502, + "grad_norm": 1.0093305285556118, + "learning_rate": 1.1926008000906484e-07, + "loss": 0.7566725015640259, + "step": 7386 + }, + { + "epoch": 1.702073732718894, + "grad_norm": 1.153413777620863, + "learning_rate": 1.1907970989729987e-07, + "loss": 0.6891475915908813, + "step": 7387 + }, + { + "epoch": 1.702304147465438, + "grad_norm": 1.08541401133235, + "learning_rate": 1.1889946764994873e-07, + "loss": 0.6188378930091858, + "step": 7388 + }, + { + "epoch": 1.7025345622119814, + "grad_norm": 1.1534210847497282, + "learning_rate": 1.1871935329317362e-07, + "loss": 0.703027069568634, + "step": 7389 + }, + { + "epoch": 1.7027649769585254, + 
"grad_norm": 1.2738888238498793, + "learning_rate": 1.1853936685311772e-07, + "loss": 0.9253139495849609, + "step": 7390 + }, + { + "epoch": 1.702995391705069, + "grad_norm": 1.015934424294919, + "learning_rate": 1.1835950835590569e-07, + "loss": 0.6504430770874023, + "step": 7391 + }, + { + "epoch": 1.7032258064516128, + "grad_norm": 1.0145240040509695, + "learning_rate": 1.18179777827644e-07, + "loss": 0.6656354665756226, + "step": 7392 + }, + { + "epoch": 1.7034562211981568, + "grad_norm": 1.451290987899464, + "learning_rate": 1.1800017529442019e-07, + "loss": 0.8534063100814819, + "step": 7393 + }, + { + "epoch": 1.7036866359447005, + "grad_norm": 1.1896366783409809, + "learning_rate": 1.178207007823031e-07, + "loss": 0.8315893411636353, + "step": 7394 + }, + { + "epoch": 1.7039170506912442, + "grad_norm": 1.1636407894423468, + "learning_rate": 1.1764135431734367e-07, + "loss": 0.8161677718162537, + "step": 7395 + }, + { + "epoch": 1.7041474654377882, + "grad_norm": 1.418011015190517, + "learning_rate": 1.1746213592557352e-07, + "loss": 0.7942687273025513, + "step": 7396 + }, + { + "epoch": 1.7043778801843317, + "grad_norm": 0.9938387819486493, + "learning_rate": 1.1728304563300584e-07, + "loss": 0.8056384325027466, + "step": 7397 + }, + { + "epoch": 1.7046082949308756, + "grad_norm": 1.3626759695428086, + "learning_rate": 1.1710408346563583e-07, + "loss": 0.8535007238388062, + "step": 7398 + }, + { + "epoch": 1.7048387096774194, + "grad_norm": 1.1491077351100174, + "learning_rate": 1.1692524944943916e-07, + "loss": 0.7729576826095581, + "step": 7399 + }, + { + "epoch": 1.705069124423963, + "grad_norm": 1.2729586784281095, + "learning_rate": 1.1674654361037328e-07, + "loss": 0.7755489349365234, + "step": 7400 + }, + { + "epoch": 1.705299539170507, + "grad_norm": 1.7008944920024607, + "learning_rate": 1.1656796597437757e-07, + "loss": 0.8752193450927734, + "step": 7401 + }, + { + "epoch": 1.7055299539170505, + "grad_norm": 1.0505715773863387, + "learning_rate": 
1.1638951656737217e-07, + "loss": 0.7135917544364929, + "step": 7402 + }, + { + "epoch": 1.7057603686635945, + "grad_norm": 1.1807276735663779, + "learning_rate": 1.1621119541525859e-07, + "loss": 0.7378124594688416, + "step": 7403 + }, + { + "epoch": 1.7059907834101382, + "grad_norm": 1.1699041912496186, + "learning_rate": 1.1603300254391978e-07, + "loss": 0.637479305267334, + "step": 7404 + }, + { + "epoch": 1.706221198156682, + "grad_norm": 0.9107859734790176, + "learning_rate": 1.1585493797922075e-07, + "loss": 0.6162394881248474, + "step": 7405 + }, + { + "epoch": 1.706451612903226, + "grad_norm": 1.0832025296305532, + "learning_rate": 1.1567700174700701e-07, + "loss": 0.7836494445800781, + "step": 7406 + }, + { + "epoch": 1.7066820276497696, + "grad_norm": 1.3117851793296085, + "learning_rate": 1.154991938731057e-07, + "loss": 0.6297281980514526, + "step": 7407 + }, + { + "epoch": 1.7069124423963133, + "grad_norm": 0.9987358693502671, + "learning_rate": 1.1532151438332549e-07, + "loss": 0.7190115451812744, + "step": 7408 + }, + { + "epoch": 1.7071428571428573, + "grad_norm": 1.353324439932077, + "learning_rate": 1.151439633034561e-07, + "loss": 0.7578086853027344, + "step": 7409 + }, + { + "epoch": 1.7073732718894008, + "grad_norm": 0.986158496671175, + "learning_rate": 1.1496654065926925e-07, + "loss": 0.7347216010093689, + "step": 7410 + }, + { + "epoch": 1.7076036866359448, + "grad_norm": 1.2279759650694806, + "learning_rate": 1.1478924647651711e-07, + "loss": 0.7940168380737305, + "step": 7411 + }, + { + "epoch": 1.7078341013824885, + "grad_norm": 1.2336717780625897, + "learning_rate": 1.1461208078093431e-07, + "loss": 0.7625843286514282, + "step": 7412 + }, + { + "epoch": 1.7080645161290322, + "grad_norm": 1.5771280074431184, + "learning_rate": 1.1443504359823585e-07, + "loss": 0.7603492736816406, + "step": 7413 + }, + { + "epoch": 1.7082949308755762, + "grad_norm": 1.1263740749103024, + "learning_rate": 1.1425813495411817e-07, + "loss": 
0.8746018409729004, + "step": 7414 + }, + { + "epoch": 1.7085253456221197, + "grad_norm": 1.2947959548271089, + "learning_rate": 1.1408135487425996e-07, + "loss": 0.72724449634552, + "step": 7415 + }, + { + "epoch": 1.7087557603686636, + "grad_norm": 0.794129708213959, + "learning_rate": 1.1390470338432023e-07, + "loss": 0.6874721646308899, + "step": 7416 + }, + { + "epoch": 1.7089861751152073, + "grad_norm": 0.9673124457868691, + "learning_rate": 1.1372818050993959e-07, + "loss": 0.7129265666007996, + "step": 7417 + }, + { + "epoch": 1.709216589861751, + "grad_norm": 1.3811139782005308, + "learning_rate": 1.1355178627674045e-07, + "loss": 0.7505607008934021, + "step": 7418 + }, + { + "epoch": 1.709447004608295, + "grad_norm": 1.1149863565678992, + "learning_rate": 1.1337552071032608e-07, + "loss": 0.7497769594192505, + "step": 7419 + }, + { + "epoch": 1.7096774193548387, + "grad_norm": 1.342673457996757, + "learning_rate": 1.1319938383628092e-07, + "loss": 0.792352020740509, + "step": 7420 + }, + { + "epoch": 1.7099078341013825, + "grad_norm": 1.1720516000619245, + "learning_rate": 1.1302337568017139e-07, + "loss": 0.780627965927124, + "step": 7421 + }, + { + "epoch": 1.7101382488479264, + "grad_norm": 1.2702279678670012, + "learning_rate": 1.1284749626754464e-07, + "loss": 0.7024368047714233, + "step": 7422 + }, + { + "epoch": 1.71036866359447, + "grad_norm": 1.2880158142162281, + "learning_rate": 1.1267174562392945e-07, + "loss": 0.756782591342926, + "step": 7423 + }, + { + "epoch": 1.7105990783410139, + "grad_norm": 1.2881350167706749, + "learning_rate": 1.1249612377483552e-07, + "loss": 0.8585456609725952, + "step": 7424 + }, + { + "epoch": 1.7108294930875576, + "grad_norm": 1.2079330064248406, + "learning_rate": 1.1232063074575449e-07, + "loss": 0.8610610961914062, + "step": 7425 + }, + { + "epoch": 1.7110599078341013, + "grad_norm": 1.2629835504337044, + "learning_rate": 1.1214526656215872e-07, + "loss": 0.7493829131126404, + "step": 7426 + }, + { + "epoch": 
1.7112903225806453, + "grad_norm": 1.1677189056932475, + "learning_rate": 1.1197003124950222e-07, + "loss": 0.7479410171508789, + "step": 7427 + }, + { + "epoch": 1.7115207373271888, + "grad_norm": 1.2024881147733253, + "learning_rate": 1.1179492483322006e-07, + "loss": 0.8056051135063171, + "step": 7428 + }, + { + "epoch": 1.7117511520737327, + "grad_norm": 1.2393004464149642, + "learning_rate": 1.1161994733872848e-07, + "loss": 0.8448202610015869, + "step": 7429 + }, + { + "epoch": 1.7119815668202765, + "grad_norm": 1.3170634810384778, + "learning_rate": 1.1144509879142571e-07, + "loss": 0.7783033847808838, + "step": 7430 + }, + { + "epoch": 1.7122119815668202, + "grad_norm": 1.2589188548838177, + "learning_rate": 1.1127037921669058e-07, + "loss": 0.6591838598251343, + "step": 7431 + }, + { + "epoch": 1.7124423963133641, + "grad_norm": 1.4141951291447457, + "learning_rate": 1.1109578863988322e-07, + "loss": 0.8508287668228149, + "step": 7432 + }, + { + "epoch": 1.7126728110599079, + "grad_norm": 1.0110596601133535, + "learning_rate": 1.1092132708634549e-07, + "loss": 0.7981588840484619, + "step": 7433 + }, + { + "epoch": 1.7129032258064516, + "grad_norm": 1.1560054105611206, + "learning_rate": 1.1074699458140025e-07, + "loss": 0.7754761576652527, + "step": 7434 + }, + { + "epoch": 1.7131336405529956, + "grad_norm": 1.4234254723014017, + "learning_rate": 1.1057279115035124e-07, + "loss": 0.8487040996551514, + "step": 7435 + }, + { + "epoch": 1.713364055299539, + "grad_norm": 1.2105987237993454, + "learning_rate": 1.1039871681848433e-07, + "loss": 0.8175803422927856, + "step": 7436 + }, + { + "epoch": 1.713594470046083, + "grad_norm": 1.0010434545431337, + "learning_rate": 1.1022477161106591e-07, + "loss": 0.8361574411392212, + "step": 7437 + }, + { + "epoch": 1.7138248847926267, + "grad_norm": 1.1841110354603608, + "learning_rate": 1.1005095555334409e-07, + "loss": 0.6253053545951843, + "step": 7438 + }, + { + "epoch": 1.7140552995391705, + "grad_norm": 
1.5361244402123166, + "learning_rate": 1.0987726867054792e-07, + "loss": 0.8035168647766113, + "step": 7439 + }, + { + "epoch": 1.7142857142857144, + "grad_norm": 1.0148513511065955, + "learning_rate": 1.0970371098788767e-07, + "loss": 0.7352867722511292, + "step": 7440 + }, + { + "epoch": 1.714516129032258, + "grad_norm": 1.1469128257526675, + "learning_rate": 1.0953028253055541e-07, + "loss": 0.7540202140808105, + "step": 7441 + }, + { + "epoch": 1.7147465437788019, + "grad_norm": 1.2653522382652087, + "learning_rate": 1.0935698332372379e-07, + "loss": 0.7883191108703613, + "step": 7442 + }, + { + "epoch": 1.7149769585253456, + "grad_norm": 1.2745739855530656, + "learning_rate": 1.0918381339254701e-07, + "loss": 0.7581819295883179, + "step": 7443 + }, + { + "epoch": 1.7152073732718893, + "grad_norm": 1.1705192956080483, + "learning_rate": 1.090107727621603e-07, + "loss": 0.8066321611404419, + "step": 7444 + }, + { + "epoch": 1.7154377880184333, + "grad_norm": 1.1820593590096908, + "learning_rate": 1.0883786145768037e-07, + "loss": 0.7427937984466553, + "step": 7445 + }, + { + "epoch": 1.715668202764977, + "grad_norm": 1.3132499515834741, + "learning_rate": 1.0866507950420523e-07, + "loss": 0.7736409902572632, + "step": 7446 + }, + { + "epoch": 1.7158986175115207, + "grad_norm": 1.1930714060597967, + "learning_rate": 1.0849242692681382e-07, + "loss": 0.7253416776657104, + "step": 7447 + }, + { + "epoch": 1.7161290322580647, + "grad_norm": 0.9521960056037656, + "learning_rate": 1.0831990375056643e-07, + "loss": 0.7933270931243896, + "step": 7448 + }, + { + "epoch": 1.7163594470046082, + "grad_norm": 1.407227257578247, + "learning_rate": 1.0814751000050437e-07, + "loss": 0.7946739196777344, + "step": 7449 + }, + { + "epoch": 1.7165898617511521, + "grad_norm": 1.2776015375287177, + "learning_rate": 1.0797524570165073e-07, + "loss": 0.7798205614089966, + "step": 7450 + }, + { + "epoch": 1.7168202764976959, + "grad_norm": 1.2558469001082564, + "learning_rate": 
1.078031108790094e-07, + "loss": 0.616565465927124, + "step": 7451 + }, + { + "epoch": 1.7170506912442396, + "grad_norm": 1.2221718815584264, + "learning_rate": 1.0763110555756516e-07, + "loss": 0.8406517505645752, + "step": 7452 + }, + { + "epoch": 1.7172811059907835, + "grad_norm": 1.3773523411720476, + "learning_rate": 1.0745922976228483e-07, + "loss": 0.8827311992645264, + "step": 7453 + }, + { + "epoch": 1.717511520737327, + "grad_norm": 1.2403910104019171, + "learning_rate": 1.0728748351811567e-07, + "loss": 0.585588812828064, + "step": 7454 + }, + { + "epoch": 1.717741935483871, + "grad_norm": 0.9381679846122704, + "learning_rate": 1.0711586684998631e-07, + "loss": 0.6305320858955383, + "step": 7455 + }, + { + "epoch": 1.7179723502304147, + "grad_norm": 1.0634674542520166, + "learning_rate": 1.0694437978280701e-07, + "loss": 0.7982319593429565, + "step": 7456 + }, + { + "epoch": 1.7182027649769585, + "grad_norm": 1.3468349324058282, + "learning_rate": 1.0677302234146879e-07, + "loss": 0.7792943716049194, + "step": 7457 + }, + { + "epoch": 1.7184331797235024, + "grad_norm": 1.308217346349807, + "learning_rate": 1.0660179455084372e-07, + "loss": 0.7019332051277161, + "step": 7458 + }, + { + "epoch": 1.7186635944700461, + "grad_norm": 1.2330257329830192, + "learning_rate": 1.0643069643578562e-07, + "loss": 0.8088894486427307, + "step": 7459 + }, + { + "epoch": 1.7188940092165899, + "grad_norm": 1.5573400915532798, + "learning_rate": 1.0625972802112882e-07, + "loss": 0.799231767654419, + "step": 7460 + }, + { + "epoch": 1.7191244239631336, + "grad_norm": 0.950308854182165, + "learning_rate": 1.0608888933168958e-07, + "loss": 0.7265694737434387, + "step": 7461 + }, + { + "epoch": 1.7193548387096773, + "grad_norm": 1.1717288459308963, + "learning_rate": 1.0591818039226464e-07, + "loss": 0.8566714525222778, + "step": 7462 + }, + { + "epoch": 1.7195852534562213, + "grad_norm": 1.2255123057406947, + "learning_rate": 1.0574760122763216e-07, + "loss": 
0.811874508857727, + "step": 7463 + }, + { + "epoch": 1.719815668202765, + "grad_norm": 1.0493349652228454, + "learning_rate": 1.0557715186255156e-07, + "loss": 0.7990631461143494, + "step": 7464 + }, + { + "epoch": 1.7200460829493087, + "grad_norm": 1.3183681626099089, + "learning_rate": 1.0540683232176307e-07, + "loss": 0.8108334541320801, + "step": 7465 + }, + { + "epoch": 1.7202764976958527, + "grad_norm": 1.8420274096120763, + "learning_rate": 1.0523664262998888e-07, + "loss": 0.8927996158599854, + "step": 7466 + }, + { + "epoch": 1.7205069124423962, + "grad_norm": 1.1733285346989661, + "learning_rate": 1.0506658281193138e-07, + "loss": 0.7277737855911255, + "step": 7467 + }, + { + "epoch": 1.7207373271889401, + "grad_norm": 1.0503912207473127, + "learning_rate": 1.0489665289227467e-07, + "loss": 0.7229233980178833, + "step": 7468 + }, + { + "epoch": 1.7209677419354839, + "grad_norm": 1.298634428768958, + "learning_rate": 1.0472685289568373e-07, + "loss": 0.7211846709251404, + "step": 7469 + }, + { + "epoch": 1.7211981566820276, + "grad_norm": 1.1862135261022106, + "learning_rate": 1.0455718284680504e-07, + "loss": 0.8239504098892212, + "step": 7470 + }, + { + "epoch": 1.7214285714285715, + "grad_norm": 1.2304377847970827, + "learning_rate": 1.0438764277026579e-07, + "loss": 0.7492972612380981, + "step": 7471 + }, + { + "epoch": 1.7216589861751153, + "grad_norm": 1.3060072891774943, + "learning_rate": 1.0421823269067442e-07, + "loss": 0.7658303380012512, + "step": 7472 + }, + { + "epoch": 1.721889400921659, + "grad_norm": 1.0618950256674606, + "learning_rate": 1.0404895263262092e-07, + "loss": 0.708244800567627, + "step": 7473 + }, + { + "epoch": 1.7221198156682027, + "grad_norm": 1.1946101503339825, + "learning_rate": 1.0387980262067575e-07, + "loss": 0.7575969696044922, + "step": 7474 + }, + { + "epoch": 1.7223502304147464, + "grad_norm": 1.3899740319803422, + "learning_rate": 1.0371078267939082e-07, + "loss": 0.7321910262107849, + "step": 7475 + }, + { + 
"epoch": 1.7225806451612904, + "grad_norm": 1.3828231848460977, + "learning_rate": 1.035418928332995e-07, + "loss": 0.7812562584877014, + "step": 7476 + }, + { + "epoch": 1.7228110599078341, + "grad_norm": 1.3136112254743646, + "learning_rate": 1.0337313310691565e-07, + "loss": 0.7272104620933533, + "step": 7477 + }, + { + "epoch": 1.7230414746543778, + "grad_norm": 1.1508289944716614, + "learning_rate": 1.032045035247343e-07, + "loss": 0.7006442546844482, + "step": 7478 + }, + { + "epoch": 1.7232718894009218, + "grad_norm": 1.138231534813956, + "learning_rate": 1.0303600411123226e-07, + "loss": 0.7082154750823975, + "step": 7479 + }, + { + "epoch": 1.7235023041474653, + "grad_norm": 1.4157478972732351, + "learning_rate": 1.0286763489086681e-07, + "loss": 0.7204899191856384, + "step": 7480 + }, + { + "epoch": 1.7237327188940093, + "grad_norm": 1.1954797848768004, + "learning_rate": 1.026993958880763e-07, + "loss": 0.9119626879692078, + "step": 7481 + }, + { + "epoch": 1.723963133640553, + "grad_norm": 1.0923155592461768, + "learning_rate": 1.0253128712728088e-07, + "loss": 0.5961707830429077, + "step": 7482 + }, + { + "epoch": 1.7241935483870967, + "grad_norm": 1.1032837677908203, + "learning_rate": 1.023633086328809e-07, + "loss": 0.7469611167907715, + "step": 7483 + }, + { + "epoch": 1.7244239631336407, + "grad_norm": 1.2394445599695993, + "learning_rate": 1.0219546042925841e-07, + "loss": 0.8353795409202576, + "step": 7484 + }, + { + "epoch": 1.7246543778801844, + "grad_norm": 1.120589163159477, + "learning_rate": 1.0202774254077618e-07, + "loss": 0.6587873101234436, + "step": 7485 + }, + { + "epoch": 1.7248847926267281, + "grad_norm": 1.2182162589741892, + "learning_rate": 1.0186015499177847e-07, + "loss": 0.8595654964447021, + "step": 7486 + }, + { + "epoch": 1.7251152073732718, + "grad_norm": 1.0966229129393803, + "learning_rate": 1.0169269780659028e-07, + "loss": 0.7683298587799072, + "step": 7487 + }, + { + "epoch": 1.7253456221198156, + "grad_norm": 
1.372358134101511, + "learning_rate": 1.0152537100951786e-07, + "loss": 0.888152003288269, + "step": 7488 + }, + { + "epoch": 1.7255760368663595, + "grad_norm": 1.1162191205168919, + "learning_rate": 1.013581746248482e-07, + "loss": 0.7835309505462646, + "step": 7489 + }, + { + "epoch": 1.7258064516129032, + "grad_norm": 1.4079534093347241, + "learning_rate": 1.0119110867684999e-07, + "loss": 0.9744646549224854, + "step": 7490 + }, + { + "epoch": 1.726036866359447, + "grad_norm": 1.109483043922066, + "learning_rate": 1.0102417318977251e-07, + "loss": 0.6842091083526611, + "step": 7491 + }, + { + "epoch": 1.726267281105991, + "grad_norm": 1.2357910065520838, + "learning_rate": 1.0085736818784607e-07, + "loss": 0.7435774207115173, + "step": 7492 + }, + { + "epoch": 1.7264976958525344, + "grad_norm": 1.3316804792215136, + "learning_rate": 1.0069069369528249e-07, + "loss": 0.8430237770080566, + "step": 7493 + }, + { + "epoch": 1.7267281105990784, + "grad_norm": 1.1766330255379311, + "learning_rate": 1.0052414973627421e-07, + "loss": 0.8203141689300537, + "step": 7494 + }, + { + "epoch": 1.726958525345622, + "grad_norm": 1.291685708783942, + "learning_rate": 1.0035773633499456e-07, + "loss": 0.7491584420204163, + "step": 7495 + }, + { + "epoch": 1.7271889400921658, + "grad_norm": 0.9475128549493947, + "learning_rate": 1.0019145351559876e-07, + "loss": 0.6738899946212769, + "step": 7496 + }, + { + "epoch": 1.7274193548387098, + "grad_norm": 1.4107090522911332, + "learning_rate": 1.0002530130222231e-07, + "loss": 0.8628265857696533, + "step": 7497 + }, + { + "epoch": 1.7276497695852533, + "grad_norm": 1.5650622568616335, + "learning_rate": 9.985927971898178e-08, + "loss": 1.0158125162124634, + "step": 7498 + }, + { + "epoch": 1.7278801843317972, + "grad_norm": 1.2981782537446935, + "learning_rate": 9.969338878997535e-08, + "loss": 0.7269070148468018, + "step": 7499 + }, + { + "epoch": 1.728110599078341, + "grad_norm": 1.3106792244331589, + "learning_rate": 
9.952762853928165e-08, + "loss": 0.8769187927246094, + "step": 7500 + }, + { + "epoch": 1.7283410138248847, + "grad_norm": 1.325563750244826, + "learning_rate": 9.936199899096042e-08, + "loss": 0.7841119170188904, + "step": 7501 + }, + { + "epoch": 1.7285714285714286, + "grad_norm": 1.7907234255256992, + "learning_rate": 9.91965001690529e-08, + "loss": 0.9209425449371338, + "step": 7502 + }, + { + "epoch": 1.7288018433179724, + "grad_norm": 1.110414701934764, + "learning_rate": 9.903113209758096e-08, + "loss": 0.7795250415802002, + "step": 7503 + }, + { + "epoch": 1.729032258064516, + "grad_norm": 1.2158163264490913, + "learning_rate": 9.886589480054741e-08, + "loss": 0.7131094932556152, + "step": 7504 + }, + { + "epoch": 1.72926267281106, + "grad_norm": 1.167789931248441, + "learning_rate": 9.870078830193629e-08, + "loss": 0.8090137839317322, + "step": 7505 + }, + { + "epoch": 1.7294930875576036, + "grad_norm": 1.124104241227004, + "learning_rate": 9.853581262571231e-08, + "loss": 0.7797958850860596, + "step": 7506 + }, + { + "epoch": 1.7297235023041475, + "grad_norm": 1.3470491669984355, + "learning_rate": 9.83709677958221e-08, + "loss": 0.6927989721298218, + "step": 7507 + }, + { + "epoch": 1.7299539170506912, + "grad_norm": 1.152565458620573, + "learning_rate": 9.820625383619219e-08, + "loss": 0.8009092807769775, + "step": 7508 + }, + { + "epoch": 1.730184331797235, + "grad_norm": 1.0970285369996284, + "learning_rate": 9.804167077073056e-08, + "loss": 0.761864423751831, + "step": 7509 + }, + { + "epoch": 1.730414746543779, + "grad_norm": 1.5795757660336223, + "learning_rate": 9.787721862332654e-08, + "loss": 0.7459509372711182, + "step": 7510 + }, + { + "epoch": 1.7306451612903224, + "grad_norm": 1.0401744024243509, + "learning_rate": 9.771289741785005e-08, + "loss": 0.8216449022293091, + "step": 7511 + }, + { + "epoch": 1.7308755760368664, + "grad_norm": 1.3924364017238642, + "learning_rate": 9.754870717815177e-08, + "loss": 0.7860604524612427, + "step": 7512 
+ }, + { + "epoch": 1.73110599078341, + "grad_norm": 1.146706612325942, + "learning_rate": 9.738464792806422e-08, + "loss": 0.7727769613265991, + "step": 7513 + }, + { + "epoch": 1.7313364055299538, + "grad_norm": 1.2690787911964316, + "learning_rate": 9.722071969140011e-08, + "loss": 0.874458909034729, + "step": 7514 + }, + { + "epoch": 1.7315668202764978, + "grad_norm": 1.1530798069952481, + "learning_rate": 9.705692249195319e-08, + "loss": 0.840191125869751, + "step": 7515 + }, + { + "epoch": 1.7317972350230415, + "grad_norm": 1.1387350117516357, + "learning_rate": 9.689325635349877e-08, + "loss": 0.7169238924980164, + "step": 7516 + }, + { + "epoch": 1.7320276497695852, + "grad_norm": 1.2478630540284088, + "learning_rate": 9.672972129979273e-08, + "loss": 0.7554492950439453, + "step": 7517 + }, + { + "epoch": 1.7322580645161292, + "grad_norm": 1.2166706454141942, + "learning_rate": 9.656631735457154e-08, + "loss": 0.5734076499938965, + "step": 7518 + }, + { + "epoch": 1.7324884792626727, + "grad_norm": 1.5466370383298045, + "learning_rate": 9.640304454155369e-08, + "loss": 0.7867637872695923, + "step": 7519 + }, + { + "epoch": 1.7327188940092166, + "grad_norm": 1.2704443586099365, + "learning_rate": 9.623990288443773e-08, + "loss": 0.7330230474472046, + "step": 7520 + }, + { + "epoch": 1.7329493087557604, + "grad_norm": 1.1352922714992866, + "learning_rate": 9.607689240690319e-08, + "loss": 0.7880058288574219, + "step": 7521 + }, + { + "epoch": 1.733179723502304, + "grad_norm": 1.0605191939295662, + "learning_rate": 9.591401313261139e-08, + "loss": 0.796575665473938, + "step": 7522 + }, + { + "epoch": 1.733410138248848, + "grad_norm": 1.4376273040997398, + "learning_rate": 9.575126508520359e-08, + "loss": 0.8101698160171509, + "step": 7523 + }, + { + "epoch": 1.7336405529953915, + "grad_norm": 1.0868433692155355, + "learning_rate": 9.55886482883026e-08, + "loss": 0.7811597585678101, + "step": 7524 + }, + { + "epoch": 1.7338709677419355, + "grad_norm": 
1.1754841201094306, + "learning_rate": 9.542616276551208e-08, + "loss": 0.7680011987686157, + "step": 7525 + }, + { + "epoch": 1.7341013824884792, + "grad_norm": 1.3670730603232781, + "learning_rate": 9.526380854041638e-08, + "loss": 0.8018794059753418, + "step": 7526 + }, + { + "epoch": 1.734331797235023, + "grad_norm": 1.1232468645544793, + "learning_rate": 9.510158563658133e-08, + "loss": 0.7770500183105469, + "step": 7527 + }, + { + "epoch": 1.734562211981567, + "grad_norm": 1.1848169541071576, + "learning_rate": 9.493949407755309e-08, + "loss": 0.7622300982475281, + "step": 7528 + }, + { + "epoch": 1.7347926267281106, + "grad_norm": 1.5281654640943847, + "learning_rate": 9.477753388685928e-08, + "loss": 0.831570029258728, + "step": 7529 + }, + { + "epoch": 1.7350230414746544, + "grad_norm": 1.1599086861943149, + "learning_rate": 9.461570508800776e-08, + "loss": 0.7987254858016968, + "step": 7530 + }, + { + "epoch": 1.7352534562211983, + "grad_norm": 1.2752040500202788, + "learning_rate": 9.44540077044883e-08, + "loss": 0.8219848275184631, + "step": 7531 + }, + { + "epoch": 1.7354838709677418, + "grad_norm": 1.298736989691398, + "learning_rate": 9.429244175977092e-08, + "loss": 0.8273369073867798, + "step": 7532 + }, + { + "epoch": 1.7357142857142858, + "grad_norm": 1.2555474610105797, + "learning_rate": 9.413100727730628e-08, + "loss": 0.8241056203842163, + "step": 7533 + }, + { + "epoch": 1.7359447004608295, + "grad_norm": 1.4118150886368108, + "learning_rate": 9.396970428052697e-08, + "loss": 0.6880715489387512, + "step": 7534 + }, + { + "epoch": 1.7361751152073732, + "grad_norm": 1.092011806345561, + "learning_rate": 9.380853279284551e-08, + "loss": 0.7355446815490723, + "step": 7535 + }, + { + "epoch": 1.7364055299539172, + "grad_norm": 1.2700711725839655, + "learning_rate": 9.364749283765604e-08, + "loss": 0.8835841417312622, + "step": 7536 + }, + { + "epoch": 1.7366359447004607, + "grad_norm": 1.1984936737610834, + "learning_rate": 9.348658443833313e-08, 
+ "loss": 0.80763840675354, + "step": 7537 + }, + { + "epoch": 1.7368663594470046, + "grad_norm": 1.2855970061631397, + "learning_rate": 9.332580761823227e-08, + "loss": 0.7473145723342896, + "step": 7538 + }, + { + "epoch": 1.7370967741935484, + "grad_norm": 1.2970951445867331, + "learning_rate": 9.316516240069028e-08, + "loss": 0.6618188619613647, + "step": 7539 + }, + { + "epoch": 1.737327188940092, + "grad_norm": 1.3396426049949766, + "learning_rate": 9.300464880902447e-08, + "loss": 0.7432928085327148, + "step": 7540 + }, + { + "epoch": 1.737557603686636, + "grad_norm": 1.1659381023507147, + "learning_rate": 9.284426686653302e-08, + "loss": 0.7915963530540466, + "step": 7541 + }, + { + "epoch": 1.7377880184331798, + "grad_norm": 1.1552275821682043, + "learning_rate": 9.26840165964955e-08, + "loss": 0.6428440809249878, + "step": 7542 + }, + { + "epoch": 1.7380184331797235, + "grad_norm": 1.1399241166482426, + "learning_rate": 9.252389802217187e-08, + "loss": 0.7142912149429321, + "step": 7543 + }, + { + "epoch": 1.7382488479262674, + "grad_norm": 1.316337246157137, + "learning_rate": 9.236391116680309e-08, + "loss": 0.878044605255127, + "step": 7544 + }, + { + "epoch": 1.738479262672811, + "grad_norm": 1.089416476430598, + "learning_rate": 9.220405605361103e-08, + "loss": 0.6861810684204102, + "step": 7545 + }, + { + "epoch": 1.738709677419355, + "grad_norm": 1.3890455529154517, + "learning_rate": 9.204433270579825e-08, + "loss": 0.7638171911239624, + "step": 7546 + }, + { + "epoch": 1.7389400921658986, + "grad_norm": 1.1532660265349828, + "learning_rate": 9.188474114654876e-08, + "loss": 0.7149873971939087, + "step": 7547 + }, + { + "epoch": 1.7391705069124423, + "grad_norm": 1.1783502444227563, + "learning_rate": 9.172528139902703e-08, + "loss": 0.7249442338943481, + "step": 7548 + }, + { + "epoch": 1.7394009216589863, + "grad_norm": 1.178650320628679, + "learning_rate": 9.156595348637819e-08, + "loss": 0.6846513748168945, + "step": 7549 + }, + { + "epoch": 
1.7396313364055298, + "grad_norm": 1.4706201914955974, + "learning_rate": 9.140675743172843e-08, + "loss": 0.9332281351089478, + "step": 7550 + }, + { + "epoch": 1.7398617511520738, + "grad_norm": 1.1835891939139382, + "learning_rate": 9.124769325818526e-08, + "loss": 0.6878118515014648, + "step": 7551 + }, + { + "epoch": 1.7400921658986175, + "grad_norm": 1.077038469987993, + "learning_rate": 9.108876098883633e-08, + "loss": 0.7695426344871521, + "step": 7552 + }, + { + "epoch": 1.7403225806451612, + "grad_norm": 1.3278288479360603, + "learning_rate": 9.09299606467503e-08, + "loss": 0.7983303666114807, + "step": 7553 + }, + { + "epoch": 1.7405529953917052, + "grad_norm": 1.4656214059917094, + "learning_rate": 9.077129225497726e-08, + "loss": 0.8158761262893677, + "step": 7554 + }, + { + "epoch": 1.7407834101382489, + "grad_norm": 1.1519947124673093, + "learning_rate": 9.061275583654748e-08, + "loss": 0.8064214587211609, + "step": 7555 + }, + { + "epoch": 1.7410138248847926, + "grad_norm": 1.2545881332280804, + "learning_rate": 9.045435141447211e-08, + "loss": 0.9058080911636353, + "step": 7556 + }, + { + "epoch": 1.7412442396313366, + "grad_norm": 1.213639501339424, + "learning_rate": 9.029607901174374e-08, + "loss": 0.7392270565032959, + "step": 7557 + }, + { + "epoch": 1.74147465437788, + "grad_norm": 1.0453486445607982, + "learning_rate": 9.013793865133501e-08, + "loss": 0.7114729881286621, + "step": 7558 + }, + { + "epoch": 1.741705069124424, + "grad_norm": 1.2302263811033798, + "learning_rate": 8.997993035620022e-08, + "loss": 0.8675493597984314, + "step": 7559 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.9934561818451934, + "learning_rate": 8.98220541492738e-08, + "loss": 0.8103020191192627, + "step": 7560 + }, + { + "epoch": 1.7421658986175115, + "grad_norm": 1.2538115734834285, + "learning_rate": 8.966431005347109e-08, + "loss": 0.7339279651641846, + "step": 7561 + }, + { + "epoch": 1.7423963133640554, + "grad_norm": 1.3510829475373114, + 
"learning_rate": 8.950669809168887e-08, + "loss": 0.6971707344055176, + "step": 7562 + }, + { + "epoch": 1.742626728110599, + "grad_norm": 1.105458403928542, + "learning_rate": 8.934921828680408e-08, + "loss": 0.8633124232292175, + "step": 7563 + }, + { + "epoch": 1.7428571428571429, + "grad_norm": 1.3082830118219664, + "learning_rate": 8.919187066167466e-08, + "loss": 0.7704664468765259, + "step": 7564 + }, + { + "epoch": 1.7430875576036866, + "grad_norm": 1.1782653714880955, + "learning_rate": 8.903465523913955e-08, + "loss": 0.7063533067703247, + "step": 7565 + }, + { + "epoch": 1.7433179723502303, + "grad_norm": 1.1177210535700517, + "learning_rate": 8.887757204201817e-08, + "loss": 0.7094486951828003, + "step": 7566 + }, + { + "epoch": 1.7435483870967743, + "grad_norm": 1.4575572123890834, + "learning_rate": 8.872062109311096e-08, + "loss": 0.8743780255317688, + "step": 7567 + }, + { + "epoch": 1.743778801843318, + "grad_norm": 1.5827740898240907, + "learning_rate": 8.856380241519935e-08, + "loss": 0.7282687425613403, + "step": 7568 + }, + { + "epoch": 1.7440092165898617, + "grad_norm": 1.105316538989134, + "learning_rate": 8.840711603104523e-08, + "loss": 0.7507487535476685, + "step": 7569 + }, + { + "epoch": 1.7442396313364057, + "grad_norm": 1.2820028807325874, + "learning_rate": 8.82505619633912e-08, + "loss": 0.807691216468811, + "step": 7570 + }, + { + "epoch": 1.7444700460829492, + "grad_norm": 1.3537034886290398, + "learning_rate": 8.809414023496142e-08, + "loss": 0.8650702238082886, + "step": 7571 + }, + { + "epoch": 1.7447004608294931, + "grad_norm": 0.9602033366804331, + "learning_rate": 8.793785086845984e-08, + "loss": 0.6872273683547974, + "step": 7572 + }, + { + "epoch": 1.7449308755760369, + "grad_norm": 1.0979215212634434, + "learning_rate": 8.778169388657163e-08, + "loss": 0.7242698669433594, + "step": 7573 + }, + { + "epoch": 1.7451612903225806, + "grad_norm": 1.0962988735603825, + "learning_rate": 8.762566931196313e-08, + "loss": 
0.741705060005188, + "step": 7574 + }, + { + "epoch": 1.7453917050691246, + "grad_norm": 1.06231801843056, + "learning_rate": 8.746977716728099e-08, + "loss": 0.7293061017990112, + "step": 7575 + }, + { + "epoch": 1.745622119815668, + "grad_norm": 1.0145801945512316, + "learning_rate": 8.731401747515244e-08, + "loss": 0.8385475277900696, + "step": 7576 + }, + { + "epoch": 1.745852534562212, + "grad_norm": 1.4891647422185605, + "learning_rate": 8.715839025818617e-08, + "loss": 0.8484489917755127, + "step": 7577 + }, + { + "epoch": 1.7460829493087557, + "grad_norm": 1.1930293813449155, + "learning_rate": 8.7002895538971e-08, + "loss": 0.6511530876159668, + "step": 7578 + }, + { + "epoch": 1.7463133640552995, + "grad_norm": 1.4360732745608953, + "learning_rate": 8.684753334007688e-08, + "loss": 0.8274673223495483, + "step": 7579 + }, + { + "epoch": 1.7465437788018434, + "grad_norm": 1.081237944644138, + "learning_rate": 8.669230368405456e-08, + "loss": 0.7367755174636841, + "step": 7580 + }, + { + "epoch": 1.7467741935483871, + "grad_norm": 1.2748877435171337, + "learning_rate": 8.653720659343522e-08, + "loss": 0.80199134349823, + "step": 7581 + }, + { + "epoch": 1.7470046082949309, + "grad_norm": 1.1988639104811598, + "learning_rate": 8.638224209073097e-08, + "loss": 0.7782701253890991, + "step": 7582 + }, + { + "epoch": 1.7472350230414746, + "grad_norm": 1.3660035419508034, + "learning_rate": 8.622741019843504e-08, + "loss": 0.7613752484321594, + "step": 7583 + }, + { + "epoch": 1.7474654377880183, + "grad_norm": 1.3599194483251544, + "learning_rate": 8.60727109390208e-08, + "loss": 0.8213690519332886, + "step": 7584 + }, + { + "epoch": 1.7476958525345623, + "grad_norm": 1.1411507368613496, + "learning_rate": 8.59181443349426e-08, + "loss": 0.7064045667648315, + "step": 7585 + }, + { + "epoch": 1.747926267281106, + "grad_norm": 1.1189241999598565, + "learning_rate": 8.576371040863573e-08, + "loss": 0.6686617136001587, + "step": 7586 + }, + { + "epoch": 
1.7481566820276497, + "grad_norm": 1.0194951619872286, + "learning_rate": 8.560940918251592e-08, + "loss": 0.7520097494125366, + "step": 7587 + }, + { + "epoch": 1.7483870967741937, + "grad_norm": 1.0822685191965165, + "learning_rate": 8.545524067897991e-08, + "loss": 0.8176038265228271, + "step": 7588 + }, + { + "epoch": 1.7486175115207372, + "grad_norm": 1.3408318725531652, + "learning_rate": 8.530120492040505e-08, + "loss": 0.6680614948272705, + "step": 7589 + }, + { + "epoch": 1.7488479262672811, + "grad_norm": 1.3621846138568519, + "learning_rate": 8.514730192914921e-08, + "loss": 0.7421592473983765, + "step": 7590 + }, + { + "epoch": 1.7490783410138249, + "grad_norm": 1.2822263575200588, + "learning_rate": 8.499353172755164e-08, + "loss": 0.8869342803955078, + "step": 7591 + }, + { + "epoch": 1.7493087557603686, + "grad_norm": 1.1206823186662898, + "learning_rate": 8.48398943379316e-08, + "loss": 0.6850584745407104, + "step": 7592 + }, + { + "epoch": 1.7495391705069125, + "grad_norm": 1.0932592535391596, + "learning_rate": 8.468638978258914e-08, + "loss": 0.7433363199234009, + "step": 7593 + }, + { + "epoch": 1.7497695852534563, + "grad_norm": 1.0269953798613225, + "learning_rate": 8.453301808380564e-08, + "loss": 0.7744357585906982, + "step": 7594 + }, + { + "epoch": 1.75, + "grad_norm": 1.382126107142446, + "learning_rate": 8.437977926384277e-08, + "loss": 0.8236217498779297, + "step": 7595 + }, + { + "epoch": 1.7502304147465437, + "grad_norm": 1.3329245666066865, + "learning_rate": 8.422667334494249e-08, + "loss": 0.8552603721618652, + "step": 7596 + }, + { + "epoch": 1.7504608294930875, + "grad_norm": 1.4100651978644374, + "learning_rate": 8.407370034932859e-08, + "loss": 0.7755998373031616, + "step": 7597 + }, + { + "epoch": 1.7506912442396314, + "grad_norm": 1.3033243035055457, + "learning_rate": 8.392086029920442e-08, + "loss": 0.8105130195617676, + "step": 7598 + }, + { + "epoch": 1.7509216589861751, + "grad_norm": 1.290928258750675, + 
"learning_rate": 8.376815321675457e-08, + "loss": 0.8787405490875244, + "step": 7599 + }, + { + "epoch": 1.7511520737327189, + "grad_norm": 1.1296910155342912, + "learning_rate": 8.361557912414441e-08, + "loss": 0.6107788681983948, + "step": 7600 + }, + { + "epoch": 1.7513824884792628, + "grad_norm": 0.9941949428855014, + "learning_rate": 8.34631380435199e-08, + "loss": 0.6825795769691467, + "step": 7601 + }, + { + "epoch": 1.7516129032258063, + "grad_norm": 1.5141115638242784, + "learning_rate": 8.331082999700734e-08, + "loss": 0.7069272994995117, + "step": 7602 + }, + { + "epoch": 1.7518433179723503, + "grad_norm": 1.5687921139560086, + "learning_rate": 8.315865500671449e-08, + "loss": 0.7784801721572876, + "step": 7603 + }, + { + "epoch": 1.752073732718894, + "grad_norm": 1.0771300382051838, + "learning_rate": 8.300661309472912e-08, + "loss": 0.7653795480728149, + "step": 7604 + }, + { + "epoch": 1.7523041474654377, + "grad_norm": 1.5582480598587298, + "learning_rate": 8.285470428311991e-08, + "loss": 0.7386122941970825, + "step": 7605 + }, + { + "epoch": 1.7525345622119817, + "grad_norm": 0.9515219540238303, + "learning_rate": 8.270292859393613e-08, + "loss": 0.7828700542449951, + "step": 7606 + }, + { + "epoch": 1.7527649769585254, + "grad_norm": 1.5500733851956912, + "learning_rate": 8.255128604920792e-08, + "loss": 0.8955565094947815, + "step": 7607 + }, + { + "epoch": 1.7529953917050691, + "grad_norm": 1.2505809950313513, + "learning_rate": 8.2399776670946e-08, + "loss": 0.9071576595306396, + "step": 7608 + }, + { + "epoch": 1.7532258064516129, + "grad_norm": 1.3402860152327503, + "learning_rate": 8.22484004811419e-08, + "loss": 0.752417802810669, + "step": 7609 + }, + { + "epoch": 1.7534562211981566, + "grad_norm": 1.367440429282924, + "learning_rate": 8.209715750176727e-08, + "loss": 0.8611370325088501, + "step": 7610 + }, + { + "epoch": 1.7536866359447005, + "grad_norm": 1.232351895452084, + "learning_rate": 8.19460477547752e-08, + "loss": 
0.745223879814148, + "step": 7611 + }, + { + "epoch": 1.7539170506912443, + "grad_norm": 1.0415704016806513, + "learning_rate": 8.179507126209906e-08, + "loss": 0.7799668908119202, + "step": 7612 + }, + { + "epoch": 1.754147465437788, + "grad_norm": 1.3761849870920217, + "learning_rate": 8.164422804565263e-08, + "loss": 0.8177207708358765, + "step": 7613 + }, + { + "epoch": 1.754377880184332, + "grad_norm": 1.2017347256018391, + "learning_rate": 8.149351812733085e-08, + "loss": 0.7111436128616333, + "step": 7614 + }, + { + "epoch": 1.7546082949308754, + "grad_norm": 1.2253776843179969, + "learning_rate": 8.1342941529009e-08, + "loss": 0.6840728521347046, + "step": 7615 + }, + { + "epoch": 1.7548387096774194, + "grad_norm": 1.02983629791633, + "learning_rate": 8.119249827254281e-08, + "loss": 0.6115491986274719, + "step": 7616 + }, + { + "epoch": 1.7550691244239631, + "grad_norm": 1.3870391302655596, + "learning_rate": 8.104218837976939e-08, + "loss": 0.7149351239204407, + "step": 7617 + }, + { + "epoch": 1.7552995391705069, + "grad_norm": 1.2174150358988711, + "learning_rate": 8.089201187250571e-08, + "loss": 0.688147783279419, + "step": 7618 + }, + { + "epoch": 1.7555299539170508, + "grad_norm": 1.2630937737290178, + "learning_rate": 8.074196877254969e-08, + "loss": 0.8092058300971985, + "step": 7619 + }, + { + "epoch": 1.7557603686635943, + "grad_norm": 0.8375696110242734, + "learning_rate": 8.05920591016801e-08, + "loss": 0.7375935912132263, + "step": 7620 + }, + { + "epoch": 1.7559907834101383, + "grad_norm": 1.1868565460321117, + "learning_rate": 8.044228288165599e-08, + "loss": 0.6793934106826782, + "step": 7621 + }, + { + "epoch": 1.756221198156682, + "grad_norm": 1.2102446264436708, + "learning_rate": 8.0292640134217e-08, + "loss": 0.7395757436752319, + "step": 7622 + }, + { + "epoch": 1.7564516129032257, + "grad_norm": 0.9259939168277553, + "learning_rate": 8.014313088108394e-08, + "loss": 0.546409010887146, + "step": 7623 + }, + { + "epoch": 
1.7566820276497697, + "grad_norm": 1.4575552468425101, + "learning_rate": 7.999375514395778e-08, + "loss": 0.7790534496307373, + "step": 7624 + }, + { + "epoch": 1.7569124423963134, + "grad_norm": 1.0896798964233478, + "learning_rate": 7.984451294452e-08, + "loss": 0.7398231625556946, + "step": 7625 + }, + { + "epoch": 1.7571428571428571, + "grad_norm": 1.2623646343227142, + "learning_rate": 7.969540430443311e-08, + "loss": 0.7414441108703613, + "step": 7626 + }, + { + "epoch": 1.757373271889401, + "grad_norm": 1.1312110923091452, + "learning_rate": 7.954642924533994e-08, + "loss": 0.7548750638961792, + "step": 7627 + }, + { + "epoch": 1.7576036866359446, + "grad_norm": 0.957909042850816, + "learning_rate": 7.939758778886385e-08, + "loss": 0.7546773552894592, + "step": 7628 + }, + { + "epoch": 1.7578341013824885, + "grad_norm": 1.1252175485529645, + "learning_rate": 7.924887995660945e-08, + "loss": 0.7373867630958557, + "step": 7629 + }, + { + "epoch": 1.7580645161290323, + "grad_norm": 0.9815120449405607, + "learning_rate": 7.910030577016113e-08, + "loss": 0.7271026968955994, + "step": 7630 + }, + { + "epoch": 1.758294930875576, + "grad_norm": 1.3179911972781693, + "learning_rate": 7.89518652510841e-08, + "loss": 0.8723413944244385, + "step": 7631 + }, + { + "epoch": 1.75852534562212, + "grad_norm": 1.3060473211580457, + "learning_rate": 7.880355842092468e-08, + "loss": 0.8282548189163208, + "step": 7632 + }, + { + "epoch": 1.7587557603686634, + "grad_norm": 1.1089249458958528, + "learning_rate": 7.865538530120918e-08, + "loss": 0.7436991930007935, + "step": 7633 + }, + { + "epoch": 1.7589861751152074, + "grad_norm": 1.0884201833829175, + "learning_rate": 7.850734591344488e-08, + "loss": 0.7750650644302368, + "step": 7634 + }, + { + "epoch": 1.7592165898617511, + "grad_norm": 1.1544057740235625, + "learning_rate": 7.835944027911957e-08, + "loss": 0.6824958324432373, + "step": 7635 + }, + { + "epoch": 1.7594470046082948, + "grad_norm": 1.1607504467923393, + 
"learning_rate": 7.821166841970107e-08, + "loss": 0.8500322103500366, + "step": 7636 + }, + { + "epoch": 1.7596774193548388, + "grad_norm": 1.3527797330475602, + "learning_rate": 7.806403035663889e-08, + "loss": 0.7111128568649292, + "step": 7637 + }, + { + "epoch": 1.7599078341013825, + "grad_norm": 1.1877365592337052, + "learning_rate": 7.791652611136212e-08, + "loss": 0.7320532202720642, + "step": 7638 + }, + { + "epoch": 1.7601382488479262, + "grad_norm": 1.2292449607917257, + "learning_rate": 7.776915570528076e-08, + "loss": 0.8439149856567383, + "step": 7639 + }, + { + "epoch": 1.7603686635944702, + "grad_norm": 1.0358127598823044, + "learning_rate": 7.762191915978578e-08, + "loss": 0.6489125490188599, + "step": 7640 + }, + { + "epoch": 1.7605990783410137, + "grad_norm": 1.0601387517448153, + "learning_rate": 7.74748164962482e-08, + "loss": 0.8118616342544556, + "step": 7641 + }, + { + "epoch": 1.7608294930875577, + "grad_norm": 1.0728808336951121, + "learning_rate": 7.732784773601953e-08, + "loss": 0.7528102397918701, + "step": 7642 + }, + { + "epoch": 1.7610599078341014, + "grad_norm": 1.0747138773482348, + "learning_rate": 7.718101290043244e-08, + "loss": 0.7856849431991577, + "step": 7643 + }, + { + "epoch": 1.761290322580645, + "grad_norm": 1.3272410706900097, + "learning_rate": 7.703431201079969e-08, + "loss": 0.8001973628997803, + "step": 7644 + }, + { + "epoch": 1.761520737327189, + "grad_norm": 1.2721768499077395, + "learning_rate": 7.688774508841478e-08, + "loss": 0.8332471251487732, + "step": 7645 + }, + { + "epoch": 1.7617511520737326, + "grad_norm": 1.2453092502270793, + "learning_rate": 7.67413121545516e-08, + "loss": 0.7534940242767334, + "step": 7646 + }, + { + "epoch": 1.7619815668202765, + "grad_norm": 1.1709183629535718, + "learning_rate": 7.65950132304647e-08, + "loss": 0.737503170967102, + "step": 7647 + }, + { + "epoch": 1.7622119815668202, + "grad_norm": 1.0899482570396566, + "learning_rate": 7.644884833738896e-08, + "loss": 
0.7407201528549194, + "step": 7648 + }, + { + "epoch": 1.762442396313364, + "grad_norm": 1.0059187913201149, + "learning_rate": 7.630281749654055e-08, + "loss": 0.7589377164840698, + "step": 7649 + }, + { + "epoch": 1.762672811059908, + "grad_norm": 1.2624576054967966, + "learning_rate": 7.615692072911528e-08, + "loss": 0.6586496829986572, + "step": 7650 + }, + { + "epoch": 1.7629032258064516, + "grad_norm": 1.1572434060771926, + "learning_rate": 7.601115805628977e-08, + "loss": 0.705591082572937, + "step": 7651 + }, + { + "epoch": 1.7631336405529954, + "grad_norm": 1.2006722437863475, + "learning_rate": 7.586552949922176e-08, + "loss": 0.7889619469642639, + "step": 7652 + }, + { + "epoch": 1.7633640552995393, + "grad_norm": 1.0348577197525213, + "learning_rate": 7.572003507904868e-08, + "loss": 0.6912282705307007, + "step": 7653 + }, + { + "epoch": 1.7635944700460828, + "grad_norm": 1.1101374555344716, + "learning_rate": 7.557467481688873e-08, + "loss": 0.7374964952468872, + "step": 7654 + }, + { + "epoch": 1.7638248847926268, + "grad_norm": 1.1479262514291408, + "learning_rate": 7.542944873384105e-08, + "loss": 0.7302298545837402, + "step": 7655 + }, + { + "epoch": 1.7640552995391705, + "grad_norm": 1.2653276061660264, + "learning_rate": 7.5284356850985e-08, + "loss": 0.8323671817779541, + "step": 7656 + }, + { + "epoch": 1.7642857142857142, + "grad_norm": 1.0548505840987745, + "learning_rate": 7.513939918938028e-08, + "loss": 0.6654655933380127, + "step": 7657 + }, + { + "epoch": 1.7645161290322582, + "grad_norm": 1.1231001283574193, + "learning_rate": 7.499457577006751e-08, + "loss": 0.6371186375617981, + "step": 7658 + }, + { + "epoch": 1.7647465437788017, + "grad_norm": 1.3299088323872645, + "learning_rate": 7.484988661406733e-08, + "loss": 0.7761695384979248, + "step": 7659 + }, + { + "epoch": 1.7649769585253456, + "grad_norm": 1.1268786347378037, + "learning_rate": 7.470533174238158e-08, + "loss": 0.779335618019104, + "step": 7660 + }, + { + "epoch": 
1.7652073732718894, + "grad_norm": 1.26329747548588, + "learning_rate": 7.456091117599195e-08, + "loss": 0.7642731666564941, + "step": 7661 + }, + { + "epoch": 1.765437788018433, + "grad_norm": 1.417392503393573, + "learning_rate": 7.441662493586076e-08, + "loss": 0.7490801215171814, + "step": 7662 + }, + { + "epoch": 1.765668202764977, + "grad_norm": 1.6109060172749883, + "learning_rate": 7.427247304293139e-08, + "loss": 0.9480686187744141, + "step": 7663 + }, + { + "epoch": 1.7658986175115208, + "grad_norm": 1.243245001745715, + "learning_rate": 7.412845551812707e-08, + "loss": 0.6208070516586304, + "step": 7664 + }, + { + "epoch": 1.7661290322580645, + "grad_norm": 1.2606477635417679, + "learning_rate": 7.398457238235167e-08, + "loss": 0.7782050371170044, + "step": 7665 + }, + { + "epoch": 1.7663594470046085, + "grad_norm": 1.1494295384377444, + "learning_rate": 7.38408236564897e-08, + "loss": 0.6725378632545471, + "step": 7666 + }, + { + "epoch": 1.766589861751152, + "grad_norm": 1.4030647180836417, + "learning_rate": 7.369720936140611e-08, + "loss": 0.8247120380401611, + "step": 7667 + }, + { + "epoch": 1.766820276497696, + "grad_norm": 1.2966757041323174, + "learning_rate": 7.355372951794614e-08, + "loss": 0.7866288423538208, + "step": 7668 + }, + { + "epoch": 1.7670506912442396, + "grad_norm": 1.5029385474750363, + "learning_rate": 7.341038414693613e-08, + "loss": 0.8096400499343872, + "step": 7669 + }, + { + "epoch": 1.7672811059907834, + "grad_norm": 1.5152361583075085, + "learning_rate": 7.326717326918208e-08, + "loss": 0.7799873352050781, + "step": 7670 + }, + { + "epoch": 1.7675115207373273, + "grad_norm": 1.0568101452951337, + "learning_rate": 7.312409690547095e-08, + "loss": 0.809285044670105, + "step": 7671 + }, + { + "epoch": 1.7677419354838708, + "grad_norm": 1.351048640166805, + "learning_rate": 7.298115507657021e-08, + "loss": 0.874248743057251, + "step": 7672 + }, + { + "epoch": 1.7679723502304148, + "grad_norm": 1.1594085684678137, + 
"learning_rate": 7.283834780322761e-08, + "loss": 0.7418022155761719, + "step": 7673 + }, + { + "epoch": 1.7682027649769585, + "grad_norm": 1.2895302232300179, + "learning_rate": 7.269567510617126e-08, + "loss": 0.720660388469696, + "step": 7674 + }, + { + "epoch": 1.7684331797235022, + "grad_norm": 1.241628438381412, + "learning_rate": 7.255313700611032e-08, + "loss": 0.7655429840087891, + "step": 7675 + }, + { + "epoch": 1.7686635944700462, + "grad_norm": 1.125747625986026, + "learning_rate": 7.241073352373361e-08, + "loss": 0.7303705215454102, + "step": 7676 + }, + { + "epoch": 1.76889400921659, + "grad_norm": 1.1695690935051566, + "learning_rate": 7.226846467971093e-08, + "loss": 0.7997909188270569, + "step": 7677 + }, + { + "epoch": 1.7691244239631336, + "grad_norm": 1.261135372954414, + "learning_rate": 7.212633049469264e-08, + "loss": 0.6546763181686401, + "step": 7678 + }, + { + "epoch": 1.7693548387096776, + "grad_norm": 0.9669222373383191, + "learning_rate": 7.1984330989309e-08, + "loss": 0.6374444961547852, + "step": 7679 + }, + { + "epoch": 1.769585253456221, + "grad_norm": 1.2966171484977755, + "learning_rate": 7.184246618417111e-08, + "loss": 0.7092937231063843, + "step": 7680 + }, + { + "epoch": 1.769815668202765, + "grad_norm": 1.3237517845156634, + "learning_rate": 7.17007360998706e-08, + "loss": 0.7702305316925049, + "step": 7681 + }, + { + "epoch": 1.7700460829493088, + "grad_norm": 0.978090031115468, + "learning_rate": 7.155914075697933e-08, + "loss": 0.7763724327087402, + "step": 7682 + }, + { + "epoch": 1.7702764976958525, + "grad_norm": 0.9935287090208255, + "learning_rate": 7.141768017604966e-08, + "loss": 0.6409577131271362, + "step": 7683 + }, + { + "epoch": 1.7705069124423964, + "grad_norm": 1.2265488041489598, + "learning_rate": 7.127635437761459e-08, + "loss": 0.7500795125961304, + "step": 7684 + }, + { + "epoch": 1.77073732718894, + "grad_norm": 1.405023681248552, + "learning_rate": 7.113516338218717e-08, + "loss": 0.7312004566192627, 
+ "step": 7685 + }, + { + "epoch": 1.770967741935484, + "grad_norm": 0.910138776962328, + "learning_rate": 7.099410721026112e-08, + "loss": 0.823514997959137, + "step": 7686 + }, + { + "epoch": 1.7711981566820276, + "grad_norm": 1.4146285511420962, + "learning_rate": 7.085318588231048e-08, + "loss": 0.9504063129425049, + "step": 7687 + }, + { + "epoch": 1.7714285714285714, + "grad_norm": 0.8614868773221174, + "learning_rate": 7.071239941878981e-08, + "loss": 0.7850733399391174, + "step": 7688 + }, + { + "epoch": 1.7716589861751153, + "grad_norm": 1.356738665999072, + "learning_rate": 7.057174784013431e-08, + "loss": 0.9447094798088074, + "step": 7689 + }, + { + "epoch": 1.771889400921659, + "grad_norm": 1.134179637006652, + "learning_rate": 7.04312311667592e-08, + "loss": 0.6675062775611877, + "step": 7690 + }, + { + "epoch": 1.7721198156682028, + "grad_norm": 0.9395193655643466, + "learning_rate": 7.029084941906005e-08, + "loss": 0.6875232458114624, + "step": 7691 + }, + { + "epoch": 1.7723502304147467, + "grad_norm": 1.3573723926231736, + "learning_rate": 7.015060261741357e-08, + "loss": 0.7847919464111328, + "step": 7692 + }, + { + "epoch": 1.7725806451612902, + "grad_norm": 1.300014614678359, + "learning_rate": 7.001049078217613e-08, + "loss": 0.7924584150314331, + "step": 7693 + }, + { + "epoch": 1.7728110599078342, + "grad_norm": 1.4499718780004744, + "learning_rate": 6.987051393368471e-08, + "loss": 0.8802344799041748, + "step": 7694 + }, + { + "epoch": 1.773041474654378, + "grad_norm": 1.425988233405148, + "learning_rate": 6.973067209225692e-08, + "loss": 0.7038631439208984, + "step": 7695 + }, + { + "epoch": 1.7732718894009216, + "grad_norm": 1.1226859696380713, + "learning_rate": 6.959096527819064e-08, + "loss": 0.9016700387001038, + "step": 7696 + }, + { + "epoch": 1.7735023041474656, + "grad_norm": 1.1967072079572705, + "learning_rate": 6.945139351176387e-08, + "loss": 0.7678165435791016, + "step": 7697 + }, + { + "epoch": 1.773732718894009, + 
"grad_norm": 1.1001980127511188, + "learning_rate": 6.931195681323565e-08, + "loss": 0.6612143516540527, + "step": 7698 + }, + { + "epoch": 1.773963133640553, + "grad_norm": 1.3968871696274494, + "learning_rate": 6.917265520284476e-08, + "loss": 0.840233325958252, + "step": 7699 + }, + { + "epoch": 1.7741935483870968, + "grad_norm": 1.3698339080168875, + "learning_rate": 6.90334887008106e-08, + "loss": 0.7913506031036377, + "step": 7700 + }, + { + "epoch": 1.7744239631336405, + "grad_norm": 1.3434994536689218, + "learning_rate": 6.889445732733323e-08, + "loss": 0.7523634433746338, + "step": 7701 + }, + { + "epoch": 1.7746543778801844, + "grad_norm": 1.1357027982798495, + "learning_rate": 6.875556110259273e-08, + "loss": 0.7009792327880859, + "step": 7702 + }, + { + "epoch": 1.7748847926267282, + "grad_norm": 0.9926018792518734, + "learning_rate": 6.861680004674963e-08, + "loss": 0.6533738970756531, + "step": 7703 + }, + { + "epoch": 1.7751152073732719, + "grad_norm": 1.0969556014291875, + "learning_rate": 6.847817417994517e-08, + "loss": 0.860493540763855, + "step": 7704 + }, + { + "epoch": 1.7753456221198156, + "grad_norm": 1.3425565367947665, + "learning_rate": 6.833968352230057e-08, + "loss": 0.810010552406311, + "step": 7705 + }, + { + "epoch": 1.7755760368663593, + "grad_norm": 1.2400741621258158, + "learning_rate": 6.820132809391743e-08, + "loss": 0.8443198204040527, + "step": 7706 + }, + { + "epoch": 1.7758064516129033, + "grad_norm": 1.1086679828690398, + "learning_rate": 6.806310791487813e-08, + "loss": 0.758772611618042, + "step": 7707 + }, + { + "epoch": 1.776036866359447, + "grad_norm": 1.2474164003496853, + "learning_rate": 6.792502300524472e-08, + "loss": 0.8438040614128113, + "step": 7708 + }, + { + "epoch": 1.7762672811059907, + "grad_norm": 1.154420265010753, + "learning_rate": 6.778707338506051e-08, + "loss": 0.7727431058883667, + "step": 7709 + }, + { + "epoch": 1.7764976958525347, + "grad_norm": 1.6420516256349273, + "learning_rate": 
6.764925907434849e-08, + "loss": 0.8118282556533813, + "step": 7710 + }, + { + "epoch": 1.7767281105990782, + "grad_norm": 1.22888062854885, + "learning_rate": 6.75115800931122e-08, + "loss": 0.7667281627655029, + "step": 7711 + }, + { + "epoch": 1.7769585253456222, + "grad_norm": 1.2558357954388057, + "learning_rate": 6.737403646133566e-08, + "loss": 0.7824913263320923, + "step": 7712 + }, + { + "epoch": 1.7771889400921659, + "grad_norm": 1.176254722115087, + "learning_rate": 6.723662819898312e-08, + "loss": 0.7318419218063354, + "step": 7713 + }, + { + "epoch": 1.7774193548387096, + "grad_norm": 1.2059569400095187, + "learning_rate": 6.709935532599897e-08, + "loss": 0.7060009241104126, + "step": 7714 + }, + { + "epoch": 1.7776497695852536, + "grad_norm": 1.3093811884607869, + "learning_rate": 6.69622178623086e-08, + "loss": 0.7367588877677917, + "step": 7715 + }, + { + "epoch": 1.7778801843317973, + "grad_norm": 1.3618967587860527, + "learning_rate": 6.682521582781708e-08, + "loss": 0.7340742349624634, + "step": 7716 + }, + { + "epoch": 1.778110599078341, + "grad_norm": 1.257394780772999, + "learning_rate": 6.668834924240995e-08, + "loss": 0.6655991077423096, + "step": 7717 + }, + { + "epoch": 1.7783410138248847, + "grad_norm": 1.3379718118337083, + "learning_rate": 6.655161812595367e-08, + "loss": 0.7562434673309326, + "step": 7718 + }, + { + "epoch": 1.7785714285714285, + "grad_norm": 1.2416548769934193, + "learning_rate": 6.641502249829423e-08, + "loss": 0.8078730702400208, + "step": 7719 + }, + { + "epoch": 1.7788018433179724, + "grad_norm": 1.1920319583326109, + "learning_rate": 6.627856237925811e-08, + "loss": 0.6285899877548218, + "step": 7720 + }, + { + "epoch": 1.7790322580645161, + "grad_norm": 1.1055337731409536, + "learning_rate": 6.61422377886528e-08, + "loss": 0.6633951663970947, + "step": 7721 + }, + { + "epoch": 1.7792626728110599, + "grad_norm": 1.0697990396462347, + "learning_rate": 6.600604874626548e-08, + "loss": 0.7273050546646118, + "step": 
7722 + }, + { + "epoch": 1.7794930875576038, + "grad_norm": 1.2680575632659172, + "learning_rate": 6.586999527186354e-08, + "loss": 0.6665729284286499, + "step": 7723 + }, + { + "epoch": 1.7797235023041473, + "grad_norm": 1.275935674563519, + "learning_rate": 6.573407738519531e-08, + "loss": 0.7332675457000732, + "step": 7724 + }, + { + "epoch": 1.7799539170506913, + "grad_norm": 1.0778234517601935, + "learning_rate": 6.559829510598892e-08, + "loss": 0.7439071536064148, + "step": 7725 + }, + { + "epoch": 1.780184331797235, + "grad_norm": 1.3635129938987167, + "learning_rate": 6.546264845395299e-08, + "loss": 0.7104752063751221, + "step": 7726 + }, + { + "epoch": 1.7804147465437787, + "grad_norm": 1.2639306988819587, + "learning_rate": 6.53271374487765e-08, + "loss": 0.7792220115661621, + "step": 7727 + }, + { + "epoch": 1.7806451612903227, + "grad_norm": 1.0938522733418012, + "learning_rate": 6.519176211012867e-08, + "loss": 0.6379693746566772, + "step": 7728 + }, + { + "epoch": 1.7808755760368664, + "grad_norm": 1.3289044633653213, + "learning_rate": 6.505652245765881e-08, + "loss": 0.7737444639205933, + "step": 7729 + }, + { + "epoch": 1.7811059907834101, + "grad_norm": 1.1550683939038542, + "learning_rate": 6.49214185109973e-08, + "loss": 0.7681130170822144, + "step": 7730 + }, + { + "epoch": 1.7813364055299539, + "grad_norm": 1.4083081227680676, + "learning_rate": 6.478645028975372e-08, + "loss": 0.8718420267105103, + "step": 7731 + }, + { + "epoch": 1.7815668202764976, + "grad_norm": 1.1823677205039174, + "learning_rate": 6.465161781351914e-08, + "loss": 0.7557366490364075, + "step": 7732 + }, + { + "epoch": 1.7817972350230415, + "grad_norm": 1.1999869902911706, + "learning_rate": 6.45169211018638e-08, + "loss": 0.6794936656951904, + "step": 7733 + }, + { + "epoch": 1.7820276497695853, + "grad_norm": 1.2764239528790797, + "learning_rate": 6.438236017433895e-08, + "loss": 0.8390437364578247, + "step": 7734 + }, + { + "epoch": 1.782258064516129, + "grad_norm": 
1.134383511808464, + "learning_rate": 6.424793505047599e-08, + "loss": 0.8024254441261292, + "step": 7735 + }, + { + "epoch": 1.782488479262673, + "grad_norm": 0.8536836629483899, + "learning_rate": 6.411364574978651e-08, + "loss": 0.6382162570953369, + "step": 7736 + }, + { + "epoch": 1.7827188940092165, + "grad_norm": 1.1757601346145792, + "learning_rate": 6.397949229176225e-08, + "loss": 0.6832011938095093, + "step": 7737 + }, + { + "epoch": 1.7829493087557604, + "grad_norm": 1.653357486541517, + "learning_rate": 6.384547469587564e-08, + "loss": 0.9003958702087402, + "step": 7738 + }, + { + "epoch": 1.7831797235023041, + "grad_norm": 1.1523951728047304, + "learning_rate": 6.371159298157913e-08, + "loss": 0.7030328512191772, + "step": 7739 + }, + { + "epoch": 1.7834101382488479, + "grad_norm": 1.2390057793357907, + "learning_rate": 6.357784716830528e-08, + "loss": 0.8153259754180908, + "step": 7740 + }, + { + "epoch": 1.7836405529953918, + "grad_norm": 1.4244568607420958, + "learning_rate": 6.344423727546744e-08, + "loss": 0.8229082226753235, + "step": 7741 + }, + { + "epoch": 1.7838709677419353, + "grad_norm": 1.3055755817113595, + "learning_rate": 6.331076332245866e-08, + "loss": 0.7306294441223145, + "step": 7742 + }, + { + "epoch": 1.7841013824884793, + "grad_norm": 1.1085692686400792, + "learning_rate": 6.317742532865233e-08, + "loss": 0.7613078951835632, + "step": 7743 + }, + { + "epoch": 1.784331797235023, + "grad_norm": 1.4694829399841158, + "learning_rate": 6.304422331340275e-08, + "loss": 0.9164611101150513, + "step": 7744 + }, + { + "epoch": 1.7845622119815667, + "grad_norm": 1.4076564642652605, + "learning_rate": 6.29111572960439e-08, + "loss": 0.8770956993103027, + "step": 7745 + }, + { + "epoch": 1.7847926267281107, + "grad_norm": 1.3274833988945276, + "learning_rate": 6.277822729588989e-08, + "loss": 0.7482821941375732, + "step": 7746 + }, + { + "epoch": 1.7850230414746544, + "grad_norm": 1.3149565308569835, + "learning_rate": 
6.264543333223549e-08, + "loss": 0.7850298881530762, + "step": 7747 + }, + { + "epoch": 1.7852534562211981, + "grad_norm": 1.0844733877563915, + "learning_rate": 6.251277542435552e-08, + "loss": 0.5781385898590088, + "step": 7748 + }, + { + "epoch": 1.785483870967742, + "grad_norm": 1.2619844590894689, + "learning_rate": 6.238025359150501e-08, + "loss": 0.8217513561248779, + "step": 7749 + }, + { + "epoch": 1.7857142857142856, + "grad_norm": 1.2512912228822737, + "learning_rate": 6.224786785291969e-08, + "loss": 0.8500482439994812, + "step": 7750 + }, + { + "epoch": 1.7859447004608295, + "grad_norm": 1.458025138254964, + "learning_rate": 6.211561822781474e-08, + "loss": 0.8146470785140991, + "step": 7751 + }, + { + "epoch": 1.7861751152073733, + "grad_norm": 1.248354775738917, + "learning_rate": 6.198350473538616e-08, + "loss": 0.7351702451705933, + "step": 7752 + }, + { + "epoch": 1.786405529953917, + "grad_norm": 1.2620887228989164, + "learning_rate": 6.185152739481026e-08, + "loss": 0.7993056774139404, + "step": 7753 + }, + { + "epoch": 1.786635944700461, + "grad_norm": 1.4031526672609798, + "learning_rate": 6.171968622524315e-08, + "loss": 0.8570160865783691, + "step": 7754 + }, + { + "epoch": 1.7868663594470044, + "grad_norm": 1.1948359150749444, + "learning_rate": 6.158798124582143e-08, + "loss": 0.6200212836265564, + "step": 7755 + }, + { + "epoch": 1.7870967741935484, + "grad_norm": 1.2592084852014216, + "learning_rate": 6.145641247566202e-08, + "loss": 0.8196465373039246, + "step": 7756 + }, + { + "epoch": 1.7873271889400921, + "grad_norm": 0.9917037331823602, + "learning_rate": 6.132497993386165e-08, + "loss": 0.7038032412528992, + "step": 7757 + }, + { + "epoch": 1.7875576036866359, + "grad_norm": 1.2428262727857045, + "learning_rate": 6.119368363949806e-08, + "loss": 0.7222307324409485, + "step": 7758 + }, + { + "epoch": 1.7877880184331798, + "grad_norm": 1.2991738769607613, + "learning_rate": 6.106252361162834e-08, + "loss": 0.8457501530647278, + 
"step": 7759 + }, + { + "epoch": 1.7880184331797235, + "grad_norm": 1.5487287329891364, + "learning_rate": 6.093149986929025e-08, + "loss": 0.7543236017227173, + "step": 7760 + }, + { + "epoch": 1.7882488479262673, + "grad_norm": 1.294614145507911, + "learning_rate": 6.080061243150191e-08, + "loss": 0.5728875398635864, + "step": 7761 + }, + { + "epoch": 1.7884792626728112, + "grad_norm": 1.3902935059609232, + "learning_rate": 6.066986131726138e-08, + "loss": 0.6864895820617676, + "step": 7762 + }, + { + "epoch": 1.7887096774193547, + "grad_norm": 1.1640824452811938, + "learning_rate": 6.053924654554687e-08, + "loss": 0.8580472469329834, + "step": 7763 + }, + { + "epoch": 1.7889400921658987, + "grad_norm": 1.358237067906671, + "learning_rate": 6.040876813531714e-08, + "loss": 0.7670924663543701, + "step": 7764 + }, + { + "epoch": 1.7891705069124424, + "grad_norm": 1.2558108988688055, + "learning_rate": 6.027842610551082e-08, + "loss": 0.6558287739753723, + "step": 7765 + }, + { + "epoch": 1.7894009216589861, + "grad_norm": 1.2875975662335684, + "learning_rate": 6.014822047504697e-08, + "loss": 0.8186839818954468, + "step": 7766 + }, + { + "epoch": 1.78963133640553, + "grad_norm": 1.2720662525098447, + "learning_rate": 6.001815126282462e-08, + "loss": 0.7862167358398438, + "step": 7767 + }, + { + "epoch": 1.7898617511520736, + "grad_norm": 1.1119662378593531, + "learning_rate": 5.98882184877233e-08, + "loss": 0.8594048023223877, + "step": 7768 + }, + { + "epoch": 1.7900921658986175, + "grad_norm": 1.3277176558233812, + "learning_rate": 5.975842216860238e-08, + "loss": 0.804019033908844, + "step": 7769 + }, + { + "epoch": 1.7903225806451613, + "grad_norm": 1.1244948347974122, + "learning_rate": 5.962876232430192e-08, + "loss": 0.7404098510742188, + "step": 7770 + }, + { + "epoch": 1.790552995391705, + "grad_norm": 1.3595838567399194, + "learning_rate": 5.949923897364173e-08, + "loss": 0.7726024389266968, + "step": 7771 + }, + { + "epoch": 1.790783410138249, + 
"grad_norm": 1.5060671287860161, + "learning_rate": 5.936985213542178e-08, + "loss": 0.8225048184394836, + "step": 7772 + }, + { + "epoch": 1.7910138248847927, + "grad_norm": 1.4217281972238225, + "learning_rate": 5.924060182842272e-08, + "loss": 0.8485706448554993, + "step": 7773 + }, + { + "epoch": 1.7912442396313364, + "grad_norm": 1.189460803975086, + "learning_rate": 5.9111488071404867e-08, + "loss": 0.6580322980880737, + "step": 7774 + }, + { + "epoch": 1.7914746543778803, + "grad_norm": 1.1783786831629417, + "learning_rate": 5.898251088310879e-08, + "loss": 0.7486656904220581, + "step": 7775 + }, + { + "epoch": 1.7917050691244238, + "grad_norm": 1.5948072851449393, + "learning_rate": 5.885367028225574e-08, + "loss": 0.9068334102630615, + "step": 7776 + }, + { + "epoch": 1.7919354838709678, + "grad_norm": 1.1107745619546634, + "learning_rate": 5.872496628754653e-08, + "loss": 0.7091449499130249, + "step": 7777 + }, + { + "epoch": 1.7921658986175115, + "grad_norm": 1.3473785107334575, + "learning_rate": 5.8596398917662107e-08, + "loss": 0.7248316407203674, + "step": 7778 + }, + { + "epoch": 1.7923963133640552, + "grad_norm": 1.2057819957098448, + "learning_rate": 5.8467968191264315e-08, + "loss": 0.7740335464477539, + "step": 7779 + }, + { + "epoch": 1.7926267281105992, + "grad_norm": 1.267573304949112, + "learning_rate": 5.833967412699448e-08, + "loss": 0.7810479402542114, + "step": 7780 + }, + { + "epoch": 1.7928571428571427, + "grad_norm": 1.004282792701847, + "learning_rate": 5.821151674347435e-08, + "loss": 0.7072443962097168, + "step": 7781 + }, + { + "epoch": 1.7930875576036867, + "grad_norm": 1.1829190770666373, + "learning_rate": 5.808349605930585e-08, + "loss": 0.8218289613723755, + "step": 7782 + }, + { + "epoch": 1.7933179723502304, + "grad_norm": 1.393265214120735, + "learning_rate": 5.795561209307087e-08, + "loss": 0.8928433656692505, + "step": 7783 + }, + { + "epoch": 1.793548387096774, + "grad_norm": 1.455083354855402, + "learning_rate": 
5.7827864863331796e-08, + "loss": 0.765188455581665, + "step": 7784 + }, + { + "epoch": 1.793778801843318, + "grad_norm": 1.0118039506572176, + "learning_rate": 5.7700254388630795e-08, + "loss": 0.7149494886398315, + "step": 7785 + }, + { + "epoch": 1.7940092165898618, + "grad_norm": 1.6638445812749356, + "learning_rate": 5.75727806874905e-08, + "loss": 0.8144164085388184, + "step": 7786 + }, + { + "epoch": 1.7942396313364055, + "grad_norm": 1.1101501647130416, + "learning_rate": 5.744544377841354e-08, + "loss": 0.7549517154693604, + "step": 7787 + }, + { + "epoch": 1.7944700460829495, + "grad_norm": 1.1805002478026116, + "learning_rate": 5.731824367988258e-08, + "loss": 0.7820652723312378, + "step": 7788 + }, + { + "epoch": 1.794700460829493, + "grad_norm": 1.2187125462499315, + "learning_rate": 5.719118041036042e-08, + "loss": 0.8253183364868164, + "step": 7789 + }, + { + "epoch": 1.794930875576037, + "grad_norm": 1.3044045265020685, + "learning_rate": 5.70642539882904e-08, + "loss": 0.8177148103713989, + "step": 7790 + }, + { + "epoch": 1.7951612903225806, + "grad_norm": 1.2453642288062106, + "learning_rate": 5.69374644320958e-08, + "loss": 0.722260594367981, + "step": 7791 + }, + { + "epoch": 1.7953917050691244, + "grad_norm": 1.3322495120015716, + "learning_rate": 5.6810811760179434e-08, + "loss": 0.8128643035888672, + "step": 7792 + }, + { + "epoch": 1.7956221198156683, + "grad_norm": 1.2461980802133077, + "learning_rate": 5.6684295990925394e-08, + "loss": 0.8267233371734619, + "step": 7793 + }, + { + "epoch": 1.7958525345622118, + "grad_norm": 1.1467604985666775, + "learning_rate": 5.655791714269697e-08, + "loss": 0.8385082483291626, + "step": 7794 + }, + { + "epoch": 1.7960829493087558, + "grad_norm": 1.2035138425735283, + "learning_rate": 5.643167523383785e-08, + "loss": 0.8705167770385742, + "step": 7795 + }, + { + "epoch": 1.7963133640552995, + "grad_norm": 1.263928906996047, + "learning_rate": 5.6305570282672024e-08, + "loss": 0.7628496885299683, + 
"step": 7796 + }, + { + "epoch": 1.7965437788018432, + "grad_norm": 1.2993701262886028, + "learning_rate": 5.61796023075034e-08, + "loss": 0.8246536254882812, + "step": 7797 + }, + { + "epoch": 1.7967741935483872, + "grad_norm": 1.2920173759654132, + "learning_rate": 5.6053771326615815e-08, + "loss": 0.7103257179260254, + "step": 7798 + }, + { + "epoch": 1.797004608294931, + "grad_norm": 1.318695367926756, + "learning_rate": 5.5928077358273984e-08, + "loss": 0.614989161491394, + "step": 7799 + }, + { + "epoch": 1.7972350230414746, + "grad_norm": 1.6404840895868877, + "learning_rate": 5.5802520420721866e-08, + "loss": 0.9876137971878052, + "step": 7800 + }, + { + "epoch": 1.7974654377880186, + "grad_norm": 1.2467848598458215, + "learning_rate": 5.5677100532183775e-08, + "loss": 0.7023773193359375, + "step": 7801 + }, + { + "epoch": 1.797695852534562, + "grad_norm": 1.1844278512776936, + "learning_rate": 5.555181771086459e-08, + "loss": 0.6680843830108643, + "step": 7802 + }, + { + "epoch": 1.797926267281106, + "grad_norm": 1.0826933828880965, + "learning_rate": 5.542667197494877e-08, + "loss": 0.7221776843070984, + "step": 7803 + }, + { + "epoch": 1.7981566820276498, + "grad_norm": 1.0071738664190577, + "learning_rate": 5.5301663342601e-08, + "loss": 0.7473262548446655, + "step": 7804 + }, + { + "epoch": 1.7983870967741935, + "grad_norm": 1.2499370802188474, + "learning_rate": 5.517679183196622e-08, + "loss": 0.8690468072891235, + "step": 7805 + }, + { + "epoch": 1.7986175115207375, + "grad_norm": 1.0933317196070476, + "learning_rate": 5.505205746116937e-08, + "loss": 0.8353981971740723, + "step": 7806 + }, + { + "epoch": 1.798847926267281, + "grad_norm": 1.177111485427447, + "learning_rate": 5.4927460248315405e-08, + "loss": 0.7691711187362671, + "step": 7807 + }, + { + "epoch": 1.799078341013825, + "grad_norm": 1.034283547212154, + "learning_rate": 5.480300021148953e-08, + "loss": 0.6732556819915771, + "step": 7808 + }, + { + "epoch": 1.7993087557603686, + 
"grad_norm": 1.1520777556370354, + "learning_rate": 5.467867736875664e-08, + "loss": 0.7273567914962769, + "step": 7809 + }, + { + "epoch": 1.7995391705069124, + "grad_norm": 1.201774068977123, + "learning_rate": 5.455449173816251e-08, + "loss": 0.7951864004135132, + "step": 7810 + }, + { + "epoch": 1.7997695852534563, + "grad_norm": 1.4133736179333027, + "learning_rate": 5.4430443337732276e-08, + "loss": 0.7073169350624084, + "step": 7811 + }, + { + "epoch": 1.8, + "grad_norm": 1.0101637387022209, + "learning_rate": 5.430653218547132e-08, + "loss": 0.682072639465332, + "step": 7812 + }, + { + "epoch": 1.8002304147465438, + "grad_norm": 0.9949453624163476, + "learning_rate": 5.4182758299365364e-08, + "loss": 0.7512049674987793, + "step": 7813 + }, + { + "epoch": 1.8004608294930877, + "grad_norm": 1.2218170088515747, + "learning_rate": 5.405912169738003e-08, + "loss": 0.7470980882644653, + "step": 7814 + }, + { + "epoch": 1.8006912442396312, + "grad_norm": 1.1792295753175266, + "learning_rate": 5.3935622397460634e-08, + "loss": 0.792417049407959, + "step": 7815 + }, + { + "epoch": 1.8009216589861752, + "grad_norm": 1.4508025797803343, + "learning_rate": 5.3812260417533505e-08, + "loss": 0.8600934743881226, + "step": 7816 + }, + { + "epoch": 1.801152073732719, + "grad_norm": 1.2411035382017865, + "learning_rate": 5.36890357755041e-08, + "loss": 0.6931058168411255, + "step": 7817 + }, + { + "epoch": 1.8013824884792626, + "grad_norm": 1.1047587345616248, + "learning_rate": 5.3565948489258216e-08, + "loss": 0.7382420897483826, + "step": 7818 + }, + { + "epoch": 1.8016129032258066, + "grad_norm": 1.5724454012098283, + "learning_rate": 5.344299857666224e-08, + "loss": 0.6811971068382263, + "step": 7819 + }, + { + "epoch": 1.80184331797235, + "grad_norm": 1.3142032735909368, + "learning_rate": 5.332018605556188e-08, + "loss": 0.8551425933837891, + "step": 7820 + }, + { + "epoch": 1.802073732718894, + "grad_norm": 1.298840655183536, + "learning_rate": 5.319751094378322e-08, 
+ "loss": 0.7907109260559082, + "step": 7821 + }, + { + "epoch": 1.8023041474654378, + "grad_norm": 1.462185741805911, + "learning_rate": 5.3074973259132464e-08, + "loss": 0.6995817422866821, + "step": 7822 + }, + { + "epoch": 1.8025345622119815, + "grad_norm": 1.2098230160416081, + "learning_rate": 5.295257301939582e-08, + "loss": 0.9157558679580688, + "step": 7823 + }, + { + "epoch": 1.8027649769585254, + "grad_norm": 1.3503599705143554, + "learning_rate": 5.283031024233942e-08, + "loss": 0.8181086778640747, + "step": 7824 + }, + { + "epoch": 1.8029953917050692, + "grad_norm": 1.061101797749781, + "learning_rate": 5.270818494570961e-08, + "loss": 0.7170151472091675, + "step": 7825 + }, + { + "epoch": 1.803225806451613, + "grad_norm": 1.3415396727620215, + "learning_rate": 5.258619714723278e-08, + "loss": 0.7548947334289551, + "step": 7826 + }, + { + "epoch": 1.8034562211981566, + "grad_norm": 1.309211881034751, + "learning_rate": 5.2464346864615204e-08, + "loss": 0.7482869625091553, + "step": 7827 + }, + { + "epoch": 1.8036866359447004, + "grad_norm": 1.2839346666214595, + "learning_rate": 5.234263411554329e-08, + "loss": 0.6984925270080566, + "step": 7828 + }, + { + "epoch": 1.8039170506912443, + "grad_norm": 1.4972180990250632, + "learning_rate": 5.222105891768347e-08, + "loss": 0.910038948059082, + "step": 7829 + }, + { + "epoch": 1.804147465437788, + "grad_norm": 1.4071380742837927, + "learning_rate": 5.2099621288682174e-08, + "loss": 0.8936711549758911, + "step": 7830 + }, + { + "epoch": 1.8043778801843318, + "grad_norm": 1.2841490446822148, + "learning_rate": 5.197832124616608e-08, + "loss": 0.7376326322555542, + "step": 7831 + }, + { + "epoch": 1.8046082949308757, + "grad_norm": 1.6922079171273652, + "learning_rate": 5.1857158807741554e-08, + "loss": 0.8373547792434692, + "step": 7832 + }, + { + "epoch": 1.8048387096774192, + "grad_norm": 1.1938115721747944, + "learning_rate": 5.17361339909953e-08, + "loss": 0.7018512487411499, + "step": 7833 + }, + { + 
"epoch": 1.8050691244239632, + "grad_norm": 1.0051532014919082, + "learning_rate": 5.161524681349394e-08, + "loss": 0.6111225485801697, + "step": 7834 + }, + { + "epoch": 1.805299539170507, + "grad_norm": 1.1643316930206133, + "learning_rate": 5.149449729278388e-08, + "loss": 0.6961934566497803, + "step": 7835 + }, + { + "epoch": 1.8055299539170506, + "grad_norm": 1.1662486414151942, + "learning_rate": 5.137388544639198e-08, + "loss": 0.677324116230011, + "step": 7836 + }, + { + "epoch": 1.8057603686635946, + "grad_norm": 1.1241341054985654, + "learning_rate": 5.125341129182481e-08, + "loss": 0.7124897837638855, + "step": 7837 + }, + { + "epoch": 1.8059907834101383, + "grad_norm": 1.1858041195501718, + "learning_rate": 5.1133074846568815e-08, + "loss": 0.7474578619003296, + "step": 7838 + }, + { + "epoch": 1.806221198156682, + "grad_norm": 1.0832413753523613, + "learning_rate": 5.101287612809102e-08, + "loss": 0.699856162071228, + "step": 7839 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 1.2510053638983376, + "learning_rate": 5.089281515383803e-08, + "loss": 0.6548302173614502, + "step": 7840 + }, + { + "epoch": 1.8066820276497695, + "grad_norm": 1.4067864996197734, + "learning_rate": 5.077289194123624e-08, + "loss": 0.8376108407974243, + "step": 7841 + }, + { + "epoch": 1.8069124423963134, + "grad_norm": 1.4168917230935398, + "learning_rate": 5.065310650769283e-08, + "loss": 0.741931140422821, + "step": 7842 + }, + { + "epoch": 1.8071428571428572, + "grad_norm": 1.0130617353418785, + "learning_rate": 5.053345887059413e-08, + "loss": 0.7253270149230957, + "step": 7843 + }, + { + "epoch": 1.807373271889401, + "grad_norm": 1.452385981822963, + "learning_rate": 5.0413949047306894e-08, + "loss": 0.8248677849769592, + "step": 7844 + }, + { + "epoch": 1.8076036866359448, + "grad_norm": 1.2182337218961132, + "learning_rate": 5.0294577055177925e-08, + "loss": 0.7571253776550293, + "step": 7845 + }, + { + "epoch": 1.8078341013824883, + "grad_norm": 
1.3374870147899762, + "learning_rate": 5.017534291153391e-08, + "loss": 0.8256274461746216, + "step": 7846 + }, + { + "epoch": 1.8080645161290323, + "grad_norm": 1.0202351482491858, + "learning_rate": 5.0056246633681356e-08, + "loss": 0.8609060049057007, + "step": 7847 + }, + { + "epoch": 1.808294930875576, + "grad_norm": 1.0533455142790622, + "learning_rate": 4.9937288238907196e-08, + "loss": 0.7005047798156738, + "step": 7848 + }, + { + "epoch": 1.8085253456221198, + "grad_norm": 1.508707208071474, + "learning_rate": 4.981846774447784e-08, + "loss": 0.8640049695968628, + "step": 7849 + }, + { + "epoch": 1.8087557603686637, + "grad_norm": 1.2891784390675838, + "learning_rate": 4.969978516763984e-08, + "loss": 0.8385862112045288, + "step": 7850 + }, + { + "epoch": 1.8089861751152074, + "grad_norm": 1.4569260681358536, + "learning_rate": 4.9581240525620184e-08, + "loss": 0.845676064491272, + "step": 7851 + }, + { + "epoch": 1.8092165898617512, + "grad_norm": 1.1553749249891685, + "learning_rate": 4.9462833835625327e-08, + "loss": 0.7638444304466248, + "step": 7852 + }, + { + "epoch": 1.8094470046082949, + "grad_norm": 1.3732641737808478, + "learning_rate": 4.934456511484153e-08, + "loss": 0.813924252986908, + "step": 7853 + }, + { + "epoch": 1.8096774193548386, + "grad_norm": 1.1884602060780909, + "learning_rate": 4.9226434380435835e-08, + "loss": 0.8773660659790039, + "step": 7854 + }, + { + "epoch": 1.8099078341013826, + "grad_norm": 1.1706588526128812, + "learning_rate": 4.91084416495543e-08, + "loss": 0.6703497171401978, + "step": 7855 + }, + { + "epoch": 1.8101382488479263, + "grad_norm": 1.149648913375334, + "learning_rate": 4.8990586939323896e-08, + "loss": 0.760738730430603, + "step": 7856 + }, + { + "epoch": 1.81036866359447, + "grad_norm": 1.0947557024146697, + "learning_rate": 4.887287026685072e-08, + "loss": 0.6466494798660278, + "step": 7857 + }, + { + "epoch": 1.810599078341014, + "grad_norm": 1.2590537115606641, + "learning_rate": 
4.8755291649221206e-08, + "loss": 0.7416050434112549, + "step": 7858 + }, + { + "epoch": 1.8108294930875575, + "grad_norm": 1.200153480710325, + "learning_rate": 4.863785110350205e-08, + "loss": 0.6841444373130798, + "step": 7859 + }, + { + "epoch": 1.8110599078341014, + "grad_norm": 0.9841555488440165, + "learning_rate": 4.8520548646739265e-08, + "loss": 0.7818359732627869, + "step": 7860 + }, + { + "epoch": 1.8112903225806452, + "grad_norm": 1.287409595467329, + "learning_rate": 4.840338429595914e-08, + "loss": 0.8802354335784912, + "step": 7861 + }, + { + "epoch": 1.8115207373271889, + "grad_norm": 1.3745378964034318, + "learning_rate": 4.8286358068168055e-08, + "loss": 0.8105144500732422, + "step": 7862 + }, + { + "epoch": 1.8117511520737328, + "grad_norm": 1.2395900259386188, + "learning_rate": 4.816946998035232e-08, + "loss": 0.6613968014717102, + "step": 7863 + }, + { + "epoch": 1.8119815668202763, + "grad_norm": 1.191330407638407, + "learning_rate": 4.80527200494778e-08, + "loss": 0.7945050001144409, + "step": 7864 + }, + { + "epoch": 1.8122119815668203, + "grad_norm": 1.1914418584370128, + "learning_rate": 4.793610829249084e-08, + "loss": 0.7268643379211426, + "step": 7865 + }, + { + "epoch": 1.812442396313364, + "grad_norm": 1.0567405581917244, + "learning_rate": 4.781963472631745e-08, + "loss": 0.7416445016860962, + "step": 7866 + }, + { + "epoch": 1.8126728110599077, + "grad_norm": 1.246941647908604, + "learning_rate": 4.770329936786355e-08, + "loss": 0.8536533117294312, + "step": 7867 + }, + { + "epoch": 1.8129032258064517, + "grad_norm": 1.2429467538833636, + "learning_rate": 4.7587102234015074e-08, + "loss": 0.8258422017097473, + "step": 7868 + }, + { + "epoch": 1.8131336405529954, + "grad_norm": 1.189598318299626, + "learning_rate": 4.7471043341637874e-08, + "loss": 0.6976941823959351, + "step": 7869 + }, + { + "epoch": 1.8133640552995391, + "grad_norm": 1.222534835599988, + "learning_rate": 4.735512270757758e-08, + "loss": 0.8213087916374207, + 
"step": 7870 + }, + { + "epoch": 1.813594470046083, + "grad_norm": 1.1756801964544004, + "learning_rate": 4.723934034866028e-08, + "loss": 0.8012057542800903, + "step": 7871 + }, + { + "epoch": 1.8138248847926266, + "grad_norm": 1.0419940327131916, + "learning_rate": 4.7123696281691436e-08, + "loss": 0.7802866697311401, + "step": 7872 + }, + { + "epoch": 1.8140552995391706, + "grad_norm": 1.1630887083640626, + "learning_rate": 4.700819052345639e-08, + "loss": 0.8024426698684692, + "step": 7873 + }, + { + "epoch": 1.8142857142857143, + "grad_norm": 0.9709635675133196, + "learning_rate": 4.689282309072107e-08, + "loss": 0.6383114457130432, + "step": 7874 + }, + { + "epoch": 1.814516129032258, + "grad_norm": 1.2768186922012608, + "learning_rate": 4.677759400023085e-08, + "loss": 0.7226015329360962, + "step": 7875 + }, + { + "epoch": 1.814746543778802, + "grad_norm": 1.0424513670531574, + "learning_rate": 4.6662503268710684e-08, + "loss": 0.8390164971351624, + "step": 7876 + }, + { + "epoch": 1.8149769585253455, + "grad_norm": 1.0443665370850939, + "learning_rate": 4.654755091286633e-08, + "loss": 0.8120134472846985, + "step": 7877 + }, + { + "epoch": 1.8152073732718894, + "grad_norm": 1.305111160234168, + "learning_rate": 4.6432736949382656e-08, + "loss": 0.6554470062255859, + "step": 7878 + }, + { + "epoch": 1.8154377880184331, + "grad_norm": 1.1780234915455678, + "learning_rate": 4.631806139492478e-08, + "loss": 0.7268370985984802, + "step": 7879 + }, + { + "epoch": 1.8156682027649769, + "grad_norm": 1.4051894182356444, + "learning_rate": 4.620352426613794e-08, + "loss": 0.7991992831230164, + "step": 7880 + }, + { + "epoch": 1.8158986175115208, + "grad_norm": 1.1268859101296151, + "learning_rate": 4.608912557964673e-08, + "loss": 0.7695842981338501, + "step": 7881 + }, + { + "epoch": 1.8161290322580645, + "grad_norm": 1.9896156470888766, + "learning_rate": 4.59748653520563e-08, + "loss": 0.8633268475532532, + "step": 7882 + }, + { + "epoch": 1.8163594470046083, + 
"grad_norm": 1.1364981478494263, + "learning_rate": 4.586074359995118e-08, + "loss": 0.7018440961837769, + "step": 7883 + }, + { + "epoch": 1.8165898617511522, + "grad_norm": 1.1022691462384118, + "learning_rate": 4.574676033989589e-08, + "loss": 0.7304259538650513, + "step": 7884 + }, + { + "epoch": 1.8168202764976957, + "grad_norm": 1.2520833867580832, + "learning_rate": 4.563291558843518e-08, + "loss": 0.7408654689788818, + "step": 7885 + }, + { + "epoch": 1.8170506912442397, + "grad_norm": 0.8583590816187824, + "learning_rate": 4.55192093620933e-08, + "loss": 0.6378169059753418, + "step": 7886 + }, + { + "epoch": 1.8172811059907834, + "grad_norm": 1.2929203847720665, + "learning_rate": 4.540564167737471e-08, + "loss": 0.8854331374168396, + "step": 7887 + }, + { + "epoch": 1.8175115207373271, + "grad_norm": 1.3325768500609418, + "learning_rate": 4.529221255076343e-08, + "loss": 0.6948372721672058, + "step": 7888 + }, + { + "epoch": 1.817741935483871, + "grad_norm": 1.0169430034347062, + "learning_rate": 4.517892199872364e-08, + "loss": 0.8199236392974854, + "step": 7889 + }, + { + "epoch": 1.8179723502304146, + "grad_norm": 1.2358305635738154, + "learning_rate": 4.506577003769918e-08, + "loss": 0.6967995762825012, + "step": 7890 + }, + { + "epoch": 1.8182027649769585, + "grad_norm": 1.5521492896589208, + "learning_rate": 4.495275668411425e-08, + "loss": 0.848435640335083, + "step": 7891 + }, + { + "epoch": 1.8184331797235023, + "grad_norm": 1.0482582355280439, + "learning_rate": 4.483988195437227e-08, + "loss": 0.7085731029510498, + "step": 7892 + }, + { + "epoch": 1.818663594470046, + "grad_norm": 1.540410469929121, + "learning_rate": 4.472714586485682e-08, + "loss": 0.7400653958320618, + "step": 7893 + }, + { + "epoch": 1.81889400921659, + "grad_norm": 1.3011192141788026, + "learning_rate": 4.461454843193169e-08, + "loss": 0.7636830806732178, + "step": 7894 + }, + { + "epoch": 1.8191244239631337, + "grad_norm": 0.9509851989309867, + "learning_rate": 
4.4502089671940135e-08, + "loss": 0.6902754306793213, + "step": 7895 + }, + { + "epoch": 1.8193548387096774, + "grad_norm": 1.4497717090666749, + "learning_rate": 4.438976960120522e-08, + "loss": 0.8397349119186401, + "step": 7896 + }, + { + "epoch": 1.8195852534562214, + "grad_norm": 1.1317263019718502, + "learning_rate": 4.4277588236030226e-08, + "loss": 0.7505836486816406, + "step": 7897 + }, + { + "epoch": 1.8198156682027649, + "grad_norm": 1.4213425196027163, + "learning_rate": 4.416554559269814e-08, + "loss": 0.9310287833213806, + "step": 7898 + }, + { + "epoch": 1.8200460829493088, + "grad_norm": 1.0910777164101302, + "learning_rate": 4.405364168747161e-08, + "loss": 0.724685549736023, + "step": 7899 + }, + { + "epoch": 1.8202764976958525, + "grad_norm": 0.99356469827684, + "learning_rate": 4.394187653659365e-08, + "loss": 0.6554735898971558, + "step": 7900 + }, + { + "epoch": 1.8205069124423963, + "grad_norm": 1.5629584518265682, + "learning_rate": 4.383025015628661e-08, + "loss": 0.7494597434997559, + "step": 7901 + }, + { + "epoch": 1.8207373271889402, + "grad_norm": 1.3596683636243805, + "learning_rate": 4.371876256275287e-08, + "loss": 0.817386269569397, + "step": 7902 + }, + { + "epoch": 1.8209677419354837, + "grad_norm": 1.2645292088995888, + "learning_rate": 4.3607413772174806e-08, + "loss": 0.8668064475059509, + "step": 7903 + }, + { + "epoch": 1.8211981566820277, + "grad_norm": 1.2001673372629817, + "learning_rate": 4.34962038007145e-08, + "loss": 0.7400633096694946, + "step": 7904 + }, + { + "epoch": 1.8214285714285714, + "grad_norm": 1.018878326746976, + "learning_rate": 4.3385132664514046e-08, + "loss": 0.7273544073104858, + "step": 7905 + }, + { + "epoch": 1.8216589861751151, + "grad_norm": 1.149057253315942, + "learning_rate": 4.3274200379695315e-08, + "loss": 0.7133193016052246, + "step": 7906 + }, + { + "epoch": 1.821889400921659, + "grad_norm": 1.2433089389356335, + "learning_rate": 4.316340696235976e-08, + "loss": 0.9390736222267151, + 
"step": 7907 + }, + { + "epoch": 1.8221198156682028, + "grad_norm": 1.1318410882734156, + "learning_rate": 4.3052752428588966e-08, + "loss": 0.7065613269805908, + "step": 7908 + }, + { + "epoch": 1.8223502304147465, + "grad_norm": 1.2803518971044316, + "learning_rate": 4.294223679444442e-08, + "loss": 0.813999354839325, + "step": 7909 + }, + { + "epoch": 1.8225806451612905, + "grad_norm": 1.616827704611462, + "learning_rate": 4.2831860075966955e-08, + "loss": 0.9234256148338318, + "step": 7910 + }, + { + "epoch": 1.822811059907834, + "grad_norm": 1.4124883659201861, + "learning_rate": 4.272162228917808e-08, + "loss": 0.8630207777023315, + "step": 7911 + }, + { + "epoch": 1.823041474654378, + "grad_norm": 1.382424983437882, + "learning_rate": 4.2611523450078456e-08, + "loss": 0.7827208042144775, + "step": 7912 + }, + { + "epoch": 1.8232718894009217, + "grad_norm": 1.3479238410287269, + "learning_rate": 4.250156357464873e-08, + "loss": 0.884107232093811, + "step": 7913 + }, + { + "epoch": 1.8235023041474654, + "grad_norm": 1.3064700630797408, + "learning_rate": 4.2391742678849484e-08, + "loss": 0.8615697026252747, + "step": 7914 + }, + { + "epoch": 1.8237327188940093, + "grad_norm": 1.4410161390206035, + "learning_rate": 4.2282060778621174e-08, + "loss": 0.8001279830932617, + "step": 7915 + }, + { + "epoch": 1.8239631336405528, + "grad_norm": 1.1016373373524035, + "learning_rate": 4.217251788988374e-08, + "loss": 0.7183214426040649, + "step": 7916 + }, + { + "epoch": 1.8241935483870968, + "grad_norm": 1.2680472029966925, + "learning_rate": 4.206311402853746e-08, + "loss": 0.7751119136810303, + "step": 7917 + }, + { + "epoch": 1.8244239631336405, + "grad_norm": 1.287058032235602, + "learning_rate": 4.195384921046208e-08, + "loss": 0.8073426485061646, + "step": 7918 + }, + { + "epoch": 1.8246543778801843, + "grad_norm": 1.053407718143569, + "learning_rate": 4.1844723451517017e-08, + "loss": 0.7918455600738525, + "step": 7919 + }, + { + "epoch": 1.8248847926267282, + 
"grad_norm": 1.1789390806182918, + "learning_rate": 4.1735736767542054e-08, + "loss": 0.8070017099380493, + "step": 7920 + }, + { + "epoch": 1.825115207373272, + "grad_norm": 1.1456133687492283, + "learning_rate": 4.1626889174356306e-08, + "loss": 0.7202159762382507, + "step": 7921 + }, + { + "epoch": 1.8253456221198157, + "grad_norm": 1.304718816677761, + "learning_rate": 4.15181806877587e-08, + "loss": 0.8412283658981323, + "step": 7922 + }, + { + "epoch": 1.8255760368663596, + "grad_norm": 1.079962569087528, + "learning_rate": 4.140961132352849e-08, + "loss": 0.6230478286743164, + "step": 7923 + }, + { + "epoch": 1.8258064516129031, + "grad_norm": 1.184647211526077, + "learning_rate": 4.1301181097424196e-08, + "loss": 0.6475099921226501, + "step": 7924 + }, + { + "epoch": 1.826036866359447, + "grad_norm": 1.1526955390848261, + "learning_rate": 4.1192890025184223e-08, + "loss": 0.6277462244033813, + "step": 7925 + }, + { + "epoch": 1.8262672811059908, + "grad_norm": 1.048650750687635, + "learning_rate": 4.1084738122527e-08, + "loss": 0.784058690071106, + "step": 7926 + }, + { + "epoch": 1.8264976958525345, + "grad_norm": 1.2758998200943634, + "learning_rate": 4.097672540515063e-08, + "loss": 0.7214534282684326, + "step": 7927 + }, + { + "epoch": 1.8267281105990785, + "grad_norm": 1.3299220547069754, + "learning_rate": 4.086885188873302e-08, + "loss": 0.7504015564918518, + "step": 7928 + }, + { + "epoch": 1.826958525345622, + "grad_norm": 1.3115105618474625, + "learning_rate": 4.076111758893175e-08, + "loss": 0.8837840557098389, + "step": 7929 + }, + { + "epoch": 1.827188940092166, + "grad_norm": 0.9756920709009218, + "learning_rate": 4.065352252138443e-08, + "loss": 0.6903706789016724, + "step": 7930 + }, + { + "epoch": 1.8274193548387097, + "grad_norm": 1.0882078909648618, + "learning_rate": 4.054606670170824e-08, + "loss": 0.6120485067367554, + "step": 7931 + }, + { + "epoch": 1.8276497695852534, + "grad_norm": 1.3933670864132435, + "learning_rate": 
4.043875014550047e-08, + "loss": 0.9566253423690796, + "step": 7932 + }, + { + "epoch": 1.8278801843317973, + "grad_norm": 1.143561158140067, + "learning_rate": 4.033157286833766e-08, + "loss": 0.7702776193618774, + "step": 7933 + }, + { + "epoch": 1.828110599078341, + "grad_norm": 1.3861853644171394, + "learning_rate": 4.0224534885776706e-08, + "loss": 0.7326529026031494, + "step": 7934 + }, + { + "epoch": 1.8283410138248848, + "grad_norm": 1.199651876611857, + "learning_rate": 4.011763621335395e-08, + "loss": 0.8161343336105347, + "step": 7935 + }, + { + "epoch": 1.8285714285714287, + "grad_norm": 1.2385311136965618, + "learning_rate": 4.001087686658544e-08, + "loss": 0.7167537212371826, + "step": 7936 + }, + { + "epoch": 1.8288018433179722, + "grad_norm": 1.5866479195226006, + "learning_rate": 3.9904256860967433e-08, + "loss": 0.9195249080657959, + "step": 7937 + }, + { + "epoch": 1.8290322580645162, + "grad_norm": 1.4492337682663832, + "learning_rate": 3.979777621197544e-08, + "loss": 0.9483609199523926, + "step": 7938 + }, + { + "epoch": 1.82926267281106, + "grad_norm": 1.1520857488925356, + "learning_rate": 3.96914349350651e-08, + "loss": 0.6521364450454712, + "step": 7939 + }, + { + "epoch": 1.8294930875576036, + "grad_norm": 1.1394847291425385, + "learning_rate": 3.958523304567174e-08, + "loss": 0.714328408241272, + "step": 7940 + }, + { + "epoch": 1.8297235023041476, + "grad_norm": 1.2749952242619191, + "learning_rate": 3.9479170559210464e-08, + "loss": 0.705136775970459, + "step": 7941 + }, + { + "epoch": 1.829953917050691, + "grad_norm": 1.2310686937076982, + "learning_rate": 3.937324749107584e-08, + "loss": 0.9096843004226685, + "step": 7942 + }, + { + "epoch": 1.830184331797235, + "grad_norm": 1.1347026880501985, + "learning_rate": 3.9267463856642704e-08, + "loss": 0.7797929048538208, + "step": 7943 + }, + { + "epoch": 1.8304147465437788, + "grad_norm": 1.1418375010830168, + "learning_rate": 3.9161819671265414e-08, + "loss": 0.739689290523529, + 
"step": 7944 + }, + { + "epoch": 1.8306451612903225, + "grad_norm": 1.2414926332489717, + "learning_rate": 3.905631495027795e-08, + "loss": 0.7297589778900146, + "step": 7945 + }, + { + "epoch": 1.8308755760368665, + "grad_norm": 1.1411747974433366, + "learning_rate": 3.895094970899426e-08, + "loss": 0.6632317900657654, + "step": 7946 + }, + { + "epoch": 1.8311059907834102, + "grad_norm": 1.1035263718417188, + "learning_rate": 3.884572396270802e-08, + "loss": 0.8075754642486572, + "step": 7947 + }, + { + "epoch": 1.831336405529954, + "grad_norm": 1.1206981689667126, + "learning_rate": 3.874063772669256e-08, + "loss": 0.879385232925415, + "step": 7948 + }, + { + "epoch": 1.8315668202764976, + "grad_norm": 1.1296410172019098, + "learning_rate": 3.86356910162009e-08, + "loss": 0.7182341814041138, + "step": 7949 + }, + { + "epoch": 1.8317972350230414, + "grad_norm": 1.3256415462362086, + "learning_rate": 3.853088384646608e-08, + "loss": 0.8980770111083984, + "step": 7950 + }, + { + "epoch": 1.8320276497695853, + "grad_norm": 1.2399263879902838, + "learning_rate": 3.8426216232700483e-08, + "loss": 0.7798547744750977, + "step": 7951 + }, + { + "epoch": 1.832258064516129, + "grad_norm": 1.30590072600508, + "learning_rate": 3.832168819009685e-08, + "loss": 0.7545509934425354, + "step": 7952 + }, + { + "epoch": 1.8324884792626728, + "grad_norm": 1.4626138945450415, + "learning_rate": 3.821729973382681e-08, + "loss": 0.7394163608551025, + "step": 7953 + }, + { + "epoch": 1.8327188940092167, + "grad_norm": 1.095086275435991, + "learning_rate": 3.811305087904271e-08, + "loss": 0.7771584987640381, + "step": 7954 + }, + { + "epoch": 1.8329493087557602, + "grad_norm": 1.0772465088176202, + "learning_rate": 3.800894164087587e-08, + "loss": 0.6490596532821655, + "step": 7955 + }, + { + "epoch": 1.8331797235023042, + "grad_norm": 1.6261572682115344, + "learning_rate": 3.7904972034437546e-08, + "loss": 0.8465416431427002, + "step": 7956 + }, + { + "epoch": 1.833410138248848, + 
"grad_norm": 1.1256653812684285, + "learning_rate": 3.780114207481899e-08, + "loss": 0.6769351363182068, + "step": 7957 + }, + { + "epoch": 1.8336405529953916, + "grad_norm": 1.1157448396752008, + "learning_rate": 3.769745177709094e-08, + "loss": 0.8187215328216553, + "step": 7958 + }, + { + "epoch": 1.8338709677419356, + "grad_norm": 0.9478307441179703, + "learning_rate": 3.759390115630356e-08, + "loss": 0.7524763345718384, + "step": 7959 + }, + { + "epoch": 1.8341013824884793, + "grad_norm": 1.3846707864730958, + "learning_rate": 3.749049022748762e-08, + "loss": 0.8019517064094543, + "step": 7960 + }, + { + "epoch": 1.834331797235023, + "grad_norm": 1.2301171101661803, + "learning_rate": 3.738721900565278e-08, + "loss": 0.7732158899307251, + "step": 7961 + }, + { + "epoch": 1.8345622119815668, + "grad_norm": 1.1624945144679932, + "learning_rate": 3.728408750578871e-08, + "loss": 0.7152917385101318, + "step": 7962 + }, + { + "epoch": 1.8347926267281105, + "grad_norm": 1.2249354034345745, + "learning_rate": 3.7181095742864876e-08, + "loss": 0.7117735147476196, + "step": 7963 + }, + { + "epoch": 1.8350230414746544, + "grad_norm": 1.1387667941982393, + "learning_rate": 3.7078243731830436e-08, + "loss": 0.7651360034942627, + "step": 7964 + }, + { + "epoch": 1.8352534562211982, + "grad_norm": 1.103224145154883, + "learning_rate": 3.697553148761412e-08, + "loss": 0.6686996817588806, + "step": 7965 + }, + { + "epoch": 1.835483870967742, + "grad_norm": 1.4148867918515446, + "learning_rate": 3.687295902512455e-08, + "loss": 0.8654145002365112, + "step": 7966 + }, + { + "epoch": 1.8357142857142859, + "grad_norm": 1.2014603088046913, + "learning_rate": 3.6770526359250046e-08, + "loss": 0.7883874177932739, + "step": 7967 + }, + { + "epoch": 1.8359447004608294, + "grad_norm": 1.3036366063511584, + "learning_rate": 3.666823350485848e-08, + "loss": 0.7270755767822266, + "step": 7968 + }, + { + "epoch": 1.8361751152073733, + "grad_norm": 1.2757403346821974, + "learning_rate": 
3.656608047679744e-08, + "loss": 0.654710054397583, + "step": 7969 + }, + { + "epoch": 1.836405529953917, + "grad_norm": 1.3173622827867584, + "learning_rate": 3.6464067289894485e-08, + "loss": 0.688032329082489, + "step": 7970 + }, + { + "epoch": 1.8366359447004608, + "grad_norm": 1.610615012564481, + "learning_rate": 3.6362193958956457e-08, + "loss": 0.901115894317627, + "step": 7971 + }, + { + "epoch": 1.8368663594470047, + "grad_norm": 1.116601972108686, + "learning_rate": 3.6260460498770404e-08, + "loss": 0.7335774302482605, + "step": 7972 + }, + { + "epoch": 1.8370967741935482, + "grad_norm": 1.386903572934919, + "learning_rate": 3.615886692410275e-08, + "loss": 0.8056570291519165, + "step": 7973 + }, + { + "epoch": 1.8373271889400922, + "grad_norm": 1.0398578754417405, + "learning_rate": 3.6057413249699356e-08, + "loss": 0.82081538438797, + "step": 7974 + }, + { + "epoch": 1.837557603686636, + "grad_norm": 1.2589683870881863, + "learning_rate": 3.595609949028655e-08, + "loss": 0.7741475105285645, + "step": 7975 + }, + { + "epoch": 1.8377880184331796, + "grad_norm": 1.4550225731476647, + "learning_rate": 3.5854925660569693e-08, + "loss": 0.9020792245864868, + "step": 7976 + }, + { + "epoch": 1.8380184331797236, + "grad_norm": 1.395018589671643, + "learning_rate": 3.57538917752338e-08, + "loss": 0.759677529335022, + "step": 7977 + }, + { + "epoch": 1.8382488479262673, + "grad_norm": 1.2528132061795532, + "learning_rate": 3.565299784894427e-08, + "loss": 0.6658498644828796, + "step": 7978 + }, + { + "epoch": 1.838479262672811, + "grad_norm": 1.156561409904186, + "learning_rate": 3.5552243896345254e-08, + "loss": 0.8359798192977905, + "step": 7979 + }, + { + "epoch": 1.838709677419355, + "grad_norm": 0.9586985661683237, + "learning_rate": 3.545162993206141e-08, + "loss": 0.656216025352478, + "step": 7980 + }, + { + "epoch": 1.8389400921658985, + "grad_norm": 1.1907827843907386, + "learning_rate": 3.53511559706966e-08, + "loss": 0.7783077359199524, + "step": 7981 
+ }, + { + "epoch": 1.8391705069124424, + "grad_norm": 1.315887741405374, + "learning_rate": 3.525082202683427e-08, + "loss": 0.7726818919181824, + "step": 7982 + }, + { + "epoch": 1.8394009216589862, + "grad_norm": 1.203190333477806, + "learning_rate": 3.5150628115038213e-08, + "loss": 0.6797339916229248, + "step": 7983 + }, + { + "epoch": 1.83963133640553, + "grad_norm": 1.6491537372199485, + "learning_rate": 3.505057424985114e-08, + "loss": 0.818444013595581, + "step": 7984 + }, + { + "epoch": 1.8398617511520738, + "grad_norm": 1.2385444618355612, + "learning_rate": 3.495066044579564e-08, + "loss": 0.716003954410553, + "step": 7985 + }, + { + "epoch": 1.8400921658986173, + "grad_norm": 1.1184726381698433, + "learning_rate": 3.485088671737435e-08, + "loss": 0.8214380741119385, + "step": 7986 + }, + { + "epoch": 1.8403225806451613, + "grad_norm": 1.2891166927609845, + "learning_rate": 3.475125307906923e-08, + "loss": 0.8004239797592163, + "step": 7987 + }, + { + "epoch": 1.840552995391705, + "grad_norm": 1.0064244623457703, + "learning_rate": 3.465175954534183e-08, + "loss": 0.724868655204773, + "step": 7988 + }, + { + "epoch": 1.8407834101382488, + "grad_norm": 1.2194713737299876, + "learning_rate": 3.455240613063359e-08, + "loss": 0.6774435043334961, + "step": 7989 + }, + { + "epoch": 1.8410138248847927, + "grad_norm": 1.2000954990034474, + "learning_rate": 3.445319284936543e-08, + "loss": 0.7618406414985657, + "step": 7990 + }, + { + "epoch": 1.8412442396313364, + "grad_norm": 1.2446761227229344, + "learning_rate": 3.4354119715938154e-08, + "loss": 0.8176794648170471, + "step": 7991 + }, + { + "epoch": 1.8414746543778802, + "grad_norm": 1.3311989323291133, + "learning_rate": 3.4255186744732045e-08, + "loss": 0.7540123462677002, + "step": 7992 + }, + { + "epoch": 1.841705069124424, + "grad_norm": 0.8317940065053944, + "learning_rate": 3.4156393950107164e-08, + "loss": 0.6888976097106934, + "step": 7993 + }, + { + "epoch": 1.8419354838709676, + "grad_norm": 
0.9229557772464766, + "learning_rate": 3.405774134640294e-08, + "loss": 0.6719028949737549, + "step": 7994 + }, + { + "epoch": 1.8421658986175116, + "grad_norm": 1.2216480626353798, + "learning_rate": 3.3959228947938903e-08, + "loss": 0.817806601524353, + "step": 7995 + }, + { + "epoch": 1.8423963133640553, + "grad_norm": 1.176727717908757, + "learning_rate": 3.3860856769013955e-08, + "loss": 0.6681252717971802, + "step": 7996 + }, + { + "epoch": 1.842626728110599, + "grad_norm": 1.261442308873967, + "learning_rate": 3.3762624823906574e-08, + "loss": 0.7965174317359924, + "step": 7997 + }, + { + "epoch": 1.842857142857143, + "grad_norm": 1.163849986057629, + "learning_rate": 3.366453312687512e-08, + "loss": 0.714171826839447, + "step": 7998 + }, + { + "epoch": 1.8430875576036865, + "grad_norm": 1.2077995913515678, + "learning_rate": 3.356658169215743e-08, + "loss": 0.7489287853240967, + "step": 7999 + }, + { + "epoch": 1.8433179723502304, + "grad_norm": 1.270011813451473, + "learning_rate": 3.34687705339709e-08, + "loss": 0.790866494178772, + "step": 8000 + }, + { + "epoch": 1.8435483870967742, + "grad_norm": 0.9665221846950844, + "learning_rate": 3.337109966651297e-08, + "loss": 0.8208349943161011, + "step": 8001 + }, + { + "epoch": 1.8437788018433179, + "grad_norm": 1.1715709525124653, + "learning_rate": 3.3273569103960174e-08, + "loss": 0.7974207401275635, + "step": 8002 + }, + { + "epoch": 1.8440092165898618, + "grad_norm": 1.1483232930238036, + "learning_rate": 3.317617886046908e-08, + "loss": 0.751643180847168, + "step": 8003 + }, + { + "epoch": 1.8442396313364056, + "grad_norm": 1.3210448516681466, + "learning_rate": 3.3078928950175724e-08, + "loss": 0.9231137037277222, + "step": 8004 + }, + { + "epoch": 1.8444700460829493, + "grad_norm": 1.1496984894908708, + "learning_rate": 3.2981819387195683e-08, + "loss": 0.7975907325744629, + "step": 8005 + }, + { + "epoch": 1.8447004608294932, + "grad_norm": 1.1807761173209448, + "learning_rate": 
3.288485018562448e-08, + "loss": 0.7467124462127686, + "step": 8006 + }, + { + "epoch": 1.8449308755760367, + "grad_norm": 1.1558703241619663, + "learning_rate": 3.278802135953706e-08, + "loss": 0.7983080148696899, + "step": 8007 + }, + { + "epoch": 1.8451612903225807, + "grad_norm": 1.2273424689042212, + "learning_rate": 3.269133292298787e-08, + "loss": 0.7991635799407959, + "step": 8008 + }, + { + "epoch": 1.8453917050691244, + "grad_norm": 1.3284825495150037, + "learning_rate": 3.259478489001111e-08, + "loss": 0.9309900403022766, + "step": 8009 + }, + { + "epoch": 1.8456221198156681, + "grad_norm": 1.4898197506974649, + "learning_rate": 3.249837727462068e-08, + "loss": 0.7667444944381714, + "step": 8010 + }, + { + "epoch": 1.845852534562212, + "grad_norm": 1.0693184262343387, + "learning_rate": 3.2402110090809955e-08, + "loss": 0.722775936126709, + "step": 8011 + }, + { + "epoch": 1.8460829493087556, + "grad_norm": 1.2061345728793884, + "learning_rate": 3.230598335255208e-08, + "loss": 0.7049660682678223, + "step": 8012 + }, + { + "epoch": 1.8463133640552996, + "grad_norm": 1.2538545243397632, + "learning_rate": 3.220999707379957e-08, + "loss": 0.7543717622756958, + "step": 8013 + }, + { + "epoch": 1.8465437788018433, + "grad_norm": 1.0254969440317054, + "learning_rate": 3.2114151268484825e-08, + "loss": 0.705594539642334, + "step": 8014 + }, + { + "epoch": 1.846774193548387, + "grad_norm": 1.3381301652737214, + "learning_rate": 3.201844595051972e-08, + "loss": 0.8663946390151978, + "step": 8015 + }, + { + "epoch": 1.847004608294931, + "grad_norm": 1.2931743474180666, + "learning_rate": 3.192288113379582e-08, + "loss": 0.6990827918052673, + "step": 8016 + }, + { + "epoch": 1.8472350230414747, + "grad_norm": 1.3047302382268444, + "learning_rate": 3.182745683218391e-08, + "loss": 0.8494592905044556, + "step": 8017 + }, + { + "epoch": 1.8474654377880184, + "grad_norm": 1.1964557388323078, + "learning_rate": 3.173217305953524e-08, + "loss": 0.7689815163612366, + 
"step": 8018 + }, + { + "epoch": 1.8476958525345624, + "grad_norm": 1.0869127948311592, + "learning_rate": 3.163702982967964e-08, + "loss": 0.7961923480033875, + "step": 8019 + }, + { + "epoch": 1.8479262672811059, + "grad_norm": 1.1859545141002084, + "learning_rate": 3.154202715642729e-08, + "loss": 0.7290681600570679, + "step": 8020 + }, + { + "epoch": 1.8481566820276498, + "grad_norm": 1.2696204436408378, + "learning_rate": 3.1447165053567594e-08, + "loss": 0.7486605048179626, + "step": 8021 + }, + { + "epoch": 1.8483870967741935, + "grad_norm": 1.2409295752272667, + "learning_rate": 3.135244353486977e-08, + "loss": 0.8263967633247375, + "step": 8022 + }, + { + "epoch": 1.8486175115207373, + "grad_norm": 1.3436046094044156, + "learning_rate": 3.1257862614082254e-08, + "loss": 0.7462657690048218, + "step": 8023 + }, + { + "epoch": 1.8488479262672812, + "grad_norm": 1.7105756282592546, + "learning_rate": 3.116342230493374e-08, + "loss": 0.9305819272994995, + "step": 8024 + }, + { + "epoch": 1.8490783410138247, + "grad_norm": 1.1597494849443377, + "learning_rate": 3.1069122621131925e-08, + "loss": 0.7202557325363159, + "step": 8025 + }, + { + "epoch": 1.8493087557603687, + "grad_norm": 1.0985806176068067, + "learning_rate": 3.097496357636409e-08, + "loss": 0.723913311958313, + "step": 8026 + }, + { + "epoch": 1.8495391705069124, + "grad_norm": 1.427360065972912, + "learning_rate": 3.088094518429751e-08, + "loss": 0.7067763805389404, + "step": 8027 + }, + { + "epoch": 1.8497695852534561, + "grad_norm": 1.3110685780585822, + "learning_rate": 3.078706745857884e-08, + "loss": 0.7853527665138245, + "step": 8028 + }, + { + "epoch": 1.85, + "grad_norm": 1.228901367807535, + "learning_rate": 3.0693330412834285e-08, + "loss": 0.7183133363723755, + "step": 8029 + }, + { + "epoch": 1.8502304147465438, + "grad_norm": 1.1077136741228983, + "learning_rate": 3.0599734060669626e-08, + "loss": 0.8041096925735474, + "step": 8030 + }, + { + "epoch": 1.8504608294930875, + "grad_norm": 
1.0495776729925357, + "learning_rate": 3.050627841567022e-08, + "loss": 0.7259166240692139, + "step": 8031 + }, + { + "epoch": 1.8506912442396315, + "grad_norm": 1.5016516908972768, + "learning_rate": 3.041296349140099e-08, + "loss": 0.8844292163848877, + "step": 8032 + }, + { + "epoch": 1.850921658986175, + "grad_norm": 1.2846098007302502, + "learning_rate": 3.031978930140666e-08, + "loss": 0.7566810846328735, + "step": 8033 + }, + { + "epoch": 1.851152073732719, + "grad_norm": 1.4566612706299762, + "learning_rate": 3.0226755859211085e-08, + "loss": 0.8365379571914673, + "step": 8034 + }, + { + "epoch": 1.8513824884792627, + "grad_norm": 1.03909937329538, + "learning_rate": 3.013386317831823e-08, + "loss": 0.6786175966262817, + "step": 8035 + }, + { + "epoch": 1.8516129032258064, + "grad_norm": 0.8445952555360507, + "learning_rate": 3.0041111272211206e-08, + "loss": 0.5450198650360107, + "step": 8036 + }, + { + "epoch": 1.8518433179723504, + "grad_norm": 1.3789732970427235, + "learning_rate": 2.994850015435269e-08, + "loss": 0.8792393207550049, + "step": 8037 + }, + { + "epoch": 1.8520737327188939, + "grad_norm": 1.1270074296152806, + "learning_rate": 2.985602983818525e-08, + "loss": 0.8463287353515625, + "step": 8038 + }, + { + "epoch": 1.8523041474654378, + "grad_norm": 1.2927452986312467, + "learning_rate": 2.9763700337130827e-08, + "loss": 0.77659010887146, + "step": 8039 + }, + { + "epoch": 1.8525345622119815, + "grad_norm": 0.8652026295993711, + "learning_rate": 2.9671511664590698e-08, + "loss": 0.6180428266525269, + "step": 8040 + }, + { + "epoch": 1.8527649769585253, + "grad_norm": 1.2049419514211082, + "learning_rate": 2.9579463833946273e-08, + "loss": 0.7886658906936646, + "step": 8041 + }, + { + "epoch": 1.8529953917050692, + "grad_norm": 1.35078980115234, + "learning_rate": 2.9487556858557972e-08, + "loss": 0.8371871709823608, + "step": 8042 + }, + { + "epoch": 1.853225806451613, + "grad_norm": 1.1555875449847217, + "learning_rate": 
2.9395790751765904e-08, + "loss": 0.7082366347312927, + "step": 8043 + }, + { + "epoch": 1.8534562211981567, + "grad_norm": 1.2745414422252506, + "learning_rate": 2.930416552689008e-08, + "loss": 0.7866584062576294, + "step": 8044 + }, + { + "epoch": 1.8536866359447006, + "grad_norm": 1.229235509048025, + "learning_rate": 2.9212681197229527e-08, + "loss": 0.8789514303207397, + "step": 8045 + }, + { + "epoch": 1.8539170506912441, + "grad_norm": 1.0208282620264577, + "learning_rate": 2.9121337776063072e-08, + "loss": 0.7041239738464355, + "step": 8046 + }, + { + "epoch": 1.854147465437788, + "grad_norm": 1.3204473756112607, + "learning_rate": 2.9030135276649215e-08, + "loss": 0.8290516138076782, + "step": 8047 + }, + { + "epoch": 1.8543778801843318, + "grad_norm": 1.2424965520320617, + "learning_rate": 2.8939073712225813e-08, + "loss": 0.8532444834709167, + "step": 8048 + }, + { + "epoch": 1.8546082949308755, + "grad_norm": 1.375111764710695, + "learning_rate": 2.8848153096010407e-08, + "loss": 0.8635869026184082, + "step": 8049 + }, + { + "epoch": 1.8548387096774195, + "grad_norm": 1.3481674122248803, + "learning_rate": 2.8757373441199885e-08, + "loss": 0.723747730255127, + "step": 8050 + }, + { + "epoch": 1.855069124423963, + "grad_norm": 1.3399875040651272, + "learning_rate": 2.8666734760970925e-08, + "loss": 0.893456220626831, + "step": 8051 + }, + { + "epoch": 1.855299539170507, + "grad_norm": 1.2732338285848108, + "learning_rate": 2.8576237068479335e-08, + "loss": 0.6871381998062134, + "step": 8052 + }, + { + "epoch": 1.8555299539170507, + "grad_norm": 1.0534516506243037, + "learning_rate": 2.848588037686106e-08, + "loss": 0.7820594906806946, + "step": 8053 + }, + { + "epoch": 1.8557603686635944, + "grad_norm": 1.0873243123362593, + "learning_rate": 2.839566469923105e-08, + "loss": 0.7783479690551758, + "step": 8054 + }, + { + "epoch": 1.8559907834101383, + "grad_norm": 1.25602911336094, + "learning_rate": 2.8305590048684268e-08, + "loss": 0.7612866163253784, + 
"step": 8055 + }, + { + "epoch": 1.856221198156682, + "grad_norm": 1.0752346215773687, + "learning_rate": 2.82156564382946e-08, + "loss": 0.7483590841293335, + "step": 8056 + }, + { + "epoch": 1.8564516129032258, + "grad_norm": 1.0547692532993052, + "learning_rate": 2.812586388111582e-08, + "loss": 0.7553579807281494, + "step": 8057 + }, + { + "epoch": 1.8566820276497698, + "grad_norm": 1.0828193353243305, + "learning_rate": 2.80362123901815e-08, + "loss": 0.8895602226257324, + "step": 8058 + }, + { + "epoch": 1.8569124423963133, + "grad_norm": 1.1481937931103232, + "learning_rate": 2.794670197850424e-08, + "loss": 0.7974053621292114, + "step": 8059 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 1.0112292806236838, + "learning_rate": 2.7857332659076193e-08, + "loss": 0.7730135917663574, + "step": 8060 + }, + { + "epoch": 1.857373271889401, + "grad_norm": 1.115608079627536, + "learning_rate": 2.7768104444869434e-08, + "loss": 0.7258738279342651, + "step": 8061 + }, + { + "epoch": 1.8576036866359447, + "grad_norm": 1.3030363105586589, + "learning_rate": 2.7679017348835264e-08, + "loss": 0.7068890333175659, + "step": 8062 + }, + { + "epoch": 1.8578341013824886, + "grad_norm": 1.3041822573340287, + "learning_rate": 2.7590071383904568e-08, + "loss": 0.8741557002067566, + "step": 8063 + }, + { + "epoch": 1.8580645161290321, + "grad_norm": 1.3236368529143523, + "learning_rate": 2.750126656298768e-08, + "loss": 0.8723797798156738, + "step": 8064 + }, + { + "epoch": 1.858294930875576, + "grad_norm": 1.2019235064586495, + "learning_rate": 2.7412602898974514e-08, + "loss": 0.8510957956314087, + "step": 8065 + }, + { + "epoch": 1.8585253456221198, + "grad_norm": 0.8996466342772348, + "learning_rate": 2.732408040473444e-08, + "loss": 0.6875216960906982, + "step": 8066 + }, + { + "epoch": 1.8587557603686635, + "grad_norm": 1.235948717542994, + "learning_rate": 2.7235699093116515e-08, + "loss": 0.8057721257209778, + "step": 8067 + }, + { + "epoch": 1.8589861751152075, + 
"grad_norm": 1.1066694710477807, + "learning_rate": 2.7147458976949145e-08, + "loss": 0.7547335624694824, + "step": 8068 + }, + { + "epoch": 1.8592165898617512, + "grad_norm": 1.2565080056809024, + "learning_rate": 2.7059360069040193e-08, + "loss": 0.8301708102226257, + "step": 8069 + }, + { + "epoch": 1.859447004608295, + "grad_norm": 1.354839024861171, + "learning_rate": 2.69714023821771e-08, + "loss": 0.8313431143760681, + "step": 8070 + }, + { + "epoch": 1.8596774193548387, + "grad_norm": 1.2482736529337517, + "learning_rate": 2.6883585929126872e-08, + "loss": 0.6631792783737183, + "step": 8071 + }, + { + "epoch": 1.8599078341013824, + "grad_norm": 1.342165180678223, + "learning_rate": 2.679591072263576e-08, + "loss": 0.7643609046936035, + "step": 8072 + }, + { + "epoch": 1.8601382488479263, + "grad_norm": 1.5670037508761703, + "learning_rate": 2.670837677543003e-08, + "loss": 0.8543407917022705, + "step": 8073 + }, + { + "epoch": 1.86036866359447, + "grad_norm": 1.0908415634382522, + "learning_rate": 2.662098410021485e-08, + "loss": 0.8051489591598511, + "step": 8074 + }, + { + "epoch": 1.8605990783410138, + "grad_norm": 1.1493604797084143, + "learning_rate": 2.653373270967518e-08, + "loss": 0.7065767645835876, + "step": 8075 + }, + { + "epoch": 1.8608294930875577, + "grad_norm": 0.9852441728403762, + "learning_rate": 2.6446622616475566e-08, + "loss": 0.672603189945221, + "step": 8076 + }, + { + "epoch": 1.8610599078341012, + "grad_norm": 1.2739019796547877, + "learning_rate": 2.6359653833259776e-08, + "loss": 0.7201080918312073, + "step": 8077 + }, + { + "epoch": 1.8612903225806452, + "grad_norm": 1.156933357533599, + "learning_rate": 2.627282637265149e-08, + "loss": 0.7147494554519653, + "step": 8078 + }, + { + "epoch": 1.861520737327189, + "grad_norm": 1.3793116889121875, + "learning_rate": 2.6186140247253297e-08, + "loss": 0.7051082253456116, + "step": 8079 + }, + { + "epoch": 1.8617511520737327, + "grad_norm": 1.2253670327071573, + "learning_rate": 
2.6099595469647683e-08, + "loss": 0.5786069631576538, + "step": 8080 + }, + { + "epoch": 1.8619815668202766, + "grad_norm": 1.2391603364729231, + "learning_rate": 2.6013192052396493e-08, + "loss": 0.8880232572555542, + "step": 8081 + }, + { + "epoch": 1.8622119815668203, + "grad_norm": 1.3577487615179598, + "learning_rate": 2.5926930008041137e-08, + "loss": 0.9295729398727417, + "step": 8082 + }, + { + "epoch": 1.862442396313364, + "grad_norm": 1.1507407274303025, + "learning_rate": 2.5840809349102378e-08, + "loss": 0.6963248252868652, + "step": 8083 + }, + { + "epoch": 1.8626728110599078, + "grad_norm": 1.2547838683138512, + "learning_rate": 2.5754830088080548e-08, + "loss": 0.8788298964500427, + "step": 8084 + }, + { + "epoch": 1.8629032258064515, + "grad_norm": 1.3540782368440085, + "learning_rate": 2.5668992237455334e-08, + "loss": 0.7454242706298828, + "step": 8085 + }, + { + "epoch": 1.8631336405529955, + "grad_norm": 1.1950812039913048, + "learning_rate": 2.558329580968599e-08, + "loss": 0.7659780383110046, + "step": 8086 + }, + { + "epoch": 1.8633640552995392, + "grad_norm": 1.5016734977487585, + "learning_rate": 2.5497740817211456e-08, + "loss": 0.8799881935119629, + "step": 8087 + }, + { + "epoch": 1.863594470046083, + "grad_norm": 0.9825172132169212, + "learning_rate": 2.5412327272449684e-08, + "loss": 0.7319198846817017, + "step": 8088 + }, + { + "epoch": 1.8638248847926269, + "grad_norm": 1.0689400870779366, + "learning_rate": 2.532705518779854e-08, + "loss": 0.6450645923614502, + "step": 8089 + }, + { + "epoch": 1.8640552995391704, + "grad_norm": 1.1783740361717576, + "learning_rate": 2.52419245756349e-08, + "loss": 0.7213672399520874, + "step": 8090 + }, + { + "epoch": 1.8642857142857143, + "grad_norm": 1.3483335750734096, + "learning_rate": 2.515693544831554e-08, + "loss": 0.790163516998291, + "step": 8091 + }, + { + "epoch": 1.864516129032258, + "grad_norm": 1.2871905619529331, + "learning_rate": 2.507208781817638e-08, + "loss": 0.8324074745178223, 
+ "step": 8092 + }, + { + "epoch": 1.8647465437788018, + "grad_norm": 1.4095960145667545, + "learning_rate": 2.4987381697533227e-08, + "loss": 0.879224419593811, + "step": 8093 + }, + { + "epoch": 1.8649769585253457, + "grad_norm": 1.4121148041878757, + "learning_rate": 2.4902817098680807e-08, + "loss": 0.8668204545974731, + "step": 8094 + }, + { + "epoch": 1.8652073732718892, + "grad_norm": 1.1605042845973315, + "learning_rate": 2.481839403389341e-08, + "loss": 0.6737711429595947, + "step": 8095 + }, + { + "epoch": 1.8654377880184332, + "grad_norm": 1.3482506919608122, + "learning_rate": 2.4734112515425343e-08, + "loss": 0.8948237299919128, + "step": 8096 + }, + { + "epoch": 1.865668202764977, + "grad_norm": 1.2927456093148797, + "learning_rate": 2.4649972555509823e-08, + "loss": 0.6866592168807983, + "step": 8097 + }, + { + "epoch": 1.8658986175115206, + "grad_norm": 1.2040358944727056, + "learning_rate": 2.4565974166359416e-08, + "loss": 0.8852076530456543, + "step": 8098 + }, + { + "epoch": 1.8661290322580646, + "grad_norm": 1.1474664367024714, + "learning_rate": 2.44821173601667e-08, + "loss": 0.7402448654174805, + "step": 8099 + }, + { + "epoch": 1.8663594470046083, + "grad_norm": 1.299234544884085, + "learning_rate": 2.439840214910316e-08, + "loss": 0.8536320924758911, + "step": 8100 + }, + { + "epoch": 1.866589861751152, + "grad_norm": 1.1550631938568499, + "learning_rate": 2.4314828545319965e-08, + "loss": 0.6408628225326538, + "step": 8101 + }, + { + "epoch": 1.866820276497696, + "grad_norm": 1.188548223378954, + "learning_rate": 2.4231396560947858e-08, + "loss": 0.9578930735588074, + "step": 8102 + }, + { + "epoch": 1.8670506912442395, + "grad_norm": 1.8289817367376688, + "learning_rate": 2.4148106208096708e-08, + "loss": 0.7606109976768494, + "step": 8103 + }, + { + "epoch": 1.8672811059907835, + "grad_norm": 0.9826738512020193, + "learning_rate": 2.4064957498856177e-08, + "loss": 0.7446529865264893, + "step": 8104 + }, + { + "epoch": 
1.8675115207373272, + "grad_norm": 1.0744366993530696, + "learning_rate": 2.398195044529505e-08, + "loss": 0.6086497902870178, + "step": 8105 + }, + { + "epoch": 1.867741935483871, + "grad_norm": 1.5561440229209103, + "learning_rate": 2.389908505946181e-08, + "loss": 0.9348995685577393, + "step": 8106 + }, + { + "epoch": 1.8679723502304149, + "grad_norm": 1.1497120508700005, + "learning_rate": 2.381636135338405e-08, + "loss": 0.6817007660865784, + "step": 8107 + }, + { + "epoch": 1.8682027649769584, + "grad_norm": 1.0815805532535518, + "learning_rate": 2.373377933906917e-08, + "loss": 0.7228778600692749, + "step": 8108 + }, + { + "epoch": 1.8684331797235023, + "grad_norm": 1.2824972753864794, + "learning_rate": 2.3651339028503913e-08, + "loss": 0.6974154114723206, + "step": 8109 + }, + { + "epoch": 1.868663594470046, + "grad_norm": 1.2746687740486187, + "learning_rate": 2.3569040433654264e-08, + "loss": 0.8025680780410767, + "step": 8110 + }, + { + "epoch": 1.8688940092165898, + "grad_norm": 1.0439186994105132, + "learning_rate": 2.3486883566465777e-08, + "loss": 0.7570391893386841, + "step": 8111 + }, + { + "epoch": 1.8691244239631337, + "grad_norm": 1.1353343636911755, + "learning_rate": 2.3404868438863246e-08, + "loss": 0.7982438802719116, + "step": 8112 + }, + { + "epoch": 1.8693548387096774, + "grad_norm": 0.948053216671403, + "learning_rate": 2.3322995062751372e-08, + "loss": 0.6615588665008545, + "step": 8113 + }, + { + "epoch": 1.8695852534562212, + "grad_norm": 1.1794145616088556, + "learning_rate": 2.324126345001376e-08, + "loss": 0.7748852968215942, + "step": 8114 + }, + { + "epoch": 1.8698156682027651, + "grad_norm": 1.146675047414541, + "learning_rate": 2.3159673612513587e-08, + "loss": 0.7238468527793884, + "step": 8115 + }, + { + "epoch": 1.8700460829493086, + "grad_norm": 1.2843830020573481, + "learning_rate": 2.3078225562093822e-08, + "loss": 0.8146705627441406, + "step": 8116 + }, + { + "epoch": 1.8702764976958526, + "grad_norm": 
1.0747488287412188, + "learning_rate": 2.2996919310576235e-08, + "loss": 0.8393594026565552, + "step": 8117 + }, + { + "epoch": 1.8705069124423963, + "grad_norm": 1.6346887094004536, + "learning_rate": 2.2915754869762384e-08, + "loss": 0.9619652032852173, + "step": 8118 + }, + { + "epoch": 1.87073732718894, + "grad_norm": 1.6641290836048537, + "learning_rate": 2.2834732251433286e-08, + "loss": 0.8301321268081665, + "step": 8119 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.2687107297135523, + "learning_rate": 2.2753851467349206e-08, + "loss": 0.8236079812049866, + "step": 8120 + }, + { + "epoch": 1.8711981566820275, + "grad_norm": 1.430457986003777, + "learning_rate": 2.267311252924975e-08, + "loss": 0.9007565379142761, + "step": 8121 + }, + { + "epoch": 1.8714285714285714, + "grad_norm": 1.1827948115854126, + "learning_rate": 2.2592515448854432e-08, + "loss": 0.7430707216262817, + "step": 8122 + }, + { + "epoch": 1.8716589861751152, + "grad_norm": 1.17432989990484, + "learning_rate": 2.2512060237861452e-08, + "loss": 0.7562465667724609, + "step": 8123 + }, + { + "epoch": 1.871889400921659, + "grad_norm": 1.1839994711227122, + "learning_rate": 2.24317469079488e-08, + "loss": 0.7736096978187561, + "step": 8124 + }, + { + "epoch": 1.8721198156682028, + "grad_norm": 1.1809968020267403, + "learning_rate": 2.2351575470774153e-08, + "loss": 0.7652724981307983, + "step": 8125 + }, + { + "epoch": 1.8723502304147466, + "grad_norm": 1.4664554269524215, + "learning_rate": 2.2271545937973978e-08, + "loss": 0.8034792542457581, + "step": 8126 + }, + { + "epoch": 1.8725806451612903, + "grad_norm": 1.2107856133228136, + "learning_rate": 2.219165832116454e-08, + "loss": 0.6158101558685303, + "step": 8127 + }, + { + "epoch": 1.8728110599078343, + "grad_norm": 1.1984460742665393, + "learning_rate": 2.2111912631941564e-08, + "loss": 0.6514682769775391, + "step": 8128 + }, + { + "epoch": 1.8730414746543778, + "grad_norm": 1.1090676234846621, + "learning_rate": 
2.203230888187979e-08, + "loss": 0.833041787147522, + "step": 8129 + }, + { + "epoch": 1.8732718894009217, + "grad_norm": 1.3944148742352294, + "learning_rate": 2.1952847082533864e-08, + "loss": 0.8033208250999451, + "step": 8130 + }, + { + "epoch": 1.8735023041474654, + "grad_norm": 1.2067904980609332, + "learning_rate": 2.187352724543734e-08, + "loss": 0.742051362991333, + "step": 8131 + }, + { + "epoch": 1.8737327188940092, + "grad_norm": 1.2058964422107643, + "learning_rate": 2.1794349382103337e-08, + "loss": 0.7411169409751892, + "step": 8132 + }, + { + "epoch": 1.8739631336405531, + "grad_norm": 1.3201479261882787, + "learning_rate": 2.171531350402467e-08, + "loss": 0.7517165541648865, + "step": 8133 + }, + { + "epoch": 1.8741935483870966, + "grad_norm": 1.2371172479380752, + "learning_rate": 2.1636419622673263e-08, + "loss": 0.8010021448135376, + "step": 8134 + }, + { + "epoch": 1.8744239631336406, + "grad_norm": 1.2501522956166489, + "learning_rate": 2.1557667749500187e-08, + "loss": 0.7265241742134094, + "step": 8135 + }, + { + "epoch": 1.8746543778801843, + "grad_norm": 1.191380870353666, + "learning_rate": 2.1479057895936403e-08, + "loss": 0.6809227466583252, + "step": 8136 + }, + { + "epoch": 1.874884792626728, + "grad_norm": 1.2737037893770147, + "learning_rate": 2.140059007339201e-08, + "loss": 0.8235769271850586, + "step": 8137 + }, + { + "epoch": 1.875115207373272, + "grad_norm": 1.1356268338575812, + "learning_rate": 2.132226429325634e-08, + "loss": 0.7556289434432983, + "step": 8138 + }, + { + "epoch": 1.8753456221198157, + "grad_norm": 1.257264783564694, + "learning_rate": 2.1244080566898638e-08, + "loss": 0.7765048742294312, + "step": 8139 + }, + { + "epoch": 1.8755760368663594, + "grad_norm": 1.1776465139256578, + "learning_rate": 2.1166038905666816e-08, + "loss": 0.7637666463851929, + "step": 8140 + }, + { + "epoch": 1.8758064516129034, + "grad_norm": 1.2471130614608452, + "learning_rate": 2.10881393208886e-08, + "loss": 0.8413453698158264, + 
"step": 8141 + }, + { + "epoch": 1.8760368663594469, + "grad_norm": 1.443351972543058, + "learning_rate": 2.101038182387105e-08, + "loss": 0.7937475442886353, + "step": 8142 + }, + { + "epoch": 1.8762672811059908, + "grad_norm": 1.1772607773578063, + "learning_rate": 2.0932766425900585e-08, + "loss": 0.7654982805252075, + "step": 8143 + }, + { + "epoch": 1.8764976958525346, + "grad_norm": 1.53397176108589, + "learning_rate": 2.0855293138242968e-08, + "loss": 0.8950663805007935, + "step": 8144 + }, + { + "epoch": 1.8767281105990783, + "grad_norm": 1.250929142335872, + "learning_rate": 2.077796197214332e-08, + "loss": 0.6405420303344727, + "step": 8145 + }, + { + "epoch": 1.8769585253456222, + "grad_norm": 1.085136655013558, + "learning_rate": 2.0700772938826217e-08, + "loss": 0.7724314332008362, + "step": 8146 + }, + { + "epoch": 1.8771889400921657, + "grad_norm": 1.09160242748488, + "learning_rate": 2.0623726049495472e-08, + "loss": 0.7929061651229858, + "step": 8147 + }, + { + "epoch": 1.8774193548387097, + "grad_norm": 1.0975195498555617, + "learning_rate": 2.0546821315334363e-08, + "loss": 0.7207096815109253, + "step": 8148 + }, + { + "epoch": 1.8776497695852534, + "grad_norm": 1.347240880442127, + "learning_rate": 2.0470058747505513e-08, + "loss": 0.9234127402305603, + "step": 8149 + }, + { + "epoch": 1.8778801843317972, + "grad_norm": 1.2189429089634525, + "learning_rate": 2.0393438357150906e-08, + "loss": 0.9006322026252747, + "step": 8150 + }, + { + "epoch": 1.878110599078341, + "grad_norm": 0.9863507376975118, + "learning_rate": 2.0316960155391972e-08, + "loss": 0.6289799809455872, + "step": 8151 + }, + { + "epoch": 1.8783410138248848, + "grad_norm": 1.117182475586666, + "learning_rate": 2.0240624153329168e-08, + "loss": 0.8551793098449707, + "step": 8152 + }, + { + "epoch": 1.8785714285714286, + "grad_norm": 1.1253834649892556, + "learning_rate": 2.016443036204285e-08, + "loss": 0.8065170645713806, + "step": 8153 + }, + { + "epoch": 1.8788018433179725, + 
"grad_norm": 1.0124272640628642, + "learning_rate": 2.0088378792592286e-08, + "loss": 0.6361274719238281, + "step": 8154 + }, + { + "epoch": 1.879032258064516, + "grad_norm": 1.3966308966349001, + "learning_rate": 2.0012469456016312e-08, + "loss": 0.8539700508117676, + "step": 8155 + }, + { + "epoch": 1.87926267281106, + "grad_norm": 1.380681857214056, + "learning_rate": 1.9936702363333115e-08, + "loss": 0.7424989938735962, + "step": 8156 + }, + { + "epoch": 1.8794930875576037, + "grad_norm": 1.0795560964001287, + "learning_rate": 1.9861077525540116e-08, + "loss": 0.5831520557403564, + "step": 8157 + }, + { + "epoch": 1.8797235023041474, + "grad_norm": 1.3034651332513367, + "learning_rate": 1.9785594953614093e-08, + "loss": 0.8080646991729736, + "step": 8158 + }, + { + "epoch": 1.8799539170506914, + "grad_norm": 1.3028494466110516, + "learning_rate": 1.9710254658511392e-08, + "loss": 0.8008537292480469, + "step": 8159 + }, + { + "epoch": 1.8801843317972349, + "grad_norm": 0.7838996508063781, + "learning_rate": 1.9635056651167492e-08, + "loss": 0.7317294478416443, + "step": 8160 + }, + { + "epoch": 1.8804147465437788, + "grad_norm": 1.240068145392807, + "learning_rate": 1.956000094249721e-08, + "loss": 0.803238034248352, + "step": 8161 + }, + { + "epoch": 1.8806451612903226, + "grad_norm": 1.1592302203633778, + "learning_rate": 1.948508754339506e-08, + "loss": 0.7202219367027283, + "step": 8162 + }, + { + "epoch": 1.8808755760368663, + "grad_norm": 1.3406292816176746, + "learning_rate": 1.9410316464734233e-08, + "loss": 0.7691160440444946, + "step": 8163 + }, + { + "epoch": 1.8811059907834102, + "grad_norm": 1.0898220168427848, + "learning_rate": 1.933568771736782e-08, + "loss": 0.7092962265014648, + "step": 8164 + }, + { + "epoch": 1.881336405529954, + "grad_norm": 1.3165421464208054, + "learning_rate": 1.9261201312128274e-08, + "loss": 0.819804310798645, + "step": 8165 + }, + { + "epoch": 1.8815668202764977, + "grad_norm": 1.2278633726487793, + "learning_rate": 
1.918685725982694e-08, + "loss": 0.9127538204193115, + "step": 8166 + }, + { + "epoch": 1.8817972350230416, + "grad_norm": 1.198181344272901, + "learning_rate": 1.9112655571254855e-08, + "loss": 0.8023328185081482, + "step": 8167 + }, + { + "epoch": 1.8820276497695851, + "grad_norm": 1.1150363141436184, + "learning_rate": 1.903859625718218e-08, + "loss": 0.723065972328186, + "step": 8168 + }, + { + "epoch": 1.882258064516129, + "grad_norm": 1.329775802249569, + "learning_rate": 1.896467932835877e-08, + "loss": 0.7838670611381531, + "step": 8169 + }, + { + "epoch": 1.8824884792626728, + "grad_norm": 1.0221481880663403, + "learning_rate": 1.8890904795513475e-08, + "loss": 0.6029871702194214, + "step": 8170 + }, + { + "epoch": 1.8827188940092165, + "grad_norm": 1.1179619592038208, + "learning_rate": 1.8817272669354512e-08, + "loss": 0.7622933387756348, + "step": 8171 + }, + { + "epoch": 1.8829493087557605, + "grad_norm": 1.3471730261003036, + "learning_rate": 1.8743782960569444e-08, + "loss": 0.7702913284301758, + "step": 8172 + }, + { + "epoch": 1.883179723502304, + "grad_norm": 1.1115192812221177, + "learning_rate": 1.867043567982518e-08, + "loss": 0.6385080814361572, + "step": 8173 + }, + { + "epoch": 1.883410138248848, + "grad_norm": 1.1957117872616694, + "learning_rate": 1.8597230837768208e-08, + "loss": 0.6886409521102905, + "step": 8174 + }, + { + "epoch": 1.8836405529953917, + "grad_norm": 1.2615274538141057, + "learning_rate": 1.8524168445023803e-08, + "loss": 0.7697125673294067, + "step": 8175 + }, + { + "epoch": 1.8838709677419354, + "grad_norm": 1.2703572064059772, + "learning_rate": 1.8451248512197148e-08, + "loss": 0.7942332029342651, + "step": 8176 + }, + { + "epoch": 1.8841013824884794, + "grad_norm": 1.2486681210000266, + "learning_rate": 1.8378471049872445e-08, + "loss": 0.7751410007476807, + "step": 8177 + }, + { + "epoch": 1.884331797235023, + "grad_norm": 1.4135289386452112, + "learning_rate": 1.8305836068613023e-08, + "loss": 0.8650992512702942, 
+ "step": 8178 + }, + { + "epoch": 1.8845622119815668, + "grad_norm": 1.255590367160678, + "learning_rate": 1.8233343578962e-08, + "loss": 0.7084495425224304, + "step": 8179 + }, + { + "epoch": 1.8847926267281108, + "grad_norm": 1.2065933395861381, + "learning_rate": 1.8160993591441408e-08, + "loss": 0.7428494691848755, + "step": 8180 + }, + { + "epoch": 1.8850230414746543, + "grad_norm": 1.2721568643853003, + "learning_rate": 1.8088786116552844e-08, + "loss": 0.7431809902191162, + "step": 8181 + }, + { + "epoch": 1.8852534562211982, + "grad_norm": 1.5234831289492186, + "learning_rate": 1.801672116477715e-08, + "loss": 0.8312518000602722, + "step": 8182 + }, + { + "epoch": 1.885483870967742, + "grad_norm": 1.412977003038852, + "learning_rate": 1.7944798746574285e-08, + "loss": 0.8574832081794739, + "step": 8183 + }, + { + "epoch": 1.8857142857142857, + "grad_norm": 1.209006694724365, + "learning_rate": 1.7873018872383793e-08, + "loss": 0.7716966867446899, + "step": 8184 + }, + { + "epoch": 1.8859447004608296, + "grad_norm": 1.1984291768693995, + "learning_rate": 1.780138155262456e-08, + "loss": 0.8536000847816467, + "step": 8185 + }, + { + "epoch": 1.8861751152073731, + "grad_norm": 1.4411910829910872, + "learning_rate": 1.7729886797694606e-08, + "loss": 0.6559889316558838, + "step": 8186 + }, + { + "epoch": 1.886405529953917, + "grad_norm": 1.4146541158068258, + "learning_rate": 1.7658534617971065e-08, + "loss": 0.7371512651443481, + "step": 8187 + }, + { + "epoch": 1.8866359447004608, + "grad_norm": 1.5920989952321163, + "learning_rate": 1.7587325023810773e-08, + "loss": 0.8092008829116821, + "step": 8188 + }, + { + "epoch": 1.8868663594470045, + "grad_norm": 1.1485577131831675, + "learning_rate": 1.751625802554979e-08, + "loss": 0.7793067693710327, + "step": 8189 + }, + { + "epoch": 1.8870967741935485, + "grad_norm": 1.3107398360408737, + "learning_rate": 1.7445333633503312e-08, + "loss": 0.8102752566337585, + "step": 8190 + }, + { + "epoch": 1.8873271889400922, 
+ "grad_norm": 0.9411355693415201, + "learning_rate": 1.737455185796588e-08, + "loss": 0.7141490578651428, + "step": 8191 + }, + { + "epoch": 1.887557603686636, + "grad_norm": 1.3771499753857814, + "learning_rate": 1.7303912709211497e-08, + "loss": 0.8010870218276978, + "step": 8192 + }, + { + "epoch": 1.8877880184331797, + "grad_norm": 1.0040229371574219, + "learning_rate": 1.723341619749319e-08, + "loss": 0.7945431470870972, + "step": 8193 + }, + { + "epoch": 1.8880184331797234, + "grad_norm": 1.5084700431378903, + "learning_rate": 1.7163062333043544e-08, + "loss": 0.765398383140564, + "step": 8194 + }, + { + "epoch": 1.8882488479262673, + "grad_norm": 1.141763186710756, + "learning_rate": 1.709285112607428e-08, + "loss": 0.8645910024642944, + "step": 8195 + }, + { + "epoch": 1.888479262672811, + "grad_norm": 1.4294051802947438, + "learning_rate": 1.7022782586776363e-08, + "loss": 0.7650351524353027, + "step": 8196 + }, + { + "epoch": 1.8887096774193548, + "grad_norm": 1.148441042244908, + "learning_rate": 1.695285672532043e-08, + "loss": 0.8059902191162109, + "step": 8197 + }, + { + "epoch": 1.8889400921658988, + "grad_norm": 1.3019488561633756, + "learning_rate": 1.688307355185592e-08, + "loss": 0.8389305472373962, + "step": 8198 + }, + { + "epoch": 1.8891705069124423, + "grad_norm": 1.3363862822981094, + "learning_rate": 1.681343307651173e-08, + "loss": 0.755578875541687, + "step": 8199 + }, + { + "epoch": 1.8894009216589862, + "grad_norm": 1.2754809499843205, + "learning_rate": 1.6743935309396218e-08, + "loss": 0.822825014591217, + "step": 8200 + }, + { + "epoch": 1.88963133640553, + "grad_norm": 1.2571266177044025, + "learning_rate": 1.667458026059676e-08, + "loss": 0.8229342699050903, + "step": 8201 + }, + { + "epoch": 1.8898617511520737, + "grad_norm": 1.3086181916191966, + "learning_rate": 1.6605367940180303e-08, + "loss": 0.7142254114151001, + "step": 8202 + }, + { + "epoch": 1.8900921658986176, + "grad_norm": 1.1722391698259569, + "learning_rate": 
1.6536298358192812e-08, + "loss": 0.8904600739479065, + "step": 8203 + }, + { + "epoch": 1.8903225806451613, + "grad_norm": 1.151403763105922, + "learning_rate": 1.6467371524659603e-08, + "loss": 0.8758517503738403, + "step": 8204 + }, + { + "epoch": 1.890552995391705, + "grad_norm": 1.3083947750625244, + "learning_rate": 1.6398587449585555e-08, + "loss": 0.7609111666679382, + "step": 8205 + }, + { + "epoch": 1.8907834101382488, + "grad_norm": 0.9406449994318669, + "learning_rate": 1.6329946142954353e-08, + "loss": 0.8177064657211304, + "step": 8206 + }, + { + "epoch": 1.8910138248847925, + "grad_norm": 1.1366142550146048, + "learning_rate": 1.626144761472925e-08, + "loss": 0.6342105865478516, + "step": 8207 + }, + { + "epoch": 1.8912442396313365, + "grad_norm": 0.8903675484312013, + "learning_rate": 1.6193091874852627e-08, + "loss": 0.6025499105453491, + "step": 8208 + }, + { + "epoch": 1.8914746543778802, + "grad_norm": 1.3017839387858507, + "learning_rate": 1.6124878933246543e-08, + "loss": 0.78373783826828, + "step": 8209 + }, + { + "epoch": 1.891705069124424, + "grad_norm": 1.336095893979754, + "learning_rate": 1.605680879981164e-08, + "loss": 0.8072086572647095, + "step": 8210 + }, + { + "epoch": 1.8919354838709679, + "grad_norm": 1.5597980072939257, + "learning_rate": 1.5988881484428453e-08, + "loss": 0.9057372212409973, + "step": 8211 + }, + { + "epoch": 1.8921658986175114, + "grad_norm": 1.2099616448625954, + "learning_rate": 1.592109699695643e-08, + "loss": 0.8235929012298584, + "step": 8212 + }, + { + "epoch": 1.8923963133640553, + "grad_norm": 1.2417707847492958, + "learning_rate": 1.5853455347234366e-08, + "loss": 0.6610825061798096, + "step": 8213 + }, + { + "epoch": 1.892626728110599, + "grad_norm": 1.4158986087253451, + "learning_rate": 1.5785956545080415e-08, + "loss": 0.7152366638183594, + "step": 8214 + }, + { + "epoch": 1.8928571428571428, + "grad_norm": 1.330885873092923, + "learning_rate": 1.5718600600292066e-08, + "loss": 0.7971903085708618, 
+ "step": 8215 + }, + { + "epoch": 1.8930875576036867, + "grad_norm": 1.226467557812747, + "learning_rate": 1.565138752264572e-08, + "loss": 0.7639449238777161, + "step": 8216 + }, + { + "epoch": 1.8933179723502302, + "grad_norm": 1.0517976072639703, + "learning_rate": 1.5584317321897356e-08, + "loss": 0.6396117806434631, + "step": 8217 + }, + { + "epoch": 1.8935483870967742, + "grad_norm": 1.328962567982178, + "learning_rate": 1.5517390007782183e-08, + "loss": 0.790566086769104, + "step": 8218 + }, + { + "epoch": 1.893778801843318, + "grad_norm": 1.6769404862380202, + "learning_rate": 1.5450605590014544e-08, + "loss": 0.7948310971260071, + "step": 8219 + }, + { + "epoch": 1.8940092165898617, + "grad_norm": 1.2378052027269906, + "learning_rate": 1.5383964078288124e-08, + "loss": 0.9425654411315918, + "step": 8220 + }, + { + "epoch": 1.8942396313364056, + "grad_norm": 1.2441112834124675, + "learning_rate": 1.531746548227586e-08, + "loss": 0.8001678586006165, + "step": 8221 + }, + { + "epoch": 1.8944700460829493, + "grad_norm": 0.9072642646135723, + "learning_rate": 1.5251109811629915e-08, + "loss": 0.6636781692504883, + "step": 8222 + }, + { + "epoch": 1.894700460829493, + "grad_norm": 1.0313464437335311, + "learning_rate": 1.5184897075981807e-08, + "loss": 0.7884416580200195, + "step": 8223 + }, + { + "epoch": 1.894930875576037, + "grad_norm": 1.0907885139753422, + "learning_rate": 1.511882728494218e-08, + "loss": 0.6888208389282227, + "step": 8224 + }, + { + "epoch": 1.8951612903225805, + "grad_norm": 1.3461823033287323, + "learning_rate": 1.5052900448100815e-08, + "loss": 0.7253614664077759, + "step": 8225 + }, + { + "epoch": 1.8953917050691245, + "grad_norm": 1.2272377599078015, + "learning_rate": 1.498711657502716e-08, + "loss": 0.7865983843803406, + "step": 8226 + }, + { + "epoch": 1.8956221198156682, + "grad_norm": 1.4908955714231082, + "learning_rate": 1.492147567526947e-08, + "loss": 0.8778063654899597, + "step": 8227 + }, + { + "epoch": 1.895852534562212, 
+ "grad_norm": 1.2263224402103408, + "learning_rate": 1.4855977758355675e-08, + "loss": 0.7812581062316895, + "step": 8228 + }, + { + "epoch": 1.8960829493087559, + "grad_norm": 1.2890011409819144, + "learning_rate": 1.4790622833792287e-08, + "loss": 0.7160226106643677, + "step": 8229 + }, + { + "epoch": 1.8963133640552994, + "grad_norm": 1.1613199880989007, + "learning_rate": 1.472541091106594e-08, + "loss": 0.8187412619590759, + "step": 8230 + }, + { + "epoch": 1.8965437788018433, + "grad_norm": 1.1653251647412382, + "learning_rate": 1.4660341999641834e-08, + "loss": 0.7517846822738647, + "step": 8231 + }, + { + "epoch": 1.896774193548387, + "grad_norm": 1.3673338656755198, + "learning_rate": 1.4595416108964753e-08, + "loss": 0.9230127334594727, + "step": 8232 + }, + { + "epoch": 1.8970046082949308, + "grad_norm": 1.228175308993719, + "learning_rate": 1.4530633248458269e-08, + "loss": 0.6803582906723022, + "step": 8233 + }, + { + "epoch": 1.8972350230414747, + "grad_norm": 1.2890219242119376, + "learning_rate": 1.4465993427525968e-08, + "loss": 0.8444511294364929, + "step": 8234 + }, + { + "epoch": 1.8974654377880185, + "grad_norm": 1.4479761110450609, + "learning_rate": 1.4401496655550016e-08, + "loss": 0.7622519731521606, + "step": 8235 + }, + { + "epoch": 1.8976958525345622, + "grad_norm": 1.20875065982799, + "learning_rate": 1.4337142941892033e-08, + "loss": 0.687129020690918, + "step": 8236 + }, + { + "epoch": 1.8979262672811061, + "grad_norm": 1.1827775538431895, + "learning_rate": 1.4272932295892992e-08, + "loss": 0.6421219110488892, + "step": 8237 + }, + { + "epoch": 1.8981566820276496, + "grad_norm": 1.2669401147896007, + "learning_rate": 1.4208864726872772e-08, + "loss": 0.7829388380050659, + "step": 8238 + }, + { + "epoch": 1.8983870967741936, + "grad_norm": 1.3482974956529734, + "learning_rate": 1.4144940244130821e-08, + "loss": 0.7754424810409546, + "step": 8239 + }, + { + "epoch": 1.8986175115207373, + "grad_norm": 1.1130898544931584, + 
"learning_rate": 1.4081158856945719e-08, + "loss": 0.6544859409332275, + "step": 8240 + }, + { + "epoch": 1.898847926267281, + "grad_norm": 1.0822240775455856, + "learning_rate": 1.4017520574575282e-08, + "loss": 0.8020427227020264, + "step": 8241 + }, + { + "epoch": 1.899078341013825, + "grad_norm": 1.1350657169907092, + "learning_rate": 1.3954025406256343e-08, + "loss": 0.7343212366104126, + "step": 8242 + }, + { + "epoch": 1.8993087557603685, + "grad_norm": 1.2792336145941459, + "learning_rate": 1.3890673361205418e-08, + "loss": 0.7643232345581055, + "step": 8243 + }, + { + "epoch": 1.8995391705069125, + "grad_norm": 1.212662168320899, + "learning_rate": 1.3827464448617709e-08, + "loss": 0.7806165814399719, + "step": 8244 + }, + { + "epoch": 1.8997695852534562, + "grad_norm": 1.6104194734157218, + "learning_rate": 1.3764398677667988e-08, + "loss": 0.8533280491828918, + "step": 8245 + }, + { + "epoch": 1.9, + "grad_norm": 1.1289941083869026, + "learning_rate": 1.3701476057510264e-08, + "loss": 0.773565411567688, + "step": 8246 + }, + { + "epoch": 1.9002304147465439, + "grad_norm": 1.1091300492504157, + "learning_rate": 1.3638696597277677e-08, + "loss": 0.7752503752708435, + "step": 8247 + }, + { + "epoch": 1.9004608294930876, + "grad_norm": 0.9880656776459645, + "learning_rate": 1.3576060306082383e-08, + "loss": 0.7466747760772705, + "step": 8248 + }, + { + "epoch": 1.9006912442396313, + "grad_norm": 1.2177337280417093, + "learning_rate": 1.3513567193016106e-08, + "loss": 0.8103033304214478, + "step": 8249 + }, + { + "epoch": 1.9009216589861753, + "grad_norm": 1.0248826665714235, + "learning_rate": 1.3451217267149595e-08, + "loss": 0.6501287817955017, + "step": 8250 + }, + { + "epoch": 1.9011520737327188, + "grad_norm": 1.210107770730306, + "learning_rate": 1.3389010537532941e-08, + "loss": 0.7329230308532715, + "step": 8251 + }, + { + "epoch": 1.9013824884792627, + "grad_norm": 1.3978474783131303, + "learning_rate": 1.3326947013195255e-08, + "loss": 
0.8413917422294617, + "step": 8252 + }, + { + "epoch": 1.9016129032258065, + "grad_norm": 1.4081927433558092, + "learning_rate": 1.3265026703144999e-08, + "loss": 0.7283090353012085, + "step": 8253 + }, + { + "epoch": 1.9018433179723502, + "grad_norm": 1.2553133709092965, + "learning_rate": 1.3203249616369872e-08, + "loss": 0.8378126621246338, + "step": 8254 + }, + { + "epoch": 1.9020737327188941, + "grad_norm": 1.099276496142028, + "learning_rate": 1.3141615761836811e-08, + "loss": 0.7675777673721313, + "step": 8255 + }, + { + "epoch": 1.9023041474654376, + "grad_norm": 1.6916159414604328, + "learning_rate": 1.308012514849155e-08, + "loss": 0.6448104381561279, + "step": 8256 + }, + { + "epoch": 1.9025345622119816, + "grad_norm": 1.3264486635424506, + "learning_rate": 1.3018777785259838e-08, + "loss": 0.8024395704269409, + "step": 8257 + }, + { + "epoch": 1.9027649769585253, + "grad_norm": 1.1900370575281645, + "learning_rate": 1.2957573681045887e-08, + "loss": 0.8159325122833252, + "step": 8258 + }, + { + "epoch": 1.902995391705069, + "grad_norm": 1.1100937535082447, + "learning_rate": 1.2896512844733365e-08, + "loss": 0.7916233539581299, + "step": 8259 + }, + { + "epoch": 1.903225806451613, + "grad_norm": 1.2408177778484295, + "learning_rate": 1.2835595285185296e-08, + "loss": 0.798140823841095, + "step": 8260 + }, + { + "epoch": 1.9034562211981567, + "grad_norm": 1.2142666252173266, + "learning_rate": 1.277482101124383e-08, + "loss": 0.7881651520729065, + "step": 8261 + }, + { + "epoch": 1.9036866359447004, + "grad_norm": 1.3615775077613546, + "learning_rate": 1.2714190031730021e-08, + "loss": 0.7023189663887024, + "step": 8262 + }, + { + "epoch": 1.9039170506912444, + "grad_norm": 1.2537620544817238, + "learning_rate": 1.2653702355444606e-08, + "loss": 0.8286309242248535, + "step": 8263 + }, + { + "epoch": 1.904147465437788, + "grad_norm": 1.4181409914325045, + "learning_rate": 1.259335799116723e-08, + "loss": 0.7626973986625671, + "step": 8264 + }, + { + 
"epoch": 1.9043778801843319, + "grad_norm": 1.7640804361655256, + "learning_rate": 1.2533156947656665e-08, + "loss": 1.0350267887115479, + "step": 8265 + }, + { + "epoch": 1.9046082949308756, + "grad_norm": 1.0808972871053977, + "learning_rate": 1.2473099233651251e-08, + "loss": 0.6378228664398193, + "step": 8266 + }, + { + "epoch": 1.9048387096774193, + "grad_norm": 1.1012549826430145, + "learning_rate": 1.2413184857868241e-08, + "loss": 0.8265732526779175, + "step": 8267 + }, + { + "epoch": 1.9050691244239633, + "grad_norm": 1.102740322591124, + "learning_rate": 1.23534138290039e-08, + "loss": 0.8545348644256592, + "step": 8268 + }, + { + "epoch": 1.9052995391705068, + "grad_norm": 1.1667419775790697, + "learning_rate": 1.2293786155734176e-08, + "loss": 0.660080075263977, + "step": 8269 + }, + { + "epoch": 1.9055299539170507, + "grad_norm": 1.4258566183231558, + "learning_rate": 1.2234301846713813e-08, + "loss": 0.8409689664840698, + "step": 8270 + }, + { + "epoch": 1.9057603686635944, + "grad_norm": 1.3639053971310304, + "learning_rate": 1.2174960910576904e-08, + "loss": 0.8026434183120728, + "step": 8271 + }, + { + "epoch": 1.9059907834101382, + "grad_norm": 1.1477802786886386, + "learning_rate": 1.2115763355936671e-08, + "loss": 0.8315812945365906, + "step": 8272 + }, + { + "epoch": 1.9062211981566821, + "grad_norm": 1.1488868543504023, + "learning_rate": 1.2056709191385572e-08, + "loss": 0.7373194694519043, + "step": 8273 + }, + { + "epoch": 1.9064516129032258, + "grad_norm": 1.28219548502893, + "learning_rate": 1.1997798425495309e-08, + "loss": 0.7502317428588867, + "step": 8274 + }, + { + "epoch": 1.9066820276497696, + "grad_norm": 1.1940555150789485, + "learning_rate": 1.1939031066816707e-08, + "loss": 0.8208760023117065, + "step": 8275 + }, + { + "epoch": 1.9069124423963135, + "grad_norm": 1.2690336009694645, + "learning_rate": 1.188040712387961e-08, + "loss": 0.7584094405174255, + "step": 8276 + }, + { + "epoch": 1.907142857142857, + "grad_norm": 
1.3136164329476003, + "learning_rate": 1.1821926605193433e-08, + "loss": 0.7776647210121155, + "step": 8277 + }, + { + "epoch": 1.907373271889401, + "grad_norm": 1.0778088332238458, + "learning_rate": 1.1763589519246387e-08, + "loss": 0.7739659547805786, + "step": 8278 + }, + { + "epoch": 1.9076036866359447, + "grad_norm": 1.3752880267959628, + "learning_rate": 1.170539587450603e-08, + "loss": 0.7276068925857544, + "step": 8279 + }, + { + "epoch": 1.9078341013824884, + "grad_norm": 1.1782987713077362, + "learning_rate": 1.1647345679419163e-08, + "loss": 0.624208927154541, + "step": 8280 + }, + { + "epoch": 1.9080645161290324, + "grad_norm": 1.0744404873031923, + "learning_rate": 1.1589438942411712e-08, + "loss": 0.7865229845046997, + "step": 8281 + }, + { + "epoch": 1.908294930875576, + "grad_norm": 1.1655122856650737, + "learning_rate": 1.1531675671888619e-08, + "loss": 0.8290715217590332, + "step": 8282 + }, + { + "epoch": 1.9085253456221198, + "grad_norm": 1.4733922787626827, + "learning_rate": 1.1474055876234289e-08, + "loss": 0.8750064969062805, + "step": 8283 + }, + { + "epoch": 1.9087557603686636, + "grad_norm": 1.0358743027064434, + "learning_rate": 1.1416579563812146e-08, + "loss": 0.7946900129318237, + "step": 8284 + }, + { + "epoch": 1.9089861751152073, + "grad_norm": 1.1260650941834194, + "learning_rate": 1.1359246742964623e-08, + "loss": 0.6673855781555176, + "step": 8285 + }, + { + "epoch": 1.9092165898617512, + "grad_norm": 1.5734371068415847, + "learning_rate": 1.1302057422013734e-08, + "loss": 0.8423609137535095, + "step": 8286 + }, + { + "epoch": 1.909447004608295, + "grad_norm": 1.1774099615686673, + "learning_rate": 1.124501160926039e-08, + "loss": 0.7583299279212952, + "step": 8287 + }, + { + "epoch": 1.9096774193548387, + "grad_norm": 1.3632188021099019, + "learning_rate": 1.1188109312984639e-08, + "loss": 0.8489730358123779, + "step": 8288 + }, + { + "epoch": 1.9099078341013827, + "grad_norm": 1.268317857067217, + "learning_rate": 
1.1131350541445871e-08, + "loss": 0.7460636496543884, + "step": 8289 + }, + { + "epoch": 1.9101382488479262, + "grad_norm": 1.1951667787690143, + "learning_rate": 1.1074735302882387e-08, + "loss": 0.7310905456542969, + "step": 8290 + }, + { + "epoch": 1.91036866359447, + "grad_norm": 1.1692661015812214, + "learning_rate": 1.1018263605511946e-08, + "loss": 0.8411405086517334, + "step": 8291 + }, + { + "epoch": 1.9105990783410138, + "grad_norm": 1.12451343736832, + "learning_rate": 1.0961935457531323e-08, + "loss": 0.7980802059173584, + "step": 8292 + }, + { + "epoch": 1.9108294930875576, + "grad_norm": 1.2914760603674136, + "learning_rate": 1.0905750867116426e-08, + "loss": 0.779492974281311, + "step": 8293 + }, + { + "epoch": 1.9110599078341015, + "grad_norm": 1.0940139924335759, + "learning_rate": 1.0849709842422283e-08, + "loss": 0.7893733978271484, + "step": 8294 + }, + { + "epoch": 1.911290322580645, + "grad_norm": 1.367510888792546, + "learning_rate": 1.07938123915835e-08, + "loss": 0.8281872272491455, + "step": 8295 + }, + { + "epoch": 1.911520737327189, + "grad_norm": 1.3626141199750628, + "learning_rate": 1.0738058522713144e-08, + "loss": 0.721331775188446, + "step": 8296 + }, + { + "epoch": 1.9117511520737327, + "grad_norm": 0.9302233955509024, + "learning_rate": 1.0682448243904073e-08, + "loss": 0.6043491363525391, + "step": 8297 + }, + { + "epoch": 1.9119815668202764, + "grad_norm": 1.002380139729753, + "learning_rate": 1.0626981563227943e-08, + "loss": 0.7737481594085693, + "step": 8298 + }, + { + "epoch": 1.9122119815668204, + "grad_norm": 1.20563258082351, + "learning_rate": 1.0571658488735536e-08, + "loss": 0.771499514579773, + "step": 8299 + }, + { + "epoch": 1.912442396313364, + "grad_norm": 1.1334287395884057, + "learning_rate": 1.0516479028457204e-08, + "loss": 0.6711971759796143, + "step": 8300 + }, + { + "epoch": 1.9126728110599078, + "grad_norm": 1.1514161835446617, + "learning_rate": 1.0461443190402097e-08, + "loss": 0.691685140132904, + 
"step": 8301 + }, + { + "epoch": 1.9129032258064518, + "grad_norm": 1.0627327279898275, + "learning_rate": 1.0406550982558382e-08, + "loss": 0.7339159250259399, + "step": 8302 + }, + { + "epoch": 1.9131336405529953, + "grad_norm": 1.098827920572517, + "learning_rate": 1.0351802412893796e-08, + "loss": 0.7832008600234985, + "step": 8303 + }, + { + "epoch": 1.9133640552995392, + "grad_norm": 1.8976948304927823, + "learning_rate": 1.0297197489355092e-08, + "loss": 0.862671971321106, + "step": 8304 + }, + { + "epoch": 1.913594470046083, + "grad_norm": 1.2340137918284608, + "learning_rate": 1.0242736219867821e-08, + "loss": 0.6442357897758484, + "step": 8305 + }, + { + "epoch": 1.9138248847926267, + "grad_norm": 1.3262423414476558, + "learning_rate": 1.0188418612337102e-08, + "loss": 0.8777452707290649, + "step": 8306 + }, + { + "epoch": 1.9140552995391706, + "grad_norm": 1.2308393583128812, + "learning_rate": 1.0134244674647186e-08, + "loss": 0.7672470808029175, + "step": 8307 + }, + { + "epoch": 1.9142857142857141, + "grad_norm": 0.9277990008899878, + "learning_rate": 1.0080214414661226e-08, + "loss": 0.7338177561759949, + "step": 8308 + }, + { + "epoch": 1.914516129032258, + "grad_norm": 1.3815065909330264, + "learning_rate": 1.0026327840221727e-08, + "loss": 0.7546414136886597, + "step": 8309 + }, + { + "epoch": 1.9147465437788018, + "grad_norm": 1.0116807626508924, + "learning_rate": 9.972584959149988e-09, + "loss": 0.621455192565918, + "step": 8310 + }, + { + "epoch": 1.9149769585253456, + "grad_norm": 1.0385626369203964, + "learning_rate": 9.918985779247102e-09, + "loss": 0.7403131723403931, + "step": 8311 + }, + { + "epoch": 1.9152073732718895, + "grad_norm": 1.1027069898803628, + "learning_rate": 9.865530308292624e-09, + "loss": 0.7924279570579529, + "step": 8312 + }, + { + "epoch": 1.9154377880184332, + "grad_norm": 1.1362295208393791, + "learning_rate": 9.81221855404568e-09, + "loss": 0.8831228017807007, + "step": 8313 + }, + { + "epoch": 1.915668202764977, + 
"grad_norm": 1.1281945792188444, + "learning_rate": 9.759050524244417e-09, + "loss": 0.6786219477653503, + "step": 8314 + }, + { + "epoch": 1.9158986175115207, + "grad_norm": 1.2807157366480393, + "learning_rate": 9.70602622660599e-09, + "loss": 0.7311046123504639, + "step": 8315 + }, + { + "epoch": 1.9161290322580644, + "grad_norm": 1.3847340573145779, + "learning_rate": 9.653145668826912e-09, + "loss": 0.8914301991462708, + "step": 8316 + }, + { + "epoch": 1.9163594470046084, + "grad_norm": 1.4027670914288322, + "learning_rate": 9.600408858582709e-09, + "loss": 0.8144292831420898, + "step": 8317 + }, + { + "epoch": 1.916589861751152, + "grad_norm": 1.1077379444431534, + "learning_rate": 9.547815803528036e-09, + "loss": 0.6670823097229004, + "step": 8318 + }, + { + "epoch": 1.9168202764976958, + "grad_norm": 1.2434106495167774, + "learning_rate": 9.495366511296676e-09, + "loss": 0.6801552772521973, + "step": 8319 + }, + { + "epoch": 1.9170506912442398, + "grad_norm": 1.0098918722618904, + "learning_rate": 9.44306098950165e-09, + "loss": 0.8144240379333496, + "step": 8320 + }, + { + "epoch": 1.9172811059907833, + "grad_norm": 1.0515221920732627, + "learning_rate": 9.390899245734995e-09, + "loss": 0.6352888345718384, + "step": 8321 + }, + { + "epoch": 1.9175115207373272, + "grad_norm": 1.2296941092807456, + "learning_rate": 9.33888128756788e-09, + "loss": 0.7513711452484131, + "step": 8322 + }, + { + "epoch": 1.917741935483871, + "grad_norm": 1.4377668264686976, + "learning_rate": 9.287007122550705e-09, + "loss": 0.7699171304702759, + "step": 8323 + }, + { + "epoch": 1.9179723502304147, + "grad_norm": 1.591632209718944, + "learning_rate": 9.235276758212895e-09, + "loss": 0.8321002721786499, + "step": 8324 + }, + { + "epoch": 1.9182027649769586, + "grad_norm": 1.0453744404830132, + "learning_rate": 9.183690202062999e-09, + "loss": 0.6815298795700073, + "step": 8325 + }, + { + "epoch": 1.9184331797235024, + "grad_norm": 1.0030633247337575, + "learning_rate": 
9.132247461588915e-09, + "loss": 0.7135178446769714, + "step": 8326 + }, + { + "epoch": 1.918663594470046, + "grad_norm": 1.3123190228023687, + "learning_rate": 9.080948544257338e-09, + "loss": 0.8452005982398987, + "step": 8327 + }, + { + "epoch": 1.9188940092165898, + "grad_norm": 1.1270879003396566, + "learning_rate": 9.029793457514312e-09, + "loss": 0.7449440956115723, + "step": 8328 + }, + { + "epoch": 1.9191244239631335, + "grad_norm": 1.2310904327231214, + "learning_rate": 8.978782208784897e-09, + "loss": 0.8172955513000488, + "step": 8329 + }, + { + "epoch": 1.9193548387096775, + "grad_norm": 1.0097624251077932, + "learning_rate": 8.92791480547317e-09, + "loss": 0.6682305335998535, + "step": 8330 + }, + { + "epoch": 1.9195852534562212, + "grad_norm": 1.1974701853493588, + "learning_rate": 8.877191254962779e-09, + "loss": 0.6874973773956299, + "step": 8331 + }, + { + "epoch": 1.919815668202765, + "grad_norm": 1.1728345166861331, + "learning_rate": 8.826611564615949e-09, + "loss": 0.8371694684028625, + "step": 8332 + }, + { + "epoch": 1.920046082949309, + "grad_norm": 1.1837626119929445, + "learning_rate": 8.77617574177425e-09, + "loss": 0.7147493362426758, + "step": 8333 + }, + { + "epoch": 1.9202764976958524, + "grad_norm": 1.2783488550083906, + "learning_rate": 8.725883793758382e-09, + "loss": 0.7444115877151489, + "step": 8334 + }, + { + "epoch": 1.9205069124423964, + "grad_norm": 1.3799268170287549, + "learning_rate": 8.675735727868283e-09, + "loss": 0.7772307395935059, + "step": 8335 + }, + { + "epoch": 1.92073732718894, + "grad_norm": 1.2730237375907167, + "learning_rate": 8.625731551382798e-09, + "loss": 0.702937126159668, + "step": 8336 + }, + { + "epoch": 1.9209677419354838, + "grad_norm": 1.316574939310684, + "learning_rate": 8.575871271559898e-09, + "loss": 0.7404709458351135, + "step": 8337 + }, + { + "epoch": 1.9211981566820278, + "grad_norm": 1.4216605594412726, + "learning_rate": 8.526154895636906e-09, + "loss": 0.7142058610916138, + "step": 
8338 + }, + { + "epoch": 1.9214285714285713, + "grad_norm": 1.381037068322115, + "learning_rate": 8.476582430830048e-09, + "loss": 0.8950545191764832, + "step": 8339 + }, + { + "epoch": 1.9216589861751152, + "grad_norm": 1.2364573338693037, + "learning_rate": 8.42715388433446e-09, + "loss": 0.6939054131507874, + "step": 8340 + }, + { + "epoch": 1.921889400921659, + "grad_norm": 1.3248307922164142, + "learning_rate": 8.377869263324954e-09, + "loss": 0.7916324138641357, + "step": 8341 + }, + { + "epoch": 1.9221198156682027, + "grad_norm": 1.3092539218499513, + "learning_rate": 8.328728574954924e-09, + "loss": 0.8059754371643066, + "step": 8342 + }, + { + "epoch": 1.9223502304147466, + "grad_norm": 1.1195879983393067, + "learning_rate": 8.279731826357105e-09, + "loss": 0.650648295879364, + "step": 8343 + }, + { + "epoch": 1.9225806451612903, + "grad_norm": 0.9135397053997126, + "learning_rate": 8.230879024643478e-09, + "loss": 0.6912552118301392, + "step": 8344 + }, + { + "epoch": 1.922811059907834, + "grad_norm": 0.8588678436998939, + "learning_rate": 8.182170176904702e-09, + "loss": 0.7430927753448486, + "step": 8345 + }, + { + "epoch": 1.923041474654378, + "grad_norm": 1.1000327691208154, + "learning_rate": 8.133605290210898e-09, + "loss": 0.7550772428512573, + "step": 8346 + }, + { + "epoch": 1.9232718894009215, + "grad_norm": 1.1138393113278757, + "learning_rate": 8.08518437161132e-09, + "loss": 0.7235819101333618, + "step": 8347 + }, + { + "epoch": 1.9235023041474655, + "grad_norm": 1.085631464611088, + "learning_rate": 8.036907428134121e-09, + "loss": 0.790582537651062, + "step": 8348 + }, + { + "epoch": 1.9237327188940092, + "grad_norm": 1.2928878399763604, + "learning_rate": 7.988774466786585e-09, + "loss": 0.7350871562957764, + "step": 8349 + }, + { + "epoch": 1.923963133640553, + "grad_norm": 1.3980478677422172, + "learning_rate": 7.940785494555124e-09, + "loss": 0.86177659034729, + "step": 8350 + }, + { + "epoch": 1.9241935483870969, + "grad_norm": 
1.196963381013611, + "learning_rate": 7.892940518405499e-09, + "loss": 0.8039232492446899, + "step": 8351 + }, + { + "epoch": 1.9244239631336404, + "grad_norm": 1.231295549355971, + "learning_rate": 7.845239545282046e-09, + "loss": 0.7130967378616333, + "step": 8352 + }, + { + "epoch": 1.9246543778801843, + "grad_norm": 1.0830506625128473, + "learning_rate": 7.797682582108667e-09, + "loss": 0.7297911047935486, + "step": 8353 + }, + { + "epoch": 1.924884792626728, + "grad_norm": 1.2576048144274934, + "learning_rate": 7.750269635788065e-09, + "loss": 0.7302875518798828, + "step": 8354 + }, + { + "epoch": 1.9251152073732718, + "grad_norm": 1.1228331103171292, + "learning_rate": 7.703000713202401e-09, + "loss": 0.7976555824279785, + "step": 8355 + }, + { + "epoch": 1.9253456221198157, + "grad_norm": 1.1181213613597878, + "learning_rate": 7.65587582121252e-09, + "loss": 0.6747829914093018, + "step": 8356 + }, + { + "epoch": 1.9255760368663595, + "grad_norm": 1.3086474559444063, + "learning_rate": 7.608894966658509e-09, + "loss": 0.7217142581939697, + "step": 8357 + }, + { + "epoch": 1.9258064516129032, + "grad_norm": 1.3893709396765357, + "learning_rate": 7.562058156359685e-09, + "loss": 0.8635888695716858, + "step": 8358 + }, + { + "epoch": 1.9260368663594472, + "grad_norm": 1.3318330118319255, + "learning_rate": 7.515365397114282e-09, + "loss": 0.8435994386672974, + "step": 8359 + }, + { + "epoch": 1.9262672811059907, + "grad_norm": 1.4490671236886896, + "learning_rate": 7.468816695699653e-09, + "loss": 0.8632286787033081, + "step": 8360 + }, + { + "epoch": 1.9264976958525346, + "grad_norm": 1.501498499241499, + "learning_rate": 7.422412058872396e-09, + "loss": 0.7916556596755981, + "step": 8361 + }, + { + "epoch": 1.9267281105990783, + "grad_norm": 1.1808854932681303, + "learning_rate": 7.376151493368121e-09, + "loss": 0.8307663202285767, + "step": 8362 + }, + { + "epoch": 1.926958525345622, + "grad_norm": 1.4156996026964064, + "learning_rate": 7.330035005901236e-09, 
+ "loss": 0.9020388126373291, + "step": 8363 + }, + { + "epoch": 1.927188940092166, + "grad_norm": 1.222606934693838, + "learning_rate": 7.28406260316572e-09, + "loss": 0.7926114797592163, + "step": 8364 + }, + { + "epoch": 1.9274193548387095, + "grad_norm": 1.0417046174216056, + "learning_rate": 7.2382342918343446e-09, + "loss": 0.7609784603118896, + "step": 8365 + }, + { + "epoch": 1.9276497695852535, + "grad_norm": 1.3729827404737949, + "learning_rate": 7.192550078559012e-09, + "loss": 0.6010490655899048, + "step": 8366 + }, + { + "epoch": 1.9278801843317972, + "grad_norm": 1.495271329234438, + "learning_rate": 7.147009969970641e-09, + "loss": 0.8219606876373291, + "step": 8367 + }, + { + "epoch": 1.928110599078341, + "grad_norm": 1.207499145814505, + "learning_rate": 7.101613972679499e-09, + "loss": 0.8688151836395264, + "step": 8368 + }, + { + "epoch": 1.9283410138248849, + "grad_norm": 1.0608698410629562, + "learning_rate": 7.0563620932747595e-09, + "loss": 0.7654411792755127, + "step": 8369 + }, + { + "epoch": 1.9285714285714286, + "grad_norm": 1.0982841652537483, + "learning_rate": 7.01125433832439e-09, + "loss": 0.6878413558006287, + "step": 8370 + }, + { + "epoch": 1.9288018433179723, + "grad_norm": 1.0662803206592244, + "learning_rate": 6.966290714375933e-09, + "loss": 0.6703332662582397, + "step": 8371 + }, + { + "epoch": 1.9290322580645163, + "grad_norm": 1.1405585467491617, + "learning_rate": 6.921471227955833e-09, + "loss": 0.752200722694397, + "step": 8372 + }, + { + "epoch": 1.9292626728110598, + "grad_norm": 1.1122335677850106, + "learning_rate": 6.8767958855695526e-09, + "loss": 0.8107069730758667, + "step": 8373 + }, + { + "epoch": 1.9294930875576037, + "grad_norm": 1.4102834771954489, + "learning_rate": 6.832264693701573e-09, + "loss": 0.8816967010498047, + "step": 8374 + }, + { + "epoch": 1.9297235023041475, + "grad_norm": 1.2593635712728732, + "learning_rate": 6.78787765881561e-09, + "loss": 0.7889697551727295, + "step": 8375 + }, + { + 
"epoch": 1.9299539170506912, + "grad_norm": 1.2377942170623384, + "learning_rate": 6.743634787354291e-09, + "loss": 0.7218060493469238, + "step": 8376 + }, + { + "epoch": 1.9301843317972351, + "grad_norm": 1.2786458190631131, + "learning_rate": 6.699536085739588e-09, + "loss": 0.8061347007751465, + "step": 8377 + }, + { + "epoch": 1.9304147465437786, + "grad_norm": 1.0571211016932303, + "learning_rate": 6.655581560372159e-09, + "loss": 0.7320632934570312, + "step": 8378 + }, + { + "epoch": 1.9306451612903226, + "grad_norm": 1.2201688729332103, + "learning_rate": 6.611771217632123e-09, + "loss": 0.7039695978164673, + "step": 8379 + }, + { + "epoch": 1.9308755760368663, + "grad_norm": 1.0152325785443144, + "learning_rate": 6.568105063878393e-09, + "loss": 0.7056317925453186, + "step": 8380 + }, + { + "epoch": 1.93110599078341, + "grad_norm": 1.3442992098354511, + "learning_rate": 6.524583105449122e-09, + "loss": 0.9265607595443726, + "step": 8381 + }, + { + "epoch": 1.931336405529954, + "grad_norm": 0.9980232024455323, + "learning_rate": 6.481205348661367e-09, + "loss": 0.7249365448951721, + "step": 8382 + }, + { + "epoch": 1.9315668202764977, + "grad_norm": 1.0217670095742197, + "learning_rate": 6.4379717998114256e-09, + "loss": 0.8216372728347778, + "step": 8383 + }, + { + "epoch": 1.9317972350230415, + "grad_norm": 1.0731967820570871, + "learning_rate": 6.394882465174611e-09, + "loss": 0.6750606894493103, + "step": 8384 + }, + { + "epoch": 1.9320276497695854, + "grad_norm": 1.1382732221343326, + "learning_rate": 6.351937351005143e-09, + "loss": 0.8265045285224915, + "step": 8385 + }, + { + "epoch": 1.932258064516129, + "grad_norm": 1.2033626019579449, + "learning_rate": 6.309136463536591e-09, + "loss": 0.5992317795753479, + "step": 8386 + }, + { + "epoch": 1.9324884792626729, + "grad_norm": 1.026760102298627, + "learning_rate": 6.266479808981428e-09, + "loss": 0.6586567163467407, + "step": 8387 + }, + { + "epoch": 1.9327188940092166, + "grad_norm": 
1.1335080912138158, + "learning_rate": 6.223967393531259e-09, + "loss": 0.7496415376663208, + "step": 8388 + }, + { + "epoch": 1.9329493087557603, + "grad_norm": 1.2743344602397095, + "learning_rate": 6.181599223356593e-09, + "loss": 0.8637027740478516, + "step": 8389 + }, + { + "epoch": 1.9331797235023043, + "grad_norm": 1.3348493633535858, + "learning_rate": 6.139375304607064e-09, + "loss": 0.6925984621047974, + "step": 8390 + }, + { + "epoch": 1.9334101382488478, + "grad_norm": 1.3338549311969345, + "learning_rate": 6.0972956434115485e-09, + "loss": 0.8345432877540588, + "step": 8391 + }, + { + "epoch": 1.9336405529953917, + "grad_norm": 1.211546505819517, + "learning_rate": 6.055360245877938e-09, + "loss": 0.797752857208252, + "step": 8392 + }, + { + "epoch": 1.9338709677419355, + "grad_norm": 1.025513773253857, + "learning_rate": 6.013569118092809e-09, + "loss": 0.7460094690322876, + "step": 8393 + }, + { + "epoch": 1.9341013824884792, + "grad_norm": 1.0501792229397418, + "learning_rate": 5.97192226612242e-09, + "loss": 0.7695547342300415, + "step": 8394 + }, + { + "epoch": 1.9343317972350231, + "grad_norm": 1.3341559418127071, + "learning_rate": 5.9304196960113795e-09, + "loss": 0.8372104167938232, + "step": 8395 + }, + { + "epoch": 1.9345622119815669, + "grad_norm": 1.174939684239835, + "learning_rate": 5.889061413784091e-09, + "loss": 0.7647950053215027, + "step": 8396 + }, + { + "epoch": 1.9347926267281106, + "grad_norm": 1.0568987578487792, + "learning_rate": 5.84784742544353e-09, + "loss": 0.6958519220352173, + "step": 8397 + }, + { + "epoch": 1.9350230414746545, + "grad_norm": 1.1905008025272417, + "learning_rate": 5.806777736971691e-09, + "loss": 0.8488763570785522, + "step": 8398 + }, + { + "epoch": 1.935253456221198, + "grad_norm": 1.1975357379056275, + "learning_rate": 5.765852354330025e-09, + "loss": 0.6448318958282471, + "step": 8399 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 1.288117894635522, + "learning_rate": 5.725071283458671e-09, 
+ "loss": 0.7449144124984741, + "step": 8400 + }, + { + "epoch": 1.9357142857142857, + "grad_norm": 1.2060473887345362, + "learning_rate": 5.684434530277005e-09, + "loss": 0.8339489102363586, + "step": 8401 + }, + { + "epoch": 1.9359447004608294, + "grad_norm": 1.355663998015665, + "learning_rate": 5.643942100683308e-09, + "loss": 0.7758409380912781, + "step": 8402 + }, + { + "epoch": 1.9361751152073734, + "grad_norm": 1.2457476365021507, + "learning_rate": 5.60359400055499e-09, + "loss": 0.8604291081428528, + "step": 8403 + }, + { + "epoch": 1.936405529953917, + "grad_norm": 0.9800977546704353, + "learning_rate": 5.5633902357487e-09, + "loss": 0.7379741668701172, + "step": 8404 + }, + { + "epoch": 1.9366359447004609, + "grad_norm": 1.0501931597758303, + "learning_rate": 5.52333081209988e-09, + "loss": 0.6943101286888123, + "step": 8405 + }, + { + "epoch": 1.9368663594470046, + "grad_norm": 1.193280273833338, + "learning_rate": 5.483415735422992e-09, + "loss": 0.7397646903991699, + "step": 8406 + }, + { + "epoch": 1.9370967741935483, + "grad_norm": 1.1298510822998358, + "learning_rate": 5.443645011511844e-09, + "loss": 0.7566234469413757, + "step": 8407 + }, + { + "epoch": 1.9373271889400923, + "grad_norm": 1.322820355956732, + "learning_rate": 5.40401864613893e-09, + "loss": 0.6345827579498291, + "step": 8408 + }, + { + "epoch": 1.937557603686636, + "grad_norm": 1.6653451978671274, + "learning_rate": 5.3645366450560944e-09, + "loss": 0.7259831428527832, + "step": 8409 + }, + { + "epoch": 1.9377880184331797, + "grad_norm": 1.347964952979272, + "learning_rate": 5.325199013993975e-09, + "loss": 0.7897600531578064, + "step": 8410 + }, + { + "epoch": 1.9380184331797237, + "grad_norm": 1.3016062068490681, + "learning_rate": 5.286005758662448e-09, + "loss": 0.8421739339828491, + "step": 8411 + }, + { + "epoch": 1.9382488479262672, + "grad_norm": 1.3347958532899202, + "learning_rate": 5.2469568847504085e-09, + "loss": 0.7652501463890076, + "step": 8412 + }, + { + "epoch": 
1.9384792626728111, + "grad_norm": 1.3105993577298032, + "learning_rate": 5.2080523979256556e-09, + "loss": 0.6397069096565247, + "step": 8413 + }, + { + "epoch": 1.9387096774193548, + "grad_norm": 1.2689574006754154, + "learning_rate": 5.169292303835116e-09, + "loss": 0.840052604675293, + "step": 8414 + }, + { + "epoch": 1.9389400921658986, + "grad_norm": 1.344062608291919, + "learning_rate": 5.130676608104845e-09, + "loss": 0.8453920483589172, + "step": 8415 + }, + { + "epoch": 1.9391705069124425, + "grad_norm": 1.3358429095342716, + "learning_rate": 5.092205316339915e-09, + "loss": 0.8301386833190918, + "step": 8416 + }, + { + "epoch": 1.939400921658986, + "grad_norm": 1.0570862677742232, + "learning_rate": 5.0538784341241924e-09, + "loss": 0.6682429313659668, + "step": 8417 + }, + { + "epoch": 1.93963133640553, + "grad_norm": 1.4370850274204425, + "learning_rate": 5.0156959670208945e-09, + "loss": 0.7881286144256592, + "step": 8418 + }, + { + "epoch": 1.9398617511520737, + "grad_norm": 1.1170749783406635, + "learning_rate": 4.9776579205721424e-09, + "loss": 0.7413277626037598, + "step": 8419 + }, + { + "epoch": 1.9400921658986174, + "grad_norm": 1.2672048797390025, + "learning_rate": 4.939764300299187e-09, + "loss": 0.6718757152557373, + "step": 8420 + }, + { + "epoch": 1.9403225806451614, + "grad_norm": 1.1707673461814823, + "learning_rate": 4.9020151117019625e-09, + "loss": 0.8595068454742432, + "step": 8421 + }, + { + "epoch": 1.9405529953917051, + "grad_norm": 1.0350774696905816, + "learning_rate": 4.864410360260085e-09, + "loss": 0.6985205411911011, + "step": 8422 + }, + { + "epoch": 1.9407834101382488, + "grad_norm": 1.222465370246094, + "learning_rate": 4.826950051431522e-09, + "loss": 0.7148889303207397, + "step": 8423 + }, + { + "epoch": 1.9410138248847926, + "grad_norm": 1.320040251210183, + "learning_rate": 4.789634190653813e-09, + "loss": 0.8109019994735718, + "step": 8424 + }, + { + "epoch": 1.9412442396313363, + "grad_norm": 1.4762486891336946, + 
"learning_rate": 4.752462783343292e-09, + "loss": 0.8268437385559082, + "step": 8425 + }, + { + "epoch": 1.9414746543778802, + "grad_norm": 0.9708535634361853, + "learning_rate": 4.715435834895088e-09, + "loss": 0.7300432920455933, + "step": 8426 + }, + { + "epoch": 1.941705069124424, + "grad_norm": 1.3017508085468754, + "learning_rate": 4.6785533506839005e-09, + "loss": 0.848440408706665, + "step": 8427 + }, + { + "epoch": 1.9419354838709677, + "grad_norm": 1.0873655680994063, + "learning_rate": 4.6418153360630044e-09, + "loss": 0.7526305913925171, + "step": 8428 + }, + { + "epoch": 1.9421658986175117, + "grad_norm": 1.1186105868292944, + "learning_rate": 4.605221796365022e-09, + "loss": 0.6987402439117432, + "step": 8429 + }, + { + "epoch": 1.9423963133640552, + "grad_norm": 1.5889483697201847, + "learning_rate": 4.568772736901261e-09, + "loss": 0.7944519519805908, + "step": 8430 + }, + { + "epoch": 1.942626728110599, + "grad_norm": 1.0443704220390153, + "learning_rate": 4.532468162962378e-09, + "loss": 0.7206175327301025, + "step": 8431 + }, + { + "epoch": 1.9428571428571428, + "grad_norm": 1.332362884391146, + "learning_rate": 4.4963080798179345e-09, + "loss": 0.6892992854118347, + "step": 8432 + }, + { + "epoch": 1.9430875576036866, + "grad_norm": 1.0826330060160456, + "learning_rate": 4.460292492716511e-09, + "loss": 0.696158766746521, + "step": 8433 + }, + { + "epoch": 1.9433179723502305, + "grad_norm": 0.9789941295444919, + "learning_rate": 4.424421406885704e-09, + "loss": 0.8007163405418396, + "step": 8434 + }, + { + "epoch": 1.9435483870967742, + "grad_norm": 1.1286085842961833, + "learning_rate": 4.3886948275320135e-09, + "loss": 0.7969222068786621, + "step": 8435 + }, + { + "epoch": 1.943778801843318, + "grad_norm": 1.2183409512094359, + "learning_rate": 4.353112759841404e-09, + "loss": 0.7752852439880371, + "step": 8436 + }, + { + "epoch": 1.9440092165898617, + "grad_norm": 1.1860536416754315, + "learning_rate": 4.317675208978411e-09, + "loss": 
0.7788258790969849, + "step": 8437 + }, + { + "epoch": 1.9442396313364054, + "grad_norm": 1.1863849018136006, + "learning_rate": 4.2823821800866964e-09, + "loss": 0.838456392288208, + "step": 8438 + }, + { + "epoch": 1.9444700460829494, + "grad_norm": 1.0569456831140607, + "learning_rate": 4.2472336782890525e-09, + "loss": 0.7503675222396851, + "step": 8439 + }, + { + "epoch": 1.944700460829493, + "grad_norm": 0.9808278818485672, + "learning_rate": 4.212229708687287e-09, + "loss": 0.810901403427124, + "step": 8440 + }, + { + "epoch": 1.9449308755760368, + "grad_norm": 1.0050063922171069, + "learning_rate": 4.1773702763621135e-09, + "loss": 0.7551805973052979, + "step": 8441 + }, + { + "epoch": 1.9451612903225808, + "grad_norm": 1.2275039222333026, + "learning_rate": 4.142655386373373e-09, + "loss": 0.9387043714523315, + "step": 8442 + }, + { + "epoch": 1.9453917050691243, + "grad_norm": 1.034577232879954, + "learning_rate": 4.1080850437598124e-09, + "loss": 0.7508292198181152, + "step": 8443 + }, + { + "epoch": 1.9456221198156682, + "grad_norm": 0.9799945991508818, + "learning_rate": 4.073659253539308e-09, + "loss": 0.737107515335083, + "step": 8444 + }, + { + "epoch": 1.945852534562212, + "grad_norm": 1.477967097078984, + "learning_rate": 4.03937802070875e-09, + "loss": 0.86794114112854, + "step": 8445 + }, + { + "epoch": 1.9460829493087557, + "grad_norm": 0.9207750837260967, + "learning_rate": 4.005241350243937e-09, + "loss": 0.7629859447479248, + "step": 8446 + }, + { + "epoch": 1.9463133640552996, + "grad_norm": 1.4180879805115079, + "learning_rate": 3.971249247099906e-09, + "loss": 0.7455410957336426, + "step": 8447 + }, + { + "epoch": 1.9465437788018434, + "grad_norm": 1.1941620926103322, + "learning_rate": 3.937401716210376e-09, + "loss": 0.8322222828865051, + "step": 8448 + }, + { + "epoch": 1.946774193548387, + "grad_norm": 1.510433091637528, + "learning_rate": 3.903698762488528e-09, + "loss": 0.7961260676383972, + "step": 8449 + }, + { + "epoch": 
1.9470046082949308, + "grad_norm": 1.2160569883363423, + "learning_rate": 3.870140390826005e-09, + "loss": 0.8144096732139587, + "step": 8450 + }, + { + "epoch": 1.9472350230414746, + "grad_norm": 1.2123613138822447, + "learning_rate": 3.8367266060939095e-09, + "loss": 0.7973348498344421, + "step": 8451 + }, + { + "epoch": 1.9474654377880185, + "grad_norm": 1.4038735969349747, + "learning_rate": 3.803457413142253e-09, + "loss": 0.8311715126037598, + "step": 8452 + }, + { + "epoch": 1.9476958525345622, + "grad_norm": 0.9815978065709688, + "learning_rate": 3.770332816799948e-09, + "loss": 0.7851812839508057, + "step": 8453 + }, + { + "epoch": 1.947926267281106, + "grad_norm": 1.3820548975058524, + "learning_rate": 3.737352821875039e-09, + "loss": 0.8721193075180054, + "step": 8454 + }, + { + "epoch": 1.94815668202765, + "grad_norm": 1.2337347998012935, + "learning_rate": 3.704517433154364e-09, + "loss": 0.8594118356704712, + "step": 8455 + }, + { + "epoch": 1.9483870967741934, + "grad_norm": 0.9620755666197012, + "learning_rate": 3.671826655404109e-09, + "loss": 0.6526527404785156, + "step": 8456 + }, + { + "epoch": 1.9486175115207374, + "grad_norm": 0.9198704876253201, + "learning_rate": 3.639280493369368e-09, + "loss": 0.7577145099639893, + "step": 8457 + }, + { + "epoch": 1.948847926267281, + "grad_norm": 1.4898349304718468, + "learning_rate": 3.6068789517739173e-09, + "loss": 0.9176833629608154, + "step": 8458 + }, + { + "epoch": 1.9490783410138248, + "grad_norm": 1.5070373914502264, + "learning_rate": 3.5746220353209956e-09, + "loss": 0.8947671055793762, + "step": 8459 + }, + { + "epoch": 1.9493087557603688, + "grad_norm": 1.2654885409411176, + "learning_rate": 3.542509748692524e-09, + "loss": 0.8791666030883789, + "step": 8460 + }, + { + "epoch": 1.9495391705069123, + "grad_norm": 0.9247331783476281, + "learning_rate": 3.5105420965496626e-09, + "loss": 0.7431247234344482, + "step": 8461 + }, + { + "epoch": 1.9497695852534562, + "grad_norm": 1.3437504272827105, 
+ "learning_rate": 3.4787190835324775e-09, + "loss": 0.7998695373535156, + "step": 8462 + }, + { + "epoch": 1.95, + "grad_norm": 1.359553043789141, + "learning_rate": 3.447040714259941e-09, + "loss": 0.8120161294937134, + "step": 8463 + }, + { + "epoch": 1.9502304147465437, + "grad_norm": 1.063781533705899, + "learning_rate": 3.415506993330153e-09, + "loss": 0.8062546849250793, + "step": 8464 + }, + { + "epoch": 1.9504608294930876, + "grad_norm": 1.3290963135655427, + "learning_rate": 3.384117925320229e-09, + "loss": 0.8100919723510742, + "step": 8465 + }, + { + "epoch": 1.9506912442396314, + "grad_norm": 1.410960677080016, + "learning_rate": 3.352873514786303e-09, + "loss": 0.7376535534858704, + "step": 8466 + }, + { + "epoch": 1.950921658986175, + "grad_norm": 1.1333962819853984, + "learning_rate": 3.321773766263303e-09, + "loss": 0.7534361481666565, + "step": 8467 + }, + { + "epoch": 1.951152073732719, + "grad_norm": 0.956942860373484, + "learning_rate": 3.290818684265506e-09, + "loss": 0.6914925575256348, + "step": 8468 + }, + { + "epoch": 1.9513824884792625, + "grad_norm": 1.40322423242457, + "learning_rate": 3.2600082732858746e-09, + "loss": 0.837024450302124, + "step": 8469 + }, + { + "epoch": 1.9516129032258065, + "grad_norm": 1.3077639635125993, + "learning_rate": 3.229342537796609e-09, + "loss": 0.7960337400436401, + "step": 8470 + }, + { + "epoch": 1.9518433179723502, + "grad_norm": 1.1044299774108808, + "learning_rate": 3.1988214822485928e-09, + "loss": 0.6611788868904114, + "step": 8471 + }, + { + "epoch": 1.952073732718894, + "grad_norm": 1.2652589643459276, + "learning_rate": 3.16844511107206e-09, + "loss": 0.8798158168792725, + "step": 8472 + }, + { + "epoch": 1.952304147465438, + "grad_norm": 1.3477135835069336, + "learning_rate": 3.1382134286761506e-09, + "loss": 0.790015459060669, + "step": 8473 + }, + { + "epoch": 1.9525345622119814, + "grad_norm": 1.062422263250462, + "learning_rate": 3.1081264394489103e-09, + "loss": 0.7676407098770142, + 
"step": 8474 + }, + { + "epoch": 1.9527649769585254, + "grad_norm": 1.1707572290080033, + "learning_rate": 3.07818414775729e-09, + "loss": 0.8213051557540894, + "step": 8475 + }, + { + "epoch": 1.952995391705069, + "grad_norm": 1.328203051872804, + "learning_rate": 3.048386557947591e-09, + "loss": 0.8909401893615723, + "step": 8476 + }, + { + "epoch": 1.9532258064516128, + "grad_norm": 1.2206551189591073, + "learning_rate": 3.0187336743446867e-09, + "loss": 0.838227391242981, + "step": 8477 + }, + { + "epoch": 1.9534562211981568, + "grad_norm": 1.1958685930192579, + "learning_rate": 2.9892255012528013e-09, + "loss": 0.7297696471214294, + "step": 8478 + }, + { + "epoch": 1.9536866359447005, + "grad_norm": 1.508389266534061, + "learning_rate": 2.9598620429550636e-09, + "loss": 1.0060585737228394, + "step": 8479 + }, + { + "epoch": 1.9539170506912442, + "grad_norm": 1.1858328009290373, + "learning_rate": 2.9306433037132873e-09, + "loss": 0.7812967300415039, + "step": 8480 + }, + { + "epoch": 1.9541474654377882, + "grad_norm": 1.196629989025656, + "learning_rate": 2.901569287768746e-09, + "loss": 0.7349315881729126, + "step": 8481 + }, + { + "epoch": 1.9543778801843317, + "grad_norm": 1.1580071941270487, + "learning_rate": 2.8726399993415085e-09, + "loss": 0.7083498239517212, + "step": 8482 + }, + { + "epoch": 1.9546082949308756, + "grad_norm": 1.3308451395414542, + "learning_rate": 2.8438554426304386e-09, + "loss": 0.7969732880592346, + "step": 8483 + }, + { + "epoch": 1.9548387096774194, + "grad_norm": 1.405840014033905, + "learning_rate": 2.815215621813749e-09, + "loss": 0.7701122164726257, + "step": 8484 + }, + { + "epoch": 1.955069124423963, + "grad_norm": 1.0487330945577633, + "learning_rate": 2.7867205410484485e-09, + "loss": 0.7323017120361328, + "step": 8485 + }, + { + "epoch": 1.955299539170507, + "grad_norm": 0.9842598310766136, + "learning_rate": 2.7583702044704504e-09, + "loss": 0.8357248306274414, + "step": 8486 + }, + { + "epoch": 1.9555299539170505, + 
"grad_norm": 1.4806137218761686, + "learning_rate": 2.7301646161947966e-09, + "loss": 0.8164674043655396, + "step": 8487 + }, + { + "epoch": 1.9557603686635945, + "grad_norm": 1.2641967325925645, + "learning_rate": 2.7021037803156566e-09, + "loss": 0.7972782850265503, + "step": 8488 + }, + { + "epoch": 1.9559907834101382, + "grad_norm": 1.2417679147004388, + "learning_rate": 2.6741877009058835e-09, + "loss": 0.864342987537384, + "step": 8489 + }, + { + "epoch": 1.956221198156682, + "grad_norm": 1.1067561191492752, + "learning_rate": 2.646416382017458e-09, + "loss": 0.7428402900695801, + "step": 8490 + }, + { + "epoch": 1.956451612903226, + "grad_norm": 1.3211414352422526, + "learning_rate": 2.618789827681378e-09, + "loss": 0.7164437770843506, + "step": 8491 + }, + { + "epoch": 1.9566820276497696, + "grad_norm": 1.153189225005644, + "learning_rate": 2.5913080419075473e-09, + "loss": 0.6997767686843872, + "step": 8492 + }, + { + "epoch": 1.9569124423963133, + "grad_norm": 1.2481992001614755, + "learning_rate": 2.563971028684886e-09, + "loss": 0.6399234533309937, + "step": 8493 + }, + { + "epoch": 1.9571428571428573, + "grad_norm": 1.1639751659112805, + "learning_rate": 2.536778791981553e-09, + "loss": 0.7642914056777954, + "step": 8494 + }, + { + "epoch": 1.9573732718894008, + "grad_norm": 1.218382512158835, + "learning_rate": 2.5097313357442806e-09, + "loss": 0.8284746408462524, + "step": 8495 + }, + { + "epoch": 1.9576036866359448, + "grad_norm": 1.2221524988832009, + "learning_rate": 2.4828286638989282e-09, + "loss": 0.6680238246917725, + "step": 8496 + }, + { + "epoch": 1.9578341013824885, + "grad_norm": 1.2965002342798193, + "learning_rate": 2.4560707803504834e-09, + "loss": 0.7621040344238281, + "step": 8497 + }, + { + "epoch": 1.9580645161290322, + "grad_norm": 1.2947556724815892, + "learning_rate": 2.4294576889827278e-09, + "loss": 0.7326159477233887, + "step": 8498 + }, + { + "epoch": 1.9582949308755762, + "grad_norm": 1.0656455780738308, + "learning_rate": 
2.4029893936586833e-09, + "loss": 0.6496877670288086, + "step": 8499 + }, + { + "epoch": 1.9585253456221197, + "grad_norm": 1.241192579535759, + "learning_rate": 2.376665898220054e-09, + "loss": 0.665170431137085, + "step": 8500 + }, + { + "epoch": 1.9587557603686636, + "grad_norm": 1.2593646350179877, + "learning_rate": 2.3504872064876724e-09, + "loss": 0.7238261699676514, + "step": 8501 + }, + { + "epoch": 1.9589861751152073, + "grad_norm": 1.5709730629781664, + "learning_rate": 2.3244533222613882e-09, + "loss": 0.6696983575820923, + "step": 8502 + }, + { + "epoch": 1.959216589861751, + "grad_norm": 1.6011689537620306, + "learning_rate": 2.2985642493199563e-09, + "loss": 0.8414099216461182, + "step": 8503 + }, + { + "epoch": 1.959447004608295, + "grad_norm": 1.1680069988943498, + "learning_rate": 2.2728199914210377e-09, + "loss": 0.7390140295028687, + "step": 8504 + }, + { + "epoch": 1.9596774193548387, + "grad_norm": 1.1922421298842674, + "learning_rate": 2.247220552301532e-09, + "loss": 0.7910370826721191, + "step": 8505 + }, + { + "epoch": 1.9599078341013825, + "grad_norm": 1.2059164746419144, + "learning_rate": 2.2217659356771334e-09, + "loss": 0.8111266493797302, + "step": 8506 + }, + { + "epoch": 1.9601382488479264, + "grad_norm": 1.382769681983927, + "learning_rate": 2.1964561452425535e-09, + "loss": 0.8748809099197388, + "step": 8507 + }, + { + "epoch": 1.96036866359447, + "grad_norm": 1.4348415171969837, + "learning_rate": 2.1712911846714088e-09, + "loss": 0.726898193359375, + "step": 8508 + }, + { + "epoch": 1.9605990783410139, + "grad_norm": 0.903624770648156, + "learning_rate": 2.1462710576163335e-09, + "loss": 0.5221005082130432, + "step": 8509 + }, + { + "epoch": 1.9608294930875576, + "grad_norm": 0.9979524654583228, + "learning_rate": 2.1213957677090887e-09, + "loss": 0.7336875200271606, + "step": 8510 + }, + { + "epoch": 1.9610599078341013, + "grad_norm": 1.4570574984679434, + "learning_rate": 2.096665318560231e-09, + "loss": 0.9653327465057373, + 
"step": 8511 + }, + { + "epoch": 1.9612903225806453, + "grad_norm": 1.3910033326033395, + "learning_rate": 2.0720797137594448e-09, + "loss": 0.8309473991394043, + "step": 8512 + }, + { + "epoch": 1.9615207373271888, + "grad_norm": 1.250491052702372, + "learning_rate": 2.047638956874986e-09, + "loss": 0.7829124331474304, + "step": 8513 + }, + { + "epoch": 1.9617511520737327, + "grad_norm": 1.6063542888921636, + "learning_rate": 2.0233430514547955e-09, + "loss": 0.8399544358253479, + "step": 8514 + }, + { + "epoch": 1.9619815668202765, + "grad_norm": 1.2304488854915971, + "learning_rate": 1.999192001025163e-09, + "loss": 0.7827579975128174, + "step": 8515 + }, + { + "epoch": 1.9622119815668202, + "grad_norm": 1.7023781342726942, + "learning_rate": 1.9751858090916174e-09, + "loss": 0.8617441654205322, + "step": 8516 + }, + { + "epoch": 1.9624423963133641, + "grad_norm": 1.124873706648068, + "learning_rate": 1.951324479138594e-09, + "loss": 0.758098840713501, + "step": 8517 + }, + { + "epoch": 1.9626728110599079, + "grad_norm": 1.407820551284048, + "learning_rate": 1.927608014629656e-09, + "loss": 0.738059937953949, + "step": 8518 + }, + { + "epoch": 1.9629032258064516, + "grad_norm": 1.2924313700222672, + "learning_rate": 1.9040364190070492e-09, + "loss": 0.6286636590957642, + "step": 8519 + }, + { + "epoch": 1.9631336405529956, + "grad_norm": 1.4040969276884698, + "learning_rate": 1.88060969569237e-09, + "loss": 0.764518141746521, + "step": 8520 + }, + { + "epoch": 1.963364055299539, + "grad_norm": 0.9848782890607348, + "learning_rate": 1.8573278480857878e-09, + "loss": 0.775516152381897, + "step": 8521 + }, + { + "epoch": 1.963594470046083, + "grad_norm": 1.2592904992793421, + "learning_rate": 1.8341908795665994e-09, + "loss": 0.8513185977935791, + "step": 8522 + }, + { + "epoch": 1.9638248847926267, + "grad_norm": 1.4423039825526616, + "learning_rate": 1.8111987934933404e-09, + "loss": 0.7300710082054138, + "step": 8523 + }, + { + "epoch": 1.9640552995391705, + 
"grad_norm": 1.1896167974085796, + "learning_rate": 1.788351593203119e-09, + "loss": 0.7346746921539307, + "step": 8524 + }, + { + "epoch": 1.9642857142857144, + "grad_norm": 1.3610028359172472, + "learning_rate": 1.7656492820121715e-09, + "loss": 0.8231781721115112, + "step": 8525 + }, + { + "epoch": 1.964516129032258, + "grad_norm": 1.2672154264769777, + "learning_rate": 1.743091863215751e-09, + "loss": 0.6972112655639648, + "step": 8526 + }, + { + "epoch": 1.9647465437788019, + "grad_norm": 1.013160541626117, + "learning_rate": 1.720679340088016e-09, + "loss": 0.6512203812599182, + "step": 8527 + }, + { + "epoch": 1.9649769585253456, + "grad_norm": 1.220658103943082, + "learning_rate": 1.698411715882253e-09, + "loss": 0.6755591630935669, + "step": 8528 + }, + { + "epoch": 1.9652073732718893, + "grad_norm": 1.115552383506669, + "learning_rate": 1.6762889938303215e-09, + "loss": 0.6858727335929871, + "step": 8529 + }, + { + "epoch": 1.9654377880184333, + "grad_norm": 1.1810577023934496, + "learning_rate": 1.6543111771434303e-09, + "loss": 0.7820768356323242, + "step": 8530 + }, + { + "epoch": 1.965668202764977, + "grad_norm": 1.512690235242737, + "learning_rate": 1.6324782690116944e-09, + "loss": 0.7841604948043823, + "step": 8531 + }, + { + "epoch": 1.9658986175115207, + "grad_norm": 1.4015300039500524, + "learning_rate": 1.6107902726040234e-09, + "loss": 0.8665674328804016, + "step": 8532 + }, + { + "epoch": 1.9661290322580647, + "grad_norm": 1.1307460450405855, + "learning_rate": 1.5892471910684547e-09, + "loss": 0.6764376163482666, + "step": 8533 + }, + { + "epoch": 1.9663594470046082, + "grad_norm": 1.4229790787582275, + "learning_rate": 1.5678490275319312e-09, + "loss": 0.8453094959259033, + "step": 8534 + }, + { + "epoch": 1.9665898617511521, + "grad_norm": 1.0573142140796512, + "learning_rate": 1.546595785100413e-09, + "loss": 0.7798272371292114, + "step": 8535 + }, + { + "epoch": 1.9668202764976959, + "grad_norm": 1.1791892730982974, + "learning_rate": 
1.5254874668586548e-09, + "loss": 0.7426424026489258, + "step": 8536 + }, + { + "epoch": 1.9670506912442396, + "grad_norm": 1.1309739514060748, + "learning_rate": 1.5045240758706501e-09, + "loss": 0.8443984985351562, + "step": 8537 + }, + { + "epoch": 1.9672811059907835, + "grad_norm": 1.1053257066980806, + "learning_rate": 1.4837056151790762e-09, + "loss": 0.8439072370529175, + "step": 8538 + }, + { + "epoch": 1.967511520737327, + "grad_norm": 1.4135182916864908, + "learning_rate": 1.463032087805849e-09, + "loss": 0.8307704925537109, + "step": 8539 + }, + { + "epoch": 1.967741935483871, + "grad_norm": 1.1593054366438007, + "learning_rate": 1.442503496751568e-09, + "loss": 0.678236722946167, + "step": 8540 + }, + { + "epoch": 1.9679723502304147, + "grad_norm": 1.3372006359269073, + "learning_rate": 1.4221198449960724e-09, + "loss": 0.7072663307189941, + "step": 8541 + }, + { + "epoch": 1.9682027649769585, + "grad_norm": 1.194618240695654, + "learning_rate": 1.4018811354977732e-09, + "loss": 0.7825980186462402, + "step": 8542 + }, + { + "epoch": 1.9684331797235024, + "grad_norm": 1.8366711172437336, + "learning_rate": 1.3817873711945426e-09, + "loss": 0.786361813545227, + "step": 8543 + }, + { + "epoch": 1.9686635944700461, + "grad_norm": 1.6047169504491765, + "learning_rate": 1.3618385550029365e-09, + "loss": 1.00287926197052, + "step": 8544 + }, + { + "epoch": 1.9688940092165899, + "grad_norm": 1.336810745652672, + "learning_rate": 1.3420346898183054e-09, + "loss": 0.7320775389671326, + "step": 8545 + }, + { + "epoch": 1.9691244239631336, + "grad_norm": 1.0018804515064612, + "learning_rate": 1.322375778515461e-09, + "loss": 0.7127507925033569, + "step": 8546 + }, + { + "epoch": 1.9693548387096773, + "grad_norm": 1.4124185296399752, + "learning_rate": 1.3028618239475652e-09, + "loss": 0.818395733833313, + "step": 8547 + }, + { + "epoch": 1.9695852534562213, + "grad_norm": 1.2063998497880193, + "learning_rate": 1.2834928289472413e-09, + "loss": 0.6384972929954529, + 
"step": 8548 + }, + { + "epoch": 1.969815668202765, + "grad_norm": 1.240783999344712, + "learning_rate": 1.2642687963256849e-09, + "loss": 0.7358517646789551, + "step": 8549 + }, + { + "epoch": 1.9700460829493087, + "grad_norm": 1.1083546443376424, + "learning_rate": 1.2451897288734414e-09, + "loss": 0.7311068773269653, + "step": 8550 + }, + { + "epoch": 1.9702764976958527, + "grad_norm": 1.5415338816809878, + "learning_rate": 1.2262556293597403e-09, + "loss": 0.8390932083129883, + "step": 8551 + }, + { + "epoch": 1.9705069124423962, + "grad_norm": 1.2045586519715463, + "learning_rate": 1.2074665005328277e-09, + "loss": 0.8114689588546753, + "step": 8552 + }, + { + "epoch": 1.9707373271889401, + "grad_norm": 1.4445688810441233, + "learning_rate": 1.1888223451199665e-09, + "loss": 1.0044716596603394, + "step": 8553 + }, + { + "epoch": 1.9709677419354839, + "grad_norm": 1.2243432992298795, + "learning_rate": 1.170323165827214e-09, + "loss": 0.7566370368003845, + "step": 8554 + }, + { + "epoch": 1.9711981566820276, + "grad_norm": 1.2230365473762954, + "learning_rate": 1.1519689653397557e-09, + "loss": 0.7543225288391113, + "step": 8555 + }, + { + "epoch": 1.9714285714285715, + "grad_norm": 1.306226883529119, + "learning_rate": 1.1337597463217941e-09, + "loss": 0.8291902542114258, + "step": 8556 + }, + { + "epoch": 1.9716589861751153, + "grad_norm": 1.1360827313333892, + "learning_rate": 1.1156955114162147e-09, + "loss": 0.7363135814666748, + "step": 8557 + }, + { + "epoch": 1.971889400921659, + "grad_norm": 1.102255040931488, + "learning_rate": 1.0977762632451427e-09, + "loss": 0.7180813550949097, + "step": 8558 + }, + { + "epoch": 1.9721198156682027, + "grad_norm": 1.1849465839861355, + "learning_rate": 1.0800020044093861e-09, + "loss": 0.7220569849014282, + "step": 8559 + }, + { + "epoch": 1.9723502304147464, + "grad_norm": 1.2915012101962247, + "learning_rate": 1.0623727374889925e-09, + "loss": 0.8839110136032104, + "step": 8560 + }, + { + "epoch": 
1.9725806451612904, + "grad_norm": 1.2553727673767463, + "learning_rate": 1.0448884650426926e-09, + "loss": 0.7210807800292969, + "step": 8561 + }, + { + "epoch": 1.9728110599078341, + "grad_norm": 1.3474393893445982, + "learning_rate": 1.0275491896084565e-09, + "loss": 0.6993537545204163, + "step": 8562 + }, + { + "epoch": 1.9730414746543778, + "grad_norm": 1.0591927963671788, + "learning_rate": 1.0103549137030486e-09, + "loss": 0.6951562166213989, + "step": 8563 + }, + { + "epoch": 1.9732718894009218, + "grad_norm": 1.0760064093903359, + "learning_rate": 9.933056398220285e-10, + "loss": 0.855778694152832, + "step": 8564 + }, + { + "epoch": 1.9735023041474653, + "grad_norm": 1.3238204379730676, + "learning_rate": 9.76401370440194e-10, + "loss": 0.8461301326751709, + "step": 8565 + }, + { + "epoch": 1.9737327188940093, + "grad_norm": 1.0765880280550415, + "learning_rate": 9.596421080112493e-10, + "loss": 0.6144053936004639, + "step": 8566 + }, + { + "epoch": 1.973963133640553, + "grad_norm": 1.226899728476588, + "learning_rate": 9.430278549675818e-10, + "loss": 0.6623581647872925, + "step": 8567 + }, + { + "epoch": 1.9741935483870967, + "grad_norm": 1.6396403159587711, + "learning_rate": 9.265586137209292e-10, + "loss": 0.9540686011314392, + "step": 8568 + }, + { + "epoch": 1.9744239631336407, + "grad_norm": 1.1121119945854705, + "learning_rate": 9.102343866616014e-10, + "loss": 0.7231987714767456, + "step": 8569 + }, + { + "epoch": 1.9746543778801844, + "grad_norm": 1.3771440446346792, + "learning_rate": 8.940551761592585e-10, + "loss": 0.7759320735931396, + "step": 8570 + }, + { + "epoch": 1.9748847926267281, + "grad_norm": 1.3995632478363096, + "learning_rate": 8.780209845621334e-10, + "loss": 0.8277846574783325, + "step": 8571 + }, + { + "epoch": 1.9751152073732718, + "grad_norm": 1.1615989785178322, + "learning_rate": 8.621318141974754e-10, + "loss": 0.7913431525230408, + "step": 8572 + }, + { + "epoch": 1.9753456221198156, + "grad_norm": 1.1262387789302248, + 
"learning_rate": 8.46387667371773e-10, + "loss": 0.7011829614639282, + "step": 8573 + }, + { + "epoch": 1.9755760368663595, + "grad_norm": 1.358126505769676, + "learning_rate": 8.30788546370198e-10, + "loss": 0.8762087821960449, + "step": 8574 + }, + { + "epoch": 1.9758064516129032, + "grad_norm": 1.5337262034773564, + "learning_rate": 8.153344534569396e-10, + "loss": 0.7944581508636475, + "step": 8575 + }, + { + "epoch": 1.976036866359447, + "grad_norm": 0.9763562202292912, + "learning_rate": 8.00025390875203e-10, + "loss": 0.7086907625198364, + "step": 8576 + }, + { + "epoch": 1.976267281105991, + "grad_norm": 1.3716397771498143, + "learning_rate": 7.848613608468779e-10, + "loss": 0.7263821959495544, + "step": 8577 + }, + { + "epoch": 1.9764976958525344, + "grad_norm": 1.0912146553836337, + "learning_rate": 7.698423655732034e-10, + "loss": 0.714054524898529, + "step": 8578 + }, + { + "epoch": 1.9767281105990784, + "grad_norm": 1.0671768990247028, + "learning_rate": 7.549684072341023e-10, + "loss": 0.817487359046936, + "step": 8579 + }, + { + "epoch": 1.976958525345622, + "grad_norm": 1.3039849886057633, + "learning_rate": 7.402394879885143e-10, + "loss": 0.7933021783828735, + "step": 8580 + }, + { + "epoch": 1.9771889400921658, + "grad_norm": 1.1473238275849764, + "learning_rate": 7.25655609974396e-10, + "loss": 0.8699008822441101, + "step": 8581 + }, + { + "epoch": 1.9774193548387098, + "grad_norm": 1.2250569758639698, + "learning_rate": 7.112167753083876e-10, + "loss": 0.804245114326477, + "step": 8582 + }, + { + "epoch": 1.9776497695852533, + "grad_norm": 1.322132271674899, + "learning_rate": 6.969229860863679e-10, + "loss": 0.8334434628486633, + "step": 8583 + }, + { + "epoch": 1.9778801843317972, + "grad_norm": 1.1368298808414594, + "learning_rate": 6.827742443831219e-10, + "loss": 0.7549147605895996, + "step": 8584 + }, + { + "epoch": 1.978110599078341, + "grad_norm": 0.9036184179111577, + "learning_rate": 6.687705522522291e-10, + "loss": 0.69701087474823, 
+ "step": 8585 + }, + { + "epoch": 1.9783410138248847, + "grad_norm": 1.259028975685209, + "learning_rate": 6.549119117263969e-10, + "loss": 0.727588415145874, + "step": 8586 + }, + { + "epoch": 1.9785714285714286, + "grad_norm": 1.0967653076646233, + "learning_rate": 6.411983248171271e-10, + "loss": 0.7309392094612122, + "step": 8587 + }, + { + "epoch": 1.9788018433179724, + "grad_norm": 1.6515699626026994, + "learning_rate": 6.276297935149388e-10, + "loss": 0.8299658298492432, + "step": 8588 + }, + { + "epoch": 1.979032258064516, + "grad_norm": 1.362481943616663, + "learning_rate": 6.142063197892566e-10, + "loss": 0.9731055498123169, + "step": 8589 + }, + { + "epoch": 1.97926267281106, + "grad_norm": 1.1190783849934713, + "learning_rate": 6.009279055885219e-10, + "loss": 0.6292351484298706, + "step": 8590 + }, + { + "epoch": 1.9794930875576036, + "grad_norm": 1.2981523800262795, + "learning_rate": 5.877945528400818e-10, + "loss": 0.7881810665130615, + "step": 8591 + }, + { + "epoch": 1.9797235023041475, + "grad_norm": 1.2430793849512602, + "learning_rate": 5.748062634501894e-10, + "loss": 0.7910494804382324, + "step": 8592 + }, + { + "epoch": 1.9799539170506912, + "grad_norm": 1.3789958651744842, + "learning_rate": 5.619630393042252e-10, + "loss": 0.8255902528762817, + "step": 8593 + }, + { + "epoch": 1.980184331797235, + "grad_norm": 1.0908110861505123, + "learning_rate": 5.492648822660318e-10, + "loss": 0.788017749786377, + "step": 8594 + }, + { + "epoch": 1.980414746543779, + "grad_norm": 1.2052887418241187, + "learning_rate": 5.367117941791343e-10, + "loss": 0.8717716932296753, + "step": 8595 + }, + { + "epoch": 1.9806451612903224, + "grad_norm": 1.3810911920135494, + "learning_rate": 5.243037768652981e-10, + "loss": 0.7220178246498108, + "step": 8596 + }, + { + "epoch": 1.9808755760368664, + "grad_norm": 1.2221356933031184, + "learning_rate": 5.120408321256376e-10, + "loss": 0.7536830902099609, + "step": 8597 + }, + { + "epoch": 1.98110599078341, + 
"grad_norm": 1.13011497917934, + "learning_rate": 4.999229617401735e-10, + "loss": 0.7480939626693726, + "step": 8598 + }, + { + "epoch": 1.9813364055299538, + "grad_norm": 1.1029404069670388, + "learning_rate": 4.879501674676101e-10, + "loss": 0.7168867588043213, + "step": 8599 + }, + { + "epoch": 1.9815668202764978, + "grad_norm": 1.1019009005346911, + "learning_rate": 4.761224510460016e-10, + "loss": 0.8352792263031006, + "step": 8600 + } + ], + "logging_steps": 1, + "max_steps": 8680, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7183809526857728.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-8600/training_args.bin b/checkpoint-8600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7eb191dd44f853b2edd49aafea231852c267845 --- /dev/null +++ b/checkpoint-8600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f95b396ac9a3c4ab0d50e403be4c8c0fd191fd2a0aac0b5d95c7c3b72c8501b +size 6968 diff --git a/checkpoint-8600/zero_to_fp32.py b/checkpoint-8600/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-8600/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. 
+# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob 
rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return 
zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensors instead of torch tensors, which is more memory efficient. + Convert a pseudo tensor to a torch tensor by calling ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint.
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-8680/README.md b/checkpoint-8680/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-8680/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-8680/adapter_config.json b/checkpoint-8680/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..07855d838b18d52ab3ab7a1ec1a852f57cf14fd8 --- /dev/null +++ b/checkpoint-8680/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.0.mlp.down_proj", + "v_proj", + "layers.10.mlp.gate_proj", + "layers.1.mlp.up_proj", + "layers.20.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.3.mlp.gate_proj", + "layers.5.mlp.down_proj", + "layers.8.mlp.up_proj", + "layers.4.mlp.down_proj", + "layers.1.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.26.mlp.up_proj", + "layers.3.mlp.up_proj", + "layers.25.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.23.mlp.down_proj", + "layers.15.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.26.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.9.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.12.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.24.mlp.gate_proj", + "layers.16.mlp.gate_proj", + "layers.25.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.0.mlp.up_proj", + "layers.15.mlp.gate_proj", + "layers.9.mlp.gate_proj", + "layers.22.mlp.gate_proj", + "layers.24.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.14.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "k_proj", + "layers.27.mlp.down_proj", + "layers.19.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.19.mlp.up_proj", + "q_proj", + "layers.17.mlp.gate_proj", + "layers.0.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.27.mlp.up_proj", + "layers.2.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.6.mlp.up_proj", + "layers.21.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.12.mlp.up_proj", + "layers.10.mlp.down_proj", + "layers.12.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.2.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.2.mlp.up_proj", + "o_proj", + "layers.16.mlp.down_proj", + "layers.4.mlp.gate_proj", + "layers.20.mlp.gate_proj", + "layers.13.mlp.down_proj", + 
"layers.13.mlp.gate_proj", + "layers.22.mlp.up_proj", + "layers.11.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.23.mlp.gate_proj", + "layers.4.mlp.up_proj", + "layers.17.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.6.mlp.gate_proj", + "layers.16.mlp.up_proj", + "layers.18.mlp.up_proj", + "layers.7.mlp.gate_proj", + "layers.8.mlp.down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-8680/adapter_model.safetensors b/checkpoint-8680/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98f120b5eede66c5f031ca2bc6952df22cd29d41 --- /dev/null +++ b/checkpoint-8680/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80f97cfb7cba38b2ff84ae927993a7eb6fcc670bc1c05a488c870cc11232bc01 +size 40428088 diff --git a/checkpoint-8680/chat_template.jinja b/checkpoint-8680/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-8680/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% 
set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-8680/global_step8680/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-8680/global_step8680/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecef1c76ac2074d89dcd4730a82a729677e64176 --- /dev/null +++ b/checkpoint-8680/global_step8680/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0eb7c90ae520ee994bdb630be6eeef7c86a3b0db776ad0f008a7d0729d9d8e8 +size 242224880 diff --git a/checkpoint-8680/global_step8680/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-8680/global_step8680/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb6aa513c1b8160d42eebde44773c01b38ff8a78 --- /dev/null +++ b/checkpoint-8680/global_step8680/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a11bc124463e6fca908a7fb2e776c9e4ca53ccaf580dff7bb9affb3279ee0e8 +size 460630 diff --git a/checkpoint-8680/latest b/checkpoint-8680/latest new file mode 100644 index 0000000000000000000000000000000000000000..160bbb85cf962a6b65d6fe826aabb28bc8c3a2ab --- /dev/null +++ b/checkpoint-8680/latest @@ -0,0 +1 @@ +global_step8680 \ No newline at end of file diff --git a/checkpoint-8680/processor_config.json b/checkpoint-8680/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-8680/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-8680/rng_state.pth b/checkpoint-8680/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fcf11c9b78de2c2c55fdfc44daef09cd9181c14 --- /dev/null +++ b/checkpoint-8680/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc398a73e46bca50defc25b4467441315246a33383a5d6c80985d238e57127f +size 14244 diff --git a/checkpoint-8680/scheduler.pt b/checkpoint-8680/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e43af7b25e600dfa0a865ee057f58b33a020c58 --- /dev/null +++ b/checkpoint-8680/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f35bbec2ac3db123168e224b69ecfad4a6fcee7d3f24c4863e59a43f39f95d +size 
1000 diff --git a/checkpoint-8680/tokenizer.json b/checkpoint-8680/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-8680/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-8680/tokenizer_config.json b/checkpoint-8680/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-8680/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-8680/trainer_state.json b/checkpoint-8680/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..44915747d293cd5ae6a1af54ddd66eeb647af0a3 --- /dev/null +++ b/checkpoint-8680/trainer_state.json @@ -0,0 +1,60794 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 8680, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002304147465437788, + "grad_norm": 0.3584135221139379, + 
"learning_rate": 0.0, + "loss": 1.1575632095336914, + "step": 1 + }, + { + "epoch": 0.0004608294930875576, + "grad_norm": 0.3035367055626511, + "learning_rate": 4.6082949308755755e-09, + "loss": 0.9973502159118652, + "step": 2 + }, + { + "epoch": 0.0006912442396313364, + "grad_norm": 0.39685233086299543, + "learning_rate": 9.216589861751151e-09, + "loss": 1.0778999328613281, + "step": 3 + }, + { + "epoch": 0.0009216589861751152, + "grad_norm": 0.4029042979509503, + "learning_rate": 1.3824884792626728e-08, + "loss": 1.1912263631820679, + "step": 4 + }, + { + "epoch": 0.001152073732718894, + "grad_norm": 0.3943812894307851, + "learning_rate": 1.8433179723502302e-08, + "loss": 1.136031150817871, + "step": 5 + }, + { + "epoch": 0.0013824884792626728, + "grad_norm": 0.472718552613566, + "learning_rate": 2.304147465437788e-08, + "loss": 1.1647956371307373, + "step": 6 + }, + { + "epoch": 0.0016129032258064516, + "grad_norm": 0.4378363913681294, + "learning_rate": 2.7649769585253456e-08, + "loss": 1.144924283027649, + "step": 7 + }, + { + "epoch": 0.0018433179723502304, + "grad_norm": 0.412264706125121, + "learning_rate": 3.225806451612903e-08, + "loss": 1.1821019649505615, + "step": 8 + }, + { + "epoch": 0.0020737327188940094, + "grad_norm": 0.35864626774735575, + "learning_rate": 3.6866359447004604e-08, + "loss": 1.0586045980453491, + "step": 9 + }, + { + "epoch": 0.002304147465437788, + "grad_norm": 0.497058147699291, + "learning_rate": 4.1474654377880186e-08, + "loss": 1.2029818296432495, + "step": 10 + }, + { + "epoch": 0.002534562211981567, + "grad_norm": 0.465265464928516, + "learning_rate": 4.608294930875576e-08, + "loss": 1.1411634683609009, + "step": 11 + }, + { + "epoch": 0.0027649769585253456, + "grad_norm": 0.4356529753705429, + "learning_rate": 5.069124423963134e-08, + "loss": 1.2719087600708008, + "step": 12 + }, + { + "epoch": 0.0029953917050691246, + "grad_norm": 0.4469831586732583, + "learning_rate": 5.529953917050691e-08, + "loss": 1.1132495403289795, + 
"step": 13 + }, + { + "epoch": 0.0032258064516129032, + "grad_norm": 0.3918942421249174, + "learning_rate": 5.990783410138249e-08, + "loss": 1.1900808811187744, + "step": 14 + }, + { + "epoch": 0.0034562211981566822, + "grad_norm": 0.33446734054876004, + "learning_rate": 6.451612903225806e-08, + "loss": 1.2273608446121216, + "step": 15 + }, + { + "epoch": 0.003686635944700461, + "grad_norm": 0.4610551419026991, + "learning_rate": 6.912442396313364e-08, + "loss": 1.2130601406097412, + "step": 16 + }, + { + "epoch": 0.00391705069124424, + "grad_norm": 0.4765520188128542, + "learning_rate": 7.373271889400921e-08, + "loss": 1.0534124374389648, + "step": 17 + }, + { + "epoch": 0.004147465437788019, + "grad_norm": 0.4247458361448018, + "learning_rate": 7.834101382488478e-08, + "loss": 1.1796221733093262, + "step": 18 + }, + { + "epoch": 0.004377880184331797, + "grad_norm": 0.42651087679972033, + "learning_rate": 8.294930875576037e-08, + "loss": 1.118175745010376, + "step": 19 + }, + { + "epoch": 0.004608294930875576, + "grad_norm": 0.37538111415149067, + "learning_rate": 8.755760368663594e-08, + "loss": 1.140963077545166, + "step": 20 + }, + { + "epoch": 0.004838709677419355, + "grad_norm": 0.39373769301837386, + "learning_rate": 9.216589861751152e-08, + "loss": 1.107339859008789, + "step": 21 + }, + { + "epoch": 0.005069124423963134, + "grad_norm": 0.5053900590341595, + "learning_rate": 9.677419354838709e-08, + "loss": 1.171803593635559, + "step": 22 + }, + { + "epoch": 0.005299539170506912, + "grad_norm": 0.32897537004851696, + "learning_rate": 1.0138248847926267e-07, + "loss": 0.9935251474380493, + "step": 23 + }, + { + "epoch": 0.005529953917050691, + "grad_norm": 0.4355535799950001, + "learning_rate": 1.0599078341013824e-07, + "loss": 1.0870952606201172, + "step": 24 + }, + { + "epoch": 0.00576036866359447, + "grad_norm": 0.5215895570336967, + "learning_rate": 1.1059907834101383e-07, + "loss": 1.1520278453826904, + "step": 25 + }, + { + "epoch": 
0.005990783410138249, + "grad_norm": 0.4878994851998504, + "learning_rate": 1.152073732718894e-07, + "loss": 1.3603750467300415, + "step": 26 + }, + { + "epoch": 0.006221198156682027, + "grad_norm": 0.3985371704289713, + "learning_rate": 1.1981566820276498e-07, + "loss": 1.230550765991211, + "step": 27 + }, + { + "epoch": 0.0064516129032258064, + "grad_norm": 0.4105556408349015, + "learning_rate": 1.2442396313364054e-07, + "loss": 1.267604112625122, + "step": 28 + }, + { + "epoch": 0.0066820276497695855, + "grad_norm": 0.3604672745500653, + "learning_rate": 1.2903225806451611e-07, + "loss": 1.344348669052124, + "step": 29 + }, + { + "epoch": 0.0069124423963133645, + "grad_norm": 0.42234881975895605, + "learning_rate": 1.336405529953917e-07, + "loss": 1.2794291973114014, + "step": 30 + }, + { + "epoch": 0.007142857142857143, + "grad_norm": 0.39749887698930225, + "learning_rate": 1.3824884792626728e-07, + "loss": 1.2841103076934814, + "step": 31 + }, + { + "epoch": 0.007373271889400922, + "grad_norm": 0.34204310388035036, + "learning_rate": 1.4285714285714285e-07, + "loss": 1.1505224704742432, + "step": 32 + }, + { + "epoch": 0.007603686635944701, + "grad_norm": 0.36676388907062357, + "learning_rate": 1.4746543778801842e-07, + "loss": 0.9800833463668823, + "step": 33 + }, + { + "epoch": 0.00783410138248848, + "grad_norm": 0.4216809539302965, + "learning_rate": 1.52073732718894e-07, + "loss": 1.3712589740753174, + "step": 34 + }, + { + "epoch": 0.008064516129032258, + "grad_norm": 0.46644559931224167, + "learning_rate": 1.5668202764976955e-07, + "loss": 1.2274689674377441, + "step": 35 + }, + { + "epoch": 0.008294930875576038, + "grad_norm": 0.41359150478695417, + "learning_rate": 1.6129032258064515e-07, + "loss": 1.0673755407333374, + "step": 36 + }, + { + "epoch": 0.008525345622119816, + "grad_norm": 0.534062363030203, + "learning_rate": 1.6589861751152074e-07, + "loss": 1.242164134979248, + "step": 37 + }, + { + "epoch": 0.008755760368663594, + "grad_norm": 
0.48756247774131056, + "learning_rate": 1.705069124423963e-07, + "loss": 1.190554141998291, + "step": 38 + }, + { + "epoch": 0.008986175115207374, + "grad_norm": 0.35848282094721656, + "learning_rate": 1.7511520737327188e-07, + "loss": 1.3119773864746094, + "step": 39 + }, + { + "epoch": 0.009216589861751152, + "grad_norm": 0.4466769921356875, + "learning_rate": 1.7972350230414745e-07, + "loss": 1.2532517910003662, + "step": 40 + }, + { + "epoch": 0.00944700460829493, + "grad_norm": 0.4271763580587928, + "learning_rate": 1.8433179723502305e-07, + "loss": 1.307154655456543, + "step": 41 + }, + { + "epoch": 0.00967741935483871, + "grad_norm": 0.432221455567464, + "learning_rate": 1.889400921658986e-07, + "loss": 1.1899281740188599, + "step": 42 + }, + { + "epoch": 0.009907834101382488, + "grad_norm": 0.48501644393966153, + "learning_rate": 1.9354838709677418e-07, + "loss": 1.1928249597549438, + "step": 43 + }, + { + "epoch": 0.010138248847926268, + "grad_norm": 0.35170632131851265, + "learning_rate": 1.9815668202764975e-07, + "loss": 1.1663157939910889, + "step": 44 + }, + { + "epoch": 0.010368663594470046, + "grad_norm": 0.43449129429745276, + "learning_rate": 2.0276497695852535e-07, + "loss": 1.1806118488311768, + "step": 45 + }, + { + "epoch": 0.010599078341013824, + "grad_norm": 0.39933118678172597, + "learning_rate": 2.073732718894009e-07, + "loss": 1.1704952716827393, + "step": 46 + }, + { + "epoch": 0.010829493087557604, + "grad_norm": 0.46071358975984034, + "learning_rate": 2.1198156682027649e-07, + "loss": 1.2124149799346924, + "step": 47 + }, + { + "epoch": 0.011059907834101382, + "grad_norm": 0.325920139351066, + "learning_rate": 2.1658986175115208e-07, + "loss": 1.041813850402832, + "step": 48 + }, + { + "epoch": 0.01129032258064516, + "grad_norm": 0.4189805583015969, + "learning_rate": 2.2119815668202765e-07, + "loss": 1.255402684211731, + "step": 49 + }, + { + "epoch": 0.01152073732718894, + "grad_norm": 0.369986826532368, + "learning_rate": 
2.2580645161290322e-07, + "loss": 1.1115221977233887, + "step": 50 + }, + { + "epoch": 0.011751152073732719, + "grad_norm": 0.501835295036206, + "learning_rate": 2.304147465437788e-07, + "loss": 1.4048426151275635, + "step": 51 + }, + { + "epoch": 0.011981566820276499, + "grad_norm": 0.38759638044019523, + "learning_rate": 2.3502304147465438e-07, + "loss": 1.1690936088562012, + "step": 52 + }, + { + "epoch": 0.012211981566820277, + "grad_norm": 0.43771993971927803, + "learning_rate": 2.3963133640552995e-07, + "loss": 1.164888620376587, + "step": 53 + }, + { + "epoch": 0.012442396313364055, + "grad_norm": 0.5047093250847474, + "learning_rate": 2.442396313364055e-07, + "loss": 1.004424810409546, + "step": 54 + }, + { + "epoch": 0.012672811059907835, + "grad_norm": 0.371768250028493, + "learning_rate": 2.488479262672811e-07, + "loss": 0.8810856342315674, + "step": 55 + }, + { + "epoch": 0.012903225806451613, + "grad_norm": 0.41437582347111235, + "learning_rate": 2.534562211981567e-07, + "loss": 1.300262451171875, + "step": 56 + }, + { + "epoch": 0.013133640552995391, + "grad_norm": 0.44923919860912964, + "learning_rate": 2.5806451612903223e-07, + "loss": 1.3624285459518433, + "step": 57 + }, + { + "epoch": 0.013364055299539171, + "grad_norm": 0.37916325568511644, + "learning_rate": 2.6267281105990777e-07, + "loss": 1.2133375406265259, + "step": 58 + }, + { + "epoch": 0.013594470046082949, + "grad_norm": 0.3665676434937369, + "learning_rate": 2.672811059907834e-07, + "loss": 1.2203283309936523, + "step": 59 + }, + { + "epoch": 0.013824884792626729, + "grad_norm": 0.4314731168039537, + "learning_rate": 2.7188940092165896e-07, + "loss": 1.291412353515625, + "step": 60 + }, + { + "epoch": 0.014055299539170507, + "grad_norm": 0.46787898249820037, + "learning_rate": 2.7649769585253456e-07, + "loss": 1.1596577167510986, + "step": 61 + }, + { + "epoch": 0.014285714285714285, + "grad_norm": 0.34850075759056304, + "learning_rate": 2.8110599078341015e-07, + "loss": 
0.9789823889732361, + "step": 62 + }, + { + "epoch": 0.014516129032258065, + "grad_norm": 0.46810420323672, + "learning_rate": 2.857142857142857e-07, + "loss": 1.220383882522583, + "step": 63 + }, + { + "epoch": 0.014746543778801843, + "grad_norm": 0.36577992953429955, + "learning_rate": 2.903225806451613e-07, + "loss": 1.0961871147155762, + "step": 64 + }, + { + "epoch": 0.014976958525345621, + "grad_norm": 0.4155727286496237, + "learning_rate": 2.9493087557603683e-07, + "loss": 1.2281936407089233, + "step": 65 + }, + { + "epoch": 0.015207373271889401, + "grad_norm": 0.48770399467414544, + "learning_rate": 2.9953917050691243e-07, + "loss": 1.279728889465332, + "step": 66 + }, + { + "epoch": 0.01543778801843318, + "grad_norm": 0.3697109399388579, + "learning_rate": 3.04147465437788e-07, + "loss": 1.0932798385620117, + "step": 67 + }, + { + "epoch": 0.01566820276497696, + "grad_norm": 0.4768828309013543, + "learning_rate": 3.0875576036866356e-07, + "loss": 1.1612955331802368, + "step": 68 + }, + { + "epoch": 0.015898617511520736, + "grad_norm": 0.335260500319883, + "learning_rate": 3.133640552995391e-07, + "loss": 1.193152666091919, + "step": 69 + }, + { + "epoch": 0.016129032258064516, + "grad_norm": 0.3754577001974335, + "learning_rate": 3.1797235023041476e-07, + "loss": 1.3303695917129517, + "step": 70 + }, + { + "epoch": 0.016359447004608296, + "grad_norm": 0.5384978005623245, + "learning_rate": 3.225806451612903e-07, + "loss": 1.3735731840133667, + "step": 71 + }, + { + "epoch": 0.016589861751152075, + "grad_norm": 0.44147085813841874, + "learning_rate": 3.271889400921659e-07, + "loss": 1.162925124168396, + "step": 72 + }, + { + "epoch": 0.016820276497695852, + "grad_norm": 0.46260262466297236, + "learning_rate": 3.317972350230415e-07, + "loss": 1.3879203796386719, + "step": 73 + }, + { + "epoch": 0.017050691244239632, + "grad_norm": 0.33864035083037825, + "learning_rate": 3.3640552995391703e-07, + "loss": 1.2721638679504395, + "step": 74 + }, + { + "epoch": 
0.01728110599078341, + "grad_norm": 0.5797449954735189, + "learning_rate": 3.410138248847926e-07, + "loss": 1.3997783660888672, + "step": 75 + }, + { + "epoch": 0.017511520737327188, + "grad_norm": 0.3824734589731608, + "learning_rate": 3.4562211981566817e-07, + "loss": 1.1099059581756592, + "step": 76 + }, + { + "epoch": 0.017741935483870968, + "grad_norm": 0.6286343528066216, + "learning_rate": 3.5023041474654376e-07, + "loss": 1.341759204864502, + "step": 77 + }, + { + "epoch": 0.017972350230414748, + "grad_norm": 0.41058458963409694, + "learning_rate": 3.5483870967741936e-07, + "loss": 1.343479871749878, + "step": 78 + }, + { + "epoch": 0.018202764976958524, + "grad_norm": 0.41653629518149576, + "learning_rate": 3.594470046082949e-07, + "loss": 1.2225772142410278, + "step": 79 + }, + { + "epoch": 0.018433179723502304, + "grad_norm": 0.37871730557010347, + "learning_rate": 3.6405529953917044e-07, + "loss": 1.1934573650360107, + "step": 80 + }, + { + "epoch": 0.018663594470046084, + "grad_norm": 0.36930989407616927, + "learning_rate": 3.686635944700461e-07, + "loss": 1.099440336227417, + "step": 81 + }, + { + "epoch": 0.01889400921658986, + "grad_norm": 0.4445938548359885, + "learning_rate": 3.7327188940092163e-07, + "loss": 1.0864269733428955, + "step": 82 + }, + { + "epoch": 0.01912442396313364, + "grad_norm": 0.4183127094774659, + "learning_rate": 3.778801843317972e-07, + "loss": 1.0706703662872314, + "step": 83 + }, + { + "epoch": 0.01935483870967742, + "grad_norm": 0.3377183372891763, + "learning_rate": 3.824884792626728e-07, + "loss": 1.1675662994384766, + "step": 84 + }, + { + "epoch": 0.019585253456221197, + "grad_norm": 0.4219766455348787, + "learning_rate": 3.8709677419354837e-07, + "loss": 1.3294553756713867, + "step": 85 + }, + { + "epoch": 0.019815668202764977, + "grad_norm": 0.39357768126078463, + "learning_rate": 3.9170506912442396e-07, + "loss": 1.050878882408142, + "step": 86 + }, + { + "epoch": 0.020046082949308756, + "grad_norm": 
0.5263429396452582, + "learning_rate": 3.963133640552995e-07, + "loss": 1.3243739604949951, + "step": 87 + }, + { + "epoch": 0.020276497695852536, + "grad_norm": 0.4373425676890139, + "learning_rate": 4.009216589861751e-07, + "loss": 1.1350429058074951, + "step": 88 + }, + { + "epoch": 0.020506912442396313, + "grad_norm": 0.39555461421299365, + "learning_rate": 4.055299539170507e-07, + "loss": 1.24526047706604, + "step": 89 + }, + { + "epoch": 0.020737327188940093, + "grad_norm": 0.5372699223271491, + "learning_rate": 4.1013824884792624e-07, + "loss": 1.3459908962249756, + "step": 90 + }, + { + "epoch": 0.020967741935483872, + "grad_norm": 0.45711998906450413, + "learning_rate": 4.147465437788018e-07, + "loss": 1.2129223346710205, + "step": 91 + }, + { + "epoch": 0.02119815668202765, + "grad_norm": 0.396171288478396, + "learning_rate": 4.1935483870967743e-07, + "loss": 1.0522969961166382, + "step": 92 + }, + { + "epoch": 0.02142857142857143, + "grad_norm": 0.4102245507283394, + "learning_rate": 4.2396313364055297e-07, + "loss": 1.3128937482833862, + "step": 93 + }, + { + "epoch": 0.02165898617511521, + "grad_norm": 0.4498995421630644, + "learning_rate": 4.285714285714285e-07, + "loss": 1.3582855463027954, + "step": 94 + }, + { + "epoch": 0.021889400921658985, + "grad_norm": 0.401280081593378, + "learning_rate": 4.3317972350230416e-07, + "loss": 1.3959028720855713, + "step": 95 + }, + { + "epoch": 0.022119815668202765, + "grad_norm": 0.34811166324547105, + "learning_rate": 4.377880184331797e-07, + "loss": 1.149501085281372, + "step": 96 + }, + { + "epoch": 0.022350230414746545, + "grad_norm": 0.48133121679013907, + "learning_rate": 4.423963133640553e-07, + "loss": 1.024135708808899, + "step": 97 + }, + { + "epoch": 0.02258064516129032, + "grad_norm": 0.42298775317954185, + "learning_rate": 4.4700460829493084e-07, + "loss": 0.9255483150482178, + "step": 98 + }, + { + "epoch": 0.0228110599078341, + "grad_norm": 0.4429779269301727, + "learning_rate": 
4.5161290322580644e-07, + "loss": 1.1694722175598145, + "step": 99 + }, + { + "epoch": 0.02304147465437788, + "grad_norm": 0.5257102588195529, + "learning_rate": 4.5622119815668203e-07, + "loss": 1.1588457822799683, + "step": 100 + }, + { + "epoch": 0.023271889400921657, + "grad_norm": 0.37478821324150746, + "learning_rate": 4.608294930875576e-07, + "loss": 1.172672986984253, + "step": 101 + }, + { + "epoch": 0.023502304147465437, + "grad_norm": 0.5416446977134604, + "learning_rate": 4.654377880184331e-07, + "loss": 1.092405915260315, + "step": 102 + }, + { + "epoch": 0.023732718894009217, + "grad_norm": 0.40304171727239163, + "learning_rate": 4.7004608294930877e-07, + "loss": 1.11540687084198, + "step": 103 + }, + { + "epoch": 0.023963133640552997, + "grad_norm": 0.46185115643683655, + "learning_rate": 4.746543778801843e-07, + "loss": 1.1380189657211304, + "step": 104 + }, + { + "epoch": 0.024193548387096774, + "grad_norm": 0.4705857339336588, + "learning_rate": 4.792626728110599e-07, + "loss": 1.1031086444854736, + "step": 105 + }, + { + "epoch": 0.024423963133640553, + "grad_norm": 0.38094574356569405, + "learning_rate": 4.838709677419355e-07, + "loss": 1.1988024711608887, + "step": 106 + }, + { + "epoch": 0.024654377880184333, + "grad_norm": 0.48794686062473364, + "learning_rate": 4.88479262672811e-07, + "loss": 1.0814614295959473, + "step": 107 + }, + { + "epoch": 0.02488479262672811, + "grad_norm": 0.41304010922593737, + "learning_rate": 4.930875576036866e-07, + "loss": 1.0541695356369019, + "step": 108 + }, + { + "epoch": 0.02511520737327189, + "grad_norm": 0.4262047073398665, + "learning_rate": 4.976958525345622e-07, + "loss": 1.2281692028045654, + "step": 109 + }, + { + "epoch": 0.02534562211981567, + "grad_norm": 0.4617413170072456, + "learning_rate": 5.023041474654378e-07, + "loss": 1.2542369365692139, + "step": 110 + }, + { + "epoch": 0.025576036866359446, + "grad_norm": 0.46571699511286535, + "learning_rate": 5.069124423963134e-07, + "loss": 
1.36039137840271, + "step": 111 + }, + { + "epoch": 0.025806451612903226, + "grad_norm": 0.3893860976585314, + "learning_rate": 5.11520737327189e-07, + "loss": 1.1092976331710815, + "step": 112 + }, + { + "epoch": 0.026036866359447006, + "grad_norm": 0.4636216593448083, + "learning_rate": 5.161290322580645e-07, + "loss": 1.0634076595306396, + "step": 113 + }, + { + "epoch": 0.026267281105990782, + "grad_norm": 0.3440530135190564, + "learning_rate": 5.2073732718894e-07, + "loss": 1.0024809837341309, + "step": 114 + }, + { + "epoch": 0.026497695852534562, + "grad_norm": 0.4346835070660911, + "learning_rate": 5.253456221198155e-07, + "loss": 1.1691724061965942, + "step": 115 + }, + { + "epoch": 0.026728110599078342, + "grad_norm": 0.46992230717269323, + "learning_rate": 5.299539170506912e-07, + "loss": 1.2053219079971313, + "step": 116 + }, + { + "epoch": 0.02695852534562212, + "grad_norm": 0.3668719861525143, + "learning_rate": 5.345622119815668e-07, + "loss": 1.119420051574707, + "step": 117 + }, + { + "epoch": 0.027188940092165898, + "grad_norm": 0.44063509410116297, + "learning_rate": 5.391705069124423e-07, + "loss": 1.1640167236328125, + "step": 118 + }, + { + "epoch": 0.027419354838709678, + "grad_norm": 0.41158620514350025, + "learning_rate": 5.437788018433179e-07, + "loss": 1.180116057395935, + "step": 119 + }, + { + "epoch": 0.027649769585253458, + "grad_norm": 0.4684655855415561, + "learning_rate": 5.483870967741935e-07, + "loss": 1.0726159811019897, + "step": 120 + }, + { + "epoch": 0.027880184331797234, + "grad_norm": 0.44443528947779826, + "learning_rate": 5.529953917050691e-07, + "loss": 1.03219473361969, + "step": 121 + }, + { + "epoch": 0.028110599078341014, + "grad_norm": 0.4615930748718386, + "learning_rate": 5.576036866359447e-07, + "loss": 1.1545735597610474, + "step": 122 + }, + { + "epoch": 0.028341013824884794, + "grad_norm": 0.4154044637047318, + "learning_rate": 5.622119815668203e-07, + "loss": 1.2409746646881104, + "step": 123 + }, + { + 
"epoch": 0.02857142857142857, + "grad_norm": 0.48642203067509454, + "learning_rate": 5.668202764976958e-07, + "loss": 1.2717409133911133, + "step": 124 + }, + { + "epoch": 0.02880184331797235, + "grad_norm": 0.5633308049530943, + "learning_rate": 5.714285714285714e-07, + "loss": 1.523846983909607, + "step": 125 + }, + { + "epoch": 0.02903225806451613, + "grad_norm": 0.47068700261388136, + "learning_rate": 5.760368663594469e-07, + "loss": 1.3386890888214111, + "step": 126 + }, + { + "epoch": 0.029262672811059907, + "grad_norm": 0.5199142981609907, + "learning_rate": 5.806451612903226e-07, + "loss": 1.3080404996871948, + "step": 127 + }, + { + "epoch": 0.029493087557603687, + "grad_norm": 0.530224330517059, + "learning_rate": 5.852534562211982e-07, + "loss": 1.3194537162780762, + "step": 128 + }, + { + "epoch": 0.029723502304147466, + "grad_norm": 0.49119251759787413, + "learning_rate": 5.898617511520737e-07, + "loss": 1.0546228885650635, + "step": 129 + }, + { + "epoch": 0.029953917050691243, + "grad_norm": 0.44238233872112126, + "learning_rate": 5.944700460829493e-07, + "loss": 1.3160395622253418, + "step": 130 + }, + { + "epoch": 0.030184331797235023, + "grad_norm": 0.5551864793339897, + "learning_rate": 5.990783410138249e-07, + "loss": 1.3497555255889893, + "step": 131 + }, + { + "epoch": 0.030414746543778803, + "grad_norm": 0.41383181378393813, + "learning_rate": 6.036866359447004e-07, + "loss": 1.0863350629806519, + "step": 132 + }, + { + "epoch": 0.03064516129032258, + "grad_norm": 0.4913368059485873, + "learning_rate": 6.08294930875576e-07, + "loss": 1.1640913486480713, + "step": 133 + }, + { + "epoch": 0.03087557603686636, + "grad_norm": 0.4309615007654084, + "learning_rate": 6.129032258064516e-07, + "loss": 1.398510217666626, + "step": 134 + }, + { + "epoch": 0.03110599078341014, + "grad_norm": 0.46249423735581563, + "learning_rate": 6.175115207373271e-07, + "loss": 1.3015594482421875, + "step": 135 + }, + { + "epoch": 0.03133640552995392, + "grad_norm": 
0.5511951371835903, + "learning_rate": 6.221198156682027e-07, + "loss": 1.2786016464233398, + "step": 136 + }, + { + "epoch": 0.031566820276497695, + "grad_norm": 0.35056112177409643, + "learning_rate": 6.267281105990782e-07, + "loss": 1.0863161087036133, + "step": 137 + }, + { + "epoch": 0.03179723502304147, + "grad_norm": 0.49469780540978775, + "learning_rate": 6.313364055299539e-07, + "loss": 1.1590030193328857, + "step": 138 + }, + { + "epoch": 0.032027649769585255, + "grad_norm": 0.4498097850802204, + "learning_rate": 6.359447004608295e-07, + "loss": 1.2473185062408447, + "step": 139 + }, + { + "epoch": 0.03225806451612903, + "grad_norm": 0.46996183926649465, + "learning_rate": 6.40552995391705e-07, + "loss": 1.1982496976852417, + "step": 140 + }, + { + "epoch": 0.03248847926267281, + "grad_norm": 0.39627654459475076, + "learning_rate": 6.451612903225806e-07, + "loss": 1.078690528869629, + "step": 141 + }, + { + "epoch": 0.03271889400921659, + "grad_norm": 0.4831308537053794, + "learning_rate": 6.497695852534562e-07, + "loss": 1.1540311574935913, + "step": 142 + }, + { + "epoch": 0.03294930875576037, + "grad_norm": 0.4510531995801552, + "learning_rate": 6.543778801843318e-07, + "loss": 1.319035530090332, + "step": 143 + }, + { + "epoch": 0.03317972350230415, + "grad_norm": 0.46683155201608206, + "learning_rate": 6.589861751152074e-07, + "loss": 1.199448585510254, + "step": 144 + }, + { + "epoch": 0.03341013824884793, + "grad_norm": 0.526397133846452, + "learning_rate": 6.63594470046083e-07, + "loss": 1.212646484375, + "step": 145 + }, + { + "epoch": 0.033640552995391704, + "grad_norm": 0.6339080221663279, + "learning_rate": 6.682027649769585e-07, + "loss": 1.2833064794540405, + "step": 146 + }, + { + "epoch": 0.03387096774193549, + "grad_norm": 0.6111094782416204, + "learning_rate": 6.728110599078341e-07, + "loss": 1.2852118015289307, + "step": 147 + }, + { + "epoch": 0.034101382488479264, + "grad_norm": 0.36790627555446376, + "learning_rate": 
6.774193548387096e-07, + "loss": 1.0287699699401855, + "step": 148 + }, + { + "epoch": 0.03433179723502304, + "grad_norm": 0.4705970251054534, + "learning_rate": 6.820276497695853e-07, + "loss": 1.2580914497375488, + "step": 149 + }, + { + "epoch": 0.03456221198156682, + "grad_norm": 0.4446865658925291, + "learning_rate": 6.866359447004608e-07, + "loss": 1.0557801723480225, + "step": 150 + }, + { + "epoch": 0.0347926267281106, + "grad_norm": 0.4962737867323335, + "learning_rate": 6.912442396313363e-07, + "loss": 1.1820557117462158, + "step": 151 + }, + { + "epoch": 0.035023041474654376, + "grad_norm": 0.4496579463689646, + "learning_rate": 6.958525345622119e-07, + "loss": 1.2777981758117676, + "step": 152 + }, + { + "epoch": 0.03525345622119816, + "grad_norm": 0.4664315599937052, + "learning_rate": 7.004608294930875e-07, + "loss": 1.1465356349945068, + "step": 153 + }, + { + "epoch": 0.035483870967741936, + "grad_norm": 0.5245233624695497, + "learning_rate": 7.05069124423963e-07, + "loss": 1.3553744554519653, + "step": 154 + }, + { + "epoch": 0.03571428571428571, + "grad_norm": 0.5474513239817841, + "learning_rate": 7.096774193548387e-07, + "loss": 1.176223874092102, + "step": 155 + }, + { + "epoch": 0.035944700460829496, + "grad_norm": 0.4022708922904972, + "learning_rate": 7.142857142857143e-07, + "loss": 1.1771761178970337, + "step": 156 + }, + { + "epoch": 0.03617511520737327, + "grad_norm": 0.5000685120319052, + "learning_rate": 7.188940092165898e-07, + "loss": 1.1598860025405884, + "step": 157 + }, + { + "epoch": 0.03640552995391705, + "grad_norm": 0.4955460688514832, + "learning_rate": 7.235023041474654e-07, + "loss": 1.0689195394515991, + "step": 158 + }, + { + "epoch": 0.03663594470046083, + "grad_norm": 0.5324202700222229, + "learning_rate": 7.281105990783409e-07, + "loss": 1.1444990634918213, + "step": 159 + }, + { + "epoch": 0.03686635944700461, + "grad_norm": 0.441885052912425, + "learning_rate": 7.327188940092166e-07, + "loss": 1.2261321544647217, + 
"step": 160 + }, + { + "epoch": 0.037096774193548385, + "grad_norm": 0.47946473640002796, + "learning_rate": 7.373271889400922e-07, + "loss": 0.9325876235961914, + "step": 161 + }, + { + "epoch": 0.03732718894009217, + "grad_norm": 0.46688477365444836, + "learning_rate": 7.419354838709677e-07, + "loss": 1.071167230606079, + "step": 162 + }, + { + "epoch": 0.037557603686635944, + "grad_norm": 0.5188018198616766, + "learning_rate": 7.465437788018433e-07, + "loss": 1.1856298446655273, + "step": 163 + }, + { + "epoch": 0.03778801843317972, + "grad_norm": 0.5279511073474723, + "learning_rate": 7.511520737327189e-07, + "loss": 1.13883376121521, + "step": 164 + }, + { + "epoch": 0.038018433179723504, + "grad_norm": 0.4671725091927055, + "learning_rate": 7.557603686635944e-07, + "loss": 1.2896685600280762, + "step": 165 + }, + { + "epoch": 0.03824884792626728, + "grad_norm": 0.6286776240106037, + "learning_rate": 7.603686635944701e-07, + "loss": 1.3122754096984863, + "step": 166 + }, + { + "epoch": 0.03847926267281106, + "grad_norm": 0.5120060171404104, + "learning_rate": 7.649769585253457e-07, + "loss": 1.165675163269043, + "step": 167 + }, + { + "epoch": 0.03870967741935484, + "grad_norm": 0.5132036652169082, + "learning_rate": 7.695852534562211e-07, + "loss": 1.1348214149475098, + "step": 168 + }, + { + "epoch": 0.03894009216589862, + "grad_norm": 0.5816469452243797, + "learning_rate": 7.741935483870967e-07, + "loss": 1.287818431854248, + "step": 169 + }, + { + "epoch": 0.03917050691244239, + "grad_norm": 0.4886112893618036, + "learning_rate": 7.788018433179722e-07, + "loss": 1.0723031759262085, + "step": 170 + }, + { + "epoch": 0.03940092165898618, + "grad_norm": 0.5572220637370465, + "learning_rate": 7.834101382488479e-07, + "loss": 1.29054594039917, + "step": 171 + }, + { + "epoch": 0.03963133640552995, + "grad_norm": 0.4996602061858042, + "learning_rate": 7.880184331797235e-07, + "loss": 1.201147198677063, + "step": 172 + }, + { + "epoch": 0.03986175115207373, + 
"grad_norm": 0.47488604971715725, + "learning_rate": 7.92626728110599e-07, + "loss": 1.2529574632644653, + "step": 173 + }, + { + "epoch": 0.04009216589861751, + "grad_norm": 0.5420947446150967, + "learning_rate": 7.972350230414746e-07, + "loss": 1.3255105018615723, + "step": 174 + }, + { + "epoch": 0.04032258064516129, + "grad_norm": 0.5367164884336, + "learning_rate": 8.018433179723502e-07, + "loss": 1.3167433738708496, + "step": 175 + }, + { + "epoch": 0.04055299539170507, + "grad_norm": 0.5124027812324866, + "learning_rate": 8.064516129032257e-07, + "loss": 1.4780502319335938, + "step": 176 + }, + { + "epoch": 0.04078341013824885, + "grad_norm": 0.49049200777499574, + "learning_rate": 8.110599078341014e-07, + "loss": 1.3096996545791626, + "step": 177 + }, + { + "epoch": 0.041013824884792625, + "grad_norm": 0.5684690759624818, + "learning_rate": 8.15668202764977e-07, + "loss": 1.3124895095825195, + "step": 178 + }, + { + "epoch": 0.04124423963133641, + "grad_norm": 0.5746940747619091, + "learning_rate": 8.202764976958525e-07, + "loss": 1.2589681148529053, + "step": 179 + }, + { + "epoch": 0.041474654377880185, + "grad_norm": 0.5351550863930432, + "learning_rate": 8.248847926267281e-07, + "loss": 1.0576659440994263, + "step": 180 + }, + { + "epoch": 0.04170506912442396, + "grad_norm": 0.5804930108989373, + "learning_rate": 8.294930875576036e-07, + "loss": 1.2647404670715332, + "step": 181 + }, + { + "epoch": 0.041935483870967745, + "grad_norm": 0.5527713530674592, + "learning_rate": 8.341013824884793e-07, + "loss": 1.072542428970337, + "step": 182 + }, + { + "epoch": 0.04216589861751152, + "grad_norm": 0.636913740412271, + "learning_rate": 8.387096774193549e-07, + "loss": 1.2417643070220947, + "step": 183 + }, + { + "epoch": 0.0423963133640553, + "grad_norm": 0.4636179655744076, + "learning_rate": 8.433179723502303e-07, + "loss": 1.2490241527557373, + "step": 184 + }, + { + "epoch": 0.04262672811059908, + "grad_norm": 0.5714553493227277, + "learning_rate": 
8.479262672811059e-07, + "loss": 1.1169328689575195, + "step": 185 + }, + { + "epoch": 0.04285714285714286, + "grad_norm": 0.5893436962226742, + "learning_rate": 8.525345622119815e-07, + "loss": 1.1799774169921875, + "step": 186 + }, + { + "epoch": 0.043087557603686634, + "grad_norm": 0.4840759402042485, + "learning_rate": 8.57142857142857e-07, + "loss": 0.9655753374099731, + "step": 187 + }, + { + "epoch": 0.04331797235023042, + "grad_norm": 0.5473512318665162, + "learning_rate": 8.617511520737327e-07, + "loss": 1.2863562107086182, + "step": 188 + }, + { + "epoch": 0.043548387096774194, + "grad_norm": 0.5971573505450626, + "learning_rate": 8.663594470046083e-07, + "loss": 1.056877613067627, + "step": 189 + }, + { + "epoch": 0.04377880184331797, + "grad_norm": 0.5903656134268881, + "learning_rate": 8.709677419354838e-07, + "loss": 1.2128019332885742, + "step": 190 + }, + { + "epoch": 0.044009216589861753, + "grad_norm": 0.5042165136835149, + "learning_rate": 8.755760368663594e-07, + "loss": 1.1397441625595093, + "step": 191 + }, + { + "epoch": 0.04423963133640553, + "grad_norm": 0.5007324461761941, + "learning_rate": 8.801843317972349e-07, + "loss": 1.062232255935669, + "step": 192 + }, + { + "epoch": 0.044470046082949306, + "grad_norm": 0.5077694656116347, + "learning_rate": 8.847926267281106e-07, + "loss": 1.0102736949920654, + "step": 193 + }, + { + "epoch": 0.04470046082949309, + "grad_norm": 0.5039275409209952, + "learning_rate": 8.894009216589862e-07, + "loss": 1.155517339706421, + "step": 194 + }, + { + "epoch": 0.044930875576036866, + "grad_norm": 0.4568536555143312, + "learning_rate": 8.940092165898617e-07, + "loss": 1.042372703552246, + "step": 195 + }, + { + "epoch": 0.04516129032258064, + "grad_norm": 0.6118356615587064, + "learning_rate": 8.986175115207373e-07, + "loss": 1.1158320903778076, + "step": 196 + }, + { + "epoch": 0.045391705069124426, + "grad_norm": 0.6547758969058546, + "learning_rate": 9.032258064516129e-07, + "loss": 1.4693050384521484, + 
"step": 197 + }, + { + "epoch": 0.0456221198156682, + "grad_norm": 0.5189200191294998, + "learning_rate": 9.078341013824884e-07, + "loss": 1.0990574359893799, + "step": 198 + }, + { + "epoch": 0.04585253456221198, + "grad_norm": 0.5123720508165549, + "learning_rate": 9.124423963133641e-07, + "loss": 1.0259861946105957, + "step": 199 + }, + { + "epoch": 0.04608294930875576, + "grad_norm": 0.4638504791285932, + "learning_rate": 9.170506912442397e-07, + "loss": 1.2708477973937988, + "step": 200 + }, + { + "epoch": 0.04631336405529954, + "grad_norm": 0.426472351706666, + "learning_rate": 9.216589861751152e-07, + "loss": 1.052978754043579, + "step": 201 + }, + { + "epoch": 0.046543778801843315, + "grad_norm": 0.5548008737632977, + "learning_rate": 9.262672811059907e-07, + "loss": 1.3405938148498535, + "step": 202 + }, + { + "epoch": 0.0467741935483871, + "grad_norm": 0.4311530218247671, + "learning_rate": 9.308755760368662e-07, + "loss": 0.9464558362960815, + "step": 203 + }, + { + "epoch": 0.047004608294930875, + "grad_norm": 0.6377195135282403, + "learning_rate": 9.354838709677418e-07, + "loss": 1.3019077777862549, + "step": 204 + }, + { + "epoch": 0.04723502304147465, + "grad_norm": 0.6029329005096047, + "learning_rate": 9.400921658986175e-07, + "loss": 1.146841049194336, + "step": 205 + }, + { + "epoch": 0.047465437788018434, + "grad_norm": 0.6136536598800337, + "learning_rate": 9.44700460829493e-07, + "loss": 1.106084942817688, + "step": 206 + }, + { + "epoch": 0.04769585253456221, + "grad_norm": 0.6661299934206126, + "learning_rate": 9.493087557603686e-07, + "loss": 1.2930629253387451, + "step": 207 + }, + { + "epoch": 0.047926267281105994, + "grad_norm": 0.5555271013101563, + "learning_rate": 9.539170506912442e-07, + "loss": 1.1637842655181885, + "step": 208 + }, + { + "epoch": 0.04815668202764977, + "grad_norm": 0.444081897230925, + "learning_rate": 9.585253456221198e-07, + "loss": 1.1753308773040771, + "step": 209 + }, + { + "epoch": 0.04838709677419355, + 
"grad_norm": 0.5362299776231612, + "learning_rate": 9.631336405529954e-07, + "loss": 1.2304046154022217, + "step": 210 + }, + { + "epoch": 0.04861751152073733, + "grad_norm": 0.6898819231347578, + "learning_rate": 9.67741935483871e-07, + "loss": 1.4326789379119873, + "step": 211 + }, + { + "epoch": 0.04884792626728111, + "grad_norm": 0.614044501232848, + "learning_rate": 9.723502304147466e-07, + "loss": 1.0759861469268799, + "step": 212 + }, + { + "epoch": 0.04907834101382488, + "grad_norm": 0.5971609176488232, + "learning_rate": 9.76958525345622e-07, + "loss": 1.1514811515808105, + "step": 213 + }, + { + "epoch": 0.04930875576036867, + "grad_norm": 0.49252816443356506, + "learning_rate": 9.815668202764976e-07, + "loss": 1.1618578433990479, + "step": 214 + }, + { + "epoch": 0.04953917050691244, + "grad_norm": 0.5677669382006955, + "learning_rate": 9.861751152073732e-07, + "loss": 1.0321345329284668, + "step": 215 + }, + { + "epoch": 0.04976958525345622, + "grad_norm": 0.4551655972629908, + "learning_rate": 9.907834101382488e-07, + "loss": 1.0391438007354736, + "step": 216 + }, + { + "epoch": 0.05, + "grad_norm": 0.6188957189455181, + "learning_rate": 9.953917050691244e-07, + "loss": 1.080418586730957, + "step": 217 + }, + { + "epoch": 0.05023041474654378, + "grad_norm": 0.6531841586974683, + "learning_rate": 1e-06, + "loss": 1.2095223665237427, + "step": 218 + }, + { + "epoch": 0.050460829493087556, + "grad_norm": 0.5036313537560552, + "learning_rate": 1.0046082949308756e-06, + "loss": 1.1144485473632812, + "step": 219 + }, + { + "epoch": 0.05069124423963134, + "grad_norm": 0.6466646674884302, + "learning_rate": 1.0092165898617511e-06, + "loss": 1.2560818195343018, + "step": 220 + }, + { + "epoch": 0.050921658986175115, + "grad_norm": 0.586777516357483, + "learning_rate": 1.0138248847926267e-06, + "loss": 1.1043426990509033, + "step": 221 + }, + { + "epoch": 0.05115207373271889, + "grad_norm": 0.41448570454396455, + "learning_rate": 1.0184331797235021e-06, + 
"loss": 1.0725831985473633, + "step": 222 + }, + { + "epoch": 0.051382488479262675, + "grad_norm": 0.5713867853647446, + "learning_rate": 1.023041474654378e-06, + "loss": 0.9764004349708557, + "step": 223 + }, + { + "epoch": 0.05161290322580645, + "grad_norm": 0.6662412690615445, + "learning_rate": 1.0276497695852535e-06, + "loss": 1.2172776460647583, + "step": 224 + }, + { + "epoch": 0.05184331797235023, + "grad_norm": 0.610800258000843, + "learning_rate": 1.032258064516129e-06, + "loss": 1.1065070629119873, + "step": 225 + }, + { + "epoch": 0.05207373271889401, + "grad_norm": 0.5057724484519791, + "learning_rate": 1.0368663594470047e-06, + "loss": 1.0840628147125244, + "step": 226 + }, + { + "epoch": 0.05230414746543779, + "grad_norm": 0.5250793281243177, + "learning_rate": 1.04147465437788e-06, + "loss": 1.109276294708252, + "step": 227 + }, + { + "epoch": 0.052534562211981564, + "grad_norm": 0.7348582040933043, + "learning_rate": 1.0460829493087557e-06, + "loss": 1.186352252960205, + "step": 228 + }, + { + "epoch": 0.05276497695852535, + "grad_norm": 0.48569306871313883, + "learning_rate": 1.050691244239631e-06, + "loss": 1.1605256795883179, + "step": 229 + }, + { + "epoch": 0.052995391705069124, + "grad_norm": 0.6312799860168967, + "learning_rate": 1.0552995391705069e-06, + "loss": 1.0269646644592285, + "step": 230 + }, + { + "epoch": 0.0532258064516129, + "grad_norm": 0.6446173917231129, + "learning_rate": 1.0599078341013825e-06, + "loss": 0.9595874547958374, + "step": 231 + }, + { + "epoch": 0.053456221198156684, + "grad_norm": 0.6010998567907583, + "learning_rate": 1.0645161290322579e-06, + "loss": 1.1606154441833496, + "step": 232 + }, + { + "epoch": 0.05368663594470046, + "grad_norm": 0.6379425251609956, + "learning_rate": 1.0691244239631337e-06, + "loss": 0.9920428991317749, + "step": 233 + }, + { + "epoch": 0.05391705069124424, + "grad_norm": 0.6346840342097714, + "learning_rate": 1.073732718894009e-06, + "loss": 1.2124650478363037, + "step": 234 + }, + 
{ + "epoch": 0.05414746543778802, + "grad_norm": 0.5761223431136224, + "learning_rate": 1.0783410138248847e-06, + "loss": 1.2237420082092285, + "step": 235 + }, + { + "epoch": 0.054377880184331796, + "grad_norm": 0.5178799666370111, + "learning_rate": 1.0829493087557605e-06, + "loss": 1.1484715938568115, + "step": 236 + }, + { + "epoch": 0.05460829493087557, + "grad_norm": 0.5910590598999479, + "learning_rate": 1.0875576036866358e-06, + "loss": 1.2143291234970093, + "step": 237 + }, + { + "epoch": 0.054838709677419356, + "grad_norm": 0.568116947952991, + "learning_rate": 1.0921658986175114e-06, + "loss": 1.1995420455932617, + "step": 238 + }, + { + "epoch": 0.05506912442396313, + "grad_norm": 0.6128333972066793, + "learning_rate": 1.096774193548387e-06, + "loss": 1.2577292919158936, + "step": 239 + }, + { + "epoch": 0.055299539170506916, + "grad_norm": 0.6177738975799152, + "learning_rate": 1.1013824884792626e-06, + "loss": 1.2170629501342773, + "step": 240 + }, + { + "epoch": 0.05552995391705069, + "grad_norm": 0.3580107479174479, + "learning_rate": 1.1059907834101382e-06, + "loss": 0.8318669199943542, + "step": 241 + }, + { + "epoch": 0.05576036866359447, + "grad_norm": 0.4976235536822315, + "learning_rate": 1.1105990783410138e-06, + "loss": 1.0760166645050049, + "step": 242 + }, + { + "epoch": 0.05599078341013825, + "grad_norm": 0.7197455436310494, + "learning_rate": 1.1152073732718894e-06, + "loss": 1.2437031269073486, + "step": 243 + }, + { + "epoch": 0.05622119815668203, + "grad_norm": 0.5957655407019126, + "learning_rate": 1.1198156682027648e-06, + "loss": 1.1680852174758911, + "step": 244 + }, + { + "epoch": 0.056451612903225805, + "grad_norm": 0.6708075502500678, + "learning_rate": 1.1244239631336406e-06, + "loss": 1.051478385925293, + "step": 245 + }, + { + "epoch": 0.05668202764976959, + "grad_norm": 0.547285271256248, + "learning_rate": 1.1290322580645162e-06, + "loss": 1.1433100700378418, + "step": 246 + }, + { + "epoch": 0.056912442396313365, + 
"grad_norm": 0.6428413238154085, + "learning_rate": 1.1336405529953916e-06, + "loss": 0.9521546363830566, + "step": 247 + }, + { + "epoch": 0.05714285714285714, + "grad_norm": 0.6790518899839243, + "learning_rate": 1.1382488479262674e-06, + "loss": 1.226189136505127, + "step": 248 + }, + { + "epoch": 0.057373271889400924, + "grad_norm": 0.7178538920010674, + "learning_rate": 1.1428571428571428e-06, + "loss": 1.108027696609497, + "step": 249 + }, + { + "epoch": 0.0576036866359447, + "grad_norm": 0.4608432366288286, + "learning_rate": 1.1474654377880184e-06, + "loss": 1.042288064956665, + "step": 250 + }, + { + "epoch": 0.05783410138248848, + "grad_norm": 0.8171244559521852, + "learning_rate": 1.1520737327188938e-06, + "loss": 1.193603754043579, + "step": 251 + }, + { + "epoch": 0.05806451612903226, + "grad_norm": 0.6766522772283506, + "learning_rate": 1.1566820276497696e-06, + "loss": 1.193584680557251, + "step": 252 + }, + { + "epoch": 0.05829493087557604, + "grad_norm": 0.5714710938556213, + "learning_rate": 1.1612903225806452e-06, + "loss": 1.2318934202194214, + "step": 253 + }, + { + "epoch": 0.05852534562211981, + "grad_norm": 0.6443899979691422, + "learning_rate": 1.1658986175115205e-06, + "loss": 1.1626521348953247, + "step": 254 + }, + { + "epoch": 0.0587557603686636, + "grad_norm": 0.6336855527034527, + "learning_rate": 1.1705069124423963e-06, + "loss": 1.2402286529541016, + "step": 255 + }, + { + "epoch": 0.05898617511520737, + "grad_norm": 0.599628545600123, + "learning_rate": 1.1751152073732717e-06, + "loss": 1.190323829650879, + "step": 256 + }, + { + "epoch": 0.05921658986175115, + "grad_norm": 0.655955321737197, + "learning_rate": 1.1797235023041473e-06, + "loss": 1.121636986732483, + "step": 257 + }, + { + "epoch": 0.05944700460829493, + "grad_norm": 0.5349922437861245, + "learning_rate": 1.1843317972350231e-06, + "loss": 1.099304437637329, + "step": 258 + }, + { + "epoch": 0.05967741935483871, + "grad_norm": 0.5611568770807159, + "learning_rate": 
1.1889400921658985e-06, + "loss": 1.1730690002441406, + "step": 259 + }, + { + "epoch": 0.059907834101382486, + "grad_norm": 0.5874751551203973, + "learning_rate": 1.1935483870967741e-06, + "loss": 1.1450574398040771, + "step": 260 + }, + { + "epoch": 0.06013824884792627, + "grad_norm": 0.6634311667010621, + "learning_rate": 1.1981566820276497e-06, + "loss": 1.1435421705245972, + "step": 261 + }, + { + "epoch": 0.060368663594470046, + "grad_norm": 0.6113712565981082, + "learning_rate": 1.2027649769585253e-06, + "loss": 1.2153000831604004, + "step": 262 + }, + { + "epoch": 0.06059907834101382, + "grad_norm": 0.4715675476477507, + "learning_rate": 1.207373271889401e-06, + "loss": 1.0380406379699707, + "step": 263 + }, + { + "epoch": 0.060829493087557605, + "grad_norm": 0.5396758253019809, + "learning_rate": 1.2119815668202765e-06, + "loss": 1.1639207601547241, + "step": 264 + }, + { + "epoch": 0.06105990783410138, + "grad_norm": 0.7193765184254299, + "learning_rate": 1.216589861751152e-06, + "loss": 1.1862819194793701, + "step": 265 + }, + { + "epoch": 0.06129032258064516, + "grad_norm": 0.5621136552568688, + "learning_rate": 1.2211981566820275e-06, + "loss": 1.2122020721435547, + "step": 266 + }, + { + "epoch": 0.06152073732718894, + "grad_norm": 0.506518590231947, + "learning_rate": 1.2258064516129033e-06, + "loss": 1.1201646327972412, + "step": 267 + }, + { + "epoch": 0.06175115207373272, + "grad_norm": 0.6015371724768855, + "learning_rate": 1.2304147465437787e-06, + "loss": 0.9520926475524902, + "step": 268 + }, + { + "epoch": 0.061981566820276494, + "grad_norm": 0.6815507447701216, + "learning_rate": 1.2350230414746543e-06, + "loss": 1.0426976680755615, + "step": 269 + }, + { + "epoch": 0.06221198156682028, + "grad_norm": 0.5129880337213574, + "learning_rate": 1.23963133640553e-06, + "loss": 0.934493899345398, + "step": 270 + }, + { + "epoch": 0.062442396313364054, + "grad_norm": 0.5416312735509534, + "learning_rate": 1.2442396313364054e-06, + "loss": 
1.23980712890625, + "step": 271 + }, + { + "epoch": 0.06267281105990784, + "grad_norm": 0.5947336924258313, + "learning_rate": 1.248847926267281e-06, + "loss": 1.094742774963379, + "step": 272 + }, + { + "epoch": 0.06290322580645161, + "grad_norm": 0.5496219212827214, + "learning_rate": 1.2534562211981564e-06, + "loss": 1.0271551609039307, + "step": 273 + }, + { + "epoch": 0.06313364055299539, + "grad_norm": 0.43924704821878574, + "learning_rate": 1.2580645161290322e-06, + "loss": 1.159210205078125, + "step": 274 + }, + { + "epoch": 0.06336405529953917, + "grad_norm": 0.6336734571964621, + "learning_rate": 1.2626728110599078e-06, + "loss": 1.127510666847229, + "step": 275 + }, + { + "epoch": 0.06359447004608294, + "grad_norm": 0.564136508309977, + "learning_rate": 1.2672811059907832e-06, + "loss": 1.1371517181396484, + "step": 276 + }, + { + "epoch": 0.06382488479262673, + "grad_norm": 0.5092569849346139, + "learning_rate": 1.271889400921659e-06, + "loss": 1.0296730995178223, + "step": 277 + }, + { + "epoch": 0.06405529953917051, + "grad_norm": 0.47819096787751125, + "learning_rate": 1.2764976958525344e-06, + "loss": 1.036975383758545, + "step": 278 + }, + { + "epoch": 0.06428571428571428, + "grad_norm": 0.5933788958917384, + "learning_rate": 1.28110599078341e-06, + "loss": 1.2120393514633179, + "step": 279 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 0.5094532117085869, + "learning_rate": 1.2857142857142858e-06, + "loss": 1.0084068775177002, + "step": 280 + }, + { + "epoch": 0.06474654377880185, + "grad_norm": 0.5556672645421422, + "learning_rate": 1.2903225806451612e-06, + "loss": 1.2005786895751953, + "step": 281 + }, + { + "epoch": 0.06497695852534562, + "grad_norm": 0.5273275990471241, + "learning_rate": 1.2949308755760368e-06, + "loss": 1.1506783962249756, + "step": 282 + }, + { + "epoch": 0.0652073732718894, + "grad_norm": 0.6565311834699108, + "learning_rate": 1.2995391705069124e-06, + "loss": 1.1219947338104248, + "step": 283 + }, + { + "epoch": 
0.06543778801843318, + "grad_norm": 0.5392805741788703, + "learning_rate": 1.304147465437788e-06, + "loss": 1.2041170597076416, + "step": 284 + }, + { + "epoch": 0.06566820276497695, + "grad_norm": 0.4958618059812673, + "learning_rate": 1.3087557603686636e-06, + "loss": 1.0903037786483765, + "step": 285 + }, + { + "epoch": 0.06589861751152074, + "grad_norm": 0.5739593792710319, + "learning_rate": 1.3133640552995392e-06, + "loss": 1.2140064239501953, + "step": 286 + }, + { + "epoch": 0.06612903225806452, + "grad_norm": 0.6611408054194472, + "learning_rate": 1.3179723502304148e-06, + "loss": 1.3026092052459717, + "step": 287 + }, + { + "epoch": 0.0663594470046083, + "grad_norm": 0.5994162091601994, + "learning_rate": 1.3225806451612901e-06, + "loss": 1.0937910079956055, + "step": 288 + }, + { + "epoch": 0.06658986175115207, + "grad_norm": 0.5087892316212932, + "learning_rate": 1.327188940092166e-06, + "loss": 1.1768109798431396, + "step": 289 + }, + { + "epoch": 0.06682027649769585, + "grad_norm": 0.6601843016778813, + "learning_rate": 1.3317972350230413e-06, + "loss": 1.0796440839767456, + "step": 290 + }, + { + "epoch": 0.06705069124423964, + "grad_norm": 0.5059222364831474, + "learning_rate": 1.336405529953917e-06, + "loss": 0.9972932934761047, + "step": 291 + }, + { + "epoch": 0.06728110599078341, + "grad_norm": 0.5571474335328804, + "learning_rate": 1.3410138248847927e-06, + "loss": 0.9860717058181763, + "step": 292 + }, + { + "epoch": 0.06751152073732719, + "grad_norm": 0.5418320654969337, + "learning_rate": 1.3456221198156681e-06, + "loss": 1.045119047164917, + "step": 293 + }, + { + "epoch": 0.06774193548387097, + "grad_norm": 0.5469511174229076, + "learning_rate": 1.3502304147465437e-06, + "loss": 1.2740920782089233, + "step": 294 + }, + { + "epoch": 0.06797235023041474, + "grad_norm": 0.5280888059979016, + "learning_rate": 1.354838709677419e-06, + "loss": 1.0860114097595215, + "step": 295 + }, + { + "epoch": 0.06820276497695853, + "grad_norm": 
0.6361673375880608, + "learning_rate": 1.359447004608295e-06, + "loss": 1.111539602279663, + "step": 296 + }, + { + "epoch": 0.06843317972350231, + "grad_norm": 0.6640553054344481, + "learning_rate": 1.3640552995391705e-06, + "loss": 1.1628870964050293, + "step": 297 + }, + { + "epoch": 0.06866359447004608, + "grad_norm": 0.5665129055040568, + "learning_rate": 1.3686635944700459e-06, + "loss": 1.042768955230713, + "step": 298 + }, + { + "epoch": 0.06889400921658986, + "grad_norm": 0.43340931133190164, + "learning_rate": 1.3732718894009217e-06, + "loss": 0.9970331192016602, + "step": 299 + }, + { + "epoch": 0.06912442396313365, + "grad_norm": 0.5645710736996077, + "learning_rate": 1.377880184331797e-06, + "loss": 1.1270179748535156, + "step": 300 + }, + { + "epoch": 0.06935483870967742, + "grad_norm": 0.5065704773498506, + "learning_rate": 1.3824884792626727e-06, + "loss": 0.9505646824836731, + "step": 301 + }, + { + "epoch": 0.0695852534562212, + "grad_norm": 0.5178052985950043, + "learning_rate": 1.3870967741935485e-06, + "loss": 1.0997588634490967, + "step": 302 + }, + { + "epoch": 0.06981566820276498, + "grad_norm": 0.46976885146719827, + "learning_rate": 1.3917050691244239e-06, + "loss": 1.1512106657028198, + "step": 303 + }, + { + "epoch": 0.07004608294930875, + "grad_norm": 0.5368431131511487, + "learning_rate": 1.3963133640552995e-06, + "loss": 1.1340759992599487, + "step": 304 + }, + { + "epoch": 0.07027649769585254, + "grad_norm": 0.6153911846871725, + "learning_rate": 1.400921658986175e-06, + "loss": 1.187511682510376, + "step": 305 + }, + { + "epoch": 0.07050691244239632, + "grad_norm": 0.511555535336468, + "learning_rate": 1.4055299539170507e-06, + "loss": 1.0711122751235962, + "step": 306 + }, + { + "epoch": 0.07073732718894009, + "grad_norm": 0.48287298633713555, + "learning_rate": 1.410138248847926e-06, + "loss": 0.9636896848678589, + "step": 307 + }, + { + "epoch": 0.07096774193548387, + "grad_norm": 0.5910127759130634, + "learning_rate": 
1.4147465437788018e-06, + "loss": 1.0506833791732788, + "step": 308 + }, + { + "epoch": 0.07119815668202766, + "grad_norm": 0.46621570534633416, + "learning_rate": 1.4193548387096774e-06, + "loss": 1.1076349020004272, + "step": 309 + }, + { + "epoch": 0.07142857142857142, + "grad_norm": 0.5023143786431462, + "learning_rate": 1.4239631336405528e-06, + "loss": 1.0878944396972656, + "step": 310 + }, + { + "epoch": 0.07165898617511521, + "grad_norm": 0.5894127846415432, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.0808600187301636, + "step": 311 + }, + { + "epoch": 0.07188940092165899, + "grad_norm": 0.6608655757057322, + "learning_rate": 1.433179723502304e-06, + "loss": 1.2117588520050049, + "step": 312 + }, + { + "epoch": 0.07211981566820276, + "grad_norm": 0.49227698344069387, + "learning_rate": 1.4377880184331796e-06, + "loss": 1.0899101495742798, + "step": 313 + }, + { + "epoch": 0.07235023041474654, + "grad_norm": 0.4429228185732454, + "learning_rate": 1.4423963133640554e-06, + "loss": 0.9540426135063171, + "step": 314 + }, + { + "epoch": 0.07258064516129033, + "grad_norm": 0.6262415135725191, + "learning_rate": 1.4470046082949308e-06, + "loss": 1.1170068979263306, + "step": 315 + }, + { + "epoch": 0.0728110599078341, + "grad_norm": 0.5398534028349639, + "learning_rate": 1.4516129032258064e-06, + "loss": 1.2030160427093506, + "step": 316 + }, + { + "epoch": 0.07304147465437788, + "grad_norm": 0.5750696017486863, + "learning_rate": 1.4562211981566818e-06, + "loss": 1.1599903106689453, + "step": 317 + }, + { + "epoch": 0.07327188940092166, + "grad_norm": 0.4940370201046436, + "learning_rate": 1.4608294930875576e-06, + "loss": 1.0777950286865234, + "step": 318 + }, + { + "epoch": 0.07350230414746543, + "grad_norm": 0.5527232601625117, + "learning_rate": 1.4654377880184332e-06, + "loss": 1.1250553131103516, + "step": 319 + }, + { + "epoch": 0.07373271889400922, + "grad_norm": 0.4905671836592669, + "learning_rate": 1.4700460829493086e-06, + "loss": 
1.10176420211792, + "step": 320 + }, + { + "epoch": 0.073963133640553, + "grad_norm": 0.5457078879226115, + "learning_rate": 1.4746543778801844e-06, + "loss": 1.111799716949463, + "step": 321 + }, + { + "epoch": 0.07419354838709677, + "grad_norm": 0.4195800331952007, + "learning_rate": 1.4792626728110598e-06, + "loss": 1.1555054187774658, + "step": 322 + }, + { + "epoch": 0.07442396313364055, + "grad_norm": 0.46236670595596, + "learning_rate": 1.4838709677419353e-06, + "loss": 1.0977535247802734, + "step": 323 + }, + { + "epoch": 0.07465437788018434, + "grad_norm": 0.5097860724223924, + "learning_rate": 1.4884792626728112e-06, + "loss": 0.9058012962341309, + "step": 324 + }, + { + "epoch": 0.0748847926267281, + "grad_norm": 0.5077577953430894, + "learning_rate": 1.4930875576036865e-06, + "loss": 1.1147960424423218, + "step": 325 + }, + { + "epoch": 0.07511520737327189, + "grad_norm": 0.44169448790763116, + "learning_rate": 1.4976958525345621e-06, + "loss": 1.1315648555755615, + "step": 326 + }, + { + "epoch": 0.07534562211981567, + "grad_norm": 0.5088086763700569, + "learning_rate": 1.5023041474654377e-06, + "loss": 0.9134868383407593, + "step": 327 + }, + { + "epoch": 0.07557603686635944, + "grad_norm": 0.44118138965972864, + "learning_rate": 1.5069124423963133e-06, + "loss": 1.017493724822998, + "step": 328 + }, + { + "epoch": 0.07580645161290323, + "grad_norm": 0.5038134502792564, + "learning_rate": 1.5115207373271887e-06, + "loss": 1.220658540725708, + "step": 329 + }, + { + "epoch": 0.07603686635944701, + "grad_norm": 0.49196264739665124, + "learning_rate": 1.5161290322580645e-06, + "loss": 1.2254307270050049, + "step": 330 + }, + { + "epoch": 0.07626728110599078, + "grad_norm": 0.6442066774537618, + "learning_rate": 1.5207373271889401e-06, + "loss": 1.2653989791870117, + "step": 331 + }, + { + "epoch": 0.07649769585253456, + "grad_norm": 0.5214989033274106, + "learning_rate": 1.5253456221198155e-06, + "loss": 1.199981451034546, + "step": 332 + }, + { + 
"epoch": 0.07672811059907834, + "grad_norm": 0.5987857165424706, + "learning_rate": 1.5299539170506913e-06, + "loss": 1.1141018867492676, + "step": 333 + }, + { + "epoch": 0.07695852534562211, + "grad_norm": 0.5942646354683767, + "learning_rate": 1.5345622119815667e-06, + "loss": 1.2139991521835327, + "step": 334 + }, + { + "epoch": 0.0771889400921659, + "grad_norm": 0.46506211352562865, + "learning_rate": 1.5391705069124423e-06, + "loss": 1.0647475719451904, + "step": 335 + }, + { + "epoch": 0.07741935483870968, + "grad_norm": 0.41334086285294086, + "learning_rate": 1.543778801843318e-06, + "loss": 0.9740357398986816, + "step": 336 + }, + { + "epoch": 0.07764976958525345, + "grad_norm": 0.3730662683323707, + "learning_rate": 1.5483870967741935e-06, + "loss": 0.877153754234314, + "step": 337 + }, + { + "epoch": 0.07788018433179723, + "grad_norm": 0.5608914234538745, + "learning_rate": 1.552995391705069e-06, + "loss": 1.2472789287567139, + "step": 338 + }, + { + "epoch": 0.07811059907834102, + "grad_norm": 0.49369711494641005, + "learning_rate": 1.5576036866359445e-06, + "loss": 1.1873078346252441, + "step": 339 + }, + { + "epoch": 0.07834101382488479, + "grad_norm": 0.47054639516827856, + "learning_rate": 1.5622119815668203e-06, + "loss": 1.0728449821472168, + "step": 340 + }, + { + "epoch": 0.07857142857142857, + "grad_norm": 0.5084311239727589, + "learning_rate": 1.5668202764976959e-06, + "loss": 0.9974904656410217, + "step": 341 + }, + { + "epoch": 0.07880184331797235, + "grad_norm": 0.5100945633220172, + "learning_rate": 1.5714285714285712e-06, + "loss": 1.0591039657592773, + "step": 342 + }, + { + "epoch": 0.07903225806451612, + "grad_norm": 0.5927330256525677, + "learning_rate": 1.576036866359447e-06, + "loss": 1.04117751121521, + "step": 343 + }, + { + "epoch": 0.0792626728110599, + "grad_norm": 0.40984725482311923, + "learning_rate": 1.5806451612903224e-06, + "loss": 0.934100866317749, + "step": 344 + }, + { + "epoch": 0.07949308755760369, + "grad_norm": 
0.4545690285130126, + "learning_rate": 1.585253456221198e-06, + "loss": 1.0333890914916992, + "step": 345 + }, + { + "epoch": 0.07972350230414746, + "grad_norm": 0.4848318013907446, + "learning_rate": 1.5898617511520738e-06, + "loss": 1.1762741804122925, + "step": 346 + }, + { + "epoch": 0.07995391705069124, + "grad_norm": 0.4082821303075448, + "learning_rate": 1.5944700460829492e-06, + "loss": 1.081842303276062, + "step": 347 + }, + { + "epoch": 0.08018433179723503, + "grad_norm": 0.48343962912190763, + "learning_rate": 1.5990783410138248e-06, + "loss": 1.140712022781372, + "step": 348 + }, + { + "epoch": 0.0804147465437788, + "grad_norm": 0.3519464028715392, + "learning_rate": 1.6036866359447004e-06, + "loss": 1.0155198574066162, + "step": 349 + }, + { + "epoch": 0.08064516129032258, + "grad_norm": 0.4719922940268299, + "learning_rate": 1.608294930875576e-06, + "loss": 1.0673280954360962, + "step": 350 + }, + { + "epoch": 0.08087557603686636, + "grad_norm": 0.44336917730159625, + "learning_rate": 1.6129032258064514e-06, + "loss": 1.1061692237854004, + "step": 351 + }, + { + "epoch": 0.08110599078341015, + "grad_norm": 0.6227306591455409, + "learning_rate": 1.6175115207373272e-06, + "loss": 1.0120354890823364, + "step": 352 + }, + { + "epoch": 0.08133640552995391, + "grad_norm": 0.5343939607764295, + "learning_rate": 1.6221198156682028e-06, + "loss": 1.1260986328125, + "step": 353 + }, + { + "epoch": 0.0815668202764977, + "grad_norm": 0.514597043189326, + "learning_rate": 1.6267281105990782e-06, + "loss": 1.0376214981079102, + "step": 354 + }, + { + "epoch": 0.08179723502304148, + "grad_norm": 0.41314458702115897, + "learning_rate": 1.631336405529954e-06, + "loss": 1.0802130699157715, + "step": 355 + }, + { + "epoch": 0.08202764976958525, + "grad_norm": 0.5113844239661658, + "learning_rate": 1.6359447004608294e-06, + "loss": 1.217378854751587, + "step": 356 + }, + { + "epoch": 0.08225806451612903, + "grad_norm": 0.3681604891354872, + "learning_rate": 
1.640552995391705e-06, + "loss": 0.9107617139816284, + "step": 357 + }, + { + "epoch": 0.08248847926267282, + "grad_norm": 0.4567828094638987, + "learning_rate": 1.6451612903225808e-06, + "loss": 1.089385986328125, + "step": 358 + }, + { + "epoch": 0.08271889400921659, + "grad_norm": 0.42382031863730735, + "learning_rate": 1.6497695852534561e-06, + "loss": 1.1420392990112305, + "step": 359 + }, + { + "epoch": 0.08294930875576037, + "grad_norm": 0.4385300551654332, + "learning_rate": 1.6543778801843317e-06, + "loss": 0.9308648705482483, + "step": 360 + }, + { + "epoch": 0.08317972350230415, + "grad_norm": 0.4691608891369802, + "learning_rate": 1.6589861751152071e-06, + "loss": 0.9463413953781128, + "step": 361 + }, + { + "epoch": 0.08341013824884792, + "grad_norm": 0.4312953553305326, + "learning_rate": 1.663594470046083e-06, + "loss": 1.0364834070205688, + "step": 362 + }, + { + "epoch": 0.0836405529953917, + "grad_norm": 0.4601141894995307, + "learning_rate": 1.6682027649769585e-06, + "loss": 0.9992797374725342, + "step": 363 + }, + { + "epoch": 0.08387096774193549, + "grad_norm": 0.4110829230093509, + "learning_rate": 1.672811059907834e-06, + "loss": 0.9862687587738037, + "step": 364 + }, + { + "epoch": 0.08410138248847926, + "grad_norm": 0.538237007116734, + "learning_rate": 1.6774193548387097e-06, + "loss": 1.0882744789123535, + "step": 365 + }, + { + "epoch": 0.08433179723502304, + "grad_norm": 0.38129891099780466, + "learning_rate": 1.682027649769585e-06, + "loss": 0.9217149615287781, + "step": 366 + }, + { + "epoch": 0.08456221198156683, + "grad_norm": 0.47566005804946043, + "learning_rate": 1.6866359447004607e-06, + "loss": 1.0384632349014282, + "step": 367 + }, + { + "epoch": 0.0847926267281106, + "grad_norm": 0.41334228678230484, + "learning_rate": 1.6912442396313363e-06, + "loss": 0.8760565519332886, + "step": 368 + }, + { + "epoch": 0.08502304147465438, + "grad_norm": 0.38194490761666694, + "learning_rate": 1.6958525345622119e-06, + "loss": 
0.9868614077568054, + "step": 369 + }, + { + "epoch": 0.08525345622119816, + "grad_norm": 0.41853356164893474, + "learning_rate": 1.7004608294930875e-06, + "loss": 1.0386936664581299, + "step": 370 + }, + { + "epoch": 0.08548387096774193, + "grad_norm": 0.4969082634490474, + "learning_rate": 1.705069124423963e-06, + "loss": 1.2201364040374756, + "step": 371 + }, + { + "epoch": 0.08571428571428572, + "grad_norm": 0.45684500070085005, + "learning_rate": 1.7096774193548387e-06, + "loss": 0.9892920255661011, + "step": 372 + }, + { + "epoch": 0.0859447004608295, + "grad_norm": 0.3411435628885881, + "learning_rate": 1.714285714285714e-06, + "loss": 0.9379667639732361, + "step": 373 + }, + { + "epoch": 0.08617511520737327, + "grad_norm": 0.4493279942699278, + "learning_rate": 1.7188940092165899e-06, + "loss": 1.0150624513626099, + "step": 374 + }, + { + "epoch": 0.08640552995391705, + "grad_norm": 0.3873317793310882, + "learning_rate": 1.7235023041474655e-06, + "loss": 0.8724589943885803, + "step": 375 + }, + { + "epoch": 0.08663594470046083, + "grad_norm": 0.451020649692039, + "learning_rate": 1.7281105990783408e-06, + "loss": 1.005715012550354, + "step": 376 + }, + { + "epoch": 0.0868663594470046, + "grad_norm": 0.40515583321904614, + "learning_rate": 1.7327188940092167e-06, + "loss": 1.0238345861434937, + "step": 377 + }, + { + "epoch": 0.08709677419354839, + "grad_norm": 0.5713304603163627, + "learning_rate": 1.737327188940092e-06, + "loss": 1.061020851135254, + "step": 378 + }, + { + "epoch": 0.08732718894009217, + "grad_norm": 0.31543151666242697, + "learning_rate": 1.7419354838709676e-06, + "loss": 0.8607133626937866, + "step": 379 + }, + { + "epoch": 0.08755760368663594, + "grad_norm": 0.396586403800106, + "learning_rate": 1.7465437788018434e-06, + "loss": 0.9070740938186646, + "step": 380 + }, + { + "epoch": 0.08778801843317972, + "grad_norm": 0.4114853593210784, + "learning_rate": 1.7511520737327188e-06, + "loss": 0.993092954158783, + "step": 381 + }, + { + 
"epoch": 0.08801843317972351, + "grad_norm": 0.5030976624666732, + "learning_rate": 1.7557603686635944e-06, + "loss": 1.1119567155838013, + "step": 382 + }, + { + "epoch": 0.08824884792626728, + "grad_norm": 0.3947649464982104, + "learning_rate": 1.7603686635944698e-06, + "loss": 1.030786395072937, + "step": 383 + }, + { + "epoch": 0.08847926267281106, + "grad_norm": 0.413233744996873, + "learning_rate": 1.7649769585253456e-06, + "loss": 1.0578559637069702, + "step": 384 + }, + { + "epoch": 0.08870967741935484, + "grad_norm": 0.5116874225270758, + "learning_rate": 1.7695852534562212e-06, + "loss": 1.1282391548156738, + "step": 385 + }, + { + "epoch": 0.08894009216589861, + "grad_norm": 0.36883704269137796, + "learning_rate": 1.7741935483870966e-06, + "loss": 0.7838784456253052, + "step": 386 + }, + { + "epoch": 0.0891705069124424, + "grad_norm": 0.4028833159886203, + "learning_rate": 1.7788018433179724e-06, + "loss": 0.9244300127029419, + "step": 387 + }, + { + "epoch": 0.08940092165898618, + "grad_norm": 0.37786066556159736, + "learning_rate": 1.7834101382488478e-06, + "loss": 0.916866660118103, + "step": 388 + }, + { + "epoch": 0.08963133640552995, + "grad_norm": 0.3466207390337416, + "learning_rate": 1.7880184331797234e-06, + "loss": 0.9918155670166016, + "step": 389 + }, + { + "epoch": 0.08986175115207373, + "grad_norm": 0.49146787648511026, + "learning_rate": 1.792626728110599e-06, + "loss": 0.9879001379013062, + "step": 390 + }, + { + "epoch": 0.09009216589861752, + "grad_norm": 0.4467876721911936, + "learning_rate": 1.7972350230414746e-06, + "loss": 1.0252082347869873, + "step": 391 + }, + { + "epoch": 0.09032258064516129, + "grad_norm": 0.4519090202816701, + "learning_rate": 1.8018433179723502e-06, + "loss": 1.0376901626586914, + "step": 392 + }, + { + "epoch": 0.09055299539170507, + "grad_norm": 0.4158305964101772, + "learning_rate": 1.8064516129032258e-06, + "loss": 1.0237072706222534, + "step": 393 + }, + { + "epoch": 0.09078341013824885, + "grad_norm": 
0.3903660894092682, + "learning_rate": 1.8110599078341013e-06, + "loss": 1.004181146621704, + "step": 394 + }, + { + "epoch": 0.09101382488479262, + "grad_norm": 0.4844697096481946, + "learning_rate": 1.8156682027649767e-06, + "loss": 1.1162958145141602, + "step": 395 + }, + { + "epoch": 0.0912442396313364, + "grad_norm": 0.43484007077470926, + "learning_rate": 1.8202764976958525e-06, + "loss": 0.9634548425674438, + "step": 396 + }, + { + "epoch": 0.09147465437788019, + "grad_norm": 0.34256483920586434, + "learning_rate": 1.8248847926267281e-06, + "loss": 0.9306463599205017, + "step": 397 + }, + { + "epoch": 0.09170506912442396, + "grad_norm": 0.4291772843094685, + "learning_rate": 1.8294930875576035e-06, + "loss": 1.0243630409240723, + "step": 398 + }, + { + "epoch": 0.09193548387096774, + "grad_norm": 0.37150575195192614, + "learning_rate": 1.8341013824884793e-06, + "loss": 0.9261370897293091, + "step": 399 + }, + { + "epoch": 0.09216589861751152, + "grad_norm": 0.41574639263883495, + "learning_rate": 1.8387096774193547e-06, + "loss": 0.9929264783859253, + "step": 400 + }, + { + "epoch": 0.0923963133640553, + "grad_norm": 0.4086620199652483, + "learning_rate": 1.8433179723502303e-06, + "loss": 1.0245590209960938, + "step": 401 + }, + { + "epoch": 0.09262672811059908, + "grad_norm": 0.4485366734014856, + "learning_rate": 1.8479262672811061e-06, + "loss": 0.9801148176193237, + "step": 402 + }, + { + "epoch": 0.09285714285714286, + "grad_norm": 0.48045286204627596, + "learning_rate": 1.8525345622119815e-06, + "loss": 1.181383728981018, + "step": 403 + }, + { + "epoch": 0.09308755760368663, + "grad_norm": 0.41845043157279344, + "learning_rate": 1.857142857142857e-06, + "loss": 0.9493411779403687, + "step": 404 + }, + { + "epoch": 0.09331797235023041, + "grad_norm": 0.4897744794150158, + "learning_rate": 1.8617511520737325e-06, + "loss": 1.1096491813659668, + "step": 405 + }, + { + "epoch": 0.0935483870967742, + "grad_norm": 0.4480175053230346, + "learning_rate": 
1.8663594470046083e-06, + "loss": 1.1019275188446045, + "step": 406 + }, + { + "epoch": 0.09377880184331797, + "grad_norm": 0.3732577959232657, + "learning_rate": 1.8709677419354837e-06, + "loss": 0.973988950252533, + "step": 407 + }, + { + "epoch": 0.09400921658986175, + "grad_norm": 0.4400203989690802, + "learning_rate": 1.8755760368663593e-06, + "loss": 1.1670622825622559, + "step": 408 + }, + { + "epoch": 0.09423963133640553, + "grad_norm": 0.3329146322312322, + "learning_rate": 1.880184331797235e-06, + "loss": 0.8550488948822021, + "step": 409 + }, + { + "epoch": 0.0944700460829493, + "grad_norm": 0.4080056832475701, + "learning_rate": 1.8847926267281104e-06, + "loss": 1.0501651763916016, + "step": 410 + }, + { + "epoch": 0.09470046082949309, + "grad_norm": 0.4667020783139675, + "learning_rate": 1.889400921658986e-06, + "loss": 1.1323202848434448, + "step": 411 + }, + { + "epoch": 0.09493087557603687, + "grad_norm": 0.4438011539128225, + "learning_rate": 1.8940092165898616e-06, + "loss": 1.168154001235962, + "step": 412 + }, + { + "epoch": 0.09516129032258064, + "grad_norm": 0.5043395094497101, + "learning_rate": 1.8986175115207372e-06, + "loss": 1.0667431354522705, + "step": 413 + }, + { + "epoch": 0.09539170506912442, + "grad_norm": 0.42921175733784445, + "learning_rate": 1.9032258064516128e-06, + "loss": 1.1447162628173828, + "step": 414 + }, + { + "epoch": 0.0956221198156682, + "grad_norm": 0.42501454608228506, + "learning_rate": 1.9078341013824884e-06, + "loss": 0.9403433799743652, + "step": 415 + }, + { + "epoch": 0.09585253456221199, + "grad_norm": 0.4016688989337606, + "learning_rate": 1.912442396313364e-06, + "loss": 0.9837527275085449, + "step": 416 + }, + { + "epoch": 0.09608294930875576, + "grad_norm": 0.422068085350648, + "learning_rate": 1.9170506912442396e-06, + "loss": 1.071333408355713, + "step": 417 + }, + { + "epoch": 0.09631336405529954, + "grad_norm": 0.5124388054628781, + "learning_rate": 1.921658986175115e-06, + "loss": 
1.0156168937683105, + "step": 418 + }, + { + "epoch": 0.09654377880184332, + "grad_norm": 0.4338501331744671, + "learning_rate": 1.926267281105991e-06, + "loss": 0.9705266952514648, + "step": 419 + }, + { + "epoch": 0.0967741935483871, + "grad_norm": 0.407144156286867, + "learning_rate": 1.930875576036866e-06, + "loss": 1.0570204257965088, + "step": 420 + }, + { + "epoch": 0.09700460829493088, + "grad_norm": 0.43729360857600713, + "learning_rate": 1.935483870967742e-06, + "loss": 1.141861915588379, + "step": 421 + }, + { + "epoch": 0.09723502304147466, + "grad_norm": 0.4507835554387818, + "learning_rate": 1.9400921658986174e-06, + "loss": 0.9849745631217957, + "step": 422 + }, + { + "epoch": 0.09746543778801843, + "grad_norm": 0.4932195036683519, + "learning_rate": 1.944700460829493e-06, + "loss": 1.0279912948608398, + "step": 423 + }, + { + "epoch": 0.09769585253456221, + "grad_norm": 0.4014365475110759, + "learning_rate": 1.9493087557603686e-06, + "loss": 1.0707788467407227, + "step": 424 + }, + { + "epoch": 0.097926267281106, + "grad_norm": 0.37856248369077095, + "learning_rate": 1.953917050691244e-06, + "loss": 0.9391129016876221, + "step": 425 + }, + { + "epoch": 0.09815668202764977, + "grad_norm": 0.3604046417791118, + "learning_rate": 1.9585253456221198e-06, + "loss": 0.9792884588241577, + "step": 426 + }, + { + "epoch": 0.09838709677419355, + "grad_norm": 0.42091691400517506, + "learning_rate": 1.963133640552995e-06, + "loss": 1.0111792087554932, + "step": 427 + }, + { + "epoch": 0.09861751152073733, + "grad_norm": 0.2951881364083913, + "learning_rate": 1.967741935483871e-06, + "loss": 1.0020272731781006, + "step": 428 + }, + { + "epoch": 0.0988479262672811, + "grad_norm": 0.42473763380817414, + "learning_rate": 1.9723502304147463e-06, + "loss": 1.1002991199493408, + "step": 429 + }, + { + "epoch": 0.09907834101382489, + "grad_norm": 0.3977328364337887, + "learning_rate": 1.976958525345622e-06, + "loss": 0.9656131267547607, + "step": 430 + }, + { + "epoch": 
0.09930875576036867, + "grad_norm": 0.4163794190517341, + "learning_rate": 1.9815668202764975e-06, + "loss": 1.1845166683197021, + "step": 431 + }, + { + "epoch": 0.09953917050691244, + "grad_norm": 0.4102761511182145, + "learning_rate": 1.9861751152073733e-06, + "loss": 0.8743879795074463, + "step": 432 + }, + { + "epoch": 0.09976958525345622, + "grad_norm": 0.48299006340600875, + "learning_rate": 1.9907834101382487e-06, + "loss": 1.0800082683563232, + "step": 433 + }, + { + "epoch": 0.1, + "grad_norm": 0.39412754669182365, + "learning_rate": 1.995391705069124e-06, + "loss": 1.0410808324813843, + "step": 434 + }, + { + "epoch": 0.10023041474654378, + "grad_norm": 0.4817128357084655, + "learning_rate": 2e-06, + "loss": 1.0214624404907227, + "step": 435 + }, + { + "epoch": 0.10046082949308756, + "grad_norm": 0.4738161753055533, + "learning_rate": 1.9999999274256618e-06, + "loss": 1.0304028987884521, + "step": 436 + }, + { + "epoch": 0.10069124423963134, + "grad_norm": 0.3946923205513698, + "learning_rate": 1.9999997097026583e-06, + "loss": 1.0457626581192017, + "step": 437 + }, + { + "epoch": 0.10092165898617511, + "grad_norm": 0.43567215904100204, + "learning_rate": 1.9999993468310205e-06, + "loss": 0.9837691187858582, + "step": 438 + }, + { + "epoch": 0.1011520737327189, + "grad_norm": 0.5216317957588074, + "learning_rate": 1.9999988388108013e-06, + "loss": 1.0819612741470337, + "step": 439 + }, + { + "epoch": 0.10138248847926268, + "grad_norm": 0.31182314858852395, + "learning_rate": 1.9999981856420743e-06, + "loss": 1.0417449474334717, + "step": 440 + }, + { + "epoch": 0.10161290322580645, + "grad_norm": 0.5477105048499294, + "learning_rate": 1.999997387324935e-06, + "loss": 1.0501068830490112, + "step": 441 + }, + { + "epoch": 0.10184331797235023, + "grad_norm": 0.4106183150059033, + "learning_rate": 1.999996443859498e-06, + "loss": 1.0635120868682861, + "step": 442 + }, + { + "epoch": 0.10207373271889401, + "grad_norm": 0.4873224989082174, + "learning_rate": 
1.999995355245902e-06, + "loss": 0.9732234477996826, + "step": 443 + }, + { + "epoch": 0.10230414746543778, + "grad_norm": 0.3718846857755592, + "learning_rate": 1.9999941214843034e-06, + "loss": 0.9493811130523682, + "step": 444 + }, + { + "epoch": 0.10253456221198157, + "grad_norm": 0.5595191439491263, + "learning_rate": 1.9999927425748817e-06, + "loss": 1.1455141305923462, + "step": 445 + }, + { + "epoch": 0.10276497695852535, + "grad_norm": 0.4237177518607636, + "learning_rate": 1.9999912185178374e-06, + "loss": 0.9341592788696289, + "step": 446 + }, + { + "epoch": 0.10299539170506912, + "grad_norm": 0.3913224265375377, + "learning_rate": 1.9999895493133916e-06, + "loss": 0.9535608291625977, + "step": 447 + }, + { + "epoch": 0.1032258064516129, + "grad_norm": 0.4687207319213409, + "learning_rate": 1.999987734961787e-06, + "loss": 1.1977221965789795, + "step": 448 + }, + { + "epoch": 0.10345622119815669, + "grad_norm": 0.45995634872516833, + "learning_rate": 1.999985775463286e-06, + "loss": 1.1658375263214111, + "step": 449 + }, + { + "epoch": 0.10368663594470046, + "grad_norm": 0.47830181543951694, + "learning_rate": 1.9999836708181734e-06, + "loss": 1.1171612739562988, + "step": 450 + }, + { + "epoch": 0.10391705069124424, + "grad_norm": 0.3823354001067843, + "learning_rate": 1.999981421026755e-06, + "loss": 1.0864373445510864, + "step": 451 + }, + { + "epoch": 0.10414746543778802, + "grad_norm": 0.43518989690984766, + "learning_rate": 1.999979026089357e-06, + "loss": 1.1211299896240234, + "step": 452 + }, + { + "epoch": 0.10437788018433179, + "grad_norm": 0.45163820634554874, + "learning_rate": 1.9999764860063277e-06, + "loss": 1.071751594543457, + "step": 453 + }, + { + "epoch": 0.10460829493087558, + "grad_norm": 0.3749468590501543, + "learning_rate": 1.9999738007780347e-06, + "loss": 1.0377576351165771, + "step": 454 + }, + { + "epoch": 0.10483870967741936, + "grad_norm": 0.42625340690366553, + "learning_rate": 1.9999709704048685e-06, + "loss": 
0.9658410549163818, + "step": 455 + }, + { + "epoch": 0.10506912442396313, + "grad_norm": 0.4022888050751363, + "learning_rate": 1.9999679948872395e-06, + "loss": 0.9070194959640503, + "step": 456 + }, + { + "epoch": 0.10529953917050691, + "grad_norm": 0.5570523464378584, + "learning_rate": 1.9999648742255803e-06, + "loss": 1.2197664976119995, + "step": 457 + }, + { + "epoch": 0.1055299539170507, + "grad_norm": 0.3961372853294897, + "learning_rate": 1.9999616084203426e-06, + "loss": 0.9032889604568481, + "step": 458 + }, + { + "epoch": 0.10576036866359446, + "grad_norm": 0.39060467678942784, + "learning_rate": 1.9999581974720017e-06, + "loss": 0.9458762407302856, + "step": 459 + }, + { + "epoch": 0.10599078341013825, + "grad_norm": 0.5068153216782157, + "learning_rate": 1.9999546413810526e-06, + "loss": 1.0024757385253906, + "step": 460 + }, + { + "epoch": 0.10622119815668203, + "grad_norm": 0.38148764403186025, + "learning_rate": 1.9999509401480108e-06, + "loss": 0.9499050378799438, + "step": 461 + }, + { + "epoch": 0.1064516129032258, + "grad_norm": 0.4354491299812492, + "learning_rate": 1.9999470937734132e-06, + "loss": 1.0764188766479492, + "step": 462 + }, + { + "epoch": 0.10668202764976958, + "grad_norm": 0.42800401210878014, + "learning_rate": 1.9999431022578194e-06, + "loss": 0.9858300089836121, + "step": 463 + }, + { + "epoch": 0.10691244239631337, + "grad_norm": 0.41132718920336847, + "learning_rate": 1.999938965601808e-06, + "loss": 0.8965580463409424, + "step": 464 + }, + { + "epoch": 0.10714285714285714, + "grad_norm": 0.39699129711694964, + "learning_rate": 1.9999346838059788e-06, + "loss": 0.8860410451889038, + "step": 465 + }, + { + "epoch": 0.10737327188940092, + "grad_norm": 0.48300723462768347, + "learning_rate": 1.9999302568709546e-06, + "loss": 1.0621274709701538, + "step": 466 + }, + { + "epoch": 0.1076036866359447, + "grad_norm": 0.45149909069714367, + "learning_rate": 1.9999256847973774e-06, + "loss": 0.8894643783569336, + "step": 467 + }, + 
{ + "epoch": 0.10783410138248847, + "grad_norm": 0.3529913357119793, + "learning_rate": 1.999920967585911e-06, + "loss": 0.98856520652771, + "step": 468 + }, + { + "epoch": 0.10806451612903226, + "grad_norm": 0.3260735960256147, + "learning_rate": 1.999916105237239e-06, + "loss": 0.7885239124298096, + "step": 469 + }, + { + "epoch": 0.10829493087557604, + "grad_norm": 0.4477697599226733, + "learning_rate": 1.9999110977520687e-06, + "loss": 1.0274477005004883, + "step": 470 + }, + { + "epoch": 0.10852534562211981, + "grad_norm": 0.3938409891368368, + "learning_rate": 1.999905945131126e-06, + "loss": 0.8672109842300415, + "step": 471 + }, + { + "epoch": 0.10875576036866359, + "grad_norm": 0.37173415889586336, + "learning_rate": 1.9999006473751594e-06, + "loss": 0.852576732635498, + "step": 472 + }, + { + "epoch": 0.10898617511520738, + "grad_norm": 0.3670138423827908, + "learning_rate": 1.9998952044849375e-06, + "loss": 0.9553557634353638, + "step": 473 + }, + { + "epoch": 0.10921658986175115, + "grad_norm": 0.4402707979796638, + "learning_rate": 1.99988961646125e-06, + "loss": 1.1375620365142822, + "step": 474 + }, + { + "epoch": 0.10944700460829493, + "grad_norm": 0.4045716386517098, + "learning_rate": 1.9998838833049083e-06, + "loss": 0.9653681516647339, + "step": 475 + }, + { + "epoch": 0.10967741935483871, + "grad_norm": 0.3653559897200667, + "learning_rate": 1.999878005016745e-06, + "loss": 1.1139185428619385, + "step": 476 + }, + { + "epoch": 0.10990783410138248, + "grad_norm": 0.37459420946595523, + "learning_rate": 1.9998719815976127e-06, + "loss": 0.8375418186187744, + "step": 477 + }, + { + "epoch": 0.11013824884792627, + "grad_norm": 0.33053822521695836, + "learning_rate": 1.999865813048386e-06, + "loss": 1.0005979537963867, + "step": 478 + }, + { + "epoch": 0.11036866359447005, + "grad_norm": 0.39083306344420843, + "learning_rate": 1.99985949936996e-06, + "loss": 0.8499772548675537, + "step": 479 + }, + { + "epoch": 0.11059907834101383, + "grad_norm": 
0.3575835338316839, + "learning_rate": 1.999853040563252e-06, + "loss": 0.9805284738540649, + "step": 480 + }, + { + "epoch": 0.1108294930875576, + "grad_norm": 0.43340835059987204, + "learning_rate": 1.9998464366291983e-06, + "loss": 0.9462177753448486, + "step": 481 + }, + { + "epoch": 0.11105990783410138, + "grad_norm": 0.44706726559657484, + "learning_rate": 1.999839687568758e-06, + "loss": 1.1023187637329102, + "step": 482 + }, + { + "epoch": 0.11129032258064517, + "grad_norm": 0.3754824087757579, + "learning_rate": 1.9998327933829103e-06, + "loss": 0.9361279010772705, + "step": 483 + }, + { + "epoch": 0.11152073732718894, + "grad_norm": 0.38419186899738067, + "learning_rate": 1.9998257540726567e-06, + "loss": 0.9811379909515381, + "step": 484 + }, + { + "epoch": 0.11175115207373272, + "grad_norm": 0.4030421476721474, + "learning_rate": 1.9998185696390184e-06, + "loss": 1.0246069431304932, + "step": 485 + }, + { + "epoch": 0.1119815668202765, + "grad_norm": 0.4555360249805513, + "learning_rate": 1.9998112400830385e-06, + "loss": 1.0614899396896362, + "step": 486 + }, + { + "epoch": 0.11221198156682027, + "grad_norm": 0.4347652169333907, + "learning_rate": 1.9998037654057803e-06, + "loss": 1.02305269241333, + "step": 487 + }, + { + "epoch": 0.11244239631336406, + "grad_norm": 0.43672158413630835, + "learning_rate": 1.999796145608329e-06, + "loss": 1.044907808303833, + "step": 488 + }, + { + "epoch": 0.11267281105990784, + "grad_norm": 0.4917956866782855, + "learning_rate": 1.999788380691791e-06, + "loss": 0.9669852256774902, + "step": 489 + }, + { + "epoch": 0.11290322580645161, + "grad_norm": 0.3857920087478492, + "learning_rate": 1.9997804706572933e-06, + "loss": 1.0235236883163452, + "step": 490 + }, + { + "epoch": 0.1131336405529954, + "grad_norm": 0.4541175977583441, + "learning_rate": 1.9997724155059835e-06, + "loss": 0.8982692360877991, + "step": 491 + }, + { + "epoch": 0.11336405529953918, + "grad_norm": 0.481910238333043, + "learning_rate": 
1.9997642152390312e-06, + "loss": 0.8390282988548279, + "step": 492 + }, + { + "epoch": 0.11359447004608295, + "grad_norm": 0.39882686276748835, + "learning_rate": 1.9997558698576266e-06, + "loss": 0.8938695192337036, + "step": 493 + }, + { + "epoch": 0.11382488479262673, + "grad_norm": 0.5064684870077569, + "learning_rate": 1.9997473793629813e-06, + "loss": 0.9747422933578491, + "step": 494 + }, + { + "epoch": 0.11405529953917051, + "grad_norm": 0.443509358045386, + "learning_rate": 1.999738743756327e-06, + "loss": 1.050918698310852, + "step": 495 + }, + { + "epoch": 0.11428571428571428, + "grad_norm": 0.5368423996158629, + "learning_rate": 1.9997299630389174e-06, + "loss": 0.9169312715530396, + "step": 496 + }, + { + "epoch": 0.11451612903225807, + "grad_norm": 0.452695866401899, + "learning_rate": 1.9997210372120272e-06, + "loss": 1.0258065462112427, + "step": 497 + }, + { + "epoch": 0.11474654377880185, + "grad_norm": 0.3831239007423439, + "learning_rate": 1.9997119662769523e-06, + "loss": 1.066356897354126, + "step": 498 + }, + { + "epoch": 0.11497695852534562, + "grad_norm": 0.4319474855040805, + "learning_rate": 1.9997027502350086e-06, + "loss": 1.0336101055145264, + "step": 499 + }, + { + "epoch": 0.1152073732718894, + "grad_norm": 0.36856882435983085, + "learning_rate": 1.9996933890875342e-06, + "loss": 1.0434989929199219, + "step": 500 + }, + { + "epoch": 0.11543778801843319, + "grad_norm": 0.4366750071509639, + "learning_rate": 1.9996838828358876e-06, + "loss": 1.0081424713134766, + "step": 501 + }, + { + "epoch": 0.11566820276497695, + "grad_norm": 0.4424253641379215, + "learning_rate": 1.999674231481449e-06, + "loss": 1.0998575687408447, + "step": 502 + }, + { + "epoch": 0.11589861751152074, + "grad_norm": 0.43915567985422416, + "learning_rate": 1.9996644350256193e-06, + "loss": 1.0325868129730225, + "step": 503 + }, + { + "epoch": 0.11612903225806452, + "grad_norm": 0.39758687932867864, + "learning_rate": 1.99965449346982e-06, + "loss": 
1.0520741939544678, + "step": 504 + }, + { + "epoch": 0.11635944700460829, + "grad_norm": 0.4373332869451062, + "learning_rate": 1.9996444068154943e-06, + "loss": 0.9355484247207642, + "step": 505 + }, + { + "epoch": 0.11658986175115207, + "grad_norm": 0.478944942365821, + "learning_rate": 1.9996341750641067e-06, + "loss": 1.2088062763214111, + "step": 506 + }, + { + "epoch": 0.11682027649769586, + "grad_norm": 0.45703939880277317, + "learning_rate": 1.9996237982171416e-06, + "loss": 1.007477045059204, + "step": 507 + }, + { + "epoch": 0.11705069124423963, + "grad_norm": 0.516029780444843, + "learning_rate": 1.9996132762761054e-06, + "loss": 0.9528911113739014, + "step": 508 + }, + { + "epoch": 0.11728110599078341, + "grad_norm": 0.44144049831872473, + "learning_rate": 1.9996026092425258e-06, + "loss": 1.0906065702438354, + "step": 509 + }, + { + "epoch": 0.1175115207373272, + "grad_norm": 0.45635386377861326, + "learning_rate": 1.9995917971179507e-06, + "loss": 1.1328812837600708, + "step": 510 + }, + { + "epoch": 0.11774193548387096, + "grad_norm": 0.5010986511700435, + "learning_rate": 1.9995808399039493e-06, + "loss": 1.1367099285125732, + "step": 511 + }, + { + "epoch": 0.11797235023041475, + "grad_norm": 0.5738525299064665, + "learning_rate": 1.999569737602112e-06, + "loss": 1.22605562210083, + "step": 512 + }, + { + "epoch": 0.11820276497695853, + "grad_norm": 0.40700112362856533, + "learning_rate": 1.9995584902140514e-06, + "loss": 0.8814148306846619, + "step": 513 + }, + { + "epoch": 0.1184331797235023, + "grad_norm": 0.4018062947026822, + "learning_rate": 1.9995470977413988e-06, + "loss": 0.916766881942749, + "step": 514 + }, + { + "epoch": 0.11866359447004608, + "grad_norm": 0.3907370494982875, + "learning_rate": 1.999535560185808e-06, + "loss": 0.8088599443435669, + "step": 515 + }, + { + "epoch": 0.11889400921658987, + "grad_norm": 0.5585215819507526, + "learning_rate": 1.9995238775489538e-06, + "loss": 1.0029397010803223, + "step": 516 + }, + { + 
"epoch": 0.11912442396313364, + "grad_norm": 0.47103060321263474, + "learning_rate": 1.9995120498325322e-06, + "loss": 1.157515287399292, + "step": 517 + }, + { + "epoch": 0.11935483870967742, + "grad_norm": 0.43934234876750516, + "learning_rate": 1.99950007703826e-06, + "loss": 0.989453911781311, + "step": 518 + }, + { + "epoch": 0.1195852534562212, + "grad_norm": 0.501533126043576, + "learning_rate": 1.999487959167874e-06, + "loss": 0.9791898727416992, + "step": 519 + }, + { + "epoch": 0.11981566820276497, + "grad_norm": 0.3947583681206324, + "learning_rate": 1.9994756962231343e-06, + "loss": 0.9994203448295593, + "step": 520 + }, + { + "epoch": 0.12004608294930876, + "grad_norm": 0.4064680989752179, + "learning_rate": 1.999463288205821e-06, + "loss": 0.9096299409866333, + "step": 521 + }, + { + "epoch": 0.12027649769585254, + "grad_norm": 0.5675118509929592, + "learning_rate": 1.999450735117734e-06, + "loss": 0.9956046342849731, + "step": 522 + }, + { + "epoch": 0.12050691244239631, + "grad_norm": 0.40854646192247485, + "learning_rate": 1.9994380369606956e-06, + "loss": 1.0336079597473145, + "step": 523 + }, + { + "epoch": 0.12073732718894009, + "grad_norm": 0.4028964743045085, + "learning_rate": 1.99942519373655e-06, + "loss": 0.8828116655349731, + "step": 524 + }, + { + "epoch": 0.12096774193548387, + "grad_norm": 0.4113573248244064, + "learning_rate": 1.9994122054471597e-06, + "loss": 0.8733093738555908, + "step": 525 + }, + { + "epoch": 0.12119815668202764, + "grad_norm": 0.4633889976755098, + "learning_rate": 1.9993990720944114e-06, + "loss": 1.0312494039535522, + "step": 526 + }, + { + "epoch": 0.12142857142857143, + "grad_norm": 0.39342421435973574, + "learning_rate": 1.9993857936802105e-06, + "loss": 0.9229701161384583, + "step": 527 + }, + { + "epoch": 0.12165898617511521, + "grad_norm": 0.4629141668744642, + "learning_rate": 1.9993723702064853e-06, + "loss": 0.8980100154876709, + "step": 528 + }, + { + "epoch": 0.12188940092165898, + "grad_norm": 
0.42208035145091816, + "learning_rate": 1.999358801675183e-06, + "loss": 0.939933180809021, + "step": 529 + }, + { + "epoch": 0.12211981566820276, + "grad_norm": 0.3966309171286601, + "learning_rate": 1.9993450880882733e-06, + "loss": 1.0014444589614868, + "step": 530 + }, + { + "epoch": 0.12235023041474655, + "grad_norm": 0.4166874579150977, + "learning_rate": 1.9993312294477477e-06, + "loss": 0.9995889663696289, + "step": 531 + }, + { + "epoch": 0.12258064516129032, + "grad_norm": 0.37598019229960666, + "learning_rate": 1.9993172257556167e-06, + "loss": 1.0010197162628174, + "step": 532 + }, + { + "epoch": 0.1228110599078341, + "grad_norm": 0.3629842057209114, + "learning_rate": 1.9993030770139135e-06, + "loss": 0.972966194152832, + "step": 533 + }, + { + "epoch": 0.12304147465437788, + "grad_norm": 0.4160633061352588, + "learning_rate": 1.9992887832246917e-06, + "loss": 0.8033444881439209, + "step": 534 + }, + { + "epoch": 0.12327188940092165, + "grad_norm": 0.3895553967201257, + "learning_rate": 1.9992743443900254e-06, + "loss": 0.7532742619514465, + "step": 535 + }, + { + "epoch": 0.12350230414746544, + "grad_norm": 0.46964696388446997, + "learning_rate": 1.9992597605120113e-06, + "loss": 1.058760643005371, + "step": 536 + }, + { + "epoch": 0.12373271889400922, + "grad_norm": 0.37591416731208094, + "learning_rate": 1.9992450315927658e-06, + "loss": 0.8559634685516357, + "step": 537 + }, + { + "epoch": 0.12396313364055299, + "grad_norm": 0.4216079229956694, + "learning_rate": 1.9992301576344267e-06, + "loss": 1.053638219833374, + "step": 538 + }, + { + "epoch": 0.12419354838709677, + "grad_norm": 0.5423293655738015, + "learning_rate": 1.9992151386391528e-06, + "loss": 0.8841970562934875, + "step": 539 + }, + { + "epoch": 0.12442396313364056, + "grad_norm": 0.5667972752402203, + "learning_rate": 1.9991999746091247e-06, + "loss": 0.9355173110961914, + "step": 540 + }, + { + "epoch": 0.12465437788018432, + "grad_norm": 0.43323548094659586, + "learning_rate": 
1.999184665546543e-06, + "loss": 0.9978284239768982, + "step": 541 + }, + { + "epoch": 0.12488479262672811, + "grad_norm": 0.4166718713190779, + "learning_rate": 1.99916921145363e-06, + "loss": 0.8855264782905579, + "step": 542 + }, + { + "epoch": 0.1251152073732719, + "grad_norm": 0.5314416958418489, + "learning_rate": 1.9991536123326283e-06, + "loss": 0.885519802570343, + "step": 543 + }, + { + "epoch": 0.12534562211981568, + "grad_norm": 0.4381118612604031, + "learning_rate": 1.9991378681858024e-06, + "loss": 0.9772528409957886, + "step": 544 + }, + { + "epoch": 0.12557603686635946, + "grad_norm": 0.46876887659201405, + "learning_rate": 1.999121979015438e-06, + "loss": 0.8817745447158813, + "step": 545 + }, + { + "epoch": 0.12580645161290321, + "grad_norm": 0.36530562318650095, + "learning_rate": 1.9991059448238404e-06, + "loss": 0.9374080896377563, + "step": 546 + }, + { + "epoch": 0.126036866359447, + "grad_norm": 0.3669313811039727, + "learning_rate": 1.9990897656133383e-06, + "loss": 0.9174116253852844, + "step": 547 + }, + { + "epoch": 0.12626728110599078, + "grad_norm": 0.401361126928626, + "learning_rate": 1.999073441386279e-06, + "loss": 0.9514039158821106, + "step": 548 + }, + { + "epoch": 0.12649769585253456, + "grad_norm": 0.4665811721686224, + "learning_rate": 1.999056972145032e-06, + "loss": 1.10535728931427, + "step": 549 + }, + { + "epoch": 0.12672811059907835, + "grad_norm": 0.4609610092344924, + "learning_rate": 1.999040357891989e-06, + "loss": 1.0641597509384155, + "step": 550 + }, + { + "epoch": 0.12695852534562213, + "grad_norm": 0.39409304359090785, + "learning_rate": 1.99902359862956e-06, + "loss": 0.9596017599105835, + "step": 551 + }, + { + "epoch": 0.1271889400921659, + "grad_norm": 0.4899166130843387, + "learning_rate": 1.9990066943601777e-06, + "loss": 1.083927869796753, + "step": 552 + }, + { + "epoch": 0.12741935483870967, + "grad_norm": 0.42007806110658624, + "learning_rate": 1.998989645086297e-06, + "loss": 0.9146738052368164, + 
"step": 553 + }, + { + "epoch": 0.12764976958525345, + "grad_norm": 0.41224202627344914, + "learning_rate": 1.998972450810391e-06, + "loss": 0.9038050770759583, + "step": 554 + }, + { + "epoch": 0.12788018433179724, + "grad_norm": 0.45759233489952406, + "learning_rate": 1.9989551115349574e-06, + "loss": 0.973220705986023, + "step": 555 + }, + { + "epoch": 0.12811059907834102, + "grad_norm": 0.424280511041039, + "learning_rate": 1.998937627262511e-06, + "loss": 0.8804281949996948, + "step": 556 + }, + { + "epoch": 0.1283410138248848, + "grad_norm": 0.47603807991909786, + "learning_rate": 1.9989199979955903e-06, + "loss": 1.100919485092163, + "step": 557 + }, + { + "epoch": 0.12857142857142856, + "grad_norm": 0.5871199693144976, + "learning_rate": 1.998902223736755e-06, + "loss": 1.1152353286743164, + "step": 558 + }, + { + "epoch": 0.12880184331797234, + "grad_norm": 0.4236469989661471, + "learning_rate": 1.9988843044885837e-06, + "loss": 1.0721793174743652, + "step": 559 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 0.4234271408586371, + "learning_rate": 1.9988662402536783e-06, + "loss": 0.9035133123397827, + "step": 560 + }, + { + "epoch": 0.1292626728110599, + "grad_norm": 0.4210053632602843, + "learning_rate": 1.9988480310346603e-06, + "loss": 1.0053937435150146, + "step": 561 + }, + { + "epoch": 0.1294930875576037, + "grad_norm": 0.5230478085674195, + "learning_rate": 1.9988296768341728e-06, + "loss": 0.8536228537559509, + "step": 562 + }, + { + "epoch": 0.12972350230414748, + "grad_norm": 0.41493118398063783, + "learning_rate": 1.9988111776548797e-06, + "loss": 0.9673396348953247, + "step": 563 + }, + { + "epoch": 0.12995391705069123, + "grad_norm": 0.36295429679681995, + "learning_rate": 1.998792533499467e-06, + "loss": 0.9402456879615784, + "step": 564 + }, + { + "epoch": 0.13018433179723501, + "grad_norm": 0.3983153697524455, + "learning_rate": 1.99877374437064e-06, + "loss": 0.8900678157806396, + "step": 565 + }, + { + "epoch": 
0.1304147465437788, + "grad_norm": 0.47587146443270817, + "learning_rate": 1.9987548102711264e-06, + "loss": 0.9112892150878906, + "step": 566 + }, + { + "epoch": 0.13064516129032258, + "grad_norm": 0.3969666466780631, + "learning_rate": 1.9987357312036743e-06, + "loss": 0.763452410697937, + "step": 567 + }, + { + "epoch": 0.13087557603686636, + "grad_norm": 0.44573355289133143, + "learning_rate": 1.9987165071710527e-06, + "loss": 1.0410873889923096, + "step": 568 + }, + { + "epoch": 0.13110599078341015, + "grad_norm": 0.389372329442145, + "learning_rate": 1.9986971381760524e-06, + "loss": 1.029583215713501, + "step": 569 + }, + { + "epoch": 0.1313364055299539, + "grad_norm": 0.46351745512727555, + "learning_rate": 1.9986776242214845e-06, + "loss": 0.994928777217865, + "step": 570 + }, + { + "epoch": 0.1315668202764977, + "grad_norm": 0.49139035828687805, + "learning_rate": 1.9986579653101817e-06, + "loss": 1.001985788345337, + "step": 571 + }, + { + "epoch": 0.13179723502304147, + "grad_norm": 0.45831221525956994, + "learning_rate": 1.998638161444997e-06, + "loss": 0.9813050031661987, + "step": 572 + }, + { + "epoch": 0.13202764976958525, + "grad_norm": 0.45157644768988, + "learning_rate": 1.9986182126288053e-06, + "loss": 0.8678451180458069, + "step": 573 + }, + { + "epoch": 0.13225806451612904, + "grad_norm": 0.42446769343835317, + "learning_rate": 1.998598118864502e-06, + "loss": 1.0393729209899902, + "step": 574 + }, + { + "epoch": 0.13248847926267282, + "grad_norm": 0.39102315770569207, + "learning_rate": 1.998577880155004e-06, + "loss": 0.9631935358047485, + "step": 575 + }, + { + "epoch": 0.1327188940092166, + "grad_norm": 0.37655183711017187, + "learning_rate": 1.9985574965032483e-06, + "loss": 0.8480437994003296, + "step": 576 + }, + { + "epoch": 0.13294930875576036, + "grad_norm": 0.432103661547375, + "learning_rate": 1.998536967912194e-06, + "loss": 1.0450071096420288, + "step": 577 + }, + { + "epoch": 0.13317972350230414, + "grad_norm": 
0.5144084645376303, + "learning_rate": 1.9985162943848207e-06, + "loss": 0.9374763369560242, + "step": 578 + }, + { + "epoch": 0.13341013824884793, + "grad_norm": 0.45444537143479036, + "learning_rate": 1.9984954759241293e-06, + "loss": 0.9405182600021362, + "step": 579 + }, + { + "epoch": 0.1336405529953917, + "grad_norm": 0.42824704739155545, + "learning_rate": 1.998474512533141e-06, + "loss": 0.8406375646591187, + "step": 580 + }, + { + "epoch": 0.1338709677419355, + "grad_norm": 0.48847321743709643, + "learning_rate": 1.9984534042148994e-06, + "loss": 0.9323312044143677, + "step": 581 + }, + { + "epoch": 0.13410138248847928, + "grad_norm": 0.43641992007971325, + "learning_rate": 1.998432150972468e-06, + "loss": 1.0704214572906494, + "step": 582 + }, + { + "epoch": 0.13433179723502303, + "grad_norm": 0.38681502715760335, + "learning_rate": 1.9984107528089315e-06, + "loss": 0.8636025190353394, + "step": 583 + }, + { + "epoch": 0.13456221198156681, + "grad_norm": 0.4361205380771583, + "learning_rate": 1.998389209727396e-06, + "loss": 0.9616303443908691, + "step": 584 + }, + { + "epoch": 0.1347926267281106, + "grad_norm": 0.4406937724242653, + "learning_rate": 1.998367521730988e-06, + "loss": 1.0457193851470947, + "step": 585 + }, + { + "epoch": 0.13502304147465438, + "grad_norm": 0.4060450620979949, + "learning_rate": 1.9983456888228566e-06, + "loss": 1.0460572242736816, + "step": 586 + }, + { + "epoch": 0.13525345622119817, + "grad_norm": 0.3771944294411782, + "learning_rate": 1.9983237110061695e-06, + "loss": 0.9693883657455444, + "step": 587 + }, + { + "epoch": 0.13548387096774195, + "grad_norm": 0.4355709352067949, + "learning_rate": 1.9983015882841175e-06, + "loss": 0.8823472857475281, + "step": 588 + }, + { + "epoch": 0.1357142857142857, + "grad_norm": 0.5606637533068962, + "learning_rate": 1.998279320659912e-06, + "loss": 1.1602983474731445, + "step": 589 + }, + { + "epoch": 0.1359447004608295, + "grad_norm": 0.40130101265364443, + "learning_rate": 
1.9982569081367843e-06, + "loss": 0.8191353678703308, + "step": 590 + }, + { + "epoch": 0.13617511520737327, + "grad_norm": 0.40863931644700857, + "learning_rate": 1.9982343507179876e-06, + "loss": 1.141557216644287, + "step": 591 + }, + { + "epoch": 0.13640552995391705, + "grad_norm": 0.4712969186607289, + "learning_rate": 1.998211648406797e-06, + "loss": 0.9688570499420166, + "step": 592 + }, + { + "epoch": 0.13663594470046084, + "grad_norm": 0.4543844570436241, + "learning_rate": 1.9981888012065068e-06, + "loss": 1.0218561887741089, + "step": 593 + }, + { + "epoch": 0.13686635944700462, + "grad_norm": 0.5219271265911207, + "learning_rate": 1.9981658091204334e-06, + "loss": 0.9531952142715454, + "step": 594 + }, + { + "epoch": 0.13709677419354838, + "grad_norm": 0.5314208269690397, + "learning_rate": 1.9981426721519143e-06, + "loss": 1.1421492099761963, + "step": 595 + }, + { + "epoch": 0.13732718894009216, + "grad_norm": 0.3970137466851754, + "learning_rate": 1.9981193903043074e-06, + "loss": 0.8173041343688965, + "step": 596 + }, + { + "epoch": 0.13755760368663594, + "grad_norm": 0.43200053855682263, + "learning_rate": 1.998095963580993e-06, + "loss": 0.8842465877532959, + "step": 597 + }, + { + "epoch": 0.13778801843317973, + "grad_norm": 0.6492506358781442, + "learning_rate": 1.9980723919853703e-06, + "loss": 0.8547788858413696, + "step": 598 + }, + { + "epoch": 0.1380184331797235, + "grad_norm": 0.5287255050220753, + "learning_rate": 1.998048675520861e-06, + "loss": 1.0085712671279907, + "step": 599 + }, + { + "epoch": 0.1382488479262673, + "grad_norm": 0.5226769291219134, + "learning_rate": 1.9980248141909083e-06, + "loss": 0.9276378750801086, + "step": 600 + }, + { + "epoch": 0.13847926267281105, + "grad_norm": 0.44292446989501455, + "learning_rate": 1.998000807998975e-06, + "loss": 0.9236693382263184, + "step": 601 + }, + { + "epoch": 0.13870967741935483, + "grad_norm": 0.43514287150953085, + "learning_rate": 1.9979766569485454e-06, + "loss": 
1.0353924036026, + "step": 602 + }, + { + "epoch": 0.13894009216589862, + "grad_norm": 0.3831256791535214, + "learning_rate": 1.9979523610431246e-06, + "loss": 0.8456567525863647, + "step": 603 + }, + { + "epoch": 0.1391705069124424, + "grad_norm": 0.46736174894260846, + "learning_rate": 1.997927920286241e-06, + "loss": 0.997468888759613, + "step": 604 + }, + { + "epoch": 0.13940092165898618, + "grad_norm": 0.38558572890255066, + "learning_rate": 1.9979033346814397e-06, + "loss": 0.8962260484695435, + "step": 605 + }, + { + "epoch": 0.13963133640552997, + "grad_norm": 0.4829548009529998, + "learning_rate": 1.997878604232291e-06, + "loss": 0.8586266040802002, + "step": 606 + }, + { + "epoch": 0.13986175115207372, + "grad_norm": 0.4776734917637134, + "learning_rate": 1.9978537289423837e-06, + "loss": 0.9639670848846436, + "step": 607 + }, + { + "epoch": 0.1400921658986175, + "grad_norm": 0.4115822234384495, + "learning_rate": 1.9978287088153286e-06, + "loss": 1.005727767944336, + "step": 608 + }, + { + "epoch": 0.1403225806451613, + "grad_norm": 0.44858527541471366, + "learning_rate": 1.9978035438547575e-06, + "loss": 1.148871898651123, + "step": 609 + }, + { + "epoch": 0.14055299539170507, + "grad_norm": 0.4357664217922314, + "learning_rate": 1.9977782340643226e-06, + "loss": 1.0459539890289307, + "step": 610 + }, + { + "epoch": 0.14078341013824885, + "grad_norm": 0.43229915305128663, + "learning_rate": 1.9977527794476985e-06, + "loss": 0.92689448595047, + "step": 611 + }, + { + "epoch": 0.14101382488479264, + "grad_norm": 0.5514025110097415, + "learning_rate": 1.997727180008579e-06, + "loss": 0.9735790491104126, + "step": 612 + }, + { + "epoch": 0.1412442396313364, + "grad_norm": 0.5114055458545007, + "learning_rate": 1.99770143575068e-06, + "loss": 0.8882870674133301, + "step": 613 + }, + { + "epoch": 0.14147465437788018, + "grad_norm": 0.47604166837238787, + "learning_rate": 1.9976755466777386e-06, + "loss": 0.9229795932769775, + "step": 614 + }, + { + "epoch": 
0.14170506912442396, + "grad_norm": 0.39391923738635765, + "learning_rate": 1.997649512793512e-06, + "loss": 0.9097769260406494, + "step": 615 + }, + { + "epoch": 0.14193548387096774, + "grad_norm": 0.429877903042447, + "learning_rate": 1.9976233341017798e-06, + "loss": 0.7751711010932922, + "step": 616 + }, + { + "epoch": 0.14216589861751153, + "grad_norm": 0.4585028421290768, + "learning_rate": 1.9975970106063414e-06, + "loss": 0.9071080684661865, + "step": 617 + }, + { + "epoch": 0.1423963133640553, + "grad_norm": 0.372835081071011, + "learning_rate": 1.997570542311017e-06, + "loss": 0.8444115519523621, + "step": 618 + }, + { + "epoch": 0.14262672811059907, + "grad_norm": 0.46125503087530084, + "learning_rate": 1.9975439292196496e-06, + "loss": 0.9159516096115112, + "step": 619 + }, + { + "epoch": 0.14285714285714285, + "grad_norm": 0.45879242474243875, + "learning_rate": 1.997517171336101e-06, + "loss": 0.9697242975234985, + "step": 620 + }, + { + "epoch": 0.14308755760368663, + "grad_norm": 0.4605305033840643, + "learning_rate": 1.9974902686642557e-06, + "loss": 0.9894170761108398, + "step": 621 + }, + { + "epoch": 0.14331797235023042, + "grad_norm": 0.48517122287493847, + "learning_rate": 1.9974632212080184e-06, + "loss": 1.0364127159118652, + "step": 622 + }, + { + "epoch": 0.1435483870967742, + "grad_norm": 0.39169164845291754, + "learning_rate": 1.997436028971315e-06, + "loss": 0.8980219960212708, + "step": 623 + }, + { + "epoch": 0.14377880184331798, + "grad_norm": 0.4857070397144096, + "learning_rate": 1.9974086919580925e-06, + "loss": 1.0293703079223633, + "step": 624 + }, + { + "epoch": 0.14400921658986174, + "grad_norm": 0.46693238253454916, + "learning_rate": 1.9973812101723186e-06, + "loss": 1.006148099899292, + "step": 625 + }, + { + "epoch": 0.14423963133640552, + "grad_norm": 0.5525790188158035, + "learning_rate": 1.9973535836179825e-06, + "loss": 0.9489799737930298, + "step": 626 + }, + { + "epoch": 0.1444700460829493, + "grad_norm": 
0.3704152285915921, + "learning_rate": 1.997325812299094e-06, + "loss": 0.7601498961448669, + "step": 627 + }, + { + "epoch": 0.1447004608294931, + "grad_norm": 0.6225002321802279, + "learning_rate": 1.9972978962196843e-06, + "loss": 0.8345643281936646, + "step": 628 + }, + { + "epoch": 0.14493087557603687, + "grad_norm": 0.48694459235316484, + "learning_rate": 1.9972698353838053e-06, + "loss": 0.8705894947052002, + "step": 629 + }, + { + "epoch": 0.14516129032258066, + "grad_norm": 0.42033173985472694, + "learning_rate": 1.9972416297955294e-06, + "loss": 0.9515185356140137, + "step": 630 + }, + { + "epoch": 0.1453917050691244, + "grad_norm": 0.38157222553103914, + "learning_rate": 1.9972132794589514e-06, + "loss": 0.7616517543792725, + "step": 631 + }, + { + "epoch": 0.1456221198156682, + "grad_norm": 0.47593248323635307, + "learning_rate": 1.9971847843781862e-06, + "loss": 0.8870444297790527, + "step": 632 + }, + { + "epoch": 0.14585253456221198, + "grad_norm": 0.45987330163099194, + "learning_rate": 1.9971561445573696e-06, + "loss": 0.8709393739700317, + "step": 633 + }, + { + "epoch": 0.14608294930875576, + "grad_norm": 0.35616612587319196, + "learning_rate": 1.997127360000658e-06, + "loss": 0.865444540977478, + "step": 634 + }, + { + "epoch": 0.14631336405529954, + "grad_norm": 0.4431578416665891, + "learning_rate": 1.997098430712231e-06, + "loss": 0.9560728073120117, + "step": 635 + }, + { + "epoch": 0.14654377880184333, + "grad_norm": 0.4107966614124612, + "learning_rate": 1.9970693566962866e-06, + "loss": 0.7579058408737183, + "step": 636 + }, + { + "epoch": 0.14677419354838708, + "grad_norm": 0.4609569973718347, + "learning_rate": 1.997040137957045e-06, + "loss": 1.0709021091461182, + "step": 637 + }, + { + "epoch": 0.14700460829493087, + "grad_norm": 0.5029329480654331, + "learning_rate": 1.9970107744987474e-06, + "loss": 0.9911563396453857, + "step": 638 + }, + { + "epoch": 0.14723502304147465, + "grad_norm": 0.45338591583748106, + "learning_rate": 
1.996981266325655e-06, + "loss": 0.9673472046852112, + "step": 639 + }, + { + "epoch": 0.14746543778801843, + "grad_norm": 0.3918341582647882, + "learning_rate": 1.9969516134420523e-06, + "loss": 0.7728441953659058, + "step": 640 + }, + { + "epoch": 0.14769585253456222, + "grad_norm": 0.532382418423259, + "learning_rate": 1.9969218158522426e-06, + "loss": 1.0198101997375488, + "step": 641 + }, + { + "epoch": 0.147926267281106, + "grad_norm": 0.45259693038053805, + "learning_rate": 1.996891873560551e-06, + "loss": 0.9710760116577148, + "step": 642 + }, + { + "epoch": 0.14815668202764978, + "grad_norm": 0.41281494255735757, + "learning_rate": 1.9968617865713237e-06, + "loss": 0.9956847429275513, + "step": 643 + }, + { + "epoch": 0.14838709677419354, + "grad_norm": 0.40081796016292187, + "learning_rate": 1.996831554888928e-06, + "loss": 1.0974771976470947, + "step": 644 + }, + { + "epoch": 0.14861751152073732, + "grad_norm": 0.5353172634899142, + "learning_rate": 1.9968011785177513e-06, + "loss": 0.914455771446228, + "step": 645 + }, + { + "epoch": 0.1488479262672811, + "grad_norm": 0.5511418094652546, + "learning_rate": 1.9967706574622033e-06, + "loss": 1.1308314800262451, + "step": 646 + }, + { + "epoch": 0.1490783410138249, + "grad_norm": 0.5114786055194052, + "learning_rate": 1.9967399917267142e-06, + "loss": 0.981814444065094, + "step": 647 + }, + { + "epoch": 0.14930875576036867, + "grad_norm": 0.431645238473459, + "learning_rate": 1.9967091813157345e-06, + "loss": 0.874076247215271, + "step": 648 + }, + { + "epoch": 0.14953917050691246, + "grad_norm": 0.39621973386547166, + "learning_rate": 1.9966782262337365e-06, + "loss": 0.8496171832084656, + "step": 649 + }, + { + "epoch": 0.1497695852534562, + "grad_norm": 0.49468581823361646, + "learning_rate": 1.9966471264852136e-06, + "loss": 0.9395674467086792, + "step": 650 + }, + { + "epoch": 0.15, + "grad_norm": 0.4120224768195847, + "learning_rate": 1.99661588207468e-06, + "loss": 0.8363018035888672, + "step": 651 
+ }, + { + "epoch": 0.15023041474654378, + "grad_norm": 0.4552124844336583, + "learning_rate": 1.9965844930066696e-06, + "loss": 1.0035831928253174, + "step": 652 + }, + { + "epoch": 0.15046082949308756, + "grad_norm": 0.3910663219458386, + "learning_rate": 1.99655295928574e-06, + "loss": 1.0316795110702515, + "step": 653 + }, + { + "epoch": 0.15069124423963134, + "grad_norm": 0.4287067909796643, + "learning_rate": 1.9965212809164676e-06, + "loss": 0.9545150995254517, + "step": 654 + }, + { + "epoch": 0.15092165898617513, + "grad_norm": 0.4577708396372056, + "learning_rate": 1.99648945790345e-06, + "loss": 0.993801474571228, + "step": 655 + }, + { + "epoch": 0.15115207373271888, + "grad_norm": 0.4032410507172632, + "learning_rate": 1.9964574902513075e-06, + "loss": 0.8666588664054871, + "step": 656 + }, + { + "epoch": 0.15138248847926267, + "grad_norm": 0.48179220104835324, + "learning_rate": 1.9964253779646787e-06, + "loss": 0.9507651925086975, + "step": 657 + }, + { + "epoch": 0.15161290322580645, + "grad_norm": 0.4899406622119438, + "learning_rate": 1.996393121048226e-06, + "loss": 0.8700851202011108, + "step": 658 + }, + { + "epoch": 0.15184331797235023, + "grad_norm": 0.40256613113119405, + "learning_rate": 1.9963607195066307e-06, + "loss": 0.9966975450515747, + "step": 659 + }, + { + "epoch": 0.15207373271889402, + "grad_norm": 0.44964674216674483, + "learning_rate": 1.9963281733445957e-06, + "loss": 0.9552028179168701, + "step": 660 + }, + { + "epoch": 0.1523041474654378, + "grad_norm": 0.47921018457871023, + "learning_rate": 1.9962954825668456e-06, + "loss": 1.0182740688323975, + "step": 661 + }, + { + "epoch": 0.15253456221198156, + "grad_norm": 0.5096203598929419, + "learning_rate": 1.996262647178125e-06, + "loss": 1.0001778602600098, + "step": 662 + }, + { + "epoch": 0.15276497695852534, + "grad_norm": 0.44730944505165277, + "learning_rate": 1.9962296671832e-06, + "loss": 0.9902865886688232, + "step": 663 + }, + { + "epoch": 0.15299539170506912, + 
"grad_norm": 0.44977913840647327, + "learning_rate": 1.9961965425868575e-06, + "loss": 0.9272845983505249, + "step": 664 + }, + { + "epoch": 0.1532258064516129, + "grad_norm": 0.5011405916103636, + "learning_rate": 1.996163273393906e-06, + "loss": 0.9705777168273926, + "step": 665 + }, + { + "epoch": 0.1534562211981567, + "grad_norm": 0.5035568947424544, + "learning_rate": 1.9961298596091736e-06, + "loss": 0.9472209215164185, + "step": 666 + }, + { + "epoch": 0.15368663594470047, + "grad_norm": 0.3982786140702462, + "learning_rate": 1.9960963012375113e-06, + "loss": 0.9734043478965759, + "step": 667 + }, + { + "epoch": 0.15391705069124423, + "grad_norm": 0.516464356110248, + "learning_rate": 1.9960625982837894e-06, + "loss": 0.8765468001365662, + "step": 668 + }, + { + "epoch": 0.154147465437788, + "grad_norm": 0.6158206412964224, + "learning_rate": 1.9960287507529e-06, + "loss": 1.0224063396453857, + "step": 669 + }, + { + "epoch": 0.1543778801843318, + "grad_norm": 0.4417623345727513, + "learning_rate": 1.995994758649756e-06, + "loss": 0.983299970626831, + "step": 670 + }, + { + "epoch": 0.15460829493087558, + "grad_norm": 0.4680475004359934, + "learning_rate": 1.9959606219792914e-06, + "loss": 1.0524147748947144, + "step": 671 + }, + { + "epoch": 0.15483870967741936, + "grad_norm": 0.45758073401288785, + "learning_rate": 1.9959263407464606e-06, + "loss": 1.1012977361679077, + "step": 672 + }, + { + "epoch": 0.15506912442396314, + "grad_norm": 0.6298296463565526, + "learning_rate": 1.99589191495624e-06, + "loss": 0.8494049310684204, + "step": 673 + }, + { + "epoch": 0.1552995391705069, + "grad_norm": 0.4795860182347848, + "learning_rate": 1.9958573446136263e-06, + "loss": 0.8677045106887817, + "step": 674 + }, + { + "epoch": 0.15552995391705068, + "grad_norm": 0.5514260857685808, + "learning_rate": 1.995822629723638e-06, + "loss": 1.1034941673278809, + "step": 675 + }, + { + "epoch": 0.15576036866359447, + "grad_norm": 0.3959041890885462, + "learning_rate": 
1.9957877702913128e-06, + "loss": 0.8428820371627808, + "step": 676 + }, + { + "epoch": 0.15599078341013825, + "grad_norm": 0.5435721054179383, + "learning_rate": 1.9957527663217107e-06, + "loss": 0.8584408760070801, + "step": 677 + }, + { + "epoch": 0.15622119815668203, + "grad_norm": 0.47044010204436115, + "learning_rate": 1.995717617819913e-06, + "loss": 0.8089514970779419, + "step": 678 + }, + { + "epoch": 0.15645161290322582, + "grad_norm": 0.48360065475233177, + "learning_rate": 1.9956823247910217e-06, + "loss": 0.8459775447845459, + "step": 679 + }, + { + "epoch": 0.15668202764976957, + "grad_norm": 0.441023419118176, + "learning_rate": 1.9956468872401583e-06, + "loss": 1.0583066940307617, + "step": 680 + }, + { + "epoch": 0.15691244239631336, + "grad_norm": 0.4427871322496545, + "learning_rate": 1.995611305172468e-06, + "loss": 0.9396135807037354, + "step": 681 + }, + { + "epoch": 0.15714285714285714, + "grad_norm": 0.4888169944824013, + "learning_rate": 1.995575578593114e-06, + "loss": 1.0143593549728394, + "step": 682 + }, + { + "epoch": 0.15737327188940092, + "grad_norm": 0.44801312951365924, + "learning_rate": 1.9955397075072833e-06, + "loss": 0.8822500109672546, + "step": 683 + }, + { + "epoch": 0.1576036866359447, + "grad_norm": 0.4936771776275296, + "learning_rate": 1.995503691920182e-06, + "loss": 0.8841962218284607, + "step": 684 + }, + { + "epoch": 0.1578341013824885, + "grad_norm": 0.4240877666200064, + "learning_rate": 1.9954675318370374e-06, + "loss": 0.8537080883979797, + "step": 685 + }, + { + "epoch": 0.15806451612903225, + "grad_norm": 0.5056113314098377, + "learning_rate": 1.9954312272630985e-06, + "loss": 1.0292394161224365, + "step": 686 + }, + { + "epoch": 0.15829493087557603, + "grad_norm": 0.5106923922410934, + "learning_rate": 1.995394778203635e-06, + "loss": 0.8741706013679504, + "step": 687 + }, + { + "epoch": 0.1585253456221198, + "grad_norm": 0.47911475912836377, + "learning_rate": 1.995358184663937e-06, + "loss": 
0.9429572820663452, + "step": 688 + }, + { + "epoch": 0.1587557603686636, + "grad_norm": 0.5562334593954328, + "learning_rate": 1.995321446649316e-06, + "loss": 0.9522494077682495, + "step": 689 + }, + { + "epoch": 0.15898617511520738, + "grad_norm": 0.5394048021515351, + "learning_rate": 1.9952845641651046e-06, + "loss": 0.9743782877922058, + "step": 690 + }, + { + "epoch": 0.15921658986175116, + "grad_norm": 0.4663620909245047, + "learning_rate": 1.995247537216657e-06, + "loss": 0.926364541053772, + "step": 691 + }, + { + "epoch": 0.15944700460829492, + "grad_norm": 0.4595450639525932, + "learning_rate": 1.995210365809346e-06, + "loss": 0.8355565071105957, + "step": 692 + }, + { + "epoch": 0.1596774193548387, + "grad_norm": 0.44548737988500176, + "learning_rate": 1.9951730499485684e-06, + "loss": 0.9200692772865295, + "step": 693 + }, + { + "epoch": 0.15990783410138248, + "grad_norm": 0.36513232613054547, + "learning_rate": 1.99513558963974e-06, + "loss": 0.7571361064910889, + "step": 694 + }, + { + "epoch": 0.16013824884792627, + "grad_norm": 0.48187866859107054, + "learning_rate": 1.995097984888298e-06, + "loss": 0.935307502746582, + "step": 695 + }, + { + "epoch": 0.16036866359447005, + "grad_norm": 0.5833897193983939, + "learning_rate": 1.995060235699701e-06, + "loss": 1.1118557453155518, + "step": 696 + }, + { + "epoch": 0.16059907834101383, + "grad_norm": 0.3866866326578979, + "learning_rate": 1.995022342079428e-06, + "loss": 0.8024749755859375, + "step": 697 + }, + { + "epoch": 0.1608294930875576, + "grad_norm": 0.44217187311148026, + "learning_rate": 1.994984304032979e-06, + "loss": 0.9018943309783936, + "step": 698 + }, + { + "epoch": 0.16105990783410137, + "grad_norm": 0.4729402911259197, + "learning_rate": 1.9949461215658757e-06, + "loss": 0.8571128249168396, + "step": 699 + }, + { + "epoch": 0.16129032258064516, + "grad_norm": 0.4822593475964477, + "learning_rate": 1.99490779468366e-06, + "loss": 0.9707971215248108, + "step": 700 + }, + { + "epoch": 
0.16152073732718894, + "grad_norm": 0.4341551988253619, + "learning_rate": 1.994869323391895e-06, + "loss": 0.8157618045806885, + "step": 701 + }, + { + "epoch": 0.16175115207373272, + "grad_norm": 0.4620050649733586, + "learning_rate": 1.994830707696165e-06, + "loss": 0.9009906053543091, + "step": 702 + }, + { + "epoch": 0.1619815668202765, + "grad_norm": 0.5270647594020066, + "learning_rate": 1.9947919476020745e-06, + "loss": 1.0093860626220703, + "step": 703 + }, + { + "epoch": 0.1622119815668203, + "grad_norm": 0.4233068308539462, + "learning_rate": 1.9947530431152494e-06, + "loss": 1.018160343170166, + "step": 704 + }, + { + "epoch": 0.16244239631336405, + "grad_norm": 0.5753809013533212, + "learning_rate": 1.9947139942413378e-06, + "loss": 0.9755370616912842, + "step": 705 + }, + { + "epoch": 0.16267281105990783, + "grad_norm": 0.490686071812002, + "learning_rate": 1.994674800986006e-06, + "loss": 0.9406822919845581, + "step": 706 + }, + { + "epoch": 0.1629032258064516, + "grad_norm": 0.4856505350445516, + "learning_rate": 1.994635463354944e-06, + "loss": 0.9128296971321106, + "step": 707 + }, + { + "epoch": 0.1631336405529954, + "grad_norm": 0.42889971607025285, + "learning_rate": 1.994595981353861e-06, + "loss": 0.929735541343689, + "step": 708 + }, + { + "epoch": 0.16336405529953918, + "grad_norm": 0.5176054911036664, + "learning_rate": 1.994556354988488e-06, + "loss": 0.9021023511886597, + "step": 709 + }, + { + "epoch": 0.16359447004608296, + "grad_norm": 0.46567553841056064, + "learning_rate": 1.994516584264577e-06, + "loss": 0.9187623262405396, + "step": 710 + }, + { + "epoch": 0.16382488479262672, + "grad_norm": 0.4564071002670219, + "learning_rate": 1.9944766691879e-06, + "loss": 0.8283985257148743, + "step": 711 + }, + { + "epoch": 0.1640552995391705, + "grad_norm": 0.5448909609220928, + "learning_rate": 1.994436609764251e-06, + "loss": 1.0592901706695557, + "step": 712 + }, + { + "epoch": 0.16428571428571428, + "grad_norm": 0.5512946720093808, + 
"learning_rate": 1.9943964059994446e-06, + "loss": 0.98726487159729, + "step": 713 + }, + { + "epoch": 0.16451612903225807, + "grad_norm": 0.5060774432164115, + "learning_rate": 1.9943560578993165e-06, + "loss": 0.8761749267578125, + "step": 714 + }, + { + "epoch": 0.16474654377880185, + "grad_norm": 0.4759569802502017, + "learning_rate": 1.9943155654697227e-06, + "loss": 0.878170371055603, + "step": 715 + }, + { + "epoch": 0.16497695852534563, + "grad_norm": 0.5212205127966931, + "learning_rate": 1.9942749287165414e-06, + "loss": 0.9444767236709595, + "step": 716 + }, + { + "epoch": 0.1652073732718894, + "grad_norm": 0.436107073640643, + "learning_rate": 1.9942341476456697e-06, + "loss": 0.8270057439804077, + "step": 717 + }, + { + "epoch": 0.16543778801843317, + "grad_norm": 0.36828111446023454, + "learning_rate": 1.9941932222630284e-06, + "loss": 0.825955867767334, + "step": 718 + }, + { + "epoch": 0.16566820276497696, + "grad_norm": 0.4748059596727922, + "learning_rate": 1.9941521525745564e-06, + "loss": 0.9384286403656006, + "step": 719 + }, + { + "epoch": 0.16589861751152074, + "grad_norm": 0.5968010950850139, + "learning_rate": 1.994110938586216e-06, + "loss": 0.9627010226249695, + "step": 720 + }, + { + "epoch": 0.16612903225806452, + "grad_norm": 0.40665371786149496, + "learning_rate": 1.9940695803039886e-06, + "loss": 0.8436836004257202, + "step": 721 + }, + { + "epoch": 0.1663594470046083, + "grad_norm": 0.48219849106464674, + "learning_rate": 1.994028077733878e-06, + "loss": 1.0689928531646729, + "step": 722 + }, + { + "epoch": 0.16658986175115206, + "grad_norm": 0.4600242469407339, + "learning_rate": 1.993986430881907e-06, + "loss": 0.911309003829956, + "step": 723 + }, + { + "epoch": 0.16682027649769585, + "grad_norm": 0.5404195969690949, + "learning_rate": 1.993944639754122e-06, + "loss": 0.9897152185440063, + "step": 724 + }, + { + "epoch": 0.16705069124423963, + "grad_norm": 0.48212503869308937, + "learning_rate": 1.9939027043565883e-06, + "loss": 
1.0230367183685303, + "step": 725 + }, + { + "epoch": 0.1672811059907834, + "grad_norm": 0.4398728967426152, + "learning_rate": 1.993860624695393e-06, + "loss": 0.8067069053649902, + "step": 726 + }, + { + "epoch": 0.1675115207373272, + "grad_norm": 0.5835576425821721, + "learning_rate": 1.9938184007766434e-06, + "loss": 0.9784343242645264, + "step": 727 + }, + { + "epoch": 0.16774193548387098, + "grad_norm": 0.5139557651921927, + "learning_rate": 1.9937760326064686e-06, + "loss": 0.8617877960205078, + "step": 728 + }, + { + "epoch": 0.16797235023041474, + "grad_norm": 0.5276605551773887, + "learning_rate": 1.9937335201910183e-06, + "loss": 1.0390141010284424, + "step": 729 + }, + { + "epoch": 0.16820276497695852, + "grad_norm": 0.5007165894606777, + "learning_rate": 1.9936908635364633e-06, + "loss": 1.0478965044021606, + "step": 730 + }, + { + "epoch": 0.1684331797235023, + "grad_norm": 0.46789644745982956, + "learning_rate": 1.9936480626489944e-06, + "loss": 0.8396252393722534, + "step": 731 + }, + { + "epoch": 0.16866359447004609, + "grad_norm": 0.4366381763655398, + "learning_rate": 1.9936051175348256e-06, + "loss": 0.8690099120140076, + "step": 732 + }, + { + "epoch": 0.16889400921658987, + "grad_norm": 0.44373038767323764, + "learning_rate": 1.993562028200189e-06, + "loss": 0.944722056388855, + "step": 733 + }, + { + "epoch": 0.16912442396313365, + "grad_norm": 0.4480067961897654, + "learning_rate": 1.9935187946513385e-06, + "loss": 0.7134733200073242, + "step": 734 + }, + { + "epoch": 0.1693548387096774, + "grad_norm": 0.44081731431481436, + "learning_rate": 1.993475416894551e-06, + "loss": 0.8102486729621887, + "step": 735 + }, + { + "epoch": 0.1695852534562212, + "grad_norm": 0.5621249368486638, + "learning_rate": 1.9934318949361215e-06, + "loss": 0.924787163734436, + "step": 736 + }, + { + "epoch": 0.16981566820276497, + "grad_norm": 0.4621168425652111, + "learning_rate": 1.993388228782368e-06, + "loss": 0.9595087766647339, + "step": 737 + }, + { + 
"epoch": 0.17004608294930876, + "grad_norm": 0.4164356485660062, + "learning_rate": 1.993344418439628e-06, + "loss": 0.9949792623519897, + "step": 738 + }, + { + "epoch": 0.17027649769585254, + "grad_norm": 0.6359964400004778, + "learning_rate": 1.9933004639142604e-06, + "loss": 1.0905860662460327, + "step": 739 + }, + { + "epoch": 0.17050691244239632, + "grad_norm": 0.39800173884382345, + "learning_rate": 1.9932563652126455e-06, + "loss": 0.9638324975967407, + "step": 740 + }, + { + "epoch": 0.17073732718894008, + "grad_norm": 0.4909114039853375, + "learning_rate": 1.9932121223411844e-06, + "loss": 0.9434946179389954, + "step": 741 + }, + { + "epoch": 0.17096774193548386, + "grad_norm": 0.49072837958490606, + "learning_rate": 1.9931677353062983e-06, + "loss": 0.9050095081329346, + "step": 742 + }, + { + "epoch": 0.17119815668202765, + "grad_norm": 0.509303736181324, + "learning_rate": 1.9931232041144303e-06, + "loss": 1.0698316097259521, + "step": 743 + }, + { + "epoch": 0.17142857142857143, + "grad_norm": 0.393391743712663, + "learning_rate": 1.993078528772044e-06, + "loss": 0.7938296794891357, + "step": 744 + }, + { + "epoch": 0.1716589861751152, + "grad_norm": 0.46597408496400117, + "learning_rate": 1.993033709285624e-06, + "loss": 0.8485043048858643, + "step": 745 + }, + { + "epoch": 0.171889400921659, + "grad_norm": 0.4736797887475262, + "learning_rate": 1.9929887456616754e-06, + "loss": 0.8605694770812988, + "step": 746 + }, + { + "epoch": 0.17211981566820275, + "grad_norm": 0.40523028160004354, + "learning_rate": 1.9929436379067253e-06, + "loss": 0.7101563215255737, + "step": 747 + }, + { + "epoch": 0.17235023041474654, + "grad_norm": 0.4519555914654837, + "learning_rate": 1.9928983860273205e-06, + "loss": 1.093912959098816, + "step": 748 + }, + { + "epoch": 0.17258064516129032, + "grad_norm": 0.4930830686705908, + "learning_rate": 1.9928529900300294e-06, + "loss": 0.8099753856658936, + "step": 749 + }, + { + "epoch": 0.1728110599078341, + "grad_norm": 
0.3752662958180716, + "learning_rate": 1.992807449921441e-06, + "loss": 0.7816359400749207, + "step": 750 + }, + { + "epoch": 0.17304147465437789, + "grad_norm": 0.5180432792159949, + "learning_rate": 1.9927617657081656e-06, + "loss": 0.8887455463409424, + "step": 751 + }, + { + "epoch": 0.17327188940092167, + "grad_norm": 0.6260862232080928, + "learning_rate": 1.992715937396834e-06, + "loss": 1.0926017761230469, + "step": 752 + }, + { + "epoch": 0.17350230414746542, + "grad_norm": 0.5546410088380269, + "learning_rate": 1.9926699649940985e-06, + "loss": 0.7657707929611206, + "step": 753 + }, + { + "epoch": 0.1737327188940092, + "grad_norm": 0.5766197712214459, + "learning_rate": 1.992623848506632e-06, + "loss": 0.9350340366363525, + "step": 754 + }, + { + "epoch": 0.173963133640553, + "grad_norm": 0.5011774306610247, + "learning_rate": 1.9925775879411276e-06, + "loss": 0.883575439453125, + "step": 755 + }, + { + "epoch": 0.17419354838709677, + "grad_norm": 0.3678933943457833, + "learning_rate": 1.9925311833043e-06, + "loss": 0.814304769039154, + "step": 756 + }, + { + "epoch": 0.17442396313364056, + "grad_norm": 0.5857143887476359, + "learning_rate": 1.992484634602886e-06, + "loss": 0.9263690710067749, + "step": 757 + }, + { + "epoch": 0.17465437788018434, + "grad_norm": 0.49862680540203774, + "learning_rate": 1.9924379418436402e-06, + "loss": 1.0321627855300903, + "step": 758 + }, + { + "epoch": 0.1748847926267281, + "grad_norm": 0.5062063825952041, + "learning_rate": 1.9923911050333413e-06, + "loss": 0.969459056854248, + "step": 759 + }, + { + "epoch": 0.17511520737327188, + "grad_norm": 0.4554436665394103, + "learning_rate": 1.9923441241787874e-06, + "loss": 0.9926396012306213, + "step": 760 + }, + { + "epoch": 0.17534562211981566, + "grad_norm": 0.43315077691547155, + "learning_rate": 1.9922969992867975e-06, + "loss": 0.776180624961853, + "step": 761 + }, + { + "epoch": 0.17557603686635945, + "grad_norm": 0.5350913373105377, + "learning_rate": 
1.992249730364212e-06, + "loss": 0.9413800239562988, + "step": 762 + }, + { + "epoch": 0.17580645161290323, + "grad_norm": 0.48045178893419493, + "learning_rate": 1.9922023174178913e-06, + "loss": 0.8365576267242432, + "step": 763 + }, + { + "epoch": 0.17603686635944701, + "grad_norm": 0.47752363664412967, + "learning_rate": 1.992154760454718e-06, + "loss": 1.023102879524231, + "step": 764 + }, + { + "epoch": 0.17626728110599077, + "grad_norm": 0.6035875388891613, + "learning_rate": 1.9921070594815944e-06, + "loss": 1.079930067062378, + "step": 765 + }, + { + "epoch": 0.17649769585253455, + "grad_norm": 0.44885698296531085, + "learning_rate": 1.9920592145054445e-06, + "loss": 0.8974392414093018, + "step": 766 + }, + { + "epoch": 0.17672811059907834, + "grad_norm": 0.5363940338283703, + "learning_rate": 1.9920112255332133e-06, + "loss": 0.9509298205375671, + "step": 767 + }, + { + "epoch": 0.17695852534562212, + "grad_norm": 0.3960858930926947, + "learning_rate": 1.991963092571866e-06, + "loss": 0.938835620880127, + "step": 768 + }, + { + "epoch": 0.1771889400921659, + "grad_norm": 0.3409332869225393, + "learning_rate": 1.9919148156283888e-06, + "loss": 0.7918044328689575, + "step": 769 + }, + { + "epoch": 0.1774193548387097, + "grad_norm": 0.46985590284048473, + "learning_rate": 1.9918663947097893e-06, + "loss": 0.8235958814620972, + "step": 770 + }, + { + "epoch": 0.17764976958525347, + "grad_norm": 0.4734643903674827, + "learning_rate": 1.9918178298230953e-06, + "loss": 0.9079158902168274, + "step": 771 + }, + { + "epoch": 0.17788018433179723, + "grad_norm": 0.5764167010482935, + "learning_rate": 1.9917691209753563e-06, + "loss": 0.8548607230186462, + "step": 772 + }, + { + "epoch": 0.178110599078341, + "grad_norm": 0.47446352682333093, + "learning_rate": 1.9917202681736428e-06, + "loss": 0.8327757120132446, + "step": 773 + }, + { + "epoch": 0.1783410138248848, + "grad_norm": 0.5415533792438672, + "learning_rate": 1.991671271425045e-06, + "loss": 
1.0511503219604492, + "step": 774 + }, + { + "epoch": 0.17857142857142858, + "grad_norm": 0.4310425860855909, + "learning_rate": 1.991622130736675e-06, + "loss": 0.9168857932090759, + "step": 775 + }, + { + "epoch": 0.17880184331797236, + "grad_norm": 0.44391822434593214, + "learning_rate": 1.9915728461156654e-06, + "loss": 0.8740782737731934, + "step": 776 + }, + { + "epoch": 0.17903225806451614, + "grad_norm": 0.5841506637592749, + "learning_rate": 1.99152341756917e-06, + "loss": 0.9706588983535767, + "step": 777 + }, + { + "epoch": 0.1792626728110599, + "grad_norm": 0.5492923015057676, + "learning_rate": 1.9914738451043627e-06, + "loss": 1.144281268119812, + "step": 778 + }, + { + "epoch": 0.17949308755760368, + "grad_norm": 0.4170516305027483, + "learning_rate": 1.9914241287284403e-06, + "loss": 0.973777174949646, + "step": 779 + }, + { + "epoch": 0.17972350230414746, + "grad_norm": 0.4502683719091688, + "learning_rate": 1.991374268448617e-06, + "loss": 0.9002145528793335, + "step": 780 + }, + { + "epoch": 0.17995391705069125, + "grad_norm": 0.5526460425242373, + "learning_rate": 1.9913242642721316e-06, + "loss": 0.9234670400619507, + "step": 781 + }, + { + "epoch": 0.18018433179723503, + "grad_norm": 0.4959743401985291, + "learning_rate": 1.9912741162062415e-06, + "loss": 0.9552402496337891, + "step": 782 + }, + { + "epoch": 0.18041474654377881, + "grad_norm": 0.5510111451188886, + "learning_rate": 1.9912238242582257e-06, + "loss": 1.0485708713531494, + "step": 783 + }, + { + "epoch": 0.18064516129032257, + "grad_norm": 0.5447745918227888, + "learning_rate": 1.991173388435384e-06, + "loss": 0.9852809906005859, + "step": 784 + }, + { + "epoch": 0.18087557603686635, + "grad_norm": 0.4726322734582533, + "learning_rate": 1.991122808745037e-06, + "loss": 0.7824808359146118, + "step": 785 + }, + { + "epoch": 0.18110599078341014, + "grad_norm": 0.6534462420793078, + "learning_rate": 1.9910720851945268e-06, + "loss": 1.0380492210388184, + "step": 786 + }, + { + 
"epoch": 0.18133640552995392, + "grad_norm": 0.48532232647089923, + "learning_rate": 1.991021217791215e-06, + "loss": 0.9808282256126404, + "step": 787 + }, + { + "epoch": 0.1815668202764977, + "grad_norm": 0.4791928008108061, + "learning_rate": 1.9909702065424854e-06, + "loss": 0.8636116981506348, + "step": 788 + }, + { + "epoch": 0.1817972350230415, + "grad_norm": 0.45783287516468024, + "learning_rate": 1.9909190514557427e-06, + "loss": 0.8179407715797424, + "step": 789 + }, + { + "epoch": 0.18202764976958524, + "grad_norm": 0.4760021295113364, + "learning_rate": 1.990867752538411e-06, + "loss": 0.9424594044685364, + "step": 790 + }, + { + "epoch": 0.18225806451612903, + "grad_norm": 0.5558557995369799, + "learning_rate": 1.9908163097979366e-06, + "loss": 0.9429298043251038, + "step": 791 + }, + { + "epoch": 0.1824884792626728, + "grad_norm": 0.5944732273868478, + "learning_rate": 1.990764723241787e-06, + "loss": 0.9671716690063477, + "step": 792 + }, + { + "epoch": 0.1827188940092166, + "grad_norm": 0.6041148299127167, + "learning_rate": 1.9907129928774494e-06, + "loss": 1.0063345432281494, + "step": 793 + }, + { + "epoch": 0.18294930875576038, + "grad_norm": 0.4817475331580677, + "learning_rate": 1.990661118712432e-06, + "loss": 0.9932061433792114, + "step": 794 + }, + { + "epoch": 0.18317972350230416, + "grad_norm": 0.4648544131499562, + "learning_rate": 1.990609100754265e-06, + "loss": 0.859153151512146, + "step": 795 + }, + { + "epoch": 0.18341013824884791, + "grad_norm": 0.4738825500961963, + "learning_rate": 1.9905569390104984e-06, + "loss": 0.9328111410140991, + "step": 796 + }, + { + "epoch": 0.1836405529953917, + "grad_norm": 0.542624486663781, + "learning_rate": 1.9905046334887033e-06, + "loss": 0.9970628619194031, + "step": 797 + }, + { + "epoch": 0.18387096774193548, + "grad_norm": 0.41971271798029636, + "learning_rate": 1.990452184196472e-06, + "loss": 1.0347282886505127, + "step": 798 + }, + { + "epoch": 0.18410138248847926, + "grad_norm": 
0.4270967132251902, + "learning_rate": 1.990399591141417e-06, + "loss": 0.9167106747627258, + "step": 799 + }, + { + "epoch": 0.18433179723502305, + "grad_norm": 0.5046236893106074, + "learning_rate": 1.990346854331173e-06, + "loss": 0.8895610570907593, + "step": 800 + }, + { + "epoch": 0.18456221198156683, + "grad_norm": 0.5237845429219861, + "learning_rate": 1.990293973773394e-06, + "loss": 0.8525041341781616, + "step": 801 + }, + { + "epoch": 0.1847926267281106, + "grad_norm": 0.4894836264572075, + "learning_rate": 1.9902409494757553e-06, + "loss": 0.8184069395065308, + "step": 802 + }, + { + "epoch": 0.18502304147465437, + "grad_norm": 0.430895578738413, + "learning_rate": 1.9901877814459544e-06, + "loss": 0.8342509269714355, + "step": 803 + }, + { + "epoch": 0.18525345622119815, + "grad_norm": 0.49779999067704434, + "learning_rate": 1.9901344696917072e-06, + "loss": 0.9254395365715027, + "step": 804 + }, + { + "epoch": 0.18548387096774194, + "grad_norm": 0.5124892914660328, + "learning_rate": 1.990081014220753e-06, + "loss": 0.9537396430969238, + "step": 805 + }, + { + "epoch": 0.18571428571428572, + "grad_norm": 0.47100696643896606, + "learning_rate": 1.99002741504085e-06, + "loss": 0.871498167514801, + "step": 806 + }, + { + "epoch": 0.1859447004608295, + "grad_norm": 0.43363760401100476, + "learning_rate": 1.9899736721597786e-06, + "loss": 0.879954993724823, + "step": 807 + }, + { + "epoch": 0.18617511520737326, + "grad_norm": 0.5651525829110051, + "learning_rate": 1.9899197855853386e-06, + "loss": 0.9238240718841553, + "step": 808 + }, + { + "epoch": 0.18640552995391704, + "grad_norm": 0.43185548411741037, + "learning_rate": 1.9898657553253527e-06, + "loss": 0.7939119935035706, + "step": 809 + }, + { + "epoch": 0.18663594470046083, + "grad_norm": 0.42423118388289394, + "learning_rate": 1.989811581387663e-06, + "loss": 0.8536086082458496, + "step": 810 + }, + { + "epoch": 0.1868663594470046, + "grad_norm": 0.7488569193689159, + "learning_rate": 
1.9897572637801322e-06, + "loss": 0.8272225856781006, + "step": 811 + }, + { + "epoch": 0.1870967741935484, + "grad_norm": 0.5639808995976617, + "learning_rate": 1.989702802510645e-06, + "loss": 0.9187904596328735, + "step": 812 + }, + { + "epoch": 0.18732718894009218, + "grad_norm": 0.5096509814307604, + "learning_rate": 1.989648197587106e-06, + "loss": 0.905516505241394, + "step": 813 + }, + { + "epoch": 0.18755760368663593, + "grad_norm": 0.46349746061643887, + "learning_rate": 1.9895934490174415e-06, + "loss": 0.7548567056655884, + "step": 814 + }, + { + "epoch": 0.18778801843317972, + "grad_norm": 0.5916446556749395, + "learning_rate": 1.9895385568095978e-06, + "loss": 0.8242576122283936, + "step": 815 + }, + { + "epoch": 0.1880184331797235, + "grad_norm": 0.47871736963615374, + "learning_rate": 1.9894835209715427e-06, + "loss": 0.9861007928848267, + "step": 816 + }, + { + "epoch": 0.18824884792626728, + "grad_norm": 0.5325996448618295, + "learning_rate": 1.989428341511264e-06, + "loss": 0.9705426096916199, + "step": 817 + }, + { + "epoch": 0.18847926267281107, + "grad_norm": 0.5222036147665577, + "learning_rate": 1.9893730184367722e-06, + "loss": 0.9773565530776978, + "step": 818 + }, + { + "epoch": 0.18870967741935485, + "grad_norm": 0.42837248272258044, + "learning_rate": 1.989317551756096e-06, + "loss": 0.7929856777191162, + "step": 819 + }, + { + "epoch": 0.1889400921658986, + "grad_norm": 0.48925051722314383, + "learning_rate": 1.9892619414772866e-06, + "loss": 0.9749126434326172, + "step": 820 + }, + { + "epoch": 0.1891705069124424, + "grad_norm": 0.49968815355517815, + "learning_rate": 1.9892061876084166e-06, + "loss": 0.9945374727249146, + "step": 821 + }, + { + "epoch": 0.18940092165898617, + "grad_norm": 0.3942389156154952, + "learning_rate": 1.9891502901575776e-06, + "loss": 0.8016892075538635, + "step": 822 + }, + { + "epoch": 0.18963133640552995, + "grad_norm": 0.5604199160430772, + "learning_rate": 1.9890942491328837e-06, + "loss": 
0.9389557838439941, + "step": 823 + }, + { + "epoch": 0.18986175115207374, + "grad_norm": 0.38179956879765936, + "learning_rate": 1.9890380645424686e-06, + "loss": 0.724082887172699, + "step": 824 + }, + { + "epoch": 0.19009216589861752, + "grad_norm": 0.5409880819899738, + "learning_rate": 1.988981736394488e-06, + "loss": 0.8877915143966675, + "step": 825 + }, + { + "epoch": 0.19032258064516128, + "grad_norm": 0.6992705135248997, + "learning_rate": 1.9889252646971177e-06, + "loss": 1.207446813583374, + "step": 826 + }, + { + "epoch": 0.19055299539170506, + "grad_norm": 0.5040994233955279, + "learning_rate": 1.9888686494585542e-06, + "loss": 0.9155057668685913, + "step": 827 + }, + { + "epoch": 0.19078341013824884, + "grad_norm": 0.5532998867192596, + "learning_rate": 1.9888118906870154e-06, + "loss": 1.005772352218628, + "step": 828 + }, + { + "epoch": 0.19101382488479263, + "grad_norm": 0.42790166152469256, + "learning_rate": 1.9887549883907394e-06, + "loss": 0.9060605764389038, + "step": 829 + }, + { + "epoch": 0.1912442396313364, + "grad_norm": 0.5177028577691919, + "learning_rate": 1.988697942577986e-06, + "loss": 0.7652161717414856, + "step": 830 + }, + { + "epoch": 0.1914746543778802, + "grad_norm": 0.5981838434161031, + "learning_rate": 1.9886407532570354e-06, + "loss": 1.0191380977630615, + "step": 831 + }, + { + "epoch": 0.19170506912442398, + "grad_norm": 0.4987711114148914, + "learning_rate": 1.9885834204361876e-06, + "loss": 0.9497933387756348, + "step": 832 + }, + { + "epoch": 0.19193548387096773, + "grad_norm": 0.462035144334916, + "learning_rate": 1.9885259441237657e-06, + "loss": 0.7728058099746704, + "step": 833 + }, + { + "epoch": 0.19216589861751152, + "grad_norm": 0.517810203206895, + "learning_rate": 1.9884683243281113e-06, + "loss": 0.8961999416351318, + "step": 834 + }, + { + "epoch": 0.1923963133640553, + "grad_norm": 0.49386963761649333, + "learning_rate": 1.9884105610575885e-06, + "loss": 0.9218904972076416, + "step": 835 + }, + { + 
"epoch": 0.19262672811059908, + "grad_norm": 0.49785428541631027, + "learning_rate": 1.9883526543205807e-06, + "loss": 0.8411329984664917, + "step": 836 + }, + { + "epoch": 0.19285714285714287, + "grad_norm": 0.42947794662366, + "learning_rate": 1.988294604125494e-06, + "loss": 0.9536285400390625, + "step": 837 + }, + { + "epoch": 0.19308755760368665, + "grad_norm": 0.589338261376726, + "learning_rate": 1.9882364104807535e-06, + "loss": 0.9404321908950806, + "step": 838 + }, + { + "epoch": 0.1933179723502304, + "grad_norm": 0.6889982860652113, + "learning_rate": 1.9881780733948066e-06, + "loss": 1.2520880699157715, + "step": 839 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 0.5071547317768794, + "learning_rate": 1.9881195928761205e-06, + "loss": 0.8961449861526489, + "step": 840 + }, + { + "epoch": 0.19377880184331797, + "grad_norm": 0.5612915327251169, + "learning_rate": 1.9880609689331833e-06, + "loss": 0.8844394683837891, + "step": 841 + }, + { + "epoch": 0.19400921658986175, + "grad_norm": 0.6383643268501873, + "learning_rate": 1.9880022015745044e-06, + "loss": 1.1305835247039795, + "step": 842 + }, + { + "epoch": 0.19423963133640554, + "grad_norm": 0.5396685716999928, + "learning_rate": 1.9879432908086143e-06, + "loss": 0.9980956315994263, + "step": 843 + }, + { + "epoch": 0.19447004608294932, + "grad_norm": 0.46511386172638836, + "learning_rate": 1.987884236644063e-06, + "loss": 0.7613730430603027, + "step": 844 + }, + { + "epoch": 0.19470046082949308, + "grad_norm": 0.6010725617242704, + "learning_rate": 1.987825039089423e-06, + "loss": 0.9742579460144043, + "step": 845 + }, + { + "epoch": 0.19493087557603686, + "grad_norm": 0.4022001131058661, + "learning_rate": 1.9877656981532864e-06, + "loss": 0.7118766903877258, + "step": 846 + }, + { + "epoch": 0.19516129032258064, + "grad_norm": 0.48902949112989696, + "learning_rate": 1.9877062138442657e-06, + "loss": 0.8657095432281494, + "step": 847 + }, + { + "epoch": 0.19539170506912443, + "grad_norm": 
0.42720754806325495, + "learning_rate": 1.987646586170996e-06, + "loss": 0.8543902039527893, + "step": 848 + }, + { + "epoch": 0.1956221198156682, + "grad_norm": 0.4842820004763047, + "learning_rate": 1.9875868151421317e-06, + "loss": 0.8896970748901367, + "step": 849 + }, + { + "epoch": 0.195852534562212, + "grad_norm": 0.5225855938017534, + "learning_rate": 1.9875269007663486e-06, + "loss": 0.8662775754928589, + "step": 850 + }, + { + "epoch": 0.19608294930875575, + "grad_norm": 0.48460338230512107, + "learning_rate": 1.9874668430523434e-06, + "loss": 0.8241516351699829, + "step": 851 + }, + { + "epoch": 0.19631336405529953, + "grad_norm": 0.5278134062893883, + "learning_rate": 1.987406642008833e-06, + "loss": 0.973886251449585, + "step": 852 + }, + { + "epoch": 0.19654377880184332, + "grad_norm": 0.48464213201098744, + "learning_rate": 1.9873462976445554e-06, + "loss": 0.8133533000946045, + "step": 853 + }, + { + "epoch": 0.1967741935483871, + "grad_norm": 0.6657370368562822, + "learning_rate": 1.9872858099682697e-06, + "loss": 1.120869755744934, + "step": 854 + }, + { + "epoch": 0.19700460829493088, + "grad_norm": 0.47886128108046017, + "learning_rate": 1.9872251789887562e-06, + "loss": 0.9376444816589355, + "step": 855 + }, + { + "epoch": 0.19723502304147467, + "grad_norm": 0.4627008078705538, + "learning_rate": 1.9871644047148148e-06, + "loss": 0.8763699531555176, + "step": 856 + }, + { + "epoch": 0.19746543778801842, + "grad_norm": 0.5436736732062664, + "learning_rate": 1.9871034871552667e-06, + "loss": 0.7993260622024536, + "step": 857 + }, + { + "epoch": 0.1976958525345622, + "grad_norm": 0.5225344117964711, + "learning_rate": 1.9870424263189542e-06, + "loss": 1.0312654972076416, + "step": 858 + }, + { + "epoch": 0.197926267281106, + "grad_norm": 0.6040828842975151, + "learning_rate": 1.98698122221474e-06, + "loss": 1.0784629583358765, + "step": 859 + }, + { + "epoch": 0.19815668202764977, + "grad_norm": 0.5681257026488339, + "learning_rate": 
1.9869198748515085e-06, + "loss": 1.136039137840271, + "step": 860 + }, + { + "epoch": 0.19838709677419356, + "grad_norm": 0.5123381612546825, + "learning_rate": 1.986858384238163e-06, + "loss": 0.834873378276825, + "step": 861 + }, + { + "epoch": 0.19861751152073734, + "grad_norm": 0.5505167057841309, + "learning_rate": 1.98679675038363e-06, + "loss": 0.9705442190170288, + "step": 862 + }, + { + "epoch": 0.1988479262672811, + "grad_norm": 0.6567761197272963, + "learning_rate": 1.9867349732968547e-06, + "loss": 0.9343886375427246, + "step": 863 + }, + { + "epoch": 0.19907834101382488, + "grad_norm": 0.49387008808397015, + "learning_rate": 1.986673052986805e-06, + "loss": 0.9140456914901733, + "step": 864 + }, + { + "epoch": 0.19930875576036866, + "grad_norm": 0.5850607327811402, + "learning_rate": 1.986610989462467e-06, + "loss": 0.9121139049530029, + "step": 865 + }, + { + "epoch": 0.19953917050691244, + "grad_norm": 0.4775789448856378, + "learning_rate": 1.9865487827328505e-06, + "loss": 0.7333672642707825, + "step": 866 + }, + { + "epoch": 0.19976958525345623, + "grad_norm": 0.5039450613377916, + "learning_rate": 1.986486432806984e-06, + "loss": 0.8405989408493042, + "step": 867 + }, + { + "epoch": 0.2, + "grad_norm": 0.47371690470710304, + "learning_rate": 1.9864239396939176e-06, + "loss": 0.8693375587463379, + "step": 868 + }, + { + "epoch": 0.20023041474654377, + "grad_norm": 0.5727654616233698, + "learning_rate": 1.9863613034027223e-06, + "loss": 1.0137104988098145, + "step": 869 + }, + { + "epoch": 0.20046082949308755, + "grad_norm": 0.5382771457657299, + "learning_rate": 1.9862985239424895e-06, + "loss": 1.0283832550048828, + "step": 870 + }, + { + "epoch": 0.20069124423963133, + "grad_norm": 0.6200501422886965, + "learning_rate": 1.9862356013223316e-06, + "loss": 1.117444634437561, + "step": 871 + }, + { + "epoch": 0.20092165898617512, + "grad_norm": 0.6309070895129882, + "learning_rate": 1.986172535551382e-06, + "loss": 0.8861427307128906, + "step": 872 
+ }, + { + "epoch": 0.2011520737327189, + "grad_norm": 0.5017852774763055, + "learning_rate": 1.9861093266387946e-06, + "loss": 1.0273747444152832, + "step": 873 + }, + { + "epoch": 0.20138248847926268, + "grad_norm": 0.5141875246573869, + "learning_rate": 1.9860459745937437e-06, + "loss": 0.918023943901062, + "step": 874 + }, + { + "epoch": 0.20161290322580644, + "grad_norm": 0.5278755996885149, + "learning_rate": 1.9859824794254246e-06, + "loss": 0.8983356952667236, + "step": 875 + }, + { + "epoch": 0.20184331797235022, + "grad_norm": 0.5803540160351622, + "learning_rate": 1.985918841143054e-06, + "loss": 1.0180974006652832, + "step": 876 + }, + { + "epoch": 0.202073732718894, + "grad_norm": 0.48253787858386377, + "learning_rate": 1.985855059755869e-06, + "loss": 0.9656573534011841, + "step": 877 + }, + { + "epoch": 0.2023041474654378, + "grad_norm": 0.5015537059540116, + "learning_rate": 1.9857911352731273e-06, + "loss": 0.8522181510925293, + "step": 878 + }, + { + "epoch": 0.20253456221198157, + "grad_norm": 0.4883752495192941, + "learning_rate": 1.985727067704107e-06, + "loss": 0.9180892705917358, + "step": 879 + }, + { + "epoch": 0.20276497695852536, + "grad_norm": 0.5817140345419661, + "learning_rate": 1.985662857058108e-06, + "loss": 0.9979432821273804, + "step": 880 + }, + { + "epoch": 0.2029953917050691, + "grad_norm": 0.5608420179715049, + "learning_rate": 1.98559850334445e-06, + "loss": 0.8916480541229248, + "step": 881 + }, + { + "epoch": 0.2032258064516129, + "grad_norm": 0.41973060059994494, + "learning_rate": 1.9855340065724738e-06, + "loss": 0.8755770921707153, + "step": 882 + }, + { + "epoch": 0.20345622119815668, + "grad_norm": 0.5596516763963291, + "learning_rate": 1.9854693667515418e-06, + "loss": 1.0200350284576416, + "step": 883 + }, + { + "epoch": 0.20368663594470046, + "grad_norm": 0.5199867730002389, + "learning_rate": 1.9854045838910353e-06, + "loss": 0.928024172782898, + "step": 884 + }, + { + "epoch": 0.20391705069124424, + "grad_norm": 
0.5756725941645391, + "learning_rate": 1.9853396580003582e-06, + "loss": 0.8617212176322937, + "step": 885 + }, + { + "epoch": 0.20414746543778803, + "grad_norm": 0.5415263717139983, + "learning_rate": 1.985274589088934e-06, + "loss": 0.9383209943771362, + "step": 886 + }, + { + "epoch": 0.20437788018433178, + "grad_norm": 0.48094986017269503, + "learning_rate": 1.985209377166208e-06, + "loss": 0.7217687368392944, + "step": 887 + }, + { + "epoch": 0.20460829493087557, + "grad_norm": 0.612593081169746, + "learning_rate": 1.9851440222416446e-06, + "loss": 1.0717028379440308, + "step": 888 + }, + { + "epoch": 0.20483870967741935, + "grad_norm": 0.6063882651782059, + "learning_rate": 1.9850785243247303e-06, + "loss": 1.0137064456939697, + "step": 889 + }, + { + "epoch": 0.20506912442396313, + "grad_norm": 0.5244411173844509, + "learning_rate": 1.985012883424973e-06, + "loss": 0.8569058179855347, + "step": 890 + }, + { + "epoch": 0.20529953917050692, + "grad_norm": 0.6524290996376207, + "learning_rate": 1.9849470995518993e-06, + "loss": 0.9398901462554932, + "step": 891 + }, + { + "epoch": 0.2055299539170507, + "grad_norm": 0.3752296846015947, + "learning_rate": 1.9848811727150577e-06, + "loss": 0.731800377368927, + "step": 892 + }, + { + "epoch": 0.20576036866359446, + "grad_norm": 0.5142990565199794, + "learning_rate": 1.984815102924018e-06, + "loss": 0.8543055653572083, + "step": 893 + }, + { + "epoch": 0.20599078341013824, + "grad_norm": 0.5278314343821748, + "learning_rate": 1.98474889018837e-06, + "loss": 0.9112114906311035, + "step": 894 + }, + { + "epoch": 0.20622119815668202, + "grad_norm": 0.50708997202126, + "learning_rate": 1.984682534517724e-06, + "loss": 0.8272690773010254, + "step": 895 + }, + { + "epoch": 0.2064516129032258, + "grad_norm": 0.5912295968473946, + "learning_rate": 1.984616035921712e-06, + "loss": 0.9680918455123901, + "step": 896 + }, + { + "epoch": 0.2066820276497696, + "grad_norm": 0.6089139321115737, + "learning_rate": 
1.984549394409985e-06, + "loss": 0.815123438835144, + "step": 897 + }, + { + "epoch": 0.20691244239631337, + "grad_norm": 0.4952276433479721, + "learning_rate": 1.984482609992218e-06, + "loss": 0.8035521507263184, + "step": 898 + }, + { + "epoch": 0.20714285714285716, + "grad_norm": 0.548354244530079, + "learning_rate": 1.9844156826781027e-06, + "loss": 0.9000132083892822, + "step": 899 + }, + { + "epoch": 0.2073732718894009, + "grad_norm": 0.6652515011666116, + "learning_rate": 1.9843486124773543e-06, + "loss": 1.06328547000885, + "step": 900 + }, + { + "epoch": 0.2076036866359447, + "grad_norm": 0.4596762245312169, + "learning_rate": 1.9842813993997083e-06, + "loss": 0.9028425216674805, + "step": 901 + }, + { + "epoch": 0.20783410138248848, + "grad_norm": 0.5779573613376965, + "learning_rate": 1.9842140434549196e-06, + "loss": 0.7786350250244141, + "step": 902 + }, + { + "epoch": 0.20806451612903226, + "grad_norm": 0.5102795361356062, + "learning_rate": 1.9841465446527656e-06, + "loss": 0.8041539788246155, + "step": 903 + }, + { + "epoch": 0.20829493087557605, + "grad_norm": 0.4348300351835264, + "learning_rate": 1.9840789030030434e-06, + "loss": 0.8380184173583984, + "step": 904 + }, + { + "epoch": 0.20852534562211983, + "grad_norm": 0.7151525379978475, + "learning_rate": 1.984011118515572e-06, + "loss": 0.8191432952880859, + "step": 905 + }, + { + "epoch": 0.20875576036866358, + "grad_norm": 0.5006646807997585, + "learning_rate": 1.9839431912001885e-06, + "loss": 0.8236384391784668, + "step": 906 + }, + { + "epoch": 0.20898617511520737, + "grad_norm": 0.4959155947407375, + "learning_rate": 1.9838751210667534e-06, + "loss": 0.8218076825141907, + "step": 907 + }, + { + "epoch": 0.20921658986175115, + "grad_norm": 0.5127899266702147, + "learning_rate": 1.983806908125147e-06, + "loss": 0.9140353202819824, + "step": 908 + }, + { + "epoch": 0.20944700460829493, + "grad_norm": 0.5063732794644019, + "learning_rate": 1.9837385523852706e-06, + "loss": 0.9179826974868774, 
+ "step": 909 + }, + { + "epoch": 0.20967741935483872, + "grad_norm": 0.5385574519868781, + "learning_rate": 1.9836700538570456e-06, + "loss": 0.8888909816741943, + "step": 910 + }, + { + "epoch": 0.2099078341013825, + "grad_norm": 0.5208969379705799, + "learning_rate": 1.9836014125504143e-06, + "loss": 0.8951253890991211, + "step": 911 + }, + { + "epoch": 0.21013824884792626, + "grad_norm": 0.6093988535410455, + "learning_rate": 1.98353262847534e-06, + "loss": 1.084958553314209, + "step": 912 + }, + { + "epoch": 0.21036866359447004, + "grad_norm": 0.5061127496745415, + "learning_rate": 1.983463701641807e-06, + "loss": 0.8590713739395142, + "step": 913 + }, + { + "epoch": 0.21059907834101382, + "grad_norm": 0.6396228440899432, + "learning_rate": 1.9833946320598195e-06, + "loss": 1.0393706560134888, + "step": 914 + }, + { + "epoch": 0.2108294930875576, + "grad_norm": 0.49567487165870866, + "learning_rate": 1.983325419739403e-06, + "loss": 0.9403085708618164, + "step": 915 + }, + { + "epoch": 0.2110599078341014, + "grad_norm": 0.49912224081019996, + "learning_rate": 1.9832560646906038e-06, + "loss": 0.8431342244148254, + "step": 916 + }, + { + "epoch": 0.21129032258064517, + "grad_norm": 0.5558843704958377, + "learning_rate": 1.9831865669234884e-06, + "loss": 0.9024044871330261, + "step": 917 + }, + { + "epoch": 0.21152073732718893, + "grad_norm": 0.44775113902692637, + "learning_rate": 1.9831169264481443e-06, + "loss": 0.747347354888916, + "step": 918 + }, + { + "epoch": 0.2117511520737327, + "grad_norm": 0.46715914917156914, + "learning_rate": 1.9830471432746796e-06, + "loss": 0.8266197443008423, + "step": 919 + }, + { + "epoch": 0.2119815668202765, + "grad_norm": 0.5566270603086758, + "learning_rate": 1.9829772174132235e-06, + "loss": 0.8633416295051575, + "step": 920 + }, + { + "epoch": 0.21221198156682028, + "grad_norm": 0.5228096908540074, + "learning_rate": 1.9829071488739256e-06, + "loss": 1.0290095806121826, + "step": 921 + }, + { + "epoch": 
0.21244239631336406, + "grad_norm": 0.667274912811163, + "learning_rate": 1.9828369376669566e-06, + "loss": 0.8193448781967163, + "step": 922 + }, + { + "epoch": 0.21267281105990785, + "grad_norm": 0.5677549533509479, + "learning_rate": 1.982766583802507e-06, + "loss": 0.8828415870666504, + "step": 923 + }, + { + "epoch": 0.2129032258064516, + "grad_norm": 0.597806988660978, + "learning_rate": 1.9826960872907885e-06, + "loss": 0.8806191682815552, + "step": 924 + }, + { + "epoch": 0.21313364055299538, + "grad_norm": 0.40902701240404726, + "learning_rate": 1.982625448142034e-06, + "loss": 0.8441533446311951, + "step": 925 + }, + { + "epoch": 0.21336405529953917, + "grad_norm": 0.5142754504345473, + "learning_rate": 1.9825546663664963e-06, + "loss": 0.9084080457687378, + "step": 926 + }, + { + "epoch": 0.21359447004608295, + "grad_norm": 0.7318607240255686, + "learning_rate": 1.98248374197445e-06, + "loss": 0.9005601406097412, + "step": 927 + }, + { + "epoch": 0.21382488479262673, + "grad_norm": 0.48930991442842664, + "learning_rate": 1.9824126749761893e-06, + "loss": 1.0415414571762085, + "step": 928 + }, + { + "epoch": 0.21405529953917052, + "grad_norm": 0.4380456409582823, + "learning_rate": 1.982341465382029e-06, + "loss": 0.8130594491958618, + "step": 929 + }, + { + "epoch": 0.21428571428571427, + "grad_norm": 0.4623167832467728, + "learning_rate": 1.9822701132023053e-06, + "loss": 0.9178205728530884, + "step": 930 + }, + { + "epoch": 0.21451612903225806, + "grad_norm": 0.5894382821211327, + "learning_rate": 1.9821986184473754e-06, + "loss": 0.9927947521209717, + "step": 931 + }, + { + "epoch": 0.21474654377880184, + "grad_norm": 0.5621440238225328, + "learning_rate": 1.982126981127616e-06, + "loss": 0.9172670841217041, + "step": 932 + }, + { + "epoch": 0.21497695852534562, + "grad_norm": 0.5805773191302366, + "learning_rate": 1.9820552012534255e-06, + "loss": 0.9513058066368103, + "step": 933 + }, + { + "epoch": 0.2152073732718894, + "grad_norm": 
0.6596090379041671, + "learning_rate": 1.9819832788352227e-06, + "loss": 1.014827013015747, + "step": 934 + }, + { + "epoch": 0.2154377880184332, + "grad_norm": 0.5483468550441934, + "learning_rate": 1.9819112138834473e-06, + "loss": 1.0225746631622314, + "step": 935 + }, + { + "epoch": 0.21566820276497695, + "grad_norm": 0.46659867801168237, + "learning_rate": 1.9818390064085584e-06, + "loss": 0.8804227113723755, + "step": 936 + }, + { + "epoch": 0.21589861751152073, + "grad_norm": 0.42738644934381204, + "learning_rate": 1.9817666564210376e-06, + "loss": 0.7215760350227356, + "step": 937 + }, + { + "epoch": 0.2161290322580645, + "grad_norm": 0.6620668522422565, + "learning_rate": 1.981694163931387e-06, + "loss": 0.9978986978530884, + "step": 938 + }, + { + "epoch": 0.2163594470046083, + "grad_norm": 0.5846107454293807, + "learning_rate": 1.981621528950128e-06, + "loss": 0.8646233081817627, + "step": 939 + }, + { + "epoch": 0.21658986175115208, + "grad_norm": 0.44150430663795637, + "learning_rate": 1.981548751487803e-06, + "loss": 0.9619132876396179, + "step": 940 + }, + { + "epoch": 0.21682027649769586, + "grad_norm": 0.543839377462045, + "learning_rate": 1.981475831554976e-06, + "loss": 0.9209504127502441, + "step": 941 + }, + { + "epoch": 0.21705069124423962, + "grad_norm": 0.563351483363654, + "learning_rate": 1.9814027691622318e-06, + "loss": 0.7629299163818359, + "step": 942 + }, + { + "epoch": 0.2172811059907834, + "grad_norm": 0.4885334834965844, + "learning_rate": 1.9813295643201747e-06, + "loss": 0.8702583312988281, + "step": 943 + }, + { + "epoch": 0.21751152073732719, + "grad_norm": 0.5579102568918498, + "learning_rate": 1.9812562170394305e-06, + "loss": 0.9571657180786133, + "step": 944 + }, + { + "epoch": 0.21774193548387097, + "grad_norm": 0.43227127189367615, + "learning_rate": 1.9811827273306456e-06, + "loss": 0.7271617650985718, + "step": 945 + }, + { + "epoch": 0.21797235023041475, + "grad_norm": 0.46137899963900864, + "learning_rate": 
1.9811090952044865e-06, + "loss": 0.8189597725868225, + "step": 946 + }, + { + "epoch": 0.21820276497695854, + "grad_norm": 0.49142212284435566, + "learning_rate": 1.981035320671641e-06, + "loss": 0.7933987379074097, + "step": 947 + }, + { + "epoch": 0.2184331797235023, + "grad_norm": 0.48207328184354004, + "learning_rate": 1.9809614037428174e-06, + "loss": 0.9687645435333252, + "step": 948 + }, + { + "epoch": 0.21866359447004607, + "grad_norm": 0.5647695490676888, + "learning_rate": 1.980887344428745e-06, + "loss": 0.8293745517730713, + "step": 949 + }, + { + "epoch": 0.21889400921658986, + "grad_norm": 0.6489579503887147, + "learning_rate": 1.9808131427401727e-06, + "loss": 1.0447471141815186, + "step": 950 + }, + { + "epoch": 0.21912442396313364, + "grad_norm": 0.48010625791746325, + "learning_rate": 1.9807387986878715e-06, + "loss": 0.8916672468185425, + "step": 951 + }, + { + "epoch": 0.21935483870967742, + "grad_norm": 0.5436399520986829, + "learning_rate": 1.980664312282632e-06, + "loss": 0.8380981683731079, + "step": 952 + }, + { + "epoch": 0.2195852534562212, + "grad_norm": 0.4634469099281989, + "learning_rate": 1.9805896835352656e-06, + "loss": 0.887790322303772, + "step": 953 + }, + { + "epoch": 0.21981566820276496, + "grad_norm": 0.5184548533508342, + "learning_rate": 1.9805149124566048e-06, + "loss": 0.8353140950202942, + "step": 954 + }, + { + "epoch": 0.22004608294930875, + "grad_norm": 0.7177333773715296, + "learning_rate": 1.9804399990575026e-06, + "loss": 1.0337531566619873, + "step": 955 + }, + { + "epoch": 0.22027649769585253, + "grad_norm": 0.4262367777660272, + "learning_rate": 1.9803649433488324e-06, + "loss": 0.8845529556274414, + "step": 956 + }, + { + "epoch": 0.2205069124423963, + "grad_norm": 0.4271901286679727, + "learning_rate": 1.9802897453414884e-06, + "loss": 0.7408445477485657, + "step": 957 + }, + { + "epoch": 0.2207373271889401, + "grad_norm": 0.5478873632644168, + "learning_rate": 1.980214405046386e-06, + "loss": 
0.873178243637085, + "step": 958 + }, + { + "epoch": 0.22096774193548388, + "grad_norm": 0.556535747180833, + "learning_rate": 1.98013892247446e-06, + "loss": 1.0207639932632446, + "step": 959 + }, + { + "epoch": 0.22119815668202766, + "grad_norm": 0.5890989419509002, + "learning_rate": 1.980063297636667e-06, + "loss": 0.8626997470855713, + "step": 960 + }, + { + "epoch": 0.22142857142857142, + "grad_norm": 0.5912616927968722, + "learning_rate": 1.9799875305439836e-06, + "loss": 0.8961347341537476, + "step": 961 + }, + { + "epoch": 0.2216589861751152, + "grad_norm": 0.495639914718092, + "learning_rate": 1.9799116212074075e-06, + "loss": 0.8115944862365723, + "step": 962 + }, + { + "epoch": 0.22188940092165899, + "grad_norm": 0.5281413221179645, + "learning_rate": 1.979835569637957e-06, + "loss": 0.8274029493331909, + "step": 963 + }, + { + "epoch": 0.22211981566820277, + "grad_norm": 0.5782364794204825, + "learning_rate": 1.9797593758466706e-06, + "loss": 1.020345687866211, + "step": 964 + }, + { + "epoch": 0.22235023041474655, + "grad_norm": 0.586333023609623, + "learning_rate": 1.979683039844608e-06, + "loss": 0.8164723515510559, + "step": 965 + }, + { + "epoch": 0.22258064516129034, + "grad_norm": 0.48956655235723145, + "learning_rate": 1.979606561642849e-06, + "loss": 0.832849383354187, + "step": 966 + }, + { + "epoch": 0.2228110599078341, + "grad_norm": 0.5810232623043905, + "learning_rate": 1.9795299412524945e-06, + "loss": 0.9765876531600952, + "step": 967 + }, + { + "epoch": 0.22304147465437787, + "grad_norm": 0.5610292572060406, + "learning_rate": 1.9794531786846657e-06, + "loss": 0.9280411005020142, + "step": 968 + }, + { + "epoch": 0.22327188940092166, + "grad_norm": 0.6528516733941818, + "learning_rate": 1.9793762739505042e-06, + "loss": 1.122058629989624, + "step": 969 + }, + { + "epoch": 0.22350230414746544, + "grad_norm": 0.4582570301724996, + "learning_rate": 1.9792992270611737e-06, + "loss": 0.824627161026001, + "step": 970 + }, + { + "epoch": 
0.22373271889400922, + "grad_norm": 0.750391550156154, + "learning_rate": 1.9792220380278565e-06, + "loss": 1.0583840608596802, + "step": 971 + }, + { + "epoch": 0.223963133640553, + "grad_norm": 0.5277817422831291, + "learning_rate": 1.979144706861757e-06, + "loss": 1.053803563117981, + "step": 972 + }, + { + "epoch": 0.22419354838709676, + "grad_norm": 0.5197675200798639, + "learning_rate": 1.9790672335740993e-06, + "loss": 0.8572183847427368, + "step": 973 + }, + { + "epoch": 0.22442396313364055, + "grad_norm": 0.5956201422774761, + "learning_rate": 1.978989618176129e-06, + "loss": 0.7955416440963745, + "step": 974 + }, + { + "epoch": 0.22465437788018433, + "grad_norm": 0.6931203377433601, + "learning_rate": 1.9789118606791113e-06, + "loss": 0.9455063343048096, + "step": 975 + }, + { + "epoch": 0.2248847926267281, + "grad_norm": 0.5553738972507489, + "learning_rate": 1.978833961094333e-06, + "loss": 0.788895845413208, + "step": 976 + }, + { + "epoch": 0.2251152073732719, + "grad_norm": 0.4854852275390097, + "learning_rate": 1.9787559194331014e-06, + "loss": 0.8344719409942627, + "step": 977 + }, + { + "epoch": 0.22534562211981568, + "grad_norm": 0.5098723288351352, + "learning_rate": 1.9786777357067436e-06, + "loss": 0.85140061378479, + "step": 978 + }, + { + "epoch": 0.22557603686635944, + "grad_norm": 0.43945689098482754, + "learning_rate": 1.978599409926608e-06, + "loss": 0.8511399030685425, + "step": 979 + }, + { + "epoch": 0.22580645161290322, + "grad_norm": 0.4893125980217, + "learning_rate": 1.9785209421040636e-06, + "loss": 0.9243351221084595, + "step": 980 + }, + { + "epoch": 0.226036866359447, + "grad_norm": 0.5349074342918002, + "learning_rate": 1.9784423322504996e-06, + "loss": 0.9043580293655396, + "step": 981 + }, + { + "epoch": 0.2262672811059908, + "grad_norm": 0.654146848198394, + "learning_rate": 1.978363580377327e-06, + "loss": 0.854049563407898, + "step": 982 + }, + { + "epoch": 0.22649769585253457, + "grad_norm": 0.43507484708504635, + 
"learning_rate": 1.9782846864959754e-06, + "loss": 0.7785296440124512, + "step": 983 + }, + { + "epoch": 0.22672811059907835, + "grad_norm": 0.5830354059161934, + "learning_rate": 1.9782056506178965e-06, + "loss": 0.8464720845222473, + "step": 984 + }, + { + "epoch": 0.2269585253456221, + "grad_norm": 0.5249975809892665, + "learning_rate": 1.9781264727545624e-06, + "loss": 0.8519179821014404, + "step": 985 + }, + { + "epoch": 0.2271889400921659, + "grad_norm": 0.6176158235785483, + "learning_rate": 1.978047152917466e-06, + "loss": 0.956415057182312, + "step": 986 + }, + { + "epoch": 0.22741935483870968, + "grad_norm": 0.5046722242039021, + "learning_rate": 1.97796769111812e-06, + "loss": 1.028620719909668, + "step": 987 + }, + { + "epoch": 0.22764976958525346, + "grad_norm": 0.4889451789926323, + "learning_rate": 1.9778880873680585e-06, + "loss": 0.8707184195518494, + "step": 988 + }, + { + "epoch": 0.22788018433179724, + "grad_norm": 0.5212071576326044, + "learning_rate": 1.9778083416788355e-06, + "loss": 0.9842795729637146, + "step": 989 + }, + { + "epoch": 0.22811059907834103, + "grad_norm": 0.5963522406410062, + "learning_rate": 1.977728454062026e-06, + "loss": 0.8827522993087769, + "step": 990 + }, + { + "epoch": 0.22834101382488478, + "grad_norm": 0.5285989804764033, + "learning_rate": 1.9776484245292256e-06, + "loss": 0.8608568906784058, + "step": 991 + }, + { + "epoch": 0.22857142857142856, + "grad_norm": 0.7428648265675979, + "learning_rate": 1.977568253092051e-06, + "loss": 0.8512595891952515, + "step": 992 + }, + { + "epoch": 0.22880184331797235, + "grad_norm": 0.520235896024025, + "learning_rate": 1.9774879397621383e-06, + "loss": 0.7335344552993774, + "step": 993 + }, + { + "epoch": 0.22903225806451613, + "grad_norm": 0.6711607827981731, + "learning_rate": 1.9774074845511457e-06, + "loss": 1.0301114320755005, + "step": 994 + }, + { + "epoch": 0.22926267281105991, + "grad_norm": 0.515409965463074, + "learning_rate": 1.97732688747075e-06, + "loss": 
0.9011565446853638, + "step": 995 + }, + { + "epoch": 0.2294930875576037, + "grad_norm": 0.5657170632178228, + "learning_rate": 1.9772461485326507e-06, + "loss": 0.8644282221794128, + "step": 996 + }, + { + "epoch": 0.22972350230414745, + "grad_norm": 0.49795498598042737, + "learning_rate": 1.9771652677485664e-06, + "loss": 0.8107467889785767, + "step": 997 + }, + { + "epoch": 0.22995391705069124, + "grad_norm": 0.5832229133316258, + "learning_rate": 1.9770842451302373e-06, + "loss": 1.0090508460998535, + "step": 998 + }, + { + "epoch": 0.23018433179723502, + "grad_norm": 0.4910768822506593, + "learning_rate": 1.977003080689424e-06, + "loss": 0.8153292536735535, + "step": 999 + }, + { + "epoch": 0.2304147465437788, + "grad_norm": 0.6502643477323704, + "learning_rate": 1.976921774437906e-06, + "loss": 0.8446916341781616, + "step": 1000 + }, + { + "epoch": 0.2306451612903226, + "grad_norm": 0.5179047651030808, + "learning_rate": 1.9768403263874865e-06, + "loss": 0.759350597858429, + "step": 1001 + }, + { + "epoch": 0.23087557603686637, + "grad_norm": 0.5414654559095757, + "learning_rate": 1.9767587365499862e-06, + "loss": 0.9181695580482483, + "step": 1002 + }, + { + "epoch": 0.23110599078341013, + "grad_norm": 0.4755050115257823, + "learning_rate": 1.976677004937249e-06, + "loss": 0.8450978994369507, + "step": 1003 + }, + { + "epoch": 0.2313364055299539, + "grad_norm": 0.5616575268963485, + "learning_rate": 1.9765951315611365e-06, + "loss": 0.775252640247345, + "step": 1004 + }, + { + "epoch": 0.2315668202764977, + "grad_norm": 0.5248180263396327, + "learning_rate": 1.976513116433534e-06, + "loss": 0.8682440519332886, + "step": 1005 + }, + { + "epoch": 0.23179723502304148, + "grad_norm": 0.6093284414229693, + "learning_rate": 1.9764309595663457e-06, + "loss": 1.0701451301574707, + "step": 1006 + }, + { + "epoch": 0.23202764976958526, + "grad_norm": 0.5747684398408948, + "learning_rate": 1.976348660971496e-06, + "loss": 0.9381946921348572, + "step": 1007 + }, + { + 
"epoch": 0.23225806451612904, + "grad_norm": 0.5225356801303237, + "learning_rate": 1.976266220660931e-06, + "loss": 0.7836539149284363, + "step": 1008 + }, + { + "epoch": 0.2324884792626728, + "grad_norm": 0.5379097818020191, + "learning_rate": 1.9761836386466156e-06, + "loss": 0.9271948337554932, + "step": 1009 + }, + { + "epoch": 0.23271889400921658, + "grad_norm": 0.514797473753123, + "learning_rate": 1.976100914940538e-06, + "loss": 0.8268035650253296, + "step": 1010 + }, + { + "epoch": 0.23294930875576036, + "grad_norm": 0.5105764513310544, + "learning_rate": 1.976018049554705e-06, + "loss": 0.8266786336898804, + "step": 1011 + }, + { + "epoch": 0.23317972350230415, + "grad_norm": 0.6250953922330988, + "learning_rate": 1.9759350425011435e-06, + "loss": 0.9437457323074341, + "step": 1012 + }, + { + "epoch": 0.23341013824884793, + "grad_norm": 0.5629533372281755, + "learning_rate": 1.9758518937919033e-06, + "loss": 0.9078803062438965, + "step": 1013 + }, + { + "epoch": 0.23364055299539171, + "grad_norm": 0.5994095472581402, + "learning_rate": 1.975768603439052e-06, + "loss": 0.9873687624931335, + "step": 1014 + }, + { + "epoch": 0.23387096774193547, + "grad_norm": 0.5010269853722422, + "learning_rate": 1.97568517145468e-06, + "loss": 0.9450196027755737, + "step": 1015 + }, + { + "epoch": 0.23410138248847925, + "grad_norm": 0.5173338079683222, + "learning_rate": 1.975601597850897e-06, + "loss": 0.8804495334625244, + "step": 1016 + }, + { + "epoch": 0.23433179723502304, + "grad_norm": 0.5286639294307074, + "learning_rate": 1.9755178826398333e-06, + "loss": 0.9646104574203491, + "step": 1017 + }, + { + "epoch": 0.23456221198156682, + "grad_norm": 0.5917923655178416, + "learning_rate": 1.9754340258336403e-06, + "loss": 0.9829385280609131, + "step": 1018 + }, + { + "epoch": 0.2347926267281106, + "grad_norm": 0.5022802882731887, + "learning_rate": 1.97535002744449e-06, + "loss": 0.8433707356452942, + "step": 1019 + }, + { + "epoch": 0.2350230414746544, + "grad_norm": 
0.5984717862988072, + "learning_rate": 1.9752658874845744e-06, + "loss": 0.9892767071723938, + "step": 1020 + }, + { + "epoch": 0.23525345622119814, + "grad_norm": 0.5038568694461213, + "learning_rate": 1.9751816059661065e-06, + "loss": 0.8367536664009094, + "step": 1021 + }, + { + "epoch": 0.23548387096774193, + "grad_norm": 0.6009503951092086, + "learning_rate": 1.9750971829013194e-06, + "loss": 0.8947298526763916, + "step": 1022 + }, + { + "epoch": 0.2357142857142857, + "grad_norm": 0.4955473883987944, + "learning_rate": 1.975012618302467e-06, + "loss": 0.9218910336494446, + "step": 1023 + }, + { + "epoch": 0.2359447004608295, + "grad_norm": 0.46527028147066757, + "learning_rate": 1.9749279121818236e-06, + "loss": 0.8744943141937256, + "step": 1024 + }, + { + "epoch": 0.23617511520737328, + "grad_norm": 0.5457797851350515, + "learning_rate": 1.9748430645516845e-06, + "loss": 0.9023007154464722, + "step": 1025 + }, + { + "epoch": 0.23640552995391706, + "grad_norm": 0.5361296427556177, + "learning_rate": 1.974758075424365e-06, + "loss": 0.8475106954574585, + "step": 1026 + }, + { + "epoch": 0.23663594470046084, + "grad_norm": 0.5535275060374267, + "learning_rate": 1.9746729448122013e-06, + "loss": 0.8594635725021362, + "step": 1027 + }, + { + "epoch": 0.2368663594470046, + "grad_norm": 0.6574105474773485, + "learning_rate": 1.97458767272755e-06, + "loss": 0.9601756930351257, + "step": 1028 + }, + { + "epoch": 0.23709677419354838, + "grad_norm": 0.5454698959338334, + "learning_rate": 1.9745022591827886e-06, + "loss": 0.9281105399131775, + "step": 1029 + }, + { + "epoch": 0.23732718894009217, + "grad_norm": 0.4631930883062957, + "learning_rate": 1.9744167041903136e-06, + "loss": 0.8240020275115967, + "step": 1030 + }, + { + "epoch": 0.23755760368663595, + "grad_norm": 0.5116113956014486, + "learning_rate": 1.9743310077625446e-06, + "loss": 0.807030200958252, + "step": 1031 + }, + { + "epoch": 0.23778801843317973, + "grad_norm": 0.5399356518827937, + "learning_rate": 
1.9742451699119194e-06, + "loss": 0.8044267892837524, + "step": 1032 + }, + { + "epoch": 0.23801843317972352, + "grad_norm": 0.5022311335968053, + "learning_rate": 1.9741591906508975e-06, + "loss": 0.9198760390281677, + "step": 1033 + }, + { + "epoch": 0.23824884792626727, + "grad_norm": 0.6382005412114766, + "learning_rate": 1.974073069991959e-06, + "loss": 0.7951973676681519, + "step": 1034 + }, + { + "epoch": 0.23847926267281105, + "grad_norm": 0.5488288386867366, + "learning_rate": 1.9739868079476035e-06, + "loss": 0.8366928100585938, + "step": 1035 + }, + { + "epoch": 0.23870967741935484, + "grad_norm": 0.5327938531465227, + "learning_rate": 1.9739004045303524e-06, + "loss": 0.9644484519958496, + "step": 1036 + }, + { + "epoch": 0.23894009216589862, + "grad_norm": 0.47502000880743445, + "learning_rate": 1.9738138597527464e-06, + "loss": 0.8332105875015259, + "step": 1037 + }, + { + "epoch": 0.2391705069124424, + "grad_norm": 0.4812648524584188, + "learning_rate": 1.9737271736273482e-06, + "loss": 0.8923197388648987, + "step": 1038 + }, + { + "epoch": 0.2394009216589862, + "grad_norm": 0.48693803999160823, + "learning_rate": 1.97364034616674e-06, + "loss": 0.861129879951477, + "step": 1039 + }, + { + "epoch": 0.23963133640552994, + "grad_norm": 0.49858003070315154, + "learning_rate": 1.973553377383524e-06, + "loss": 0.8042281270027161, + "step": 1040 + }, + { + "epoch": 0.23986175115207373, + "grad_norm": 0.603264823916037, + "learning_rate": 1.9734662672903247e-06, + "loss": 1.0315792560577393, + "step": 1041 + }, + { + "epoch": 0.2400921658986175, + "grad_norm": 0.524902457294173, + "learning_rate": 1.973379015899785e-06, + "loss": 0.8165839910507202, + "step": 1042 + }, + { + "epoch": 0.2403225806451613, + "grad_norm": 0.5868579839473654, + "learning_rate": 1.97329162322457e-06, + "loss": 1.0002663135528564, + "step": 1043 + }, + { + "epoch": 0.24055299539170508, + "grad_norm": 0.579630177733921, + "learning_rate": 1.9732040892773642e-06, + "loss": 
0.9340938925743103, + "step": 1044 + }, + { + "epoch": 0.24078341013824886, + "grad_norm": 0.40394518210500746, + "learning_rate": 1.973116414070873e-06, + "loss": 0.7457709312438965, + "step": 1045 + }, + { + "epoch": 0.24101382488479262, + "grad_norm": 0.5468265646556031, + "learning_rate": 1.9730285976178227e-06, + "loss": 0.846583366394043, + "step": 1046 + }, + { + "epoch": 0.2412442396313364, + "grad_norm": 0.597351972991794, + "learning_rate": 1.9729406399309594e-06, + "loss": 0.9701514840126038, + "step": 1047 + }, + { + "epoch": 0.24147465437788018, + "grad_norm": 0.430042606733588, + "learning_rate": 1.9728525410230506e-06, + "loss": 0.7943054437637329, + "step": 1048 + }, + { + "epoch": 0.24170506912442397, + "grad_norm": 0.690774172762037, + "learning_rate": 1.972764300906883e-06, + "loss": 0.8885551691055298, + "step": 1049 + }, + { + "epoch": 0.24193548387096775, + "grad_norm": 0.522936671850185, + "learning_rate": 1.9726759195952653e-06, + "loss": 0.8258899450302124, + "step": 1050 + }, + { + "epoch": 0.24216589861751153, + "grad_norm": 0.586622666679495, + "learning_rate": 1.9725873971010255e-06, + "loss": 1.0085303783416748, + "step": 1051 + }, + { + "epoch": 0.2423963133640553, + "grad_norm": 0.49596210148454095, + "learning_rate": 1.9724987334370124e-06, + "loss": 0.814777135848999, + "step": 1052 + }, + { + "epoch": 0.24262672811059907, + "grad_norm": 0.5592433145931486, + "learning_rate": 1.9724099286160953e-06, + "loss": 0.8328995704650879, + "step": 1053 + }, + { + "epoch": 0.24285714285714285, + "grad_norm": 0.5857793622474846, + "learning_rate": 1.9723209826511645e-06, + "loss": 0.8699138164520264, + "step": 1054 + }, + { + "epoch": 0.24308755760368664, + "grad_norm": 0.5678867062742812, + "learning_rate": 1.9722318955551303e-06, + "loss": 0.8298562169075012, + "step": 1055 + }, + { + "epoch": 0.24331797235023042, + "grad_norm": 0.5976489688453608, + "learning_rate": 1.9721426673409236e-06, + "loss": 0.9470195770263672, + "step": 1056 + }, 
+ { + "epoch": 0.2435483870967742, + "grad_norm": 0.48875505327809854, + "learning_rate": 1.9720532980214955e-06, + "loss": 0.7733730673789978, + "step": 1057 + }, + { + "epoch": 0.24377880184331796, + "grad_norm": 0.46823524678841166, + "learning_rate": 1.9719637876098184e-06, + "loss": 0.7761770486831665, + "step": 1058 + }, + { + "epoch": 0.24400921658986174, + "grad_norm": 0.445725356281168, + "learning_rate": 1.971874136118884e-06, + "loss": 0.9270585775375366, + "step": 1059 + }, + { + "epoch": 0.24423963133640553, + "grad_norm": 0.42406381632115403, + "learning_rate": 1.971784343561705e-06, + "loss": 0.906977653503418, + "step": 1060 + }, + { + "epoch": 0.2444700460829493, + "grad_norm": 0.6412884076264423, + "learning_rate": 1.971694409951316e-06, + "loss": 0.9668625593185425, + "step": 1061 + }, + { + "epoch": 0.2447004608294931, + "grad_norm": 0.49415949875048953, + "learning_rate": 1.971604335300769e-06, + "loss": 0.8215349316596985, + "step": 1062 + }, + { + "epoch": 0.24493087557603688, + "grad_norm": 0.5322070043492434, + "learning_rate": 1.971514119623139e-06, + "loss": 0.8351551294326782, + "step": 1063 + }, + { + "epoch": 0.24516129032258063, + "grad_norm": 0.47999809865085763, + "learning_rate": 1.9714237629315206e-06, + "loss": 0.8778517246246338, + "step": 1064 + }, + { + "epoch": 0.24539170506912442, + "grad_norm": 0.5396014898113735, + "learning_rate": 1.9713332652390293e-06, + "loss": 0.9415761232376099, + "step": 1065 + }, + { + "epoch": 0.2456221198156682, + "grad_norm": 0.5420605598116663, + "learning_rate": 1.9712426265588e-06, + "loss": 0.9040292501449585, + "step": 1066 + }, + { + "epoch": 0.24585253456221198, + "grad_norm": 0.6005715295467339, + "learning_rate": 1.9711518469039894e-06, + "loss": 0.8886675834655762, + "step": 1067 + }, + { + "epoch": 0.24608294930875577, + "grad_norm": 0.6273079636247865, + "learning_rate": 1.971060926287774e-06, + "loss": 0.8439750671386719, + "step": 1068 + }, + { + "epoch": 0.24631336405529955, + 
"grad_norm": 0.5872743245126388, + "learning_rate": 1.9709698647233507e-06, + "loss": 0.8698763251304626, + "step": 1069 + }, + { + "epoch": 0.2465437788018433, + "grad_norm": 0.5858508124188764, + "learning_rate": 1.970878662223937e-06, + "loss": 0.7866508364677429, + "step": 1070 + }, + { + "epoch": 0.2467741935483871, + "grad_norm": 0.46529709331014274, + "learning_rate": 1.97078731880277e-06, + "loss": 0.8652541637420654, + "step": 1071 + }, + { + "epoch": 0.24700460829493087, + "grad_norm": 0.4617144249036463, + "learning_rate": 1.97069583447311e-06, + "loss": 0.8614386320114136, + "step": 1072 + }, + { + "epoch": 0.24723502304147466, + "grad_norm": 0.5647954006429063, + "learning_rate": 1.970604209248234e-06, + "loss": 0.9367830753326416, + "step": 1073 + }, + { + "epoch": 0.24746543778801844, + "grad_norm": 0.5744177103855904, + "learning_rate": 1.9705124431414417e-06, + "loss": 0.8851934671401978, + "step": 1074 + }, + { + "epoch": 0.24769585253456222, + "grad_norm": 0.49563724633359013, + "learning_rate": 1.9704205361660534e-06, + "loss": 0.9619653224945068, + "step": 1075 + }, + { + "epoch": 0.24792626728110598, + "grad_norm": 0.5649060756387019, + "learning_rate": 1.9703284883354094e-06, + "loss": 0.8826392889022827, + "step": 1076 + }, + { + "epoch": 0.24815668202764976, + "grad_norm": 0.6563751938003036, + "learning_rate": 1.970236299662869e-06, + "loss": 0.9075444340705872, + "step": 1077 + }, + { + "epoch": 0.24838709677419354, + "grad_norm": 0.5796370649143662, + "learning_rate": 1.9701439701618147e-06, + "loss": 1.048058032989502, + "step": 1078 + }, + { + "epoch": 0.24861751152073733, + "grad_norm": 0.5313768074192232, + "learning_rate": 1.970051499845647e-06, + "loss": 0.8460798263549805, + "step": 1079 + }, + { + "epoch": 0.2488479262672811, + "grad_norm": 0.7193266180122563, + "learning_rate": 1.9699588887277886e-06, + "loss": 0.9410982131958008, + "step": 1080 + }, + { + "epoch": 0.2490783410138249, + "grad_norm": 0.5102129399153178, + 
"learning_rate": 1.9698661368216816e-06, + "loss": 0.8247401714324951, + "step": 1081 + }, + { + "epoch": 0.24930875576036865, + "grad_norm": 0.5269386839997043, + "learning_rate": 1.969773244140789e-06, + "loss": 0.8543484210968018, + "step": 1082 + }, + { + "epoch": 0.24953917050691243, + "grad_norm": 0.6681776129080308, + "learning_rate": 1.9696802106985933e-06, + "loss": 0.9339861273765564, + "step": 1083 + }, + { + "epoch": 0.24976958525345622, + "grad_norm": 0.6394378735221973, + "learning_rate": 1.969587036508599e-06, + "loss": 0.8268687725067139, + "step": 1084 + }, + { + "epoch": 0.25, + "grad_norm": 0.5565533707237263, + "learning_rate": 1.96949372158433e-06, + "loss": 0.9990735054016113, + "step": 1085 + }, + { + "epoch": 0.2502304147465438, + "grad_norm": 0.5875792221187977, + "learning_rate": 1.9694002659393305e-06, + "loss": 0.871169924736023, + "step": 1086 + }, + { + "epoch": 0.25046082949308757, + "grad_norm": 0.5066699305192991, + "learning_rate": 1.9693066695871657e-06, + "loss": 0.9275476932525635, + "step": 1087 + }, + { + "epoch": 0.25069124423963135, + "grad_norm": 0.5987932412868929, + "learning_rate": 1.969212932541421e-06, + "loss": 0.802006721496582, + "step": 1088 + }, + { + "epoch": 0.25092165898617513, + "grad_norm": 0.6594060142183631, + "learning_rate": 1.9691190548157023e-06, + "loss": 1.158774495124817, + "step": 1089 + }, + { + "epoch": 0.2511520737327189, + "grad_norm": 0.5926971423347241, + "learning_rate": 1.969025036423636e-06, + "loss": 0.8979278802871704, + "step": 1090 + }, + { + "epoch": 0.2513824884792627, + "grad_norm": 0.48149308442816224, + "learning_rate": 1.968930877378868e-06, + "loss": 0.9486579895019531, + "step": 1091 + }, + { + "epoch": 0.25161290322580643, + "grad_norm": 0.5203236583717573, + "learning_rate": 1.968836577695066e-06, + "loss": 0.8661590814590454, + "step": 1092 + }, + { + "epoch": 0.2518433179723502, + "grad_norm": 0.5636787742284843, + "learning_rate": 1.9687421373859173e-06, + "loss": 
0.9224900007247925, + "step": 1093 + }, + { + "epoch": 0.252073732718894, + "grad_norm": 0.6117977186323622, + "learning_rate": 1.96864755646513e-06, + "loss": 0.9563734531402588, + "step": 1094 + }, + { + "epoch": 0.2523041474654378, + "grad_norm": 0.535175631127211, + "learning_rate": 1.968552834946432e-06, + "loss": 0.7457284927368164, + "step": 1095 + }, + { + "epoch": 0.25253456221198156, + "grad_norm": 0.5387959310508903, + "learning_rate": 1.9684579728435727e-06, + "loss": 0.8763077259063721, + "step": 1096 + }, + { + "epoch": 0.25276497695852534, + "grad_norm": 0.5765732282352442, + "learning_rate": 1.9683629701703203e-06, + "loss": 0.8476013541221619, + "step": 1097 + }, + { + "epoch": 0.25299539170506913, + "grad_norm": 0.6265041816963897, + "learning_rate": 1.9682678269404647e-06, + "loss": 0.9706464409828186, + "step": 1098 + }, + { + "epoch": 0.2532258064516129, + "grad_norm": 0.5592313042434921, + "learning_rate": 1.968172543167816e-06, + "loss": 0.9898370504379272, + "step": 1099 + }, + { + "epoch": 0.2534562211981567, + "grad_norm": 0.5273265970472166, + "learning_rate": 1.9680771188662043e-06, + "loss": 0.9073352813720703, + "step": 1100 + }, + { + "epoch": 0.2536866359447005, + "grad_norm": 0.5101975110861352, + "learning_rate": 1.9679815540494805e-06, + "loss": 0.698054850101471, + "step": 1101 + }, + { + "epoch": 0.25391705069124426, + "grad_norm": 0.5334723333803978, + "learning_rate": 1.967885848731515e-06, + "loss": 0.8755865097045898, + "step": 1102 + }, + { + "epoch": 0.25414746543778804, + "grad_norm": 0.7353231676630018, + "learning_rate": 1.9677900029262004e-06, + "loss": 0.8884447813034058, + "step": 1103 + }, + { + "epoch": 0.2543778801843318, + "grad_norm": 0.48855032311862734, + "learning_rate": 1.967694016647448e-06, + "loss": 0.738738477230072, + "step": 1104 + }, + { + "epoch": 0.25460829493087556, + "grad_norm": 0.5363150933196312, + "learning_rate": 1.96759788990919e-06, + "loss": 0.8024383783340454, + "step": 1105 + }, + { + 
"epoch": 0.25483870967741934, + "grad_norm": 0.703802110686274, + "learning_rate": 1.967501622725379e-06, + "loss": 0.8780910968780518, + "step": 1106 + }, + { + "epoch": 0.2550691244239631, + "grad_norm": 0.47799328608287317, + "learning_rate": 1.967405215109989e-06, + "loss": 0.8709204196929932, + "step": 1107 + }, + { + "epoch": 0.2552995391705069, + "grad_norm": 0.5771096865101828, + "learning_rate": 1.9673086670770122e-06, + "loss": 0.8838910460472107, + "step": 1108 + }, + { + "epoch": 0.2555299539170507, + "grad_norm": 0.6122299943883392, + "learning_rate": 1.967211978640463e-06, + "loss": 0.9310617446899414, + "step": 1109 + }, + { + "epoch": 0.2557603686635945, + "grad_norm": 0.5172180782022067, + "learning_rate": 1.9671151498143756e-06, + "loss": 0.8453254699707031, + "step": 1110 + }, + { + "epoch": 0.25599078341013826, + "grad_norm": 0.6724028308795985, + "learning_rate": 1.967018180612804e-06, + "loss": 1.0201973915100098, + "step": 1111 + }, + { + "epoch": 0.25622119815668204, + "grad_norm": 0.5304279166188671, + "learning_rate": 1.9669210710498242e-06, + "loss": 0.84140944480896, + "step": 1112 + }, + { + "epoch": 0.2564516129032258, + "grad_norm": 0.5850181467371437, + "learning_rate": 1.9668238211395308e-06, + "loss": 0.9012273550033569, + "step": 1113 + }, + { + "epoch": 0.2566820276497696, + "grad_norm": 0.5516270166899023, + "learning_rate": 1.9667264308960394e-06, + "loss": 0.820103645324707, + "step": 1114 + }, + { + "epoch": 0.2569124423963134, + "grad_norm": 0.7253674338479518, + "learning_rate": 1.9666289003334868e-06, + "loss": 1.0709048509597778, + "step": 1115 + }, + { + "epoch": 0.2571428571428571, + "grad_norm": 0.6606805333344365, + "learning_rate": 1.966531229466029e-06, + "loss": 0.9408602714538574, + "step": 1116 + }, + { + "epoch": 0.2573732718894009, + "grad_norm": 0.7074764796406602, + "learning_rate": 1.9664334183078425e-06, + "loss": 0.967316210269928, + "step": 1117 + }, + { + "epoch": 0.2576036866359447, + "grad_norm": 
0.7069704403267734, + "learning_rate": 1.9663354668731248e-06, + "loss": 0.9483754634857178, + "step": 1118 + }, + { + "epoch": 0.25783410138248847, + "grad_norm": 0.7072881911304519, + "learning_rate": 1.966237375176093e-06, + "loss": 0.7978509664535522, + "step": 1119 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.5719987288484106, + "learning_rate": 1.9661391432309862e-06, + "loss": 0.8720531463623047, + "step": 1120 + }, + { + "epoch": 0.25829493087557603, + "grad_norm": 0.6673697559796071, + "learning_rate": 1.966040771052061e-06, + "loss": 0.7984024286270142, + "step": 1121 + }, + { + "epoch": 0.2585253456221198, + "grad_norm": 0.5693036626081565, + "learning_rate": 1.965942258653597e-06, + "loss": 0.9255385398864746, + "step": 1122 + }, + { + "epoch": 0.2587557603686636, + "grad_norm": 0.5886763980683305, + "learning_rate": 1.9658436060498927e-06, + "loss": 0.9028007984161377, + "step": 1123 + }, + { + "epoch": 0.2589861751152074, + "grad_norm": 0.5256574840125579, + "learning_rate": 1.9657448132552677e-06, + "loss": 0.8773014545440674, + "step": 1124 + }, + { + "epoch": 0.25921658986175117, + "grad_norm": 0.5356122505196939, + "learning_rate": 1.9656458802840617e-06, + "loss": 0.9280908107757568, + "step": 1125 + }, + { + "epoch": 0.25944700460829495, + "grad_norm": 0.6473213250874083, + "learning_rate": 1.9655468071506344e-06, + "loss": 0.820783793926239, + "step": 1126 + }, + { + "epoch": 0.25967741935483873, + "grad_norm": 0.490374992394704, + "learning_rate": 1.9654475938693663e-06, + "loss": 0.7832465171813965, + "step": 1127 + }, + { + "epoch": 0.25990783410138246, + "grad_norm": 0.6097626342555662, + "learning_rate": 1.965348240454658e-06, + "loss": 0.8824669122695923, + "step": 1128 + }, + { + "epoch": 0.26013824884792625, + "grad_norm": 0.5472888524636408, + "learning_rate": 1.9652487469209305e-06, + "loss": 0.8782131671905518, + "step": 1129 + }, + { + "epoch": 0.26036866359447003, + "grad_norm": 0.6689126051687625, + "learning_rate": 
1.9651491132826255e-06, + "loss": 0.938920259475708, + "step": 1130 + }, + { + "epoch": 0.2605990783410138, + "grad_norm": 0.5811243675216263, + "learning_rate": 1.965049339554204e-06, + "loss": 0.8733320236206055, + "step": 1131 + }, + { + "epoch": 0.2608294930875576, + "grad_norm": 0.5773916722243296, + "learning_rate": 1.9649494257501485e-06, + "loss": 0.8688358664512634, + "step": 1132 + }, + { + "epoch": 0.2610599078341014, + "grad_norm": 0.5867794198483245, + "learning_rate": 1.9648493718849617e-06, + "loss": 0.9250427484512329, + "step": 1133 + }, + { + "epoch": 0.26129032258064516, + "grad_norm": 0.5093685293336041, + "learning_rate": 1.9647491779731655e-06, + "loss": 0.7890609502792358, + "step": 1134 + }, + { + "epoch": 0.26152073732718895, + "grad_norm": 0.5526465355704269, + "learning_rate": 1.964648844029303e-06, + "loss": 0.83612060546875, + "step": 1135 + }, + { + "epoch": 0.26175115207373273, + "grad_norm": 0.597714005790405, + "learning_rate": 1.9645483700679387e-06, + "loss": 0.7951240539550781, + "step": 1136 + }, + { + "epoch": 0.2619815668202765, + "grad_norm": 0.5785889079746135, + "learning_rate": 1.9644477561036546e-06, + "loss": 0.9746277332305908, + "step": 1137 + }, + { + "epoch": 0.2622119815668203, + "grad_norm": 0.6092572079482067, + "learning_rate": 1.9643470021510556e-06, + "loss": 0.856966495513916, + "step": 1138 + }, + { + "epoch": 0.2624423963133641, + "grad_norm": 0.5158468607686231, + "learning_rate": 1.9642461082247663e-06, + "loss": 0.7419042587280273, + "step": 1139 + }, + { + "epoch": 0.2626728110599078, + "grad_norm": 0.6141847224483623, + "learning_rate": 1.9641450743394304e-06, + "loss": 0.8868693709373474, + "step": 1140 + }, + { + "epoch": 0.2629032258064516, + "grad_norm": 0.6400145867633011, + "learning_rate": 1.9640439005097133e-06, + "loss": 1.0111520290374756, + "step": 1141 + }, + { + "epoch": 0.2631336405529954, + "grad_norm": 0.5946199662941717, + "learning_rate": 1.9639425867503006e-06, + "loss": 
0.9379187226295471, + "step": 1142 + }, + { + "epoch": 0.26336405529953916, + "grad_norm": 0.6188285038344139, + "learning_rate": 1.9638411330758973e-06, + "loss": 0.8451071977615356, + "step": 1143 + }, + { + "epoch": 0.26359447004608294, + "grad_norm": 0.6988429276503174, + "learning_rate": 1.9637395395012295e-06, + "loss": 1.0407288074493408, + "step": 1144 + }, + { + "epoch": 0.2638248847926267, + "grad_norm": 0.7122851693009883, + "learning_rate": 1.9636378060410433e-06, + "loss": 0.9594388008117676, + "step": 1145 + }, + { + "epoch": 0.2640552995391705, + "grad_norm": 0.4400072369022715, + "learning_rate": 1.9635359327101057e-06, + "loss": 0.7940789461135864, + "step": 1146 + }, + { + "epoch": 0.2642857142857143, + "grad_norm": 0.6347840140846547, + "learning_rate": 1.9634339195232025e-06, + "loss": 0.9707269668579102, + "step": 1147 + }, + { + "epoch": 0.2645161290322581, + "grad_norm": 0.6349984514987448, + "learning_rate": 1.9633317664951417e-06, + "loss": 0.9554522037506104, + "step": 1148 + }, + { + "epoch": 0.26474654377880186, + "grad_norm": 0.7144693638673882, + "learning_rate": 1.9632294736407497e-06, + "loss": 1.009516716003418, + "step": 1149 + }, + { + "epoch": 0.26497695852534564, + "grad_norm": 0.5429306162333095, + "learning_rate": 1.9631270409748754e-06, + "loss": 0.8337735533714294, + "step": 1150 + }, + { + "epoch": 0.2652073732718894, + "grad_norm": 0.5901765838606909, + "learning_rate": 1.963024468512386e-06, + "loss": 0.9103367328643799, + "step": 1151 + }, + { + "epoch": 0.2654377880184332, + "grad_norm": 0.3703807183273661, + "learning_rate": 1.9629217562681694e-06, + "loss": 0.7258249521255493, + "step": 1152 + }, + { + "epoch": 0.26566820276497694, + "grad_norm": 0.6322578847379198, + "learning_rate": 1.962818904257135e-06, + "loss": 0.7696776390075684, + "step": 1153 + }, + { + "epoch": 0.2658986175115207, + "grad_norm": 0.5842074670437798, + "learning_rate": 1.962715912494211e-06, + "loss": 0.9027894139289856, + "step": 1154 + }, + 
{ + "epoch": 0.2661290322580645, + "grad_norm": 0.6016444551454023, + "learning_rate": 1.962612780994347e-06, + "loss": 1.0412788391113281, + "step": 1155 + }, + { + "epoch": 0.2663594470046083, + "grad_norm": 0.5483158655152818, + "learning_rate": 1.962509509772512e-06, + "loss": 0.8656542897224426, + "step": 1156 + }, + { + "epoch": 0.26658986175115207, + "grad_norm": 0.56350579921959, + "learning_rate": 1.9624060988436964e-06, + "loss": 0.9541186094284058, + "step": 1157 + }, + { + "epoch": 0.26682027649769585, + "grad_norm": 0.6019903664727945, + "learning_rate": 1.962302548222909e-06, + "loss": 0.7684942483901978, + "step": 1158 + }, + { + "epoch": 0.26705069124423964, + "grad_norm": 0.5978642328134118, + "learning_rate": 1.962198857925181e-06, + "loss": 0.8934941291809082, + "step": 1159 + }, + { + "epoch": 0.2672811059907834, + "grad_norm": 0.8041491872239377, + "learning_rate": 1.962095027965562e-06, + "loss": 0.8674842715263367, + "step": 1160 + }, + { + "epoch": 0.2675115207373272, + "grad_norm": 0.5520577783269698, + "learning_rate": 1.9619910583591237e-06, + "loss": 0.8850778937339783, + "step": 1161 + }, + { + "epoch": 0.267741935483871, + "grad_norm": 0.5547632066870658, + "learning_rate": 1.961886949120957e-06, + "loss": 0.9140915870666504, + "step": 1162 + }, + { + "epoch": 0.26797235023041477, + "grad_norm": 0.5171975434439527, + "learning_rate": 1.9617827002661733e-06, + "loss": 0.7557287812232971, + "step": 1163 + }, + { + "epoch": 0.26820276497695855, + "grad_norm": 0.6409514019909783, + "learning_rate": 1.9616783118099032e-06, + "loss": 0.8780542612075806, + "step": 1164 + }, + { + "epoch": 0.2684331797235023, + "grad_norm": 0.5407478984703894, + "learning_rate": 1.9615737837672995e-06, + "loss": 0.8352043628692627, + "step": 1165 + }, + { + "epoch": 0.26866359447004606, + "grad_norm": 0.5628947650252879, + "learning_rate": 1.961469116153534e-06, + "loss": 0.8119357228279114, + "step": 1166 + }, + { + "epoch": 0.26889400921658985, + 
"grad_norm": 0.5744461460266088, + "learning_rate": 1.9613643089837992e-06, + "loss": 0.8953120708465576, + "step": 1167 + }, + { + "epoch": 0.26912442396313363, + "grad_norm": 0.5867925171054906, + "learning_rate": 1.9612593622733074e-06, + "loss": 0.9078162908554077, + "step": 1168 + }, + { + "epoch": 0.2693548387096774, + "grad_norm": 0.5358654275940312, + "learning_rate": 1.961154276037292e-06, + "loss": 0.9118859767913818, + "step": 1169 + }, + { + "epoch": 0.2695852534562212, + "grad_norm": 0.5501238198976731, + "learning_rate": 1.9610490502910056e-06, + "loss": 0.8456159234046936, + "step": 1170 + }, + { + "epoch": 0.269815668202765, + "grad_norm": 0.6291583788438779, + "learning_rate": 1.9609436850497222e-06, + "loss": 0.7860552072525024, + "step": 1171 + }, + { + "epoch": 0.27004608294930876, + "grad_norm": 0.5078912747038423, + "learning_rate": 1.9608381803287343e-06, + "loss": 0.8121567368507385, + "step": 1172 + }, + { + "epoch": 0.27027649769585255, + "grad_norm": 0.6271384929565738, + "learning_rate": 1.9607325361433574e-06, + "loss": 0.9212384819984436, + "step": 1173 + }, + { + "epoch": 0.27050691244239633, + "grad_norm": 0.5704107274797215, + "learning_rate": 1.960626752508924e-06, + "loss": 0.9528858661651611, + "step": 1174 + }, + { + "epoch": 0.2707373271889401, + "grad_norm": 0.5901390376692353, + "learning_rate": 1.9605208294407894e-06, + "loss": 0.8561227321624756, + "step": 1175 + }, + { + "epoch": 0.2709677419354839, + "grad_norm": 0.5308748660328867, + "learning_rate": 1.960414766954328e-06, + "loss": 0.9333669543266296, + "step": 1176 + }, + { + "epoch": 0.2711981566820276, + "grad_norm": 0.5146250417484006, + "learning_rate": 1.9603085650649345e-06, + "loss": 0.8879388570785522, + "step": 1177 + }, + { + "epoch": 0.2714285714285714, + "grad_norm": 0.6699060572110628, + "learning_rate": 1.9602022237880244e-06, + "loss": 1.0099214315414429, + "step": 1178 + }, + { + "epoch": 0.2716589861751152, + "grad_norm": 0.5456103597772948, + 
"learning_rate": 1.9600957431390324e-06, + "loss": 0.9341822862625122, + "step": 1179 + }, + { + "epoch": 0.271889400921659, + "grad_norm": 0.48145703185786454, + "learning_rate": 1.9599891231334144e-06, + "loss": 0.7616428136825562, + "step": 1180 + }, + { + "epoch": 0.27211981566820276, + "grad_norm": 0.4889684884403523, + "learning_rate": 1.959882363786646e-06, + "loss": 0.8270235061645508, + "step": 1181 + }, + { + "epoch": 0.27235023041474654, + "grad_norm": 0.5354748169041671, + "learning_rate": 1.9597754651142233e-06, + "loss": 0.8715114593505859, + "step": 1182 + }, + { + "epoch": 0.2725806451612903, + "grad_norm": 0.5251650427533354, + "learning_rate": 1.959668427131662e-06, + "loss": 0.6910781860351562, + "step": 1183 + }, + { + "epoch": 0.2728110599078341, + "grad_norm": 0.5425639259870759, + "learning_rate": 1.9595612498544997e-06, + "loss": 0.9158545136451721, + "step": 1184 + }, + { + "epoch": 0.2730414746543779, + "grad_norm": 0.4274378587816055, + "learning_rate": 1.9594539332982917e-06, + "loss": 0.7129944562911987, + "step": 1185 + }, + { + "epoch": 0.2732718894009217, + "grad_norm": 0.5549453334752472, + "learning_rate": 1.9593464774786155e-06, + "loss": 0.9487595558166504, + "step": 1186 + }, + { + "epoch": 0.27350230414746546, + "grad_norm": 0.490496609840347, + "learning_rate": 1.959238882411068e-06, + "loss": 0.9455368518829346, + "step": 1187 + }, + { + "epoch": 0.27373271889400924, + "grad_norm": 0.5638225468967204, + "learning_rate": 1.959131148111267e-06, + "loss": 0.9005390405654907, + "step": 1188 + }, + { + "epoch": 0.27396313364055297, + "grad_norm": 0.6239187759866925, + "learning_rate": 1.9590232745948494e-06, + "loss": 0.91117262840271, + "step": 1189 + }, + { + "epoch": 0.27419354838709675, + "grad_norm": 0.46530917608588857, + "learning_rate": 1.958915261877473e-06, + "loss": 0.7940579652786255, + "step": 1190 + }, + { + "epoch": 0.27442396313364054, + "grad_norm": 0.5621028227805456, + "learning_rate": 1.9588071099748155e-06, + 
"loss": 1.0705196857452393, + "step": 1191 + }, + { + "epoch": 0.2746543778801843, + "grad_norm": 0.7402334674842445, + "learning_rate": 1.9586988189025756e-06, + "loss": 0.9311869740486145, + "step": 1192 + }, + { + "epoch": 0.2748847926267281, + "grad_norm": 0.5809380189675816, + "learning_rate": 1.9585903886764715e-06, + "loss": 0.9400506019592285, + "step": 1193 + }, + { + "epoch": 0.2751152073732719, + "grad_norm": 0.5097271764516258, + "learning_rate": 1.958481819312241e-06, + "loss": 0.8282920122146606, + "step": 1194 + }, + { + "epoch": 0.27534562211981567, + "grad_norm": 0.6446418001070287, + "learning_rate": 1.9583731108256435e-06, + "loss": 0.9111119508743286, + "step": 1195 + }, + { + "epoch": 0.27557603686635945, + "grad_norm": 0.6208204199981331, + "learning_rate": 1.9582642632324576e-06, + "loss": 0.9486548900604248, + "step": 1196 + }, + { + "epoch": 0.27580645161290324, + "grad_norm": 0.634036768829364, + "learning_rate": 1.9581552765484828e-06, + "loss": 0.8452764749526978, + "step": 1197 + }, + { + "epoch": 0.276036866359447, + "grad_norm": 0.6457489846855801, + "learning_rate": 1.958046150789538e-06, + "loss": 0.8636663556098938, + "step": 1198 + }, + { + "epoch": 0.2762672811059908, + "grad_norm": 0.6308230498005049, + "learning_rate": 1.9579368859714623e-06, + "loss": 0.9819158315658569, + "step": 1199 + }, + { + "epoch": 0.2764976958525346, + "grad_norm": 0.6100305190055095, + "learning_rate": 1.957827482110116e-06, + "loss": 0.8010607957839966, + "step": 1200 + }, + { + "epoch": 0.2767281105990783, + "grad_norm": 0.44236661935550003, + "learning_rate": 1.957717939221379e-06, + "loss": 0.7686241865158081, + "step": 1201 + }, + { + "epoch": 0.2769585253456221, + "grad_norm": 0.5324278038856628, + "learning_rate": 1.9576082573211507e-06, + "loss": 0.8548723459243774, + "step": 1202 + }, + { + "epoch": 0.2771889400921659, + "grad_norm": 0.5873649231612361, + "learning_rate": 1.957498436425351e-06, + "loss": 0.7866852283477783, + "step": 1203 + 
}, + { + "epoch": 0.27741935483870966, + "grad_norm": 0.5578610745935356, + "learning_rate": 1.9573884765499215e-06, + "loss": 0.8086235523223877, + "step": 1204 + }, + { + "epoch": 0.27764976958525345, + "grad_norm": 0.6489442522213279, + "learning_rate": 1.9572783777108217e-06, + "loss": 1.0310871601104736, + "step": 1205 + }, + { + "epoch": 0.27788018433179723, + "grad_norm": 0.6639195648959771, + "learning_rate": 1.957168139924033e-06, + "loss": 0.9482970237731934, + "step": 1206 + }, + { + "epoch": 0.278110599078341, + "grad_norm": 0.5595205782283428, + "learning_rate": 1.957057763205556e-06, + "loss": 0.809493899345398, + "step": 1207 + }, + { + "epoch": 0.2783410138248848, + "grad_norm": 0.5835729385419335, + "learning_rate": 1.956947247571411e-06, + "loss": 0.8679298162460327, + "step": 1208 + }, + { + "epoch": 0.2785714285714286, + "grad_norm": 0.5339273489408208, + "learning_rate": 1.95683659303764e-06, + "loss": 0.8870571255683899, + "step": 1209 + }, + { + "epoch": 0.27880184331797236, + "grad_norm": 0.6400258685482293, + "learning_rate": 1.9567257996203046e-06, + "loss": 0.8452431559562683, + "step": 1210 + }, + { + "epoch": 0.27903225806451615, + "grad_norm": 0.585371400581961, + "learning_rate": 1.9566148673354855e-06, + "loss": 0.8376550674438477, + "step": 1211 + }, + { + "epoch": 0.27926267281105993, + "grad_norm": 0.468171015360779, + "learning_rate": 1.9565037961992853e-06, + "loss": 0.7686463594436646, + "step": 1212 + }, + { + "epoch": 0.2794930875576037, + "grad_norm": 0.6305180956441923, + "learning_rate": 1.956392586227825e-06, + "loss": 1.0064536333084106, + "step": 1213 + }, + { + "epoch": 0.27972350230414744, + "grad_norm": 0.5204866621768998, + "learning_rate": 1.956281237437247e-06, + "loss": 0.9087784290313721, + "step": 1214 + }, + { + "epoch": 0.2799539170506912, + "grad_norm": 0.5800831908467822, + "learning_rate": 1.9561697498437133e-06, + "loss": 0.8528383374214172, + "step": 1215 + }, + { + "epoch": 0.280184331797235, + 
"grad_norm": 0.492586251170718, + "learning_rate": 1.9560581234634062e-06, + "loss": 0.8229737281799316, + "step": 1216 + }, + { + "epoch": 0.2804147465437788, + "grad_norm": 0.6543530371868361, + "learning_rate": 1.9559463583125285e-06, + "loss": 0.8957454562187195, + "step": 1217 + }, + { + "epoch": 0.2806451612903226, + "grad_norm": 0.6116476174626837, + "learning_rate": 1.955834454407302e-06, + "loss": 0.8373404741287231, + "step": 1218 + }, + { + "epoch": 0.28087557603686636, + "grad_norm": 0.6339166918490768, + "learning_rate": 1.9557224117639698e-06, + "loss": 0.9117659330368042, + "step": 1219 + }, + { + "epoch": 0.28110599078341014, + "grad_norm": 0.7009847380548185, + "learning_rate": 1.9556102303987946e-06, + "loss": 0.9079498052597046, + "step": 1220 + }, + { + "epoch": 0.2813364055299539, + "grad_norm": 0.6797187898490639, + "learning_rate": 1.9554979103280597e-06, + "loss": 0.8127235174179077, + "step": 1221 + }, + { + "epoch": 0.2815668202764977, + "grad_norm": 0.4430544694455362, + "learning_rate": 1.9553854515680684e-06, + "loss": 0.6790676712989807, + "step": 1222 + }, + { + "epoch": 0.2817972350230415, + "grad_norm": 0.547920786044559, + "learning_rate": 1.955272854135143e-06, + "loss": 0.93434739112854, + "step": 1223 + }, + { + "epoch": 0.2820276497695853, + "grad_norm": 0.5831429716678932, + "learning_rate": 1.9551601180456274e-06, + "loss": 0.8624403476715088, + "step": 1224 + }, + { + "epoch": 0.28225806451612906, + "grad_norm": 0.5942670172250124, + "learning_rate": 1.9550472433158856e-06, + "loss": 0.8871273994445801, + "step": 1225 + }, + { + "epoch": 0.2824884792626728, + "grad_norm": 0.6403907324028919, + "learning_rate": 1.9549342299623007e-06, + "loss": 1.0226445198059082, + "step": 1226 + }, + { + "epoch": 0.28271889400921657, + "grad_norm": 0.5570530371692032, + "learning_rate": 1.9548210780012764e-06, + "loss": 0.9232503771781921, + "step": 1227 + }, + { + "epoch": 0.28294930875576035, + "grad_norm": 0.5562171255847491, + 
"learning_rate": 1.9547077874492367e-06, + "loss": 0.944965124130249, + "step": 1228 + }, + { + "epoch": 0.28317972350230414, + "grad_norm": 0.7815951055502713, + "learning_rate": 1.9545943583226255e-06, + "loss": 0.9491870403289795, + "step": 1229 + }, + { + "epoch": 0.2834101382488479, + "grad_norm": 0.5531880644641158, + "learning_rate": 1.9544807906379065e-06, + "loss": 0.8477638363838196, + "step": 1230 + }, + { + "epoch": 0.2836405529953917, + "grad_norm": 0.6334904267465776, + "learning_rate": 1.9543670844115647e-06, + "loss": 0.9733752012252808, + "step": 1231 + }, + { + "epoch": 0.2838709677419355, + "grad_norm": 0.5077250781055755, + "learning_rate": 1.954253239660104e-06, + "loss": 0.8158911466598511, + "step": 1232 + }, + { + "epoch": 0.28410138248847927, + "grad_norm": 0.47003121688563365, + "learning_rate": 1.9541392564000487e-06, + "loss": 0.8814271092414856, + "step": 1233 + }, + { + "epoch": 0.28433179723502305, + "grad_norm": 0.5974631149552703, + "learning_rate": 1.9540251346479435e-06, + "loss": 0.8366897106170654, + "step": 1234 + }, + { + "epoch": 0.28456221198156684, + "grad_norm": 0.5122641090735244, + "learning_rate": 1.953910874420353e-06, + "loss": 0.8043497800827026, + "step": 1235 + }, + { + "epoch": 0.2847926267281106, + "grad_norm": 0.6923450749153209, + "learning_rate": 1.953796475733862e-06, + "loss": 0.904765248298645, + "step": 1236 + }, + { + "epoch": 0.2850230414746544, + "grad_norm": 0.6316427864189956, + "learning_rate": 1.953681938605075e-06, + "loss": 0.9092245101928711, + "step": 1237 + }, + { + "epoch": 0.28525345622119813, + "grad_norm": 0.44433825637231683, + "learning_rate": 1.953567263050617e-06, + "loss": 0.9119021892547607, + "step": 1238 + }, + { + "epoch": 0.2854838709677419, + "grad_norm": 0.5258256580858013, + "learning_rate": 1.9534524490871336e-06, + "loss": 0.8380709886550903, + "step": 1239 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 0.6731382971935342, + "learning_rate": 1.9533374967312894e-06, + 
"loss": 0.9410983324050903, + "step": 1240 + }, + { + "epoch": 0.2859447004608295, + "grad_norm": 0.5901005556596554, + "learning_rate": 1.953222405999769e-06, + "loss": 0.882665753364563, + "step": 1241 + }, + { + "epoch": 0.28617511520737327, + "grad_norm": 0.600142706864601, + "learning_rate": 1.953107176909279e-06, + "loss": 0.9334039688110352, + "step": 1242 + }, + { + "epoch": 0.28640552995391705, + "grad_norm": 0.649506044390801, + "learning_rate": 1.9529918094765433e-06, + "loss": 0.8743090033531189, + "step": 1243 + }, + { + "epoch": 0.28663594470046083, + "grad_norm": 0.5149777367828677, + "learning_rate": 1.9528763037183086e-06, + "loss": 0.9017846584320068, + "step": 1244 + }, + { + "epoch": 0.2868663594470046, + "grad_norm": 0.6718877038666831, + "learning_rate": 1.95276065965134e-06, + "loss": 0.9412289261817932, + "step": 1245 + }, + { + "epoch": 0.2870967741935484, + "grad_norm": 0.5829455891585096, + "learning_rate": 1.9526448772924222e-06, + "loss": 0.9008835554122925, + "step": 1246 + }, + { + "epoch": 0.2873271889400922, + "grad_norm": 0.5850809594667484, + "learning_rate": 1.9525289566583622e-06, + "loss": 0.803752064704895, + "step": 1247 + }, + { + "epoch": 0.28755760368663597, + "grad_norm": 0.642250740432813, + "learning_rate": 1.952412897765985e-06, + "loss": 0.8354049921035767, + "step": 1248 + }, + { + "epoch": 0.28778801843317975, + "grad_norm": 0.711123311118831, + "learning_rate": 1.9522967006321363e-06, + "loss": 1.047461748123169, + "step": 1249 + }, + { + "epoch": 0.2880184331797235, + "grad_norm": 0.5664585984555107, + "learning_rate": 1.9521803652736826e-06, + "loss": 0.9036056399345398, + "step": 1250 + }, + { + "epoch": 0.28824884792626726, + "grad_norm": 0.6380477461120507, + "learning_rate": 1.952063891707509e-06, + "loss": 0.9534894227981567, + "step": 1251 + }, + { + "epoch": 0.28847926267281104, + "grad_norm": 0.6213868500155985, + "learning_rate": 1.9519472799505217e-06, + "loss": 0.9200841188430786, + "step": 1252 + }, + 
{ + "epoch": 0.2887096774193548, + "grad_norm": 0.6071864938745559, + "learning_rate": 1.9518305300196475e-06, + "loss": 0.8917449712753296, + "step": 1253 + }, + { + "epoch": 0.2889400921658986, + "grad_norm": 0.43859246681042113, + "learning_rate": 1.9517136419318317e-06, + "loss": 0.92131507396698, + "step": 1254 + }, + { + "epoch": 0.2891705069124424, + "grad_norm": 0.5459214675052779, + "learning_rate": 1.951596615704041e-06, + "loss": 0.8862432241439819, + "step": 1255 + }, + { + "epoch": 0.2894009216589862, + "grad_norm": 0.5238034407201325, + "learning_rate": 1.951479451353261e-06, + "loss": 0.7789605855941772, + "step": 1256 + }, + { + "epoch": 0.28963133640552996, + "grad_norm": 0.6480376013887345, + "learning_rate": 1.951362148896498e-06, + "loss": 0.8187062740325928, + "step": 1257 + }, + { + "epoch": 0.28986175115207374, + "grad_norm": 0.651824990199355, + "learning_rate": 1.9512447083507784e-06, + "loss": 1.0575072765350342, + "step": 1258 + }, + { + "epoch": 0.2900921658986175, + "grad_norm": 0.5300946141437952, + "learning_rate": 1.9511271297331493e-06, + "loss": 0.8027279376983643, + "step": 1259 + }, + { + "epoch": 0.2903225806451613, + "grad_norm": 0.549023479491683, + "learning_rate": 1.951009413060676e-06, + "loss": 0.6641743183135986, + "step": 1260 + }, + { + "epoch": 0.2905529953917051, + "grad_norm": 0.4919566770154341, + "learning_rate": 1.950891558350446e-06, + "loss": 0.7937613725662231, + "step": 1261 + }, + { + "epoch": 0.2907834101382488, + "grad_norm": 0.6213972326398296, + "learning_rate": 1.950773565619564e-06, + "loss": 0.9600511193275452, + "step": 1262 + }, + { + "epoch": 0.2910138248847926, + "grad_norm": 0.6514763319649333, + "learning_rate": 1.9506554348851585e-06, + "loss": 0.8275980353355408, + "step": 1263 + }, + { + "epoch": 0.2912442396313364, + "grad_norm": 0.598467260157347, + "learning_rate": 1.950537166164375e-06, + "loss": 0.9008789658546448, + "step": 1264 + }, + { + "epoch": 0.29147465437788017, + "grad_norm": 
0.5520168646542984, + "learning_rate": 1.95041875947438e-06, + "loss": 0.8701465129852295, + "step": 1265 + }, + { + "epoch": 0.29170506912442395, + "grad_norm": 0.5793489097336151, + "learning_rate": 1.95030021483236e-06, + "loss": 0.9313883781433105, + "step": 1266 + }, + { + "epoch": 0.29193548387096774, + "grad_norm": 0.5738973536331494, + "learning_rate": 1.9501815322555222e-06, + "loss": 0.883125901222229, + "step": 1267 + }, + { + "epoch": 0.2921658986175115, + "grad_norm": 0.5430628147775056, + "learning_rate": 1.9500627117610927e-06, + "loss": 0.8856269121170044, + "step": 1268 + }, + { + "epoch": 0.2923963133640553, + "grad_norm": 0.4857560088008075, + "learning_rate": 1.9499437533663184e-06, + "loss": 0.8817840218544006, + "step": 1269 + }, + { + "epoch": 0.2926267281105991, + "grad_norm": 0.7079159031386842, + "learning_rate": 1.949824657088466e-06, + "loss": 0.9911330342292786, + "step": 1270 + }, + { + "epoch": 0.29285714285714287, + "grad_norm": 0.6283382634413396, + "learning_rate": 1.949705422944822e-06, + "loss": 0.8902890682220459, + "step": 1271 + }, + { + "epoch": 0.29308755760368665, + "grad_norm": 0.5381213123876506, + "learning_rate": 1.949586050952693e-06, + "loss": 0.6846401691436768, + "step": 1272 + }, + { + "epoch": 0.29331797235023044, + "grad_norm": 0.6164805880844991, + "learning_rate": 1.9494665411294057e-06, + "loss": 0.9186165928840637, + "step": 1273 + }, + { + "epoch": 0.29354838709677417, + "grad_norm": 0.4648178531483389, + "learning_rate": 1.949346893492307e-06, + "loss": 0.8614095449447632, + "step": 1274 + }, + { + "epoch": 0.29377880184331795, + "grad_norm": 0.6146731068970395, + "learning_rate": 1.9492271080587637e-06, + "loss": 0.7824405431747437, + "step": 1275 + }, + { + "epoch": 0.29400921658986173, + "grad_norm": 0.5415059908334089, + "learning_rate": 1.949107184846162e-06, + "loss": 0.8694697618484497, + "step": 1276 + }, + { + "epoch": 0.2942396313364055, + "grad_norm": 0.6070495052767576, + "learning_rate": 
1.948987123871909e-06, + "loss": 0.8839597105979919, + "step": 1277 + }, + { + "epoch": 0.2944700460829493, + "grad_norm": 0.5155544169686388, + "learning_rate": 1.948866925153431e-06, + "loss": 0.832268238067627, + "step": 1278 + }, + { + "epoch": 0.2947004608294931, + "grad_norm": 0.48264272480740306, + "learning_rate": 1.948746588708175e-06, + "loss": 0.8243123888969421, + "step": 1279 + }, + { + "epoch": 0.29493087557603687, + "grad_norm": 0.7516695382591614, + "learning_rate": 1.948626114553608e-06, + "loss": 0.99314284324646, + "step": 1280 + }, + { + "epoch": 0.29516129032258065, + "grad_norm": 0.6001488755214682, + "learning_rate": 1.948505502707216e-06, + "loss": 0.8853542804718018, + "step": 1281 + }, + { + "epoch": 0.29539170506912443, + "grad_norm": 0.7940640499991963, + "learning_rate": 1.948384753186506e-06, + "loss": 0.9623305797576904, + "step": 1282 + }, + { + "epoch": 0.2956221198156682, + "grad_norm": 0.64774993620639, + "learning_rate": 1.948263866009005e-06, + "loss": 0.8321142792701721, + "step": 1283 + }, + { + "epoch": 0.295852534562212, + "grad_norm": 0.6059595321597901, + "learning_rate": 1.948142841192258e-06, + "loss": 0.8911606669425964, + "step": 1284 + }, + { + "epoch": 0.2960829493087558, + "grad_norm": 0.6228210357050852, + "learning_rate": 1.948021678753834e-06, + "loss": 0.9501996040344238, + "step": 1285 + }, + { + "epoch": 0.29631336405529957, + "grad_norm": 0.5846881548888203, + "learning_rate": 1.947900378711318e-06, + "loss": 0.8555784225463867, + "step": 1286 + }, + { + "epoch": 0.2965437788018433, + "grad_norm": 0.5726752466099971, + "learning_rate": 1.9477789410823163e-06, + "loss": 0.7703878283500671, + "step": 1287 + }, + { + "epoch": 0.2967741935483871, + "grad_norm": 0.5629458043150717, + "learning_rate": 1.947657365884457e-06, + "loss": 1.0072009563446045, + "step": 1288 + }, + { + "epoch": 0.29700460829493086, + "grad_norm": 0.5698014348408978, + "learning_rate": 1.9475356531353847e-06, + "loss": 0.7633493542671204, 
+ "step": 1289 + }, + { + "epoch": 0.29723502304147464, + "grad_norm": 0.5241558601711666, + "learning_rate": 1.9474138028527674e-06, + "loss": 0.88579261302948, + "step": 1290 + }, + { + "epoch": 0.2974654377880184, + "grad_norm": 0.6037880677787516, + "learning_rate": 1.94729181505429e-06, + "loss": 0.8356794118881226, + "step": 1291 + }, + { + "epoch": 0.2976958525345622, + "grad_norm": 0.6197051238228268, + "learning_rate": 1.94716968975766e-06, + "loss": 0.8330395817756653, + "step": 1292 + }, + { + "epoch": 0.297926267281106, + "grad_norm": 0.6667932213948545, + "learning_rate": 1.947047426980604e-06, + "loss": 0.9219698905944824, + "step": 1293 + }, + { + "epoch": 0.2981566820276498, + "grad_norm": 0.5409653154450632, + "learning_rate": 1.9469250267408674e-06, + "loss": 0.880803644657135, + "step": 1294 + }, + { + "epoch": 0.29838709677419356, + "grad_norm": 0.5789679620224094, + "learning_rate": 1.9468024890562165e-06, + "loss": 0.8212012052536011, + "step": 1295 + }, + { + "epoch": 0.29861751152073734, + "grad_norm": 0.6209106243517916, + "learning_rate": 1.946679813944438e-06, + "loss": 1.0118587017059326, + "step": 1296 + }, + { + "epoch": 0.2988479262672811, + "grad_norm": 0.6374046746708436, + "learning_rate": 1.9465570014233377e-06, + "loss": 0.8708915710449219, + "step": 1297 + }, + { + "epoch": 0.2990783410138249, + "grad_norm": 0.6373146041782783, + "learning_rate": 1.9464340515107415e-06, + "loss": 0.9386067986488342, + "step": 1298 + }, + { + "epoch": 0.29930875576036864, + "grad_norm": 0.5346925830356088, + "learning_rate": 1.9463109642244958e-06, + "loss": 0.8672319650650024, + "step": 1299 + }, + { + "epoch": 0.2995391705069124, + "grad_norm": 0.7198371333215221, + "learning_rate": 1.9461877395824662e-06, + "loss": 0.9002958536148071, + "step": 1300 + }, + { + "epoch": 0.2997695852534562, + "grad_norm": 0.6247724220238058, + "learning_rate": 1.946064377602539e-06, + "loss": 0.9206029772758484, + "step": 1301 + }, + { + "epoch": 0.3, + 
"grad_norm": 0.8295443472719992, + "learning_rate": 1.94594087830262e-06, + "loss": 1.0063598155975342, + "step": 1302 + }, + { + "epoch": 0.3002304147465438, + "grad_norm": 0.5149695005553171, + "learning_rate": 1.9458172417006346e-06, + "loss": 0.7616912126541138, + "step": 1303 + }, + { + "epoch": 0.30046082949308756, + "grad_norm": 0.5462398029065331, + "learning_rate": 1.945693467814529e-06, + "loss": 0.8385730385780334, + "step": 1304 + }, + { + "epoch": 0.30069124423963134, + "grad_norm": 0.4854220181479302, + "learning_rate": 1.9455695566622677e-06, + "loss": 0.7032216787338257, + "step": 1305 + }, + { + "epoch": 0.3009216589861751, + "grad_norm": 0.5554776786626977, + "learning_rate": 1.9454455082618373e-06, + "loss": 0.7647181749343872, + "step": 1306 + }, + { + "epoch": 0.3011520737327189, + "grad_norm": 0.7119385935860951, + "learning_rate": 1.945321322631243e-06, + "loss": 0.9918918013572693, + "step": 1307 + }, + { + "epoch": 0.3013824884792627, + "grad_norm": 0.5689741757687454, + "learning_rate": 1.945196999788511e-06, + "loss": 0.838451623916626, + "step": 1308 + }, + { + "epoch": 0.3016129032258065, + "grad_norm": 0.7156229049064139, + "learning_rate": 1.945072539751685e-06, + "loss": 0.9739303588867188, + "step": 1309 + }, + { + "epoch": 0.30184331797235026, + "grad_norm": 0.4850858592361209, + "learning_rate": 1.9449479425388305e-06, + "loss": 0.8233742713928223, + "step": 1310 + }, + { + "epoch": 0.302073732718894, + "grad_norm": 0.666231819455408, + "learning_rate": 1.944823208168034e-06, + "loss": 0.9765088558197021, + "step": 1311 + }, + { + "epoch": 0.30230414746543777, + "grad_norm": 0.5940530240559707, + "learning_rate": 1.944698336657399e-06, + "loss": 0.7614048719406128, + "step": 1312 + }, + { + "epoch": 0.30253456221198155, + "grad_norm": 0.5807403996402337, + "learning_rate": 1.9445733280250512e-06, + "loss": 0.760692834854126, + "step": 1313 + }, + { + "epoch": 0.30276497695852533, + "grad_norm": 0.710580819926471, + 
"learning_rate": 1.944448182289135e-06, + "loss": 0.8484706878662109, + "step": 1314 + }, + { + "epoch": 0.3029953917050691, + "grad_norm": 0.6131916776262658, + "learning_rate": 1.944322899467816e-06, + "loss": 0.8857289552688599, + "step": 1315 + }, + { + "epoch": 0.3032258064516129, + "grad_norm": 0.7120330171482998, + "learning_rate": 1.944197479579278e-06, + "loss": 0.8375179171562195, + "step": 1316 + }, + { + "epoch": 0.3034562211981567, + "grad_norm": 0.5402001956337824, + "learning_rate": 1.9440719226417263e-06, + "loss": 0.8141925930976868, + "step": 1317 + }, + { + "epoch": 0.30368663594470047, + "grad_norm": 0.7607357810019435, + "learning_rate": 1.943946228673384e-06, + "loss": 0.9970111846923828, + "step": 1318 + }, + { + "epoch": 0.30391705069124425, + "grad_norm": 0.5721230302327327, + "learning_rate": 1.9438203976924966e-06, + "loss": 0.9542866349220276, + "step": 1319 + }, + { + "epoch": 0.30414746543778803, + "grad_norm": 0.5904074306009988, + "learning_rate": 1.943694429717328e-06, + "loss": 0.8808399438858032, + "step": 1320 + }, + { + "epoch": 0.3043778801843318, + "grad_norm": 0.5734964183027593, + "learning_rate": 1.9435683247661623e-06, + "loss": 0.8541150093078613, + "step": 1321 + }, + { + "epoch": 0.3046082949308756, + "grad_norm": 0.7749551173384804, + "learning_rate": 1.943442082857303e-06, + "loss": 0.8887044191360474, + "step": 1322 + }, + { + "epoch": 0.30483870967741933, + "grad_norm": 0.6530281616907251, + "learning_rate": 1.9433157040090746e-06, + "loss": 0.8699131011962891, + "step": 1323 + }, + { + "epoch": 0.3050691244239631, + "grad_norm": 0.6811202971751444, + "learning_rate": 1.9431891882398205e-06, + "loss": 0.7096077799797058, + "step": 1324 + }, + { + "epoch": 0.3052995391705069, + "grad_norm": 0.5279135582200482, + "learning_rate": 1.9430625355679045e-06, + "loss": 0.8040453195571899, + "step": 1325 + }, + { + "epoch": 0.3055299539170507, + "grad_norm": 0.5904456084555657, + "learning_rate": 1.9429357460117093e-06, + 
"loss": 0.8275970220565796, + "step": 1326 + }, + { + "epoch": 0.30576036866359446, + "grad_norm": 0.5947614996956965, + "learning_rate": 1.9428088195896393e-06, + "loss": 0.9724141359329224, + "step": 1327 + }, + { + "epoch": 0.30599078341013825, + "grad_norm": 0.6696756628924122, + "learning_rate": 1.9426817563201174e-06, + "loss": 0.9293274879455566, + "step": 1328 + }, + { + "epoch": 0.30622119815668203, + "grad_norm": 0.5976334939970911, + "learning_rate": 1.9425545562215865e-06, + "loss": 0.9454036951065063, + "step": 1329 + }, + { + "epoch": 0.3064516129032258, + "grad_norm": 0.48928245529374687, + "learning_rate": 1.9424272193125094e-06, + "loss": 0.7751365900039673, + "step": 1330 + }, + { + "epoch": 0.3066820276497696, + "grad_norm": 0.5211050083614731, + "learning_rate": 1.942299745611369e-06, + "loss": 0.8444688320159912, + "step": 1331 + }, + { + "epoch": 0.3069124423963134, + "grad_norm": 0.6370602856216532, + "learning_rate": 1.9421721351366684e-06, + "loss": 0.7751414775848389, + "step": 1332 + }, + { + "epoch": 0.30714285714285716, + "grad_norm": 0.6732034032956694, + "learning_rate": 1.9420443879069287e-06, + "loss": 0.912209153175354, + "step": 1333 + }, + { + "epoch": 0.30737327188940095, + "grad_norm": 0.4990267188564962, + "learning_rate": 1.941916503940694e-06, + "loss": 0.8897542357444763, + "step": 1334 + }, + { + "epoch": 0.3076036866359447, + "grad_norm": 0.6319943447022882, + "learning_rate": 1.9417884832565257e-06, + "loss": 0.8562046885490417, + "step": 1335 + }, + { + "epoch": 0.30783410138248846, + "grad_norm": 0.6422557067377674, + "learning_rate": 1.9416603258730055e-06, + "loss": 0.8886401653289795, + "step": 1336 + }, + { + "epoch": 0.30806451612903224, + "grad_norm": 0.5477788033894715, + "learning_rate": 1.9415320318087354e-06, + "loss": 0.7401903867721558, + "step": 1337 + }, + { + "epoch": 0.308294930875576, + "grad_norm": 0.5300494853019255, + "learning_rate": 1.941403601082338e-06, + "loss": 0.761360764503479, + "step": 
1338 + }, + { + "epoch": 0.3085253456221198, + "grad_norm": 0.5079078108497779, + "learning_rate": 1.9412750337124537e-06, + "loss": 0.9223028421401978, + "step": 1339 + }, + { + "epoch": 0.3087557603686636, + "grad_norm": 0.6370349934611669, + "learning_rate": 1.9411463297177446e-06, + "loss": 0.9287113547325134, + "step": 1340 + }, + { + "epoch": 0.3089861751152074, + "grad_norm": 0.6186566628026451, + "learning_rate": 1.941017489116891e-06, + "loss": 0.8548502922058105, + "step": 1341 + }, + { + "epoch": 0.30921658986175116, + "grad_norm": 0.6058300330585435, + "learning_rate": 1.9408885119285953e-06, + "loss": 0.8885709643363953, + "step": 1342 + }, + { + "epoch": 0.30944700460829494, + "grad_norm": 0.6807261513363189, + "learning_rate": 1.940759398171577e-06, + "loss": 0.8970856666564941, + "step": 1343 + }, + { + "epoch": 0.3096774193548387, + "grad_norm": 0.5664251996297385, + "learning_rate": 1.9406301478645783e-06, + "loss": 0.847138524055481, + "step": 1344 + }, + { + "epoch": 0.3099078341013825, + "grad_norm": 0.5354847877369665, + "learning_rate": 1.9405007610263584e-06, + "loss": 0.7892216444015503, + "step": 1345 + }, + { + "epoch": 0.3101382488479263, + "grad_norm": 0.6610649492142503, + "learning_rate": 1.940371237675698e-06, + "loss": 0.8869141340255737, + "step": 1346 + }, + { + "epoch": 0.3103686635944701, + "grad_norm": 0.6628677961578048, + "learning_rate": 1.9402415778313976e-06, + "loss": 0.8405635356903076, + "step": 1347 + }, + { + "epoch": 0.3105990783410138, + "grad_norm": 0.6646875425992601, + "learning_rate": 1.9401117815122768e-06, + "loss": 0.914352536201477, + "step": 1348 + }, + { + "epoch": 0.3108294930875576, + "grad_norm": 0.5494930695935469, + "learning_rate": 1.9399818487371756e-06, + "loss": 0.9059416055679321, + "step": 1349 + }, + { + "epoch": 0.31105990783410137, + "grad_norm": 0.7196818748440269, + "learning_rate": 1.939851779524953e-06, + "loss": 1.0513644218444824, + "step": 1350 + }, + { + "epoch": 0.31129032258064515, 
+ "grad_norm": 0.6538820317800585, + "learning_rate": 1.9397215738944893e-06, + "loss": 0.8673127889633179, + "step": 1351 + }, + { + "epoch": 0.31152073732718893, + "grad_norm": 0.5818727011440767, + "learning_rate": 1.9395912318646827e-06, + "loss": 0.7907043695449829, + "step": 1352 + }, + { + "epoch": 0.3117511520737327, + "grad_norm": 0.6065295506543811, + "learning_rate": 1.9394607534544527e-06, + "loss": 0.8127990961074829, + "step": 1353 + }, + { + "epoch": 0.3119815668202765, + "grad_norm": 0.598693369273182, + "learning_rate": 1.9393301386827387e-06, + "loss": 0.877837061882019, + "step": 1354 + }, + { + "epoch": 0.3122119815668203, + "grad_norm": 0.734976387219602, + "learning_rate": 1.939199387568498e-06, + "loss": 0.8518592715263367, + "step": 1355 + }, + { + "epoch": 0.31244239631336407, + "grad_norm": 0.5473192670176156, + "learning_rate": 1.9390685001307093e-06, + "loss": 0.7151869535446167, + "step": 1356 + }, + { + "epoch": 0.31267281105990785, + "grad_norm": 0.5581062201544951, + "learning_rate": 1.9389374763883716e-06, + "loss": 0.8325271606445312, + "step": 1357 + }, + { + "epoch": 0.31290322580645163, + "grad_norm": 0.6052904551524719, + "learning_rate": 1.938806316360502e-06, + "loss": 0.6875327825546265, + "step": 1358 + }, + { + "epoch": 0.3131336405529954, + "grad_norm": 0.5481097616797531, + "learning_rate": 1.9386750200661382e-06, + "loss": 0.8944000005722046, + "step": 1359 + }, + { + "epoch": 0.31336405529953915, + "grad_norm": 0.4954617799257055, + "learning_rate": 1.938543587524338e-06, + "loss": 0.8544747829437256, + "step": 1360 + }, + { + "epoch": 0.31359447004608293, + "grad_norm": 0.6938391730058001, + "learning_rate": 1.9384120187541788e-06, + "loss": 0.9216448068618774, + "step": 1361 + }, + { + "epoch": 0.3138248847926267, + "grad_norm": 0.6834174981389874, + "learning_rate": 1.938280313774757e-06, + "loss": 0.9239494800567627, + "step": 1362 + }, + { + "epoch": 0.3140552995391705, + "grad_norm": 0.6816430700209432, + 
"learning_rate": 1.9381484726051896e-06, + "loss": 0.8903297185897827, + "step": 1363 + }, + { + "epoch": 0.3142857142857143, + "grad_norm": 0.46405748253250195, + "learning_rate": 1.9380164952646137e-06, + "loss": 0.7019625306129456, + "step": 1364 + }, + { + "epoch": 0.31451612903225806, + "grad_norm": 0.6844663370999042, + "learning_rate": 1.9378843817721854e-06, + "loss": 0.8668909072875977, + "step": 1365 + }, + { + "epoch": 0.31474654377880185, + "grad_norm": 0.6454844689755169, + "learning_rate": 1.9377521321470804e-06, + "loss": 0.8124282360076904, + "step": 1366 + }, + { + "epoch": 0.31497695852534563, + "grad_norm": 0.5251795296125459, + "learning_rate": 1.937619746408495e-06, + "loss": 0.7543507814407349, + "step": 1367 + }, + { + "epoch": 0.3152073732718894, + "grad_norm": 0.6140420297919054, + "learning_rate": 1.9374872245756448e-06, + "loss": 0.8355506062507629, + "step": 1368 + }, + { + "epoch": 0.3154377880184332, + "grad_norm": 0.5898778959170111, + "learning_rate": 1.937354566667765e-06, + "loss": 0.9642060399055481, + "step": 1369 + }, + { + "epoch": 0.315668202764977, + "grad_norm": 0.5556038505388771, + "learning_rate": 1.93722177270411e-06, + "loss": 0.9044197201728821, + "step": 1370 + }, + { + "epoch": 0.31589861751152076, + "grad_norm": 0.4271939145273097, + "learning_rate": 1.937088842703956e-06, + "loss": 0.8077869415283203, + "step": 1371 + }, + { + "epoch": 0.3161290322580645, + "grad_norm": 0.6032982707731396, + "learning_rate": 1.9369557766865968e-06, + "loss": 0.8319793939590454, + "step": 1372 + }, + { + "epoch": 0.3163594470046083, + "grad_norm": 0.6304953638761566, + "learning_rate": 1.9368225746713475e-06, + "loss": 0.8233131170272827, + "step": 1373 + }, + { + "epoch": 0.31658986175115206, + "grad_norm": 0.6631214954178034, + "learning_rate": 1.936689236677541e-06, + "loss": 0.7898514270782471, + "step": 1374 + }, + { + "epoch": 0.31682027649769584, + "grad_norm": 0.6121849479571054, + "learning_rate": 1.9365557627245326e-06, + 
"loss": 0.9243249893188477, + "step": 1375 + }, + { + "epoch": 0.3170506912442396, + "grad_norm": 0.5673475924264754, + "learning_rate": 1.9364221528316946e-06, + "loss": 0.8153131008148193, + "step": 1376 + }, + { + "epoch": 0.3172811059907834, + "grad_norm": 0.6767166003638188, + "learning_rate": 1.936288407018421e-06, + "loss": 0.9203826189041138, + "step": 1377 + }, + { + "epoch": 0.3175115207373272, + "grad_norm": 0.6187562743125278, + "learning_rate": 1.936154525304124e-06, + "loss": 0.902605414390564, + "step": 1378 + }, + { + "epoch": 0.317741935483871, + "grad_norm": 0.6256929156852202, + "learning_rate": 1.936020507708238e-06, + "loss": 0.9504558444023132, + "step": 1379 + }, + { + "epoch": 0.31797235023041476, + "grad_norm": 0.6737932441495208, + "learning_rate": 1.9358863542502133e-06, + "loss": 0.8068373203277588, + "step": 1380 + }, + { + "epoch": 0.31820276497695854, + "grad_norm": 0.6309381884158767, + "learning_rate": 1.935752064949524e-06, + "loss": 1.00711989402771, + "step": 1381 + }, + { + "epoch": 0.3184331797235023, + "grad_norm": 0.6297604875594859, + "learning_rate": 1.935617639825661e-06, + "loss": 0.8271746039390564, + "step": 1382 + }, + { + "epoch": 0.3186635944700461, + "grad_norm": 0.658739150286029, + "learning_rate": 1.9354830788981363e-06, + "loss": 0.8478754758834839, + "step": 1383 + }, + { + "epoch": 0.31889400921658984, + "grad_norm": 0.6165108812612344, + "learning_rate": 1.935348382186481e-06, + "loss": 0.9240723252296448, + "step": 1384 + }, + { + "epoch": 0.3191244239631336, + "grad_norm": 0.6446571506984649, + "learning_rate": 1.935213549710246e-06, + "loss": 0.9275645613670349, + "step": 1385 + }, + { + "epoch": 0.3193548387096774, + "grad_norm": 0.6060948743586713, + "learning_rate": 1.9350785814890027e-06, + "loss": 0.9838275909423828, + "step": 1386 + }, + { + "epoch": 0.3195852534562212, + "grad_norm": 0.5765714017880346, + "learning_rate": 1.934943477542341e-06, + "loss": 0.9259177446365356, + "step": 1387 + }, + { + 
"epoch": 0.31981566820276497, + "grad_norm": 0.6051365106169855, + "learning_rate": 1.9348082378898714e-06, + "loss": 0.9252835512161255, + "step": 1388 + }, + { + "epoch": 0.32004608294930875, + "grad_norm": 0.5670107070091258, + "learning_rate": 1.9346728625512235e-06, + "loss": 0.8929460048675537, + "step": 1389 + }, + { + "epoch": 0.32027649769585254, + "grad_norm": 0.5325931239107909, + "learning_rate": 1.934537351546047e-06, + "loss": 0.8909564018249512, + "step": 1390 + }, + { + "epoch": 0.3205069124423963, + "grad_norm": 0.6295332947946368, + "learning_rate": 1.934401704894011e-06, + "loss": 0.8745983839035034, + "step": 1391 + }, + { + "epoch": 0.3207373271889401, + "grad_norm": 0.5987888846505133, + "learning_rate": 1.934265922614805e-06, + "loss": 0.8622266054153442, + "step": 1392 + }, + { + "epoch": 0.3209677419354839, + "grad_norm": 0.5587707056179402, + "learning_rate": 1.9341300047281365e-06, + "loss": 0.6796590089797974, + "step": 1393 + }, + { + "epoch": 0.32119815668202767, + "grad_norm": 0.6156409956015295, + "learning_rate": 1.9339939512537344e-06, + "loss": 0.9012733697891235, + "step": 1394 + }, + { + "epoch": 0.32142857142857145, + "grad_norm": 0.5898128750933246, + "learning_rate": 1.933857762211347e-06, + "loss": 0.9196282625198364, + "step": 1395 + }, + { + "epoch": 0.3216589861751152, + "grad_norm": 0.716981638669288, + "learning_rate": 1.9337214376207417e-06, + "loss": 0.7717788219451904, + "step": 1396 + }, + { + "epoch": 0.32188940092165896, + "grad_norm": 0.6574432706431985, + "learning_rate": 1.9335849775017057e-06, + "loss": 0.8516619801521301, + "step": 1397 + }, + { + "epoch": 0.32211981566820275, + "grad_norm": 0.6319036543472709, + "learning_rate": 1.933448381874046e-06, + "loss": 0.8089120388031006, + "step": 1398 + }, + { + "epoch": 0.32235023041474653, + "grad_norm": 0.7117992019263996, + "learning_rate": 1.9333116507575895e-06, + "loss": 0.8940925598144531, + "step": 1399 + }, + { + "epoch": 0.3225806451612903, + 
"grad_norm": 1.1103495530975782, + "learning_rate": 1.9331747841721827e-06, + "loss": 1.0240859985351562, + "step": 1400 + }, + { + "epoch": 0.3228110599078341, + "grad_norm": 0.6110124319562482, + "learning_rate": 1.9330377821376916e-06, + "loss": 0.742689847946167, + "step": 1401 + }, + { + "epoch": 0.3230414746543779, + "grad_norm": 0.6830153635526487, + "learning_rate": 1.932900644674001e-06, + "loss": 0.9843875169754028, + "step": 1402 + }, + { + "epoch": 0.32327188940092166, + "grad_norm": 0.6043326796009376, + "learning_rate": 1.932763371801017e-06, + "loss": 0.7289329767227173, + "step": 1403 + }, + { + "epoch": 0.32350230414746545, + "grad_norm": 0.676828647698979, + "learning_rate": 1.9326259635386644e-06, + "loss": 0.7706295251846313, + "step": 1404 + }, + { + "epoch": 0.32373271889400923, + "grad_norm": 0.526047650367784, + "learning_rate": 1.932488419906888e-06, + "loss": 0.87788325548172, + "step": 1405 + }, + { + "epoch": 0.323963133640553, + "grad_norm": 0.5971998478662486, + "learning_rate": 1.9323507409256515e-06, + "loss": 0.863690972328186, + "step": 1406 + }, + { + "epoch": 0.3241935483870968, + "grad_norm": 0.700825296208237, + "learning_rate": 1.9322129266149396e-06, + "loss": 0.9333875179290771, + "step": 1407 + }, + { + "epoch": 0.3244239631336406, + "grad_norm": 0.6642455421211582, + "learning_rate": 1.9320749769947555e-06, + "loss": 0.9170523881912231, + "step": 1408 + }, + { + "epoch": 0.3246543778801843, + "grad_norm": 0.7524235771818621, + "learning_rate": 1.931936892085122e-06, + "loss": 0.9337698221206665, + "step": 1409 + }, + { + "epoch": 0.3248847926267281, + "grad_norm": 0.5832115844679703, + "learning_rate": 1.9317986719060824e-06, + "loss": 0.8436682224273682, + "step": 1410 + }, + { + "epoch": 0.3251152073732719, + "grad_norm": 0.5569674571153642, + "learning_rate": 1.9316603164776996e-06, + "loss": 0.6652755737304688, + "step": 1411 + }, + { + "epoch": 0.32534562211981566, + "grad_norm": 0.5895248621851672, + "learning_rate": 
1.931521825820055e-06, + "loss": 0.7966932654380798, + "step": 1412 + }, + { + "epoch": 0.32557603686635944, + "grad_norm": 0.7207375493085693, + "learning_rate": 1.93138319995325e-06, + "loss": 0.9791682958602905, + "step": 1413 + }, + { + "epoch": 0.3258064516129032, + "grad_norm": 0.6505701538481653, + "learning_rate": 1.931244438897407e-06, + "loss": 0.7403467297554016, + "step": 1414 + }, + { + "epoch": 0.326036866359447, + "grad_norm": 0.5881243698924259, + "learning_rate": 1.931105542672667e-06, + "loss": 0.7758523225784302, + "step": 1415 + }, + { + "epoch": 0.3262672811059908, + "grad_norm": 0.6866613437755184, + "learning_rate": 1.9309665112991894e-06, + "loss": 0.8444551229476929, + "step": 1416 + }, + { + "epoch": 0.3264976958525346, + "grad_norm": 0.6987387290897759, + "learning_rate": 1.9308273447971553e-06, + "loss": 0.8796061277389526, + "step": 1417 + }, + { + "epoch": 0.32672811059907836, + "grad_norm": 0.6235742967720523, + "learning_rate": 1.9306880431867643e-06, + "loss": 0.8386640548706055, + "step": 1418 + }, + { + "epoch": 0.32695852534562214, + "grad_norm": 0.669578268248941, + "learning_rate": 1.930548606488236e-06, + "loss": 0.9229142665863037, + "step": 1419 + }, + { + "epoch": 0.3271889400921659, + "grad_norm": 0.6307605261613933, + "learning_rate": 1.9304090347218094e-06, + "loss": 0.9938615560531616, + "step": 1420 + }, + { + "epoch": 0.32741935483870965, + "grad_norm": 0.6526253572614591, + "learning_rate": 1.930269327907743e-06, + "loss": 0.7946186661720276, + "step": 1421 + }, + { + "epoch": 0.32764976958525344, + "grad_norm": 0.6717401804422498, + "learning_rate": 1.930129486066315e-06, + "loss": 0.9456713199615479, + "step": 1422 + }, + { + "epoch": 0.3278801843317972, + "grad_norm": 0.5156577436912951, + "learning_rate": 1.929989509217824e-06, + "loss": 0.844656765460968, + "step": 1423 + }, + { + "epoch": 0.328110599078341, + "grad_norm": 0.5219846430026822, + "learning_rate": 1.9298493973825862e-06, + "loss": 
0.7534950971603394, + "step": 1424 + }, + { + "epoch": 0.3283410138248848, + "grad_norm": 0.7328149629860281, + "learning_rate": 1.92970915058094e-06, + "loss": 0.934429407119751, + "step": 1425 + }, + { + "epoch": 0.32857142857142857, + "grad_norm": 0.6913075282966522, + "learning_rate": 1.929568768833241e-06, + "loss": 0.9491959810256958, + "step": 1426 + }, + { + "epoch": 0.32880184331797235, + "grad_norm": 0.6938433783461605, + "learning_rate": 1.9294282521598657e-06, + "loss": 0.9739001989364624, + "step": 1427 + }, + { + "epoch": 0.32903225806451614, + "grad_norm": 0.7260904191446513, + "learning_rate": 1.92928760058121e-06, + "loss": 0.8159639835357666, + "step": 1428 + }, + { + "epoch": 0.3292626728110599, + "grad_norm": 0.6287238530590293, + "learning_rate": 1.9291468141176894e-06, + "loss": 0.8752772808074951, + "step": 1429 + }, + { + "epoch": 0.3294930875576037, + "grad_norm": 0.6480201898337635, + "learning_rate": 1.929005892789739e-06, + "loss": 0.8543882369995117, + "step": 1430 + }, + { + "epoch": 0.3297235023041475, + "grad_norm": 0.7294679881265868, + "learning_rate": 1.928864836617813e-06, + "loss": 0.8837493658065796, + "step": 1431 + }, + { + "epoch": 0.32995391705069127, + "grad_norm": 0.7638461032292205, + "learning_rate": 1.9287236456223854e-06, + "loss": 0.9320387840270996, + "step": 1432 + }, + { + "epoch": 0.330184331797235, + "grad_norm": 0.5042343025936808, + "learning_rate": 1.92858231982395e-06, + "loss": 0.8272919654846191, + "step": 1433 + }, + { + "epoch": 0.3304147465437788, + "grad_norm": 0.6965906133224807, + "learning_rate": 1.9284408592430207e-06, + "loss": 0.9415527582168579, + "step": 1434 + }, + { + "epoch": 0.33064516129032256, + "grad_norm": 0.7215035047368656, + "learning_rate": 1.928299263900129e-06, + "loss": 0.91558837890625, + "step": 1435 + }, + { + "epoch": 0.33087557603686635, + "grad_norm": 0.5956823050741555, + "learning_rate": 1.9281575338158287e-06, + "loss": 0.9333036541938782, + "step": 1436 + }, + { + 
"epoch": 0.33110599078341013, + "grad_norm": 0.6051938214219355, + "learning_rate": 1.928015669010691e-06, + "loss": 0.7823847532272339, + "step": 1437 + }, + { + "epoch": 0.3313364055299539, + "grad_norm": 0.7462826372754077, + "learning_rate": 1.9278736695053075e-06, + "loss": 0.8436610102653503, + "step": 1438 + }, + { + "epoch": 0.3315668202764977, + "grad_norm": 0.7254037554281902, + "learning_rate": 1.927731535320289e-06, + "loss": 0.8658925890922546, + "step": 1439 + }, + { + "epoch": 0.3317972350230415, + "grad_norm": 0.6229809292573231, + "learning_rate": 1.9275892664762665e-06, + "loss": 0.8510075807571411, + "step": 1440 + }, + { + "epoch": 0.33202764976958526, + "grad_norm": 0.6349856559462502, + "learning_rate": 1.9274468629938897e-06, + "loss": 0.8002004623413086, + "step": 1441 + }, + { + "epoch": 0.33225806451612905, + "grad_norm": 0.6766111098462606, + "learning_rate": 1.9273043248938287e-06, + "loss": 1.0030219554901123, + "step": 1442 + }, + { + "epoch": 0.33248847926267283, + "grad_norm": 0.6313930076569801, + "learning_rate": 1.9271616521967723e-06, + "loss": 0.8415981531143188, + "step": 1443 + }, + { + "epoch": 0.3327188940092166, + "grad_norm": 0.5599899399531522, + "learning_rate": 1.9270188449234295e-06, + "loss": 0.7704254388809204, + "step": 1444 + }, + { + "epoch": 0.33294930875576034, + "grad_norm": 0.5742869826690059, + "learning_rate": 1.9268759030945294e-06, + "loss": 0.8350723385810852, + "step": 1445 + }, + { + "epoch": 0.3331797235023041, + "grad_norm": 0.7177949171518314, + "learning_rate": 1.926732826730818e-06, + "loss": 0.8729690313339233, + "step": 1446 + }, + { + "epoch": 0.3334101382488479, + "grad_norm": 0.64691268148931, + "learning_rate": 1.926589615853064e-06, + "loss": 0.7758746147155762, + "step": 1447 + }, + { + "epoch": 0.3336405529953917, + "grad_norm": 0.6330035443782508, + "learning_rate": 1.926446270482054e-06, + "loss": 0.7895134687423706, + "step": 1448 + }, + { + "epoch": 0.3338709677419355, + "grad_norm": 
0.5710370240153678, + "learning_rate": 1.9263027906385936e-06, + "loss": 1.0239053964614868, + "step": 1449 + }, + { + "epoch": 0.33410138248847926, + "grad_norm": 0.6423159813237256, + "learning_rate": 1.9261591763435104e-06, + "loss": 0.9294595122337341, + "step": 1450 + }, + { + "epoch": 0.33433179723502304, + "grad_norm": 0.690830605411519, + "learning_rate": 1.9260154276176484e-06, + "loss": 0.9786148071289062, + "step": 1451 + }, + { + "epoch": 0.3345622119815668, + "grad_norm": 0.5115027993477321, + "learning_rate": 1.925871544481873e-06, + "loss": 0.8513587117195129, + "step": 1452 + }, + { + "epoch": 0.3347926267281106, + "grad_norm": 0.4974492616751121, + "learning_rate": 1.9257275269570686e-06, + "loss": 0.7737371921539307, + "step": 1453 + }, + { + "epoch": 0.3350230414746544, + "grad_norm": 0.6186615203368176, + "learning_rate": 1.9255833750641392e-06, + "loss": 0.8567382097244263, + "step": 1454 + }, + { + "epoch": 0.3352534562211982, + "grad_norm": 0.5498745898568592, + "learning_rate": 1.9254390888240078e-06, + "loss": 0.893741250038147, + "step": 1455 + }, + { + "epoch": 0.33548387096774196, + "grad_norm": 0.5996544133152318, + "learning_rate": 1.9252946682576184e-06, + "loss": 0.9558119773864746, + "step": 1456 + }, + { + "epoch": 0.3357142857142857, + "grad_norm": 0.6629164295929078, + "learning_rate": 1.9251501133859323e-06, + "loss": 0.7055593729019165, + "step": 1457 + }, + { + "epoch": 0.33594470046082947, + "grad_norm": 0.652213418545905, + "learning_rate": 1.9250054242299326e-06, + "loss": 0.8409907817840576, + "step": 1458 + }, + { + "epoch": 0.33617511520737325, + "grad_norm": 0.5648924790833157, + "learning_rate": 1.9248606008106196e-06, + "loss": 0.9459772109985352, + "step": 1459 + }, + { + "epoch": 0.33640552995391704, + "grad_norm": 0.6285611694534835, + "learning_rate": 1.924715643149015e-06, + "loss": 0.7848879098892212, + "step": 1460 + }, + { + "epoch": 0.3366359447004608, + "grad_norm": 0.8030718131506138, + "learning_rate": 
1.924570551266159e-06, + "loss": 1.0365980863571167, + "step": 1461 + }, + { + "epoch": 0.3368663594470046, + "grad_norm": 0.6014174038703485, + "learning_rate": 1.924425325183111e-06, + "loss": 0.7331318855285645, + "step": 1462 + }, + { + "epoch": 0.3370967741935484, + "grad_norm": 0.6427865459032713, + "learning_rate": 1.9242799649209515e-06, + "loss": 0.8536237478256226, + "step": 1463 + }, + { + "epoch": 0.33732718894009217, + "grad_norm": 0.6525839289073214, + "learning_rate": 1.9241344705007784e-06, + "loss": 0.9296326637268066, + "step": 1464 + }, + { + "epoch": 0.33755760368663595, + "grad_norm": 0.887947392639257, + "learning_rate": 1.92398884194371e-06, + "loss": 0.9084932804107666, + "step": 1465 + }, + { + "epoch": 0.33778801843317974, + "grad_norm": 0.5270165853452017, + "learning_rate": 1.9238430792708847e-06, + "loss": 0.7426833510398865, + "step": 1466 + }, + { + "epoch": 0.3380184331797235, + "grad_norm": 0.5410658114261949, + "learning_rate": 1.9236971825034595e-06, + "loss": 0.7655431032180786, + "step": 1467 + }, + { + "epoch": 0.3382488479262673, + "grad_norm": 0.8331011387344854, + "learning_rate": 1.923551151662611e-06, + "loss": 0.9463646411895752, + "step": 1468 + }, + { + "epoch": 0.3384792626728111, + "grad_norm": 0.5486811314665706, + "learning_rate": 1.9234049867695355e-06, + "loss": 0.75661301612854, + "step": 1469 + }, + { + "epoch": 0.3387096774193548, + "grad_norm": 0.6386489226368193, + "learning_rate": 1.9232586878454486e-06, + "loss": 0.7411723136901855, + "step": 1470 + }, + { + "epoch": 0.3389400921658986, + "grad_norm": 0.6921074075590697, + "learning_rate": 1.9231122549115854e-06, + "loss": 0.9537360072135925, + "step": 1471 + }, + { + "epoch": 0.3391705069124424, + "grad_norm": 0.6895160542670777, + "learning_rate": 1.9229656879892004e-06, + "loss": 0.9527197480201721, + "step": 1472 + }, + { + "epoch": 0.33940092165898617, + "grad_norm": 0.7025720730409266, + "learning_rate": 1.9228189870995674e-06, + "loss": 
0.9083822965621948, + "step": 1473 + }, + { + "epoch": 0.33963133640552995, + "grad_norm": 0.5301970222083436, + "learning_rate": 1.9226721522639804e-06, + "loss": 0.8546823263168335, + "step": 1474 + }, + { + "epoch": 0.33986175115207373, + "grad_norm": 0.6709689097402769, + "learning_rate": 1.922525183503752e-06, + "loss": 0.7429832816123962, + "step": 1475 + }, + { + "epoch": 0.3400921658986175, + "grad_norm": 0.62032231336291, + "learning_rate": 1.922378080840214e-06, + "loss": 0.8805499076843262, + "step": 1476 + }, + { + "epoch": 0.3403225806451613, + "grad_norm": 0.681736765273056, + "learning_rate": 1.9222308442947193e-06, + "loss": 1.0177074670791626, + "step": 1477 + }, + { + "epoch": 0.3405529953917051, + "grad_norm": 0.5202393927717802, + "learning_rate": 1.922083473888638e-06, + "loss": 0.778317391872406, + "step": 1478 + }, + { + "epoch": 0.34078341013824887, + "grad_norm": 0.5628134051805, + "learning_rate": 1.921935969643361e-06, + "loss": 0.8461896181106567, + "step": 1479 + }, + { + "epoch": 0.34101382488479265, + "grad_norm": 0.5553667327802273, + "learning_rate": 1.921788331580299e-06, + "loss": 0.8028895258903503, + "step": 1480 + }, + { + "epoch": 0.34124423963133643, + "grad_norm": 0.5368047903298083, + "learning_rate": 1.9216405597208803e-06, + "loss": 0.9071121215820312, + "step": 1481 + }, + { + "epoch": 0.34147465437788016, + "grad_norm": 0.6427007304701287, + "learning_rate": 1.921492654086555e-06, + "loss": 0.7715062499046326, + "step": 1482 + }, + { + "epoch": 0.34170506912442394, + "grad_norm": 0.5552851307839923, + "learning_rate": 1.9213446146987907e-06, + "loss": 0.8446664810180664, + "step": 1483 + }, + { + "epoch": 0.3419354838709677, + "grad_norm": 0.712846002939772, + "learning_rate": 1.9211964415790754e-06, + "loss": 0.9835283756256104, + "step": 1484 + }, + { + "epoch": 0.3421658986175115, + "grad_norm": 0.8210412746012221, + "learning_rate": 1.921048134748916e-06, + "loss": 1.0630817413330078, + "step": 1485 + }, + { + 
"epoch": 0.3423963133640553, + "grad_norm": 0.6748930312757173, + "learning_rate": 1.920899694229839e-06, + "loss": 0.8514837622642517, + "step": 1486 + }, + { + "epoch": 0.3426267281105991, + "grad_norm": 0.6222560657794074, + "learning_rate": 1.920751120043391e-06, + "loss": 0.7302432060241699, + "step": 1487 + }, + { + "epoch": 0.34285714285714286, + "grad_norm": 0.7079869651359869, + "learning_rate": 1.920602412211136e-06, + "loss": 0.778337836265564, + "step": 1488 + }, + { + "epoch": 0.34308755760368664, + "grad_norm": 0.6890026561089317, + "learning_rate": 1.92045357075466e-06, + "loss": 0.815348207950592, + "step": 1489 + }, + { + "epoch": 0.3433179723502304, + "grad_norm": 0.5476065495891982, + "learning_rate": 1.920304595695567e-06, + "loss": 0.7844003438949585, + "step": 1490 + }, + { + "epoch": 0.3435483870967742, + "grad_norm": 0.6758218109549144, + "learning_rate": 1.92015548705548e-06, + "loss": 0.9513435363769531, + "step": 1491 + }, + { + "epoch": 0.343778801843318, + "grad_norm": 0.6450445262879821, + "learning_rate": 1.9200062448560424e-06, + "loss": 0.7506752610206604, + "step": 1492 + }, + { + "epoch": 0.3440092165898618, + "grad_norm": 0.6233205865485715, + "learning_rate": 1.919856869118916e-06, + "loss": 0.739554762840271, + "step": 1493 + }, + { + "epoch": 0.3442396313364055, + "grad_norm": 0.7436551378630792, + "learning_rate": 1.9197073598657826e-06, + "loss": 0.8167033791542053, + "step": 1494 + }, + { + "epoch": 0.3444700460829493, + "grad_norm": 0.6904439986569212, + "learning_rate": 1.919557717118344e-06, + "loss": 0.9308677911758423, + "step": 1495 + }, + { + "epoch": 0.34470046082949307, + "grad_norm": 0.6340340245140523, + "learning_rate": 1.9194079408983197e-06, + "loss": 0.8601467609405518, + "step": 1496 + }, + { + "epoch": 0.34493087557603686, + "grad_norm": 0.5645119744435318, + "learning_rate": 1.91925803122745e-06, + "loss": 0.8062653541564941, + "step": 1497 + }, + { + "epoch": 0.34516129032258064, + "grad_norm": 
0.6267130901098985, + "learning_rate": 1.9191079881274943e-06, + "loss": 0.8910555839538574, + "step": 1498 + }, + { + "epoch": 0.3453917050691244, + "grad_norm": 0.6398235864437706, + "learning_rate": 1.9189578116202307e-06, + "loss": 0.8604668378829956, + "step": 1499 + }, + { + "epoch": 0.3456221198156682, + "grad_norm": 0.660935387898433, + "learning_rate": 1.918807501727457e-06, + "loss": 0.7255126237869263, + "step": 1500 + }, + { + "epoch": 0.345852534562212, + "grad_norm": 0.6873891579533423, + "learning_rate": 1.9186570584709912e-06, + "loss": 0.998108983039856, + "step": 1501 + }, + { + "epoch": 0.34608294930875577, + "grad_norm": 0.6220147185177797, + "learning_rate": 1.918506481872669e-06, + "loss": 0.7660422325134277, + "step": 1502 + }, + { + "epoch": 0.34631336405529956, + "grad_norm": 0.6579892645247903, + "learning_rate": 1.9183557719543472e-06, + "loss": 0.868739902973175, + "step": 1503 + }, + { + "epoch": 0.34654377880184334, + "grad_norm": 0.5789973673480234, + "learning_rate": 1.918204928737901e-06, + "loss": 0.6630350351333618, + "step": 1504 + }, + { + "epoch": 0.3467741935483871, + "grad_norm": 0.5444610824332694, + "learning_rate": 1.9180539522452247e-06, + "loss": 0.8651586771011353, + "step": 1505 + }, + { + "epoch": 0.34700460829493085, + "grad_norm": 0.5927111235913876, + "learning_rate": 1.9179028424982326e-06, + "loss": 0.8584417700767517, + "step": 1506 + }, + { + "epoch": 0.34723502304147463, + "grad_norm": 0.5575547611441275, + "learning_rate": 1.917751599518858e-06, + "loss": 0.7793893814086914, + "step": 1507 + }, + { + "epoch": 0.3474654377880184, + "grad_norm": 0.768634414143097, + "learning_rate": 1.9176002233290542e-06, + "loss": 0.8499815464019775, + "step": 1508 + }, + { + "epoch": 0.3476958525345622, + "grad_norm": 0.7795460044280101, + "learning_rate": 1.917448713950792e-06, + "loss": 0.7914199829101562, + "step": 1509 + }, + { + "epoch": 0.347926267281106, + "grad_norm": 0.8510793838671106, + "learning_rate": 
1.9172970714060637e-06, + "loss": 0.942331850528717, + "step": 1510 + }, + { + "epoch": 0.34815668202764977, + "grad_norm": 0.621963787262809, + "learning_rate": 1.9171452957168803e-06, + "loss": 0.7780032157897949, + "step": 1511 + }, + { + "epoch": 0.34838709677419355, + "grad_norm": 0.6399045325995384, + "learning_rate": 1.916993386905271e-06, + "loss": 0.8544708490371704, + "step": 1512 + }, + { + "epoch": 0.34861751152073733, + "grad_norm": 0.6890752127070114, + "learning_rate": 1.9168413449932855e-06, + "loss": 0.798173725605011, + "step": 1513 + }, + { + "epoch": 0.3488479262672811, + "grad_norm": 0.7396810139453504, + "learning_rate": 1.9166891700029922e-06, + "loss": 0.9426852464675903, + "step": 1514 + }, + { + "epoch": 0.3490783410138249, + "grad_norm": 0.7455227520654529, + "learning_rate": 1.91653686195648e-06, + "loss": 0.922240138053894, + "step": 1515 + }, + { + "epoch": 0.3493087557603687, + "grad_norm": 0.630161091555718, + "learning_rate": 1.9163844208758556e-06, + "loss": 0.7997978925704956, + "step": 1516 + }, + { + "epoch": 0.34953917050691247, + "grad_norm": 0.7560374253096135, + "learning_rate": 1.9162318467832455e-06, + "loss": 1.0597525835037231, + "step": 1517 + }, + { + "epoch": 0.3497695852534562, + "grad_norm": 0.6669142658812499, + "learning_rate": 1.9160791397007957e-06, + "loss": 0.8211681842803955, + "step": 1518 + }, + { + "epoch": 0.35, + "grad_norm": 0.6134468456903489, + "learning_rate": 1.9159262996506716e-06, + "loss": 0.8078022003173828, + "step": 1519 + }, + { + "epoch": 0.35023041474654376, + "grad_norm": 0.8800559709758627, + "learning_rate": 1.915773326655057e-06, + "loss": 0.9449256658554077, + "step": 1520 + }, + { + "epoch": 0.35046082949308754, + "grad_norm": 0.6806561068219223, + "learning_rate": 1.915620220736157e-06, + "loss": 0.8744012117385864, + "step": 1521 + }, + { + "epoch": 0.35069124423963133, + "grad_norm": 0.501693303726274, + "learning_rate": 1.9154669819161946e-06, + "loss": 0.9503095746040344, + 
"step": 1522 + }, + { + "epoch": 0.3509216589861751, + "grad_norm": 0.7422922368497302, + "learning_rate": 1.9153136102174106e-06, + "loss": 1.055432915687561, + "step": 1523 + }, + { + "epoch": 0.3511520737327189, + "grad_norm": 0.7420134076461076, + "learning_rate": 1.9151601056620684e-06, + "loss": 0.8540226221084595, + "step": 1524 + }, + { + "epoch": 0.3513824884792627, + "grad_norm": 0.6432500784024293, + "learning_rate": 1.915006468272448e-06, + "loss": 0.8846266865730286, + "step": 1525 + }, + { + "epoch": 0.35161290322580646, + "grad_norm": 0.6065038491164693, + "learning_rate": 1.9148526980708507e-06, + "loss": 0.8941656947135925, + "step": 1526 + }, + { + "epoch": 0.35184331797235024, + "grad_norm": 0.657637251938276, + "learning_rate": 1.914698795079595e-06, + "loss": 0.868419885635376, + "step": 1527 + }, + { + "epoch": 0.35207373271889403, + "grad_norm": 0.6471997072963731, + "learning_rate": 1.91454475932102e-06, + "loss": 0.7375580072402954, + "step": 1528 + }, + { + "epoch": 0.3523041474654378, + "grad_norm": 0.5813494020686044, + "learning_rate": 1.9143905908174844e-06, + "loss": 0.9415492415428162, + "step": 1529 + }, + { + "epoch": 0.35253456221198154, + "grad_norm": 0.5845641741459107, + "learning_rate": 1.9142362895913646e-06, + "loss": 0.8395911455154419, + "step": 1530 + }, + { + "epoch": 0.3527649769585253, + "grad_norm": 0.6214793611789142, + "learning_rate": 1.914081855665057e-06, + "loss": 0.831234335899353, + "step": 1531 + }, + { + "epoch": 0.3529953917050691, + "grad_norm": 0.6337865377576076, + "learning_rate": 1.9139272890609794e-06, + "loss": 0.8975566029548645, + "step": 1532 + }, + { + "epoch": 0.3532258064516129, + "grad_norm": 0.629586080319263, + "learning_rate": 1.913772589801565e-06, + "loss": 0.8134264945983887, + "step": 1533 + }, + { + "epoch": 0.3534562211981567, + "grad_norm": 0.6728325426784268, + "learning_rate": 1.913617757909269e-06, + "loss": 0.9507275819778442, + "step": 1534 + }, + { + "epoch": 
0.35368663594470046, + "grad_norm": 0.6431752162471284, + "learning_rate": 1.913462793406565e-06, + "loss": 0.8839038610458374, + "step": 1535 + }, + { + "epoch": 0.35391705069124424, + "grad_norm": 0.5543997844984022, + "learning_rate": 1.9133076963159453e-06, + "loss": 0.8708392381668091, + "step": 1536 + }, + { + "epoch": 0.354147465437788, + "grad_norm": 0.6062385114401656, + "learning_rate": 1.913152466659923e-06, + "loss": 0.7609391212463379, + "step": 1537 + }, + { + "epoch": 0.3543778801843318, + "grad_norm": 0.7180303128257083, + "learning_rate": 1.912997104461029e-06, + "loss": 0.9231283664703369, + "step": 1538 + }, + { + "epoch": 0.3546082949308756, + "grad_norm": 0.6890910539107805, + "learning_rate": 1.912841609741814e-06, + "loss": 1.0297726392745972, + "step": 1539 + }, + { + "epoch": 0.3548387096774194, + "grad_norm": 0.75971130189085, + "learning_rate": 1.9126859825248475e-06, + "loss": 0.8798987865447998, + "step": 1540 + }, + { + "epoch": 0.35506912442396316, + "grad_norm": 0.7030378763019209, + "learning_rate": 1.912530222832719e-06, + "loss": 0.9104069471359253, + "step": 1541 + }, + { + "epoch": 0.35529953917050694, + "grad_norm": 0.6534729730017157, + "learning_rate": 1.9123743306880368e-06, + "loss": 0.7618073225021362, + "step": 1542 + }, + { + "epoch": 0.35552995391705067, + "grad_norm": 0.7461748863693719, + "learning_rate": 1.912218306113428e-06, + "loss": 0.8397510051727295, + "step": 1543 + }, + { + "epoch": 0.35576036866359445, + "grad_norm": 0.7060377086024656, + "learning_rate": 1.91206214913154e-06, + "loss": 0.9884299039840698, + "step": 1544 + }, + { + "epoch": 0.35599078341013823, + "grad_norm": 0.9576081524625122, + "learning_rate": 1.9119058597650385e-06, + "loss": 0.9878349304199219, + "step": 1545 + }, + { + "epoch": 0.356221198156682, + "grad_norm": 0.6493274093007226, + "learning_rate": 1.9117494380366086e-06, + "loss": 0.8790488243103027, + "step": 1546 + }, + { + "epoch": 0.3564516129032258, + "grad_norm": 
0.5310131973918355, + "learning_rate": 1.9115928839689546e-06, + "loss": 0.7390745878219604, + "step": 1547 + }, + { + "epoch": 0.3566820276497696, + "grad_norm": 0.6882029258971281, + "learning_rate": 1.9114361975848004e-06, + "loss": 0.7354288101196289, + "step": 1548 + }, + { + "epoch": 0.35691244239631337, + "grad_norm": 0.7667535594605746, + "learning_rate": 1.911279378906889e-06, + "loss": 0.9234673976898193, + "step": 1549 + }, + { + "epoch": 0.35714285714285715, + "grad_norm": 0.6115013610277281, + "learning_rate": 1.911122427957982e-06, + "loss": 0.8913710117340088, + "step": 1550 + }, + { + "epoch": 0.35737327188940093, + "grad_norm": 0.7050561523779678, + "learning_rate": 1.9109653447608605e-06, + "loss": 0.754358172416687, + "step": 1551 + }, + { + "epoch": 0.3576036866359447, + "grad_norm": 0.784312775933048, + "learning_rate": 1.910808129338325e-06, + "loss": 0.7361906170845032, + "step": 1552 + }, + { + "epoch": 0.3578341013824885, + "grad_norm": 0.7799572736490341, + "learning_rate": 1.9106507817131957e-06, + "loss": 0.8167279362678528, + "step": 1553 + }, + { + "epoch": 0.3580645161290323, + "grad_norm": 0.5335250967831215, + "learning_rate": 1.910493301908311e-06, + "loss": 0.7504739761352539, + "step": 1554 + }, + { + "epoch": 0.358294930875576, + "grad_norm": 0.7032319483863736, + "learning_rate": 1.9103356899465287e-06, + "loss": 0.8452355861663818, + "step": 1555 + }, + { + "epoch": 0.3585253456221198, + "grad_norm": 0.6126249946093243, + "learning_rate": 1.9101779458507263e-06, + "loss": 0.891547679901123, + "step": 1556 + }, + { + "epoch": 0.3587557603686636, + "grad_norm": 0.6935978783962933, + "learning_rate": 1.9100200696438e-06, + "loss": 0.8132680654525757, + "step": 1557 + }, + { + "epoch": 0.35898617511520736, + "grad_norm": 0.6519674133121284, + "learning_rate": 1.9098620613486646e-06, + "loss": 0.799482524394989, + "step": 1558 + }, + { + "epoch": 0.35921658986175115, + "grad_norm": 0.5904521460015955, + "learning_rate": 
1.909703920988256e-06, + "loss": 0.8490267992019653, + "step": 1559 + }, + { + "epoch": 0.35944700460829493, + "grad_norm": 0.6819976276562522, + "learning_rate": 1.9095456485855277e-06, + "loss": 0.8608428239822388, + "step": 1560 + }, + { + "epoch": 0.3596774193548387, + "grad_norm": 0.710056379748393, + "learning_rate": 1.9093872441634526e-06, + "loss": 0.8460499048233032, + "step": 1561 + }, + { + "epoch": 0.3599078341013825, + "grad_norm": 0.7727130217690178, + "learning_rate": 1.9092287077450226e-06, + "loss": 0.9268433451652527, + "step": 1562 + }, + { + "epoch": 0.3601382488479263, + "grad_norm": 0.612809776724531, + "learning_rate": 1.90907003935325e-06, + "loss": 0.7354154586791992, + "step": 1563 + }, + { + "epoch": 0.36036866359447006, + "grad_norm": 0.6941943523357101, + "learning_rate": 1.9089112390111637e-06, + "loss": 0.87982177734375, + "step": 1564 + }, + { + "epoch": 0.36059907834101385, + "grad_norm": 0.7092001355075633, + "learning_rate": 1.9087523067418148e-06, + "loss": 0.994953453540802, + "step": 1565 + }, + { + "epoch": 0.36082949308755763, + "grad_norm": 0.7240785511234525, + "learning_rate": 1.9085932425682715e-06, + "loss": 0.8623256087303162, + "step": 1566 + }, + { + "epoch": 0.36105990783410136, + "grad_norm": 0.7577571727617612, + "learning_rate": 1.908434046513622e-06, + "loss": 0.8752846717834473, + "step": 1567 + }, + { + "epoch": 0.36129032258064514, + "grad_norm": 0.7538020694732109, + "learning_rate": 1.908274718600973e-06, + "loss": 0.9002033472061157, + "step": 1568 + }, + { + "epoch": 0.3615207373271889, + "grad_norm": 0.6751938160957709, + "learning_rate": 1.908115258853451e-06, + "loss": 0.7290444374084473, + "step": 1569 + }, + { + "epoch": 0.3617511520737327, + "grad_norm": 0.5739449847646289, + "learning_rate": 1.9079556672942016e-06, + "loss": 0.6833889484405518, + "step": 1570 + }, + { + "epoch": 0.3619815668202765, + "grad_norm": 0.7271514059808825, + "learning_rate": 1.907795943946389e-06, + "loss": 
1.0033842325210571, + "step": 1571 + }, + { + "epoch": 0.3622119815668203, + "grad_norm": 0.7261786878454322, + "learning_rate": 1.907636088833197e-06, + "loss": 0.9590950012207031, + "step": 1572 + }, + { + "epoch": 0.36244239631336406, + "grad_norm": 0.6796147019608265, + "learning_rate": 1.907476101977828e-06, + "loss": 0.8812122344970703, + "step": 1573 + }, + { + "epoch": 0.36267281105990784, + "grad_norm": 0.5509770826635522, + "learning_rate": 1.9073159834035045e-06, + "loss": 0.7549433708190918, + "step": 1574 + }, + { + "epoch": 0.3629032258064516, + "grad_norm": 0.8344983468044503, + "learning_rate": 1.9071557331334667e-06, + "loss": 0.9235562086105347, + "step": 1575 + }, + { + "epoch": 0.3631336405529954, + "grad_norm": 0.6317903590715543, + "learning_rate": 1.9069953511909755e-06, + "loss": 0.8468542098999023, + "step": 1576 + }, + { + "epoch": 0.3633640552995392, + "grad_norm": 0.5574642699953357, + "learning_rate": 1.9068348375993096e-06, + "loss": 0.8804000616073608, + "step": 1577 + }, + { + "epoch": 0.363594470046083, + "grad_norm": 0.5912501411899118, + "learning_rate": 1.9066741923817676e-06, + "loss": 0.762598991394043, + "step": 1578 + }, + { + "epoch": 0.3638248847926267, + "grad_norm": 0.7706966706442087, + "learning_rate": 1.9065134155616666e-06, + "loss": 0.8791940212249756, + "step": 1579 + }, + { + "epoch": 0.3640552995391705, + "grad_norm": 0.7168527524200441, + "learning_rate": 1.9063525071623439e-06, + "loss": 0.7041842937469482, + "step": 1580 + }, + { + "epoch": 0.36428571428571427, + "grad_norm": 0.6160916310238944, + "learning_rate": 1.9061914672071543e-06, + "loss": 0.9526468515396118, + "step": 1581 + }, + { + "epoch": 0.36451612903225805, + "grad_norm": 0.7118890640067297, + "learning_rate": 1.906030295719473e-06, + "loss": 0.9388316869735718, + "step": 1582 + }, + { + "epoch": 0.36474654377880183, + "grad_norm": 0.6899284739234433, + "learning_rate": 1.9058689927226936e-06, + "loss": 0.7295777797698975, + "step": 1583 + }, + { 
+ "epoch": 0.3649769585253456, + "grad_norm": 0.773766722090894, + "learning_rate": 1.905707558240229e-06, + "loss": 0.7540932297706604, + "step": 1584 + }, + { + "epoch": 0.3652073732718894, + "grad_norm": 0.7012558071518832, + "learning_rate": 1.9055459922955118e-06, + "loss": 0.9457792639732361, + "step": 1585 + }, + { + "epoch": 0.3654377880184332, + "grad_norm": 0.8248538436303866, + "learning_rate": 1.9053842949119923e-06, + "loss": 0.9121883511543274, + "step": 1586 + }, + { + "epoch": 0.36566820276497697, + "grad_norm": 0.7283384308967912, + "learning_rate": 1.905222466113141e-06, + "loss": 0.8140746355056763, + "step": 1587 + }, + { + "epoch": 0.36589861751152075, + "grad_norm": 0.6419705545105435, + "learning_rate": 1.905060505922447e-06, + "loss": 0.7403484582901001, + "step": 1588 + }, + { + "epoch": 0.36612903225806454, + "grad_norm": 0.581047347336086, + "learning_rate": 1.9048984143634188e-06, + "loss": 0.9040734171867371, + "step": 1589 + }, + { + "epoch": 0.3663594470046083, + "grad_norm": 0.8763582049227886, + "learning_rate": 1.9047361914595834e-06, + "loss": 0.9060958623886108, + "step": 1590 + }, + { + "epoch": 0.36658986175115205, + "grad_norm": 0.563240407907546, + "learning_rate": 1.904573837234488e-06, + "loss": 0.6925936937332153, + "step": 1591 + }, + { + "epoch": 0.36682027649769583, + "grad_norm": 0.6465995527416484, + "learning_rate": 1.9044113517116973e-06, + "loss": 0.8120197057723999, + "step": 1592 + }, + { + "epoch": 0.3670506912442396, + "grad_norm": 0.6544256373051048, + "learning_rate": 1.9042487349147965e-06, + "loss": 0.796414852142334, + "step": 1593 + }, + { + "epoch": 0.3672811059907834, + "grad_norm": 0.5916998574283423, + "learning_rate": 1.9040859868673885e-06, + "loss": 0.8390822410583496, + "step": 1594 + }, + { + "epoch": 0.3675115207373272, + "grad_norm": 0.6567403008386238, + "learning_rate": 1.9039231075930967e-06, + "loss": 0.990093469619751, + "step": 1595 + }, + { + "epoch": 0.36774193548387096, + "grad_norm": 
0.733917290012865, + "learning_rate": 1.9037600971155623e-06, + "loss": 0.8548597097396851, + "step": 1596 + }, + { + "epoch": 0.36797235023041475, + "grad_norm": 0.5429475903618856, + "learning_rate": 1.9035969554584464e-06, + "loss": 0.687299370765686, + "step": 1597 + }, + { + "epoch": 0.36820276497695853, + "grad_norm": 0.9276548262086025, + "learning_rate": 1.9034336826454282e-06, + "loss": 0.7857942581176758, + "step": 1598 + }, + { + "epoch": 0.3684331797235023, + "grad_norm": 0.7345227244712206, + "learning_rate": 1.9032702787002072e-06, + "loss": 0.8836538195610046, + "step": 1599 + }, + { + "epoch": 0.3686635944700461, + "grad_norm": 0.723858907192251, + "learning_rate": 1.9031067436465011e-06, + "loss": 0.8132715225219727, + "step": 1600 + }, + { + "epoch": 0.3688940092165899, + "grad_norm": 0.6649285274594987, + "learning_rate": 1.9029430775080467e-06, + "loss": 0.7632347345352173, + "step": 1601 + }, + { + "epoch": 0.36912442396313366, + "grad_norm": 0.6319858893374919, + "learning_rate": 1.9027792803086e-06, + "loss": 0.8616297841072083, + "step": 1602 + }, + { + "epoch": 0.36935483870967745, + "grad_norm": 0.6067565637769744, + "learning_rate": 1.9026153520719358e-06, + "loss": 0.8418172597885132, + "step": 1603 + }, + { + "epoch": 0.3695852534562212, + "grad_norm": 0.7094320350542224, + "learning_rate": 1.902451292821848e-06, + "loss": 0.7253717184066772, + "step": 1604 + }, + { + "epoch": 0.36981566820276496, + "grad_norm": 0.8059000016280097, + "learning_rate": 1.90228710258215e-06, + "loss": 0.9746035933494568, + "step": 1605 + }, + { + "epoch": 0.37004608294930874, + "grad_norm": 0.5259402340057983, + "learning_rate": 1.9021227813766733e-06, + "loss": 0.7722853422164917, + "step": 1606 + }, + { + "epoch": 0.3702764976958525, + "grad_norm": 0.6925264238716391, + "learning_rate": 1.9019583292292693e-06, + "loss": 0.8278614282608032, + "step": 1607 + }, + { + "epoch": 0.3705069124423963, + "grad_norm": 0.6439238935194896, + "learning_rate": 
1.9017937461638078e-06, + "loss": 0.7433085441589355, + "step": 1608 + }, + { + "epoch": 0.3707373271889401, + "grad_norm": 0.5505689424398915, + "learning_rate": 1.901629032204178e-06, + "loss": 0.9194153547286987, + "step": 1609 + }, + { + "epoch": 0.3709677419354839, + "grad_norm": 0.5866951472740422, + "learning_rate": 1.9014641873742877e-06, + "loss": 0.8502616882324219, + "step": 1610 + }, + { + "epoch": 0.37119815668202766, + "grad_norm": 0.6242266615517361, + "learning_rate": 1.9012992116980637e-06, + "loss": 0.8494570255279541, + "step": 1611 + }, + { + "epoch": 0.37142857142857144, + "grad_norm": 0.7369836132356214, + "learning_rate": 1.9011341051994526e-06, + "loss": 0.8567800521850586, + "step": 1612 + }, + { + "epoch": 0.3716589861751152, + "grad_norm": 0.6246604791910833, + "learning_rate": 1.9009688679024189e-06, + "loss": 0.7739682197570801, + "step": 1613 + }, + { + "epoch": 0.371889400921659, + "grad_norm": 0.754158311495332, + "learning_rate": 1.900803499830947e-06, + "loss": 0.8548814058303833, + "step": 1614 + }, + { + "epoch": 0.3721198156682028, + "grad_norm": 0.5813822362984273, + "learning_rate": 1.9006380010090395e-06, + "loss": 0.7444359064102173, + "step": 1615 + }, + { + "epoch": 0.3723502304147465, + "grad_norm": 1.02732235167255, + "learning_rate": 1.9004723714607183e-06, + "loss": 1.0483827590942383, + "step": 1616 + }, + { + "epoch": 0.3725806451612903, + "grad_norm": 0.7020606936102383, + "learning_rate": 1.9003066112100248e-06, + "loss": 0.7734435200691223, + "step": 1617 + }, + { + "epoch": 0.3728110599078341, + "grad_norm": 0.7388837596699729, + "learning_rate": 1.9001407202810181e-06, + "loss": 0.856806755065918, + "step": 1618 + }, + { + "epoch": 0.37304147465437787, + "grad_norm": 0.6630252498689021, + "learning_rate": 1.8999746986977776e-06, + "loss": 0.8708832263946533, + "step": 1619 + }, + { + "epoch": 0.37327188940092165, + "grad_norm": 0.7833548721469644, + "learning_rate": 1.899808546484401e-06, + "loss": 
0.9295653104782104, + "step": 1620 + }, + { + "epoch": 0.37350230414746544, + "grad_norm": 0.8120612065986471, + "learning_rate": 1.8996422636650054e-06, + "loss": 0.8799598217010498, + "step": 1621 + }, + { + "epoch": 0.3737327188940092, + "grad_norm": 0.6113644757026901, + "learning_rate": 1.8994758502637259e-06, + "loss": 0.8014140725135803, + "step": 1622 + }, + { + "epoch": 0.373963133640553, + "grad_norm": 0.7305462035644114, + "learning_rate": 1.8993093063047174e-06, + "loss": 0.8252615928649902, + "step": 1623 + }, + { + "epoch": 0.3741935483870968, + "grad_norm": 0.5571708900709818, + "learning_rate": 1.899142631812154e-06, + "loss": 0.8617361783981323, + "step": 1624 + }, + { + "epoch": 0.37442396313364057, + "grad_norm": 0.7088005059034134, + "learning_rate": 1.8989758268102274e-06, + "loss": 0.9316745400428772, + "step": 1625 + }, + { + "epoch": 0.37465437788018435, + "grad_norm": 0.5449801119846465, + "learning_rate": 1.89880889132315e-06, + "loss": 0.8195457458496094, + "step": 1626 + }, + { + "epoch": 0.37488479262672814, + "grad_norm": 0.7143201633211917, + "learning_rate": 1.8986418253751516e-06, + "loss": 0.7828787565231323, + "step": 1627 + }, + { + "epoch": 0.37511520737327186, + "grad_norm": 0.6506165386805676, + "learning_rate": 1.898474628990482e-06, + "loss": 0.8130955696105957, + "step": 1628 + }, + { + "epoch": 0.37534562211981565, + "grad_norm": 0.7388682274593752, + "learning_rate": 1.8983073021934097e-06, + "loss": 0.9925695657730103, + "step": 1629 + }, + { + "epoch": 0.37557603686635943, + "grad_norm": 0.7851734301973293, + "learning_rate": 1.8981398450082216e-06, + "loss": 0.8547999858856201, + "step": 1630 + }, + { + "epoch": 0.3758064516129032, + "grad_norm": 0.7016894400602667, + "learning_rate": 1.897972257459224e-06, + "loss": 0.8922954797744751, + "step": 1631 + }, + { + "epoch": 0.376036866359447, + "grad_norm": 0.641235710173759, + "learning_rate": 1.8978045395707415e-06, + "loss": 0.8553646802902222, + "step": 1632 + }, + { 
+ "epoch": 0.3762672811059908, + "grad_norm": 0.6780369843564141, + "learning_rate": 1.897636691367119e-06, + "loss": 0.7854139804840088, + "step": 1633 + }, + { + "epoch": 0.37649769585253456, + "grad_norm": 0.8291834208164379, + "learning_rate": 1.897468712872719e-06, + "loss": 0.8968626260757446, + "step": 1634 + }, + { + "epoch": 0.37672811059907835, + "grad_norm": 0.8135056284613995, + "learning_rate": 1.8973006041119234e-06, + "loss": 0.8898152112960815, + "step": 1635 + }, + { + "epoch": 0.37695852534562213, + "grad_norm": 0.7215595529410248, + "learning_rate": 1.8971323651091332e-06, + "loss": 0.8499374389648438, + "step": 1636 + }, + { + "epoch": 0.3771889400921659, + "grad_norm": 0.5955881573233954, + "learning_rate": 1.8969639958887677e-06, + "loss": 0.7803430557250977, + "step": 1637 + }, + { + "epoch": 0.3774193548387097, + "grad_norm": 0.672225539346555, + "learning_rate": 1.8967954964752657e-06, + "loss": 0.7669799327850342, + "step": 1638 + }, + { + "epoch": 0.3776497695852535, + "grad_norm": 0.7164416850564317, + "learning_rate": 1.8966268668930845e-06, + "loss": 0.9085204601287842, + "step": 1639 + }, + { + "epoch": 0.3778801843317972, + "grad_norm": 0.8492247946008473, + "learning_rate": 1.8964581071667005e-06, + "loss": 0.7793002724647522, + "step": 1640 + }, + { + "epoch": 0.378110599078341, + "grad_norm": 0.6359200183287212, + "learning_rate": 1.896289217320609e-06, + "loss": 0.8649430274963379, + "step": 1641 + }, + { + "epoch": 0.3783410138248848, + "grad_norm": 0.6424804906800053, + "learning_rate": 1.8961201973793243e-06, + "loss": 0.856898844242096, + "step": 1642 + }, + { + "epoch": 0.37857142857142856, + "grad_norm": 0.7702312360726356, + "learning_rate": 1.895951047367379e-06, + "loss": 0.8221957087516785, + "step": 1643 + }, + { + "epoch": 0.37880184331797234, + "grad_norm": 0.7163935487823062, + "learning_rate": 1.8957817673093256e-06, + "loss": 0.8158079385757446, + "step": 1644 + }, + { + "epoch": 0.3790322580645161, + "grad_norm": 
0.8008902981825888, + "learning_rate": 1.8956123572297343e-06, + "loss": 0.7803312540054321, + "step": 1645 + }, + { + "epoch": 0.3792626728110599, + "grad_norm": 0.7902834195938876, + "learning_rate": 1.8954428171531949e-06, + "loss": 1.035685420036316, + "step": 1646 + }, + { + "epoch": 0.3794930875576037, + "grad_norm": 0.6044824314396153, + "learning_rate": 1.8952731471043161e-06, + "loss": 0.6871123313903809, + "step": 1647 + }, + { + "epoch": 0.3797235023041475, + "grad_norm": 0.6400629937897654, + "learning_rate": 1.8951033471077253e-06, + "loss": 0.9651780128479004, + "step": 1648 + }, + { + "epoch": 0.37995391705069126, + "grad_norm": 0.7485926311468839, + "learning_rate": 1.8949334171880687e-06, + "loss": 1.018349528312683, + "step": 1649 + }, + { + "epoch": 0.38018433179723504, + "grad_norm": 0.6571349103626993, + "learning_rate": 1.894763357370011e-06, + "loss": 0.6839278936386108, + "step": 1650 + }, + { + "epoch": 0.3804147465437788, + "grad_norm": 0.6757724586058976, + "learning_rate": 1.894593167678237e-06, + "loss": 0.8442174196243286, + "step": 1651 + }, + { + "epoch": 0.38064516129032255, + "grad_norm": 0.6368918088972565, + "learning_rate": 1.8944228481374484e-06, + "loss": 0.8224585056304932, + "step": 1652 + }, + { + "epoch": 0.38087557603686634, + "grad_norm": 0.6970802562618803, + "learning_rate": 1.8942523987723678e-06, + "loss": 0.8570500612258911, + "step": 1653 + }, + { + "epoch": 0.3811059907834101, + "grad_norm": 0.731718201815575, + "learning_rate": 1.8940818196077354e-06, + "loss": 0.7696554660797119, + "step": 1654 + }, + { + "epoch": 0.3813364055299539, + "grad_norm": 0.7456139352122005, + "learning_rate": 1.8939111106683103e-06, + "loss": 0.822563886642456, + "step": 1655 + }, + { + "epoch": 0.3815668202764977, + "grad_norm": 0.46565320695076334, + "learning_rate": 1.8937402719788711e-06, + "loss": 0.6537219882011414, + "step": 1656 + }, + { + "epoch": 0.38179723502304147, + "grad_norm": 0.8414098679023442, + "learning_rate": 
1.8935693035642145e-06, + "loss": 0.9081932306289673, + "step": 1657 + }, + { + "epoch": 0.38202764976958525, + "grad_norm": 0.5018818977531995, + "learning_rate": 1.8933982054491563e-06, + "loss": 0.6839661598205566, + "step": 1658 + }, + { + "epoch": 0.38225806451612904, + "grad_norm": 0.6964355972832653, + "learning_rate": 1.8932269776585313e-06, + "loss": 0.9187283515930176, + "step": 1659 + }, + { + "epoch": 0.3824884792626728, + "grad_norm": 0.8100260748701062, + "learning_rate": 1.893055620217193e-06, + "loss": 0.9567047357559204, + "step": 1660 + }, + { + "epoch": 0.3827188940092166, + "grad_norm": 0.7345697660292878, + "learning_rate": 1.8928841331500136e-06, + "loss": 0.785561203956604, + "step": 1661 + }, + { + "epoch": 0.3829493087557604, + "grad_norm": 0.882033286363023, + "learning_rate": 1.8927125164818842e-06, + "loss": 0.8986088037490845, + "step": 1662 + }, + { + "epoch": 0.38317972350230417, + "grad_norm": 0.7191553093714457, + "learning_rate": 1.892540770237715e-06, + "loss": 1.0027087926864624, + "step": 1663 + }, + { + "epoch": 0.38341013824884795, + "grad_norm": 0.6970721775230337, + "learning_rate": 1.8923688944424346e-06, + "loss": 0.8502041697502136, + "step": 1664 + }, + { + "epoch": 0.3836405529953917, + "grad_norm": 0.6684142159321271, + "learning_rate": 1.8921968891209907e-06, + "loss": 0.8526991605758667, + "step": 1665 + }, + { + "epoch": 0.38387096774193546, + "grad_norm": 0.7082372977886758, + "learning_rate": 1.8920247542983492e-06, + "loss": 0.8084676265716553, + "step": 1666 + }, + { + "epoch": 0.38410138248847925, + "grad_norm": 0.6206558140284871, + "learning_rate": 1.8918524899994957e-06, + "loss": 0.8922938704490662, + "step": 1667 + }, + { + "epoch": 0.38433179723502303, + "grad_norm": 0.768771022868596, + "learning_rate": 1.8916800962494337e-06, + "loss": 0.7965600490570068, + "step": 1668 + }, + { + "epoch": 0.3845622119815668, + "grad_norm": 0.6752105100256773, + "learning_rate": 1.8915075730731865e-06, + "loss": 
0.9505549073219299, + "step": 1669 + }, + { + "epoch": 0.3847926267281106, + "grad_norm": 0.6897214722687708, + "learning_rate": 1.8913349204957947e-06, + "loss": 0.9459924697875977, + "step": 1670 + }, + { + "epoch": 0.3850230414746544, + "grad_norm": 0.6215985429421047, + "learning_rate": 1.8911621385423195e-06, + "loss": 0.8433674573898315, + "step": 1671 + }, + { + "epoch": 0.38525345622119817, + "grad_norm": 0.7790027974124772, + "learning_rate": 1.8909892272378398e-06, + "loss": 0.8945955038070679, + "step": 1672 + }, + { + "epoch": 0.38548387096774195, + "grad_norm": 0.6828005324330048, + "learning_rate": 1.890816186607453e-06, + "loss": 0.8580358624458313, + "step": 1673 + }, + { + "epoch": 0.38571428571428573, + "grad_norm": 0.6249387555876122, + "learning_rate": 1.8906430166762761e-06, + "loss": 0.7708698511123657, + "step": 1674 + }, + { + "epoch": 0.3859447004608295, + "grad_norm": 0.7418139824839276, + "learning_rate": 1.8904697174694446e-06, + "loss": 0.8647153377532959, + "step": 1675 + }, + { + "epoch": 0.3861751152073733, + "grad_norm": 0.7428074816121766, + "learning_rate": 1.890296289012112e-06, + "loss": 0.9380506277084351, + "step": 1676 + }, + { + "epoch": 0.386405529953917, + "grad_norm": 0.6218965089791644, + "learning_rate": 1.8901227313294519e-06, + "loss": 0.8814103603363037, + "step": 1677 + }, + { + "epoch": 0.3866359447004608, + "grad_norm": 0.7768206335574417, + "learning_rate": 1.8899490444466556e-06, + "loss": 0.9348419904708862, + "step": 1678 + }, + { + "epoch": 0.3868663594470046, + "grad_norm": 0.5956095891599564, + "learning_rate": 1.8897752283889338e-06, + "loss": 0.7502046823501587, + "step": 1679 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 0.567040551050712, + "learning_rate": 1.8896012831815155e-06, + "loss": 0.8499769568443298, + "step": 1680 + }, + { + "epoch": 0.38732718894009216, + "grad_norm": 0.6506272613615357, + "learning_rate": 1.8894272088496487e-06, + "loss": 0.8253993391990662, + "step": 1681 + }, + { 
+ "epoch": 0.38755760368663594, + "grad_norm": 0.7707626449058277, + "learning_rate": 1.8892530054185998e-06, + "loss": 0.8494073152542114, + "step": 1682 + }, + { + "epoch": 0.3877880184331797, + "grad_norm": 0.7608738547672518, + "learning_rate": 1.8890786729136546e-06, + "loss": 0.8836106061935425, + "step": 1683 + }, + { + "epoch": 0.3880184331797235, + "grad_norm": 0.636256009552465, + "learning_rate": 1.8889042113601166e-06, + "loss": 0.8949145078659058, + "step": 1684 + }, + { + "epoch": 0.3882488479262673, + "grad_norm": 0.5966436023392323, + "learning_rate": 1.8887296207833095e-06, + "loss": 0.6210965514183044, + "step": 1685 + }, + { + "epoch": 0.3884792626728111, + "grad_norm": 0.8527942588919344, + "learning_rate": 1.8885549012085744e-06, + "loss": 0.9216527938842773, + "step": 1686 + }, + { + "epoch": 0.38870967741935486, + "grad_norm": 0.6878600463475216, + "learning_rate": 1.8883800526612715e-06, + "loss": 0.9266358613967896, + "step": 1687 + }, + { + "epoch": 0.38894009216589864, + "grad_norm": 0.7261249184769291, + "learning_rate": 1.88820507516678e-06, + "loss": 0.8550606966018677, + "step": 1688 + }, + { + "epoch": 0.38917050691244237, + "grad_norm": 0.702582367534852, + "learning_rate": 1.888029968750498e-06, + "loss": 0.8632181882858276, + "step": 1689 + }, + { + "epoch": 0.38940092165898615, + "grad_norm": 0.8055419508573982, + "learning_rate": 1.8878547334378415e-06, + "loss": 0.8795493841171265, + "step": 1690 + }, + { + "epoch": 0.38963133640552994, + "grad_norm": 0.8491490559655837, + "learning_rate": 1.8876793692542456e-06, + "loss": 0.9750456809997559, + "step": 1691 + }, + { + "epoch": 0.3898617511520737, + "grad_norm": 0.7818793926101317, + "learning_rate": 1.8875038762251645e-06, + "loss": 0.9270161390304565, + "step": 1692 + }, + { + "epoch": 0.3900921658986175, + "grad_norm": 0.7260894881906815, + "learning_rate": 1.8873282543760705e-06, + "loss": 0.8154089450836182, + "step": 1693 + }, + { + "epoch": 0.3903225806451613, + 
"grad_norm": 0.692223503364103, + "learning_rate": 1.887152503732455e-06, + "loss": 0.9245043992996216, + "step": 1694 + }, + { + "epoch": 0.39055299539170507, + "grad_norm": 0.7622355519095229, + "learning_rate": 1.8869766243198284e-06, + "loss": 0.9218056201934814, + "step": 1695 + }, + { + "epoch": 0.39078341013824885, + "grad_norm": 0.5749624768358436, + "learning_rate": 1.8868006161637192e-06, + "loss": 0.7753894329071045, + "step": 1696 + }, + { + "epoch": 0.39101382488479264, + "grad_norm": 0.7181901167791495, + "learning_rate": 1.8866244792896739e-06, + "loss": 0.8455277681350708, + "step": 1697 + }, + { + "epoch": 0.3912442396313364, + "grad_norm": 0.7361657621974459, + "learning_rate": 1.8864482137232596e-06, + "loss": 0.8301571607589722, + "step": 1698 + }, + { + "epoch": 0.3914746543778802, + "grad_norm": 0.5504243602930398, + "learning_rate": 1.8862718194900602e-06, + "loss": 0.9768285155296326, + "step": 1699 + }, + { + "epoch": 0.391705069124424, + "grad_norm": 0.7416616964447972, + "learning_rate": 1.8860952966156798e-06, + "loss": 0.9659395217895508, + "step": 1700 + }, + { + "epoch": 0.3919354838709677, + "grad_norm": 0.731283063502841, + "learning_rate": 1.8859186451257401e-06, + "loss": 0.9975444078445435, + "step": 1701 + }, + { + "epoch": 0.3921658986175115, + "grad_norm": 0.712824030540976, + "learning_rate": 1.8857418650458816e-06, + "loss": 0.9248796701431274, + "step": 1702 + }, + { + "epoch": 0.3923963133640553, + "grad_norm": 0.6864309886370629, + "learning_rate": 1.8855649564017642e-06, + "loss": 0.8792428970336914, + "step": 1703 + }, + { + "epoch": 0.39262672811059907, + "grad_norm": 0.7264626081176593, + "learning_rate": 1.8853879192190657e-06, + "loss": 0.8387417197227478, + "step": 1704 + }, + { + "epoch": 0.39285714285714285, + "grad_norm": 0.707677593822268, + "learning_rate": 1.8852107535234828e-06, + "loss": 0.7020218372344971, + "step": 1705 + }, + { + "epoch": 0.39308755760368663, + "grad_norm": 0.673092322659609, + 
"learning_rate": 1.885033459340731e-06, + "loss": 0.7388321161270142, + "step": 1706 + }, + { + "epoch": 0.3933179723502304, + "grad_norm": 0.7503922468030345, + "learning_rate": 1.8848560366965441e-06, + "loss": 0.7536240220069885, + "step": 1707 + }, + { + "epoch": 0.3935483870967742, + "grad_norm": 0.7237343332600692, + "learning_rate": 1.8846784856166746e-06, + "loss": 0.747667670249939, + "step": 1708 + }, + { + "epoch": 0.393778801843318, + "grad_norm": 0.7263541821971573, + "learning_rate": 1.8845008061268945e-06, + "loss": 0.8068975210189819, + "step": 1709 + }, + { + "epoch": 0.39400921658986177, + "grad_norm": 0.7581453840562968, + "learning_rate": 1.8843229982529932e-06, + "loss": 0.7613410949707031, + "step": 1710 + }, + { + "epoch": 0.39423963133640555, + "grad_norm": 0.6546080156681554, + "learning_rate": 1.8841450620207793e-06, + "loss": 0.8579158782958984, + "step": 1711 + }, + { + "epoch": 0.39447004608294933, + "grad_norm": 0.6400652758844664, + "learning_rate": 1.88396699745608e-06, + "loss": 0.8754673004150391, + "step": 1712 + }, + { + "epoch": 0.39470046082949306, + "grad_norm": 0.7227539443635326, + "learning_rate": 1.8837888045847415e-06, + "loss": 0.7988177537918091, + "step": 1713 + }, + { + "epoch": 0.39493087557603684, + "grad_norm": 0.7533730909693769, + "learning_rate": 1.8836104834326279e-06, + "loss": 0.8658367395401001, + "step": 1714 + }, + { + "epoch": 0.3951612903225806, + "grad_norm": 0.7819630929666835, + "learning_rate": 1.8834320340256223e-06, + "loss": 0.8777489066123962, + "step": 1715 + }, + { + "epoch": 0.3953917050691244, + "grad_norm": 0.6763778401068745, + "learning_rate": 1.8832534563896264e-06, + "loss": 0.9785901308059692, + "step": 1716 + }, + { + "epoch": 0.3956221198156682, + "grad_norm": 0.7796554840537433, + "learning_rate": 1.883074750550561e-06, + "loss": 0.847503125667572, + "step": 1717 + }, + { + "epoch": 0.395852534562212, + "grad_norm": 0.7786503806499795, + "learning_rate": 1.8828959165343643e-06, + 
"loss": 1.0159538984298706, + "step": 1718 + }, + { + "epoch": 0.39608294930875576, + "grad_norm": 0.8472423063084373, + "learning_rate": 1.882716954366994e-06, + "loss": 0.9064888954162598, + "step": 1719 + }, + { + "epoch": 0.39631336405529954, + "grad_norm": 0.7664117713246195, + "learning_rate": 1.8825378640744264e-06, + "loss": 0.956849217414856, + "step": 1720 + }, + { + "epoch": 0.3965437788018433, + "grad_norm": 0.758389558529891, + "learning_rate": 1.882358645682656e-06, + "loss": 0.8983441591262817, + "step": 1721 + }, + { + "epoch": 0.3967741935483871, + "grad_norm": 0.5702990900386659, + "learning_rate": 1.8821792992176967e-06, + "loss": 0.7698956727981567, + "step": 1722 + }, + { + "epoch": 0.3970046082949309, + "grad_norm": 0.8118873070872795, + "learning_rate": 1.8819998247055797e-06, + "loss": 0.9376351833343506, + "step": 1723 + }, + { + "epoch": 0.3972350230414747, + "grad_norm": 0.8486728692509508, + "learning_rate": 1.881820222172356e-06, + "loss": 0.8776079416275024, + "step": 1724 + }, + { + "epoch": 0.39746543778801846, + "grad_norm": 0.9552617438975642, + "learning_rate": 1.8816404916440942e-06, + "loss": 0.9776726961135864, + "step": 1725 + }, + { + "epoch": 0.3976958525345622, + "grad_norm": 0.5841959382882552, + "learning_rate": 1.8814606331468822e-06, + "loss": 0.7699686288833618, + "step": 1726 + }, + { + "epoch": 0.39792626728110597, + "grad_norm": 0.7581748259398383, + "learning_rate": 1.8812806467068265e-06, + "loss": 0.8256866931915283, + "step": 1727 + }, + { + "epoch": 0.39815668202764976, + "grad_norm": 0.6320724280659841, + "learning_rate": 1.881100532350051e-06, + "loss": 0.8493847846984863, + "step": 1728 + }, + { + "epoch": 0.39838709677419354, + "grad_norm": 0.6592895509903398, + "learning_rate": 1.8809202901027002e-06, + "loss": 0.8138688802719116, + "step": 1729 + }, + { + "epoch": 0.3986175115207373, + "grad_norm": 0.7569638843586648, + "learning_rate": 1.880739919990935e-06, + "loss": 0.8637882471084595, + "step": 1730 + 
}, + { + "epoch": 0.3988479262672811, + "grad_norm": 0.5847233582227849, + "learning_rate": 1.880559422040937e-06, + "loss": 0.8988152742385864, + "step": 1731 + }, + { + "epoch": 0.3990783410138249, + "grad_norm": 0.4724369020135308, + "learning_rate": 1.880378796278904e-06, + "loss": 0.8247279524803162, + "step": 1732 + }, + { + "epoch": 0.39930875576036867, + "grad_norm": 0.8071560192562027, + "learning_rate": 1.8801980427310546e-06, + "loss": 0.9699070453643799, + "step": 1733 + }, + { + "epoch": 0.39953917050691246, + "grad_norm": 0.8108307817175047, + "learning_rate": 1.8800171614236241e-06, + "loss": 0.9516465663909912, + "step": 1734 + }, + { + "epoch": 0.39976958525345624, + "grad_norm": 0.655632769560408, + "learning_rate": 1.879836152382868e-06, + "loss": 0.9553602933883667, + "step": 1735 + }, + { + "epoch": 0.4, + "grad_norm": 0.666214042250043, + "learning_rate": 1.879655015635059e-06, + "loss": 0.7805094718933105, + "step": 1736 + }, + { + "epoch": 0.4002304147465438, + "grad_norm": 0.730264537734651, + "learning_rate": 1.8794737512064888e-06, + "loss": 0.9509962797164917, + "step": 1737 + }, + { + "epoch": 0.40046082949308753, + "grad_norm": 0.6755335543884481, + "learning_rate": 1.8792923591234683e-06, + "loss": 0.8663454055786133, + "step": 1738 + }, + { + "epoch": 0.4006912442396313, + "grad_norm": 0.7325230471707477, + "learning_rate": 1.8791108394123257e-06, + "loss": 0.8773336410522461, + "step": 1739 + }, + { + "epoch": 0.4009216589861751, + "grad_norm": 0.6493515009165077, + "learning_rate": 1.8789291920994086e-06, + "loss": 0.7201284766197205, + "step": 1740 + }, + { + "epoch": 0.4011520737327189, + "grad_norm": 0.6665806307840867, + "learning_rate": 1.8787474172110826e-06, + "loss": 0.799161434173584, + "step": 1741 + }, + { + "epoch": 0.40138248847926267, + "grad_norm": 0.8651407328311, + "learning_rate": 1.8785655147737326e-06, + "loss": 0.8987375497817993, + "step": 1742 + }, + { + "epoch": 0.40161290322580645, + "grad_norm": 
0.8706739093465035, + "learning_rate": 1.878383484813761e-06, + "loss": 0.8553296327590942, + "step": 1743 + }, + { + "epoch": 0.40184331797235023, + "grad_norm": 0.6706596266673751, + "learning_rate": 1.8782013273575895e-06, + "loss": 0.8376551270484924, + "step": 1744 + }, + { + "epoch": 0.402073732718894, + "grad_norm": 0.7963067027250083, + "learning_rate": 1.8780190424316578e-06, + "loss": 0.8220775723457336, + "step": 1745 + }, + { + "epoch": 0.4023041474654378, + "grad_norm": 0.7339356821882034, + "learning_rate": 1.8778366300624244e-06, + "loss": 0.8614820241928101, + "step": 1746 + }, + { + "epoch": 0.4025345622119816, + "grad_norm": 0.8065421465945496, + "learning_rate": 1.8776540902763665e-06, + "loss": 0.9434851408004761, + "step": 1747 + }, + { + "epoch": 0.40276497695852537, + "grad_norm": 0.8102544073977809, + "learning_rate": 1.877471423099979e-06, + "loss": 0.8150373101234436, + "step": 1748 + }, + { + "epoch": 0.40299539170506915, + "grad_norm": 0.5910178895755134, + "learning_rate": 1.8772886285597762e-06, + "loss": 0.7660368084907532, + "step": 1749 + }, + { + "epoch": 0.4032258064516129, + "grad_norm": 0.7262631962712356, + "learning_rate": 1.8771057066822903e-06, + "loss": 0.7647032141685486, + "step": 1750 + }, + { + "epoch": 0.40345622119815666, + "grad_norm": 0.6238918567790319, + "learning_rate": 1.8769226574940723e-06, + "loss": 0.6034061908721924, + "step": 1751 + }, + { + "epoch": 0.40368663594470044, + "grad_norm": 0.7344154412243011, + "learning_rate": 1.8767394810216914e-06, + "loss": 1.0062675476074219, + "step": 1752 + }, + { + "epoch": 0.40391705069124423, + "grad_norm": 0.6966552417777933, + "learning_rate": 1.8765561772917354e-06, + "loss": 0.9791489839553833, + "step": 1753 + }, + { + "epoch": 0.404147465437788, + "grad_norm": 0.5825611392130148, + "learning_rate": 1.8763727463308108e-06, + "loss": 0.9054251909255981, + "step": 1754 + }, + { + "epoch": 0.4043778801843318, + "grad_norm": 0.7455727854900284, + "learning_rate": 
1.8761891881655423e-06, + "loss": 0.9156093597412109, + "step": 1755 + }, + { + "epoch": 0.4046082949308756, + "grad_norm": 0.6983601123297067, + "learning_rate": 1.876005502822573e-06, + "loss": 0.7525647878646851, + "step": 1756 + }, + { + "epoch": 0.40483870967741936, + "grad_norm": 0.6156689393045622, + "learning_rate": 1.8758216903285643e-06, + "loss": 0.8321493864059448, + "step": 1757 + }, + { + "epoch": 0.40506912442396314, + "grad_norm": 0.888147060404811, + "learning_rate": 1.8756377507101973e-06, + "loss": 0.9937042593955994, + "step": 1758 + }, + { + "epoch": 0.40529953917050693, + "grad_norm": 0.553604524827559, + "learning_rate": 1.8754536839941694e-06, + "loss": 0.7001460790634155, + "step": 1759 + }, + { + "epoch": 0.4055299539170507, + "grad_norm": 0.7747422377442987, + "learning_rate": 1.8752694902071986e-06, + "loss": 1.0062569379806519, + "step": 1760 + }, + { + "epoch": 0.4057603686635945, + "grad_norm": 0.7145787925683823, + "learning_rate": 1.8750851693760199e-06, + "loss": 0.7414188385009766, + "step": 1761 + }, + { + "epoch": 0.4059907834101382, + "grad_norm": 0.6306403135362045, + "learning_rate": 1.8749007215273873e-06, + "loss": 0.7181771397590637, + "step": 1762 + }, + { + "epoch": 0.406221198156682, + "grad_norm": 0.7763317855361268, + "learning_rate": 1.8747161466880732e-06, + "loss": 0.8797845244407654, + "step": 1763 + }, + { + "epoch": 0.4064516129032258, + "grad_norm": 0.6123636271862207, + "learning_rate": 1.8745314448848684e-06, + "loss": 0.7774960398674011, + "step": 1764 + }, + { + "epoch": 0.4066820276497696, + "grad_norm": 0.9110978120854332, + "learning_rate": 1.874346616144582e-06, + "loss": 0.8499422073364258, + "step": 1765 + }, + { + "epoch": 0.40691244239631336, + "grad_norm": 0.6306854745937814, + "learning_rate": 1.874161660494042e-06, + "loss": 0.7070250511169434, + "step": 1766 + }, + { + "epoch": 0.40714285714285714, + "grad_norm": 0.6762437905211294, + "learning_rate": 1.8739765779600939e-06, + "loss": 
0.8009281158447266, + "step": 1767 + }, + { + "epoch": 0.4073732718894009, + "grad_norm": 0.6084135312041689, + "learning_rate": 1.8737913685696027e-06, + "loss": 0.6866155862808228, + "step": 1768 + }, + { + "epoch": 0.4076036866359447, + "grad_norm": 0.7813040754942882, + "learning_rate": 1.873606032349451e-06, + "loss": 0.8200059533119202, + "step": 1769 + }, + { + "epoch": 0.4078341013824885, + "grad_norm": 0.629385301974861, + "learning_rate": 1.8734205693265404e-06, + "loss": 0.8413814902305603, + "step": 1770 + }, + { + "epoch": 0.4080645161290323, + "grad_norm": 0.776612651465312, + "learning_rate": 1.8732349795277903e-06, + "loss": 0.9935271143913269, + "step": 1771 + }, + { + "epoch": 0.40829493087557606, + "grad_norm": 0.6589503544607032, + "learning_rate": 1.873049262980139e-06, + "loss": 0.8718058466911316, + "step": 1772 + }, + { + "epoch": 0.40852534562211984, + "grad_norm": 0.8620050398467397, + "learning_rate": 1.8728634197105428e-06, + "loss": 0.9009358882904053, + "step": 1773 + }, + { + "epoch": 0.40875576036866357, + "grad_norm": 0.7755306532739165, + "learning_rate": 1.8726774497459768e-06, + "loss": 0.9128156900405884, + "step": 1774 + }, + { + "epoch": 0.40898617511520735, + "grad_norm": 0.6450271750629438, + "learning_rate": 1.8724913531134342e-06, + "loss": 0.8524078130722046, + "step": 1775 + }, + { + "epoch": 0.40921658986175113, + "grad_norm": 0.7569328214438452, + "learning_rate": 1.872305129839927e-06, + "loss": 0.9431420564651489, + "step": 1776 + }, + { + "epoch": 0.4094470046082949, + "grad_norm": 0.6746261931292995, + "learning_rate": 1.8721187799524846e-06, + "loss": 0.7666694521903992, + "step": 1777 + }, + { + "epoch": 0.4096774193548387, + "grad_norm": 0.6448149830483173, + "learning_rate": 1.871932303478156e-06, + "loss": 0.872551679611206, + "step": 1778 + }, + { + "epoch": 0.4099078341013825, + "grad_norm": 0.6320914450645303, + "learning_rate": 1.8717457004440079e-06, + "loss": 0.7596250176429749, + "step": 1779 + }, + { + 
"epoch": 0.41013824884792627, + "grad_norm": 0.9751786230729174, + "learning_rate": 1.8715589708771253e-06, + "loss": 1.0098414421081543, + "step": 1780 + }, + { + "epoch": 0.41036866359447005, + "grad_norm": 0.9695096083628231, + "learning_rate": 1.871372114804612e-06, + "loss": 0.9961523413658142, + "step": 1781 + }, + { + "epoch": 0.41059907834101383, + "grad_norm": 0.8458697864526913, + "learning_rate": 1.8711851322535896e-06, + "loss": 0.9065390825271606, + "step": 1782 + }, + { + "epoch": 0.4108294930875576, + "grad_norm": 0.5445685826440523, + "learning_rate": 1.8709980232511987e-06, + "loss": 0.7906428575515747, + "step": 1783 + }, + { + "epoch": 0.4110599078341014, + "grad_norm": 0.5783797348856774, + "learning_rate": 1.8708107878245976e-06, + "loss": 0.798285722732544, + "step": 1784 + }, + { + "epoch": 0.4112903225806452, + "grad_norm": 0.7492534516122694, + "learning_rate": 1.870623426000964e-06, + "loss": 0.7809790372848511, + "step": 1785 + }, + { + "epoch": 0.4115207373271889, + "grad_norm": 0.8776810150838931, + "learning_rate": 1.8704359378074921e-06, + "loss": 0.8931630849838257, + "step": 1786 + }, + { + "epoch": 0.4117511520737327, + "grad_norm": 0.6321595970525742, + "learning_rate": 1.870248323271396e-06, + "loss": 0.8219889402389526, + "step": 1787 + }, + { + "epoch": 0.4119815668202765, + "grad_norm": 0.9973808347817518, + "learning_rate": 1.8700605824199084e-06, + "loss": 0.8371819257736206, + "step": 1788 + }, + { + "epoch": 0.41221198156682026, + "grad_norm": 0.7869196176383942, + "learning_rate": 1.8698727152802789e-06, + "loss": 0.951171875, + "step": 1789 + }, + { + "epoch": 0.41244239631336405, + "grad_norm": 0.6763081680317143, + "learning_rate": 1.8696847218797763e-06, + "loss": 0.7678385972976685, + "step": 1790 + }, + { + "epoch": 0.41267281105990783, + "grad_norm": 0.567634539573834, + "learning_rate": 1.8694966022456872e-06, + "loss": 0.9296993017196655, + "step": 1791 + }, + { + "epoch": 0.4129032258064516, + "grad_norm": 
0.5450828031444163, + "learning_rate": 1.8693083564053178e-06, + "loss": 0.8991763591766357, + "step": 1792 + }, + { + "epoch": 0.4131336405529954, + "grad_norm": 0.5967294444907658, + "learning_rate": 1.8691199843859913e-06, + "loss": 0.8332901000976562, + "step": 1793 + }, + { + "epoch": 0.4133640552995392, + "grad_norm": 0.7571962190593917, + "learning_rate": 1.8689314862150497e-06, + "loss": 0.7723548412322998, + "step": 1794 + }, + { + "epoch": 0.41359447004608296, + "grad_norm": 0.6588409150246594, + "learning_rate": 1.868742861919853e-06, + "loss": 0.7768993377685547, + "step": 1795 + }, + { + "epoch": 0.41382488479262675, + "grad_norm": 0.43193778142300604, + "learning_rate": 1.86855411152778e-06, + "loss": 0.6058932542800903, + "step": 1796 + }, + { + "epoch": 0.41405529953917053, + "grad_norm": 0.8667574432138021, + "learning_rate": 1.8683652350662274e-06, + "loss": 0.8711605072021484, + "step": 1797 + }, + { + "epoch": 0.4142857142857143, + "grad_norm": 0.8780154463369872, + "learning_rate": 1.8681762325626104e-06, + "loss": 0.9023469090461731, + "step": 1798 + }, + { + "epoch": 0.41451612903225804, + "grad_norm": 0.6070102500189553, + "learning_rate": 1.867987104044363e-06, + "loss": 0.7735910415649414, + "step": 1799 + }, + { + "epoch": 0.4147465437788018, + "grad_norm": 0.6293725885471063, + "learning_rate": 1.8677978495389364e-06, + "loss": 0.6609020829200745, + "step": 1800 + }, + { + "epoch": 0.4149769585253456, + "grad_norm": 0.6485782104038655, + "learning_rate": 1.8676084690738005e-06, + "loss": 0.7823291420936584, + "step": 1801 + }, + { + "epoch": 0.4152073732718894, + "grad_norm": 0.8472581681306268, + "learning_rate": 1.867418962676444e-06, + "loss": 0.9076563715934753, + "step": 1802 + }, + { + "epoch": 0.4154377880184332, + "grad_norm": 0.561807586977654, + "learning_rate": 1.8672293303743735e-06, + "loss": 0.8645772933959961, + "step": 1803 + }, + { + "epoch": 0.41566820276497696, + "grad_norm": 0.6821058596015542, + "learning_rate": 
1.8670395721951135e-06, + "loss": 0.8071421384811401, + "step": 1804 + }, + { + "epoch": 0.41589861751152074, + "grad_norm": 0.7396557376618352, + "learning_rate": 1.8668496881662077e-06, + "loss": 0.8459846377372742, + "step": 1805 + }, + { + "epoch": 0.4161290322580645, + "grad_norm": 0.7167052224732033, + "learning_rate": 1.866659678315217e-06, + "loss": 0.8467865586280823, + "step": 1806 + }, + { + "epoch": 0.4163594470046083, + "grad_norm": 0.8262164291061972, + "learning_rate": 1.8664695426697215e-06, + "loss": 0.8963291645050049, + "step": 1807 + }, + { + "epoch": 0.4165898617511521, + "grad_norm": 0.528766323006704, + "learning_rate": 1.8662792812573188e-06, + "loss": 0.7901826500892639, + "step": 1808 + }, + { + "epoch": 0.4168202764976959, + "grad_norm": 0.8974116604603759, + "learning_rate": 1.8660888941056252e-06, + "loss": 0.807115912437439, + "step": 1809 + }, + { + "epoch": 0.41705069124423966, + "grad_norm": 0.6271237317374816, + "learning_rate": 1.8658983812422753e-06, + "loss": 0.8439537286758423, + "step": 1810 + }, + { + "epoch": 0.4172811059907834, + "grad_norm": 0.8360600380108553, + "learning_rate": 1.8657077426949214e-06, + "loss": 0.6920834183692932, + "step": 1811 + }, + { + "epoch": 0.41751152073732717, + "grad_norm": 0.7603232216568709, + "learning_rate": 1.865516978491235e-06, + "loss": 0.8712124824523926, + "step": 1812 + }, + { + "epoch": 0.41774193548387095, + "grad_norm": 0.718498571919399, + "learning_rate": 1.865326088658905e-06, + "loss": 0.7720927596092224, + "step": 1813 + }, + { + "epoch": 0.41797235023041474, + "grad_norm": 0.6953832780918029, + "learning_rate": 1.8651350732256386e-06, + "loss": 0.8003814220428467, + "step": 1814 + }, + { + "epoch": 0.4182027649769585, + "grad_norm": 0.838076886250554, + "learning_rate": 1.8649439322191616e-06, + "loss": 0.8999850749969482, + "step": 1815 + }, + { + "epoch": 0.4184331797235023, + "grad_norm": 0.584714014216153, + "learning_rate": 1.8647526656672179e-06, + "loss": 
0.6752324104309082, + "step": 1816 + }, + { + "epoch": 0.4186635944700461, + "grad_norm": 0.7365325720475113, + "learning_rate": 1.8645612735975696e-06, + "loss": 0.8521262407302856, + "step": 1817 + }, + { + "epoch": 0.41889400921658987, + "grad_norm": 0.7194058023938104, + "learning_rate": 1.864369756037997e-06, + "loss": 0.8813315629959106, + "step": 1818 + }, + { + "epoch": 0.41912442396313365, + "grad_norm": 0.742428235010686, + "learning_rate": 1.8641781130162986e-06, + "loss": 0.8358273506164551, + "step": 1819 + }, + { + "epoch": 0.41935483870967744, + "grad_norm": 0.591500867449821, + "learning_rate": 1.863986344560291e-06, + "loss": 0.8051023483276367, + "step": 1820 + }, + { + "epoch": 0.4195852534562212, + "grad_norm": 0.7791039105049288, + "learning_rate": 1.863794450697809e-06, + "loss": 0.768791675567627, + "step": 1821 + }, + { + "epoch": 0.419815668202765, + "grad_norm": 0.9369354252226071, + "learning_rate": 1.8636024314567065e-06, + "loss": 0.8420040607452393, + "step": 1822 + }, + { + "epoch": 0.42004608294930873, + "grad_norm": 0.673055652482875, + "learning_rate": 1.8634102868648542e-06, + "loss": 0.7670450806617737, + "step": 1823 + }, + { + "epoch": 0.4202764976958525, + "grad_norm": 0.6699812957272996, + "learning_rate": 1.863218016950142e-06, + "loss": 0.8292283415794373, + "step": 1824 + }, + { + "epoch": 0.4205069124423963, + "grad_norm": 0.6058254395333167, + "learning_rate": 1.8630256217404767e-06, + "loss": 0.8005781769752502, + "step": 1825 + }, + { + "epoch": 0.4207373271889401, + "grad_norm": 0.923190166351158, + "learning_rate": 1.8628331012637854e-06, + "loss": 0.8214897513389587, + "step": 1826 + }, + { + "epoch": 0.42096774193548386, + "grad_norm": 0.6734314204378448, + "learning_rate": 1.8626404555480118e-06, + "loss": 0.7938524484634399, + "step": 1827 + }, + { + "epoch": 0.42119815668202765, + "grad_norm": 0.7824933974022145, + "learning_rate": 1.862447684621118e-06, + "loss": 1.0047048330307007, + "step": 1828 + }, + { + 
"epoch": 0.42142857142857143, + "grad_norm": 0.7060449091561402, + "learning_rate": 1.862254788511084e-06, + "loss": 0.7660601139068604, + "step": 1829 + }, + { + "epoch": 0.4216589861751152, + "grad_norm": 0.7940468118829026, + "learning_rate": 1.8620617672459096e-06, + "loss": 0.8227912783622742, + "step": 1830 + }, + { + "epoch": 0.421889400921659, + "grad_norm": 0.8322274877206185, + "learning_rate": 1.8618686208536106e-06, + "loss": 0.8570956587791443, + "step": 1831 + }, + { + "epoch": 0.4221198156682028, + "grad_norm": 0.6215191834076389, + "learning_rate": 1.8616753493622221e-06, + "loss": 0.7472532987594604, + "step": 1832 + }, + { + "epoch": 0.42235023041474656, + "grad_norm": 0.702673502332975, + "learning_rate": 1.8614819527997976e-06, + "loss": 0.812872052192688, + "step": 1833 + }, + { + "epoch": 0.42258064516129035, + "grad_norm": 0.7168526420375322, + "learning_rate": 1.861288431194408e-06, + "loss": 0.7801386117935181, + "step": 1834 + }, + { + "epoch": 0.4228110599078341, + "grad_norm": 0.8740851917776313, + "learning_rate": 1.8610947845741426e-06, + "loss": 0.7834687829017639, + "step": 1835 + }, + { + "epoch": 0.42304147465437786, + "grad_norm": 0.8009990500080056, + "learning_rate": 1.8609010129671097e-06, + "loss": 0.786865234375, + "step": 1836 + }, + { + "epoch": 0.42327188940092164, + "grad_norm": 0.6559457181196078, + "learning_rate": 1.860707116401434e-06, + "loss": 0.7728738784790039, + "step": 1837 + }, + { + "epoch": 0.4235023041474654, + "grad_norm": 0.6384024302830484, + "learning_rate": 1.8605130949052598e-06, + "loss": 0.6508793830871582, + "step": 1838 + }, + { + "epoch": 0.4237327188940092, + "grad_norm": 0.6544986461362278, + "learning_rate": 1.8603189485067492e-06, + "loss": 0.7949484586715698, + "step": 1839 + }, + { + "epoch": 0.423963133640553, + "grad_norm": 0.7679729608195138, + "learning_rate": 1.8601246772340822e-06, + "loss": 0.7151408195495605, + "step": 1840 + }, + { + "epoch": 0.4241935483870968, + "grad_norm": 
0.6910188883895837, + "learning_rate": 1.859930281115457e-06, + "loss": 0.7678598165512085, + "step": 1841 + }, + { + "epoch": 0.42442396313364056, + "grad_norm": 0.6547923584739629, + "learning_rate": 1.8597357601790895e-06, + "loss": 0.8042058944702148, + "step": 1842 + }, + { + "epoch": 0.42465437788018434, + "grad_norm": 0.6889925049755639, + "learning_rate": 1.859541114453215e-06, + "loss": 0.7328081130981445, + "step": 1843 + }, + { + "epoch": 0.4248847926267281, + "grad_norm": 0.7385850960276812, + "learning_rate": 1.8593463439660853e-06, + "loss": 0.7646626234054565, + "step": 1844 + }, + { + "epoch": 0.4251152073732719, + "grad_norm": 0.7455331415840897, + "learning_rate": 1.8591514487459717e-06, + "loss": 0.8965721726417542, + "step": 1845 + }, + { + "epoch": 0.4253456221198157, + "grad_norm": 0.6783955368622289, + "learning_rate": 1.8589564288211623e-06, + "loss": 0.8892468810081482, + "step": 1846 + }, + { + "epoch": 0.4255760368663594, + "grad_norm": 0.669354336924349, + "learning_rate": 1.8587612842199648e-06, + "loss": 0.8314409255981445, + "step": 1847 + }, + { + "epoch": 0.4258064516129032, + "grad_norm": 0.7299222952808436, + "learning_rate": 1.8585660149707034e-06, + "loss": 0.7713892459869385, + "step": 1848 + }, + { + "epoch": 0.426036866359447, + "grad_norm": 0.7583328231707663, + "learning_rate": 1.8583706211017216e-06, + "loss": 0.9349459409713745, + "step": 1849 + }, + { + "epoch": 0.42626728110599077, + "grad_norm": 0.7309436500165829, + "learning_rate": 1.8581751026413805e-06, + "loss": 0.8438700437545776, + "step": 1850 + }, + { + "epoch": 0.42649769585253455, + "grad_norm": 1.0171962155435006, + "learning_rate": 1.8579794596180594e-06, + "loss": 0.9559776782989502, + "step": 1851 + }, + { + "epoch": 0.42672811059907834, + "grad_norm": 0.6701533748146308, + "learning_rate": 1.8577836920601556e-06, + "loss": 0.7124872803688049, + "step": 1852 + }, + { + "epoch": 0.4269585253456221, + "grad_norm": 0.8613289026694887, + "learning_rate": 
1.8575877999960842e-06, + "loss": 0.7935503125190735, + "step": 1853 + }, + { + "epoch": 0.4271889400921659, + "grad_norm": 0.7107096707504692, + "learning_rate": 1.8573917834542792e-06, + "loss": 0.9145890474319458, + "step": 1854 + }, + { + "epoch": 0.4274193548387097, + "grad_norm": 0.7290504646059204, + "learning_rate": 1.8571956424631918e-06, + "loss": 0.8239228129386902, + "step": 1855 + }, + { + "epoch": 0.42764976958525347, + "grad_norm": 0.6018983094431002, + "learning_rate": 1.8569993770512916e-06, + "loss": 0.8767688274383545, + "step": 1856 + }, + { + "epoch": 0.42788018433179725, + "grad_norm": 0.6742014961339767, + "learning_rate": 1.8568029872470663e-06, + "loss": 0.7860859632492065, + "step": 1857 + }, + { + "epoch": 0.42811059907834104, + "grad_norm": 0.6990668023927343, + "learning_rate": 1.8566064730790218e-06, + "loss": 0.8855729103088379, + "step": 1858 + }, + { + "epoch": 0.4283410138248848, + "grad_norm": 0.8518974155898882, + "learning_rate": 1.8564098345756815e-06, + "loss": 1.023299217224121, + "step": 1859 + }, + { + "epoch": 0.42857142857142855, + "grad_norm": 0.7174059285774532, + "learning_rate": 1.8562130717655878e-06, + "loss": 0.7665202617645264, + "step": 1860 + }, + { + "epoch": 0.42880184331797233, + "grad_norm": 0.7036772811538429, + "learning_rate": 1.8560161846773e-06, + "loss": 0.8456651568412781, + "step": 1861 + }, + { + "epoch": 0.4290322580645161, + "grad_norm": 0.7229483822116546, + "learning_rate": 1.8558191733393964e-06, + "loss": 0.8920061588287354, + "step": 1862 + }, + { + "epoch": 0.4292626728110599, + "grad_norm": 0.8104170426239989, + "learning_rate": 1.8556220377804723e-06, + "loss": 0.8686853051185608, + "step": 1863 + }, + { + "epoch": 0.4294930875576037, + "grad_norm": 0.5832986779631602, + "learning_rate": 1.8554247780291425e-06, + "loss": 0.6976242065429688, + "step": 1864 + }, + { + "epoch": 0.42972350230414746, + "grad_norm": 0.7347161353185314, + "learning_rate": 1.8552273941140387e-06, + "loss": 
0.9612032771110535, + "step": 1865 + }, + { + "epoch": 0.42995391705069125, + "grad_norm": 0.6243829709767468, + "learning_rate": 1.8550298860638108e-06, + "loss": 0.9288003444671631, + "step": 1866 + }, + { + "epoch": 0.43018433179723503, + "grad_norm": 0.6743712494799082, + "learning_rate": 1.8548322539071263e-06, + "loss": 0.8397525548934937, + "step": 1867 + }, + { + "epoch": 0.4304147465437788, + "grad_norm": 0.5881426126037044, + "learning_rate": 1.8546344976726722e-06, + "loss": 0.6311365365982056, + "step": 1868 + }, + { + "epoch": 0.4306451612903226, + "grad_norm": 0.7497017851812813, + "learning_rate": 1.8544366173891523e-06, + "loss": 0.7868270874023438, + "step": 1869 + }, + { + "epoch": 0.4308755760368664, + "grad_norm": 0.6265515804052451, + "learning_rate": 1.8542386130852883e-06, + "loss": 0.9197052717208862, + "step": 1870 + }, + { + "epoch": 0.43110599078341016, + "grad_norm": 0.7018278829983491, + "learning_rate": 1.8540404847898206e-06, + "loss": 0.7875635027885437, + "step": 1871 + }, + { + "epoch": 0.4313364055299539, + "grad_norm": 0.7789284724063816, + "learning_rate": 1.853842232531507e-06, + "loss": 0.9805077910423279, + "step": 1872 + }, + { + "epoch": 0.4315668202764977, + "grad_norm": 0.838470325159009, + "learning_rate": 1.8536438563391236e-06, + "loss": 0.8906866312026978, + "step": 1873 + }, + { + "epoch": 0.43179723502304146, + "grad_norm": 0.73247587866706, + "learning_rate": 1.8534453562414649e-06, + "loss": 0.7506693601608276, + "step": 1874 + }, + { + "epoch": 0.43202764976958524, + "grad_norm": 0.6576915367586517, + "learning_rate": 1.8532467322673422e-06, + "loss": 0.6173181533813477, + "step": 1875 + }, + { + "epoch": 0.432258064516129, + "grad_norm": 0.6907344817423696, + "learning_rate": 1.853047984445586e-06, + "loss": 0.9217972755432129, + "step": 1876 + }, + { + "epoch": 0.4324884792626728, + "grad_norm": 0.8808471726659616, + "learning_rate": 1.8528491128050442e-06, + "loss": 0.8300588130950928, + "step": 1877 + }, + { 
+ "epoch": 0.4327188940092166, + "grad_norm": 0.7869544847637374, + "learning_rate": 1.8526501173745826e-06, + "loss": 0.8109279870986938, + "step": 1878 + }, + { + "epoch": 0.4329493087557604, + "grad_norm": 0.8253705845492948, + "learning_rate": 1.852450998183085e-06, + "loss": 0.9243700504302979, + "step": 1879 + }, + { + "epoch": 0.43317972350230416, + "grad_norm": 0.7291726511705204, + "learning_rate": 1.8522517552594539e-06, + "loss": 0.7983531951904297, + "step": 1880 + }, + { + "epoch": 0.43341013824884794, + "grad_norm": 0.837506072245515, + "learning_rate": 1.8520523886326088e-06, + "loss": 0.9931240081787109, + "step": 1881 + }, + { + "epoch": 0.4336405529953917, + "grad_norm": 0.7782064692415819, + "learning_rate": 1.8518528983314874e-06, + "loss": 0.923255443572998, + "step": 1882 + }, + { + "epoch": 0.4338709677419355, + "grad_norm": 0.5003052765919304, + "learning_rate": 1.8516532843850454e-06, + "loss": 0.8470325469970703, + "step": 1883 + }, + { + "epoch": 0.43410138248847924, + "grad_norm": 0.7497886449083292, + "learning_rate": 1.8514535468222566e-06, + "loss": 0.9175074696540833, + "step": 1884 + }, + { + "epoch": 0.434331797235023, + "grad_norm": 0.7474680310474195, + "learning_rate": 1.8512536856721126e-06, + "loss": 0.8617827892303467, + "step": 1885 + }, + { + "epoch": 0.4345622119815668, + "grad_norm": 0.6779026169933022, + "learning_rate": 1.8510537009636231e-06, + "loss": 0.6787248849868774, + "step": 1886 + }, + { + "epoch": 0.4347926267281106, + "grad_norm": 0.6948062534132075, + "learning_rate": 1.8508535927258157e-06, + "loss": 0.8031569719314575, + "step": 1887 + }, + { + "epoch": 0.43502304147465437, + "grad_norm": 0.8219581995376891, + "learning_rate": 1.8506533609877354e-06, + "loss": 1.0252577066421509, + "step": 1888 + }, + { + "epoch": 0.43525345622119815, + "grad_norm": 0.6297691459816858, + "learning_rate": 1.850453005778446e-06, + "loss": 0.7947444915771484, + "step": 1889 + }, + { + "epoch": 0.43548387096774194, + 
"grad_norm": 0.7974729793994046, + "learning_rate": 1.8502525271270288e-06, + "loss": 0.817523717880249, + "step": 1890 + }, + { + "epoch": 0.4357142857142857, + "grad_norm": 0.905445482286677, + "learning_rate": 1.850051925062583e-06, + "loss": 0.8029658794403076, + "step": 1891 + }, + { + "epoch": 0.4359447004608295, + "grad_norm": 0.7902601112013473, + "learning_rate": 1.8498511996142253e-06, + "loss": 0.871408224105835, + "step": 1892 + }, + { + "epoch": 0.4361751152073733, + "grad_norm": 0.7279346643764769, + "learning_rate": 1.849650350811091e-06, + "loss": 1.0133098363876343, + "step": 1893 + }, + { + "epoch": 0.43640552995391707, + "grad_norm": 0.5859043876213773, + "learning_rate": 1.8494493786823333e-06, + "loss": 0.8320624828338623, + "step": 1894 + }, + { + "epoch": 0.43663594470046085, + "grad_norm": 0.7240549495084485, + "learning_rate": 1.8492482832571225e-06, + "loss": 0.7757631540298462, + "step": 1895 + }, + { + "epoch": 0.4368663594470046, + "grad_norm": 0.7606146142454437, + "learning_rate": 1.8490470645646479e-06, + "loss": 0.8503100872039795, + "step": 1896 + }, + { + "epoch": 0.43709677419354837, + "grad_norm": 0.7560932530175453, + "learning_rate": 1.8488457226341158e-06, + "loss": 0.8145939707756042, + "step": 1897 + }, + { + "epoch": 0.43732718894009215, + "grad_norm": 0.8041258430075643, + "learning_rate": 1.848644257494751e-06, + "loss": 0.831500232219696, + "step": 1898 + }, + { + "epoch": 0.43755760368663593, + "grad_norm": 0.6473340838552745, + "learning_rate": 1.8484426691757956e-06, + "loss": 0.9340692758560181, + "step": 1899 + }, + { + "epoch": 0.4377880184331797, + "grad_norm": 0.7851684163129825, + "learning_rate": 1.8482409577065097e-06, + "loss": 1.011988639831543, + "step": 1900 + }, + { + "epoch": 0.4380184331797235, + "grad_norm": 0.6819650200659566, + "learning_rate": 1.848039123116172e-06, + "loss": 0.8110378980636597, + "step": 1901 + }, + { + "epoch": 0.4382488479262673, + "grad_norm": 0.6310651453357742, + 
"learning_rate": 1.8478371654340779e-06, + "loss": 0.8230330944061279, + "step": 1902 + }, + { + "epoch": 0.43847926267281107, + "grad_norm": 0.8335502206603579, + "learning_rate": 1.8476350846895419e-06, + "loss": 0.875052809715271, + "step": 1903 + }, + { + "epoch": 0.43870967741935485, + "grad_norm": 0.7394371211482306, + "learning_rate": 1.8474328809118953e-06, + "loss": 0.9373071193695068, + "step": 1904 + }, + { + "epoch": 0.43894009216589863, + "grad_norm": 0.7538115820848524, + "learning_rate": 1.847230554130488e-06, + "loss": 0.8341633677482605, + "step": 1905 + }, + { + "epoch": 0.4391705069124424, + "grad_norm": 0.6579829053639499, + "learning_rate": 1.8470281043746873e-06, + "loss": 0.8147767782211304, + "step": 1906 + }, + { + "epoch": 0.4394009216589862, + "grad_norm": 0.6022228592985512, + "learning_rate": 1.8468255316738785e-06, + "loss": 0.740512490272522, + "step": 1907 + }, + { + "epoch": 0.4396313364055299, + "grad_norm": 0.7743265443588842, + "learning_rate": 1.846622836057465e-06, + "loss": 0.7754743099212646, + "step": 1908 + }, + { + "epoch": 0.4398617511520737, + "grad_norm": 0.7535493986684056, + "learning_rate": 1.8464200175548677e-06, + "loss": 0.9131484031677246, + "step": 1909 + }, + { + "epoch": 0.4400921658986175, + "grad_norm": 0.7099012564704421, + "learning_rate": 1.8462170761955252e-06, + "loss": 0.7084713578224182, + "step": 1910 + }, + { + "epoch": 0.4403225806451613, + "grad_norm": 0.7949281739735957, + "learning_rate": 1.8460140120088945e-06, + "loss": 0.8535224199295044, + "step": 1911 + }, + { + "epoch": 0.44055299539170506, + "grad_norm": 0.8579322326008002, + "learning_rate": 1.8458108250244498e-06, + "loss": 0.7661323547363281, + "step": 1912 + }, + { + "epoch": 0.44078341013824884, + "grad_norm": 0.7355189670899542, + "learning_rate": 1.8456075152716837e-06, + "loss": 0.8064024448394775, + "step": 1913 + }, + { + "epoch": 0.4410138248847926, + "grad_norm": 0.7422340222781728, + "learning_rate": 1.8454040827801058e-06, + 
"loss": 0.7858735918998718, + "step": 1914 + }, + { + "epoch": 0.4412442396313364, + "grad_norm": 0.6589873136371734, + "learning_rate": 1.8452005275792448e-06, + "loss": 0.9251735210418701, + "step": 1915 + }, + { + "epoch": 0.4414746543778802, + "grad_norm": 0.718018605876598, + "learning_rate": 1.8449968496986461e-06, + "loss": 0.7237124443054199, + "step": 1916 + }, + { + "epoch": 0.441705069124424, + "grad_norm": 0.7573893032737062, + "learning_rate": 1.8447930491678732e-06, + "loss": 0.8939133882522583, + "step": 1917 + }, + { + "epoch": 0.44193548387096776, + "grad_norm": 0.8373489922925343, + "learning_rate": 1.8445891260165076e-06, + "loss": 0.8815577626228333, + "step": 1918 + }, + { + "epoch": 0.44216589861751154, + "grad_norm": 0.8703539982402225, + "learning_rate": 1.8443850802741485e-06, + "loss": 0.943426787853241, + "step": 1919 + }, + { + "epoch": 0.4423963133640553, + "grad_norm": 0.6998600920537428, + "learning_rate": 1.8441809119704126e-06, + "loss": 0.8001632690429688, + "step": 1920 + }, + { + "epoch": 0.44262672811059905, + "grad_norm": 0.8531362441371287, + "learning_rate": 1.8439766211349352e-06, + "loss": 0.8656308650970459, + "step": 1921 + }, + { + "epoch": 0.44285714285714284, + "grad_norm": 0.7261410922718881, + "learning_rate": 1.8437722077973686e-06, + "loss": 0.9774024486541748, + "step": 1922 + }, + { + "epoch": 0.4430875576036866, + "grad_norm": 0.728823767818971, + "learning_rate": 1.8435676719873827e-06, + "loss": 0.7655738592147827, + "step": 1923 + }, + { + "epoch": 0.4433179723502304, + "grad_norm": 0.6595509202419896, + "learning_rate": 1.8433630137346657e-06, + "loss": 0.6455004811286926, + "step": 1924 + }, + { + "epoch": 0.4435483870967742, + "grad_norm": 0.7214853647491487, + "learning_rate": 1.8431582330689243e-06, + "loss": 0.8221153020858765, + "step": 1925 + }, + { + "epoch": 0.44377880184331797, + "grad_norm": 0.7718374957528886, + "learning_rate": 1.8429533300198816e-06, + "loss": 0.7878339886665344, + "step": 1926 
+ }, + { + "epoch": 0.44400921658986175, + "grad_norm": 0.7666174978175726, + "learning_rate": 1.8427483046172787e-06, + "loss": 0.8292763829231262, + "step": 1927 + }, + { + "epoch": 0.44423963133640554, + "grad_norm": 0.7395800766154846, + "learning_rate": 1.842543156890875e-06, + "loss": 0.7774572372436523, + "step": 1928 + }, + { + "epoch": 0.4444700460829493, + "grad_norm": 0.7419338266362171, + "learning_rate": 1.8423378868704476e-06, + "loss": 0.7327601909637451, + "step": 1929 + }, + { + "epoch": 0.4447004608294931, + "grad_norm": 0.7176112305038147, + "learning_rate": 1.8421324945857909e-06, + "loss": 0.8067511320114136, + "step": 1930 + }, + { + "epoch": 0.4449308755760369, + "grad_norm": 0.780684647138278, + "learning_rate": 1.8419269800667173e-06, + "loss": 0.851010799407959, + "step": 1931 + }, + { + "epoch": 0.44516129032258067, + "grad_norm": 0.7848772154457995, + "learning_rate": 1.8417213433430576e-06, + "loss": 0.8402234315872192, + "step": 1932 + }, + { + "epoch": 0.4453917050691244, + "grad_norm": 0.7848428302916386, + "learning_rate": 1.8415155844446591e-06, + "loss": 0.8857355117797852, + "step": 1933 + }, + { + "epoch": 0.4456221198156682, + "grad_norm": 0.6465222204250215, + "learning_rate": 1.841309703401387e-06, + "loss": 0.7517881393432617, + "step": 1934 + }, + { + "epoch": 0.44585253456221197, + "grad_norm": 0.8220839741097039, + "learning_rate": 1.8411037002431257e-06, + "loss": 0.8583779335021973, + "step": 1935 + }, + { + "epoch": 0.44608294930875575, + "grad_norm": 0.7149579567670102, + "learning_rate": 1.8408975749997758e-06, + "loss": 0.7691524028778076, + "step": 1936 + }, + { + "epoch": 0.44631336405529953, + "grad_norm": 0.6891731440130011, + "learning_rate": 1.8406913277012558e-06, + "loss": 0.9164496660232544, + "step": 1937 + }, + { + "epoch": 0.4465437788018433, + "grad_norm": 0.6382978906826758, + "learning_rate": 1.8404849583775025e-06, + "loss": 0.843226432800293, + "step": 1938 + }, + { + "epoch": 0.4467741935483871, + 
"grad_norm": 0.843769912689158, + "learning_rate": 1.8402784670584706e-06, + "loss": 0.8492633104324341, + "step": 1939 + }, + { + "epoch": 0.4470046082949309, + "grad_norm": 0.7117202181402426, + "learning_rate": 1.8400718537741314e-06, + "loss": 0.8088324069976807, + "step": 1940 + }, + { + "epoch": 0.44723502304147467, + "grad_norm": 0.8584564611753391, + "learning_rate": 1.8398651185544746e-06, + "loss": 0.8879667520523071, + "step": 1941 + }, + { + "epoch": 0.44746543778801845, + "grad_norm": 0.6515549607308898, + "learning_rate": 1.8396582614295078e-06, + "loss": 0.8926588892936707, + "step": 1942 + }, + { + "epoch": 0.44769585253456223, + "grad_norm": 0.6885634929225364, + "learning_rate": 1.8394512824292558e-06, + "loss": 0.8007583618164062, + "step": 1943 + }, + { + "epoch": 0.447926267281106, + "grad_norm": 0.6940540666117992, + "learning_rate": 1.8392441815837613e-06, + "loss": 0.7420827746391296, + "step": 1944 + }, + { + "epoch": 0.44815668202764974, + "grad_norm": 0.6846873323136197, + "learning_rate": 1.839036958923085e-06, + "loss": 0.7653264999389648, + "step": 1945 + }, + { + "epoch": 0.4483870967741935, + "grad_norm": 0.6684685460178057, + "learning_rate": 1.838829614477305e-06, + "loss": 0.886576771736145, + "step": 1946 + }, + { + "epoch": 0.4486175115207373, + "grad_norm": 0.7769567865097903, + "learning_rate": 1.8386221482765168e-06, + "loss": 0.904376745223999, + "step": 1947 + }, + { + "epoch": 0.4488479262672811, + "grad_norm": 0.6833196213451335, + "learning_rate": 1.838414560350834e-06, + "loss": 0.6791579723358154, + "step": 1948 + }, + { + "epoch": 0.4490783410138249, + "grad_norm": 0.8296885335278092, + "learning_rate": 1.838206850730388e-06, + "loss": 0.9402183294296265, + "step": 1949 + }, + { + "epoch": 0.44930875576036866, + "grad_norm": 0.9215175287627321, + "learning_rate": 1.8379990194453265e-06, + "loss": 0.9756022691726685, + "step": 1950 + }, + { + "epoch": 0.44953917050691244, + "grad_norm": 0.9502651388093868, + 
"learning_rate": 1.8377910665258173e-06, + "loss": 0.7311051487922668, + "step": 1951 + }, + { + "epoch": 0.4497695852534562, + "grad_norm": 0.5687721596613555, + "learning_rate": 1.8375829920020438e-06, + "loss": 0.6966956853866577, + "step": 1952 + }, + { + "epoch": 0.45, + "grad_norm": 0.7191813033419734, + "learning_rate": 1.8373747959042076e-06, + "loss": 0.7327426671981812, + "step": 1953 + }, + { + "epoch": 0.4502304147465438, + "grad_norm": 0.8067848664348717, + "learning_rate": 1.8371664782625285e-06, + "loss": 0.8650925755500793, + "step": 1954 + }, + { + "epoch": 0.4504608294930876, + "grad_norm": 0.8028206677205298, + "learning_rate": 1.8369580391072431e-06, + "loss": 0.876739501953125, + "step": 1955 + }, + { + "epoch": 0.45069124423963136, + "grad_norm": 0.7092651204784524, + "learning_rate": 1.8367494784686066e-06, + "loss": 0.7787455320358276, + "step": 1956 + }, + { + "epoch": 0.4509216589861751, + "grad_norm": 0.7762123563340246, + "learning_rate": 1.836540796376891e-06, + "loss": 0.8874029517173767, + "step": 1957 + }, + { + "epoch": 0.4511520737327189, + "grad_norm": 0.7670080315961673, + "learning_rate": 1.8363319928623862e-06, + "loss": 0.8944835662841797, + "step": 1958 + }, + { + "epoch": 0.45138248847926266, + "grad_norm": 0.570293089893543, + "learning_rate": 1.8361230679553996e-06, + "loss": 0.7106739282608032, + "step": 1959 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.7068996407627426, + "learning_rate": 1.835914021686257e-06, + "loss": 0.8668634295463562, + "step": 1960 + }, + { + "epoch": 0.4518433179723502, + "grad_norm": 0.7818076957354034, + "learning_rate": 1.8357048540853003e-06, + "loss": 0.8123712539672852, + "step": 1961 + }, + { + "epoch": 0.452073732718894, + "grad_norm": 0.7369058807274856, + "learning_rate": 1.8354955651828907e-06, + "loss": 0.865728497505188, + "step": 1962 + }, + { + "epoch": 0.4523041474654378, + "grad_norm": 0.7502978391788373, + "learning_rate": 1.8352861550094056e-06, + "loss": 
0.8066651225090027, + "step": 1963 + }, + { + "epoch": 0.4525345622119816, + "grad_norm": 1.2076261262226256, + "learning_rate": 1.835076623595241e-06, + "loss": 1.020591139793396, + "step": 1964 + }, + { + "epoch": 0.45276497695852536, + "grad_norm": 0.7642119123557376, + "learning_rate": 1.83486697097081e-06, + "loss": 0.839346706867218, + "step": 1965 + }, + { + "epoch": 0.45299539170506914, + "grad_norm": 0.663652311830839, + "learning_rate": 1.8346571971665434e-06, + "loss": 0.7707340121269226, + "step": 1966 + }, + { + "epoch": 0.4532258064516129, + "grad_norm": 0.6603686601649886, + "learning_rate": 1.8344473022128897e-06, + "loss": 0.7969534397125244, + "step": 1967 + }, + { + "epoch": 0.4534562211981567, + "grad_norm": 0.8431782882642489, + "learning_rate": 1.8342372861403143e-06, + "loss": 0.9371283650398254, + "step": 1968 + }, + { + "epoch": 0.45368663594470043, + "grad_norm": 0.7102966402282939, + "learning_rate": 1.8340271489793015e-06, + "loss": 0.7915256023406982, + "step": 1969 + }, + { + "epoch": 0.4539170506912442, + "grad_norm": 0.6028172078632871, + "learning_rate": 1.8338168907603522e-06, + "loss": 0.8394884467124939, + "step": 1970 + }, + { + "epoch": 0.454147465437788, + "grad_norm": 0.8133055611447335, + "learning_rate": 1.833606511513985e-06, + "loss": 0.7786067128181458, + "step": 1971 + }, + { + "epoch": 0.4543778801843318, + "grad_norm": 0.905741517676821, + "learning_rate": 1.833396011270736e-06, + "loss": 0.9237443208694458, + "step": 1972 + }, + { + "epoch": 0.45460829493087557, + "grad_norm": 0.9055049100464759, + "learning_rate": 1.8331853900611596e-06, + "loss": 0.7530162334442139, + "step": 1973 + }, + { + "epoch": 0.45483870967741935, + "grad_norm": 0.7172947421019107, + "learning_rate": 1.8329746479158263e-06, + "loss": 0.8349624872207642, + "step": 1974 + }, + { + "epoch": 0.45506912442396313, + "grad_norm": 0.9222448487169791, + "learning_rate": 1.8327637848653259e-06, + "loss": 0.8748637437820435, + "step": 1975 + }, + { + 
"epoch": 0.4552995391705069, + "grad_norm": 0.7416851295200875, + "learning_rate": 1.832552800940265e-06, + "loss": 0.9111478924751282, + "step": 1976 + }, + { + "epoch": 0.4555299539170507, + "grad_norm": 0.6251856024732342, + "learning_rate": 1.8323416961712665e-06, + "loss": 0.8108797073364258, + "step": 1977 + }, + { + "epoch": 0.4557603686635945, + "grad_norm": 0.9459625715160394, + "learning_rate": 1.832130470588973e-06, + "loss": 0.9266520738601685, + "step": 1978 + }, + { + "epoch": 0.45599078341013827, + "grad_norm": 0.7773850051724754, + "learning_rate": 1.831919124224043e-06, + "loss": 0.9092522859573364, + "step": 1979 + }, + { + "epoch": 0.45622119815668205, + "grad_norm": 0.664954530341155, + "learning_rate": 1.8317076571071536e-06, + "loss": 0.8249068260192871, + "step": 1980 + }, + { + "epoch": 0.45645161290322583, + "grad_norm": 0.770896895795481, + "learning_rate": 1.8314960692689992e-06, + "loss": 0.7497084140777588, + "step": 1981 + }, + { + "epoch": 0.45668202764976956, + "grad_norm": 0.7450904317902424, + "learning_rate": 1.8312843607402907e-06, + "loss": 0.7360142469406128, + "step": 1982 + }, + { + "epoch": 0.45691244239631335, + "grad_norm": 0.7224490513690306, + "learning_rate": 1.8310725315517578e-06, + "loss": 0.8443512320518494, + "step": 1983 + }, + { + "epoch": 0.45714285714285713, + "grad_norm": 0.6770718154001021, + "learning_rate": 1.830860581734147e-06, + "loss": 0.7995656728744507, + "step": 1984 + }, + { + "epoch": 0.4573732718894009, + "grad_norm": 0.8305927985197211, + "learning_rate": 1.8306485113182229e-06, + "loss": 0.7396436929702759, + "step": 1985 + }, + { + "epoch": 0.4576036866359447, + "grad_norm": 0.7351757860546534, + "learning_rate": 1.8304363203347668e-06, + "loss": 0.7415385246276855, + "step": 1986 + }, + { + "epoch": 0.4578341013824885, + "grad_norm": 0.8416697439034252, + "learning_rate": 1.8302240088145784e-06, + "loss": 0.9316694736480713, + "step": 1987 + }, + { + "epoch": 0.45806451612903226, + 
"grad_norm": 0.6482250359686991, + "learning_rate": 1.830011576788474e-06, + "loss": 0.7692697048187256, + "step": 1988 + }, + { + "epoch": 0.45829493087557605, + "grad_norm": 0.7546540101557039, + "learning_rate": 1.829799024287288e-06, + "loss": 0.8377524614334106, + "step": 1989 + }, + { + "epoch": 0.45852534562211983, + "grad_norm": 0.800432018333432, + "learning_rate": 1.8295863513418724e-06, + "loss": 0.8005630970001221, + "step": 1990 + }, + { + "epoch": 0.4587557603686636, + "grad_norm": 0.6132717130341248, + "learning_rate": 1.829373557983096e-06, + "loss": 0.8609297275543213, + "step": 1991 + }, + { + "epoch": 0.4589861751152074, + "grad_norm": 0.7611348757483902, + "learning_rate": 1.8291606442418454e-06, + "loss": 0.9111521244049072, + "step": 1992 + }, + { + "epoch": 0.4592165898617512, + "grad_norm": 0.6486046074488622, + "learning_rate": 1.8289476101490254e-06, + "loss": 0.7540388107299805, + "step": 1993 + }, + { + "epoch": 0.4594470046082949, + "grad_norm": 0.7891604292973137, + "learning_rate": 1.8287344557355565e-06, + "loss": 0.9018936157226562, + "step": 1994 + }, + { + "epoch": 0.4596774193548387, + "grad_norm": 0.8558307889574596, + "learning_rate": 1.8285211810323791e-06, + "loss": 0.918912947177887, + "step": 1995 + }, + { + "epoch": 0.4599078341013825, + "grad_norm": 0.6889746928021416, + "learning_rate": 1.8283077860704488e-06, + "loss": 0.7777351140975952, + "step": 1996 + }, + { + "epoch": 0.46013824884792626, + "grad_norm": 0.8546199279018112, + "learning_rate": 1.82809427088074e-06, + "loss": 0.9283437132835388, + "step": 1997 + }, + { + "epoch": 0.46036866359447004, + "grad_norm": 0.7206983576837674, + "learning_rate": 1.8278806354942442e-06, + "loss": 0.7032894492149353, + "step": 1998 + }, + { + "epoch": 0.4605990783410138, + "grad_norm": 0.7084552833839082, + "learning_rate": 1.8276668799419696e-06, + "loss": 0.8392905592918396, + "step": 1999 + }, + { + "epoch": 0.4608294930875576, + "grad_norm": 0.8216520324249929, + 
"learning_rate": 1.8274530042549434e-06, + "loss": 0.8059369325637817, + "step": 2000 + }, + { + "epoch": 0.4610599078341014, + "grad_norm": 0.7022225516164876, + "learning_rate": 1.827239008464209e-06, + "loss": 0.7738519906997681, + "step": 2001 + }, + { + "epoch": 0.4612903225806452, + "grad_norm": 0.894321981759021, + "learning_rate": 1.8270248926008275e-06, + "loss": 0.9189014434814453, + "step": 2002 + }, + { + "epoch": 0.46152073732718896, + "grad_norm": 0.9750927332357222, + "learning_rate": 1.8268106566958782e-06, + "loss": 0.8878552913665771, + "step": 2003 + }, + { + "epoch": 0.46175115207373274, + "grad_norm": 0.7601663032895281, + "learning_rate": 1.826596300780456e-06, + "loss": 0.9786058664321899, + "step": 2004 + }, + { + "epoch": 0.4619815668202765, + "grad_norm": 0.7513085122069586, + "learning_rate": 1.8263818248856754e-06, + "loss": 0.7887653112411499, + "step": 2005 + }, + { + "epoch": 0.46221198156682025, + "grad_norm": 0.7571825247765968, + "learning_rate": 1.8261672290426668e-06, + "loss": 0.8773549795150757, + "step": 2006 + }, + { + "epoch": 0.46244239631336403, + "grad_norm": 0.6543768471355319, + "learning_rate": 1.8259525132825786e-06, + "loss": 0.6929831504821777, + "step": 2007 + }, + { + "epoch": 0.4626728110599078, + "grad_norm": 0.8544099497368944, + "learning_rate": 1.8257376776365765e-06, + "loss": 0.9438232183456421, + "step": 2008 + }, + { + "epoch": 0.4629032258064516, + "grad_norm": 0.6803330432545487, + "learning_rate": 1.8255227221358435e-06, + "loss": 0.7559594511985779, + "step": 2009 + }, + { + "epoch": 0.4631336405529954, + "grad_norm": 0.7347158890455135, + "learning_rate": 1.8253076468115805e-06, + "loss": 0.8990212678909302, + "step": 2010 + }, + { + "epoch": 0.46336405529953917, + "grad_norm": 0.7325838411869188, + "learning_rate": 1.825092451695005e-06, + "loss": 0.8638331890106201, + "step": 2011 + }, + { + "epoch": 0.46359447004608295, + "grad_norm": 0.7537964319175384, + "learning_rate": 1.8248771368173522e-06, 
+ "loss": 0.9262570142745972, + "step": 2012 + }, + { + "epoch": 0.46382488479262673, + "grad_norm": 0.770620841657562, + "learning_rate": 1.8246617022098754e-06, + "loss": 0.7412514090538025, + "step": 2013 + }, + { + "epoch": 0.4640552995391705, + "grad_norm": 0.8304378021605247, + "learning_rate": 1.8244461479038437e-06, + "loss": 0.8680287599563599, + "step": 2014 + }, + { + "epoch": 0.4642857142857143, + "grad_norm": 0.7004084931574237, + "learning_rate": 1.8242304739305457e-06, + "loss": 0.7774302959442139, + "step": 2015 + }, + { + "epoch": 0.4645161290322581, + "grad_norm": 0.8275882534036313, + "learning_rate": 1.824014680321285e-06, + "loss": 0.9278442859649658, + "step": 2016 + }, + { + "epoch": 0.46474654377880187, + "grad_norm": 0.6808747325759799, + "learning_rate": 1.8237987671073846e-06, + "loss": 0.9617106914520264, + "step": 2017 + }, + { + "epoch": 0.4649769585253456, + "grad_norm": 0.682915952128137, + "learning_rate": 1.8235827343201838e-06, + "loss": 0.7983255386352539, + "step": 2018 + }, + { + "epoch": 0.4652073732718894, + "grad_norm": 0.7878897167758285, + "learning_rate": 1.8233665819910393e-06, + "loss": 0.7966747283935547, + "step": 2019 + }, + { + "epoch": 0.46543778801843316, + "grad_norm": 0.893729443286113, + "learning_rate": 1.8231503101513253e-06, + "loss": 0.8977803587913513, + "step": 2020 + }, + { + "epoch": 0.46566820276497695, + "grad_norm": 0.6522874054217892, + "learning_rate": 1.8229339188324334e-06, + "loss": 0.7098231911659241, + "step": 2021 + }, + { + "epoch": 0.46589861751152073, + "grad_norm": 0.6971785978535421, + "learning_rate": 1.822717408065773e-06, + "loss": 0.6402776837348938, + "step": 2022 + }, + { + "epoch": 0.4661290322580645, + "grad_norm": 0.7272467550896602, + "learning_rate": 1.8225007778827698e-06, + "loss": 0.797479510307312, + "step": 2023 + }, + { + "epoch": 0.4663594470046083, + "grad_norm": 0.7464543289112394, + "learning_rate": 1.8222840283148675e-06, + "loss": 0.8205317258834839, + "step": 2024 
+ }, + { + "epoch": 0.4665898617511521, + "grad_norm": 0.755319646803663, + "learning_rate": 1.822067159393527e-06, + "loss": 0.8123108148574829, + "step": 2025 + }, + { + "epoch": 0.46682027649769586, + "grad_norm": 0.7470494916721893, + "learning_rate": 1.8218501711502262e-06, + "loss": 0.9103116989135742, + "step": 2026 + }, + { + "epoch": 0.46705069124423965, + "grad_norm": 0.8399971318490079, + "learning_rate": 1.8216330636164617e-06, + "loss": 0.725040078163147, + "step": 2027 + }, + { + "epoch": 0.46728110599078343, + "grad_norm": 0.8693243601175246, + "learning_rate": 1.8214158368237456e-06, + "loss": 0.8598217964172363, + "step": 2028 + }, + { + "epoch": 0.4675115207373272, + "grad_norm": 0.9587381766929439, + "learning_rate": 1.821198490803608e-06, + "loss": 0.9139465093612671, + "step": 2029 + }, + { + "epoch": 0.46774193548387094, + "grad_norm": 0.7850806397253399, + "learning_rate": 1.8209810255875966e-06, + "loss": 0.8331620097160339, + "step": 2030 + }, + { + "epoch": 0.4679723502304147, + "grad_norm": 0.8908286579751021, + "learning_rate": 1.8207634412072764e-06, + "loss": 0.7901387810707092, + "step": 2031 + }, + { + "epoch": 0.4682027649769585, + "grad_norm": 0.6861413854458724, + "learning_rate": 1.8205457376942288e-06, + "loss": 0.7651060819625854, + "step": 2032 + }, + { + "epoch": 0.4684331797235023, + "grad_norm": 0.7738923235394239, + "learning_rate": 1.820327915080054e-06, + "loss": 0.7382134199142456, + "step": 2033 + }, + { + "epoch": 0.4686635944700461, + "grad_norm": 0.6962774548883505, + "learning_rate": 1.8201099733963682e-06, + "loss": 0.7851507067680359, + "step": 2034 + }, + { + "epoch": 0.46889400921658986, + "grad_norm": 0.8995005169228616, + "learning_rate": 1.8198919126748056e-06, + "loss": 0.9357708692550659, + "step": 2035 + }, + { + "epoch": 0.46912442396313364, + "grad_norm": 0.8238296907521364, + "learning_rate": 1.819673732947017e-06, + "loss": 0.8188502788543701, + "step": 2036 + }, + { + "epoch": 0.4693548387096774, + 
"grad_norm": 1.0258349340262545, + "learning_rate": 1.8194554342446712e-06, + "loss": 0.81590735912323, + "step": 2037 + }, + { + "epoch": 0.4695852534562212, + "grad_norm": 0.811644542087897, + "learning_rate": 1.8192370165994544e-06, + "loss": 0.6879743933677673, + "step": 2038 + }, + { + "epoch": 0.469815668202765, + "grad_norm": 0.8669848845646889, + "learning_rate": 1.8190184800430686e-06, + "loss": 0.9287742376327515, + "step": 2039 + }, + { + "epoch": 0.4700460829493088, + "grad_norm": 0.9807524438459786, + "learning_rate": 1.818799824607235e-06, + "loss": 0.9625484943389893, + "step": 2040 + }, + { + "epoch": 0.47027649769585256, + "grad_norm": 0.8259194997097902, + "learning_rate": 1.8185810503236904e-06, + "loss": 0.8267782926559448, + "step": 2041 + }, + { + "epoch": 0.4705069124423963, + "grad_norm": 0.8404148332122154, + "learning_rate": 1.8183621572241904e-06, + "loss": 0.8827054500579834, + "step": 2042 + }, + { + "epoch": 0.47073732718894007, + "grad_norm": 0.7550183773883651, + "learning_rate": 1.8181431453405067e-06, + "loss": 0.7755721807479858, + "step": 2043 + }, + { + "epoch": 0.47096774193548385, + "grad_norm": 0.9234865066349518, + "learning_rate": 1.8179240147044285e-06, + "loss": 0.8320283889770508, + "step": 2044 + }, + { + "epoch": 0.47119815668202764, + "grad_norm": 0.7077773446032107, + "learning_rate": 1.8177047653477619e-06, + "loss": 0.8737574815750122, + "step": 2045 + }, + { + "epoch": 0.4714285714285714, + "grad_norm": 0.8821209974643925, + "learning_rate": 1.8174853973023317e-06, + "loss": 0.7007719278335571, + "step": 2046 + }, + { + "epoch": 0.4716589861751152, + "grad_norm": 0.822666216900424, + "learning_rate": 1.817265910599978e-06, + "loss": 0.8062577247619629, + "step": 2047 + }, + { + "epoch": 0.471889400921659, + "grad_norm": 0.6775605665320994, + "learning_rate": 1.8170463052725594e-06, + "loss": 0.7059667110443115, + "step": 2048 + }, + { + "epoch": 0.47211981566820277, + "grad_norm": 0.7830423922028903, + 
"learning_rate": 1.816826581351951e-06, + "loss": 0.9025841951370239, + "step": 2049 + }, + { + "epoch": 0.47235023041474655, + "grad_norm": 0.8388278274768075, + "learning_rate": 1.8166067388700458e-06, + "loss": 0.7534186840057373, + "step": 2050 + }, + { + "epoch": 0.47258064516129034, + "grad_norm": 0.7623620329649421, + "learning_rate": 1.8163867778587534e-06, + "loss": 0.9447616338729858, + "step": 2051 + }, + { + "epoch": 0.4728110599078341, + "grad_norm": 0.6423913345578718, + "learning_rate": 1.8161666983500012e-06, + "loss": 0.7092128992080688, + "step": 2052 + }, + { + "epoch": 0.4730414746543779, + "grad_norm": 0.8648864734786782, + "learning_rate": 1.815946500375733e-06, + "loss": 0.8689497113227844, + "step": 2053 + }, + { + "epoch": 0.4732718894009217, + "grad_norm": 0.8941588190294093, + "learning_rate": 1.8157261839679105e-06, + "loss": 0.9298638105392456, + "step": 2054 + }, + { + "epoch": 0.4735023041474654, + "grad_norm": 0.6527064378770876, + "learning_rate": 1.8155057491585125e-06, + "loss": 0.7138030529022217, + "step": 2055 + }, + { + "epoch": 0.4737327188940092, + "grad_norm": 0.6699370139228978, + "learning_rate": 1.815285195979534e-06, + "loss": 0.825221836566925, + "step": 2056 + }, + { + "epoch": 0.473963133640553, + "grad_norm": 0.8559190132682327, + "learning_rate": 1.8150645244629891e-06, + "loss": 0.8643208742141724, + "step": 2057 + }, + { + "epoch": 0.47419354838709676, + "grad_norm": 0.8338353738235549, + "learning_rate": 1.8148437346409073e-06, + "loss": 0.9611828327178955, + "step": 2058 + }, + { + "epoch": 0.47442396313364055, + "grad_norm": 0.8119567978397472, + "learning_rate": 1.8146228265453363e-06, + "loss": 0.8609912991523743, + "step": 2059 + }, + { + "epoch": 0.47465437788018433, + "grad_norm": 0.7540582566966652, + "learning_rate": 1.8144018002083404e-06, + "loss": 0.8277603387832642, + "step": 2060 + }, + { + "epoch": 0.4748847926267281, + "grad_norm": 0.8438703930452028, + "learning_rate": 1.814180655662001e-06, + 
"loss": 0.8601360321044922, + "step": 2061 + }, + { + "epoch": 0.4751152073732719, + "grad_norm": 0.7023202538855939, + "learning_rate": 1.8139593929384178e-06, + "loss": 0.8454653024673462, + "step": 2062 + }, + { + "epoch": 0.4753456221198157, + "grad_norm": 0.8270167900724995, + "learning_rate": 1.8137380120697059e-06, + "loss": 0.870082437992096, + "step": 2063 + }, + { + "epoch": 0.47557603686635946, + "grad_norm": 0.8497953303327396, + "learning_rate": 1.8135165130879988e-06, + "loss": 0.8064073324203491, + "step": 2064 + }, + { + "epoch": 0.47580645161290325, + "grad_norm": 0.5532170457954219, + "learning_rate": 1.813294896025447e-06, + "loss": 0.829608678817749, + "step": 2065 + }, + { + "epoch": 0.47603686635944703, + "grad_norm": 0.7131662100806325, + "learning_rate": 1.8130731609142176e-06, + "loss": 0.8185791969299316, + "step": 2066 + }, + { + "epoch": 0.47626728110599076, + "grad_norm": 0.9405207635689381, + "learning_rate": 1.812851307786495e-06, + "loss": 0.8855293989181519, + "step": 2067 + }, + { + "epoch": 0.47649769585253454, + "grad_norm": 0.6766659884445188, + "learning_rate": 1.8126293366744815e-06, + "loss": 0.7495461106300354, + "step": 2068 + }, + { + "epoch": 0.4767281105990783, + "grad_norm": 0.9706294845402844, + "learning_rate": 1.8124072476103956e-06, + "loss": 0.9435098171234131, + "step": 2069 + }, + { + "epoch": 0.4769585253456221, + "grad_norm": 0.7637936743615437, + "learning_rate": 1.8121850406264727e-06, + "loss": 0.9299448728561401, + "step": 2070 + }, + { + "epoch": 0.4771889400921659, + "grad_norm": 0.9500813357187163, + "learning_rate": 1.8119627157549665e-06, + "loss": 0.9011991024017334, + "step": 2071 + }, + { + "epoch": 0.4774193548387097, + "grad_norm": 0.6847341374863515, + "learning_rate": 1.8117402730281476e-06, + "loss": 0.7326598167419434, + "step": 2072 + }, + { + "epoch": 0.47764976958525346, + "grad_norm": 0.7364560962143368, + "learning_rate": 1.8115177124783024e-06, + "loss": 0.8137445449829102, + "step": 
2073 + }, + { + "epoch": 0.47788018433179724, + "grad_norm": 0.9429635333298672, + "learning_rate": 1.811295034137735e-06, + "loss": 0.8653519153594971, + "step": 2074 + }, + { + "epoch": 0.478110599078341, + "grad_norm": 0.8511205154632088, + "learning_rate": 1.811072238038768e-06, + "loss": 0.9140677452087402, + "step": 2075 + }, + { + "epoch": 0.4783410138248848, + "grad_norm": 0.8012710450337872, + "learning_rate": 1.810849324213739e-06, + "loss": 0.8878934979438782, + "step": 2076 + }, + { + "epoch": 0.4785714285714286, + "grad_norm": 0.6571390792752639, + "learning_rate": 1.8106262926950045e-06, + "loss": 0.8238190412521362, + "step": 2077 + }, + { + "epoch": 0.4788018433179724, + "grad_norm": 0.8097531572330602, + "learning_rate": 1.8104031435149362e-06, + "loss": 0.7722488641738892, + "step": 2078 + }, + { + "epoch": 0.4790322580645161, + "grad_norm": 0.890992078514086, + "learning_rate": 1.8101798767059248e-06, + "loss": 0.9338192939758301, + "step": 2079 + }, + { + "epoch": 0.4792626728110599, + "grad_norm": 0.8000986035452533, + "learning_rate": 1.8099564923003767e-06, + "loss": 0.7342168688774109, + "step": 2080 + }, + { + "epoch": 0.47949308755760367, + "grad_norm": 0.7644530181466097, + "learning_rate": 1.809732990330716e-06, + "loss": 0.8445772528648376, + "step": 2081 + }, + { + "epoch": 0.47972350230414745, + "grad_norm": 0.7291725333905612, + "learning_rate": 1.8095093708293839e-06, + "loss": 0.825678825378418, + "step": 2082 + }, + { + "epoch": 0.47995391705069124, + "grad_norm": 0.8072481370959372, + "learning_rate": 1.8092856338288381e-06, + "loss": 0.7995405197143555, + "step": 2083 + }, + { + "epoch": 0.480184331797235, + "grad_norm": 0.8193777121106555, + "learning_rate": 1.8090617793615536e-06, + "loss": 0.7811745405197144, + "step": 2084 + }, + { + "epoch": 0.4804147465437788, + "grad_norm": 0.7364459454678961, + "learning_rate": 1.8088378074600231e-06, + "loss": 0.842727780342102, + "step": 2085 + }, + { + "epoch": 0.4806451612903226, + 
"grad_norm": 0.7640299868769393, + "learning_rate": 1.808613718156756e-06, + "loss": 0.840941309928894, + "step": 2086 + }, + { + "epoch": 0.48087557603686637, + "grad_norm": 0.7783965916533324, + "learning_rate": 1.808389511484278e-06, + "loss": 0.9024466872215271, + "step": 2087 + }, + { + "epoch": 0.48110599078341015, + "grad_norm": 0.8943218774431004, + "learning_rate": 1.8081651874751325e-06, + "loss": 0.9112771153450012, + "step": 2088 + }, + { + "epoch": 0.48133640552995394, + "grad_norm": 0.6675207900987881, + "learning_rate": 1.8079407461618797e-06, + "loss": 0.834719181060791, + "step": 2089 + }, + { + "epoch": 0.4815668202764977, + "grad_norm": 0.8421358450475633, + "learning_rate": 1.8077161875770971e-06, + "loss": 0.8472555875778198, + "step": 2090 + }, + { + "epoch": 0.48179723502304145, + "grad_norm": 0.7303169649115268, + "learning_rate": 1.8074915117533796e-06, + "loss": 0.8459140062332153, + "step": 2091 + }, + { + "epoch": 0.48202764976958523, + "grad_norm": 0.6945162401362365, + "learning_rate": 1.807266718723338e-06, + "loss": 0.6570066213607788, + "step": 2092 + }, + { + "epoch": 0.482258064516129, + "grad_norm": 0.7314212575092469, + "learning_rate": 1.8070418085196006e-06, + "loss": 0.8897342681884766, + "step": 2093 + }, + { + "epoch": 0.4824884792626728, + "grad_norm": 0.8312385191950623, + "learning_rate": 1.8068167811748132e-06, + "loss": 0.8339060544967651, + "step": 2094 + }, + { + "epoch": 0.4827188940092166, + "grad_norm": 0.7547678583050421, + "learning_rate": 1.8065916367216383e-06, + "loss": 0.7972484827041626, + "step": 2095 + }, + { + "epoch": 0.48294930875576036, + "grad_norm": 0.7424060773179767, + "learning_rate": 1.806366375192755e-06, + "loss": 0.7894760966300964, + "step": 2096 + }, + { + "epoch": 0.48317972350230415, + "grad_norm": 0.7408232706643347, + "learning_rate": 1.8061409966208597e-06, + "loss": 0.713944673538208, + "step": 2097 + }, + { + "epoch": 0.48341013824884793, + "grad_norm": 0.8423029874540192, + 
"learning_rate": 1.8059155010386662e-06, + "loss": 0.7832180261611938, + "step": 2098 + }, + { + "epoch": 0.4836405529953917, + "grad_norm": 0.6563887159918735, + "learning_rate": 1.8056898884789043e-06, + "loss": 0.8873809576034546, + "step": 2099 + }, + { + "epoch": 0.4838709677419355, + "grad_norm": 0.8864132111812594, + "learning_rate": 1.8054641589743218e-06, + "loss": 0.8174929618835449, + "step": 2100 + }, + { + "epoch": 0.4841013824884793, + "grad_norm": 0.6797946394214075, + "learning_rate": 1.805238312557683e-06, + "loss": 0.876921534538269, + "step": 2101 + }, + { + "epoch": 0.48433179723502306, + "grad_norm": 0.7629892942789464, + "learning_rate": 1.8050123492617693e-06, + "loss": 0.9455937147140503, + "step": 2102 + }, + { + "epoch": 0.4845622119815668, + "grad_norm": 0.6880522665173857, + "learning_rate": 1.8047862691193784e-06, + "loss": 0.8146508932113647, + "step": 2103 + }, + { + "epoch": 0.4847926267281106, + "grad_norm": 0.762873599305404, + "learning_rate": 1.8045600721633262e-06, + "loss": 0.8513495326042175, + "step": 2104 + }, + { + "epoch": 0.48502304147465436, + "grad_norm": 0.8329533644475985, + "learning_rate": 1.8043337584264443e-06, + "loss": 0.8430027961730957, + "step": 2105 + }, + { + "epoch": 0.48525345622119814, + "grad_norm": 0.6323595862794837, + "learning_rate": 1.8041073279415826e-06, + "loss": 0.7683960199356079, + "step": 2106 + }, + { + "epoch": 0.4854838709677419, + "grad_norm": 0.6620613064117244, + "learning_rate": 1.8038807807416067e-06, + "loss": 0.7099664211273193, + "step": 2107 + }, + { + "epoch": 0.4857142857142857, + "grad_norm": 0.725415262213876, + "learning_rate": 1.8036541168593994e-06, + "loss": 0.8046330213546753, + "step": 2108 + }, + { + "epoch": 0.4859447004608295, + "grad_norm": 0.7817858416968994, + "learning_rate": 1.803427336327861e-06, + "loss": 0.8387504816055298, + "step": 2109 + }, + { + "epoch": 0.4861751152073733, + "grad_norm": 0.7135784962709865, + "learning_rate": 1.8032004391799085e-06, + 
"loss": 0.883955717086792, + "step": 2110 + }, + { + "epoch": 0.48640552995391706, + "grad_norm": 0.7408960119431725, + "learning_rate": 1.8029734254484756e-06, + "loss": 0.7622070908546448, + "step": 2111 + }, + { + "epoch": 0.48663594470046084, + "grad_norm": 0.7726145388563513, + "learning_rate": 1.802746295166513e-06, + "loss": 0.6625584363937378, + "step": 2112 + }, + { + "epoch": 0.4868663594470046, + "grad_norm": 0.8189497209718242, + "learning_rate": 1.8025190483669878e-06, + "loss": 0.8232327699661255, + "step": 2113 + }, + { + "epoch": 0.4870967741935484, + "grad_norm": 0.8528139298235252, + "learning_rate": 1.8022916850828857e-06, + "loss": 0.9083148241043091, + "step": 2114 + }, + { + "epoch": 0.4873271889400922, + "grad_norm": 0.7392938308731752, + "learning_rate": 1.8020642053472074e-06, + "loss": 0.8248398303985596, + "step": 2115 + }, + { + "epoch": 0.4875576036866359, + "grad_norm": 0.7121240208517446, + "learning_rate": 1.8018366091929717e-06, + "loss": 0.8055423498153687, + "step": 2116 + }, + { + "epoch": 0.4877880184331797, + "grad_norm": 0.778973471543998, + "learning_rate": 1.8016088966532135e-06, + "loss": 0.8716787695884705, + "step": 2117 + }, + { + "epoch": 0.4880184331797235, + "grad_norm": 0.7561230225795058, + "learning_rate": 1.801381067760985e-06, + "loss": 0.8530780673027039, + "step": 2118 + }, + { + "epoch": 0.48824884792626727, + "grad_norm": 0.6774037273322415, + "learning_rate": 1.8011531225493557e-06, + "loss": 0.7958484888076782, + "step": 2119 + }, + { + "epoch": 0.48847926267281105, + "grad_norm": 0.8596146173926187, + "learning_rate": 1.800925061051411e-06, + "loss": 0.8312872648239136, + "step": 2120 + }, + { + "epoch": 0.48870967741935484, + "grad_norm": 0.8135900564482533, + "learning_rate": 1.8006968833002541e-06, + "loss": 0.8097391128540039, + "step": 2121 + }, + { + "epoch": 0.4889400921658986, + "grad_norm": 0.9139337120301166, + "learning_rate": 1.8004685893290046e-06, + "loss": 0.8636112213134766, + "step": 2122 
+ }, + { + "epoch": 0.4891705069124424, + "grad_norm": 0.9088930992891967, + "learning_rate": 1.800240179170799e-06, + "loss": 0.9122721552848816, + "step": 2123 + }, + { + "epoch": 0.4894009216589862, + "grad_norm": 0.914017678688966, + "learning_rate": 1.8000116528587907e-06, + "loss": 0.8172330856323242, + "step": 2124 + }, + { + "epoch": 0.48963133640552997, + "grad_norm": 0.8007018337125341, + "learning_rate": 1.7997830104261502e-06, + "loss": 0.7377575635910034, + "step": 2125 + }, + { + "epoch": 0.48986175115207375, + "grad_norm": 0.9218847107737449, + "learning_rate": 1.7995542519060644e-06, + "loss": 0.7278136014938354, + "step": 2126 + }, + { + "epoch": 0.49009216589861754, + "grad_norm": 0.8808842591031234, + "learning_rate": 1.7993253773317374e-06, + "loss": 0.8977715969085693, + "step": 2127 + }, + { + "epoch": 0.49032258064516127, + "grad_norm": 0.7019593909183576, + "learning_rate": 1.7990963867363902e-06, + "loss": 0.789979100227356, + "step": 2128 + }, + { + "epoch": 0.49055299539170505, + "grad_norm": 0.7069412826082713, + "learning_rate": 1.7988672801532602e-06, + "loss": 0.8304328322410583, + "step": 2129 + }, + { + "epoch": 0.49078341013824883, + "grad_norm": 0.7922910084647693, + "learning_rate": 1.7986380576156019e-06, + "loss": 0.7597516179084778, + "step": 2130 + }, + { + "epoch": 0.4910138248847926, + "grad_norm": 0.6007262757544611, + "learning_rate": 1.7984087191566873e-06, + "loss": 0.661639928817749, + "step": 2131 + }, + { + "epoch": 0.4912442396313364, + "grad_norm": 0.7484873666922557, + "learning_rate": 1.7981792648098035e-06, + "loss": 0.7871333360671997, + "step": 2132 + }, + { + "epoch": 0.4914746543778802, + "grad_norm": 0.7758289248832314, + "learning_rate": 1.7979496946082565e-06, + "loss": 0.8166402578353882, + "step": 2133 + }, + { + "epoch": 0.49170506912442397, + "grad_norm": 0.6906377275927077, + "learning_rate": 1.7977200085853674e-06, + "loss": 0.7112412452697754, + "step": 2134 + }, + { + "epoch": 0.49193548387096775, 
+ "grad_norm": 0.8103572300867555, + "learning_rate": 1.7974902067744752e-06, + "loss": 0.8358132839202881, + "step": 2135 + }, + { + "epoch": 0.49216589861751153, + "grad_norm": 0.7103875590554449, + "learning_rate": 1.7972602892089353e-06, + "loss": 0.8544377088546753, + "step": 2136 + }, + { + "epoch": 0.4923963133640553, + "grad_norm": 0.9004573017295656, + "learning_rate": 1.7970302559221197e-06, + "loss": 1.0105161666870117, + "step": 2137 + }, + { + "epoch": 0.4926267281105991, + "grad_norm": 0.7525179633837843, + "learning_rate": 1.7968001069474176e-06, + "loss": 0.7666197419166565, + "step": 2138 + }, + { + "epoch": 0.4928571428571429, + "grad_norm": 0.9209694432294897, + "learning_rate": 1.7965698423182349e-06, + "loss": 0.9250742197036743, + "step": 2139 + }, + { + "epoch": 0.4930875576036866, + "grad_norm": 0.8066717978287462, + "learning_rate": 1.7963394620679942e-06, + "loss": 0.8269995450973511, + "step": 2140 + }, + { + "epoch": 0.4933179723502304, + "grad_norm": 0.9533305612537857, + "learning_rate": 1.7961089662301346e-06, + "loss": 1.0431339740753174, + "step": 2141 + }, + { + "epoch": 0.4935483870967742, + "grad_norm": 0.7107784117562762, + "learning_rate": 1.7958783548381125e-06, + "loss": 0.7474809288978577, + "step": 2142 + }, + { + "epoch": 0.49377880184331796, + "grad_norm": 0.7729911498332706, + "learning_rate": 1.7956476279254007e-06, + "loss": 0.8850520849227905, + "step": 2143 + }, + { + "epoch": 0.49400921658986174, + "grad_norm": 0.8566824172714074, + "learning_rate": 1.7954167855254893e-06, + "loss": 0.8898880481719971, + "step": 2144 + }, + { + "epoch": 0.4942396313364055, + "grad_norm": 0.886855392770134, + "learning_rate": 1.7951858276718842e-06, + "loss": 0.8718239068984985, + "step": 2145 + }, + { + "epoch": 0.4944700460829493, + "grad_norm": 0.7604278475621951, + "learning_rate": 1.794954754398109e-06, + "loss": 0.8407484292984009, + "step": 2146 + }, + { + "epoch": 0.4947004608294931, + "grad_norm": 0.9582215314216729, + 
"learning_rate": 1.7947235657377036e-06, + "loss": 0.8453764915466309, + "step": 2147 + }, + { + "epoch": 0.4949308755760369, + "grad_norm": 0.6332693049941237, + "learning_rate": 1.794492261724225e-06, + "loss": 0.5795568227767944, + "step": 2148 + }, + { + "epoch": 0.49516129032258066, + "grad_norm": 0.9864343717736791, + "learning_rate": 1.794260842391246e-06, + "loss": 0.8601347208023071, + "step": 2149 + }, + { + "epoch": 0.49539170506912444, + "grad_norm": 0.8909931853274754, + "learning_rate": 1.7940293077723573e-06, + "loss": 0.8328324556350708, + "step": 2150 + }, + { + "epoch": 0.4956221198156682, + "grad_norm": 0.6691517417241877, + "learning_rate": 1.7937976579011655e-06, + "loss": 0.8924463391304016, + "step": 2151 + }, + { + "epoch": 0.49585253456221196, + "grad_norm": 0.7983254161536232, + "learning_rate": 1.7935658928112947e-06, + "loss": 0.9725968837738037, + "step": 2152 + }, + { + "epoch": 0.49608294930875574, + "grad_norm": 0.7649378566504706, + "learning_rate": 1.7933340125363855e-06, + "loss": 0.7814322710037231, + "step": 2153 + }, + { + "epoch": 0.4963133640552995, + "grad_norm": 0.795129549448148, + "learning_rate": 1.793102017110094e-06, + "loss": 0.8022886514663696, + "step": 2154 + }, + { + "epoch": 0.4965437788018433, + "grad_norm": 0.9455352743035539, + "learning_rate": 1.7928699065660951e-06, + "loss": 0.9747333526611328, + "step": 2155 + }, + { + "epoch": 0.4967741935483871, + "grad_norm": 1.0353782305768249, + "learning_rate": 1.7926376809380783e-06, + "loss": 0.9039797782897949, + "step": 2156 + }, + { + "epoch": 0.49700460829493087, + "grad_norm": 1.000992925643121, + "learning_rate": 1.7924053402597518e-06, + "loss": 0.9444677829742432, + "step": 2157 + }, + { + "epoch": 0.49723502304147466, + "grad_norm": 0.7688551400180308, + "learning_rate": 1.7921728845648393e-06, + "loss": 0.8442031741142273, + "step": 2158 + }, + { + "epoch": 0.49746543778801844, + "grad_norm": 0.8590371435800439, + "learning_rate": 1.7919403138870813e-06, 
+ "loss": 0.9410362839698792, + "step": 2159 + }, + { + "epoch": 0.4976958525345622, + "grad_norm": 0.8168398725206235, + "learning_rate": 1.791707628260235e-06, + "loss": 0.8929172158241272, + "step": 2160 + }, + { + "epoch": 0.497926267281106, + "grad_norm": 0.970370102226972, + "learning_rate": 1.7914748277180745e-06, + "loss": 0.9259560108184814, + "step": 2161 + }, + { + "epoch": 0.4981566820276498, + "grad_norm": 0.7778204252845836, + "learning_rate": 1.7912419122943904e-06, + "loss": 0.8201638460159302, + "step": 2162 + }, + { + "epoch": 0.49838709677419357, + "grad_norm": 0.7628075269760098, + "learning_rate": 1.7910088820229907e-06, + "loss": 0.7554556131362915, + "step": 2163 + }, + { + "epoch": 0.4986175115207373, + "grad_norm": 0.7698860809397133, + "learning_rate": 1.7907757369376984e-06, + "loss": 0.8206801414489746, + "step": 2164 + }, + { + "epoch": 0.4988479262672811, + "grad_norm": 0.7606971261006891, + "learning_rate": 1.7905424770723551e-06, + "loss": 0.765400767326355, + "step": 2165 + }, + { + "epoch": 0.49907834101382487, + "grad_norm": 0.9629614917036793, + "learning_rate": 1.7903091024608177e-06, + "loss": 0.9191527366638184, + "step": 2166 + }, + { + "epoch": 0.49930875576036865, + "grad_norm": 1.0883591834210613, + "learning_rate": 1.7900756131369601e-06, + "loss": 0.8515042662620544, + "step": 2167 + }, + { + "epoch": 0.49953917050691243, + "grad_norm": 0.7623230395498896, + "learning_rate": 1.7898420091346736e-06, + "loss": 0.8509752750396729, + "step": 2168 + }, + { + "epoch": 0.4997695852534562, + "grad_norm": 0.7417934516303272, + "learning_rate": 1.7896082904878647e-06, + "loss": 0.8007084131240845, + "step": 2169 + }, + { + "epoch": 0.5, + "grad_norm": 0.8597818097533757, + "learning_rate": 1.789374457230458e-06, + "loss": 0.8395413756370544, + "step": 2170 + }, + { + "epoch": 0.5002304147465437, + "grad_norm": 0.7232889708808644, + "learning_rate": 1.7891405093963937e-06, + "loss": 0.8624853491783142, + "step": 2171 + }, + { + 
"epoch": 0.5004608294930876, + "grad_norm": 0.6629899968556545, + "learning_rate": 1.788906447019629e-06, + "loss": 0.8141548037528992, + "step": 2172 + }, + { + "epoch": 0.5006912442396313, + "grad_norm": 0.6495144260680482, + "learning_rate": 1.7886722701341382e-06, + "loss": 0.6764500141143799, + "step": 2173 + }, + { + "epoch": 0.5009216589861751, + "grad_norm": 0.6701022764652186, + "learning_rate": 1.7884379787739112e-06, + "loss": 0.710756778717041, + "step": 2174 + }, + { + "epoch": 0.5011520737327189, + "grad_norm": 0.8273999117205362, + "learning_rate": 1.7882035729729555e-06, + "loss": 0.8090574145317078, + "step": 2175 + }, + { + "epoch": 0.5013824884792627, + "grad_norm": 0.6977221855783239, + "learning_rate": 1.7879690527652943e-06, + "loss": 0.7639138102531433, + "step": 2176 + }, + { + "epoch": 0.5016129032258064, + "grad_norm": 0.9185836860641033, + "learning_rate": 1.7877344181849687e-06, + "loss": 0.8093903660774231, + "step": 2177 + }, + { + "epoch": 0.5018433179723503, + "grad_norm": 0.7610855435865236, + "learning_rate": 1.7874996692660348e-06, + "loss": 0.8705824017524719, + "step": 2178 + }, + { + "epoch": 0.502073732718894, + "grad_norm": 0.7815265219501579, + "learning_rate": 1.7872648060425666e-06, + "loss": 0.7365947961807251, + "step": 2179 + }, + { + "epoch": 0.5023041474654378, + "grad_norm": 0.8989287933893153, + "learning_rate": 1.787029828548654e-06, + "loss": 0.9405299425125122, + "step": 2180 + }, + { + "epoch": 0.5025345622119816, + "grad_norm": 0.907417749032586, + "learning_rate": 1.7867947368184036e-06, + "loss": 0.9232017993927002, + "step": 2181 + }, + { + "epoch": 0.5027649769585254, + "grad_norm": 1.0801728154122552, + "learning_rate": 1.7865595308859388e-06, + "loss": 0.9941537380218506, + "step": 2182 + }, + { + "epoch": 0.5029953917050691, + "grad_norm": 0.7341611336832391, + "learning_rate": 1.7863242107853993e-06, + "loss": 0.6981802582740784, + "step": 2183 + }, + { + "epoch": 0.5032258064516129, + "grad_norm": 
0.8346521198909456, + "learning_rate": 1.7860887765509417e-06, + "loss": 0.8155109882354736, + "step": 2184 + }, + { + "epoch": 0.5034562211981567, + "grad_norm": 0.8846374910749497, + "learning_rate": 1.7858532282167385e-06, + "loss": 0.7246255874633789, + "step": 2185 + }, + { + "epoch": 0.5036866359447004, + "grad_norm": 0.7027049895049993, + "learning_rate": 1.7856175658169796e-06, + "loss": 0.7042064666748047, + "step": 2186 + }, + { + "epoch": 0.5039170506912443, + "grad_norm": 0.8633735424450812, + "learning_rate": 1.7853817893858714e-06, + "loss": 0.7522145509719849, + "step": 2187 + }, + { + "epoch": 0.504147465437788, + "grad_norm": 0.8170927084265063, + "learning_rate": 1.7851458989576359e-06, + "loss": 1.0157709121704102, + "step": 2188 + }, + { + "epoch": 0.5043778801843318, + "grad_norm": 0.8537305826863457, + "learning_rate": 1.7849098945665127e-06, + "loss": 0.7096433639526367, + "step": 2189 + }, + { + "epoch": 0.5046082949308756, + "grad_norm": 0.8293401368813538, + "learning_rate": 1.7846737762467572e-06, + "loss": 0.7743037939071655, + "step": 2190 + }, + { + "epoch": 0.5048387096774194, + "grad_norm": 0.802261593558941, + "learning_rate": 1.784437544032642e-06, + "loss": 0.7907241582870483, + "step": 2191 + }, + { + "epoch": 0.5050691244239631, + "grad_norm": 0.9488985791352184, + "learning_rate": 1.7842011979584557e-06, + "loss": 0.8692185878753662, + "step": 2192 + }, + { + "epoch": 0.505299539170507, + "grad_norm": 1.0636987469588612, + "learning_rate": 1.783964738058504e-06, + "loss": 0.9678715467453003, + "step": 2193 + }, + { + "epoch": 0.5055299539170507, + "grad_norm": 0.7713527005281836, + "learning_rate": 1.7837281643671077e-06, + "loss": 0.855170726776123, + "step": 2194 + }, + { + "epoch": 0.5057603686635944, + "grad_norm": 0.7469430705420217, + "learning_rate": 1.7834914769186065e-06, + "loss": 0.8452733755111694, + "step": 2195 + }, + { + "epoch": 0.5059907834101383, + "grad_norm": 0.6866121153572871, + "learning_rate": 
1.7832546757473543e-06, + "loss": 0.7517217397689819, + "step": 2196 + }, + { + "epoch": 0.506221198156682, + "grad_norm": 0.7453227048555126, + "learning_rate": 1.783017760887723e-06, + "loss": 0.6971632838249207, + "step": 2197 + }, + { + "epoch": 0.5064516129032258, + "grad_norm": 0.7964964192157018, + "learning_rate": 1.7827807323741002e-06, + "loss": 0.8638256192207336, + "step": 2198 + }, + { + "epoch": 0.5066820276497696, + "grad_norm": 0.7941877452524988, + "learning_rate": 1.7825435902408903e-06, + "loss": 0.8410143256187439, + "step": 2199 + }, + { + "epoch": 0.5069124423963134, + "grad_norm": 0.7902588767037179, + "learning_rate": 1.7823063345225143e-06, + "loss": 0.8127691745758057, + "step": 2200 + }, + { + "epoch": 0.5071428571428571, + "grad_norm": 0.7618481515663807, + "learning_rate": 1.7820689652534096e-06, + "loss": 0.7351404428482056, + "step": 2201 + }, + { + "epoch": 0.507373271889401, + "grad_norm": 0.6691944306500267, + "learning_rate": 1.7818314824680298e-06, + "loss": 0.7258716821670532, + "step": 2202 + }, + { + "epoch": 0.5076036866359447, + "grad_norm": 1.0029859864492747, + "learning_rate": 1.7815938862008454e-06, + "loss": 0.9509599208831787, + "step": 2203 + }, + { + "epoch": 0.5078341013824885, + "grad_norm": 0.7738532710061052, + "learning_rate": 1.7813561764863429e-06, + "loss": 0.8600929379463196, + "step": 2204 + }, + { + "epoch": 0.5080645161290323, + "grad_norm": 0.9689099485850551, + "learning_rate": 1.7811183533590257e-06, + "loss": 0.8688119649887085, + "step": 2205 + }, + { + "epoch": 0.5082949308755761, + "grad_norm": 0.7599344683888546, + "learning_rate": 1.780880416853414e-06, + "loss": 0.8447986841201782, + "step": 2206 + }, + { + "epoch": 0.5085253456221198, + "grad_norm": 0.6953642388755117, + "learning_rate": 1.7806423670040433e-06, + "loss": 0.8262573480606079, + "step": 2207 + }, + { + "epoch": 0.5087557603686635, + "grad_norm": 0.7640117945069856, + "learning_rate": 1.7804042038454666e-06, + "loss": 
0.9534487724304199, + "step": 2208 + }, + { + "epoch": 0.5089861751152074, + "grad_norm": 0.7513792438385134, + "learning_rate": 1.7801659274122527e-06, + "loss": 0.7712565064430237, + "step": 2209 + }, + { + "epoch": 0.5092165898617511, + "grad_norm": 0.8714588056175714, + "learning_rate": 1.7799275377389873e-06, + "loss": 0.8190760016441345, + "step": 2210 + }, + { + "epoch": 0.509447004608295, + "grad_norm": 0.9379540710774249, + "learning_rate": 1.7796890348602722e-06, + "loss": 0.8647592067718506, + "step": 2211 + }, + { + "epoch": 0.5096774193548387, + "grad_norm": 0.7912467632232041, + "learning_rate": 1.7794504188107257e-06, + "loss": 0.7788198590278625, + "step": 2212 + }, + { + "epoch": 0.5099078341013825, + "grad_norm": 0.7053754197084299, + "learning_rate": 1.779211689624983e-06, + "loss": 0.8610718250274658, + "step": 2213 + }, + { + "epoch": 0.5101382488479262, + "grad_norm": 0.7783569383566119, + "learning_rate": 1.7789728473376952e-06, + "loss": 0.832200825214386, + "step": 2214 + }, + { + "epoch": 0.5103686635944701, + "grad_norm": 0.7823482622118234, + "learning_rate": 1.7787338919835298e-06, + "loss": 0.7325488328933716, + "step": 2215 + }, + { + "epoch": 0.5105990783410138, + "grad_norm": 0.8903627357495159, + "learning_rate": 1.7784948235971707e-06, + "loss": 0.8038203716278076, + "step": 2216 + }, + { + "epoch": 0.5108294930875577, + "grad_norm": 0.6275186054972087, + "learning_rate": 1.7782556422133185e-06, + "loss": 0.7016317248344421, + "step": 2217 + }, + { + "epoch": 0.5110599078341014, + "grad_norm": 0.8951545762278973, + "learning_rate": 1.7780163478666905e-06, + "loss": 0.7964655160903931, + "step": 2218 + }, + { + "epoch": 0.5112903225806451, + "grad_norm": 0.7709224710894249, + "learning_rate": 1.777776940592019e-06, + "loss": 0.6681785583496094, + "step": 2219 + }, + { + "epoch": 0.511520737327189, + "grad_norm": 0.8934880823893885, + "learning_rate": 1.7775374204240547e-06, + "loss": 0.835777759552002, + "step": 2220 + }, + { + 
"epoch": 0.5117511520737327, + "grad_norm": 1.0248178001051076, + "learning_rate": 1.777297787397563e-06, + "loss": 0.9442443251609802, + "step": 2221 + }, + { + "epoch": 0.5119815668202765, + "grad_norm": 1.072158922361294, + "learning_rate": 1.7770580415473267e-06, + "loss": 0.9351231455802917, + "step": 2222 + }, + { + "epoch": 0.5122119815668202, + "grad_norm": 0.878332211622375, + "learning_rate": 1.776818182908144e-06, + "loss": 0.7238374352455139, + "step": 2223 + }, + { + "epoch": 0.5124423963133641, + "grad_norm": 0.7001659306792695, + "learning_rate": 1.7765782115148308e-06, + "loss": 0.8206230998039246, + "step": 2224 + }, + { + "epoch": 0.5126728110599078, + "grad_norm": 0.6546302150578799, + "learning_rate": 1.7763381274022176e-06, + "loss": 0.748784065246582, + "step": 2225 + }, + { + "epoch": 0.5129032258064516, + "grad_norm": 0.7566703422977776, + "learning_rate": 1.7760979306051533e-06, + "loss": 0.7980858087539673, + "step": 2226 + }, + { + "epoch": 0.5131336405529954, + "grad_norm": 0.8877968508757134, + "learning_rate": 1.7758576211585018e-06, + "loss": 0.8631168603897095, + "step": 2227 + }, + { + "epoch": 0.5133640552995392, + "grad_norm": 0.7405217897025548, + "learning_rate": 1.7756171990971441e-06, + "loss": 0.9405999779701233, + "step": 2228 + }, + { + "epoch": 0.5135944700460829, + "grad_norm": 0.8867257371824923, + "learning_rate": 1.7753766644559763e-06, + "loss": 0.9055094718933105, + "step": 2229 + }, + { + "epoch": 0.5138248847926268, + "grad_norm": 0.827493910498757, + "learning_rate": 1.775136017269912e-06, + "loss": 0.7583146691322327, + "step": 2230 + }, + { + "epoch": 0.5140552995391705, + "grad_norm": 0.8689067612775456, + "learning_rate": 1.7748952575738811e-06, + "loss": 0.8728743195533752, + "step": 2231 + }, + { + "epoch": 0.5142857142857142, + "grad_norm": 0.7067707521741841, + "learning_rate": 1.7746543854028295e-06, + "loss": 0.8133460283279419, + "step": 2232 + }, + { + "epoch": 0.5145161290322581, + "grad_norm": 
0.7177694794353267, + "learning_rate": 1.7744134007917194e-06, + "loss": 0.8389721512794495, + "step": 2233 + }, + { + "epoch": 0.5147465437788018, + "grad_norm": 0.9617522193850644, + "learning_rate": 1.774172303775529e-06, + "loss": 0.7016798257827759, + "step": 2234 + }, + { + "epoch": 0.5149769585253456, + "grad_norm": 0.7999711451764379, + "learning_rate": 1.7739310943892538e-06, + "loss": 0.7920540571212769, + "step": 2235 + }, + { + "epoch": 0.5152073732718894, + "grad_norm": 0.6990088891534603, + "learning_rate": 1.7736897726679048e-06, + "loss": 0.900149405002594, + "step": 2236 + }, + { + "epoch": 0.5154377880184332, + "grad_norm": 0.743220745754201, + "learning_rate": 1.7734483386465096e-06, + "loss": 0.8537915349006653, + "step": 2237 + }, + { + "epoch": 0.5156682027649769, + "grad_norm": 0.8134323205434837, + "learning_rate": 1.7732067923601121e-06, + "loss": 0.7418123483657837, + "step": 2238 + }, + { + "epoch": 0.5158986175115208, + "grad_norm": 1.108361921569266, + "learning_rate": 1.7729651338437721e-06, + "loss": 0.8890011310577393, + "step": 2239 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9841321811418366, + "learning_rate": 1.7727233631325663e-06, + "loss": 0.9082813262939453, + "step": 2240 + }, + { + "epoch": 0.5163594470046083, + "grad_norm": 0.9268737545625799, + "learning_rate": 1.7724814802615868e-06, + "loss": 0.8337695598602295, + "step": 2241 + }, + { + "epoch": 0.5165898617511521, + "grad_norm": 1.1037050608526282, + "learning_rate": 1.7722394852659437e-06, + "loss": 0.8990765810012817, + "step": 2242 + }, + { + "epoch": 0.5168202764976959, + "grad_norm": 0.8552834719912825, + "learning_rate": 1.7719973781807614e-06, + "loss": 0.720890998840332, + "step": 2243 + }, + { + "epoch": 0.5170506912442396, + "grad_norm": 0.6406815235154244, + "learning_rate": 1.7717551590411817e-06, + "loss": 0.7966938018798828, + "step": 2244 + }, + { + "epoch": 0.5172811059907834, + "grad_norm": 0.8614270693246835, + "learning_rate": 
1.7715128278823622e-06, + "loss": 0.9290107488632202, + "step": 2245 + }, + { + "epoch": 0.5175115207373272, + "grad_norm": 0.8755598994931274, + "learning_rate": 1.771270384739477e-06, + "loss": 0.8388533592224121, + "step": 2246 + }, + { + "epoch": 0.5177419354838709, + "grad_norm": 0.8200932411512113, + "learning_rate": 1.7710278296477169e-06, + "loss": 0.8845043182373047, + "step": 2247 + }, + { + "epoch": 0.5179723502304148, + "grad_norm": 0.8499976704860752, + "learning_rate": 1.7707851626422875e-06, + "loss": 0.879709780216217, + "step": 2248 + }, + { + "epoch": 0.5182027649769585, + "grad_norm": 0.8407815201465851, + "learning_rate": 1.7705423837584123e-06, + "loss": 0.8215152025222778, + "step": 2249 + }, + { + "epoch": 0.5184331797235023, + "grad_norm": 0.8770027311962882, + "learning_rate": 1.7702994930313305e-06, + "loss": 0.8108627796173096, + "step": 2250 + }, + { + "epoch": 0.5186635944700461, + "grad_norm": 0.9106818329739914, + "learning_rate": 1.7700564904962966e-06, + "loss": 0.8391602039337158, + "step": 2251 + }, + { + "epoch": 0.5188940092165899, + "grad_norm": 0.82724043269172, + "learning_rate": 1.769813376188583e-06, + "loss": 0.8664923906326294, + "step": 2252 + }, + { + "epoch": 0.5191244239631336, + "grad_norm": 0.8478256896643234, + "learning_rate": 1.7695701501434765e-06, + "loss": 0.9670882821083069, + "step": 2253 + }, + { + "epoch": 0.5193548387096775, + "grad_norm": 0.8831524743377538, + "learning_rate": 1.7693268123962816e-06, + "loss": 0.946273684501648, + "step": 2254 + }, + { + "epoch": 0.5195852534562212, + "grad_norm": 0.7643743435262689, + "learning_rate": 1.7690833629823184e-06, + "loss": 0.9691795706748962, + "step": 2255 + }, + { + "epoch": 0.5198156682027649, + "grad_norm": 0.7833370135674333, + "learning_rate": 1.7688398019369232e-06, + "loss": 0.8086103200912476, + "step": 2256 + }, + { + "epoch": 0.5200460829493088, + "grad_norm": 0.8183770044685874, + "learning_rate": 1.7685961292954486e-06, + "loss": 
0.8574277758598328, + "step": 2257 + }, + { + "epoch": 0.5202764976958525, + "grad_norm": 0.7089387180946831, + "learning_rate": 1.7683523450932633e-06, + "loss": 0.7841963171958923, + "step": 2258 + }, + { + "epoch": 0.5205069124423963, + "grad_norm": 0.7629735238937895, + "learning_rate": 1.7681084493657523e-06, + "loss": 0.6972980499267578, + "step": 2259 + }, + { + "epoch": 0.5207373271889401, + "grad_norm": 0.7917333859989639, + "learning_rate": 1.7678644421483163e-06, + "loss": 0.9193723201751709, + "step": 2260 + }, + { + "epoch": 0.5209677419354839, + "grad_norm": 0.9714597630384237, + "learning_rate": 1.7676203234763736e-06, + "loss": 0.7902654409408569, + "step": 2261 + }, + { + "epoch": 0.5211981566820276, + "grad_norm": 0.7983060164629807, + "learning_rate": 1.767376093385357e-06, + "loss": 0.8804734945297241, + "step": 2262 + }, + { + "epoch": 0.5214285714285715, + "grad_norm": 0.9065709846386143, + "learning_rate": 1.7671317519107163e-06, + "loss": 0.7884976863861084, + "step": 2263 + }, + { + "epoch": 0.5216589861751152, + "grad_norm": 0.9252417906886758, + "learning_rate": 1.7668872990879173e-06, + "loss": 0.8233190774917603, + "step": 2264 + }, + { + "epoch": 0.521889400921659, + "grad_norm": 0.7126124532622758, + "learning_rate": 1.766642734952442e-06, + "loss": 0.7985334396362305, + "step": 2265 + }, + { + "epoch": 0.5221198156682028, + "grad_norm": 0.8073440338214538, + "learning_rate": 1.7663980595397887e-06, + "loss": 0.7805646657943726, + "step": 2266 + }, + { + "epoch": 0.5223502304147466, + "grad_norm": 0.9455838488830395, + "learning_rate": 1.7661532728854718e-06, + "loss": 0.8528248071670532, + "step": 2267 + }, + { + "epoch": 0.5225806451612903, + "grad_norm": 0.882590365173732, + "learning_rate": 1.7659083750250215e-06, + "loss": 0.7714066505432129, + "step": 2268 + }, + { + "epoch": 0.522811059907834, + "grad_norm": 0.7632999883965862, + "learning_rate": 1.7656633659939843e-06, + "loss": 0.8250499963760376, + "step": 2269 + }, + { + 
"epoch": 0.5230414746543779, + "grad_norm": 0.6787990523098465, + "learning_rate": 1.7654182458279231e-06, + "loss": 0.7878777384757996, + "step": 2270 + }, + { + "epoch": 0.5232718894009216, + "grad_norm": 0.8263772967033729, + "learning_rate": 1.7651730145624174e-06, + "loss": 0.9080224633216858, + "step": 2271 + }, + { + "epoch": 0.5235023041474655, + "grad_norm": 0.8137376292994275, + "learning_rate": 1.7649276722330607e-06, + "loss": 0.8010937571525574, + "step": 2272 + }, + { + "epoch": 0.5237327188940092, + "grad_norm": 0.8996847055009526, + "learning_rate": 1.7646822188754658e-06, + "loss": 0.903404951095581, + "step": 2273 + }, + { + "epoch": 0.523963133640553, + "grad_norm": 0.928692707021516, + "learning_rate": 1.7644366545252589e-06, + "loss": 0.9009061455726624, + "step": 2274 + }, + { + "epoch": 0.5241935483870968, + "grad_norm": 0.7651260343716183, + "learning_rate": 1.7641909792180834e-06, + "loss": 0.7158697843551636, + "step": 2275 + }, + { + "epoch": 0.5244239631336406, + "grad_norm": 0.8041302440889452, + "learning_rate": 1.763945192989599e-06, + "loss": 0.8101463317871094, + "step": 2276 + }, + { + "epoch": 0.5246543778801843, + "grad_norm": 0.8174455436475604, + "learning_rate": 1.7636992958754812e-06, + "loss": 0.758610725402832, + "step": 2277 + }, + { + "epoch": 0.5248847926267282, + "grad_norm": 0.9651314388158028, + "learning_rate": 1.7634532879114216e-06, + "loss": 0.9469501972198486, + "step": 2278 + }, + { + "epoch": 0.5251152073732719, + "grad_norm": 0.6853415956002341, + "learning_rate": 1.7632071691331281e-06, + "loss": 0.7528036236763, + "step": 2279 + }, + { + "epoch": 0.5253456221198156, + "grad_norm": 0.9124447697867164, + "learning_rate": 1.7629609395763242e-06, + "loss": 0.8519324064254761, + "step": 2280 + }, + { + "epoch": 0.5255760368663595, + "grad_norm": 0.9239480610002251, + "learning_rate": 1.7627145992767498e-06, + "loss": 0.8620004653930664, + "step": 2281 + }, + { + "epoch": 0.5258064516129032, + "grad_norm": 
0.7831738680942184, + "learning_rate": 1.762468148270161e-06, + "loss": 0.8066067695617676, + "step": 2282 + }, + { + "epoch": 0.526036866359447, + "grad_norm": 0.8314773622163678, + "learning_rate": 1.7622215865923301e-06, + "loss": 0.865642786026001, + "step": 2283 + }, + { + "epoch": 0.5262672811059907, + "grad_norm": 0.7269170910166286, + "learning_rate": 1.761974914279045e-06, + "loss": 0.8478001356124878, + "step": 2284 + }, + { + "epoch": 0.5264976958525346, + "grad_norm": 0.8461811606118353, + "learning_rate": 1.7617281313661098e-06, + "loss": 0.7984344363212585, + "step": 2285 + }, + { + "epoch": 0.5267281105990783, + "grad_norm": 0.8489168247147351, + "learning_rate": 1.7614812378893444e-06, + "loss": 0.8480801582336426, + "step": 2286 + }, + { + "epoch": 0.5269585253456222, + "grad_norm": 0.9126795310234661, + "learning_rate": 1.7612342338845859e-06, + "loss": 0.8667479753494263, + "step": 2287 + }, + { + "epoch": 0.5271889400921659, + "grad_norm": 0.9533468835174431, + "learning_rate": 1.7609871193876854e-06, + "loss": 0.8431364297866821, + "step": 2288 + }, + { + "epoch": 0.5274193548387097, + "grad_norm": 0.8628781350943807, + "learning_rate": 1.7607398944345127e-06, + "loss": 0.8544220924377441, + "step": 2289 + }, + { + "epoch": 0.5276497695852534, + "grad_norm": 0.9575259696859837, + "learning_rate": 1.760492559060951e-06, + "loss": 0.9298971891403198, + "step": 2290 + }, + { + "epoch": 0.5278801843317973, + "grad_norm": 0.8854664005974592, + "learning_rate": 1.760245113302901e-06, + "loss": 0.739667534828186, + "step": 2291 + }, + { + "epoch": 0.528110599078341, + "grad_norm": 0.9418693515744256, + "learning_rate": 1.7599975571962796e-06, + "loss": 0.8981268405914307, + "step": 2292 + }, + { + "epoch": 0.5283410138248847, + "grad_norm": 0.8489202000746718, + "learning_rate": 1.7597498907770185e-06, + "loss": 0.8027834892272949, + "step": 2293 + }, + { + "epoch": 0.5285714285714286, + "grad_norm": 0.7244957329263912, + "learning_rate": 
1.7595021140810669e-06, + "loss": 0.7018242478370667, + "step": 2294 + }, + { + "epoch": 0.5288018433179723, + "grad_norm": 0.8699196704594798, + "learning_rate": 1.7592542271443887e-06, + "loss": 0.7655147910118103, + "step": 2295 + }, + { + "epoch": 0.5290322580645161, + "grad_norm": 0.8169123509935803, + "learning_rate": 1.7590062300029644e-06, + "loss": 0.8283153772354126, + "step": 2296 + }, + { + "epoch": 0.5292626728110599, + "grad_norm": 1.0550792201388366, + "learning_rate": 1.7587581226927907e-06, + "loss": 1.0430598258972168, + "step": 2297 + }, + { + "epoch": 0.5294930875576037, + "grad_norm": 0.7609036061197976, + "learning_rate": 1.7585099052498802e-06, + "loss": 0.6683472990989685, + "step": 2298 + }, + { + "epoch": 0.5297235023041474, + "grad_norm": 0.7278178698575015, + "learning_rate": 1.7582615777102609e-06, + "loss": 0.7254939079284668, + "step": 2299 + }, + { + "epoch": 0.5299539170506913, + "grad_norm": 0.7049477325497308, + "learning_rate": 1.7580131401099774e-06, + "loss": 0.7913245558738708, + "step": 2300 + }, + { + "epoch": 0.530184331797235, + "grad_norm": 0.8416230641508338, + "learning_rate": 1.75776459248509e-06, + "loss": 0.7832915782928467, + "step": 2301 + }, + { + "epoch": 0.5304147465437788, + "grad_norm": 0.7722959383546871, + "learning_rate": 1.7575159348716754e-06, + "loss": 0.9754987955093384, + "step": 2302 + }, + { + "epoch": 0.5306451612903226, + "grad_norm": 0.8614799765536667, + "learning_rate": 1.7572671673058254e-06, + "loss": 0.8343901634216309, + "step": 2303 + }, + { + "epoch": 0.5308755760368664, + "grad_norm": 0.862069962418511, + "learning_rate": 1.757018289823649e-06, + "loss": 0.9836198091506958, + "step": 2304 + }, + { + "epoch": 0.5311059907834101, + "grad_norm": 0.7978699236275345, + "learning_rate": 1.7567693024612695e-06, + "loss": 0.8258972764015198, + "step": 2305 + }, + { + "epoch": 0.5313364055299539, + "grad_norm": 0.8169244061103897, + "learning_rate": 1.7565202052548277e-06, + "loss": 
0.8822964429855347, + "step": 2306 + }, + { + "epoch": 0.5315668202764977, + "grad_norm": 0.8094894252842574, + "learning_rate": 1.7562709982404797e-06, + "loss": 0.721222996711731, + "step": 2307 + }, + { + "epoch": 0.5317972350230414, + "grad_norm": 0.7759663122688174, + "learning_rate": 1.7560216814543974e-06, + "loss": 0.7273069620132446, + "step": 2308 + }, + { + "epoch": 0.5320276497695853, + "grad_norm": 0.749740659090673, + "learning_rate": 1.755772254932769e-06, + "loss": 0.8031520843505859, + "step": 2309 + }, + { + "epoch": 0.532258064516129, + "grad_norm": 0.8746676083569236, + "learning_rate": 1.7555227187117982e-06, + "loss": 0.8767163157463074, + "step": 2310 + }, + { + "epoch": 0.5324884792626728, + "grad_norm": 1.052374988916139, + "learning_rate": 1.755273072827705e-06, + "loss": 0.8018463850021362, + "step": 2311 + }, + { + "epoch": 0.5327188940092166, + "grad_norm": 0.9632384627648846, + "learning_rate": 1.7550233173167252e-06, + "loss": 0.8281232118606567, + "step": 2312 + }, + { + "epoch": 0.5329493087557604, + "grad_norm": 0.9472067369973646, + "learning_rate": 1.7547734522151103e-06, + "loss": 0.8802565336227417, + "step": 2313 + }, + { + "epoch": 0.5331797235023041, + "grad_norm": 0.7195582219345643, + "learning_rate": 1.754523477559128e-06, + "loss": 0.8055544495582581, + "step": 2314 + }, + { + "epoch": 0.533410138248848, + "grad_norm": 0.9358658916449707, + "learning_rate": 1.754273393385062e-06, + "loss": 0.8163481950759888, + "step": 2315 + }, + { + "epoch": 0.5336405529953917, + "grad_norm": 0.9365559775291885, + "learning_rate": 1.7540231997292111e-06, + "loss": 0.8308255076408386, + "step": 2316 + }, + { + "epoch": 0.5338709677419354, + "grad_norm": 0.9031429015213124, + "learning_rate": 1.7537728966278913e-06, + "loss": 0.8387685418128967, + "step": 2317 + }, + { + "epoch": 0.5341013824884793, + "grad_norm": 0.7470153179334161, + "learning_rate": 1.7535224841174333e-06, + "loss": 0.8668780326843262, + "step": 2318 + }, + { + 
"epoch": 0.534331797235023, + "grad_norm": 0.7449540611731051, + "learning_rate": 1.7532719622341842e-06, + "loss": 0.8394712209701538, + "step": 2319 + }, + { + "epoch": 0.5345622119815668, + "grad_norm": 0.7539905771593468, + "learning_rate": 1.7530213310145073e-06, + "loss": 0.7755688428878784, + "step": 2320 + }, + { + "epoch": 0.5347926267281106, + "grad_norm": 0.8150738821263226, + "learning_rate": 1.7527705904947805e-06, + "loss": 0.7714632749557495, + "step": 2321 + }, + { + "epoch": 0.5350230414746544, + "grad_norm": 0.807680924946579, + "learning_rate": 1.7525197407113997e-06, + "loss": 0.8810869455337524, + "step": 2322 + }, + { + "epoch": 0.5352534562211981, + "grad_norm": 1.0672299468188131, + "learning_rate": 1.7522687817007742e-06, + "loss": 0.8445242643356323, + "step": 2323 + }, + { + "epoch": 0.535483870967742, + "grad_norm": 1.1338085945775938, + "learning_rate": 1.7520177134993311e-06, + "loss": 0.9602948427200317, + "step": 2324 + }, + { + "epoch": 0.5357142857142857, + "grad_norm": 0.7789379367396811, + "learning_rate": 1.7517665361435126e-06, + "loss": 0.7865237593650818, + "step": 2325 + }, + { + "epoch": 0.5359447004608295, + "grad_norm": 0.8870578602537817, + "learning_rate": 1.7515152496697763e-06, + "loss": 0.8062880039215088, + "step": 2326 + }, + { + "epoch": 0.5361751152073733, + "grad_norm": 0.9742037408160464, + "learning_rate": 1.7512638541145966e-06, + "loss": 0.8386664986610413, + "step": 2327 + }, + { + "epoch": 0.5364055299539171, + "grad_norm": 1.0154937609139327, + "learning_rate": 1.7510123495144629e-06, + "loss": 0.973692774772644, + "step": 2328 + }, + { + "epoch": 0.5366359447004608, + "grad_norm": 0.9023959356834507, + "learning_rate": 1.7507607359058808e-06, + "loss": 0.8250089883804321, + "step": 2329 + }, + { + "epoch": 0.5368663594470046, + "grad_norm": 0.8457870176131529, + "learning_rate": 1.750509013325372e-06, + "loss": 0.8578102588653564, + "step": 2330 + }, + { + "epoch": 0.5370967741935484, + "grad_norm": 
0.8804595958614453, + "learning_rate": 1.7502571818094732e-06, + "loss": 0.916475236415863, + "step": 2331 + }, + { + "epoch": 0.5373271889400921, + "grad_norm": 0.9225430635370255, + "learning_rate": 1.7500052413947377e-06, + "loss": 0.8210046291351318, + "step": 2332 + }, + { + "epoch": 0.537557603686636, + "grad_norm": 0.7091387099201478, + "learning_rate": 1.7497531921177344e-06, + "loss": 0.816267728805542, + "step": 2333 + }, + { + "epoch": 0.5377880184331797, + "grad_norm": 0.9764630645457667, + "learning_rate": 1.7495010340150478e-06, + "loss": 1.0091882944107056, + "step": 2334 + }, + { + "epoch": 0.5380184331797235, + "grad_norm": 0.982812584725329, + "learning_rate": 1.7492487671232783e-06, + "loss": 0.7549277544021606, + "step": 2335 + }, + { + "epoch": 0.5382488479262673, + "grad_norm": 0.8589431412898547, + "learning_rate": 1.7489963914790423e-06, + "loss": 0.9584934711456299, + "step": 2336 + }, + { + "epoch": 0.5384792626728111, + "grad_norm": 0.7167225081500926, + "learning_rate": 1.7487439071189713e-06, + "loss": 0.8189069628715515, + "step": 2337 + }, + { + "epoch": 0.5387096774193548, + "grad_norm": 0.976466384445042, + "learning_rate": 1.7484913140797138e-06, + "loss": 0.7529993057250977, + "step": 2338 + }, + { + "epoch": 0.5389400921658987, + "grad_norm": 0.9894954868399615, + "learning_rate": 1.7482386123979324e-06, + "loss": 0.8611496686935425, + "step": 2339 + }, + { + "epoch": 0.5391705069124424, + "grad_norm": 1.2753256885249857, + "learning_rate": 1.7479858021103074e-06, + "loss": 0.9400241374969482, + "step": 2340 + }, + { + "epoch": 0.5394009216589861, + "grad_norm": 0.7513824016722385, + "learning_rate": 1.7477328832535332e-06, + "loss": 0.6686737537384033, + "step": 2341 + }, + { + "epoch": 0.53963133640553, + "grad_norm": 0.7834119073150019, + "learning_rate": 1.747479855864321e-06, + "loss": 0.864795982837677, + "step": 2342 + }, + { + "epoch": 0.5398617511520737, + "grad_norm": 0.9942068845664563, + "learning_rate": 
1.7472267199793971e-06, + "loss": 0.9579563140869141, + "step": 2343 + }, + { + "epoch": 0.5400921658986175, + "grad_norm": 0.9464284115225821, + "learning_rate": 1.746973475635504e-06, + "loss": 0.7492884397506714, + "step": 2344 + }, + { + "epoch": 0.5403225806451613, + "grad_norm": 1.1301826150440575, + "learning_rate": 1.7467201228694e-06, + "loss": 1.020420789718628, + "step": 2345 + }, + { + "epoch": 0.5405529953917051, + "grad_norm": 0.8996882097606888, + "learning_rate": 1.7464666617178585e-06, + "loss": 0.8277238011360168, + "step": 2346 + }, + { + "epoch": 0.5407834101382488, + "grad_norm": 0.8343415166384458, + "learning_rate": 1.7462130922176694e-06, + "loss": 0.8160337209701538, + "step": 2347 + }, + { + "epoch": 0.5410138248847927, + "grad_norm": 0.940177897473061, + "learning_rate": 1.7459594144056378e-06, + "loss": 0.8742454648017883, + "step": 2348 + }, + { + "epoch": 0.5412442396313364, + "grad_norm": 0.8263630155636004, + "learning_rate": 1.7457056283185847e-06, + "loss": 0.7987914085388184, + "step": 2349 + }, + { + "epoch": 0.5414746543778802, + "grad_norm": 0.8096196719588583, + "learning_rate": 1.7454517339933467e-06, + "loss": 0.6917734146118164, + "step": 2350 + }, + { + "epoch": 0.541705069124424, + "grad_norm": 0.9860357050478065, + "learning_rate": 1.7451977314667763e-06, + "loss": 0.8338258266448975, + "step": 2351 + }, + { + "epoch": 0.5419354838709678, + "grad_norm": 0.6906626367704619, + "learning_rate": 1.7449436207757418e-06, + "loss": 0.8308743238449097, + "step": 2352 + }, + { + "epoch": 0.5421658986175115, + "grad_norm": 0.7126371911422212, + "learning_rate": 1.744689401957127e-06, + "loss": 0.7843145728111267, + "step": 2353 + }, + { + "epoch": 0.5423963133640552, + "grad_norm": 0.6637904176126797, + "learning_rate": 1.7444350750478314e-06, + "loss": 0.9088687896728516, + "step": 2354 + }, + { + "epoch": 0.5426267281105991, + "grad_norm": 1.1601519737508017, + "learning_rate": 1.74418064008477e-06, + "loss": 0.876841127872467, 
+ "step": 2355 + }, + { + "epoch": 0.5428571428571428, + "grad_norm": 0.804702758707697, + "learning_rate": 1.743926097104874e-06, + "loss": 0.7169051170349121, + "step": 2356 + }, + { + "epoch": 0.5430875576036867, + "grad_norm": 0.8414445338031196, + "learning_rate": 1.7436714461450897e-06, + "loss": 0.7979093194007874, + "step": 2357 + }, + { + "epoch": 0.5433179723502304, + "grad_norm": 0.796767744969521, + "learning_rate": 1.7434166872423795e-06, + "loss": 0.9152545928955078, + "step": 2358 + }, + { + "epoch": 0.5435483870967742, + "grad_norm": 0.8612716514728646, + "learning_rate": 1.7431618204337212e-06, + "loss": 0.8968983888626099, + "step": 2359 + }, + { + "epoch": 0.543778801843318, + "grad_norm": 0.7451796864953032, + "learning_rate": 1.7429068457561086e-06, + "loss": 0.7591085433959961, + "step": 2360 + }, + { + "epoch": 0.5440092165898618, + "grad_norm": 0.8434007797764556, + "learning_rate": 1.7426517632465508e-06, + "loss": 0.6931861639022827, + "step": 2361 + }, + { + "epoch": 0.5442396313364055, + "grad_norm": 0.816030716232177, + "learning_rate": 1.7423965729420729e-06, + "loss": 0.7715095281600952, + "step": 2362 + }, + { + "epoch": 0.5444700460829494, + "grad_norm": 0.7333839549943538, + "learning_rate": 1.742141274879715e-06, + "loss": 0.8282119035720825, + "step": 2363 + }, + { + "epoch": 0.5447004608294931, + "grad_norm": 0.8282161479585932, + "learning_rate": 1.7418858690965337e-06, + "loss": 0.7595704197883606, + "step": 2364 + }, + { + "epoch": 0.5449308755760369, + "grad_norm": 0.8861519618227073, + "learning_rate": 1.7416303556296005e-06, + "loss": 0.8738422393798828, + "step": 2365 + }, + { + "epoch": 0.5451612903225806, + "grad_norm": 0.819062403403448, + "learning_rate": 1.741374734516003e-06, + "loss": 0.8399837017059326, + "step": 2366 + }, + { + "epoch": 0.5453917050691244, + "grad_norm": 0.9147252373002325, + "learning_rate": 1.7411190057928442e-06, + "loss": 0.8213151693344116, + "step": 2367 + }, + { + "epoch": 
0.5456221198156682, + "grad_norm": 0.862161359681962, + "learning_rate": 1.740863169497243e-06, + "loss": 0.748835563659668, + "step": 2368 + }, + { + "epoch": 0.5458525345622119, + "grad_norm": 0.6925915187477067, + "learning_rate": 1.7406072256663333e-06, + "loss": 0.9222339391708374, + "step": 2369 + }, + { + "epoch": 0.5460829493087558, + "grad_norm": 0.6352006169320189, + "learning_rate": 1.7403511743372655e-06, + "loss": 0.6543160676956177, + "step": 2370 + }, + { + "epoch": 0.5463133640552995, + "grad_norm": 0.9993386394035012, + "learning_rate": 1.7400950155472046e-06, + "loss": 0.9828567504882812, + "step": 2371 + }, + { + "epoch": 0.5465437788018433, + "grad_norm": 0.9620494284169527, + "learning_rate": 1.739838749333332e-06, + "loss": 0.95346599817276, + "step": 2372 + }, + { + "epoch": 0.5467741935483871, + "grad_norm": 0.4533946729074916, + "learning_rate": 1.7395823757328442e-06, + "loss": 0.626889705657959, + "step": 2373 + }, + { + "epoch": 0.5470046082949309, + "grad_norm": 0.6641652944774505, + "learning_rate": 1.739325894782954e-06, + "loss": 0.8152071833610535, + "step": 2374 + }, + { + "epoch": 0.5472350230414746, + "grad_norm": 0.7149653321076401, + "learning_rate": 1.7390693065208889e-06, + "loss": 0.8244980573654175, + "step": 2375 + }, + { + "epoch": 0.5474654377880185, + "grad_norm": 0.8801604517186058, + "learning_rate": 1.738812610983892e-06, + "loss": 0.8234372138977051, + "step": 2376 + }, + { + "epoch": 0.5476958525345622, + "grad_norm": 0.8626749383303203, + "learning_rate": 1.7385558082092228e-06, + "loss": 0.9334712624549866, + "step": 2377 + }, + { + "epoch": 0.5479262672811059, + "grad_norm": 0.8866496689156442, + "learning_rate": 1.7382988982341557e-06, + "loss": 0.7873882055282593, + "step": 2378 + }, + { + "epoch": 0.5481566820276498, + "grad_norm": 0.7814140858155267, + "learning_rate": 1.7380418810959814e-06, + "loss": 0.7971000671386719, + "step": 2379 + }, + { + "epoch": 0.5483870967741935, + "grad_norm": 
0.7452714019733373, + "learning_rate": 1.7377847568320046e-06, + "loss": 0.8617004156112671, + "step": 2380 + }, + { + "epoch": 0.5486175115207373, + "grad_norm": 0.7316280745753603, + "learning_rate": 1.7375275254795472e-06, + "loss": 0.6798374056816101, + "step": 2381 + }, + { + "epoch": 0.5488479262672811, + "grad_norm": 0.8600424341995414, + "learning_rate": 1.7372701870759459e-06, + "loss": 0.8621633052825928, + "step": 2382 + }, + { + "epoch": 0.5490783410138249, + "grad_norm": 0.78685909041996, + "learning_rate": 1.7370127416585527e-06, + "loss": 0.6533470153808594, + "step": 2383 + }, + { + "epoch": 0.5493087557603686, + "grad_norm": 0.9199843580999427, + "learning_rate": 1.736755189264736e-06, + "loss": 0.8854461908340454, + "step": 2384 + }, + { + "epoch": 0.5495391705069125, + "grad_norm": 1.0020485772603467, + "learning_rate": 1.7364975299318786e-06, + "loss": 0.9461240768432617, + "step": 2385 + }, + { + "epoch": 0.5497695852534562, + "grad_norm": 1.0179837516521926, + "learning_rate": 1.73623976369738e-06, + "loss": 0.8936882019042969, + "step": 2386 + }, + { + "epoch": 0.55, + "grad_norm": 0.7527230779520249, + "learning_rate": 1.7359818905986544e-06, + "loss": 0.8177640438079834, + "step": 2387 + }, + { + "epoch": 0.5502304147465438, + "grad_norm": 0.7539178622826256, + "learning_rate": 1.7357239106731317e-06, + "loss": 0.793328046798706, + "step": 2388 + }, + { + "epoch": 0.5504608294930876, + "grad_norm": 0.8548599569350254, + "learning_rate": 1.7354658239582572e-06, + "loss": 0.8837069272994995, + "step": 2389 + }, + { + "epoch": 0.5506912442396313, + "grad_norm": 0.8764277126116193, + "learning_rate": 1.7352076304914918e-06, + "loss": 0.8801138401031494, + "step": 2390 + }, + { + "epoch": 0.5509216589861751, + "grad_norm": 0.7981260720892804, + "learning_rate": 1.7349493303103123e-06, + "loss": 0.865073025226593, + "step": 2391 + }, + { + "epoch": 0.5511520737327189, + "grad_norm": 0.5938962289027067, + "learning_rate": 1.7346909234522107e-06, + 
"loss": 0.8712339401245117, + "step": 2392 + }, + { + "epoch": 0.5513824884792626, + "grad_norm": 0.6857068624612402, + "learning_rate": 1.7344324099546938e-06, + "loss": 0.7689294815063477, + "step": 2393 + }, + { + "epoch": 0.5516129032258065, + "grad_norm": 0.6784843872797971, + "learning_rate": 1.7341737898552851e-06, + "loss": 0.9228999614715576, + "step": 2394 + }, + { + "epoch": 0.5518433179723502, + "grad_norm": 1.025443261317525, + "learning_rate": 1.7339150631915228e-06, + "loss": 0.9473327398300171, + "step": 2395 + }, + { + "epoch": 0.552073732718894, + "grad_norm": 0.9317831571882359, + "learning_rate": 1.7336562300009604e-06, + "loss": 0.7724621295928955, + "step": 2396 + }, + { + "epoch": 0.5523041474654378, + "grad_norm": 0.7823556125482615, + "learning_rate": 1.7333972903211675e-06, + "loss": 0.8646600246429443, + "step": 2397 + }, + { + "epoch": 0.5525345622119816, + "grad_norm": 0.6673069571562762, + "learning_rate": 1.7331382441897286e-06, + "loss": 0.7143402099609375, + "step": 2398 + }, + { + "epoch": 0.5527649769585253, + "grad_norm": 0.9600129950475998, + "learning_rate": 1.7328790916442446e-06, + "loss": 0.8229624032974243, + "step": 2399 + }, + { + "epoch": 0.5529953917050692, + "grad_norm": 0.8815652742153803, + "learning_rate": 1.7326198327223303e-06, + "loss": 0.7244875431060791, + "step": 2400 + }, + { + "epoch": 0.5532258064516129, + "grad_norm": 0.8586401947703556, + "learning_rate": 1.7323604674616173e-06, + "loss": 0.7797688245773315, + "step": 2401 + }, + { + "epoch": 0.5534562211981566, + "grad_norm": 0.7923271764392044, + "learning_rate": 1.7321009958997519e-06, + "loss": 0.752421498298645, + "step": 2402 + }, + { + "epoch": 0.5536866359447005, + "grad_norm": 0.880725843060538, + "learning_rate": 1.7318414180743962e-06, + "loss": 0.8285892009735107, + "step": 2403 + }, + { + "epoch": 0.5539170506912442, + "grad_norm": 0.7844500606150882, + "learning_rate": 1.7315817340232272e-06, + "loss": 0.8247888088226318, + "step": 2404 + }, 
+ { + "epoch": 0.554147465437788, + "grad_norm": 0.7041289847587934, + "learning_rate": 1.7313219437839384e-06, + "loss": 0.7713418006896973, + "step": 2405 + }, + { + "epoch": 0.5543778801843318, + "grad_norm": 0.8575067968238488, + "learning_rate": 1.7310620473942374e-06, + "loss": 0.8748825788497925, + "step": 2406 + }, + { + "epoch": 0.5546082949308756, + "grad_norm": 0.899949436927101, + "learning_rate": 1.730802044891848e-06, + "loss": 0.9255902767181396, + "step": 2407 + }, + { + "epoch": 0.5548387096774193, + "grad_norm": 0.7968868837370462, + "learning_rate": 1.7305419363145093e-06, + "loss": 0.7226976156234741, + "step": 2408 + }, + { + "epoch": 0.5550691244239632, + "grad_norm": 0.8868777191693532, + "learning_rate": 1.7302817216999754e-06, + "loss": 0.9024704694747925, + "step": 2409 + }, + { + "epoch": 0.5552995391705069, + "grad_norm": 0.8331382998314191, + "learning_rate": 1.7300214010860168e-06, + "loss": 0.7857767343521118, + "step": 2410 + }, + { + "epoch": 0.5555299539170507, + "grad_norm": 0.7111146090264087, + "learning_rate": 1.7297609745104183e-06, + "loss": 0.7280064821243286, + "step": 2411 + }, + { + "epoch": 0.5557603686635945, + "grad_norm": 0.8916895272866717, + "learning_rate": 1.72950044201098e-06, + "loss": 0.8909369111061096, + "step": 2412 + }, + { + "epoch": 0.5559907834101383, + "grad_norm": 0.8724458169518867, + "learning_rate": 1.7292398036255183e-06, + "loss": 0.8543871641159058, + "step": 2413 + }, + { + "epoch": 0.556221198156682, + "grad_norm": 0.7364121573266219, + "learning_rate": 1.7289790593918648e-06, + "loss": 0.6934928894042969, + "step": 2414 + }, + { + "epoch": 0.5564516129032258, + "grad_norm": 0.7288921937743348, + "learning_rate": 1.7287182093478658e-06, + "loss": 0.6323058605194092, + "step": 2415 + }, + { + "epoch": 0.5566820276497696, + "grad_norm": 0.9203399963548066, + "learning_rate": 1.7284572535313833e-06, + "loss": 0.8607437014579773, + "step": 2416 + }, + { + "epoch": 0.5569124423963133, + "grad_norm": 
0.8312318653257402, + "learning_rate": 1.7281961919802948e-06, + "loss": 0.932594358921051, + "step": 2417 + }, + { + "epoch": 0.5571428571428572, + "grad_norm": 0.8132622554262421, + "learning_rate": 1.727935024732493e-06, + "loss": 0.7239062786102295, + "step": 2418 + }, + { + "epoch": 0.5573732718894009, + "grad_norm": 0.770772581447816, + "learning_rate": 1.727673751825886e-06, + "loss": 0.7600498199462891, + "step": 2419 + }, + { + "epoch": 0.5576036866359447, + "grad_norm": 0.9553759629640377, + "learning_rate": 1.7274123732983977e-06, + "loss": 0.6888710260391235, + "step": 2420 + }, + { + "epoch": 0.5578341013824885, + "grad_norm": 0.9472816188704319, + "learning_rate": 1.7271508891879657e-06, + "loss": 0.9768370389938354, + "step": 2421 + }, + { + "epoch": 0.5580645161290323, + "grad_norm": 0.7612474564207412, + "learning_rate": 1.7268892995325453e-06, + "loss": 0.7302272319793701, + "step": 2422 + }, + { + "epoch": 0.558294930875576, + "grad_norm": 0.952809818405442, + "learning_rate": 1.7266276043701052e-06, + "loss": 0.7664496898651123, + "step": 2423 + }, + { + "epoch": 0.5585253456221199, + "grad_norm": 0.7105308716985692, + "learning_rate": 1.72636580373863e-06, + "loss": 0.7672723531723022, + "step": 2424 + }, + { + "epoch": 0.5587557603686636, + "grad_norm": 0.9094827818764729, + "learning_rate": 1.7261038976761203e-06, + "loss": 0.7467625141143799, + "step": 2425 + }, + { + "epoch": 0.5589861751152074, + "grad_norm": 1.0609555724090778, + "learning_rate": 1.7258418862205908e-06, + "loss": 0.899692177772522, + "step": 2426 + }, + { + "epoch": 0.5592165898617512, + "grad_norm": 0.8726314105037919, + "learning_rate": 1.7255797694100724e-06, + "loss": 0.9654138088226318, + "step": 2427 + }, + { + "epoch": 0.5594470046082949, + "grad_norm": 1.0261431779245342, + "learning_rate": 1.725317547282611e-06, + "loss": 0.8487396836280823, + "step": 2428 + }, + { + "epoch": 0.5596774193548387, + "grad_norm": 0.7692614118612008, + "learning_rate": 
1.7250552198762682e-06, + "loss": 0.7785199284553528, + "step": 2429 + }, + { + "epoch": 0.5599078341013825, + "grad_norm": 0.7931069179642137, + "learning_rate": 1.7247927872291198e-06, + "loss": 0.9243934750556946, + "step": 2430 + }, + { + "epoch": 0.5601382488479263, + "grad_norm": 0.6935679959823647, + "learning_rate": 1.724530249379258e-06, + "loss": 0.8674443960189819, + "step": 2431 + }, + { + "epoch": 0.56036866359447, + "grad_norm": 0.7564063858493598, + "learning_rate": 1.7242676063647895e-06, + "loss": 0.8022270202636719, + "step": 2432 + }, + { + "epoch": 0.5605990783410139, + "grad_norm": 0.8222900385869091, + "learning_rate": 1.7240048582238367e-06, + "loss": 0.8696796894073486, + "step": 2433 + }, + { + "epoch": 0.5608294930875576, + "grad_norm": 0.8560234672396506, + "learning_rate": 1.7237420049945374e-06, + "loss": 0.7752439975738525, + "step": 2434 + }, + { + "epoch": 0.5610599078341014, + "grad_norm": 0.9286340475505503, + "learning_rate": 1.723479046715044e-06, + "loss": 0.7660201787948608, + "step": 2435 + }, + { + "epoch": 0.5612903225806452, + "grad_norm": 0.7639410477119124, + "learning_rate": 1.7232159834235249e-06, + "loss": 0.9319918155670166, + "step": 2436 + }, + { + "epoch": 0.561520737327189, + "grad_norm": 0.8121463742755932, + "learning_rate": 1.722952815158163e-06, + "loss": 0.8175421357154846, + "step": 2437 + }, + { + "epoch": 0.5617511520737327, + "grad_norm": 0.5646145066796834, + "learning_rate": 1.7226895419571573e-06, + "loss": 0.6959598064422607, + "step": 2438 + }, + { + "epoch": 0.5619815668202764, + "grad_norm": 0.9804875774075569, + "learning_rate": 1.722426163858721e-06, + "loss": 0.8629111051559448, + "step": 2439 + }, + { + "epoch": 0.5622119815668203, + "grad_norm": 1.1148628556143985, + "learning_rate": 1.7221626809010833e-06, + "loss": 0.8222612142562866, + "step": 2440 + }, + { + "epoch": 0.562442396313364, + "grad_norm": 0.7126052614291007, + "learning_rate": 1.721899093122489e-06, + "loss": 
0.8329352140426636, + "step": 2441 + }, + { + "epoch": 0.5626728110599079, + "grad_norm": 0.7803804718208336, + "learning_rate": 1.7216354005611966e-06, + "loss": 0.8777236938476562, + "step": 2442 + }, + { + "epoch": 0.5629032258064516, + "grad_norm": 0.8601336969746237, + "learning_rate": 1.7213716032554814e-06, + "loss": 0.8487246036529541, + "step": 2443 + }, + { + "epoch": 0.5631336405529954, + "grad_norm": 0.9035051311861264, + "learning_rate": 1.7211077012436327e-06, + "loss": 0.8429645299911499, + "step": 2444 + }, + { + "epoch": 0.5633640552995391, + "grad_norm": 0.9883668092610399, + "learning_rate": 1.720843694563956e-06, + "loss": 0.7683241367340088, + "step": 2445 + }, + { + "epoch": 0.563594470046083, + "grad_norm": 0.839045001132387, + "learning_rate": 1.7205795832547715e-06, + "loss": 0.8468153476715088, + "step": 2446 + }, + { + "epoch": 0.5638248847926267, + "grad_norm": 0.7865527461309724, + "learning_rate": 1.7203153673544136e-06, + "loss": 0.7957276105880737, + "step": 2447 + }, + { + "epoch": 0.5640552995391706, + "grad_norm": 0.7301149604369097, + "learning_rate": 1.7200510469012343e-06, + "loss": 0.703586757183075, + "step": 2448 + }, + { + "epoch": 0.5642857142857143, + "grad_norm": 0.9237896103754119, + "learning_rate": 1.7197866219335988e-06, + "loss": 0.8399583101272583, + "step": 2449 + }, + { + "epoch": 0.5645161290322581, + "grad_norm": 0.9147331037465749, + "learning_rate": 1.7195220924898882e-06, + "loss": 0.8198127746582031, + "step": 2450 + }, + { + "epoch": 0.5647465437788018, + "grad_norm": 0.8751939719560463, + "learning_rate": 1.7192574586084977e-06, + "loss": 0.8345620632171631, + "step": 2451 + }, + { + "epoch": 0.5649769585253456, + "grad_norm": 0.5798955427424709, + "learning_rate": 1.71899272032784e-06, + "loss": 0.7717207670211792, + "step": 2452 + }, + { + "epoch": 0.5652073732718894, + "grad_norm": 1.0279650439820616, + "learning_rate": 1.7187278776863402e-06, + "loss": 0.9178022146224976, + "step": 2453 + }, + { + 
"epoch": 0.5654377880184331, + "grad_norm": 0.8586126622693072, + "learning_rate": 1.7184629307224405e-06, + "loss": 0.802221417427063, + "step": 2454 + }, + { + "epoch": 0.565668202764977, + "grad_norm": 0.9691589621671786, + "learning_rate": 1.718197879474598e-06, + "loss": 0.8785420656204224, + "step": 2455 + }, + { + "epoch": 0.5658986175115207, + "grad_norm": 0.8087978885886937, + "learning_rate": 1.7179327239812835e-06, + "loss": 0.866797924041748, + "step": 2456 + }, + { + "epoch": 0.5661290322580645, + "grad_norm": 0.7850858892434726, + "learning_rate": 1.7176674642809848e-06, + "loss": 0.8483223915100098, + "step": 2457 + }, + { + "epoch": 0.5663594470046083, + "grad_norm": 0.7634922973789945, + "learning_rate": 1.7174021004122038e-06, + "loss": 0.815066933631897, + "step": 2458 + }, + { + "epoch": 0.5665898617511521, + "grad_norm": 0.7286124953848899, + "learning_rate": 1.7171366324134575e-06, + "loss": 0.8584767580032349, + "step": 2459 + }, + { + "epoch": 0.5668202764976958, + "grad_norm": 0.8250445352678845, + "learning_rate": 1.7168710603232783e-06, + "loss": 0.8710953593254089, + "step": 2460 + }, + { + "epoch": 0.5670506912442397, + "grad_norm": 0.9434416859632441, + "learning_rate": 1.7166053841802137e-06, + "loss": 0.8174586892127991, + "step": 2461 + }, + { + "epoch": 0.5672811059907834, + "grad_norm": 0.8270311207697365, + "learning_rate": 1.7163396040228263e-06, + "loss": 0.7240795493125916, + "step": 2462 + }, + { + "epoch": 0.5675115207373271, + "grad_norm": 0.9011815170935621, + "learning_rate": 1.7160737198896938e-06, + "loss": 0.8026313781738281, + "step": 2463 + }, + { + "epoch": 0.567741935483871, + "grad_norm": 0.906377679717593, + "learning_rate": 1.7158077318194088e-06, + "loss": 0.8170863389968872, + "step": 2464 + }, + { + "epoch": 0.5679723502304147, + "grad_norm": 0.7708394273236241, + "learning_rate": 1.7155416398505794e-06, + "loss": 0.7524861097335815, + "step": 2465 + }, + { + "epoch": 0.5682027649769585, + "grad_norm": 
1.053627484653556, + "learning_rate": 1.7152754440218278e-06, + "loss": 0.9895739555358887, + "step": 2466 + }, + { + "epoch": 0.5684331797235023, + "grad_norm": 0.8044893250734789, + "learning_rate": 1.7150091443717924e-06, + "loss": 0.840786874294281, + "step": 2467 + }, + { + "epoch": 0.5686635944700461, + "grad_norm": 0.7235386782272144, + "learning_rate": 1.7147427409391265e-06, + "loss": 0.8896929025650024, + "step": 2468 + }, + { + "epoch": 0.5688940092165898, + "grad_norm": 0.930785639448215, + "learning_rate": 1.714476233762498e-06, + "loss": 0.9940589666366577, + "step": 2469 + }, + { + "epoch": 0.5691244239631337, + "grad_norm": 0.8541894175832414, + "learning_rate": 1.7142096228805896e-06, + "loss": 0.8827046155929565, + "step": 2470 + }, + { + "epoch": 0.5693548387096774, + "grad_norm": 0.8477738552913107, + "learning_rate": 1.7139429083321003e-06, + "loss": 0.8402417302131653, + "step": 2471 + }, + { + "epoch": 0.5695852534562212, + "grad_norm": 1.0681644319875638, + "learning_rate": 1.7136760901557428e-06, + "loss": 0.9298208951950073, + "step": 2472 + }, + { + "epoch": 0.569815668202765, + "grad_norm": 0.799198798955049, + "learning_rate": 1.7134091683902456e-06, + "loss": 0.7272841930389404, + "step": 2473 + }, + { + "epoch": 0.5700460829493088, + "grad_norm": 0.9504491625382946, + "learning_rate": 1.7131421430743522e-06, + "loss": 0.7767274379730225, + "step": 2474 + }, + { + "epoch": 0.5702764976958525, + "grad_norm": 0.8321899881110706, + "learning_rate": 1.7128750142468205e-06, + "loss": 0.8381883502006531, + "step": 2475 + }, + { + "epoch": 0.5705069124423963, + "grad_norm": 0.722993858034587, + "learning_rate": 1.7126077819464247e-06, + "loss": 0.6917109489440918, + "step": 2476 + }, + { + "epoch": 0.5707373271889401, + "grad_norm": 0.8529687693157456, + "learning_rate": 1.712340446211952e-06, + "loss": 0.848122239112854, + "step": 2477 + }, + { + "epoch": 0.5709677419354838, + "grad_norm": 0.8115142651418973, + "learning_rate": 
1.7120730070822074e-06, + "loss": 0.7880194187164307, + "step": 2478 + }, + { + "epoch": 0.5711981566820277, + "grad_norm": 0.7900923038142705, + "learning_rate": 1.7118054645960077e-06, + "loss": 0.8782297372817993, + "step": 2479 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 0.8386744568018749, + "learning_rate": 1.7115378187921876e-06, + "loss": 0.9030005931854248, + "step": 2480 + }, + { + "epoch": 0.5716589861751152, + "grad_norm": 1.0512780177061767, + "learning_rate": 1.7112700697095953e-06, + "loss": 0.9950683116912842, + "step": 2481 + }, + { + "epoch": 0.571889400921659, + "grad_norm": 0.7851257012482162, + "learning_rate": 1.7110022173870933e-06, + "loss": 0.8825187683105469, + "step": 2482 + }, + { + "epoch": 0.5721198156682028, + "grad_norm": 0.7742449968104124, + "learning_rate": 1.710734261863561e-06, + "loss": 0.7918775081634521, + "step": 2483 + }, + { + "epoch": 0.5723502304147465, + "grad_norm": 0.8385191739759446, + "learning_rate": 1.7104662031778916e-06, + "loss": 1.0219467878341675, + "step": 2484 + }, + { + "epoch": 0.5725806451612904, + "grad_norm": 0.7273611559924746, + "learning_rate": 1.7101980413689931e-06, + "loss": 0.7633316516876221, + "step": 2485 + }, + { + "epoch": 0.5728110599078341, + "grad_norm": 0.9207367628977638, + "learning_rate": 1.7099297764757891e-06, + "loss": 0.8972171545028687, + "step": 2486 + }, + { + "epoch": 0.5730414746543778, + "grad_norm": 0.9268590747994748, + "learning_rate": 1.7096614085372183e-06, + "loss": 0.9467268586158752, + "step": 2487 + }, + { + "epoch": 0.5732718894009217, + "grad_norm": 0.6697903314360253, + "learning_rate": 1.709392937592233e-06, + "loss": 0.7688668370246887, + "step": 2488 + }, + { + "epoch": 0.5735023041474654, + "grad_norm": 0.9069250629096394, + "learning_rate": 1.7091243636798022e-06, + "loss": 0.8521163463592529, + "step": 2489 + }, + { + "epoch": 0.5737327188940092, + "grad_norm": 1.1876566208797892, + "learning_rate": 1.7088556868389087e-06, + "loss": 
0.937403678894043, + "step": 2490 + }, + { + "epoch": 0.573963133640553, + "grad_norm": 0.7484200220587712, + "learning_rate": 1.7085869071085507e-06, + "loss": 0.929175853729248, + "step": 2491 + }, + { + "epoch": 0.5741935483870968, + "grad_norm": 0.75868423962596, + "learning_rate": 1.708318024527741e-06, + "loss": 0.8213154673576355, + "step": 2492 + }, + { + "epoch": 0.5744239631336405, + "grad_norm": 0.8570973138589657, + "learning_rate": 1.708049039135508e-06, + "loss": 0.7666962146759033, + "step": 2493 + }, + { + "epoch": 0.5746543778801844, + "grad_norm": 0.944726193523685, + "learning_rate": 1.707779950970894e-06, + "loss": 0.9787846803665161, + "step": 2494 + }, + { + "epoch": 0.5748847926267281, + "grad_norm": 0.9499725243145639, + "learning_rate": 1.7075107600729575e-06, + "loss": 0.9688804149627686, + "step": 2495 + }, + { + "epoch": 0.5751152073732719, + "grad_norm": 0.7169812071362754, + "learning_rate": 1.7072414664807706e-06, + "loss": 0.7186019420623779, + "step": 2496 + }, + { + "epoch": 0.5753456221198157, + "grad_norm": 0.8737696103531859, + "learning_rate": 1.706972070233421e-06, + "loss": 0.814068615436554, + "step": 2497 + }, + { + "epoch": 0.5755760368663595, + "grad_norm": 0.8930538892783126, + "learning_rate": 1.7067025713700111e-06, + "loss": 0.8439940214157104, + "step": 2498 + }, + { + "epoch": 0.5758064516129032, + "grad_norm": 1.0358274070142592, + "learning_rate": 1.706432969929659e-06, + "loss": 1.0199556350708008, + "step": 2499 + }, + { + "epoch": 0.576036866359447, + "grad_norm": 0.8418547467759998, + "learning_rate": 1.7061632659514964e-06, + "loss": 0.9422338008880615, + "step": 2500 + }, + { + "epoch": 0.5762672811059908, + "grad_norm": 0.8692517624840741, + "learning_rate": 1.7058934594746704e-06, + "loss": 0.9307081699371338, + "step": 2501 + }, + { + "epoch": 0.5764976958525345, + "grad_norm": 0.8121605874769848, + "learning_rate": 1.7056235505383433e-06, + "loss": 0.7202768325805664, + "step": 2502 + }, + { + "epoch": 
0.5767281105990784, + "grad_norm": 0.915285295701684, + "learning_rate": 1.7053535391816923e-06, + "loss": 1.0184223651885986, + "step": 2503 + }, + { + "epoch": 0.5769585253456221, + "grad_norm": 0.8238573361353964, + "learning_rate": 1.7050834254439085e-06, + "loss": 0.7957574129104614, + "step": 2504 + }, + { + "epoch": 0.5771889400921659, + "grad_norm": 0.9632097611385487, + "learning_rate": 1.7048132093641989e-06, + "loss": 0.9694541096687317, + "step": 2505 + }, + { + "epoch": 0.5774193548387097, + "grad_norm": 0.7406781740567284, + "learning_rate": 1.704542890981785e-06, + "loss": 0.8427075147628784, + "step": 2506 + }, + { + "epoch": 0.5776497695852535, + "grad_norm": 0.7137957479223747, + "learning_rate": 1.7042724703359032e-06, + "loss": 0.7745763063430786, + "step": 2507 + }, + { + "epoch": 0.5778801843317972, + "grad_norm": 0.8935647722203462, + "learning_rate": 1.7040019474658047e-06, + "loss": 0.8179641962051392, + "step": 2508 + }, + { + "epoch": 0.5781105990783411, + "grad_norm": 0.9010033541227577, + "learning_rate": 1.7037313224107557e-06, + "loss": 0.8118200302124023, + "step": 2509 + }, + { + "epoch": 0.5783410138248848, + "grad_norm": 0.7297456575398072, + "learning_rate": 1.7034605952100364e-06, + "loss": 0.7892665863037109, + "step": 2510 + }, + { + "epoch": 0.5785714285714286, + "grad_norm": 0.736874372872981, + "learning_rate": 1.7031897659029434e-06, + "loss": 0.7442026734352112, + "step": 2511 + }, + { + "epoch": 0.5788018433179724, + "grad_norm": 0.9375581770522491, + "learning_rate": 1.7029188345287865e-06, + "loss": 0.8179585933685303, + "step": 2512 + }, + { + "epoch": 0.5790322580645161, + "grad_norm": 0.8710660194733852, + "learning_rate": 1.7026478011268918e-06, + "loss": 0.7569797039031982, + "step": 2513 + }, + { + "epoch": 0.5792626728110599, + "grad_norm": 0.8952615874674131, + "learning_rate": 1.7023766657365984e-06, + "loss": 0.8464581966400146, + "step": 2514 + }, + { + "epoch": 0.5794930875576036, + "grad_norm": 
0.9645554070219402, + "learning_rate": 1.702105428397262e-06, + "loss": 0.7326645255088806, + "step": 2515 + }, + { + "epoch": 0.5797235023041475, + "grad_norm": 0.8243138835822689, + "learning_rate": 1.7018340891482522e-06, + "loss": 0.7993732690811157, + "step": 2516 + }, + { + "epoch": 0.5799539170506912, + "grad_norm": 0.7406582307230963, + "learning_rate": 1.7015626480289532e-06, + "loss": 0.8124513626098633, + "step": 2517 + }, + { + "epoch": 0.580184331797235, + "grad_norm": 0.7758431888553803, + "learning_rate": 1.701291105078765e-06, + "loss": 0.9075840711593628, + "step": 2518 + }, + { + "epoch": 0.5804147465437788, + "grad_norm": 0.8900052121004013, + "learning_rate": 1.7010194603371009e-06, + "loss": 0.8212069272994995, + "step": 2519 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.8737089153257858, + "learning_rate": 1.7007477138433903e-06, + "loss": 0.7582074999809265, + "step": 2520 + }, + { + "epoch": 0.5808755760368663, + "grad_norm": 0.7402264811343096, + "learning_rate": 1.7004758656370769e-06, + "loss": 0.8917636871337891, + "step": 2521 + }, + { + "epoch": 0.5811059907834102, + "grad_norm": 0.9496944008191128, + "learning_rate": 1.7002039157576186e-06, + "loss": 0.8919704556465149, + "step": 2522 + }, + { + "epoch": 0.5813364055299539, + "grad_norm": 0.8803733592170607, + "learning_rate": 1.699931864244489e-06, + "loss": 0.7474988698959351, + "step": 2523 + }, + { + "epoch": 0.5815668202764976, + "grad_norm": 0.9179665061824968, + "learning_rate": 1.6996597111371758e-06, + "loss": 0.8596241474151611, + "step": 2524 + }, + { + "epoch": 0.5817972350230415, + "grad_norm": 0.8260474861422493, + "learning_rate": 1.699387456475182e-06, + "loss": 0.9316335916519165, + "step": 2525 + }, + { + "epoch": 0.5820276497695852, + "grad_norm": 0.7937616616577486, + "learning_rate": 1.6991151002980248e-06, + "loss": 0.7364813089370728, + "step": 2526 + }, + { + "epoch": 0.582258064516129, + "grad_norm": 0.9072210580359311, + "learning_rate": 
1.698842642645236e-06, + "loss": 0.789472758769989, + "step": 2527 + }, + { + "epoch": 0.5824884792626728, + "grad_norm": 0.9988239379820413, + "learning_rate": 1.6985700835563627e-06, + "loss": 1.024861216545105, + "step": 2528 + }, + { + "epoch": 0.5827188940092166, + "grad_norm": 0.9746619752287254, + "learning_rate": 1.6982974230709667e-06, + "loss": 0.8465025424957275, + "step": 2529 + }, + { + "epoch": 0.5829493087557603, + "grad_norm": 1.0146741583341603, + "learning_rate": 1.6980246612286244e-06, + "loss": 0.7502799034118652, + "step": 2530 + }, + { + "epoch": 0.5831797235023042, + "grad_norm": 0.866831185770848, + "learning_rate": 1.6977517980689264e-06, + "loss": 0.8019870519638062, + "step": 2531 + }, + { + "epoch": 0.5834101382488479, + "grad_norm": 0.783761351839215, + "learning_rate": 1.6974788336314788e-06, + "loss": 0.9048774242401123, + "step": 2532 + }, + { + "epoch": 0.5836405529953917, + "grad_norm": 0.8577409607010705, + "learning_rate": 1.6972057679559018e-06, + "loss": 0.8411067724227905, + "step": 2533 + }, + { + "epoch": 0.5838709677419355, + "grad_norm": 0.7158353942796929, + "learning_rate": 1.6969326010818304e-06, + "loss": 0.7399133443832397, + "step": 2534 + }, + { + "epoch": 0.5841013824884793, + "grad_norm": 0.7309631229110555, + "learning_rate": 1.6966593330489144e-06, + "loss": 0.7553995847702026, + "step": 2535 + }, + { + "epoch": 0.584331797235023, + "grad_norm": 0.7563702103772202, + "learning_rate": 1.6963859638968188e-06, + "loss": 0.8405054807662964, + "step": 2536 + }, + { + "epoch": 0.5845622119815668, + "grad_norm": 0.739785555800379, + "learning_rate": 1.6961124936652223e-06, + "loss": 0.7619640231132507, + "step": 2537 + }, + { + "epoch": 0.5847926267281106, + "grad_norm": 0.6189871014888121, + "learning_rate": 1.6958389223938187e-06, + "loss": 0.7785576581954956, + "step": 2538 + }, + { + "epoch": 0.5850230414746543, + "grad_norm": 1.0593569746028593, + "learning_rate": 1.695565250122317e-06, + "loss": 
0.9230754375457764, + "step": 2539 + }, + { + "epoch": 0.5852534562211982, + "grad_norm": 0.9087046574881754, + "learning_rate": 1.69529147689044e-06, + "loss": 0.798599362373352, + "step": 2540 + }, + { + "epoch": 0.5854838709677419, + "grad_norm": 0.7546263570181881, + "learning_rate": 1.6950176027379253e-06, + "loss": 0.8491491079330444, + "step": 2541 + }, + { + "epoch": 0.5857142857142857, + "grad_norm": 0.9063392015432612, + "learning_rate": 1.694743627704526e-06, + "loss": 0.7906054854393005, + "step": 2542 + }, + { + "epoch": 0.5859447004608295, + "grad_norm": 0.8834118839199732, + "learning_rate": 1.6944695518300084e-06, + "loss": 0.8178746700286865, + "step": 2543 + }, + { + "epoch": 0.5861751152073733, + "grad_norm": 0.9444844508582247, + "learning_rate": 1.6941953751541552e-06, + "loss": 0.867972731590271, + "step": 2544 + }, + { + "epoch": 0.586405529953917, + "grad_norm": 0.8815618278989616, + "learning_rate": 1.6939210977167622e-06, + "loss": 0.8000613451004028, + "step": 2545 + }, + { + "epoch": 0.5866359447004609, + "grad_norm": 0.938056940810552, + "learning_rate": 1.6936467195576403e-06, + "loss": 0.8473562002182007, + "step": 2546 + }, + { + "epoch": 0.5868663594470046, + "grad_norm": 0.960324746454341, + "learning_rate": 1.6933722407166156e-06, + "loss": 0.971686065196991, + "step": 2547 + }, + { + "epoch": 0.5870967741935483, + "grad_norm": 0.718798566737211, + "learning_rate": 1.6930976612335276e-06, + "loss": 0.6679604053497314, + "step": 2548 + }, + { + "epoch": 0.5873271889400922, + "grad_norm": 0.8662288511956259, + "learning_rate": 1.692822981148232e-06, + "loss": 0.81952303647995, + "step": 2549 + }, + { + "epoch": 0.5875576036866359, + "grad_norm": 0.7171085968938, + "learning_rate": 1.6925482005005978e-06, + "loss": 0.8711779713630676, + "step": 2550 + }, + { + "epoch": 0.5877880184331797, + "grad_norm": 0.8419799604008648, + "learning_rate": 1.6922733193305093e-06, + "loss": 0.930451512336731, + "step": 2551 + }, + { + "epoch": 
0.5880184331797235, + "grad_norm": 0.8349862719015169, + "learning_rate": 1.6919983376778647e-06, + "loss": 0.8435598611831665, + "step": 2552 + }, + { + "epoch": 0.5882488479262673, + "grad_norm": 0.8491940209701643, + "learning_rate": 1.6917232555825774e-06, + "loss": 0.8868621587753296, + "step": 2553 + }, + { + "epoch": 0.588479262672811, + "grad_norm": 0.7537041162487105, + "learning_rate": 1.6914480730845752e-06, + "loss": 0.6821786165237427, + "step": 2554 + }, + { + "epoch": 0.5887096774193549, + "grad_norm": 0.8487688242201222, + "learning_rate": 1.691172790223801e-06, + "loss": 0.7241402864456177, + "step": 2555 + }, + { + "epoch": 0.5889400921658986, + "grad_norm": 0.7422220828348832, + "learning_rate": 1.690897407040211e-06, + "loss": 0.7477490305900574, + "step": 2556 + }, + { + "epoch": 0.5891705069124424, + "grad_norm": 0.7636915444427955, + "learning_rate": 1.690621923573777e-06, + "loss": 0.7881484031677246, + "step": 2557 + }, + { + "epoch": 0.5894009216589862, + "grad_norm": 0.959692830610789, + "learning_rate": 1.6903463398644848e-06, + "loss": 0.8292979001998901, + "step": 2558 + }, + { + "epoch": 0.58963133640553, + "grad_norm": 0.711937804642515, + "learning_rate": 1.690070655952336e-06, + "loss": 0.7068917751312256, + "step": 2559 + }, + { + "epoch": 0.5898617511520737, + "grad_norm": 1.1143023950252693, + "learning_rate": 1.6897948718773443e-06, + "loss": 0.8907356262207031, + "step": 2560 + }, + { + "epoch": 0.5900921658986175, + "grad_norm": 0.7930222105996996, + "learning_rate": 1.6895189876795405e-06, + "loss": 0.7762824892997742, + "step": 2561 + }, + { + "epoch": 0.5903225806451613, + "grad_norm": 1.0922797891559575, + "learning_rate": 1.6892430033989685e-06, + "loss": 0.9682759046554565, + "step": 2562 + }, + { + "epoch": 0.590552995391705, + "grad_norm": 0.8231082510824629, + "learning_rate": 1.6889669190756866e-06, + "loss": 0.7594735622406006, + "step": 2563 + }, + { + "epoch": 0.5907834101382489, + "grad_norm": 
0.8117866090414669, + "learning_rate": 1.6886907347497687e-06, + "loss": 0.8161605000495911, + "step": 2564 + }, + { + "epoch": 0.5910138248847926, + "grad_norm": 0.8557086150703954, + "learning_rate": 1.6884144504613023e-06, + "loss": 0.9390331506729126, + "step": 2565 + }, + { + "epoch": 0.5912442396313364, + "grad_norm": 0.9387748138594502, + "learning_rate": 1.68813806625039e-06, + "loss": 0.8895832300186157, + "step": 2566 + }, + { + "epoch": 0.5914746543778802, + "grad_norm": 0.8802161511936953, + "learning_rate": 1.687861582157148e-06, + "loss": 0.7779919505119324, + "step": 2567 + }, + { + "epoch": 0.591705069124424, + "grad_norm": 1.139110447936057, + "learning_rate": 1.687584998221708e-06, + "loss": 0.8974252343177795, + "step": 2568 + }, + { + "epoch": 0.5919354838709677, + "grad_norm": 0.8073269492940187, + "learning_rate": 1.687308314484216e-06, + "loss": 0.8487393856048584, + "step": 2569 + }, + { + "epoch": 0.5921658986175116, + "grad_norm": 0.8310515688854938, + "learning_rate": 1.6870315309848318e-06, + "loss": 0.8356295824050903, + "step": 2570 + }, + { + "epoch": 0.5923963133640553, + "grad_norm": 0.9033360313158958, + "learning_rate": 1.6867546477637307e-06, + "loss": 0.8180248737335205, + "step": 2571 + }, + { + "epoch": 0.5926267281105991, + "grad_norm": 0.6950974205275126, + "learning_rate": 1.6864776648611013e-06, + "loss": 0.8456830978393555, + "step": 2572 + }, + { + "epoch": 0.5928571428571429, + "grad_norm": 0.9039181033590447, + "learning_rate": 1.6862005823171476e-06, + "loss": 0.8378905057907104, + "step": 2573 + }, + { + "epoch": 0.5930875576036866, + "grad_norm": 0.835432630485808, + "learning_rate": 1.685923400172088e-06, + "loss": 0.8060408234596252, + "step": 2574 + }, + { + "epoch": 0.5933179723502304, + "grad_norm": 0.8354491785263655, + "learning_rate": 1.685646118466155e-06, + "loss": 0.7550709247589111, + "step": 2575 + }, + { + "epoch": 0.5935483870967742, + "grad_norm": 0.805260271869055, + "learning_rate": 
1.6853687372395955e-06, + "loss": 0.8475208282470703, + "step": 2576 + }, + { + "epoch": 0.593778801843318, + "grad_norm": 1.0626255995304192, + "learning_rate": 1.6850912565326709e-06, + "loss": 0.8681533336639404, + "step": 2577 + }, + { + "epoch": 0.5940092165898617, + "grad_norm": 0.9000714044087056, + "learning_rate": 1.6848136763856573e-06, + "loss": 0.7756578922271729, + "step": 2578 + }, + { + "epoch": 0.5942396313364056, + "grad_norm": 1.1163759985623336, + "learning_rate": 1.6845359968388456e-06, + "loss": 0.8910564184188843, + "step": 2579 + }, + { + "epoch": 0.5944700460829493, + "grad_norm": 0.7484768523036672, + "learning_rate": 1.6842582179325397e-06, + "loss": 0.7293382883071899, + "step": 2580 + }, + { + "epoch": 0.5947004608294931, + "grad_norm": 0.8208214849988605, + "learning_rate": 1.6839803397070597e-06, + "loss": 0.8497427105903625, + "step": 2581 + }, + { + "epoch": 0.5949308755760369, + "grad_norm": 0.9124854441462121, + "learning_rate": 1.6837023622027386e-06, + "loss": 0.800891637802124, + "step": 2582 + }, + { + "epoch": 0.5951612903225807, + "grad_norm": 0.8887114325795745, + "learning_rate": 1.683424285459925e-06, + "loss": 0.889703631401062, + "step": 2583 + }, + { + "epoch": 0.5953917050691244, + "grad_norm": 0.83139201735135, + "learning_rate": 1.6831461095189808e-06, + "loss": 0.7500913143157959, + "step": 2584 + }, + { + "epoch": 0.5956221198156681, + "grad_norm": 0.8260167845821169, + "learning_rate": 1.6828678344202834e-06, + "loss": 0.8575263023376465, + "step": 2585 + }, + { + "epoch": 0.595852534562212, + "grad_norm": 0.8796083393133354, + "learning_rate": 1.6825894602042238e-06, + "loss": 0.7754372358322144, + "step": 2586 + }, + { + "epoch": 0.5960829493087557, + "grad_norm": 1.0529816523070568, + "learning_rate": 1.6823109869112074e-06, + "loss": 0.8861502408981323, + "step": 2587 + }, + { + "epoch": 0.5963133640552996, + "grad_norm": 0.7738036894554111, + "learning_rate": 1.6820324145816548e-06, + "loss": 
0.725920557975769, + "step": 2588 + }, + { + "epoch": 0.5965437788018433, + "grad_norm": 0.7887605961214393, + "learning_rate": 1.6817537432559998e-06, + "loss": 0.6195499897003174, + "step": 2589 + }, + { + "epoch": 0.5967741935483871, + "grad_norm": 0.8405918169035362, + "learning_rate": 1.6814749729746918e-06, + "loss": 0.8757472038269043, + "step": 2590 + }, + { + "epoch": 0.5970046082949308, + "grad_norm": 0.8710168774832879, + "learning_rate": 1.6811961037781934e-06, + "loss": 0.8024059534072876, + "step": 2591 + }, + { + "epoch": 0.5972350230414747, + "grad_norm": 1.1763814328442668, + "learning_rate": 1.6809171357069825e-06, + "loss": 0.8397082090377808, + "step": 2592 + }, + { + "epoch": 0.5974654377880184, + "grad_norm": 0.8163820389720032, + "learning_rate": 1.6806380688015507e-06, + "loss": 0.7693872451782227, + "step": 2593 + }, + { + "epoch": 0.5976958525345623, + "grad_norm": 0.7668441612993817, + "learning_rate": 1.6803589031024043e-06, + "loss": 0.7918043732643127, + "step": 2594 + }, + { + "epoch": 0.597926267281106, + "grad_norm": 0.7951277033960863, + "learning_rate": 1.680079638650064e-06, + "loss": 0.8046969175338745, + "step": 2595 + }, + { + "epoch": 0.5981566820276498, + "grad_norm": 0.9724191958452253, + "learning_rate": 1.6798002754850643e-06, + "loss": 0.7889789938926697, + "step": 2596 + }, + { + "epoch": 0.5983870967741935, + "grad_norm": 0.8356070849986357, + "learning_rate": 1.6795208136479543e-06, + "loss": 0.874780535697937, + "step": 2597 + }, + { + "epoch": 0.5986175115207373, + "grad_norm": 0.8380940855873632, + "learning_rate": 1.679241253179298e-06, + "loss": 0.8728631734848022, + "step": 2598 + }, + { + "epoch": 0.5988479262672811, + "grad_norm": 0.7909132896338992, + "learning_rate": 1.678961594119673e-06, + "loss": 0.5940345525741577, + "step": 2599 + }, + { + "epoch": 0.5990783410138248, + "grad_norm": 0.7873638428289793, + "learning_rate": 1.6786818365096712e-06, + "loss": 0.8524528741836548, + "step": 2600 + }, + { + 
"epoch": 0.5993087557603687, + "grad_norm": 1.2099119623298256, + "learning_rate": 1.6784019803899e-06, + "loss": 1.0738554000854492, + "step": 2601 + }, + { + "epoch": 0.5995391705069124, + "grad_norm": 0.9987206599474828, + "learning_rate": 1.6781220258009787e-06, + "loss": 0.9146362543106079, + "step": 2602 + }, + { + "epoch": 0.5997695852534562, + "grad_norm": 0.9546196333490053, + "learning_rate": 1.6778419727835434e-06, + "loss": 0.8846019506454468, + "step": 2603 + }, + { + "epoch": 0.6, + "grad_norm": 1.0356705992849526, + "learning_rate": 1.6775618213782427e-06, + "loss": 0.9564694166183472, + "step": 2604 + }, + { + "epoch": 0.6002304147465438, + "grad_norm": 0.8649265876220377, + "learning_rate": 1.6772815716257411e-06, + "loss": 0.7311475276947021, + "step": 2605 + }, + { + "epoch": 0.6004608294930875, + "grad_norm": 0.9996641063184493, + "learning_rate": 1.6770012235667157e-06, + "loss": 0.8198719024658203, + "step": 2606 + }, + { + "epoch": 0.6006912442396314, + "grad_norm": 0.8625199282325245, + "learning_rate": 1.676720777241859e-06, + "loss": 0.7667897939682007, + "step": 2607 + }, + { + "epoch": 0.6009216589861751, + "grad_norm": 0.8068998344787891, + "learning_rate": 1.6764402326918775e-06, + "loss": 0.8438166379928589, + "step": 2608 + }, + { + "epoch": 0.6011520737327188, + "grad_norm": 0.8540979807575545, + "learning_rate": 1.6761595899574913e-06, + "loss": 0.801039457321167, + "step": 2609 + }, + { + "epoch": 0.6013824884792627, + "grad_norm": 0.8234203241271092, + "learning_rate": 1.6758788490794362e-06, + "loss": 0.8063384294509888, + "step": 2610 + }, + { + "epoch": 0.6016129032258064, + "grad_norm": 0.6526013686548677, + "learning_rate": 1.6755980100984609e-06, + "loss": 0.7574378848075867, + "step": 2611 + }, + { + "epoch": 0.6018433179723502, + "grad_norm": 0.9515660687698646, + "learning_rate": 1.6753170730553285e-06, + "loss": 0.7640282511711121, + "step": 2612 + }, + { + "epoch": 0.602073732718894, + "grad_norm": 0.8028588885811085, 
+ "learning_rate": 1.675036037990817e-06, + "loss": 0.8366582989692688, + "step": 2613 + }, + { + "epoch": 0.6023041474654378, + "grad_norm": 0.9790278189412774, + "learning_rate": 1.6747549049457184e-06, + "loss": 0.851488471031189, + "step": 2614 + }, + { + "epoch": 0.6025345622119815, + "grad_norm": 0.8888933014827352, + "learning_rate": 1.6744736739608385e-06, + "loss": 0.6821870803833008, + "step": 2615 + }, + { + "epoch": 0.6027649769585254, + "grad_norm": 0.9884428615602953, + "learning_rate": 1.6741923450769977e-06, + "loss": 0.9263452887535095, + "step": 2616 + }, + { + "epoch": 0.6029953917050691, + "grad_norm": 0.7660541738576696, + "learning_rate": 1.6739109183350303e-06, + "loss": 0.7471155524253845, + "step": 2617 + }, + { + "epoch": 0.603225806451613, + "grad_norm": 0.8463548916487829, + "learning_rate": 1.6736293937757858e-06, + "loss": 0.8859940767288208, + "step": 2618 + }, + { + "epoch": 0.6034562211981567, + "grad_norm": 0.7725702923302962, + "learning_rate": 1.673347771440126e-06, + "loss": 0.8078656792640686, + "step": 2619 + }, + { + "epoch": 0.6036866359447005, + "grad_norm": 0.8796637852565455, + "learning_rate": 1.673066051368929e-06, + "loss": 0.7663185596466064, + "step": 2620 + }, + { + "epoch": 0.6039170506912442, + "grad_norm": 0.7762146466532337, + "learning_rate": 1.6727842336030855e-06, + "loss": 0.7924770712852478, + "step": 2621 + }, + { + "epoch": 0.604147465437788, + "grad_norm": 0.6362525346897695, + "learning_rate": 1.672502318183501e-06, + "loss": 0.7781439423561096, + "step": 2622 + }, + { + "epoch": 0.6043778801843318, + "grad_norm": 0.7824821748809755, + "learning_rate": 1.6722203051510953e-06, + "loss": 0.9342260360717773, + "step": 2623 + }, + { + "epoch": 0.6046082949308755, + "grad_norm": 0.9113412146225311, + "learning_rate": 1.6719381945468024e-06, + "loss": 0.8589230179786682, + "step": 2624 + }, + { + "epoch": 0.6048387096774194, + "grad_norm": 0.9092021688294594, + "learning_rate": 1.67165598641157e-06, + "loss": 
0.8692198991775513, + "step": 2625 + }, + { + "epoch": 0.6050691244239631, + "grad_norm": 0.9811252814075038, + "learning_rate": 1.6713736807863606e-06, + "loss": 0.9220771789550781, + "step": 2626 + }, + { + "epoch": 0.6052995391705069, + "grad_norm": 0.7869789442575379, + "learning_rate": 1.6710912777121497e-06, + "loss": 0.670639157295227, + "step": 2627 + }, + { + "epoch": 0.6055299539170507, + "grad_norm": 0.8458627233906328, + "learning_rate": 1.6708087772299287e-06, + "loss": 0.780914306640625, + "step": 2628 + }, + { + "epoch": 0.6057603686635945, + "grad_norm": 0.7718782555310939, + "learning_rate": 1.6705261793807014e-06, + "loss": 0.836430549621582, + "step": 2629 + }, + { + "epoch": 0.6059907834101382, + "grad_norm": 0.8965474432723056, + "learning_rate": 1.670243484205487e-06, + "loss": 0.84266197681427, + "step": 2630 + }, + { + "epoch": 0.6062211981566821, + "grad_norm": 0.8992013517980091, + "learning_rate": 1.6699606917453184e-06, + "loss": 0.9276752471923828, + "step": 2631 + }, + { + "epoch": 0.6064516129032258, + "grad_norm": 0.8740634897243095, + "learning_rate": 1.6696778020412418e-06, + "loss": 0.8319100141525269, + "step": 2632 + }, + { + "epoch": 0.6066820276497696, + "grad_norm": 0.9778851785690291, + "learning_rate": 1.669394815134319e-06, + "loss": 0.7511987686157227, + "step": 2633 + }, + { + "epoch": 0.6069124423963134, + "grad_norm": 0.9559089829828732, + "learning_rate": 1.6691117310656249e-06, + "loss": 0.7847566604614258, + "step": 2634 + }, + { + "epoch": 0.6071428571428571, + "grad_norm": 0.7352732117136743, + "learning_rate": 1.668828549876249e-06, + "loss": 0.8598428964614868, + "step": 2635 + }, + { + "epoch": 0.6073732718894009, + "grad_norm": 0.9632462301651329, + "learning_rate": 1.6685452716072942e-06, + "loss": 0.8676267266273499, + "step": 2636 + }, + { + "epoch": 0.6076036866359447, + "grad_norm": 0.9796050613045469, + "learning_rate": 1.6682618962998787e-06, + "loss": 0.8139858841896057, + "step": 2637 + }, + { + 
"epoch": 0.6078341013824885, + "grad_norm": 0.9214980939594923, + "learning_rate": 1.6679784239951334e-06, + "loss": 0.878848671913147, + "step": 2638 + }, + { + "epoch": 0.6080645161290322, + "grad_norm": 0.8942413316087445, + "learning_rate": 1.6676948547342038e-06, + "loss": 0.7094229459762573, + "step": 2639 + }, + { + "epoch": 0.6082949308755761, + "grad_norm": 0.7183954232108332, + "learning_rate": 1.6674111885582502e-06, + "loss": 0.7908186912536621, + "step": 2640 + }, + { + "epoch": 0.6085253456221198, + "grad_norm": 0.705517985038791, + "learning_rate": 1.6671274255084465e-06, + "loss": 0.7205992341041565, + "step": 2641 + }, + { + "epoch": 0.6087557603686636, + "grad_norm": 0.937951031991606, + "learning_rate": 1.6668435656259796e-06, + "loss": 0.8098955750465393, + "step": 2642 + }, + { + "epoch": 0.6089861751152074, + "grad_norm": 0.8047793122116887, + "learning_rate": 1.6665596089520522e-06, + "loss": 0.9344205856323242, + "step": 2643 + }, + { + "epoch": 0.6092165898617512, + "grad_norm": 0.73132257965357, + "learning_rate": 1.6662755555278798e-06, + "loss": 0.6149121522903442, + "step": 2644 + }, + { + "epoch": 0.6094470046082949, + "grad_norm": 1.1550816011183633, + "learning_rate": 1.6659914053946929e-06, + "loss": 0.790631115436554, + "step": 2645 + }, + { + "epoch": 0.6096774193548387, + "grad_norm": 0.9832349740984434, + "learning_rate": 1.6657071585937349e-06, + "loss": 0.7789372801780701, + "step": 2646 + }, + { + "epoch": 0.6099078341013825, + "grad_norm": 0.7425679816784971, + "learning_rate": 1.6654228151662641e-06, + "loss": 0.9119753837585449, + "step": 2647 + }, + { + "epoch": 0.6101382488479262, + "grad_norm": 1.0635804319271085, + "learning_rate": 1.6651383751535526e-06, + "loss": 0.827568769454956, + "step": 2648 + }, + { + "epoch": 0.6103686635944701, + "grad_norm": 0.9620609244203838, + "learning_rate": 1.6648538385968865e-06, + "loss": 0.8862377405166626, + "step": 2649 + }, + { + "epoch": 0.6105990783410138, + "grad_norm": 
0.7954209003880245, + "learning_rate": 1.6645692055375658e-06, + "loss": 0.7765665054321289, + "step": 2650 + }, + { + "epoch": 0.6108294930875576, + "grad_norm": 0.7698374340240739, + "learning_rate": 1.6642844760169048e-06, + "loss": 0.7673745155334473, + "step": 2651 + }, + { + "epoch": 0.6110599078341014, + "grad_norm": 1.051257553540871, + "learning_rate": 1.6639996500762313e-06, + "loss": 0.8539090752601624, + "step": 2652 + }, + { + "epoch": 0.6112903225806452, + "grad_norm": 0.8676017636407886, + "learning_rate": 1.663714727756888e-06, + "loss": 0.9146299362182617, + "step": 2653 + }, + { + "epoch": 0.6115207373271889, + "grad_norm": 0.9802646170879412, + "learning_rate": 1.6634297091002304e-06, + "loss": 0.6720675230026245, + "step": 2654 + }, + { + "epoch": 0.6117511520737328, + "grad_norm": 0.9963804792413621, + "learning_rate": 1.6631445941476287e-06, + "loss": 0.876419186592102, + "step": 2655 + }, + { + "epoch": 0.6119815668202765, + "grad_norm": 0.8251901500966289, + "learning_rate": 1.6628593829404673e-06, + "loss": 0.781826376914978, + "step": 2656 + }, + { + "epoch": 0.6122119815668203, + "grad_norm": 1.0156308960299383, + "learning_rate": 1.662574075520144e-06, + "loss": 0.8700725436210632, + "step": 2657 + }, + { + "epoch": 0.6124423963133641, + "grad_norm": 0.8730333366815507, + "learning_rate": 1.6622886719280703e-06, + "loss": 0.7927212715148926, + "step": 2658 + }, + { + "epoch": 0.6126728110599078, + "grad_norm": 0.9472958125063492, + "learning_rate": 1.6620031722056732e-06, + "loss": 0.8402982354164124, + "step": 2659 + }, + { + "epoch": 0.6129032258064516, + "grad_norm": 0.9246784332742947, + "learning_rate": 1.6617175763943916e-06, + "loss": 0.844031572341919, + "step": 2660 + }, + { + "epoch": 0.6131336405529954, + "grad_norm": 1.1749754124811849, + "learning_rate": 1.66143188453568e-06, + "loss": 0.7927590608596802, + "step": 2661 + }, + { + "epoch": 0.6133640552995392, + "grad_norm": 0.7562363270320578, + "learning_rate": 
1.6611460966710057e-06, + "loss": 0.6881238222122192, + "step": 2662 + }, + { + "epoch": 0.6135944700460829, + "grad_norm": 0.7503304726479316, + "learning_rate": 1.6608602128418512e-06, + "loss": 0.8782250881195068, + "step": 2663 + }, + { + "epoch": 0.6138248847926268, + "grad_norm": 0.764429872232153, + "learning_rate": 1.6605742330897112e-06, + "loss": 0.810072124004364, + "step": 2664 + }, + { + "epoch": 0.6140552995391705, + "grad_norm": 0.7959070796498304, + "learning_rate": 1.660288157456096e-06, + "loss": 0.9278649091720581, + "step": 2665 + }, + { + "epoch": 0.6142857142857143, + "grad_norm": 0.8518702716538695, + "learning_rate": 1.6600019859825287e-06, + "loss": 0.7821990251541138, + "step": 2666 + }, + { + "epoch": 0.614516129032258, + "grad_norm": 0.8000150810917545, + "learning_rate": 1.6597157187105474e-06, + "loss": 0.7945138216018677, + "step": 2667 + }, + { + "epoch": 0.6147465437788019, + "grad_norm": 0.9158855636867193, + "learning_rate": 1.659429355681702e-06, + "loss": 0.7796168327331543, + "step": 2668 + }, + { + "epoch": 0.6149769585253456, + "grad_norm": 0.8778480996767207, + "learning_rate": 1.659142896937559e-06, + "loss": 0.8412867784500122, + "step": 2669 + }, + { + "epoch": 0.6152073732718893, + "grad_norm": 0.8776586025383009, + "learning_rate": 1.6588563425196976e-06, + "loss": 0.8507891893386841, + "step": 2670 + }, + { + "epoch": 0.6154377880184332, + "grad_norm": 0.7470530836348557, + "learning_rate": 1.6585696924697097e-06, + "loss": 0.7538737654685974, + "step": 2671 + }, + { + "epoch": 0.6156682027649769, + "grad_norm": 0.7938343055651664, + "learning_rate": 1.6582829468292027e-06, + "loss": 0.7241994142532349, + "step": 2672 + }, + { + "epoch": 0.6158986175115208, + "grad_norm": 0.7740707689038899, + "learning_rate": 1.6579961056397979e-06, + "loss": 0.8282276391983032, + "step": 2673 + }, + { + "epoch": 0.6161290322580645, + "grad_norm": 0.9834275785675608, + "learning_rate": 1.657709168943129e-06, + "loss": 
0.7823094725608826, + "step": 2674 + }, + { + "epoch": 0.6163594470046083, + "grad_norm": 0.7814560466718257, + "learning_rate": 1.6574221367808452e-06, + "loss": 0.7682117819786072, + "step": 2675 + }, + { + "epoch": 0.616589861751152, + "grad_norm": 0.791790817396352, + "learning_rate": 1.6571350091946084e-06, + "loss": 0.7483188509941101, + "step": 2676 + }, + { + "epoch": 0.6168202764976959, + "grad_norm": 0.7904062559480196, + "learning_rate": 1.656847786226095e-06, + "loss": 0.8244579434394836, + "step": 2677 + }, + { + "epoch": 0.6170506912442396, + "grad_norm": 0.935192090002093, + "learning_rate": 1.6565604679169951e-06, + "loss": 0.9741685390472412, + "step": 2678 + }, + { + "epoch": 0.6172811059907835, + "grad_norm": 1.2715516239943523, + "learning_rate": 1.6562730543090122e-06, + "loss": 1.0004706382751465, + "step": 2679 + }, + { + "epoch": 0.6175115207373272, + "grad_norm": 0.7382412100690486, + "learning_rate": 1.6559855454438644e-06, + "loss": 0.6897011399269104, + "step": 2680 + }, + { + "epoch": 0.617741935483871, + "grad_norm": 0.6330897297720288, + "learning_rate": 1.6556979413632833e-06, + "loss": 0.7250478267669678, + "step": 2681 + }, + { + "epoch": 0.6179723502304147, + "grad_norm": 0.9717515360338855, + "learning_rate": 1.6554102421090137e-06, + "loss": 0.850714385509491, + "step": 2682 + }, + { + "epoch": 0.6182027649769585, + "grad_norm": 0.917367886199939, + "learning_rate": 1.6551224477228152e-06, + "loss": 0.8389794230461121, + "step": 2683 + }, + { + "epoch": 0.6184331797235023, + "grad_norm": 0.8244704754842406, + "learning_rate": 1.6548345582464608e-06, + "loss": 0.8004277944564819, + "step": 2684 + }, + { + "epoch": 0.618663594470046, + "grad_norm": 0.9438052955461359, + "learning_rate": 1.654546573721737e-06, + "loss": 0.8439298868179321, + "step": 2685 + }, + { + "epoch": 0.6188940092165899, + "grad_norm": 0.9506767899718855, + "learning_rate": 1.6542584941904448e-06, + "loss": 0.7715939283370972, + "step": 2686 + }, + { + 
"epoch": 0.6191244239631336, + "grad_norm": 0.7277066195828455, + "learning_rate": 1.6539703196943982e-06, + "loss": 0.8521275520324707, + "step": 2687 + }, + { + "epoch": 0.6193548387096774, + "grad_norm": 0.9502964788805838, + "learning_rate": 1.6536820502754249e-06, + "loss": 0.8773370981216431, + "step": 2688 + }, + { + "epoch": 0.6195852534562212, + "grad_norm": 0.8896877670997408, + "learning_rate": 1.653393685975368e-06, + "loss": 0.7613356113433838, + "step": 2689 + }, + { + "epoch": 0.619815668202765, + "grad_norm": 0.7872525626089157, + "learning_rate": 1.6531052268360823e-06, + "loss": 0.7534692287445068, + "step": 2690 + }, + { + "epoch": 0.6200460829493087, + "grad_norm": 0.8888603991720845, + "learning_rate": 1.652816672899438e-06, + "loss": 0.861242413520813, + "step": 2691 + }, + { + "epoch": 0.6202764976958526, + "grad_norm": 1.0955455640383855, + "learning_rate": 1.652528024207317e-06, + "loss": 0.9778954982757568, + "step": 2692 + }, + { + "epoch": 0.6205069124423963, + "grad_norm": 0.8389124431813023, + "learning_rate": 1.6522392808016176e-06, + "loss": 0.7874879240989685, + "step": 2693 + }, + { + "epoch": 0.6207373271889401, + "grad_norm": 1.038077147354541, + "learning_rate": 1.6519504427242503e-06, + "loss": 0.8306739330291748, + "step": 2694 + }, + { + "epoch": 0.6209677419354839, + "grad_norm": 0.890554970207788, + "learning_rate": 1.651661510017139e-06, + "loss": 0.7617331743240356, + "step": 2695 + }, + { + "epoch": 0.6211981566820276, + "grad_norm": 0.8325839299854928, + "learning_rate": 1.6513724827222223e-06, + "loss": 0.8912776708602905, + "step": 2696 + }, + { + "epoch": 0.6214285714285714, + "grad_norm": 0.9626202232237234, + "learning_rate": 1.6510833608814519e-06, + "loss": 0.832025945186615, + "step": 2697 + }, + { + "epoch": 0.6216589861751152, + "grad_norm": 0.8573045739455887, + "learning_rate": 1.6507941445367934e-06, + "loss": 0.7391358613967896, + "step": 2698 + }, + { + "epoch": 0.621889400921659, + "grad_norm": 
0.8417803604945624, + "learning_rate": 1.6505048337302267e-06, + "loss": 0.7968891263008118, + "step": 2699 + }, + { + "epoch": 0.6221198156682027, + "grad_norm": 0.7943584636642551, + "learning_rate": 1.6502154285037446e-06, + "loss": 0.8268226981163025, + "step": 2700 + }, + { + "epoch": 0.6223502304147466, + "grad_norm": 0.8943748659016423, + "learning_rate": 1.6499259288993536e-06, + "loss": 0.8727509379386902, + "step": 2701 + }, + { + "epoch": 0.6225806451612903, + "grad_norm": 0.9781149876582625, + "learning_rate": 1.6496363349590746e-06, + "loss": 0.8419584035873413, + "step": 2702 + }, + { + "epoch": 0.6228110599078341, + "grad_norm": 0.9222004845701074, + "learning_rate": 1.6493466467249415e-06, + "loss": 0.7753620743751526, + "step": 2703 + }, + { + "epoch": 0.6230414746543779, + "grad_norm": 0.8188505837862442, + "learning_rate": 1.6490568642390022e-06, + "loss": 0.7735302448272705, + "step": 2704 + }, + { + "epoch": 0.6232718894009217, + "grad_norm": 0.892742684163995, + "learning_rate": 1.6487669875433183e-06, + "loss": 0.8730747699737549, + "step": 2705 + }, + { + "epoch": 0.6235023041474654, + "grad_norm": 1.081206789540213, + "learning_rate": 1.648477016679965e-06, + "loss": 1.026259183883667, + "step": 2706 + }, + { + "epoch": 0.6237327188940092, + "grad_norm": 1.1700615414540931, + "learning_rate": 1.6481869516910314e-06, + "loss": 1.0710067749023438, + "step": 2707 + }, + { + "epoch": 0.623963133640553, + "grad_norm": 0.8750649396873535, + "learning_rate": 1.6478967926186196e-06, + "loss": 0.8451842069625854, + "step": 2708 + }, + { + "epoch": 0.6241935483870967, + "grad_norm": 1.0025312740636694, + "learning_rate": 1.6476065395048463e-06, + "loss": 0.8114550113677979, + "step": 2709 + }, + { + "epoch": 0.6244239631336406, + "grad_norm": 0.9543936745980088, + "learning_rate": 1.6473161923918408e-06, + "loss": 0.9158897399902344, + "step": 2710 + }, + { + "epoch": 0.6246543778801843, + "grad_norm": 0.9073320322912862, + "learning_rate": 
1.6470257513217471e-06, + "loss": 0.8455985188484192, + "step": 2711 + }, + { + "epoch": 0.6248847926267281, + "grad_norm": 0.9409835862192949, + "learning_rate": 1.6467352163367224e-06, + "loss": 0.7869806885719299, + "step": 2712 + }, + { + "epoch": 0.6251152073732719, + "grad_norm": 0.9720046165998673, + "learning_rate": 1.6464445874789369e-06, + "loss": 0.7813467979431152, + "step": 2713 + }, + { + "epoch": 0.6253456221198157, + "grad_norm": 0.9253768349404401, + "learning_rate": 1.646153864790575e-06, + "loss": 0.7607834339141846, + "step": 2714 + }, + { + "epoch": 0.6255760368663594, + "grad_norm": 0.7655542834849622, + "learning_rate": 1.6458630483138354e-06, + "loss": 0.6316394209861755, + "step": 2715 + }, + { + "epoch": 0.6258064516129033, + "grad_norm": 1.0037920503955002, + "learning_rate": 1.6455721380909293e-06, + "loss": 0.8613089323043823, + "step": 2716 + }, + { + "epoch": 0.626036866359447, + "grad_norm": 0.900314234710346, + "learning_rate": 1.6452811341640823e-06, + "loss": 0.8521597385406494, + "step": 2717 + }, + { + "epoch": 0.6262672811059908, + "grad_norm": 0.863334614503053, + "learning_rate": 1.6449900365755322e-06, + "loss": 0.7649816870689392, + "step": 2718 + }, + { + "epoch": 0.6264976958525346, + "grad_norm": 0.7921235061169694, + "learning_rate": 1.6446988453675327e-06, + "loss": 0.669215738773346, + "step": 2719 + }, + { + "epoch": 0.6267281105990783, + "grad_norm": 1.0085146323707468, + "learning_rate": 1.6444075605823491e-06, + "loss": 0.7795897722244263, + "step": 2720 + }, + { + "epoch": 0.6269585253456221, + "grad_norm": 1.0985096718321175, + "learning_rate": 1.6441161822622612e-06, + "loss": 0.9773029088973999, + "step": 2721 + }, + { + "epoch": 0.6271889400921659, + "grad_norm": 0.88062279724108, + "learning_rate": 1.6438247104495622e-06, + "loss": 0.8313496112823486, + "step": 2722 + }, + { + "epoch": 0.6274193548387097, + "grad_norm": 0.8741823244787398, + "learning_rate": 1.6435331451865589e-06, + "loss": 
0.822803258895874, + "step": 2723 + }, + { + "epoch": 0.6276497695852534, + "grad_norm": 1.1191623839144935, + "learning_rate": 1.643241486515571e-06, + "loss": 0.8933405876159668, + "step": 2724 + }, + { + "epoch": 0.6278801843317973, + "grad_norm": 0.8721873626078817, + "learning_rate": 1.6429497344789334e-06, + "loss": 0.865382194519043, + "step": 2725 + }, + { + "epoch": 0.628110599078341, + "grad_norm": 0.6623424743433429, + "learning_rate": 1.6426578891189929e-06, + "loss": 0.5955609679222107, + "step": 2726 + }, + { + "epoch": 0.6283410138248848, + "grad_norm": 0.9379654908769754, + "learning_rate": 1.6423659504781102e-06, + "loss": 0.7832648754119873, + "step": 2727 + }, + { + "epoch": 0.6285714285714286, + "grad_norm": 0.9904172136436726, + "learning_rate": 1.6420739185986606e-06, + "loss": 0.8939651250839233, + "step": 2728 + }, + { + "epoch": 0.6288018433179724, + "grad_norm": 0.8754504203733118, + "learning_rate": 1.6417817935230316e-06, + "loss": 0.7950553894042969, + "step": 2729 + }, + { + "epoch": 0.6290322580645161, + "grad_norm": 0.7473547756110924, + "learning_rate": 1.6414895752936247e-06, + "loss": 0.7011410593986511, + "step": 2730 + }, + { + "epoch": 0.6292626728110599, + "grad_norm": 0.8298073820867625, + "learning_rate": 1.6411972639528553e-06, + "loss": 0.8745814561843872, + "step": 2731 + }, + { + "epoch": 0.6294930875576037, + "grad_norm": 0.9643129286331958, + "learning_rate": 1.640904859543152e-06, + "loss": 0.9487906694412231, + "step": 2732 + }, + { + "epoch": 0.6297235023041474, + "grad_norm": 1.0003996457820634, + "learning_rate": 1.6406123621069565e-06, + "loss": 0.8493598103523254, + "step": 2733 + }, + { + "epoch": 0.6299539170506913, + "grad_norm": 0.7043952970778223, + "learning_rate": 1.640319771686725e-06, + "loss": 0.8176105618476868, + "step": 2734 + }, + { + "epoch": 0.630184331797235, + "grad_norm": 1.1365398207749948, + "learning_rate": 1.640027088324926e-06, + "loss": 0.8331952691078186, + "step": 2735 + }, + { + 
"epoch": 0.6304147465437788, + "grad_norm": 0.9152153352251905, + "learning_rate": 1.6397343120640428e-06, + "loss": 0.7507727146148682, + "step": 2736 + }, + { + "epoch": 0.6306451612903226, + "grad_norm": 0.8498087936716523, + "learning_rate": 1.6394414429465707e-06, + "loss": 0.7681083679199219, + "step": 2737 + }, + { + "epoch": 0.6308755760368664, + "grad_norm": 1.0207970870125542, + "learning_rate": 1.6391484810150197e-06, + "loss": 0.86592036485672, + "step": 2738 + }, + { + "epoch": 0.6311059907834101, + "grad_norm": 0.7893726077346048, + "learning_rate": 1.6388554263119133e-06, + "loss": 0.6561422348022461, + "step": 2739 + }, + { + "epoch": 0.631336405529954, + "grad_norm": 0.8691518888981297, + "learning_rate": 1.6385622788797871e-06, + "loss": 1.0149214267730713, + "step": 2740 + }, + { + "epoch": 0.6315668202764977, + "grad_norm": 3.1459869291369578, + "learning_rate": 1.6382690387611912e-06, + "loss": 0.8542313575744629, + "step": 2741 + }, + { + "epoch": 0.6317972350230415, + "grad_norm": 0.8459688860048273, + "learning_rate": 1.6379757059986898e-06, + "loss": 0.8561190366744995, + "step": 2742 + }, + { + "epoch": 0.6320276497695853, + "grad_norm": 0.8945733601522768, + "learning_rate": 1.6376822806348591e-06, + "loss": 0.7487457990646362, + "step": 2743 + }, + { + "epoch": 0.632258064516129, + "grad_norm": 0.7710656021686645, + "learning_rate": 1.6373887627122894e-06, + "loss": 0.6169087886810303, + "step": 2744 + }, + { + "epoch": 0.6324884792626728, + "grad_norm": 0.9363459151732765, + "learning_rate": 1.6370951522735848e-06, + "loss": 0.8384301662445068, + "step": 2745 + }, + { + "epoch": 0.6327188940092165, + "grad_norm": 0.8816116065345285, + "learning_rate": 1.636801449361362e-06, + "loss": 0.8009958267211914, + "step": 2746 + }, + { + "epoch": 0.6329493087557604, + "grad_norm": 0.7782605199549586, + "learning_rate": 1.6365076540182518e-06, + "loss": 0.7277840375900269, + "step": 2747 + }, + { + "epoch": 0.6331797235023041, + "grad_norm": 
0.8629211607674182, + "learning_rate": 1.6362137662868988e-06, + "loss": 0.7994974255561829, + "step": 2748 + }, + { + "epoch": 0.633410138248848, + "grad_norm": 0.9972871876044257, + "learning_rate": 1.6359197862099592e-06, + "loss": 0.9940546751022339, + "step": 2749 + }, + { + "epoch": 0.6336405529953917, + "grad_norm": 0.7083636808435892, + "learning_rate": 1.6356257138301048e-06, + "loss": 0.776983916759491, + "step": 2750 + }, + { + "epoch": 0.6338709677419355, + "grad_norm": 1.0813287689618403, + "learning_rate": 1.6353315491900194e-06, + "loss": 0.8218704462051392, + "step": 2751 + }, + { + "epoch": 0.6341013824884792, + "grad_norm": 0.9285197745822434, + "learning_rate": 1.635037292332401e-06, + "loss": 0.8437784910202026, + "step": 2752 + }, + { + "epoch": 0.6343317972350231, + "grad_norm": 0.7951039096878332, + "learning_rate": 1.63474294329996e-06, + "loss": 0.7774004340171814, + "step": 2753 + }, + { + "epoch": 0.6345622119815668, + "grad_norm": 0.7998446978982631, + "learning_rate": 1.634448502135421e-06, + "loss": 0.8480523824691772, + "step": 2754 + }, + { + "epoch": 0.6347926267281107, + "grad_norm": 0.8710356721404071, + "learning_rate": 1.634153968881522e-06, + "loss": 0.838944673538208, + "step": 2755 + }, + { + "epoch": 0.6350230414746544, + "grad_norm": 0.9609360504840417, + "learning_rate": 1.633859343581014e-06, + "loss": 0.7989159822463989, + "step": 2756 + }, + { + "epoch": 0.6352534562211981, + "grad_norm": 0.8906618388597183, + "learning_rate": 1.6335646262766612e-06, + "loss": 0.8122522234916687, + "step": 2757 + }, + { + "epoch": 0.635483870967742, + "grad_norm": 1.0306905026592958, + "learning_rate": 1.6332698170112418e-06, + "loss": 0.7472352981567383, + "step": 2758 + }, + { + "epoch": 0.6357142857142857, + "grad_norm": 0.7470082329854858, + "learning_rate": 1.6329749158275466e-06, + "loss": 0.7160866260528564, + "step": 2759 + }, + { + "epoch": 0.6359447004608295, + "grad_norm": 0.9276359862380839, + "learning_rate": 
1.6326799227683803e-06, + "loss": 0.850339412689209, + "step": 2760 + }, + { + "epoch": 0.6361751152073732, + "grad_norm": 0.8334408182150722, + "learning_rate": 1.632384837876561e-06, + "loss": 0.7683566808700562, + "step": 2761 + }, + { + "epoch": 0.6364055299539171, + "grad_norm": 1.0070287688728312, + "learning_rate": 1.6320896611949197e-06, + "loss": 0.820326030254364, + "step": 2762 + }, + { + "epoch": 0.6366359447004608, + "grad_norm": 0.9088399606663712, + "learning_rate": 1.6317943927663005e-06, + "loss": 0.9319206476211548, + "step": 2763 + }, + { + "epoch": 0.6368663594470046, + "grad_norm": 0.854101738795234, + "learning_rate": 1.6314990326335619e-06, + "loss": 0.8473616242408752, + "step": 2764 + }, + { + "epoch": 0.6370967741935484, + "grad_norm": 0.9083270544798837, + "learning_rate": 1.6312035808395746e-06, + "loss": 0.7515239715576172, + "step": 2765 + }, + { + "epoch": 0.6373271889400922, + "grad_norm": 0.9691327918436982, + "learning_rate": 1.630908037427223e-06, + "loss": 0.8780150413513184, + "step": 2766 + }, + { + "epoch": 0.6375576036866359, + "grad_norm": 0.8183908015853972, + "learning_rate": 1.6306124024394051e-06, + "loss": 0.7502909898757935, + "step": 2767 + }, + { + "epoch": 0.6377880184331797, + "grad_norm": 1.0244030314506845, + "learning_rate": 1.630316675919032e-06, + "loss": 0.8440920114517212, + "step": 2768 + }, + { + "epoch": 0.6380184331797235, + "grad_norm": 0.9479398820781787, + "learning_rate": 1.6300208579090275e-06, + "loss": 0.7769831418991089, + "step": 2769 + }, + { + "epoch": 0.6382488479262672, + "grad_norm": 0.7616107153752498, + "learning_rate": 1.6297249484523297e-06, + "loss": 0.6217764616012573, + "step": 2770 + }, + { + "epoch": 0.6384792626728111, + "grad_norm": 0.7961962297717475, + "learning_rate": 1.6294289475918891e-06, + "loss": 0.8726013898849487, + "step": 2771 + }, + { + "epoch": 0.6387096774193548, + "grad_norm": 0.9993347618775529, + "learning_rate": 1.6291328553706702e-06, + "loss": 
0.9624546766281128, + "step": 2772 + }, + { + "epoch": 0.6389400921658986, + "grad_norm": 0.9073330627878557, + "learning_rate": 1.62883667183165e-06, + "loss": 0.733322024345398, + "step": 2773 + }, + { + "epoch": 0.6391705069124424, + "grad_norm": 0.828990327728417, + "learning_rate": 1.6285403970178197e-06, + "loss": 0.7944040298461914, + "step": 2774 + }, + { + "epoch": 0.6394009216589862, + "grad_norm": 0.945508092850191, + "learning_rate": 1.6282440309721825e-06, + "loss": 0.8006964921951294, + "step": 2775 + }, + { + "epoch": 0.6396313364055299, + "grad_norm": 0.8235251563991838, + "learning_rate": 1.6279475737377562e-06, + "loss": 0.8226393461227417, + "step": 2776 + }, + { + "epoch": 0.6398617511520738, + "grad_norm": 0.9205648176506509, + "learning_rate": 1.6276510253575707e-06, + "loss": 0.8216049671173096, + "step": 2777 + }, + { + "epoch": 0.6400921658986175, + "grad_norm": 1.2879339929003093, + "learning_rate": 1.6273543858746698e-06, + "loss": 0.9556760191917419, + "step": 2778 + }, + { + "epoch": 0.6403225806451613, + "grad_norm": 1.226309717633737, + "learning_rate": 1.6270576553321103e-06, + "loss": 0.9736160039901733, + "step": 2779 + }, + { + "epoch": 0.6405529953917051, + "grad_norm": 0.7107959971647043, + "learning_rate": 1.6267608337729622e-06, + "loss": 0.6930527687072754, + "step": 2780 + }, + { + "epoch": 0.6407834101382488, + "grad_norm": 0.8158686811134676, + "learning_rate": 1.6264639212403089e-06, + "loss": 0.8047456741333008, + "step": 2781 + }, + { + "epoch": 0.6410138248847926, + "grad_norm": 0.8454524938044947, + "learning_rate": 1.6261669177772465e-06, + "loss": 0.7278450727462769, + "step": 2782 + }, + { + "epoch": 0.6412442396313364, + "grad_norm": 0.8520417006771478, + "learning_rate": 1.6258698234268852e-06, + "loss": 0.7768574357032776, + "step": 2783 + }, + { + "epoch": 0.6414746543778802, + "grad_norm": 1.0890287289964238, + "learning_rate": 1.6255726382323475e-06, + "loss": 0.7621645331382751, + "step": 2784 + }, + { + 
"epoch": 0.6417050691244239, + "grad_norm": 0.7437513689171984, + "learning_rate": 1.6252753622367695e-06, + "loss": 0.7566754221916199, + "step": 2785 + }, + { + "epoch": 0.6419354838709678, + "grad_norm": 0.8832427803322862, + "learning_rate": 1.6249779954833005e-06, + "loss": 0.7609840631484985, + "step": 2786 + }, + { + "epoch": 0.6421658986175115, + "grad_norm": 0.7482883809435998, + "learning_rate": 1.6246805380151028e-06, + "loss": 0.7360000610351562, + "step": 2787 + }, + { + "epoch": 0.6423963133640553, + "grad_norm": 1.1130271498528226, + "learning_rate": 1.624382989875352e-06, + "loss": 0.7951081395149231, + "step": 2788 + }, + { + "epoch": 0.6426267281105991, + "grad_norm": 0.7939855049580037, + "learning_rate": 1.6240853511072367e-06, + "loss": 0.7273311614990234, + "step": 2789 + }, + { + "epoch": 0.6428571428571429, + "grad_norm": 1.0416971384804878, + "learning_rate": 1.6237876217539588e-06, + "loss": 0.9270737171173096, + "step": 2790 + }, + { + "epoch": 0.6430875576036866, + "grad_norm": 0.97801359210753, + "learning_rate": 1.6234898018587336e-06, + "loss": 0.7624385356903076, + "step": 2791 + }, + { + "epoch": 0.6433179723502304, + "grad_norm": 0.8529799225121792, + "learning_rate": 1.6231918914647889e-06, + "loss": 0.8266719579696655, + "step": 2792 + }, + { + "epoch": 0.6435483870967742, + "grad_norm": 0.6435153338840431, + "learning_rate": 1.6228938906153663e-06, + "loss": 0.7606902122497559, + "step": 2793 + }, + { + "epoch": 0.6437788018433179, + "grad_norm": 1.022572162531227, + "learning_rate": 1.6225957993537197e-06, + "loss": 0.8239191174507141, + "step": 2794 + }, + { + "epoch": 0.6440092165898618, + "grad_norm": 0.8871272102711673, + "learning_rate": 1.6222976177231174e-06, + "loss": 0.8313608169555664, + "step": 2795 + }, + { + "epoch": 0.6442396313364055, + "grad_norm": 0.7541910127898682, + "learning_rate": 1.6219993457668396e-06, + "loss": 0.7725037932395935, + "step": 2796 + }, + { + "epoch": 0.6444700460829493, + "grad_norm": 
0.8887584465014293, + "learning_rate": 1.6217009835281802e-06, + "loss": 0.8791182041168213, + "step": 2797 + }, + { + "epoch": 0.6447004608294931, + "grad_norm": 0.9285171614449231, + "learning_rate": 1.621402531050446e-06, + "loss": 0.7157453298568726, + "step": 2798 + }, + { + "epoch": 0.6449308755760369, + "grad_norm": 0.9675001114911925, + "learning_rate": 1.621103988376957e-06, + "loss": 0.8248307704925537, + "step": 2799 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8114025469253138, + "learning_rate": 1.6208053555510467e-06, + "loss": 0.7094661593437195, + "step": 2800 + }, + { + "epoch": 0.6453917050691245, + "grad_norm": 0.997320269594231, + "learning_rate": 1.6205066326160605e-06, + "loss": 0.9130781888961792, + "step": 2801 + }, + { + "epoch": 0.6456221198156682, + "grad_norm": 0.8555561883924394, + "learning_rate": 1.620207819615358e-06, + "loss": 0.7140541076660156, + "step": 2802 + }, + { + "epoch": 0.645852534562212, + "grad_norm": 0.8223075667705522, + "learning_rate": 1.6199089165923116e-06, + "loss": 0.8638602495193481, + "step": 2803 + }, + { + "epoch": 0.6460829493087558, + "grad_norm": 0.8487880176317714, + "learning_rate": 1.6196099235903068e-06, + "loss": 0.9055536389350891, + "step": 2804 + }, + { + "epoch": 0.6463133640552995, + "grad_norm": 0.9356547902583738, + "learning_rate": 1.6193108406527416e-06, + "loss": 0.7694590091705322, + "step": 2805 + }, + { + "epoch": 0.6465437788018433, + "grad_norm": 0.9047595380936525, + "learning_rate": 1.619011667823028e-06, + "loss": 0.7512019872665405, + "step": 2806 + }, + { + "epoch": 0.646774193548387, + "grad_norm": 0.8406537006369587, + "learning_rate": 1.6187124051445903e-06, + "loss": 0.6362565159797668, + "step": 2807 + }, + { + "epoch": 0.6470046082949309, + "grad_norm": 1.328031327807814, + "learning_rate": 1.6184130526608656e-06, + "loss": 0.885259747505188, + "step": 2808 + }, + { + "epoch": 0.6472350230414746, + "grad_norm": 0.9445009081248091, + "learning_rate": 
1.6181136104153054e-06, + "loss": 0.7868754863739014, + "step": 2809 + }, + { + "epoch": 0.6474654377880185, + "grad_norm": 0.901923102146858, + "learning_rate": 1.6178140784513729e-06, + "loss": 0.889660120010376, + "step": 2810 + }, + { + "epoch": 0.6476958525345622, + "grad_norm": 0.7380215273328754, + "learning_rate": 1.6175144568125444e-06, + "loss": 0.8460343480110168, + "step": 2811 + }, + { + "epoch": 0.647926267281106, + "grad_norm": 0.9963582050847237, + "learning_rate": 1.6172147455423105e-06, + "loss": 0.8729731440544128, + "step": 2812 + }, + { + "epoch": 0.6481566820276498, + "grad_norm": 0.9500689129739934, + "learning_rate": 1.616914944684173e-06, + "loss": 0.7937173843383789, + "step": 2813 + }, + { + "epoch": 0.6483870967741936, + "grad_norm": 1.068299419221943, + "learning_rate": 1.6166150542816483e-06, + "loss": 0.8764641284942627, + "step": 2814 + }, + { + "epoch": 0.6486175115207373, + "grad_norm": 0.8942547003902331, + "learning_rate": 1.6163150743782645e-06, + "loss": 0.8078420758247375, + "step": 2815 + }, + { + "epoch": 0.6488479262672812, + "grad_norm": 0.9410598977678883, + "learning_rate": 1.6160150050175636e-06, + "loss": 0.9124993085861206, + "step": 2816 + }, + { + "epoch": 0.6490783410138249, + "grad_norm": 0.8852573714623596, + "learning_rate": 1.6157148462431003e-06, + "loss": 0.9584136009216309, + "step": 2817 + }, + { + "epoch": 0.6493087557603686, + "grad_norm": 1.0833527157774228, + "learning_rate": 1.6154145980984422e-06, + "loss": 0.8404672145843506, + "step": 2818 + }, + { + "epoch": 0.6495391705069125, + "grad_norm": 0.9498348014278839, + "learning_rate": 1.6151142606271695e-06, + "loss": 0.7928001880645752, + "step": 2819 + }, + { + "epoch": 0.6497695852534562, + "grad_norm": 0.8444903444994009, + "learning_rate": 1.6148138338728766e-06, + "loss": 0.7877479791641235, + "step": 2820 + }, + { + "epoch": 0.65, + "grad_norm": 0.814898961059689, + "learning_rate": 1.6145133178791695e-06, + "loss": 0.9502429366111755, + "step": 
2821 + }, + { + "epoch": 0.6502304147465438, + "grad_norm": 0.791549779828082, + "learning_rate": 1.6142127126896679e-06, + "loss": 0.7866412401199341, + "step": 2822 + }, + { + "epoch": 0.6504608294930876, + "grad_norm": 0.7841896313928699, + "learning_rate": 1.613912018348004e-06, + "loss": 0.8315345644950867, + "step": 2823 + }, + { + "epoch": 0.6506912442396313, + "grad_norm": 0.6841019539216254, + "learning_rate": 1.6136112348978236e-06, + "loss": 0.9718044400215149, + "step": 2824 + }, + { + "epoch": 0.6509216589861752, + "grad_norm": 0.6502753552916141, + "learning_rate": 1.6133103623827843e-06, + "loss": 0.5874941349029541, + "step": 2825 + }, + { + "epoch": 0.6511520737327189, + "grad_norm": 0.8954999916723304, + "learning_rate": 1.613009400846558e-06, + "loss": 0.9498391151428223, + "step": 2826 + }, + { + "epoch": 0.6513824884792627, + "grad_norm": 0.9527387242959447, + "learning_rate": 1.612708350332829e-06, + "loss": 0.858715295791626, + "step": 2827 + }, + { + "epoch": 0.6516129032258065, + "grad_norm": 0.7771583744459308, + "learning_rate": 1.6124072108852938e-06, + "loss": 0.8618113994598389, + "step": 2828 + }, + { + "epoch": 0.6518433179723502, + "grad_norm": 0.7504136233680345, + "learning_rate": 1.6121059825476628e-06, + "loss": 0.8024446964263916, + "step": 2829 + }, + { + "epoch": 0.652073732718894, + "grad_norm": 0.8461077162414828, + "learning_rate": 1.6118046653636586e-06, + "loss": 0.8021122813224792, + "step": 2830 + }, + { + "epoch": 0.6523041474654377, + "grad_norm": 0.8330044091738112, + "learning_rate": 1.6115032593770176e-06, + "loss": 0.8092107772827148, + "step": 2831 + }, + { + "epoch": 0.6525345622119816, + "grad_norm": 0.8480183578387018, + "learning_rate": 1.6112017646314872e-06, + "loss": 0.9842641353607178, + "step": 2832 + }, + { + "epoch": 0.6527649769585253, + "grad_norm": 0.8051494817524167, + "learning_rate": 1.6109001811708305e-06, + "loss": 0.744353175163269, + "step": 2833 + }, + { + "epoch": 0.6529953917050692, + 
"grad_norm": 1.0610555371871784, + "learning_rate": 1.6105985090388209e-06, + "loss": 0.7089616060256958, + "step": 2834 + }, + { + "epoch": 0.6532258064516129, + "grad_norm": 0.9119028582239228, + "learning_rate": 1.610296748279246e-06, + "loss": 0.9043736457824707, + "step": 2835 + }, + { + "epoch": 0.6534562211981567, + "grad_norm": 1.0078987757698072, + "learning_rate": 1.6099948989359061e-06, + "loss": 0.9170948266983032, + "step": 2836 + }, + { + "epoch": 0.6536866359447004, + "grad_norm": 0.9289963097672949, + "learning_rate": 1.6096929610526145e-06, + "loss": 0.8275802135467529, + "step": 2837 + }, + { + "epoch": 0.6539170506912443, + "grad_norm": 0.9146670757237039, + "learning_rate": 1.6093909346731965e-06, + "loss": 0.9180251955986023, + "step": 2838 + }, + { + "epoch": 0.654147465437788, + "grad_norm": 0.708269208459363, + "learning_rate": 1.6090888198414908e-06, + "loss": 0.8041235208511353, + "step": 2839 + }, + { + "epoch": 0.6543778801843319, + "grad_norm": 0.9431191202102605, + "learning_rate": 1.6087866166013492e-06, + "loss": 0.7833176851272583, + "step": 2840 + }, + { + "epoch": 0.6546082949308756, + "grad_norm": 0.8680924352570318, + "learning_rate": 1.6084843249966364e-06, + "loss": 0.838886022567749, + "step": 2841 + }, + { + "epoch": 0.6548387096774193, + "grad_norm": 0.8317233103954151, + "learning_rate": 1.6081819450712293e-06, + "loss": 0.837687611579895, + "step": 2842 + }, + { + "epoch": 0.6550691244239631, + "grad_norm": 0.8737630969117387, + "learning_rate": 1.607879476869018e-06, + "loss": 0.6572843790054321, + "step": 2843 + }, + { + "epoch": 0.6552995391705069, + "grad_norm": 0.8513917948170456, + "learning_rate": 1.6075769204339053e-06, + "loss": 0.7698653936386108, + "step": 2844 + }, + { + "epoch": 0.6555299539170507, + "grad_norm": 0.9469558820500475, + "learning_rate": 1.607274275809807e-06, + "loss": 0.8639169335365295, + "step": 2845 + }, + { + "epoch": 0.6557603686635944, + "grad_norm": 0.8250799867539951, + 
"learning_rate": 1.6069715430406517e-06, + "loss": 0.837492823600769, + "step": 2846 + }, + { + "epoch": 0.6559907834101383, + "grad_norm": 0.9277000604833184, + "learning_rate": 1.6066687221703803e-06, + "loss": 0.8824087381362915, + "step": 2847 + }, + { + "epoch": 0.656221198156682, + "grad_norm": 0.9304701724719217, + "learning_rate": 1.6063658132429468e-06, + "loss": 0.8161731958389282, + "step": 2848 + }, + { + "epoch": 0.6564516129032258, + "grad_norm": 0.7988044282931124, + "learning_rate": 1.6060628163023183e-06, + "loss": 0.8365877270698547, + "step": 2849 + }, + { + "epoch": 0.6566820276497696, + "grad_norm": 0.8477393490951164, + "learning_rate": 1.6057597313924745e-06, + "loss": 0.877829909324646, + "step": 2850 + }, + { + "epoch": 0.6569124423963134, + "grad_norm": 0.857078285622655, + "learning_rate": 1.6054565585574075e-06, + "loss": 0.756903886795044, + "step": 2851 + }, + { + "epoch": 0.6571428571428571, + "grad_norm": 1.0124401818225557, + "learning_rate": 1.6051532978411223e-06, + "loss": 0.7777276039123535, + "step": 2852 + }, + { + "epoch": 0.6573732718894009, + "grad_norm": 0.9464152715401636, + "learning_rate": 1.6048499492876375e-06, + "loss": 0.9191532135009766, + "step": 2853 + }, + { + "epoch": 0.6576036866359447, + "grad_norm": 0.7885787618366824, + "learning_rate": 1.6045465129409829e-06, + "loss": 0.7693309783935547, + "step": 2854 + }, + { + "epoch": 0.6578341013824884, + "grad_norm": 0.8787314035574895, + "learning_rate": 1.6042429888452024e-06, + "loss": 0.7865023612976074, + "step": 2855 + }, + { + "epoch": 0.6580645161290323, + "grad_norm": 0.8588996745183644, + "learning_rate": 1.6039393770443521e-06, + "loss": 0.844336748123169, + "step": 2856 + }, + { + "epoch": 0.658294930875576, + "grad_norm": 0.9455502994869639, + "learning_rate": 1.6036356775825009e-06, + "loss": 0.9590705633163452, + "step": 2857 + }, + { + "epoch": 0.6585253456221198, + "grad_norm": 0.904582718768817, + "learning_rate": 1.6033318905037297e-06, + "loss": 
0.8687748312950134, + "step": 2858 + }, + { + "epoch": 0.6587557603686636, + "grad_norm": 0.8848681311153475, + "learning_rate": 1.6030280158521336e-06, + "loss": 0.8669745922088623, + "step": 2859 + }, + { + "epoch": 0.6589861751152074, + "grad_norm": 0.8829211466390271, + "learning_rate": 1.6027240536718191e-06, + "loss": 0.6929436922073364, + "step": 2860 + }, + { + "epoch": 0.6592165898617511, + "grad_norm": 0.9047325967091919, + "learning_rate": 1.6024200040069065e-06, + "loss": 0.6965433359146118, + "step": 2861 + }, + { + "epoch": 0.659447004608295, + "grad_norm": 0.9743729570848424, + "learning_rate": 1.6021158669015273e-06, + "loss": 0.780353307723999, + "step": 2862 + }, + { + "epoch": 0.6596774193548387, + "grad_norm": 0.7726382879850381, + "learning_rate": 1.6018116423998277e-06, + "loss": 0.685762882232666, + "step": 2863 + }, + { + "epoch": 0.6599078341013825, + "grad_norm": 0.8607619933867399, + "learning_rate": 1.6015073305459646e-06, + "loss": 0.8249918222427368, + "step": 2864 + }, + { + "epoch": 0.6601382488479263, + "grad_norm": 0.7388237148259402, + "learning_rate": 1.6012029313841086e-06, + "loss": 0.7327184677124023, + "step": 2865 + }, + { + "epoch": 0.66036866359447, + "grad_norm": 0.9554378042614118, + "learning_rate": 1.6008984449584433e-06, + "loss": 0.7785891890525818, + "step": 2866 + }, + { + "epoch": 0.6605990783410138, + "grad_norm": 0.7196967379779726, + "learning_rate": 1.600593871313164e-06, + "loss": 0.7307751178741455, + "step": 2867 + }, + { + "epoch": 0.6608294930875576, + "grad_norm": 1.2601680054093507, + "learning_rate": 1.6002892104924796e-06, + "loss": 0.8802257180213928, + "step": 2868 + }, + { + "epoch": 0.6610599078341014, + "grad_norm": 1.0302753711943056, + "learning_rate": 1.5999844625406106e-06, + "loss": 0.8699140548706055, + "step": 2869 + }, + { + "epoch": 0.6612903225806451, + "grad_norm": 0.8146336951608913, + "learning_rate": 1.5996796275017914e-06, + "loss": 0.6453604102134705, + "step": 2870 + }, + { + 
"epoch": 0.661520737327189, + "grad_norm": 0.807532897551279, + "learning_rate": 1.5993747054202682e-06, + "loss": 0.7319324016571045, + "step": 2871 + }, + { + "epoch": 0.6617511520737327, + "grad_norm": 0.9337023535064233, + "learning_rate": 1.5990696963402998e-06, + "loss": 0.8357574343681335, + "step": 2872 + }, + { + "epoch": 0.6619815668202765, + "grad_norm": 0.854915024221744, + "learning_rate": 1.5987646003061581e-06, + "loss": 0.7647984027862549, + "step": 2873 + }, + { + "epoch": 0.6622119815668203, + "grad_norm": 1.0099884737934117, + "learning_rate": 1.5984594173621274e-06, + "loss": 0.8542075753211975, + "step": 2874 + }, + { + "epoch": 0.6624423963133641, + "grad_norm": 0.9685596460194386, + "learning_rate": 1.5981541475525044e-06, + "loss": 0.7689328193664551, + "step": 2875 + }, + { + "epoch": 0.6626728110599078, + "grad_norm": 0.8183777315007433, + "learning_rate": 1.5978487909215987e-06, + "loss": 0.7459174990653992, + "step": 2876 + }, + { + "epoch": 0.6629032258064517, + "grad_norm": 0.8697380019030229, + "learning_rate": 1.5975433475137329e-06, + "loss": 0.8268495202064514, + "step": 2877 + }, + { + "epoch": 0.6631336405529954, + "grad_norm": 0.9013422410425754, + "learning_rate": 1.5972378173732406e-06, + "loss": 0.8254266977310181, + "step": 2878 + }, + { + "epoch": 0.6633640552995391, + "grad_norm": 1.0427681980244552, + "learning_rate": 1.59693220054447e-06, + "loss": 0.8552727103233337, + "step": 2879 + }, + { + "epoch": 0.663594470046083, + "grad_norm": 0.7469699255899254, + "learning_rate": 1.596626497071781e-06, + "loss": 0.7196269035339355, + "step": 2880 + }, + { + "epoch": 0.6638248847926267, + "grad_norm": 0.9146202447996906, + "learning_rate": 1.5963207069995455e-06, + "loss": 0.815540075302124, + "step": 2881 + }, + { + "epoch": 0.6640552995391705, + "grad_norm": 0.8585411055523222, + "learning_rate": 1.596014830372149e-06, + "loss": 0.8040128350257874, + "step": 2882 + }, + { + "epoch": 0.6642857142857143, + "grad_norm": 
0.8592608746136836, + "learning_rate": 1.5957088672339887e-06, + "loss": 0.7990812659263611, + "step": 2883 + }, + { + "epoch": 0.6645161290322581, + "grad_norm": 0.9139395957334936, + "learning_rate": 1.5954028176294746e-06, + "loss": 0.956179141998291, + "step": 2884 + }, + { + "epoch": 0.6647465437788018, + "grad_norm": 0.9544806325504157, + "learning_rate": 1.5950966816030304e-06, + "loss": 0.7730144262313843, + "step": 2885 + }, + { + "epoch": 0.6649769585253457, + "grad_norm": 1.0230957824823068, + "learning_rate": 1.5947904591990904e-06, + "loss": 0.902834415435791, + "step": 2886 + }, + { + "epoch": 0.6652073732718894, + "grad_norm": 0.8987169052425068, + "learning_rate": 1.5944841504621027e-06, + "loss": 0.7234599590301514, + "step": 2887 + }, + { + "epoch": 0.6654377880184332, + "grad_norm": 0.9849005395145788, + "learning_rate": 1.5941777554365271e-06, + "loss": 1.0267843008041382, + "step": 2888 + }, + { + "epoch": 0.665668202764977, + "grad_norm": 1.1615941669691254, + "learning_rate": 1.5938712741668376e-06, + "loss": 0.7431002855300903, + "step": 2889 + }, + { + "epoch": 0.6658986175115207, + "grad_norm": 0.8013605201375282, + "learning_rate": 1.5935647066975185e-06, + "loss": 0.7843111753463745, + "step": 2890 + }, + { + "epoch": 0.6661290322580645, + "grad_norm": 0.9498522711625995, + "learning_rate": 1.593258053073068e-06, + "loss": 0.8775256872177124, + "step": 2891 + }, + { + "epoch": 0.6663594470046083, + "grad_norm": 0.8363878343517416, + "learning_rate": 1.5929513133379966e-06, + "loss": 0.7861695289611816, + "step": 2892 + }, + { + "epoch": 0.6665898617511521, + "grad_norm": 1.1446598361432248, + "learning_rate": 1.5926444875368267e-06, + "loss": 0.8721977472305298, + "step": 2893 + }, + { + "epoch": 0.6668202764976958, + "grad_norm": 0.7591669830135314, + "learning_rate": 1.5923375757140941e-06, + "loss": 0.648263692855835, + "step": 2894 + }, + { + "epoch": 0.6670506912442397, + "grad_norm": 0.8984763952333247, + "learning_rate": 
1.592030577914347e-06, + "loss": 0.8334729075431824, + "step": 2895 + }, + { + "epoch": 0.6672811059907834, + "grad_norm": 0.7757586607492352, + "learning_rate": 1.591723494182145e-06, + "loss": 0.6105949878692627, + "step": 2896 + }, + { + "epoch": 0.6675115207373272, + "grad_norm": 0.8562379620561761, + "learning_rate": 1.5914163245620608e-06, + "loss": 0.7895448207855225, + "step": 2897 + }, + { + "epoch": 0.667741935483871, + "grad_norm": 0.9487051467126763, + "learning_rate": 1.5911090690986805e-06, + "loss": 0.8728576302528381, + "step": 2898 + }, + { + "epoch": 0.6679723502304148, + "grad_norm": 0.7480056751597441, + "learning_rate": 1.590801727836601e-06, + "loss": 0.7637856006622314, + "step": 2899 + }, + { + "epoch": 0.6682027649769585, + "grad_norm": 1.0125939986027075, + "learning_rate": 1.590494300820433e-06, + "loss": 0.8988397717475891, + "step": 2900 + }, + { + "epoch": 0.6684331797235024, + "grad_norm": 0.9324485554010499, + "learning_rate": 1.590186788094799e-06, + "loss": 0.7486827373504639, + "step": 2901 + }, + { + "epoch": 0.6686635944700461, + "grad_norm": 0.7629631437151, + "learning_rate": 1.589879189704334e-06, + "loss": 0.8212865591049194, + "step": 2902 + }, + { + "epoch": 0.6688940092165898, + "grad_norm": 0.7640149838894683, + "learning_rate": 1.5895715056936853e-06, + "loss": 0.7421284914016724, + "step": 2903 + }, + { + "epoch": 0.6691244239631337, + "grad_norm": 0.8407199034997399, + "learning_rate": 1.5892637361075132e-06, + "loss": 0.8721676468849182, + "step": 2904 + }, + { + "epoch": 0.6693548387096774, + "grad_norm": 0.9214400782360851, + "learning_rate": 1.58895588099049e-06, + "loss": 0.7265836000442505, + "step": 2905 + }, + { + "epoch": 0.6695852534562212, + "grad_norm": 0.959235173078028, + "learning_rate": 1.5886479403873e-06, + "loss": 0.863615870475769, + "step": 2906 + }, + { + "epoch": 0.669815668202765, + "grad_norm": 0.788219849900096, + "learning_rate": 1.588339914342641e-06, + "loss": 0.8362177610397339, + "step": 
2907 + }, + { + "epoch": 0.6700460829493088, + "grad_norm": 1.0142262876785297, + "learning_rate": 1.5880318029012223e-06, + "loss": 0.9076892137527466, + "step": 2908 + }, + { + "epoch": 0.6702764976958525, + "grad_norm": 0.957653217332238, + "learning_rate": 1.5877236061077658e-06, + "loss": 0.9149065017700195, + "step": 2909 + }, + { + "epoch": 0.6705069124423964, + "grad_norm": 0.8820705070600866, + "learning_rate": 1.5874153240070062e-06, + "loss": 0.7761013507843018, + "step": 2910 + }, + { + "epoch": 0.6707373271889401, + "grad_norm": 1.049261864076062, + "learning_rate": 1.5871069566436894e-06, + "loss": 0.8671830892562866, + "step": 2911 + }, + { + "epoch": 0.6709677419354839, + "grad_norm": 0.9461120142941367, + "learning_rate": 1.5867985040625755e-06, + "loss": 0.9433870315551758, + "step": 2912 + }, + { + "epoch": 0.6711981566820276, + "grad_norm": 0.934114103387592, + "learning_rate": 1.5864899663084352e-06, + "loss": 0.8009352684020996, + "step": 2913 + }, + { + "epoch": 0.6714285714285714, + "grad_norm": 0.9285902098427739, + "learning_rate": 1.5861813434260528e-06, + "loss": 0.6813808083534241, + "step": 2914 + }, + { + "epoch": 0.6716589861751152, + "grad_norm": 0.7891360814530397, + "learning_rate": 1.5858726354602248e-06, + "loss": 0.712783932685852, + "step": 2915 + }, + { + "epoch": 0.6718894009216589, + "grad_norm": 0.9971879600214522, + "learning_rate": 1.5855638424557588e-06, + "loss": 0.7871056795120239, + "step": 2916 + }, + { + "epoch": 0.6721198156682028, + "grad_norm": 0.9551471269364743, + "learning_rate": 1.5852549644574766e-06, + "loss": 0.8590981960296631, + "step": 2917 + }, + { + "epoch": 0.6723502304147465, + "grad_norm": 0.9338373296128487, + "learning_rate": 1.584946001510211e-06, + "loss": 0.7952913641929626, + "step": 2918 + }, + { + "epoch": 0.6725806451612903, + "grad_norm": 1.0716689971646949, + "learning_rate": 1.5846369536588078e-06, + "loss": 0.8567384481430054, + "step": 2919 + }, + { + "epoch": 0.6728110599078341, + 
"grad_norm": 1.0797852963412387, + "learning_rate": 1.5843278209481246e-06, + "loss": 0.859541654586792, + "step": 2920 + }, + { + "epoch": 0.6730414746543779, + "grad_norm": 1.1734504357127358, + "learning_rate": 1.5840186034230318e-06, + "loss": 0.7843801975250244, + "step": 2921 + }, + { + "epoch": 0.6732718894009216, + "grad_norm": 0.7736885985619673, + "learning_rate": 1.5837093011284118e-06, + "loss": 0.7448940277099609, + "step": 2922 + }, + { + "epoch": 0.6735023041474655, + "grad_norm": 1.0803788544256392, + "learning_rate": 1.5833999141091593e-06, + "loss": 0.9325242042541504, + "step": 2923 + }, + { + "epoch": 0.6737327188940092, + "grad_norm": 1.2302390941080075, + "learning_rate": 1.5830904424101816e-06, + "loss": 0.8005647659301758, + "step": 2924 + }, + { + "epoch": 0.673963133640553, + "grad_norm": 0.9271295903754758, + "learning_rate": 1.5827808860763984e-06, + "loss": 0.8897464275360107, + "step": 2925 + }, + { + "epoch": 0.6741935483870968, + "grad_norm": 1.0218758099034497, + "learning_rate": 1.5824712451527409e-06, + "loss": 0.8319039344787598, + "step": 2926 + }, + { + "epoch": 0.6744239631336405, + "grad_norm": 1.0734614103347653, + "learning_rate": 1.5821615196841533e-06, + "loss": 0.7638111114501953, + "step": 2927 + }, + { + "epoch": 0.6746543778801843, + "grad_norm": 0.8552316991076688, + "learning_rate": 1.581851709715592e-06, + "loss": 0.7617092132568359, + "step": 2928 + }, + { + "epoch": 0.6748847926267281, + "grad_norm": 1.0119419737078916, + "learning_rate": 1.581541815292025e-06, + "loss": 0.813319742679596, + "step": 2929 + }, + { + "epoch": 0.6751152073732719, + "grad_norm": 0.8324815306646182, + "learning_rate": 1.5812318364584334e-06, + "loss": 0.7495343089103699, + "step": 2930 + }, + { + "epoch": 0.6753456221198156, + "grad_norm": 1.0070331562925772, + "learning_rate": 1.5809217732598103e-06, + "loss": 0.9064745306968689, + "step": 2931 + }, + { + "epoch": 0.6755760368663595, + "grad_norm": 0.77529378116571, + 
"learning_rate": 1.580611625741161e-06, + "loss": 0.699098527431488, + "step": 2932 + }, + { + "epoch": 0.6758064516129032, + "grad_norm": 0.9525126023464006, + "learning_rate": 1.5803013939475025e-06, + "loss": 0.9168096780776978, + "step": 2933 + }, + { + "epoch": 0.676036866359447, + "grad_norm": 0.8145178437764095, + "learning_rate": 1.5799910779238652e-06, + "loss": 0.8848644495010376, + "step": 2934 + }, + { + "epoch": 0.6762672811059908, + "grad_norm": 0.8852934324704809, + "learning_rate": 1.5796806777152903e-06, + "loss": 0.7795228958129883, + "step": 2935 + }, + { + "epoch": 0.6764976958525346, + "grad_norm": 0.9901973226971541, + "learning_rate": 1.5793701933668327e-06, + "loss": 0.9287698268890381, + "step": 2936 + }, + { + "epoch": 0.6767281105990783, + "grad_norm": 0.9605403793187631, + "learning_rate": 1.5790596249235587e-06, + "loss": 0.8661396503448486, + "step": 2937 + }, + { + "epoch": 0.6769585253456222, + "grad_norm": 1.0073544692346657, + "learning_rate": 1.5787489724305464e-06, + "loss": 0.7544706463813782, + "step": 2938 + }, + { + "epoch": 0.6771889400921659, + "grad_norm": 1.350397583464208, + "learning_rate": 1.5784382359328872e-06, + "loss": 0.8613651990890503, + "step": 2939 + }, + { + "epoch": 0.6774193548387096, + "grad_norm": 1.0225856960398716, + "learning_rate": 1.5781274154756833e-06, + "loss": 0.8695065975189209, + "step": 2940 + }, + { + "epoch": 0.6776497695852535, + "grad_norm": 1.1450515007973723, + "learning_rate": 1.577816511104051e-06, + "loss": 0.9453287720680237, + "step": 2941 + }, + { + "epoch": 0.6778801843317972, + "grad_norm": 0.7720442193305806, + "learning_rate": 1.577505522863117e-06, + "loss": 0.8599261045455933, + "step": 2942 + }, + { + "epoch": 0.678110599078341, + "grad_norm": 0.8831442525084486, + "learning_rate": 1.5771944507980205e-06, + "loss": 0.8143391609191895, + "step": 2943 + }, + { + "epoch": 0.6783410138248848, + "grad_norm": 0.9328639928073722, + "learning_rate": 1.576883294953914e-06, + "loss": 
0.9558438062667847, + "step": 2944 + }, + { + "epoch": 0.6785714285714286, + "grad_norm": 0.6484366074680237, + "learning_rate": 1.5765720553759605e-06, + "loss": 0.7348268628120422, + "step": 2945 + }, + { + "epoch": 0.6788018433179723, + "grad_norm": 1.0387482604326927, + "learning_rate": 1.5762607321093366e-06, + "loss": 0.9361155033111572, + "step": 2946 + }, + { + "epoch": 0.6790322580645162, + "grad_norm": 0.9855095789147831, + "learning_rate": 1.5759493251992303e-06, + "loss": 0.8094985485076904, + "step": 2947 + }, + { + "epoch": 0.6792626728110599, + "grad_norm": 1.631714554631539, + "learning_rate": 1.575637834690842e-06, + "loss": 0.8746658563613892, + "step": 2948 + }, + { + "epoch": 0.6794930875576037, + "grad_norm": 0.9249217331606766, + "learning_rate": 1.575326260629384e-06, + "loss": 0.7433050870895386, + "step": 2949 + }, + { + "epoch": 0.6797235023041475, + "grad_norm": 0.9856239464338491, + "learning_rate": 1.5750146030600808e-06, + "loss": 0.8621053695678711, + "step": 2950 + }, + { + "epoch": 0.6799539170506912, + "grad_norm": 0.9119478915395727, + "learning_rate": 1.5747028620281695e-06, + "loss": 0.7541971206665039, + "step": 2951 + }, + { + "epoch": 0.680184331797235, + "grad_norm": 1.0099311239329205, + "learning_rate": 1.5743910375788982e-06, + "loss": 0.9817987680435181, + "step": 2952 + }, + { + "epoch": 0.6804147465437788, + "grad_norm": 1.046074262522893, + "learning_rate": 1.5740791297575283e-06, + "loss": 0.7763534188270569, + "step": 2953 + }, + { + "epoch": 0.6806451612903226, + "grad_norm": 1.0303747349913415, + "learning_rate": 1.573767138609333e-06, + "loss": 0.7482337355613708, + "step": 2954 + }, + { + "epoch": 0.6808755760368663, + "grad_norm": 1.0308347032013807, + "learning_rate": 1.5734550641795967e-06, + "loss": 0.7352473735809326, + "step": 2955 + }, + { + "epoch": 0.6811059907834102, + "grad_norm": 0.9086715245515472, + "learning_rate": 1.573142906513617e-06, + "loss": 0.8657293319702148, + "step": 2956 + }, + { + 
"epoch": 0.6813364055299539, + "grad_norm": 0.9597438975913184, + "learning_rate": 1.5728306656567033e-06, + "loss": 0.8035376667976379, + "step": 2957 + }, + { + "epoch": 0.6815668202764977, + "grad_norm": 0.9481340627224691, + "learning_rate": 1.572518341654177e-06, + "loss": 0.8030140399932861, + "step": 2958 + }, + { + "epoch": 0.6817972350230415, + "grad_norm": 0.956950799259568, + "learning_rate": 1.5722059345513711e-06, + "loss": 0.797377347946167, + "step": 2959 + }, + { + "epoch": 0.6820276497695853, + "grad_norm": 0.7086079395333297, + "learning_rate": 1.5718934443936311e-06, + "loss": 0.7041053175926208, + "step": 2960 + }, + { + "epoch": 0.682258064516129, + "grad_norm": 1.0251660128790803, + "learning_rate": 1.571580871226315e-06, + "loss": 0.7911885976791382, + "step": 2961 + }, + { + "epoch": 0.6824884792626729, + "grad_norm": 0.8834527581303466, + "learning_rate": 1.5712682150947922e-06, + "loss": 0.7908599376678467, + "step": 2962 + }, + { + "epoch": 0.6827188940092166, + "grad_norm": 0.8159267525070817, + "learning_rate": 1.5709554760444442e-06, + "loss": 0.860281229019165, + "step": 2963 + }, + { + "epoch": 0.6829493087557603, + "grad_norm": 0.8226887233242035, + "learning_rate": 1.5706426541206645e-06, + "loss": 0.6987707018852234, + "step": 2964 + }, + { + "epoch": 0.6831797235023042, + "grad_norm": 0.8719992040747229, + "learning_rate": 1.5703297493688592e-06, + "loss": 0.7198495864868164, + "step": 2965 + }, + { + "epoch": 0.6834101382488479, + "grad_norm": 1.1775957395401402, + "learning_rate": 1.5700167618344455e-06, + "loss": 0.8232598304748535, + "step": 2966 + }, + { + "epoch": 0.6836405529953917, + "grad_norm": 0.8962037845514019, + "learning_rate": 1.569703691562854e-06, + "loss": 0.8425456285476685, + "step": 2967 + }, + { + "epoch": 0.6838709677419355, + "grad_norm": 0.8746880672166448, + "learning_rate": 1.5693905385995252e-06, + "loss": 0.7758797407150269, + "step": 2968 + }, + { + "epoch": 0.6841013824884793, + "grad_norm": 
0.9739325658587258, + "learning_rate": 1.569077302989914e-06, + "loss": 0.7478910684585571, + "step": 2969 + }, + { + "epoch": 0.684331797235023, + "grad_norm": 0.88099670074057, + "learning_rate": 1.5687639847794854e-06, + "loss": 0.8274309635162354, + "step": 2970 + }, + { + "epoch": 0.6845622119815669, + "grad_norm": 0.9125307567181903, + "learning_rate": 1.5684505840137173e-06, + "loss": 0.6800183653831482, + "step": 2971 + }, + { + "epoch": 0.6847926267281106, + "grad_norm": 1.1416810893109246, + "learning_rate": 1.5681371007380996e-06, + "loss": 0.7768006324768066, + "step": 2972 + }, + { + "epoch": 0.6850230414746544, + "grad_norm": 0.8308804334079786, + "learning_rate": 1.5678235349981338e-06, + "loss": 0.7462732195854187, + "step": 2973 + }, + { + "epoch": 0.6852534562211982, + "grad_norm": 0.935725297382271, + "learning_rate": 1.5675098868393335e-06, + "loss": 0.8461781144142151, + "step": 2974 + }, + { + "epoch": 0.6854838709677419, + "grad_norm": 0.9717984846524689, + "learning_rate": 1.5671961563072244e-06, + "loss": 0.7968491911888123, + "step": 2975 + }, + { + "epoch": 0.6857142857142857, + "grad_norm": 0.9710985084042064, + "learning_rate": 1.5668823434473443e-06, + "loss": 0.805394172668457, + "step": 2976 + }, + { + "epoch": 0.6859447004608294, + "grad_norm": 0.9297793560483373, + "learning_rate": 1.5665684483052424e-06, + "loss": 0.7241736650466919, + "step": 2977 + }, + { + "epoch": 0.6861751152073733, + "grad_norm": 0.9673260038513803, + "learning_rate": 1.5662544709264801e-06, + "loss": 0.7345866560935974, + "step": 2978 + }, + { + "epoch": 0.686405529953917, + "grad_norm": 0.8604134561659843, + "learning_rate": 1.5659404113566312e-06, + "loss": 0.7605085372924805, + "step": 2979 + }, + { + "epoch": 0.6866359447004609, + "grad_norm": 0.9618303204830516, + "learning_rate": 1.5656262696412808e-06, + "loss": 0.8555188179016113, + "step": 2980 + }, + { + "epoch": 0.6868663594470046, + "grad_norm": 0.8604009092225049, + "learning_rate": 
1.5653120458260261e-06, + "loss": 0.7139542698860168, + "step": 2981 + }, + { + "epoch": 0.6870967741935484, + "grad_norm": 0.9290410772154322, + "learning_rate": 1.564997739956476e-06, + "loss": 0.8676587343215942, + "step": 2982 + }, + { + "epoch": 0.6873271889400921, + "grad_norm": 0.9524807718966832, + "learning_rate": 1.5646833520782523e-06, + "loss": 0.8121025562286377, + "step": 2983 + }, + { + "epoch": 0.687557603686636, + "grad_norm": 0.7889521702672326, + "learning_rate": 1.5643688822369873e-06, + "loss": 0.7757136821746826, + "step": 2984 + }, + { + "epoch": 0.6877880184331797, + "grad_norm": 0.8884194014759353, + "learning_rate": 1.5640543304783264e-06, + "loss": 0.8357381820678711, + "step": 2985 + }, + { + "epoch": 0.6880184331797236, + "grad_norm": 0.9725078170053829, + "learning_rate": 1.563739696847926e-06, + "loss": 0.8635811805725098, + "step": 2986 + }, + { + "epoch": 0.6882488479262673, + "grad_norm": 0.9539959391598165, + "learning_rate": 1.563424981391455e-06, + "loss": 0.90900057554245, + "step": 2987 + }, + { + "epoch": 0.688479262672811, + "grad_norm": 1.056070683011334, + "learning_rate": 1.563110184154594e-06, + "loss": 0.9001314043998718, + "step": 2988 + }, + { + "epoch": 0.6887096774193548, + "grad_norm": 0.7893194308475292, + "learning_rate": 1.5627953051830353e-06, + "loss": 0.7482000589370728, + "step": 2989 + }, + { + "epoch": 0.6889400921658986, + "grad_norm": 1.0183435769639337, + "learning_rate": 1.5624803445224829e-06, + "loss": 0.8504235744476318, + "step": 2990 + }, + { + "epoch": 0.6891705069124424, + "grad_norm": 0.9687684393899343, + "learning_rate": 1.5621653022186526e-06, + "loss": 0.7887089252471924, + "step": 2991 + }, + { + "epoch": 0.6894009216589861, + "grad_norm": 0.9412995775666883, + "learning_rate": 1.5618501783172735e-06, + "loss": 0.8745719790458679, + "step": 2992 + }, + { + "epoch": 0.68963133640553, + "grad_norm": 0.8960957701589951, + "learning_rate": 1.5615349728640848e-06, + "loss": 0.8269633054733276, 
+ "step": 2993 + }, + { + "epoch": 0.6898617511520737, + "grad_norm": 0.802430248071724, + "learning_rate": 1.5612196859048382e-06, + "loss": 0.7355072498321533, + "step": 2994 + }, + { + "epoch": 0.6900921658986175, + "grad_norm": 0.9768940563158048, + "learning_rate": 1.5609043174852966e-06, + "loss": 0.857653021812439, + "step": 2995 + }, + { + "epoch": 0.6903225806451613, + "grad_norm": 1.0766498115550724, + "learning_rate": 1.5605888676512365e-06, + "loss": 0.8575785160064697, + "step": 2996 + }, + { + "epoch": 0.6905529953917051, + "grad_norm": 0.8803208034747956, + "learning_rate": 1.560273336448444e-06, + "loss": 0.8631561994552612, + "step": 2997 + }, + { + "epoch": 0.6907834101382488, + "grad_norm": 1.0014936433552548, + "learning_rate": 1.5599577239227185e-06, + "loss": 0.7993800044059753, + "step": 2998 + }, + { + "epoch": 0.6910138248847926, + "grad_norm": 0.8990076202156756, + "learning_rate": 1.5596420301198707e-06, + "loss": 0.7961007356643677, + "step": 2999 + }, + { + "epoch": 0.6912442396313364, + "grad_norm": 1.0216355950582598, + "learning_rate": 1.5593262550857232e-06, + "loss": 0.7536421418190002, + "step": 3000 + }, + { + "epoch": 0.6914746543778801, + "grad_norm": 0.8348839196110558, + "learning_rate": 1.55901039886611e-06, + "loss": 0.70341956615448, + "step": 3001 + }, + { + "epoch": 0.691705069124424, + "grad_norm": 1.0093771985733984, + "learning_rate": 1.5586944615068776e-06, + "loss": 0.8152127265930176, + "step": 3002 + }, + { + "epoch": 0.6919354838709677, + "grad_norm": 0.9332692294841357, + "learning_rate": 1.5583784430538838e-06, + "loss": 0.6728770732879639, + "step": 3003 + }, + { + "epoch": 0.6921658986175115, + "grad_norm": 1.0871891474224546, + "learning_rate": 1.558062343552998e-06, + "loss": 0.8406884670257568, + "step": 3004 + }, + { + "epoch": 0.6923963133640553, + "grad_norm": 0.8920706269230131, + "learning_rate": 1.5577461630501018e-06, + "loss": 0.766754686832428, + "step": 3005 + }, + { + "epoch": 
0.6926267281105991, + "grad_norm": 0.714004026253109, + "learning_rate": 1.5574299015910889e-06, + "loss": 0.7456642389297485, + "step": 3006 + }, + { + "epoch": 0.6928571428571428, + "grad_norm": 0.8290815943958627, + "learning_rate": 1.557113559221863e-06, + "loss": 0.7834097743034363, + "step": 3007 + }, + { + "epoch": 0.6930875576036867, + "grad_norm": 0.91346801287595, + "learning_rate": 1.556797135988342e-06, + "loss": 0.7425946593284607, + "step": 3008 + }, + { + "epoch": 0.6933179723502304, + "grad_norm": 1.0483330104966306, + "learning_rate": 1.5564806319364534e-06, + "loss": 0.7914093732833862, + "step": 3009 + }, + { + "epoch": 0.6935483870967742, + "grad_norm": 0.9665010461345012, + "learning_rate": 1.556164047112138e-06, + "loss": 0.819783091545105, + "step": 3010 + }, + { + "epoch": 0.693778801843318, + "grad_norm": 0.985903986481312, + "learning_rate": 1.5558473815613474e-06, + "loss": 0.7147302627563477, + "step": 3011 + }, + { + "epoch": 0.6940092165898617, + "grad_norm": 1.1240220664371217, + "learning_rate": 1.5555306353300452e-06, + "loss": 0.7247470617294312, + "step": 3012 + }, + { + "epoch": 0.6942396313364055, + "grad_norm": 1.2403633886338306, + "learning_rate": 1.5552138084642067e-06, + "loss": 0.8277294635772705, + "step": 3013 + }, + { + "epoch": 0.6944700460829493, + "grad_norm": 0.9054626931882043, + "learning_rate": 1.554896901009819e-06, + "loss": 0.8014394640922546, + "step": 3014 + }, + { + "epoch": 0.6947004608294931, + "grad_norm": 0.9274937399954835, + "learning_rate": 1.5545799130128808e-06, + "loss": 0.7468869686126709, + "step": 3015 + }, + { + "epoch": 0.6949308755760368, + "grad_norm": 0.8904964499744723, + "learning_rate": 1.554262844519402e-06, + "loss": 0.7854933142662048, + "step": 3016 + }, + { + "epoch": 0.6951612903225807, + "grad_norm": 0.9536718451900233, + "learning_rate": 1.5539456955754053e-06, + "loss": 0.8359543681144714, + "step": 3017 + }, + { + "epoch": 0.6953917050691244, + "grad_norm": 0.8313774511874621, 
+ "learning_rate": 1.5536284662269243e-06, + "loss": 0.7767773866653442, + "step": 3018 + }, + { + "epoch": 0.6956221198156682, + "grad_norm": 0.7370790678700915, + "learning_rate": 1.5533111565200044e-06, + "loss": 0.8388162851333618, + "step": 3019 + }, + { + "epoch": 0.695852534562212, + "grad_norm": 0.9159856551917743, + "learning_rate": 1.5529937665007024e-06, + "loss": 0.7791208028793335, + "step": 3020 + }, + { + "epoch": 0.6960829493087558, + "grad_norm": 0.9740300384215894, + "learning_rate": 1.5526762962150875e-06, + "loss": 0.8662698864936829, + "step": 3021 + }, + { + "epoch": 0.6963133640552995, + "grad_norm": 0.7004253764922403, + "learning_rate": 1.5523587457092394e-06, + "loss": 0.737492024898529, + "step": 3022 + }, + { + "epoch": 0.6965437788018434, + "grad_norm": 1.0408775765092733, + "learning_rate": 1.552041115029251e-06, + "loss": 0.83610999584198, + "step": 3023 + }, + { + "epoch": 0.6967741935483871, + "grad_norm": 1.1134023704947162, + "learning_rate": 1.5517234042212254e-06, + "loss": 0.930977463722229, + "step": 3024 + }, + { + "epoch": 0.6970046082949308, + "grad_norm": 0.8756044667716456, + "learning_rate": 1.551405613331278e-06, + "loss": 0.7587058544158936, + "step": 3025 + }, + { + "epoch": 0.6972350230414747, + "grad_norm": 0.7720525053545241, + "learning_rate": 1.551087742405536e-06, + "loss": 0.7549247741699219, + "step": 3026 + }, + { + "epoch": 0.6974654377880184, + "grad_norm": 0.8108175030001162, + "learning_rate": 1.5507697914901376e-06, + "loss": 0.6906812787055969, + "step": 3027 + }, + { + "epoch": 0.6976958525345622, + "grad_norm": 0.7358502568670926, + "learning_rate": 1.5504517606312332e-06, + "loss": 0.7806124687194824, + "step": 3028 + }, + { + "epoch": 0.697926267281106, + "grad_norm": 0.8191496367359047, + "learning_rate": 1.5501336498749846e-06, + "loss": 0.8091036081314087, + "step": 3029 + }, + { + "epoch": 0.6981566820276498, + "grad_norm": 0.923718506351422, + "learning_rate": 1.5498154592675646e-06, + "loss": 
0.721937894821167, + "step": 3030 + }, + { + "epoch": 0.6983870967741935, + "grad_norm": 0.729194360630959, + "learning_rate": 1.5494971888551587e-06, + "loss": 0.712378740310669, + "step": 3031 + }, + { + "epoch": 0.6986175115207374, + "grad_norm": 0.9809936276606201, + "learning_rate": 1.5491788386839635e-06, + "loss": 0.8106495141983032, + "step": 3032 + }, + { + "epoch": 0.6988479262672811, + "grad_norm": 1.0550994014291641, + "learning_rate": 1.5488604088001866e-06, + "loss": 0.7886521816253662, + "step": 3033 + }, + { + "epoch": 0.6990783410138249, + "grad_norm": 0.9413909460240358, + "learning_rate": 1.5485418992500479e-06, + "loss": 0.7483402490615845, + "step": 3034 + }, + { + "epoch": 0.6993087557603687, + "grad_norm": 0.9735513924670123, + "learning_rate": 1.5482233100797788e-06, + "loss": 0.6236725449562073, + "step": 3035 + }, + { + "epoch": 0.6995391705069124, + "grad_norm": 1.023064942988146, + "learning_rate": 1.5479046413356222e-06, + "loss": 0.9477910995483398, + "step": 3036 + }, + { + "epoch": 0.6997695852534562, + "grad_norm": 1.0993186685690193, + "learning_rate": 1.5475858930638322e-06, + "loss": 0.8921213746070862, + "step": 3037 + }, + { + "epoch": 0.7, + "grad_norm": 0.7179145673247356, + "learning_rate": 1.5472670653106744e-06, + "loss": 0.7460963726043701, + "step": 3038 + }, + { + "epoch": 0.7002304147465438, + "grad_norm": 0.8319225077693166, + "learning_rate": 1.5469481581224271e-06, + "loss": 0.6135849356651306, + "step": 3039 + }, + { + "epoch": 0.7004608294930875, + "grad_norm": 0.8739744675210649, + "learning_rate": 1.546629171545378e-06, + "loss": 0.8039313554763794, + "step": 3040 + }, + { + "epoch": 0.7006912442396314, + "grad_norm": 1.2210857419731846, + "learning_rate": 1.5463101056258289e-06, + "loss": 0.8751651048660278, + "step": 3041 + }, + { + "epoch": 0.7009216589861751, + "grad_norm": 0.9070575590392688, + "learning_rate": 1.545990960410091e-06, + "loss": 0.7600879669189453, + "step": 3042 + }, + { + "epoch": 
0.7011520737327189, + "grad_norm": 0.9983949583794295, + "learning_rate": 1.545671735944488e-06, + "loss": 0.8118841648101807, + "step": 3043 + }, + { + "epoch": 0.7013824884792627, + "grad_norm": 0.7470799565000998, + "learning_rate": 1.5453524322753546e-06, + "loss": 0.7144184112548828, + "step": 3044 + }, + { + "epoch": 0.7016129032258065, + "grad_norm": 1.149288210915265, + "learning_rate": 1.545033049449038e-06, + "loss": 0.9730075001716614, + "step": 3045 + }, + { + "epoch": 0.7018433179723502, + "grad_norm": 0.9334735321523672, + "learning_rate": 1.5447135875118957e-06, + "loss": 0.6930910348892212, + "step": 3046 + }, + { + "epoch": 0.7020737327188941, + "grad_norm": 1.0190518922073715, + "learning_rate": 1.5443940465102973e-06, + "loss": 0.8517031669616699, + "step": 3047 + }, + { + "epoch": 0.7023041474654378, + "grad_norm": 0.9199109424213672, + "learning_rate": 1.5440744264906237e-06, + "loss": 0.7939779758453369, + "step": 3048 + }, + { + "epoch": 0.7025345622119815, + "grad_norm": 1.0310125567194028, + "learning_rate": 1.5437547274992672e-06, + "loss": 0.8946782350540161, + "step": 3049 + }, + { + "epoch": 0.7027649769585254, + "grad_norm": 1.1682685309372194, + "learning_rate": 1.543434949582632e-06, + "loss": 0.9273954033851624, + "step": 3050 + }, + { + "epoch": 0.7029953917050691, + "grad_norm": 0.8496559046178408, + "learning_rate": 1.5431150927871333e-06, + "loss": 0.7731457352638245, + "step": 3051 + }, + { + "epoch": 0.7032258064516129, + "grad_norm": 0.9900519408386056, + "learning_rate": 1.542795157159198e-06, + "loss": 0.7982608079910278, + "step": 3052 + }, + { + "epoch": 0.7034562211981567, + "grad_norm": 1.0252185126476046, + "learning_rate": 1.542475142745264e-06, + "loss": 0.8422989845275879, + "step": 3053 + }, + { + "epoch": 0.7036866359447005, + "grad_norm": 1.1364598749635721, + "learning_rate": 1.542155049591781e-06, + "loss": 0.8344876766204834, + "step": 3054 + }, + { + "epoch": 0.7039170506912442, + "grad_norm": 
1.3240029855230715, + "learning_rate": 1.541834877745211e-06, + "loss": 0.8830629587173462, + "step": 3055 + }, + { + "epoch": 0.7041474654377881, + "grad_norm": 0.8841605120149971, + "learning_rate": 1.5415146272520247e-06, + "loss": 0.823864221572876, + "step": 3056 + }, + { + "epoch": 0.7043778801843318, + "grad_norm": 1.226256029650695, + "learning_rate": 1.5411942981587077e-06, + "loss": 0.8577016592025757, + "step": 3057 + }, + { + "epoch": 0.7046082949308756, + "grad_norm": 0.9938154526101401, + "learning_rate": 1.540873890511755e-06, + "loss": 0.7431750297546387, + "step": 3058 + }, + { + "epoch": 0.7048387096774194, + "grad_norm": 1.3100911793106818, + "learning_rate": 1.5405534043576729e-06, + "loss": 0.8219394683837891, + "step": 3059 + }, + { + "epoch": 0.7050691244239631, + "grad_norm": 0.8179546123014678, + "learning_rate": 1.5402328397429795e-06, + "loss": 0.706437349319458, + "step": 3060 + }, + { + "epoch": 0.7052995391705069, + "grad_norm": 0.9400567182130463, + "learning_rate": 1.5399121967142051e-06, + "loss": 0.8669443130493164, + "step": 3061 + }, + { + "epoch": 0.7055299539170506, + "grad_norm": 0.9808762608140087, + "learning_rate": 1.5395914753178897e-06, + "loss": 0.7995564937591553, + "step": 3062 + }, + { + "epoch": 0.7057603686635945, + "grad_norm": 1.0691077372052262, + "learning_rate": 1.5392706756005862e-06, + "loss": 0.7840889692306519, + "step": 3063 + }, + { + "epoch": 0.7059907834101382, + "grad_norm": 0.9593102373354429, + "learning_rate": 1.5389497976088582e-06, + "loss": 0.8231604695320129, + "step": 3064 + }, + { + "epoch": 0.706221198156682, + "grad_norm": 1.0423471516482703, + "learning_rate": 1.5386288413892801e-06, + "loss": 0.7821571826934814, + "step": 3065 + }, + { + "epoch": 0.7064516129032258, + "grad_norm": 0.9221304357539406, + "learning_rate": 1.538307806988439e-06, + "loss": 0.736830472946167, + "step": 3066 + }, + { + "epoch": 0.7066820276497696, + "grad_norm": 0.8124713959576904, + "learning_rate": 
1.537986694452932e-06, + "loss": 0.7783113718032837, + "step": 3067 + }, + { + "epoch": 0.7069124423963133, + "grad_norm": 0.8679700879266566, + "learning_rate": 1.5376655038293692e-06, + "loss": 0.8000421524047852, + "step": 3068 + }, + { + "epoch": 0.7071428571428572, + "grad_norm": 0.8513728527683974, + "learning_rate": 1.5373442351643696e-06, + "loss": 0.7446980476379395, + "step": 3069 + }, + { + "epoch": 0.7073732718894009, + "grad_norm": 0.8188336762916474, + "learning_rate": 1.537022888504566e-06, + "loss": 0.7018321752548218, + "step": 3070 + }, + { + "epoch": 0.7076036866359448, + "grad_norm": 0.8259052522128728, + "learning_rate": 1.5367014638966008e-06, + "loss": 0.6903716325759888, + "step": 3071 + }, + { + "epoch": 0.7078341013824885, + "grad_norm": 1.0909385113291765, + "learning_rate": 1.5363799613871289e-06, + "loss": 0.9635254144668579, + "step": 3072 + }, + { + "epoch": 0.7080645161290322, + "grad_norm": 0.7335179559352851, + "learning_rate": 1.5360583810228156e-06, + "loss": 0.8612154722213745, + "step": 3073 + }, + { + "epoch": 0.708294930875576, + "grad_norm": 0.9395034612023028, + "learning_rate": 1.5357367228503376e-06, + "loss": 0.8632407784461975, + "step": 3074 + }, + { + "epoch": 0.7085253456221198, + "grad_norm": 0.9383639731759232, + "learning_rate": 1.5354149869163839e-06, + "loss": 0.8117856979370117, + "step": 3075 + }, + { + "epoch": 0.7087557603686636, + "grad_norm": 0.9770895875008837, + "learning_rate": 1.5350931732676538e-06, + "loss": 0.8062559366226196, + "step": 3076 + }, + { + "epoch": 0.7089861751152073, + "grad_norm": 0.9191794034062433, + "learning_rate": 1.5347712819508576e-06, + "loss": 0.7918965816497803, + "step": 3077 + }, + { + "epoch": 0.7092165898617512, + "grad_norm": 0.7897301018455927, + "learning_rate": 1.534449313012718e-06, + "loss": 0.7564986944198608, + "step": 3078 + }, + { + "epoch": 0.7094470046082949, + "grad_norm": 0.774017262501344, + "learning_rate": 1.534127266499968e-06, + "loss": 
0.8261928558349609, + "step": 3079 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.9288792217475005, + "learning_rate": 1.5338051424593524e-06, + "loss": 0.705269455909729, + "step": 3080 + }, + { + "epoch": 0.7099078341013825, + "grad_norm": 0.8500383243043894, + "learning_rate": 1.5334829409376271e-06, + "loss": 0.823144793510437, + "step": 3081 + }, + { + "epoch": 0.7101382488479263, + "grad_norm": 0.7512588375717618, + "learning_rate": 1.5331606619815588e-06, + "loss": 0.7772066593170166, + "step": 3082 + }, + { + "epoch": 0.71036866359447, + "grad_norm": 1.0827682012637947, + "learning_rate": 1.5328383056379265e-06, + "loss": 0.8901097178459167, + "step": 3083 + }, + { + "epoch": 0.7105990783410139, + "grad_norm": 0.9540489638748495, + "learning_rate": 1.5325158719535196e-06, + "loss": 0.8454819917678833, + "step": 3084 + }, + { + "epoch": 0.7108294930875576, + "grad_norm": 0.8879734338037916, + "learning_rate": 1.5321933609751388e-06, + "loss": 0.8444693684577942, + "step": 3085 + }, + { + "epoch": 0.7110599078341013, + "grad_norm": 1.0157021807199436, + "learning_rate": 1.5318707727495964e-06, + "loss": 0.7893826961517334, + "step": 3086 + }, + { + "epoch": 0.7112903225806452, + "grad_norm": 0.9711563338551928, + "learning_rate": 1.531548107323715e-06, + "loss": 0.7536686658859253, + "step": 3087 + }, + { + "epoch": 0.7115207373271889, + "grad_norm": 1.1272305964721914, + "learning_rate": 1.53122536474433e-06, + "loss": 0.8105358481407166, + "step": 3088 + }, + { + "epoch": 0.7117511520737327, + "grad_norm": 0.8430783893005721, + "learning_rate": 1.530902545058286e-06, + "loss": 0.8104212284088135, + "step": 3089 + }, + { + "epoch": 0.7119815668202765, + "grad_norm": 1.1740010494566606, + "learning_rate": 1.5305796483124405e-06, + "loss": 0.7738373279571533, + "step": 3090 + }, + { + "epoch": 0.7122119815668203, + "grad_norm": 0.8346644560955941, + "learning_rate": 1.5302566745536618e-06, + "loss": 0.7583746910095215, + "step": 3091 + }, + { + 
"epoch": 0.712442396313364, + "grad_norm": 1.0290772907257426, + "learning_rate": 1.5299336238288286e-06, + "loss": 0.8370871543884277, + "step": 3092 + }, + { + "epoch": 0.7126728110599079, + "grad_norm": 0.8908237623549358, + "learning_rate": 1.5296104961848314e-06, + "loss": 0.7833988666534424, + "step": 3093 + }, + { + "epoch": 0.7129032258064516, + "grad_norm": 1.135734716262211, + "learning_rate": 1.5292872916685717e-06, + "loss": 0.8024515509605408, + "step": 3094 + }, + { + "epoch": 0.7131336405529954, + "grad_norm": 0.8156588034123838, + "learning_rate": 1.5289640103269623e-06, + "loss": 0.8044738173484802, + "step": 3095 + }, + { + "epoch": 0.7133640552995392, + "grad_norm": 0.846268334708117, + "learning_rate": 1.5286406522069273e-06, + "loss": 0.7783721685409546, + "step": 3096 + }, + { + "epoch": 0.7135944700460829, + "grad_norm": 0.8004616169511741, + "learning_rate": 1.5283172173554014e-06, + "loss": 0.693443238735199, + "step": 3097 + }, + { + "epoch": 0.7138248847926267, + "grad_norm": 0.9862921565687749, + "learning_rate": 1.527993705819331e-06, + "loss": 0.8142237663269043, + "step": 3098 + }, + { + "epoch": 0.7140552995391705, + "grad_norm": 0.9077662799949481, + "learning_rate": 1.5276701176456726e-06, + "loss": 0.790626049041748, + "step": 3099 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 1.0485200242859731, + "learning_rate": 1.5273464528813953e-06, + "loss": 0.9460805654525757, + "step": 3100 + }, + { + "epoch": 0.714516129032258, + "grad_norm": 0.902776913050398, + "learning_rate": 1.5270227115734789e-06, + "loss": 0.6906337738037109, + "step": 3101 + }, + { + "epoch": 0.7147465437788019, + "grad_norm": 0.8514512995363496, + "learning_rate": 1.526698893768913e-06, + "loss": 0.8828556537628174, + "step": 3102 + }, + { + "epoch": 0.7149769585253456, + "grad_norm": 1.0568586756231748, + "learning_rate": 1.5263749995147004e-06, + "loss": 0.8395771980285645, + "step": 3103 + }, + { + "epoch": 0.7152073732718894, + "grad_norm": 
0.814014727084384, + "learning_rate": 1.5260510288578535e-06, + "loss": 0.7103895545005798, + "step": 3104 + }, + { + "epoch": 0.7154377880184332, + "grad_norm": 1.0670304040497072, + "learning_rate": 1.5257269818453956e-06, + "loss": 0.9780298471450806, + "step": 3105 + }, + { + "epoch": 0.715668202764977, + "grad_norm": 0.777700102492748, + "learning_rate": 1.525402858524363e-06, + "loss": 0.8176128268241882, + "step": 3106 + }, + { + "epoch": 0.7158986175115207, + "grad_norm": 0.8127092170976247, + "learning_rate": 1.5250786589418008e-06, + "loss": 0.6766567230224609, + "step": 3107 + }, + { + "epoch": 0.7161290322580646, + "grad_norm": 0.8076252538068988, + "learning_rate": 1.5247543831447662e-06, + "loss": 0.7910950183868408, + "step": 3108 + }, + { + "epoch": 0.7163594470046083, + "grad_norm": 0.76882132080824, + "learning_rate": 1.5244300311803275e-06, + "loss": 0.8444501161575317, + "step": 3109 + }, + { + "epoch": 0.716589861751152, + "grad_norm": 0.9073390489490682, + "learning_rate": 1.5241056030955642e-06, + "loss": 0.7180038690567017, + "step": 3110 + }, + { + "epoch": 0.7168202764976959, + "grad_norm": 0.8535510406326756, + "learning_rate": 1.5237810989375663e-06, + "loss": 0.8563181757926941, + "step": 3111 + }, + { + "epoch": 0.7170506912442396, + "grad_norm": 0.7281554723991874, + "learning_rate": 1.5234565187534353e-06, + "loss": 0.7792840003967285, + "step": 3112 + }, + { + "epoch": 0.7172811059907834, + "grad_norm": 1.2546504724448617, + "learning_rate": 1.5231318625902835e-06, + "loss": 0.8414837121963501, + "step": 3113 + }, + { + "epoch": 0.7175115207373272, + "grad_norm": 0.9151299107605344, + "learning_rate": 1.5228071304952348e-06, + "loss": 0.8549888134002686, + "step": 3114 + }, + { + "epoch": 0.717741935483871, + "grad_norm": 0.8858229770055023, + "learning_rate": 1.5224823225154228e-06, + "loss": 0.7973321676254272, + "step": 3115 + }, + { + "epoch": 0.7179723502304147, + "grad_norm": 0.8923496131316503, + "learning_rate": 
1.5221574386979937e-06, + "loss": 0.7328228950500488, + "step": 3116 + }, + { + "epoch": 0.7182027649769586, + "grad_norm": 0.8315355877258431, + "learning_rate": 1.5218324790901033e-06, + "loss": 0.8953883051872253, + "step": 3117 + }, + { + "epoch": 0.7184331797235023, + "grad_norm": 0.8252416441396693, + "learning_rate": 1.5215074437389195e-06, + "loss": 0.7804527282714844, + "step": 3118 + }, + { + "epoch": 0.7186635944700461, + "grad_norm": 1.0592650685202745, + "learning_rate": 1.5211823326916204e-06, + "loss": 0.7581363320350647, + "step": 3119 + }, + { + "epoch": 0.7188940092165899, + "grad_norm": 0.9812896234713268, + "learning_rate": 1.520857145995396e-06, + "loss": 0.7720214128494263, + "step": 3120 + }, + { + "epoch": 0.7191244239631336, + "grad_norm": 0.8448153689850479, + "learning_rate": 1.5205318836974463e-06, + "loss": 0.7142826914787292, + "step": 3121 + }, + { + "epoch": 0.7193548387096774, + "grad_norm": 1.0627992363231917, + "learning_rate": 1.520206545844983e-06, + "loss": 0.715612530708313, + "step": 3122 + }, + { + "epoch": 0.7195852534562212, + "grad_norm": 1.1048993433011334, + "learning_rate": 1.5198811324852277e-06, + "loss": 0.8851219415664673, + "step": 3123 + }, + { + "epoch": 0.719815668202765, + "grad_norm": 0.9292687584217408, + "learning_rate": 1.5195556436654146e-06, + "loss": 0.981631875038147, + "step": 3124 + }, + { + "epoch": 0.7200460829493087, + "grad_norm": 1.043088312445038, + "learning_rate": 1.5192300794327876e-06, + "loss": 0.8586313724517822, + "step": 3125 + }, + { + "epoch": 0.7202764976958526, + "grad_norm": 1.082548105463139, + "learning_rate": 1.518904439834602e-06, + "loss": 0.8863250017166138, + "step": 3126 + }, + { + "epoch": 0.7205069124423963, + "grad_norm": 0.8136107336174612, + "learning_rate": 1.5185787249181239e-06, + "loss": 0.864910900592804, + "step": 3127 + }, + { + "epoch": 0.7207373271889401, + "grad_norm": 0.9898417106954193, + "learning_rate": 1.5182529347306302e-06, + "loss": 
0.8120951652526855, + "step": 3128 + }, + { + "epoch": 0.7209677419354839, + "grad_norm": 1.008844559262399, + "learning_rate": 1.517927069319409e-06, + "loss": 0.7866026163101196, + "step": 3129 + }, + { + "epoch": 0.7211981566820277, + "grad_norm": 0.9577789377394936, + "learning_rate": 1.5176011287317598e-06, + "loss": 0.8610655069351196, + "step": 3130 + }, + { + "epoch": 0.7214285714285714, + "grad_norm": 0.8861108738387133, + "learning_rate": 1.5172751130149915e-06, + "loss": 0.7463846206665039, + "step": 3131 + }, + { + "epoch": 0.7216589861751153, + "grad_norm": 0.7361410685782023, + "learning_rate": 1.5169490222164254e-06, + "loss": 0.6578936576843262, + "step": 3132 + }, + { + "epoch": 0.721889400921659, + "grad_norm": 0.9361369886672088, + "learning_rate": 1.516622856383393e-06, + "loss": 0.6849668025970459, + "step": 3133 + }, + { + "epoch": 0.7221198156682027, + "grad_norm": 1.0686822202217916, + "learning_rate": 1.5162966155632372e-06, + "loss": 0.9549611806869507, + "step": 3134 + }, + { + "epoch": 0.7223502304147466, + "grad_norm": 0.9063080856885865, + "learning_rate": 1.5159702998033113e-06, + "loss": 0.8005616664886475, + "step": 3135 + }, + { + "epoch": 0.7225806451612903, + "grad_norm": 1.089721709643384, + "learning_rate": 1.5156439091509793e-06, + "loss": 0.8980830311775208, + "step": 3136 + }, + { + "epoch": 0.7228110599078341, + "grad_norm": 1.012161312959267, + "learning_rate": 1.5153174436536166e-06, + "loss": 0.8247464895248413, + "step": 3137 + }, + { + "epoch": 0.7230414746543778, + "grad_norm": 0.9582357561913161, + "learning_rate": 1.5149909033586088e-06, + "loss": 0.818629264831543, + "step": 3138 + }, + { + "epoch": 0.7232718894009217, + "grad_norm": 0.7730251673290138, + "learning_rate": 1.5146642883133532e-06, + "loss": 0.8928704261779785, + "step": 3139 + }, + { + "epoch": 0.7235023041474654, + "grad_norm": 1.199560365249708, + "learning_rate": 1.5143375985652576e-06, + "loss": 0.9330282807350159, + "step": 3140 + }, + { + 
"epoch": 0.7237327188940093, + "grad_norm": 0.9749101527395967, + "learning_rate": 1.5140108341617405e-06, + "loss": 0.7961822748184204, + "step": 3141 + }, + { + "epoch": 0.723963133640553, + "grad_norm": 0.9244859383947029, + "learning_rate": 1.513683995150231e-06, + "loss": 0.8073769807815552, + "step": 3142 + }, + { + "epoch": 0.7241935483870968, + "grad_norm": 1.0469784848396728, + "learning_rate": 1.51335708157817e-06, + "loss": 0.946292519569397, + "step": 3143 + }, + { + "epoch": 0.7244239631336405, + "grad_norm": 0.8214787899217685, + "learning_rate": 1.513030093493008e-06, + "loss": 0.806084156036377, + "step": 3144 + }, + { + "epoch": 0.7246543778801844, + "grad_norm": 0.9086362129225068, + "learning_rate": 1.5127030309422072e-06, + "loss": 0.8804534673690796, + "step": 3145 + }, + { + "epoch": 0.7248847926267281, + "grad_norm": 0.973773267534968, + "learning_rate": 1.51237589397324e-06, + "loss": 0.7489848136901855, + "step": 3146 + }, + { + "epoch": 0.7251152073732718, + "grad_norm": 1.047973105384132, + "learning_rate": 1.5120486826335905e-06, + "loss": 0.875586986541748, + "step": 3147 + }, + { + "epoch": 0.7253456221198157, + "grad_norm": 0.8473382638758681, + "learning_rate": 1.5117213969707522e-06, + "loss": 0.8334758281707764, + "step": 3148 + }, + { + "epoch": 0.7255760368663594, + "grad_norm": 0.8693445792084491, + "learning_rate": 1.5113940370322306e-06, + "loss": 0.8010859489440918, + "step": 3149 + }, + { + "epoch": 0.7258064516129032, + "grad_norm": 0.8638975130346471, + "learning_rate": 1.5110666028655417e-06, + "loss": 0.7907547950744629, + "step": 3150 + }, + { + "epoch": 0.726036866359447, + "grad_norm": 0.9542895726151109, + "learning_rate": 1.5107390945182117e-06, + "loss": 0.8922848105430603, + "step": 3151 + }, + { + "epoch": 0.7262672811059908, + "grad_norm": 0.7865624103758176, + "learning_rate": 1.5104115120377783e-06, + "loss": 0.7418628931045532, + "step": 3152 + }, + { + "epoch": 0.7264976958525345, + "grad_norm": 
1.0285540479216404, + "learning_rate": 1.51008385547179e-06, + "loss": 0.9063338041305542, + "step": 3153 + }, + { + "epoch": 0.7267281105990784, + "grad_norm": 1.0080575916686718, + "learning_rate": 1.5097561248678047e-06, + "loss": 0.8718822002410889, + "step": 3154 + }, + { + "epoch": 0.7269585253456221, + "grad_norm": 1.0055226715830414, + "learning_rate": 1.5094283202733934e-06, + "loss": 0.950742244720459, + "step": 3155 + }, + { + "epoch": 0.727188940092166, + "grad_norm": 1.126636802719941, + "learning_rate": 1.5091004417361353e-06, + "loss": 0.7963443994522095, + "step": 3156 + }, + { + "epoch": 0.7274193548387097, + "grad_norm": 1.0644638923319971, + "learning_rate": 1.5087724893036225e-06, + "loss": 0.8428621888160706, + "step": 3157 + }, + { + "epoch": 0.7276497695852534, + "grad_norm": 1.0421355661787988, + "learning_rate": 1.508444463023456e-06, + "loss": 0.8271539211273193, + "step": 3158 + }, + { + "epoch": 0.7278801843317972, + "grad_norm": 0.7345991655152693, + "learning_rate": 1.508116362943249e-06, + "loss": 0.7899917364120483, + "step": 3159 + }, + { + "epoch": 0.728110599078341, + "grad_norm": 1.1916065857121023, + "learning_rate": 1.5077881891106246e-06, + "loss": 0.8734809160232544, + "step": 3160 + }, + { + "epoch": 0.7283410138248848, + "grad_norm": 1.0138536766133128, + "learning_rate": 1.5074599415732164e-06, + "loss": 0.7740491628646851, + "step": 3161 + }, + { + "epoch": 0.7285714285714285, + "grad_norm": 0.8952462084516831, + "learning_rate": 1.5071316203786698e-06, + "loss": 0.7219515442848206, + "step": 3162 + }, + { + "epoch": 0.7288018433179724, + "grad_norm": 0.7779518912065628, + "learning_rate": 1.50680322557464e-06, + "loss": 0.8122725486755371, + "step": 3163 + }, + { + "epoch": 0.7290322580645161, + "grad_norm": 0.9965727720770509, + "learning_rate": 1.5064747572087923e-06, + "loss": 0.8280072212219238, + "step": 3164 + }, + { + "epoch": 0.7292626728110599, + "grad_norm": 0.9097690003119847, + "learning_rate": 
1.5061462153288047e-06, + "loss": 0.7287842035293579, + "step": 3165 + }, + { + "epoch": 0.7294930875576037, + "grad_norm": 1.0497146109580189, + "learning_rate": 1.5058175999823639e-06, + "loss": 0.8404949903488159, + "step": 3166 + }, + { + "epoch": 0.7297235023041475, + "grad_norm": 0.9887517999095412, + "learning_rate": 1.505488911217168e-06, + "loss": 0.6572415828704834, + "step": 3167 + }, + { + "epoch": 0.7299539170506912, + "grad_norm": 1.0946078663351873, + "learning_rate": 1.5051601490809257e-06, + "loss": 0.8924484848976135, + "step": 3168 + }, + { + "epoch": 0.7301843317972351, + "grad_norm": 1.1648951213224894, + "learning_rate": 1.5048313136213566e-06, + "loss": 0.8701428174972534, + "step": 3169 + }, + { + "epoch": 0.7304147465437788, + "grad_norm": 1.1475520143482136, + "learning_rate": 1.5045024048861906e-06, + "loss": 0.8327716588973999, + "step": 3170 + }, + { + "epoch": 0.7306451612903225, + "grad_norm": 0.9261768702303601, + "learning_rate": 1.5041734229231686e-06, + "loss": 0.8379253149032593, + "step": 3171 + }, + { + "epoch": 0.7308755760368664, + "grad_norm": 0.944084791074753, + "learning_rate": 1.5038443677800413e-06, + "loss": 0.7475664019584656, + "step": 3172 + }, + { + "epoch": 0.7311059907834101, + "grad_norm": 1.2226580752686416, + "learning_rate": 1.5035152395045714e-06, + "loss": 0.9002243280410767, + "step": 3173 + }, + { + "epoch": 0.7313364055299539, + "grad_norm": 0.8355701729873874, + "learning_rate": 1.503186038144531e-06, + "loss": 0.6718685626983643, + "step": 3174 + }, + { + "epoch": 0.7315668202764977, + "grad_norm": 0.8961232238271665, + "learning_rate": 1.5028567637477033e-06, + "loss": 0.6836501359939575, + "step": 3175 + }, + { + "epoch": 0.7317972350230415, + "grad_norm": 0.8859536342600928, + "learning_rate": 1.502527416361882e-06, + "loss": 0.7548954486846924, + "step": 3176 + }, + { + "epoch": 0.7320276497695852, + "grad_norm": 0.9826706955950207, + "learning_rate": 1.5021979960348714e-06, + "loss": 
0.8385212421417236, + "step": 3177 + }, + { + "epoch": 0.7322580645161291, + "grad_norm": 0.8341383572022868, + "learning_rate": 1.5018685028144864e-06, + "loss": 0.8605425357818604, + "step": 3178 + }, + { + "epoch": 0.7324884792626728, + "grad_norm": 0.9464588739740442, + "learning_rate": 1.501538936748553e-06, + "loss": 0.8831393718719482, + "step": 3179 + }, + { + "epoch": 0.7327188940092166, + "grad_norm": 0.8991947067614845, + "learning_rate": 1.5012092978849062e-06, + "loss": 0.6965172290802002, + "step": 3180 + }, + { + "epoch": 0.7329493087557604, + "grad_norm": 1.0090692893685214, + "learning_rate": 1.500879586271394e-06, + "loss": 0.8062859773635864, + "step": 3181 + }, + { + "epoch": 0.7331797235023041, + "grad_norm": 0.7952177607289516, + "learning_rate": 1.5005498019558724e-06, + "loss": 0.8285790681838989, + "step": 3182 + }, + { + "epoch": 0.7334101382488479, + "grad_norm": 0.9848452236152132, + "learning_rate": 1.50021994498621e-06, + "loss": 0.612429141998291, + "step": 3183 + }, + { + "epoch": 0.7336405529953917, + "grad_norm": 0.9156545700522013, + "learning_rate": 1.4998900154102847e-06, + "loss": 0.8271423578262329, + "step": 3184 + }, + { + "epoch": 0.7338709677419355, + "grad_norm": 1.033787601007848, + "learning_rate": 1.499560013275986e-06, + "loss": 0.838964581489563, + "step": 3185 + }, + { + "epoch": 0.7341013824884792, + "grad_norm": 0.973220548768116, + "learning_rate": 1.4992299386312119e-06, + "loss": 0.7902333736419678, + "step": 3186 + }, + { + "epoch": 0.7343317972350231, + "grad_norm": 1.0086369878855088, + "learning_rate": 1.4988997915238735e-06, + "loss": 0.8520635366439819, + "step": 3187 + }, + { + "epoch": 0.7345622119815668, + "grad_norm": 0.9892742658321851, + "learning_rate": 1.4985695720018905e-06, + "loss": 0.8666567206382751, + "step": 3188 + }, + { + "epoch": 0.7347926267281106, + "grad_norm": 0.9672613309802366, + "learning_rate": 1.4982392801131944e-06, + "loss": 0.6930691003799438, + "step": 3189 + }, + { + 
"epoch": 0.7350230414746544, + "grad_norm": 0.7049869743164157, + "learning_rate": 1.4979089159057263e-06, + "loss": 0.7957722544670105, + "step": 3190 + }, + { + "epoch": 0.7352534562211982, + "grad_norm": 1.0247601673009343, + "learning_rate": 1.4975784794274383e-06, + "loss": 0.8966697454452515, + "step": 3191 + }, + { + "epoch": 0.7354838709677419, + "grad_norm": 0.9082832739975722, + "learning_rate": 1.4972479707262926e-06, + "loss": 0.7478537559509277, + "step": 3192 + }, + { + "epoch": 0.7357142857142858, + "grad_norm": 0.9541041339746362, + "learning_rate": 1.4969173898502624e-06, + "loss": 0.8862416744232178, + "step": 3193 + }, + { + "epoch": 0.7359447004608295, + "grad_norm": 0.8171852448254098, + "learning_rate": 1.4965867368473306e-06, + "loss": 0.7910712957382202, + "step": 3194 + }, + { + "epoch": 0.7361751152073732, + "grad_norm": 1.1219879646982642, + "learning_rate": 1.4962560117654916e-06, + "loss": 0.7371944785118103, + "step": 3195 + }, + { + "epoch": 0.7364055299539171, + "grad_norm": 1.097733223938739, + "learning_rate": 1.4959252146527496e-06, + "loss": 0.7966737151145935, + "step": 3196 + }, + { + "epoch": 0.7366359447004608, + "grad_norm": 1.0499505243286467, + "learning_rate": 1.4955943455571188e-06, + "loss": 0.8474653363227844, + "step": 3197 + }, + { + "epoch": 0.7368663594470046, + "grad_norm": 1.1042914253537062, + "learning_rate": 1.4952634045266249e-06, + "loss": 1.0197458267211914, + "step": 3198 + }, + { + "epoch": 0.7370967741935484, + "grad_norm": 1.054872102822339, + "learning_rate": 1.4949323916093036e-06, + "loss": 0.8813979625701904, + "step": 3199 + }, + { + "epoch": 0.7373271889400922, + "grad_norm": 0.9264193586497762, + "learning_rate": 1.4946013068532008e-06, + "loss": 0.9323042631149292, + "step": 3200 + }, + { + "epoch": 0.7375576036866359, + "grad_norm": 1.1184797510334814, + "learning_rate": 1.494270150306373e-06, + "loss": 0.8637902736663818, + "step": 3201 + }, + { + "epoch": 0.7377880184331798, + "grad_norm": 
1.1006860616870338, + "learning_rate": 1.4939389220168875e-06, + "loss": 0.8046854734420776, + "step": 3202 + }, + { + "epoch": 0.7380184331797235, + "grad_norm": 0.9882241685181946, + "learning_rate": 1.4936076220328211e-06, + "loss": 0.7616177201271057, + "step": 3203 + }, + { + "epoch": 0.7382488479262673, + "grad_norm": 1.0795779512267711, + "learning_rate": 1.4932762504022619e-06, + "loss": 0.8548959493637085, + "step": 3204 + }, + { + "epoch": 0.738479262672811, + "grad_norm": 0.7907178615166577, + "learning_rate": 1.492944807173308e-06, + "loss": 0.8062562942504883, + "step": 3205 + }, + { + "epoch": 0.7387096774193549, + "grad_norm": 1.3004819436990922, + "learning_rate": 1.492613292394068e-06, + "loss": 0.8776403069496155, + "step": 3206 + }, + { + "epoch": 0.7389400921658986, + "grad_norm": 1.0654471822316505, + "learning_rate": 1.4922817061126605e-06, + "loss": 0.7528336048126221, + "step": 3207 + }, + { + "epoch": 0.7391705069124423, + "grad_norm": 0.9288011243231857, + "learning_rate": 1.4919500483772152e-06, + "loss": 0.7441881895065308, + "step": 3208 + }, + { + "epoch": 0.7394009216589862, + "grad_norm": 0.9496581250230889, + "learning_rate": 1.4916183192358715e-06, + "loss": 0.8925758004188538, + "step": 3209 + }, + { + "epoch": 0.7396313364055299, + "grad_norm": 0.999519243113449, + "learning_rate": 1.4912865187367798e-06, + "loss": 0.7527008652687073, + "step": 3210 + }, + { + "epoch": 0.7398617511520738, + "grad_norm": 0.8631940848050832, + "learning_rate": 1.4909546469281e-06, + "loss": 0.753572404384613, + "step": 3211 + }, + { + "epoch": 0.7400921658986175, + "grad_norm": 0.938203260102219, + "learning_rate": 1.4906227038580036e-06, + "loss": 0.8884274959564209, + "step": 3212 + }, + { + "epoch": 0.7403225806451613, + "grad_norm": 0.7835821294972823, + "learning_rate": 1.4902906895746707e-06, + "loss": 0.7702244520187378, + "step": 3213 + }, + { + "epoch": 0.740552995391705, + "grad_norm": 1.0140732775513552, + "learning_rate": 
1.4899586041262936e-06, + "loss": 0.8662835359573364, + "step": 3214 + }, + { + "epoch": 0.7407834101382489, + "grad_norm": 1.0357827096613574, + "learning_rate": 1.4896264475610736e-06, + "loss": 0.9819997549057007, + "step": 3215 + }, + { + "epoch": 0.7410138248847926, + "grad_norm": 1.0094197188590162, + "learning_rate": 1.4892942199272232e-06, + "loss": 0.9137614965438843, + "step": 3216 + }, + { + "epoch": 0.7412442396313365, + "grad_norm": 0.8442315992670393, + "learning_rate": 1.488961921272964e-06, + "loss": 0.7554785013198853, + "step": 3217 + }, + { + "epoch": 0.7414746543778802, + "grad_norm": 1.1172745597106868, + "learning_rate": 1.4886295516465296e-06, + "loss": 0.8528940677642822, + "step": 3218 + }, + { + "epoch": 0.7417050691244239, + "grad_norm": 0.9056918439443091, + "learning_rate": 1.4882971110961626e-06, + "loss": 0.7212377786636353, + "step": 3219 + }, + { + "epoch": 0.7419354838709677, + "grad_norm": 0.9349124518247459, + "learning_rate": 1.4879645996701161e-06, + "loss": 0.7767617702484131, + "step": 3220 + }, + { + "epoch": 0.7421658986175115, + "grad_norm": 0.8749389005214587, + "learning_rate": 1.4876320174166542e-06, + "loss": 0.8083292245864868, + "step": 3221 + }, + { + "epoch": 0.7423963133640553, + "grad_norm": 1.14484646357819, + "learning_rate": 1.4872993643840506e-06, + "loss": 0.8652364015579224, + "step": 3222 + }, + { + "epoch": 0.742626728110599, + "grad_norm": 0.9176030431238368, + "learning_rate": 1.486966640620589e-06, + "loss": 0.7455019950866699, + "step": 3223 + }, + { + "epoch": 0.7428571428571429, + "grad_norm": 1.0637469159007076, + "learning_rate": 1.4866338461745644e-06, + "loss": 0.7881917953491211, + "step": 3224 + }, + { + "epoch": 0.7430875576036866, + "grad_norm": 1.0955814961304737, + "learning_rate": 1.4863009810942813e-06, + "loss": 0.8148372173309326, + "step": 3225 + }, + { + "epoch": 0.7433179723502304, + "grad_norm": 0.7991384008669099, + "learning_rate": 1.4859680454280547e-06, + "loss": 
0.6574658751487732, + "step": 3226 + }, + { + "epoch": 0.7435483870967742, + "grad_norm": 0.9231484623709659, + "learning_rate": 1.4856350392242094e-06, + "loss": 0.7831655740737915, + "step": 3227 + }, + { + "epoch": 0.743778801843318, + "grad_norm": 0.8080817272772121, + "learning_rate": 1.485301962531081e-06, + "loss": 0.7406231164932251, + "step": 3228 + }, + { + "epoch": 0.7440092165898617, + "grad_norm": 0.9500561612529754, + "learning_rate": 1.4849688153970154e-06, + "loss": 0.8092324733734131, + "step": 3229 + }, + { + "epoch": 0.7442396313364056, + "grad_norm": 0.969093760928221, + "learning_rate": 1.4846355978703679e-06, + "loss": 0.6662560701370239, + "step": 3230 + }, + { + "epoch": 0.7444700460829493, + "grad_norm": 0.8941354868939383, + "learning_rate": 1.4843023099995052e-06, + "loss": 0.8064731359481812, + "step": 3231 + }, + { + "epoch": 0.744700460829493, + "grad_norm": 1.0463529761361023, + "learning_rate": 1.4839689518328037e-06, + "loss": 0.7424519658088684, + "step": 3232 + }, + { + "epoch": 0.7449308755760369, + "grad_norm": 0.9618875213680247, + "learning_rate": 1.4836355234186489e-06, + "loss": 0.7851438522338867, + "step": 3233 + }, + { + "epoch": 0.7451612903225806, + "grad_norm": 1.2534680382280676, + "learning_rate": 1.4833020248054381e-06, + "loss": 0.896986722946167, + "step": 3234 + }, + { + "epoch": 0.7453917050691244, + "grad_norm": 1.3688846458082455, + "learning_rate": 1.4829684560415787e-06, + "loss": 0.9469928741455078, + "step": 3235 + }, + { + "epoch": 0.7456221198156682, + "grad_norm": 0.8653442286827894, + "learning_rate": 1.4826348171754872e-06, + "loss": 0.7527188062667847, + "step": 3236 + }, + { + "epoch": 0.745852534562212, + "grad_norm": 0.9575212903893582, + "learning_rate": 1.4823011082555907e-06, + "loss": 0.7758080959320068, + "step": 3237 + }, + { + "epoch": 0.7460829493087557, + "grad_norm": 0.9454436343118328, + "learning_rate": 1.481967329330327e-06, + "loss": 0.8359881043434143, + "step": 3238 + }, + { + 
"epoch": 0.7463133640552996, + "grad_norm": 0.7567559878181612, + "learning_rate": 1.4816334804481434e-06, + "loss": 0.6576982736587524, + "step": 3239 + }, + { + "epoch": 0.7465437788018433, + "grad_norm": 1.0012365138594377, + "learning_rate": 1.4812995616574978e-06, + "loss": 0.7919917106628418, + "step": 3240 + }, + { + "epoch": 0.7467741935483871, + "grad_norm": 0.7865137499791297, + "learning_rate": 1.480965573006858e-06, + "loss": 0.7682263851165771, + "step": 3241 + }, + { + "epoch": 0.7470046082949309, + "grad_norm": 1.0123241682054298, + "learning_rate": 1.4806315145447017e-06, + "loss": 0.8573193550109863, + "step": 3242 + }, + { + "epoch": 0.7472350230414746, + "grad_norm": 0.8191884786597581, + "learning_rate": 1.4802973863195174e-06, + "loss": 0.8473606109619141, + "step": 3243 + }, + { + "epoch": 0.7474654377880184, + "grad_norm": 0.8754073951862541, + "learning_rate": 1.4799631883798033e-06, + "loss": 0.8110678195953369, + "step": 3244 + }, + { + "epoch": 0.7476958525345622, + "grad_norm": 1.2161581760732987, + "learning_rate": 1.4796289207740681e-06, + "loss": 0.6624661087989807, + "step": 3245 + }, + { + "epoch": 0.747926267281106, + "grad_norm": 0.7356293873938221, + "learning_rate": 1.47929458355083e-06, + "loss": 0.8145536184310913, + "step": 3246 + }, + { + "epoch": 0.7481566820276497, + "grad_norm": 0.921128997158793, + "learning_rate": 1.4789601767586172e-06, + "loss": 0.7819876074790955, + "step": 3247 + }, + { + "epoch": 0.7483870967741936, + "grad_norm": 0.973465003660405, + "learning_rate": 1.4786257004459692e-06, + "loss": 0.7573810815811157, + "step": 3248 + }, + { + "epoch": 0.7486175115207373, + "grad_norm": 1.061603620628762, + "learning_rate": 1.4782911546614343e-06, + "loss": 0.8149522542953491, + "step": 3249 + }, + { + "epoch": 0.7488479262672811, + "grad_norm": 1.023358335101362, + "learning_rate": 1.4779565394535714e-06, + "loss": 0.9935284852981567, + "step": 3250 + }, + { + "epoch": 0.7490783410138249, + "grad_norm": 
0.8488935416479958, + "learning_rate": 1.4776218548709497e-06, + "loss": 0.8673371076583862, + "step": 3251 + }, + { + "epoch": 0.7493087557603687, + "grad_norm": 1.0304468521950305, + "learning_rate": 1.4772871009621477e-06, + "loss": 0.8569149374961853, + "step": 3252 + }, + { + "epoch": 0.7495391705069124, + "grad_norm": 0.8613722173703313, + "learning_rate": 1.4769522777757551e-06, + "loss": 0.7177854776382446, + "step": 3253 + }, + { + "epoch": 0.7497695852534563, + "grad_norm": 1.0681726446759283, + "learning_rate": 1.4766173853603706e-06, + "loss": 0.8115622997283936, + "step": 3254 + }, + { + "epoch": 0.75, + "grad_norm": 0.782977490159237, + "learning_rate": 1.4762824237646038e-06, + "loss": 0.7209019660949707, + "step": 3255 + }, + { + "epoch": 0.7502304147465437, + "grad_norm": 0.9264325214188774, + "learning_rate": 1.4759473930370736e-06, + "loss": 0.8433470726013184, + "step": 3256 + }, + { + "epoch": 0.7504608294930876, + "grad_norm": 1.0399152705693322, + "learning_rate": 1.4756122932264093e-06, + "loss": 0.853674054145813, + "step": 3257 + }, + { + "epoch": 0.7506912442396313, + "grad_norm": 0.9978956076189626, + "learning_rate": 1.4752771243812503e-06, + "loss": 0.8645769357681274, + "step": 3258 + }, + { + "epoch": 0.7509216589861751, + "grad_norm": 1.4046905803968728, + "learning_rate": 1.474941886550246e-06, + "loss": 0.927452564239502, + "step": 3259 + }, + { + "epoch": 0.7511520737327189, + "grad_norm": 0.8642581213790671, + "learning_rate": 1.4746065797820552e-06, + "loss": 0.7461255788803101, + "step": 3260 + }, + { + "epoch": 0.7513824884792627, + "grad_norm": 0.9230380534710827, + "learning_rate": 1.4742712041253481e-06, + "loss": 0.8737163543701172, + "step": 3261 + }, + { + "epoch": 0.7516129032258064, + "grad_norm": 0.8624828182814519, + "learning_rate": 1.4739357596288036e-06, + "loss": 0.7148758172988892, + "step": 3262 + }, + { + "epoch": 0.7518433179723503, + "grad_norm": 0.8930446588032352, + "learning_rate": 
1.4736002463411108e-06, + "loss": 0.738334596157074, + "step": 3263 + }, + { + "epoch": 0.752073732718894, + "grad_norm": 0.9237791770446419, + "learning_rate": 1.4732646643109692e-06, + "loss": 0.7733340263366699, + "step": 3264 + }, + { + "epoch": 0.7523041474654378, + "grad_norm": 0.8815526032135323, + "learning_rate": 1.4729290135870883e-06, + "loss": 0.7882881164550781, + "step": 3265 + }, + { + "epoch": 0.7525345622119816, + "grad_norm": 1.029688172185613, + "learning_rate": 1.472593294218187e-06, + "loss": 0.7908357381820679, + "step": 3266 + }, + { + "epoch": 0.7527649769585254, + "grad_norm": 1.0791156682188368, + "learning_rate": 1.4722575062529946e-06, + "loss": 0.8818062543869019, + "step": 3267 + }, + { + "epoch": 0.7529953917050691, + "grad_norm": 0.9552677127935061, + "learning_rate": 1.4719216497402504e-06, + "loss": 0.7152599692344666, + "step": 3268 + }, + { + "epoch": 0.7532258064516129, + "grad_norm": 0.8322037056106782, + "learning_rate": 1.4715857247287036e-06, + "loss": 0.8503165245056152, + "step": 3269 + }, + { + "epoch": 0.7534562211981567, + "grad_norm": 0.9223729567181368, + "learning_rate": 1.4712497312671128e-06, + "loss": 0.8382623195648193, + "step": 3270 + }, + { + "epoch": 0.7536866359447004, + "grad_norm": 1.0456882119229616, + "learning_rate": 1.4709136694042479e-06, + "loss": 0.8358533382415771, + "step": 3271 + }, + { + "epoch": 0.7539170506912443, + "grad_norm": 0.850717529465525, + "learning_rate": 1.4705775391888868e-06, + "loss": 0.6735624670982361, + "step": 3272 + }, + { + "epoch": 0.754147465437788, + "grad_norm": 0.8890452669379437, + "learning_rate": 1.470241340669819e-06, + "loss": 0.8343949317932129, + "step": 3273 + }, + { + "epoch": 0.7543778801843318, + "grad_norm": 0.9508610560109901, + "learning_rate": 1.4699050738958434e-06, + "loss": 0.8204318284988403, + "step": 3274 + }, + { + "epoch": 0.7546082949308756, + "grad_norm": 0.9484772286558124, + "learning_rate": 1.4695687389157684e-06, + "loss": 
0.7541854977607727, + "step": 3275 + }, + { + "epoch": 0.7548387096774194, + "grad_norm": 0.8425504123859369, + "learning_rate": 1.4692323357784122e-06, + "loss": 0.8144943714141846, + "step": 3276 + }, + { + "epoch": 0.7550691244239631, + "grad_norm": 0.8699783126306536, + "learning_rate": 1.468895864532604e-06, + "loss": 0.9045677781105042, + "step": 3277 + }, + { + "epoch": 0.755299539170507, + "grad_norm": 1.1586104318366583, + "learning_rate": 1.4685593252271816e-06, + "loss": 0.8818730115890503, + "step": 3278 + }, + { + "epoch": 0.7555299539170507, + "grad_norm": 1.013621065000431, + "learning_rate": 1.4682227179109932e-06, + "loss": 0.8582229614257812, + "step": 3279 + }, + { + "epoch": 0.7557603686635944, + "grad_norm": 1.016541372354986, + "learning_rate": 1.4678860426328977e-06, + "loss": 0.8769974708557129, + "step": 3280 + }, + { + "epoch": 0.7559907834101383, + "grad_norm": 0.8474484944100091, + "learning_rate": 1.467549299441762e-06, + "loss": 0.8034937381744385, + "step": 3281 + }, + { + "epoch": 0.756221198156682, + "grad_norm": 0.9998169463505984, + "learning_rate": 1.4672124883864646e-06, + "loss": 0.9057378768920898, + "step": 3282 + }, + { + "epoch": 0.7564516129032258, + "grad_norm": 0.9160359407680143, + "learning_rate": 1.4668756095158929e-06, + "loss": 0.8039969205856323, + "step": 3283 + }, + { + "epoch": 0.7566820276497696, + "grad_norm": 0.7311572278532684, + "learning_rate": 1.4665386628789448e-06, + "loss": 0.887493908405304, + "step": 3284 + }, + { + "epoch": 0.7569124423963134, + "grad_norm": 0.9749833066021305, + "learning_rate": 1.4662016485245271e-06, + "loss": 0.783561646938324, + "step": 3285 + }, + { + "epoch": 0.7571428571428571, + "grad_norm": 1.1972955361865625, + "learning_rate": 1.4658645665015579e-06, + "loss": 0.7526337504386902, + "step": 3286 + }, + { + "epoch": 0.757373271889401, + "grad_norm": 1.0074911468135093, + "learning_rate": 1.4655274168589633e-06, + "loss": 0.8583099842071533, + "step": 3287 + }, + { + 
"epoch": 0.7576036866359447, + "grad_norm": 0.9193819222275846, + "learning_rate": 1.4651901996456802e-06, + "loss": 0.743253767490387, + "step": 3288 + }, + { + "epoch": 0.7578341013824885, + "grad_norm": 0.9481332173734432, + "learning_rate": 1.4648529149106555e-06, + "loss": 0.8763987421989441, + "step": 3289 + }, + { + "epoch": 0.7580645161290323, + "grad_norm": 0.9531439206540595, + "learning_rate": 1.4645155627028455e-06, + "loss": 0.8388645648956299, + "step": 3290 + }, + { + "epoch": 0.7582949308755761, + "grad_norm": 0.9430549047432926, + "learning_rate": 1.4641781430712167e-06, + "loss": 0.8943589925765991, + "step": 3291 + }, + { + "epoch": 0.7585253456221198, + "grad_norm": 0.897306276129885, + "learning_rate": 1.463840656064745e-06, + "loss": 0.9224259257316589, + "step": 3292 + }, + { + "epoch": 0.7587557603686635, + "grad_norm": 0.7118962108569266, + "learning_rate": 1.463503101732416e-06, + "loss": 0.5836232900619507, + "step": 3293 + }, + { + "epoch": 0.7589861751152074, + "grad_norm": 1.2610309452085111, + "learning_rate": 1.4631654801232255e-06, + "loss": 0.6700382828712463, + "step": 3294 + }, + { + "epoch": 0.7592165898617511, + "grad_norm": 0.9159006934526643, + "learning_rate": 1.4628277912861785e-06, + "loss": 0.7876112461090088, + "step": 3295 + }, + { + "epoch": 0.759447004608295, + "grad_norm": 0.9073380438964382, + "learning_rate": 1.4624900352702905e-06, + "loss": 0.8410799503326416, + "step": 3296 + }, + { + "epoch": 0.7596774193548387, + "grad_norm": 0.931630117662002, + "learning_rate": 1.4621522121245859e-06, + "loss": 0.9615974426269531, + "step": 3297 + }, + { + "epoch": 0.7599078341013825, + "grad_norm": 1.1213393394374043, + "learning_rate": 1.4618143218980996e-06, + "loss": 0.7973389625549316, + "step": 3298 + }, + { + "epoch": 0.7601382488479262, + "grad_norm": 0.7835636014361216, + "learning_rate": 1.461476364639876e-06, + "loss": 0.7734094858169556, + "step": 3299 + }, + { + "epoch": 0.7603686635944701, + "grad_norm": 
0.9681758067915807, + "learning_rate": 1.461138340398969e-06, + "loss": 0.7365939617156982, + "step": 3300 + }, + { + "epoch": 0.7605990783410138, + "grad_norm": 0.9251627601521192, + "learning_rate": 1.4608002492244421e-06, + "loss": 0.822052001953125, + "step": 3301 + }, + { + "epoch": 0.7608294930875577, + "grad_norm": 0.83536047590978, + "learning_rate": 1.460462091165369e-06, + "loss": 0.7220577001571655, + "step": 3302 + }, + { + "epoch": 0.7610599078341014, + "grad_norm": 0.9806834080573716, + "learning_rate": 1.4601238662708332e-06, + "loss": 0.9795923233032227, + "step": 3303 + }, + { + "epoch": 0.7612903225806451, + "grad_norm": 1.0452301496717684, + "learning_rate": 1.4597855745899273e-06, + "loss": 0.804523229598999, + "step": 3304 + }, + { + "epoch": 0.761520737327189, + "grad_norm": 0.936039712838613, + "learning_rate": 1.4594472161717536e-06, + "loss": 0.7630297541618347, + "step": 3305 + }, + { + "epoch": 0.7617511520737327, + "grad_norm": 1.008258749087615, + "learning_rate": 1.4591087910654254e-06, + "loss": 0.7088560461997986, + "step": 3306 + }, + { + "epoch": 0.7619815668202765, + "grad_norm": 0.8612515545716848, + "learning_rate": 1.4587702993200637e-06, + "loss": 0.6627416014671326, + "step": 3307 + }, + { + "epoch": 0.7622119815668202, + "grad_norm": 1.0700034611745908, + "learning_rate": 1.4584317409848001e-06, + "loss": 0.7931111454963684, + "step": 3308 + }, + { + "epoch": 0.7624423963133641, + "grad_norm": 0.918004873184285, + "learning_rate": 1.4580931161087763e-06, + "loss": 0.8107850551605225, + "step": 3309 + }, + { + "epoch": 0.7626728110599078, + "grad_norm": 1.1251596055699022, + "learning_rate": 1.4577544247411431e-06, + "loss": 0.8211404085159302, + "step": 3310 + }, + { + "epoch": 0.7629032258064516, + "grad_norm": 1.1825093837600291, + "learning_rate": 1.457415666931061e-06, + "loss": 0.9861341714859009, + "step": 3311 + }, + { + "epoch": 0.7631336405529954, + "grad_norm": 1.0573079532917569, + "learning_rate": 
1.4570768427277007e-06, + "loss": 0.8963409662246704, + "step": 3312 + }, + { + "epoch": 0.7633640552995392, + "grad_norm": 1.1183054914337, + "learning_rate": 1.4567379521802416e-06, + "loss": 0.7510147094726562, + "step": 3313 + }, + { + "epoch": 0.7635944700460829, + "grad_norm": 1.0312269750408198, + "learning_rate": 1.4563989953378734e-06, + "loss": 0.7761805057525635, + "step": 3314 + }, + { + "epoch": 0.7638248847926268, + "grad_norm": 0.782434581691777, + "learning_rate": 1.4560599722497953e-06, + "loss": 0.6202781200408936, + "step": 3315 + }, + { + "epoch": 0.7640552995391705, + "grad_norm": 0.9114320197488165, + "learning_rate": 1.4557208829652159e-06, + "loss": 0.711891770362854, + "step": 3316 + }, + { + "epoch": 0.7642857142857142, + "grad_norm": 1.0888571874972786, + "learning_rate": 1.4553817275333537e-06, + "loss": 0.8689517974853516, + "step": 3317 + }, + { + "epoch": 0.7645161290322581, + "grad_norm": 0.847547372029402, + "learning_rate": 1.4550425060034365e-06, + "loss": 0.7323688268661499, + "step": 3318 + }, + { + "epoch": 0.7647465437788018, + "grad_norm": 0.954006429800706, + "learning_rate": 1.4547032184247022e-06, + "loss": 0.8934407234191895, + "step": 3319 + }, + { + "epoch": 0.7649769585253456, + "grad_norm": 0.9830574702749578, + "learning_rate": 1.4543638648463975e-06, + "loss": 0.7729885578155518, + "step": 3320 + }, + { + "epoch": 0.7652073732718894, + "grad_norm": 0.9967355019103026, + "learning_rate": 1.454024445317779e-06, + "loss": 0.8962388038635254, + "step": 3321 + }, + { + "epoch": 0.7654377880184332, + "grad_norm": 0.8821073382766633, + "learning_rate": 1.4536849598881137e-06, + "loss": 0.8655213117599487, + "step": 3322 + }, + { + "epoch": 0.7656682027649769, + "grad_norm": 0.8780656658271131, + "learning_rate": 1.453345408606677e-06, + "loss": 0.6471779346466064, + "step": 3323 + }, + { + "epoch": 0.7658986175115208, + "grad_norm": 0.7335596828312507, + "learning_rate": 1.4530057915227545e-06, + "loss": 
0.8665071129798889, + "step": 3324 + }, + { + "epoch": 0.7661290322580645, + "grad_norm": 1.054528188345679, + "learning_rate": 1.4526661086856407e-06, + "loss": 0.9504371285438538, + "step": 3325 + }, + { + "epoch": 0.7663594470046083, + "grad_norm": 1.017396914206461, + "learning_rate": 1.452326360144641e-06, + "loss": 0.8122013807296753, + "step": 3326 + }, + { + "epoch": 0.7665898617511521, + "grad_norm": 1.0019111601549837, + "learning_rate": 1.4519865459490687e-06, + "loss": 0.817001223564148, + "step": 3327 + }, + { + "epoch": 0.7668202764976959, + "grad_norm": 0.9387626004792055, + "learning_rate": 1.4516466661482474e-06, + "loss": 0.732322096824646, + "step": 3328 + }, + { + "epoch": 0.7670506912442396, + "grad_norm": 0.8844021324185192, + "learning_rate": 1.4513067207915106e-06, + "loss": 0.7961580157279968, + "step": 3329 + }, + { + "epoch": 0.7672811059907834, + "grad_norm": 0.9579783239612414, + "learning_rate": 1.4509667099282007e-06, + "loss": 0.7660717368125916, + "step": 3330 + }, + { + "epoch": 0.7675115207373272, + "grad_norm": 0.8487336367256668, + "learning_rate": 1.4506266336076698e-06, + "loss": 0.8279193639755249, + "step": 3331 + }, + { + "epoch": 0.7677419354838709, + "grad_norm": 0.8431407438554851, + "learning_rate": 1.4502864918792796e-06, + "loss": 0.7050153017044067, + "step": 3332 + }, + { + "epoch": 0.7679723502304148, + "grad_norm": 0.9386347952909049, + "learning_rate": 1.4499462847924013e-06, + "loss": 0.8146064877510071, + "step": 3333 + }, + { + "epoch": 0.7682027649769585, + "grad_norm": 0.8248232070769104, + "learning_rate": 1.4496060123964153e-06, + "loss": 0.8300814628601074, + "step": 3334 + }, + { + "epoch": 0.7684331797235023, + "grad_norm": 0.848400587593364, + "learning_rate": 1.4492656747407117e-06, + "loss": 0.8240403532981873, + "step": 3335 + }, + { + "epoch": 0.7686635944700461, + "grad_norm": 1.1661360506901004, + "learning_rate": 1.4489252718746908e-06, + "loss": 0.901625394821167, + "step": 3336 + }, + { + 
"epoch": 0.7688940092165899, + "grad_norm": 0.8620744709914054, + "learning_rate": 1.4485848038477604e-06, + "loss": 0.827139675617218, + "step": 3337 + }, + { + "epoch": 0.7691244239631336, + "grad_norm": 1.111541176491108, + "learning_rate": 1.4482442707093397e-06, + "loss": 0.7032946348190308, + "step": 3338 + }, + { + "epoch": 0.7693548387096775, + "grad_norm": 0.8506038004087974, + "learning_rate": 1.4479036725088564e-06, + "loss": 0.6805816888809204, + "step": 3339 + }, + { + "epoch": 0.7695852534562212, + "grad_norm": 0.8063208135295213, + "learning_rate": 1.447563009295748e-06, + "loss": 0.673591136932373, + "step": 3340 + }, + { + "epoch": 0.7698156682027649, + "grad_norm": 0.8116035277545482, + "learning_rate": 1.4472222811194614e-06, + "loss": 0.6513386964797974, + "step": 3341 + }, + { + "epoch": 0.7700460829493088, + "grad_norm": 0.7654089652768199, + "learning_rate": 1.4468814880294529e-06, + "loss": 0.7367297410964966, + "step": 3342 + }, + { + "epoch": 0.7702764976958525, + "grad_norm": 1.0405555538712603, + "learning_rate": 1.4465406300751878e-06, + "loss": 0.7393670082092285, + "step": 3343 + }, + { + "epoch": 0.7705069124423963, + "grad_norm": 0.7135144631405288, + "learning_rate": 1.4461997073061411e-06, + "loss": 0.7525930404663086, + "step": 3344 + }, + { + "epoch": 0.7707373271889401, + "grad_norm": 0.7583677101512988, + "learning_rate": 1.445858719771798e-06, + "loss": 0.6679942011833191, + "step": 3345 + }, + { + "epoch": 0.7709677419354839, + "grad_norm": 1.0903018310329022, + "learning_rate": 1.4455176675216518e-06, + "loss": 0.8440653085708618, + "step": 3346 + }, + { + "epoch": 0.7711981566820276, + "grad_norm": 0.9929368208299709, + "learning_rate": 1.4451765506052063e-06, + "loss": 0.8765773177146912, + "step": 3347 + }, + { + "epoch": 0.7714285714285715, + "grad_norm": 0.9183070258317377, + "learning_rate": 1.4448353690719732e-06, + "loss": 0.7309157848358154, + "step": 3348 + }, + { + "epoch": 0.7716589861751152, + "grad_norm": 
0.8130162073408548, + "learning_rate": 1.4444941229714758e-06, + "loss": 0.8043340444564819, + "step": 3349 + }, + { + "epoch": 0.771889400921659, + "grad_norm": 0.8488386913998837, + "learning_rate": 1.4441528123532443e-06, + "loss": 0.6528831124305725, + "step": 3350 + }, + { + "epoch": 0.7721198156682028, + "grad_norm": 0.7632405080168834, + "learning_rate": 1.4438114372668202e-06, + "loss": 0.7973155975341797, + "step": 3351 + }, + { + "epoch": 0.7723502304147466, + "grad_norm": 0.8366450624031991, + "learning_rate": 1.443469997761754e-06, + "loss": 0.940142810344696, + "step": 3352 + }, + { + "epoch": 0.7725806451612903, + "grad_norm": 1.0048812991349738, + "learning_rate": 1.443128493887604e-06, + "loss": 0.7936829328536987, + "step": 3353 + }, + { + "epoch": 0.772811059907834, + "grad_norm": 0.8583665989338275, + "learning_rate": 1.44278692569394e-06, + "loss": 0.8369218111038208, + "step": 3354 + }, + { + "epoch": 0.7730414746543779, + "grad_norm": 1.313808566044562, + "learning_rate": 1.4424452932303398e-06, + "loss": 0.9305802583694458, + "step": 3355 + }, + { + "epoch": 0.7732718894009216, + "grad_norm": 0.8862565116465879, + "learning_rate": 1.4421035965463916e-06, + "loss": 0.913454532623291, + "step": 3356 + }, + { + "epoch": 0.7735023041474655, + "grad_norm": 1.0772806984700294, + "learning_rate": 1.4417618356916912e-06, + "loss": 0.8552114963531494, + "step": 3357 + }, + { + "epoch": 0.7737327188940092, + "grad_norm": 1.080720564237515, + "learning_rate": 1.4414200107158452e-06, + "loss": 0.8674488067626953, + "step": 3358 + }, + { + "epoch": 0.773963133640553, + "grad_norm": 1.0999604158561203, + "learning_rate": 1.441078121668469e-06, + "loss": 0.9142898321151733, + "step": 3359 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 1.0964749277789683, + "learning_rate": 1.4407361685991872e-06, + "loss": 0.8258639574050903, + "step": 3360 + }, + { + "epoch": 0.7744239631336406, + "grad_norm": 1.062716295700188, + "learning_rate": 
1.4403941515576343e-06, + "loss": 0.773646354675293, + "step": 3361 + }, + { + "epoch": 0.7746543778801843, + "grad_norm": 1.1397221950146432, + "learning_rate": 1.440052070593453e-06, + "loss": 0.9481985569000244, + "step": 3362 + }, + { + "epoch": 0.7748847926267282, + "grad_norm": 1.0332478363266029, + "learning_rate": 1.4397099257562965e-06, + "loss": 0.7915977239608765, + "step": 3363 + }, + { + "epoch": 0.7751152073732719, + "grad_norm": 1.057946693927254, + "learning_rate": 1.4393677170958261e-06, + "loss": 0.887650191783905, + "step": 3364 + }, + { + "epoch": 0.7753456221198156, + "grad_norm": 0.8250912024788589, + "learning_rate": 1.4390254446617137e-06, + "loss": 0.8516546487808228, + "step": 3365 + }, + { + "epoch": 0.7755760368663595, + "grad_norm": 0.9895329351481195, + "learning_rate": 1.4386831085036386e-06, + "loss": 0.8076090812683105, + "step": 3366 + }, + { + "epoch": 0.7758064516129032, + "grad_norm": 0.9203902257484836, + "learning_rate": 1.4383407086712913e-06, + "loss": 0.7480059862136841, + "step": 3367 + }, + { + "epoch": 0.776036866359447, + "grad_norm": 1.1101542314671893, + "learning_rate": 1.4379982452143704e-06, + "loss": 0.8586190938949585, + "step": 3368 + }, + { + "epoch": 0.7762672811059907, + "grad_norm": 0.9197679868181698, + "learning_rate": 1.4376557181825842e-06, + "loss": 0.7581472396850586, + "step": 3369 + }, + { + "epoch": 0.7764976958525346, + "grad_norm": 1.2064630913320733, + "learning_rate": 1.4373131276256495e-06, + "loss": 0.7482568621635437, + "step": 3370 + }, + { + "epoch": 0.7767281105990783, + "grad_norm": 1.2204489088505164, + "learning_rate": 1.4369704735932935e-06, + "loss": 0.8822590112686157, + "step": 3371 + }, + { + "epoch": 0.7769585253456222, + "grad_norm": 0.9171528830764245, + "learning_rate": 1.4366277561352517e-06, + "loss": 0.7762279510498047, + "step": 3372 + }, + { + "epoch": 0.7771889400921659, + "grad_norm": 0.9649262790570658, + "learning_rate": 1.4362849753012692e-06, + "loss": 
0.8059147596359253, + "step": 3373 + }, + { + "epoch": 0.7774193548387097, + "grad_norm": 1.0529652703364816, + "learning_rate": 1.4359421311411e-06, + "loss": 0.778538703918457, + "step": 3374 + }, + { + "epoch": 0.7776497695852534, + "grad_norm": 1.1587212424703164, + "learning_rate": 1.4355992237045077e-06, + "loss": 0.9422975778579712, + "step": 3375 + }, + { + "epoch": 0.7778801843317973, + "grad_norm": 1.0109308621512796, + "learning_rate": 1.4352562530412645e-06, + "loss": 0.7437118291854858, + "step": 3376 + }, + { + "epoch": 0.778110599078341, + "grad_norm": 0.8961203034935337, + "learning_rate": 1.4349132192011525e-06, + "loss": 0.6935930252075195, + "step": 3377 + }, + { + "epoch": 0.7783410138248847, + "grad_norm": 1.1629979064489353, + "learning_rate": 1.4345701222339628e-06, + "loss": 0.7797117829322815, + "step": 3378 + }, + { + "epoch": 0.7785714285714286, + "grad_norm": 1.0591342199366531, + "learning_rate": 1.434226962189495e-06, + "loss": 0.8795931339263916, + "step": 3379 + }, + { + "epoch": 0.7788018433179723, + "grad_norm": 1.071603440273884, + "learning_rate": 1.433883739117558e-06, + "loss": 0.8936992287635803, + "step": 3380 + }, + { + "epoch": 0.7790322580645161, + "grad_norm": 1.0412928095771106, + "learning_rate": 1.4335404530679708e-06, + "loss": 0.9142701625823975, + "step": 3381 + }, + { + "epoch": 0.7792626728110599, + "grad_norm": 1.0966643259622728, + "learning_rate": 1.4331971040905613e-06, + "loss": 0.8996907472610474, + "step": 3382 + }, + { + "epoch": 0.7794930875576037, + "grad_norm": 1.020250921022328, + "learning_rate": 1.4328536922351654e-06, + "loss": 0.9645330905914307, + "step": 3383 + }, + { + "epoch": 0.7797235023041474, + "grad_norm": 0.7173807290755059, + "learning_rate": 1.4325102175516289e-06, + "loss": 0.5122036933898926, + "step": 3384 + }, + { + "epoch": 0.7799539170506913, + "grad_norm": 0.8487864939918429, + "learning_rate": 1.432166680089807e-06, + "loss": 0.6556990742683411, + "step": 3385 + }, + { + 
"epoch": 0.780184331797235, + "grad_norm": 0.7980125905366343, + "learning_rate": 1.4318230798995634e-06, + "loss": 0.6642920970916748, + "step": 3386 + }, + { + "epoch": 0.7804147465437788, + "grad_norm": 1.1205844690065134, + "learning_rate": 1.4314794170307718e-06, + "loss": 0.9373915195465088, + "step": 3387 + }, + { + "epoch": 0.7806451612903226, + "grad_norm": 1.1583496011366634, + "learning_rate": 1.4311356915333139e-06, + "loss": 0.8295063972473145, + "step": 3388 + }, + { + "epoch": 0.7808755760368664, + "grad_norm": 1.0075666840710995, + "learning_rate": 1.4307919034570809e-06, + "loss": 0.8167035579681396, + "step": 3389 + }, + { + "epoch": 0.7811059907834101, + "grad_norm": 1.045465756545736, + "learning_rate": 1.4304480528519736e-06, + "loss": 0.8444087505340576, + "step": 3390 + }, + { + "epoch": 0.7813364055299539, + "grad_norm": 0.9731986846355507, + "learning_rate": 1.4301041397679012e-06, + "loss": 0.7753941416740417, + "step": 3391 + }, + { + "epoch": 0.7815668202764977, + "grad_norm": 1.0117493931274548, + "learning_rate": 1.4297601642547824e-06, + "loss": 0.7885915040969849, + "step": 3392 + }, + { + "epoch": 0.7817972350230414, + "grad_norm": 0.9902641403084854, + "learning_rate": 1.4294161263625444e-06, + "loss": 0.730733335018158, + "step": 3393 + }, + { + "epoch": 0.7820276497695853, + "grad_norm": 0.8781208509199174, + "learning_rate": 1.4290720261411241e-06, + "loss": 0.8505427837371826, + "step": 3394 + }, + { + "epoch": 0.782258064516129, + "grad_norm": 0.9435888376510791, + "learning_rate": 1.4287278636404676e-06, + "loss": 0.7370787858963013, + "step": 3395 + }, + { + "epoch": 0.7824884792626728, + "grad_norm": 0.8683550268652552, + "learning_rate": 1.428383638910529e-06, + "loss": 0.6776250600814819, + "step": 3396 + }, + { + "epoch": 0.7827188940092166, + "grad_norm": 1.158711583120319, + "learning_rate": 1.4280393520012726e-06, + "loss": 0.8878101706504822, + "step": 3397 + }, + { + "epoch": 0.7829493087557604, + "grad_norm": 
1.0028929146104306, + "learning_rate": 1.427695002962671e-06, + "loss": 0.789238691329956, + "step": 3398 + }, + { + "epoch": 0.7831797235023041, + "grad_norm": 1.0382561381902518, + "learning_rate": 1.4273505918447052e-06, + "loss": 0.772524356842041, + "step": 3399 + }, + { + "epoch": 0.783410138248848, + "grad_norm": 0.8483839499127978, + "learning_rate": 1.4270061186973673e-06, + "loss": 0.682374119758606, + "step": 3400 + }, + { + "epoch": 0.7836405529953917, + "grad_norm": 0.9396222987314208, + "learning_rate": 1.4266615835706566e-06, + "loss": 0.874775767326355, + "step": 3401 + }, + { + "epoch": 0.7838709677419354, + "grad_norm": 1.3780294752863322, + "learning_rate": 1.4263169865145816e-06, + "loss": 0.9141736626625061, + "step": 3402 + }, + { + "epoch": 0.7841013824884793, + "grad_norm": 1.0849695477918648, + "learning_rate": 1.4259723275791603e-06, + "loss": 0.8533145189285278, + "step": 3403 + }, + { + "epoch": 0.784331797235023, + "grad_norm": 0.9340136683520418, + "learning_rate": 1.4256276068144198e-06, + "loss": 0.7920266389846802, + "step": 3404 + }, + { + "epoch": 0.7845622119815668, + "grad_norm": 0.9462841256440514, + "learning_rate": 1.4252828242703957e-06, + "loss": 0.7822731733322144, + "step": 3405 + }, + { + "epoch": 0.7847926267281106, + "grad_norm": 0.9890597976168253, + "learning_rate": 1.4249379799971324e-06, + "loss": 0.7103791832923889, + "step": 3406 + }, + { + "epoch": 0.7850230414746544, + "grad_norm": 1.0298833059227221, + "learning_rate": 1.4245930740446841e-06, + "loss": 0.7857639789581299, + "step": 3407 + }, + { + "epoch": 0.7852534562211981, + "grad_norm": 1.1065594183312877, + "learning_rate": 1.4242481064631134e-06, + "loss": 0.8069730997085571, + "step": 3408 + }, + { + "epoch": 0.785483870967742, + "grad_norm": 1.0472042802008708, + "learning_rate": 1.4239030773024912e-06, + "loss": 0.8758031129837036, + "step": 3409 + }, + { + "epoch": 0.7857142857142857, + "grad_norm": 1.015785019886056, + "learning_rate": 
1.4235579866128983e-06, + "loss": 0.895712673664093, + "step": 3410 + }, + { + "epoch": 0.7859447004608295, + "grad_norm": 0.9442660407745113, + "learning_rate": 1.423212834444425e-06, + "loss": 0.7904561758041382, + "step": 3411 + }, + { + "epoch": 0.7861751152073733, + "grad_norm": 1.0957623852355893, + "learning_rate": 1.4228676208471685e-06, + "loss": 0.9322203993797302, + "step": 3412 + }, + { + "epoch": 0.7864055299539171, + "grad_norm": 0.7668753687506044, + "learning_rate": 1.422522345871237e-06, + "loss": 0.9693628549575806, + "step": 3413 + }, + { + "epoch": 0.7866359447004608, + "grad_norm": 0.8417164970136307, + "learning_rate": 1.4221770095667462e-06, + "loss": 0.6737014651298523, + "step": 3414 + }, + { + "epoch": 0.7868663594470046, + "grad_norm": 1.1466654292657967, + "learning_rate": 1.4218316119838215e-06, + "loss": 0.8682050108909607, + "step": 3415 + }, + { + "epoch": 0.7870967741935484, + "grad_norm": 1.058324160083765, + "learning_rate": 1.4214861531725966e-06, + "loss": 0.7920347452163696, + "step": 3416 + }, + { + "epoch": 0.7873271889400921, + "grad_norm": 1.0147867893383273, + "learning_rate": 1.4211406331832144e-06, + "loss": 0.8330510854721069, + "step": 3417 + }, + { + "epoch": 0.787557603686636, + "grad_norm": 0.8802491842183522, + "learning_rate": 1.4207950520658272e-06, + "loss": 0.8314074873924255, + "step": 3418 + }, + { + "epoch": 0.7877880184331797, + "grad_norm": 1.069355954495663, + "learning_rate": 1.420449409870595e-06, + "loss": 0.7045331001281738, + "step": 3419 + }, + { + "epoch": 0.7880184331797235, + "grad_norm": 0.9484390721895568, + "learning_rate": 1.4201037066476876e-06, + "loss": 0.7825411558151245, + "step": 3420 + }, + { + "epoch": 0.7882488479262673, + "grad_norm": 0.86611108370867, + "learning_rate": 1.4197579424472834e-06, + "loss": 0.6960075497627258, + "step": 3421 + }, + { + "epoch": 0.7884792626728111, + "grad_norm": 1.038692849963906, + "learning_rate": 1.4194121173195694e-06, + "loss": 0.8366748094558716, 
+ "step": 3422 + }, + { + "epoch": 0.7887096774193548, + "grad_norm": 0.8605441828045868, + "learning_rate": 1.4190662313147419e-06, + "loss": 0.8859039545059204, + "step": 3423 + }, + { + "epoch": 0.7889400921658987, + "grad_norm": 1.0572382908005622, + "learning_rate": 1.4187202844830057e-06, + "loss": 0.7098245620727539, + "step": 3424 + }, + { + "epoch": 0.7891705069124424, + "grad_norm": 0.9126448008384304, + "learning_rate": 1.4183742768745743e-06, + "loss": 0.7410455942153931, + "step": 3425 + }, + { + "epoch": 0.7894009216589861, + "grad_norm": 0.8007200450015498, + "learning_rate": 1.4180282085396706e-06, + "loss": 0.7414010763168335, + "step": 3426 + }, + { + "epoch": 0.78963133640553, + "grad_norm": 1.090062212374054, + "learning_rate": 1.417682079528526e-06, + "loss": 0.9043526649475098, + "step": 3427 + }, + { + "epoch": 0.7898617511520737, + "grad_norm": 0.8510201071166715, + "learning_rate": 1.4173358898913804e-06, + "loss": 0.7709499597549438, + "step": 3428 + }, + { + "epoch": 0.7900921658986175, + "grad_norm": 1.0829385459770577, + "learning_rate": 1.416989639678483e-06, + "loss": 0.7499940395355225, + "step": 3429 + }, + { + "epoch": 0.7903225806451613, + "grad_norm": 0.766744185733082, + "learning_rate": 1.4166433289400911e-06, + "loss": 0.7401680946350098, + "step": 3430 + }, + { + "epoch": 0.7905529953917051, + "grad_norm": 0.8802012939982503, + "learning_rate": 1.4162969577264718e-06, + "loss": 1.0132567882537842, + "step": 3431 + }, + { + "epoch": 0.7907834101382488, + "grad_norm": 0.9758763490715631, + "learning_rate": 1.4159505260879004e-06, + "loss": 0.8438389301300049, + "step": 3432 + }, + { + "epoch": 0.7910138248847927, + "grad_norm": 1.2075583274029744, + "learning_rate": 1.4156040340746603e-06, + "loss": 0.9149703979492188, + "step": 3433 + }, + { + "epoch": 0.7912442396313364, + "grad_norm": 1.4960555955584764, + "learning_rate": 1.4152574817370451e-06, + "loss": 0.9141047596931458, + "step": 3434 + }, + { + "epoch": 
0.7914746543778802, + "grad_norm": 0.924125511762228, + "learning_rate": 1.414910869125356e-06, + "loss": 0.6896570324897766, + "step": 3435 + }, + { + "epoch": 0.791705069124424, + "grad_norm": 0.9277571830040596, + "learning_rate": 1.4145641962899035e-06, + "loss": 0.742916464805603, + "step": 3436 + }, + { + "epoch": 0.7919354838709678, + "grad_norm": 1.0041274553911197, + "learning_rate": 1.414217463281007e-06, + "loss": 0.9315029382705688, + "step": 3437 + }, + { + "epoch": 0.7921658986175115, + "grad_norm": 0.9532695013501692, + "learning_rate": 1.4138706701489942e-06, + "loss": 0.7645175457000732, + "step": 3438 + }, + { + "epoch": 0.7923963133640552, + "grad_norm": 1.0166687927137474, + "learning_rate": 1.413523816944201e-06, + "loss": 0.8253934383392334, + "step": 3439 + }, + { + "epoch": 0.7926267281105991, + "grad_norm": 1.055807296618818, + "learning_rate": 1.4131769037169736e-06, + "loss": 0.8650136590003967, + "step": 3440 + }, + { + "epoch": 0.7928571428571428, + "grad_norm": 1.0239985264965783, + "learning_rate": 1.4128299305176654e-06, + "loss": 0.7453975677490234, + "step": 3441 + }, + { + "epoch": 0.7930875576036867, + "grad_norm": 1.1689392671270256, + "learning_rate": 1.4124828973966392e-06, + "loss": 0.9121813774108887, + "step": 3442 + }, + { + "epoch": 0.7933179723502304, + "grad_norm": 1.16007005259146, + "learning_rate": 1.4121358044042667e-06, + "loss": 0.9097952842712402, + "step": 3443 + }, + { + "epoch": 0.7935483870967742, + "grad_norm": 0.9263687778783555, + "learning_rate": 1.4117886515909277e-06, + "loss": 0.7185770273208618, + "step": 3444 + }, + { + "epoch": 0.793778801843318, + "grad_norm": 0.9816189958888628, + "learning_rate": 1.4114414390070111e-06, + "loss": 0.8192715644836426, + "step": 3445 + }, + { + "epoch": 0.7940092165898618, + "grad_norm": 0.8830372557771754, + "learning_rate": 1.4110941667029143e-06, + "loss": 0.7864251136779785, + "step": 3446 + }, + { + "epoch": 0.7942396313364055, + "grad_norm": 
0.9262266668392852, + "learning_rate": 1.4107468347290431e-06, + "loss": 0.7433357834815979, + "step": 3447 + }, + { + "epoch": 0.7944700460829494, + "grad_norm": 0.8826486406616629, + "learning_rate": 1.4103994431358133e-06, + "loss": 0.8196350336074829, + "step": 3448 + }, + { + "epoch": 0.7947004608294931, + "grad_norm": 1.0379031741076927, + "learning_rate": 1.410051991973647e-06, + "loss": 0.7698987126350403, + "step": 3449 + }, + { + "epoch": 0.7949308755760369, + "grad_norm": 1.228700210939763, + "learning_rate": 1.4097044812929776e-06, + "loss": 0.9404128789901733, + "step": 3450 + }, + { + "epoch": 0.7951612903225806, + "grad_norm": 0.9114628140508482, + "learning_rate": 1.4093569111442443e-06, + "loss": 0.827290952205658, + "step": 3451 + }, + { + "epoch": 0.7953917050691244, + "grad_norm": 1.0612294009838623, + "learning_rate": 1.4090092815778976e-06, + "loss": 0.8126389384269714, + "step": 3452 + }, + { + "epoch": 0.7956221198156682, + "grad_norm": 0.9598694992596972, + "learning_rate": 1.4086615926443953e-06, + "loss": 0.7439650297164917, + "step": 3453 + }, + { + "epoch": 0.7958525345622119, + "grad_norm": 0.9952168701899716, + "learning_rate": 1.4083138443942036e-06, + "loss": 0.7505590915679932, + "step": 3454 + }, + { + "epoch": 0.7960829493087558, + "grad_norm": 0.8299073365871691, + "learning_rate": 1.407966036877798e-06, + "loss": 0.7070168256759644, + "step": 3455 + }, + { + "epoch": 0.7963133640552995, + "grad_norm": 0.9422601313607071, + "learning_rate": 1.4076181701456623e-06, + "loss": 0.8271987438201904, + "step": 3456 + }, + { + "epoch": 0.7965437788018433, + "grad_norm": 0.8558890366072001, + "learning_rate": 1.4072702442482886e-06, + "loss": 0.72886061668396, + "step": 3457 + }, + { + "epoch": 0.7967741935483871, + "grad_norm": 1.1355616522222822, + "learning_rate": 1.4069222592361784e-06, + "loss": 0.838603138923645, + "step": 3458 + }, + { + "epoch": 0.7970046082949309, + "grad_norm": 1.1314183210174298, + "learning_rate": 
1.4065742151598408e-06, + "loss": 0.9829634428024292, + "step": 3459 + }, + { + "epoch": 0.7972350230414746, + "grad_norm": 1.0528251173572156, + "learning_rate": 1.406226112069794e-06, + "loss": 0.8269632458686829, + "step": 3460 + }, + { + "epoch": 0.7974654377880185, + "grad_norm": 1.0290510208624037, + "learning_rate": 1.405877950016565e-06, + "loss": 0.7234654426574707, + "step": 3461 + }, + { + "epoch": 0.7976958525345622, + "grad_norm": 0.89079385428478, + "learning_rate": 1.4055297290506887e-06, + "loss": 0.7843908071517944, + "step": 3462 + }, + { + "epoch": 0.7979262672811059, + "grad_norm": 0.8247890912721374, + "learning_rate": 1.4051814492227094e-06, + "loss": 0.7294371128082275, + "step": 3463 + }, + { + "epoch": 0.7981566820276498, + "grad_norm": 1.1727486785997119, + "learning_rate": 1.4048331105831787e-06, + "loss": 0.8805780410766602, + "step": 3464 + }, + { + "epoch": 0.7983870967741935, + "grad_norm": 0.9922079942807702, + "learning_rate": 1.404484713182658e-06, + "loss": 0.6933708190917969, + "step": 3465 + }, + { + "epoch": 0.7986175115207373, + "grad_norm": 1.0638183747733119, + "learning_rate": 1.404136257071717e-06, + "loss": 0.8720458745956421, + "step": 3466 + }, + { + "epoch": 0.7988479262672811, + "grad_norm": 1.1404138575251217, + "learning_rate": 1.403787742300933e-06, + "loss": 0.7675988674163818, + "step": 3467 + }, + { + "epoch": 0.7990783410138249, + "grad_norm": 1.0188982193786602, + "learning_rate": 1.403439168920893e-06, + "loss": 0.7630051374435425, + "step": 3468 + }, + { + "epoch": 0.7993087557603686, + "grad_norm": 0.9607713149142998, + "learning_rate": 1.4030905369821914e-06, + "loss": 0.9195173978805542, + "step": 3469 + }, + { + "epoch": 0.7995391705069125, + "grad_norm": 0.966603725031027, + "learning_rate": 1.402741846535432e-06, + "loss": 0.9347431659698486, + "step": 3470 + }, + { + "epoch": 0.7997695852534562, + "grad_norm": 1.0423944793385256, + "learning_rate": 1.4023930976312271e-06, + "loss": 0.7812551259994507, 
+ "step": 3471 + }, + { + "epoch": 0.8, + "grad_norm": 1.0230073164776583, + "learning_rate": 1.4020442903201963e-06, + "loss": 0.7655330896377563, + "step": 3472 + }, + { + "epoch": 0.8002304147465438, + "grad_norm": 1.2791975931288466, + "learning_rate": 1.4016954246529694e-06, + "loss": 0.7543904185295105, + "step": 3473 + }, + { + "epoch": 0.8004608294930876, + "grad_norm": 0.8246426244987128, + "learning_rate": 1.4013465006801833e-06, + "loss": 0.9343980550765991, + "step": 3474 + }, + { + "epoch": 0.8006912442396313, + "grad_norm": 1.1458439395589735, + "learning_rate": 1.4009975184524838e-06, + "loss": 0.7366182208061218, + "step": 3475 + }, + { + "epoch": 0.8009216589861751, + "grad_norm": 1.0109168818205314, + "learning_rate": 1.4006484780205254e-06, + "loss": 0.7028899192810059, + "step": 3476 + }, + { + "epoch": 0.8011520737327189, + "grad_norm": 1.1092959183189253, + "learning_rate": 1.4002993794349708e-06, + "loss": 0.9259153604507446, + "step": 3477 + }, + { + "epoch": 0.8013824884792626, + "grad_norm": 1.091442085001374, + "learning_rate": 1.3999502227464914e-06, + "loss": 0.7263842225074768, + "step": 3478 + }, + { + "epoch": 0.8016129032258065, + "grad_norm": 0.9964781390280828, + "learning_rate": 1.3996010080057664e-06, + "loss": 0.8177748918533325, + "step": 3479 + }, + { + "epoch": 0.8018433179723502, + "grad_norm": 1.080145531043834, + "learning_rate": 1.3992517352634842e-06, + "loss": 0.8526895046234131, + "step": 3480 + }, + { + "epoch": 0.802073732718894, + "grad_norm": 1.031018616296166, + "learning_rate": 1.398902404570341e-06, + "loss": 0.7914575338363647, + "step": 3481 + }, + { + "epoch": 0.8023041474654378, + "grad_norm": 0.816157508913072, + "learning_rate": 1.398553015977042e-06, + "loss": 0.7546013593673706, + "step": 3482 + }, + { + "epoch": 0.8025345622119816, + "grad_norm": 1.0408293581677805, + "learning_rate": 1.3982035695343005e-06, + "loss": 0.7250038385391235, + "step": 3483 + }, + { + "epoch": 0.8027649769585253, + 
"grad_norm": 1.023275477136697, + "learning_rate": 1.3978540652928376e-06, + "loss": 0.8650141954421997, + "step": 3484 + }, + { + "epoch": 0.8029953917050692, + "grad_norm": 0.9633891302798026, + "learning_rate": 1.3975045033033838e-06, + "loss": 0.8020066022872925, + "step": 3485 + }, + { + "epoch": 0.8032258064516129, + "grad_norm": 0.9146174916063312, + "learning_rate": 1.3971548836166782e-06, + "loss": 0.7376772165298462, + "step": 3486 + }, + { + "epoch": 0.8034562211981566, + "grad_norm": 0.9278800283054291, + "learning_rate": 1.3968052062834665e-06, + "loss": 0.8440769910812378, + "step": 3487 + }, + { + "epoch": 0.8036866359447005, + "grad_norm": 0.8964312010034259, + "learning_rate": 1.3964554713545047e-06, + "loss": 0.7886836528778076, + "step": 3488 + }, + { + "epoch": 0.8039170506912442, + "grad_norm": 0.9177920963823754, + "learning_rate": 1.396105678880556e-06, + "loss": 0.9167575836181641, + "step": 3489 + }, + { + "epoch": 0.804147465437788, + "grad_norm": 0.8367032180339474, + "learning_rate": 1.3957558289123922e-06, + "loss": 0.6761677861213684, + "step": 3490 + }, + { + "epoch": 0.8043778801843318, + "grad_norm": 0.9716984065235628, + "learning_rate": 1.3954059215007938e-06, + "loss": 0.7775592803955078, + "step": 3491 + }, + { + "epoch": 0.8046082949308756, + "grad_norm": 1.00005526663364, + "learning_rate": 1.3950559566965494e-06, + "loss": 0.8127217292785645, + "step": 3492 + }, + { + "epoch": 0.8048387096774193, + "grad_norm": 1.007116682040637, + "learning_rate": 1.394705934550456e-06, + "loss": 0.8134229779243469, + "step": 3493 + }, + { + "epoch": 0.8050691244239632, + "grad_norm": 1.3224030787110577, + "learning_rate": 1.3943558551133186e-06, + "loss": 0.8853167295455933, + "step": 3494 + }, + { + "epoch": 0.8052995391705069, + "grad_norm": 1.0544152264027669, + "learning_rate": 1.3940057184359506e-06, + "loss": 0.8024332523345947, + "step": 3495 + }, + { + "epoch": 0.8055299539170507, + "grad_norm": 0.6779010833647611, + 
"learning_rate": 1.3936555245691745e-06, + "loss": 0.7581099271774292, + "step": 3496 + }, + { + "epoch": 0.8057603686635945, + "grad_norm": 1.0509729333579008, + "learning_rate": 1.3933052735638203e-06, + "loss": 0.979412317276001, + "step": 3497 + }, + { + "epoch": 0.8059907834101383, + "grad_norm": 0.9816833973848147, + "learning_rate": 1.392954965470726e-06, + "loss": 0.7917830944061279, + "step": 3498 + }, + { + "epoch": 0.806221198156682, + "grad_norm": 0.9622725908619084, + "learning_rate": 1.392604600340739e-06, + "loss": 0.8565326929092407, + "step": 3499 + }, + { + "epoch": 0.8064516129032258, + "grad_norm": 1.0170451339424116, + "learning_rate": 1.3922541782247136e-06, + "loss": 0.7276358604431152, + "step": 3500 + }, + { + "epoch": 0.8066820276497696, + "grad_norm": 0.8351645839157906, + "learning_rate": 1.3919036991735138e-06, + "loss": 0.734528660774231, + "step": 3501 + }, + { + "epoch": 0.8069124423963133, + "grad_norm": 1.1746648423168138, + "learning_rate": 1.391553163238011e-06, + "loss": 0.8786039352416992, + "step": 3502 + }, + { + "epoch": 0.8071428571428572, + "grad_norm": 1.1050955424788658, + "learning_rate": 1.3912025704690844e-06, + "loss": 0.9509482383728027, + "step": 3503 + }, + { + "epoch": 0.8073732718894009, + "grad_norm": 0.8741751886687131, + "learning_rate": 1.3908519209176225e-06, + "loss": 0.7188615202903748, + "step": 3504 + }, + { + "epoch": 0.8076036866359447, + "grad_norm": 1.0307846021250762, + "learning_rate": 1.3905012146345221e-06, + "loss": 0.7681115865707397, + "step": 3505 + }, + { + "epoch": 0.8078341013824885, + "grad_norm": 1.0988034793572021, + "learning_rate": 1.3901504516706874e-06, + "loss": 0.8835415840148926, + "step": 3506 + }, + { + "epoch": 0.8080645161290323, + "grad_norm": 1.0724177836810997, + "learning_rate": 1.389799632077031e-06, + "loss": 0.8179003000259399, + "step": 3507 + }, + { + "epoch": 0.808294930875576, + "grad_norm": 1.1244187286361234, + "learning_rate": 1.3894487559044742e-06, + "loss": 
0.9690247774124146, + "step": 3508 + }, + { + "epoch": 0.8085253456221199, + "grad_norm": 0.9601740737567672, + "learning_rate": 1.389097823203946e-06, + "loss": 0.9759812951087952, + "step": 3509 + }, + { + "epoch": 0.8087557603686636, + "grad_norm": 0.8953376224758026, + "learning_rate": 1.3887468340263838e-06, + "loss": 0.6649112105369568, + "step": 3510 + }, + { + "epoch": 0.8089861751152074, + "grad_norm": 0.8803647716437188, + "learning_rate": 1.388395788422733e-06, + "loss": 0.7824583053588867, + "step": 3511 + }, + { + "epoch": 0.8092165898617512, + "grad_norm": 1.0776551292843717, + "learning_rate": 1.3880446864439482e-06, + "loss": 0.8226176500320435, + "step": 3512 + }, + { + "epoch": 0.8094470046082949, + "grad_norm": 1.0775758718001336, + "learning_rate": 1.3876935281409904e-06, + "loss": 0.7708876729011536, + "step": 3513 + }, + { + "epoch": 0.8096774193548387, + "grad_norm": 1.1275141981575327, + "learning_rate": 1.3873423135648303e-06, + "loss": 0.7162825465202332, + "step": 3514 + }, + { + "epoch": 0.8099078341013825, + "grad_norm": 1.1973823780619761, + "learning_rate": 1.3869910427664464e-06, + "loss": 0.815816342830658, + "step": 3515 + }, + { + "epoch": 0.8101382488479263, + "grad_norm": 1.0491570029475803, + "learning_rate": 1.3866397157968248e-06, + "loss": 0.9166251420974731, + "step": 3516 + }, + { + "epoch": 0.81036866359447, + "grad_norm": 1.185963303947227, + "learning_rate": 1.3862883327069606e-06, + "loss": 0.9193897843360901, + "step": 3517 + }, + { + "epoch": 0.8105990783410139, + "grad_norm": 1.1492579516601074, + "learning_rate": 1.3859368935478557e-06, + "loss": 0.9019489288330078, + "step": 3518 + }, + { + "epoch": 0.8108294930875576, + "grad_norm": 1.0706438739080621, + "learning_rate": 1.3855853983705222e-06, + "loss": 0.8616153597831726, + "step": 3519 + }, + { + "epoch": 0.8110599078341014, + "grad_norm": 0.9368530229676858, + "learning_rate": 1.3852338472259782e-06, + "loss": 0.8898462057113647, + "step": 3520 + }, + { + 
"epoch": 0.8112903225806452, + "grad_norm": 0.9891797921278073, + "learning_rate": 1.3848822401652513e-06, + "loss": 0.770263135433197, + "step": 3521 + }, + { + "epoch": 0.811520737327189, + "grad_norm": 0.950594228231774, + "learning_rate": 1.384530577239377e-06, + "loss": 0.7524563074111938, + "step": 3522 + }, + { + "epoch": 0.8117511520737327, + "grad_norm": 0.8975349550091929, + "learning_rate": 1.3841788584993981e-06, + "loss": 0.776715874671936, + "step": 3523 + }, + { + "epoch": 0.8119815668202764, + "grad_norm": 0.6412822466784485, + "learning_rate": 1.3838270839963666e-06, + "loss": 0.7165439128875732, + "step": 3524 + }, + { + "epoch": 0.8122119815668203, + "grad_norm": 1.0082147827954213, + "learning_rate": 1.383475253781342e-06, + "loss": 0.7641004323959351, + "step": 3525 + }, + { + "epoch": 0.812442396313364, + "grad_norm": 0.9278762834298543, + "learning_rate": 1.3831233679053921e-06, + "loss": 0.7493933439254761, + "step": 3526 + }, + { + "epoch": 0.8126728110599079, + "grad_norm": 1.1064599998463516, + "learning_rate": 1.3827714264195924e-06, + "loss": 0.7981607913970947, + "step": 3527 + }, + { + "epoch": 0.8129032258064516, + "grad_norm": 1.2555949352929368, + "learning_rate": 1.3824194293750272e-06, + "loss": 0.9130103588104248, + "step": 3528 + }, + { + "epoch": 0.8131336405529954, + "grad_norm": 1.0192840808161379, + "learning_rate": 1.3820673768227878e-06, + "loss": 0.7208644151687622, + "step": 3529 + }, + { + "epoch": 0.8133640552995391, + "grad_norm": 0.9880323858602741, + "learning_rate": 1.3817152688139745e-06, + "loss": 0.9134006500244141, + "step": 3530 + }, + { + "epoch": 0.813594470046083, + "grad_norm": 0.836575472485664, + "learning_rate": 1.381363105399695e-06, + "loss": 0.7383376359939575, + "step": 3531 + }, + { + "epoch": 0.8138248847926267, + "grad_norm": 1.4743208995655537, + "learning_rate": 1.381010886631066e-06, + "loss": 0.9143035411834717, + "step": 3532 + }, + { + "epoch": 0.8140552995391706, + "grad_norm": 
0.8030889519622723, + "learning_rate": 1.3806586125592107e-06, + "loss": 0.7972506284713745, + "step": 3533 + }, + { + "epoch": 0.8142857142857143, + "grad_norm": 0.9706054308316248, + "learning_rate": 1.380306283235262e-06, + "loss": 0.8999859094619751, + "step": 3534 + }, + { + "epoch": 0.8145161290322581, + "grad_norm": 1.4136312048518, + "learning_rate": 1.37995389871036e-06, + "loss": 0.7759672999382019, + "step": 3535 + }, + { + "epoch": 0.8147465437788018, + "grad_norm": 0.8852561621502252, + "learning_rate": 1.3796014590356522e-06, + "loss": 0.7915023565292358, + "step": 3536 + }, + { + "epoch": 0.8149769585253456, + "grad_norm": 1.0626460640648143, + "learning_rate": 1.3792489642622956e-06, + "loss": 0.8259623050689697, + "step": 3537 + }, + { + "epoch": 0.8152073732718894, + "grad_norm": 0.9193643373115533, + "learning_rate": 1.3788964144414534e-06, + "loss": 0.7786526679992676, + "step": 3538 + }, + { + "epoch": 0.8154377880184331, + "grad_norm": 0.8743120056652736, + "learning_rate": 1.3785438096242987e-06, + "loss": 0.8655314445495605, + "step": 3539 + }, + { + "epoch": 0.815668202764977, + "grad_norm": 1.073925215345039, + "learning_rate": 1.3781911498620108e-06, + "loss": 0.8116016387939453, + "step": 3540 + }, + { + "epoch": 0.8158986175115207, + "grad_norm": 1.07781870851745, + "learning_rate": 1.3778384352057781e-06, + "loss": 0.712907075881958, + "step": 3541 + }, + { + "epoch": 0.8161290322580645, + "grad_norm": 0.9419481549244654, + "learning_rate": 1.377485665706797e-06, + "loss": 0.8271318674087524, + "step": 3542 + }, + { + "epoch": 0.8163594470046083, + "grad_norm": 1.231349694992367, + "learning_rate": 1.3771328414162713e-06, + "loss": 0.9161353707313538, + "step": 3543 + }, + { + "epoch": 0.8165898617511521, + "grad_norm": 1.1900246832578463, + "learning_rate": 1.3767799623854125e-06, + "loss": 0.9555908441543579, + "step": 3544 + }, + { + "epoch": 0.8168202764976958, + "grad_norm": 0.9121338000164769, + "learning_rate": 
1.3764270286654414e-06, + "loss": 0.7863249778747559, + "step": 3545 + }, + { + "epoch": 0.8170506912442397, + "grad_norm": 1.0362996056258458, + "learning_rate": 1.3760740403075853e-06, + "loss": 0.9086883068084717, + "step": 3546 + }, + { + "epoch": 0.8172811059907834, + "grad_norm": 0.9211768991499883, + "learning_rate": 1.37572099736308e-06, + "loss": 0.6231412887573242, + "step": 3547 + }, + { + "epoch": 0.8175115207373271, + "grad_norm": 0.94903309328564, + "learning_rate": 1.3753678998831692e-06, + "loss": 0.8221716284751892, + "step": 3548 + }, + { + "epoch": 0.817741935483871, + "grad_norm": 1.0641797094094223, + "learning_rate": 1.375014747919105e-06, + "loss": 0.8077783584594727, + "step": 3549 + }, + { + "epoch": 0.8179723502304147, + "grad_norm": 1.0675643850007648, + "learning_rate": 1.3746615415221463e-06, + "loss": 0.6882060766220093, + "step": 3550 + }, + { + "epoch": 0.8182027649769585, + "grad_norm": 0.8393670588117293, + "learning_rate": 1.3743082807435614e-06, + "loss": 0.700161337852478, + "step": 3551 + }, + { + "epoch": 0.8184331797235023, + "grad_norm": 0.8856084645963668, + "learning_rate": 1.3739549656346243e-06, + "loss": 0.737981915473938, + "step": 3552 + }, + { + "epoch": 0.8186635944700461, + "grad_norm": 0.8562104816360829, + "learning_rate": 1.3736015962466193e-06, + "loss": 0.8025717735290527, + "step": 3553 + }, + { + "epoch": 0.8188940092165898, + "grad_norm": 1.1233745076434911, + "learning_rate": 1.3732481726308372e-06, + "loss": 0.8855722546577454, + "step": 3554 + }, + { + "epoch": 0.8191244239631337, + "grad_norm": 1.2861487220187957, + "learning_rate": 1.3728946948385768e-06, + "loss": 0.819130539894104, + "step": 3555 + }, + { + "epoch": 0.8193548387096774, + "grad_norm": 1.086213399760416, + "learning_rate": 1.3725411629211454e-06, + "loss": 0.8419625759124756, + "step": 3556 + }, + { + "epoch": 0.8195852534562212, + "grad_norm": 0.8659477904111433, + "learning_rate": 1.3721875769298575e-06, + "loss": 0.8478890657424927, 
+ "step": 3557 + }, + { + "epoch": 0.819815668202765, + "grad_norm": 0.9446742102947047, + "learning_rate": 1.371833936916035e-06, + "loss": 0.8654077053070068, + "step": 3558 + }, + { + "epoch": 0.8200460829493088, + "grad_norm": 1.132873117876266, + "learning_rate": 1.371480242931009e-06, + "loss": 0.8898686170578003, + "step": 3559 + }, + { + "epoch": 0.8202764976958525, + "grad_norm": 1.0419861877874252, + "learning_rate": 1.3711264950261176e-06, + "loss": 0.873773455619812, + "step": 3560 + }, + { + "epoch": 0.8205069124423963, + "grad_norm": 0.8068261635969198, + "learning_rate": 1.3707726932527068e-06, + "loss": 0.6323572397232056, + "step": 3561 + }, + { + "epoch": 0.8207373271889401, + "grad_norm": 1.1038849604905803, + "learning_rate": 1.3704188376621304e-06, + "loss": 0.7018281817436218, + "step": 3562 + }, + { + "epoch": 0.8209677419354838, + "grad_norm": 1.084497532058705, + "learning_rate": 1.37006492830575e-06, + "loss": 0.8052775859832764, + "step": 3563 + }, + { + "epoch": 0.8211981566820277, + "grad_norm": 1.0795040103988192, + "learning_rate": 1.3697109652349352e-06, + "loss": 0.8057233095169067, + "step": 3564 + }, + { + "epoch": 0.8214285714285714, + "grad_norm": 1.1240440402053398, + "learning_rate": 1.3693569485010633e-06, + "loss": 0.8647899627685547, + "step": 3565 + }, + { + "epoch": 0.8216589861751152, + "grad_norm": 0.9167509343069911, + "learning_rate": 1.369002878155519e-06, + "loss": 0.8022265434265137, + "step": 3566 + }, + { + "epoch": 0.821889400921659, + "grad_norm": 1.0569217144551386, + "learning_rate": 1.368648754249696e-06, + "loss": 0.8534140586853027, + "step": 3567 + }, + { + "epoch": 0.8221198156682028, + "grad_norm": 1.1336199597215886, + "learning_rate": 1.3682945768349935e-06, + "loss": 0.905183732509613, + "step": 3568 + }, + { + "epoch": 0.8223502304147465, + "grad_norm": 1.0114816874699049, + "learning_rate": 1.3679403459628215e-06, + "loss": 0.6096831560134888, + "step": 3569 + }, + { + "epoch": 0.8225806451612904, 
+ "grad_norm": 1.0433167842442863, + "learning_rate": 1.367586061684595e-06, + "loss": 0.7220188975334167, + "step": 3570 + }, + { + "epoch": 0.8228110599078341, + "grad_norm": 1.2434665139770538, + "learning_rate": 1.3672317240517386e-06, + "loss": 0.8028903007507324, + "step": 3571 + }, + { + "epoch": 0.8230414746543778, + "grad_norm": 0.8999816334081224, + "learning_rate": 1.3668773331156831e-06, + "loss": 0.8121141791343689, + "step": 3572 + }, + { + "epoch": 0.8232718894009217, + "grad_norm": 0.9985064007808814, + "learning_rate": 1.3665228889278687e-06, + "loss": 0.8259282112121582, + "step": 3573 + }, + { + "epoch": 0.8235023041474654, + "grad_norm": 1.0492496227314838, + "learning_rate": 1.3661683915397423e-06, + "loss": 0.9356029033660889, + "step": 3574 + }, + { + "epoch": 0.8237327188940092, + "grad_norm": 0.9103215470779688, + "learning_rate": 1.3658138410027582e-06, + "loss": 0.738788366317749, + "step": 3575 + }, + { + "epoch": 0.823963133640553, + "grad_norm": 0.9813034370683628, + "learning_rate": 1.3654592373683794e-06, + "loss": 0.7775605320930481, + "step": 3576 + }, + { + "epoch": 0.8241935483870968, + "grad_norm": 1.0650813981062164, + "learning_rate": 1.3651045806880766e-06, + "loss": 0.7645376324653625, + "step": 3577 + }, + { + "epoch": 0.8244239631336405, + "grad_norm": 0.9731809944135928, + "learning_rate": 1.3647498710133272e-06, + "loss": 0.7713958024978638, + "step": 3578 + }, + { + "epoch": 0.8246543778801844, + "grad_norm": 1.148498187573576, + "learning_rate": 1.3643951083956165e-06, + "loss": 0.6920947432518005, + "step": 3579 + }, + { + "epoch": 0.8248847926267281, + "grad_norm": 0.8263814798727009, + "learning_rate": 1.3640402928864382e-06, + "loss": 0.7108405828475952, + "step": 3580 + }, + { + "epoch": 0.8251152073732719, + "grad_norm": 1.0141959867722847, + "learning_rate": 1.3636854245372936e-06, + "loss": 0.7879295945167542, + "step": 3581 + }, + { + "epoch": 0.8253456221198157, + "grad_norm": 0.8796188222287911, + 
"learning_rate": 1.3633305033996909e-06, + "loss": 0.8173119425773621, + "step": 3582 + }, + { + "epoch": 0.8255760368663595, + "grad_norm": 1.230625652029921, + "learning_rate": 1.3629755295251466e-06, + "loss": 0.8530454635620117, + "step": 3583 + }, + { + "epoch": 0.8258064516129032, + "grad_norm": 0.7851178128331011, + "learning_rate": 1.3626205029651846e-06, + "loss": 0.7749553918838501, + "step": 3584 + }, + { + "epoch": 0.826036866359447, + "grad_norm": 0.9879629515788971, + "learning_rate": 1.362265423771337e-06, + "loss": 0.8313847780227661, + "step": 3585 + }, + { + "epoch": 0.8262672811059908, + "grad_norm": 0.9997153587851354, + "learning_rate": 1.3619102919951424e-06, + "loss": 0.7285455465316772, + "step": 3586 + }, + { + "epoch": 0.8264976958525345, + "grad_norm": 1.053529475482116, + "learning_rate": 1.361555107688148e-06, + "loss": 0.8084003925323486, + "step": 3587 + }, + { + "epoch": 0.8267281105990784, + "grad_norm": 1.1979034262658517, + "learning_rate": 1.3611998709019088e-06, + "loss": 0.8506543040275574, + "step": 3588 + }, + { + "epoch": 0.8269585253456221, + "grad_norm": 1.150137696376644, + "learning_rate": 1.3608445816879864e-06, + "loss": 0.8320293426513672, + "step": 3589 + }, + { + "epoch": 0.8271889400921659, + "grad_norm": 1.0954200087136678, + "learning_rate": 1.3604892400979501e-06, + "loss": 0.8116205930709839, + "step": 3590 + }, + { + "epoch": 0.8274193548387097, + "grad_norm": 0.988607654244707, + "learning_rate": 1.3601338461833785e-06, + "loss": 0.8317450284957886, + "step": 3591 + }, + { + "epoch": 0.8276497695852535, + "grad_norm": 1.0502248139840338, + "learning_rate": 1.3597783999958553e-06, + "loss": 0.7348642349243164, + "step": 3592 + }, + { + "epoch": 0.8278801843317972, + "grad_norm": 0.8829971344500126, + "learning_rate": 1.359422901586974e-06, + "loss": 0.8087270259857178, + "step": 3593 + }, + { + "epoch": 0.8281105990783411, + "grad_norm": 1.1012699484003496, + "learning_rate": 1.3590673510083345e-06, + "loss": 
0.7964637875556946, + "step": 3594 + }, + { + "epoch": 0.8283410138248848, + "grad_norm": 0.8597833865541051, + "learning_rate": 1.358711748311544e-06, + "loss": 0.6192176342010498, + "step": 3595 + }, + { + "epoch": 0.8285714285714286, + "grad_norm": 1.458647590594062, + "learning_rate": 1.3583560935482182e-06, + "loss": 0.7735739946365356, + "step": 3596 + }, + { + "epoch": 0.8288018433179724, + "grad_norm": 1.209934555151429, + "learning_rate": 1.35800038676998e-06, + "loss": 0.7965315580368042, + "step": 3597 + }, + { + "epoch": 0.8290322580645161, + "grad_norm": 1.0086229436787473, + "learning_rate": 1.3576446280284595e-06, + "loss": 0.6489244699478149, + "step": 3598 + }, + { + "epoch": 0.8292626728110599, + "grad_norm": 1.041271189758682, + "learning_rate": 1.3572888173752946e-06, + "loss": 0.8073695302009583, + "step": 3599 + }, + { + "epoch": 0.8294930875576036, + "grad_norm": 0.7544591630478071, + "learning_rate": 1.3569329548621309e-06, + "loss": 0.7925900816917419, + "step": 3600 + }, + { + "epoch": 0.8297235023041475, + "grad_norm": 1.1274353505725723, + "learning_rate": 1.356577040540621e-06, + "loss": 0.83954918384552, + "step": 3601 + }, + { + "epoch": 0.8299539170506912, + "grad_norm": 0.69092010707332, + "learning_rate": 1.356221074462426e-06, + "loss": 0.6384706497192383, + "step": 3602 + }, + { + "epoch": 0.830184331797235, + "grad_norm": 0.8604009933780791, + "learning_rate": 1.3558650566792136e-06, + "loss": 0.8308184146881104, + "step": 3603 + }, + { + "epoch": 0.8304147465437788, + "grad_norm": 0.9893567222365065, + "learning_rate": 1.3555089872426596e-06, + "loss": 0.7972864508628845, + "step": 3604 + }, + { + "epoch": 0.8306451612903226, + "grad_norm": 1.0575497381629144, + "learning_rate": 1.3551528662044463e-06, + "loss": 0.8038849830627441, + "step": 3605 + }, + { + "epoch": 0.8308755760368663, + "grad_norm": 1.0146034272672162, + "learning_rate": 1.3547966936162646e-06, + "loss": 0.7735980749130249, + "step": 3606 + }, + { + "epoch": 
0.8311059907834102, + "grad_norm": 1.169701687059532, + "learning_rate": 1.354440469529813e-06, + "loss": 0.7717504501342773, + "step": 3607 + }, + { + "epoch": 0.8313364055299539, + "grad_norm": 0.8981514617249363, + "learning_rate": 1.3540841939967962e-06, + "loss": 0.9405615329742432, + "step": 3608 + }, + { + "epoch": 0.8315668202764976, + "grad_norm": 0.9913743440349779, + "learning_rate": 1.3537278670689273e-06, + "loss": 0.7730603814125061, + "step": 3609 + }, + { + "epoch": 0.8317972350230415, + "grad_norm": 1.1958069213876743, + "learning_rate": 1.353371488797927e-06, + "loss": 0.8677463531494141, + "step": 3610 + }, + { + "epoch": 0.8320276497695852, + "grad_norm": 1.0362704574624084, + "learning_rate": 1.3530150592355227e-06, + "loss": 0.8261700868606567, + "step": 3611 + }, + { + "epoch": 0.832258064516129, + "grad_norm": 0.9430749395940993, + "learning_rate": 1.35265857843345e-06, + "loss": 0.6799050569534302, + "step": 3612 + }, + { + "epoch": 0.8324884792626728, + "grad_norm": 1.0479319081515341, + "learning_rate": 1.3523020464434514e-06, + "loss": 0.9117664098739624, + "step": 3613 + }, + { + "epoch": 0.8327188940092166, + "grad_norm": 1.0691436327470698, + "learning_rate": 1.3519454633172771e-06, + "loss": 0.8637168407440186, + "step": 3614 + }, + { + "epoch": 0.8329493087557603, + "grad_norm": 0.8579929983536723, + "learning_rate": 1.3515888291066848e-06, + "loss": 0.8169793486595154, + "step": 3615 + }, + { + "epoch": 0.8331797235023042, + "grad_norm": 0.920659117563804, + "learning_rate": 1.3512321438634392e-06, + "loss": 0.6901019811630249, + "step": 3616 + }, + { + "epoch": 0.8334101382488479, + "grad_norm": 1.350300242304736, + "learning_rate": 1.3508754076393133e-06, + "loss": 0.868461012840271, + "step": 3617 + }, + { + "epoch": 0.8336405529953917, + "grad_norm": 0.9765625383196332, + "learning_rate": 1.3505186204860864e-06, + "loss": 0.7916195392608643, + "step": 3618 + }, + { + "epoch": 0.8338709677419355, + "grad_norm": 
0.9685384546753151, + "learning_rate": 1.3501617824555456e-06, + "loss": 0.7078498601913452, + "step": 3619 + }, + { + "epoch": 0.8341013824884793, + "grad_norm": 1.2242730037688179, + "learning_rate": 1.3498048935994857e-06, + "loss": 0.890669584274292, + "step": 3620 + }, + { + "epoch": 0.834331797235023, + "grad_norm": 0.8358453705503323, + "learning_rate": 1.3494479539697087e-06, + "loss": 0.8162761926651001, + "step": 3621 + }, + { + "epoch": 0.8345622119815668, + "grad_norm": 1.013077112717635, + "learning_rate": 1.3490909636180233e-06, + "loss": 0.7743235230445862, + "step": 3622 + }, + { + "epoch": 0.8347926267281106, + "grad_norm": 1.0099386147746707, + "learning_rate": 1.3487339225962472e-06, + "loss": 0.8297950029373169, + "step": 3623 + }, + { + "epoch": 0.8350230414746543, + "grad_norm": 1.1865830325248257, + "learning_rate": 1.3483768309562035e-06, + "loss": 0.9550352692604065, + "step": 3624 + }, + { + "epoch": 0.8352534562211982, + "grad_norm": 0.9576603479694407, + "learning_rate": 1.3480196887497242e-06, + "loss": 0.7343823909759521, + "step": 3625 + }, + { + "epoch": 0.8354838709677419, + "grad_norm": 1.0312198523972542, + "learning_rate": 1.3476624960286479e-06, + "loss": 0.8942683935165405, + "step": 3626 + }, + { + "epoch": 0.8357142857142857, + "grad_norm": 1.0216203737583824, + "learning_rate": 1.34730525284482e-06, + "loss": 0.778289794921875, + "step": 3627 + }, + { + "epoch": 0.8359447004608295, + "grad_norm": 0.8374039418656565, + "learning_rate": 1.3469479592500951e-06, + "loss": 0.5924088954925537, + "step": 3628 + }, + { + "epoch": 0.8361751152073733, + "grad_norm": 1.6640914693337763, + "learning_rate": 1.3465906152963329e-06, + "loss": 1.0363706350326538, + "step": 3629 + }, + { + "epoch": 0.836405529953917, + "grad_norm": 1.1094517477504633, + "learning_rate": 1.346233221035402e-06, + "loss": 0.7927669286727905, + "step": 3630 + }, + { + "epoch": 0.8366359447004609, + "grad_norm": 1.017803676905956, + "learning_rate": 
1.345875776519177e-06, + "loss": 0.8428707718849182, + "step": 3631 + }, + { + "epoch": 0.8368663594470046, + "grad_norm": 1.0894705086513103, + "learning_rate": 1.345518281799541e-06, + "loss": 0.7975403070449829, + "step": 3632 + }, + { + "epoch": 0.8370967741935483, + "grad_norm": 1.0032068733109394, + "learning_rate": 1.3451607369283842e-06, + "loss": 0.8383880853652954, + "step": 3633 + }, + { + "epoch": 0.8373271889400922, + "grad_norm": 1.007543360201824, + "learning_rate": 1.3448031419576028e-06, + "loss": 0.9033386707305908, + "step": 3634 + }, + { + "epoch": 0.8375576036866359, + "grad_norm": 1.1312406567077748, + "learning_rate": 1.3444454969391021e-06, + "loss": 0.8913514018058777, + "step": 3635 + }, + { + "epoch": 0.8377880184331797, + "grad_norm": 1.4041014769308477, + "learning_rate": 1.3440878019247936e-06, + "loss": 0.9051915407180786, + "step": 3636 + }, + { + "epoch": 0.8380184331797235, + "grad_norm": 0.9777048211867199, + "learning_rate": 1.343730056966596e-06, + "loss": 0.8240993618965149, + "step": 3637 + }, + { + "epoch": 0.8382488479262673, + "grad_norm": 1.1788464491037272, + "learning_rate": 1.3433722621164358e-06, + "loss": 0.8276345133781433, + "step": 3638 + }, + { + "epoch": 0.838479262672811, + "grad_norm": 1.1512835626079758, + "learning_rate": 1.343014417426246e-06, + "loss": 0.8250508904457092, + "step": 3639 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 1.0066201319773938, + "learning_rate": 1.342656522947968e-06, + "loss": 0.7872868180274963, + "step": 3640 + }, + { + "epoch": 0.8389400921658986, + "grad_norm": 0.8473767849665474, + "learning_rate": 1.3422985787335491e-06, + "loss": 0.7634146809577942, + "step": 3641 + }, + { + "epoch": 0.8391705069124424, + "grad_norm": 0.9991956505737468, + "learning_rate": 1.3419405848349448e-06, + "loss": 0.63923180103302, + "step": 3642 + }, + { + "epoch": 0.8394009216589862, + "grad_norm": 0.8936657519523178, + "learning_rate": 1.3415825413041173e-06, + "loss": 0.900942325592041, 
+ "step": 3643 + }, + { + "epoch": 0.83963133640553, + "grad_norm": 0.8086145892134451, + "learning_rate": 1.341224448193036e-06, + "loss": 0.6415199041366577, + "step": 3644 + }, + { + "epoch": 0.8398617511520737, + "grad_norm": 0.7541710851332, + "learning_rate": 1.3408663055536775e-06, + "loss": 0.7750275135040283, + "step": 3645 + }, + { + "epoch": 0.8400921658986175, + "grad_norm": 1.0677810215945565, + "learning_rate": 1.3405081134380264e-06, + "loss": 0.8159983158111572, + "step": 3646 + }, + { + "epoch": 0.8403225806451613, + "grad_norm": 1.0361250834896671, + "learning_rate": 1.3401498718980733e-06, + "loss": 0.6870952844619751, + "step": 3647 + }, + { + "epoch": 0.840552995391705, + "grad_norm": 1.0057736881312165, + "learning_rate": 1.3397915809858168e-06, + "loss": 0.8588749170303345, + "step": 3648 + }, + { + "epoch": 0.8407834101382489, + "grad_norm": 0.8944864050117411, + "learning_rate": 1.3394332407532619e-06, + "loss": 0.6926778554916382, + "step": 3649 + }, + { + "epoch": 0.8410138248847926, + "grad_norm": 0.9996715673645244, + "learning_rate": 1.3390748512524213e-06, + "loss": 0.7165309190750122, + "step": 3650 + }, + { + "epoch": 0.8412442396313364, + "grad_norm": 0.8676606625906299, + "learning_rate": 1.3387164125353149e-06, + "loss": 0.7782741189002991, + "step": 3651 + }, + { + "epoch": 0.8414746543778802, + "grad_norm": 1.2076812224962883, + "learning_rate": 1.3383579246539698e-06, + "loss": 0.9153795838356018, + "step": 3652 + }, + { + "epoch": 0.841705069124424, + "grad_norm": 0.9194313077193984, + "learning_rate": 1.33799938766042e-06, + "loss": 0.8419643044471741, + "step": 3653 + }, + { + "epoch": 0.8419354838709677, + "grad_norm": 0.9325821466469247, + "learning_rate": 1.3376408016067064e-06, + "loss": 0.6927728652954102, + "step": 3654 + }, + { + "epoch": 0.8421658986175116, + "grad_norm": 0.8795285549516815, + "learning_rate": 1.3372821665448774e-06, + "loss": 0.7721414566040039, + "step": 3655 + }, + { + "epoch": 
0.8423963133640553, + "grad_norm": 0.8650877944504008, + "learning_rate": 1.3369234825269887e-06, + "loss": 0.7277967929840088, + "step": 3656 + }, + { + "epoch": 0.8426267281105991, + "grad_norm": 0.8893990009557013, + "learning_rate": 1.336564749605102e-06, + "loss": 0.7764936089515686, + "step": 3657 + }, + { + "epoch": 0.8428571428571429, + "grad_norm": 1.0366422012708214, + "learning_rate": 1.336205967831288e-06, + "loss": 0.7445545196533203, + "step": 3658 + }, + { + "epoch": 0.8430875576036866, + "grad_norm": 0.9883734306246509, + "learning_rate": 1.3358471372576227e-06, + "loss": 0.8359465599060059, + "step": 3659 + }, + { + "epoch": 0.8433179723502304, + "grad_norm": 1.1992732184975974, + "learning_rate": 1.33548825793619e-06, + "loss": 0.8634141683578491, + "step": 3660 + }, + { + "epoch": 0.8435483870967742, + "grad_norm": 0.9932267949840192, + "learning_rate": 1.3351293299190804e-06, + "loss": 0.7365708351135254, + "step": 3661 + }, + { + "epoch": 0.843778801843318, + "grad_norm": 1.0553779905834517, + "learning_rate": 1.3347703532583927e-06, + "loss": 0.7135465145111084, + "step": 3662 + }, + { + "epoch": 0.8440092165898617, + "grad_norm": 0.9366872036776951, + "learning_rate": 1.3344113280062313e-06, + "loss": 0.7411447763442993, + "step": 3663 + }, + { + "epoch": 0.8442396313364056, + "grad_norm": 1.1654296408446096, + "learning_rate": 1.3340522542147081e-06, + "loss": 0.7765100002288818, + "step": 3664 + }, + { + "epoch": 0.8444700460829493, + "grad_norm": 0.9657216098787882, + "learning_rate": 1.3336931319359426e-06, + "loss": 0.7638096809387207, + "step": 3665 + }, + { + "epoch": 0.8447004608294931, + "grad_norm": 0.8148482611092309, + "learning_rate": 1.3333339612220606e-06, + "loss": 0.7114577889442444, + "step": 3666 + }, + { + "epoch": 0.8449308755760369, + "grad_norm": 1.075345107734405, + "learning_rate": 1.3329747421251955e-06, + "loss": 0.8702960014343262, + "step": 3667 + }, + { + "epoch": 0.8451612903225807, + "grad_norm": 
0.8702936794654799, + "learning_rate": 1.3326154746974878e-06, + "loss": 0.7248300313949585, + "step": 3668 + }, + { + "epoch": 0.8453917050691244, + "grad_norm": 1.0810218150457531, + "learning_rate": 1.332256158991084e-06, + "loss": 0.7648389339447021, + "step": 3669 + }, + { + "epoch": 0.8456221198156681, + "grad_norm": 1.1179174327015893, + "learning_rate": 1.3318967950581383e-06, + "loss": 0.7075401544570923, + "step": 3670 + }, + { + "epoch": 0.845852534562212, + "grad_norm": 0.9497106076514022, + "learning_rate": 1.3315373829508122e-06, + "loss": 0.6923220157623291, + "step": 3671 + }, + { + "epoch": 0.8460829493087557, + "grad_norm": 1.100773813694407, + "learning_rate": 1.3311779227212742e-06, + "loss": 0.7522361874580383, + "step": 3672 + }, + { + "epoch": 0.8463133640552996, + "grad_norm": 1.026931960572947, + "learning_rate": 1.3308184144216989e-06, + "loss": 0.7087293863296509, + "step": 3673 + }, + { + "epoch": 0.8465437788018433, + "grad_norm": 0.793322008156401, + "learning_rate": 1.3304588581042688e-06, + "loss": 0.782098650932312, + "step": 3674 + }, + { + "epoch": 0.8467741935483871, + "grad_norm": 1.029621860148689, + "learning_rate": 1.330099253821173e-06, + "loss": 0.7671197652816772, + "step": 3675 + }, + { + "epoch": 0.8470046082949308, + "grad_norm": 0.8604911309489864, + "learning_rate": 1.3297396016246073e-06, + "loss": 0.8098698258399963, + "step": 3676 + }, + { + "epoch": 0.8472350230414747, + "grad_norm": 0.9021265860196932, + "learning_rate": 1.3293799015667751e-06, + "loss": 0.7671023011207581, + "step": 3677 + }, + { + "epoch": 0.8474654377880184, + "grad_norm": 0.9115553667327773, + "learning_rate": 1.3290201536998862e-06, + "loss": 0.7448668479919434, + "step": 3678 + }, + { + "epoch": 0.8476958525345623, + "grad_norm": 1.4463207292378697, + "learning_rate": 1.3286603580761576e-06, + "loss": 0.946117639541626, + "step": 3679 + }, + { + "epoch": 0.847926267281106, + "grad_norm": 0.932975472082494, + "learning_rate": 
1.328300514747813e-06, + "loss": 0.8134163618087769, + "step": 3680 + }, + { + "epoch": 0.8481566820276498, + "grad_norm": 1.0433920810873991, + "learning_rate": 1.327940623767083e-06, + "loss": 0.725477933883667, + "step": 3681 + }, + { + "epoch": 0.8483870967741935, + "grad_norm": 0.9434209059724857, + "learning_rate": 1.3275806851862061e-06, + "loss": 0.8278200626373291, + "step": 3682 + }, + { + "epoch": 0.8486175115207373, + "grad_norm": 1.2837572025692205, + "learning_rate": 1.327220699057426e-06, + "loss": 0.8437181711196899, + "step": 3683 + }, + { + "epoch": 0.8488479262672811, + "grad_norm": 1.0932618965520366, + "learning_rate": 1.326860665432995e-06, + "loss": 0.8921856880187988, + "step": 3684 + }, + { + "epoch": 0.8490783410138248, + "grad_norm": 0.9850919430921788, + "learning_rate": 1.326500584365171e-06, + "loss": 0.7285119295120239, + "step": 3685 + }, + { + "epoch": 0.8493087557603687, + "grad_norm": 1.0119244636074918, + "learning_rate": 1.3261404559062196e-06, + "loss": 0.8968918323516846, + "step": 3686 + }, + { + "epoch": 0.8495391705069124, + "grad_norm": 0.9862869524570133, + "learning_rate": 1.3257802801084123e-06, + "loss": 0.6794285774230957, + "step": 3687 + }, + { + "epoch": 0.8497695852534562, + "grad_norm": 1.1495746754769118, + "learning_rate": 1.3254200570240291e-06, + "loss": 0.869774341583252, + "step": 3688 + }, + { + "epoch": 0.85, + "grad_norm": 1.1620464557259493, + "learning_rate": 1.3250597867053553e-06, + "loss": 0.7862332463264465, + "step": 3689 + }, + { + "epoch": 0.8502304147465438, + "grad_norm": 1.1253065949092746, + "learning_rate": 1.3246994692046835e-06, + "loss": 0.8424299955368042, + "step": 3690 + }, + { + "epoch": 0.8504608294930875, + "grad_norm": 0.7041532260107465, + "learning_rate": 1.3243391045743137e-06, + "loss": 0.6232138276100159, + "step": 3691 + }, + { + "epoch": 0.8506912442396314, + "grad_norm": 0.9563538572085633, + "learning_rate": 1.3239786928665523e-06, + "loss": 0.7108159065246582, + "step": 
3692 + }, + { + "epoch": 0.8509216589861751, + "grad_norm": 1.0262733388108027, + "learning_rate": 1.3236182341337126e-06, + "loss": 0.7282330393791199, + "step": 3693 + }, + { + "epoch": 0.8511520737327188, + "grad_norm": 1.2079736335999256, + "learning_rate": 1.3232577284281147e-06, + "loss": 0.7864304780960083, + "step": 3694 + }, + { + "epoch": 0.8513824884792627, + "grad_norm": 0.9682428596442779, + "learning_rate": 1.3228971758020852e-06, + "loss": 0.7826365232467651, + "step": 3695 + }, + { + "epoch": 0.8516129032258064, + "grad_norm": 1.0308498953586989, + "learning_rate": 1.322536576307958e-06, + "loss": 0.8429988026618958, + "step": 3696 + }, + { + "epoch": 0.8518433179723502, + "grad_norm": 1.106791902142165, + "learning_rate": 1.322175929998074e-06, + "loss": 0.771148145198822, + "step": 3697 + }, + { + "epoch": 0.852073732718894, + "grad_norm": 1.2323556662321768, + "learning_rate": 1.3218152369247804e-06, + "loss": 0.9610496759414673, + "step": 3698 + }, + { + "epoch": 0.8523041474654378, + "grad_norm": 1.0124488299649408, + "learning_rate": 1.321454497140431e-06, + "loss": 0.7286547422409058, + "step": 3699 + }, + { + "epoch": 0.8525345622119815, + "grad_norm": 0.8362780560832063, + "learning_rate": 1.321093710697387e-06, + "loss": 0.7446750402450562, + "step": 3700 + }, + { + "epoch": 0.8527649769585254, + "grad_norm": 0.8774754337310029, + "learning_rate": 1.3207328776480156e-06, + "loss": 0.7211639881134033, + "step": 3701 + }, + { + "epoch": 0.8529953917050691, + "grad_norm": 0.9667628641735269, + "learning_rate": 1.320371998044692e-06, + "loss": 0.765962541103363, + "step": 3702 + }, + { + "epoch": 0.853225806451613, + "grad_norm": 1.0775083181101466, + "learning_rate": 1.3200110719397967e-06, + "loss": 0.9090084433555603, + "step": 3703 + }, + { + "epoch": 0.8534562211981567, + "grad_norm": 0.9604272002153474, + "learning_rate": 1.319650099385718e-06, + "loss": 0.8222901225090027, + "step": 3704 + }, + { + "epoch": 0.8536866359447005, + 
"grad_norm": 1.0297311955715076, + "learning_rate": 1.3192890804348508e-06, + "loss": 0.7929965853691101, + "step": 3705 + }, + { + "epoch": 0.8539170506912442, + "grad_norm": 0.9788103737354025, + "learning_rate": 1.318928015139596e-06, + "loss": 0.89229816198349, + "step": 3706 + }, + { + "epoch": 0.854147465437788, + "grad_norm": 1.1185541946390394, + "learning_rate": 1.3185669035523621e-06, + "loss": 0.8348276615142822, + "step": 3707 + }, + { + "epoch": 0.8543778801843318, + "grad_norm": 1.0960703003892842, + "learning_rate": 1.3182057457255639e-06, + "loss": 0.9006820917129517, + "step": 3708 + }, + { + "epoch": 0.8546082949308755, + "grad_norm": 0.8300224623954644, + "learning_rate": 1.3178445417116233e-06, + "loss": 0.665691614151001, + "step": 3709 + }, + { + "epoch": 0.8548387096774194, + "grad_norm": 0.6677558949928035, + "learning_rate": 1.3174832915629677e-06, + "loss": 0.7073110342025757, + "step": 3710 + }, + { + "epoch": 0.8550691244239631, + "grad_norm": 1.0807205184602706, + "learning_rate": 1.317121995332033e-06, + "loss": 0.7125800848007202, + "step": 3711 + }, + { + "epoch": 0.8552995391705069, + "grad_norm": 1.1504081133401938, + "learning_rate": 1.31676065307126e-06, + "loss": 0.847205638885498, + "step": 3712 + }, + { + "epoch": 0.8555299539170507, + "grad_norm": 1.1272186923536152, + "learning_rate": 1.3163992648330979e-06, + "loss": 0.860866904258728, + "step": 3713 + }, + { + "epoch": 0.8557603686635945, + "grad_norm": 0.9974272492162177, + "learning_rate": 1.3160378306700014e-06, + "loss": 0.811161994934082, + "step": 3714 + }, + { + "epoch": 0.8559907834101382, + "grad_norm": 1.059693566679631, + "learning_rate": 1.3156763506344318e-06, + "loss": 1.0276790857315063, + "step": 3715 + }, + { + "epoch": 0.8562211981566821, + "grad_norm": 0.8617440282777447, + "learning_rate": 1.3153148247788584e-06, + "loss": 0.7462253570556641, + "step": 3716 + }, + { + "epoch": 0.8564516129032258, + "grad_norm": 1.281384523734545, + "learning_rate": 
1.314953253155755e-06, + "loss": 0.9181896448135376, + "step": 3717 + }, + { + "epoch": 0.8566820276497696, + "grad_norm": 0.7940667691684741, + "learning_rate": 1.3145916358176044e-06, + "loss": 0.5943678021430969, + "step": 3718 + }, + { + "epoch": 0.8569124423963134, + "grad_norm": 0.9268739898787507, + "learning_rate": 1.3142299728168942e-06, + "loss": 0.7908656597137451, + "step": 3719 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 1.2242140267734891, + "learning_rate": 1.3138682642061192e-06, + "loss": 0.8716393709182739, + "step": 3720 + }, + { + "epoch": 0.8573732718894009, + "grad_norm": 0.9921811812486295, + "learning_rate": 1.3135065100377814e-06, + "loss": 0.76909339427948, + "step": 3721 + }, + { + "epoch": 0.8576036866359447, + "grad_norm": 1.0272733292998222, + "learning_rate": 1.3131447103643884e-06, + "loss": 0.7896728515625, + "step": 3722 + }, + { + "epoch": 0.8578341013824885, + "grad_norm": 1.0326134494637835, + "learning_rate": 1.3127828652384554e-06, + "loss": 0.8458575010299683, + "step": 3723 + }, + { + "epoch": 0.8580645161290322, + "grad_norm": 0.9849414066001893, + "learning_rate": 1.3124209747125036e-06, + "loss": 0.7419729232788086, + "step": 3724 + }, + { + "epoch": 0.8582949308755761, + "grad_norm": 0.9131603734827297, + "learning_rate": 1.3120590388390608e-06, + "loss": 0.8801093697547913, + "step": 3725 + }, + { + "epoch": 0.8585253456221198, + "grad_norm": 0.7986933302941567, + "learning_rate": 1.3116970576706617e-06, + "loss": 0.6337816715240479, + "step": 3726 + }, + { + "epoch": 0.8587557603686636, + "grad_norm": 1.1352865331161706, + "learning_rate": 1.3113350312598472e-06, + "loss": 0.8099665641784668, + "step": 3727 + }, + { + "epoch": 0.8589861751152074, + "grad_norm": 1.0467011868433627, + "learning_rate": 1.3109729596591651e-06, + "loss": 0.7430413961410522, + "step": 3728 + }, + { + "epoch": 0.8592165898617512, + "grad_norm": 1.0569982664185076, + "learning_rate": 1.3106108429211699e-06, + "loss": 
0.7374905347824097, + "step": 3729 + }, + { + "epoch": 0.8594470046082949, + "grad_norm": 0.7857724004075162, + "learning_rate": 1.3102486810984217e-06, + "loss": 0.71753990650177, + "step": 3730 + }, + { + "epoch": 0.8596774193548387, + "grad_norm": 1.0554970253272185, + "learning_rate": 1.3098864742434885e-06, + "loss": 0.9126461744308472, + "step": 3731 + }, + { + "epoch": 0.8599078341013825, + "grad_norm": 1.1141466235187625, + "learning_rate": 1.3095242224089434e-06, + "loss": 0.846487283706665, + "step": 3732 + }, + { + "epoch": 0.8601382488479262, + "grad_norm": 0.9640305278845377, + "learning_rate": 1.3091619256473671e-06, + "loss": 0.7026070952415466, + "step": 3733 + }, + { + "epoch": 0.8603686635944701, + "grad_norm": 1.2209599470129553, + "learning_rate": 1.3087995840113471e-06, + "loss": 1.0044158697128296, + "step": 3734 + }, + { + "epoch": 0.8605990783410138, + "grad_norm": 1.2732308696122019, + "learning_rate": 1.3084371975534759e-06, + "loss": 0.8061608076095581, + "step": 3735 + }, + { + "epoch": 0.8608294930875576, + "grad_norm": 1.2155874878372677, + "learning_rate": 1.308074766326354e-06, + "loss": 0.9189345836639404, + "step": 3736 + }, + { + "epoch": 0.8610599078341014, + "grad_norm": 3.0839554304770314, + "learning_rate": 1.3077122903825875e-06, + "loss": 0.8183290958404541, + "step": 3737 + }, + { + "epoch": 0.8612903225806452, + "grad_norm": 0.9202037098580877, + "learning_rate": 1.3073497697747893e-06, + "loss": 0.860893726348877, + "step": 3738 + }, + { + "epoch": 0.8615207373271889, + "grad_norm": 0.7717429741205805, + "learning_rate": 1.306987204555579e-06, + "loss": 0.6732957363128662, + "step": 3739 + }, + { + "epoch": 0.8617511520737328, + "grad_norm": 0.9444170667577415, + "learning_rate": 1.3066245947775821e-06, + "loss": 0.7910758256912231, + "step": 3740 + }, + { + "epoch": 0.8619815668202765, + "grad_norm": 1.316217805471382, + "learning_rate": 1.3062619404934317e-06, + "loss": 0.9422181844711304, + "step": 3741 + }, + { + 
"epoch": 0.8622119815668203, + "grad_norm": 0.9698503213179374, + "learning_rate": 1.3058992417557657e-06, + "loss": 0.7731142044067383, + "step": 3742 + }, + { + "epoch": 0.8624423963133641, + "grad_norm": 0.9561313394387324, + "learning_rate": 1.3055364986172296e-06, + "loss": 0.8419089317321777, + "step": 3743 + }, + { + "epoch": 0.8626728110599078, + "grad_norm": 0.8852750785802604, + "learning_rate": 1.3051737111304757e-06, + "loss": 0.7535419464111328, + "step": 3744 + }, + { + "epoch": 0.8629032258064516, + "grad_norm": 0.8636514927767351, + "learning_rate": 1.3048108793481614e-06, + "loss": 0.7744847536087036, + "step": 3745 + }, + { + "epoch": 0.8631336405529954, + "grad_norm": 1.04058809416254, + "learning_rate": 1.3044480033229513e-06, + "loss": 0.7578398585319519, + "step": 3746 + }, + { + "epoch": 0.8633640552995392, + "grad_norm": 1.2334871836764278, + "learning_rate": 1.3040850831075168e-06, + "loss": 0.8767418265342712, + "step": 3747 + }, + { + "epoch": 0.8635944700460829, + "grad_norm": 1.1256734507930313, + "learning_rate": 1.303722118754535e-06, + "loss": 0.7484671473503113, + "step": 3748 + }, + { + "epoch": 0.8638248847926268, + "grad_norm": 0.9064086460386975, + "learning_rate": 1.3033591103166897e-06, + "loss": 0.7231101989746094, + "step": 3749 + }, + { + "epoch": 0.8640552995391705, + "grad_norm": 0.896473034432068, + "learning_rate": 1.3029960578466709e-06, + "loss": 0.7626307606697083, + "step": 3750 + }, + { + "epoch": 0.8642857142857143, + "grad_norm": 1.0608055188685264, + "learning_rate": 1.302632961397176e-06, + "loss": 0.7244704961776733, + "step": 3751 + }, + { + "epoch": 0.864516129032258, + "grad_norm": 1.0368271143877468, + "learning_rate": 1.3022698210209066e-06, + "loss": 0.8575884103775024, + "step": 3752 + }, + { + "epoch": 0.8647465437788019, + "grad_norm": 1.050928094888414, + "learning_rate": 1.3019066367705733e-06, + "loss": 0.7617322206497192, + "step": 3753 + }, + { + "epoch": 0.8649769585253456, + "grad_norm": 
1.0524737157850867, + "learning_rate": 1.3015434086988914e-06, + "loss": 0.7899904251098633, + "step": 3754 + }, + { + "epoch": 0.8652073732718893, + "grad_norm": 0.7826254299372721, + "learning_rate": 1.3011801368585825e-06, + "loss": 0.6405949592590332, + "step": 3755 + }, + { + "epoch": 0.8654377880184332, + "grad_norm": 1.004484214855527, + "learning_rate": 1.300816821302376e-06, + "loss": 0.8473223447799683, + "step": 3756 + }, + { + "epoch": 0.8656682027649769, + "grad_norm": 1.0318183916575985, + "learning_rate": 1.3004534620830059e-06, + "loss": 0.7843037843704224, + "step": 3757 + }, + { + "epoch": 0.8658986175115208, + "grad_norm": 0.8527211236886993, + "learning_rate": 1.3000900592532134e-06, + "loss": 0.7418329119682312, + "step": 3758 + }, + { + "epoch": 0.8661290322580645, + "grad_norm": 1.1686967012789897, + "learning_rate": 1.2997266128657462e-06, + "loss": 0.9007542133331299, + "step": 3759 + }, + { + "epoch": 0.8663594470046083, + "grad_norm": 1.0002999248018631, + "learning_rate": 1.2993631229733582e-06, + "loss": 0.7214536666870117, + "step": 3760 + }, + { + "epoch": 0.866589861751152, + "grad_norm": 1.060698383579802, + "learning_rate": 1.2989995896288085e-06, + "loss": 0.6538300514221191, + "step": 3761 + }, + { + "epoch": 0.8668202764976959, + "grad_norm": 0.8939424364373206, + "learning_rate": 1.2986360128848647e-06, + "loss": 0.8132497668266296, + "step": 3762 + }, + { + "epoch": 0.8670506912442396, + "grad_norm": 1.2692579875098073, + "learning_rate": 1.2982723927942987e-06, + "loss": 0.8940386176109314, + "step": 3763 + }, + { + "epoch": 0.8672811059907835, + "grad_norm": 0.9095968882110219, + "learning_rate": 1.2979087294098904e-06, + "loss": 0.7426153421401978, + "step": 3764 + }, + { + "epoch": 0.8675115207373272, + "grad_norm": 1.2314721218727755, + "learning_rate": 1.2975450227844236e-06, + "loss": 0.8140754103660583, + "step": 3765 + }, + { + "epoch": 0.867741935483871, + "grad_norm": 1.165847048536148, + "learning_rate": 
1.2971812729706907e-06, + "loss": 0.9078278541564941, + "step": 3766 + }, + { + "epoch": 0.8679723502304147, + "grad_norm": 0.8581444329277982, + "learning_rate": 1.29681748002149e-06, + "loss": 0.6632627248764038, + "step": 3767 + }, + { + "epoch": 0.8682027649769585, + "grad_norm": 1.0737542944031577, + "learning_rate": 1.2964536439896245e-06, + "loss": 0.913419246673584, + "step": 3768 + }, + { + "epoch": 0.8684331797235023, + "grad_norm": 0.9232699220030103, + "learning_rate": 1.2960897649279054e-06, + "loss": 0.776391863822937, + "step": 3769 + }, + { + "epoch": 0.868663594470046, + "grad_norm": 0.7836255693570048, + "learning_rate": 1.2957258428891488e-06, + "loss": 0.7171014547348022, + "step": 3770 + }, + { + "epoch": 0.8688940092165899, + "grad_norm": 1.072840063629104, + "learning_rate": 1.2953618779261776e-06, + "loss": 0.8848521709442139, + "step": 3771 + }, + { + "epoch": 0.8691244239631336, + "grad_norm": 0.9374655640180731, + "learning_rate": 1.2949978700918207e-06, + "loss": 0.6794570684432983, + "step": 3772 + }, + { + "epoch": 0.8693548387096774, + "grad_norm": 1.1765914680464367, + "learning_rate": 1.2946338194389137e-06, + "loss": 0.7128770351409912, + "step": 3773 + }, + { + "epoch": 0.8695852534562212, + "grad_norm": 1.0061805151394425, + "learning_rate": 1.2942697260202976e-06, + "loss": 0.7794370651245117, + "step": 3774 + }, + { + "epoch": 0.869815668202765, + "grad_norm": 0.8201503807835805, + "learning_rate": 1.2939055898888203e-06, + "loss": 0.7946528196334839, + "step": 3775 + }, + { + "epoch": 0.8700460829493087, + "grad_norm": 0.8253544658473864, + "learning_rate": 1.2935414110973357e-06, + "loss": 0.7052137851715088, + "step": 3776 + }, + { + "epoch": 0.8702764976958526, + "grad_norm": 1.1148062721900278, + "learning_rate": 1.293177189698704e-06, + "loss": 0.785929799079895, + "step": 3777 + }, + { + "epoch": 0.8705069124423963, + "grad_norm": 1.0434715730493578, + "learning_rate": 1.2928129257457915e-06, + "loss": 
0.7907861471176147, + "step": 3778 + }, + { + "epoch": 0.8707373271889401, + "grad_norm": 1.0141295879138945, + "learning_rate": 1.2924486192914704e-06, + "loss": 0.9145845770835876, + "step": 3779 + }, + { + "epoch": 0.8709677419354839, + "grad_norm": 1.2821040685334846, + "learning_rate": 1.2920842703886191e-06, + "loss": 0.8332167863845825, + "step": 3780 + }, + { + "epoch": 0.8711981566820276, + "grad_norm": 1.1443987508087015, + "learning_rate": 1.2917198790901229e-06, + "loss": 0.9593367576599121, + "step": 3781 + }, + { + "epoch": 0.8714285714285714, + "grad_norm": 1.1001262078147525, + "learning_rate": 1.2913554454488723e-06, + "loss": 0.9269144535064697, + "step": 3782 + }, + { + "epoch": 0.8716589861751152, + "grad_norm": 0.8577227656018163, + "learning_rate": 1.2909909695177645e-06, + "loss": 0.8474053144454956, + "step": 3783 + }, + { + "epoch": 0.871889400921659, + "grad_norm": 1.0482742591675172, + "learning_rate": 1.2906264513497027e-06, + "loss": 0.8098207116127014, + "step": 3784 + }, + { + "epoch": 0.8721198156682027, + "grad_norm": 0.9400670599728106, + "learning_rate": 1.2902618909975962e-06, + "loss": 0.7394517064094543, + "step": 3785 + }, + { + "epoch": 0.8723502304147466, + "grad_norm": 1.199479550356467, + "learning_rate": 1.2898972885143606e-06, + "loss": 0.8667110204696655, + "step": 3786 + }, + { + "epoch": 0.8725806451612903, + "grad_norm": 1.2600204383371998, + "learning_rate": 1.289532643952917e-06, + "loss": 0.826819121837616, + "step": 3787 + }, + { + "epoch": 0.8728110599078341, + "grad_norm": 0.9212030006613351, + "learning_rate": 1.2891679573661937e-06, + "loss": 0.7765695452690125, + "step": 3788 + }, + { + "epoch": 0.8730414746543779, + "grad_norm": 0.8409152224560986, + "learning_rate": 1.2888032288071245e-06, + "loss": 0.7180448770523071, + "step": 3789 + }, + { + "epoch": 0.8732718894009217, + "grad_norm": 0.9734045628890519, + "learning_rate": 1.2884384583286486e-06, + "loss": 0.7619662880897522, + "step": 3790 + }, + { + 
"epoch": 0.8735023041474654, + "grad_norm": 1.0439158459354512, + "learning_rate": 1.2880736459837123e-06, + "loss": 0.8332309126853943, + "step": 3791 + }, + { + "epoch": 0.8737327188940092, + "grad_norm": 1.019583919621154, + "learning_rate": 1.2877087918252676e-06, + "loss": 0.9314864277839661, + "step": 3792 + }, + { + "epoch": 0.873963133640553, + "grad_norm": 1.0252621742811456, + "learning_rate": 1.287343895906273e-06, + "loss": 0.8505650758743286, + "step": 3793 + }, + { + "epoch": 0.8741935483870967, + "grad_norm": 1.1808911521686665, + "learning_rate": 1.286978958279692e-06, + "loss": 0.8086442351341248, + "step": 3794 + }, + { + "epoch": 0.8744239631336406, + "grad_norm": 0.9931096763073582, + "learning_rate": 1.2866139789984951e-06, + "loss": 0.9369934797286987, + "step": 3795 + }, + { + "epoch": 0.8746543778801843, + "grad_norm": 1.0923174237783717, + "learning_rate": 1.2862489581156585e-06, + "loss": 0.6776204705238342, + "step": 3796 + }, + { + "epoch": 0.8748847926267281, + "grad_norm": 1.1437930163109349, + "learning_rate": 1.2858838956841646e-06, + "loss": 0.8742507100105286, + "step": 3797 + }, + { + "epoch": 0.8751152073732719, + "grad_norm": 0.8088256156858264, + "learning_rate": 1.285518791757002e-06, + "loss": 0.6592123508453369, + "step": 3798 + }, + { + "epoch": 0.8753456221198157, + "grad_norm": 1.064419209573929, + "learning_rate": 1.2851536463871646e-06, + "loss": 0.727974534034729, + "step": 3799 + }, + { + "epoch": 0.8755760368663594, + "grad_norm": 1.1114963626056278, + "learning_rate": 1.284788459627653e-06, + "loss": 0.734921395778656, + "step": 3800 + }, + { + "epoch": 0.8758064516129033, + "grad_norm": 1.1341924912712853, + "learning_rate": 1.2844232315314734e-06, + "loss": 0.8848391771316528, + "step": 3801 + }, + { + "epoch": 0.876036866359447, + "grad_norm": 0.9036415522550547, + "learning_rate": 1.284057962151638e-06, + "loss": 0.7014757394790649, + "step": 3802 + }, + { + "epoch": 0.8762672811059908, + "grad_norm": 
1.1253352689452834, + "learning_rate": 1.2836926515411662e-06, + "loss": 0.9037606716156006, + "step": 3803 + }, + { + "epoch": 0.8764976958525346, + "grad_norm": 1.0304179621449525, + "learning_rate": 1.2833272997530808e-06, + "loss": 0.7842103242874146, + "step": 3804 + }, + { + "epoch": 0.8767281105990783, + "grad_norm": 0.8881021582469312, + "learning_rate": 1.282961906840413e-06, + "loss": 0.7233899831771851, + "step": 3805 + }, + { + "epoch": 0.8769585253456221, + "grad_norm": 1.0965629604169354, + "learning_rate": 1.2825964728561995e-06, + "loss": 0.8439977169036865, + "step": 3806 + }, + { + "epoch": 0.8771889400921659, + "grad_norm": 0.9011702646392625, + "learning_rate": 1.2822309978534817e-06, + "loss": 0.6734062433242798, + "step": 3807 + }, + { + "epoch": 0.8774193548387097, + "grad_norm": 0.8611901516189409, + "learning_rate": 1.2818654818853082e-06, + "loss": 0.8132908344268799, + "step": 3808 + }, + { + "epoch": 0.8776497695852534, + "grad_norm": 1.0055540352806662, + "learning_rate": 1.2814999250047334e-06, + "loss": 0.7867386341094971, + "step": 3809 + }, + { + "epoch": 0.8778801843317973, + "grad_norm": 0.9631857828899055, + "learning_rate": 1.2811343272648172e-06, + "loss": 0.7367507219314575, + "step": 3810 + }, + { + "epoch": 0.878110599078341, + "grad_norm": 0.9475758390620135, + "learning_rate": 1.280768688718625e-06, + "loss": 0.8154586553573608, + "step": 3811 + }, + { + "epoch": 0.8783410138248848, + "grad_norm": 1.2471162716233217, + "learning_rate": 1.2804030094192297e-06, + "loss": 0.9962621331214905, + "step": 3812 + }, + { + "epoch": 0.8785714285714286, + "grad_norm": 0.9442759022004834, + "learning_rate": 1.280037289419709e-06, + "loss": 0.8720508813858032, + "step": 3813 + }, + { + "epoch": 0.8788018433179724, + "grad_norm": 0.9970556206238078, + "learning_rate": 1.2796715287731461e-06, + "loss": 0.7211558818817139, + "step": 3814 + }, + { + "epoch": 0.8790322580645161, + "grad_norm": 1.0985560987492957, + "learning_rate": 
1.279305727532631e-06, + "loss": 0.8354029059410095, + "step": 3815 + }, + { + "epoch": 0.8792626728110599, + "grad_norm": 1.2983425606164107, + "learning_rate": 1.2789398857512597e-06, + "loss": 0.9136772155761719, + "step": 3816 + }, + { + "epoch": 0.8794930875576037, + "grad_norm": 1.099731879502331, + "learning_rate": 1.2785740034821328e-06, + "loss": 0.7603391408920288, + "step": 3817 + }, + { + "epoch": 0.8797235023041474, + "grad_norm": 1.0043618459346715, + "learning_rate": 1.2782080807783582e-06, + "loss": 0.8938640356063843, + "step": 3818 + }, + { + "epoch": 0.8799539170506913, + "grad_norm": 0.9668042432935031, + "learning_rate": 1.2778421176930492e-06, + "loss": 0.8041675090789795, + "step": 3819 + }, + { + "epoch": 0.880184331797235, + "grad_norm": 0.858269124078789, + "learning_rate": 1.2774761142793246e-06, + "loss": 0.7128704786300659, + "step": 3820 + }, + { + "epoch": 0.8804147465437788, + "grad_norm": 1.01263470571454, + "learning_rate": 1.277110070590309e-06, + "loss": 0.7927603721618652, + "step": 3821 + }, + { + "epoch": 0.8806451612903226, + "grad_norm": 0.8447601312860044, + "learning_rate": 1.2767439866791342e-06, + "loss": 0.8294891119003296, + "step": 3822 + }, + { + "epoch": 0.8808755760368664, + "grad_norm": 1.0620381421224903, + "learning_rate": 1.2763778625989354e-06, + "loss": 0.8058860301971436, + "step": 3823 + }, + { + "epoch": 0.8811059907834101, + "grad_norm": 1.1264235058600618, + "learning_rate": 1.2760116984028559e-06, + "loss": 0.9073271751403809, + "step": 3824 + }, + { + "epoch": 0.881336405529954, + "grad_norm": 0.9871957246708625, + "learning_rate": 1.2756454941440439e-06, + "loss": 0.755131721496582, + "step": 3825 + }, + { + "epoch": 0.8815668202764977, + "grad_norm": 0.9177831986454672, + "learning_rate": 1.2752792498756532e-06, + "loss": 0.7571133375167847, + "step": 3826 + }, + { + "epoch": 0.8817972350230415, + "grad_norm": 1.0303718222421674, + "learning_rate": 1.2749129656508438e-06, + "loss": 
0.8021755218505859, + "step": 3827 + }, + { + "epoch": 0.8820276497695853, + "grad_norm": 0.9628359079626025, + "learning_rate": 1.2745466415227812e-06, + "loss": 0.7817519903182983, + "step": 3828 + }, + { + "epoch": 0.882258064516129, + "grad_norm": 0.9923984386602839, + "learning_rate": 1.2741802775446375e-06, + "loss": 0.7144416570663452, + "step": 3829 + }, + { + "epoch": 0.8824884792626728, + "grad_norm": 1.1770010674703593, + "learning_rate": 1.2738138737695894e-06, + "loss": 0.8154206275939941, + "step": 3830 + }, + { + "epoch": 0.8827188940092165, + "grad_norm": 1.0860031408073831, + "learning_rate": 1.2734474302508199e-06, + "loss": 0.7478733062744141, + "step": 3831 + }, + { + "epoch": 0.8829493087557604, + "grad_norm": 0.9998255564669785, + "learning_rate": 1.2730809470415177e-06, + "loss": 0.7792314291000366, + "step": 3832 + }, + { + "epoch": 0.8831797235023041, + "grad_norm": 1.1952265957395494, + "learning_rate": 1.2727144241948776e-06, + "loss": 0.8550708293914795, + "step": 3833 + }, + { + "epoch": 0.883410138248848, + "grad_norm": 1.14972903127367, + "learning_rate": 1.2723478617641e-06, + "loss": 0.9415113925933838, + "step": 3834 + }, + { + "epoch": 0.8836405529953917, + "grad_norm": 1.1062517985394071, + "learning_rate": 1.2719812598023909e-06, + "loss": 0.8359560370445251, + "step": 3835 + }, + { + "epoch": 0.8838709677419355, + "grad_norm": 1.2039080793867758, + "learning_rate": 1.2716146183629618e-06, + "loss": 0.9515634775161743, + "step": 3836 + }, + { + "epoch": 0.8841013824884792, + "grad_norm": 1.1195735084656264, + "learning_rate": 1.2712479374990302e-06, + "loss": 0.9433277249336243, + "step": 3837 + }, + { + "epoch": 0.8843317972350231, + "grad_norm": 1.022594144324791, + "learning_rate": 1.27088121726382e-06, + "loss": 0.809203028678894, + "step": 3838 + }, + { + "epoch": 0.8845622119815668, + "grad_norm": 1.0243153152488458, + "learning_rate": 1.2705144577105596e-06, + "loss": 0.8003803491592407, + "step": 3839 + }, + { + "epoch": 
0.8847926267281107, + "grad_norm": 1.0509871208480976, + "learning_rate": 1.2701476588924837e-06, + "loss": 0.8258087038993835, + "step": 3840 + }, + { + "epoch": 0.8850230414746544, + "grad_norm": 0.8336199164135607, + "learning_rate": 1.2697808208628326e-06, + "loss": 0.7337249517440796, + "step": 3841 + }, + { + "epoch": 0.8852534562211981, + "grad_norm": 1.1988508685394492, + "learning_rate": 1.269413943674853e-06, + "loss": 0.6963306665420532, + "step": 3842 + }, + { + "epoch": 0.885483870967742, + "grad_norm": 1.1494175494849699, + "learning_rate": 1.2690470273817955e-06, + "loss": 0.8849321603775024, + "step": 3843 + }, + { + "epoch": 0.8857142857142857, + "grad_norm": 0.9311581320318796, + "learning_rate": 1.2686800720369183e-06, + "loss": 0.804117739200592, + "step": 3844 + }, + { + "epoch": 0.8859447004608295, + "grad_norm": 0.9139368239237865, + "learning_rate": 1.2683130776934848e-06, + "loss": 0.7873985767364502, + "step": 3845 + }, + { + "epoch": 0.8861751152073732, + "grad_norm": 1.0475484077031534, + "learning_rate": 1.2679460444047627e-06, + "loss": 0.7401156425476074, + "step": 3846 + }, + { + "epoch": 0.8864055299539171, + "grad_norm": 1.1867976153376456, + "learning_rate": 1.2675789722240274e-06, + "loss": 0.8216343522071838, + "step": 3847 + }, + { + "epoch": 0.8866359447004608, + "grad_norm": 1.1126927795380483, + "learning_rate": 1.2672118612045583e-06, + "loss": 0.9367883205413818, + "step": 3848 + }, + { + "epoch": 0.8868663594470046, + "grad_norm": 1.333436966015092, + "learning_rate": 1.2668447113996411e-06, + "loss": 0.959208607673645, + "step": 3849 + }, + { + "epoch": 0.8870967741935484, + "grad_norm": 1.019926575329533, + "learning_rate": 1.2664775228625678e-06, + "loss": 0.754011869430542, + "step": 3850 + }, + { + "epoch": 0.8873271889400922, + "grad_norm": 1.0679613059424808, + "learning_rate": 1.2661102956466343e-06, + "loss": 0.7200918793678284, + "step": 3851 + }, + { + "epoch": 0.8875576036866359, + "grad_norm": 
1.1470470713937198, + "learning_rate": 1.2657430298051441e-06, + "loss": 0.7819997072219849, + "step": 3852 + }, + { + "epoch": 0.8877880184331797, + "grad_norm": 0.7442261609023784, + "learning_rate": 1.2653757253914045e-06, + "loss": 0.6145305037498474, + "step": 3853 + }, + { + "epoch": 0.8880184331797235, + "grad_norm": 1.0307629205268725, + "learning_rate": 1.2650083824587298e-06, + "loss": 0.8730908036231995, + "step": 3854 + }, + { + "epoch": 0.8882488479262672, + "grad_norm": 0.8412211397931054, + "learning_rate": 1.2646410010604395e-06, + "loss": 0.7595944404602051, + "step": 3855 + }, + { + "epoch": 0.8884792626728111, + "grad_norm": 1.1742884385001073, + "learning_rate": 1.264273581249858e-06, + "loss": 0.8533104658126831, + "step": 3856 + }, + { + "epoch": 0.8887096774193548, + "grad_norm": 0.9075889816265436, + "learning_rate": 1.263906123080316e-06, + "loss": 0.7239818572998047, + "step": 3857 + }, + { + "epoch": 0.8889400921658986, + "grad_norm": 1.1211735744208717, + "learning_rate": 1.2635386266051498e-06, + "loss": 0.7675650119781494, + "step": 3858 + }, + { + "epoch": 0.8891705069124424, + "grad_norm": 1.03231156560467, + "learning_rate": 1.2631710918777007e-06, + "loss": 0.8886630535125732, + "step": 3859 + }, + { + "epoch": 0.8894009216589862, + "grad_norm": 1.078590523668252, + "learning_rate": 1.2628035189513159e-06, + "loss": 0.798930287361145, + "step": 3860 + }, + { + "epoch": 0.8896313364055299, + "grad_norm": 0.9635414297502106, + "learning_rate": 1.2624359078793484e-06, + "loss": 0.7189278602600098, + "step": 3861 + }, + { + "epoch": 0.8898617511520738, + "grad_norm": 1.0909939790359444, + "learning_rate": 1.2620682587151565e-06, + "loss": 0.8187342882156372, + "step": 3862 + }, + { + "epoch": 0.8900921658986175, + "grad_norm": 1.1174191800105742, + "learning_rate": 1.2617005715121034e-06, + "loss": 0.880839467048645, + "step": 3863 + }, + { + "epoch": 0.8903225806451613, + "grad_norm": 0.9160208180175933, + "learning_rate": 
1.2613328463235586e-06, + "loss": 0.84575355052948, + "step": 3864 + }, + { + "epoch": 0.8905529953917051, + "grad_norm": 0.8361425077510937, + "learning_rate": 1.2609650832028978e-06, + "loss": 0.6823658347129822, + "step": 3865 + }, + { + "epoch": 0.8907834101382488, + "grad_norm": 1.0695425966983703, + "learning_rate": 1.2605972822035e-06, + "loss": 0.8295711278915405, + "step": 3866 + }, + { + "epoch": 0.8910138248847926, + "grad_norm": 1.1932993089448705, + "learning_rate": 1.2602294433787518e-06, + "loss": 0.8684213161468506, + "step": 3867 + }, + { + "epoch": 0.8912442396313364, + "grad_norm": 0.8493371065418897, + "learning_rate": 1.2598615667820447e-06, + "loss": 0.6560889482498169, + "step": 3868 + }, + { + "epoch": 0.8914746543778802, + "grad_norm": 1.0552959260029386, + "learning_rate": 1.259493652466775e-06, + "loss": 0.740487277507782, + "step": 3869 + }, + { + "epoch": 0.8917050691244239, + "grad_norm": 0.9680726179927289, + "learning_rate": 1.2591257004863453e-06, + "loss": 0.8167253732681274, + "step": 3870 + }, + { + "epoch": 0.8919354838709678, + "grad_norm": 0.8741208745575088, + "learning_rate": 1.2587577108941634e-06, + "loss": 0.8521690368652344, + "step": 3871 + }, + { + "epoch": 0.8921658986175115, + "grad_norm": 1.263426910808872, + "learning_rate": 1.2583896837436418e-06, + "loss": 0.8830848932266235, + "step": 3872 + }, + { + "epoch": 0.8923963133640553, + "grad_norm": 0.9234650272103238, + "learning_rate": 1.2580216190881999e-06, + "loss": 0.7080649137496948, + "step": 3873 + }, + { + "epoch": 0.8926267281105991, + "grad_norm": 0.9098984938292525, + "learning_rate": 1.2576535169812614e-06, + "loss": 0.8013911247253418, + "step": 3874 + }, + { + "epoch": 0.8928571428571429, + "grad_norm": 0.9781454154869316, + "learning_rate": 1.2572853774762564e-06, + "loss": 0.8307033777236938, + "step": 3875 + }, + { + "epoch": 0.8930875576036866, + "grad_norm": 1.003074779947638, + "learning_rate": 1.256917200626619e-06, + "loss": 0.7514123916625977, 
+ "step": 3876 + }, + { + "epoch": 0.8933179723502304, + "grad_norm": 1.3024082731165083, + "learning_rate": 1.2565489864857903e-06, + "loss": 0.7608132362365723, + "step": 3877 + }, + { + "epoch": 0.8935483870967742, + "grad_norm": 0.9570998315665514, + "learning_rate": 1.256180735107216e-06, + "loss": 0.8011139631271362, + "step": 3878 + }, + { + "epoch": 0.8937788018433179, + "grad_norm": 1.134653936381734, + "learning_rate": 1.2558124465443467e-06, + "loss": 0.9760414958000183, + "step": 3879 + }, + { + "epoch": 0.8940092165898618, + "grad_norm": 1.0547420638261442, + "learning_rate": 1.2554441208506399e-06, + "loss": 0.7292976379394531, + "step": 3880 + }, + { + "epoch": 0.8942396313364055, + "grad_norm": 1.0683215421992245, + "learning_rate": 1.255075758079557e-06, + "loss": 0.819061279296875, + "step": 3881 + }, + { + "epoch": 0.8944700460829493, + "grad_norm": 1.006803716245281, + "learning_rate": 1.2547073582845652e-06, + "loss": 0.8407306671142578, + "step": 3882 + }, + { + "epoch": 0.8947004608294931, + "grad_norm": 0.8233707920449198, + "learning_rate": 1.2543389215191379e-06, + "loss": 0.7452164888381958, + "step": 3883 + }, + { + "epoch": 0.8949308755760369, + "grad_norm": 1.049978361878961, + "learning_rate": 1.2539704478367525e-06, + "loss": 0.9001756310462952, + "step": 3884 + }, + { + "epoch": 0.8951612903225806, + "grad_norm": 0.8057583780945189, + "learning_rate": 1.253601937290893e-06, + "loss": 0.7006322741508484, + "step": 3885 + }, + { + "epoch": 0.8953917050691245, + "grad_norm": 0.9116907763776896, + "learning_rate": 1.253233389935048e-06, + "loss": 0.8464070558547974, + "step": 3886 + }, + { + "epoch": 0.8956221198156682, + "grad_norm": 0.9768693849406578, + "learning_rate": 1.2528648058227117e-06, + "loss": 0.8153925538063049, + "step": 3887 + }, + { + "epoch": 0.895852534562212, + "grad_norm": 0.9311867207234187, + "learning_rate": 1.2524961850073835e-06, + "loss": 0.7093103528022766, + "step": 3888 + }, + { + "epoch": 
0.8960829493087558, + "grad_norm": 0.8533841155936702, + "learning_rate": 1.2521275275425685e-06, + "loss": 0.676047682762146, + "step": 3889 + }, + { + "epoch": 0.8963133640552995, + "grad_norm": 0.87097687176947, + "learning_rate": 1.2517588334817765e-06, + "loss": 0.6980170011520386, + "step": 3890 + }, + { + "epoch": 0.8965437788018433, + "grad_norm": 0.9291831127411667, + "learning_rate": 1.2513901028785232e-06, + "loss": 0.7343952655792236, + "step": 3891 + }, + { + "epoch": 0.896774193548387, + "grad_norm": 1.0285752510532034, + "learning_rate": 1.251021335786329e-06, + "loss": 0.6836012005805969, + "step": 3892 + }, + { + "epoch": 0.8970046082949309, + "grad_norm": 0.9328635468922583, + "learning_rate": 1.2506525322587204e-06, + "loss": 0.7405731678009033, + "step": 3893 + }, + { + "epoch": 0.8972350230414746, + "grad_norm": 0.9162563014074782, + "learning_rate": 1.2502836923492288e-06, + "loss": 0.7626791596412659, + "step": 3894 + }, + { + "epoch": 0.8974654377880185, + "grad_norm": 0.8530894630449782, + "learning_rate": 1.2499148161113904e-06, + "loss": 0.951126754283905, + "step": 3895 + }, + { + "epoch": 0.8976958525345622, + "grad_norm": 1.0356266230162976, + "learning_rate": 1.249545903598747e-06, + "loss": 0.8248430490493774, + "step": 3896 + }, + { + "epoch": 0.897926267281106, + "grad_norm": 1.0696916510331513, + "learning_rate": 1.2491769548648466e-06, + "loss": 0.9306991100311279, + "step": 3897 + }, + { + "epoch": 0.8981566820276498, + "grad_norm": 1.2546361240375576, + "learning_rate": 1.2488079699632406e-06, + "loss": 0.8529196977615356, + "step": 3898 + }, + { + "epoch": 0.8983870967741936, + "grad_norm": 1.1432122269665714, + "learning_rate": 1.2484389489474873e-06, + "loss": 0.8614317178726196, + "step": 3899 + }, + { + "epoch": 0.8986175115207373, + "grad_norm": 0.8777341649032664, + "learning_rate": 1.2480698918711494e-06, + "loss": 0.723548173904419, + "step": 3900 + }, + { + "epoch": 0.8988479262672812, + "grad_norm": 
0.8559428728446495, + "learning_rate": 1.2477007987877953e-06, + "loss": 0.9424235820770264, + "step": 3901 + }, + { + "epoch": 0.8990783410138249, + "grad_norm": 1.1966583189697881, + "learning_rate": 1.2473316697509982e-06, + "loss": 0.8307658433914185, + "step": 3902 + }, + { + "epoch": 0.8993087557603686, + "grad_norm": 0.9430977683906336, + "learning_rate": 1.2469625048143364e-06, + "loss": 0.7164772748947144, + "step": 3903 + }, + { + "epoch": 0.8995391705069125, + "grad_norm": 1.0578567003352413, + "learning_rate": 1.2465933040313941e-06, + "loss": 0.824491024017334, + "step": 3904 + }, + { + "epoch": 0.8997695852534562, + "grad_norm": 0.9955753469888821, + "learning_rate": 1.24622406745576e-06, + "loss": 0.7468826770782471, + "step": 3905 + }, + { + "epoch": 0.9, + "grad_norm": 1.0419833775918754, + "learning_rate": 1.2458547951410285e-06, + "loss": 0.8049126863479614, + "step": 3906 + }, + { + "epoch": 0.9002304147465438, + "grad_norm": 1.0794114769462158, + "learning_rate": 1.245485487140799e-06, + "loss": 0.658754825592041, + "step": 3907 + }, + { + "epoch": 0.9004608294930876, + "grad_norm": 0.9848364091798514, + "learning_rate": 1.245116143508676e-06, + "loss": 0.6772202849388123, + "step": 3908 + }, + { + "epoch": 0.9006912442396313, + "grad_norm": 0.9291487276824166, + "learning_rate": 1.2447467642982697e-06, + "loss": 0.8160394430160522, + "step": 3909 + }, + { + "epoch": 0.9009216589861752, + "grad_norm": 1.3459000002689838, + "learning_rate": 1.244377349563194e-06, + "loss": 0.8289823532104492, + "step": 3910 + }, + { + "epoch": 0.9011520737327189, + "grad_norm": 1.0130598759262572, + "learning_rate": 1.24400789935707e-06, + "loss": 0.7574084997177124, + "step": 3911 + }, + { + "epoch": 0.9013824884792627, + "grad_norm": 0.9665886404424858, + "learning_rate": 1.2436384137335218e-06, + "loss": 0.8116365671157837, + "step": 3912 + }, + { + "epoch": 0.9016129032258065, + "grad_norm": 1.0860329839978788, + "learning_rate": 1.2432688927461808e-06, + 
"loss": 0.814805805683136, + "step": 3913 + }, + { + "epoch": 0.9018433179723502, + "grad_norm": 0.9783977746996081, + "learning_rate": 1.2428993364486822e-06, + "loss": 0.7947453260421753, + "step": 3914 + }, + { + "epoch": 0.902073732718894, + "grad_norm": 1.1432103627131167, + "learning_rate": 1.2425297448946661e-06, + "loss": 0.939562976360321, + "step": 3915 + }, + { + "epoch": 0.9023041474654377, + "grad_norm": 0.9342812306918719, + "learning_rate": 1.2421601181377787e-06, + "loss": 0.9460225105285645, + "step": 3916 + }, + { + "epoch": 0.9025345622119816, + "grad_norm": 1.1417876456910938, + "learning_rate": 1.241790456231671e-06, + "loss": 0.9183799028396606, + "step": 3917 + }, + { + "epoch": 0.9027649769585253, + "grad_norm": 1.1195959115117728, + "learning_rate": 1.2414207592299984e-06, + "loss": 0.6793398857116699, + "step": 3918 + }, + { + "epoch": 0.9029953917050692, + "grad_norm": 0.9758451113738527, + "learning_rate": 1.2410510271864222e-06, + "loss": 0.7796125411987305, + "step": 3919 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 1.127885346985943, + "learning_rate": 1.2406812601546085e-06, + "loss": 0.8164567351341248, + "step": 3920 + }, + { + "epoch": 0.9034562211981567, + "grad_norm": 1.327729370966401, + "learning_rate": 1.2403114581882288e-06, + "loss": 0.7267879247665405, + "step": 3921 + }, + { + "epoch": 0.9036866359447004, + "grad_norm": 0.9644037075475709, + "learning_rate": 1.2399416213409586e-06, + "loss": 0.7277103066444397, + "step": 3922 + }, + { + "epoch": 0.9039170506912443, + "grad_norm": 1.1653209742127064, + "learning_rate": 1.23957174966648e-06, + "loss": 0.8507979512214661, + "step": 3923 + }, + { + "epoch": 0.904147465437788, + "grad_norm": 1.2024221808183382, + "learning_rate": 1.2392018432184792e-06, + "loss": 0.9431333541870117, + "step": 3924 + }, + { + "epoch": 0.9043778801843319, + "grad_norm": 0.9610849982223711, + "learning_rate": 1.2388319020506473e-06, + "loss": 0.669041633605957, + "step": 3925 + }, + { + 
"epoch": 0.9046082949308756, + "grad_norm": 1.0428863031922808, + "learning_rate": 1.2384619262166808e-06, + "loss": 0.7639964818954468, + "step": 3926 + }, + { + "epoch": 0.9048387096774193, + "grad_norm": 0.9055700075744166, + "learning_rate": 1.2380919157702819e-06, + "loss": 0.7390594482421875, + "step": 3927 + }, + { + "epoch": 0.9050691244239631, + "grad_norm": 1.0183193149474203, + "learning_rate": 1.2377218707651562e-06, + "loss": 0.8320105075836182, + "step": 3928 + }, + { + "epoch": 0.9052995391705069, + "grad_norm": 0.9604555269461571, + "learning_rate": 1.237351791255016e-06, + "loss": 0.6820249557495117, + "step": 3929 + }, + { + "epoch": 0.9055299539170507, + "grad_norm": 1.0758012435150028, + "learning_rate": 1.2369816772935773e-06, + "loss": 0.8548537492752075, + "step": 3930 + }, + { + "epoch": 0.9057603686635944, + "grad_norm": 1.0169473440313737, + "learning_rate": 1.236611528934562e-06, + "loss": 0.7226318120956421, + "step": 3931 + }, + { + "epoch": 0.9059907834101383, + "grad_norm": 1.2196278844047388, + "learning_rate": 1.2362413462316963e-06, + "loss": 0.879987359046936, + "step": 3932 + }, + { + "epoch": 0.906221198156682, + "grad_norm": 0.8628507992206548, + "learning_rate": 1.2358711292387122e-06, + "loss": 0.7919881343841553, + "step": 3933 + }, + { + "epoch": 0.9064516129032258, + "grad_norm": 1.0779297510278616, + "learning_rate": 1.2355008780093456e-06, + "loss": 0.8232694268226624, + "step": 3934 + }, + { + "epoch": 0.9066820276497696, + "grad_norm": 1.249487252121194, + "learning_rate": 1.2351305925973385e-06, + "loss": 0.80347740650177, + "step": 3935 + }, + { + "epoch": 0.9069124423963134, + "grad_norm": 1.2510529509996382, + "learning_rate": 1.234760273056437e-06, + "loss": 0.7818408012390137, + "step": 3936 + }, + { + "epoch": 0.9071428571428571, + "grad_norm": 1.1620371895322128, + "learning_rate": 1.2343899194403931e-06, + "loss": 0.8391210436820984, + "step": 3937 + }, + { + "epoch": 0.9073732718894009, + "grad_norm": 
1.1380529418025975, + "learning_rate": 1.2340195318029622e-06, + "loss": 0.7937500476837158, + "step": 3938 + }, + { + "epoch": 0.9076036866359447, + "grad_norm": 0.973433345758839, + "learning_rate": 1.2336491101979065e-06, + "loss": 0.7158668041229248, + "step": 3939 + }, + { + "epoch": 0.9078341013824884, + "grad_norm": 0.9549803277521113, + "learning_rate": 1.2332786546789915e-06, + "loss": 0.6956034898757935, + "step": 3940 + }, + { + "epoch": 0.9080645161290323, + "grad_norm": 1.035574155623001, + "learning_rate": 1.2329081652999887e-06, + "loss": 0.7252948880195618, + "step": 3941 + }, + { + "epoch": 0.908294930875576, + "grad_norm": 1.2086784459715743, + "learning_rate": 1.2325376421146739e-06, + "loss": 0.7131162881851196, + "step": 3942 + }, + { + "epoch": 0.9085253456221198, + "grad_norm": 0.8781165558243194, + "learning_rate": 1.2321670851768285e-06, + "loss": 0.7383663654327393, + "step": 3943 + }, + { + "epoch": 0.9087557603686636, + "grad_norm": 0.9355062944038273, + "learning_rate": 1.2317964945402374e-06, + "loss": 0.8296892642974854, + "step": 3944 + }, + { + "epoch": 0.9089861751152074, + "grad_norm": 1.1131069336270092, + "learning_rate": 1.2314258702586923e-06, + "loss": 0.8314273357391357, + "step": 3945 + }, + { + "epoch": 0.9092165898617511, + "grad_norm": 0.9647703306046335, + "learning_rate": 1.2310552123859888e-06, + "loss": 0.7264384031295776, + "step": 3946 + }, + { + "epoch": 0.909447004608295, + "grad_norm": 0.7580621867286127, + "learning_rate": 1.230684520975927e-06, + "loss": 0.6757937073707581, + "step": 3947 + }, + { + "epoch": 0.9096774193548387, + "grad_norm": 0.8884108342506404, + "learning_rate": 1.230313796082312e-06, + "loss": 0.8318504691123962, + "step": 3948 + }, + { + "epoch": 0.9099078341013825, + "grad_norm": 0.7767337233620181, + "learning_rate": 1.2299430377589547e-06, + "loss": 0.7043207883834839, + "step": 3949 + }, + { + "epoch": 0.9101382488479263, + "grad_norm": 1.0668368590995472, + "learning_rate": 
1.2295722460596696e-06, + "loss": 0.8499487638473511, + "step": 3950 + }, + { + "epoch": 0.91036866359447, + "grad_norm": 1.1145902688644103, + "learning_rate": 1.2292014210382772e-06, + "loss": 0.8219600319862366, + "step": 3951 + }, + { + "epoch": 0.9105990783410138, + "grad_norm": 1.2329010539695853, + "learning_rate": 1.2288305627486017e-06, + "loss": 0.8136317133903503, + "step": 3952 + }, + { + "epoch": 0.9108294930875576, + "grad_norm": 1.1220482069317936, + "learning_rate": 1.2284596712444735e-06, + "loss": 0.7858958840370178, + "step": 3953 + }, + { + "epoch": 0.9110599078341014, + "grad_norm": 1.182019995516566, + "learning_rate": 1.2280887465797259e-06, + "loss": 0.8108563423156738, + "step": 3954 + }, + { + "epoch": 0.9112903225806451, + "grad_norm": 1.17197106565382, + "learning_rate": 1.2277177888081987e-06, + "loss": 0.8061145544052124, + "step": 3955 + }, + { + "epoch": 0.911520737327189, + "grad_norm": 1.1140830632516712, + "learning_rate": 1.2273467979837361e-06, + "loss": 0.7769665718078613, + "step": 3956 + }, + { + "epoch": 0.9117511520737327, + "grad_norm": 1.5134088570090107, + "learning_rate": 1.2269757741601867e-06, + "loss": 1.0548570156097412, + "step": 3957 + }, + { + "epoch": 0.9119815668202765, + "grad_norm": 0.9732476833800602, + "learning_rate": 1.226604717391404e-06, + "loss": 0.7095952033996582, + "step": 3958 + }, + { + "epoch": 0.9122119815668203, + "grad_norm": 0.8435340807921997, + "learning_rate": 1.226233627731247e-06, + "loss": 0.7330363392829895, + "step": 3959 + }, + { + "epoch": 0.9124423963133641, + "grad_norm": 0.9706068481575616, + "learning_rate": 1.225862505233578e-06, + "loss": 0.7328442931175232, + "step": 3960 + }, + { + "epoch": 0.9126728110599078, + "grad_norm": 1.059740258312267, + "learning_rate": 1.2254913499522656e-06, + "loss": 0.7572993040084839, + "step": 3961 + }, + { + "epoch": 0.9129032258064517, + "grad_norm": 1.0542941153492202, + "learning_rate": 1.2251201619411823e-06, + "loss": 0.7706469297409058, 
+ "step": 3962 + }, + { + "epoch": 0.9131336405529954, + "grad_norm": 1.1436826868313579, + "learning_rate": 1.2247489412542053e-06, + "loss": 0.7830193042755127, + "step": 3963 + }, + { + "epoch": 0.9133640552995391, + "grad_norm": 1.0827904871592715, + "learning_rate": 1.224377687945217e-06, + "loss": 0.8415955901145935, + "step": 3964 + }, + { + "epoch": 0.913594470046083, + "grad_norm": 1.1895924425921953, + "learning_rate": 1.2240064020681044e-06, + "loss": 0.7383062839508057, + "step": 3965 + }, + { + "epoch": 0.9138248847926267, + "grad_norm": 1.1432920832791855, + "learning_rate": 1.2236350836767593e-06, + "loss": 0.7372882962226868, + "step": 3966 + }, + { + "epoch": 0.9140552995391705, + "grad_norm": 1.0941013432151616, + "learning_rate": 1.2232637328250776e-06, + "loss": 0.7914254665374756, + "step": 3967 + }, + { + "epoch": 0.9142857142857143, + "grad_norm": 0.9886213418734634, + "learning_rate": 1.2228923495669605e-06, + "loss": 0.8510675430297852, + "step": 3968 + }, + { + "epoch": 0.9145161290322581, + "grad_norm": 1.045281864627849, + "learning_rate": 1.2225209339563143e-06, + "loss": 0.7391757369041443, + "step": 3969 + }, + { + "epoch": 0.9147465437788018, + "grad_norm": 0.8746728562097662, + "learning_rate": 1.2221494860470491e-06, + "loss": 0.69194495677948, + "step": 3970 + }, + { + "epoch": 0.9149769585253457, + "grad_norm": 1.0907421288179358, + "learning_rate": 1.22177800589308e-06, + "loss": 0.7593865394592285, + "step": 3971 + }, + { + "epoch": 0.9152073732718894, + "grad_norm": 1.037234739347401, + "learning_rate": 1.2214064935483268e-06, + "loss": 0.7831966876983643, + "step": 3972 + }, + { + "epoch": 0.9154377880184332, + "grad_norm": 1.1150279108134162, + "learning_rate": 1.2210349490667145e-06, + "loss": 0.8858723640441895, + "step": 3973 + }, + { + "epoch": 0.915668202764977, + "grad_norm": 1.1381126617682915, + "learning_rate": 1.2206633725021715e-06, + "loss": 0.8645567893981934, + "step": 3974 + }, + { + "epoch": 
0.9158986175115207, + "grad_norm": 0.9188905804582469, + "learning_rate": 1.2202917639086322e-06, + "loss": 0.7619047164916992, + "step": 3975 + }, + { + "epoch": 0.9161290322580645, + "grad_norm": 1.0126992141273314, + "learning_rate": 1.2199201233400355e-06, + "loss": 0.8652681112289429, + "step": 3976 + }, + { + "epoch": 0.9163594470046083, + "grad_norm": 0.9961259698766619, + "learning_rate": 1.2195484508503234e-06, + "loss": 0.6860940456390381, + "step": 3977 + }, + { + "epoch": 0.9165898617511521, + "grad_norm": 0.8860870600955693, + "learning_rate": 1.2191767464934444e-06, + "loss": 0.7372464537620544, + "step": 3978 + }, + { + "epoch": 0.9168202764976958, + "grad_norm": 1.3495413684840594, + "learning_rate": 1.218805010323351e-06, + "loss": 0.8719853162765503, + "step": 3979 + }, + { + "epoch": 0.9170506912442397, + "grad_norm": 0.9968927276513252, + "learning_rate": 1.2184332423940003e-06, + "loss": 0.8203779458999634, + "step": 3980 + }, + { + "epoch": 0.9172811059907834, + "grad_norm": 1.197176686739939, + "learning_rate": 1.218061442759353e-06, + "loss": 0.8648861646652222, + "step": 3981 + }, + { + "epoch": 0.9175115207373272, + "grad_norm": 1.0630748229990676, + "learning_rate": 1.2176896114733766e-06, + "loss": 0.7651659250259399, + "step": 3982 + }, + { + "epoch": 0.917741935483871, + "grad_norm": 1.20459191964974, + "learning_rate": 1.2173177485900408e-06, + "loss": 0.8495512008666992, + "step": 3983 + }, + { + "epoch": 0.9179723502304148, + "grad_norm": 1.3559959351470627, + "learning_rate": 1.2169458541633216e-06, + "loss": 0.7997228503227234, + "step": 3984 + }, + { + "epoch": 0.9182027649769585, + "grad_norm": 0.9870494686008755, + "learning_rate": 1.2165739282471987e-06, + "loss": 0.8353173136711121, + "step": 3985 + }, + { + "epoch": 0.9184331797235024, + "grad_norm": 1.2277323881843956, + "learning_rate": 1.216201970895657e-06, + "loss": 0.9039655327796936, + "step": 3986 + }, + { + "epoch": 0.9186635944700461, + "grad_norm": 
0.9209288499077958, + "learning_rate": 1.2158299821626854e-06, + "loss": 0.8158592581748962, + "step": 3987 + }, + { + "epoch": 0.9188940092165898, + "grad_norm": 1.2007654555954255, + "learning_rate": 1.2154579621022776e-06, + "loss": 0.8443971872329712, + "step": 3988 + }, + { + "epoch": 0.9191244239631337, + "grad_norm": 0.916322848733307, + "learning_rate": 1.2150859107684318e-06, + "loss": 0.7934167385101318, + "step": 3989 + }, + { + "epoch": 0.9193548387096774, + "grad_norm": 1.1576910593833736, + "learning_rate": 1.2147138282151512e-06, + "loss": 0.750052809715271, + "step": 3990 + }, + { + "epoch": 0.9195852534562212, + "grad_norm": 1.0948767691124337, + "learning_rate": 1.2143417144964423e-06, + "loss": 0.813056468963623, + "step": 3991 + }, + { + "epoch": 0.919815668202765, + "grad_norm": 1.1487977592190233, + "learning_rate": 1.2139695696663174e-06, + "loss": 0.9478945732116699, + "step": 3992 + }, + { + "epoch": 0.9200460829493088, + "grad_norm": 0.9711264468634061, + "learning_rate": 1.2135973937787927e-06, + "loss": 0.687637448310852, + "step": 3993 + }, + { + "epoch": 0.9202764976958525, + "grad_norm": 1.071392128639805, + "learning_rate": 1.213225186887889e-06, + "loss": 0.8073818683624268, + "step": 3994 + }, + { + "epoch": 0.9205069124423964, + "grad_norm": 1.1074324196567935, + "learning_rate": 1.2128529490476318e-06, + "loss": 0.6684166789054871, + "step": 3995 + }, + { + "epoch": 0.9207373271889401, + "grad_norm": 1.1910033963986806, + "learning_rate": 1.2124806803120506e-06, + "loss": 0.7897466421127319, + "step": 3996 + }, + { + "epoch": 0.9209677419354839, + "grad_norm": 1.0375797321803883, + "learning_rate": 1.21210838073518e-06, + "loss": 0.832312822341919, + "step": 3997 + }, + { + "epoch": 0.9211981566820276, + "grad_norm": 1.036059468253791, + "learning_rate": 1.2117360503710588e-06, + "loss": 0.9536067247390747, + "step": 3998 + }, + { + "epoch": 0.9214285714285714, + "grad_norm": 1.123926651312402, + "learning_rate": 
1.2113636892737302e-06, + "loss": 0.8959759473800659, + "step": 3999 + }, + { + "epoch": 0.9216589861751152, + "grad_norm": 0.9405530325495998, + "learning_rate": 1.2109912974972422e-06, + "loss": 0.6789166927337646, + "step": 4000 + }, + { + "epoch": 0.9218894009216589, + "grad_norm": 0.9327551909921717, + "learning_rate": 1.2106188750956464e-06, + "loss": 0.7336491346359253, + "step": 4001 + }, + { + "epoch": 0.9221198156682028, + "grad_norm": 0.8000293761487048, + "learning_rate": 1.2102464221229997e-06, + "loss": 0.7838259935379028, + "step": 4002 + }, + { + "epoch": 0.9223502304147465, + "grad_norm": 1.2907858896278495, + "learning_rate": 1.2098739386333631e-06, + "loss": 0.9147623777389526, + "step": 4003 + }, + { + "epoch": 0.9225806451612903, + "grad_norm": 1.3691019040487797, + "learning_rate": 1.2095014246808022e-06, + "loss": 0.7296491265296936, + "step": 4004 + }, + { + "epoch": 0.9228110599078341, + "grad_norm": 1.1028104717001235, + "learning_rate": 1.2091288803193868e-06, + "loss": 0.7898432016372681, + "step": 4005 + }, + { + "epoch": 0.9230414746543779, + "grad_norm": 1.1562470474736035, + "learning_rate": 1.2087563056031914e-06, + "loss": 0.8190659284591675, + "step": 4006 + }, + { + "epoch": 0.9232718894009216, + "grad_norm": 1.4146112766933352, + "learning_rate": 1.2083837005862945e-06, + "loss": 0.8383443355560303, + "step": 4007 + }, + { + "epoch": 0.9235023041474655, + "grad_norm": 0.7251077105825574, + "learning_rate": 1.2080110653227796e-06, + "loss": 0.5987120866775513, + "step": 4008 + }, + { + "epoch": 0.9237327188940092, + "grad_norm": 1.056645940510342, + "learning_rate": 1.2076383998667334e-06, + "loss": 0.8811358213424683, + "step": 4009 + }, + { + "epoch": 0.923963133640553, + "grad_norm": 0.8867108269493398, + "learning_rate": 1.2072657042722486e-06, + "loss": 0.7958807349205017, + "step": 4010 + }, + { + "epoch": 0.9241935483870968, + "grad_norm": 1.1776412427000924, + "learning_rate": 1.2068929785934215e-06, + "loss": 
0.7192457914352417, + "step": 4011 + }, + { + "epoch": 0.9244239631336405, + "grad_norm": 1.0545419352254402, + "learning_rate": 1.2065202228843523e-06, + "loss": 0.6854838132858276, + "step": 4012 + }, + { + "epoch": 0.9246543778801843, + "grad_norm": 1.0759672957343283, + "learning_rate": 1.2061474371991457e-06, + "loss": 0.7334680557250977, + "step": 4013 + }, + { + "epoch": 0.9248847926267281, + "grad_norm": 0.9536076812745731, + "learning_rate": 1.205774621591912e-06, + "loss": 0.7614402770996094, + "step": 4014 + }, + { + "epoch": 0.9251152073732719, + "grad_norm": 1.3871826739545572, + "learning_rate": 1.2054017761167644e-06, + "loss": 0.7502505779266357, + "step": 4015 + }, + { + "epoch": 0.9253456221198156, + "grad_norm": 1.044146949688276, + "learning_rate": 1.2050289008278205e-06, + "loss": 0.7922523021697998, + "step": 4016 + }, + { + "epoch": 0.9255760368663595, + "grad_norm": 1.2025329853302307, + "learning_rate": 1.2046559957792032e-06, + "loss": 0.7534265518188477, + "step": 4017 + }, + { + "epoch": 0.9258064516129032, + "grad_norm": 0.9478426591249515, + "learning_rate": 1.2042830610250395e-06, + "loss": 0.6997093558311462, + "step": 4018 + }, + { + "epoch": 0.926036866359447, + "grad_norm": 1.050086676036124, + "learning_rate": 1.2039100966194594e-06, + "loss": 0.7009599208831787, + "step": 4019 + }, + { + "epoch": 0.9262672811059908, + "grad_norm": 1.108108705874163, + "learning_rate": 1.203537102616599e-06, + "loss": 0.795873761177063, + "step": 4020 + }, + { + "epoch": 0.9264976958525346, + "grad_norm": 1.1836803264586404, + "learning_rate": 1.2031640790705972e-06, + "loss": 0.7860225439071655, + "step": 4021 + }, + { + "epoch": 0.9267281105990783, + "grad_norm": 0.9036535621632875, + "learning_rate": 1.2027910260355989e-06, + "loss": 0.7657063007354736, + "step": 4022 + }, + { + "epoch": 0.9269585253456222, + "grad_norm": 1.0407468417409953, + "learning_rate": 1.2024179435657512e-06, + "loss": 0.782909631729126, + "step": 4023 + }, + { + 
"epoch": 0.9271889400921659, + "grad_norm": 0.8628791908243046, + "learning_rate": 1.202044831715207e-06, + "loss": 0.713431715965271, + "step": 4024 + }, + { + "epoch": 0.9274193548387096, + "grad_norm": 0.9826922843740741, + "learning_rate": 1.201671690538123e-06, + "loss": 0.9126790165901184, + "step": 4025 + }, + { + "epoch": 0.9276497695852535, + "grad_norm": 0.9552497173996132, + "learning_rate": 1.20129852008866e-06, + "loss": 0.8640999794006348, + "step": 4026 + }, + { + "epoch": 0.9278801843317972, + "grad_norm": 1.0290580406520045, + "learning_rate": 1.2009253204209832e-06, + "loss": 0.723473072052002, + "step": 4027 + }, + { + "epoch": 0.928110599078341, + "grad_norm": 0.9995947167655078, + "learning_rate": 1.2005520915892626e-06, + "loss": 0.6764041185379028, + "step": 4028 + }, + { + "epoch": 0.9283410138248848, + "grad_norm": 1.1315388960653066, + "learning_rate": 1.200178833647671e-06, + "loss": 0.8525882959365845, + "step": 4029 + }, + { + "epoch": 0.9285714285714286, + "grad_norm": 1.1279047416289067, + "learning_rate": 1.1998055466503872e-06, + "loss": 0.714957058429718, + "step": 4030 + }, + { + "epoch": 0.9288018433179723, + "grad_norm": 0.9055007840106456, + "learning_rate": 1.1994322306515925e-06, + "loss": 0.8015910387039185, + "step": 4031 + }, + { + "epoch": 0.9290322580645162, + "grad_norm": 1.1314666315910753, + "learning_rate": 1.1990588857054733e-06, + "loss": 1.0306739807128906, + "step": 4032 + }, + { + "epoch": 0.9292626728110599, + "grad_norm": 1.0078215910327748, + "learning_rate": 1.1986855118662205e-06, + "loss": 0.8307464122772217, + "step": 4033 + }, + { + "epoch": 0.9294930875576037, + "grad_norm": 0.9974753472669955, + "learning_rate": 1.1983121091880286e-06, + "loss": 0.8720347881317139, + "step": 4034 + }, + { + "epoch": 0.9297235023041475, + "grad_norm": 1.0249437684832297, + "learning_rate": 1.1979386777250968e-06, + "loss": 0.7716174721717834, + "step": 4035 + }, + { + "epoch": 0.9299539170506912, + "grad_norm": 
0.9533075514678258, + "learning_rate": 1.1975652175316279e-06, + "loss": 0.8968960046768188, + "step": 4036 + }, + { + "epoch": 0.930184331797235, + "grad_norm": 1.0235472692311864, + "learning_rate": 1.197191728661829e-06, + "loss": 0.7472472786903381, + "step": 4037 + }, + { + "epoch": 0.9304147465437788, + "grad_norm": 1.209577738801564, + "learning_rate": 1.196818211169912e-06, + "loss": 0.7969691753387451, + "step": 4038 + }, + { + "epoch": 0.9306451612903226, + "grad_norm": 0.8592343628435503, + "learning_rate": 1.196444665110092e-06, + "loss": 0.6187525987625122, + "step": 4039 + }, + { + "epoch": 0.9308755760368663, + "grad_norm": 1.0503056259771648, + "learning_rate": 1.1960710905365893e-06, + "loss": 0.8715502619743347, + "step": 4040 + }, + { + "epoch": 0.9311059907834102, + "grad_norm": 0.9918268480034713, + "learning_rate": 1.1956974875036273e-06, + "loss": 0.7174774408340454, + "step": 4041 + }, + { + "epoch": 0.9313364055299539, + "grad_norm": 0.8743867275561935, + "learning_rate": 1.1953238560654337e-06, + "loss": 0.6546192169189453, + "step": 4042 + }, + { + "epoch": 0.9315668202764977, + "grad_norm": 1.1024794232135675, + "learning_rate": 1.194950196276241e-06, + "loss": 0.8688700199127197, + "step": 4043 + }, + { + "epoch": 0.9317972350230415, + "grad_norm": 1.0449187982587707, + "learning_rate": 1.1945765081902856e-06, + "loss": 0.7679718732833862, + "step": 4044 + }, + { + "epoch": 0.9320276497695853, + "grad_norm": 0.9426197124643214, + "learning_rate": 1.1942027918618073e-06, + "loss": 0.6335175037384033, + "step": 4045 + }, + { + "epoch": 0.932258064516129, + "grad_norm": 1.0452657366695544, + "learning_rate": 1.1938290473450513e-06, + "loss": 0.785153865814209, + "step": 4046 + }, + { + "epoch": 0.9324884792626729, + "grad_norm": 0.9145063707903602, + "learning_rate": 1.1934552746942653e-06, + "loss": 0.6873019337654114, + "step": 4047 + }, + { + "epoch": 0.9327188940092166, + "grad_norm": 0.9707470479007109, + "learning_rate": 
1.1930814739637025e-06, + "loss": 0.7416094541549683, + "step": 4048 + }, + { + "epoch": 0.9329493087557603, + "grad_norm": 1.2103943548089806, + "learning_rate": 1.1927076452076193e-06, + "loss": 0.7206372618675232, + "step": 4049 + }, + { + "epoch": 0.9331797235023042, + "grad_norm": 1.1043264858931607, + "learning_rate": 1.1923337884802767e-06, + "loss": 0.8352477550506592, + "step": 4050 + }, + { + "epoch": 0.9334101382488479, + "grad_norm": 1.116832001192149, + "learning_rate": 1.191959903835939e-06, + "loss": 0.8243483304977417, + "step": 4051 + }, + { + "epoch": 0.9336405529953917, + "grad_norm": 1.4110893804735163, + "learning_rate": 1.1915859913288756e-06, + "loss": 0.827987790107727, + "step": 4052 + }, + { + "epoch": 0.9338709677419355, + "grad_norm": 1.1514055762505417, + "learning_rate": 1.1912120510133589e-06, + "loss": 0.8624123334884644, + "step": 4053 + }, + { + "epoch": 0.9341013824884793, + "grad_norm": 1.2091942284642192, + "learning_rate": 1.1908380829436667e-06, + "loss": 0.8615037202835083, + "step": 4054 + }, + { + "epoch": 0.934331797235023, + "grad_norm": 1.2500115524653743, + "learning_rate": 1.190464087174079e-06, + "loss": 0.9367121458053589, + "step": 4055 + }, + { + "epoch": 0.9345622119815669, + "grad_norm": 1.4503623207353766, + "learning_rate": 1.190090063758881e-06, + "loss": 0.927996039390564, + "step": 4056 + }, + { + "epoch": 0.9347926267281106, + "grad_norm": 1.0709061746508743, + "learning_rate": 1.1897160127523623e-06, + "loss": 0.841314435005188, + "step": 4057 + }, + { + "epoch": 0.9350230414746544, + "grad_norm": 1.1021939339887863, + "learning_rate": 1.189341934208815e-06, + "loss": 0.864904522895813, + "step": 4058 + }, + { + "epoch": 0.9352534562211982, + "grad_norm": 1.148301781904619, + "learning_rate": 1.188967828182537e-06, + "loss": 0.9505404829978943, + "step": 4059 + }, + { + "epoch": 0.9354838709677419, + "grad_norm": 1.0791372441668663, + "learning_rate": 1.188593694727829e-06, + "loss": 0.7347132563591003, + 
"step": 4060 + }, + { + "epoch": 0.9357142857142857, + "grad_norm": 1.1367351426324537, + "learning_rate": 1.1882195338989958e-06, + "loss": 0.6267231106758118, + "step": 4061 + }, + { + "epoch": 0.9359447004608294, + "grad_norm": 1.0946102482081315, + "learning_rate": 1.1878453457503464e-06, + "loss": 0.8052406907081604, + "step": 4062 + }, + { + "epoch": 0.9361751152073733, + "grad_norm": 1.1032845960202522, + "learning_rate": 1.1874711303361933e-06, + "loss": 0.7928211688995361, + "step": 4063 + }, + { + "epoch": 0.936405529953917, + "grad_norm": 1.1265414942472118, + "learning_rate": 1.1870968877108545e-06, + "loss": 0.8863959312438965, + "step": 4064 + }, + { + "epoch": 0.9366359447004609, + "grad_norm": 1.0592501761240638, + "learning_rate": 1.1867226179286496e-06, + "loss": 0.8749874830245972, + "step": 4065 + }, + { + "epoch": 0.9368663594470046, + "grad_norm": 0.9223254168257967, + "learning_rate": 1.186348321043904e-06, + "loss": 0.7516318559646606, + "step": 4066 + }, + { + "epoch": 0.9370967741935484, + "grad_norm": 1.0863969007807137, + "learning_rate": 1.1859739971109467e-06, + "loss": 0.8435031771659851, + "step": 4067 + }, + { + "epoch": 0.9373271889400921, + "grad_norm": 1.08570563607149, + "learning_rate": 1.1855996461841093e-06, + "loss": 0.8766932487487793, + "step": 4068 + }, + { + "epoch": 0.937557603686636, + "grad_norm": 1.2630999347152494, + "learning_rate": 1.1852252683177293e-06, + "loss": 0.8748513460159302, + "step": 4069 + }, + { + "epoch": 0.9377880184331797, + "grad_norm": 1.2689555695038703, + "learning_rate": 1.184850863566147e-06, + "loss": 0.8917855024337769, + "step": 4070 + }, + { + "epoch": 0.9380184331797236, + "grad_norm": 1.0628114663297852, + "learning_rate": 1.1844764319837064e-06, + "loss": 0.7631640434265137, + "step": 4071 + }, + { + "epoch": 0.9382488479262673, + "grad_norm": 1.0140155614547266, + "learning_rate": 1.1841019736247557e-06, + "loss": 0.8354158401489258, + "step": 4072 + }, + { + "epoch": 
0.938479262672811, + "grad_norm": 0.8561335978546013, + "learning_rate": 1.1837274885436473e-06, + "loss": 0.8122761845588684, + "step": 4073 + }, + { + "epoch": 0.9387096774193548, + "grad_norm": 1.5776279194471237, + "learning_rate": 1.1833529767947374e-06, + "loss": 0.8281430006027222, + "step": 4074 + }, + { + "epoch": 0.9389400921658986, + "grad_norm": 1.3828203317822199, + "learning_rate": 1.1829784384323856e-06, + "loss": 0.8291982412338257, + "step": 4075 + }, + { + "epoch": 0.9391705069124424, + "grad_norm": 1.3096607265096822, + "learning_rate": 1.1826038735109553e-06, + "loss": 0.8951852321624756, + "step": 4076 + }, + { + "epoch": 0.9394009216589861, + "grad_norm": 1.2165058417213606, + "learning_rate": 1.182229282084815e-06, + "loss": 0.7006446123123169, + "step": 4077 + }, + { + "epoch": 0.93963133640553, + "grad_norm": 1.1269330295000342, + "learning_rate": 1.1818546642083353e-06, + "loss": 0.8944047689437866, + "step": 4078 + }, + { + "epoch": 0.9398617511520737, + "grad_norm": 0.9351299115123082, + "learning_rate": 1.1814800199358919e-06, + "loss": 0.8252646923065186, + "step": 4079 + }, + { + "epoch": 0.9400921658986175, + "grad_norm": 1.2255680666736817, + "learning_rate": 1.181105349321864e-06, + "loss": 0.7852828502655029, + "step": 4080 + }, + { + "epoch": 0.9403225806451613, + "grad_norm": 1.0734973037527151, + "learning_rate": 1.1807306524206347e-06, + "loss": 0.7758563160896301, + "step": 4081 + }, + { + "epoch": 0.9405529953917051, + "grad_norm": 1.0672387708424669, + "learning_rate": 1.1803559292865899e-06, + "loss": 0.7297114133834839, + "step": 4082 + }, + { + "epoch": 0.9407834101382488, + "grad_norm": 1.1802096748579922, + "learning_rate": 1.1799811799741209e-06, + "loss": 0.7974321842193604, + "step": 4083 + }, + { + "epoch": 0.9410138248847926, + "grad_norm": 1.2930194654348013, + "learning_rate": 1.179606404537622e-06, + "loss": 0.6406733989715576, + "step": 4084 + }, + { + "epoch": 0.9412442396313364, + "grad_norm": 
0.9862268230007224, + "learning_rate": 1.179231603031491e-06, + "loss": 0.6925486326217651, + "step": 4085 + }, + { + "epoch": 0.9414746543778801, + "grad_norm": 0.9201295652583962, + "learning_rate": 1.17885677551013e-06, + "loss": 0.792647123336792, + "step": 4086 + }, + { + "epoch": 0.941705069124424, + "grad_norm": 1.0460531669846371, + "learning_rate": 1.1784819220279454e-06, + "loss": 0.7499191761016846, + "step": 4087 + }, + { + "epoch": 0.9419354838709677, + "grad_norm": 1.120763335726602, + "learning_rate": 1.1781070426393455e-06, + "loss": 0.8307451009750366, + "step": 4088 + }, + { + "epoch": 0.9421658986175115, + "grad_norm": 1.1015455973526673, + "learning_rate": 1.1777321373987445e-06, + "loss": 0.7859289646148682, + "step": 4089 + }, + { + "epoch": 0.9423963133640553, + "grad_norm": 1.0291702780651948, + "learning_rate": 1.177357206360559e-06, + "loss": 0.761134922504425, + "step": 4090 + }, + { + "epoch": 0.9426267281105991, + "grad_norm": 1.240188832472171, + "learning_rate": 1.1769822495792098e-06, + "loss": 0.8697078227996826, + "step": 4091 + }, + { + "epoch": 0.9428571428571428, + "grad_norm": 1.0395615260234665, + "learning_rate": 1.1766072671091212e-06, + "loss": 0.731541633605957, + "step": 4092 + }, + { + "epoch": 0.9430875576036867, + "grad_norm": 1.1056530512213054, + "learning_rate": 1.1762322590047219e-06, + "loss": 0.7501940727233887, + "step": 4093 + }, + { + "epoch": 0.9433179723502304, + "grad_norm": 1.1531150840189341, + "learning_rate": 1.1758572253204431e-06, + "loss": 0.9448602199554443, + "step": 4094 + }, + { + "epoch": 0.9435483870967742, + "grad_norm": 0.8884441593083074, + "learning_rate": 1.175482166110721e-06, + "loss": 0.7704026699066162, + "step": 4095 + }, + { + "epoch": 0.943778801843318, + "grad_norm": 0.8973060402184874, + "learning_rate": 1.1751070814299947e-06, + "loss": 0.7905057668685913, + "step": 4096 + }, + { + "epoch": 0.9440092165898617, + "grad_norm": 1.238350046583652, + "learning_rate": 
1.1747319713327078e-06, + "loss": 0.8957202434539795, + "step": 4097 + }, + { + "epoch": 0.9442396313364055, + "grad_norm": 0.9896078596502195, + "learning_rate": 1.174356835873306e-06, + "loss": 0.7922521233558655, + "step": 4098 + }, + { + "epoch": 0.9444700460829493, + "grad_norm": 0.9974151293119675, + "learning_rate": 1.1739816751062404e-06, + "loss": 0.6501933336257935, + "step": 4099 + }, + { + "epoch": 0.9447004608294931, + "grad_norm": 0.9673699554437744, + "learning_rate": 1.1736064890859654e-06, + "loss": 0.6743361353874207, + "step": 4100 + }, + { + "epoch": 0.9449308755760368, + "grad_norm": 1.0381670362595088, + "learning_rate": 1.173231277866938e-06, + "loss": 0.920632004737854, + "step": 4101 + }, + { + "epoch": 0.9451612903225807, + "grad_norm": 0.872889135902432, + "learning_rate": 1.1728560415036199e-06, + "loss": 0.7498964071273804, + "step": 4102 + }, + { + "epoch": 0.9453917050691244, + "grad_norm": 0.8444235514312883, + "learning_rate": 1.1724807800504765e-06, + "loss": 0.7665064334869385, + "step": 4103 + }, + { + "epoch": 0.9456221198156682, + "grad_norm": 0.8729439782855682, + "learning_rate": 1.172105493561976e-06, + "loss": 0.75946044921875, + "step": 4104 + }, + { + "epoch": 0.945852534562212, + "grad_norm": 1.016811663523364, + "learning_rate": 1.1717301820925908e-06, + "loss": 0.7701961398124695, + "step": 4105 + }, + { + "epoch": 0.9460829493087558, + "grad_norm": 0.9708618505769702, + "learning_rate": 1.1713548456967974e-06, + "loss": 0.7775348424911499, + "step": 4106 + }, + { + "epoch": 0.9463133640552995, + "grad_norm": 0.8519325609053343, + "learning_rate": 1.1709794844290745e-06, + "loss": 0.8149436712265015, + "step": 4107 + }, + { + "epoch": 0.9465437788018434, + "grad_norm": 0.8519085263981432, + "learning_rate": 1.170604098343906e-06, + "loss": 0.7136009335517883, + "step": 4108 + }, + { + "epoch": 0.9467741935483871, + "grad_norm": 1.2048256186284507, + "learning_rate": 1.1702286874957786e-06, + "loss": 0.7678873538970947, 
+ "step": 4109 + }, + { + "epoch": 0.9470046082949308, + "grad_norm": 0.9842223659547223, + "learning_rate": 1.1698532519391827e-06, + "loss": 0.7506710290908813, + "step": 4110 + }, + { + "epoch": 0.9472350230414747, + "grad_norm": 0.900893049038478, + "learning_rate": 1.1694777917286118e-06, + "loss": 0.6646897792816162, + "step": 4111 + }, + { + "epoch": 0.9474654377880184, + "grad_norm": 1.3857066059132386, + "learning_rate": 1.1691023069185639e-06, + "loss": 0.820647120475769, + "step": 4112 + }, + { + "epoch": 0.9476958525345622, + "grad_norm": 0.9795728799566645, + "learning_rate": 1.1687267975635402e-06, + "loss": 0.872378408908844, + "step": 4113 + }, + { + "epoch": 0.947926267281106, + "grad_norm": 1.0760361173899362, + "learning_rate": 1.168351263718045e-06, + "loss": 0.7920655608177185, + "step": 4114 + }, + { + "epoch": 0.9481566820276498, + "grad_norm": 1.1709025489256302, + "learning_rate": 1.1679757054365866e-06, + "loss": 0.6593836545944214, + "step": 4115 + }, + { + "epoch": 0.9483870967741935, + "grad_norm": 1.0965626572699905, + "learning_rate": 1.1676001227736772e-06, + "loss": 0.7473627328872681, + "step": 4116 + }, + { + "epoch": 0.9486175115207374, + "grad_norm": 1.2027339281506744, + "learning_rate": 1.1672245157838317e-06, + "loss": 0.8001665472984314, + "step": 4117 + }, + { + "epoch": 0.9488479262672811, + "grad_norm": 0.9543944768909415, + "learning_rate": 1.1668488845215689e-06, + "loss": 0.7342571020126343, + "step": 4118 + }, + { + "epoch": 0.9490783410138249, + "grad_norm": 1.2428163281726954, + "learning_rate": 1.1664732290414118e-06, + "loss": 0.7616822719573975, + "step": 4119 + }, + { + "epoch": 0.9493087557603687, + "grad_norm": 1.2486031522636918, + "learning_rate": 1.1660975493978857e-06, + "loss": 0.8885634541511536, + "step": 4120 + }, + { + "epoch": 0.9495391705069124, + "grad_norm": 1.1323168185847523, + "learning_rate": 1.1657218456455205e-06, + "loss": 0.7816281318664551, + "step": 4121 + }, + { + "epoch": 
0.9497695852534562, + "grad_norm": 0.9570364600334796, + "learning_rate": 1.1653461178388485e-06, + "loss": 0.7412079572677612, + "step": 4122 + }, + { + "epoch": 0.95, + "grad_norm": 0.957883425985998, + "learning_rate": 1.1649703660324064e-06, + "loss": 0.8096172213554382, + "step": 4123 + }, + { + "epoch": 0.9502304147465438, + "grad_norm": 1.0359903594582591, + "learning_rate": 1.164594590280734e-06, + "loss": 0.6690856218338013, + "step": 4124 + }, + { + "epoch": 0.9504608294930875, + "grad_norm": 0.9697541149080181, + "learning_rate": 1.1642187906383746e-06, + "loss": 0.7509289979934692, + "step": 4125 + }, + { + "epoch": 0.9506912442396314, + "grad_norm": 0.8506285939807987, + "learning_rate": 1.1638429671598754e-06, + "loss": 0.6643730401992798, + "step": 4126 + }, + { + "epoch": 0.9509216589861751, + "grad_norm": 0.994475544194171, + "learning_rate": 1.1634671198997864e-06, + "loss": 0.8100850582122803, + "step": 4127 + }, + { + "epoch": 0.9511520737327189, + "grad_norm": 1.392121351288023, + "learning_rate": 1.1630912489126612e-06, + "loss": 0.919742226600647, + "step": 4128 + }, + { + "epoch": 0.9513824884792627, + "grad_norm": 1.144319413666889, + "learning_rate": 1.1627153542530571e-06, + "loss": 0.8953771591186523, + "step": 4129 + }, + { + "epoch": 0.9516129032258065, + "grad_norm": 0.9663802093818391, + "learning_rate": 1.162339435975535e-06, + "loss": 0.7401770949363708, + "step": 4130 + }, + { + "epoch": 0.9518433179723502, + "grad_norm": 1.0071840947097435, + "learning_rate": 1.1619634941346585e-06, + "loss": 0.7618032097816467, + "step": 4131 + }, + { + "epoch": 0.9520737327188941, + "grad_norm": 1.3156218418351784, + "learning_rate": 1.1615875287849955e-06, + "loss": 0.9134000539779663, + "step": 4132 + }, + { + "epoch": 0.9523041474654378, + "grad_norm": 0.9617492928251477, + "learning_rate": 1.1612115399811162e-06, + "loss": 0.7555145025253296, + "step": 4133 + }, + { + "epoch": 0.9525345622119815, + "grad_norm": 0.9434517704683025, + 
"learning_rate": 1.1608355277775955e-06, + "loss": 0.9125050902366638, + "step": 4134 + }, + { + "epoch": 0.9527649769585254, + "grad_norm": 0.9082549396493419, + "learning_rate": 1.1604594922290106e-06, + "loss": 0.6575542688369751, + "step": 4135 + }, + { + "epoch": 0.9529953917050691, + "grad_norm": 1.0750997369204898, + "learning_rate": 1.1600834333899431e-06, + "loss": 0.7530527114868164, + "step": 4136 + }, + { + "epoch": 0.9532258064516129, + "grad_norm": 0.9603596342147773, + "learning_rate": 1.159707351314977e-06, + "loss": 0.8818701505661011, + "step": 4137 + }, + { + "epoch": 0.9534562211981567, + "grad_norm": 0.9491169409805379, + "learning_rate": 1.1593312460587003e-06, + "loss": 0.7172919511795044, + "step": 4138 + }, + { + "epoch": 0.9536866359447005, + "grad_norm": 1.1122266085503043, + "learning_rate": 1.1589551176757044e-06, + "loss": 0.8701400756835938, + "step": 4139 + }, + { + "epoch": 0.9539170506912442, + "grad_norm": 1.3285866575691943, + "learning_rate": 1.1585789662205834e-06, + "loss": 0.867475152015686, + "step": 4140 + }, + { + "epoch": 0.9541474654377881, + "grad_norm": 1.1851362026267, + "learning_rate": 1.1582027917479356e-06, + "loss": 0.7809052467346191, + "step": 4141 + }, + { + "epoch": 0.9543778801843318, + "grad_norm": 1.1986202884801196, + "learning_rate": 1.1578265943123619e-06, + "loss": 0.8589099645614624, + "step": 4142 + }, + { + "epoch": 0.9546082949308756, + "grad_norm": 0.893566517908755, + "learning_rate": 1.157450373968467e-06, + "loss": 0.7826642394065857, + "step": 4143 + }, + { + "epoch": 0.9548387096774194, + "grad_norm": 1.3652425128856092, + "learning_rate": 1.1570741307708585e-06, + "loss": 0.9550029635429382, + "step": 4144 + }, + { + "epoch": 0.9550691244239631, + "grad_norm": 1.0826442844044148, + "learning_rate": 1.1566978647741478e-06, + "loss": 0.8607431650161743, + "step": 4145 + }, + { + "epoch": 0.9552995391705069, + "grad_norm": 0.8247649155112424, + "learning_rate": 1.15632157603295e-06, + "loss": 
0.7350449562072754, + "step": 4146 + }, + { + "epoch": 0.9555299539170506, + "grad_norm": 1.033301557916291, + "learning_rate": 1.1559452646018818e-06, + "loss": 0.853142261505127, + "step": 4147 + }, + { + "epoch": 0.9557603686635945, + "grad_norm": 1.0495554531445934, + "learning_rate": 1.1555689305355651e-06, + "loss": 0.7137192487716675, + "step": 4148 + }, + { + "epoch": 0.9559907834101382, + "grad_norm": 1.158813208265862, + "learning_rate": 1.1551925738886244e-06, + "loss": 0.9007513523101807, + "step": 4149 + }, + { + "epoch": 0.956221198156682, + "grad_norm": 1.1071306366128357, + "learning_rate": 1.1548161947156867e-06, + "loss": 0.8499083518981934, + "step": 4150 + }, + { + "epoch": 0.9564516129032258, + "grad_norm": 0.874419574252059, + "learning_rate": 1.1544397930713836e-06, + "loss": 0.8068628311157227, + "step": 4151 + }, + { + "epoch": 0.9566820276497696, + "grad_norm": 1.1729788609256337, + "learning_rate": 1.1540633690103487e-06, + "loss": 0.8357307314872742, + "step": 4152 + }, + { + "epoch": 0.9569124423963133, + "grad_norm": 1.262397502444813, + "learning_rate": 1.1536869225872198e-06, + "loss": 0.7650378942489624, + "step": 4153 + }, + { + "epoch": 0.9571428571428572, + "grad_norm": 0.9933463317010283, + "learning_rate": 1.1533104538566376e-06, + "loss": 0.8717354536056519, + "step": 4154 + }, + { + "epoch": 0.9573732718894009, + "grad_norm": 0.9807638290234347, + "learning_rate": 1.152933962873246e-06, + "loss": 0.6314762830734253, + "step": 4155 + }, + { + "epoch": 0.9576036866359448, + "grad_norm": 1.1279705073097503, + "learning_rate": 1.152557449691692e-06, + "loss": 0.8949059844017029, + "step": 4156 + }, + { + "epoch": 0.9578341013824885, + "grad_norm": 1.137203803563717, + "learning_rate": 1.1521809143666261e-06, + "loss": 0.7862699031829834, + "step": 4157 + }, + { + "epoch": 0.9580645161290322, + "grad_norm": 0.8970512868442762, + "learning_rate": 1.151804356952702e-06, + "loss": 0.7954641580581665, + "step": 4158 + }, + { + 
"epoch": 0.958294930875576, + "grad_norm": 1.0478069911824797, + "learning_rate": 1.1514277775045766e-06, + "loss": 0.7654163241386414, + "step": 4159 + }, + { + "epoch": 0.9585253456221198, + "grad_norm": 1.0321973050954667, + "learning_rate": 1.1510511760769097e-06, + "loss": 0.7050681114196777, + "step": 4160 + }, + { + "epoch": 0.9587557603686636, + "grad_norm": 1.0667493196933242, + "learning_rate": 1.1506745527243646e-06, + "loss": 0.8646515607833862, + "step": 4161 + }, + { + "epoch": 0.9589861751152073, + "grad_norm": 0.9392654190881413, + "learning_rate": 1.1502979075016078e-06, + "loss": 0.7427883148193359, + "step": 4162 + }, + { + "epoch": 0.9592165898617512, + "grad_norm": 1.2506151155745373, + "learning_rate": 1.1499212404633083e-06, + "loss": 0.7800190448760986, + "step": 4163 + }, + { + "epoch": 0.9594470046082949, + "grad_norm": 1.0487739651932841, + "learning_rate": 1.1495445516641394e-06, + "loss": 0.789481520652771, + "step": 4164 + }, + { + "epoch": 0.9596774193548387, + "grad_norm": 0.8332785453272284, + "learning_rate": 1.1491678411587768e-06, + "loss": 0.7975008487701416, + "step": 4165 + }, + { + "epoch": 0.9599078341013825, + "grad_norm": 0.9306560917040928, + "learning_rate": 1.1487911090018994e-06, + "loss": 0.7964596748352051, + "step": 4166 + }, + { + "epoch": 0.9601382488479263, + "grad_norm": 0.8915843631095149, + "learning_rate": 1.1484143552481895e-06, + "loss": 0.7008803486824036, + "step": 4167 + }, + { + "epoch": 0.96036866359447, + "grad_norm": 0.888889684402262, + "learning_rate": 1.1480375799523328e-06, + "loss": 0.708189070224762, + "step": 4168 + }, + { + "epoch": 0.9605990783410139, + "grad_norm": 1.1069917813185677, + "learning_rate": 1.1476607831690167e-06, + "loss": 0.8207682371139526, + "step": 4169 + }, + { + "epoch": 0.9608294930875576, + "grad_norm": 1.200280235865814, + "learning_rate": 1.1472839649529337e-06, + "loss": 0.7682942152023315, + "step": 4170 + }, + { + "epoch": 0.9610599078341013, + "grad_norm": 
1.0122999990692296, + "learning_rate": 1.1469071253587785e-06, + "loss": 0.8435598611831665, + "step": 4171 + }, + { + "epoch": 0.9612903225806452, + "grad_norm": 0.79536207500534, + "learning_rate": 1.1465302644412483e-06, + "loss": 0.7516113519668579, + "step": 4172 + }, + { + "epoch": 0.9615207373271889, + "grad_norm": 0.881539477347835, + "learning_rate": 1.1461533822550442e-06, + "loss": 0.7125411629676819, + "step": 4173 + }, + { + "epoch": 0.9617511520737327, + "grad_norm": 0.9108745928942158, + "learning_rate": 1.14577647885487e-06, + "loss": 0.7560747861862183, + "step": 4174 + }, + { + "epoch": 0.9619815668202765, + "grad_norm": 0.9027443230900505, + "learning_rate": 1.1453995542954332e-06, + "loss": 0.6702673435211182, + "step": 4175 + }, + { + "epoch": 0.9622119815668203, + "grad_norm": 1.1520258504461998, + "learning_rate": 1.1450226086314433e-06, + "loss": 0.8083088397979736, + "step": 4176 + }, + { + "epoch": 0.962442396313364, + "grad_norm": 0.9906259449003554, + "learning_rate": 1.1446456419176135e-06, + "loss": 0.7579925060272217, + "step": 4177 + }, + { + "epoch": 0.9626728110599079, + "grad_norm": 0.9460352601625827, + "learning_rate": 1.1442686542086609e-06, + "loss": 0.713416576385498, + "step": 4178 + }, + { + "epoch": 0.9629032258064516, + "grad_norm": 1.1770844867552515, + "learning_rate": 1.1438916455593035e-06, + "loss": 0.7767639756202698, + "step": 4179 + }, + { + "epoch": 0.9631336405529954, + "grad_norm": 1.0244180953454374, + "learning_rate": 1.1435146160242645e-06, + "loss": 0.7493964433670044, + "step": 4180 + }, + { + "epoch": 0.9633640552995392, + "grad_norm": 1.1249907720020325, + "learning_rate": 1.1431375656582692e-06, + "loss": 0.8789365291595459, + "step": 4181 + }, + { + "epoch": 0.9635944700460829, + "grad_norm": 1.177047767616621, + "learning_rate": 1.1427604945160457e-06, + "loss": 0.7750524878501892, + "step": 4182 + }, + { + "epoch": 0.9638248847926267, + "grad_norm": 1.1195166665130392, + "learning_rate": 
1.142383402652325e-06, + "loss": 0.9330715537071228, + "step": 4183 + }, + { + "epoch": 0.9640552995391705, + "grad_norm": 0.933339002257347, + "learning_rate": 1.142006290121842e-06, + "loss": 0.6845035552978516, + "step": 4184 + }, + { + "epoch": 0.9642857142857143, + "grad_norm": 0.9794843601160967, + "learning_rate": 1.1416291569793343e-06, + "loss": 0.7295390963554382, + "step": 4185 + }, + { + "epoch": 0.964516129032258, + "grad_norm": 1.0666753158619988, + "learning_rate": 1.1412520032795419e-06, + "loss": 0.6869080066680908, + "step": 4186 + }, + { + "epoch": 0.9647465437788019, + "grad_norm": 1.506743316898968, + "learning_rate": 1.140874829077208e-06, + "loss": 1.0916842222213745, + "step": 4187 + }, + { + "epoch": 0.9649769585253456, + "grad_norm": 1.0539994363877199, + "learning_rate": 1.1404976344270793e-06, + "loss": 0.7487984299659729, + "step": 4188 + }, + { + "epoch": 0.9652073732718894, + "grad_norm": 1.024674697115665, + "learning_rate": 1.140120419383905e-06, + "loss": 0.8852604627609253, + "step": 4189 + }, + { + "epoch": 0.9654377880184332, + "grad_norm": 1.065174441144157, + "learning_rate": 1.139743184002437e-06, + "loss": 0.7384698987007141, + "step": 4190 + }, + { + "epoch": 0.965668202764977, + "grad_norm": 1.2009691028192717, + "learning_rate": 1.1393659283374312e-06, + "loss": 0.8033223152160645, + "step": 4191 + }, + { + "epoch": 0.9658986175115207, + "grad_norm": 1.2698866658546557, + "learning_rate": 1.1389886524436453e-06, + "loss": 0.8870355486869812, + "step": 4192 + }, + { + "epoch": 0.9661290322580646, + "grad_norm": 1.1198376045036553, + "learning_rate": 1.1386113563758405e-06, + "loss": 0.869537353515625, + "step": 4193 + }, + { + "epoch": 0.9663594470046083, + "grad_norm": 1.027781409519754, + "learning_rate": 1.1382340401887808e-06, + "loss": 0.8564068675041199, + "step": 4194 + }, + { + "epoch": 0.966589861751152, + "grad_norm": 0.9894593103049535, + "learning_rate": 1.1378567039372332e-06, + "loss": 0.7988623380661011, + 
"step": 4195 + }, + { + "epoch": 0.9668202764976959, + "grad_norm": 1.0843651981255995, + "learning_rate": 1.1374793476759673e-06, + "loss": 0.9405556917190552, + "step": 4196 + }, + { + "epoch": 0.9670506912442396, + "grad_norm": 0.8756334921680484, + "learning_rate": 1.137101971459756e-06, + "loss": 0.6757407188415527, + "step": 4197 + }, + { + "epoch": 0.9672811059907834, + "grad_norm": 1.1855730012050456, + "learning_rate": 1.1367245753433757e-06, + "loss": 0.7521541118621826, + "step": 4198 + }, + { + "epoch": 0.9675115207373272, + "grad_norm": 1.0137943151941313, + "learning_rate": 1.1363471593816037e-06, + "loss": 0.7306162714958191, + "step": 4199 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8912209844157076, + "learning_rate": 1.135969723629222e-06, + "loss": 0.6884766817092896, + "step": 4200 + }, + { + "epoch": 0.9679723502304147, + "grad_norm": 1.2084507323846643, + "learning_rate": 1.1355922681410152e-06, + "loss": 0.8420373201370239, + "step": 4201 + }, + { + "epoch": 0.9682027649769586, + "grad_norm": 0.7638761509020496, + "learning_rate": 1.1352147929717704e-06, + "loss": 0.7252322435379028, + "step": 4202 + }, + { + "epoch": 0.9684331797235023, + "grad_norm": 0.9448982669089191, + "learning_rate": 1.134837298176277e-06, + "loss": 0.6375538110733032, + "step": 4203 + }, + { + "epoch": 0.9686635944700461, + "grad_norm": 1.0629192948024473, + "learning_rate": 1.1344597838093283e-06, + "loss": 0.713671863079071, + "step": 4204 + }, + { + "epoch": 0.9688940092165899, + "grad_norm": 1.0319385361068514, + "learning_rate": 1.1340822499257201e-06, + "loss": 0.8591479063034058, + "step": 4205 + }, + { + "epoch": 0.9691244239631336, + "grad_norm": 1.0671754327237228, + "learning_rate": 1.1337046965802505e-06, + "loss": 0.7638808488845825, + "step": 4206 + }, + { + "epoch": 0.9693548387096774, + "grad_norm": 1.1032489557963816, + "learning_rate": 1.1333271238277215e-06, + "loss": 0.8133253455162048, + "step": 4207 + }, + { + "epoch": 
0.9695852534562212, + "grad_norm": 0.9621754998556686, + "learning_rate": 1.132949531722937e-06, + "loss": 0.6938756704330444, + "step": 4208 + }, + { + "epoch": 0.969815668202765, + "grad_norm": 1.171557608199449, + "learning_rate": 1.132571920320704e-06, + "loss": 0.793639063835144, + "step": 4209 + }, + { + "epoch": 0.9700460829493087, + "grad_norm": 1.066219056403929, + "learning_rate": 1.132194289675832e-06, + "loss": 0.7188536524772644, + "step": 4210 + }, + { + "epoch": 0.9702764976958526, + "grad_norm": 1.2873690827507545, + "learning_rate": 1.1318166398431343e-06, + "loss": 0.8076587319374084, + "step": 4211 + }, + { + "epoch": 0.9705069124423963, + "grad_norm": 1.2434961707112964, + "learning_rate": 1.1314389708774258e-06, + "loss": 0.8390023708343506, + "step": 4212 + }, + { + "epoch": 0.9707373271889401, + "grad_norm": 1.2800250293744322, + "learning_rate": 1.1310612828335243e-06, + "loss": 0.8395706415176392, + "step": 4213 + }, + { + "epoch": 0.9709677419354839, + "grad_norm": 1.1156221851257155, + "learning_rate": 1.1306835757662515e-06, + "loss": 0.9672995805740356, + "step": 4214 + }, + { + "epoch": 0.9711981566820277, + "grad_norm": 1.1859433022618981, + "learning_rate": 1.1303058497304303e-06, + "loss": 0.7716202735900879, + "step": 4215 + }, + { + "epoch": 0.9714285714285714, + "grad_norm": 0.9257750691433206, + "learning_rate": 1.1299281047808876e-06, + "loss": 0.6318329572677612, + "step": 4216 + }, + { + "epoch": 0.9716589861751153, + "grad_norm": 1.1802189065520408, + "learning_rate": 1.1295503409724525e-06, + "loss": 0.8287553787231445, + "step": 4217 + }, + { + "epoch": 0.971889400921659, + "grad_norm": 0.835147088990129, + "learning_rate": 1.129172558359957e-06, + "loss": 0.6903107762336731, + "step": 4218 + }, + { + "epoch": 0.9721198156682027, + "grad_norm": 0.9693907793654548, + "learning_rate": 1.1287947569982355e-06, + "loss": 0.684443473815918, + "step": 4219 + }, + { + "epoch": 0.9723502304147466, + "grad_norm": 1.2152908203730401, 
+ "learning_rate": 1.1284169369421254e-06, + "loss": 0.8566167950630188, + "step": 4220 + }, + { + "epoch": 0.9725806451612903, + "grad_norm": 1.0787740661687364, + "learning_rate": 1.1280390982464673e-06, + "loss": 0.8103536367416382, + "step": 4221 + }, + { + "epoch": 0.9728110599078341, + "grad_norm": 1.115333195517037, + "learning_rate": 1.1276612409661036e-06, + "loss": 0.8027071356773376, + "step": 4222 + }, + { + "epoch": 0.9730414746543778, + "grad_norm": 1.1442493875477038, + "learning_rate": 1.1272833651558796e-06, + "loss": 0.8251115679740906, + "step": 4223 + }, + { + "epoch": 0.9732718894009217, + "grad_norm": 1.1151561398542829, + "learning_rate": 1.1269054708706437e-06, + "loss": 0.6468047499656677, + "step": 4224 + }, + { + "epoch": 0.9735023041474654, + "grad_norm": 1.129830296326307, + "learning_rate": 1.1265275581652465e-06, + "loss": 0.8085706233978271, + "step": 4225 + }, + { + "epoch": 0.9737327188940093, + "grad_norm": 1.139574441171448, + "learning_rate": 1.1261496270945418e-06, + "loss": 0.8396503925323486, + "step": 4226 + }, + { + "epoch": 0.973963133640553, + "grad_norm": 0.9978900351940978, + "learning_rate": 1.1257716777133861e-06, + "loss": 0.7860006093978882, + "step": 4227 + }, + { + "epoch": 0.9741935483870968, + "grad_norm": 1.1484873689809545, + "learning_rate": 1.1253937100766373e-06, + "loss": 0.8630701303482056, + "step": 4228 + }, + { + "epoch": 0.9744239631336405, + "grad_norm": 0.9488769562872501, + "learning_rate": 1.1250157242391577e-06, + "loss": 0.8363114595413208, + "step": 4229 + }, + { + "epoch": 0.9746543778801844, + "grad_norm": 1.1415512207130691, + "learning_rate": 1.1246377202558114e-06, + "loss": 0.7837141156196594, + "step": 4230 + }, + { + "epoch": 0.9748847926267281, + "grad_norm": 1.3474534084840375, + "learning_rate": 1.1242596981814648e-06, + "loss": 0.8283151984214783, + "step": 4231 + }, + { + "epoch": 0.9751152073732718, + "grad_norm": 1.2728043293758005, + "learning_rate": 1.1238816580709878e-06, + 
"loss": 0.9232061505317688, + "step": 4232 + }, + { + "epoch": 0.9753456221198157, + "grad_norm": 1.125514954365521, + "learning_rate": 1.123503599979252e-06, + "loss": 0.8721164464950562, + "step": 4233 + }, + { + "epoch": 0.9755760368663594, + "grad_norm": 1.0382014546922784, + "learning_rate": 1.1231255239611321e-06, + "loss": 0.9398131370544434, + "step": 4234 + }, + { + "epoch": 0.9758064516129032, + "grad_norm": 1.0916134182788353, + "learning_rate": 1.1227474300715054e-06, + "loss": 0.8124324083328247, + "step": 4235 + }, + { + "epoch": 0.976036866359447, + "grad_norm": 0.8607187401974831, + "learning_rate": 1.1223693183652515e-06, + "loss": 0.8532534837722778, + "step": 4236 + }, + { + "epoch": 0.9762672811059908, + "grad_norm": 1.10871517745179, + "learning_rate": 1.1219911888972536e-06, + "loss": 0.7547662258148193, + "step": 4237 + }, + { + "epoch": 0.9764976958525345, + "grad_norm": 1.036940513326952, + "learning_rate": 1.1216130417223956e-06, + "loss": 0.7407231330871582, + "step": 4238 + }, + { + "epoch": 0.9767281105990784, + "grad_norm": 1.0573090435680337, + "learning_rate": 1.1212348768955657e-06, + "loss": 0.8190197944641113, + "step": 4239 + }, + { + "epoch": 0.9769585253456221, + "grad_norm": 1.111465926757279, + "learning_rate": 1.1208566944716542e-06, + "loss": 0.6641337871551514, + "step": 4240 + }, + { + "epoch": 0.977188940092166, + "grad_norm": 1.224342353107687, + "learning_rate": 1.120478494505553e-06, + "loss": 0.8953202962875366, + "step": 4241 + }, + { + "epoch": 0.9774193548387097, + "grad_norm": 0.9676272600083323, + "learning_rate": 1.1201002770521583e-06, + "loss": 0.7803191542625427, + "step": 4242 + }, + { + "epoch": 0.9776497695852534, + "grad_norm": 1.1107043139306134, + "learning_rate": 1.1197220421663674e-06, + "loss": 0.6827100515365601, + "step": 4243 + }, + { + "epoch": 0.9778801843317972, + "grad_norm": 1.2085442462659117, + "learning_rate": 1.1193437899030802e-06, + "loss": 0.8513565063476562, + "step": 4244 + }, + { + 
"epoch": 0.978110599078341, + "grad_norm": 0.9785496460004156, + "learning_rate": 1.1189655203172e-06, + "loss": 0.7196829915046692, + "step": 4245 + }, + { + "epoch": 0.9783410138248848, + "grad_norm": 1.0764048064511267, + "learning_rate": 1.1185872334636319e-06, + "loss": 0.7823485136032104, + "step": 4246 + }, + { + "epoch": 0.9785714285714285, + "grad_norm": 1.0963006166840967, + "learning_rate": 1.1182089293972841e-06, + "loss": 0.7178136110305786, + "step": 4247 + }, + { + "epoch": 0.9788018433179724, + "grad_norm": 1.0782886091125194, + "learning_rate": 1.1178306081730664e-06, + "loss": 0.7746715545654297, + "step": 4248 + }, + { + "epoch": 0.9790322580645161, + "grad_norm": 0.9177757629071243, + "learning_rate": 1.117452269845892e-06, + "loss": 0.8829167485237122, + "step": 4249 + }, + { + "epoch": 0.9792626728110599, + "grad_norm": 0.9096983569344097, + "learning_rate": 1.1170739144706764e-06, + "loss": 0.7592206001281738, + "step": 4250 + }, + { + "epoch": 0.9794930875576037, + "grad_norm": 0.8361017174057647, + "learning_rate": 1.1166955421023368e-06, + "loss": 0.8107382655143738, + "step": 4251 + }, + { + "epoch": 0.9797235023041475, + "grad_norm": 0.9837092835211146, + "learning_rate": 1.116317152795794e-06, + "loss": 0.6807001829147339, + "step": 4252 + }, + { + "epoch": 0.9799539170506912, + "grad_norm": 1.1872199804636603, + "learning_rate": 1.1159387466059705e-06, + "loss": 0.7752517461776733, + "step": 4253 + }, + { + "epoch": 0.9801843317972351, + "grad_norm": 0.8560133871531077, + "learning_rate": 1.115560323587791e-06, + "loss": 0.7484745383262634, + "step": 4254 + }, + { + "epoch": 0.9804147465437788, + "grad_norm": 1.153488759551228, + "learning_rate": 1.1151818837961838e-06, + "loss": 0.877413809299469, + "step": 4255 + }, + { + "epoch": 0.9806451612903225, + "grad_norm": 1.0087457568089837, + "learning_rate": 1.1148034272860785e-06, + "loss": 0.7806656360626221, + "step": 4256 + }, + { + "epoch": 0.9808755760368664, + "grad_norm": 
0.849135201735791, + "learning_rate": 1.1144249541124078e-06, + "loss": 0.6938076019287109, + "step": 4257 + }, + { + "epoch": 0.9811059907834101, + "grad_norm": 1.0559339187336096, + "learning_rate": 1.1140464643301064e-06, + "loss": 0.8832957148551941, + "step": 4258 + }, + { + "epoch": 0.9813364055299539, + "grad_norm": 1.1632523287766907, + "learning_rate": 1.1136679579941117e-06, + "loss": 0.7794016003608704, + "step": 4259 + }, + { + "epoch": 0.9815668202764977, + "grad_norm": 0.9689102084269609, + "learning_rate": 1.1132894351593636e-06, + "loss": 0.6877585053443909, + "step": 4260 + }, + { + "epoch": 0.9817972350230415, + "grad_norm": 1.0902109747190951, + "learning_rate": 1.1129108958808037e-06, + "loss": 0.8268473148345947, + "step": 4261 + }, + { + "epoch": 0.9820276497695852, + "grad_norm": 1.0260596307079526, + "learning_rate": 1.112532340213377e-06, + "loss": 0.6717547178268433, + "step": 4262 + }, + { + "epoch": 0.9822580645161291, + "grad_norm": 1.0646130416760407, + "learning_rate": 1.11215376821203e-06, + "loss": 0.849999725818634, + "step": 4263 + }, + { + "epoch": 0.9824884792626728, + "grad_norm": 1.005034332417578, + "learning_rate": 1.1117751799317118e-06, + "loss": 0.6562552452087402, + "step": 4264 + }, + { + "epoch": 0.9827188940092166, + "grad_norm": 1.0885536317886024, + "learning_rate": 1.1113965754273743e-06, + "loss": 0.7734784483909607, + "step": 4265 + }, + { + "epoch": 0.9829493087557604, + "grad_norm": 1.0527283904271951, + "learning_rate": 1.1110179547539717e-06, + "loss": 0.7580564022064209, + "step": 4266 + }, + { + "epoch": 0.9831797235023041, + "grad_norm": 1.121984331535499, + "learning_rate": 1.1106393179664595e-06, + "loss": 0.9207481145858765, + "step": 4267 + }, + { + "epoch": 0.9834101382488479, + "grad_norm": 1.1182241685665208, + "learning_rate": 1.1102606651197968e-06, + "loss": 0.8987482786178589, + "step": 4268 + }, + { + "epoch": 0.9836405529953917, + "grad_norm": 0.8558732255272679, + "learning_rate": 
1.1098819962689445e-06, + "loss": 0.7486778497695923, + "step": 4269 + }, + { + "epoch": 0.9838709677419355, + "grad_norm": 0.9905311956335509, + "learning_rate": 1.1095033114688662e-06, + "loss": 0.7387109994888306, + "step": 4270 + }, + { + "epoch": 0.9841013824884792, + "grad_norm": 0.913366940312768, + "learning_rate": 1.109124610774527e-06, + "loss": 0.7337637543678284, + "step": 4271 + }, + { + "epoch": 0.9843317972350231, + "grad_norm": 1.1127819698251733, + "learning_rate": 1.1087458942408952e-06, + "loss": 0.7419463396072388, + "step": 4272 + }, + { + "epoch": 0.9845622119815668, + "grad_norm": 1.0024132905496845, + "learning_rate": 1.1083671619229407e-06, + "loss": 0.7525068521499634, + "step": 4273 + }, + { + "epoch": 0.9847926267281106, + "grad_norm": 1.2794306882440036, + "learning_rate": 1.107988413875636e-06, + "loss": 0.8593931198120117, + "step": 4274 + }, + { + "epoch": 0.9850230414746544, + "grad_norm": 1.1058497522784536, + "learning_rate": 1.107609650153956e-06, + "loss": 0.9123519659042358, + "step": 4275 + }, + { + "epoch": 0.9852534562211982, + "grad_norm": 1.0134863035075283, + "learning_rate": 1.107230870812878e-06, + "loss": 0.7099615335464478, + "step": 4276 + }, + { + "epoch": 0.9854838709677419, + "grad_norm": 1.0305482113277953, + "learning_rate": 1.1068520759073807e-06, + "loss": 0.9525141716003418, + "step": 4277 + }, + { + "epoch": 0.9857142857142858, + "grad_norm": 1.078520213597711, + "learning_rate": 1.106473265492446e-06, + "loss": 0.8360154628753662, + "step": 4278 + }, + { + "epoch": 0.9859447004608295, + "grad_norm": 0.835665323629814, + "learning_rate": 1.106094439623058e-06, + "loss": 0.7788960933685303, + "step": 4279 + }, + { + "epoch": 0.9861751152073732, + "grad_norm": 1.4332707697001132, + "learning_rate": 1.1057155983542024e-06, + "loss": 0.76897132396698, + "step": 4280 + }, + { + "epoch": 0.9864055299539171, + "grad_norm": 1.2788839563876278, + "learning_rate": 1.1053367417408678e-06, + "loss": 0.8062764406204224, 
+ "step": 4281 + }, + { + "epoch": 0.9866359447004608, + "grad_norm": 1.0759322336892816, + "learning_rate": 1.1049578698380446e-06, + "loss": 0.6796555519104004, + "step": 4282 + }, + { + "epoch": 0.9868663594470046, + "grad_norm": 1.2156156083740777, + "learning_rate": 1.1045789827007256e-06, + "loss": 0.8495693206787109, + "step": 4283 + }, + { + "epoch": 0.9870967741935484, + "grad_norm": 1.1065961656311563, + "learning_rate": 1.1042000803839054e-06, + "loss": 0.9202588200569153, + "step": 4284 + }, + { + "epoch": 0.9873271889400922, + "grad_norm": 1.0492103887070696, + "learning_rate": 1.1038211629425815e-06, + "loss": 0.8204039335250854, + "step": 4285 + }, + { + "epoch": 0.9875576036866359, + "grad_norm": 1.3424135227199923, + "learning_rate": 1.1034422304317534e-06, + "loss": 0.921082615852356, + "step": 4286 + }, + { + "epoch": 0.9877880184331798, + "grad_norm": 1.1158968493314756, + "learning_rate": 1.1030632829064225e-06, + "loss": 0.8114739656448364, + "step": 4287 + }, + { + "epoch": 0.9880184331797235, + "grad_norm": 1.160400130956272, + "learning_rate": 1.1026843204215924e-06, + "loss": 0.7394933700561523, + "step": 4288 + }, + { + "epoch": 0.9882488479262673, + "grad_norm": 1.102093260654992, + "learning_rate": 1.1023053430322692e-06, + "loss": 0.9515210390090942, + "step": 4289 + }, + { + "epoch": 0.988479262672811, + "grad_norm": 1.0914130901392678, + "learning_rate": 1.1019263507934611e-06, + "loss": 0.6729186773300171, + "step": 4290 + }, + { + "epoch": 0.9887096774193549, + "grad_norm": 0.9547635126100301, + "learning_rate": 1.1015473437601776e-06, + "loss": 0.6455283164978027, + "step": 4291 + }, + { + "epoch": 0.9889400921658986, + "grad_norm": 1.1259220869244864, + "learning_rate": 1.1011683219874322e-06, + "loss": 0.8071424961090088, + "step": 4292 + }, + { + "epoch": 0.9891705069124423, + "grad_norm": 0.8980294635582122, + "learning_rate": 1.1007892855302385e-06, + "loss": 0.7287160754203796, + "step": 4293 + }, + { + "epoch": 
0.9894009216589862, + "grad_norm": 0.956104694967055, + "learning_rate": 1.1004102344436135e-06, + "loss": 0.7916513681411743, + "step": 4294 + }, + { + "epoch": 0.9896313364055299, + "grad_norm": 0.948939194234829, + "learning_rate": 1.1000311687825757e-06, + "loss": 0.8075610399246216, + "step": 4295 + }, + { + "epoch": 0.9898617511520738, + "grad_norm": 0.8467724433306772, + "learning_rate": 1.0996520886021465e-06, + "loss": 0.6144437193870544, + "step": 4296 + }, + { + "epoch": 0.9900921658986175, + "grad_norm": 1.1816936561057356, + "learning_rate": 1.0992729939573482e-06, + "loss": 0.830337643623352, + "step": 4297 + }, + { + "epoch": 0.9903225806451613, + "grad_norm": 1.1631921516982922, + "learning_rate": 1.0988938849032063e-06, + "loss": 0.7104393243789673, + "step": 4298 + }, + { + "epoch": 0.990552995391705, + "grad_norm": 1.0166827801425276, + "learning_rate": 1.0985147614947484e-06, + "loss": 0.746238112449646, + "step": 4299 + }, + { + "epoch": 0.9907834101382489, + "grad_norm": 0.8744941548736713, + "learning_rate": 1.0981356237870027e-06, + "loss": 0.7309597730636597, + "step": 4300 + }, + { + "epoch": 0.9910138248847926, + "grad_norm": 1.1787483382236952, + "learning_rate": 1.0977564718350013e-06, + "loss": 0.799136757850647, + "step": 4301 + }, + { + "epoch": 0.9912442396313365, + "grad_norm": 1.146252036070138, + "learning_rate": 1.0973773056937776e-06, + "loss": 0.7477747201919556, + "step": 4302 + }, + { + "epoch": 0.9914746543778802, + "grad_norm": 1.1466743668258872, + "learning_rate": 1.0969981254183668e-06, + "loss": 0.8051053285598755, + "step": 4303 + }, + { + "epoch": 0.9917050691244239, + "grad_norm": 0.9910519080633017, + "learning_rate": 1.0966189310638063e-06, + "loss": 0.8023163080215454, + "step": 4304 + }, + { + "epoch": 0.9919354838709677, + "grad_norm": 0.9483313078672773, + "learning_rate": 1.096239722685136e-06, + "loss": 0.6804348230361938, + "step": 4305 + }, + { + "epoch": 0.9921658986175115, + "grad_norm": 
1.119857177527024, + "learning_rate": 1.0958605003373976e-06, + "loss": 0.8276509046554565, + "step": 4306 + }, + { + "epoch": 0.9923963133640553, + "grad_norm": 1.2511674827094457, + "learning_rate": 1.095481264075634e-06, + "loss": 0.9733830690383911, + "step": 4307 + }, + { + "epoch": 0.992626728110599, + "grad_norm": 1.070745120202566, + "learning_rate": 1.0951020139548917e-06, + "loss": 0.824803352355957, + "step": 4308 + }, + { + "epoch": 0.9928571428571429, + "grad_norm": 1.100108017822232, + "learning_rate": 1.094722750030218e-06, + "loss": 0.8144090175628662, + "step": 4309 + }, + { + "epoch": 0.9930875576036866, + "grad_norm": 1.1329325704330306, + "learning_rate": 1.0943434723566623e-06, + "loss": 0.8394016027450562, + "step": 4310 + }, + { + "epoch": 0.9933179723502304, + "grad_norm": 1.0464489724076296, + "learning_rate": 1.0939641809892766e-06, + "loss": 0.7688177824020386, + "step": 4311 + }, + { + "epoch": 0.9935483870967742, + "grad_norm": 1.0599291427198123, + "learning_rate": 1.0935848759831144e-06, + "loss": 0.8157391548156738, + "step": 4312 + }, + { + "epoch": 0.993778801843318, + "grad_norm": 1.0072726544693649, + "learning_rate": 1.0932055573932316e-06, + "loss": 0.7618423700332642, + "step": 4313 + }, + { + "epoch": 0.9940092165898617, + "grad_norm": 0.8996295977906229, + "learning_rate": 1.0928262252746848e-06, + "loss": 0.7404567003250122, + "step": 4314 + }, + { + "epoch": 0.9942396313364056, + "grad_norm": 0.8729845318677907, + "learning_rate": 1.092446879682535e-06, + "loss": 0.6825613975524902, + "step": 4315 + }, + { + "epoch": 0.9944700460829493, + "grad_norm": 0.886318283085954, + "learning_rate": 1.0920675206718428e-06, + "loss": 0.6607732772827148, + "step": 4316 + }, + { + "epoch": 0.994700460829493, + "grad_norm": 1.1703494407740602, + "learning_rate": 1.0916881482976716e-06, + "loss": 0.715195894241333, + "step": 4317 + }, + { + "epoch": 0.9949308755760369, + "grad_norm": 1.0266525014281969, + "learning_rate": 
1.0913087626150872e-06, + "loss": 0.7593914270401001, + "step": 4318 + }, + { + "epoch": 0.9951612903225806, + "grad_norm": 0.9546142286310197, + "learning_rate": 1.090929363679157e-06, + "loss": 0.8368399143218994, + "step": 4319 + }, + { + "epoch": 0.9953917050691244, + "grad_norm": 1.0080836713071024, + "learning_rate": 1.0905499515449499e-06, + "loss": 0.7799170613288879, + "step": 4320 + }, + { + "epoch": 0.9956221198156682, + "grad_norm": 1.0450181436512773, + "learning_rate": 1.0901705262675372e-06, + "loss": 0.8194636702537537, + "step": 4321 + }, + { + "epoch": 0.995852534562212, + "grad_norm": 0.7482572391575254, + "learning_rate": 1.0897910879019917e-06, + "loss": 0.7150344848632812, + "step": 4322 + }, + { + "epoch": 0.9960829493087557, + "grad_norm": 1.0624528328831144, + "learning_rate": 1.089411636503389e-06, + "loss": 0.737568736076355, + "step": 4323 + }, + { + "epoch": 0.9963133640552996, + "grad_norm": 0.9578129661977193, + "learning_rate": 1.0890321721268056e-06, + "loss": 0.7037359476089478, + "step": 4324 + }, + { + "epoch": 0.9965437788018433, + "grad_norm": 1.1660806477651886, + "learning_rate": 1.0886526948273206e-06, + "loss": 0.7664542198181152, + "step": 4325 + }, + { + "epoch": 0.9967741935483871, + "grad_norm": 1.1927624722703807, + "learning_rate": 1.0882732046600138e-06, + "loss": 0.7700943946838379, + "step": 4326 + }, + { + "epoch": 0.9970046082949309, + "grad_norm": 0.9828460552540413, + "learning_rate": 1.0878937016799683e-06, + "loss": 0.7634885311126709, + "step": 4327 + }, + { + "epoch": 0.9972350230414746, + "grad_norm": 0.9138031795649807, + "learning_rate": 1.0875141859422685e-06, + "loss": 0.6784960031509399, + "step": 4328 + }, + { + "epoch": 0.9974654377880184, + "grad_norm": 0.9227707667287056, + "learning_rate": 1.0871346575020002e-06, + "loss": 0.7224948406219482, + "step": 4329 + }, + { + "epoch": 0.9976958525345622, + "grad_norm": 1.140456315375248, + "learning_rate": 1.086755116414252e-06, + "loss": 
0.7886664867401123, + "step": 4330 + }, + { + "epoch": 0.997926267281106, + "grad_norm": 0.8735584486255558, + "learning_rate": 1.0863755627341133e-06, + "loss": 0.7871295809745789, + "step": 4331 + }, + { + "epoch": 0.9981566820276497, + "grad_norm": 0.9703663985745814, + "learning_rate": 1.085995996516676e-06, + "loss": 0.700717568397522, + "step": 4332 + }, + { + "epoch": 0.9983870967741936, + "grad_norm": 1.0137806073331785, + "learning_rate": 1.085616417817034e-06, + "loss": 0.9090461730957031, + "step": 4333 + }, + { + "epoch": 0.9986175115207373, + "grad_norm": 0.8161279565195018, + "learning_rate": 1.0852368266902818e-06, + "loss": 0.7697109580039978, + "step": 4334 + }, + { + "epoch": 0.9988479262672811, + "grad_norm": 1.1335275167371797, + "learning_rate": 1.0848572231915177e-06, + "loss": 0.8135972023010254, + "step": 4335 + }, + { + "epoch": 0.9990783410138249, + "grad_norm": 0.9620227504979613, + "learning_rate": 1.0844776073758392e-06, + "loss": 0.803811252117157, + "step": 4336 + }, + { + "epoch": 0.9993087557603687, + "grad_norm": 1.1159399325844028, + "learning_rate": 1.0840979792983482e-06, + "loss": 0.874006986618042, + "step": 4337 + }, + { + "epoch": 0.9995391705069124, + "grad_norm": 1.0695664725891423, + "learning_rate": 1.0837183390141472e-06, + "loss": 0.7424730062484741, + "step": 4338 + }, + { + "epoch": 0.9997695852534563, + "grad_norm": 1.0413618177070603, + "learning_rate": 1.0833386865783393e-06, + "loss": 0.8219665884971619, + "step": 4339 + }, + { + "epoch": 1.0, + "grad_norm": 1.2200287736254531, + "learning_rate": 1.0829590220460319e-06, + "loss": 0.7065195441246033, + "step": 4340 + }, + { + "epoch": 1.0002304147465437, + "grad_norm": 1.4255251627812264, + "learning_rate": 1.0825793454723324e-06, + "loss": 0.7988346219062805, + "step": 4341 + }, + { + "epoch": 1.0004608294930875, + "grad_norm": 0.9544404961531333, + "learning_rate": 1.08219965691235e-06, + "loss": 0.6731617450714111, + "step": 4342 + }, + { + "epoch": 
1.0006912442396314, + "grad_norm": 1.0713203032897287, + "learning_rate": 1.0818199564211964e-06, + "loss": 0.8058687448501587, + "step": 4343 + }, + { + "epoch": 1.0009216589861751, + "grad_norm": 1.2330384736552804, + "learning_rate": 1.081440244053984e-06, + "loss": 0.8351448178291321, + "step": 4344 + }, + { + "epoch": 1.0011520737327189, + "grad_norm": 0.9578484310628987, + "learning_rate": 1.0810605198658286e-06, + "loss": 0.8619185090065002, + "step": 4345 + }, + { + "epoch": 1.0013824884792626, + "grad_norm": 1.030004028036847, + "learning_rate": 1.0806807839118455e-06, + "loss": 0.7600966691970825, + "step": 4346 + }, + { + "epoch": 1.0016129032258065, + "grad_norm": 1.103182000242006, + "learning_rate": 1.0803010362471536e-06, + "loss": 0.8123422265052795, + "step": 4347 + }, + { + "epoch": 1.0018433179723503, + "grad_norm": 1.0359331933938025, + "learning_rate": 1.0799212769268727e-06, + "loss": 0.8277603983879089, + "step": 4348 + }, + { + "epoch": 1.002073732718894, + "grad_norm": 0.7466130076646643, + "learning_rate": 1.079541506006124e-06, + "loss": 0.6666774153709412, + "step": 4349 + }, + { + "epoch": 1.0023041474654377, + "grad_norm": 1.0582236596847403, + "learning_rate": 1.0791617235400313e-06, + "loss": 0.8483254909515381, + "step": 4350 + }, + { + "epoch": 1.0025345622119817, + "grad_norm": 0.9094409000603249, + "learning_rate": 1.0787819295837193e-06, + "loss": 0.6585661172866821, + "step": 4351 + }, + { + "epoch": 1.0027649769585254, + "grad_norm": 1.0274936512349702, + "learning_rate": 1.0784021241923142e-06, + "loss": 0.7591124773025513, + "step": 4352 + }, + { + "epoch": 1.0029953917050691, + "grad_norm": 1.0201165998262116, + "learning_rate": 1.078022307420945e-06, + "loss": 0.7305805683135986, + "step": 4353 + }, + { + "epoch": 1.0032258064516129, + "grad_norm": 0.8894858318623733, + "learning_rate": 1.0776424793247407e-06, + "loss": 0.6558996438980103, + "step": 4354 + }, + { + "epoch": 1.0034562211981566, + "grad_norm": 
1.313034349644303, + "learning_rate": 1.0772626399588336e-06, + "loss": 0.6837360262870789, + "step": 4355 + }, + { + "epoch": 1.0036866359447005, + "grad_norm": 0.9187212026563307, + "learning_rate": 1.0768827893783562e-06, + "loss": 0.778124988079071, + "step": 4356 + }, + { + "epoch": 1.0039170506912443, + "grad_norm": 1.0828207561971888, + "learning_rate": 1.0765029276384438e-06, + "loss": 0.7676408886909485, + "step": 4357 + }, + { + "epoch": 1.004147465437788, + "grad_norm": 1.1604376015370672, + "learning_rate": 1.0761230547942333e-06, + "loss": 0.854246973991394, + "step": 4358 + }, + { + "epoch": 1.0043778801843317, + "grad_norm": 0.9177073619188721, + "learning_rate": 1.0757431709008615e-06, + "loss": 0.716766893863678, + "step": 4359 + }, + { + "epoch": 1.0046082949308757, + "grad_norm": 0.9439720321299626, + "learning_rate": 1.075363276013469e-06, + "loss": 0.6827799081802368, + "step": 4360 + }, + { + "epoch": 1.0048387096774194, + "grad_norm": 0.9539231430903122, + "learning_rate": 1.074983370187197e-06, + "loss": 0.7977348566055298, + "step": 4361 + }, + { + "epoch": 1.0050691244239631, + "grad_norm": 1.1227456227969494, + "learning_rate": 1.0746034534771878e-06, + "loss": 0.6958035826683044, + "step": 4362 + }, + { + "epoch": 1.0052995391705069, + "grad_norm": 0.9288361874867539, + "learning_rate": 1.0742235259385861e-06, + "loss": 0.8407979607582092, + "step": 4363 + }, + { + "epoch": 1.0055299539170508, + "grad_norm": 0.8466973629768922, + "learning_rate": 1.073843587626538e-06, + "loss": 0.8180495500564575, + "step": 4364 + }, + { + "epoch": 1.0057603686635945, + "grad_norm": 0.9973113541484702, + "learning_rate": 1.0734636385961907e-06, + "loss": 0.7551306486129761, + "step": 4365 + }, + { + "epoch": 1.0059907834101383, + "grad_norm": 1.1054013447474482, + "learning_rate": 1.0730836789026936e-06, + "loss": 0.6598455309867859, + "step": 4366 + }, + { + "epoch": 1.006221198156682, + "grad_norm": 0.9578758202335947, + "learning_rate": 
1.0727037086011971e-06, + "loss": 0.9186126589775085, + "step": 4367 + }, + { + "epoch": 1.0064516129032257, + "grad_norm": 1.0208878451508383, + "learning_rate": 1.0723237277468538e-06, + "loss": 0.8491259813308716, + "step": 4368 + }, + { + "epoch": 1.0066820276497697, + "grad_norm": 1.0678483382751343, + "learning_rate": 1.071943736394817e-06, + "loss": 0.6938691139221191, + "step": 4369 + }, + { + "epoch": 1.0069124423963134, + "grad_norm": 1.1084737690479445, + "learning_rate": 1.0715637346002423e-06, + "loss": 0.801313579082489, + "step": 4370 + }, + { + "epoch": 1.0071428571428571, + "grad_norm": 0.983698557868892, + "learning_rate": 1.071183722418286e-06, + "loss": 0.7663706541061401, + "step": 4371 + }, + { + "epoch": 1.0073732718894008, + "grad_norm": 0.8508185045615759, + "learning_rate": 1.070803699904107e-06, + "loss": 0.7434467077255249, + "step": 4372 + }, + { + "epoch": 1.0076036866359448, + "grad_norm": 1.331303605136832, + "learning_rate": 1.0704236671128643e-06, + "loss": 0.8366774320602417, + "step": 4373 + }, + { + "epoch": 1.0078341013824885, + "grad_norm": 1.276875198714222, + "learning_rate": 1.07004362409972e-06, + "loss": 0.7027710676193237, + "step": 4374 + }, + { + "epoch": 1.0080645161290323, + "grad_norm": 1.1122995966371962, + "learning_rate": 1.0696635709198357e-06, + "loss": 0.7965548038482666, + "step": 4375 + }, + { + "epoch": 1.008294930875576, + "grad_norm": 1.0387807228424288, + "learning_rate": 1.0692835076283768e-06, + "loss": 0.8058432340621948, + "step": 4376 + }, + { + "epoch": 1.0085253456221197, + "grad_norm": 1.1870264013217662, + "learning_rate": 1.0689034342805085e-06, + "loss": 0.9056248068809509, + "step": 4377 + }, + { + "epoch": 1.0087557603686637, + "grad_norm": 1.0069765876574615, + "learning_rate": 1.0685233509313979e-06, + "loss": 0.8407673835754395, + "step": 4378 + }, + { + "epoch": 1.0089861751152074, + "grad_norm": 1.3133023777292065, + "learning_rate": 1.0681432576362133e-06, + "loss": 0.9138794541358948, 
+ "step": 4379 + }, + { + "epoch": 1.0092165898617511, + "grad_norm": 1.3361237624577444, + "learning_rate": 1.067763154450125e-06, + "loss": 0.6640630960464478, + "step": 4380 + }, + { + "epoch": 1.0094470046082948, + "grad_norm": 1.4646712113013267, + "learning_rate": 1.0673830414283051e-06, + "loss": 0.9387146234512329, + "step": 4381 + }, + { + "epoch": 1.0096774193548388, + "grad_norm": 1.0228212242769696, + "learning_rate": 1.067002918625926e-06, + "loss": 0.7288271188735962, + "step": 4382 + }, + { + "epoch": 1.0099078341013825, + "grad_norm": 1.1693551967727813, + "learning_rate": 1.0666227860981613e-06, + "loss": 0.7886035442352295, + "step": 4383 + }, + { + "epoch": 1.0101382488479262, + "grad_norm": 1.056596025284508, + "learning_rate": 1.066242643900188e-06, + "loss": 0.6929852962493896, + "step": 4384 + }, + { + "epoch": 1.01036866359447, + "grad_norm": 0.9057033157053335, + "learning_rate": 1.065862492087182e-06, + "loss": 0.7709990739822388, + "step": 4385 + }, + { + "epoch": 1.010599078341014, + "grad_norm": 1.0362803754904506, + "learning_rate": 1.065482330714323e-06, + "loss": 0.811382532119751, + "step": 4386 + }, + { + "epoch": 1.0108294930875577, + "grad_norm": 1.2204693151649666, + "learning_rate": 1.0651021598367905e-06, + "loss": 0.8274353742599487, + "step": 4387 + }, + { + "epoch": 1.0110599078341014, + "grad_norm": 0.9995911348883496, + "learning_rate": 1.0647219795097651e-06, + "loss": 0.7449204921722412, + "step": 4388 + }, + { + "epoch": 1.011290322580645, + "grad_norm": 0.906861932756066, + "learning_rate": 1.0643417897884303e-06, + "loss": 0.675945520401001, + "step": 4389 + }, + { + "epoch": 1.0115207373271888, + "grad_norm": 1.183632210098949, + "learning_rate": 1.06396159072797e-06, + "loss": 0.7329400777816772, + "step": 4390 + }, + { + "epoch": 1.0117511520737328, + "grad_norm": 0.9566645616399831, + "learning_rate": 1.0635813823835692e-06, + "loss": 0.7809139490127563, + "step": 4391 + }, + { + "epoch": 1.0119815668202765, + 
"grad_norm": 1.0167427862718812, + "learning_rate": 1.0632011648104155e-06, + "loss": 0.799081563949585, + "step": 4392 + }, + { + "epoch": 1.0122119815668202, + "grad_norm": 1.0484890321007356, + "learning_rate": 1.062820938063696e-06, + "loss": 0.7738279104232788, + "step": 4393 + }, + { + "epoch": 1.012442396313364, + "grad_norm": 0.9791695127555486, + "learning_rate": 1.0624407021986007e-06, + "loss": 0.895797610282898, + "step": 4394 + }, + { + "epoch": 1.012672811059908, + "grad_norm": 0.9476041908693101, + "learning_rate": 1.0620604572703198e-06, + "loss": 0.6887848973274231, + "step": 4395 + }, + { + "epoch": 1.0129032258064516, + "grad_norm": 1.0915270783702586, + "learning_rate": 1.0616802033340457e-06, + "loss": 0.9540888071060181, + "step": 4396 + }, + { + "epoch": 1.0131336405529954, + "grad_norm": 1.3368596619746418, + "learning_rate": 1.0612999404449721e-06, + "loss": 0.9047783017158508, + "step": 4397 + }, + { + "epoch": 1.013364055299539, + "grad_norm": 0.924946076870977, + "learning_rate": 1.0609196686582931e-06, + "loss": 0.7030448913574219, + "step": 4398 + }, + { + "epoch": 1.013594470046083, + "grad_norm": 0.9501232585433265, + "learning_rate": 1.0605393880292046e-06, + "loss": 0.8097348213195801, + "step": 4399 + }, + { + "epoch": 1.0138248847926268, + "grad_norm": 1.0163791343408108, + "learning_rate": 1.0601590986129045e-06, + "loss": 0.7446185350418091, + "step": 4400 + }, + { + "epoch": 1.0140552995391705, + "grad_norm": 1.0548185515811, + "learning_rate": 1.0597788004645908e-06, + "loss": 0.7450964450836182, + "step": 4401 + }, + { + "epoch": 1.0142857142857142, + "grad_norm": 1.1891450532947472, + "learning_rate": 1.0593984936394632e-06, + "loss": 0.8326355218887329, + "step": 4402 + }, + { + "epoch": 1.014516129032258, + "grad_norm": 1.0194370020803867, + "learning_rate": 1.0590181781927227e-06, + "loss": 0.7013953924179077, + "step": 4403 + }, + { + "epoch": 1.014746543778802, + "grad_norm": 1.2634402455639506, + "learning_rate": 
1.0586378541795723e-06, + "loss": 0.7806364297866821, + "step": 4404 + }, + { + "epoch": 1.0149769585253456, + "grad_norm": 1.2061797737844093, + "learning_rate": 1.0582575216552146e-06, + "loss": 0.8207389116287231, + "step": 4405 + }, + { + "epoch": 1.0152073732718894, + "grad_norm": 1.123863770924685, + "learning_rate": 1.0578771806748545e-06, + "loss": 0.8042873740196228, + "step": 4406 + }, + { + "epoch": 1.015437788018433, + "grad_norm": 0.9837741196260199, + "learning_rate": 1.057496831293699e-06, + "loss": 0.7225071787834167, + "step": 4407 + }, + { + "epoch": 1.015668202764977, + "grad_norm": 0.8165867352878113, + "learning_rate": 1.0571164735669538e-06, + "loss": 0.7783743143081665, + "step": 4408 + }, + { + "epoch": 1.0158986175115208, + "grad_norm": 1.1050702802288892, + "learning_rate": 1.0567361075498286e-06, + "loss": 0.7455039024353027, + "step": 4409 + }, + { + "epoch": 1.0161290322580645, + "grad_norm": 1.0331220241961572, + "learning_rate": 1.0563557332975322e-06, + "loss": 0.7819615602493286, + "step": 4410 + }, + { + "epoch": 1.0163594470046082, + "grad_norm": 1.052305833495017, + "learning_rate": 1.0559753508652758e-06, + "loss": 0.6466404795646667, + "step": 4411 + }, + { + "epoch": 1.0165898617511522, + "grad_norm": 0.9503687927611121, + "learning_rate": 1.0555949603082715e-06, + "loss": 0.8728539943695068, + "step": 4412 + }, + { + "epoch": 1.016820276497696, + "grad_norm": 0.9080353373358744, + "learning_rate": 1.055214561681732e-06, + "loss": 0.6082659959793091, + "step": 4413 + }, + { + "epoch": 1.0170506912442396, + "grad_norm": 1.1401384988886654, + "learning_rate": 1.054834155040872e-06, + "loss": 0.8429103493690491, + "step": 4414 + }, + { + "epoch": 1.0172811059907834, + "grad_norm": 0.9060045457810262, + "learning_rate": 1.0544537404409073e-06, + "loss": 0.7953135967254639, + "step": 4415 + }, + { + "epoch": 1.017511520737327, + "grad_norm": 0.6713482182574511, + "learning_rate": 1.0540733179370542e-06, + "loss": 
0.7243527173995972, + "step": 4416 + }, + { + "epoch": 1.017741935483871, + "grad_norm": 1.4572192259453962, + "learning_rate": 1.0536928875845303e-06, + "loss": 0.6882613897323608, + "step": 4417 + }, + { + "epoch": 1.0179723502304148, + "grad_norm": 0.9719982264568039, + "learning_rate": 1.053312449438555e-06, + "loss": 0.9157286882400513, + "step": 4418 + }, + { + "epoch": 1.0182027649769585, + "grad_norm": 1.1196456434566004, + "learning_rate": 1.0529320035543482e-06, + "loss": 0.7224643230438232, + "step": 4419 + }, + { + "epoch": 1.0184331797235022, + "grad_norm": 1.4712628070157254, + "learning_rate": 1.0525515499871311e-06, + "loss": 0.874829888343811, + "step": 4420 + }, + { + "epoch": 1.0186635944700462, + "grad_norm": 0.9184049522457163, + "learning_rate": 1.0521710887921262e-06, + "loss": 0.6911267042160034, + "step": 4421 + }, + { + "epoch": 1.01889400921659, + "grad_norm": 1.1423796554253005, + "learning_rate": 1.051790620024557e-06, + "loss": 0.9065574407577515, + "step": 4422 + }, + { + "epoch": 1.0191244239631336, + "grad_norm": 1.225714416603257, + "learning_rate": 1.0514101437396474e-06, + "loss": 0.7671108245849609, + "step": 4423 + }, + { + "epoch": 1.0193548387096774, + "grad_norm": 1.3506661037387142, + "learning_rate": 1.051029659992624e-06, + "loss": 0.8706510066986084, + "step": 4424 + }, + { + "epoch": 1.019585253456221, + "grad_norm": 1.4185673299670827, + "learning_rate": 1.0506491688387128e-06, + "loss": 0.741087794303894, + "step": 4425 + }, + { + "epoch": 1.019815668202765, + "grad_norm": 1.0122076007105019, + "learning_rate": 1.0502686703331419e-06, + "loss": 0.8045330047607422, + "step": 4426 + }, + { + "epoch": 1.0200460829493088, + "grad_norm": 1.1768435258548835, + "learning_rate": 1.0498881645311398e-06, + "loss": 0.8464969992637634, + "step": 4427 + }, + { + "epoch": 1.0202764976958525, + "grad_norm": 1.1260966872974236, + "learning_rate": 1.0495076514879367e-06, + "loss": 0.7660650610923767, + "step": 4428 + }, + { + "epoch": 
1.0205069124423962, + "grad_norm": 1.0026539513539563, + "learning_rate": 1.0491271312587636e-06, + "loss": 0.8565669059753418, + "step": 4429 + }, + { + "epoch": 1.0207373271889402, + "grad_norm": 1.306851956145893, + "learning_rate": 1.0487466038988525e-06, + "loss": 0.8884295225143433, + "step": 4430 + }, + { + "epoch": 1.020967741935484, + "grad_norm": 1.0672501887857282, + "learning_rate": 1.0483660694634361e-06, + "loss": 0.7300036549568176, + "step": 4431 + }, + { + "epoch": 1.0211981566820276, + "grad_norm": 1.261937486377886, + "learning_rate": 1.0479855280077493e-06, + "loss": 0.7879898548126221, + "step": 4432 + }, + { + "epoch": 1.0214285714285714, + "grad_norm": 1.5182696761272942, + "learning_rate": 1.0476049795870263e-06, + "loss": 0.9811698198318481, + "step": 4433 + }, + { + "epoch": 1.0216589861751153, + "grad_norm": 1.1962738461411733, + "learning_rate": 1.0472244242565034e-06, + "loss": 0.7706241607666016, + "step": 4434 + }, + { + "epoch": 1.021889400921659, + "grad_norm": 1.289215010975763, + "learning_rate": 1.046843862071418e-06, + "loss": 0.761093020439148, + "step": 4435 + }, + { + "epoch": 1.0221198156682028, + "grad_norm": 1.2142929670752842, + "learning_rate": 1.046463293087008e-06, + "loss": 0.8306092619895935, + "step": 4436 + }, + { + "epoch": 1.0223502304147465, + "grad_norm": 1.0820298518439184, + "learning_rate": 1.0460827173585125e-06, + "loss": 0.9669788479804993, + "step": 4437 + }, + { + "epoch": 1.0225806451612902, + "grad_norm": 1.173748576404213, + "learning_rate": 1.0457021349411715e-06, + "loss": 0.8461639285087585, + "step": 4438 + }, + { + "epoch": 1.0228110599078342, + "grad_norm": 1.0738697424760002, + "learning_rate": 1.0453215458902262e-06, + "loss": 0.7230383157730103, + "step": 4439 + }, + { + "epoch": 1.023041474654378, + "grad_norm": 1.195555915731222, + "learning_rate": 1.0449409502609186e-06, + "loss": 0.7506514191627502, + "step": 4440 + }, + { + "epoch": 1.0232718894009216, + "grad_norm": 1.2468090783946124, 
+ "learning_rate": 1.0445603481084914e-06, + "loss": 0.7530048489570618, + "step": 4441 + }, + { + "epoch": 1.0235023041474653, + "grad_norm": 1.1659142578592716, + "learning_rate": 1.044179739488189e-06, + "loss": 0.8402249813079834, + "step": 4442 + }, + { + "epoch": 1.0237327188940093, + "grad_norm": 0.9379480482149454, + "learning_rate": 1.0437991244552557e-06, + "loss": 0.7661963701248169, + "step": 4443 + }, + { + "epoch": 1.023963133640553, + "grad_norm": 1.484925993605904, + "learning_rate": 1.043418503064937e-06, + "loss": 0.7982668876647949, + "step": 4444 + }, + { + "epoch": 1.0241935483870968, + "grad_norm": 1.5153078123946815, + "learning_rate": 1.0430378753724807e-06, + "loss": 0.899538516998291, + "step": 4445 + }, + { + "epoch": 1.0244239631336405, + "grad_norm": 1.0283178313705175, + "learning_rate": 1.0426572414331337e-06, + "loss": 0.8027441501617432, + "step": 4446 + }, + { + "epoch": 1.0246543778801844, + "grad_norm": 1.0275551729897887, + "learning_rate": 1.0422766013021442e-06, + "loss": 0.8575221300125122, + "step": 4447 + }, + { + "epoch": 1.0248847926267282, + "grad_norm": 1.0529216327738424, + "learning_rate": 1.0418959550347622e-06, + "loss": 0.7001699209213257, + "step": 4448 + }, + { + "epoch": 1.0251152073732719, + "grad_norm": 1.344629476023339, + "learning_rate": 1.041515302686238e-06, + "loss": 0.9296507835388184, + "step": 4449 + }, + { + "epoch": 1.0253456221198156, + "grad_norm": 1.1736142719382505, + "learning_rate": 1.0411346443118222e-06, + "loss": 0.8214550018310547, + "step": 4450 + }, + { + "epoch": 1.0255760368663593, + "grad_norm": 1.111485424859677, + "learning_rate": 1.0407539799667673e-06, + "loss": 0.7598673701286316, + "step": 4451 + }, + { + "epoch": 1.0258064516129033, + "grad_norm": 1.1453890077051856, + "learning_rate": 1.0403733097063265e-06, + "loss": 0.8222990036010742, + "step": 4452 + }, + { + "epoch": 1.026036866359447, + "grad_norm": 0.8681765527907143, + "learning_rate": 1.039992633585753e-06, + "loss": 
0.7860872745513916, + "step": 4453 + }, + { + "epoch": 1.0262672811059907, + "grad_norm": 0.7352315377021262, + "learning_rate": 1.0396119516603018e-06, + "loss": 0.6602796912193298, + "step": 4454 + }, + { + "epoch": 1.0264976958525345, + "grad_norm": 0.7865024675454858, + "learning_rate": 1.0392312639852278e-06, + "loss": 0.554654598236084, + "step": 4455 + }, + { + "epoch": 1.0267281105990784, + "grad_norm": 0.997694873166315, + "learning_rate": 1.0388505706157885e-06, + "loss": 0.7977210879325867, + "step": 4456 + }, + { + "epoch": 1.0269585253456222, + "grad_norm": 0.9315155505189272, + "learning_rate": 1.0384698716072398e-06, + "loss": 0.8770938515663147, + "step": 4457 + }, + { + "epoch": 1.0271889400921659, + "grad_norm": 1.1958306146081352, + "learning_rate": 1.0380891670148403e-06, + "loss": 0.710452675819397, + "step": 4458 + }, + { + "epoch": 1.0274193548387096, + "grad_norm": 1.0231453414790668, + "learning_rate": 1.0377084568938485e-06, + "loss": 0.8876768946647644, + "step": 4459 + }, + { + "epoch": 1.0276497695852536, + "grad_norm": 1.1707146109643827, + "learning_rate": 1.0373277412995241e-06, + "loss": 0.7770971059799194, + "step": 4460 + }, + { + "epoch": 1.0278801843317973, + "grad_norm": 1.2438301523835749, + "learning_rate": 1.0369470202871275e-06, + "loss": 0.9199050068855286, + "step": 4461 + }, + { + "epoch": 1.028110599078341, + "grad_norm": 1.225766455591599, + "learning_rate": 1.0365662939119199e-06, + "loss": 0.7931548357009888, + "step": 4462 + }, + { + "epoch": 1.0283410138248847, + "grad_norm": 0.9403888957806107, + "learning_rate": 1.0361855622291636e-06, + "loss": 0.7484941482543945, + "step": 4463 + }, + { + "epoch": 1.0285714285714285, + "grad_norm": 1.1077517121943607, + "learning_rate": 1.03580482529412e-06, + "loss": 0.7639475464820862, + "step": 4464 + }, + { + "epoch": 1.0288018433179724, + "grad_norm": 0.9266455289292281, + "learning_rate": 1.035424083162054e-06, + "loss": 0.7705268859863281, + "step": 4465 + }, + { + 
"epoch": 1.0290322580645161, + "grad_norm": 1.0602296301972336, + "learning_rate": 1.0350433358882288e-06, + "loss": 0.7714117169380188, + "step": 4466 + }, + { + "epoch": 1.0292626728110599, + "grad_norm": 0.9812855436464868, + "learning_rate": 1.0346625835279102e-06, + "loss": 0.851073145866394, + "step": 4467 + }, + { + "epoch": 1.0294930875576036, + "grad_norm": 0.9352903997309275, + "learning_rate": 1.0342818261363631e-06, + "loss": 0.8001583218574524, + "step": 4468 + }, + { + "epoch": 1.0297235023041476, + "grad_norm": 1.1158901092617035, + "learning_rate": 1.0339010637688547e-06, + "loss": 0.8352588415145874, + "step": 4469 + }, + { + "epoch": 1.0299539170506913, + "grad_norm": 0.91245372061127, + "learning_rate": 1.0335202964806515e-06, + "loss": 0.8136032223701477, + "step": 4470 + }, + { + "epoch": 1.030184331797235, + "grad_norm": 1.1248571903620148, + "learning_rate": 1.0331395243270215e-06, + "loss": 0.8041108846664429, + "step": 4471 + }, + { + "epoch": 1.0304147465437787, + "grad_norm": 0.9370378251466553, + "learning_rate": 1.032758747363234e-06, + "loss": 0.6961067914962769, + "step": 4472 + }, + { + "epoch": 1.0306451612903227, + "grad_norm": 0.8328897533850071, + "learning_rate": 1.0323779656445572e-06, + "loss": 0.8063983917236328, + "step": 4473 + }, + { + "epoch": 1.0308755760368664, + "grad_norm": 1.01915176563276, + "learning_rate": 1.0319971792262618e-06, + "loss": 0.706061601638794, + "step": 4474 + }, + { + "epoch": 1.0311059907834101, + "grad_norm": 1.1193687254143303, + "learning_rate": 1.0316163881636181e-06, + "loss": 0.8510581254959106, + "step": 4475 + }, + { + "epoch": 1.0313364055299539, + "grad_norm": 0.8459775762451333, + "learning_rate": 1.0312355925118975e-06, + "loss": 0.7169028520584106, + "step": 4476 + }, + { + "epoch": 1.0315668202764976, + "grad_norm": 0.8345675502163972, + "learning_rate": 1.0308547923263718e-06, + "loss": 0.7513360977172852, + "step": 4477 + }, + { + "epoch": 1.0317972350230415, + "grad_norm": 
1.1826641384928935, + "learning_rate": 1.030473987662314e-06, + "loss": 0.7408783435821533, + "step": 4478 + }, + { + "epoch": 1.0320276497695853, + "grad_norm": 1.2135549739175484, + "learning_rate": 1.0300931785749974e-06, + "loss": 0.8177747130393982, + "step": 4479 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 1.074036475926982, + "learning_rate": 1.0297123651196954e-06, + "loss": 0.7530791759490967, + "step": 4480 + }, + { + "epoch": 1.0324884792626727, + "grad_norm": 1.2947307404575235, + "learning_rate": 1.0293315473516832e-06, + "loss": 0.7958859205245972, + "step": 4481 + }, + { + "epoch": 1.0327188940092167, + "grad_norm": 1.2482360288136136, + "learning_rate": 1.0289507253262357e-06, + "loss": 0.8719943761825562, + "step": 4482 + }, + { + "epoch": 1.0329493087557604, + "grad_norm": 1.0347953021678673, + "learning_rate": 1.028569899098629e-06, + "loss": 0.7584139108657837, + "step": 4483 + }, + { + "epoch": 1.0331797235023041, + "grad_norm": 1.1621251755994506, + "learning_rate": 1.0281890687241387e-06, + "loss": 0.852983832359314, + "step": 4484 + }, + { + "epoch": 1.0334101382488479, + "grad_norm": 0.995758429643109, + "learning_rate": 1.027808234258043e-06, + "loss": 0.7455692291259766, + "step": 4485 + }, + { + "epoch": 1.0336405529953918, + "grad_norm": 0.9126434588001895, + "learning_rate": 1.0274273957556185e-06, + "loss": 0.7078343629837036, + "step": 4486 + }, + { + "epoch": 1.0338709677419355, + "grad_norm": 1.056440353383354, + "learning_rate": 1.027046553272144e-06, + "loss": 0.7580842971801758, + "step": 4487 + }, + { + "epoch": 1.0341013824884793, + "grad_norm": 0.9071452550966383, + "learning_rate": 1.026665706862898e-06, + "loss": 0.7271389961242676, + "step": 4488 + }, + { + "epoch": 1.034331797235023, + "grad_norm": 1.3819767756673818, + "learning_rate": 1.0262848565831599e-06, + "loss": 0.8271546363830566, + "step": 4489 + }, + { + "epoch": 1.0345622119815667, + "grad_norm": 1.1533046933911033, + "learning_rate": 
1.0259040024882098e-06, + "loss": 0.6799920201301575, + "step": 4490 + }, + { + "epoch": 1.0347926267281107, + "grad_norm": 0.7837273040397605, + "learning_rate": 1.0255231446333277e-06, + "loss": 0.6962645053863525, + "step": 4491 + }, + { + "epoch": 1.0350230414746544, + "grad_norm": 1.2060107344479347, + "learning_rate": 1.0251422830737955e-06, + "loss": 0.8722797632217407, + "step": 4492 + }, + { + "epoch": 1.0352534562211981, + "grad_norm": 1.0328841633467782, + "learning_rate": 1.024761417864894e-06, + "loss": 0.8054880499839783, + "step": 4493 + }, + { + "epoch": 1.0354838709677419, + "grad_norm": 0.9178345615112383, + "learning_rate": 1.0243805490619053e-06, + "loss": 0.8196548223495483, + "step": 4494 + }, + { + "epoch": 1.0357142857142858, + "grad_norm": 1.5010413914558958, + "learning_rate": 1.0239996767201122e-06, + "loss": 0.8197275400161743, + "step": 4495 + }, + { + "epoch": 1.0359447004608295, + "grad_norm": 1.1223467429515472, + "learning_rate": 1.0236188008947978e-06, + "loss": 0.7704858779907227, + "step": 4496 + }, + { + "epoch": 1.0361751152073733, + "grad_norm": 1.2288506828429187, + "learning_rate": 1.0232379216412459e-06, + "loss": 0.8296232223510742, + "step": 4497 + }, + { + "epoch": 1.036405529953917, + "grad_norm": 1.1910482399414777, + "learning_rate": 1.0228570390147404e-06, + "loss": 0.6546601057052612, + "step": 4498 + }, + { + "epoch": 1.036635944700461, + "grad_norm": 1.0493042801064925, + "learning_rate": 1.0224761530705656e-06, + "loss": 0.808987021446228, + "step": 4499 + }, + { + "epoch": 1.0368663594470047, + "grad_norm": 1.0198435860671902, + "learning_rate": 1.0220952638640073e-06, + "loss": 0.862627387046814, + "step": 4500 + }, + { + "epoch": 1.0370967741935484, + "grad_norm": 0.9314966888515314, + "learning_rate": 1.0217143714503507e-06, + "loss": 0.781114935874939, + "step": 4501 + }, + { + "epoch": 1.0373271889400921, + "grad_norm": 1.1732597442137338, + "learning_rate": 1.0213334758848814e-06, + "loss": 
0.7186112403869629, + "step": 4502 + }, + { + "epoch": 1.0375576036866359, + "grad_norm": 0.9870711221115687, + "learning_rate": 1.0209525772228868e-06, + "loss": 0.8112529516220093, + "step": 4503 + }, + { + "epoch": 1.0377880184331798, + "grad_norm": 1.1558866878107408, + "learning_rate": 1.020571675519653e-06, + "loss": 0.7364751100540161, + "step": 4504 + }, + { + "epoch": 1.0380184331797235, + "grad_norm": 1.296821231113786, + "learning_rate": 1.0201907708304681e-06, + "loss": 0.7015886902809143, + "step": 4505 + }, + { + "epoch": 1.0382488479262673, + "grad_norm": 0.8755063657778166, + "learning_rate": 1.0198098632106197e-06, + "loss": 0.7018470168113708, + "step": 4506 + }, + { + "epoch": 1.038479262672811, + "grad_norm": 0.9958013421397902, + "learning_rate": 1.0194289527153953e-06, + "loss": 0.820391058921814, + "step": 4507 + }, + { + "epoch": 1.038709677419355, + "grad_norm": 1.2026544914516983, + "learning_rate": 1.0190480394000844e-06, + "loss": 0.8341129422187805, + "step": 4508 + }, + { + "epoch": 1.0389400921658987, + "grad_norm": 0.8606365913019236, + "learning_rate": 1.0186671233199757e-06, + "loss": 0.7345695495605469, + "step": 4509 + }, + { + "epoch": 1.0391705069124424, + "grad_norm": 1.375974242893794, + "learning_rate": 1.0182862045303589e-06, + "loss": 0.8899500370025635, + "step": 4510 + }, + { + "epoch": 1.0394009216589861, + "grad_norm": 1.001562990779633, + "learning_rate": 1.0179052830865238e-06, + "loss": 0.8158663511276245, + "step": 4511 + }, + { + "epoch": 1.0396313364055298, + "grad_norm": 1.1574048409080129, + "learning_rate": 1.0175243590437604e-06, + "loss": 0.734848141670227, + "step": 4512 + }, + { + "epoch": 1.0398617511520738, + "grad_norm": 1.062511127484639, + "learning_rate": 1.0171434324573596e-06, + "loss": 0.7920876741409302, + "step": 4513 + }, + { + "epoch": 1.0400921658986175, + "grad_norm": 1.2131341489328324, + "learning_rate": 1.0167625033826122e-06, + "loss": 0.9224791526794434, + "step": 4514 + }, + { + 
"epoch": 1.0403225806451613, + "grad_norm": 1.152494191321953, + "learning_rate": 1.0163815718748096e-06, + "loss": 0.7086025476455688, + "step": 4515 + }, + { + "epoch": 1.040552995391705, + "grad_norm": 1.0223491213154539, + "learning_rate": 1.0160006379892434e-06, + "loss": 0.7657936811447144, + "step": 4516 + }, + { + "epoch": 1.040783410138249, + "grad_norm": 1.11296257844156, + "learning_rate": 1.0156197017812058e-06, + "loss": 0.786298394203186, + "step": 4517 + }, + { + "epoch": 1.0410138248847927, + "grad_norm": 1.1998728834800867, + "learning_rate": 1.0152387633059895e-06, + "loss": 0.8667294979095459, + "step": 4518 + }, + { + "epoch": 1.0412442396313364, + "grad_norm": 1.0233425185279803, + "learning_rate": 1.0148578226188866e-06, + "loss": 0.8479517102241516, + "step": 4519 + }, + { + "epoch": 1.0414746543778801, + "grad_norm": 0.8930216519245627, + "learning_rate": 1.0144768797751904e-06, + "loss": 0.6430692076683044, + "step": 4520 + }, + { + "epoch": 1.041705069124424, + "grad_norm": 1.122852329570553, + "learning_rate": 1.0140959348301946e-06, + "loss": 0.874313473701477, + "step": 4521 + }, + { + "epoch": 1.0419354838709678, + "grad_norm": 1.101097598838231, + "learning_rate": 1.013714987839192e-06, + "loss": 0.8439676761627197, + "step": 4522 + }, + { + "epoch": 1.0421658986175115, + "grad_norm": 1.2477053670484948, + "learning_rate": 1.0133340388574774e-06, + "loss": 0.7480089664459229, + "step": 4523 + }, + { + "epoch": 1.0423963133640552, + "grad_norm": 1.3143250159570112, + "learning_rate": 1.012953087940345e-06, + "loss": 0.8786139488220215, + "step": 4524 + }, + { + "epoch": 1.042626728110599, + "grad_norm": 1.1897211165926171, + "learning_rate": 1.0125721351430885e-06, + "loss": 0.8333299160003662, + "step": 4525 + }, + { + "epoch": 1.042857142857143, + "grad_norm": 1.055645356383861, + "learning_rate": 1.0121911805210032e-06, + "loss": 0.8201998472213745, + "step": 4526 + }, + { + "epoch": 1.0430875576036867, + "grad_norm": 
1.160199033506195, + "learning_rate": 1.0118102241293847e-06, + "loss": 0.7793110609054565, + "step": 4527 + }, + { + "epoch": 1.0433179723502304, + "grad_norm": 1.045720270383819, + "learning_rate": 1.0114292660235272e-06, + "loss": 0.7148817777633667, + "step": 4528 + }, + { + "epoch": 1.043548387096774, + "grad_norm": 1.0726942336798908, + "learning_rate": 1.011048306258727e-06, + "loss": 0.7945176362991333, + "step": 4529 + }, + { + "epoch": 1.043778801843318, + "grad_norm": 1.0532791972453868, + "learning_rate": 1.01066734489028e-06, + "loss": 0.7246826887130737, + "step": 4530 + }, + { + "epoch": 1.0440092165898618, + "grad_norm": 1.230297656368, + "learning_rate": 1.0102863819734822e-06, + "loss": 0.7342358827590942, + "step": 4531 + }, + { + "epoch": 1.0442396313364055, + "grad_norm": 1.1072867148521375, + "learning_rate": 1.0099054175636292e-06, + "loss": 0.6837234497070312, + "step": 4532 + }, + { + "epoch": 1.0444700460829492, + "grad_norm": 0.8847188010063922, + "learning_rate": 1.0095244517160184e-06, + "loss": 0.6941408514976501, + "step": 4533 + }, + { + "epoch": 1.0447004608294932, + "grad_norm": 0.9992175314765978, + "learning_rate": 1.009143484485946e-06, + "loss": 0.7835201025009155, + "step": 4534 + }, + { + "epoch": 1.044930875576037, + "grad_norm": 1.1533173348493126, + "learning_rate": 1.0087625159287086e-06, + "loss": 0.7887566089630127, + "step": 4535 + }, + { + "epoch": 1.0451612903225806, + "grad_norm": 0.9980831932241371, + "learning_rate": 1.0083815460996036e-06, + "loss": 0.7106727361679077, + "step": 4536 + }, + { + "epoch": 1.0453917050691244, + "grad_norm": 1.1003103489016812, + "learning_rate": 1.0080005750539287e-06, + "loss": 0.8316382169723511, + "step": 4537 + }, + { + "epoch": 1.045622119815668, + "grad_norm": 1.278017855977623, + "learning_rate": 1.0076196028469805e-06, + "loss": 0.7535592317581177, + "step": 4538 + }, + { + "epoch": 1.045852534562212, + "grad_norm": 1.2167524484109087, + "learning_rate": 
1.0072386295340571e-06, + "loss": 0.9255459308624268, + "step": 4539 + }, + { + "epoch": 1.0460829493087558, + "grad_norm": 0.9884104383515986, + "learning_rate": 1.0068576551704561e-06, + "loss": 0.7415009140968323, + "step": 4540 + }, + { + "epoch": 1.0463133640552995, + "grad_norm": 0.9221193872044946, + "learning_rate": 1.0064766798114758e-06, + "loss": 0.673210620880127, + "step": 4541 + }, + { + "epoch": 1.0465437788018432, + "grad_norm": 1.2907861596502346, + "learning_rate": 1.006095703512414e-06, + "loss": 0.7063118815422058, + "step": 4542 + }, + { + "epoch": 1.0467741935483872, + "grad_norm": 1.0344490200256125, + "learning_rate": 1.005714726328569e-06, + "loss": 0.73606276512146, + "step": 4543 + }, + { + "epoch": 1.047004608294931, + "grad_norm": 1.1024687809140408, + "learning_rate": 1.005333748315239e-06, + "loss": 0.6723713874816895, + "step": 4544 + }, + { + "epoch": 1.0472350230414746, + "grad_norm": 1.0566239460690536, + "learning_rate": 1.0049527695277223e-06, + "loss": 0.643845796585083, + "step": 4545 + }, + { + "epoch": 1.0474654377880184, + "grad_norm": 1.1196128686458957, + "learning_rate": 1.0045717900213175e-06, + "loss": 0.8820847272872925, + "step": 4546 + }, + { + "epoch": 1.047695852534562, + "grad_norm": 1.177142500227169, + "learning_rate": 1.0041908098513239e-06, + "loss": 0.6555176973342896, + "step": 4547 + }, + { + "epoch": 1.047926267281106, + "grad_norm": 1.4046987769414077, + "learning_rate": 1.0038098290730394e-06, + "loss": 0.8142974376678467, + "step": 4548 + }, + { + "epoch": 1.0481566820276498, + "grad_norm": 1.3843242800793498, + "learning_rate": 1.0034288477417634e-06, + "loss": 0.8107532262802124, + "step": 4549 + }, + { + "epoch": 1.0483870967741935, + "grad_norm": 1.093115680939654, + "learning_rate": 1.0030478659127947e-06, + "loss": 0.7078464031219482, + "step": 4550 + }, + { + "epoch": 1.0486175115207372, + "grad_norm": 1.3647000829373368, + "learning_rate": 1.0026668836414322e-06, + "loss": 0.9168295860290527, + 
"step": 4551 + }, + { + "epoch": 1.0488479262672812, + "grad_norm": 0.7154125463388302, + "learning_rate": 1.0022859009829752e-06, + "loss": 0.7384864091873169, + "step": 4552 + }, + { + "epoch": 1.049078341013825, + "grad_norm": 0.9459016715465385, + "learning_rate": 1.0019049179927229e-06, + "loss": 0.6092562675476074, + "step": 4553 + }, + { + "epoch": 1.0493087557603686, + "grad_norm": 1.159695075830992, + "learning_rate": 1.001523934725974e-06, + "loss": 0.713464617729187, + "step": 4554 + }, + { + "epoch": 1.0495391705069124, + "grad_norm": 0.9471368467961162, + "learning_rate": 1.001142951238028e-06, + "loss": 0.7514123916625977, + "step": 4555 + }, + { + "epoch": 1.0497695852534563, + "grad_norm": 1.1414214053095963, + "learning_rate": 1.000761967584184e-06, + "loss": 0.8092095851898193, + "step": 4556 + }, + { + "epoch": 1.05, + "grad_norm": 0.830509770117895, + "learning_rate": 1.000380983819742e-06, + "loss": 0.7609254717826843, + "step": 4557 + }, + { + "epoch": 1.0502304147465438, + "grad_norm": 0.8874333429433436, + "learning_rate": 1e-06, + "loss": 0.8363404273986816, + "step": 4558 + }, + { + "epoch": 1.0504608294930875, + "grad_norm": 1.1983399653767088, + "learning_rate": 9.996190161802584e-07, + "loss": 0.8139501810073853, + "step": 4559 + }, + { + "epoch": 1.0506912442396312, + "grad_norm": 0.8984420952696672, + "learning_rate": 9.992380324158157e-07, + "loss": 0.8064978122711182, + "step": 4560 + }, + { + "epoch": 1.0509216589861752, + "grad_norm": 0.9258651657418774, + "learning_rate": 9.988570487619721e-07, + "loss": 0.7162975072860718, + "step": 4561 + }, + { + "epoch": 1.051152073732719, + "grad_norm": 1.2196516767947119, + "learning_rate": 9.984760652740261e-07, + "loss": 0.9298074245452881, + "step": 4562 + }, + { + "epoch": 1.0513824884792626, + "grad_norm": 1.0770268299074148, + "learning_rate": 9.980950820072773e-07, + "loss": 0.6929144859313965, + "step": 4563 + }, + { + "epoch": 1.0516129032258064, + "grad_norm": 0.919564091111097, + 
"learning_rate": 9.97714099017025e-07, + "loss": 0.6516381502151489, + "step": 4564 + }, + { + "epoch": 1.0518433179723503, + "grad_norm": 1.091105354713726, + "learning_rate": 9.97333116358568e-07, + "loss": 0.864730715751648, + "step": 4565 + }, + { + "epoch": 1.052073732718894, + "grad_norm": 0.9113453911026408, + "learning_rate": 9.969521340872052e-07, + "loss": 0.7911246418952942, + "step": 4566 + }, + { + "epoch": 1.0523041474654378, + "grad_norm": 1.032556518691269, + "learning_rate": 9.965711522582367e-07, + "loss": 0.7766593098640442, + "step": 4567 + }, + { + "epoch": 1.0525345622119815, + "grad_norm": 1.1309615036566574, + "learning_rate": 9.961901709269607e-07, + "loss": 0.7703378200531006, + "step": 4568 + }, + { + "epoch": 1.0527649769585254, + "grad_norm": 0.9296180823184125, + "learning_rate": 9.958091901486762e-07, + "loss": 0.7068926692008972, + "step": 4569 + }, + { + "epoch": 1.0529953917050692, + "grad_norm": 1.0589255494911889, + "learning_rate": 9.954282099786824e-07, + "loss": 0.740556538105011, + "step": 4570 + }, + { + "epoch": 1.053225806451613, + "grad_norm": 1.1264720214776667, + "learning_rate": 9.950472304722778e-07, + "loss": 0.798403263092041, + "step": 4571 + }, + { + "epoch": 1.0534562211981566, + "grad_norm": 0.9551633921802427, + "learning_rate": 9.94666251684761e-07, + "loss": 0.6945887804031372, + "step": 4572 + }, + { + "epoch": 1.0536866359447004, + "grad_norm": 1.0978186377940822, + "learning_rate": 9.942852736714312e-07, + "loss": 0.8257915377616882, + "step": 4573 + }, + { + "epoch": 1.0539170506912443, + "grad_norm": 1.108870855150134, + "learning_rate": 9.939042964875859e-07, + "loss": 0.751315712928772, + "step": 4574 + }, + { + "epoch": 1.054147465437788, + "grad_norm": 0.8929134755319279, + "learning_rate": 9.935233201885241e-07, + "loss": 0.6607721447944641, + "step": 4575 + }, + { + "epoch": 1.0543778801843318, + "grad_norm": 1.1623094406064765, + "learning_rate": 9.931423448295438e-07, + "loss": 
0.9135023355484009, + "step": 4576 + }, + { + "epoch": 1.0546082949308755, + "grad_norm": 1.1079901137426853, + "learning_rate": 9.927613704659428e-07, + "loss": 0.8238483667373657, + "step": 4577 + }, + { + "epoch": 1.0548387096774194, + "grad_norm": 1.0927838633299076, + "learning_rate": 9.923803971530196e-07, + "loss": 0.7657001614570618, + "step": 4578 + }, + { + "epoch": 1.0550691244239632, + "grad_norm": 1.0858899027259339, + "learning_rate": 9.919994249460717e-07, + "loss": 0.6360250115394592, + "step": 4579 + }, + { + "epoch": 1.055299539170507, + "grad_norm": 3.1983788784304843, + "learning_rate": 9.916184539003963e-07, + "loss": 0.6958763003349304, + "step": 4580 + }, + { + "epoch": 1.0555299539170506, + "grad_norm": 1.0079237517587447, + "learning_rate": 9.912374840712915e-07, + "loss": 0.7093038558959961, + "step": 4581 + }, + { + "epoch": 1.0557603686635946, + "grad_norm": 1.0680215254508902, + "learning_rate": 9.908565155140544e-07, + "loss": 0.7641304731369019, + "step": 4582 + }, + { + "epoch": 1.0559907834101383, + "grad_norm": 0.8923201066182703, + "learning_rate": 9.904755482839817e-07, + "loss": 0.7976446151733398, + "step": 4583 + }, + { + "epoch": 1.056221198156682, + "grad_norm": 1.0963737907088362, + "learning_rate": 9.900945824363707e-07, + "loss": 0.8407114744186401, + "step": 4584 + }, + { + "epoch": 1.0564516129032258, + "grad_norm": 1.0695401976763876, + "learning_rate": 9.897136180265181e-07, + "loss": 0.7988634705543518, + "step": 4585 + }, + { + "epoch": 1.0566820276497695, + "grad_norm": 1.072342293651018, + "learning_rate": 9.893326551097198e-07, + "loss": 0.7847359776496887, + "step": 4586 + }, + { + "epoch": 1.0569124423963134, + "grad_norm": 1.0629893453410204, + "learning_rate": 9.889516937412728e-07, + "loss": 0.8458963632583618, + "step": 4587 + }, + { + "epoch": 1.0571428571428572, + "grad_norm": 1.1301054626559641, + "learning_rate": 9.88570733976473e-07, + "loss": 0.8479788899421692, + "step": 4588 + }, + { + "epoch": 
1.057373271889401, + "grad_norm": 1.180492999769349, + "learning_rate": 9.881897758706154e-07, + "loss": 0.7467283010482788, + "step": 4589 + }, + { + "epoch": 1.0576036866359446, + "grad_norm": 1.1676226241505752, + "learning_rate": 9.878088194789967e-07, + "loss": 0.9400098323822021, + "step": 4590 + }, + { + "epoch": 1.0578341013824886, + "grad_norm": 1.2151292863225376, + "learning_rate": 9.874278648569118e-07, + "loss": 0.8901257514953613, + "step": 4591 + }, + { + "epoch": 1.0580645161290323, + "grad_norm": 1.2956773767909102, + "learning_rate": 9.870469120596552e-07, + "loss": 0.840053379535675, + "step": 4592 + }, + { + "epoch": 1.058294930875576, + "grad_norm": 0.9938952111506293, + "learning_rate": 9.866659611425225e-07, + "loss": 0.6825235486030579, + "step": 4593 + }, + { + "epoch": 1.0585253456221198, + "grad_norm": 1.2521534530730631, + "learning_rate": 9.86285012160808e-07, + "loss": 0.7783857583999634, + "step": 4594 + }, + { + "epoch": 1.0587557603686637, + "grad_norm": 1.0517032997656734, + "learning_rate": 9.859040651698055e-07, + "loss": 0.7901174426078796, + "step": 4595 + }, + { + "epoch": 1.0589861751152074, + "grad_norm": 1.2211963787816231, + "learning_rate": 9.855231202248097e-07, + "loss": 0.9475124478340149, + "step": 4596 + }, + { + "epoch": 1.0592165898617512, + "grad_norm": 1.1872676544788658, + "learning_rate": 9.851421773811133e-07, + "loss": 0.8582692742347717, + "step": 4597 + }, + { + "epoch": 1.0594470046082949, + "grad_norm": 1.1723948726757356, + "learning_rate": 9.847612366940106e-07, + "loss": 0.7885586023330688, + "step": 4598 + }, + { + "epoch": 1.0596774193548386, + "grad_norm": 1.17635061110199, + "learning_rate": 9.843802982187943e-07, + "loss": 0.7981748580932617, + "step": 4599 + }, + { + "epoch": 1.0599078341013826, + "grad_norm": 0.9066343519689628, + "learning_rate": 9.839993620107563e-07, + "loss": 0.7060403823852539, + "step": 4600 + }, + { + "epoch": 1.0601382488479263, + "grad_norm": 1.2126688495293467, + 
"learning_rate": 9.836184281251905e-07, + "loss": 0.7902223467826843, + "step": 4601 + }, + { + "epoch": 1.06036866359447, + "grad_norm": 0.9972491115312556, + "learning_rate": 9.83237496617388e-07, + "loss": 0.7074719071388245, + "step": 4602 + }, + { + "epoch": 1.0605990783410137, + "grad_norm": 0.9455936494800175, + "learning_rate": 9.828565675426405e-07, + "loss": 0.7180163264274597, + "step": 4603 + }, + { + "epoch": 1.0608294930875577, + "grad_norm": 0.8990997781996365, + "learning_rate": 9.824756409562397e-07, + "loss": 0.7040787935256958, + "step": 4604 + }, + { + "epoch": 1.0610599078341014, + "grad_norm": 1.0311368456712493, + "learning_rate": 9.820947169134765e-07, + "loss": 0.8387063145637512, + "step": 4605 + }, + { + "epoch": 1.0612903225806452, + "grad_norm": 1.0692817612993422, + "learning_rate": 9.81713795469641e-07, + "loss": 0.8587188124656677, + "step": 4606 + }, + { + "epoch": 1.0615207373271889, + "grad_norm": 1.0418289468184643, + "learning_rate": 9.813328766800242e-07, + "loss": 0.729094386100769, + "step": 4607 + }, + { + "epoch": 1.0617511520737328, + "grad_norm": 1.1884134090864242, + "learning_rate": 9.809519605999158e-07, + "loss": 1.0576609373092651, + "step": 4608 + }, + { + "epoch": 1.0619815668202766, + "grad_norm": 1.1124938149620707, + "learning_rate": 9.805710472846044e-07, + "loss": 0.7605572938919067, + "step": 4609 + }, + { + "epoch": 1.0622119815668203, + "grad_norm": 0.9566684121068049, + "learning_rate": 9.801901367893807e-07, + "loss": 0.722477912902832, + "step": 4610 + }, + { + "epoch": 1.062442396313364, + "grad_norm": 0.9185071862681494, + "learning_rate": 9.79809229169532e-07, + "loss": 0.7335925698280334, + "step": 4611 + }, + { + "epoch": 1.0626728110599077, + "grad_norm": 1.0494538531790283, + "learning_rate": 9.794283244803466e-07, + "loss": 0.8116357922554016, + "step": 4612 + }, + { + "epoch": 1.0629032258064517, + "grad_norm": 1.0519905027101895, + "learning_rate": 9.79047422777113e-07, + "loss": 
0.8004311323165894, + "step": 4613 + }, + { + "epoch": 1.0631336405529954, + "grad_norm": 0.9803128568921189, + "learning_rate": 9.786665241151185e-07, + "loss": 0.8198168277740479, + "step": 4614 + }, + { + "epoch": 1.0633640552995391, + "grad_norm": 0.9841178854805237, + "learning_rate": 9.782856285496494e-07, + "loss": 0.7031205892562866, + "step": 4615 + }, + { + "epoch": 1.0635944700460829, + "grad_norm": 1.055262322588535, + "learning_rate": 9.779047361359928e-07, + "loss": 0.7303737998008728, + "step": 4616 + }, + { + "epoch": 1.0638248847926268, + "grad_norm": 1.1694198331033647, + "learning_rate": 9.775238469294345e-07, + "loss": 0.8775424957275391, + "step": 4617 + }, + { + "epoch": 1.0640552995391706, + "grad_norm": 0.9013154484602001, + "learning_rate": 9.771429609852597e-07, + "loss": 0.7463759183883667, + "step": 4618 + }, + { + "epoch": 1.0642857142857143, + "grad_norm": 0.8792691967623277, + "learning_rate": 9.767620783587542e-07, + "loss": 0.7200205326080322, + "step": 4619 + }, + { + "epoch": 1.064516129032258, + "grad_norm": 0.9102194522316246, + "learning_rate": 9.763811991052019e-07, + "loss": 0.8255786299705505, + "step": 4620 + }, + { + "epoch": 1.064746543778802, + "grad_norm": 1.2552865619465912, + "learning_rate": 9.760003232798877e-07, + "loss": 0.7975195050239563, + "step": 4621 + }, + { + "epoch": 1.0649769585253457, + "grad_norm": 0.9993977940644363, + "learning_rate": 9.756194509380948e-07, + "loss": 0.6993064880371094, + "step": 4622 + }, + { + "epoch": 1.0652073732718894, + "grad_norm": 1.314757658160511, + "learning_rate": 9.752385821351062e-07, + "loss": 0.818634033203125, + "step": 4623 + }, + { + "epoch": 1.0654377880184331, + "grad_norm": 1.0949894149977886, + "learning_rate": 9.748577169262046e-07, + "loss": 0.707933783531189, + "step": 4624 + }, + { + "epoch": 1.0656682027649769, + "grad_norm": 1.1439419332653986, + "learning_rate": 9.744768553666723e-07, + "loss": 0.8133440017700195, + "step": 4625 + }, + { + "epoch": 
1.0658986175115208, + "grad_norm": 1.1394394770433072, + "learning_rate": 9.740959975117901e-07, + "loss": 0.8818857669830322, + "step": 4626 + }, + { + "epoch": 1.0661290322580645, + "grad_norm": 0.9617616601353652, + "learning_rate": 9.737151434168402e-07, + "loss": 0.6057544946670532, + "step": 4627 + }, + { + "epoch": 1.0663594470046083, + "grad_norm": 1.047486055121172, + "learning_rate": 9.733342931371023e-07, + "loss": 0.7560185194015503, + "step": 4628 + }, + { + "epoch": 1.066589861751152, + "grad_norm": 1.233360971442642, + "learning_rate": 9.72953446727856e-07, + "loss": 0.8196524381637573, + "step": 4629 + }, + { + "epoch": 1.066820276497696, + "grad_norm": 1.031309795003994, + "learning_rate": 9.725726042443814e-07, + "loss": 0.8695862889289856, + "step": 4630 + }, + { + "epoch": 1.0670506912442397, + "grad_norm": 0.9769847065094724, + "learning_rate": 9.721917657419573e-07, + "loss": 0.7753207683563232, + "step": 4631 + }, + { + "epoch": 1.0672811059907834, + "grad_norm": 1.0908524037443617, + "learning_rate": 9.718109312758612e-07, + "loss": 0.8245481252670288, + "step": 4632 + }, + { + "epoch": 1.0675115207373271, + "grad_norm": 1.201628166799481, + "learning_rate": 9.71430100901371e-07, + "loss": 0.8654806613922119, + "step": 4633 + }, + { + "epoch": 1.067741935483871, + "grad_norm": 1.22982718965067, + "learning_rate": 9.710492746737642e-07, + "loss": 0.8667370080947876, + "step": 4634 + }, + { + "epoch": 1.0679723502304148, + "grad_norm": 1.2635323967888392, + "learning_rate": 9.706684526483167e-07, + "loss": 0.7786421775817871, + "step": 4635 + }, + { + "epoch": 1.0682027649769585, + "grad_norm": 1.037203898616246, + "learning_rate": 9.702876348803045e-07, + "loss": 0.7788090705871582, + "step": 4636 + }, + { + "epoch": 1.0684331797235023, + "grad_norm": 1.1815160856137523, + "learning_rate": 9.69906821425003e-07, + "loss": 0.812332034111023, + "step": 4637 + }, + { + "epoch": 1.068663594470046, + "grad_norm": 1.2578908038434822, + 
"learning_rate": 9.69526012337686e-07, + "loss": 0.7884202599525452, + "step": 4638 + }, + { + "epoch": 1.06889400921659, + "grad_norm": 1.0539526708204177, + "learning_rate": 9.69145207673628e-07, + "loss": 0.725990891456604, + "step": 4639 + }, + { + "epoch": 1.0691244239631337, + "grad_norm": 1.01343921612526, + "learning_rate": 9.687644074881028e-07, + "loss": 0.7277272343635559, + "step": 4640 + }, + { + "epoch": 1.0693548387096774, + "grad_norm": 1.0871506025213427, + "learning_rate": 9.683836118363818e-07, + "loss": 0.8081945180892944, + "step": 4641 + }, + { + "epoch": 1.0695852534562211, + "grad_norm": 1.1050642405984226, + "learning_rate": 9.680028207737383e-07, + "loss": 0.8633503913879395, + "step": 4642 + }, + { + "epoch": 1.069815668202765, + "grad_norm": 0.9415461517108813, + "learning_rate": 9.67622034355443e-07, + "loss": 0.7873313426971436, + "step": 4643 + }, + { + "epoch": 1.0700460829493088, + "grad_norm": 1.269353126640295, + "learning_rate": 9.67241252636766e-07, + "loss": 0.7927644848823547, + "step": 4644 + }, + { + "epoch": 1.0702764976958525, + "grad_norm": 1.395156348091843, + "learning_rate": 9.668604756729784e-07, + "loss": 0.9458138942718506, + "step": 4645 + }, + { + "epoch": 1.0705069124423963, + "grad_norm": 1.2621680271291411, + "learning_rate": 9.664797035193484e-07, + "loss": 0.7471280097961426, + "step": 4646 + }, + { + "epoch": 1.07073732718894, + "grad_norm": 1.0373772164844823, + "learning_rate": 9.660989362311455e-07, + "loss": 0.7666789293289185, + "step": 4647 + }, + { + "epoch": 1.070967741935484, + "grad_norm": 0.8355654249705468, + "learning_rate": 9.65718173863637e-07, + "loss": 0.7846331000328064, + "step": 4648 + }, + { + "epoch": 1.0711981566820277, + "grad_norm": 1.1393955111251446, + "learning_rate": 9.653374164720897e-07, + "loss": 0.7790371179580688, + "step": 4649 + }, + { + "epoch": 1.0714285714285714, + "grad_norm": 1.110758470727215, + "learning_rate": 9.64956664111771e-07, + "loss": 0.9056169986724854, + 
"step": 4650 + }, + { + "epoch": 1.0716589861751151, + "grad_norm": 0.84240400487228, + "learning_rate": 9.645759168379461e-07, + "loss": 0.6839256286621094, + "step": 4651 + }, + { + "epoch": 1.071889400921659, + "grad_norm": 1.377334701305697, + "learning_rate": 9.641951747058799e-07, + "loss": 0.7071784138679504, + "step": 4652 + }, + { + "epoch": 1.0721198156682028, + "grad_norm": 1.1683127374870803, + "learning_rate": 9.638144377708366e-07, + "loss": 0.8166929483413696, + "step": 4653 + }, + { + "epoch": 1.0723502304147465, + "grad_norm": 1.239204160701412, + "learning_rate": 9.6343370608808e-07, + "loss": 0.8013010621070862, + "step": 4654 + }, + { + "epoch": 1.0725806451612903, + "grad_norm": 1.0825444957318084, + "learning_rate": 9.630529797128722e-07, + "loss": 0.8157169818878174, + "step": 4655 + }, + { + "epoch": 1.072811059907834, + "grad_norm": 1.0890180382455945, + "learning_rate": 9.626722587004758e-07, + "loss": 0.6467397212982178, + "step": 4656 + }, + { + "epoch": 1.073041474654378, + "grad_norm": 0.840613071204114, + "learning_rate": 9.622915431061519e-07, + "loss": 0.6623806953430176, + "step": 4657 + }, + { + "epoch": 1.0732718894009217, + "grad_norm": 0.9242647901691624, + "learning_rate": 9.619108329851596e-07, + "loss": 0.8333703279495239, + "step": 4658 + }, + { + "epoch": 1.0735023041474654, + "grad_norm": 1.1552752606597634, + "learning_rate": 9.615301283927603e-07, + "loss": 0.8798840045928955, + "step": 4659 + }, + { + "epoch": 1.0737327188940091, + "grad_norm": 1.1547075721097313, + "learning_rate": 9.611494293842119e-07, + "loss": 0.8712242841720581, + "step": 4660 + }, + { + "epoch": 1.073963133640553, + "grad_norm": 1.030127804248938, + "learning_rate": 9.60768736014772e-07, + "loss": 0.720801591873169, + "step": 4661 + }, + { + "epoch": 1.0741935483870968, + "grad_norm": 1.0305643381766019, + "learning_rate": 9.603880483396983e-07, + "loss": 0.7974982857704163, + "step": 4662 + }, + { + "epoch": 1.0744239631336405, + "grad_norm": 
1.1569753217458012, + "learning_rate": 9.600073664142471e-07, + "loss": 0.7656542062759399, + "step": 4663 + }, + { + "epoch": 1.0746543778801843, + "grad_norm": 1.2831377014983525, + "learning_rate": 9.596266902936737e-07, + "loss": 0.8274385333061218, + "step": 4664 + }, + { + "epoch": 1.0748847926267282, + "grad_norm": 1.1261587516242995, + "learning_rate": 9.592460200332328e-07, + "loss": 0.6508798599243164, + "step": 4665 + }, + { + "epoch": 1.075115207373272, + "grad_norm": 0.8712727383997491, + "learning_rate": 9.588653556881781e-07, + "loss": 0.6393407583236694, + "step": 4666 + }, + { + "epoch": 1.0753456221198157, + "grad_norm": 0.8300127743505744, + "learning_rate": 9.58484697313762e-07, + "loss": 0.7857781052589417, + "step": 4667 + }, + { + "epoch": 1.0755760368663594, + "grad_norm": 1.0591582120645788, + "learning_rate": 9.58104044965238e-07, + "loss": 0.7433615922927856, + "step": 4668 + }, + { + "epoch": 1.0758064516129031, + "grad_norm": 0.9252765779736452, + "learning_rate": 9.57723398697856e-07, + "loss": 0.6694349646568298, + "step": 4669 + }, + { + "epoch": 1.076036866359447, + "grad_norm": 1.06633744555344, + "learning_rate": 9.573427585668664e-07, + "loss": 0.7849506735801697, + "step": 4670 + }, + { + "epoch": 1.0762672811059908, + "grad_norm": 0.948086558097784, + "learning_rate": 9.569621246275194e-07, + "loss": 0.5924462080001831, + "step": 4671 + }, + { + "epoch": 1.0764976958525345, + "grad_norm": 1.0764379613448063, + "learning_rate": 9.565814969350628e-07, + "loss": 0.7679359316825867, + "step": 4672 + }, + { + "epoch": 1.0767281105990782, + "grad_norm": 0.8770076747846444, + "learning_rate": 9.562008755447444e-07, + "loss": 0.803286612033844, + "step": 4673 + }, + { + "epoch": 1.0769585253456222, + "grad_norm": 0.9139287879253918, + "learning_rate": 9.558202605118112e-07, + "loss": 0.6302975416183472, + "step": 4674 + }, + { + "epoch": 1.077188940092166, + "grad_norm": 1.1929014758233443, + "learning_rate": 9.554396518915085e-07, + 
"loss": 0.7441667914390564, + "step": 4675 + }, + { + "epoch": 1.0774193548387097, + "grad_norm": 1.1469726623234646, + "learning_rate": 9.550590497390815e-07, + "loss": 0.805221438407898, + "step": 4676 + }, + { + "epoch": 1.0776497695852534, + "grad_norm": 1.1540692428304171, + "learning_rate": 9.54678454109774e-07, + "loss": 0.9557743072509766, + "step": 4677 + }, + { + "epoch": 1.0778801843317973, + "grad_norm": 1.0781366924036009, + "learning_rate": 9.542978650588284e-07, + "loss": 0.7361980080604553, + "step": 4678 + }, + { + "epoch": 1.078110599078341, + "grad_norm": 1.2143012487351885, + "learning_rate": 9.539172826414876e-07, + "loss": 0.7474843263626099, + "step": 4679 + }, + { + "epoch": 1.0783410138248848, + "grad_norm": 1.0143818885553835, + "learning_rate": 9.535367069129923e-07, + "loss": 0.595927357673645, + "step": 4680 + }, + { + "epoch": 1.0785714285714285, + "grad_norm": 1.1128254146821686, + "learning_rate": 9.531561379285818e-07, + "loss": 0.894598126411438, + "step": 4681 + }, + { + "epoch": 1.0788018433179722, + "grad_norm": 1.3233034879697116, + "learning_rate": 9.527755757434966e-07, + "loss": 0.915902853012085, + "step": 4682 + }, + { + "epoch": 1.0790322580645162, + "grad_norm": 1.3436084997047495, + "learning_rate": 9.523950204129739e-07, + "loss": 0.8670432567596436, + "step": 4683 + }, + { + "epoch": 1.07926267281106, + "grad_norm": 1.119487791223308, + "learning_rate": 9.520144719922508e-07, + "loss": 0.7829893231391907, + "step": 4684 + }, + { + "epoch": 1.0794930875576036, + "grad_norm": 1.1633745895382166, + "learning_rate": 9.516339305365638e-07, + "loss": 0.6584970951080322, + "step": 4685 + }, + { + "epoch": 1.0797235023041474, + "grad_norm": 1.0240703451548752, + "learning_rate": 9.512533961011478e-07, + "loss": 0.7853457927703857, + "step": 4686 + }, + { + "epoch": 1.0799539170506913, + "grad_norm": 0.8755927642296618, + "learning_rate": 9.508728687412364e-07, + "loss": 0.7890632152557373, + "step": 4687 + }, + { + "epoch": 
1.080184331797235, + "grad_norm": 1.1475809434863895, + "learning_rate": 9.504923485120634e-07, + "loss": 0.8281408548355103, + "step": 4688 + }, + { + "epoch": 1.0804147465437788, + "grad_norm": 0.9222741947208914, + "learning_rate": 9.501118354688605e-07, + "loss": 0.7878601551055908, + "step": 4689 + }, + { + "epoch": 1.0806451612903225, + "grad_norm": 1.3827368592572105, + "learning_rate": 9.497313296668582e-07, + "loss": 0.8332592844963074, + "step": 4690 + }, + { + "epoch": 1.0808755760368665, + "grad_norm": 1.0564274993228098, + "learning_rate": 9.493508311612874e-07, + "loss": 0.7680759429931641, + "step": 4691 + }, + { + "epoch": 1.0811059907834102, + "grad_norm": 0.9446139934289677, + "learning_rate": 9.489703400073762e-07, + "loss": 0.6368690729141235, + "step": 4692 + }, + { + "epoch": 1.081336405529954, + "grad_norm": 1.1588361552017052, + "learning_rate": 9.485898562603525e-07, + "loss": 0.7018477916717529, + "step": 4693 + }, + { + "epoch": 1.0815668202764976, + "grad_norm": 1.057066552712669, + "learning_rate": 9.482093799754432e-07, + "loss": 0.8494987487792969, + "step": 4694 + }, + { + "epoch": 1.0817972350230414, + "grad_norm": 1.0119994692546468, + "learning_rate": 9.478289112078736e-07, + "loss": 0.8146306276321411, + "step": 4695 + }, + { + "epoch": 1.0820276497695853, + "grad_norm": 1.054771760893497, + "learning_rate": 9.474484500128689e-07, + "loss": 0.7832612991333008, + "step": 4696 + }, + { + "epoch": 1.082258064516129, + "grad_norm": 1.0487197763357414, + "learning_rate": 9.470679964456519e-07, + "loss": 0.8569360971450806, + "step": 4697 + }, + { + "epoch": 1.0824884792626728, + "grad_norm": 1.1432115985173055, + "learning_rate": 9.466875505614449e-07, + "loss": 0.8145112991333008, + "step": 4698 + }, + { + "epoch": 1.0827188940092165, + "grad_norm": 1.0578814317560323, + "learning_rate": 9.463071124154697e-07, + "loss": 0.6632689237594604, + "step": 4699 + }, + { + "epoch": 1.0829493087557605, + "grad_norm": 1.1233922356996344, + 
"learning_rate": 9.459266820629461e-07, + "loss": 0.6299769878387451, + "step": 4700 + }, + { + "epoch": 1.0831797235023042, + "grad_norm": 1.0275349813599226, + "learning_rate": 9.455462595590925e-07, + "loss": 0.7722063064575195, + "step": 4701 + }, + { + "epoch": 1.083410138248848, + "grad_norm": 1.2023285008908922, + "learning_rate": 9.451658449591278e-07, + "loss": 0.8219027519226074, + "step": 4702 + }, + { + "epoch": 1.0836405529953916, + "grad_norm": 1.1618110682341312, + "learning_rate": 9.44785438318268e-07, + "loss": 0.9078400731086731, + "step": 4703 + }, + { + "epoch": 1.0838709677419356, + "grad_norm": 1.087404948952653, + "learning_rate": 9.444050396917286e-07, + "loss": 0.8062041997909546, + "step": 4704 + }, + { + "epoch": 1.0841013824884793, + "grad_norm": 0.9599318157385525, + "learning_rate": 9.440246491347242e-07, + "loss": 0.6379001140594482, + "step": 4705 + }, + { + "epoch": 1.084331797235023, + "grad_norm": 1.179840039843376, + "learning_rate": 9.436442667024679e-07, + "loss": 0.919986367225647, + "step": 4706 + }, + { + "epoch": 1.0845622119815668, + "grad_norm": 1.025427308273649, + "learning_rate": 9.432638924501715e-07, + "loss": 0.6534138917922974, + "step": 4707 + }, + { + "epoch": 1.0847926267281105, + "grad_norm": 1.1537368190719173, + "learning_rate": 9.428835264330462e-07, + "loss": 0.8340045809745789, + "step": 4708 + }, + { + "epoch": 1.0850230414746544, + "grad_norm": 1.2598648406656967, + "learning_rate": 9.425031687063014e-07, + "loss": 0.8347625732421875, + "step": 4709 + }, + { + "epoch": 1.0852534562211982, + "grad_norm": 1.080310831214647, + "learning_rate": 9.421228193251452e-07, + "loss": 0.807063639163971, + "step": 4710 + }, + { + "epoch": 1.085483870967742, + "grad_norm": 0.8480154931503633, + "learning_rate": 9.417424783447855e-07, + "loss": 0.7375985383987427, + "step": 4711 + }, + { + "epoch": 1.0857142857142856, + "grad_norm": 0.9219258926876724, + "learning_rate": 9.413621458204281e-07, + "loss": 
0.5723168849945068, + "step": 4712 + }, + { + "epoch": 1.0859447004608296, + "grad_norm": 1.20469026899904, + "learning_rate": 9.409818218072772e-07, + "loss": 0.8272668123245239, + "step": 4713 + }, + { + "epoch": 1.0861751152073733, + "grad_norm": 1.0744380351617728, + "learning_rate": 9.406015063605368e-07, + "loss": 0.6400803327560425, + "step": 4714 + }, + { + "epoch": 1.086405529953917, + "grad_norm": 0.9959690478635643, + "learning_rate": 9.402211995354095e-07, + "loss": 0.6829795837402344, + "step": 4715 + }, + { + "epoch": 1.0866359447004608, + "grad_norm": 1.0434747079590168, + "learning_rate": 9.398409013870954e-07, + "loss": 0.8509865999221802, + "step": 4716 + }, + { + "epoch": 1.0868663594470047, + "grad_norm": 1.0730582514021882, + "learning_rate": 9.394606119707954e-07, + "loss": 0.895818829536438, + "step": 4717 + }, + { + "epoch": 1.0870967741935484, + "grad_norm": 1.2584943519033869, + "learning_rate": 9.390803313417072e-07, + "loss": 0.8534268140792847, + "step": 4718 + }, + { + "epoch": 1.0873271889400922, + "grad_norm": 1.0910485662903118, + "learning_rate": 9.38700059555028e-07, + "loss": 0.8603401184082031, + "step": 4719 + }, + { + "epoch": 1.087557603686636, + "grad_norm": 1.1060380385520165, + "learning_rate": 9.383197966659542e-07, + "loss": 0.8810417652130127, + "step": 4720 + }, + { + "epoch": 1.0877880184331796, + "grad_norm": 1.078874247367276, + "learning_rate": 9.3793954272968e-07, + "loss": 0.7144299149513245, + "step": 4721 + }, + { + "epoch": 1.0880184331797236, + "grad_norm": 1.3140311568193026, + "learning_rate": 9.375592978013994e-07, + "loss": 0.8780069351196289, + "step": 4722 + }, + { + "epoch": 1.0882488479262673, + "grad_norm": 1.1329108063995987, + "learning_rate": 9.371790619363041e-07, + "loss": 0.7976780533790588, + "step": 4723 + }, + { + "epoch": 1.088479262672811, + "grad_norm": 1.0979402846559465, + "learning_rate": 9.367988351895846e-07, + "loss": 0.9183385372161865, + "step": 4724 + }, + { + "epoch": 
1.0887096774193548, + "grad_norm": 1.0551038276717553, + "learning_rate": 9.364186176164306e-07, + "loss": 0.7891188859939575, + "step": 4725 + }, + { + "epoch": 1.0889400921658987, + "grad_norm": 0.9930223107211231, + "learning_rate": 9.360384092720301e-07, + "loss": 0.7586535215377808, + "step": 4726 + }, + { + "epoch": 1.0891705069124424, + "grad_norm": 1.1542507976324667, + "learning_rate": 9.356582102115696e-07, + "loss": 0.7915316224098206, + "step": 4727 + }, + { + "epoch": 1.0894009216589862, + "grad_norm": 0.901378484170352, + "learning_rate": 9.352780204902349e-07, + "loss": 0.6608257293701172, + "step": 4728 + }, + { + "epoch": 1.08963133640553, + "grad_norm": 1.1982692712799377, + "learning_rate": 9.3489784016321e-07, + "loss": 0.8375273942947388, + "step": 4729 + }, + { + "epoch": 1.0898617511520738, + "grad_norm": 1.43591815259741, + "learning_rate": 9.345176692856768e-07, + "loss": 0.7629055976867676, + "step": 4730 + }, + { + "epoch": 1.0900921658986176, + "grad_norm": 1.3741081876453818, + "learning_rate": 9.341375079128177e-07, + "loss": 0.8037875890731812, + "step": 4731 + }, + { + "epoch": 1.0903225806451613, + "grad_norm": 1.1252370555828741, + "learning_rate": 9.337573560998123e-07, + "loss": 0.8843437433242798, + "step": 4732 + }, + { + "epoch": 1.090552995391705, + "grad_norm": 1.058447534132799, + "learning_rate": 9.333772139018387e-07, + "loss": 0.7164910435676575, + "step": 4733 + }, + { + "epoch": 1.0907834101382488, + "grad_norm": 1.144703504042011, + "learning_rate": 9.329970813740742e-07, + "loss": 0.8076978921890259, + "step": 4734 + }, + { + "epoch": 1.0910138248847927, + "grad_norm": 1.091507904535434, + "learning_rate": 9.326169585716949e-07, + "loss": 0.7265340089797974, + "step": 4735 + }, + { + "epoch": 1.0912442396313364, + "grad_norm": 0.9010611551057135, + "learning_rate": 9.322368455498747e-07, + "loss": 0.7438681125640869, + "step": 4736 + }, + { + "epoch": 1.0914746543778802, + "grad_norm": 1.455573835192626, + 
"learning_rate": 9.318567423637868e-07, + "loss": 0.8760604858398438, + "step": 4737 + }, + { + "epoch": 1.0917050691244239, + "grad_norm": 1.064698472707054, + "learning_rate": 9.314766490686026e-07, + "loss": 0.7216911315917969, + "step": 4738 + }, + { + "epoch": 1.0919354838709678, + "grad_norm": 1.207051606070953, + "learning_rate": 9.310965657194916e-07, + "loss": 0.8003707528114319, + "step": 4739 + }, + { + "epoch": 1.0921658986175116, + "grad_norm": 0.9484074376515712, + "learning_rate": 9.307164923716233e-07, + "loss": 0.6496548652648926, + "step": 4740 + }, + { + "epoch": 1.0923963133640553, + "grad_norm": 1.0304975730869472, + "learning_rate": 9.303364290801644e-07, + "loss": 0.7659108638763428, + "step": 4741 + }, + { + "epoch": 1.092626728110599, + "grad_norm": 1.016478094690519, + "learning_rate": 9.299563759002802e-07, + "loss": 0.7799512147903442, + "step": 4742 + }, + { + "epoch": 1.092857142857143, + "grad_norm": 0.9921566283768914, + "learning_rate": 9.295763328871357e-07, + "loss": 0.7675691246986389, + "step": 4743 + }, + { + "epoch": 1.0930875576036867, + "grad_norm": 1.0513054078420998, + "learning_rate": 9.291963000958931e-07, + "loss": 0.677080512046814, + "step": 4744 + }, + { + "epoch": 1.0933179723502304, + "grad_norm": 1.0842277521538888, + "learning_rate": 9.28816277581714e-07, + "loss": 0.7885928153991699, + "step": 4745 + }, + { + "epoch": 1.0935483870967742, + "grad_norm": 1.07543209238493, + "learning_rate": 9.28436265399758e-07, + "loss": 0.6568010449409485, + "step": 4746 + }, + { + "epoch": 1.0937788018433179, + "grad_norm": 1.076830779801181, + "learning_rate": 9.280562636051827e-07, + "loss": 0.9438225030899048, + "step": 4747 + }, + { + "epoch": 1.0940092165898618, + "grad_norm": 1.0420094595322553, + "learning_rate": 9.276762722531461e-07, + "loss": 0.8119498491287231, + "step": 4748 + }, + { + "epoch": 1.0942396313364056, + "grad_norm": 0.8228863679585698, + "learning_rate": 9.272962913988029e-07, + "loss": 
0.7570452690124512, + "step": 4749 + }, + { + "epoch": 1.0944700460829493, + "grad_norm": 1.0990726312613297, + "learning_rate": 9.269163210973063e-07, + "loss": 0.7541190385818481, + "step": 4750 + }, + { + "epoch": 1.094700460829493, + "grad_norm": 1.015570437282189, + "learning_rate": 9.265363614038093e-07, + "loss": 0.6481921672821045, + "step": 4751 + }, + { + "epoch": 1.094930875576037, + "grad_norm": 1.1173263478947815, + "learning_rate": 9.261564123734623e-07, + "loss": 0.7997267246246338, + "step": 4752 + }, + { + "epoch": 1.0951612903225807, + "grad_norm": 1.4388540160892265, + "learning_rate": 9.25776474061414e-07, + "loss": 0.9093008637428284, + "step": 4753 + }, + { + "epoch": 1.0953917050691244, + "grad_norm": 1.3909093606880625, + "learning_rate": 9.253965465228122e-07, + "loss": 0.7609673142433167, + "step": 4754 + }, + { + "epoch": 1.0956221198156681, + "grad_norm": 1.311027419629587, + "learning_rate": 9.250166298128032e-07, + "loss": 0.8338878154754639, + "step": 4755 + }, + { + "epoch": 1.095852534562212, + "grad_norm": 1.1912490488387477, + "learning_rate": 9.246367239865308e-07, + "loss": 0.7503781318664551, + "step": 4756 + }, + { + "epoch": 1.0960829493087558, + "grad_norm": 1.0417471668794835, + "learning_rate": 9.242568290991384e-07, + "loss": 0.7630816698074341, + "step": 4757 + }, + { + "epoch": 1.0963133640552996, + "grad_norm": 1.4287601409586015, + "learning_rate": 9.238769452057671e-07, + "loss": 0.8026378154754639, + "step": 4758 + }, + { + "epoch": 1.0965437788018433, + "grad_norm": 1.0309152969100308, + "learning_rate": 9.234970723615558e-07, + "loss": 0.8256090879440308, + "step": 4759 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 1.1197681925892131, + "learning_rate": 9.231172106216437e-07, + "loss": 0.7331836223602295, + "step": 4760 + }, + { + "epoch": 1.097004608294931, + "grad_norm": 1.1300301361381715, + "learning_rate": 9.227373600411667e-07, + "loss": 0.886203944683075, + "step": 4761 + }, + { + "epoch": 
1.0972350230414747, + "grad_norm": 1.113695044174903, + "learning_rate": 9.223575206752592e-07, + "loss": 0.7802814245223999, + "step": 4762 + }, + { + "epoch": 1.0974654377880184, + "grad_norm": 1.3075634566953063, + "learning_rate": 9.219776925790552e-07, + "loss": 0.9682798385620117, + "step": 4763 + }, + { + "epoch": 1.0976958525345621, + "grad_norm": 1.1689607681364365, + "learning_rate": 9.215978758076858e-07, + "loss": 0.8733793497085571, + "step": 4764 + }, + { + "epoch": 1.097926267281106, + "grad_norm": 1.0890238577837303, + "learning_rate": 9.212180704162809e-07, + "loss": 0.8403818607330322, + "step": 4765 + }, + { + "epoch": 1.0981566820276498, + "grad_norm": 1.0898706001284595, + "learning_rate": 9.208382764599688e-07, + "loss": 0.7957059144973755, + "step": 4766 + }, + { + "epoch": 1.0983870967741935, + "grad_norm": 1.290224136897281, + "learning_rate": 9.204584939938761e-07, + "loss": 0.8943477272987366, + "step": 4767 + }, + { + "epoch": 1.0986175115207373, + "grad_norm": 1.0710230295284595, + "learning_rate": 9.200787230731273e-07, + "loss": 0.7084406018257141, + "step": 4768 + }, + { + "epoch": 1.098847926267281, + "grad_norm": 1.190836398847277, + "learning_rate": 9.196989637528465e-07, + "loss": 0.8374637365341187, + "step": 4769 + }, + { + "epoch": 1.099078341013825, + "grad_norm": 1.3757022429132086, + "learning_rate": 9.193192160881543e-07, + "loss": 0.6963578462600708, + "step": 4770 + }, + { + "epoch": 1.0993087557603687, + "grad_norm": 0.9887346096468936, + "learning_rate": 9.189394801341716e-07, + "loss": 0.6732540130615234, + "step": 4771 + }, + { + "epoch": 1.0995391705069124, + "grad_norm": 1.092710990198668, + "learning_rate": 9.185597559460159e-07, + "loss": 0.7104849219322205, + "step": 4772 + }, + { + "epoch": 1.0997695852534561, + "grad_norm": 1.3885045688613133, + "learning_rate": 9.181800435788037e-07, + "loss": 0.8461153507232666, + "step": 4773 + }, + { + "epoch": 1.1, + "grad_norm": 1.0447899457724443, + "learning_rate": 
9.178003430876502e-07, + "loss": 0.7120847105979919, + "step": 4774 + }, + { + "epoch": 1.1002304147465438, + "grad_norm": 1.0881207229188647, + "learning_rate": 9.174206545276677e-07, + "loss": 0.8108617067337036, + "step": 4775 + }, + { + "epoch": 1.1004608294930875, + "grad_norm": 0.9153115264713604, + "learning_rate": 9.170409779539678e-07, + "loss": 0.7019558548927307, + "step": 4776 + }, + { + "epoch": 1.1006912442396313, + "grad_norm": 0.9272452690627847, + "learning_rate": 9.166613134216605e-07, + "loss": 0.7563629150390625, + "step": 4777 + }, + { + "epoch": 1.100921658986175, + "grad_norm": 0.9795708897837844, + "learning_rate": 9.162816609858533e-07, + "loss": 0.777009129524231, + "step": 4778 + }, + { + "epoch": 1.101152073732719, + "grad_norm": 1.143317572483065, + "learning_rate": 9.159020207016516e-07, + "loss": 0.812334418296814, + "step": 4779 + }, + { + "epoch": 1.1013824884792627, + "grad_norm": 0.8685579046345627, + "learning_rate": 9.155223926241608e-07, + "loss": 0.609114408493042, + "step": 4780 + }, + { + "epoch": 1.1016129032258064, + "grad_norm": 1.1689773804888128, + "learning_rate": 9.151427768084828e-07, + "loss": 0.8277549147605896, + "step": 4781 + }, + { + "epoch": 1.1018433179723501, + "grad_norm": 1.2556834532396843, + "learning_rate": 9.147631733097179e-07, + "loss": 0.8649400472640991, + "step": 4782 + }, + { + "epoch": 1.102073732718894, + "grad_norm": 0.8878271909604711, + "learning_rate": 9.14383582182966e-07, + "loss": 0.7894293665885925, + "step": 4783 + }, + { + "epoch": 1.1023041474654378, + "grad_norm": 1.3844953995401048, + "learning_rate": 9.14004003483324e-07, + "loss": 0.9121778011322021, + "step": 4784 + }, + { + "epoch": 1.1025345622119815, + "grad_norm": 1.0899535734318635, + "learning_rate": 9.136244372658867e-07, + "loss": 0.7162299156188965, + "step": 4785 + }, + { + "epoch": 1.1027649769585253, + "grad_norm": 1.1193596859001855, + "learning_rate": 9.132448835857482e-07, + "loss": 0.7059808969497681, + "step": 
4786 + }, + { + "epoch": 1.1029953917050692, + "grad_norm": 1.2034226051758443, + "learning_rate": 9.128653424979999e-07, + "loss": 0.8172405958175659, + "step": 4787 + }, + { + "epoch": 1.103225806451613, + "grad_norm": 0.876114016677297, + "learning_rate": 9.124858140577316e-07, + "loss": 0.7672706842422485, + "step": 4788 + }, + { + "epoch": 1.1034562211981567, + "grad_norm": 1.2578760464526295, + "learning_rate": 9.121062983200318e-07, + "loss": 0.7054900527000427, + "step": 4789 + }, + { + "epoch": 1.1036866359447004, + "grad_norm": 1.0063162295686867, + "learning_rate": 9.117267953399865e-07, + "loss": 0.888538122177124, + "step": 4790 + }, + { + "epoch": 1.1039170506912441, + "grad_norm": 1.1758406583219614, + "learning_rate": 9.113473051726796e-07, + "loss": 0.7918668985366821, + "step": 4791 + }, + { + "epoch": 1.104147465437788, + "grad_norm": 1.220328177578168, + "learning_rate": 9.109678278731942e-07, + "loss": 0.7385697960853577, + "step": 4792 + }, + { + "epoch": 1.1043778801843318, + "grad_norm": 1.0627777124669568, + "learning_rate": 9.105883634966107e-07, + "loss": 0.6394056081771851, + "step": 4793 + }, + { + "epoch": 1.1046082949308755, + "grad_norm": 1.2147960582385422, + "learning_rate": 9.102089120980081e-07, + "loss": 0.8372077941894531, + "step": 4794 + }, + { + "epoch": 1.1048387096774193, + "grad_norm": 1.0764884273918471, + "learning_rate": 9.098294737324628e-07, + "loss": 0.6944066286087036, + "step": 4795 + }, + { + "epoch": 1.1050691244239632, + "grad_norm": 1.3210680270500303, + "learning_rate": 9.0945004845505e-07, + "loss": 0.8480994701385498, + "step": 4796 + }, + { + "epoch": 1.105299539170507, + "grad_norm": 1.3778825395187644, + "learning_rate": 9.090706363208431e-07, + "loss": 0.837437629699707, + "step": 4797 + }, + { + "epoch": 1.1055299539170507, + "grad_norm": 1.2126670676110476, + "learning_rate": 9.086912373849128e-07, + "loss": 0.8610002398490906, + "step": 4798 + }, + { + "epoch": 1.1057603686635944, + "grad_norm": 
1.1204211704902753, + "learning_rate": 9.083118517023281e-07, + "loss": 0.7323784828186035, + "step": 4799 + }, + { + "epoch": 1.1059907834101383, + "grad_norm": 1.394483021595883, + "learning_rate": 9.079324793281573e-07, + "loss": 0.7838932871818542, + "step": 4800 + }, + { + "epoch": 1.106221198156682, + "grad_norm": 1.1333807320340106, + "learning_rate": 9.075531203174651e-07, + "loss": 0.7655705213546753, + "step": 4801 + }, + { + "epoch": 1.1064516129032258, + "grad_norm": 1.199812107745982, + "learning_rate": 9.071737747253148e-07, + "loss": 0.8320151567459106, + "step": 4802 + }, + { + "epoch": 1.1066820276497695, + "grad_norm": 1.0428789095876687, + "learning_rate": 9.067944426067687e-07, + "loss": 0.7434612512588501, + "step": 4803 + }, + { + "epoch": 1.1069124423963133, + "grad_norm": 1.348302596081637, + "learning_rate": 9.064151240168857e-07, + "loss": 0.8351321220397949, + "step": 4804 + }, + { + "epoch": 1.1071428571428572, + "grad_norm": 0.9731377071478325, + "learning_rate": 9.060358190107233e-07, + "loss": 0.6648053526878357, + "step": 4805 + }, + { + "epoch": 1.107373271889401, + "grad_norm": 1.236779616553706, + "learning_rate": 9.056565276433377e-07, + "loss": 0.7507585287094116, + "step": 4806 + }, + { + "epoch": 1.1076036866359447, + "grad_norm": 1.0866303306873377, + "learning_rate": 9.052772499697823e-07, + "loss": 0.7638635635375977, + "step": 4807 + }, + { + "epoch": 1.1078341013824884, + "grad_norm": 1.3204341922490346, + "learning_rate": 9.048979860451081e-07, + "loss": 0.8066626191139221, + "step": 4808 + }, + { + "epoch": 1.1080645161290323, + "grad_norm": 0.9459322006964221, + "learning_rate": 9.045187359243659e-07, + "loss": 0.7090466022491455, + "step": 4809 + }, + { + "epoch": 1.108294930875576, + "grad_norm": 1.1112578831827626, + "learning_rate": 9.041394996626027e-07, + "loss": 0.7071142792701721, + "step": 4810 + }, + { + "epoch": 1.1085253456221198, + "grad_norm": 1.0134445673972028, + "learning_rate": 9.037602773148638e-07, 
+ "loss": 0.7103942036628723, + "step": 4811 + }, + { + "epoch": 1.1087557603686635, + "grad_norm": 1.1348721368793189, + "learning_rate": 9.033810689361936e-07, + "loss": 0.8408492207527161, + "step": 4812 + }, + { + "epoch": 1.1089861751152075, + "grad_norm": 0.9439878571651674, + "learning_rate": 9.030018745816335e-07, + "loss": 0.7621495723724365, + "step": 4813 + }, + { + "epoch": 1.1092165898617512, + "grad_norm": 1.152461687801826, + "learning_rate": 9.026226943062225e-07, + "loss": 0.7105196714401245, + "step": 4814 + }, + { + "epoch": 1.109447004608295, + "grad_norm": 1.079152769158689, + "learning_rate": 9.022435281649986e-07, + "loss": 0.8733636140823364, + "step": 4815 + }, + { + "epoch": 1.1096774193548387, + "grad_norm": 1.223534472251507, + "learning_rate": 9.018643762129974e-07, + "loss": 0.9097845554351807, + "step": 4816 + }, + { + "epoch": 1.1099078341013824, + "grad_norm": 1.2220607424054495, + "learning_rate": 9.014852385052519e-07, + "loss": 0.8743059635162354, + "step": 4817 + }, + { + "epoch": 1.1101382488479263, + "grad_norm": 1.0404677289419784, + "learning_rate": 9.011061150967937e-07, + "loss": 0.7898736000061035, + "step": 4818 + }, + { + "epoch": 1.11036866359447, + "grad_norm": 1.1698125073586854, + "learning_rate": 9.007270060426516e-07, + "loss": 0.871254563331604, + "step": 4819 + }, + { + "epoch": 1.1105990783410138, + "grad_norm": 1.323286168379092, + "learning_rate": 9.003479113978536e-07, + "loss": 0.6833579540252686, + "step": 4820 + }, + { + "epoch": 1.1108294930875575, + "grad_norm": 1.285642784687423, + "learning_rate": 8.999688312174243e-07, + "loss": 0.8289071321487427, + "step": 4821 + }, + { + "epoch": 1.1110599078341015, + "grad_norm": 1.1884737282905606, + "learning_rate": 8.995897655563864e-07, + "loss": 0.6798583269119263, + "step": 4822 + }, + { + "epoch": 1.1112903225806452, + "grad_norm": 1.1108358813410262, + "learning_rate": 8.992107144697614e-07, + "loss": 0.6518250703811646, + "step": 4823 + }, + { + "epoch": 
1.111520737327189, + "grad_norm": 1.3596600109698966, + "learning_rate": 8.988316780125679e-07, + "loss": 0.9316667318344116, + "step": 4824 + }, + { + "epoch": 1.1117511520737327, + "grad_norm": 0.9951654747842746, + "learning_rate": 8.98452656239822e-07, + "loss": 0.755483865737915, + "step": 4825 + }, + { + "epoch": 1.1119815668202766, + "grad_norm": 1.0146600815927005, + "learning_rate": 8.980736492065391e-07, + "loss": 0.7892755270004272, + "step": 4826 + }, + { + "epoch": 1.1122119815668203, + "grad_norm": 0.9930161298314518, + "learning_rate": 8.976946569677308e-07, + "loss": 0.703255295753479, + "step": 4827 + }, + { + "epoch": 1.112442396313364, + "grad_norm": 1.1559327578235137, + "learning_rate": 8.973156795784073e-07, + "loss": 0.7885171175003052, + "step": 4828 + }, + { + "epoch": 1.1126728110599078, + "grad_norm": 1.1407519814570228, + "learning_rate": 8.969367170935776e-07, + "loss": 0.8035199642181396, + "step": 4829 + }, + { + "epoch": 1.1129032258064515, + "grad_norm": 1.0245821351407076, + "learning_rate": 8.965577695682467e-07, + "loss": 0.8272112607955933, + "step": 4830 + }, + { + "epoch": 1.1131336405529955, + "grad_norm": 1.1104598721433627, + "learning_rate": 8.961788370574182e-07, + "loss": 0.8734478950500488, + "step": 4831 + }, + { + "epoch": 1.1133640552995392, + "grad_norm": 1.2722110058519596, + "learning_rate": 8.957999196160946e-07, + "loss": 0.7487469911575317, + "step": 4832 + }, + { + "epoch": 1.113594470046083, + "grad_norm": 1.3783344397611896, + "learning_rate": 8.954210172992748e-07, + "loss": 0.9193693399429321, + "step": 4833 + }, + { + "epoch": 1.1138248847926266, + "grad_norm": 1.4522583636726432, + "learning_rate": 8.950421301619555e-07, + "loss": 0.8228428959846497, + "step": 4834 + }, + { + "epoch": 1.1140552995391706, + "grad_norm": 0.9646412535671615, + "learning_rate": 8.946632582591324e-07, + "loss": 0.7419015169143677, + "step": 4835 + }, + { + "epoch": 1.1142857142857143, + "grad_norm": 1.1957500872812925, + 
"learning_rate": 8.942844016457975e-07, + "loss": 0.827411949634552, + "step": 4836 + }, + { + "epoch": 1.114516129032258, + "grad_norm": 0.9975223373000859, + "learning_rate": 8.93905560376942e-07, + "loss": 0.7066754102706909, + "step": 4837 + }, + { + "epoch": 1.1147465437788018, + "grad_norm": 1.2336329306802043, + "learning_rate": 8.93526734507554e-07, + "loss": 0.7201621532440186, + "step": 4838 + }, + { + "epoch": 1.1149769585253457, + "grad_norm": 0.8521980282185057, + "learning_rate": 8.931479240926196e-07, + "loss": 0.6363521814346313, + "step": 4839 + }, + { + "epoch": 1.1152073732718895, + "grad_norm": 1.0065898101647581, + "learning_rate": 8.927691291871223e-07, + "loss": 0.8232909440994263, + "step": 4840 + }, + { + "epoch": 1.1154377880184332, + "grad_norm": 1.0354249430711853, + "learning_rate": 8.923903498460441e-07, + "loss": 0.7006033658981323, + "step": 4841 + }, + { + "epoch": 1.115668202764977, + "grad_norm": 1.1957171429651339, + "learning_rate": 8.920115861243638e-07, + "loss": 0.6982721090316772, + "step": 4842 + }, + { + "epoch": 1.1158986175115206, + "grad_norm": 1.039109039901578, + "learning_rate": 8.916328380770593e-07, + "loss": 0.7735922336578369, + "step": 4843 + }, + { + "epoch": 1.1161290322580646, + "grad_norm": 1.189307260310029, + "learning_rate": 8.912541057591049e-07, + "loss": 0.7430423498153687, + "step": 4844 + }, + { + "epoch": 1.1163594470046083, + "grad_norm": 1.0189703427385546, + "learning_rate": 8.908753892254729e-07, + "loss": 0.7783932685852051, + "step": 4845 + }, + { + "epoch": 1.116589861751152, + "grad_norm": 0.895546986970967, + "learning_rate": 8.904966885311339e-07, + "loss": 0.726211428642273, + "step": 4846 + }, + { + "epoch": 1.1168202764976958, + "grad_norm": 1.0042101088511581, + "learning_rate": 8.901180037310555e-07, + "loss": 0.664351761341095, + "step": 4847 + }, + { + "epoch": 1.1170506912442397, + "grad_norm": 1.192545271664204, + "learning_rate": 8.897393348802031e-07, + "loss": 
0.8246554136276245, + "step": 4848 + }, + { + "epoch": 1.1172811059907835, + "grad_norm": 1.3113785088290244, + "learning_rate": 8.893606820335405e-07, + "loss": 0.9435447454452515, + "step": 4849 + }, + { + "epoch": 1.1175115207373272, + "grad_norm": 1.1196400925650334, + "learning_rate": 8.889820452460286e-07, + "loss": 0.8471171855926514, + "step": 4850 + }, + { + "epoch": 1.117741935483871, + "grad_norm": 0.9950597161448561, + "learning_rate": 8.886034245726254e-07, + "loss": 0.6038233041763306, + "step": 4851 + }, + { + "epoch": 1.1179723502304149, + "grad_norm": 1.1171540360532777, + "learning_rate": 8.882248200682881e-07, + "loss": 0.8186997771263123, + "step": 4852 + }, + { + "epoch": 1.1182027649769586, + "grad_norm": 1.2436642718372632, + "learning_rate": 8.878462317879702e-07, + "loss": 0.789948582649231, + "step": 4853 + }, + { + "epoch": 1.1184331797235023, + "grad_norm": 1.0789321556804603, + "learning_rate": 8.87467659786623e-07, + "loss": 0.7543652057647705, + "step": 4854 + }, + { + "epoch": 1.118663594470046, + "grad_norm": 1.0717127208024606, + "learning_rate": 8.870891041191963e-07, + "loss": 0.5985269546508789, + "step": 4855 + }, + { + "epoch": 1.1188940092165898, + "grad_norm": 1.109115113465042, + "learning_rate": 8.867105648406364e-07, + "loss": 0.7676643133163452, + "step": 4856 + }, + { + "epoch": 1.1191244239631337, + "grad_norm": 1.0078052507528568, + "learning_rate": 8.863320420058881e-07, + "loss": 0.7317303419113159, + "step": 4857 + }, + { + "epoch": 1.1193548387096774, + "grad_norm": 1.117240479042085, + "learning_rate": 8.859535356698936e-07, + "loss": 0.8357843160629272, + "step": 4858 + }, + { + "epoch": 1.1195852534562212, + "grad_norm": 1.2827717071860176, + "learning_rate": 8.855750458875923e-07, + "loss": 0.7149945497512817, + "step": 4859 + }, + { + "epoch": 1.119815668202765, + "grad_norm": 1.1258754685876486, + "learning_rate": 8.851965727139214e-07, + "loss": 0.7059169411659241, + "step": 4860 + }, + { + "epoch": 
1.1200460829493089, + "grad_norm": 1.0779991100813224, + "learning_rate": 8.848181162038163e-07, + "loss": 0.7530190944671631, + "step": 4861 + }, + { + "epoch": 1.1202764976958526, + "grad_norm": 1.12578616970897, + "learning_rate": 8.844396764122092e-07, + "loss": 0.808814287185669, + "step": 4862 + }, + { + "epoch": 1.1205069124423963, + "grad_norm": 1.174668121226261, + "learning_rate": 8.840612533940295e-07, + "loss": 0.7205604910850525, + "step": 4863 + }, + { + "epoch": 1.12073732718894, + "grad_norm": 1.0284636891818573, + "learning_rate": 8.83682847204206e-07, + "loss": 0.7493274211883545, + "step": 4864 + }, + { + "epoch": 1.120967741935484, + "grad_norm": 1.1974475439930412, + "learning_rate": 8.833044578976631e-07, + "loss": 0.8115849494934082, + "step": 4865 + }, + { + "epoch": 1.1211981566820277, + "grad_norm": 1.2224514970634248, + "learning_rate": 8.829260855293237e-07, + "loss": 0.8188419342041016, + "step": 4866 + }, + { + "epoch": 1.1214285714285714, + "grad_norm": 1.372584236180193, + "learning_rate": 8.82547730154108e-07, + "loss": 0.6152349710464478, + "step": 4867 + }, + { + "epoch": 1.1216589861751152, + "grad_norm": 0.9364210771252817, + "learning_rate": 8.821693918269333e-07, + "loss": 0.7629969120025635, + "step": 4868 + }, + { + "epoch": 1.121889400921659, + "grad_norm": 1.0637191210851928, + "learning_rate": 8.81791070602716e-07, + "loss": 0.7063733339309692, + "step": 4869 + }, + { + "epoch": 1.1221198156682028, + "grad_norm": 1.2221996591019166, + "learning_rate": 8.814127665363682e-07, + "loss": 0.729676365852356, + "step": 4870 + }, + { + "epoch": 1.1223502304147466, + "grad_norm": 1.2363948838699006, + "learning_rate": 8.810344796827999e-07, + "loss": 0.8188877105712891, + "step": 4871 + }, + { + "epoch": 1.1225806451612903, + "grad_norm": 1.4364824515163135, + "learning_rate": 8.806562100969199e-07, + "loss": 0.70793217420578, + "step": 4872 + }, + { + "epoch": 1.122811059907834, + "grad_norm": 1.2471671753090219, + 
"learning_rate": 8.802779578336329e-07, + "loss": 0.8086484670639038, + "step": 4873 + }, + { + "epoch": 1.123041474654378, + "grad_norm": 1.209058465827679, + "learning_rate": 8.798997229478417e-07, + "loss": 0.8954081535339355, + "step": 4874 + }, + { + "epoch": 1.1232718894009217, + "grad_norm": 1.0352094557860352, + "learning_rate": 8.795215054944469e-07, + "loss": 0.6615205407142639, + "step": 4875 + }, + { + "epoch": 1.1235023041474654, + "grad_norm": 1.3182700744777898, + "learning_rate": 8.79143305528346e-07, + "loss": 0.6851116418838501, + "step": 4876 + }, + { + "epoch": 1.1237327188940092, + "grad_norm": 0.9311237252586447, + "learning_rate": 8.787651231044342e-07, + "loss": 0.7594672441482544, + "step": 4877 + }, + { + "epoch": 1.123963133640553, + "grad_norm": 1.2505187148095604, + "learning_rate": 8.783869582776044e-07, + "loss": 0.7170572280883789, + "step": 4878 + }, + { + "epoch": 1.1241935483870968, + "grad_norm": 1.1244851690255748, + "learning_rate": 8.780088111027467e-07, + "loss": 0.9139137864112854, + "step": 4879 + }, + { + "epoch": 1.1244239631336406, + "grad_norm": 1.2468380143920514, + "learning_rate": 8.776306816347482e-07, + "loss": 0.8716791868209839, + "step": 4880 + }, + { + "epoch": 1.1246543778801843, + "grad_norm": 1.5043743610246187, + "learning_rate": 8.772525699284946e-07, + "loss": 0.840330958366394, + "step": 4881 + }, + { + "epoch": 1.124884792626728, + "grad_norm": 1.28802116274467, + "learning_rate": 8.768744760388681e-07, + "loss": 0.7713445425033569, + "step": 4882 + }, + { + "epoch": 1.125115207373272, + "grad_norm": 1.2058132743835892, + "learning_rate": 8.764964000207479e-07, + "loss": 0.8964767456054688, + "step": 4883 + }, + { + "epoch": 1.1253456221198157, + "grad_norm": 1.12361515551762, + "learning_rate": 8.761183419290121e-07, + "loss": 0.8038421869277954, + "step": 4884 + }, + { + "epoch": 1.1255760368663594, + "grad_norm": 0.7722654284456119, + "learning_rate": 8.757403018185351e-07, + "loss": 
0.6601011753082275, + "step": 4885 + }, + { + "epoch": 1.1258064516129032, + "grad_norm": 0.8011265369746955, + "learning_rate": 8.753622797441885e-07, + "loss": 0.8226664066314697, + "step": 4886 + }, + { + "epoch": 1.1260368663594469, + "grad_norm": 1.0633366554284305, + "learning_rate": 8.749842757608422e-07, + "loss": 0.7062248587608337, + "step": 4887 + }, + { + "epoch": 1.1262672811059908, + "grad_norm": 1.318395948514478, + "learning_rate": 8.746062899233628e-07, + "loss": 0.8642051815986633, + "step": 4888 + }, + { + "epoch": 1.1264976958525346, + "grad_norm": 1.2332349128972684, + "learning_rate": 8.74228322286614e-07, + "loss": 0.8194048404693604, + "step": 4889 + }, + { + "epoch": 1.1267281105990783, + "grad_norm": 1.121678775220638, + "learning_rate": 8.738503729054583e-07, + "loss": 0.6957820653915405, + "step": 4890 + }, + { + "epoch": 1.1269585253456222, + "grad_norm": 0.9775692035561586, + "learning_rate": 8.734724418347537e-07, + "loss": 0.8107770681381226, + "step": 4891 + }, + { + "epoch": 1.127188940092166, + "grad_norm": 1.1508754542191086, + "learning_rate": 8.730945291293563e-07, + "loss": 0.7727551460266113, + "step": 4892 + }, + { + "epoch": 1.1274193548387097, + "grad_norm": 1.1347047929449647, + "learning_rate": 8.727166348441207e-07, + "loss": 0.7389936447143555, + "step": 4893 + }, + { + "epoch": 1.1276497695852534, + "grad_norm": 1.2733389095695957, + "learning_rate": 8.723387590338964e-07, + "loss": 0.7666463851928711, + "step": 4894 + }, + { + "epoch": 1.1278801843317972, + "grad_norm": 1.1990629153183452, + "learning_rate": 8.719609017535328e-07, + "loss": 0.7795453071594238, + "step": 4895 + }, + { + "epoch": 1.128110599078341, + "grad_norm": 1.1062968437903737, + "learning_rate": 8.715830630578746e-07, + "loss": 0.8560752272605896, + "step": 4896 + }, + { + "epoch": 1.1283410138248848, + "grad_norm": 1.2251043883259816, + "learning_rate": 8.712052430017645e-07, + "loss": 0.7574455738067627, + "step": 4897 + }, + { + "epoch": 
1.1285714285714286, + "grad_norm": 1.3025894471719623, + "learning_rate": 8.708274416400432e-07, + "loss": 0.8017276525497437, + "step": 4898 + }, + { + "epoch": 1.1288018433179723, + "grad_norm": 0.9942840399227726, + "learning_rate": 8.704496590275477e-07, + "loss": 0.7046157121658325, + "step": 4899 + }, + { + "epoch": 1.129032258064516, + "grad_norm": 1.187705347283351, + "learning_rate": 8.700718952191124e-07, + "loss": 0.7352035641670227, + "step": 4900 + }, + { + "epoch": 1.12926267281106, + "grad_norm": 0.9471130432852718, + "learning_rate": 8.696941502695698e-07, + "loss": 0.6444690227508545, + "step": 4901 + }, + { + "epoch": 1.1294930875576037, + "grad_norm": 1.0628821586759927, + "learning_rate": 8.69316424233749e-07, + "loss": 0.7909440994262695, + "step": 4902 + }, + { + "epoch": 1.1297235023041474, + "grad_norm": 0.9483928902743061, + "learning_rate": 8.689387171664756e-07, + "loss": 0.646790087223053, + "step": 4903 + }, + { + "epoch": 1.1299539170506911, + "grad_norm": 1.2796319408131067, + "learning_rate": 8.685610291225744e-07, + "loss": 0.786831796169281, + "step": 4904 + }, + { + "epoch": 1.130184331797235, + "grad_norm": 1.143272972798168, + "learning_rate": 8.681833601568657e-07, + "loss": 0.8004348278045654, + "step": 4905 + }, + { + "epoch": 1.1304147465437788, + "grad_norm": 0.996600703731369, + "learning_rate": 8.678057103241677e-07, + "loss": 0.6846532821655273, + "step": 4906 + }, + { + "epoch": 1.1306451612903226, + "grad_norm": 1.299426572962062, + "learning_rate": 8.67428079679296e-07, + "loss": 0.7555707693099976, + "step": 4907 + }, + { + "epoch": 1.1308755760368663, + "grad_norm": 1.3809719247833205, + "learning_rate": 8.67050468277063e-07, + "loss": 0.852725625038147, + "step": 4908 + }, + { + "epoch": 1.1311059907834102, + "grad_norm": 0.9844151846464619, + "learning_rate": 8.666728761722782e-07, + "loss": 0.6990044713020325, + "step": 4909 + }, + { + "epoch": 1.131336405529954, + "grad_norm": 1.223366973696945, + 
"learning_rate": 8.662953034197493e-07, + "loss": 0.8050999641418457, + "step": 4910 + }, + { + "epoch": 1.1315668202764977, + "grad_norm": 1.3085197840977536, + "learning_rate": 8.659177500742802e-07, + "loss": 0.8169291019439697, + "step": 4911 + }, + { + "epoch": 1.1317972350230414, + "grad_norm": 1.081294035300873, + "learning_rate": 8.655402161906716e-07, + "loss": 0.7814679145812988, + "step": 4912 + }, + { + "epoch": 1.1320276497695851, + "grad_norm": 1.237970773045493, + "learning_rate": 8.651627018237231e-07, + "loss": 0.6734834313392639, + "step": 4913 + }, + { + "epoch": 1.132258064516129, + "grad_norm": 1.1143770605215586, + "learning_rate": 8.647852070282299e-07, + "loss": 0.8765416145324707, + "step": 4914 + }, + { + "epoch": 1.1324884792626728, + "grad_norm": 1.3797966848789986, + "learning_rate": 8.644077318589847e-07, + "loss": 1.0023764371871948, + "step": 4915 + }, + { + "epoch": 1.1327188940092165, + "grad_norm": 1.0387287080137257, + "learning_rate": 8.64030276370778e-07, + "loss": 0.7561393976211548, + "step": 4916 + }, + { + "epoch": 1.1329493087557603, + "grad_norm": 1.123376400728965, + "learning_rate": 8.636528406183961e-07, + "loss": 0.8252062797546387, + "step": 4917 + }, + { + "epoch": 1.1331797235023042, + "grad_norm": 1.3939443114820729, + "learning_rate": 8.632754246566246e-07, + "loss": 0.7598097324371338, + "step": 4918 + }, + { + "epoch": 1.133410138248848, + "grad_norm": 0.8823184534346743, + "learning_rate": 8.628980285402438e-07, + "loss": 0.6113640069961548, + "step": 4919 + }, + { + "epoch": 1.1336405529953917, + "grad_norm": 1.096652563873467, + "learning_rate": 8.625206523240325e-07, + "loss": 0.7457853555679321, + "step": 4920 + }, + { + "epoch": 1.1338709677419354, + "grad_norm": 1.0304826450193199, + "learning_rate": 8.62143296062767e-07, + "loss": 0.7334161996841431, + "step": 4921 + }, + { + "epoch": 1.1341013824884794, + "grad_norm": 1.1383631487720753, + "learning_rate": 8.617659598112195e-07, + "loss": 
0.7446962594985962, + "step": 4922 + }, + { + "epoch": 1.134331797235023, + "grad_norm": 0.9360514056176105, + "learning_rate": 8.613886436241594e-07, + "loss": 0.7074497938156128, + "step": 4923 + }, + { + "epoch": 1.1345622119815668, + "grad_norm": 0.9945384740922374, + "learning_rate": 8.610113475563547e-07, + "loss": 0.6728851795196533, + "step": 4924 + }, + { + "epoch": 1.1347926267281105, + "grad_norm": 1.0533766436674836, + "learning_rate": 8.606340716625689e-07, + "loss": 0.7732793092727661, + "step": 4925 + }, + { + "epoch": 1.1350230414746543, + "grad_norm": 1.2301857240081557, + "learning_rate": 8.60256815997563e-07, + "loss": 0.7514671683311462, + "step": 4926 + }, + { + "epoch": 1.1352534562211982, + "grad_norm": 1.2507291163181513, + "learning_rate": 8.598795806160952e-07, + "loss": 0.7824795842170715, + "step": 4927 + }, + { + "epoch": 1.135483870967742, + "grad_norm": 1.1585997268920079, + "learning_rate": 8.59502365572921e-07, + "loss": 0.789236307144165, + "step": 4928 + }, + { + "epoch": 1.1357142857142857, + "grad_norm": 1.1796078109098491, + "learning_rate": 8.591251709227919e-07, + "loss": 0.7005175948143005, + "step": 4929 + }, + { + "epoch": 1.1359447004608294, + "grad_norm": 1.2299124062921447, + "learning_rate": 8.587479967204582e-07, + "loss": 0.7851300239562988, + "step": 4930 + }, + { + "epoch": 1.1361751152073734, + "grad_norm": 1.5129438725714193, + "learning_rate": 8.583708430206658e-07, + "loss": 0.8901405334472656, + "step": 4931 + }, + { + "epoch": 1.136405529953917, + "grad_norm": 1.1049343524856345, + "learning_rate": 8.579937098781576e-07, + "loss": 0.8118528127670288, + "step": 4932 + }, + { + "epoch": 1.1366359447004608, + "grad_norm": 1.0631974751851168, + "learning_rate": 8.57616597347675e-07, + "loss": 0.6500028371810913, + "step": 4933 + }, + { + "epoch": 1.1368663594470045, + "grad_norm": 1.057066415615051, + "learning_rate": 8.572395054839547e-07, + "loss": 0.7752922773361206, + "step": 4934 + }, + { + "epoch": 
1.1370967741935485, + "grad_norm": 1.124364781444334, + "learning_rate": 8.568624343417309e-07, + "loss": 0.7346245050430298, + "step": 4935 + }, + { + "epoch": 1.1373271889400922, + "grad_norm": 1.4547001781507483, + "learning_rate": 8.564853839757356e-07, + "loss": 0.9249104261398315, + "step": 4936 + }, + { + "epoch": 1.137557603686636, + "grad_norm": 1.0350864816884677, + "learning_rate": 8.561083544406965e-07, + "loss": 0.7407078742980957, + "step": 4937 + }, + { + "epoch": 1.1377880184331797, + "grad_norm": 1.197156559440129, + "learning_rate": 8.557313457913393e-07, + "loss": 0.7615865468978882, + "step": 4938 + }, + { + "epoch": 1.1380184331797234, + "grad_norm": 1.2125718427071739, + "learning_rate": 8.553543580823866e-07, + "loss": 0.757561445236206, + "step": 4939 + }, + { + "epoch": 1.1382488479262673, + "grad_norm": 1.1468001082336654, + "learning_rate": 8.549773913685572e-07, + "loss": 0.7130411863327026, + "step": 4940 + }, + { + "epoch": 1.138479262672811, + "grad_norm": 1.1282357144069963, + "learning_rate": 8.54600445704567e-07, + "loss": 0.7507551312446594, + "step": 4941 + }, + { + "epoch": 1.1387096774193548, + "grad_norm": 1.0556143227749322, + "learning_rate": 8.542235211451301e-07, + "loss": 0.896443247795105, + "step": 4942 + }, + { + "epoch": 1.1389400921658985, + "grad_norm": 1.145222677509159, + "learning_rate": 8.538466177449557e-07, + "loss": 0.7530815601348877, + "step": 4943 + }, + { + "epoch": 1.1391705069124425, + "grad_norm": 1.2481258172783056, + "learning_rate": 8.534697355587517e-07, + "loss": 0.8730431795120239, + "step": 4944 + }, + { + "epoch": 1.1394009216589862, + "grad_norm": 1.3010516024158107, + "learning_rate": 8.530928746412216e-07, + "loss": 0.6452720165252686, + "step": 4945 + }, + { + "epoch": 1.13963133640553, + "grad_norm": 1.1712957128451178, + "learning_rate": 8.527160350470661e-07, + "loss": 0.7679018974304199, + "step": 4946 + }, + { + "epoch": 1.1398617511520737, + "grad_norm": 1.402874429077297, + 
"learning_rate": 8.523392168309832e-07, + "loss": 0.8186824321746826, + "step": 4947 + }, + { + "epoch": 1.1400921658986176, + "grad_norm": 1.1669467278440648, + "learning_rate": 8.519624200476676e-07, + "loss": 0.666642427444458, + "step": 4948 + }, + { + "epoch": 1.1403225806451613, + "grad_norm": 1.0160881327834055, + "learning_rate": 8.515856447518104e-07, + "loss": 0.7478682994842529, + "step": 4949 + }, + { + "epoch": 1.140552995391705, + "grad_norm": 1.2340329971083113, + "learning_rate": 8.512088909981007e-07, + "loss": 0.7527793645858765, + "step": 4950 + }, + { + "epoch": 1.1407834101382488, + "grad_norm": 1.136863530366948, + "learning_rate": 8.508321588412235e-07, + "loss": 0.7614094018936157, + "step": 4951 + }, + { + "epoch": 1.1410138248847925, + "grad_norm": 1.2371366016065355, + "learning_rate": 8.504554483358605e-07, + "loss": 0.8294994831085205, + "step": 4952 + }, + { + "epoch": 1.1412442396313365, + "grad_norm": 1.4759487382386114, + "learning_rate": 8.500787595366919e-07, + "loss": 0.8900095224380493, + "step": 4953 + }, + { + "epoch": 1.1414746543778802, + "grad_norm": 1.0721192735972314, + "learning_rate": 8.497020924983926e-07, + "loss": 0.8403744697570801, + "step": 4954 + }, + { + "epoch": 1.141705069124424, + "grad_norm": 1.0449510164412683, + "learning_rate": 8.493254472756355e-07, + "loss": 0.7046208381652832, + "step": 4955 + }, + { + "epoch": 1.1419354838709677, + "grad_norm": 1.3018714779233174, + "learning_rate": 8.489488239230904e-07, + "loss": 0.8226789832115173, + "step": 4956 + }, + { + "epoch": 1.1421658986175116, + "grad_norm": 1.058902427650911, + "learning_rate": 8.485722224954236e-07, + "loss": 0.7248969674110413, + "step": 4957 + }, + { + "epoch": 1.1423963133640553, + "grad_norm": 1.1327549620980084, + "learning_rate": 8.481956430472979e-07, + "loss": 0.8116840124130249, + "step": 4958 + }, + { + "epoch": 1.142626728110599, + "grad_norm": 1.062622286893391, + "learning_rate": 8.478190856333739e-07, + "loss": 
0.7534138560295105, + "step": 4959 + }, + { + "epoch": 1.1428571428571428, + "grad_norm": 1.3427980825750856, + "learning_rate": 8.474425503083082e-07, + "loss": 0.8945306539535522, + "step": 4960 + }, + { + "epoch": 1.1430875576036867, + "grad_norm": 1.1592346473165394, + "learning_rate": 8.47066037126754e-07, + "loss": 0.7554503083229065, + "step": 4961 + }, + { + "epoch": 1.1433179723502305, + "grad_norm": 1.4596388821753403, + "learning_rate": 8.466895461433625e-07, + "loss": 0.832726776599884, + "step": 4962 + }, + { + "epoch": 1.1435483870967742, + "grad_norm": 1.250046955776058, + "learning_rate": 8.463130774127804e-07, + "loss": 0.8312773704528809, + "step": 4963 + }, + { + "epoch": 1.143778801843318, + "grad_norm": 0.9153601791246997, + "learning_rate": 8.459366309896512e-07, + "loss": 0.6484537124633789, + "step": 4964 + }, + { + "epoch": 1.1440092165898617, + "grad_norm": 1.2863432770713337, + "learning_rate": 8.455602069286165e-07, + "loss": 0.9216604828834534, + "step": 4965 + }, + { + "epoch": 1.1442396313364056, + "grad_norm": 1.134985678431753, + "learning_rate": 8.451838052843131e-07, + "loss": 0.6213096380233765, + "step": 4966 + }, + { + "epoch": 1.1444700460829493, + "grad_norm": 0.9562822723791001, + "learning_rate": 8.448074261113756e-07, + "loss": 0.6873677968978882, + "step": 4967 + }, + { + "epoch": 1.144700460829493, + "grad_norm": 1.215560824144924, + "learning_rate": 8.444310694644348e-07, + "loss": 0.7883448600769043, + "step": 4968 + }, + { + "epoch": 1.1449308755760368, + "grad_norm": 1.1944176371651494, + "learning_rate": 8.440547353981178e-07, + "loss": 0.724172830581665, + "step": 4969 + }, + { + "epoch": 1.1451612903225807, + "grad_norm": 1.0792006702141475, + "learning_rate": 8.4367842396705e-07, + "loss": 0.7115252017974854, + "step": 4970 + }, + { + "epoch": 1.1453917050691245, + "grad_norm": 1.0823773323138404, + "learning_rate": 8.433021352258521e-07, + "loss": 0.7165110111236572, + "step": 4971 + }, + { + "epoch": 
1.1456221198156682, + "grad_norm": 1.0874360604645514, + "learning_rate": 8.429258692291413e-07, + "loss": 0.7563315629959106, + "step": 4972 + }, + { + "epoch": 1.145852534562212, + "grad_norm": 1.1334099478279698, + "learning_rate": 8.425496260315331e-07, + "loss": 0.7528449892997742, + "step": 4973 + }, + { + "epoch": 1.1460829493087559, + "grad_norm": 1.1141426795021205, + "learning_rate": 8.421734056876383e-07, + "loss": 0.7976171970367432, + "step": 4974 + }, + { + "epoch": 1.1463133640552996, + "grad_norm": 1.020985144100356, + "learning_rate": 8.417972082520644e-07, + "loss": 0.7498095035552979, + "step": 4975 + }, + { + "epoch": 1.1465437788018433, + "grad_norm": 1.3446642320448154, + "learning_rate": 8.414210337794165e-07, + "loss": 0.9568856954574585, + "step": 4976 + }, + { + "epoch": 1.146774193548387, + "grad_norm": 0.9499457055768262, + "learning_rate": 8.410448823242957e-07, + "loss": 0.6402908563613892, + "step": 4977 + }, + { + "epoch": 1.1470046082949308, + "grad_norm": 1.1759709167305108, + "learning_rate": 8.406687539412995e-07, + "loss": 0.8224657773971558, + "step": 4978 + }, + { + "epoch": 1.1472350230414747, + "grad_norm": 1.2886598107348421, + "learning_rate": 8.402926486850229e-07, + "loss": 0.7804544568061829, + "step": 4979 + }, + { + "epoch": 1.1474654377880185, + "grad_norm": 1.1861127295236977, + "learning_rate": 8.39916566610057e-07, + "loss": 0.7920527458190918, + "step": 4980 + }, + { + "epoch": 1.1476958525345622, + "grad_norm": 1.1244888328051699, + "learning_rate": 8.395405077709891e-07, + "loss": 0.7672078609466553, + "step": 4981 + }, + { + "epoch": 1.147926267281106, + "grad_norm": 1.2427545332028853, + "learning_rate": 8.391644722224047e-07, + "loss": 0.6997950077056885, + "step": 4982 + }, + { + "epoch": 1.1481566820276499, + "grad_norm": 1.057637628401912, + "learning_rate": 8.38788460018884e-07, + "loss": 0.7754349708557129, + "step": 4983 + }, + { + "epoch": 1.1483870967741936, + "grad_norm": 1.1458978330134115, + 
"learning_rate": 8.384124712150046e-07, + "loss": 0.706238329410553, + "step": 4984 + }, + { + "epoch": 1.1486175115207373, + "grad_norm": 0.8874927618348325, + "learning_rate": 8.380365058653415e-07, + "loss": 0.7115224599838257, + "step": 4985 + }, + { + "epoch": 1.148847926267281, + "grad_norm": 1.349182229007694, + "learning_rate": 8.376605640244652e-07, + "loss": 0.9026098847389221, + "step": 4986 + }, + { + "epoch": 1.149078341013825, + "grad_norm": 1.359066441839043, + "learning_rate": 8.372846457469428e-07, + "loss": 0.9123632311820984, + "step": 4987 + }, + { + "epoch": 1.1493087557603687, + "grad_norm": 1.1389830084868187, + "learning_rate": 8.369087510873389e-07, + "loss": 0.8365681171417236, + "step": 4988 + }, + { + "epoch": 1.1495391705069125, + "grad_norm": 1.1572327597453433, + "learning_rate": 8.36532880100214e-07, + "loss": 0.7506389617919922, + "step": 4989 + }, + { + "epoch": 1.1497695852534562, + "grad_norm": 1.1932866122784214, + "learning_rate": 8.361570328401246e-07, + "loss": 0.7736936807632446, + "step": 4990 + }, + { + "epoch": 1.15, + "grad_norm": 1.0939095427412457, + "learning_rate": 8.357812093616254e-07, + "loss": 0.7364238500595093, + "step": 4991 + }, + { + "epoch": 1.1502304147465439, + "grad_norm": 1.154457809524142, + "learning_rate": 8.354054097192659e-07, + "loss": 0.8588067293167114, + "step": 4992 + }, + { + "epoch": 1.1504608294930876, + "grad_norm": 1.0040260335609983, + "learning_rate": 8.350296339675938e-07, + "loss": 0.777319073677063, + "step": 4993 + }, + { + "epoch": 1.1506912442396313, + "grad_norm": 1.2472613338245313, + "learning_rate": 8.346538821611517e-07, + "loss": 0.6695454716682434, + "step": 4994 + }, + { + "epoch": 1.150921658986175, + "grad_norm": 1.1333204343634593, + "learning_rate": 8.342781543544796e-07, + "loss": 0.7785383462905884, + "step": 4995 + }, + { + "epoch": 1.1511520737327188, + "grad_norm": 1.2063502081148214, + "learning_rate": 8.339024506021143e-07, + "loss": 0.7386239767074585, + 
"step": 4996 + }, + { + "epoch": 1.1513824884792627, + "grad_norm": 1.015973129089863, + "learning_rate": 8.335267709585884e-07, + "loss": 0.8044750690460205, + "step": 4997 + }, + { + "epoch": 1.1516129032258065, + "grad_norm": 0.991689333823338, + "learning_rate": 8.331511154784307e-07, + "loss": 0.6925652623176575, + "step": 4998 + }, + { + "epoch": 1.1518433179723502, + "grad_norm": 1.1362021503644928, + "learning_rate": 8.327754842161684e-07, + "loss": 0.7906935214996338, + "step": 4999 + }, + { + "epoch": 1.1520737327188941, + "grad_norm": 1.0865966340855062, + "learning_rate": 8.323998772263231e-07, + "loss": 0.7131960988044739, + "step": 5000 + }, + { + "epoch": 1.1523041474654379, + "grad_norm": 1.0459163670419733, + "learning_rate": 8.320242945634132e-07, + "loss": 0.8412370085716248, + "step": 5001 + }, + { + "epoch": 1.1525345622119816, + "grad_norm": 1.219248495471204, + "learning_rate": 8.316487362819551e-07, + "loss": 0.7800952792167664, + "step": 5002 + }, + { + "epoch": 1.1527649769585253, + "grad_norm": 1.2269188284281454, + "learning_rate": 8.312732024364602e-07, + "loss": 0.8620247840881348, + "step": 5003 + }, + { + "epoch": 1.152995391705069, + "grad_norm": 1.1576962368399284, + "learning_rate": 8.30897693081436e-07, + "loss": 0.7551721334457397, + "step": 5004 + }, + { + "epoch": 1.153225806451613, + "grad_norm": 1.1081098689134552, + "learning_rate": 8.305222082713882e-07, + "loss": 0.8510593175888062, + "step": 5005 + }, + { + "epoch": 1.1534562211981567, + "grad_norm": 1.0356186889640762, + "learning_rate": 8.301467480608176e-07, + "loss": 0.6503845453262329, + "step": 5006 + }, + { + "epoch": 1.1536866359447004, + "grad_norm": 1.1593829978588668, + "learning_rate": 8.297713125042212e-07, + "loss": 0.7729237079620361, + "step": 5007 + }, + { + "epoch": 1.1539170506912442, + "grad_norm": 1.0812796919286354, + "learning_rate": 8.293959016560939e-07, + "loss": 0.77802574634552, + "step": 5008 + }, + { + "epoch": 1.154147465437788, + 
"grad_norm": 0.9915519400035699, + "learning_rate": 8.290205155709256e-07, + "loss": 0.7977825999259949, + "step": 5009 + }, + { + "epoch": 1.1543778801843319, + "grad_norm": 1.1128731733324948, + "learning_rate": 8.286451543032027e-07, + "loss": 0.7479745149612427, + "step": 5010 + }, + { + "epoch": 1.1546082949308756, + "grad_norm": 1.0554376798438097, + "learning_rate": 8.282698179074092e-07, + "loss": 0.7631532549858093, + "step": 5011 + }, + { + "epoch": 1.1548387096774193, + "grad_norm": 1.1424098237872247, + "learning_rate": 8.278945064380243e-07, + "loss": 0.7437061071395874, + "step": 5012 + }, + { + "epoch": 1.1550691244239633, + "grad_norm": 1.2208599961881346, + "learning_rate": 8.275192199495236e-07, + "loss": 0.9334282875061035, + "step": 5013 + }, + { + "epoch": 1.155299539170507, + "grad_norm": 1.1846438304674103, + "learning_rate": 8.2714395849638e-07, + "loss": 0.7119227647781372, + "step": 5014 + }, + { + "epoch": 1.1555299539170507, + "grad_norm": 1.202224273678675, + "learning_rate": 8.267687221330619e-07, + "loss": 0.8335816860198975, + "step": 5015 + }, + { + "epoch": 1.1557603686635944, + "grad_norm": 1.290989413518125, + "learning_rate": 8.263935109140347e-07, + "loss": 0.6130940914154053, + "step": 5016 + }, + { + "epoch": 1.1559907834101382, + "grad_norm": 1.1118999574659398, + "learning_rate": 8.260183248937595e-07, + "loss": 0.8223903179168701, + "step": 5017 + }, + { + "epoch": 1.1562211981566821, + "grad_norm": 1.1042026567968168, + "learning_rate": 8.256431641266938e-07, + "loss": 0.8024790287017822, + "step": 5018 + }, + { + "epoch": 1.1564516129032258, + "grad_norm": 1.2308316211864536, + "learning_rate": 8.252680286672924e-07, + "loss": 0.7425345182418823, + "step": 5019 + }, + { + "epoch": 1.1566820276497696, + "grad_norm": 0.9907420981370885, + "learning_rate": 8.248929185700053e-07, + "loss": 0.7729727029800415, + "step": 5020 + }, + { + "epoch": 1.1569124423963133, + "grad_norm": 1.096476255015683, + "learning_rate": 
8.245178338892788e-07, + "loss": 0.8451874256134033, + "step": 5021 + }, + { + "epoch": 1.157142857142857, + "grad_norm": 1.1584589365926052, + "learning_rate": 8.241427746795569e-07, + "loss": 0.8666542768478394, + "step": 5022 + }, + { + "epoch": 1.157373271889401, + "grad_norm": 1.2897904410488261, + "learning_rate": 8.237677409952784e-07, + "loss": 0.740352988243103, + "step": 5023 + }, + { + "epoch": 1.1576036866359447, + "grad_norm": 0.9937724952342799, + "learning_rate": 8.233927328908788e-07, + "loss": 0.6325985193252563, + "step": 5024 + }, + { + "epoch": 1.1578341013824884, + "grad_norm": 1.0099472902179978, + "learning_rate": 8.230177504207901e-07, + "loss": 0.8075892925262451, + "step": 5025 + }, + { + "epoch": 1.1580645161290322, + "grad_norm": 1.0459718249244707, + "learning_rate": 8.22642793639441e-07, + "loss": 0.7176432609558105, + "step": 5026 + }, + { + "epoch": 1.1582949308755761, + "grad_norm": 1.1804726429614583, + "learning_rate": 8.222678626012554e-07, + "loss": 0.7734829187393188, + "step": 5027 + }, + { + "epoch": 1.1585253456221198, + "grad_norm": 1.3220222245590558, + "learning_rate": 8.218929573606544e-07, + "loss": 0.8642655611038208, + "step": 5028 + }, + { + "epoch": 1.1587557603686636, + "grad_norm": 1.0337487495481472, + "learning_rate": 8.215180779720548e-07, + "loss": 0.7788450121879578, + "step": 5029 + }, + { + "epoch": 1.1589861751152073, + "grad_norm": 0.9361659768144168, + "learning_rate": 8.211432244898696e-07, + "loss": 0.7470313310623169, + "step": 5030 + }, + { + "epoch": 1.1592165898617512, + "grad_norm": 0.9907043815397547, + "learning_rate": 8.207683969685091e-07, + "loss": 0.7691675424575806, + "step": 5031 + }, + { + "epoch": 1.159447004608295, + "grad_norm": 0.9920310393320094, + "learning_rate": 8.203935954623783e-07, + "loss": 0.7060209512710571, + "step": 5032 + }, + { + "epoch": 1.1596774193548387, + "grad_norm": 1.189958639239752, + "learning_rate": 8.20018820025879e-07, + "loss": 0.7617488503456116, + "step": 
5033 + }, + { + "epoch": 1.1599078341013824, + "grad_norm": 1.2174023482004634, + "learning_rate": 8.196440707134102e-07, + "loss": 0.7016350626945496, + "step": 5034 + }, + { + "epoch": 1.1601382488479262, + "grad_norm": 1.3407340114210469, + "learning_rate": 8.192693475793657e-07, + "loss": 0.8375445604324341, + "step": 5035 + }, + { + "epoch": 1.16036866359447, + "grad_norm": 1.2333127293881232, + "learning_rate": 8.188946506781359e-07, + "loss": 0.8903663158416748, + "step": 5036 + }, + { + "epoch": 1.1605990783410138, + "grad_norm": 1.1046448662682735, + "learning_rate": 8.18519980064108e-07, + "loss": 0.7613073587417603, + "step": 5037 + }, + { + "epoch": 1.1608294930875576, + "grad_norm": 1.2358045096315418, + "learning_rate": 8.181453357916649e-07, + "loss": 0.7443521022796631, + "step": 5038 + }, + { + "epoch": 1.1610599078341013, + "grad_norm": 1.0132222940739166, + "learning_rate": 8.17770717915185e-07, + "loss": 0.7986443042755127, + "step": 5039 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 1.1475221794766963, + "learning_rate": 8.173961264890447e-07, + "loss": 0.7128815650939941, + "step": 5040 + }, + { + "epoch": 1.161520737327189, + "grad_norm": 2.1353174029488593, + "learning_rate": 8.170215615676144e-07, + "loss": 0.7189117074012756, + "step": 5041 + }, + { + "epoch": 1.1617511520737327, + "grad_norm": 1.0970239097626442, + "learning_rate": 8.166470232052626e-07, + "loss": 0.8358731269836426, + "step": 5042 + }, + { + "epoch": 1.1619815668202764, + "grad_norm": 1.3103703595946257, + "learning_rate": 8.162725114563527e-07, + "loss": 0.7734829187393188, + "step": 5043 + }, + { + "epoch": 1.1622119815668204, + "grad_norm": 1.0836793655881298, + "learning_rate": 8.158980263752443e-07, + "loss": 0.842268705368042, + "step": 5044 + }, + { + "epoch": 1.162442396313364, + "grad_norm": 1.0953254817646525, + "learning_rate": 8.155235680162937e-07, + "loss": 0.7973036766052246, + "step": 5045 + }, + { + "epoch": 1.1626728110599078, + "grad_norm": 
1.1431491680692596, + "learning_rate": 8.151491364338532e-07, + "loss": 0.743615984916687, + "step": 5046 + }, + { + "epoch": 1.1629032258064516, + "grad_norm": 1.2354800674331334, + "learning_rate": 8.147747316822705e-07, + "loss": 0.799458384513855, + "step": 5047 + }, + { + "epoch": 1.1631336405529953, + "grad_norm": 1.4365906916451476, + "learning_rate": 8.144003538158907e-07, + "loss": 0.8368128538131714, + "step": 5048 + }, + { + "epoch": 1.1633640552995392, + "grad_norm": 1.0543438991079201, + "learning_rate": 8.140260028890537e-07, + "loss": 0.8543322086334229, + "step": 5049 + }, + { + "epoch": 1.163594470046083, + "grad_norm": 1.4010693577495907, + "learning_rate": 8.136516789560957e-07, + "loss": 0.9586522579193115, + "step": 5050 + }, + { + "epoch": 1.1638248847926267, + "grad_norm": 1.0831898931931903, + "learning_rate": 8.132773820713505e-07, + "loss": 0.7781316041946411, + "step": 5051 + }, + { + "epoch": 1.1640552995391704, + "grad_norm": 1.1820241176000723, + "learning_rate": 8.129031122891459e-07, + "loss": 0.7726340293884277, + "step": 5052 + }, + { + "epoch": 1.1642857142857144, + "grad_norm": 1.2561245635498344, + "learning_rate": 8.125288696638064e-07, + "loss": 0.886093258857727, + "step": 5053 + }, + { + "epoch": 1.164516129032258, + "grad_norm": 1.1568232893052595, + "learning_rate": 8.121546542496538e-07, + "loss": 0.7896960973739624, + "step": 5054 + }, + { + "epoch": 1.1647465437788018, + "grad_norm": 1.066019166680275, + "learning_rate": 8.117804661010045e-07, + "loss": 0.8272452354431152, + "step": 5055 + }, + { + "epoch": 1.1649769585253456, + "grad_norm": 1.216096321256879, + "learning_rate": 8.11406305272171e-07, + "loss": 0.8452264070510864, + "step": 5056 + }, + { + "epoch": 1.1652073732718895, + "grad_norm": 1.1423033593169452, + "learning_rate": 8.11032171817463e-07, + "loss": 0.7973369359970093, + "step": 5057 + }, + { + "epoch": 1.1654377880184332, + "grad_norm": 0.9573952961126706, + "learning_rate": 8.10658065791185e-07, + 
"loss": 0.8045153617858887, + "step": 5058 + }, + { + "epoch": 1.165668202764977, + "grad_norm": 1.2070626820317865, + "learning_rate": 8.102839872476378e-07, + "loss": 0.8921254873275757, + "step": 5059 + }, + { + "epoch": 1.1658986175115207, + "grad_norm": 1.1196640968944265, + "learning_rate": 8.099099362411191e-07, + "loss": 0.7633669376373291, + "step": 5060 + }, + { + "epoch": 1.1661290322580644, + "grad_norm": 1.4676357149183228, + "learning_rate": 8.095359128259214e-07, + "loss": 0.9303205013275146, + "step": 5061 + }, + { + "epoch": 1.1663594470046084, + "grad_norm": 1.1532839170590041, + "learning_rate": 8.091619170563335e-07, + "loss": 0.867104709148407, + "step": 5062 + }, + { + "epoch": 1.166589861751152, + "grad_norm": 1.2071495700843942, + "learning_rate": 8.087879489866409e-07, + "loss": 0.8136844038963318, + "step": 5063 + }, + { + "epoch": 1.1668202764976958, + "grad_norm": 1.5482117252744063, + "learning_rate": 8.084140086711246e-07, + "loss": 0.9016939997673035, + "step": 5064 + }, + { + "epoch": 1.1670506912442395, + "grad_norm": 1.5795186850129557, + "learning_rate": 8.080400961640608e-07, + "loss": 0.8621236085891724, + "step": 5065 + }, + { + "epoch": 1.1672811059907835, + "grad_norm": 1.336449231038986, + "learning_rate": 8.076662115197234e-07, + "loss": 0.856648862361908, + "step": 5066 + }, + { + "epoch": 1.1675115207373272, + "grad_norm": 1.3107118910408024, + "learning_rate": 8.072923547923805e-07, + "loss": 0.7752784490585327, + "step": 5067 + }, + { + "epoch": 1.167741935483871, + "grad_norm": 1.3093385224686542, + "learning_rate": 8.069185260362974e-07, + "loss": 0.8573904037475586, + "step": 5068 + }, + { + "epoch": 1.1679723502304147, + "grad_norm": 1.1636599679682322, + "learning_rate": 8.065447253057347e-07, + "loss": 0.724372148513794, + "step": 5069 + }, + { + "epoch": 1.1682027649769586, + "grad_norm": 1.146758460237727, + "learning_rate": 8.061709526549486e-07, + "loss": 0.7428436875343323, + "step": 5070 + }, + { + "epoch": 
1.1684331797235024, + "grad_norm": 1.273017047999111, + "learning_rate": 8.057972081381925e-07, + "loss": 0.8888595104217529, + "step": 5071 + }, + { + "epoch": 1.168663594470046, + "grad_norm": 0.9497262022662447, + "learning_rate": 8.054234918097146e-07, + "loss": 0.5753290057182312, + "step": 5072 + }, + { + "epoch": 1.1688940092165898, + "grad_norm": 1.037170746248572, + "learning_rate": 8.050498037237589e-07, + "loss": 0.6724086999893188, + "step": 5073 + }, + { + "epoch": 1.1691244239631335, + "grad_norm": 1.1504888789916348, + "learning_rate": 8.046761439345664e-07, + "loss": 0.7410751581192017, + "step": 5074 + }, + { + "epoch": 1.1693548387096775, + "grad_norm": 1.2658920818717738, + "learning_rate": 8.043025124963731e-07, + "loss": 0.8522979021072388, + "step": 5075 + }, + { + "epoch": 1.1695852534562212, + "grad_norm": 0.9918624551952729, + "learning_rate": 8.039289094634109e-07, + "loss": 0.6243441700935364, + "step": 5076 + }, + { + "epoch": 1.169815668202765, + "grad_norm": 1.113826210544245, + "learning_rate": 8.03555334889908e-07, + "loss": 0.9332150220870972, + "step": 5077 + }, + { + "epoch": 1.1700460829493087, + "grad_norm": 1.17170377289517, + "learning_rate": 8.031817888300883e-07, + "loss": 0.7620645761489868, + "step": 5078 + }, + { + "epoch": 1.1702764976958526, + "grad_norm": 1.2693395517069683, + "learning_rate": 8.028082713381708e-07, + "loss": 0.6983245015144348, + "step": 5079 + }, + { + "epoch": 1.1705069124423964, + "grad_norm": 1.049572082944252, + "learning_rate": 8.024347824683723e-07, + "loss": 0.6220129728317261, + "step": 5080 + }, + { + "epoch": 1.17073732718894, + "grad_norm": 1.0906919021349344, + "learning_rate": 8.020613222749034e-07, + "loss": 0.7363810539245605, + "step": 5081 + }, + { + "epoch": 1.1709677419354838, + "grad_norm": 1.1450127350480972, + "learning_rate": 8.016878908119713e-07, + "loss": 0.6864198446273804, + "step": 5082 + }, + { + "epoch": 1.1711981566820278, + "grad_norm": 1.061738817269073, + 
"learning_rate": 8.013144881337795e-07, + "loss": 0.758607029914856, + "step": 5083 + }, + { + "epoch": 1.1714285714285715, + "grad_norm": 1.038630253415404, + "learning_rate": 8.009411142945269e-07, + "loss": 0.7519336938858032, + "step": 5084 + }, + { + "epoch": 1.1716589861751152, + "grad_norm": 1.132431622302542, + "learning_rate": 8.005677693484076e-07, + "loss": 0.7681798934936523, + "step": 5085 + }, + { + "epoch": 1.171889400921659, + "grad_norm": 1.1022208744006678, + "learning_rate": 8.00194453349613e-07, + "loss": 0.6808522939682007, + "step": 5086 + }, + { + "epoch": 1.1721198156682027, + "grad_norm": 1.039877694159321, + "learning_rate": 7.99821166352329e-07, + "loss": 0.7373358607292175, + "step": 5087 + }, + { + "epoch": 1.1723502304147466, + "grad_norm": 1.0199898679930943, + "learning_rate": 7.994479084107374e-07, + "loss": 0.7272510528564453, + "step": 5088 + }, + { + "epoch": 1.1725806451612903, + "grad_norm": 1.2473385255320408, + "learning_rate": 7.990746795790166e-07, + "loss": 0.845584511756897, + "step": 5089 + }, + { + "epoch": 1.172811059907834, + "grad_norm": 1.188342902392479, + "learning_rate": 7.987014799113397e-07, + "loss": 0.7751157283782959, + "step": 5090 + }, + { + "epoch": 1.1730414746543778, + "grad_norm": 1.1193246813934836, + "learning_rate": 7.98328309461877e-07, + "loss": 0.679701566696167, + "step": 5091 + }, + { + "epoch": 1.1732718894009218, + "grad_norm": 1.1116687434739936, + "learning_rate": 7.979551682847932e-07, + "loss": 0.7630679607391357, + "step": 5092 + }, + { + "epoch": 1.1735023041474655, + "grad_norm": 1.0309555153446328, + "learning_rate": 7.975820564342487e-07, + "loss": 0.700912594795227, + "step": 5093 + }, + { + "epoch": 1.1737327188940092, + "grad_norm": 1.097867809116453, + "learning_rate": 7.972089739644012e-07, + "loss": 0.6789706945419312, + "step": 5094 + }, + { + "epoch": 1.173963133640553, + "grad_norm": 1.411041629986285, + "learning_rate": 7.968359209294027e-07, + "loss": 0.6744855642318726, + 
"step": 5095 + }, + { + "epoch": 1.1741935483870969, + "grad_norm": 1.060959542495881, + "learning_rate": 7.964628973834011e-07, + "loss": 0.7551798820495605, + "step": 5096 + }, + { + "epoch": 1.1744239631336406, + "grad_norm": 0.9743982939550204, + "learning_rate": 7.960899033805407e-07, + "loss": 0.711478054523468, + "step": 5097 + }, + { + "epoch": 1.1746543778801843, + "grad_norm": 1.1281696794434548, + "learning_rate": 7.95716938974961e-07, + "loss": 0.7464019060134888, + "step": 5098 + }, + { + "epoch": 1.174884792626728, + "grad_norm": 1.2269121334355921, + "learning_rate": 7.953440042207966e-07, + "loss": 0.7667930126190186, + "step": 5099 + }, + { + "epoch": 1.1751152073732718, + "grad_norm": 0.9314104563097803, + "learning_rate": 7.949710991721796e-07, + "loss": 0.7574796676635742, + "step": 5100 + }, + { + "epoch": 1.1753456221198157, + "grad_norm": 0.9285474016256665, + "learning_rate": 7.945982238832361e-07, + "loss": 0.6627304553985596, + "step": 5101 + }, + { + "epoch": 1.1755760368663595, + "grad_norm": 1.2503590742658475, + "learning_rate": 7.942253784080879e-07, + "loss": 0.6803916692733765, + "step": 5102 + }, + { + "epoch": 1.1758064516129032, + "grad_norm": 1.1622603764445048, + "learning_rate": 7.938525628008541e-07, + "loss": 0.7107337713241577, + "step": 5103 + }, + { + "epoch": 1.176036866359447, + "grad_norm": 1.0411872319848583, + "learning_rate": 7.934797771156481e-07, + "loss": 0.7669517993927002, + "step": 5104 + }, + { + "epoch": 1.1762672811059907, + "grad_norm": 1.185214338142044, + "learning_rate": 7.931070214065787e-07, + "loss": 0.7431854605674744, + "step": 5105 + }, + { + "epoch": 1.1764976958525346, + "grad_norm": 1.121798206744332, + "learning_rate": 7.927342957277512e-07, + "loss": 0.7778047323226929, + "step": 5106 + }, + { + "epoch": 1.1767281105990783, + "grad_norm": 1.1095356364162186, + "learning_rate": 7.923616001332666e-07, + "loss": 0.7759886980056763, + "step": 5107 + }, + { + "epoch": 1.176958525345622, + 
"grad_norm": 1.236811676128496, + "learning_rate": 7.919889346772206e-07, + "loss": 0.8010379076004028, + "step": 5108 + }, + { + "epoch": 1.177188940092166, + "grad_norm": 1.06629818182004, + "learning_rate": 7.916162994137055e-07, + "loss": 0.6671626567840576, + "step": 5109 + }, + { + "epoch": 1.1774193548387097, + "grad_norm": 1.3043487682811514, + "learning_rate": 7.912436943968088e-07, + "loss": 0.7521620988845825, + "step": 5110 + }, + { + "epoch": 1.1776497695852535, + "grad_norm": 1.0243889894502596, + "learning_rate": 7.908711196806131e-07, + "loss": 0.7626729011535645, + "step": 5111 + }, + { + "epoch": 1.1778801843317972, + "grad_norm": 1.2636422633100723, + "learning_rate": 7.904985753191979e-07, + "loss": 0.8247047066688538, + "step": 5112 + }, + { + "epoch": 1.178110599078341, + "grad_norm": 0.9958902943746148, + "learning_rate": 7.901260613666372e-07, + "loss": 0.6851831078529358, + "step": 5113 + }, + { + "epoch": 1.1783410138248849, + "grad_norm": 1.114469339271613, + "learning_rate": 7.897535778770003e-07, + "loss": 0.7752102613449097, + "step": 5114 + }, + { + "epoch": 1.1785714285714286, + "grad_norm": 1.0998339013097813, + "learning_rate": 7.893811249043537e-07, + "loss": 0.8885148167610168, + "step": 5115 + }, + { + "epoch": 1.1788018433179723, + "grad_norm": 1.3062040351627935, + "learning_rate": 7.890087025027579e-07, + "loss": 0.7530373334884644, + "step": 5116 + }, + { + "epoch": 1.179032258064516, + "grad_norm": 1.0400370692656624, + "learning_rate": 7.886363107262697e-07, + "loss": 0.7795672416687012, + "step": 5117 + }, + { + "epoch": 1.1792626728110598, + "grad_norm": 1.0719443222612952, + "learning_rate": 7.882639496289413e-07, + "loss": 0.7563966512680054, + "step": 5118 + }, + { + "epoch": 1.1794930875576037, + "grad_norm": 0.9799024359449507, + "learning_rate": 7.878916192648198e-07, + "loss": 0.7218793630599976, + "step": 5119 + }, + { + "epoch": 1.1797235023041475, + "grad_norm": 1.3292879414667447, + "learning_rate": 
7.875193196879494e-07, + "loss": 0.8213250637054443, + "step": 5120 + }, + { + "epoch": 1.1799539170506912, + "grad_norm": 1.118163280715499, + "learning_rate": 7.871470509523685e-07, + "loss": 0.8134827613830566, + "step": 5121 + }, + { + "epoch": 1.1801843317972351, + "grad_norm": 0.9613119464109229, + "learning_rate": 7.867748131121109e-07, + "loss": 0.6135407090187073, + "step": 5122 + }, + { + "epoch": 1.1804147465437789, + "grad_norm": 1.2999694720426915, + "learning_rate": 7.864026062212073e-07, + "loss": 0.8110366463661194, + "step": 5123 + }, + { + "epoch": 1.1806451612903226, + "grad_norm": 0.9962674732824631, + "learning_rate": 7.860304303336827e-07, + "loss": 0.6723964214324951, + "step": 5124 + }, + { + "epoch": 1.1808755760368663, + "grad_norm": 1.2942490465484493, + "learning_rate": 7.856582855035577e-07, + "loss": 0.8308886885643005, + "step": 5125 + }, + { + "epoch": 1.18110599078341, + "grad_norm": 1.023999175845692, + "learning_rate": 7.852861717848488e-07, + "loss": 0.7960010766983032, + "step": 5126 + }, + { + "epoch": 1.181336405529954, + "grad_norm": 1.2456351777125307, + "learning_rate": 7.84914089231568e-07, + "loss": 0.7931640148162842, + "step": 5127 + }, + { + "epoch": 1.1815668202764977, + "grad_norm": 1.2288164842517166, + "learning_rate": 7.845420378977222e-07, + "loss": 0.762995719909668, + "step": 5128 + }, + { + "epoch": 1.1817972350230415, + "grad_norm": 1.373671152705427, + "learning_rate": 7.841700178373146e-07, + "loss": 0.9416301250457764, + "step": 5129 + }, + { + "epoch": 1.1820276497695852, + "grad_norm": 1.0032147289786453, + "learning_rate": 7.837980291043431e-07, + "loss": 0.7666923999786377, + "step": 5130 + }, + { + "epoch": 1.182258064516129, + "grad_norm": 1.1123898953678502, + "learning_rate": 7.834260717528012e-07, + "loss": 0.7668861150741577, + "step": 5131 + }, + { + "epoch": 1.1824884792626729, + "grad_norm": 1.1236616956881595, + "learning_rate": 7.830541458366786e-07, + "loss": 0.7576566934585571, + "step": 
5132 + }, + { + "epoch": 1.1827188940092166, + "grad_norm": 1.0432406760791426, + "learning_rate": 7.826822514099595e-07, + "loss": 0.6288204193115234, + "step": 5133 + }, + { + "epoch": 1.1829493087557603, + "grad_norm": 1.2747953745069134, + "learning_rate": 7.823103885266236e-07, + "loss": 0.8332630395889282, + "step": 5134 + }, + { + "epoch": 1.1831797235023043, + "grad_norm": 1.3987532245853456, + "learning_rate": 7.819385572406469e-07, + "loss": 0.9294546246528625, + "step": 5135 + }, + { + "epoch": 1.183410138248848, + "grad_norm": 0.9911973140133253, + "learning_rate": 7.81566757606e-07, + "loss": 0.637617826461792, + "step": 5136 + }, + { + "epoch": 1.1836405529953917, + "grad_norm": 1.2295561738436023, + "learning_rate": 7.81194989676649e-07, + "loss": 0.7614878416061401, + "step": 5137 + }, + { + "epoch": 1.1838709677419355, + "grad_norm": 1.2939539056978149, + "learning_rate": 7.808232535065556e-07, + "loss": 0.8612164258956909, + "step": 5138 + }, + { + "epoch": 1.1841013824884792, + "grad_norm": 1.0758125620247463, + "learning_rate": 7.804515491496765e-07, + "loss": 0.7530151605606079, + "step": 5139 + }, + { + "epoch": 1.1843317972350231, + "grad_norm": 0.9883281570065391, + "learning_rate": 7.800798766599648e-07, + "loss": 0.7739782929420471, + "step": 5140 + }, + { + "epoch": 1.1845622119815669, + "grad_norm": 1.0835226521428547, + "learning_rate": 7.797082360913678e-07, + "loss": 0.7992277145385742, + "step": 5141 + }, + { + "epoch": 1.1847926267281106, + "grad_norm": 1.2343955942215838, + "learning_rate": 7.793366274978284e-07, + "loss": 0.8744574785232544, + "step": 5142 + }, + { + "epoch": 1.1850230414746543, + "grad_norm": 0.9992165946111031, + "learning_rate": 7.789650509332857e-07, + "loss": 0.7522493600845337, + "step": 5143 + }, + { + "epoch": 1.185253456221198, + "grad_norm": 1.1095107175779666, + "learning_rate": 7.785935064516733e-07, + "loss": 0.8811007142066956, + "step": 5144 + }, + { + "epoch": 1.185483870967742, + "grad_norm": 
0.9512882648642599, + "learning_rate": 7.782219941069201e-07, + "loss": 0.8141417503356934, + "step": 5145 + }, + { + "epoch": 1.1857142857142857, + "grad_norm": 1.3048397777053706, + "learning_rate": 7.778505139529509e-07, + "loss": 0.9473680257797241, + "step": 5146 + }, + { + "epoch": 1.1859447004608294, + "grad_norm": 1.1561666933094623, + "learning_rate": 7.774790660436857e-07, + "loss": 0.740132212638855, + "step": 5147 + }, + { + "epoch": 1.1861751152073732, + "grad_norm": 1.1265716565789026, + "learning_rate": 7.771076504330392e-07, + "loss": 0.7904594540596008, + "step": 5148 + }, + { + "epoch": 1.1864055299539171, + "grad_norm": 1.1481555737803508, + "learning_rate": 7.767362671749224e-07, + "loss": 0.8085094690322876, + "step": 5149 + }, + { + "epoch": 1.1866359447004609, + "grad_norm": 1.3362082879917547, + "learning_rate": 7.76364916323241e-07, + "loss": 0.6954756379127502, + "step": 5150 + }, + { + "epoch": 1.1868663594470046, + "grad_norm": 1.175085216674836, + "learning_rate": 7.759935979318953e-07, + "loss": 0.8575167059898376, + "step": 5151 + }, + { + "epoch": 1.1870967741935483, + "grad_norm": 0.9330545417113619, + "learning_rate": 7.756223120547829e-07, + "loss": 0.6125110387802124, + "step": 5152 + }, + { + "epoch": 1.1873271889400923, + "grad_norm": 1.1387987197615417, + "learning_rate": 7.752510587457949e-07, + "loss": 0.7737400531768799, + "step": 5153 + }, + { + "epoch": 1.187557603686636, + "grad_norm": 0.9473095115528148, + "learning_rate": 7.748798380588177e-07, + "loss": 0.7300955653190613, + "step": 5154 + }, + { + "epoch": 1.1877880184331797, + "grad_norm": 0.9479432315278626, + "learning_rate": 7.745086500477343e-07, + "loss": 0.7974356412887573, + "step": 5155 + }, + { + "epoch": 1.1880184331797234, + "grad_norm": 1.120213603018525, + "learning_rate": 7.74137494766422e-07, + "loss": 0.8158693313598633, + "step": 5156 + }, + { + "epoch": 1.1882488479262672, + "grad_norm": 0.9086968377624679, + "learning_rate": 7.737663722687531e-07, 
+ "loss": 0.6656177639961243, + "step": 5157 + }, + { + "epoch": 1.1884792626728111, + "grad_norm": 1.284345958176322, + "learning_rate": 7.733952826085958e-07, + "loss": 0.7796640992164612, + "step": 5158 + }, + { + "epoch": 1.1887096774193548, + "grad_norm": 1.1079992534891525, + "learning_rate": 7.730242258398135e-07, + "loss": 0.9224779009819031, + "step": 5159 + }, + { + "epoch": 1.1889400921658986, + "grad_norm": 1.2013047291849663, + "learning_rate": 7.726532020162639e-07, + "loss": 0.7105277180671692, + "step": 5160 + }, + { + "epoch": 1.1891705069124423, + "grad_norm": 0.9139263319393289, + "learning_rate": 7.722822111918012e-07, + "loss": 0.5793930292129517, + "step": 5161 + }, + { + "epoch": 1.1894009216589863, + "grad_norm": 0.9419478266668957, + "learning_rate": 7.719112534202743e-07, + "loss": 0.7319367527961731, + "step": 5162 + }, + { + "epoch": 1.18963133640553, + "grad_norm": 1.182614737199728, + "learning_rate": 7.715403287555266e-07, + "loss": 0.7517954111099243, + "step": 5163 + }, + { + "epoch": 1.1898617511520737, + "grad_norm": 1.1800441614309307, + "learning_rate": 7.711694372513981e-07, + "loss": 0.8633241057395935, + "step": 5164 + }, + { + "epoch": 1.1900921658986174, + "grad_norm": 1.280920610105802, + "learning_rate": 7.707985789617227e-07, + "loss": 0.6453210115432739, + "step": 5165 + }, + { + "epoch": 1.1903225806451614, + "grad_norm": 1.1209224749220659, + "learning_rate": 7.704277539403303e-07, + "loss": 0.7609909772872925, + "step": 5166 + }, + { + "epoch": 1.1905529953917051, + "grad_norm": 1.1829891287159422, + "learning_rate": 7.700569622410453e-07, + "loss": 0.7419755458831787, + "step": 5167 + }, + { + "epoch": 1.1907834101382488, + "grad_norm": 1.0759571852853795, + "learning_rate": 7.696862039176879e-07, + "loss": 0.849078357219696, + "step": 5168 + }, + { + "epoch": 1.1910138248847926, + "grad_norm": 1.3077976619104341, + "learning_rate": 7.693154790240732e-07, + "loss": 0.8147921562194824, + "step": 5169 + }, + { + 
"epoch": 1.1912442396313363, + "grad_norm": 1.1349568865686221, + "learning_rate": 7.689447876140114e-07, + "loss": 0.7660118937492371, + "step": 5170 + }, + { + "epoch": 1.1914746543778802, + "grad_norm": 0.9919046297525586, + "learning_rate": 7.685741297413075e-07, + "loss": 0.7775185108184814, + "step": 5171 + }, + { + "epoch": 1.191705069124424, + "grad_norm": 1.0634336005518812, + "learning_rate": 7.682035054597624e-07, + "loss": 0.7184321880340576, + "step": 5172 + }, + { + "epoch": 1.1919354838709677, + "grad_norm": 0.9191067866194278, + "learning_rate": 7.678329148231719e-07, + "loss": 0.7108585834503174, + "step": 5173 + }, + { + "epoch": 1.1921658986175114, + "grad_norm": 1.169972531551494, + "learning_rate": 7.674623578853259e-07, + "loss": 0.7252670526504517, + "step": 5174 + }, + { + "epoch": 1.1923963133640554, + "grad_norm": 1.0227424567448893, + "learning_rate": 7.670918347000113e-07, + "loss": 0.818352460861206, + "step": 5175 + }, + { + "epoch": 1.192626728110599, + "grad_norm": 0.8768631462521176, + "learning_rate": 7.667213453210086e-07, + "loss": 0.6538013815879822, + "step": 5176 + }, + { + "epoch": 1.1928571428571428, + "grad_norm": 1.1216359209528128, + "learning_rate": 7.663508898020935e-07, + "loss": 0.7058148384094238, + "step": 5177 + }, + { + "epoch": 1.1930875576036866, + "grad_norm": 1.0528263608484594, + "learning_rate": 7.659804681970377e-07, + "loss": 0.7003160715103149, + "step": 5178 + }, + { + "epoch": 1.1933179723502305, + "grad_norm": 1.2339709506043992, + "learning_rate": 7.656100805596072e-07, + "loss": 0.84567791223526, + "step": 5179 + }, + { + "epoch": 1.1935483870967742, + "grad_norm": 1.239861543806107, + "learning_rate": 7.652397269435626e-07, + "loss": 0.7994743585586548, + "step": 5180 + }, + { + "epoch": 1.193778801843318, + "grad_norm": 1.3106444419652792, + "learning_rate": 7.648694074026615e-07, + "loss": 0.8177791833877563, + "step": 5181 + }, + { + "epoch": 1.1940092165898617, + "grad_norm": 1.362939104353802, 
+ "learning_rate": 7.644991219906545e-07, + "loss": 0.6663975715637207, + "step": 5182 + }, + { + "epoch": 1.1942396313364054, + "grad_norm": 1.1422405746222943, + "learning_rate": 7.641288707612878e-07, + "loss": 0.8275883197784424, + "step": 5183 + }, + { + "epoch": 1.1944700460829494, + "grad_norm": 1.1201157873973466, + "learning_rate": 7.637586537683036e-07, + "loss": 0.7710767388343811, + "step": 5184 + }, + { + "epoch": 1.194700460829493, + "grad_norm": 1.1629669577400157, + "learning_rate": 7.633884710654382e-07, + "loss": 0.7628582715988159, + "step": 5185 + }, + { + "epoch": 1.1949308755760368, + "grad_norm": 1.3793540006541976, + "learning_rate": 7.630183227064227e-07, + "loss": 0.7002676725387573, + "step": 5186 + }, + { + "epoch": 1.1951612903225806, + "grad_norm": 0.9948455527839576, + "learning_rate": 7.626482087449841e-07, + "loss": 0.8272073268890381, + "step": 5187 + }, + { + "epoch": 1.1953917050691245, + "grad_norm": 1.0711227380559258, + "learning_rate": 7.622781292348435e-07, + "loss": 0.7881417274475098, + "step": 5188 + }, + { + "epoch": 1.1956221198156682, + "grad_norm": 1.0728428578693516, + "learning_rate": 7.61908084229718e-07, + "loss": 0.797294020652771, + "step": 5189 + }, + { + "epoch": 1.195852534562212, + "grad_norm": 1.0264450399364256, + "learning_rate": 7.615380737833191e-07, + "loss": 0.7752290964126587, + "step": 5190 + }, + { + "epoch": 1.1960829493087557, + "grad_norm": 1.0830464595218987, + "learning_rate": 7.611680979493525e-07, + "loss": 0.7299143075942993, + "step": 5191 + }, + { + "epoch": 1.1963133640552996, + "grad_norm": 1.4839567137751186, + "learning_rate": 7.60798156781521e-07, + "loss": 0.6749997138977051, + "step": 5192 + }, + { + "epoch": 1.1965437788018434, + "grad_norm": 1.2717197322235172, + "learning_rate": 7.6042825033352e-07, + "loss": 0.7933796048164368, + "step": 5193 + }, + { + "epoch": 1.196774193548387, + "grad_norm": 1.1254669600910374, + "learning_rate": 7.600583786590411e-07, + "loss": 
0.7214919328689575, + "step": 5194 + }, + { + "epoch": 1.1970046082949308, + "grad_norm": 1.0000165841598083, + "learning_rate": 7.596885418117713e-07, + "loss": 0.7804256081581116, + "step": 5195 + }, + { + "epoch": 1.1972350230414746, + "grad_norm": 1.2738023107912249, + "learning_rate": 7.593187398453915e-07, + "loss": 0.7615138292312622, + "step": 5196 + }, + { + "epoch": 1.1974654377880185, + "grad_norm": 1.0493977127227612, + "learning_rate": 7.589489728135778e-07, + "loss": 0.8473657369613647, + "step": 5197 + }, + { + "epoch": 1.1976958525345622, + "grad_norm": 1.2204301678409606, + "learning_rate": 7.585792407700018e-07, + "loss": 0.7302027940750122, + "step": 5198 + }, + { + "epoch": 1.197926267281106, + "grad_norm": 1.123276567811957, + "learning_rate": 7.582095437683294e-07, + "loss": 0.7631692886352539, + "step": 5199 + }, + { + "epoch": 1.1981566820276497, + "grad_norm": 1.339389807954867, + "learning_rate": 7.578398818622211e-07, + "loss": 0.7982754707336426, + "step": 5200 + }, + { + "epoch": 1.1983870967741936, + "grad_norm": 1.3949436336418501, + "learning_rate": 7.574702551053339e-07, + "loss": 0.8445635437965393, + "step": 5201 + }, + { + "epoch": 1.1986175115207374, + "grad_norm": 1.267881130363425, + "learning_rate": 7.571006635513182e-07, + "loss": 0.8486276268959045, + "step": 5202 + }, + { + "epoch": 1.198847926267281, + "grad_norm": 1.2841422228776138, + "learning_rate": 7.567311072538191e-07, + "loss": 0.8433184623718262, + "step": 5203 + }, + { + "epoch": 1.1990783410138248, + "grad_norm": 1.5895945882971518, + "learning_rate": 7.56361586266478e-07, + "loss": 0.9772260189056396, + "step": 5204 + }, + { + "epoch": 1.1993087557603688, + "grad_norm": 1.1927959868338558, + "learning_rate": 7.559921006429304e-07, + "loss": 0.8349692821502686, + "step": 5205 + }, + { + "epoch": 1.1995391705069125, + "grad_norm": 1.070076083870323, + "learning_rate": 7.556226504368059e-07, + "loss": 0.7454575300216675, + "step": 5206 + }, + { + "epoch": 
1.1997695852534562, + "grad_norm": 0.882927792535501, + "learning_rate": 7.552532357017303e-07, + "loss": 0.6680991649627686, + "step": 5207 + }, + { + "epoch": 1.2, + "grad_norm": 1.1844993546767875, + "learning_rate": 7.54883856491324e-07, + "loss": 0.6528318524360657, + "step": 5208 + }, + { + "epoch": 1.2002304147465437, + "grad_norm": 1.0482736751922475, + "learning_rate": 7.545145128592008e-07, + "loss": 0.7711834907531738, + "step": 5209 + }, + { + "epoch": 1.2004608294930876, + "grad_norm": 1.022603342926927, + "learning_rate": 7.541452048589714e-07, + "loss": 0.6378746628761292, + "step": 5210 + }, + { + "epoch": 1.2006912442396314, + "grad_norm": 0.9309859008896244, + "learning_rate": 7.537759325442402e-07, + "loss": 0.7489340305328369, + "step": 5211 + }, + { + "epoch": 1.200921658986175, + "grad_norm": 1.0825673838806515, + "learning_rate": 7.53406695968606e-07, + "loss": 0.7869534492492676, + "step": 5212 + }, + { + "epoch": 1.2011520737327188, + "grad_norm": 1.1316888770375757, + "learning_rate": 7.530374951856637e-07, + "loss": 0.7252482175827026, + "step": 5213 + }, + { + "epoch": 1.2013824884792628, + "grad_norm": 1.1337087819491523, + "learning_rate": 7.526683302490018e-07, + "loss": 0.763259768486023, + "step": 5214 + }, + { + "epoch": 1.2016129032258065, + "grad_norm": 1.405277715760194, + "learning_rate": 7.522992012122046e-07, + "loss": 0.8135688304901123, + "step": 5215 + }, + { + "epoch": 1.2018433179723502, + "grad_norm": 1.5589534049714566, + "learning_rate": 7.519301081288504e-07, + "loss": 0.9282290935516357, + "step": 5216 + }, + { + "epoch": 1.202073732718894, + "grad_norm": 1.2621340712897178, + "learning_rate": 7.515610510525125e-07, + "loss": 0.7968727946281433, + "step": 5217 + }, + { + "epoch": 1.202304147465438, + "grad_norm": 1.4154309582650375, + "learning_rate": 7.511920300367594e-07, + "loss": 0.9495606422424316, + "step": 5218 + }, + { + "epoch": 1.2025345622119816, + "grad_norm": 1.120709992771365, + "learning_rate": 
7.508230451351537e-07, + "loss": 0.6790425181388855, + "step": 5219 + }, + { + "epoch": 1.2027649769585254, + "grad_norm": 1.1216778132469425, + "learning_rate": 7.504540964012527e-07, + "loss": 0.7269036173820496, + "step": 5220 + }, + { + "epoch": 1.202995391705069, + "grad_norm": 1.4394573291388193, + "learning_rate": 7.500851838886097e-07, + "loss": 0.820799708366394, + "step": 5221 + }, + { + "epoch": 1.2032258064516128, + "grad_norm": 1.1080457725700354, + "learning_rate": 7.497163076507715e-07, + "loss": 0.7693401575088501, + "step": 5222 + }, + { + "epoch": 1.2034562211981568, + "grad_norm": 1.1611837511561531, + "learning_rate": 7.493474677412793e-07, + "loss": 0.7687606811523438, + "step": 5223 + }, + { + "epoch": 1.2036866359447005, + "grad_norm": 0.9784122136232752, + "learning_rate": 7.489786642136709e-07, + "loss": 0.6858488321304321, + "step": 5224 + }, + { + "epoch": 1.2039170506912442, + "grad_norm": 0.8776412008252917, + "learning_rate": 7.486098971214769e-07, + "loss": 0.7575044631958008, + "step": 5225 + }, + { + "epoch": 1.204147465437788, + "grad_norm": 0.8129887936087057, + "learning_rate": 7.482411665182236e-07, + "loss": 0.6799627542495728, + "step": 5226 + }, + { + "epoch": 1.2043778801843317, + "grad_norm": 1.4994332488998736, + "learning_rate": 7.478724724574317e-07, + "loss": 0.8882759809494019, + "step": 5227 + }, + { + "epoch": 1.2046082949308756, + "grad_norm": 1.10750930167245, + "learning_rate": 7.475038149926165e-07, + "loss": 0.7835016250610352, + "step": 5228 + }, + { + "epoch": 1.2048387096774194, + "grad_norm": 1.3325922049902164, + "learning_rate": 7.471351941772883e-07, + "loss": 0.9264512062072754, + "step": 5229 + }, + { + "epoch": 1.205069124423963, + "grad_norm": 1.225862576818596, + "learning_rate": 7.467666100649521e-07, + "loss": 0.8094228506088257, + "step": 5230 + }, + { + "epoch": 1.205299539170507, + "grad_norm": 1.167425367358343, + "learning_rate": 7.463980627091073e-07, + "loss": 0.7782102823257446, + "step": 
5231 + }, + { + "epoch": 1.2055299539170508, + "grad_norm": 1.2892161969383955, + "learning_rate": 7.460295521632474e-07, + "loss": 0.7946768999099731, + "step": 5232 + }, + { + "epoch": 1.2057603686635945, + "grad_norm": 1.2538288509415036, + "learning_rate": 7.456610784808624e-07, + "loss": 0.7571625709533691, + "step": 5233 + }, + { + "epoch": 1.2059907834101382, + "grad_norm": 1.3786667467707436, + "learning_rate": 7.45292641715435e-07, + "loss": 0.9760236144065857, + "step": 5234 + }, + { + "epoch": 1.206221198156682, + "grad_norm": 1.0717694328508904, + "learning_rate": 7.449242419204431e-07, + "loss": 0.6370055675506592, + "step": 5235 + }, + { + "epoch": 1.206451612903226, + "grad_norm": 1.226412390848778, + "learning_rate": 7.445558791493603e-07, + "loss": 0.7991320490837097, + "step": 5236 + }, + { + "epoch": 1.2066820276497696, + "grad_norm": 1.0607083796487833, + "learning_rate": 7.441875534556531e-07, + "loss": 0.8840054273605347, + "step": 5237 + }, + { + "epoch": 1.2069124423963133, + "grad_norm": 1.0615184698087237, + "learning_rate": 7.438192648927841e-07, + "loss": 0.8634533882141113, + "step": 5238 + }, + { + "epoch": 1.207142857142857, + "grad_norm": 0.9816687263450602, + "learning_rate": 7.434510135142098e-07, + "loss": 0.7081723213195801, + "step": 5239 + }, + { + "epoch": 1.2073732718894008, + "grad_norm": 1.1398058732045784, + "learning_rate": 7.430827993733808e-07, + "loss": 0.7160249352455139, + "step": 5240 + }, + { + "epoch": 1.2076036866359448, + "grad_norm": 0.8011837684152103, + "learning_rate": 7.427146225237438e-07, + "loss": 0.5323421955108643, + "step": 5241 + }, + { + "epoch": 1.2078341013824885, + "grad_norm": 1.0448270993907307, + "learning_rate": 7.423464830187386e-07, + "loss": 0.6439197063446045, + "step": 5242 + }, + { + "epoch": 1.2080645161290322, + "grad_norm": 1.2861588666790074, + "learning_rate": 7.419783809117999e-07, + "loss": 0.8268016576766968, + "step": 5243 + }, + { + "epoch": 1.2082949308755762, + "grad_norm": 
1.0010661947708184, + "learning_rate": 7.416103162563582e-07, + "loss": 0.8115339279174805, + "step": 5244 + }, + { + "epoch": 1.2085253456221199, + "grad_norm": 1.05524382659239, + "learning_rate": 7.41242289105837e-07, + "loss": 0.8677197694778442, + "step": 5245 + }, + { + "epoch": 1.2087557603686636, + "grad_norm": 1.3337261104998102, + "learning_rate": 7.408742995136547e-07, + "loss": 0.7942948937416077, + "step": 5246 + }, + { + "epoch": 1.2089861751152073, + "grad_norm": 1.4261507552200647, + "learning_rate": 7.405063475332249e-07, + "loss": 0.8457766771316528, + "step": 5247 + }, + { + "epoch": 1.209216589861751, + "grad_norm": 1.2992145711475631, + "learning_rate": 7.401384332179552e-07, + "loss": 0.8463923931121826, + "step": 5248 + }, + { + "epoch": 1.209447004608295, + "grad_norm": 1.2576660242210724, + "learning_rate": 7.397705566212479e-07, + "loss": 0.9192875623703003, + "step": 5249 + }, + { + "epoch": 1.2096774193548387, + "grad_norm": 1.257257688865163, + "learning_rate": 7.394027177964999e-07, + "loss": 0.7461347579956055, + "step": 5250 + }, + { + "epoch": 1.2099078341013825, + "grad_norm": 1.150791607540225, + "learning_rate": 7.390349167971025e-07, + "loss": 0.6953321695327759, + "step": 5251 + }, + { + "epoch": 1.2101382488479262, + "grad_norm": 1.0284326235023098, + "learning_rate": 7.38667153676441e-07, + "loss": 0.7226089835166931, + "step": 5252 + }, + { + "epoch": 1.21036866359447, + "grad_norm": 0.8781484717910895, + "learning_rate": 7.382994284878967e-07, + "loss": 0.6746406555175781, + "step": 5253 + }, + { + "epoch": 1.2105990783410139, + "grad_norm": 1.109396083619457, + "learning_rate": 7.379317412848438e-07, + "loss": 0.7600215673446655, + "step": 5254 + }, + { + "epoch": 1.2108294930875576, + "grad_norm": 1.0821310147954002, + "learning_rate": 7.375640921206514e-07, + "loss": 0.7530734539031982, + "step": 5255 + }, + { + "epoch": 1.2110599078341013, + "grad_norm": 1.0572444642243028, + "learning_rate": 7.371964810486839e-07, + 
"loss": 0.8103033304214478, + "step": 5256 + }, + { + "epoch": 1.2112903225806453, + "grad_norm": 1.5370115848017, + "learning_rate": 7.368289081222994e-07, + "loss": 0.8916831016540527, + "step": 5257 + }, + { + "epoch": 1.211520737327189, + "grad_norm": 0.9972990737801745, + "learning_rate": 7.364613733948501e-07, + "loss": 0.6728129386901855, + "step": 5258 + }, + { + "epoch": 1.2117511520737327, + "grad_norm": 1.2459715050980873, + "learning_rate": 7.360938769196841e-07, + "loss": 0.8609380722045898, + "step": 5259 + }, + { + "epoch": 1.2119815668202765, + "grad_norm": 1.2704694196315967, + "learning_rate": 7.357264187501422e-07, + "loss": 0.9370373487472534, + "step": 5260 + }, + { + "epoch": 1.2122119815668202, + "grad_norm": 1.1080973982930933, + "learning_rate": 7.353589989395604e-07, + "loss": 0.6812434196472168, + "step": 5261 + }, + { + "epoch": 1.2124423963133641, + "grad_norm": 1.1917998982451765, + "learning_rate": 7.349916175412701e-07, + "loss": 0.7661731243133545, + "step": 5262 + }, + { + "epoch": 1.2126728110599079, + "grad_norm": 1.175052294784061, + "learning_rate": 7.346242746085951e-07, + "loss": 0.7306643128395081, + "step": 5263 + }, + { + "epoch": 1.2129032258064516, + "grad_norm": 1.2065862060559862, + "learning_rate": 7.34256970194856e-07, + "loss": 0.7189076542854309, + "step": 5264 + }, + { + "epoch": 1.2131336405529953, + "grad_norm": 0.8932044441494517, + "learning_rate": 7.338897043533656e-07, + "loss": 0.6935977935791016, + "step": 5265 + }, + { + "epoch": 1.213364055299539, + "grad_norm": 1.1224428177486496, + "learning_rate": 7.335224771374323e-07, + "loss": 0.8451323509216309, + "step": 5266 + }, + { + "epoch": 1.213594470046083, + "grad_norm": 1.1211043364668347, + "learning_rate": 7.331552886003589e-07, + "loss": 0.7936843037605286, + "step": 5267 + }, + { + "epoch": 1.2138248847926267, + "grad_norm": 1.1507587511456696, + "learning_rate": 7.327881387954418e-07, + "loss": 0.7989950776100159, + "step": 5268 + }, + { + "epoch": 
1.2140552995391705, + "grad_norm": 1.1166217189865624, + "learning_rate": 7.324210277759726e-07, + "loss": 0.7579236030578613, + "step": 5269 + }, + { + "epoch": 1.2142857142857142, + "grad_norm": 1.1276787851795544, + "learning_rate": 7.320539555952372e-07, + "loss": 0.7101268768310547, + "step": 5270 + }, + { + "epoch": 1.2145161290322581, + "grad_norm": 1.0342829920040018, + "learning_rate": 7.316869223065155e-07, + "loss": 0.7920513153076172, + "step": 5271 + }, + { + "epoch": 1.2147465437788019, + "grad_norm": 1.4357028015234437, + "learning_rate": 7.313199279630814e-07, + "loss": 0.9241428375244141, + "step": 5272 + }, + { + "epoch": 1.2149769585253456, + "grad_norm": 1.1653282891915406, + "learning_rate": 7.309529726182044e-07, + "loss": 0.8278338313102722, + "step": 5273 + }, + { + "epoch": 1.2152073732718893, + "grad_norm": 0.9443953324177181, + "learning_rate": 7.305860563251473e-07, + "loss": 0.8230598568916321, + "step": 5274 + }, + { + "epoch": 1.2154377880184333, + "grad_norm": 0.9783962526324749, + "learning_rate": 7.302191791371672e-07, + "loss": 0.7791799902915955, + "step": 5275 + }, + { + "epoch": 1.215668202764977, + "grad_norm": 1.1070826926760935, + "learning_rate": 7.298523411075163e-07, + "loss": 0.705475926399231, + "step": 5276 + }, + { + "epoch": 1.2158986175115207, + "grad_norm": 1.2064718691511076, + "learning_rate": 7.294855422894406e-07, + "loss": 0.8078421354293823, + "step": 5277 + }, + { + "epoch": 1.2161290322580645, + "grad_norm": 1.2182160993977798, + "learning_rate": 7.2911878273618e-07, + "loss": 0.8115853667259216, + "step": 5278 + }, + { + "epoch": 1.2163594470046082, + "grad_norm": 1.0596504935928797, + "learning_rate": 7.287520625009698e-07, + "loss": 0.6917247772216797, + "step": 5279 + }, + { + "epoch": 1.2165898617511521, + "grad_norm": 1.0522660082790807, + "learning_rate": 7.283853816370386e-07, + "loss": 0.7131551504135132, + "step": 5280 + }, + { + "epoch": 1.2168202764976959, + "grad_norm": 0.9495683492221387, + 
"learning_rate": 7.280187401976093e-07, + "loss": 0.713994562625885, + "step": 5281 + }, + { + "epoch": 1.2170506912442396, + "grad_norm": 1.0845439765546743, + "learning_rate": 7.276521382359001e-07, + "loss": 0.7123454809188843, + "step": 5282 + }, + { + "epoch": 1.2172811059907833, + "grad_norm": 1.395671188469518, + "learning_rate": 7.272855758051226e-07, + "loss": 0.7805770635604858, + "step": 5283 + }, + { + "epoch": 1.2175115207373273, + "grad_norm": 0.9191020761831104, + "learning_rate": 7.269190529584823e-07, + "loss": 0.756670355796814, + "step": 5284 + }, + { + "epoch": 1.217741935483871, + "grad_norm": 0.9614002237797926, + "learning_rate": 7.265525697491804e-07, + "loss": 0.5992655754089355, + "step": 5285 + }, + { + "epoch": 1.2179723502304147, + "grad_norm": 1.1857893348181308, + "learning_rate": 7.26186126230411e-07, + "loss": 0.7552722692489624, + "step": 5286 + }, + { + "epoch": 1.2182027649769585, + "grad_norm": 1.3153742960319537, + "learning_rate": 7.258197224553627e-07, + "loss": 0.7189064025878906, + "step": 5287 + }, + { + "epoch": 1.2184331797235024, + "grad_norm": 1.115820306372996, + "learning_rate": 7.254533584772188e-07, + "loss": 0.8277319669723511, + "step": 5288 + }, + { + "epoch": 1.2186635944700461, + "grad_norm": 1.0584826489222536, + "learning_rate": 7.250870343491561e-07, + "loss": 0.6655987501144409, + "step": 5289 + }, + { + "epoch": 1.2188940092165899, + "grad_norm": 1.3888484350972408, + "learning_rate": 7.247207501243469e-07, + "loss": 0.8654178380966187, + "step": 5290 + }, + { + "epoch": 1.2191244239631336, + "grad_norm": 1.1781514985004269, + "learning_rate": 7.243545058559564e-07, + "loss": 0.9148486852645874, + "step": 5291 + }, + { + "epoch": 1.2193548387096773, + "grad_norm": 1.0525236851594717, + "learning_rate": 7.239883015971439e-07, + "loss": 0.8003618717193604, + "step": 5292 + }, + { + "epoch": 1.2195852534562213, + "grad_norm": 1.1614945814905475, + "learning_rate": 7.236221374010647e-07, + "loss": 
0.7290889024734497, + "step": 5293 + }, + { + "epoch": 1.219815668202765, + "grad_norm": 0.963434252776205, + "learning_rate": 7.232560133208663e-07, + "loss": 0.5989147424697876, + "step": 5294 + }, + { + "epoch": 1.2200460829493087, + "grad_norm": 0.8766403983792901, + "learning_rate": 7.228899294096907e-07, + "loss": 0.8424522876739502, + "step": 5295 + }, + { + "epoch": 1.2202764976958524, + "grad_norm": 1.1686896205403536, + "learning_rate": 7.225238857206754e-07, + "loss": 0.7753746509552002, + "step": 5296 + }, + { + "epoch": 1.2205069124423964, + "grad_norm": 1.1424848742103464, + "learning_rate": 7.221578823069508e-07, + "loss": 0.693191647529602, + "step": 5297 + }, + { + "epoch": 1.2207373271889401, + "grad_norm": 1.177332636609729, + "learning_rate": 7.217919192216417e-07, + "loss": 0.7561964988708496, + "step": 5298 + }, + { + "epoch": 1.2209677419354839, + "grad_norm": 0.9927977088932712, + "learning_rate": 7.214259965178673e-07, + "loss": 0.7721199989318848, + "step": 5299 + }, + { + "epoch": 1.2211981566820276, + "grad_norm": 1.39798744468456, + "learning_rate": 7.210601142487407e-07, + "loss": 0.8100659251213074, + "step": 5300 + }, + { + "epoch": 1.2214285714285715, + "grad_norm": 1.0570396078634527, + "learning_rate": 7.206942724673688e-07, + "loss": 0.6753256916999817, + "step": 5301 + }, + { + "epoch": 1.2216589861751153, + "grad_norm": 1.1020954128293505, + "learning_rate": 7.20328471226854e-07, + "loss": 0.7534425854682922, + "step": 5302 + }, + { + "epoch": 1.221889400921659, + "grad_norm": 1.5962153366210945, + "learning_rate": 7.199627105802913e-07, + "loss": 0.8275027275085449, + "step": 5303 + }, + { + "epoch": 1.2221198156682027, + "grad_norm": 1.1431238814592317, + "learning_rate": 7.195969905807702e-07, + "loss": 0.728579580783844, + "step": 5304 + }, + { + "epoch": 1.2223502304147464, + "grad_norm": 1.1008777946014818, + "learning_rate": 7.192313112813749e-07, + "loss": 0.8221413493156433, + "step": 5305 + }, + { + "epoch": 
1.2225806451612904, + "grad_norm": 1.0255386420970887, + "learning_rate": 7.188656727351832e-07, + "loss": 0.7819123268127441, + "step": 5306 + }, + { + "epoch": 1.2228110599078341, + "grad_norm": 1.1141595278176613, + "learning_rate": 7.185000749952666e-07, + "loss": 0.7474294900894165, + "step": 5307 + }, + { + "epoch": 1.2230414746543778, + "grad_norm": 1.4333018176649106, + "learning_rate": 7.181345181146919e-07, + "loss": 0.8072259426116943, + "step": 5308 + }, + { + "epoch": 1.2232718894009216, + "grad_norm": 1.3449246489382425, + "learning_rate": 7.177690021465184e-07, + "loss": 0.8718069791793823, + "step": 5309 + }, + { + "epoch": 1.2235023041474655, + "grad_norm": 1.1090181258933243, + "learning_rate": 7.174035271438006e-07, + "loss": 0.8374875783920288, + "step": 5310 + }, + { + "epoch": 1.2237327188940093, + "grad_norm": 1.2085386756305507, + "learning_rate": 7.170380931595869e-07, + "loss": 0.6669566631317139, + "step": 5311 + }, + { + "epoch": 1.223963133640553, + "grad_norm": 1.1706882886588135, + "learning_rate": 7.16672700246919e-07, + "loss": 0.8735665678977966, + "step": 5312 + }, + { + "epoch": 1.2241935483870967, + "grad_norm": 1.1826163019402958, + "learning_rate": 7.16307348458834e-07, + "loss": 0.8312361240386963, + "step": 5313 + }, + { + "epoch": 1.2244239631336407, + "grad_norm": 1.1102424714986416, + "learning_rate": 7.159420378483619e-07, + "loss": 0.7927724123001099, + "step": 5314 + }, + { + "epoch": 1.2246543778801844, + "grad_norm": 1.0527049283172933, + "learning_rate": 7.155767684685264e-07, + "loss": 0.7641698122024536, + "step": 5315 + }, + { + "epoch": 1.2248847926267281, + "grad_norm": 1.0508850668326304, + "learning_rate": 7.15211540372347e-07, + "loss": 0.7490028142929077, + "step": 5316 + }, + { + "epoch": 1.2251152073732718, + "grad_norm": 1.0604993776512237, + "learning_rate": 7.148463536128354e-07, + "loss": 0.7194815874099731, + "step": 5317 + }, + { + "epoch": 1.2253456221198156, + "grad_norm": 1.2779756064695784, + 
"learning_rate": 7.144812082429979e-07, + "loss": 0.8328256607055664, + "step": 5318 + }, + { + "epoch": 1.2255760368663595, + "grad_norm": 1.1539197608232337, + "learning_rate": 7.141161043158352e-07, + "loss": 0.9124876260757446, + "step": 5319 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 1.346989410896588, + "learning_rate": 7.137510418843416e-07, + "loss": 0.8183319568634033, + "step": 5320 + }, + { + "epoch": 1.226036866359447, + "grad_norm": 1.0902088619882297, + "learning_rate": 7.133860210015048e-07, + "loss": 0.8423885107040405, + "step": 5321 + }, + { + "epoch": 1.2262672811059907, + "grad_norm": 1.064962271727849, + "learning_rate": 7.130210417203082e-07, + "loss": 0.8175387382507324, + "step": 5322 + }, + { + "epoch": 1.2264976958525347, + "grad_norm": 1.0111617635250245, + "learning_rate": 7.126561040937274e-07, + "loss": 0.8415048718452454, + "step": 5323 + }, + { + "epoch": 1.2267281105990784, + "grad_norm": 1.4241774929740556, + "learning_rate": 7.122912081747321e-07, + "loss": 0.6891156435012817, + "step": 5324 + }, + { + "epoch": 1.226958525345622, + "grad_norm": 1.1236132104045742, + "learning_rate": 7.119263540162876e-07, + "loss": 0.667617678642273, + "step": 5325 + }, + { + "epoch": 1.2271889400921658, + "grad_norm": 1.21591291521647, + "learning_rate": 7.115615416713517e-07, + "loss": 0.7752082347869873, + "step": 5326 + }, + { + "epoch": 1.2274193548387098, + "grad_norm": 1.0094697644265302, + "learning_rate": 7.111967711928757e-07, + "loss": 0.6582639813423157, + "step": 5327 + }, + { + "epoch": 1.2276497695852535, + "grad_norm": 0.9823209869062589, + "learning_rate": 7.108320426338063e-07, + "loss": 0.6996462345123291, + "step": 5328 + }, + { + "epoch": 1.2278801843317972, + "grad_norm": 1.1364634127826816, + "learning_rate": 7.104673560470828e-07, + "loss": 0.7132028341293335, + "step": 5329 + }, + { + "epoch": 1.228110599078341, + "grad_norm": 1.1959075580849723, + "learning_rate": 7.101027114856395e-07, + "loss": 
0.7344096899032593, + "step": 5330 + }, + { + "epoch": 1.2283410138248847, + "grad_norm": 1.2810764573761082, + "learning_rate": 7.097381090024039e-07, + "loss": 0.7805585861206055, + "step": 5331 + }, + { + "epoch": 1.2285714285714286, + "grad_norm": 1.2310137220528714, + "learning_rate": 7.093735486502976e-07, + "loss": 0.6785855889320374, + "step": 5332 + }, + { + "epoch": 1.2288018433179724, + "grad_norm": 1.3226389203047557, + "learning_rate": 7.090090304822355e-07, + "loss": 0.7465041875839233, + "step": 5333 + }, + { + "epoch": 1.229032258064516, + "grad_norm": 1.0465247410006058, + "learning_rate": 7.086445545511278e-07, + "loss": 0.7400432825088501, + "step": 5334 + }, + { + "epoch": 1.2292626728110598, + "grad_norm": 0.9732969942350592, + "learning_rate": 7.082801209098774e-07, + "loss": 0.8567768335342407, + "step": 5335 + }, + { + "epoch": 1.2294930875576038, + "grad_norm": 1.133102602749406, + "learning_rate": 7.079157296113807e-07, + "loss": 0.7451025247573853, + "step": 5336 + }, + { + "epoch": 1.2297235023041475, + "grad_norm": 1.2953309888801026, + "learning_rate": 7.075513807085299e-07, + "loss": 0.7178194522857666, + "step": 5337 + }, + { + "epoch": 1.2299539170506912, + "grad_norm": 1.114794382407599, + "learning_rate": 7.071870742542086e-07, + "loss": 0.7538058161735535, + "step": 5338 + }, + { + "epoch": 1.230184331797235, + "grad_norm": 1.2706015052011863, + "learning_rate": 7.068228103012959e-07, + "loss": 0.7853896021842957, + "step": 5339 + }, + { + "epoch": 1.230414746543779, + "grad_norm": 1.6145088717882257, + "learning_rate": 7.064585889026644e-07, + "loss": 0.9359887838363647, + "step": 5340 + }, + { + "epoch": 1.2306451612903226, + "grad_norm": 1.2876289498435494, + "learning_rate": 7.060944101111797e-07, + "loss": 0.8590530753135681, + "step": 5341 + }, + { + "epoch": 1.2308755760368664, + "grad_norm": 1.0245387562303532, + "learning_rate": 7.057302739797025e-07, + "loss": 0.7047204971313477, + "step": 5342 + }, + { + "epoch": 
1.23110599078341, + "grad_norm": 1.3069544437359595, + "learning_rate": 7.053661805610867e-07, + "loss": 0.8826072216033936, + "step": 5343 + }, + { + "epoch": 1.2313364055299538, + "grad_norm": 1.2593962984780245, + "learning_rate": 7.050021299081792e-07, + "loss": 0.9394192695617676, + "step": 5344 + }, + { + "epoch": 1.2315668202764978, + "grad_norm": 1.1109567819341923, + "learning_rate": 7.046381220738224e-07, + "loss": 0.7814885377883911, + "step": 5345 + }, + { + "epoch": 1.2317972350230415, + "grad_norm": 1.1819250736895568, + "learning_rate": 7.042741571108512e-07, + "loss": 0.781699538230896, + "step": 5346 + }, + { + "epoch": 1.2320276497695852, + "grad_norm": 1.1116588757864085, + "learning_rate": 7.039102350720946e-07, + "loss": 0.6554632186889648, + "step": 5347 + }, + { + "epoch": 1.232258064516129, + "grad_norm": 0.9564548780258206, + "learning_rate": 7.035463560103753e-07, + "loss": 0.6449903249740601, + "step": 5348 + }, + { + "epoch": 1.2324884792626727, + "grad_norm": 1.3130676696714008, + "learning_rate": 7.031825199785101e-07, + "loss": 0.8222958445549011, + "step": 5349 + }, + { + "epoch": 1.2327188940092166, + "grad_norm": 1.073654969776922, + "learning_rate": 7.02818727029309e-07, + "loss": 0.8315533399581909, + "step": 5350 + }, + { + "epoch": 1.2329493087557604, + "grad_norm": 0.9980466179862664, + "learning_rate": 7.024549772155764e-07, + "loss": 0.8065732717514038, + "step": 5351 + }, + { + "epoch": 1.233179723502304, + "grad_norm": 1.3823215182318742, + "learning_rate": 7.020912705901101e-07, + "loss": 0.7607216835021973, + "step": 5352 + }, + { + "epoch": 1.233410138248848, + "grad_norm": 1.3000097773568569, + "learning_rate": 7.01727607205701e-07, + "loss": 0.877311110496521, + "step": 5353 + }, + { + "epoch": 1.2336405529953918, + "grad_norm": 1.1855641794195606, + "learning_rate": 7.013639871151354e-07, + "loss": 0.7352526187896729, + "step": 5354 + }, + { + "epoch": 1.2338709677419355, + "grad_norm": 1.1123782494693044, + 
"learning_rate": 7.010004103711915e-07, + "loss": 0.7676074504852295, + "step": 5355 + }, + { + "epoch": 1.2341013824884792, + "grad_norm": 1.1035546011135826, + "learning_rate": 7.00636877026642e-07, + "loss": 0.7802003622055054, + "step": 5356 + }, + { + "epoch": 1.234331797235023, + "grad_norm": 1.0576568317960378, + "learning_rate": 7.002733871342537e-07, + "loss": 0.747033953666687, + "step": 5357 + }, + { + "epoch": 1.234562211981567, + "grad_norm": 1.1565555542506367, + "learning_rate": 6.999099407467865e-07, + "loss": 0.8086956739425659, + "step": 5358 + }, + { + "epoch": 1.2347926267281106, + "grad_norm": 1.450692015608809, + "learning_rate": 6.995465379169941e-07, + "loss": 0.9362099170684814, + "step": 5359 + }, + { + "epoch": 1.2350230414746544, + "grad_norm": 1.0699993470783844, + "learning_rate": 6.991831786976241e-07, + "loss": 0.6784812211990356, + "step": 5360 + }, + { + "epoch": 1.235253456221198, + "grad_norm": 1.0206889971672557, + "learning_rate": 6.988198631414171e-07, + "loss": 0.7733708620071411, + "step": 5361 + }, + { + "epoch": 1.2354838709677418, + "grad_norm": 1.1745502344238163, + "learning_rate": 6.984565913011087e-07, + "loss": 0.8747115135192871, + "step": 5362 + }, + { + "epoch": 1.2357142857142858, + "grad_norm": 1.0659966645754941, + "learning_rate": 6.980933632294268e-07, + "loss": 0.6947430372238159, + "step": 5363 + }, + { + "epoch": 1.2359447004608295, + "grad_norm": 1.206089262306805, + "learning_rate": 6.97730178979093e-07, + "loss": 0.7128404378890991, + "step": 5364 + }, + { + "epoch": 1.2361751152073732, + "grad_norm": 1.1120167642627505, + "learning_rate": 6.973670386028242e-07, + "loss": 0.7190830707550049, + "step": 5365 + }, + { + "epoch": 1.2364055299539172, + "grad_norm": 1.1367562157166997, + "learning_rate": 6.970039421533291e-07, + "loss": 0.7625770568847656, + "step": 5366 + }, + { + "epoch": 1.236635944700461, + "grad_norm": 1.109720416461976, + "learning_rate": 6.966408896833104e-07, + "loss": 
0.7942707538604736, + "step": 5367 + }, + { + "epoch": 1.2368663594470046, + "grad_norm": 1.2413354296268997, + "learning_rate": 6.962778812454652e-07, + "loss": 0.8329455852508545, + "step": 5368 + }, + { + "epoch": 1.2370967741935484, + "grad_norm": 0.8823115581397621, + "learning_rate": 6.959149168924833e-07, + "loss": 0.6034290790557861, + "step": 5369 + }, + { + "epoch": 1.237327188940092, + "grad_norm": 1.1119487486974622, + "learning_rate": 6.955519966770486e-07, + "loss": 0.8424680233001709, + "step": 5370 + }, + { + "epoch": 1.237557603686636, + "grad_norm": 1.4443979353165184, + "learning_rate": 6.951891206518388e-07, + "loss": 0.8670322895050049, + "step": 5371 + }, + { + "epoch": 1.2377880184331798, + "grad_norm": 1.2577295715670245, + "learning_rate": 6.948262888695244e-07, + "loss": 0.7283621430397034, + "step": 5372 + }, + { + "epoch": 1.2380184331797235, + "grad_norm": 1.1772858057268798, + "learning_rate": 6.9446350138277e-07, + "loss": 0.7990118265151978, + "step": 5373 + }, + { + "epoch": 1.2382488479262672, + "grad_norm": 1.3359682917878526, + "learning_rate": 6.941007582442342e-07, + "loss": 0.945558488368988, + "step": 5374 + }, + { + "epoch": 1.238479262672811, + "grad_norm": 1.186182272846314, + "learning_rate": 6.937380595065685e-07, + "loss": 0.6905936002731323, + "step": 5375 + }, + { + "epoch": 1.238709677419355, + "grad_norm": 1.1665515184197677, + "learning_rate": 6.933754052224176e-07, + "loss": 0.7757662534713745, + "step": 5376 + }, + { + "epoch": 1.2389400921658986, + "grad_norm": 1.1107589407670702, + "learning_rate": 6.930127954444209e-07, + "loss": 0.63062584400177, + "step": 5377 + }, + { + "epoch": 1.2391705069124423, + "grad_norm": 1.2453155093106256, + "learning_rate": 6.926502302252109e-07, + "loss": 0.7341021299362183, + "step": 5378 + }, + { + "epoch": 1.2394009216589863, + "grad_norm": 0.9019761448377311, + "learning_rate": 6.922877096174127e-07, + "loss": 0.572767972946167, + "step": 5379 + }, + { + "epoch": 
1.23963133640553, + "grad_norm": 1.274761976544521, + "learning_rate": 6.919252336736463e-07, + "loss": 0.630276083946228, + "step": 5380 + }, + { + "epoch": 1.2398617511520738, + "grad_norm": 1.0769631455551745, + "learning_rate": 6.915628024465244e-07, + "loss": 0.668334424495697, + "step": 5381 + }, + { + "epoch": 1.2400921658986175, + "grad_norm": 0.9444198657704267, + "learning_rate": 6.912004159886529e-07, + "loss": 0.6766513586044312, + "step": 5382 + }, + { + "epoch": 1.2403225806451612, + "grad_norm": 1.3884668691330446, + "learning_rate": 6.908380743526328e-07, + "loss": 0.7016473412513733, + "step": 5383 + }, + { + "epoch": 1.2405529953917052, + "grad_norm": 1.378738366714881, + "learning_rate": 6.904757775910568e-07, + "loss": 0.8837979435920715, + "step": 5384 + }, + { + "epoch": 1.2407834101382489, + "grad_norm": 0.9305030195638431, + "learning_rate": 6.901135257565116e-07, + "loss": 0.7187714576721191, + "step": 5385 + }, + { + "epoch": 1.2410138248847926, + "grad_norm": 1.0935814864632027, + "learning_rate": 6.897513189015782e-07, + "loss": 0.8227157592773438, + "step": 5386 + }, + { + "epoch": 1.2412442396313363, + "grad_norm": 1.278600897043475, + "learning_rate": 6.893891570788301e-07, + "loss": 0.8812209367752075, + "step": 5387 + }, + { + "epoch": 1.24147465437788, + "grad_norm": 1.0426681195674332, + "learning_rate": 6.890270403408348e-07, + "loss": 0.6702297925949097, + "step": 5388 + }, + { + "epoch": 1.241705069124424, + "grad_norm": 1.1718249382850798, + "learning_rate": 6.886649687401529e-07, + "loss": 0.646358847618103, + "step": 5389 + }, + { + "epoch": 1.2419354838709677, + "grad_norm": 1.1131010301922042, + "learning_rate": 6.883029423293383e-07, + "loss": 0.6514080762863159, + "step": 5390 + }, + { + "epoch": 1.2421658986175115, + "grad_norm": 1.0826812738863971, + "learning_rate": 6.879409611609393e-07, + "loss": 0.6938437819480896, + "step": 5391 + }, + { + "epoch": 1.2423963133640552, + "grad_norm": 1.3710627721954263, + 
"learning_rate": 6.875790252874967e-07, + "loss": 0.8601399064064026, + "step": 5392 + }, + { + "epoch": 1.2426267281105992, + "grad_norm": 1.1590300352526421, + "learning_rate": 6.872171347615445e-07, + "loss": 0.6641080379486084, + "step": 5393 + }, + { + "epoch": 1.2428571428571429, + "grad_norm": 1.0046628491787142, + "learning_rate": 6.868552896356117e-07, + "loss": 0.7109012603759766, + "step": 5394 + }, + { + "epoch": 1.2430875576036866, + "grad_norm": 1.261042767669179, + "learning_rate": 6.864934899622191e-07, + "loss": 0.8558728694915771, + "step": 5395 + }, + { + "epoch": 1.2433179723502303, + "grad_norm": 1.1243133400823155, + "learning_rate": 6.861317357938807e-07, + "loss": 0.6119382977485657, + "step": 5396 + }, + { + "epoch": 1.2435483870967743, + "grad_norm": 1.2850449121793286, + "learning_rate": 6.857700271831059e-07, + "loss": 0.7527587413787842, + "step": 5397 + }, + { + "epoch": 1.243778801843318, + "grad_norm": 1.3104214277299573, + "learning_rate": 6.854083641823957e-07, + "loss": 0.8082761168479919, + "step": 5398 + }, + { + "epoch": 1.2440092165898617, + "grad_norm": 1.0664271007055484, + "learning_rate": 6.850467468442447e-07, + "loss": 0.7289307117462158, + "step": 5399 + }, + { + "epoch": 1.2442396313364055, + "grad_norm": 1.2684124709337747, + "learning_rate": 6.846851752211418e-07, + "loss": 0.8824148178100586, + "step": 5400 + }, + { + "epoch": 1.2444700460829492, + "grad_norm": 1.2011621536911168, + "learning_rate": 6.843236493655682e-07, + "loss": 0.7046724557876587, + "step": 5401 + }, + { + "epoch": 1.2447004608294931, + "grad_norm": 1.0456601321771188, + "learning_rate": 6.839621693299987e-07, + "loss": 0.8192921876907349, + "step": 5402 + }, + { + "epoch": 1.2449308755760369, + "grad_norm": 1.1031705508374716, + "learning_rate": 6.83600735166902e-07, + "loss": 0.7651070356369019, + "step": 5403 + }, + { + "epoch": 1.2451612903225806, + "grad_norm": 1.10155120943284, + "learning_rate": 6.832393469287401e-07, + "loss": 
0.7689340114593506, + "step": 5404 + }, + { + "epoch": 1.2453917050691243, + "grad_norm": 1.438313566898243, + "learning_rate": 6.828780046679671e-07, + "loss": 0.9214832782745361, + "step": 5405 + }, + { + "epoch": 1.2456221198156683, + "grad_norm": 1.1160237214981186, + "learning_rate": 6.825167084370322e-07, + "loss": 0.7210682034492493, + "step": 5406 + }, + { + "epoch": 1.245852534562212, + "grad_norm": 1.1608936823977416, + "learning_rate": 6.82155458288377e-07, + "loss": 0.871317446231842, + "step": 5407 + }, + { + "epoch": 1.2460829493087557, + "grad_norm": 1.2750147741770517, + "learning_rate": 6.817942542744359e-07, + "loss": 0.7669065594673157, + "step": 5408 + }, + { + "epoch": 1.2463133640552995, + "grad_norm": 1.0693548196930358, + "learning_rate": 6.814330964476379e-07, + "loss": 0.7317448854446411, + "step": 5409 + }, + { + "epoch": 1.2465437788018434, + "grad_norm": 1.2936969678285373, + "learning_rate": 6.810719848604036e-07, + "loss": 0.7873220443725586, + "step": 5410 + }, + { + "epoch": 1.2467741935483871, + "grad_norm": 1.2973675980536, + "learning_rate": 6.807109195651492e-07, + "loss": 0.713294267654419, + "step": 5411 + }, + { + "epoch": 1.2470046082949309, + "grad_norm": 1.2551238151306954, + "learning_rate": 6.803499006142819e-07, + "loss": 0.7592979669570923, + "step": 5412 + }, + { + "epoch": 1.2472350230414746, + "grad_norm": 1.3113983649465133, + "learning_rate": 6.79988928060203e-07, + "loss": 0.7805737257003784, + "step": 5413 + }, + { + "epoch": 1.2474654377880183, + "grad_norm": 0.8180058983934718, + "learning_rate": 6.79628001955308e-07, + "loss": 0.7706440687179565, + "step": 5414 + }, + { + "epoch": 1.2476958525345623, + "grad_norm": 1.3696824329137627, + "learning_rate": 6.792671223519844e-07, + "loss": 0.772534966468811, + "step": 5415 + }, + { + "epoch": 1.247926267281106, + "grad_norm": 1.2283026355612159, + "learning_rate": 6.789062893026129e-07, + "loss": 0.7939096093177795, + "step": 5416 + }, + { + "epoch": 
1.2481566820276497, + "grad_norm": 1.263037130888269, + "learning_rate": 6.78545502859569e-07, + "loss": 0.7062902450561523, + "step": 5417 + }, + { + "epoch": 1.2483870967741935, + "grad_norm": 1.042353004558378, + "learning_rate": 6.781847630752197e-07, + "loss": 0.8296496868133545, + "step": 5418 + }, + { + "epoch": 1.2486175115207374, + "grad_norm": 1.4186103660131706, + "learning_rate": 6.778240700019258e-07, + "loss": 0.926125168800354, + "step": 5419 + }, + { + "epoch": 1.2488479262672811, + "grad_norm": 1.1816532525816696, + "learning_rate": 6.774634236920419e-07, + "loss": 0.7301739454269409, + "step": 5420 + }, + { + "epoch": 1.2490783410138249, + "grad_norm": 1.366957713339659, + "learning_rate": 6.771028241979151e-07, + "loss": 0.7313426733016968, + "step": 5421 + }, + { + "epoch": 1.2493087557603686, + "grad_norm": 0.9539446793763906, + "learning_rate": 6.767422715718853e-07, + "loss": 0.7193025946617126, + "step": 5422 + }, + { + "epoch": 1.2495391705069125, + "grad_norm": 1.1735826178809459, + "learning_rate": 6.763817658662874e-07, + "loss": 0.6544638872146606, + "step": 5423 + }, + { + "epoch": 1.2497695852534563, + "grad_norm": 1.1828661707349362, + "learning_rate": 6.760213071334478e-07, + "loss": 0.8402822613716125, + "step": 5424 + }, + { + "epoch": 1.25, + "grad_norm": 1.1854670368859663, + "learning_rate": 6.756608954256861e-07, + "loss": 0.6840100288391113, + "step": 5425 + }, + { + "epoch": 1.2502304147465437, + "grad_norm": 1.1842873946027908, + "learning_rate": 6.753005307953165e-07, + "loss": 0.7315107583999634, + "step": 5426 + }, + { + "epoch": 1.2504608294930875, + "grad_norm": 0.9743094512393712, + "learning_rate": 6.74940213294645e-07, + "loss": 0.6369785070419312, + "step": 5427 + }, + { + "epoch": 1.2506912442396314, + "grad_norm": 1.0769824502789231, + "learning_rate": 6.745799429759711e-07, + "loss": 0.7700424790382385, + "step": 5428 + }, + { + "epoch": 1.2509216589861751, + "grad_norm": 1.2719323162039158, + "learning_rate": 
6.742197198915877e-07, + "loss": 0.7436221241950989, + "step": 5429 + }, + { + "epoch": 1.2511520737327189, + "grad_norm": 1.235326047289827, + "learning_rate": 6.738595440937809e-07, + "loss": 0.8028342723846436, + "step": 5430 + }, + { + "epoch": 1.2513824884792628, + "grad_norm": 1.1651221420823998, + "learning_rate": 6.734994156348288e-07, + "loss": 0.7705515623092651, + "step": 5431 + }, + { + "epoch": 1.2516129032258063, + "grad_norm": 1.509633589240068, + "learning_rate": 6.73139334567005e-07, + "loss": 0.7110899686813354, + "step": 5432 + }, + { + "epoch": 1.2518433179723503, + "grad_norm": 1.0701201128505256, + "learning_rate": 6.727793009425739e-07, + "loss": 0.7495337128639221, + "step": 5433 + }, + { + "epoch": 1.252073732718894, + "grad_norm": 1.1393040143384143, + "learning_rate": 6.724193148137938e-07, + "loss": 0.7735337018966675, + "step": 5434 + }, + { + "epoch": 1.2523041474654377, + "grad_norm": 1.5709409365174263, + "learning_rate": 6.720593762329167e-07, + "loss": 0.8655617237091064, + "step": 5435 + }, + { + "epoch": 1.2525345622119817, + "grad_norm": 1.0969772466203969, + "learning_rate": 6.716994852521871e-07, + "loss": 0.7989616394042969, + "step": 5436 + }, + { + "epoch": 1.2527649769585254, + "grad_norm": 1.2186152186967236, + "learning_rate": 6.713396419238424e-07, + "loss": 0.8090296983718872, + "step": 5437 + }, + { + "epoch": 1.2529953917050691, + "grad_norm": 1.175751705980128, + "learning_rate": 6.709798463001138e-07, + "loss": 0.7150726318359375, + "step": 5438 + }, + { + "epoch": 1.2532258064516129, + "grad_norm": 1.1350361891486582, + "learning_rate": 6.706200984332249e-07, + "loss": 0.7136287689208984, + "step": 5439 + }, + { + "epoch": 1.2534562211981566, + "grad_norm": 1.2991395376590593, + "learning_rate": 6.702603983753927e-07, + "loss": 0.8538687229156494, + "step": 5440 + }, + { + "epoch": 1.2536866359447005, + "grad_norm": 1.5253402941485412, + "learning_rate": 6.699007461788272e-07, + "loss": 0.7960666418075562, + 
"step": 5441 + }, + { + "epoch": 1.2539170506912443, + "grad_norm": 0.9539757778238315, + "learning_rate": 6.695411418957309e-07, + "loss": 0.7462595701217651, + "step": 5442 + }, + { + "epoch": 1.254147465437788, + "grad_norm": 1.482445221768143, + "learning_rate": 6.691815855783009e-07, + "loss": 0.795913577079773, + "step": 5443 + }, + { + "epoch": 1.2543778801843317, + "grad_norm": 1.071717267875031, + "learning_rate": 6.688220772787258e-07, + "loss": 0.7589330077171326, + "step": 5444 + }, + { + "epoch": 1.2546082949308754, + "grad_norm": 1.4795497320121442, + "learning_rate": 6.684626170491874e-07, + "loss": 0.7719615697860718, + "step": 5445 + }, + { + "epoch": 1.2548387096774194, + "grad_norm": 1.06581311441289, + "learning_rate": 6.681032049418616e-07, + "loss": 0.8516664505004883, + "step": 5446 + }, + { + "epoch": 1.2550691244239631, + "grad_norm": 1.466555451116343, + "learning_rate": 6.677438410089163e-07, + "loss": 0.8597210049629211, + "step": 5447 + }, + { + "epoch": 1.2552995391705069, + "grad_norm": 1.2172979010742704, + "learning_rate": 6.673845253025124e-07, + "loss": 0.7101171016693115, + "step": 5448 + }, + { + "epoch": 1.2555299539170508, + "grad_norm": 1.105900547055049, + "learning_rate": 6.670252578748044e-07, + "loss": 0.6946178078651428, + "step": 5449 + }, + { + "epoch": 1.2557603686635945, + "grad_norm": 1.687580161954866, + "learning_rate": 6.666660387779395e-07, + "loss": 0.9912126660346985, + "step": 5450 + }, + { + "epoch": 1.2559907834101383, + "grad_norm": 1.087382323913162, + "learning_rate": 6.663068680640573e-07, + "loss": 0.6495379209518433, + "step": 5451 + }, + { + "epoch": 1.256221198156682, + "grad_norm": 1.0213661473677353, + "learning_rate": 6.65947745785292e-07, + "loss": 0.6276426315307617, + "step": 5452 + }, + { + "epoch": 1.2564516129032257, + "grad_norm": 1.082562870265783, + "learning_rate": 6.655886719937691e-07, + "loss": 0.7273461818695068, + "step": 5453 + }, + { + "epoch": 1.2566820276497697, + "grad_norm": 
1.258671733492057, + "learning_rate": 6.652296467416073e-07, + "loss": 0.8248249292373657, + "step": 5454 + }, + { + "epoch": 1.2569124423963134, + "grad_norm": 1.2124691152915896, + "learning_rate": 6.648706700809196e-07, + "loss": 0.8709753751754761, + "step": 5455 + }, + { + "epoch": 1.2571428571428571, + "grad_norm": 1.4025604957471465, + "learning_rate": 6.645117420638105e-07, + "loss": 0.8207283020019531, + "step": 5456 + }, + { + "epoch": 1.2573732718894008, + "grad_norm": 1.0867491150840567, + "learning_rate": 6.641528627423774e-07, + "loss": 0.8222801685333252, + "step": 5457 + }, + { + "epoch": 1.2576036866359446, + "grad_norm": 1.0891862457945214, + "learning_rate": 6.637940321687121e-07, + "loss": 0.7684904336929321, + "step": 5458 + }, + { + "epoch": 1.2578341013824885, + "grad_norm": 1.106565522930133, + "learning_rate": 6.634352503948979e-07, + "loss": 0.7930517196655273, + "step": 5459 + }, + { + "epoch": 1.2580645161290323, + "grad_norm": 1.255727738748605, + "learning_rate": 6.630765174730116e-07, + "loss": 0.7414563298225403, + "step": 5460 + }, + { + "epoch": 1.258294930875576, + "grad_norm": 1.0415923536335177, + "learning_rate": 6.627178334551227e-07, + "loss": 0.7959232926368713, + "step": 5461 + }, + { + "epoch": 1.25852534562212, + "grad_norm": 1.2823788828450395, + "learning_rate": 6.623591983932935e-07, + "loss": 0.6722866296768188, + "step": 5462 + }, + { + "epoch": 1.2587557603686637, + "grad_norm": 1.0428819037253236, + "learning_rate": 6.620006123395799e-07, + "loss": 0.7688727378845215, + "step": 5463 + }, + { + "epoch": 1.2589861751152074, + "grad_norm": 1.1454091886933473, + "learning_rate": 6.616420753460301e-07, + "loss": 0.7543724179267883, + "step": 5464 + }, + { + "epoch": 1.2592165898617511, + "grad_norm": 1.3156243556780545, + "learning_rate": 6.612835874646847e-07, + "loss": 0.7097430229187012, + "step": 5465 + }, + { + "epoch": 1.2594470046082948, + "grad_norm": 1.1699591097632744, + "learning_rate": 6.609251487475786e-07, 
+ "loss": 0.8640443682670593, + "step": 5466 + }, + { + "epoch": 1.2596774193548388, + "grad_norm": 1.4552439697890553, + "learning_rate": 6.605667592467384e-07, + "loss": 0.7872523069381714, + "step": 5467 + }, + { + "epoch": 1.2599078341013825, + "grad_norm": 1.3601390048962447, + "learning_rate": 6.602084190141835e-07, + "loss": 0.8647557497024536, + "step": 5468 + }, + { + "epoch": 1.2601382488479262, + "grad_norm": 0.9953963267515464, + "learning_rate": 6.598501281019268e-07, + "loss": 0.7323553562164307, + "step": 5469 + }, + { + "epoch": 1.26036866359447, + "grad_norm": 1.2478057023441294, + "learning_rate": 6.594918865619739e-07, + "loss": 0.8214852809906006, + "step": 5470 + }, + { + "epoch": 1.2605990783410137, + "grad_norm": 1.1743890995374524, + "learning_rate": 6.591336944463223e-07, + "loss": 0.8011265397071838, + "step": 5471 + }, + { + "epoch": 1.2608294930875577, + "grad_norm": 0.9651307194588488, + "learning_rate": 6.587755518069642e-07, + "loss": 0.798862636089325, + "step": 5472 + }, + { + "epoch": 1.2610599078341014, + "grad_norm": 1.1888872240865054, + "learning_rate": 6.58417458695883e-07, + "loss": 0.7231202721595764, + "step": 5473 + }, + { + "epoch": 1.261290322580645, + "grad_norm": 1.25713690411949, + "learning_rate": 6.580594151650551e-07, + "loss": 0.8816685676574707, + "step": 5474 + }, + { + "epoch": 1.261520737327189, + "grad_norm": 1.0218552259688816, + "learning_rate": 6.577014212664509e-07, + "loss": 0.6343427300453186, + "step": 5475 + }, + { + "epoch": 1.2617511520737328, + "grad_norm": 1.2062270864209526, + "learning_rate": 6.573434770520321e-07, + "loss": 0.7785895466804504, + "step": 5476 + }, + { + "epoch": 1.2619815668202765, + "grad_norm": 1.2086458816060426, + "learning_rate": 6.569855825737536e-07, + "loss": 0.7408698797225952, + "step": 5477 + }, + { + "epoch": 1.2622119815668202, + "grad_norm": 1.2755490666336102, + "learning_rate": 6.566277378835643e-07, + "loss": 0.8481286764144897, + "step": 5478 + }, + { + 
"epoch": 1.262442396313364, + "grad_norm": 1.0772225233745287, + "learning_rate": 6.56269943033404e-07, + "loss": 0.8221831917762756, + "step": 5479 + }, + { + "epoch": 1.262672811059908, + "grad_norm": 1.1202704150930312, + "learning_rate": 6.559121980752065e-07, + "loss": 0.805405855178833, + "step": 5480 + }, + { + "epoch": 1.2629032258064516, + "grad_norm": 1.4925713527432443, + "learning_rate": 6.55554503060898e-07, + "loss": 0.8643565773963928, + "step": 5481 + }, + { + "epoch": 1.2631336405529954, + "grad_norm": 1.038997236699539, + "learning_rate": 6.551968580423973e-07, + "loss": 0.7087225914001465, + "step": 5482 + }, + { + "epoch": 1.263364055299539, + "grad_norm": 1.3080505612178328, + "learning_rate": 6.54839263071616e-07, + "loss": 0.8401756882667542, + "step": 5483 + }, + { + "epoch": 1.2635944700460828, + "grad_norm": 0.974231759030553, + "learning_rate": 6.544817182004589e-07, + "loss": 0.76345294713974, + "step": 5484 + }, + { + "epoch": 1.2638248847926268, + "grad_norm": 0.9975788463971886, + "learning_rate": 6.541242234808228e-07, + "loss": 0.7177271842956543, + "step": 5485 + }, + { + "epoch": 1.2640552995391705, + "grad_norm": 1.0524467641617976, + "learning_rate": 6.537667789645981e-07, + "loss": 0.7436186075210571, + "step": 5486 + }, + { + "epoch": 1.2642857142857142, + "grad_norm": 1.025347292021162, + "learning_rate": 6.53409384703667e-07, + "loss": 0.6526673436164856, + "step": 5487 + }, + { + "epoch": 1.2645161290322582, + "grad_norm": 1.4422505610217646, + "learning_rate": 6.530520407499049e-07, + "loss": 0.879219651222229, + "step": 5488 + }, + { + "epoch": 1.264746543778802, + "grad_norm": 1.1643268817299548, + "learning_rate": 6.526947471551798e-07, + "loss": 0.7005003690719604, + "step": 5489 + }, + { + "epoch": 1.2649769585253456, + "grad_norm": 1.276974659887974, + "learning_rate": 6.523375039713525e-07, + "loss": 0.716349720954895, + "step": 5490 + }, + { + "epoch": 1.2652073732718894, + "grad_norm": 1.307490301718017, + 
"learning_rate": 6.519803112502758e-07, + "loss": 0.8524413704872131, + "step": 5491 + }, + { + "epoch": 1.265437788018433, + "grad_norm": 1.3886244481055607, + "learning_rate": 6.516231690437966e-07, + "loss": 0.8032857179641724, + "step": 5492 + }, + { + "epoch": 1.265668202764977, + "grad_norm": 1.3026581508138244, + "learning_rate": 6.512660774037531e-07, + "loss": 0.8912144899368286, + "step": 5493 + }, + { + "epoch": 1.2658986175115208, + "grad_norm": 1.1001846572449894, + "learning_rate": 6.509090363819764e-07, + "loss": 0.6526974439620972, + "step": 5494 + }, + { + "epoch": 1.2661290322580645, + "grad_norm": 1.1539964772442708, + "learning_rate": 6.505520460302916e-07, + "loss": 0.7436610460281372, + "step": 5495 + }, + { + "epoch": 1.2663594470046082, + "grad_norm": 1.0590907210895066, + "learning_rate": 6.501951064005145e-07, + "loss": 0.7112951874732971, + "step": 5496 + }, + { + "epoch": 1.266589861751152, + "grad_norm": 1.136772271419419, + "learning_rate": 6.498382175444545e-07, + "loss": 0.6908622980117798, + "step": 5497 + }, + { + "epoch": 1.266820276497696, + "grad_norm": 1.2936126009346398, + "learning_rate": 6.494813795139137e-07, + "loss": 0.8169400691986084, + "step": 5498 + }, + { + "epoch": 1.2670506912442396, + "grad_norm": 1.1611805763062155, + "learning_rate": 6.491245923606868e-07, + "loss": 0.7577871084213257, + "step": 5499 + }, + { + "epoch": 1.2672811059907834, + "grad_norm": 1.2166617406598321, + "learning_rate": 6.487678561365606e-07, + "loss": 0.7470887303352356, + "step": 5500 + }, + { + "epoch": 1.2675115207373273, + "grad_norm": 1.2499100792685887, + "learning_rate": 6.484111708933153e-07, + "loss": 0.7862193584442139, + "step": 5501 + }, + { + "epoch": 1.267741935483871, + "grad_norm": 1.0856856438170979, + "learning_rate": 6.48054536682723e-07, + "loss": 0.6809444427490234, + "step": 5502 + }, + { + "epoch": 1.2679723502304148, + "grad_norm": 1.1883483456973896, + "learning_rate": 6.476979535565486e-07, + "loss": 
0.7560738921165466, + "step": 5503 + }, + { + "epoch": 1.2682027649769585, + "grad_norm": 1.060654462751894, + "learning_rate": 6.473414215665501e-07, + "loss": 0.6961003541946411, + "step": 5504 + }, + { + "epoch": 1.2684331797235022, + "grad_norm": 1.1318601167609275, + "learning_rate": 6.469849407644775e-07, + "loss": 0.762688159942627, + "step": 5505 + }, + { + "epoch": 1.2686635944700462, + "grad_norm": 1.3318780914664468, + "learning_rate": 6.46628511202073e-07, + "loss": 0.8735007047653198, + "step": 5506 + }, + { + "epoch": 1.26889400921659, + "grad_norm": 1.2498993266864264, + "learning_rate": 6.462721329310727e-07, + "loss": 0.7127432823181152, + "step": 5507 + }, + { + "epoch": 1.2691244239631336, + "grad_norm": 1.1810894491038926, + "learning_rate": 6.45915806003204e-07, + "loss": 0.7720422744750977, + "step": 5508 + }, + { + "epoch": 1.2693548387096774, + "grad_norm": 1.3742393921911886, + "learning_rate": 6.455595304701871e-07, + "loss": 0.8046890497207642, + "step": 5509 + }, + { + "epoch": 1.269585253456221, + "grad_norm": 1.433035812490825, + "learning_rate": 6.452033063837354e-07, + "loss": 0.8218742609024048, + "step": 5510 + }, + { + "epoch": 1.269815668202765, + "grad_norm": 1.3642640568886157, + "learning_rate": 6.448471337955536e-07, + "loss": 0.912622332572937, + "step": 5511 + }, + { + "epoch": 1.2700460829493088, + "grad_norm": 1.3101181049427244, + "learning_rate": 6.444910127573407e-07, + "loss": 0.7940733432769775, + "step": 5512 + }, + { + "epoch": 1.2702764976958525, + "grad_norm": 1.0982469100789136, + "learning_rate": 6.441349433207864e-07, + "loss": 0.7085565328598022, + "step": 5513 + }, + { + "epoch": 1.2705069124423964, + "grad_norm": 1.241687978637031, + "learning_rate": 6.437789255375739e-07, + "loss": 0.9316935539245605, + "step": 5514 + }, + { + "epoch": 1.2707373271889402, + "grad_norm": 0.9697190322352798, + "learning_rate": 6.43422959459379e-07, + "loss": 0.7412574291229248, + "step": 5515 + }, + { + "epoch": 
1.270967741935484, + "grad_norm": 0.9713506680995111, + "learning_rate": 6.430670451378695e-07, + "loss": 0.7476450204849243, + "step": 5516 + }, + { + "epoch": 1.2711981566820276, + "grad_norm": 1.1272976564667934, + "learning_rate": 6.427111826247056e-07, + "loss": 0.8530189990997314, + "step": 5517 + }, + { + "epoch": 1.2714285714285714, + "grad_norm": 1.3163108639601895, + "learning_rate": 6.423553719715406e-07, + "loss": 0.8193017840385437, + "step": 5518 + }, + { + "epoch": 1.2716589861751153, + "grad_norm": 1.002275086425174, + "learning_rate": 6.419996132300203e-07, + "loss": 0.7444974780082703, + "step": 5519 + }, + { + "epoch": 1.271889400921659, + "grad_norm": 1.0214749663440856, + "learning_rate": 6.416439064517818e-07, + "loss": 0.7422837018966675, + "step": 5520 + }, + { + "epoch": 1.2721198156682028, + "grad_norm": 1.2499390785362547, + "learning_rate": 6.412882516884562e-07, + "loss": 1.0155640840530396, + "step": 5521 + }, + { + "epoch": 1.2723502304147465, + "grad_norm": 1.489615968336023, + "learning_rate": 6.409326489916658e-07, + "loss": 0.8097087144851685, + "step": 5522 + }, + { + "epoch": 1.2725806451612902, + "grad_norm": 1.293861875643454, + "learning_rate": 6.405770984130257e-07, + "loss": 0.8545565009117126, + "step": 5523 + }, + { + "epoch": 1.2728110599078342, + "grad_norm": 0.9914622760341439, + "learning_rate": 6.402216000041445e-07, + "loss": 0.6765652298927307, + "step": 5524 + }, + { + "epoch": 1.273041474654378, + "grad_norm": 1.103390848542702, + "learning_rate": 6.398661538166217e-07, + "loss": 0.7964426875114441, + "step": 5525 + }, + { + "epoch": 1.2732718894009216, + "grad_norm": 1.2196724846653912, + "learning_rate": 6.395107599020495e-07, + "loss": 0.7449651956558228, + "step": 5526 + }, + { + "epoch": 1.2735023041474656, + "grad_norm": 1.5614043870867116, + "learning_rate": 6.391554183120138e-07, + "loss": 0.8639888167381287, + "step": 5527 + }, + { + "epoch": 1.2737327188940093, + "grad_norm": 1.046130673497984, + 
"learning_rate": 6.388001290980914e-07, + "loss": 0.7668901681900024, + "step": 5528 + }, + { + "epoch": 1.273963133640553, + "grad_norm": 1.082923428749424, + "learning_rate": 6.384448923118517e-07, + "loss": 0.6461849212646484, + "step": 5529 + }, + { + "epoch": 1.2741935483870968, + "grad_norm": 1.1539877219125736, + "learning_rate": 6.380897080048576e-07, + "loss": 0.7045707702636719, + "step": 5530 + }, + { + "epoch": 1.2744239631336405, + "grad_norm": 1.1893221959186644, + "learning_rate": 6.377345762286632e-07, + "loss": 0.8303793668746948, + "step": 5531 + }, + { + "epoch": 1.2746543778801844, + "grad_norm": 1.112799220738114, + "learning_rate": 6.373794970348152e-07, + "loss": 0.808259129524231, + "step": 5532 + }, + { + "epoch": 1.2748847926267282, + "grad_norm": 1.527249581557179, + "learning_rate": 6.370244704748535e-07, + "loss": 0.8224689960479736, + "step": 5533 + }, + { + "epoch": 1.2751152073732719, + "grad_norm": 1.4408900318423565, + "learning_rate": 6.366694966003089e-07, + "loss": 0.8559266328811646, + "step": 5534 + }, + { + "epoch": 1.2753456221198156, + "grad_norm": 1.3225808297843282, + "learning_rate": 6.363145754627063e-07, + "loss": 0.7972407341003418, + "step": 5535 + }, + { + "epoch": 1.2755760368663593, + "grad_norm": 0.9700139233174567, + "learning_rate": 6.359597071135618e-07, + "loss": 0.7750328779220581, + "step": 5536 + }, + { + "epoch": 1.2758064516129033, + "grad_norm": 1.3472908531853058, + "learning_rate": 6.356048916043836e-07, + "loss": 0.807072639465332, + "step": 5537 + }, + { + "epoch": 1.276036866359447, + "grad_norm": 1.2153299361350896, + "learning_rate": 6.35250128986673e-07, + "loss": 0.8459323048591614, + "step": 5538 + }, + { + "epoch": 1.2762672811059907, + "grad_norm": 1.1921452547723677, + "learning_rate": 6.348954193119233e-07, + "loss": 0.7874447107315063, + "step": 5539 + }, + { + "epoch": 1.2764976958525347, + "grad_norm": 1.243785118643696, + "learning_rate": 6.345407626316202e-07, + "loss": 
0.8817394971847534, + "step": 5540 + }, + { + "epoch": 1.2767281105990782, + "grad_norm": 1.0210963009280363, + "learning_rate": 6.341861589972417e-07, + "loss": 0.7936382293701172, + "step": 5541 + }, + { + "epoch": 1.2769585253456222, + "grad_norm": 1.1288567171733945, + "learning_rate": 6.33831608460258e-07, + "loss": 0.7301348447799683, + "step": 5542 + }, + { + "epoch": 1.2771889400921659, + "grad_norm": 0.9930019172389213, + "learning_rate": 6.334771110721311e-07, + "loss": 0.6546784043312073, + "step": 5543 + }, + { + "epoch": 1.2774193548387096, + "grad_norm": 1.1320345708885517, + "learning_rate": 6.331226668843168e-07, + "loss": 0.798918604850769, + "step": 5544 + }, + { + "epoch": 1.2776497695852536, + "grad_norm": 1.0677491026042323, + "learning_rate": 6.327682759482618e-07, + "loss": 0.6275264620780945, + "step": 5545 + }, + { + "epoch": 1.2778801843317973, + "grad_norm": 1.1056891749814017, + "learning_rate": 6.324139383154048e-07, + "loss": 0.6870732307434082, + "step": 5546 + }, + { + "epoch": 1.278110599078341, + "grad_norm": 1.113302907194177, + "learning_rate": 6.320596540371785e-07, + "loss": 0.8280556201934814, + "step": 5547 + }, + { + "epoch": 1.2783410138248847, + "grad_norm": 1.0958194382001605, + "learning_rate": 6.317054231650063e-07, + "loss": 0.8053648471832275, + "step": 5548 + }, + { + "epoch": 1.2785714285714285, + "grad_norm": 1.1500355966221105, + "learning_rate": 6.313512457503043e-07, + "loss": 0.7628893852233887, + "step": 5549 + }, + { + "epoch": 1.2788018433179724, + "grad_norm": 1.1770420137500979, + "learning_rate": 6.30997121844481e-07, + "loss": 0.8075753450393677, + "step": 5550 + }, + { + "epoch": 1.2790322580645161, + "grad_norm": 1.1420933628102303, + "learning_rate": 6.306430514989371e-07, + "loss": 0.7883275747299194, + "step": 5551 + }, + { + "epoch": 1.2792626728110599, + "grad_norm": 1.238710939895555, + "learning_rate": 6.302890347650648e-07, + "loss": 0.7438768744468689, + "step": 5552 + }, + { + "epoch": 
1.2794930875576038, + "grad_norm": 1.261177122589368, + "learning_rate": 6.299350716942501e-07, + "loss": 0.7756023406982422, + "step": 5553 + }, + { + "epoch": 1.2797235023041473, + "grad_norm": 1.0915753285175969, + "learning_rate": 6.295811623378698e-07, + "loss": 0.7128444910049438, + "step": 5554 + }, + { + "epoch": 1.2799539170506913, + "grad_norm": 0.9707581386208312, + "learning_rate": 6.292273067472931e-07, + "loss": 0.7611228823661804, + "step": 5555 + }, + { + "epoch": 1.280184331797235, + "grad_norm": 1.0553125250063393, + "learning_rate": 6.288735049738822e-07, + "loss": 0.7803670167922974, + "step": 5556 + }, + { + "epoch": 1.2804147465437787, + "grad_norm": 1.0703973986821036, + "learning_rate": 6.28519757068991e-07, + "loss": 0.958204448223114, + "step": 5557 + }, + { + "epoch": 1.2806451612903227, + "grad_norm": 1.1879640741186497, + "learning_rate": 6.28166063083965e-07, + "loss": 0.7220249772071838, + "step": 5558 + }, + { + "epoch": 1.2808755760368664, + "grad_norm": 1.4250311227945265, + "learning_rate": 6.278124230701427e-07, + "loss": 0.7396695613861084, + "step": 5559 + }, + { + "epoch": 1.2811059907834101, + "grad_norm": 1.1549531480718158, + "learning_rate": 6.274588370788545e-07, + "loss": 0.819474458694458, + "step": 5560 + }, + { + "epoch": 1.2813364055299539, + "grad_norm": 1.0583859146786307, + "learning_rate": 6.271053051614231e-07, + "loss": 0.6997617483139038, + "step": 5561 + }, + { + "epoch": 1.2815668202764976, + "grad_norm": 1.1462805534929357, + "learning_rate": 6.26751827369163e-07, + "loss": 0.7526183128356934, + "step": 5562 + }, + { + "epoch": 1.2817972350230415, + "grad_norm": 1.3576714493720627, + "learning_rate": 6.263984037533805e-07, + "loss": 0.7185813188552856, + "step": 5563 + }, + { + "epoch": 1.2820276497695853, + "grad_norm": 0.9722151716418193, + "learning_rate": 6.260450343653757e-07, + "loss": 0.7739845514297485, + "step": 5564 + }, + { + "epoch": 1.282258064516129, + "grad_norm": 1.0387058407540612, + 
"learning_rate": 6.25691719256439e-07, + "loss": 0.698557436466217, + "step": 5565 + }, + { + "epoch": 1.2824884792626727, + "grad_norm": 1.1402265972621366, + "learning_rate": 6.253384584778534e-07, + "loss": 0.6946271657943726, + "step": 5566 + }, + { + "epoch": 1.2827188940092165, + "grad_norm": 1.2349626326096388, + "learning_rate": 6.24985252080895e-07, + "loss": 0.7746025323867798, + "step": 5567 + }, + { + "epoch": 1.2829493087557604, + "grad_norm": 1.050385772264468, + "learning_rate": 6.246321001168306e-07, + "loss": 0.8759660720825195, + "step": 5568 + }, + { + "epoch": 1.2831797235023041, + "grad_norm": 1.1535965526965875, + "learning_rate": 6.2427900263692e-07, + "loss": 0.741111159324646, + "step": 5569 + }, + { + "epoch": 1.2834101382488479, + "grad_norm": 1.2619269860039752, + "learning_rate": 6.239259596924149e-07, + "loss": 0.8580630421638489, + "step": 5570 + }, + { + "epoch": 1.2836405529953918, + "grad_norm": 1.0890841483076914, + "learning_rate": 6.235729713345588e-07, + "loss": 0.7139618992805481, + "step": 5571 + }, + { + "epoch": 1.2838709677419355, + "grad_norm": 1.1260979019373678, + "learning_rate": 6.232200376145873e-07, + "loss": 0.8300976753234863, + "step": 5572 + }, + { + "epoch": 1.2841013824884793, + "grad_norm": 1.091655687939806, + "learning_rate": 6.228671585837288e-07, + "loss": 0.7193114757537842, + "step": 5573 + }, + { + "epoch": 1.284331797235023, + "grad_norm": 1.289214780103651, + "learning_rate": 6.225143342932031e-07, + "loss": 0.8802851438522339, + "step": 5574 + }, + { + "epoch": 1.2845622119815667, + "grad_norm": 1.069264068692084, + "learning_rate": 6.221615647942217e-07, + "loss": 0.749543309211731, + "step": 5575 + }, + { + "epoch": 1.2847926267281107, + "grad_norm": 1.1044047193035296, + "learning_rate": 6.218088501379892e-07, + "loss": 0.703508734703064, + "step": 5576 + }, + { + "epoch": 1.2850230414746544, + "grad_norm": 1.4722305319077136, + "learning_rate": 6.214561903757017e-07, + "loss": 
0.7519023418426514, + "step": 5577 + }, + { + "epoch": 1.2852534562211981, + "grad_norm": 1.4130549197431626, + "learning_rate": 6.211035855585466e-07, + "loss": 0.9525241851806641, + "step": 5578 + }, + { + "epoch": 1.2854838709677419, + "grad_norm": 1.3149636986285136, + "learning_rate": 6.207510357377046e-07, + "loss": 0.8288872241973877, + "step": 5579 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.3691241647074333, + "learning_rate": 6.203985409643478e-07, + "loss": 0.8531112670898438, + "step": 5580 + }, + { + "epoch": 1.2859447004608295, + "grad_norm": 1.121519108666965, + "learning_rate": 6.200461012896401e-07, + "loss": 0.7106495499610901, + "step": 5581 + }, + { + "epoch": 1.2861751152073733, + "grad_norm": 1.426451214846877, + "learning_rate": 6.19693716764738e-07, + "loss": 0.714931845664978, + "step": 5582 + }, + { + "epoch": 1.286405529953917, + "grad_norm": 1.3296169647206766, + "learning_rate": 6.19341387440789e-07, + "loss": 0.8281360268592834, + "step": 5583 + }, + { + "epoch": 1.286635944700461, + "grad_norm": 1.4833656768811476, + "learning_rate": 6.189891133689342e-07, + "loss": 0.9155910611152649, + "step": 5584 + }, + { + "epoch": 1.2868663594470047, + "grad_norm": 1.3432683189972507, + "learning_rate": 6.186368946003051e-07, + "loss": 0.7573060989379883, + "step": 5585 + }, + { + "epoch": 1.2870967741935484, + "grad_norm": 1.2055594370265132, + "learning_rate": 6.182847311860255e-07, + "loss": 0.6994235515594482, + "step": 5586 + }, + { + "epoch": 1.2873271889400921, + "grad_norm": 1.0775806715124838, + "learning_rate": 6.179326231772123e-07, + "loss": 0.771092414855957, + "step": 5587 + }, + { + "epoch": 1.2875576036866359, + "grad_norm": 1.269208775599209, + "learning_rate": 6.17580570624973e-07, + "loss": 0.7470684051513672, + "step": 5588 + }, + { + "epoch": 1.2877880184331798, + "grad_norm": 1.5425254092924614, + "learning_rate": 6.172285735804075e-07, + "loss": 0.918886125087738, + "step": 5589 + }, + { + "epoch": 
1.2880184331797235, + "grad_norm": 1.0377944178544696, + "learning_rate": 6.16876632094608e-07, + "loss": 0.7232617139816284, + "step": 5590 + }, + { + "epoch": 1.2882488479262673, + "grad_norm": 1.1703799662994099, + "learning_rate": 6.16524746218658e-07, + "loss": 0.7367006540298462, + "step": 5591 + }, + { + "epoch": 1.288479262672811, + "grad_norm": 1.1904508940632728, + "learning_rate": 6.161729160036333e-07, + "loss": 0.8783999681472778, + "step": 5592 + }, + { + "epoch": 1.2887096774193547, + "grad_norm": 1.1869935665885074, + "learning_rate": 6.158211415006019e-07, + "loss": 0.8266523480415344, + "step": 5593 + }, + { + "epoch": 1.2889400921658987, + "grad_norm": 1.1675308279856504, + "learning_rate": 6.154694227606234e-07, + "loss": 0.8528730869293213, + "step": 5594 + }, + { + "epoch": 1.2891705069124424, + "grad_norm": 1.3182250244296418, + "learning_rate": 6.151177598347485e-07, + "loss": 0.7586283683776855, + "step": 5595 + }, + { + "epoch": 1.2894009216589861, + "grad_norm": 1.4182043487427547, + "learning_rate": 6.147661527740217e-07, + "loss": 0.8671954870223999, + "step": 5596 + }, + { + "epoch": 1.28963133640553, + "grad_norm": 1.081063839615246, + "learning_rate": 6.14414601629478e-07, + "loss": 0.7354376316070557, + "step": 5597 + }, + { + "epoch": 1.2898617511520738, + "grad_norm": 1.051384434692424, + "learning_rate": 6.140631064521443e-07, + "loss": 0.8515663146972656, + "step": 5598 + }, + { + "epoch": 1.2900921658986175, + "grad_norm": 1.3608023513745535, + "learning_rate": 6.137116672930395e-07, + "loss": 0.9068351984024048, + "step": 5599 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 1.4956373283031226, + "learning_rate": 6.133602842031752e-07, + "loss": 0.7260826230049133, + "step": 5600 + }, + { + "epoch": 1.290552995391705, + "grad_norm": 1.1400144341772105, + "learning_rate": 6.130089572335535e-07, + "loss": 0.7162504196166992, + "step": 5601 + }, + { + "epoch": 1.290783410138249, + "grad_norm": 1.2203621133034757, + 
"learning_rate": 6.126576864351695e-07, + "loss": 0.7625414133071899, + "step": 5602 + }, + { + "epoch": 1.2910138248847927, + "grad_norm": 1.0985405517526388, + "learning_rate": 6.123064718590099e-07, + "loss": 0.787274956703186, + "step": 5603 + }, + { + "epoch": 1.2912442396313364, + "grad_norm": 1.0173148522997915, + "learning_rate": 6.119553135560519e-07, + "loss": 0.6539326310157776, + "step": 5604 + }, + { + "epoch": 1.2914746543778801, + "grad_norm": 1.0405810111847797, + "learning_rate": 6.11604211577267e-07, + "loss": 0.8481189012527466, + "step": 5605 + }, + { + "epoch": 1.2917050691244238, + "grad_norm": 1.1908108884253377, + "learning_rate": 6.112531659736164e-07, + "loss": 0.794892430305481, + "step": 5606 + }, + { + "epoch": 1.2919354838709678, + "grad_norm": 1.0728869697567227, + "learning_rate": 6.10902176796054e-07, + "loss": 0.6738630533218384, + "step": 5607 + }, + { + "epoch": 1.2921658986175115, + "grad_norm": 1.2190379429225964, + "learning_rate": 6.105512440955258e-07, + "loss": 0.7220937609672546, + "step": 5608 + }, + { + "epoch": 1.2923963133640552, + "grad_norm": 0.9117229942004119, + "learning_rate": 6.102003679229688e-07, + "loss": 0.6831785440444946, + "step": 5609 + }, + { + "epoch": 1.2926267281105992, + "grad_norm": 1.0925904509799125, + "learning_rate": 6.098495483293125e-07, + "loss": 0.7033277750015259, + "step": 5610 + }, + { + "epoch": 1.292857142857143, + "grad_norm": 0.9024231402190447, + "learning_rate": 6.094987853654779e-07, + "loss": 0.7063429355621338, + "step": 5611 + }, + { + "epoch": 1.2930875576036867, + "grad_norm": 1.1531814321684226, + "learning_rate": 6.091480790823771e-07, + "loss": 0.7791472673416138, + "step": 5612 + }, + { + "epoch": 1.2933179723502304, + "grad_norm": 1.3904591821034944, + "learning_rate": 6.087974295309157e-07, + "loss": 0.8674220442771912, + "step": 5613 + }, + { + "epoch": 1.293548387096774, + "grad_norm": 1.0513898416349883, + "learning_rate": 6.084468367619895e-07, + "loss": 
0.7878479957580566, + "step": 5614 + }, + { + "epoch": 1.293778801843318, + "grad_norm": 0.9253694996288483, + "learning_rate": 6.080963008264861e-07, + "loss": 0.7019612789154053, + "step": 5615 + }, + { + "epoch": 1.2940092165898618, + "grad_norm": 1.1163623788947772, + "learning_rate": 6.077458217752863e-07, + "loss": 0.68759685754776, + "step": 5616 + }, + { + "epoch": 1.2942396313364055, + "grad_norm": 1.1326420080908837, + "learning_rate": 6.073953996592612e-07, + "loss": 0.851733922958374, + "step": 5617 + }, + { + "epoch": 1.2944700460829492, + "grad_norm": 1.1539848484030915, + "learning_rate": 6.070450345292739e-07, + "loss": 0.699798047542572, + "step": 5618 + }, + { + "epoch": 1.294700460829493, + "grad_norm": 1.3439745934739915, + "learning_rate": 6.066947264361798e-07, + "loss": 0.8625125885009766, + "step": 5619 + }, + { + "epoch": 1.294930875576037, + "grad_norm": 1.2395704270447963, + "learning_rate": 6.063444754308253e-07, + "loss": 0.759062647819519, + "step": 5620 + }, + { + "epoch": 1.2951612903225806, + "grad_norm": 1.1349706072725887, + "learning_rate": 6.059942815640491e-07, + "loss": 0.7549973726272583, + "step": 5621 + }, + { + "epoch": 1.2953917050691244, + "grad_norm": 1.2217826699562653, + "learning_rate": 6.056441448866816e-07, + "loss": 0.8142743110656738, + "step": 5622 + }, + { + "epoch": 1.2956221198156683, + "grad_norm": 1.0818175637274867, + "learning_rate": 6.052940654495442e-07, + "loss": 0.7881144881248474, + "step": 5623 + }, + { + "epoch": 1.295852534562212, + "grad_norm": 1.2201407031885296, + "learning_rate": 6.049440433034505e-07, + "loss": 0.7922053933143616, + "step": 5624 + }, + { + "epoch": 1.2960829493087558, + "grad_norm": 1.1955381878542082, + "learning_rate": 6.045940784992061e-07, + "loss": 0.6808311939239502, + "step": 5625 + }, + { + "epoch": 1.2963133640552995, + "grad_norm": 1.203534246478074, + "learning_rate": 6.04244171087608e-07, + "loss": 0.933373749256134, + "step": 5626 + }, + { + "epoch": 
1.2965437788018432, + "grad_norm": 1.3722573775025653, + "learning_rate": 6.038943211194439e-07, + "loss": 0.8077404499053955, + "step": 5627 + }, + { + "epoch": 1.2967741935483872, + "grad_norm": 1.2263754202708472, + "learning_rate": 6.035445286454953e-07, + "loss": 0.7920867204666138, + "step": 5628 + }, + { + "epoch": 1.297004608294931, + "grad_norm": 1.1574994086499075, + "learning_rate": 6.031947937165335e-07, + "loss": 0.5872117280960083, + "step": 5629 + }, + { + "epoch": 1.2972350230414746, + "grad_norm": 1.2959093642025599, + "learning_rate": 6.02845116383322e-07, + "loss": 0.8593505620956421, + "step": 5630 + }, + { + "epoch": 1.2974654377880184, + "grad_norm": 1.4149025135483138, + "learning_rate": 6.02495496696616e-07, + "loss": 0.8352359533309937, + "step": 5631 + }, + { + "epoch": 1.297695852534562, + "grad_norm": 1.1724909355958724, + "learning_rate": 6.021459347071623e-07, + "loss": 0.7316182255744934, + "step": 5632 + }, + { + "epoch": 1.297926267281106, + "grad_norm": 1.1972298924235394, + "learning_rate": 6.017964304656997e-07, + "loss": 0.7294400334358215, + "step": 5633 + }, + { + "epoch": 1.2981566820276498, + "grad_norm": 1.0769002788322786, + "learning_rate": 6.014469840229581e-07, + "loss": 0.6595947742462158, + "step": 5634 + }, + { + "epoch": 1.2983870967741935, + "grad_norm": 1.308087510592029, + "learning_rate": 6.010975954296587e-07, + "loss": 0.7849195003509521, + "step": 5635 + }, + { + "epoch": 1.2986175115207375, + "grad_norm": 1.0709465804551583, + "learning_rate": 6.007482647365159e-07, + "loss": 0.6915944218635559, + "step": 5636 + }, + { + "epoch": 1.2988479262672812, + "grad_norm": 1.1595852934519908, + "learning_rate": 6.003989919942338e-07, + "loss": 0.6821994781494141, + "step": 5637 + }, + { + "epoch": 1.299078341013825, + "grad_norm": 1.0472078656298618, + "learning_rate": 6.000497772535087e-07, + "loss": 0.7333718538284302, + "step": 5638 + }, + { + "epoch": 1.2993087557603686, + "grad_norm": 1.0656731272596272, + 
"learning_rate": 5.997006205650292e-07, + "loss": 0.8069280385971069, + "step": 5639 + }, + { + "epoch": 1.2995391705069124, + "grad_norm": 1.0655856429852437, + "learning_rate": 5.993515219794745e-07, + "loss": 0.6989297866821289, + "step": 5640 + }, + { + "epoch": 1.2997695852534563, + "grad_norm": 1.187477589278957, + "learning_rate": 5.990024815475161e-07, + "loss": 0.7784403562545776, + "step": 5641 + }, + { + "epoch": 1.3, + "grad_norm": 1.2512602653388225, + "learning_rate": 5.986534993198168e-07, + "loss": 0.6554181575775146, + "step": 5642 + }, + { + "epoch": 1.3002304147465438, + "grad_norm": 1.298436931300319, + "learning_rate": 5.983045753470307e-07, + "loss": 0.7647836208343506, + "step": 5643 + }, + { + "epoch": 1.3004608294930875, + "grad_norm": 0.9269247679622435, + "learning_rate": 5.979557096798033e-07, + "loss": 0.7787084579467773, + "step": 5644 + }, + { + "epoch": 1.3006912442396312, + "grad_norm": 1.0646184845326898, + "learning_rate": 5.97606902368773e-07, + "loss": 0.6367940902709961, + "step": 5645 + }, + { + "epoch": 1.3009216589861752, + "grad_norm": 1.0481428990706296, + "learning_rate": 5.972581534645679e-07, + "loss": 0.7650243043899536, + "step": 5646 + }, + { + "epoch": 1.301152073732719, + "grad_norm": 0.9452672150266047, + "learning_rate": 5.969094630178084e-07, + "loss": 0.6506018042564392, + "step": 5647 + }, + { + "epoch": 1.3013824884792626, + "grad_norm": 1.4764262273840163, + "learning_rate": 5.965608310791071e-07, + "loss": 0.7351242303848267, + "step": 5648 + }, + { + "epoch": 1.3016129032258066, + "grad_norm": 1.2210251097969258, + "learning_rate": 5.96212257699067e-07, + "loss": 0.7327077984809875, + "step": 5649 + }, + { + "epoch": 1.3018433179723503, + "grad_norm": 1.0681197005600311, + "learning_rate": 5.958637429282831e-07, + "loss": 0.6448171138763428, + "step": 5650 + }, + { + "epoch": 1.302073732718894, + "grad_norm": 1.18574113940407, + "learning_rate": 5.955152868173418e-07, + "loss": 0.8347861766815186, + 
"step": 5651 + }, + { + "epoch": 1.3023041474654378, + "grad_norm": 1.2733315501094051, + "learning_rate": 5.951668894168215e-07, + "loss": 0.736280620098114, + "step": 5652 + }, + { + "epoch": 1.3025345622119815, + "grad_norm": 1.2627292373923777, + "learning_rate": 5.948185507772908e-07, + "loss": 0.8677594661712646, + "step": 5653 + }, + { + "epoch": 1.3027649769585254, + "grad_norm": 1.1729788728933164, + "learning_rate": 5.944702709493113e-07, + "loss": 0.6598676443099976, + "step": 5654 + }, + { + "epoch": 1.3029953917050692, + "grad_norm": 1.1072155159392119, + "learning_rate": 5.941220499834352e-07, + "loss": 0.7795349359512329, + "step": 5655 + }, + { + "epoch": 1.303225806451613, + "grad_norm": 1.1312979891837796, + "learning_rate": 5.937738879302058e-07, + "loss": 0.6929318904876709, + "step": 5656 + }, + { + "epoch": 1.3034562211981566, + "grad_norm": 1.19931324162024, + "learning_rate": 5.934257848401593e-07, + "loss": 0.859328031539917, + "step": 5657 + }, + { + "epoch": 1.3036866359447004, + "grad_norm": 1.435339518052459, + "learning_rate": 5.930777407638216e-07, + "loss": 1.0015549659729004, + "step": 5658 + }, + { + "epoch": 1.3039170506912443, + "grad_norm": 1.0471647927751007, + "learning_rate": 5.927297557517115e-07, + "loss": 0.6775785088539124, + "step": 5659 + }, + { + "epoch": 1.304147465437788, + "grad_norm": 1.0488503999959857, + "learning_rate": 5.923818298543378e-07, + "loss": 0.7228262424468994, + "step": 5660 + }, + { + "epoch": 1.3043778801843318, + "grad_norm": 0.9177755631443217, + "learning_rate": 5.92033963122202e-07, + "loss": 0.6139897108078003, + "step": 5661 + }, + { + "epoch": 1.3046082949308757, + "grad_norm": 1.062819188029367, + "learning_rate": 5.916861556057965e-07, + "loss": 0.7336323261260986, + "step": 5662 + }, + { + "epoch": 1.3048387096774192, + "grad_norm": 1.1985877666304134, + "learning_rate": 5.913384073556049e-07, + "loss": 0.9223559498786926, + "step": 5663 + }, + { + "epoch": 1.3050691244239632, + 
"grad_norm": 1.1960311086176088, + "learning_rate": 5.909907184221023e-07, + "loss": 0.7230484485626221, + "step": 5664 + }, + { + "epoch": 1.305299539170507, + "grad_norm": 1.1557586988240278, + "learning_rate": 5.906430888557556e-07, + "loss": 0.753510594367981, + "step": 5665 + }, + { + "epoch": 1.3055299539170506, + "grad_norm": 1.2167084005991546, + "learning_rate": 5.902955187070229e-07, + "loss": 0.8960593938827515, + "step": 5666 + }, + { + "epoch": 1.3057603686635946, + "grad_norm": 0.9226031223011045, + "learning_rate": 5.899480080263527e-07, + "loss": 0.6865993738174438, + "step": 5667 + }, + { + "epoch": 1.3059907834101383, + "grad_norm": 1.2350884878154553, + "learning_rate": 5.896005568641868e-07, + "loss": 0.7748720645904541, + "step": 5668 + }, + { + "epoch": 1.306221198156682, + "grad_norm": 1.437104451012044, + "learning_rate": 5.892531652709567e-07, + "loss": 0.834233283996582, + "step": 5669 + }, + { + "epoch": 1.3064516129032258, + "grad_norm": 1.2209490689427414, + "learning_rate": 5.889058332970858e-07, + "loss": 0.8398417234420776, + "step": 5670 + }, + { + "epoch": 1.3066820276497695, + "grad_norm": 0.8546573405192346, + "learning_rate": 5.885585609929891e-07, + "loss": 0.6889529228210449, + "step": 5671 + }, + { + "epoch": 1.3069124423963134, + "grad_norm": 1.1935289122089947, + "learning_rate": 5.882113484090725e-07, + "loss": 0.6625782251358032, + "step": 5672 + }, + { + "epoch": 1.3071428571428572, + "grad_norm": 1.2286244905882078, + "learning_rate": 5.878641955957334e-07, + "loss": 0.7774407267570496, + "step": 5673 + }, + { + "epoch": 1.307373271889401, + "grad_norm": 1.066003573867245, + "learning_rate": 5.875171026033608e-07, + "loss": 0.7799595594406128, + "step": 5674 + }, + { + "epoch": 1.3076036866359446, + "grad_norm": 1.2859461118878832, + "learning_rate": 5.87170069482335e-07, + "loss": 0.800041913986206, + "step": 5675 + }, + { + "epoch": 1.3078341013824883, + "grad_norm": 1.2986825545894243, + "learning_rate": 
5.868230962830265e-07, + "loss": 0.7478667497634888, + "step": 5676 + }, + { + "epoch": 1.3080645161290323, + "grad_norm": 0.9705514903251621, + "learning_rate": 5.86476183055799e-07, + "loss": 0.7538981437683105, + "step": 5677 + }, + { + "epoch": 1.308294930875576, + "grad_norm": 1.4195819337110585, + "learning_rate": 5.861293298510061e-07, + "loss": 0.7556810975074768, + "step": 5678 + }, + { + "epoch": 1.3085253456221198, + "grad_norm": 0.9225289666667563, + "learning_rate": 5.85782536718993e-07, + "loss": 0.670037031173706, + "step": 5679 + }, + { + "epoch": 1.3087557603686637, + "grad_norm": 1.1667524105558311, + "learning_rate": 5.854358037100964e-07, + "loss": 0.6238662600517273, + "step": 5680 + }, + { + "epoch": 1.3089861751152074, + "grad_norm": 1.1817165911107195, + "learning_rate": 5.85089130874644e-07, + "loss": 0.7972823977470398, + "step": 5681 + }, + { + "epoch": 1.3092165898617512, + "grad_norm": 1.0746427307389195, + "learning_rate": 5.847425182629549e-07, + "loss": 0.7332338094711304, + "step": 5682 + }, + { + "epoch": 1.3094470046082949, + "grad_norm": 1.2496997052714673, + "learning_rate": 5.843959659253398e-07, + "loss": 0.8186966180801392, + "step": 5683 + }, + { + "epoch": 1.3096774193548386, + "grad_norm": 1.2708999919485935, + "learning_rate": 5.840494739120996e-07, + "loss": 0.8207032680511475, + "step": 5684 + }, + { + "epoch": 1.3099078341013826, + "grad_norm": 1.4960688490449285, + "learning_rate": 5.83703042273528e-07, + "loss": 0.848265528678894, + "step": 5685 + }, + { + "epoch": 1.3101382488479263, + "grad_norm": 1.0212687278019523, + "learning_rate": 5.833566710599088e-07, + "loss": 0.7766404151916504, + "step": 5686 + }, + { + "epoch": 1.31036866359447, + "grad_norm": 1.2185059104564926, + "learning_rate": 5.830103603215168e-07, + "loss": 0.7570784687995911, + "step": 5687 + }, + { + "epoch": 1.3105990783410137, + "grad_norm": 1.1006353524996257, + "learning_rate": 5.826641101086194e-07, + "loss": 0.7551493644714355, + "step": 
5688 + }, + { + "epoch": 1.3108294930875575, + "grad_norm": 1.3664942507199704, + "learning_rate": 5.823179204714739e-07, + "loss": 0.8589804172515869, + "step": 5689 + }, + { + "epoch": 1.3110599078341014, + "grad_norm": 1.2869604696659869, + "learning_rate": 5.819717914603288e-07, + "loss": 0.8252761960029602, + "step": 5690 + }, + { + "epoch": 1.3112903225806452, + "grad_norm": 1.0886628872971145, + "learning_rate": 5.816257231254254e-07, + "loss": 0.7784370183944702, + "step": 5691 + }, + { + "epoch": 1.3115207373271889, + "grad_norm": 1.1343775846575583, + "learning_rate": 5.812797155169942e-07, + "loss": 0.8040215969085693, + "step": 5692 + }, + { + "epoch": 1.3117511520737328, + "grad_norm": 1.013609351306971, + "learning_rate": 5.809337686852582e-07, + "loss": 0.8355100154876709, + "step": 5693 + }, + { + "epoch": 1.3119815668202766, + "grad_norm": 1.466649672488184, + "learning_rate": 5.805878826804303e-07, + "loss": 0.8233312368392944, + "step": 5694 + }, + { + "epoch": 1.3122119815668203, + "grad_norm": 1.1563119764352225, + "learning_rate": 5.802420575527165e-07, + "loss": 0.7756507992744446, + "step": 5695 + }, + { + "epoch": 1.312442396313364, + "grad_norm": 1.1867005828091945, + "learning_rate": 5.798962933523124e-07, + "loss": 0.7503829002380371, + "step": 5696 + }, + { + "epoch": 1.3126728110599077, + "grad_norm": 1.506327103479739, + "learning_rate": 5.795505901294051e-07, + "loss": 0.749663770198822, + "step": 5697 + }, + { + "epoch": 1.3129032258064517, + "grad_norm": 1.440884605575443, + "learning_rate": 5.792049479341732e-07, + "loss": 0.9003115296363831, + "step": 5698 + }, + { + "epoch": 1.3131336405529954, + "grad_norm": 1.059615932759845, + "learning_rate": 5.788593668167854e-07, + "loss": 0.655732274055481, + "step": 5699 + }, + { + "epoch": 1.3133640552995391, + "grad_norm": 0.9900775273356892, + "learning_rate": 5.785138468274036e-07, + "loss": 0.7318822145462036, + "step": 5700 + }, + { + "epoch": 1.3135944700460829, + "grad_norm": 
0.9099775921199348, + "learning_rate": 5.781683880161788e-07, + "loss": 0.6512752771377563, + "step": 5701 + }, + { + "epoch": 1.3138248847926266, + "grad_norm": 1.1289875219473309, + "learning_rate": 5.778229904332537e-07, + "loss": 0.7232785820960999, + "step": 5702 + }, + { + "epoch": 1.3140552995391706, + "grad_norm": 1.2645196269426846, + "learning_rate": 5.77477654128763e-07, + "loss": 0.837032675743103, + "step": 5703 + }, + { + "epoch": 1.3142857142857143, + "grad_norm": 1.4984544841183642, + "learning_rate": 5.771323791528315e-07, + "loss": 0.926714301109314, + "step": 5704 + }, + { + "epoch": 1.314516129032258, + "grad_norm": 1.1221666474084682, + "learning_rate": 5.76787165555575e-07, + "loss": 0.7228986620903015, + "step": 5705 + }, + { + "epoch": 1.314746543778802, + "grad_norm": 1.3618848390091767, + "learning_rate": 5.764420133871015e-07, + "loss": 0.8330450057983398, + "step": 5706 + }, + { + "epoch": 1.3149769585253457, + "grad_norm": 1.2680150111326054, + "learning_rate": 5.760969226975088e-07, + "loss": 0.793700098991394, + "step": 5707 + }, + { + "epoch": 1.3152073732718894, + "grad_norm": 1.2897950240071954, + "learning_rate": 5.757518935368868e-07, + "loss": 0.8797321319580078, + "step": 5708 + }, + { + "epoch": 1.3154377880184331, + "grad_norm": 1.1147531221594877, + "learning_rate": 5.754069259553159e-07, + "loss": 0.8772039413452148, + "step": 5709 + }, + { + "epoch": 1.3156682027649769, + "grad_norm": 0.820739065285044, + "learning_rate": 5.750620200028672e-07, + "loss": 0.5998358726501465, + "step": 5710 + }, + { + "epoch": 1.3158986175115208, + "grad_norm": 1.7932534766511148, + "learning_rate": 5.747171757296041e-07, + "loss": 0.7694767713546753, + "step": 5711 + }, + { + "epoch": 1.3161290322580645, + "grad_norm": 1.2782062967169578, + "learning_rate": 5.7437239318558e-07, + "loss": 0.8526760339736938, + "step": 5712 + }, + { + "epoch": 1.3163594470046083, + "grad_norm": 1.199230266468518, + "learning_rate": 5.740276724208396e-07, + 
"loss": 0.8407987356185913, + "step": 5713 + }, + { + "epoch": 1.316589861751152, + "grad_norm": 1.289466266523787, + "learning_rate": 5.736830134854183e-07, + "loss": 0.9731476306915283, + "step": 5714 + }, + { + "epoch": 1.3168202764976957, + "grad_norm": 1.134122607422213, + "learning_rate": 5.733384164293434e-07, + "loss": 0.7230468988418579, + "step": 5715 + }, + { + "epoch": 1.3170506912442397, + "grad_norm": 1.2031868742095575, + "learning_rate": 5.729938813026327e-07, + "loss": 0.8260238766670227, + "step": 5716 + }, + { + "epoch": 1.3172811059907834, + "grad_norm": 1.0909604007760305, + "learning_rate": 5.726494081552948e-07, + "loss": 0.7616437673568726, + "step": 5717 + }, + { + "epoch": 1.3175115207373271, + "grad_norm": 1.1614064666034054, + "learning_rate": 5.723049970373295e-07, + "loss": 0.7628509998321533, + "step": 5718 + }, + { + "epoch": 1.317741935483871, + "grad_norm": 1.2522299219195512, + "learning_rate": 5.719606479987273e-07, + "loss": 0.744842529296875, + "step": 5719 + }, + { + "epoch": 1.3179723502304148, + "grad_norm": 0.9975745357037148, + "learning_rate": 5.716163610894708e-07, + "loss": 0.7228065133094788, + "step": 5720 + }, + { + "epoch": 1.3182027649769585, + "grad_norm": 1.5461378865588107, + "learning_rate": 5.712721363595325e-07, + "loss": 0.8764907121658325, + "step": 5721 + }, + { + "epoch": 1.3184331797235023, + "grad_norm": 1.0737882176659082, + "learning_rate": 5.709279738588757e-07, + "loss": 0.7966248393058777, + "step": 5722 + }, + { + "epoch": 1.318663594470046, + "grad_norm": 1.4239755183906653, + "learning_rate": 5.705838736374558e-07, + "loss": 0.8983157873153687, + "step": 5723 + }, + { + "epoch": 1.31889400921659, + "grad_norm": 1.1693207378088453, + "learning_rate": 5.70239835745218e-07, + "loss": 0.7349347472190857, + "step": 5724 + }, + { + "epoch": 1.3191244239631337, + "grad_norm": 1.4511397115268243, + "learning_rate": 5.698958602320988e-07, + "loss": 0.9297066926956177, + "step": 5725 + }, + { + "epoch": 
1.3193548387096774, + "grad_norm": 1.0721204261694746, + "learning_rate": 5.695519471480266e-07, + "loss": 0.7106038331985474, + "step": 5726 + }, + { + "epoch": 1.3195852534562211, + "grad_norm": 1.3074916303787611, + "learning_rate": 5.692080965429193e-07, + "loss": 0.8759022951126099, + "step": 5727 + }, + { + "epoch": 1.3198156682027649, + "grad_norm": 1.2039841953988952, + "learning_rate": 5.688643084666862e-07, + "loss": 0.8337300419807434, + "step": 5728 + }, + { + "epoch": 1.3200460829493088, + "grad_norm": 1.2975435530580146, + "learning_rate": 5.685205829692283e-07, + "loss": 0.8543391227722168, + "step": 5729 + }, + { + "epoch": 1.3202764976958525, + "grad_norm": 0.9960252179140261, + "learning_rate": 5.681769201004366e-07, + "loss": 0.7497329711914062, + "step": 5730 + }, + { + "epoch": 1.3205069124423963, + "grad_norm": 1.0615580947761494, + "learning_rate": 5.678333199101929e-07, + "loss": 0.8190964460372925, + "step": 5731 + }, + { + "epoch": 1.3207373271889402, + "grad_norm": 1.1486652227224357, + "learning_rate": 5.674897824483711e-07, + "loss": 0.8233011960983276, + "step": 5732 + }, + { + "epoch": 1.320967741935484, + "grad_norm": 1.2086113696285639, + "learning_rate": 5.671463077648348e-07, + "loss": 0.75257408618927, + "step": 5733 + }, + { + "epoch": 1.3211981566820277, + "grad_norm": 1.0357997575051858, + "learning_rate": 5.668028959094386e-07, + "loss": 0.6468796133995056, + "step": 5734 + }, + { + "epoch": 1.3214285714285714, + "grad_norm": 0.869693175338726, + "learning_rate": 5.664595469320288e-07, + "loss": 0.6756174564361572, + "step": 5735 + }, + { + "epoch": 1.3216589861751151, + "grad_norm": 1.2928038093451135, + "learning_rate": 5.661162608824419e-07, + "loss": 0.9040344953536987, + "step": 5736 + }, + { + "epoch": 1.321889400921659, + "grad_norm": 1.013287726627938, + "learning_rate": 5.657730378105055e-07, + "loss": 0.8082150816917419, + "step": 5737 + }, + { + "epoch": 1.3221198156682028, + "grad_norm": 1.2602760490074278, + 
"learning_rate": 5.654298777660375e-07, + "loss": 0.8760210275650024, + "step": 5738 + }, + { + "epoch": 1.3223502304147465, + "grad_norm": 1.4464070872810626, + "learning_rate": 5.650867807988473e-07, + "loss": 0.6980990171432495, + "step": 5739 + }, + { + "epoch": 1.3225806451612903, + "grad_norm": 0.927469939331727, + "learning_rate": 5.647437469587355e-07, + "loss": 0.6552839279174805, + "step": 5740 + }, + { + "epoch": 1.322811059907834, + "grad_norm": 0.9934566913252004, + "learning_rate": 5.644007762954925e-07, + "loss": 0.8304816484451294, + "step": 5741 + }, + { + "epoch": 1.323041474654378, + "grad_norm": 1.1691146043820817, + "learning_rate": 5.640578688589e-07, + "loss": 0.7977567315101624, + "step": 5742 + }, + { + "epoch": 1.3232718894009217, + "grad_norm": 1.4376891352576404, + "learning_rate": 5.637150246987308e-07, + "loss": 0.7656992673873901, + "step": 5743 + }, + { + "epoch": 1.3235023041474654, + "grad_norm": 1.1120822444951537, + "learning_rate": 5.633722438647483e-07, + "loss": 0.921256422996521, + "step": 5744 + }, + { + "epoch": 1.3237327188940093, + "grad_norm": 1.2718785752085355, + "learning_rate": 5.630295264067063e-07, + "loss": 0.8012785315513611, + "step": 5745 + }, + { + "epoch": 1.323963133640553, + "grad_norm": 1.2403067439539972, + "learning_rate": 5.626868723743504e-07, + "loss": 0.613241970539093, + "step": 5746 + }, + { + "epoch": 1.3241935483870968, + "grad_norm": 1.34086331204533, + "learning_rate": 5.623442818174161e-07, + "loss": 0.7134846448898315, + "step": 5747 + }, + { + "epoch": 1.3244239631336405, + "grad_norm": 1.3127547947642921, + "learning_rate": 5.620017547856295e-07, + "loss": 0.8963242173194885, + "step": 5748 + }, + { + "epoch": 1.3246543778801843, + "grad_norm": 1.3476788930677732, + "learning_rate": 5.616592913287087e-07, + "loss": 0.8401378393173218, + "step": 5749 + }, + { + "epoch": 1.3248847926267282, + "grad_norm": 1.0346861015576712, + "learning_rate": 5.613168914963615e-07, + "loss": 
0.6455308198928833, + "step": 5750 + }, + { + "epoch": 1.325115207373272, + "grad_norm": 1.105933895384034, + "learning_rate": 5.609745553382863e-07, + "loss": 0.6920031905174255, + "step": 5751 + }, + { + "epoch": 1.3253456221198157, + "grad_norm": 1.1001754091297298, + "learning_rate": 5.606322829041737e-07, + "loss": 0.9099706411361694, + "step": 5752 + }, + { + "epoch": 1.3255760368663594, + "grad_norm": 1.3286482905641974, + "learning_rate": 5.602900742437036e-07, + "loss": 0.8034265637397766, + "step": 5753 + }, + { + "epoch": 1.3258064516129031, + "grad_norm": 0.9956708814709011, + "learning_rate": 5.599479294065471e-07, + "loss": 0.7216918468475342, + "step": 5754 + }, + { + "epoch": 1.326036866359447, + "grad_norm": 1.1406371859334326, + "learning_rate": 5.596058484423655e-07, + "loss": 0.7428277730941772, + "step": 5755 + }, + { + "epoch": 1.3262672811059908, + "grad_norm": 1.3052741120899958, + "learning_rate": 5.592638314008127e-07, + "loss": 0.7636011838912964, + "step": 5756 + }, + { + "epoch": 1.3264976958525345, + "grad_norm": 1.3474656843000283, + "learning_rate": 5.589218783315311e-07, + "loss": 0.7765215635299683, + "step": 5757 + }, + { + "epoch": 1.3267281105990785, + "grad_norm": 1.0612768168901736, + "learning_rate": 5.585799892841551e-07, + "loss": 0.6524033546447754, + "step": 5758 + }, + { + "epoch": 1.3269585253456222, + "grad_norm": 1.134076107561282, + "learning_rate": 5.582381643083087e-07, + "loss": 0.8105186223983765, + "step": 5759 + }, + { + "epoch": 1.327188940092166, + "grad_norm": 1.2647095323418043, + "learning_rate": 5.578964034536084e-07, + "loss": 0.7654449939727783, + "step": 5760 + }, + { + "epoch": 1.3274193548387097, + "grad_norm": 1.0086580295069412, + "learning_rate": 5.5755470676966e-07, + "loss": 0.6545592546463013, + "step": 5761 + }, + { + "epoch": 1.3276497695852534, + "grad_norm": 1.1744400728961766, + "learning_rate": 5.572130743060597e-07, + "loss": 0.7116275429725647, + "step": 5762 + }, + { + "epoch": 
1.3278801843317973, + "grad_norm": 1.246651079531453, + "learning_rate": 5.568715061123959e-07, + "loss": 0.8396822214126587, + "step": 5763 + }, + { + "epoch": 1.328110599078341, + "grad_norm": 1.2492632037634621, + "learning_rate": 5.565300022382464e-07, + "loss": 0.6729685664176941, + "step": 5764 + }, + { + "epoch": 1.3283410138248848, + "grad_norm": 1.1356361065972511, + "learning_rate": 5.561885627331795e-07, + "loss": 0.6891340017318726, + "step": 5765 + }, + { + "epoch": 1.3285714285714285, + "grad_norm": 1.1361488307123824, + "learning_rate": 5.558471876467556e-07, + "loss": 0.7232956886291504, + "step": 5766 + }, + { + "epoch": 1.3288018433179722, + "grad_norm": 1.3213229777584583, + "learning_rate": 5.555058770285246e-07, + "loss": 0.7800660133361816, + "step": 5767 + }, + { + "epoch": 1.3290322580645162, + "grad_norm": 1.106817082140827, + "learning_rate": 5.551646309280266e-07, + "loss": 0.6794005036354065, + "step": 5768 + }, + { + "epoch": 1.32926267281106, + "grad_norm": 1.223898149625345, + "learning_rate": 5.548234493947939e-07, + "loss": 0.7739551067352295, + "step": 5769 + }, + { + "epoch": 1.3294930875576036, + "grad_norm": 1.0605861943491384, + "learning_rate": 5.544823324783482e-07, + "loss": 0.759978711605072, + "step": 5770 + }, + { + "epoch": 1.3297235023041476, + "grad_norm": 0.9593168779270222, + "learning_rate": 5.541412802282017e-07, + "loss": 0.7563333511352539, + "step": 5771 + }, + { + "epoch": 1.3299539170506913, + "grad_norm": 1.2126531853296405, + "learning_rate": 5.538002926938587e-07, + "loss": 0.6705852746963501, + "step": 5772 + }, + { + "epoch": 1.330184331797235, + "grad_norm": 1.4207541622240418, + "learning_rate": 5.534593699248124e-07, + "loss": 0.8343281745910645, + "step": 5773 + }, + { + "epoch": 1.3304147465437788, + "grad_norm": 1.4106880574063376, + "learning_rate": 5.531185119705474e-07, + "loss": 0.7158486843109131, + "step": 5774 + }, + { + "epoch": 1.3306451612903225, + "grad_norm": 1.5132468140839932, + 
"learning_rate": 5.527777188805385e-07, + "loss": 0.8888766765594482, + "step": 5775 + }, + { + "epoch": 1.3308755760368665, + "grad_norm": 1.0939731159249404, + "learning_rate": 5.524369907042519e-07, + "loss": 0.873813271522522, + "step": 5776 + }, + { + "epoch": 1.3311059907834102, + "grad_norm": 1.1685988919933143, + "learning_rate": 5.520963274911437e-07, + "loss": 0.7654919624328613, + "step": 5777 + }, + { + "epoch": 1.331336405529954, + "grad_norm": 0.8790821291361985, + "learning_rate": 5.517557292906606e-07, + "loss": 0.6976190805435181, + "step": 5778 + }, + { + "epoch": 1.3315668202764976, + "grad_norm": 1.0920428500423505, + "learning_rate": 5.5141519615224e-07, + "loss": 0.8356388807296753, + "step": 5779 + }, + { + "epoch": 1.3317972350230414, + "grad_norm": 1.1881219631842852, + "learning_rate": 5.510747281253094e-07, + "loss": 0.719998836517334, + "step": 5780 + }, + { + "epoch": 1.3320276497695853, + "grad_norm": 1.4093262324111957, + "learning_rate": 5.507343252592882e-07, + "loss": 0.8432124853134155, + "step": 5781 + }, + { + "epoch": 1.332258064516129, + "grad_norm": 1.2484869478133402, + "learning_rate": 5.503939876035845e-07, + "loss": 0.8426402807235718, + "step": 5782 + }, + { + "epoch": 1.3324884792626728, + "grad_norm": 1.1843136616988323, + "learning_rate": 5.500537152075986e-07, + "loss": 0.8133292198181152, + "step": 5783 + }, + { + "epoch": 1.3327188940092167, + "grad_norm": 1.2203561788081314, + "learning_rate": 5.497135081207205e-07, + "loss": 0.8097467422485352, + "step": 5784 + }, + { + "epoch": 1.3329493087557602, + "grad_norm": 0.9964838781032684, + "learning_rate": 5.493733663923299e-07, + "loss": 0.6943382024765015, + "step": 5785 + }, + { + "epoch": 1.3331797235023042, + "grad_norm": 0.8958647048569846, + "learning_rate": 5.490332900717993e-07, + "loss": 0.5896245837211609, + "step": 5786 + }, + { + "epoch": 1.333410138248848, + "grad_norm": 1.2066217319918868, + "learning_rate": 5.486932792084895e-07, + "loss": 
0.6837725639343262, + "step": 5787 + }, + { + "epoch": 1.3336405529953916, + "grad_norm": 1.3459237431541746, + "learning_rate": 5.483533338517523e-07, + "loss": 0.8371915221214294, + "step": 5788 + }, + { + "epoch": 1.3338709677419356, + "grad_norm": 1.1649657355507903, + "learning_rate": 5.480134540509313e-07, + "loss": 0.8001077175140381, + "step": 5789 + }, + { + "epoch": 1.3341013824884793, + "grad_norm": 1.4458040399946648, + "learning_rate": 5.476736398553591e-07, + "loss": 0.9070717096328735, + "step": 5790 + }, + { + "epoch": 1.334331797235023, + "grad_norm": 1.256932465914866, + "learning_rate": 5.473338913143589e-07, + "loss": 0.9061849117279053, + "step": 5791 + }, + { + "epoch": 1.3345622119815668, + "grad_norm": 1.2993559451282939, + "learning_rate": 5.469942084772454e-07, + "loss": 0.8465786576271057, + "step": 5792 + }, + { + "epoch": 1.3347926267281105, + "grad_norm": 1.2333173266054418, + "learning_rate": 5.466545913933229e-07, + "loss": 0.8221259117126465, + "step": 5793 + }, + { + "epoch": 1.3350230414746544, + "grad_norm": 1.1214356414954587, + "learning_rate": 5.463150401118864e-07, + "loss": 0.594088077545166, + "step": 5794 + }, + { + "epoch": 1.3352534562211982, + "grad_norm": 1.0900215939620008, + "learning_rate": 5.459755546822207e-07, + "loss": 0.6983529925346375, + "step": 5795 + }, + { + "epoch": 1.335483870967742, + "grad_norm": 1.3561793320800521, + "learning_rate": 5.456361351536027e-07, + "loss": 0.7720709443092346, + "step": 5796 + }, + { + "epoch": 1.3357142857142856, + "grad_norm": 1.1798730390657586, + "learning_rate": 5.45296781575298e-07, + "loss": 0.8087977766990662, + "step": 5797 + }, + { + "epoch": 1.3359447004608294, + "grad_norm": 1.123982224882924, + "learning_rate": 5.449574939965636e-07, + "loss": 0.6808000802993774, + "step": 5798 + }, + { + "epoch": 1.3361751152073733, + "grad_norm": 1.0634688756756818, + "learning_rate": 5.446182724666466e-07, + "loss": 0.7222881317138672, + "step": 5799 + }, + { + "epoch": 
1.336405529953917, + "grad_norm": 1.1919279054292256, + "learning_rate": 5.44279117034784e-07, + "loss": 0.872687578201294, + "step": 5800 + }, + { + "epoch": 1.3366359447004608, + "grad_norm": 1.3648460603559815, + "learning_rate": 5.439400277502048e-07, + "loss": 0.7728114128112793, + "step": 5801 + }, + { + "epoch": 1.3368663594470047, + "grad_norm": 1.0445795610107291, + "learning_rate": 5.436010046621267e-07, + "loss": 0.807528018951416, + "step": 5802 + }, + { + "epoch": 1.3370967741935484, + "grad_norm": 1.151575262421327, + "learning_rate": 5.432620478197583e-07, + "loss": 0.6997063159942627, + "step": 5803 + }, + { + "epoch": 1.3373271889400922, + "grad_norm": 1.309125931504039, + "learning_rate": 5.429231572722995e-07, + "loss": 0.797568678855896, + "step": 5804 + }, + { + "epoch": 1.337557603686636, + "grad_norm": 1.0057472643211554, + "learning_rate": 5.425843330689386e-07, + "loss": 0.6412359476089478, + "step": 5805 + }, + { + "epoch": 1.3377880184331796, + "grad_norm": 1.1290308654729904, + "learning_rate": 5.422455752588569e-07, + "loss": 0.8605507612228394, + "step": 5806 + }, + { + "epoch": 1.3380184331797236, + "grad_norm": 1.0459165137008808, + "learning_rate": 5.419068838912238e-07, + "loss": 0.856192946434021, + "step": 5807 + }, + { + "epoch": 1.3382488479262673, + "grad_norm": 1.1354202830657318, + "learning_rate": 5.415682590151998e-07, + "loss": 0.8614650368690491, + "step": 5808 + }, + { + "epoch": 1.338479262672811, + "grad_norm": 1.6619444336877072, + "learning_rate": 5.412297006799365e-07, + "loss": 0.9675840139389038, + "step": 5809 + }, + { + "epoch": 1.3387096774193548, + "grad_norm": 1.0659647985310448, + "learning_rate": 5.408912089345747e-07, + "loss": 0.7333405017852783, + "step": 5810 + }, + { + "epoch": 1.3389400921658985, + "grad_norm": 1.3540380425785927, + "learning_rate": 5.405527838282457e-07, + "loss": 0.8271909952163696, + "step": 5811 + }, + { + "epoch": 1.3391705069124424, + "grad_norm": 1.3562249096622705, + 
"learning_rate": 5.402144254100724e-07, + "loss": 0.8036069869995117, + "step": 5812 + }, + { + "epoch": 1.3394009216589862, + "grad_norm": 1.3975514954701582, + "learning_rate": 5.398761337291667e-07, + "loss": 0.855912446975708, + "step": 5813 + }, + { + "epoch": 1.33963133640553, + "grad_norm": 1.2830545749549949, + "learning_rate": 5.395379088346309e-07, + "loss": 0.8198536038398743, + "step": 5814 + }, + { + "epoch": 1.3398617511520738, + "grad_norm": 1.2130249913700057, + "learning_rate": 5.391997507755581e-07, + "loss": 0.8931646347045898, + "step": 5815 + }, + { + "epoch": 1.3400921658986176, + "grad_norm": 0.9981529734228639, + "learning_rate": 5.388616596010312e-07, + "loss": 0.7073954343795776, + "step": 5816 + }, + { + "epoch": 1.3403225806451613, + "grad_norm": 1.2450035085912274, + "learning_rate": 5.385236353601241e-07, + "loss": 0.7758424282073975, + "step": 5817 + }, + { + "epoch": 1.340552995391705, + "grad_norm": 1.1707291956273551, + "learning_rate": 5.381856781019005e-07, + "loss": 0.6805497407913208, + "step": 5818 + }, + { + "epoch": 1.3407834101382488, + "grad_norm": 1.251540768140409, + "learning_rate": 5.378477878754144e-07, + "loss": 0.8956538438796997, + "step": 5819 + }, + { + "epoch": 1.3410138248847927, + "grad_norm": 1.0594639846580987, + "learning_rate": 5.375099647297096e-07, + "loss": 0.7819657921791077, + "step": 5820 + }, + { + "epoch": 1.3412442396313364, + "grad_norm": 1.0523114055014655, + "learning_rate": 5.371722087138217e-07, + "loss": 0.5764007568359375, + "step": 5821 + }, + { + "epoch": 1.3414746543778802, + "grad_norm": 1.1661401559135987, + "learning_rate": 5.368345198767749e-07, + "loss": 0.697022557258606, + "step": 5822 + }, + { + "epoch": 1.3417050691244239, + "grad_norm": 1.3065346909259405, + "learning_rate": 5.364968982675839e-07, + "loss": 0.7773014307022095, + "step": 5823 + }, + { + "epoch": 1.3419354838709676, + "grad_norm": 1.3340944249973796, + "learning_rate": 5.361593439352551e-07, + "loss": 
0.7395004034042358, + "step": 5824 + }, + { + "epoch": 1.3421658986175116, + "grad_norm": 1.0762295080363014, + "learning_rate": 5.358218569287834e-07, + "loss": 0.7989716529846191, + "step": 5825 + }, + { + "epoch": 1.3423963133640553, + "grad_norm": 1.280549478612159, + "learning_rate": 5.354844372971543e-07, + "loss": 0.8894884586334229, + "step": 5826 + }, + { + "epoch": 1.342626728110599, + "grad_norm": 1.5586577225053506, + "learning_rate": 5.351470850893446e-07, + "loss": 0.8415021300315857, + "step": 5827 + }, + { + "epoch": 1.342857142857143, + "grad_norm": 1.4272313895508615, + "learning_rate": 5.3480980035432e-07, + "loss": 0.9963078498840332, + "step": 5828 + }, + { + "epoch": 1.3430875576036867, + "grad_norm": 1.1680739887228044, + "learning_rate": 5.344725831410368e-07, + "loss": 0.8489943742752075, + "step": 5829 + }, + { + "epoch": 1.3433179723502304, + "grad_norm": 0.9897462108554296, + "learning_rate": 5.341354334984422e-07, + "loss": 0.6949954032897949, + "step": 5830 + }, + { + "epoch": 1.3435483870967742, + "grad_norm": 1.1225897948987795, + "learning_rate": 5.337983514754722e-07, + "loss": 0.878408670425415, + "step": 5831 + }, + { + "epoch": 1.3437788018433179, + "grad_norm": 1.2067617593706235, + "learning_rate": 5.334613371210549e-07, + "loss": 0.722877025604248, + "step": 5832 + }, + { + "epoch": 1.3440092165898618, + "grad_norm": 1.04123853110292, + "learning_rate": 5.331243904841068e-07, + "loss": 0.670013427734375, + "step": 5833 + }, + { + "epoch": 1.3442396313364056, + "grad_norm": 1.0789084686611892, + "learning_rate": 5.327875116135354e-07, + "loss": 0.8336968421936035, + "step": 5834 + }, + { + "epoch": 1.3444700460829493, + "grad_norm": 1.2348261826059375, + "learning_rate": 5.324507005582381e-07, + "loss": 0.7917020916938782, + "step": 5835 + }, + { + "epoch": 1.344700460829493, + "grad_norm": 1.288528901659057, + "learning_rate": 5.321139573671024e-07, + "loss": 0.7479217052459717, + "step": 5836 + }, + { + "epoch": 
1.3449308755760367, + "grad_norm": 1.206901718846971, + "learning_rate": 5.317772820890068e-07, + "loss": 0.8059084415435791, + "step": 5837 + }, + { + "epoch": 1.3451612903225807, + "grad_norm": 1.0687058344207596, + "learning_rate": 5.314406747728186e-07, + "loss": 0.6853187680244446, + "step": 5838 + }, + { + "epoch": 1.3453917050691244, + "grad_norm": 1.2007310227541288, + "learning_rate": 5.311041354673964e-07, + "loss": 0.7769491672515869, + "step": 5839 + }, + { + "epoch": 1.3456221198156681, + "grad_norm": 1.007121872066712, + "learning_rate": 5.307676642215877e-07, + "loss": 0.6669384241104126, + "step": 5840 + }, + { + "epoch": 1.345852534562212, + "grad_norm": 1.091111253411437, + "learning_rate": 5.304312610842319e-07, + "loss": 0.7884945869445801, + "step": 5841 + }, + { + "epoch": 1.3460829493087558, + "grad_norm": 1.2799296704263758, + "learning_rate": 5.300949261041567e-07, + "loss": 0.8030047416687012, + "step": 5842 + }, + { + "epoch": 1.3463133640552996, + "grad_norm": 1.293856241707333, + "learning_rate": 5.297586593301806e-07, + "loss": 0.7792675495147705, + "step": 5843 + }, + { + "epoch": 1.3465437788018433, + "grad_norm": 1.450964712660266, + "learning_rate": 5.29422460811113e-07, + "loss": 0.8699119091033936, + "step": 5844 + }, + { + "epoch": 1.346774193548387, + "grad_norm": 1.1164478098944863, + "learning_rate": 5.290863305957523e-07, + "loss": 0.8075394630432129, + "step": 5845 + }, + { + "epoch": 1.347004608294931, + "grad_norm": 1.2025668698948455, + "learning_rate": 5.287502687328868e-07, + "loss": 0.7875077128410339, + "step": 5846 + }, + { + "epoch": 1.3472350230414747, + "grad_norm": 1.2743475952279586, + "learning_rate": 5.284142752712965e-07, + "loss": 0.6799413561820984, + "step": 5847 + }, + { + "epoch": 1.3474654377880184, + "grad_norm": 1.3570475044053845, + "learning_rate": 5.280783502597496e-07, + "loss": 0.914801299571991, + "step": 5848 + }, + { + "epoch": 1.3476958525345621, + "grad_norm": 1.4096481978785727, + 
"learning_rate": 5.277424937470052e-07, + "loss": 0.8591992855072021, + "step": 5849 + }, + { + "epoch": 1.3479262672811059, + "grad_norm": 1.1971358109064123, + "learning_rate": 5.27406705781813e-07, + "loss": 0.7830478549003601, + "step": 5850 + }, + { + "epoch": 1.3481566820276498, + "grad_norm": 1.397466179292115, + "learning_rate": 5.270709864129119e-07, + "loss": 0.8365499973297119, + "step": 5851 + }, + { + "epoch": 1.3483870967741935, + "grad_norm": 1.5417932199175834, + "learning_rate": 5.267353356890305e-07, + "loss": 0.8342669010162354, + "step": 5852 + }, + { + "epoch": 1.3486175115207373, + "grad_norm": 1.0532947941417055, + "learning_rate": 5.263997536588891e-07, + "loss": 0.7802393436431885, + "step": 5853 + }, + { + "epoch": 1.3488479262672812, + "grad_norm": 1.2005511445865484, + "learning_rate": 5.260642403711964e-07, + "loss": 0.8245328068733215, + "step": 5854 + }, + { + "epoch": 1.349078341013825, + "grad_norm": 1.043405656704728, + "learning_rate": 5.257287958746519e-07, + "loss": 0.7209265232086182, + "step": 5855 + }, + { + "epoch": 1.3493087557603687, + "grad_norm": 1.254105643009189, + "learning_rate": 5.253934202179444e-07, + "loss": 0.9258058071136475, + "step": 5856 + }, + { + "epoch": 1.3495391705069124, + "grad_norm": 1.3493584028342165, + "learning_rate": 5.25058113449754e-07, + "loss": 0.6889467835426331, + "step": 5857 + }, + { + "epoch": 1.3497695852534561, + "grad_norm": 1.113027412487739, + "learning_rate": 5.247228756187498e-07, + "loss": 0.8810057640075684, + "step": 5858 + }, + { + "epoch": 1.35, + "grad_norm": 1.140989478824924, + "learning_rate": 5.243877067735909e-07, + "loss": 0.7236393690109253, + "step": 5859 + }, + { + "epoch": 1.3502304147465438, + "grad_norm": 1.1712872152312954, + "learning_rate": 5.240526069629264e-07, + "loss": 0.8287979364395142, + "step": 5860 + }, + { + "epoch": 1.3504608294930875, + "grad_norm": 0.9764543402246563, + "learning_rate": 5.237175762353964e-07, + "loss": 0.8268846869468689, + 
"step": 5861 + }, + { + "epoch": 1.3506912442396313, + "grad_norm": 1.08770217121451, + "learning_rate": 5.233826146396296e-07, + "loss": 0.7995575666427612, + "step": 5862 + }, + { + "epoch": 1.350921658986175, + "grad_norm": 1.185939350431103, + "learning_rate": 5.230477222242449e-07, + "loss": 0.7379493713378906, + "step": 5863 + }, + { + "epoch": 1.351152073732719, + "grad_norm": 1.1532350043824988, + "learning_rate": 5.227128990378524e-07, + "loss": 0.729906439781189, + "step": 5864 + }, + { + "epoch": 1.3513824884792627, + "grad_norm": 1.3775772205538213, + "learning_rate": 5.223781451290506e-07, + "loss": 0.8356789350509644, + "step": 5865 + }, + { + "epoch": 1.3516129032258064, + "grad_norm": 1.4707388081384496, + "learning_rate": 5.220434605464285e-07, + "loss": 0.8130582571029663, + "step": 5866 + }, + { + "epoch": 1.3518433179723504, + "grad_norm": 1.3840431554185126, + "learning_rate": 5.217088453385658e-07, + "loss": 0.7686447501182556, + "step": 5867 + }, + { + "epoch": 1.352073732718894, + "grad_norm": 1.4824685151456765, + "learning_rate": 5.213742995540309e-07, + "loss": 0.7945844531059265, + "step": 5868 + }, + { + "epoch": 1.3523041474654378, + "grad_norm": 0.9715413572597766, + "learning_rate": 5.210398232413824e-07, + "loss": 0.8082837462425232, + "step": 5869 + }, + { + "epoch": 1.3525345622119815, + "grad_norm": 1.2398246007417328, + "learning_rate": 5.2070541644917e-07, + "loss": 0.7826153039932251, + "step": 5870 + }, + { + "epoch": 1.3527649769585253, + "grad_norm": 1.2471684178108737, + "learning_rate": 5.203710792259318e-07, + "loss": 0.6853276491165161, + "step": 5871 + }, + { + "epoch": 1.3529953917050692, + "grad_norm": 1.2891891865978977, + "learning_rate": 5.200368116201962e-07, + "loss": 0.8354780673980713, + "step": 5872 + }, + { + "epoch": 1.353225806451613, + "grad_norm": 1.1178862343459024, + "learning_rate": 5.197026136804823e-07, + "loss": 0.7857648134231567, + "step": 5873 + }, + { + "epoch": 1.3534562211981567, + 
"grad_norm": 0.9168225851850988, + "learning_rate": 5.193684854552982e-07, + "loss": 0.663504958152771, + "step": 5874 + }, + { + "epoch": 1.3536866359447004, + "grad_norm": 1.329771615602396, + "learning_rate": 5.190344269931423e-07, + "loss": 0.8192203044891357, + "step": 5875 + }, + { + "epoch": 1.3539170506912441, + "grad_norm": 1.4861685476717017, + "learning_rate": 5.187004383425024e-07, + "loss": 0.801753044128418, + "step": 5876 + }, + { + "epoch": 1.354147465437788, + "grad_norm": 1.3551621393598028, + "learning_rate": 5.183665195518566e-07, + "loss": 0.9427206516265869, + "step": 5877 + }, + { + "epoch": 1.3543778801843318, + "grad_norm": 1.1121835630605517, + "learning_rate": 5.18032670669673e-07, + "loss": 0.7801729440689087, + "step": 5878 + }, + { + "epoch": 1.3546082949308755, + "grad_norm": 1.3936797390586833, + "learning_rate": 5.176988917444094e-07, + "loss": 0.8224533796310425, + "step": 5879 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.9505008459531469, + "learning_rate": 5.173651828245127e-07, + "loss": 0.7800098657608032, + "step": 5880 + }, + { + "epoch": 1.3550691244239632, + "grad_norm": 0.9654380749861797, + "learning_rate": 5.170315439584212e-07, + "loss": 0.7612746953964233, + "step": 5881 + }, + { + "epoch": 1.355299539170507, + "grad_norm": 1.191616140078335, + "learning_rate": 5.166979751945617e-07, + "loss": 0.8027492761611938, + "step": 5882 + }, + { + "epoch": 1.3555299539170507, + "grad_norm": 1.167147993456773, + "learning_rate": 5.163644765813508e-07, + "loss": 0.7509280443191528, + "step": 5883 + }, + { + "epoch": 1.3557603686635944, + "grad_norm": 1.2102231125675782, + "learning_rate": 5.160310481671966e-07, + "loss": 0.7663145661354065, + "step": 5884 + }, + { + "epoch": 1.3559907834101383, + "grad_norm": 1.246862901799125, + "learning_rate": 5.156976900004948e-07, + "loss": 0.7598870396614075, + "step": 5885 + }, + { + "epoch": 1.356221198156682, + "grad_norm": 1.127184650819857, + "learning_rate": 
5.153644021296317e-07, + "loss": 0.7923038005828857, + "step": 5886 + }, + { + "epoch": 1.3564516129032258, + "grad_norm": 1.2664053097126295, + "learning_rate": 5.150311846029846e-07, + "loss": 0.8711799383163452, + "step": 5887 + }, + { + "epoch": 1.3566820276497695, + "grad_norm": 1.294570667250746, + "learning_rate": 5.146980374689191e-07, + "loss": 0.7852096557617188, + "step": 5888 + }, + { + "epoch": 1.3569124423963133, + "grad_norm": 1.1426360408928755, + "learning_rate": 5.143649607757905e-07, + "loss": 0.7259876132011414, + "step": 5889 + }, + { + "epoch": 1.3571428571428572, + "grad_norm": 0.9810253925795782, + "learning_rate": 5.140319545719454e-07, + "loss": 0.7612321376800537, + "step": 5890 + }, + { + "epoch": 1.357373271889401, + "grad_norm": 1.2919477789807814, + "learning_rate": 5.136990189057187e-07, + "loss": 0.7881298661231995, + "step": 5891 + }, + { + "epoch": 1.3576036866359447, + "grad_norm": 1.0310706760740191, + "learning_rate": 5.133661538254353e-07, + "loss": 0.6956340074539185, + "step": 5892 + }, + { + "epoch": 1.3578341013824886, + "grad_norm": 1.0277045355993415, + "learning_rate": 5.130333593794107e-07, + "loss": 0.7800698280334473, + "step": 5893 + }, + { + "epoch": 1.3580645161290323, + "grad_norm": 1.0373100274796343, + "learning_rate": 5.127006356159496e-07, + "loss": 0.6920318603515625, + "step": 5894 + }, + { + "epoch": 1.358294930875576, + "grad_norm": 0.9870224446835288, + "learning_rate": 5.123679825833458e-07, + "loss": 0.6972872018814087, + "step": 5895 + }, + { + "epoch": 1.3585253456221198, + "grad_norm": 1.1473583592012562, + "learning_rate": 5.12035400329884e-07, + "loss": 0.8820276260375977, + "step": 5896 + }, + { + "epoch": 1.3587557603686635, + "grad_norm": 1.1566218274104645, + "learning_rate": 5.117028889038375e-07, + "loss": 0.8834109306335449, + "step": 5897 + }, + { + "epoch": 1.3589861751152075, + "grad_norm": 1.1393544418506285, + "learning_rate": 5.113704483534704e-07, + "loss": 0.6981096267700195, + 
"step": 5898 + }, + { + "epoch": 1.3592165898617512, + "grad_norm": 1.329102048560067, + "learning_rate": 5.11038078727036e-07, + "loss": 0.7617249488830566, + "step": 5899 + }, + { + "epoch": 1.359447004608295, + "grad_norm": 1.7116569149164136, + "learning_rate": 5.107057800727773e-07, + "loss": 0.8373798131942749, + "step": 5900 + }, + { + "epoch": 1.3596774193548387, + "grad_norm": 1.3064563550321244, + "learning_rate": 5.103735524389264e-07, + "loss": 0.7176666855812073, + "step": 5901 + }, + { + "epoch": 1.3599078341013824, + "grad_norm": 0.9003342699900779, + "learning_rate": 5.100413958737067e-07, + "loss": 0.7872966527938843, + "step": 5902 + }, + { + "epoch": 1.3601382488479263, + "grad_norm": 1.1723157653802474, + "learning_rate": 5.097093104253295e-07, + "loss": 0.6668897271156311, + "step": 5903 + }, + { + "epoch": 1.36036866359447, + "grad_norm": 1.2119302484042467, + "learning_rate": 5.093772961419967e-07, + "loss": 0.8413408994674683, + "step": 5904 + }, + { + "epoch": 1.3605990783410138, + "grad_norm": 0.9857990015136971, + "learning_rate": 5.090453530719e-07, + "loss": 0.632825493812561, + "step": 5905 + }, + { + "epoch": 1.3608294930875577, + "grad_norm": 1.2378128322555926, + "learning_rate": 5.087134812632201e-07, + "loss": 0.737346887588501, + "step": 5906 + }, + { + "epoch": 1.3610599078341012, + "grad_norm": 1.2614434601380542, + "learning_rate": 5.083816807641283e-07, + "loss": 1.00008225440979, + "step": 5907 + }, + { + "epoch": 1.3612903225806452, + "grad_norm": 1.2168755174090398, + "learning_rate": 5.08049951622785e-07, + "loss": 0.7844079732894897, + "step": 5908 + }, + { + "epoch": 1.361520737327189, + "grad_norm": 1.0532681425474226, + "learning_rate": 5.077182938873393e-07, + "loss": 0.8615080118179321, + "step": 5909 + }, + { + "epoch": 1.3617511520737327, + "grad_norm": 1.279562028421048, + "learning_rate": 5.073867076059321e-07, + "loss": 0.6930621862411499, + "step": 5910 + }, + { + "epoch": 1.3619815668202766, + "grad_norm": 
1.4077453728560791, + "learning_rate": 5.07055192826692e-07, + "loss": 0.7020307183265686, + "step": 5911 + }, + { + "epoch": 1.3622119815668203, + "grad_norm": 1.4403791813866107, + "learning_rate": 5.067237495977379e-07, + "loss": 0.7281042337417603, + "step": 5912 + }, + { + "epoch": 1.362442396313364, + "grad_norm": 1.014203177200115, + "learning_rate": 5.063923779671789e-07, + "loss": 0.8092719316482544, + "step": 5913 + }, + { + "epoch": 1.3626728110599078, + "grad_norm": 1.2597384594296865, + "learning_rate": 5.060610779831125e-07, + "loss": 0.7323317527770996, + "step": 5914 + }, + { + "epoch": 1.3629032258064515, + "grad_norm": 0.9804861205409557, + "learning_rate": 5.05729849693627e-07, + "loss": 0.7370069622993469, + "step": 5915 + }, + { + "epoch": 1.3631336405529955, + "grad_norm": 1.1355071333670705, + "learning_rate": 5.053986931467994e-07, + "loss": 0.7175320386886597, + "step": 5916 + }, + { + "epoch": 1.3633640552995392, + "grad_norm": 1.2834592450306632, + "learning_rate": 5.050676083906964e-07, + "loss": 0.8643501996994019, + "step": 5917 + }, + { + "epoch": 1.363594470046083, + "grad_norm": 1.2479698704612106, + "learning_rate": 5.047365954733752e-07, + "loss": 0.9110950827598572, + "step": 5918 + }, + { + "epoch": 1.3638248847926266, + "grad_norm": 1.6104451195946936, + "learning_rate": 5.044056544428814e-07, + "loss": 0.9242197275161743, + "step": 5919 + }, + { + "epoch": 1.3640552995391704, + "grad_norm": 1.2769108446030992, + "learning_rate": 5.040747853472509e-07, + "loss": 0.9218860864639282, + "step": 5920 + }, + { + "epoch": 1.3642857142857143, + "grad_norm": 1.3302527755174611, + "learning_rate": 5.037439882345084e-07, + "loss": 0.970054030418396, + "step": 5921 + }, + { + "epoch": 1.364516129032258, + "grad_norm": 1.0075132364725619, + "learning_rate": 5.034132631526695e-07, + "loss": 0.7707182168960571, + "step": 5922 + }, + { + "epoch": 1.3647465437788018, + "grad_norm": 1.1036594577594991, + "learning_rate": 5.03082610149738e-07, + 
"loss": 0.7673811912536621, + "step": 5923 + }, + { + "epoch": 1.3649769585253457, + "grad_norm": 1.2758650519526258, + "learning_rate": 5.027520292737073e-07, + "loss": 0.7387198209762573, + "step": 5924 + }, + { + "epoch": 1.3652073732718895, + "grad_norm": 1.139448521744241, + "learning_rate": 5.024215205725619e-07, + "loss": 0.7803019881248474, + "step": 5925 + }, + { + "epoch": 1.3654377880184332, + "grad_norm": 1.3985269621197394, + "learning_rate": 5.020910840942738e-07, + "loss": 0.8753018379211426, + "step": 5926 + }, + { + "epoch": 1.365668202764977, + "grad_norm": 1.0358625157915384, + "learning_rate": 5.017607198868055e-07, + "loss": 0.7917389869689941, + "step": 5927 + }, + { + "epoch": 1.3658986175115206, + "grad_norm": 1.2995608187995562, + "learning_rate": 5.014304279981095e-07, + "loss": 0.8393691182136536, + "step": 5928 + }, + { + "epoch": 1.3661290322580646, + "grad_norm": 1.2671721961788391, + "learning_rate": 5.011002084761264e-07, + "loss": 0.6635205745697021, + "step": 5929 + }, + { + "epoch": 1.3663594470046083, + "grad_norm": 1.2038857805513816, + "learning_rate": 5.007700613687879e-07, + "loss": 0.7058769464492798, + "step": 5930 + }, + { + "epoch": 1.366589861751152, + "grad_norm": 1.1784688857731938, + "learning_rate": 5.004399867240143e-07, + "loss": 0.841168224811554, + "step": 5931 + }, + { + "epoch": 1.3668202764976958, + "grad_norm": 1.3760327619217738, + "learning_rate": 5.001099845897148e-07, + "loss": 0.7385121583938599, + "step": 5932 + }, + { + "epoch": 1.3670506912442395, + "grad_norm": 1.1633525983686732, + "learning_rate": 4.997800550137897e-07, + "loss": 0.6525158882141113, + "step": 5933 + }, + { + "epoch": 1.3672811059907835, + "grad_norm": 1.2331358286597804, + "learning_rate": 4.994501980441274e-07, + "loss": 0.7838844060897827, + "step": 5934 + }, + { + "epoch": 1.3675115207373272, + "grad_norm": 1.4450953979822279, + "learning_rate": 4.991204137286061e-07, + "loss": 0.8831999897956848, + "step": 5935 + }, + { + 
"epoch": 1.367741935483871, + "grad_norm": 1.0408031352355525, + "learning_rate": 4.987907021150938e-07, + "loss": 0.8053784966468811, + "step": 5936 + }, + { + "epoch": 1.3679723502304149, + "grad_norm": 1.1356206370071746, + "learning_rate": 4.984610632514475e-07, + "loss": 0.8093301057815552, + "step": 5937 + }, + { + "epoch": 1.3682027649769586, + "grad_norm": 1.0230530705292329, + "learning_rate": 4.981314971855136e-07, + "loss": 0.7609653472900391, + "step": 5938 + }, + { + "epoch": 1.3684331797235023, + "grad_norm": 1.4109994154981755, + "learning_rate": 4.978020039651288e-07, + "loss": 0.7131600379943848, + "step": 5939 + }, + { + "epoch": 1.368663594470046, + "grad_norm": 1.3192550042799691, + "learning_rate": 4.974725836381184e-07, + "loss": 0.6555063724517822, + "step": 5940 + }, + { + "epoch": 1.3688940092165898, + "grad_norm": 1.1278604970222592, + "learning_rate": 4.971432362522968e-07, + "loss": 0.8349519968032837, + "step": 5941 + }, + { + "epoch": 1.3691244239631337, + "grad_norm": 1.2138732932202303, + "learning_rate": 4.968139618554691e-07, + "loss": 0.7335611581802368, + "step": 5942 + }, + { + "epoch": 1.3693548387096774, + "grad_norm": 1.050807913168598, + "learning_rate": 4.964847604954287e-07, + "loss": 0.8349814414978027, + "step": 5943 + }, + { + "epoch": 1.3695852534562212, + "grad_norm": 1.07716704849378, + "learning_rate": 4.961556322199585e-07, + "loss": 0.6816729307174683, + "step": 5944 + }, + { + "epoch": 1.369815668202765, + "grad_norm": 1.5220059571304148, + "learning_rate": 4.958265770768315e-07, + "loss": 0.847672164440155, + "step": 5945 + }, + { + "epoch": 1.3700460829493086, + "grad_norm": 1.267067930725286, + "learning_rate": 4.954975951138095e-07, + "loss": 0.6674519777297974, + "step": 5946 + }, + { + "epoch": 1.3702764976958526, + "grad_norm": 1.0820409905680344, + "learning_rate": 4.951686863786432e-07, + "loss": 0.7836427092552185, + "step": 5947 + }, + { + "epoch": 1.3705069124423963, + "grad_norm": 1.0577780792239002, 
+ "learning_rate": 4.948398509190742e-07, + "loss": 0.640183687210083, + "step": 5948 + }, + { + "epoch": 1.37073732718894, + "grad_norm": 1.223963669470004, + "learning_rate": 4.945110887828322e-07, + "loss": 0.8438451290130615, + "step": 5949 + }, + { + "epoch": 1.370967741935484, + "grad_norm": 1.5483267377377474, + "learning_rate": 4.94182400017636e-07, + "loss": 0.9311714172363281, + "step": 5950 + }, + { + "epoch": 1.3711981566820277, + "grad_norm": 1.2352509732193302, + "learning_rate": 4.938537846711952e-07, + "loss": 0.7332801818847656, + "step": 5951 + }, + { + "epoch": 1.3714285714285714, + "grad_norm": 1.127354832681604, + "learning_rate": 4.935252427912075e-07, + "loss": 0.7189289331436157, + "step": 5952 + }, + { + "epoch": 1.3716589861751152, + "grad_norm": 1.451594181977691, + "learning_rate": 4.9319677442536e-07, + "loss": 0.827372670173645, + "step": 5953 + }, + { + "epoch": 1.371889400921659, + "grad_norm": 1.2273788913776413, + "learning_rate": 4.9286837962133e-07, + "loss": 0.7607625722885132, + "step": 5954 + }, + { + "epoch": 1.3721198156682028, + "grad_norm": 1.1935199245873378, + "learning_rate": 4.925400584267836e-07, + "loss": 0.9420886635780334, + "step": 5955 + }, + { + "epoch": 1.3723502304147466, + "grad_norm": 1.1557325656206936, + "learning_rate": 4.922118108893757e-07, + "loss": 0.7605317831039429, + "step": 5956 + }, + { + "epoch": 1.3725806451612903, + "grad_norm": 1.059494459687004, + "learning_rate": 4.918836370567513e-07, + "loss": 0.8353599309921265, + "step": 5957 + }, + { + "epoch": 1.372811059907834, + "grad_norm": 1.2571100340874592, + "learning_rate": 4.915555369765439e-07, + "loss": 0.8540027141571045, + "step": 5958 + }, + { + "epoch": 1.3730414746543778, + "grad_norm": 1.027809306304352, + "learning_rate": 4.912275106963778e-07, + "loss": 0.6965712308883667, + "step": 5959 + }, + { + "epoch": 1.3732718894009217, + "grad_norm": 1.0356479101830274, + "learning_rate": 4.908995582638648e-07, + "loss": 0.7460787296295166, 
+ "step": 5960 + }, + { + "epoch": 1.3735023041474654, + "grad_norm": 1.1845566109999182, + "learning_rate": 4.905716797266067e-07, + "loss": 0.8652873039245605, + "step": 5961 + }, + { + "epoch": 1.3737327188940092, + "grad_norm": 1.1300176885770365, + "learning_rate": 4.902438751321952e-07, + "loss": 0.7757953405380249, + "step": 5962 + }, + { + "epoch": 1.3739631336405531, + "grad_norm": 1.2945741727860514, + "learning_rate": 4.899161445282102e-07, + "loss": 0.8842452168464661, + "step": 5963 + }, + { + "epoch": 1.3741935483870968, + "grad_norm": 1.1415902309445607, + "learning_rate": 4.895884879622215e-07, + "loss": 0.7259113788604736, + "step": 5964 + }, + { + "epoch": 1.3744239631336406, + "grad_norm": 1.3855842779268248, + "learning_rate": 4.892609054817883e-07, + "loss": 0.8871402144432068, + "step": 5965 + }, + { + "epoch": 1.3746543778801843, + "grad_norm": 1.3262407740428463, + "learning_rate": 4.889333971344586e-07, + "loss": 0.7564518451690674, + "step": 5966 + }, + { + "epoch": 1.374884792626728, + "grad_norm": 1.2010368462649357, + "learning_rate": 4.886059629677692e-07, + "loss": 0.7886015176773071, + "step": 5967 + }, + { + "epoch": 1.375115207373272, + "grad_norm": 1.199947155848343, + "learning_rate": 4.882786030292479e-07, + "loss": 0.8256035447120667, + "step": 5968 + }, + { + "epoch": 1.3753456221198157, + "grad_norm": 1.3084738837241086, + "learning_rate": 4.879513173664099e-07, + "loss": 0.9351227283477783, + "step": 5969 + }, + { + "epoch": 1.3755760368663594, + "grad_norm": 1.1794682657820328, + "learning_rate": 4.876241060267598e-07, + "loss": 0.7221553921699524, + "step": 5970 + }, + { + "epoch": 1.3758064516129032, + "grad_norm": 1.3959950512058854, + "learning_rate": 4.872969690577928e-07, + "loss": 0.7451514005661011, + "step": 5971 + }, + { + "epoch": 1.3760368663594469, + "grad_norm": 2.704793745814284, + "learning_rate": 4.86969906506992e-07, + "loss": 0.810903787612915, + "step": 5972 + }, + { + "epoch": 1.3762672811059908, + 
"grad_norm": 1.0363767093510534, + "learning_rate": 4.866429184218298e-07, + "loss": 0.6279938817024231, + "step": 5973 + }, + { + "epoch": 1.3764976958525346, + "grad_norm": 1.4075128359986724, + "learning_rate": 4.863160048497688e-07, + "loss": 0.7742956876754761, + "step": 5974 + }, + { + "epoch": 1.3767281105990783, + "grad_norm": 1.0416061346586747, + "learning_rate": 4.859891658382597e-07, + "loss": 0.7423844933509827, + "step": 5975 + }, + { + "epoch": 1.3769585253456222, + "grad_norm": 1.0348526250721313, + "learning_rate": 4.856624014347426e-07, + "loss": 0.8387676477432251, + "step": 5976 + }, + { + "epoch": 1.377188940092166, + "grad_norm": 1.3906652341525882, + "learning_rate": 4.853357116866471e-07, + "loss": 0.7959855794906616, + "step": 5977 + }, + { + "epoch": 1.3774193548387097, + "grad_norm": 1.2781418274310543, + "learning_rate": 4.850090966413913e-07, + "loss": 0.7086259722709656, + "step": 5978 + }, + { + "epoch": 1.3776497695852534, + "grad_norm": 1.113262974989995, + "learning_rate": 4.846825563463838e-07, + "loss": 0.7219396829605103, + "step": 5979 + }, + { + "epoch": 1.3778801843317972, + "grad_norm": 1.2693838975886846, + "learning_rate": 4.84356090849021e-07, + "loss": 0.8383582830429077, + "step": 5980 + }, + { + "epoch": 1.378110599078341, + "grad_norm": 1.2004259850017622, + "learning_rate": 4.840297001966887e-07, + "loss": 0.7624244689941406, + "step": 5981 + }, + { + "epoch": 1.3783410138248848, + "grad_norm": 1.3275243269089372, + "learning_rate": 4.837033844367626e-07, + "loss": 0.7901623249053955, + "step": 5982 + }, + { + "epoch": 1.3785714285714286, + "grad_norm": 1.0665581903589285, + "learning_rate": 4.833771436166068e-07, + "loss": 0.7732094526290894, + "step": 5983 + }, + { + "epoch": 1.3788018433179723, + "grad_norm": 1.221680510593368, + "learning_rate": 4.830509777835744e-07, + "loss": 0.7882228493690491, + "step": 5984 + }, + { + "epoch": 1.379032258064516, + "grad_norm": 1.3954212415484932, + "learning_rate": 
4.827248869850086e-07, + "loss": 0.8601159453392029, + "step": 5985 + }, + { + "epoch": 1.37926267281106, + "grad_norm": 1.442537797357167, + "learning_rate": 4.823988712682406e-07, + "loss": 0.8828538656234741, + "step": 5986 + }, + { + "epoch": 1.3794930875576037, + "grad_norm": 1.2814445672112398, + "learning_rate": 4.820729306805907e-07, + "loss": 0.8586058020591736, + "step": 5987 + }, + { + "epoch": 1.3797235023041474, + "grad_norm": 1.3476469386797916, + "learning_rate": 4.8174706526937e-07, + "loss": 0.8276243209838867, + "step": 5988 + }, + { + "epoch": 1.3799539170506914, + "grad_norm": 1.1504215702512235, + "learning_rate": 4.814212750818764e-07, + "loss": 0.837665855884552, + "step": 5989 + }, + { + "epoch": 1.380184331797235, + "grad_norm": 1.0830851541320008, + "learning_rate": 4.810955601653978e-07, + "loss": 0.7493194341659546, + "step": 5990 + }, + { + "epoch": 1.3804147465437788, + "grad_norm": 0.9470923738615639, + "learning_rate": 4.807699205672123e-07, + "loss": 0.8382525444030762, + "step": 5991 + }, + { + "epoch": 1.3806451612903226, + "grad_norm": 1.302996846441217, + "learning_rate": 4.804443563345854e-07, + "loss": 0.8152645826339722, + "step": 5992 + }, + { + "epoch": 1.3808755760368663, + "grad_norm": 1.1087518210488847, + "learning_rate": 4.801188675147719e-07, + "loss": 0.7168164849281311, + "step": 5993 + }, + { + "epoch": 1.3811059907834102, + "grad_norm": 1.3971974855003246, + "learning_rate": 4.79793454155017e-07, + "loss": 0.883512556552887, + "step": 5994 + }, + { + "epoch": 1.381336405529954, + "grad_norm": 1.1775999496250547, + "learning_rate": 4.794681163025536e-07, + "loss": 0.7258438467979431, + "step": 5995 + }, + { + "epoch": 1.3815668202764977, + "grad_norm": 1.102316858629444, + "learning_rate": 4.79142854004604e-07, + "loss": 0.8408991098403931, + "step": 5996 + }, + { + "epoch": 1.3817972350230414, + "grad_norm": 1.2549882230845555, + "learning_rate": 4.788176673083796e-07, + "loss": 0.6506227254867554, + "step": 5997 
+ }, + { + "epoch": 1.3820276497695851, + "grad_norm": 1.145761304273299, + "learning_rate": 4.784925562610809e-07, + "loss": 0.6971127986907959, + "step": 5998 + }, + { + "epoch": 1.382258064516129, + "grad_norm": 1.3037562977083754, + "learning_rate": 4.781675209098967e-07, + "loss": 0.8399784564971924, + "step": 5999 + }, + { + "epoch": 1.3824884792626728, + "grad_norm": 1.1085204750545832, + "learning_rate": 4.778425613020067e-07, + "loss": 0.6451772451400757, + "step": 6000 + }, + { + "epoch": 1.3827188940092165, + "grad_norm": 1.2906420363235995, + "learning_rate": 4.775176774845774e-07, + "loss": 0.7794390916824341, + "step": 6001 + }, + { + "epoch": 1.3829493087557605, + "grad_norm": 1.2681207047961411, + "learning_rate": 4.771928695047652e-07, + "loss": 0.7743663191795349, + "step": 6002 + }, + { + "epoch": 1.3831797235023042, + "grad_norm": 1.3900227492937691, + "learning_rate": 4.768681374097165e-07, + "loss": 0.7654878497123718, + "step": 6003 + }, + { + "epoch": 1.383410138248848, + "grad_norm": 0.9597367840932265, + "learning_rate": 4.765434812465645e-07, + "loss": 0.634769082069397, + "step": 6004 + }, + { + "epoch": 1.3836405529953917, + "grad_norm": 1.506039076037628, + "learning_rate": 4.762189010624337e-07, + "loss": 0.7941944599151611, + "step": 6005 + }, + { + "epoch": 1.3838709677419354, + "grad_norm": 1.015987334283248, + "learning_rate": 4.75894396904436e-07, + "loss": 0.7437179088592529, + "step": 6006 + }, + { + "epoch": 1.3841013824884794, + "grad_norm": 1.4064808788220893, + "learning_rate": 4.7556996881967236e-07, + "loss": 0.7854535579681396, + "step": 6007 + }, + { + "epoch": 1.384331797235023, + "grad_norm": 1.1454067558015728, + "learning_rate": 4.752456168552339e-07, + "loss": 0.7506910562515259, + "step": 6008 + }, + { + "epoch": 1.3845622119815668, + "grad_norm": 1.3378490743548084, + "learning_rate": 4.749213410581995e-07, + "loss": 0.8967334032058716, + "step": 6009 + }, + { + "epoch": 1.3847926267281105, + "grad_norm": 
0.9073367214802157, + "learning_rate": 4.7459714147563677e-07, + "loss": 0.7053096294403076, + "step": 6010 + }, + { + "epoch": 1.3850230414746543, + "grad_norm": 1.4011875457574152, + "learning_rate": 4.7427301815460396e-07, + "loss": 0.8759415149688721, + "step": 6011 + }, + { + "epoch": 1.3852534562211982, + "grad_norm": 1.2083846258038176, + "learning_rate": 4.739489711421466e-07, + "loss": 0.8827483654022217, + "step": 6012 + }, + { + "epoch": 1.385483870967742, + "grad_norm": 0.9892327750407551, + "learning_rate": 4.736250004852993e-07, + "loss": 0.7268258929252625, + "step": 6013 + }, + { + "epoch": 1.3857142857142857, + "grad_norm": 1.3354283922456354, + "learning_rate": 4.7330110623108665e-07, + "loss": 0.7142586708068848, + "step": 6014 + }, + { + "epoch": 1.3859447004608296, + "grad_norm": 0.9791582073391492, + "learning_rate": 4.7297728842652116e-07, + "loss": 0.7123303413391113, + "step": 6015 + }, + { + "epoch": 1.3861751152073734, + "grad_norm": 1.1089770586845422, + "learning_rate": 4.726535471186047e-07, + "loss": 0.7548067569732666, + "step": 6016 + }, + { + "epoch": 1.386405529953917, + "grad_norm": 1.205868893691031, + "learning_rate": 4.723298823543277e-07, + "loss": 0.7792191505432129, + "step": 6017 + }, + { + "epoch": 1.3866359447004608, + "grad_norm": 1.313401532453458, + "learning_rate": 4.7200629418066975e-07, + "loss": 0.8658785820007324, + "step": 6018 + }, + { + "epoch": 1.3868663594470045, + "grad_norm": 1.20345203638671, + "learning_rate": 4.716827826445987e-07, + "loss": 0.7173904776573181, + "step": 6019 + }, + { + "epoch": 1.3870967741935485, + "grad_norm": 1.0016118220950732, + "learning_rate": 4.7135934779307284e-07, + "loss": 0.6675543785095215, + "step": 6020 + }, + { + "epoch": 1.3873271889400922, + "grad_norm": 1.2559637316001069, + "learning_rate": 4.710359896730378e-07, + "loss": 0.8164724111557007, + "step": 6021 + }, + { + "epoch": 1.387557603686636, + "grad_norm": 1.474439832240672, + "learning_rate": 
4.707127083314283e-07, + "loss": 0.8354332447052002, + "step": 6022 + }, + { + "epoch": 1.3877880184331797, + "grad_norm": 1.1544900465349175, + "learning_rate": 4.7038950381516885e-07, + "loss": 0.8414663672447205, + "step": 6023 + }, + { + "epoch": 1.3880184331797234, + "grad_norm": 1.2150035811173532, + "learning_rate": 4.700663761711717e-07, + "loss": 0.7693418264389038, + "step": 6024 + }, + { + "epoch": 1.3882488479262673, + "grad_norm": 1.0071958767588902, + "learning_rate": 4.697433254463382e-07, + "loss": 0.7809267044067383, + "step": 6025 + }, + { + "epoch": 1.388479262672811, + "grad_norm": 1.203482571104156, + "learning_rate": 4.6942035168755944e-07, + "loss": 0.7455927133560181, + "step": 6026 + }, + { + "epoch": 1.3887096774193548, + "grad_norm": 1.3018105004563159, + "learning_rate": 4.6909745494171383e-07, + "loss": 0.8217881917953491, + "step": 6027 + }, + { + "epoch": 1.3889400921658988, + "grad_norm": 1.3723027057230852, + "learning_rate": 4.687746352556703e-07, + "loss": 0.8138882517814636, + "step": 6028 + }, + { + "epoch": 1.3891705069124423, + "grad_norm": 1.241759909967513, + "learning_rate": 4.6845189267628505e-07, + "loss": 0.8926469087600708, + "step": 6029 + }, + { + "epoch": 1.3894009216589862, + "grad_norm": 1.3027918343739477, + "learning_rate": 4.681292272504036e-07, + "loss": 0.797023355960846, + "step": 6030 + }, + { + "epoch": 1.38963133640553, + "grad_norm": 0.8383796462842409, + "learning_rate": 4.6780663902486104e-07, + "loss": 0.6767498254776001, + "step": 6031 + }, + { + "epoch": 1.3898617511520737, + "grad_norm": 1.2727364252127855, + "learning_rate": 4.674841280464804e-07, + "loss": 0.7514280080795288, + "step": 6032 + }, + { + "epoch": 1.3900921658986176, + "grad_norm": 1.3853363805552346, + "learning_rate": 4.671616943620731e-07, + "loss": 0.8879726529121399, + "step": 6033 + }, + { + "epoch": 1.3903225806451613, + "grad_norm": 0.8270134553121277, + "learning_rate": 4.66839338018441e-07, + "loss": 0.6674140095710754, + 
"step": 6034 + }, + { + "epoch": 1.390552995391705, + "grad_norm": 1.078021820178179, + "learning_rate": 4.6651705906237307e-07, + "loss": 0.9094855785369873, + "step": 6035 + }, + { + "epoch": 1.3907834101382488, + "grad_norm": 1.2561393182724931, + "learning_rate": 4.661948575406478e-07, + "loss": 0.8334506750106812, + "step": 6036 + }, + { + "epoch": 1.3910138248847925, + "grad_norm": 1.040119500616202, + "learning_rate": 4.658727335000323e-07, + "loss": 0.6545997858047485, + "step": 6037 + }, + { + "epoch": 1.3912442396313365, + "grad_norm": 1.1967093206075838, + "learning_rate": 4.6555068698728237e-07, + "loss": 0.7810590267181396, + "step": 6038 + }, + { + "epoch": 1.3914746543778802, + "grad_norm": 1.0756703494881659, + "learning_rate": 4.652287180491424e-07, + "loss": 0.7581864595413208, + "step": 6039 + }, + { + "epoch": 1.391705069124424, + "grad_norm": 1.2754594039466507, + "learning_rate": 4.649068267323465e-07, + "loss": 0.7134817242622375, + "step": 6040 + }, + { + "epoch": 1.3919354838709677, + "grad_norm": 0.9730020123763279, + "learning_rate": 4.645850130836162e-07, + "loss": 0.7050445079803467, + "step": 6041 + }, + { + "epoch": 1.3921658986175114, + "grad_norm": 1.146073776977597, + "learning_rate": 4.642632771496622e-07, + "loss": 0.8510535955429077, + "step": 6042 + }, + { + "epoch": 1.3923963133640553, + "grad_norm": 1.3940656685053847, + "learning_rate": 4.6394161897718454e-07, + "loss": 0.8627035617828369, + "step": 6043 + }, + { + "epoch": 1.392626728110599, + "grad_norm": 1.2671457951329919, + "learning_rate": 4.6362003861287127e-07, + "loss": 0.89891517162323, + "step": 6044 + }, + { + "epoch": 1.3928571428571428, + "grad_norm": 1.3215265337916509, + "learning_rate": 4.6329853610339896e-07, + "loss": 0.7267141342163086, + "step": 6045 + }, + { + "epoch": 1.3930875576036867, + "grad_norm": 1.4814794045534565, + "learning_rate": 4.6297711149543405e-07, + "loss": 0.8021189570426941, + "step": 6046 + }, + { + "epoch": 1.3933179723502305, + 
"grad_norm": 1.0954918085269951, + "learning_rate": 4.6265576483563054e-07, + "loss": 0.7836861610412598, + "step": 6047 + }, + { + "epoch": 1.3935483870967742, + "grad_norm": 1.1158269152355589, + "learning_rate": 4.623344961706309e-07, + "loss": 0.816940188407898, + "step": 6048 + }, + { + "epoch": 1.393778801843318, + "grad_norm": 1.4383712223724088, + "learning_rate": 4.6201330554706773e-07, + "loss": 0.77923583984375, + "step": 6049 + }, + { + "epoch": 1.3940092165898617, + "grad_norm": 1.3116759273395542, + "learning_rate": 4.6169219301156117e-07, + "loss": 0.8017981052398682, + "step": 6050 + }, + { + "epoch": 1.3942396313364056, + "grad_norm": 0.9886522563222937, + "learning_rate": 4.6137115861071973e-07, + "loss": 0.6786847114562988, + "step": 6051 + }, + { + "epoch": 1.3944700460829493, + "grad_norm": 1.1651814302030006, + "learning_rate": 4.61050202391142e-07, + "loss": 0.7802412509918213, + "step": 6052 + }, + { + "epoch": 1.394700460829493, + "grad_norm": 1.1955845105043188, + "learning_rate": 4.6072932439941347e-07, + "loss": 0.7434886693954468, + "step": 6053 + }, + { + "epoch": 1.3949308755760368, + "grad_norm": 1.2231160523968054, + "learning_rate": 4.6040852468211e-07, + "loss": 0.7590811252593994, + "step": 6054 + }, + { + "epoch": 1.3951612903225805, + "grad_norm": 1.5534904257800726, + "learning_rate": 4.600878032857949e-07, + "loss": 0.8952670097351074, + "step": 6055 + }, + { + "epoch": 1.3953917050691245, + "grad_norm": 1.1221688640413483, + "learning_rate": 4.5976716025702036e-07, + "loss": 0.8055328130722046, + "step": 6056 + }, + { + "epoch": 1.3956221198156682, + "grad_norm": 1.2064570897657243, + "learning_rate": 4.5944659564232725e-07, + "loss": 0.8919316530227661, + "step": 6057 + }, + { + "epoch": 1.395852534562212, + "grad_norm": 1.1074605434156857, + "learning_rate": 4.591261094882453e-07, + "loss": 0.701945960521698, + "step": 6058 + }, + { + "epoch": 1.3960829493087559, + "grad_norm": 1.1766452414586335, + "learning_rate": 
4.5880570184129206e-07, + "loss": 0.7457436323165894, + "step": 6059 + }, + { + "epoch": 1.3963133640552996, + "grad_norm": 1.193782401804385, + "learning_rate": 4.5848537274797527e-07, + "loss": 0.8093513250350952, + "step": 6060 + }, + { + "epoch": 1.3965437788018433, + "grad_norm": 1.5454221039375025, + "learning_rate": 4.5816512225478965e-07, + "loss": 0.7098822593688965, + "step": 6061 + }, + { + "epoch": 1.396774193548387, + "grad_norm": 1.2339994165792372, + "learning_rate": 4.578449504082189e-07, + "loss": 0.7423167824745178, + "step": 6062 + }, + { + "epoch": 1.3970046082949308, + "grad_norm": 1.1302042774482615, + "learning_rate": 4.5752485725473624e-07, + "loss": 0.8730076551437378, + "step": 6063 + }, + { + "epoch": 1.3972350230414747, + "grad_norm": 1.124374396794659, + "learning_rate": 4.572048428408024e-07, + "loss": 0.6914420127868652, + "step": 6064 + }, + { + "epoch": 1.3974654377880185, + "grad_norm": 1.3148006815381303, + "learning_rate": 4.5688490721286664e-07, + "loss": 0.8051402568817139, + "step": 6065 + }, + { + "epoch": 1.3976958525345622, + "grad_norm": 1.548390651351193, + "learning_rate": 4.5656505041736803e-07, + "loss": 0.9185452461242676, + "step": 6066 + }, + { + "epoch": 1.397926267281106, + "grad_norm": 1.1772485518113056, + "learning_rate": 4.5624527250073287e-07, + "loss": 0.766645073890686, + "step": 6067 + }, + { + "epoch": 1.3981566820276496, + "grad_norm": 1.3246112666718692, + "learning_rate": 4.559255735093763e-07, + "loss": 0.8005224466323853, + "step": 6068 + }, + { + "epoch": 1.3983870967741936, + "grad_norm": 1.2624209909197728, + "learning_rate": 4.5560595348970275e-07, + "loss": 0.8072810173034668, + "step": 6069 + }, + { + "epoch": 1.3986175115207373, + "grad_norm": 1.2197415999956105, + "learning_rate": 4.552864124881045e-07, + "loss": 0.7537474632263184, + "step": 6070 + }, + { + "epoch": 1.398847926267281, + "grad_norm": 1.3524984308216321, + "learning_rate": 4.549669505509619e-07, + "loss": 0.8396750092506409, + 
"step": 6071 + }, + { + "epoch": 1.399078341013825, + "grad_norm": 1.3095033527266953, + "learning_rate": 4.546475677246453e-07, + "loss": 0.8456804752349854, + "step": 6072 + }, + { + "epoch": 1.3993087557603687, + "grad_norm": 1.212970447769736, + "learning_rate": 4.543282640555123e-07, + "loss": 0.6150076389312744, + "step": 6073 + }, + { + "epoch": 1.3995391705069125, + "grad_norm": 1.1345047277741707, + "learning_rate": 4.540090395899089e-07, + "loss": 0.667172908782959, + "step": 6074 + }, + { + "epoch": 1.3997695852534562, + "grad_norm": 1.1269214154073468, + "learning_rate": 4.5368989437417116e-07, + "loss": 0.7918317914009094, + "step": 6075 + }, + { + "epoch": 1.4, + "grad_norm": 1.070411671989194, + "learning_rate": 4.5337082845462193e-07, + "loss": 0.6800580024719238, + "step": 6076 + }, + { + "epoch": 1.4002304147465439, + "grad_norm": 1.3908779413221009, + "learning_rate": 4.530518418775733e-07, + "loss": 0.9205034971237183, + "step": 6077 + }, + { + "epoch": 1.4004608294930876, + "grad_norm": 0.9376373503434607, + "learning_rate": 4.5273293468932585e-07, + "loss": 0.7228822708129883, + "step": 6078 + }, + { + "epoch": 1.4006912442396313, + "grad_norm": 1.0019153673681407, + "learning_rate": 4.524141069361679e-07, + "loss": 0.6827987432479858, + "step": 6079 + }, + { + "epoch": 1.400921658986175, + "grad_norm": 1.086076018779761, + "learning_rate": 4.520953586643779e-07, + "loss": 0.6272581815719604, + "step": 6080 + }, + { + "epoch": 1.4011520737327188, + "grad_norm": 1.1153873233388363, + "learning_rate": 4.5177668992022125e-07, + "loss": 0.8041881322860718, + "step": 6081 + }, + { + "epoch": 1.4013824884792627, + "grad_norm": 0.986104576594979, + "learning_rate": 4.5145810074995194e-07, + "loss": 0.7284958362579346, + "step": 6082 + }, + { + "epoch": 1.4016129032258065, + "grad_norm": 1.227152604501521, + "learning_rate": 4.511395911998135e-07, + "loss": 0.7653781175613403, + "step": 6083 + }, + { + "epoch": 1.4018433179723502, + "grad_norm": 
1.0466936448387898, + "learning_rate": 4.5082116131603677e-07, + "loss": 0.8037170171737671, + "step": 6084 + }, + { + "epoch": 1.4020737327188941, + "grad_norm": 1.1911735797842866, + "learning_rate": 4.505028111448411e-07, + "loss": 0.783043384552002, + "step": 6085 + }, + { + "epoch": 1.4023041474654379, + "grad_norm": 1.0547410930732963, + "learning_rate": 4.501845407324354e-07, + "loss": 0.6712161302566528, + "step": 6086 + }, + { + "epoch": 1.4025345622119816, + "grad_norm": 1.6406574524985842, + "learning_rate": 4.4986635012501575e-07, + "loss": 0.9537261724472046, + "step": 6087 + }, + { + "epoch": 1.4027649769585253, + "grad_norm": 1.4091085059994304, + "learning_rate": 4.495482393687666e-07, + "loss": 0.8984304666519165, + "step": 6088 + }, + { + "epoch": 1.402995391705069, + "grad_norm": 1.0430973660752654, + "learning_rate": 4.4923020850986224e-07, + "loss": 0.6894555687904358, + "step": 6089 + }, + { + "epoch": 1.403225806451613, + "grad_norm": 1.1542541609725157, + "learning_rate": 4.489122575944639e-07, + "loss": 0.685502290725708, + "step": 6090 + }, + { + "epoch": 1.4034562211981567, + "grad_norm": 1.1082950627991512, + "learning_rate": 4.485943866687216e-07, + "loss": 0.6794239282608032, + "step": 6091 + }, + { + "epoch": 1.4036866359447004, + "grad_norm": 1.0717636346133315, + "learning_rate": 4.482765957787744e-07, + "loss": 0.7647888660430908, + "step": 6092 + }, + { + "epoch": 1.4039170506912442, + "grad_norm": 1.3476206179513355, + "learning_rate": 4.4795888497074896e-07, + "loss": 0.798794150352478, + "step": 6093 + }, + { + "epoch": 1.404147465437788, + "grad_norm": 1.0358789181259667, + "learning_rate": 4.4764125429076026e-07, + "loss": 0.79430091381073, + "step": 6094 + }, + { + "epoch": 1.4043778801843319, + "grad_norm": 1.4040182367122596, + "learning_rate": 4.4732370378491255e-07, + "loss": 0.9089795351028442, + "step": 6095 + }, + { + "epoch": 1.4046082949308756, + "grad_norm": 0.9307801992196251, + "learning_rate": 
4.4700623349929757e-07, + "loss": 0.8270718455314636, + "step": 6096 + }, + { + "epoch": 1.4048387096774193, + "grad_norm": 1.082228260794844, + "learning_rate": 4.466888434799958e-07, + "loss": 0.7550361156463623, + "step": 6097 + }, + { + "epoch": 1.4050691244239633, + "grad_norm": 1.15557625190535, + "learning_rate": 4.463715337730759e-07, + "loss": 0.7406442165374756, + "step": 6098 + }, + { + "epoch": 1.405299539170507, + "grad_norm": 1.4065045960279658, + "learning_rate": 4.460543044245949e-07, + "loss": 0.830552875995636, + "step": 6099 + }, + { + "epoch": 1.4055299539170507, + "grad_norm": 1.4160409051991987, + "learning_rate": 4.45737155480598e-07, + "loss": 0.8961822390556335, + "step": 6100 + }, + { + "epoch": 1.4057603686635944, + "grad_norm": 1.2630678724710616, + "learning_rate": 4.454200869871195e-07, + "loss": 0.6307489275932312, + "step": 6101 + }, + { + "epoch": 1.4059907834101382, + "grad_norm": 1.437795392364305, + "learning_rate": 4.451030989901808e-07, + "loss": 0.8682084083557129, + "step": 6102 + }, + { + "epoch": 1.4062211981566821, + "grad_norm": 1.1897592960029226, + "learning_rate": 4.4478619153579323e-07, + "loss": 0.7157681584358215, + "step": 6103 + }, + { + "epoch": 1.4064516129032258, + "grad_norm": 1.196767224907471, + "learning_rate": 4.4446936466995486e-07, + "loss": 0.7267071008682251, + "step": 6104 + }, + { + "epoch": 1.4066820276497696, + "grad_norm": 1.1191501401801882, + "learning_rate": 4.4415261843865246e-07, + "loss": 0.8435063362121582, + "step": 6105 + }, + { + "epoch": 1.4069124423963133, + "grad_norm": 1.2220260712556485, + "learning_rate": 4.43835952887862e-07, + "loss": 0.8895175457000732, + "step": 6106 + }, + { + "epoch": 1.407142857142857, + "grad_norm": 1.0150052474935476, + "learning_rate": 4.435193680635467e-07, + "loss": 0.7470073699951172, + "step": 6107 + }, + { + "epoch": 1.407373271889401, + "grad_norm": 1.376675993117338, + "learning_rate": 4.432028640116581e-07, + "loss": 0.7993630170822144, + "step": 
6108 + }, + { + "epoch": 1.4076036866359447, + "grad_norm": 1.2675455750766673, + "learning_rate": 4.4288644077813695e-07, + "loss": 0.823069155216217, + "step": 6109 + }, + { + "epoch": 1.4078341013824884, + "grad_norm": 1.374585518914166, + "learning_rate": 4.4257009840891146e-07, + "loss": 0.7665367126464844, + "step": 6110 + }, + { + "epoch": 1.4080645161290324, + "grad_norm": 1.1174810423449963, + "learning_rate": 4.422538369498979e-07, + "loss": 0.7173991799354553, + "step": 6111 + }, + { + "epoch": 1.4082949308755761, + "grad_norm": 0.9476955630635919, + "learning_rate": 4.4193765644700186e-07, + "loss": 0.8288347125053406, + "step": 6112 + }, + { + "epoch": 1.4085253456221198, + "grad_norm": 1.206088367901853, + "learning_rate": 4.4162155694611636e-07, + "loss": 0.8589911460876465, + "step": 6113 + }, + { + "epoch": 1.4087557603686636, + "grad_norm": 1.2884473987369411, + "learning_rate": 4.4130553849312213e-07, + "loss": 0.8783868551254272, + "step": 6114 + }, + { + "epoch": 1.4089861751152073, + "grad_norm": 1.0994332560949611, + "learning_rate": 4.409896011338898e-07, + "loss": 0.7625287771224976, + "step": 6115 + }, + { + "epoch": 1.4092165898617512, + "grad_norm": 1.1571434855502665, + "learning_rate": 4.406737449142769e-07, + "loss": 0.7412571907043457, + "step": 6116 + }, + { + "epoch": 1.409447004608295, + "grad_norm": 0.9525276096114424, + "learning_rate": 4.4035796988012943e-07, + "loss": 0.6248455047607422, + "step": 6117 + }, + { + "epoch": 1.4096774193548387, + "grad_norm": 1.1843810443395109, + "learning_rate": 4.400422760772817e-07, + "loss": 0.7970919609069824, + "step": 6118 + }, + { + "epoch": 1.4099078341013824, + "grad_norm": 1.0403384039115238, + "learning_rate": 4.397266635515563e-07, + "loss": 0.6184223294258118, + "step": 6119 + }, + { + "epoch": 1.4101382488479262, + "grad_norm": 1.07818776364935, + "learning_rate": 4.394111323487637e-07, + "loss": 0.9014843702316284, + "step": 6120 + }, + { + "epoch": 1.41036866359447, + 
"grad_norm": 1.1660248005288976, + "learning_rate": 4.390956825147034e-07, + "loss": 0.8468939661979675, + "step": 6121 + }, + { + "epoch": 1.4105990783410138, + "grad_norm": 1.0810631729189881, + "learning_rate": 4.3878031409516234e-07, + "loss": 0.7832604646682739, + "step": 6122 + }, + { + "epoch": 1.4108294930875576, + "grad_norm": 1.0700225295832282, + "learning_rate": 4.3846502713591527e-07, + "loss": 0.7202898263931274, + "step": 6123 + }, + { + "epoch": 1.4110599078341015, + "grad_norm": 1.1788285042234896, + "learning_rate": 4.3814982168272664e-07, + "loss": 0.6785540580749512, + "step": 6124 + }, + { + "epoch": 1.4112903225806452, + "grad_norm": 1.3040233352486812, + "learning_rate": 4.378346977813474e-07, + "loss": 0.795532763004303, + "step": 6125 + }, + { + "epoch": 1.411520737327189, + "grad_norm": 0.8875056644654742, + "learning_rate": 4.3751965547751735e-07, + "loss": 0.7715259790420532, + "step": 6126 + }, + { + "epoch": 1.4117511520737327, + "grad_norm": 1.4252318364105403, + "learning_rate": 4.37204694816965e-07, + "loss": 0.8657132983207703, + "step": 6127 + }, + { + "epoch": 1.4119815668202764, + "grad_norm": 1.0207817658354317, + "learning_rate": 4.3688981584540586e-07, + "loss": 0.7253363132476807, + "step": 6128 + }, + { + "epoch": 1.4122119815668204, + "grad_norm": 1.2055823367063212, + "learning_rate": 4.365750186085447e-07, + "loss": 0.8511998653411865, + "step": 6129 + }, + { + "epoch": 1.412442396313364, + "grad_norm": 1.3256931814656627, + "learning_rate": 4.3626030315207386e-07, + "loss": 0.7936528921127319, + "step": 6130 + }, + { + "epoch": 1.4126728110599078, + "grad_norm": 1.1878967804503957, + "learning_rate": 4.3594566952167324e-07, + "loss": 0.758521556854248, + "step": 6131 + }, + { + "epoch": 1.4129032258064516, + "grad_norm": 1.242405288398936, + "learning_rate": 4.3563111776301243e-07, + "loss": 0.8202048540115356, + "step": 6132 + }, + { + "epoch": 1.4131336405529953, + "grad_norm": 1.075213759854547, + "learning_rate": 
4.3531664792174773e-07, + "loss": 0.7864067554473877, + "step": 6133 + }, + { + "epoch": 1.4133640552995392, + "grad_norm": 1.472991105564755, + "learning_rate": 4.350022600435236e-07, + "loss": 0.8051233291625977, + "step": 6134 + }, + { + "epoch": 1.413594470046083, + "grad_norm": 1.0811225554895896, + "learning_rate": 4.34687954173974e-07, + "loss": 0.7617348432540894, + "step": 6135 + }, + { + "epoch": 1.4138248847926267, + "grad_norm": 1.299621377240526, + "learning_rate": 4.3437373035871927e-07, + "loss": 0.7899652719497681, + "step": 6136 + }, + { + "epoch": 1.4140552995391706, + "grad_norm": 1.1704157180732915, + "learning_rate": 4.340595886433689e-07, + "loss": 0.8467222452163696, + "step": 6137 + }, + { + "epoch": 1.4142857142857144, + "grad_norm": 1.294364382858993, + "learning_rate": 4.3374552907352003e-07, + "loss": 0.8451426029205322, + "step": 6138 + }, + { + "epoch": 1.414516129032258, + "grad_norm": 1.1053072195052795, + "learning_rate": 4.3343155169475797e-07, + "loss": 0.7140414714813232, + "step": 6139 + }, + { + "epoch": 1.4147465437788018, + "grad_norm": 1.365344165744123, + "learning_rate": 4.331176565526558e-07, + "loss": 0.7680803537368774, + "step": 6140 + }, + { + "epoch": 1.4149769585253456, + "grad_norm": 1.0970331390876962, + "learning_rate": 4.328038436927757e-07, + "loss": 0.7262120246887207, + "step": 6141 + }, + { + "epoch": 1.4152073732718895, + "grad_norm": 1.2176292189863585, + "learning_rate": 4.3249011316066676e-07, + "loss": 0.7788687944412231, + "step": 6142 + }, + { + "epoch": 1.4154377880184332, + "grad_norm": 1.4880584379115793, + "learning_rate": 4.321764650018662e-07, + "loss": 0.7613503336906433, + "step": 6143 + }, + { + "epoch": 1.415668202764977, + "grad_norm": 0.9554644370778598, + "learning_rate": 4.3186289926190056e-07, + "loss": 0.6778309345245361, + "step": 6144 + }, + { + "epoch": 1.4158986175115207, + "grad_norm": 1.5159867718873894, + "learning_rate": 4.315494159862829e-07, + "loss": 0.8626673221588135, + 
"step": 6145 + }, + { + "epoch": 1.4161290322580644, + "grad_norm": 1.194727935560369, + "learning_rate": 4.312360152205147e-07, + "loss": 0.8321051597595215, + "step": 6146 + }, + { + "epoch": 1.4163594470046084, + "grad_norm": 1.146293428483721, + "learning_rate": 4.309226970100861e-07, + "loss": 0.9317119717597961, + "step": 6147 + }, + { + "epoch": 1.416589861751152, + "grad_norm": 1.4669878139895565, + "learning_rate": 4.306094614004748e-07, + "loss": 0.9479870200157166, + "step": 6148 + }, + { + "epoch": 1.4168202764976958, + "grad_norm": 1.0166991353273056, + "learning_rate": 4.3029630843714606e-07, + "loss": 0.8222699165344238, + "step": 6149 + }, + { + "epoch": 1.4170506912442398, + "grad_norm": 1.427356205375722, + "learning_rate": 4.2998323816555427e-07, + "loss": 0.8232519030570984, + "step": 6150 + }, + { + "epoch": 1.4172811059907833, + "grad_norm": 1.156719588287236, + "learning_rate": 4.2967025063114057e-07, + "loss": 0.7423735857009888, + "step": 6151 + }, + { + "epoch": 1.4175115207373272, + "grad_norm": 1.1009896479281802, + "learning_rate": 4.2935734587933527e-07, + "loss": 0.6947557926177979, + "step": 6152 + }, + { + "epoch": 1.417741935483871, + "grad_norm": 1.2980025668504918, + "learning_rate": 4.290445239555558e-07, + "loss": 0.789128303527832, + "step": 6153 + }, + { + "epoch": 1.4179723502304147, + "grad_norm": 1.344185599290992, + "learning_rate": 4.2873178490520745e-07, + "loss": 0.8025885820388794, + "step": 6154 + }, + { + "epoch": 1.4182027649769586, + "grad_norm": 1.3491619317054568, + "learning_rate": 4.284191287736847e-07, + "loss": 0.8139045238494873, + "step": 6155 + }, + { + "epoch": 1.4184331797235024, + "grad_norm": 1.1246209635446252, + "learning_rate": 4.2810655560636864e-07, + "loss": 0.8154167532920837, + "step": 6156 + }, + { + "epoch": 1.418663594470046, + "grad_norm": 1.0954033524128675, + "learning_rate": 4.2779406544862896e-07, + "loss": 0.6383910775184631, + "step": 6157 + }, + { + "epoch": 1.4188940092165898, + 
"grad_norm": 1.217902628448707, + "learning_rate": 4.2748165834582316e-07, + "loss": 0.7008179426193237, + "step": 6158 + }, + { + "epoch": 1.4191244239631335, + "grad_norm": 1.2584275851601723, + "learning_rate": 4.2716933434329684e-07, + "loss": 0.9458012580871582, + "step": 6159 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 1.1170402428175406, + "learning_rate": 4.268570934863829e-07, + "loss": 0.7354133725166321, + "step": 6160 + }, + { + "epoch": 1.4195852534562212, + "grad_norm": 1.050503834766047, + "learning_rate": 4.265449358204034e-07, + "loss": 0.7146268486976624, + "step": 6161 + }, + { + "epoch": 1.419815668202765, + "grad_norm": 1.3602740783757037, + "learning_rate": 4.262328613906674e-07, + "loss": 0.7357315421104431, + "step": 6162 + }, + { + "epoch": 1.4200460829493087, + "grad_norm": 1.5139772991772644, + "learning_rate": 4.2592087024247157e-07, + "loss": 0.8006314039230347, + "step": 6163 + }, + { + "epoch": 1.4202764976958524, + "grad_norm": 1.2194249079603743, + "learning_rate": 4.256089624211018e-07, + "loss": 0.8299369812011719, + "step": 6164 + }, + { + "epoch": 1.4205069124423964, + "grad_norm": 1.3878054713959478, + "learning_rate": 4.252971379718308e-07, + "loss": 0.7018890380859375, + "step": 6165 + }, + { + "epoch": 1.42073732718894, + "grad_norm": 1.0332854509364862, + "learning_rate": 4.24985396939919e-07, + "loss": 0.6501315236091614, + "step": 6166 + }, + { + "epoch": 1.4209677419354838, + "grad_norm": 1.6385767983913562, + "learning_rate": 4.24673739370616e-07, + "loss": 0.8379749059677124, + "step": 6167 + }, + { + "epoch": 1.4211981566820278, + "grad_norm": 1.3590615179836698, + "learning_rate": 4.24362165309158e-07, + "loss": 0.7996747493743896, + "step": 6168 + }, + { + "epoch": 1.4214285714285715, + "grad_norm": 1.2270246479776195, + "learning_rate": 4.240506748007695e-07, + "loss": 0.7258181571960449, + "step": 6169 + }, + { + "epoch": 1.4216589861751152, + "grad_norm": 0.9997463365032918, + "learning_rate": 
4.237392678906633e-07, + "loss": 0.6035803556442261, + "step": 6170 + }, + { + "epoch": 1.421889400921659, + "grad_norm": 1.1041316785012205, + "learning_rate": 4.2342794462403954e-07, + "loss": 0.7668799757957458, + "step": 6171 + }, + { + "epoch": 1.4221198156682027, + "grad_norm": 0.9385556238542058, + "learning_rate": 4.23116705046086e-07, + "loss": 0.7816733121871948, + "step": 6172 + }, + { + "epoch": 1.4223502304147466, + "grad_norm": 1.2003519134278278, + "learning_rate": 4.228055492019793e-07, + "loss": 0.8753983974456787, + "step": 6173 + }, + { + "epoch": 1.4225806451612903, + "grad_norm": 1.1591394093837553, + "learning_rate": 4.224944771368831e-07, + "loss": 0.8319464921951294, + "step": 6174 + }, + { + "epoch": 1.422811059907834, + "grad_norm": 1.1444278460686073, + "learning_rate": 4.2218348889594866e-07, + "loss": 0.6670328378677368, + "step": 6175 + }, + { + "epoch": 1.4230414746543778, + "grad_norm": 0.9949133230999909, + "learning_rate": 4.218725845243163e-07, + "loss": 0.7879645824432373, + "step": 6176 + }, + { + "epoch": 1.4232718894009215, + "grad_norm": 1.1897456513351008, + "learning_rate": 4.2156176406711287e-07, + "loss": 0.709680438041687, + "step": 6177 + }, + { + "epoch": 1.4235023041474655, + "grad_norm": 1.2454467445687987, + "learning_rate": 4.2125102756945364e-07, + "loss": 0.7990894317626953, + "step": 6178 + }, + { + "epoch": 1.4237327188940092, + "grad_norm": 0.899401568311558, + "learning_rate": 4.2094037507644165e-07, + "loss": 0.7283308506011963, + "step": 6179 + }, + { + "epoch": 1.423963133640553, + "grad_norm": 1.1017464258775596, + "learning_rate": 4.2062980663316715e-07, + "loss": 0.8763309717178345, + "step": 6180 + }, + { + "epoch": 1.4241935483870969, + "grad_norm": 1.5313476968397717, + "learning_rate": 4.2031932228470966e-07, + "loss": 0.9370014667510986, + "step": 6181 + }, + { + "epoch": 1.4244239631336406, + "grad_norm": 1.2317913481286529, + "learning_rate": 4.2000892207613526e-07, + "loss": 0.7883036136627197, 
+ "step": 6182 + }, + { + "epoch": 1.4246543778801843, + "grad_norm": 1.0986212570485994, + "learning_rate": 4.196986060524975e-07, + "loss": 0.7021682262420654, + "step": 6183 + }, + { + "epoch": 1.424884792626728, + "grad_norm": 1.6809928588875014, + "learning_rate": 4.193883742588393e-07, + "loss": 0.842636227607727, + "step": 6184 + }, + { + "epoch": 1.4251152073732718, + "grad_norm": 1.3804520546599122, + "learning_rate": 4.190782267401899e-07, + "loss": 0.8003957867622375, + "step": 6185 + }, + { + "epoch": 1.4253456221198157, + "grad_norm": 1.4234115388616575, + "learning_rate": 4.1876816354156655e-07, + "loss": 0.9799495935440063, + "step": 6186 + }, + { + "epoch": 1.4255760368663595, + "grad_norm": 1.4430834747300494, + "learning_rate": 4.184581847079751e-07, + "loss": 0.8726102113723755, + "step": 6187 + }, + { + "epoch": 1.4258064516129032, + "grad_norm": 1.4779961873749974, + "learning_rate": 4.181482902844082e-07, + "loss": 0.8771729469299316, + "step": 6188 + }, + { + "epoch": 1.426036866359447, + "grad_norm": 0.932904262005563, + "learning_rate": 4.1783848031584644e-07, + "loss": 0.5891281962394714, + "step": 6189 + }, + { + "epoch": 1.4262672811059907, + "grad_norm": 1.0356433358815755, + "learning_rate": 4.1752875484725904e-07, + "loss": 0.8133054971694946, + "step": 6190 + }, + { + "epoch": 1.4264976958525346, + "grad_norm": 1.2051464792634443, + "learning_rate": 4.1721911392360164e-07, + "loss": 0.7175684571266174, + "step": 6191 + }, + { + "epoch": 1.4267281105990783, + "grad_norm": 1.2483759508518841, + "learning_rate": 4.16909557589818e-07, + "loss": 0.7112927436828613, + "step": 6192 + }, + { + "epoch": 1.426958525345622, + "grad_norm": 1.3756845434805187, + "learning_rate": 4.166000858908406e-07, + "loss": 0.8564406037330627, + "step": 6193 + }, + { + "epoch": 1.427188940092166, + "grad_norm": 1.2070686503198162, + "learning_rate": 4.162906988715883e-07, + "loss": 0.7630729675292969, + "step": 6194 + }, + { + "epoch": 1.4274193548387097, + 
"grad_norm": 0.971140934311516, + "learning_rate": 4.1598139657696806e-07, + "loss": 0.6810768246650696, + "step": 6195 + }, + { + "epoch": 1.4276497695852535, + "grad_norm": 0.9185719080310675, + "learning_rate": 4.1567217905187535e-07, + "loss": 0.8482312560081482, + "step": 6196 + }, + { + "epoch": 1.4278801843317972, + "grad_norm": 1.4356078879259653, + "learning_rate": 4.1536304634119225e-07, + "loss": 0.845355749130249, + "step": 6197 + }, + { + "epoch": 1.428110599078341, + "grad_norm": 1.3990653285356356, + "learning_rate": 4.1505399848978896e-07, + "loss": 0.8082824349403381, + "step": 6198 + }, + { + "epoch": 1.4283410138248849, + "grad_norm": 1.5497395393382225, + "learning_rate": 4.147450355425235e-07, + "loss": 0.8141404390335083, + "step": 6199 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 1.0209015709753073, + "learning_rate": 4.14436157544241e-07, + "loss": 0.8144549131393433, + "step": 6200 + }, + { + "epoch": 1.4288018433179723, + "grad_norm": 1.2316152605954584, + "learning_rate": 4.141273645397754e-07, + "loss": 0.6554359793663025, + "step": 6201 + }, + { + "epoch": 1.429032258064516, + "grad_norm": 1.2095729612520494, + "learning_rate": 4.138186565739472e-07, + "loss": 0.8035449981689453, + "step": 6202 + }, + { + "epoch": 1.4292626728110598, + "grad_norm": 1.348688453980758, + "learning_rate": 4.1351003369156467e-07, + "loss": 0.7848105430603027, + "step": 6203 + }, + { + "epoch": 1.4294930875576037, + "grad_norm": 1.167048125389705, + "learning_rate": 4.132014959374246e-07, + "loss": 0.7064214944839478, + "step": 6204 + }, + { + "epoch": 1.4297235023041475, + "grad_norm": 1.236002479887974, + "learning_rate": 4.128930433563107e-07, + "loss": 0.7636318802833557, + "step": 6205 + }, + { + "epoch": 1.4299539170506912, + "grad_norm": 1.2440935326289273, + "learning_rate": 4.1258467599299395e-07, + "loss": 0.6839499473571777, + "step": 6206 + }, + { + "epoch": 1.4301843317972351, + "grad_norm": 1.1802386777878584, + "learning_rate": 
4.122763938922341e-07, + "loss": 0.8355294466018677, + "step": 6207 + }, + { + "epoch": 1.4304147465437789, + "grad_norm": 1.1238131581281627, + "learning_rate": 4.1196819709877773e-07, + "loss": 0.7563334107398987, + "step": 6208 + }, + { + "epoch": 1.4306451612903226, + "grad_norm": 1.1336601077663977, + "learning_rate": 4.116600856573588e-07, + "loss": 0.6991991996765137, + "step": 6209 + }, + { + "epoch": 1.4308755760368663, + "grad_norm": 1.2669311049959366, + "learning_rate": 4.113520596126998e-07, + "loss": 0.7249872088432312, + "step": 6210 + }, + { + "epoch": 1.43110599078341, + "grad_norm": 0.9386622429459606, + "learning_rate": 4.110441190095101e-07, + "loss": 0.6570736169815063, + "step": 6211 + }, + { + "epoch": 1.431336405529954, + "grad_norm": 1.0652944602016763, + "learning_rate": 4.107362638924865e-07, + "loss": 0.7137724161148071, + "step": 6212 + }, + { + "epoch": 1.4315668202764977, + "grad_norm": 1.1571956532799377, + "learning_rate": 4.1042849430631453e-07, + "loss": 0.7620561122894287, + "step": 6213 + }, + { + "epoch": 1.4317972350230415, + "grad_norm": 1.118516282963539, + "learning_rate": 4.1012081029566616e-07, + "loss": 0.8186367750167847, + "step": 6214 + }, + { + "epoch": 1.4320276497695852, + "grad_norm": 1.2414517851095686, + "learning_rate": 4.098132119052008e-07, + "loss": 0.8068171739578247, + "step": 6215 + }, + { + "epoch": 1.432258064516129, + "grad_norm": 1.3160335320341774, + "learning_rate": 4.095056991795668e-07, + "loss": 0.8640002012252808, + "step": 6216 + }, + { + "epoch": 1.4324884792626729, + "grad_norm": 1.4376158954775202, + "learning_rate": 4.0919827216339887e-07, + "loss": 0.8886386156082153, + "step": 6217 + }, + { + "epoch": 1.4327188940092166, + "grad_norm": 1.072787779438559, + "learning_rate": 4.0889093090131965e-07, + "loss": 0.6853137016296387, + "step": 6218 + }, + { + "epoch": 1.4329493087557603, + "grad_norm": 1.0751813749856631, + "learning_rate": 4.0858367543793923e-07, + "loss": 0.7423670291900635, + 
"step": 6219 + }, + { + "epoch": 1.4331797235023043, + "grad_norm": 1.2596005033506457, + "learning_rate": 4.0827650581785544e-07, + "loss": 0.7969200611114502, + "step": 6220 + }, + { + "epoch": 1.433410138248848, + "grad_norm": 1.1441853902577663, + "learning_rate": 4.079694220856531e-07, + "loss": 0.8506221771240234, + "step": 6221 + }, + { + "epoch": 1.4336405529953917, + "grad_norm": 1.107985966829949, + "learning_rate": 4.076624242859058e-07, + "loss": 0.6755083799362183, + "step": 6222 + }, + { + "epoch": 1.4338709677419355, + "grad_norm": 1.0751582832116895, + "learning_rate": 4.0735551246317333e-07, + "loss": 0.7734944820404053, + "step": 6223 + }, + { + "epoch": 1.4341013824884792, + "grad_norm": 1.1828392807290495, + "learning_rate": 4.0704868666200345e-07, + "loss": 0.8564216494560242, + "step": 6224 + }, + { + "epoch": 1.4343317972350231, + "grad_norm": 0.8521811929477493, + "learning_rate": 4.067419469269321e-07, + "loss": 0.6858065128326416, + "step": 6225 + }, + { + "epoch": 1.4345622119815669, + "grad_norm": 1.4454169020848073, + "learning_rate": 4.064352933024813e-07, + "loss": 0.684749960899353, + "step": 6226 + }, + { + "epoch": 1.4347926267281106, + "grad_norm": 1.0124943930771644, + "learning_rate": 4.061287258331624e-07, + "loss": 0.7648766040802002, + "step": 6227 + }, + { + "epoch": 1.4350230414746543, + "grad_norm": 1.2226521022766697, + "learning_rate": 4.058222445634727e-07, + "loss": 0.924850583076477, + "step": 6228 + }, + { + "epoch": 1.435253456221198, + "grad_norm": 1.2841804739911125, + "learning_rate": 4.055158495378972e-07, + "loss": 0.906406581401825, + "step": 6229 + }, + { + "epoch": 1.435483870967742, + "grad_norm": 1.1497462597145154, + "learning_rate": 4.052095408009095e-07, + "loss": 0.9169156551361084, + "step": 6230 + }, + { + "epoch": 1.4357142857142857, + "grad_norm": 0.9291011874506654, + "learning_rate": 4.0490331839696967e-07, + "loss": 0.7367587685585022, + "step": 6231 + }, + { + "epoch": 1.4359447004608294, + 
"grad_norm": 0.9837392218179005, + "learning_rate": 4.045971823705249e-07, + "loss": 0.7608749270439148, + "step": 6232 + }, + { + "epoch": 1.4361751152073734, + "grad_norm": 1.006459600101246, + "learning_rate": 4.0429113276601134e-07, + "loss": 0.7008038759231567, + "step": 6233 + }, + { + "epoch": 1.4364055299539171, + "grad_norm": 1.3644950830796674, + "learning_rate": 4.039851696278511e-07, + "loss": 0.8581372499465942, + "step": 6234 + }, + { + "epoch": 1.4366359447004609, + "grad_norm": 1.1117269621825037, + "learning_rate": 4.036792930004542e-07, + "loss": 0.6602354049682617, + "step": 6235 + }, + { + "epoch": 1.4368663594470046, + "grad_norm": 1.1136625894629528, + "learning_rate": 4.0337350292821893e-07, + "loss": 0.8560018539428711, + "step": 6236 + }, + { + "epoch": 1.4370967741935483, + "grad_norm": 1.5699670277885023, + "learning_rate": 4.030677994555298e-07, + "loss": 0.8837640285491943, + "step": 6237 + }, + { + "epoch": 1.4373271889400923, + "grad_norm": 1.1788518631283098, + "learning_rate": 4.027621826267593e-07, + "loss": 0.8214797973632812, + "step": 6238 + }, + { + "epoch": 1.437557603686636, + "grad_norm": 1.091488147712342, + "learning_rate": 4.024566524862675e-07, + "loss": 0.7590944766998291, + "step": 6239 + }, + { + "epoch": 1.4377880184331797, + "grad_norm": 1.5224250495012106, + "learning_rate": 4.021512090784014e-07, + "loss": 0.8792011141777039, + "step": 6240 + }, + { + "epoch": 1.4380184331797234, + "grad_norm": 0.9801567843215049, + "learning_rate": 4.0184585244749556e-07, + "loss": 0.8309401273727417, + "step": 6241 + }, + { + "epoch": 1.4382488479262672, + "grad_norm": 1.2518924977337436, + "learning_rate": 4.015405826378727e-07, + "loss": 0.7474797964096069, + "step": 6242 + }, + { + "epoch": 1.4384792626728111, + "grad_norm": 1.0203221096159534, + "learning_rate": 4.012353996938421e-07, + "loss": 0.7376091480255127, + "step": 6243 + }, + { + "epoch": 1.4387096774193548, + "grad_norm": 1.4049798692682764, + "learning_rate": 
4.0093030365970014e-07, + "loss": 0.7809054851531982, + "step": 6244 + }, + { + "epoch": 1.4389400921658986, + "grad_norm": 1.206100995388555, + "learning_rate": 4.0062529457973194e-07, + "loss": 0.8551669120788574, + "step": 6245 + }, + { + "epoch": 1.4391705069124425, + "grad_norm": 1.3285364918408127, + "learning_rate": 4.0032037249820874e-07, + "loss": 0.7874705791473389, + "step": 6246 + }, + { + "epoch": 1.4394009216589863, + "grad_norm": 1.220500481419073, + "learning_rate": 4.0001553745938923e-07, + "loss": 0.8032190799713135, + "step": 6247 + }, + { + "epoch": 1.43963133640553, + "grad_norm": 1.1833761956090303, + "learning_rate": 3.9971078950752057e-07, + "loss": 0.7600107192993164, + "step": 6248 + }, + { + "epoch": 1.4398617511520737, + "grad_norm": 1.0770488794400255, + "learning_rate": 3.994061286868361e-07, + "loss": 0.7738933563232422, + "step": 6249 + }, + { + "epoch": 1.4400921658986174, + "grad_norm": 1.2036013798832181, + "learning_rate": 3.9910155504155665e-07, + "loss": 0.701007604598999, + "step": 6250 + }, + { + "epoch": 1.4403225806451614, + "grad_norm": 1.2067244620095277, + "learning_rate": 3.9879706861589126e-07, + "loss": 0.8962818384170532, + "step": 6251 + }, + { + "epoch": 1.4405529953917051, + "grad_norm": 1.4532648423769148, + "learning_rate": 3.9849266945403513e-07, + "loss": 0.7636146545410156, + "step": 6252 + }, + { + "epoch": 1.4407834101382488, + "grad_norm": 1.4158432417231142, + "learning_rate": 3.981883576001722e-07, + "loss": 0.8816943168640137, + "step": 6253 + }, + { + "epoch": 1.4410138248847926, + "grad_norm": 1.2321816109724755, + "learning_rate": 3.978841330984725e-07, + "loss": 0.7252858877182007, + "step": 6254 + }, + { + "epoch": 1.4412442396313363, + "grad_norm": 1.1568327683598156, + "learning_rate": 3.975799959930932e-07, + "loss": 0.6720175743103027, + "step": 6255 + }, + { + "epoch": 1.4414746543778802, + "grad_norm": 0.981779637597959, + "learning_rate": 3.972759463281805e-07, + "loss": 0.8000779151916504, 
+ "step": 6256 + }, + { + "epoch": 1.441705069124424, + "grad_norm": 1.2561538909400267, + "learning_rate": 3.9697198414786626e-07, + "loss": 0.7356371283531189, + "step": 6257 + }, + { + "epoch": 1.4419354838709677, + "grad_norm": 1.3228468777834088, + "learning_rate": 3.966681094962703e-07, + "loss": 0.708438515663147, + "step": 6258 + }, + { + "epoch": 1.4421658986175117, + "grad_norm": 1.1635121950639566, + "learning_rate": 3.963643224174994e-07, + "loss": 0.709287166595459, + "step": 6259 + }, + { + "epoch": 1.4423963133640554, + "grad_norm": 1.2638923885979756, + "learning_rate": 3.9606062295564813e-07, + "loss": 0.743755578994751, + "step": 6260 + }, + { + "epoch": 1.442626728110599, + "grad_norm": 1.119467668131696, + "learning_rate": 3.9575701115479744e-07, + "loss": 0.9727948904037476, + "step": 6261 + }, + { + "epoch": 1.4428571428571428, + "grad_norm": 1.165539680123963, + "learning_rate": 3.9545348705901703e-07, + "loss": 0.9070688486099243, + "step": 6262 + }, + { + "epoch": 1.4430875576036866, + "grad_norm": 1.3995169117674358, + "learning_rate": 3.951500507123627e-07, + "loss": 0.8167496919631958, + "step": 6263 + }, + { + "epoch": 1.4433179723502305, + "grad_norm": 1.1204443462300027, + "learning_rate": 3.948467021588775e-07, + "loss": 0.7691773772239685, + "step": 6264 + }, + { + "epoch": 1.4435483870967742, + "grad_norm": 1.2915211655205685, + "learning_rate": 3.945434414425927e-07, + "loss": 0.7638411521911621, + "step": 6265 + }, + { + "epoch": 1.443778801843318, + "grad_norm": 1.0311097608426527, + "learning_rate": 3.942402686075258e-07, + "loss": 0.8138284683227539, + "step": 6266 + }, + { + "epoch": 1.4440092165898617, + "grad_norm": 1.430800234304149, + "learning_rate": 3.939371836976816e-07, + "loss": 0.8404628038406372, + "step": 6267 + }, + { + "epoch": 1.4442396313364054, + "grad_norm": 1.0744818989251388, + "learning_rate": 3.936341867570533e-07, + "loss": 0.7354726791381836, + "step": 6268 + }, + { + "epoch": 1.4444700460829494, + 
"grad_norm": 1.2516347720495873, + "learning_rate": 3.9333127782962003e-07, + "loss": 0.8607511520385742, + "step": 6269 + }, + { + "epoch": 1.444700460829493, + "grad_norm": 1.03787633948696, + "learning_rate": 3.930284569593483e-07, + "loss": 0.7372239232063293, + "step": 6270 + }, + { + "epoch": 1.4449308755760368, + "grad_norm": 1.205690175362699, + "learning_rate": 3.927257241901929e-07, + "loss": 0.8902593851089478, + "step": 6271 + }, + { + "epoch": 1.4451612903225808, + "grad_norm": 1.0978426997676995, + "learning_rate": 3.924230795660947e-07, + "loss": 0.7481765747070312, + "step": 6272 + }, + { + "epoch": 1.4453917050691243, + "grad_norm": 1.1624854693895736, + "learning_rate": 3.9212052313098177e-07, + "loss": 0.6868888139724731, + "step": 6273 + }, + { + "epoch": 1.4456221198156682, + "grad_norm": 1.219538424407328, + "learning_rate": 3.918180549287705e-07, + "loss": 0.6867324709892273, + "step": 6274 + }, + { + "epoch": 1.445852534562212, + "grad_norm": 1.4192898010151693, + "learning_rate": 3.9151567500336323e-07, + "loss": 0.8473105430603027, + "step": 6275 + }, + { + "epoch": 1.4460829493087557, + "grad_norm": 1.2236253801186994, + "learning_rate": 3.912133833986504e-07, + "loss": 0.7629631757736206, + "step": 6276 + }, + { + "epoch": 1.4463133640552996, + "grad_norm": 1.0502703605539807, + "learning_rate": 3.909111801585091e-07, + "loss": 0.9501597881317139, + "step": 6277 + }, + { + "epoch": 1.4465437788018434, + "grad_norm": 1.0568805239624584, + "learning_rate": 3.906090653268037e-07, + "loss": 0.7330536842346191, + "step": 6278 + }, + { + "epoch": 1.446774193548387, + "grad_norm": 1.199243558298224, + "learning_rate": 3.903070389473857e-07, + "loss": 0.907101571559906, + "step": 6279 + }, + { + "epoch": 1.4470046082949308, + "grad_norm": 1.1269939172893009, + "learning_rate": 3.900051010640939e-07, + "loss": 0.8177503347396851, + "step": 6280 + }, + { + "epoch": 1.4472350230414746, + "grad_norm": 1.373102048695832, + "learning_rate": 
3.897032517207538e-07, + "loss": 0.7851059436798096, + "step": 6281 + }, + { + "epoch": 1.4474654377880185, + "grad_norm": 0.8801777971944739, + "learning_rate": 3.8940149096117914e-07, + "loss": 0.7056214809417725, + "step": 6282 + }, + { + "epoch": 1.4476958525345622, + "grad_norm": 1.0831833275731695, + "learning_rate": 3.8909981882916975e-07, + "loss": 0.784143328666687, + "step": 6283 + }, + { + "epoch": 1.447926267281106, + "grad_norm": 1.2368924313085696, + "learning_rate": 3.8879823536851253e-07, + "loss": 0.8157210350036621, + "step": 6284 + }, + { + "epoch": 1.4481566820276497, + "grad_norm": 1.276176943713772, + "learning_rate": 3.884967406229828e-07, + "loss": 0.7329680323600769, + "step": 6285 + }, + { + "epoch": 1.4483870967741934, + "grad_norm": 1.4518343581804805, + "learning_rate": 3.8819533463634145e-07, + "loss": 0.9214208722114563, + "step": 6286 + }, + { + "epoch": 1.4486175115207374, + "grad_norm": 1.835142969551997, + "learning_rate": 3.8789401745233706e-07, + "loss": 0.8118722438812256, + "step": 6287 + }, + { + "epoch": 1.448847926267281, + "grad_norm": 1.0485981202236783, + "learning_rate": 3.8759278911470615e-07, + "loss": 0.7517364025115967, + "step": 6288 + }, + { + "epoch": 1.4490783410138248, + "grad_norm": 1.0879409814064, + "learning_rate": 3.872916496671711e-07, + "loss": 0.8979834318161011, + "step": 6289 + }, + { + "epoch": 1.4493087557603688, + "grad_norm": 1.6674549792368192, + "learning_rate": 3.8699059915344166e-07, + "loss": 0.9159818887710571, + "step": 6290 + }, + { + "epoch": 1.4495391705069125, + "grad_norm": 1.2582380909324238, + "learning_rate": 3.8668963761721563e-07, + "loss": 0.8176029324531555, + "step": 6291 + }, + { + "epoch": 1.4497695852534562, + "grad_norm": 1.3257834277786367, + "learning_rate": 3.8638876510217666e-07, + "loss": 0.7077589631080627, + "step": 6292 + }, + { + "epoch": 1.45, + "grad_norm": 1.0304546829516872, + "learning_rate": 3.8608798165199585e-07, + "loss": 0.8107718825340271, + "step": 6293 
+ }, + { + "epoch": 1.4502304147465437, + "grad_norm": 1.278146889045901, + "learning_rate": 3.8578728731033214e-07, + "loss": 0.9021201133728027, + "step": 6294 + }, + { + "epoch": 1.4504608294930876, + "grad_norm": 1.5907360314325336, + "learning_rate": 3.854866821208306e-07, + "loss": 0.9134507179260254, + "step": 6295 + }, + { + "epoch": 1.4506912442396314, + "grad_norm": 1.2431886164023473, + "learning_rate": 3.8518616612712317e-07, + "loss": 0.9081463813781738, + "step": 6296 + }, + { + "epoch": 1.450921658986175, + "grad_norm": 1.394869861453301, + "learning_rate": 3.848857393728303e-07, + "loss": 0.7892032861709595, + "step": 6297 + }, + { + "epoch": 1.4511520737327188, + "grad_norm": 1.1702087372951315, + "learning_rate": 3.8458540190155796e-07, + "loss": 0.753928542137146, + "step": 6298 + }, + { + "epoch": 1.4513824884792625, + "grad_norm": 1.1800339185606825, + "learning_rate": 3.8428515375689996e-07, + "loss": 0.6316792964935303, + "step": 6299 + }, + { + "epoch": 1.4516129032258065, + "grad_norm": 1.0510746352372813, + "learning_rate": 3.8398499498243665e-07, + "loss": 0.6569210290908813, + "step": 6300 + }, + { + "epoch": 1.4518433179723502, + "grad_norm": 1.2827982624069105, + "learning_rate": 3.836849256217355e-07, + "loss": 0.9082256555557251, + "step": 6301 + }, + { + "epoch": 1.452073732718894, + "grad_norm": 1.2539326790404104, + "learning_rate": 3.833849457183519e-07, + "loss": 0.6533655524253845, + "step": 6302 + }, + { + "epoch": 1.452304147465438, + "grad_norm": 1.1962706885387824, + "learning_rate": 3.830850553158271e-07, + "loss": 0.8181168437004089, + "step": 6303 + }, + { + "epoch": 1.4525345622119816, + "grad_norm": 1.191632474290621, + "learning_rate": 3.827852544576895e-07, + "loss": 0.8258780241012573, + "step": 6304 + }, + { + "epoch": 1.4527649769585254, + "grad_norm": 1.2200843626761786, + "learning_rate": 3.824855431874555e-07, + "loss": 0.7917114496231079, + "step": 6305 + }, + { + "epoch": 1.452995391705069, + "grad_norm": 
1.1119249100754447, + "learning_rate": 3.821859215486274e-07, + "loss": 0.7523643970489502, + "step": 6306 + }, + { + "epoch": 1.4532258064516128, + "grad_norm": 1.173507656799684, + "learning_rate": 3.818863895846945e-07, + "loss": 0.7248106002807617, + "step": 6307 + }, + { + "epoch": 1.4534562211981568, + "grad_norm": 1.0384099625968284, + "learning_rate": 3.815869473391343e-07, + "loss": 0.6663920879364014, + "step": 6308 + }, + { + "epoch": 1.4536866359447005, + "grad_norm": 1.2904533830018654, + "learning_rate": 3.8128759485540995e-07, + "loss": 0.887082576751709, + "step": 6309 + }, + { + "epoch": 1.4539170506912442, + "grad_norm": 1.176731626067417, + "learning_rate": 3.8098833217697193e-07, + "loss": 0.8491328954696655, + "step": 6310 + }, + { + "epoch": 1.454147465437788, + "grad_norm": 0.995531509886264, + "learning_rate": 3.806891593472582e-07, + "loss": 0.6749746799468994, + "step": 6311 + }, + { + "epoch": 1.4543778801843317, + "grad_norm": 1.2359927269681388, + "learning_rate": 3.803900764096932e-07, + "loss": 0.7607502937316895, + "step": 6312 + }, + { + "epoch": 1.4546082949308756, + "grad_norm": 0.9855772687954082, + "learning_rate": 3.8009108340768804e-07, + "loss": 0.6713626980781555, + "step": 6313 + }, + { + "epoch": 1.4548387096774194, + "grad_norm": 1.0335982949651026, + "learning_rate": 3.797921803846419e-07, + "loss": 0.7031810879707336, + "step": 6314 + }, + { + "epoch": 1.455069124423963, + "grad_norm": 1.2499044478276522, + "learning_rate": 3.7949336738393955e-07, + "loss": 0.7233775854110718, + "step": 6315 + }, + { + "epoch": 1.455299539170507, + "grad_norm": 1.1902627494977487, + "learning_rate": 3.791946444489532e-07, + "loss": 0.7446990013122559, + "step": 6316 + }, + { + "epoch": 1.4555299539170508, + "grad_norm": 1.0356528338667375, + "learning_rate": 3.7889601162304273e-07, + "loss": 0.731992244720459, + "step": 6317 + }, + { + "epoch": 1.4557603686635945, + "grad_norm": 0.9012124257356037, + "learning_rate": 
3.785974689495539e-07, + "loss": 0.7167335152626038, + "step": 6318 + }, + { + "epoch": 1.4559907834101382, + "grad_norm": 1.0367746360279544, + "learning_rate": 3.7829901647181993e-07, + "loss": 0.7634297609329224, + "step": 6319 + }, + { + "epoch": 1.456221198156682, + "grad_norm": 1.323601627974345, + "learning_rate": 3.7800065423316066e-07, + "loss": 0.7584050893783569, + "step": 6320 + }, + { + "epoch": 1.456451612903226, + "grad_norm": 1.3168506305563585, + "learning_rate": 3.777023822768829e-07, + "loss": 0.7150899171829224, + "step": 6321 + }, + { + "epoch": 1.4566820276497696, + "grad_norm": 1.3142694869577929, + "learning_rate": 3.7740420064628034e-07, + "loss": 0.7821052670478821, + "step": 6322 + }, + { + "epoch": 1.4569124423963133, + "grad_norm": 1.1890463822517086, + "learning_rate": 3.7710610938463405e-07, + "loss": 0.8678094148635864, + "step": 6323 + }, + { + "epoch": 1.457142857142857, + "grad_norm": 1.0929926711457507, + "learning_rate": 3.7680810853521107e-07, + "loss": 0.6953635215759277, + "step": 6324 + }, + { + "epoch": 1.4573732718894008, + "grad_norm": 1.392687245093679, + "learning_rate": 3.765101981412665e-07, + "loss": 0.765946626663208, + "step": 6325 + }, + { + "epoch": 1.4576036866359448, + "grad_norm": 1.2287803375758581, + "learning_rate": 3.7621237824604137e-07, + "loss": 0.8828680515289307, + "step": 6326 + }, + { + "epoch": 1.4578341013824885, + "grad_norm": 1.4191080683791804, + "learning_rate": 3.7591464889276326e-07, + "loss": 0.8916178345680237, + "step": 6327 + }, + { + "epoch": 1.4580645161290322, + "grad_norm": 1.4414543071479498, + "learning_rate": 3.756170101246481e-07, + "loss": 0.7563039064407349, + "step": 6328 + }, + { + "epoch": 1.4582949308755762, + "grad_norm": 1.1488058177567217, + "learning_rate": 3.7531946198489725e-07, + "loss": 0.8548855781555176, + "step": 6329 + }, + { + "epoch": 1.4585253456221199, + "grad_norm": 1.2471941201918813, + "learning_rate": 3.750220045166993e-07, + "loss": 0.8337546586990356, 
+ "step": 6330 + }, + { + "epoch": 1.4587557603686636, + "grad_norm": 1.2665043024049272, + "learning_rate": 3.7472463776323036e-07, + "loss": 0.8909939527511597, + "step": 6331 + }, + { + "epoch": 1.4589861751152073, + "grad_norm": 0.9459101838544814, + "learning_rate": 3.744273617676524e-07, + "loss": 0.629026472568512, + "step": 6332 + }, + { + "epoch": 1.459216589861751, + "grad_norm": 1.245577103796106, + "learning_rate": 3.7413017657311454e-07, + "loss": 0.7264849543571472, + "step": 6333 + }, + { + "epoch": 1.459447004608295, + "grad_norm": 1.0987416494814488, + "learning_rate": 3.738330822227532e-07, + "loss": 0.808081865310669, + "step": 6334 + }, + { + "epoch": 1.4596774193548387, + "grad_norm": 1.145687515640666, + "learning_rate": 3.7353607875969115e-07, + "loss": 0.6092932820320129, + "step": 6335 + }, + { + "epoch": 1.4599078341013825, + "grad_norm": 1.2636271324745916, + "learning_rate": 3.7323916622703756e-07, + "loss": 0.8700584173202515, + "step": 6336 + }, + { + "epoch": 1.4601382488479262, + "grad_norm": 1.2867446987977476, + "learning_rate": 3.7294234466788954e-07, + "loss": 0.8424433469772339, + "step": 6337 + }, + { + "epoch": 1.46036866359447, + "grad_norm": 1.1929868573019329, + "learning_rate": 3.7264561412533013e-07, + "loss": 0.8587443828582764, + "step": 6338 + }, + { + "epoch": 1.4605990783410139, + "grad_norm": 1.1369944171843958, + "learning_rate": 3.7234897464242934e-07, + "loss": 0.7708064913749695, + "step": 6339 + }, + { + "epoch": 1.4608294930875576, + "grad_norm": 0.9599493655503268, + "learning_rate": 3.7205242626224395e-07, + "loss": 0.8226567506790161, + "step": 6340 + }, + { + "epoch": 1.4610599078341013, + "grad_norm": 1.6926769297162396, + "learning_rate": 3.717559690278176e-07, + "loss": 0.8414342403411865, + "step": 6341 + }, + { + "epoch": 1.4612903225806453, + "grad_norm": 1.136325082903018, + "learning_rate": 3.714596029821804e-07, + "loss": 0.765863299369812, + "step": 6342 + }, + { + "epoch": 1.461520737327189, + 
"grad_norm": 1.2033696575950952, + "learning_rate": 3.7116332816834997e-07, + "loss": 0.7253202199935913, + "step": 6343 + }, + { + "epoch": 1.4617511520737327, + "grad_norm": 1.2614732245354896, + "learning_rate": 3.7086714462933e-07, + "loss": 0.786415696144104, + "step": 6344 + }, + { + "epoch": 1.4619815668202765, + "grad_norm": 1.3398597613096093, + "learning_rate": 3.705710524081108e-07, + "loss": 0.8382824659347534, + "step": 6345 + }, + { + "epoch": 1.4622119815668202, + "grad_norm": 1.1421503229190921, + "learning_rate": 3.702750515476705e-07, + "loss": 0.7953319549560547, + "step": 6346 + }, + { + "epoch": 1.4624423963133641, + "grad_norm": 1.1953524657169348, + "learning_rate": 3.699791420909727e-07, + "loss": 0.7897430658340454, + "step": 6347 + }, + { + "epoch": 1.4626728110599079, + "grad_norm": 1.0462269201726477, + "learning_rate": 3.6968332408096804e-07, + "loss": 0.7276254892349243, + "step": 6348 + }, + { + "epoch": 1.4629032258064516, + "grad_norm": 1.2576670635193097, + "learning_rate": 3.693875975605949e-07, + "loss": 0.7318450212478638, + "step": 6349 + }, + { + "epoch": 1.4631336405529953, + "grad_norm": 1.3298595608160129, + "learning_rate": 3.6909196257277676e-07, + "loss": 0.8438090085983276, + "step": 6350 + }, + { + "epoch": 1.463364055299539, + "grad_norm": 1.1958819221255177, + "learning_rate": 3.6879641916042534e-07, + "loss": 0.7977915406227112, + "step": 6351 + }, + { + "epoch": 1.463594470046083, + "grad_norm": 1.5876789525233332, + "learning_rate": 3.685009673664382e-07, + "loss": 0.8845348358154297, + "step": 6352 + }, + { + "epoch": 1.4638248847926267, + "grad_norm": 1.1089282393569035, + "learning_rate": 3.682056072336992e-07, + "loss": 0.8971320986747742, + "step": 6353 + }, + { + "epoch": 1.4640552995391705, + "grad_norm": 1.1499585685789093, + "learning_rate": 3.679103388050803e-07, + "loss": 0.7015302181243896, + "step": 6354 + }, + { + "epoch": 1.4642857142857144, + "grad_norm": 1.058413373940715, + "learning_rate": 
3.676151621234389e-07, + "loss": 0.5953146815299988, + "step": 6355 + }, + { + "epoch": 1.4645161290322581, + "grad_norm": 0.940762320723037, + "learning_rate": 3.673200772316193e-07, + "loss": 0.5794636011123657, + "step": 6356 + }, + { + "epoch": 1.4647465437788019, + "grad_norm": 1.4093031765021824, + "learning_rate": 3.6702508417245324e-07, + "loss": 0.8272292017936707, + "step": 6357 + }, + { + "epoch": 1.4649769585253456, + "grad_norm": 1.2004626750502272, + "learning_rate": 3.6673018298875826e-07, + "loss": 0.7239755392074585, + "step": 6358 + }, + { + "epoch": 1.4652073732718893, + "grad_norm": 1.0592207409293348, + "learning_rate": 3.6643537372333886e-07, + "loss": 0.8597465753555298, + "step": 6359 + }, + { + "epoch": 1.4654377880184333, + "grad_norm": 1.3768417389873642, + "learning_rate": 3.661406564189862e-07, + "loss": 0.7540475130081177, + "step": 6360 + }, + { + "epoch": 1.465668202764977, + "grad_norm": 1.2300552177842492, + "learning_rate": 3.658460311184782e-07, + "loss": 0.793259859085083, + "step": 6361 + }, + { + "epoch": 1.4658986175115207, + "grad_norm": 1.1933122341650848, + "learning_rate": 3.6555149786457883e-07, + "loss": 0.797966718673706, + "step": 6362 + }, + { + "epoch": 1.4661290322580645, + "grad_norm": 1.082541374270611, + "learning_rate": 3.6525705670004016e-07, + "loss": 0.7466796636581421, + "step": 6363 + }, + { + "epoch": 1.4663594470046082, + "grad_norm": 0.9612262339874744, + "learning_rate": 3.6496270766759927e-07, + "loss": 0.7694044709205627, + "step": 6364 + }, + { + "epoch": 1.4665898617511521, + "grad_norm": 1.753828188679532, + "learning_rate": 3.6466845080998043e-07, + "loss": 0.7701553106307983, + "step": 6365 + }, + { + "epoch": 1.4668202764976959, + "grad_norm": 1.0670832455899337, + "learning_rate": 3.643742861698952e-07, + "loss": 0.6718326807022095, + "step": 6366 + }, + { + "epoch": 1.4670506912442396, + "grad_norm": 1.1220075290963027, + "learning_rate": 3.6408021379004086e-07, + "loss": 0.7099052667617798, 
+ "step": 6367 + }, + { + "epoch": 1.4672811059907835, + "grad_norm": 1.0614563823752192, + "learning_rate": 3.6378623371310126e-07, + "loss": 0.8650654554367065, + "step": 6368 + }, + { + "epoch": 1.4675115207373273, + "grad_norm": 1.18691798498221, + "learning_rate": 3.6349234598174794e-07, + "loss": 0.7920950055122375, + "step": 6369 + }, + { + "epoch": 1.467741935483871, + "grad_norm": 1.3672164620265899, + "learning_rate": 3.63198550638638e-07, + "loss": 0.7927969098091125, + "step": 6370 + }, + { + "epoch": 1.4679723502304147, + "grad_norm": 1.6817643007938734, + "learning_rate": 3.6290484772641514e-07, + "loss": 0.9403868913650513, + "step": 6371 + }, + { + "epoch": 1.4682027649769585, + "grad_norm": 1.188245842937741, + "learning_rate": 3.626112372877106e-07, + "loss": 0.9157334566116333, + "step": 6372 + }, + { + "epoch": 1.4684331797235024, + "grad_norm": 1.0918511661649737, + "learning_rate": 3.6231771936514067e-07, + "loss": 0.7742066979408264, + "step": 6373 + }, + { + "epoch": 1.4686635944700461, + "grad_norm": 1.0472722321327697, + "learning_rate": 3.6202429400131006e-07, + "loss": 0.69399094581604, + "step": 6374 + }, + { + "epoch": 1.4688940092165899, + "grad_norm": 1.243240675298042, + "learning_rate": 3.6173096123880854e-07, + "loss": 0.874832272529602, + "step": 6375 + }, + { + "epoch": 1.4691244239631336, + "grad_norm": 0.9504044447465768, + "learning_rate": 3.6143772112021275e-07, + "loss": 0.6685272455215454, + "step": 6376 + }, + { + "epoch": 1.4693548387096773, + "grad_norm": 1.2588614059189167, + "learning_rate": 3.611445736880867e-07, + "loss": 0.7422738671302795, + "step": 6377 + }, + { + "epoch": 1.4695852534562213, + "grad_norm": 1.1563672807518934, + "learning_rate": 3.6085151898498e-07, + "loss": 0.8208622932434082, + "step": 6378 + }, + { + "epoch": 1.469815668202765, + "grad_norm": 1.278791922768039, + "learning_rate": 3.605585570534293e-07, + "loss": 0.8001033663749695, + "step": 6379 + }, + { + "epoch": 1.4700460829493087, + 
"grad_norm": 1.4073194030234843, + "learning_rate": 3.6026568793595744e-07, + "loss": 0.789332926273346, + "step": 6380 + }, + { + "epoch": 1.4702764976958527, + "grad_norm": 1.1542499539799642, + "learning_rate": 3.599729116750742e-07, + "loss": 0.8071820139884949, + "step": 6381 + }, + { + "epoch": 1.4705069124423962, + "grad_norm": 1.3369229588575535, + "learning_rate": 3.5968022831327506e-07, + "loss": 0.8028534054756165, + "step": 6382 + }, + { + "epoch": 1.4707373271889401, + "grad_norm": 1.0119395143433376, + "learning_rate": 3.593876378930435e-07, + "loss": 0.6888329982757568, + "step": 6383 + }, + { + "epoch": 1.4709677419354839, + "grad_norm": 1.285773441215651, + "learning_rate": 3.590951404568483e-07, + "loss": 0.8176132440567017, + "step": 6384 + }, + { + "epoch": 1.4711981566820276, + "grad_norm": 0.9429108192029542, + "learning_rate": 3.588027360471446e-07, + "loss": 0.6715027689933777, + "step": 6385 + }, + { + "epoch": 1.4714285714285715, + "grad_norm": 1.2177133807456715, + "learning_rate": 3.585104247063753e-07, + "loss": 0.8622937798500061, + "step": 6386 + }, + { + "epoch": 1.4716589861751153, + "grad_norm": 1.252482813795077, + "learning_rate": 3.5821820647696864e-07, + "loss": 0.7244299650192261, + "step": 6387 + }, + { + "epoch": 1.471889400921659, + "grad_norm": 1.2422776234152886, + "learning_rate": 3.579260814013393e-07, + "loss": 0.8130464553833008, + "step": 6388 + }, + { + "epoch": 1.4721198156682027, + "grad_norm": 1.739841773852821, + "learning_rate": 3.576340495218897e-07, + "loss": 0.8563692569732666, + "step": 6389 + }, + { + "epoch": 1.4723502304147464, + "grad_norm": 1.1474783445098509, + "learning_rate": 3.573421108810073e-07, + "loss": 0.8315908908843994, + "step": 6390 + }, + { + "epoch": 1.4725806451612904, + "grad_norm": 1.0916407928923948, + "learning_rate": 3.5705026552106645e-07, + "loss": 0.653038740158081, + "step": 6391 + }, + { + "epoch": 1.4728110599078341, + "grad_norm": 1.250110377436999, + "learning_rate": 
3.5675851348442876e-07, + "loss": 0.7511966228485107, + "step": 6392 + }, + { + "epoch": 1.4730414746543778, + "grad_norm": 1.226967151246929, + "learning_rate": 3.564668548134413e-07, + "loss": 0.8675990104675293, + "step": 6393 + }, + { + "epoch": 1.4732718894009218, + "grad_norm": 1.2481066388566375, + "learning_rate": 3.5617528955043765e-07, + "loss": 0.7574094533920288, + "step": 6394 + }, + { + "epoch": 1.4735023041474653, + "grad_norm": 1.3612516426224104, + "learning_rate": 3.5588381773773866e-07, + "loss": 0.7004787921905518, + "step": 6395 + }, + { + "epoch": 1.4737327188940093, + "grad_norm": 1.193988835000252, + "learning_rate": 3.555924394176508e-07, + "loss": 0.680101215839386, + "step": 6396 + }, + { + "epoch": 1.473963133640553, + "grad_norm": 1.2956197944669767, + "learning_rate": 3.55301154632467e-07, + "loss": 0.8340710401535034, + "step": 6397 + }, + { + "epoch": 1.4741935483870967, + "grad_norm": 1.2156451361937963, + "learning_rate": 3.5500996342446756e-07, + "loss": 0.8307079076766968, + "step": 6398 + }, + { + "epoch": 1.4744239631336407, + "grad_norm": 1.3824459968937755, + "learning_rate": 3.547188658359179e-07, + "loss": 0.9614958167076111, + "step": 6399 + }, + { + "epoch": 1.4746543778801844, + "grad_norm": 1.2140973914551956, + "learning_rate": 3.544278619090707e-07, + "loss": 0.782494068145752, + "step": 6400 + }, + { + "epoch": 1.4748847926267281, + "grad_norm": 1.372883571978596, + "learning_rate": 3.5413695168616474e-07, + "loss": 0.7474460601806641, + "step": 6401 + }, + { + "epoch": 1.4751152073732718, + "grad_norm": 1.0929029713656226, + "learning_rate": 3.5384613520942484e-07, + "loss": 0.7182635068893433, + "step": 6402 + }, + { + "epoch": 1.4753456221198156, + "grad_norm": 1.1562679128127753, + "learning_rate": 3.5355541252106336e-07, + "loss": 0.8116436004638672, + "step": 6403 + }, + { + "epoch": 1.4755760368663595, + "grad_norm": 1.1320096436261353, + "learning_rate": 3.5326478366327806e-07, + "loss": 0.8007283210754395, + 
"step": 6404 + }, + { + "epoch": 1.4758064516129032, + "grad_norm": 1.060451283065696, + "learning_rate": 3.5297424867825276e-07, + "loss": 0.7707732915878296, + "step": 6405 + }, + { + "epoch": 1.476036866359447, + "grad_norm": 1.319974893721661, + "learning_rate": 3.5268380760815917e-07, + "loss": 0.8031977415084839, + "step": 6406 + }, + { + "epoch": 1.4762672811059907, + "grad_norm": 1.0847497024921582, + "learning_rate": 3.5239346049515397e-07, + "loss": 0.7113008499145508, + "step": 6407 + }, + { + "epoch": 1.4764976958525344, + "grad_norm": 1.490354792200027, + "learning_rate": 3.521032073813802e-07, + "loss": 0.8069616556167603, + "step": 6408 + }, + { + "epoch": 1.4767281105990784, + "grad_norm": 1.6536617293382079, + "learning_rate": 3.518130483089686e-07, + "loss": 0.9780417680740356, + "step": 6409 + }, + { + "epoch": 1.476958525345622, + "grad_norm": 1.0393285063529043, + "learning_rate": 3.515229833200351e-07, + "loss": 0.765299379825592, + "step": 6410 + }, + { + "epoch": 1.4771889400921658, + "grad_norm": 0.9792702634570369, + "learning_rate": 3.512330124566816e-07, + "loss": 0.7279179096221924, + "step": 6411 + }, + { + "epoch": 1.4774193548387098, + "grad_norm": 1.3765526641198769, + "learning_rate": 3.509431357609978e-07, + "loss": 0.8429825901985168, + "step": 6412 + }, + { + "epoch": 1.4776497695852535, + "grad_norm": 1.2876523066268597, + "learning_rate": 3.506533532750586e-07, + "loss": 0.741936206817627, + "step": 6413 + }, + { + "epoch": 1.4778801843317972, + "grad_norm": 1.0841845353527741, + "learning_rate": 3.5036366504092527e-07, + "loss": 0.6841387748718262, + "step": 6414 + }, + { + "epoch": 1.478110599078341, + "grad_norm": 1.1361546476433346, + "learning_rate": 3.5007407110064626e-07, + "loss": 0.7136961221694946, + "step": 6415 + }, + { + "epoch": 1.4783410138248847, + "grad_norm": 1.1942730912918724, + "learning_rate": 3.497845714962554e-07, + "loss": 0.8483344912528992, + "step": 6416 + }, + { + "epoch": 1.4785714285714286, + 
"grad_norm": 1.1525838724707749, + "learning_rate": 3.4949516626977294e-07, + "loss": 0.7060235738754272, + "step": 6417 + }, + { + "epoch": 1.4788018433179724, + "grad_norm": 1.2546190088001288, + "learning_rate": 3.4920585546320625e-07, + "loss": 0.7351587414741516, + "step": 6418 + }, + { + "epoch": 1.479032258064516, + "grad_norm": 1.4082190266306274, + "learning_rate": 3.489166391185482e-07, + "loss": 0.7445269823074341, + "step": 6419 + }, + { + "epoch": 1.4792626728110598, + "grad_norm": 1.2308828080413103, + "learning_rate": 3.4862751727777796e-07, + "loss": 0.795128583908081, + "step": 6420 + }, + { + "epoch": 1.4794930875576036, + "grad_norm": 1.3455737723646244, + "learning_rate": 3.4833848998286133e-07, + "loss": 0.7916193008422852, + "step": 6421 + }, + { + "epoch": 1.4797235023041475, + "grad_norm": 1.2062461099240058, + "learning_rate": 3.480495572757497e-07, + "loss": 0.8279474973678589, + "step": 6422 + }, + { + "epoch": 1.4799539170506912, + "grad_norm": 1.3615355231577309, + "learning_rate": 3.477607191983822e-07, + "loss": 0.9339898824691772, + "step": 6423 + }, + { + "epoch": 1.480184331797235, + "grad_norm": 1.2958649175302657, + "learning_rate": 3.4747197579268296e-07, + "loss": 0.8579660654067993, + "step": 6424 + }, + { + "epoch": 1.480414746543779, + "grad_norm": 1.1935735021965341, + "learning_rate": 3.471833271005622e-07, + "loss": 0.7637878060340881, + "step": 6425 + }, + { + "epoch": 1.4806451612903226, + "grad_norm": 1.2997741786350927, + "learning_rate": 3.4689477316391756e-07, + "loss": 0.8600465059280396, + "step": 6426 + }, + { + "epoch": 1.4808755760368664, + "grad_norm": 0.9725758019670567, + "learning_rate": 3.46606314024632e-07, + "loss": 0.6576759815216064, + "step": 6427 + }, + { + "epoch": 1.48110599078341, + "grad_norm": 1.1289750059608772, + "learning_rate": 3.463179497245747e-07, + "loss": 0.7556706666946411, + "step": 6428 + }, + { + "epoch": 1.4813364055299538, + "grad_norm": 1.3449392913610907, + "learning_rate": 
3.4602968030560196e-07, + "loss": 0.8826701641082764, + "step": 6429 + }, + { + "epoch": 1.4815668202764978, + "grad_norm": 1.1499087478485694, + "learning_rate": 3.457415058095554e-07, + "loss": 0.7352213263511658, + "step": 6430 + }, + { + "epoch": 1.4817972350230415, + "grad_norm": 1.4434298728988502, + "learning_rate": 3.454534262782628e-07, + "loss": 0.8108851909637451, + "step": 6431 + }, + { + "epoch": 1.4820276497695852, + "grad_norm": 1.3070168078927469, + "learning_rate": 3.4516544175353914e-07, + "loss": 0.8595583438873291, + "step": 6432 + }, + { + "epoch": 1.482258064516129, + "grad_norm": 1.1496814595283131, + "learning_rate": 3.448775522771847e-07, + "loss": 0.7194280028343201, + "step": 6433 + }, + { + "epoch": 1.4824884792626727, + "grad_norm": 1.2788780172510947, + "learning_rate": 3.445897578909861e-07, + "loss": 0.8966056108474731, + "step": 6434 + }, + { + "epoch": 1.4827188940092166, + "grad_norm": 1.4168806857520198, + "learning_rate": 3.443020586367167e-07, + "loss": 0.8089771270751953, + "step": 6435 + }, + { + "epoch": 1.4829493087557604, + "grad_norm": 1.3086078413537297, + "learning_rate": 3.4401445455613555e-07, + "loss": 0.7835644483566284, + "step": 6436 + }, + { + "epoch": 1.483179723502304, + "grad_norm": 1.242850049469479, + "learning_rate": 3.4372694569098746e-07, + "loss": 0.7285257577896118, + "step": 6437 + }, + { + "epoch": 1.483410138248848, + "grad_norm": 1.4884020116718253, + "learning_rate": 3.434395320830048e-07, + "loss": 0.9108592867851257, + "step": 6438 + }, + { + "epoch": 1.4836405529953918, + "grad_norm": 1.265305751937672, + "learning_rate": 3.431522137739049e-07, + "loss": 0.7154395580291748, + "step": 6439 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 1.0883673646660943, + "learning_rate": 3.428649908053917e-07, + "loss": 0.6483602523803711, + "step": 6440 + }, + { + "epoch": 1.4841013824884792, + "grad_norm": 1.457129029114168, + "learning_rate": 3.425778632191551e-07, + "loss": 0.8090662956237793, + 
"step": 6441 + }, + { + "epoch": 1.484331797235023, + "grad_norm": 1.428702771444548, + "learning_rate": 3.422908310568712e-07, + "loss": 0.7884642481803894, + "step": 6442 + }, + { + "epoch": 1.484562211981567, + "grad_norm": 1.2738553778883674, + "learning_rate": 3.4200389436020225e-07, + "loss": 0.8628194332122803, + "step": 6443 + }, + { + "epoch": 1.4847926267281106, + "grad_norm": 1.1838310809928603, + "learning_rate": 3.4171705317079723e-07, + "loss": 0.8192269802093506, + "step": 6444 + }, + { + "epoch": 1.4850230414746544, + "grad_norm": 1.316668872684636, + "learning_rate": 3.4143030753029054e-07, + "loss": 0.7768012285232544, + "step": 6445 + }, + { + "epoch": 1.485253456221198, + "grad_norm": 1.2324282268735118, + "learning_rate": 3.411436574803026e-07, + "loss": 0.7420791387557983, + "step": 6446 + }, + { + "epoch": 1.4854838709677418, + "grad_norm": 1.3102449774544425, + "learning_rate": 3.4085710306244086e-07, + "loss": 0.823938250541687, + "step": 6447 + }, + { + "epoch": 1.4857142857142858, + "grad_norm": 1.1672900255965821, + "learning_rate": 3.405706443182976e-07, + "loss": 0.7215089201927185, + "step": 6448 + }, + { + "epoch": 1.4859447004608295, + "grad_norm": 1.138949819615918, + "learning_rate": 3.4028428128945286e-07, + "loss": 0.8301436901092529, + "step": 6449 + }, + { + "epoch": 1.4861751152073732, + "grad_norm": 1.1171858572091258, + "learning_rate": 3.399980140174712e-07, + "loss": 0.6727990508079529, + "step": 6450 + }, + { + "epoch": 1.4864055299539172, + "grad_norm": 1.0969379356045603, + "learning_rate": 3.397118425439038e-07, + "loss": 0.8364754319190979, + "step": 6451 + }, + { + "epoch": 1.486635944700461, + "grad_norm": 1.2714499604529865, + "learning_rate": 3.394257669102887e-07, + "loss": 0.7241604328155518, + "step": 6452 + }, + { + "epoch": 1.4868663594470046, + "grad_norm": 1.429435383993002, + "learning_rate": 3.3913978715814897e-07, + "loss": 0.7762489914894104, + "step": 6453 + }, + { + "epoch": 1.4870967741935484, + 
"grad_norm": 1.3862601382620485, + "learning_rate": 3.38853903328994e-07, + "loss": 0.9278200268745422, + "step": 6454 + }, + { + "epoch": 1.487327188940092, + "grad_norm": 0.9454491284474441, + "learning_rate": 3.3856811546431994e-07, + "loss": 0.693070113658905, + "step": 6455 + }, + { + "epoch": 1.487557603686636, + "grad_norm": 1.4631261008304832, + "learning_rate": 3.382824236056084e-07, + "loss": 0.8541949987411499, + "step": 6456 + }, + { + "epoch": 1.4877880184331798, + "grad_norm": 1.1080747331787868, + "learning_rate": 3.379968277943267e-07, + "loss": 0.7638850212097168, + "step": 6457 + }, + { + "epoch": 1.4880184331797235, + "grad_norm": 1.5396868765343736, + "learning_rate": 3.377113280719295e-07, + "loss": 0.8240739107131958, + "step": 6458 + }, + { + "epoch": 1.4882488479262672, + "grad_norm": 1.037738997106509, + "learning_rate": 3.374259244798562e-07, + "loss": 0.7360633015632629, + "step": 6459 + }, + { + "epoch": 1.488479262672811, + "grad_norm": 1.1287418173516828, + "learning_rate": 3.371406170595328e-07, + "loss": 0.8626362085342407, + "step": 6460 + }, + { + "epoch": 1.488709677419355, + "grad_norm": 1.553133844655672, + "learning_rate": 3.368554058523713e-07, + "loss": 0.8499895334243774, + "step": 6461 + }, + { + "epoch": 1.4889400921658986, + "grad_norm": 1.1568237777707882, + "learning_rate": 3.3657029089976985e-07, + "loss": 0.8335039615631104, + "step": 6462 + }, + { + "epoch": 1.4891705069124423, + "grad_norm": 1.1957026633378731, + "learning_rate": 3.3628527224311196e-07, + "loss": 0.8154790997505188, + "step": 6463 + }, + { + "epoch": 1.4894009216589863, + "grad_norm": 1.2851436413791164, + "learning_rate": 3.3600034992376856e-07, + "loss": 0.7952951192855835, + "step": 6464 + }, + { + "epoch": 1.48963133640553, + "grad_norm": 1.5993164682006433, + "learning_rate": 3.3571552398309535e-07, + "loss": 0.7227598428726196, + "step": 6465 + }, + { + "epoch": 1.4898617511520738, + "grad_norm": 1.1773028491207966, + "learning_rate": 
3.3543079446243404e-07, + "loss": 0.6703250408172607, + "step": 6466 + }, + { + "epoch": 1.4900921658986175, + "grad_norm": 1.152932493736184, + "learning_rate": 3.351461614031136e-07, + "loss": 0.7468122243881226, + "step": 6467 + }, + { + "epoch": 1.4903225806451612, + "grad_norm": 1.2933114629854674, + "learning_rate": 3.348616248464475e-07, + "loss": 0.8649178743362427, + "step": 6468 + }, + { + "epoch": 1.4905529953917052, + "grad_norm": 1.013990280281903, + "learning_rate": 3.345771848337359e-07, + "loss": 0.8229554295539856, + "step": 6469 + }, + { + "epoch": 1.4907834101382489, + "grad_norm": 1.3471402030282535, + "learning_rate": 3.342928414062652e-07, + "loss": 0.7275597453117371, + "step": 6470 + }, + { + "epoch": 1.4910138248847926, + "grad_norm": 1.095192106330462, + "learning_rate": 3.3400859460530737e-07, + "loss": 0.657899796962738, + "step": 6471 + }, + { + "epoch": 1.4912442396313363, + "grad_norm": 1.0853913135805695, + "learning_rate": 3.3372444447212e-07, + "loss": 0.7579425573348999, + "step": 6472 + }, + { + "epoch": 1.49147465437788, + "grad_norm": 1.1304988993649205, + "learning_rate": 3.334403910479479e-07, + "loss": 0.8707751631736755, + "step": 6473 + }, + { + "epoch": 1.491705069124424, + "grad_norm": 1.3454806591137698, + "learning_rate": 3.331564343740201e-07, + "loss": 0.7923752665519714, + "step": 6474 + }, + { + "epoch": 1.4919354838709677, + "grad_norm": 1.2646674876263875, + "learning_rate": 3.328725744915536e-07, + "loss": 0.8308948278427124, + "step": 6475 + }, + { + "epoch": 1.4921658986175115, + "grad_norm": 1.4029553470676885, + "learning_rate": 3.3258881144174967e-07, + "loss": 0.8984559774398804, + "step": 6476 + }, + { + "epoch": 1.4923963133640554, + "grad_norm": 1.2358798089346714, + "learning_rate": 3.3230514526579614e-07, + "loss": 0.9279792308807373, + "step": 6477 + }, + { + "epoch": 1.4926267281105992, + "grad_norm": 1.4094728162225774, + "learning_rate": 3.3202157600486655e-07, + "loss": 0.7934520244598389, + 
"step": 6478 + }, + { + "epoch": 1.4928571428571429, + "grad_norm": 1.658388461731414, + "learning_rate": 3.3173810370012136e-07, + "loss": 0.8463613390922546, + "step": 6479 + }, + { + "epoch": 1.4930875576036866, + "grad_norm": 1.339159678666659, + "learning_rate": 3.314547283927057e-07, + "loss": 0.8087350130081177, + "step": 6480 + }, + { + "epoch": 1.4933179723502303, + "grad_norm": 1.2350842201271304, + "learning_rate": 3.3117145012375113e-07, + "loss": 0.7711254358291626, + "step": 6481 + }, + { + "epoch": 1.4935483870967743, + "grad_norm": 1.2753839749074636, + "learning_rate": 3.3088826893437526e-07, + "loss": 0.7140679359436035, + "step": 6482 + }, + { + "epoch": 1.493778801843318, + "grad_norm": 1.1506161777222865, + "learning_rate": 3.3060518486568103e-07, + "loss": 0.7074463367462158, + "step": 6483 + }, + { + "epoch": 1.4940092165898617, + "grad_norm": 0.8291232249474376, + "learning_rate": 3.3032219795875827e-07, + "loss": 0.7560559511184692, + "step": 6484 + }, + { + "epoch": 1.4942396313364055, + "grad_norm": 1.4344445687170468, + "learning_rate": 3.3003930825468194e-07, + "loss": 0.7699435353279114, + "step": 6485 + }, + { + "epoch": 1.4944700460829492, + "grad_norm": 1.277197987117764, + "learning_rate": 3.297565157945129e-07, + "loss": 0.817488431930542, + "step": 6486 + }, + { + "epoch": 1.4947004608294931, + "grad_norm": 1.1511534488778172, + "learning_rate": 3.294738206192985e-07, + "loss": 0.7534141540527344, + "step": 6487 + }, + { + "epoch": 1.4949308755760369, + "grad_norm": 1.1924480850963226, + "learning_rate": 3.291912227700715e-07, + "loss": 0.7423536777496338, + "step": 6488 + }, + { + "epoch": 1.4951612903225806, + "grad_norm": 0.952322784205302, + "learning_rate": 3.2890872228785003e-07, + "loss": 0.7181985378265381, + "step": 6489 + }, + { + "epoch": 1.4953917050691246, + "grad_norm": 1.270224090305602, + "learning_rate": 3.286263192136396e-07, + "loss": 0.7143938541412354, + "step": 6490 + }, + { + "epoch": 1.4956221198156683, + 
"grad_norm": 1.3995714023195414, + "learning_rate": 3.2834401358843e-07, + "loss": 0.8247631788253784, + "step": 6491 + }, + { + "epoch": 1.495852534562212, + "grad_norm": 1.1449759372564834, + "learning_rate": 3.280618054531974e-07, + "loss": 0.8627001047134399, + "step": 6492 + }, + { + "epoch": 1.4960829493087557, + "grad_norm": 1.3482725665599868, + "learning_rate": 3.2777969484890456e-07, + "loss": 0.813239574432373, + "step": 6493 + }, + { + "epoch": 1.4963133640552995, + "grad_norm": 0.9200346218481302, + "learning_rate": 3.2749768181649904e-07, + "loss": 0.6633884310722351, + "step": 6494 + }, + { + "epoch": 1.4965437788018434, + "grad_norm": 1.4278232440541767, + "learning_rate": 3.272157663969144e-07, + "loss": 0.7760038375854492, + "step": 6495 + }, + { + "epoch": 1.4967741935483871, + "grad_norm": 1.3200918095184475, + "learning_rate": 3.2693394863107105e-07, + "loss": 0.9352993369102478, + "step": 6496 + }, + { + "epoch": 1.4970046082949309, + "grad_norm": 1.2344539392280847, + "learning_rate": 3.2665222855987397e-07, + "loss": 0.7011485695838928, + "step": 6497 + }, + { + "epoch": 1.4972350230414746, + "grad_norm": 1.2183950494067446, + "learning_rate": 3.263706062242142e-07, + "loss": 0.9008398056030273, + "step": 6498 + }, + { + "epoch": 1.4974654377880183, + "grad_norm": 1.194608222128912, + "learning_rate": 3.260890816649694e-07, + "loss": 0.768037736415863, + "step": 6499 + }, + { + "epoch": 1.4976958525345623, + "grad_norm": 0.9220148240054391, + "learning_rate": 3.258076549230024e-07, + "loss": 0.7603639364242554, + "step": 6500 + }, + { + "epoch": 1.497926267281106, + "grad_norm": 1.3821459764557307, + "learning_rate": 3.2552632603916177e-07, + "loss": 0.7984024286270142, + "step": 6501 + }, + { + "epoch": 1.4981566820276497, + "grad_norm": 1.415424035035242, + "learning_rate": 3.2524509505428187e-07, + "loss": 0.8466978073120117, + "step": 6502 + }, + { + "epoch": 1.4983870967741937, + "grad_norm": 1.3670825801142161, + "learning_rate": 
3.24963962009183e-07, + "loss": 0.7964911460876465, + "step": 6503 + }, + { + "epoch": 1.4986175115207372, + "grad_norm": 1.3123478568754847, + "learning_rate": 3.246829269446716e-07, + "loss": 0.7551665306091309, + "step": 6504 + }, + { + "epoch": 1.4988479262672811, + "grad_norm": 1.3193018902055227, + "learning_rate": 3.2440198990153945e-07, + "loss": 0.6468057632446289, + "step": 6505 + }, + { + "epoch": 1.4990783410138249, + "grad_norm": 1.2139801652485203, + "learning_rate": 3.241211509205638e-07, + "loss": 0.7739330530166626, + "step": 6506 + }, + { + "epoch": 1.4993087557603686, + "grad_norm": 1.3659144717848737, + "learning_rate": 3.238404100425085e-07, + "loss": 0.8205568790435791, + "step": 6507 + }, + { + "epoch": 1.4995391705069125, + "grad_norm": 0.958982052367848, + "learning_rate": 3.235597673081227e-07, + "loss": 0.667822003364563, + "step": 6508 + }, + { + "epoch": 1.4997695852534563, + "grad_norm": 1.2374356667574686, + "learning_rate": 3.232792227581409e-07, + "loss": 0.7829990386962891, + "step": 6509 + }, + { + "epoch": 1.5, + "grad_norm": 1.1404525757399535, + "learning_rate": 3.229987764332843e-07, + "loss": 0.768509566783905, + "step": 6510 + }, + { + "epoch": 1.5002304147465437, + "grad_norm": 1.3651547247057954, + "learning_rate": 3.227184283742591e-07, + "loss": 0.8448585867881775, + "step": 6511 + }, + { + "epoch": 1.5004608294930875, + "grad_norm": 1.2722097281432705, + "learning_rate": 3.2243817862175705e-07, + "loss": 0.6929391622543335, + "step": 6512 + }, + { + "epoch": 1.5006912442396314, + "grad_norm": 0.8983294061831201, + "learning_rate": 3.221580272164567e-07, + "loss": 0.6453005075454712, + "step": 6513 + }, + { + "epoch": 1.5009216589861751, + "grad_norm": 1.135934251126359, + "learning_rate": 3.2187797419902143e-07, + "loss": 0.7870811820030212, + "step": 6514 + }, + { + "epoch": 1.5011520737327189, + "grad_norm": 1.264885386654941, + "learning_rate": 3.2159801961010013e-07, + "loss": 0.7032002210617065, + "step": 6515 + }, 
+ { + "epoch": 1.5013824884792628, + "grad_norm": 1.5122369312915371, + "learning_rate": 3.213181634903285e-07, + "loss": 0.8018448352813721, + "step": 6516 + }, + { + "epoch": 1.5016129032258063, + "grad_norm": 1.0930874016239036, + "learning_rate": 3.2103840588032707e-07, + "loss": 0.7066134810447693, + "step": 6517 + }, + { + "epoch": 1.5018433179723503, + "grad_norm": 1.049874936950677, + "learning_rate": 3.207587468207018e-07, + "loss": 0.6835265159606934, + "step": 6518 + }, + { + "epoch": 1.502073732718894, + "grad_norm": 1.1994114231897615, + "learning_rate": 3.204791863520455e-07, + "loss": 0.6679749488830566, + "step": 6519 + }, + { + "epoch": 1.5023041474654377, + "grad_norm": 1.1780261658003046, + "learning_rate": 3.201997245149358e-07, + "loss": 0.781232476234436, + "step": 6520 + }, + { + "epoch": 1.5025345622119817, + "grad_norm": 1.156188659495686, + "learning_rate": 3.1992036134993616e-07, + "loss": 0.7853572368621826, + "step": 6521 + }, + { + "epoch": 1.5027649769585254, + "grad_norm": 1.3156565650023675, + "learning_rate": 3.1964109689759576e-07, + "loss": 0.8220832943916321, + "step": 6522 + }, + { + "epoch": 1.5029953917050691, + "grad_norm": 1.0874952614272322, + "learning_rate": 3.193619311984491e-07, + "loss": 0.8046013116836548, + "step": 6523 + }, + { + "epoch": 1.5032258064516129, + "grad_norm": 1.1481673715256613, + "learning_rate": 3.190828642930174e-07, + "loss": 0.7123414874076843, + "step": 6524 + }, + { + "epoch": 1.5034562211981566, + "grad_norm": 1.2507360463805697, + "learning_rate": 3.188038962218066e-07, + "loss": 0.7913625240325928, + "step": 6525 + }, + { + "epoch": 1.5036866359447005, + "grad_norm": 1.2264479129016654, + "learning_rate": 3.185250270253081e-07, + "loss": 0.7837327718734741, + "step": 6526 + }, + { + "epoch": 1.5039170506912443, + "grad_norm": 1.3223188543102071, + "learning_rate": 3.182462567440002e-07, + "loss": 0.7799992561340332, + "step": 6527 + }, + { + "epoch": 1.504147465437788, + "grad_norm": 
1.2906027927929307, + "learning_rate": 3.1796758541834545e-07, + "loss": 0.8591268062591553, + "step": 6528 + }, + { + "epoch": 1.504377880184332, + "grad_norm": 1.1175058933428492, + "learning_rate": 3.176890130887926e-07, + "loss": 0.6886378526687622, + "step": 6529 + }, + { + "epoch": 1.5046082949308754, + "grad_norm": 1.4969255628781877, + "learning_rate": 3.1741053979577647e-07, + "loss": 0.8641641139984131, + "step": 6530 + }, + { + "epoch": 1.5048387096774194, + "grad_norm": 1.3022265823882768, + "learning_rate": 3.1713216557971687e-07, + "loss": 0.8215552568435669, + "step": 6531 + }, + { + "epoch": 1.5050691244239631, + "grad_norm": 1.332125606212464, + "learning_rate": 3.1685389048101906e-07, + "loss": 0.8506371974945068, + "step": 6532 + }, + { + "epoch": 1.5052995391705069, + "grad_norm": 1.371517957091787, + "learning_rate": 3.1657571454007515e-07, + "loss": 0.740912675857544, + "step": 6533 + }, + { + "epoch": 1.5055299539170508, + "grad_norm": 1.0380741302125553, + "learning_rate": 3.162976377972614e-07, + "loss": 0.6458308696746826, + "step": 6534 + }, + { + "epoch": 1.5057603686635943, + "grad_norm": 1.0737980819278299, + "learning_rate": 3.1601966029294013e-07, + "loss": 0.7368316650390625, + "step": 6535 + }, + { + "epoch": 1.5059907834101383, + "grad_norm": 1.1008143995933475, + "learning_rate": 3.1574178206746003e-07, + "loss": 0.6648637056350708, + "step": 6536 + }, + { + "epoch": 1.506221198156682, + "grad_norm": 1.2751679142768328, + "learning_rate": 3.154640031611544e-07, + "loss": 0.706688404083252, + "step": 6537 + }, + { + "epoch": 1.5064516129032257, + "grad_norm": 1.0597131508477158, + "learning_rate": 3.1518632361434263e-07, + "loss": 0.722059965133667, + "step": 6538 + }, + { + "epoch": 1.5066820276497697, + "grad_norm": 1.1420297201861054, + "learning_rate": 3.14908743467329e-07, + "loss": 0.7098807096481323, + "step": 6539 + }, + { + "epoch": 1.5069124423963134, + "grad_norm": 1.1123804283277692, + "learning_rate": 
3.1463126276040454e-07, + "loss": 0.7131781578063965, + "step": 6540 + }, + { + "epoch": 1.5071428571428571, + "grad_norm": 0.757735402153, + "learning_rate": 3.143538815338451e-07, + "loss": 0.7292109727859497, + "step": 6541 + }, + { + "epoch": 1.507373271889401, + "grad_norm": 1.1145586582073062, + "learning_rate": 3.1407659982791204e-07, + "loss": 0.7305347919464111, + "step": 6542 + }, + { + "epoch": 1.5076036866359446, + "grad_norm": 1.3246030999705258, + "learning_rate": 3.1379941768285247e-07, + "loss": 0.8072094321250916, + "step": 6543 + }, + { + "epoch": 1.5078341013824885, + "grad_norm": 1.2831968996332677, + "learning_rate": 3.135223351388987e-07, + "loss": 0.8772450685501099, + "step": 6544 + }, + { + "epoch": 1.5080645161290323, + "grad_norm": 1.1816139196453221, + "learning_rate": 3.1324535223626957e-07, + "loss": 0.8463687896728516, + "step": 6545 + }, + { + "epoch": 1.508294930875576, + "grad_norm": 1.1937564350019036, + "learning_rate": 3.1296846901516806e-07, + "loss": 0.6764696836471558, + "step": 6546 + }, + { + "epoch": 1.50852534562212, + "grad_norm": 1.198918569491841, + "learning_rate": 3.126916855157841e-07, + "loss": 0.8395411968231201, + "step": 6547 + }, + { + "epoch": 1.5087557603686634, + "grad_norm": 1.0607235882989698, + "learning_rate": 3.1241500177829195e-07, + "loss": 0.8227219581604004, + "step": 6548 + }, + { + "epoch": 1.5089861751152074, + "grad_norm": 1.1677688606359355, + "learning_rate": 3.121384178428519e-07, + "loss": 0.7079675197601318, + "step": 6549 + }, + { + "epoch": 1.5092165898617511, + "grad_norm": 1.2218836381096956, + "learning_rate": 3.1186193374961014e-07, + "loss": 0.7792578935623169, + "step": 6550 + }, + { + "epoch": 1.5094470046082948, + "grad_norm": 1.403777710630671, + "learning_rate": 3.1158554953869776e-07, + "loss": 0.7821195125579834, + "step": 6551 + }, + { + "epoch": 1.5096774193548388, + "grad_norm": 1.0979873084769438, + "learning_rate": 3.1130926525023114e-07, + "loss": 0.6640183329582214, + 
"step": 6552 + }, + { + "epoch": 1.5099078341013825, + "grad_norm": 1.4504991573195685, + "learning_rate": 3.110330809243134e-07, + "loss": 0.8087342977523804, + "step": 6553 + }, + { + "epoch": 1.5101382488479262, + "grad_norm": 1.283455986462282, + "learning_rate": 3.1075699660103184e-07, + "loss": 0.7716038227081299, + "step": 6554 + }, + { + "epoch": 1.5103686635944702, + "grad_norm": 1.3208739442859028, + "learning_rate": 3.1048101232045943e-07, + "loss": 0.8408910632133484, + "step": 6555 + }, + { + "epoch": 1.5105990783410137, + "grad_norm": 1.3122071557376964, + "learning_rate": 3.1020512812265564e-07, + "loss": 0.8799750804901123, + "step": 6556 + }, + { + "epoch": 1.5108294930875577, + "grad_norm": 1.0380590960697713, + "learning_rate": 3.0992934404766426e-07, + "loss": 0.6534945368766785, + "step": 6557 + }, + { + "epoch": 1.5110599078341014, + "grad_norm": 1.2848519038217197, + "learning_rate": 3.0965366013551507e-07, + "loss": 0.8293032646179199, + "step": 6558 + }, + { + "epoch": 1.511290322580645, + "grad_norm": 1.2211391336842954, + "learning_rate": 3.0937807642622295e-07, + "loss": 0.7486997842788696, + "step": 6559 + }, + { + "epoch": 1.511520737327189, + "grad_norm": 0.9417803138002018, + "learning_rate": 3.0910259295978914e-07, + "loss": 0.7431119680404663, + "step": 6560 + }, + { + "epoch": 1.5117511520737326, + "grad_norm": 1.255174714268565, + "learning_rate": 3.0882720977619927e-07, + "loss": 0.945859432220459, + "step": 6561 + }, + { + "epoch": 1.5119815668202765, + "grad_norm": 1.3772354918121292, + "learning_rate": 3.0855192691542487e-07, + "loss": 0.7894721031188965, + "step": 6562 + }, + { + "epoch": 1.5122119815668202, + "grad_norm": 1.59783124946433, + "learning_rate": 3.082767444174229e-07, + "loss": 0.8593465089797974, + "step": 6563 + }, + { + "epoch": 1.512442396313364, + "grad_norm": 1.1763179818399963, + "learning_rate": 3.080016623221355e-07, + "loss": 0.8297405242919922, + "step": 6564 + }, + { + "epoch": 1.512672811059908, + 
"grad_norm": 1.2146736952008175, + "learning_rate": 3.07726680669491e-07, + "loss": 0.7372928857803345, + "step": 6565 + }, + { + "epoch": 1.5129032258064516, + "grad_norm": 0.987456734373793, + "learning_rate": 3.0745179949940235e-07, + "loss": 0.7031347751617432, + "step": 6566 + }, + { + "epoch": 1.5131336405529954, + "grad_norm": 0.9788253167457012, + "learning_rate": 3.071770188517679e-07, + "loss": 0.7086467742919922, + "step": 6567 + }, + { + "epoch": 1.5133640552995393, + "grad_norm": 1.2854493361240282, + "learning_rate": 3.069023387664723e-07, + "loss": 0.9091345071792603, + "step": 6568 + }, + { + "epoch": 1.5135944700460828, + "grad_norm": 1.3979394006170445, + "learning_rate": 3.066277592833847e-07, + "loss": 0.7470624446868896, + "step": 6569 + }, + { + "epoch": 1.5138248847926268, + "grad_norm": 1.2458050386964743, + "learning_rate": 3.0635328044235965e-07, + "loss": 0.75694739818573, + "step": 6570 + }, + { + "epoch": 1.5140552995391705, + "grad_norm": 1.1257752667184633, + "learning_rate": 3.0607890228323796e-07, + "loss": 0.7832024693489075, + "step": 6571 + }, + { + "epoch": 1.5142857142857142, + "grad_norm": 1.4206979397737705, + "learning_rate": 3.0580462484584455e-07, + "loss": 0.6777220368385315, + "step": 6572 + }, + { + "epoch": 1.5145161290322582, + "grad_norm": 1.1010797667803915, + "learning_rate": 3.055304481699913e-07, + "loss": 0.7748236060142517, + "step": 6573 + }, + { + "epoch": 1.5147465437788017, + "grad_norm": 1.1639246159957346, + "learning_rate": 3.052563722954741e-07, + "loss": 0.7495633363723755, + "step": 6574 + }, + { + "epoch": 1.5149769585253456, + "grad_norm": 1.1319897669216112, + "learning_rate": 3.049823972620744e-07, + "loss": 0.8011484742164612, + "step": 6575 + }, + { + "epoch": 1.5152073732718894, + "grad_norm": 1.3878273723563577, + "learning_rate": 3.0470852310956e-07, + "loss": 0.7480140924453735, + "step": 6576 + }, + { + "epoch": 1.515437788018433, + "grad_norm": 1.1963673851290149, + "learning_rate": 
3.0443474987768305e-07, + "loss": 0.6561319828033447, + "step": 6577 + }, + { + "epoch": 1.515668202764977, + "grad_norm": 1.1887729560806304, + "learning_rate": 3.041610776061813e-07, + "loss": 0.7437188029289246, + "step": 6578 + }, + { + "epoch": 1.5158986175115208, + "grad_norm": 1.2420532978964127, + "learning_rate": 3.0388750633477766e-07, + "loss": 0.7429096698760986, + "step": 6579 + }, + { + "epoch": 1.5161290322580645, + "grad_norm": 1.3505114972693866, + "learning_rate": 3.0361403610318125e-07, + "loss": 0.859411358833313, + "step": 6580 + }, + { + "epoch": 1.5163594470046085, + "grad_norm": 0.9758931256825946, + "learning_rate": 3.0334066695108565e-07, + "loss": 0.7636305093765259, + "step": 6581 + }, + { + "epoch": 1.516589861751152, + "grad_norm": 1.1796162666849943, + "learning_rate": 3.030673989181699e-07, + "loss": 0.8331989049911499, + "step": 6582 + }, + { + "epoch": 1.516820276497696, + "grad_norm": 1.0763217337155384, + "learning_rate": 3.0279423204409857e-07, + "loss": 0.770574688911438, + "step": 6583 + }, + { + "epoch": 1.5170506912442396, + "grad_norm": 1.3524367915089308, + "learning_rate": 3.025211663685213e-07, + "loss": 0.7470898628234863, + "step": 6584 + }, + { + "epoch": 1.5172811059907834, + "grad_norm": 1.2515745730030696, + "learning_rate": 3.022482019310736e-07, + "loss": 0.7907510995864868, + "step": 6585 + }, + { + "epoch": 1.5175115207373273, + "grad_norm": 1.1087989572536945, + "learning_rate": 3.019753387713757e-07, + "loss": 0.751417338848114, + "step": 6586 + }, + { + "epoch": 1.5177419354838708, + "grad_norm": 1.3862652872284045, + "learning_rate": 3.01702576929033e-07, + "loss": 0.8987867832183838, + "step": 6587 + }, + { + "epoch": 1.5179723502304148, + "grad_norm": 1.2098170472034613, + "learning_rate": 3.0142991644363714e-07, + "loss": 0.7618268728256226, + "step": 6588 + }, + { + "epoch": 1.5182027649769585, + "grad_norm": 1.4029958928912587, + "learning_rate": 3.011573573547641e-07, + "loss": 0.9358207583427429, + 
"step": 6589 + }, + { + "epoch": 1.5184331797235022, + "grad_norm": 1.4434031985489326, + "learning_rate": 3.008848997019753e-07, + "loss": 0.6549144387245178, + "step": 6590 + }, + { + "epoch": 1.5186635944700462, + "grad_norm": 1.293720092884626, + "learning_rate": 3.00612543524818e-07, + "loss": 0.8642100095748901, + "step": 6591 + }, + { + "epoch": 1.51889400921659, + "grad_norm": 1.2852982676947153, + "learning_rate": 3.003402888628241e-07, + "loss": 0.7348824143409729, + "step": 6592 + }, + { + "epoch": 1.5191244239631336, + "grad_norm": 1.0897732641421132, + "learning_rate": 3.000681357555108e-07, + "loss": 0.8737039566040039, + "step": 6593 + }, + { + "epoch": 1.5193548387096776, + "grad_norm": 1.3095413820866733, + "learning_rate": 2.9979608424238134e-07, + "loss": 0.749860405921936, + "step": 6594 + }, + { + "epoch": 1.519585253456221, + "grad_norm": 1.4291988493830527, + "learning_rate": 2.99524134362923e-07, + "loss": 0.7583779096603394, + "step": 6595 + }, + { + "epoch": 1.519815668202765, + "grad_norm": 1.1886499728868618, + "learning_rate": 2.992522861566095e-07, + "loss": 0.7096224427223206, + "step": 6596 + }, + { + "epoch": 1.5200460829493088, + "grad_norm": 1.3265073494412316, + "learning_rate": 2.9898053966289904e-07, + "loss": 0.7813585996627808, + "step": 6597 + }, + { + "epoch": 1.5202764976958525, + "grad_norm": 1.3753919073529044, + "learning_rate": 2.9870889492123517e-07, + "loss": 0.7744605541229248, + "step": 6598 + }, + { + "epoch": 1.5205069124423964, + "grad_norm": 1.4661404938087315, + "learning_rate": 2.984373519710469e-07, + "loss": 0.8398552536964417, + "step": 6599 + }, + { + "epoch": 1.52073732718894, + "grad_norm": 1.1837780856173943, + "learning_rate": 2.981659108517478e-07, + "loss": 0.6853294372558594, + "step": 6600 + }, + { + "epoch": 1.520967741935484, + "grad_norm": 0.9892560165373243, + "learning_rate": 2.97894571602738e-07, + "loss": 0.7673987150192261, + "step": 6601 + }, + { + "epoch": 1.5211981566820276, + 
"grad_norm": 1.0638042713840496, + "learning_rate": 2.976233342634017e-07, + "loss": 0.7000377774238586, + "step": 6602 + }, + { + "epoch": 1.5214285714285714, + "grad_norm": 1.2089273111808856, + "learning_rate": 2.9735219887310857e-07, + "loss": 0.8429346680641174, + "step": 6603 + }, + { + "epoch": 1.5216589861751153, + "grad_norm": 1.4255685153178952, + "learning_rate": 2.970811654712133e-07, + "loss": 0.9118648767471313, + "step": 6604 + }, + { + "epoch": 1.521889400921659, + "grad_norm": 1.0974145188834663, + "learning_rate": 2.9681023409705666e-07, + "loss": 0.7745784521102905, + "step": 6605 + }, + { + "epoch": 1.5221198156682028, + "grad_norm": 1.234720575381531, + "learning_rate": 2.9653940478996367e-07, + "loss": 0.8481245040893555, + "step": 6606 + }, + { + "epoch": 1.5223502304147467, + "grad_norm": 1.1446582960275502, + "learning_rate": 2.9626867758924436e-07, + "loss": 0.8643463850021362, + "step": 6607 + }, + { + "epoch": 1.5225806451612902, + "grad_norm": 1.6406368897457513, + "learning_rate": 2.959980525341953e-07, + "loss": 0.9524952173233032, + "step": 6608 + }, + { + "epoch": 1.5228110599078342, + "grad_norm": 1.067119300713527, + "learning_rate": 2.9572752966409686e-07, + "loss": 0.7153829336166382, + "step": 6609 + }, + { + "epoch": 1.523041474654378, + "grad_norm": 1.1739681134356785, + "learning_rate": 2.954571090182149e-07, + "loss": 0.8332774639129639, + "step": 6610 + }, + { + "epoch": 1.5232718894009216, + "grad_norm": 1.3773090684366749, + "learning_rate": 2.9518679063580123e-07, + "loss": 0.7511743307113647, + "step": 6611 + }, + { + "epoch": 1.5235023041474656, + "grad_norm": 1.2327774867248482, + "learning_rate": 2.9491657455609175e-07, + "loss": 0.715233325958252, + "step": 6612 + }, + { + "epoch": 1.523732718894009, + "grad_norm": 1.139323635074032, + "learning_rate": 2.946464608183078e-07, + "loss": 0.7386246919631958, + "step": 6613 + }, + { + "epoch": 1.523963133640553, + "grad_norm": 1.1904592003911236, + "learning_rate": 
2.943764494616565e-07, + "loss": 0.8337790369987488, + "step": 6614 + }, + { + "epoch": 1.5241935483870968, + "grad_norm": 1.394927398157402, + "learning_rate": 2.941065405253296e-07, + "loss": 0.8447855710983276, + "step": 6615 + }, + { + "epoch": 1.5244239631336405, + "grad_norm": 1.1307960049130217, + "learning_rate": 2.938367340485035e-07, + "loss": 0.7430610060691833, + "step": 6616 + }, + { + "epoch": 1.5246543778801844, + "grad_norm": 1.134552871583557, + "learning_rate": 2.9356703007034087e-07, + "loss": 0.7740806937217712, + "step": 6617 + }, + { + "epoch": 1.5248847926267282, + "grad_norm": 1.2516085920875086, + "learning_rate": 2.9329742862998875e-07, + "loss": 0.7824152708053589, + "step": 6618 + }, + { + "epoch": 1.5251152073732719, + "grad_norm": 1.0852675062610386, + "learning_rate": 2.930279297665792e-07, + "loss": 0.9222463965415955, + "step": 6619 + }, + { + "epoch": 1.5253456221198156, + "grad_norm": 1.8096931577931101, + "learning_rate": 2.927585335192294e-07, + "loss": 0.9548497200012207, + "step": 6620 + }, + { + "epoch": 1.5255760368663593, + "grad_norm": 1.497275795232007, + "learning_rate": 2.9248923992704255e-07, + "loss": 0.9007906913757324, + "step": 6621 + }, + { + "epoch": 1.5258064516129033, + "grad_norm": 1.0647051889661132, + "learning_rate": 2.9222004902910593e-07, + "loss": 0.6932169198989868, + "step": 6622 + }, + { + "epoch": 1.526036866359447, + "grad_norm": 0.9763599663388729, + "learning_rate": 2.919509608644922e-07, + "loss": 0.7327853441238403, + "step": 6623 + }, + { + "epoch": 1.5262672811059907, + "grad_norm": 1.423305414970627, + "learning_rate": 2.916819754722588e-07, + "loss": 0.617963433265686, + "step": 6624 + }, + { + "epoch": 1.5264976958525347, + "grad_norm": 1.3790687935494703, + "learning_rate": 2.914130928914493e-07, + "loss": 1.0567349195480347, + "step": 6625 + }, + { + "epoch": 1.5267281105990782, + "grad_norm": 1.243824261339929, + "learning_rate": 2.9114431316109145e-07, + "loss": 0.7362378835678101, + 
"step": 6626 + }, + { + "epoch": 1.5269585253456222, + "grad_norm": 1.1636178458595106, + "learning_rate": 2.9087563632019774e-07, + "loss": 0.6879991888999939, + "step": 6627 + }, + { + "epoch": 1.5271889400921659, + "grad_norm": 1.2540530060828472, + "learning_rate": 2.9060706240776686e-07, + "loss": 0.7804177403450012, + "step": 6628 + }, + { + "epoch": 1.5274193548387096, + "grad_norm": 1.2450061818881997, + "learning_rate": 2.9033859146278197e-07, + "loss": 0.7459548711776733, + "step": 6629 + }, + { + "epoch": 1.5276497695852536, + "grad_norm": 1.1214229491247267, + "learning_rate": 2.900702235242106e-07, + "loss": 0.7392233610153198, + "step": 6630 + }, + { + "epoch": 1.5278801843317973, + "grad_norm": 1.0862664338119448, + "learning_rate": 2.8980195863100675e-07, + "loss": 0.6956135034561157, + "step": 6631 + }, + { + "epoch": 1.528110599078341, + "grad_norm": 1.1232709572579735, + "learning_rate": 2.8953379682210856e-07, + "loss": 0.7042561769485474, + "step": 6632 + }, + { + "epoch": 1.5283410138248847, + "grad_norm": 1.070241779197473, + "learning_rate": 2.8926573813643884e-07, + "loss": 0.7114298343658447, + "step": 6633 + }, + { + "epoch": 1.5285714285714285, + "grad_norm": 1.0297537166419386, + "learning_rate": 2.8899778261290664e-07, + "loss": 0.862826943397522, + "step": 6634 + }, + { + "epoch": 1.5288018433179724, + "grad_norm": 1.3240716498057261, + "learning_rate": 2.8872993029040506e-07, + "loss": 0.8229889869689941, + "step": 6635 + }, + { + "epoch": 1.5290322580645161, + "grad_norm": 1.2292174291080764, + "learning_rate": 2.884621812078122e-07, + "loss": 0.8058778047561646, + "step": 6636 + }, + { + "epoch": 1.5292626728110599, + "grad_norm": 1.2782782809475366, + "learning_rate": 2.881945354039921e-07, + "loss": 0.8150385618209839, + "step": 6637 + }, + { + "epoch": 1.5294930875576038, + "grad_norm": 1.1137449533588037, + "learning_rate": 2.8792699291779276e-07, + "loss": 0.7067136168479919, + "step": 6638 + }, + { + "epoch": 
1.5297235023041473, + "grad_norm": 1.2793329729310776, + "learning_rate": 2.8765955378804784e-07, + "loss": 0.7725155353546143, + "step": 6639 + }, + { + "epoch": 1.5299539170506913, + "grad_norm": 1.0584861581127705, + "learning_rate": 2.873922180535754e-07, + "loss": 0.5956720113754272, + "step": 6640 + }, + { + "epoch": 1.530184331797235, + "grad_norm": 1.1955034677005214, + "learning_rate": 2.8712498575317934e-07, + "loss": 0.6506170630455017, + "step": 6641 + }, + { + "epoch": 1.5304147465437787, + "grad_norm": 1.0781697188392338, + "learning_rate": 2.86857856925648e-07, + "loss": 0.7860926985740662, + "step": 6642 + }, + { + "epoch": 1.5306451612903227, + "grad_norm": 1.1840723689685375, + "learning_rate": 2.8659083160975464e-07, + "loss": 0.7003993391990662, + "step": 6643 + }, + { + "epoch": 1.5308755760368664, + "grad_norm": 1.1562706768971642, + "learning_rate": 2.8632390984425746e-07, + "loss": 0.6887079477310181, + "step": 6644 + }, + { + "epoch": 1.5311059907834101, + "grad_norm": 1.243117329825752, + "learning_rate": 2.860570916678998e-07, + "loss": 0.788282036781311, + "step": 6645 + }, + { + "epoch": 1.5313364055299539, + "grad_norm": 1.273283187040626, + "learning_rate": 2.8579037711941043e-07, + "loss": 0.771350085735321, + "step": 6646 + }, + { + "epoch": 1.5315668202764976, + "grad_norm": 1.1000030346921834, + "learning_rate": 2.855237662375021e-07, + "loss": 0.6418509483337402, + "step": 6647 + }, + { + "epoch": 1.5317972350230415, + "grad_norm": 1.022873677691871, + "learning_rate": 2.852572590608735e-07, + "loss": 0.6606692671775818, + "step": 6648 + }, + { + "epoch": 1.5320276497695853, + "grad_norm": 1.4727879897773712, + "learning_rate": 2.849908556282076e-07, + "loss": 0.8623934984207153, + "step": 6649 + }, + { + "epoch": 1.532258064516129, + "grad_norm": 1.1678986803146219, + "learning_rate": 2.8472455597817215e-07, + "loss": 0.848737359046936, + "step": 6650 + }, + { + "epoch": 1.532488479262673, + "grad_norm": 1.2265451299303025, + 
"learning_rate": 2.844583601494207e-07, + "loss": 0.7156505584716797, + "step": 6651 + }, + { + "epoch": 1.5327188940092165, + "grad_norm": 1.157360063816448, + "learning_rate": 2.8419226818059116e-07, + "loss": 0.598319411277771, + "step": 6652 + }, + { + "epoch": 1.5329493087557604, + "grad_norm": 1.0128877845083564, + "learning_rate": 2.8392628011030585e-07, + "loss": 0.6320680379867554, + "step": 6653 + }, + { + "epoch": 1.5331797235023041, + "grad_norm": 1.2437383042471344, + "learning_rate": 2.836603959771734e-07, + "loss": 0.8770536184310913, + "step": 6654 + }, + { + "epoch": 1.5334101382488479, + "grad_norm": 1.3327586940769975, + "learning_rate": 2.833946158197862e-07, + "loss": 0.896265983581543, + "step": 6655 + }, + { + "epoch": 1.5336405529953918, + "grad_norm": 1.1058301341236145, + "learning_rate": 2.8312893967672145e-07, + "loss": 0.7194868326187134, + "step": 6656 + }, + { + "epoch": 1.5338709677419353, + "grad_norm": 1.1479450761132848, + "learning_rate": 2.828633675865425e-07, + "loss": 0.7993383407592773, + "step": 6657 + }, + { + "epoch": 1.5341013824884793, + "grad_norm": 1.3252275312162691, + "learning_rate": 2.8259789958779635e-07, + "loss": 0.6808127760887146, + "step": 6658 + }, + { + "epoch": 1.534331797235023, + "grad_norm": 1.3083456260381565, + "learning_rate": 2.823325357190153e-07, + "loss": 0.7348822355270386, + "step": 6659 + }, + { + "epoch": 1.5345622119815667, + "grad_norm": 1.4520629186425333, + "learning_rate": 2.820672760187166e-07, + "loss": 0.7729920744895935, + "step": 6660 + }, + { + "epoch": 1.5347926267281107, + "grad_norm": 1.1927593175103235, + "learning_rate": 2.818021205254021e-07, + "loss": 0.803922176361084, + "step": 6661 + }, + { + "epoch": 1.5350230414746544, + "grad_norm": 1.1316086785563555, + "learning_rate": 2.815370692775594e-07, + "loss": 0.7931007146835327, + "step": 6662 + }, + { + "epoch": 1.5352534562211981, + "grad_norm": 0.9381855495475373, + "learning_rate": 2.8127212231365995e-07, + "loss": 
0.7990511655807495, + "step": 6663 + }, + { + "epoch": 1.535483870967742, + "grad_norm": 1.1449374360466444, + "learning_rate": 2.8100727967216043e-07, + "loss": 0.8163471817970276, + "step": 6664 + }, + { + "epoch": 1.5357142857142856, + "grad_norm": 1.126530672311672, + "learning_rate": 2.8074254139150225e-07, + "loss": 0.7628358602523804, + "step": 6665 + }, + { + "epoch": 1.5359447004608295, + "grad_norm": 1.216707261403855, + "learning_rate": 2.8047790751011216e-07, + "loss": 0.8008173704147339, + "step": 6666 + }, + { + "epoch": 1.5361751152073733, + "grad_norm": 1.4385072008960633, + "learning_rate": 2.802133780664013e-07, + "loss": 0.9139487743377686, + "step": 6667 + }, + { + "epoch": 1.536405529953917, + "grad_norm": 1.479452922561271, + "learning_rate": 2.7994895309876555e-07, + "loss": 0.9436901211738586, + "step": 6668 + }, + { + "epoch": 1.536635944700461, + "grad_norm": 1.1137684825301204, + "learning_rate": 2.7968463264558617e-07, + "loss": 0.8072221875190735, + "step": 6669 + }, + { + "epoch": 1.5368663594470044, + "grad_norm": 1.4031563621096825, + "learning_rate": 2.7942041674522866e-07, + "loss": 0.7434822916984558, + "step": 6670 + }, + { + "epoch": 1.5370967741935484, + "grad_norm": 1.1245525381043615, + "learning_rate": 2.7915630543604394e-07, + "loss": 0.6729850769042969, + "step": 6671 + }, + { + "epoch": 1.5373271889400921, + "grad_norm": 1.2279789151687839, + "learning_rate": 2.7889229875636723e-07, + "loss": 0.8752315044403076, + "step": 6672 + }, + { + "epoch": 1.5375576036866359, + "grad_norm": 1.2125823370266373, + "learning_rate": 2.786283967445184e-07, + "loss": 0.8519413471221924, + "step": 6673 + }, + { + "epoch": 1.5377880184331798, + "grad_norm": 1.2674824603159123, + "learning_rate": 2.783645994388032e-07, + "loss": 0.8868448734283447, + "step": 6674 + }, + { + "epoch": 1.5380184331797235, + "grad_norm": 1.2984993367707722, + "learning_rate": 2.78100906877511e-07, + "loss": 0.9223456978797913, + "step": 6675 + }, + { + "epoch": 
1.5382488479262673, + "grad_norm": 1.0080180068423799, + "learning_rate": 2.7783731909891616e-07, + "loss": 0.799191951751709, + "step": 6676 + }, + { + "epoch": 1.5384792626728112, + "grad_norm": 1.1987572506109172, + "learning_rate": 2.775738361412788e-07, + "loss": 0.7092995643615723, + "step": 6677 + }, + { + "epoch": 1.5387096774193547, + "grad_norm": 1.2206610409098804, + "learning_rate": 2.7731045804284283e-07, + "loss": 0.674687385559082, + "step": 6678 + }, + { + "epoch": 1.5389400921658987, + "grad_norm": 1.4910052625734944, + "learning_rate": 2.77047184841837e-07, + "loss": 0.7366930246353149, + "step": 6679 + }, + { + "epoch": 1.5391705069124424, + "grad_norm": 1.169385374165895, + "learning_rate": 2.767840165764753e-07, + "loss": 0.838137149810791, + "step": 6680 + }, + { + "epoch": 1.5394009216589861, + "grad_norm": 1.2120746756764942, + "learning_rate": 2.765209532849558e-07, + "loss": 0.7507175803184509, + "step": 6681 + }, + { + "epoch": 1.53963133640553, + "grad_norm": 1.2981666739842812, + "learning_rate": 2.7625799500546267e-07, + "loss": 0.8157602548599243, + "step": 6682 + }, + { + "epoch": 1.5398617511520736, + "grad_norm": 1.2345607869860449, + "learning_rate": 2.7599514177616333e-07, + "loss": 0.7779219150543213, + "step": 6683 + }, + { + "epoch": 1.5400921658986175, + "grad_norm": 1.186692939443946, + "learning_rate": 2.757323936352106e-07, + "loss": 0.8261638879776001, + "step": 6684 + }, + { + "epoch": 1.5403225806451613, + "grad_norm": 0.8917527422638705, + "learning_rate": 2.7546975062074197e-07, + "loss": 0.6139177680015564, + "step": 6685 + }, + { + "epoch": 1.540552995391705, + "grad_norm": 1.0945474995666544, + "learning_rate": 2.752072127708802e-07, + "loss": 0.744202733039856, + "step": 6686 + }, + { + "epoch": 1.540783410138249, + "grad_norm": 1.279582503351568, + "learning_rate": 2.749447801237319e-07, + "loss": 0.7685158848762512, + "step": 6687 + }, + { + "epoch": 1.5410138248847927, + "grad_norm": 1.4134776465364736, + 
"learning_rate": 2.7468245271738865e-07, + "loss": 0.7483633756637573, + "step": 6688 + }, + { + "epoch": 1.5412442396313364, + "grad_norm": 1.4452963556936742, + "learning_rate": 2.7442023058992746e-07, + "loss": 0.8967286348342896, + "step": 6689 + }, + { + "epoch": 1.5414746543778803, + "grad_norm": 3.4447797406152922, + "learning_rate": 2.7415811377940933e-07, + "loss": 0.8035085201263428, + "step": 6690 + }, + { + "epoch": 1.5417050691244238, + "grad_norm": 1.2535208224880003, + "learning_rate": 2.738961023238798e-07, + "loss": 0.8504149913787842, + "step": 6691 + }, + { + "epoch": 1.5419354838709678, + "grad_norm": 1.408249398601243, + "learning_rate": 2.736341962613701e-07, + "loss": 0.7612431049346924, + "step": 6692 + }, + { + "epoch": 1.5421658986175115, + "grad_norm": 1.3117649202054886, + "learning_rate": 2.733723956298951e-07, + "loss": 0.6974390745162964, + "step": 6693 + }, + { + "epoch": 1.5423963133640552, + "grad_norm": 1.10015572050179, + "learning_rate": 2.7311070046745476e-07, + "loss": 0.7946817874908447, + "step": 6694 + }, + { + "epoch": 1.5426267281105992, + "grad_norm": 1.3598767034128523, + "learning_rate": 2.728491108120342e-07, + "loss": 0.7801793813705444, + "step": 6695 + }, + { + "epoch": 1.5428571428571427, + "grad_norm": 1.0989233619042245, + "learning_rate": 2.725876267016023e-07, + "loss": 0.720335066318512, + "step": 6696 + }, + { + "epoch": 1.5430875576036867, + "grad_norm": 0.9331707903973574, + "learning_rate": 2.7232624817411376e-07, + "loss": 0.6820393800735474, + "step": 6697 + }, + { + "epoch": 1.5433179723502304, + "grad_norm": 1.2636082158419006, + "learning_rate": 2.7206497526750694e-07, + "loss": 0.8217613697052002, + "step": 6698 + }, + { + "epoch": 1.543548387096774, + "grad_norm": 1.2388683954169015, + "learning_rate": 2.7180380801970525e-07, + "loss": 0.7600520849227905, + "step": 6699 + }, + { + "epoch": 1.543778801843318, + "grad_norm": 1.2564669684453122, + "learning_rate": 2.7154274646861687e-07, + "loss": 
0.9402344226837158, + "step": 6700 + }, + { + "epoch": 1.5440092165898618, + "grad_norm": 1.0720415723340906, + "learning_rate": 2.7128179065213417e-07, + "loss": 0.7470760345458984, + "step": 6701 + }, + { + "epoch": 1.5442396313364055, + "grad_norm": 1.0091593723711232, + "learning_rate": 2.710209406081353e-07, + "loss": 0.6915948390960693, + "step": 6702 + }, + { + "epoch": 1.5444700460829495, + "grad_norm": 1.1829806437851378, + "learning_rate": 2.707601963744817e-07, + "loss": 0.7554904222488403, + "step": 6703 + }, + { + "epoch": 1.544700460829493, + "grad_norm": 0.9892324198221251, + "learning_rate": 2.7049955798902026e-07, + "loss": 0.8197575807571411, + "step": 6704 + }, + { + "epoch": 1.544930875576037, + "grad_norm": 1.3144339350992138, + "learning_rate": 2.702390254895819e-07, + "loss": 0.7106794118881226, + "step": 6705 + }, + { + "epoch": 1.5451612903225806, + "grad_norm": 1.1715761852419602, + "learning_rate": 2.699785989139832e-07, + "loss": 0.6320512294769287, + "step": 6706 + }, + { + "epoch": 1.5453917050691244, + "grad_norm": 1.2156391686389374, + "learning_rate": 2.697182783000246e-07, + "loss": 0.8327566385269165, + "step": 6707 + }, + { + "epoch": 1.5456221198156683, + "grad_norm": 1.2605126330062313, + "learning_rate": 2.6945806368549063e-07, + "loss": 0.8732178211212158, + "step": 6708 + }, + { + "epoch": 1.5458525345622118, + "grad_norm": 1.3881676599881438, + "learning_rate": 2.69197955108152e-07, + "loss": 0.8709380626678467, + "step": 6709 + }, + { + "epoch": 1.5460829493087558, + "grad_norm": 1.2029107229444744, + "learning_rate": 2.689379526057628e-07, + "loss": 0.7821739912033081, + "step": 6710 + }, + { + "epoch": 1.5463133640552995, + "grad_norm": 1.2268892680878298, + "learning_rate": 2.686780562160615e-07, + "loss": 0.8658162355422974, + "step": 6711 + }, + { + "epoch": 1.5465437788018432, + "grad_norm": 0.9914521746084854, + "learning_rate": 2.6841826597677274e-07, + "loss": 0.6354731321334839, + "step": 6712 + }, + { + "epoch": 
1.5467741935483872, + "grad_norm": 1.132983970089502, + "learning_rate": 2.68158581925604e-07, + "loss": 0.8000082969665527, + "step": 6713 + }, + { + "epoch": 1.547004608294931, + "grad_norm": 1.0140012222754493, + "learning_rate": 2.6789900410024804e-07, + "loss": 0.7998030185699463, + "step": 6714 + }, + { + "epoch": 1.5472350230414746, + "grad_norm": 1.2207312006862205, + "learning_rate": 2.676395325383827e-07, + "loss": 0.861609935760498, + "step": 6715 + }, + { + "epoch": 1.5474654377880186, + "grad_norm": 1.2739007648131329, + "learning_rate": 2.6738016727766976e-07, + "loss": 0.8119577765464783, + "step": 6716 + }, + { + "epoch": 1.547695852534562, + "grad_norm": 1.1272023201701244, + "learning_rate": 2.671209083557553e-07, + "loss": 0.7704594135284424, + "step": 6717 + }, + { + "epoch": 1.547926267281106, + "grad_norm": 1.1924986504981143, + "learning_rate": 2.6686175581027114e-07, + "loss": 0.7577236890792847, + "step": 6718 + }, + { + "epoch": 1.5481566820276498, + "grad_norm": 1.438095427566863, + "learning_rate": 2.666027096788326e-07, + "loss": 0.8362265825271606, + "step": 6719 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 1.3282450269784174, + "learning_rate": 2.6634376999903984e-07, + "loss": 0.7604315280914307, + "step": 6720 + }, + { + "epoch": 1.5486175115207375, + "grad_norm": 1.0996855935996066, + "learning_rate": 2.6608493680847757e-07, + "loss": 0.7181323766708374, + "step": 6721 + }, + { + "epoch": 1.548847926267281, + "grad_norm": 1.408245929611007, + "learning_rate": 2.6582621014471495e-07, + "loss": 0.8613896369934082, + "step": 6722 + }, + { + "epoch": 1.549078341013825, + "grad_norm": 1.1355853758662044, + "learning_rate": 2.6556759004530616e-07, + "loss": 0.6254151463508606, + "step": 6723 + }, + { + "epoch": 1.5493087557603686, + "grad_norm": 1.1737642272227355, + "learning_rate": 2.6530907654778957e-07, + "loss": 0.7960973381996155, + "step": 6724 + }, + { + "epoch": 1.5495391705069124, + "grad_norm": 1.1419390810119388, + 
"learning_rate": 2.6505066968968747e-07, + "loss": 0.7899094820022583, + "step": 6725 + }, + { + "epoch": 1.5497695852534563, + "grad_norm": 0.9820941780775652, + "learning_rate": 2.647923695085081e-07, + "loss": 0.6578950881958008, + "step": 6726 + }, + { + "epoch": 1.55, + "grad_norm": 1.3013325638388529, + "learning_rate": 2.64534176041743e-07, + "loss": 0.737798810005188, + "step": 6727 + }, + { + "epoch": 1.5502304147465438, + "grad_norm": 0.9487414790323747, + "learning_rate": 2.642760893268684e-07, + "loss": 0.7809627056121826, + "step": 6728 + }, + { + "epoch": 1.5504608294930877, + "grad_norm": 0.9991258167716155, + "learning_rate": 2.640181094013456e-07, + "loss": 0.6693655252456665, + "step": 6729 + }, + { + "epoch": 1.5506912442396312, + "grad_norm": 0.8705752911958233, + "learning_rate": 2.6376023630262003e-07, + "loss": 0.7264609932899475, + "step": 6730 + }, + { + "epoch": 1.5509216589861752, + "grad_norm": 1.0975251127061347, + "learning_rate": 2.635024700681211e-07, + "loss": 0.7585712671279907, + "step": 6731 + }, + { + "epoch": 1.551152073732719, + "grad_norm": 1.520332751892112, + "learning_rate": 2.6324481073526404e-07, + "loss": 0.7335324287414551, + "step": 6732 + }, + { + "epoch": 1.5513824884792626, + "grad_norm": 1.1271215778218124, + "learning_rate": 2.629872583414473e-07, + "loss": 0.835372805595398, + "step": 6733 + }, + { + "epoch": 1.5516129032258066, + "grad_norm": 1.231737661164668, + "learning_rate": 2.6272981292405405e-07, + "loss": 0.8069926500320435, + "step": 6734 + }, + { + "epoch": 1.55184331797235, + "grad_norm": 1.2110282300687614, + "learning_rate": 2.6247247452045285e-07, + "loss": 0.7548434138298035, + "step": 6735 + }, + { + "epoch": 1.552073732718894, + "grad_norm": 1.281837931597139, + "learning_rate": 2.6221524316799546e-07, + "loss": 0.6907505989074707, + "step": 6736 + }, + { + "epoch": 1.5523041474654378, + "grad_norm": 1.2384070012918627, + "learning_rate": 2.619581189040185e-07, + "loss": 0.8544988632202148, + 
"step": 6737 + }, + { + "epoch": 1.5525345622119815, + "grad_norm": 1.024260684065218, + "learning_rate": 2.6170110176584404e-07, + "loss": 0.7176710367202759, + "step": 6738 + }, + { + "epoch": 1.5527649769585254, + "grad_norm": 1.1771656195687117, + "learning_rate": 2.6144419179077715e-07, + "loss": 0.7160323858261108, + "step": 6739 + }, + { + "epoch": 1.5529953917050692, + "grad_norm": 1.2619778254885654, + "learning_rate": 2.6118738901610806e-07, + "loss": 0.7749248743057251, + "step": 6740 + }, + { + "epoch": 1.553225806451613, + "grad_norm": 1.3014936029444653, + "learning_rate": 2.6093069347911145e-07, + "loss": 0.7701436281204224, + "step": 6741 + }, + { + "epoch": 1.5534562211981566, + "grad_norm": 1.2206842608778186, + "learning_rate": 2.606741052170459e-07, + "loss": 0.6725181341171265, + "step": 6742 + }, + { + "epoch": 1.5536866359447004, + "grad_norm": 1.0193653205430255, + "learning_rate": 2.6041762426715563e-07, + "loss": 0.7730624675750732, + "step": 6743 + }, + { + "epoch": 1.5539170506912443, + "grad_norm": 0.9417911057706564, + "learning_rate": 2.601612506666682e-07, + "loss": 0.7083867788314819, + "step": 6744 + }, + { + "epoch": 1.554147465437788, + "grad_norm": 1.1436343405561136, + "learning_rate": 2.599049844527953e-07, + "loss": 0.7680408954620361, + "step": 6745 + }, + { + "epoch": 1.5543778801843318, + "grad_norm": 0.9401611092461176, + "learning_rate": 2.596488256627346e-07, + "loss": 0.7145194411277771, + "step": 6746 + }, + { + "epoch": 1.5546082949308757, + "grad_norm": 1.6305632532659482, + "learning_rate": 2.593927743336667e-07, + "loss": 0.8626812696456909, + "step": 6747 + }, + { + "epoch": 1.5548387096774192, + "grad_norm": 1.1326626029703477, + "learning_rate": 2.591368305027569e-07, + "loss": 0.775201678276062, + "step": 6748 + }, + { + "epoch": 1.5550691244239632, + "grad_norm": 1.1775115850016065, + "learning_rate": 2.588809942071557e-07, + "loss": 0.9363858699798584, + "step": 6749 + }, + { + "epoch": 1.555299539170507, + 
"grad_norm": 1.0406152793499837, + "learning_rate": 2.5862526548399697e-07, + "loss": 0.8079385757446289, + "step": 6750 + }, + { + "epoch": 1.5555299539170506, + "grad_norm": 1.2405408742249928, + "learning_rate": 2.5836964437039934e-07, + "loss": 0.8635082840919495, + "step": 6751 + }, + { + "epoch": 1.5557603686635946, + "grad_norm": 1.072904507718934, + "learning_rate": 2.581141309034662e-07, + "loss": 0.7840827703475952, + "step": 6752 + }, + { + "epoch": 1.5559907834101383, + "grad_norm": 1.202200191511419, + "learning_rate": 2.5785872512028497e-07, + "loss": 0.7833336591720581, + "step": 6753 + }, + { + "epoch": 1.556221198156682, + "grad_norm": 1.2301348726534915, + "learning_rate": 2.576034270579269e-07, + "loss": 0.7340226173400879, + "step": 6754 + }, + { + "epoch": 1.5564516129032258, + "grad_norm": 0.9782804135142905, + "learning_rate": 2.5734823675344895e-07, + "loss": 0.6423541307449341, + "step": 6755 + }, + { + "epoch": 1.5566820276497695, + "grad_norm": 1.1992594758940591, + "learning_rate": 2.570931542438913e-07, + "loss": 0.7772454619407654, + "step": 6756 + }, + { + "epoch": 1.5569124423963134, + "grad_norm": 1.192101331643462, + "learning_rate": 2.568381795662785e-07, + "loss": 0.8113390803337097, + "step": 6757 + }, + { + "epoch": 1.5571428571428572, + "grad_norm": 1.1257023205339645, + "learning_rate": 2.5658331275762045e-07, + "loss": 0.6688467264175415, + "step": 6758 + }, + { + "epoch": 1.557373271889401, + "grad_norm": 1.0966214019602503, + "learning_rate": 2.5632855385491037e-07, + "loss": 0.8140766620635986, + "step": 6759 + }, + { + "epoch": 1.5576036866359448, + "grad_norm": 1.0260387911312179, + "learning_rate": 2.560739028951262e-07, + "loss": 0.7661154270172119, + "step": 6760 + }, + { + "epoch": 1.5578341013824883, + "grad_norm": 1.2298722431512563, + "learning_rate": 2.558193599152302e-07, + "loss": 0.6781749725341797, + "step": 6761 + }, + { + "epoch": 1.5580645161290323, + "grad_norm": 1.62266115954538, + "learning_rate": 
2.5556492495216865e-07, + "loss": 0.8885331749916077, + "step": 6762 + }, + { + "epoch": 1.558294930875576, + "grad_norm": 1.3197551931331304, + "learning_rate": 2.55310598042873e-07, + "loss": 0.799277663230896, + "step": 6763 + }, + { + "epoch": 1.5585253456221198, + "grad_norm": 1.205426943239231, + "learning_rate": 2.550563792242583e-07, + "loss": 0.8288404941558838, + "step": 6764 + }, + { + "epoch": 1.5587557603686637, + "grad_norm": 1.1206026594489704, + "learning_rate": 2.5480226853322397e-07, + "loss": 0.9452340602874756, + "step": 6765 + }, + { + "epoch": 1.5589861751152074, + "grad_norm": 1.068059951967386, + "learning_rate": 2.5454826600665347e-07, + "loss": 0.6716231107711792, + "step": 6766 + }, + { + "epoch": 1.5592165898617512, + "grad_norm": 0.9885922984637816, + "learning_rate": 2.542943716814157e-07, + "loss": 0.90239417552948, + "step": 6767 + }, + { + "epoch": 1.5594470046082949, + "grad_norm": 1.306788685526263, + "learning_rate": 2.5404058559436225e-07, + "loss": 0.7895521521568298, + "step": 6768 + }, + { + "epoch": 1.5596774193548386, + "grad_norm": 1.1707304874415911, + "learning_rate": 2.537869077823307e-07, + "loss": 0.8097352385520935, + "step": 6769 + }, + { + "epoch": 1.5599078341013826, + "grad_norm": 1.2075274904697726, + "learning_rate": 2.535333382821415e-07, + "loss": 0.7599455118179321, + "step": 6770 + }, + { + "epoch": 1.5601382488479263, + "grad_norm": 1.3869678105449568, + "learning_rate": 2.5327987713059986e-07, + "loss": 0.8735921382904053, + "step": 6771 + }, + { + "epoch": 1.56036866359447, + "grad_norm": 1.3359870563601237, + "learning_rate": 2.530265243644958e-07, + "loss": 0.7263825535774231, + "step": 6772 + }, + { + "epoch": 1.560599078341014, + "grad_norm": 1.2240386443766704, + "learning_rate": 2.5277328002060296e-07, + "loss": 0.8642966747283936, + "step": 6773 + }, + { + "epoch": 1.5608294930875575, + "grad_norm": 0.9860249164323385, + "learning_rate": 2.525201441356789e-07, + "loss": 0.6928948163986206, + 
"step": 6774 + }, + { + "epoch": 1.5610599078341014, + "grad_norm": 1.0977742625281808, + "learning_rate": 2.522671167464667e-07, + "loss": 0.7841427326202393, + "step": 6775 + }, + { + "epoch": 1.5612903225806452, + "grad_norm": 1.102415991736206, + "learning_rate": 2.5201419788969267e-07, + "loss": 0.6539766192436218, + "step": 6776 + }, + { + "epoch": 1.5615207373271889, + "grad_norm": 1.3087388284847004, + "learning_rate": 2.5176138760206734e-07, + "loss": 0.7817956805229187, + "step": 6777 + }, + { + "epoch": 1.5617511520737328, + "grad_norm": 1.2176256508295467, + "learning_rate": 2.5150868592028626e-07, + "loss": 0.7847198843955994, + "step": 6778 + }, + { + "epoch": 1.5619815668202763, + "grad_norm": 1.1618742898915668, + "learning_rate": 2.5125609288102856e-07, + "loss": 0.8248952627182007, + "step": 6779 + }, + { + "epoch": 1.5622119815668203, + "grad_norm": 1.315546210832164, + "learning_rate": 2.510036085209578e-07, + "loss": 0.8099820613861084, + "step": 6780 + }, + { + "epoch": 1.562442396313364, + "grad_norm": 1.0605499181430498, + "learning_rate": 2.5075123287672173e-07, + "loss": 0.7764754295349121, + "step": 6781 + }, + { + "epoch": 1.5626728110599077, + "grad_norm": 1.376636441531992, + "learning_rate": 2.5049896598495234e-07, + "loss": 0.8055214285850525, + "step": 6782 + }, + { + "epoch": 1.5629032258064517, + "grad_norm": 1.2086842805836235, + "learning_rate": 2.502468078822656e-07, + "loss": 0.7536123991012573, + "step": 6783 + }, + { + "epoch": 1.5631336405529954, + "grad_norm": 1.3330366284043236, + "learning_rate": 2.499947586052623e-07, + "loss": 0.8212461471557617, + "step": 6784 + }, + { + "epoch": 1.5633640552995391, + "grad_norm": 1.087165735027238, + "learning_rate": 2.49742818190527e-07, + "loss": 0.7297977209091187, + "step": 6785 + }, + { + "epoch": 1.563594470046083, + "grad_norm": 1.3633339944793545, + "learning_rate": 2.494909866746282e-07, + "loss": 0.752082109451294, + "step": 6786 + }, + { + "epoch": 1.5638248847926266, + 
"grad_norm": 1.231077416550479, + "learning_rate": 2.4923926409411934e-07, + "loss": 0.9181928634643555, + "step": 6787 + }, + { + "epoch": 1.5640552995391706, + "grad_norm": 1.263799738870316, + "learning_rate": 2.489876504855374e-07, + "loss": 0.8607058525085449, + "step": 6788 + }, + { + "epoch": 1.5642857142857143, + "grad_norm": 1.4864085600196295, + "learning_rate": 2.4873614588540347e-07, + "loss": 0.9659625887870789, + "step": 6789 + }, + { + "epoch": 1.564516129032258, + "grad_norm": 1.7132447669994355, + "learning_rate": 2.4848475033022377e-07, + "loss": 0.8357822299003601, + "step": 6790 + }, + { + "epoch": 1.564746543778802, + "grad_norm": 1.4493565138453182, + "learning_rate": 2.482334638564877e-07, + "loss": 0.7871281504631042, + "step": 6791 + }, + { + "epoch": 1.5649769585253455, + "grad_norm": 0.9644716518923556, + "learning_rate": 2.4798228650066874e-07, + "loss": 0.7221591472625732, + "step": 6792 + }, + { + "epoch": 1.5652073732718894, + "grad_norm": 1.217051022182652, + "learning_rate": 2.4773121829922586e-07, + "loss": 0.7399123907089233, + "step": 6793 + }, + { + "epoch": 1.5654377880184331, + "grad_norm": 1.6036073035934815, + "learning_rate": 2.474802592886003e-07, + "loss": 0.8159279227256775, + "step": 6794 + }, + { + "epoch": 1.5656682027649769, + "grad_norm": 1.208678395846015, + "learning_rate": 2.472294095052192e-07, + "loss": 0.8222753405570984, + "step": 6795 + }, + { + "epoch": 1.5658986175115208, + "grad_norm": 1.0411919729384558, + "learning_rate": 2.469786689854928e-07, + "loss": 0.6586673259735107, + "step": 6796 + }, + { + "epoch": 1.5661290322580645, + "grad_norm": 1.0728597460775429, + "learning_rate": 2.467280377658154e-07, + "loss": 0.8361790180206299, + "step": 6797 + }, + { + "epoch": 1.5663594470046083, + "grad_norm": 1.2928413385952742, + "learning_rate": 2.464775158825665e-07, + "loss": 0.7669099569320679, + "step": 6798 + }, + { + "epoch": 1.5665898617511522, + "grad_norm": 1.331214255352709, + "learning_rate": 
2.462271033721086e-07, + "loss": 0.7876452207565308, + "step": 6799 + }, + { + "epoch": 1.5668202764976957, + "grad_norm": 1.2617656160077577, + "learning_rate": 2.459768002707887e-07, + "loss": 0.7932916879653931, + "step": 6800 + }, + { + "epoch": 1.5670506912442397, + "grad_norm": 1.1101874723309544, + "learning_rate": 2.457266066149382e-07, + "loss": 0.734020471572876, + "step": 6801 + }, + { + "epoch": 1.5672811059907834, + "grad_norm": 1.2001011742733312, + "learning_rate": 2.4547652244087216e-07, + "loss": 0.6975284814834595, + "step": 6802 + }, + { + "epoch": 1.5675115207373271, + "grad_norm": 1.213830843525294, + "learning_rate": 2.452265477848896e-07, + "loss": 0.7214465737342834, + "step": 6803 + }, + { + "epoch": 1.567741935483871, + "grad_norm": 1.1586033079782525, + "learning_rate": 2.4497668268327485e-07, + "loss": 0.8645110130310059, + "step": 6804 + }, + { + "epoch": 1.5679723502304146, + "grad_norm": 1.0991857687698348, + "learning_rate": 2.4472692717229504e-07, + "loss": 0.7389887571334839, + "step": 6805 + }, + { + "epoch": 1.5682027649769585, + "grad_norm": 1.206958266137894, + "learning_rate": 2.4447728128820165e-07, + "loss": 0.8462876081466675, + "step": 6806 + }, + { + "epoch": 1.5684331797235023, + "grad_norm": 1.2507487710365972, + "learning_rate": 2.44227745067231e-07, + "loss": 0.824936032295227, + "step": 6807 + }, + { + "epoch": 1.568663594470046, + "grad_norm": 1.2566804457387248, + "learning_rate": 2.439783185456027e-07, + "loss": 0.8516823053359985, + "step": 6808 + }, + { + "epoch": 1.56889400921659, + "grad_norm": 1.065798809017728, + "learning_rate": 2.4372900175952015e-07, + "loss": 0.6154674291610718, + "step": 6809 + }, + { + "epoch": 1.5691244239631337, + "grad_norm": 1.2816681742105784, + "learning_rate": 2.434797947451722e-07, + "loss": 0.7769260406494141, + "step": 6810 + }, + { + "epoch": 1.5693548387096774, + "grad_norm": 1.2232245245328917, + "learning_rate": 2.432306975387306e-07, + "loss": 0.9525332450866699, + 
"step": 6811 + }, + { + "epoch": 1.5695852534562214, + "grad_norm": 1.3409057347397177, + "learning_rate": 2.429817101763511e-07, + "loss": 0.7537581920623779, + "step": 6812 + }, + { + "epoch": 1.5698156682027649, + "grad_norm": 1.3548012775304474, + "learning_rate": 2.427328326941744e-07, + "loss": 0.814711332321167, + "step": 6813 + }, + { + "epoch": 1.5700460829493088, + "grad_norm": 1.3820372699413255, + "learning_rate": 2.4248406512832466e-07, + "loss": 0.708736777305603, + "step": 6814 + }, + { + "epoch": 1.5702764976958525, + "grad_norm": 1.1061554332755352, + "learning_rate": 2.422354075149098e-07, + "loss": 0.6757712960243225, + "step": 6815 + }, + { + "epoch": 1.5705069124423963, + "grad_norm": 1.0865188505414496, + "learning_rate": 2.4198685989002257e-07, + "loss": 0.736266553401947, + "step": 6816 + }, + { + "epoch": 1.5707373271889402, + "grad_norm": 1.1180343138508952, + "learning_rate": 2.417384222897392e-07, + "loss": 0.7423173189163208, + "step": 6817 + }, + { + "epoch": 1.5709677419354837, + "grad_norm": 1.2076049425001651, + "learning_rate": 2.414900947501197e-07, + "loss": 0.7260550260543823, + "step": 6818 + }, + { + "epoch": 1.5711981566820277, + "grad_norm": 1.241277027009942, + "learning_rate": 2.4124187730720915e-07, + "loss": 0.7125939130783081, + "step": 6819 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 1.1330555560067848, + "learning_rate": 2.409937699970356e-07, + "loss": 0.7429558634757996, + "step": 6820 + }, + { + "epoch": 1.5716589861751151, + "grad_norm": 1.1709438494600335, + "learning_rate": 2.407457728556115e-07, + "loss": 0.7166736721992493, + "step": 6821 + }, + { + "epoch": 1.571889400921659, + "grad_norm": 1.1783418664080478, + "learning_rate": 2.4049788591893336e-07, + "loss": 0.7438491582870483, + "step": 6822 + }, + { + "epoch": 1.5721198156682028, + "grad_norm": 1.3579191422740273, + "learning_rate": 2.402501092229814e-07, + "loss": 0.8031798601150513, + "step": 6823 + }, + { + "epoch": 1.5723502304147465, + 
"grad_norm": 1.3256875261480106, + "learning_rate": 2.400024428037206e-07, + "loss": 0.7067087888717651, + "step": 6824 + }, + { + "epoch": 1.5725806451612905, + "grad_norm": 1.1524386121511956, + "learning_rate": 2.3975488669709906e-07, + "loss": 0.7147783041000366, + "step": 6825 + }, + { + "epoch": 1.572811059907834, + "grad_norm": 1.2529979656124484, + "learning_rate": 2.395074409390491e-07, + "loss": 0.8534795641899109, + "step": 6826 + }, + { + "epoch": 1.573041474654378, + "grad_norm": 1.0527069171574706, + "learning_rate": 2.392601055654875e-07, + "loss": 0.7630984783172607, + "step": 6827 + }, + { + "epoch": 1.5732718894009217, + "grad_norm": 1.3268090351372508, + "learning_rate": 2.390128806123145e-07, + "loss": 0.9395428895950317, + "step": 6828 + }, + { + "epoch": 1.5735023041474654, + "grad_norm": 1.1905263432335205, + "learning_rate": 2.3876576611541423e-07, + "loss": 0.7086023092269897, + "step": 6829 + }, + { + "epoch": 1.5737327188940093, + "grad_norm": 1.0320188306367468, + "learning_rate": 2.385187621106555e-07, + "loss": 0.6937201619148254, + "step": 6830 + }, + { + "epoch": 1.5739631336405528, + "grad_norm": 1.1238131407833931, + "learning_rate": 2.3827186863389037e-07, + "loss": 0.7339247465133667, + "step": 6831 + }, + { + "epoch": 1.5741935483870968, + "grad_norm": 0.9948868064813976, + "learning_rate": 2.3802508572095493e-07, + "loss": 0.8453131318092346, + "step": 6832 + }, + { + "epoch": 1.5744239631336405, + "grad_norm": 1.2870129222879585, + "learning_rate": 2.377784134076698e-07, + "loss": 0.7303619384765625, + "step": 6833 + }, + { + "epoch": 1.5746543778801843, + "grad_norm": 1.1663952236638828, + "learning_rate": 2.3753185172983893e-07, + "loss": 0.9635858535766602, + "step": 6834 + }, + { + "epoch": 1.5748847926267282, + "grad_norm": 0.9711435467160289, + "learning_rate": 2.3728540072324998e-07, + "loss": 0.7174761295318604, + "step": 6835 + }, + { + "epoch": 1.575115207373272, + "grad_norm": 1.0168865512931398, + "learning_rate": 
2.3703906042367584e-07, + "loss": 0.7375633716583252, + "step": 6836 + }, + { + "epoch": 1.5753456221198157, + "grad_norm": 1.0569071581049987, + "learning_rate": 2.3679283086687206e-07, + "loss": 0.8202652931213379, + "step": 6837 + }, + { + "epoch": 1.5755760368663596, + "grad_norm": 1.4428887155533328, + "learning_rate": 2.3654671208857823e-07, + "loss": 0.8448499441146851, + "step": 6838 + }, + { + "epoch": 1.5758064516129031, + "grad_norm": 1.3297185542360797, + "learning_rate": 2.3630070412451864e-07, + "loss": 0.7840893268585205, + "step": 6839 + }, + { + "epoch": 1.576036866359447, + "grad_norm": 1.1930310177318706, + "learning_rate": 2.3605480701040092e-07, + "loss": 0.8036940693855286, + "step": 6840 + }, + { + "epoch": 1.5762672811059908, + "grad_norm": 1.2730513650169084, + "learning_rate": 2.3580902078191666e-07, + "loss": 0.8333625793457031, + "step": 6841 + }, + { + "epoch": 1.5764976958525345, + "grad_norm": 1.3288439351572012, + "learning_rate": 2.3556334547474133e-07, + "loss": 0.804919958114624, + "step": 6842 + }, + { + "epoch": 1.5767281105990785, + "grad_norm": 1.2605928054638793, + "learning_rate": 2.3531778112453416e-07, + "loss": 0.752541720867157, + "step": 6843 + }, + { + "epoch": 1.576958525345622, + "grad_norm": 1.0378289852617786, + "learning_rate": 2.3507232776693896e-07, + "loss": 0.647051215171814, + "step": 6844 + }, + { + "epoch": 1.577188940092166, + "grad_norm": 1.1139826400416593, + "learning_rate": 2.3482698543758285e-07, + "loss": 0.7546517848968506, + "step": 6845 + }, + { + "epoch": 1.5774193548387097, + "grad_norm": 1.0118514872509952, + "learning_rate": 2.345817541720766e-07, + "loss": 0.8773425817489624, + "step": 6846 + }, + { + "epoch": 1.5776497695852534, + "grad_norm": 1.1485612061840695, + "learning_rate": 2.3433663400601567e-07, + "loss": 0.9538160562515259, + "step": 6847 + }, + { + "epoch": 1.5778801843317973, + "grad_norm": 1.0298677066929223, + "learning_rate": 2.340916249749787e-07, + "loss": 
0.6275157332420349, + "step": 6848 + }, + { + "epoch": 1.578110599078341, + "grad_norm": 1.1889533964841936, + "learning_rate": 2.3384672711452812e-07, + "loss": 0.7729284167289734, + "step": 6849 + }, + { + "epoch": 1.5783410138248848, + "grad_norm": 1.4210079123943715, + "learning_rate": 2.3360194046021108e-07, + "loss": 0.8361644148826599, + "step": 6850 + }, + { + "epoch": 1.5785714285714287, + "grad_norm": 1.2305172757518368, + "learning_rate": 2.3335726504755793e-07, + "loss": 0.6782940626144409, + "step": 6851 + }, + { + "epoch": 1.5788018433179722, + "grad_norm": 1.3612688278959233, + "learning_rate": 2.3311270091208256e-07, + "loss": 0.8036615252494812, + "step": 6852 + }, + { + "epoch": 1.5790322580645162, + "grad_norm": 1.1729176601878941, + "learning_rate": 2.3286824808928362e-07, + "loss": 0.8450125455856323, + "step": 6853 + }, + { + "epoch": 1.57926267281106, + "grad_norm": 1.2162582175159786, + "learning_rate": 2.3262390661464303e-07, + "loss": 0.6546198725700378, + "step": 6854 + }, + { + "epoch": 1.5794930875576036, + "grad_norm": 1.4056383803669428, + "learning_rate": 2.3237967652362612e-07, + "loss": 0.8201385140419006, + "step": 6855 + }, + { + "epoch": 1.5797235023041476, + "grad_norm": 1.3504561324932176, + "learning_rate": 2.3213555785168336e-07, + "loss": 0.8753508925437927, + "step": 6856 + }, + { + "epoch": 1.579953917050691, + "grad_norm": 1.2672866740553073, + "learning_rate": 2.3189155063424782e-07, + "loss": 0.5884093642234802, + "step": 6857 + }, + { + "epoch": 1.580184331797235, + "grad_norm": 1.0135145180947078, + "learning_rate": 2.3164765490673654e-07, + "loss": 0.6494029760360718, + "step": 6858 + }, + { + "epoch": 1.5804147465437788, + "grad_norm": 1.1478304397345402, + "learning_rate": 2.3140387070455126e-07, + "loss": 0.7407097220420837, + "step": 6859 + }, + { + "epoch": 1.5806451612903225, + "grad_norm": 1.3351942864944542, + "learning_rate": 2.3116019806307673e-07, + "loss": 0.8934177160263062, + "step": 6860 + }, + { + 
"epoch": 1.5808755760368665, + "grad_norm": 1.0696222163552975, + "learning_rate": 2.309166370176816e-07, + "loss": 0.7487956881523132, + "step": 6861 + }, + { + "epoch": 1.5811059907834102, + "grad_norm": 1.316829236490256, + "learning_rate": 2.3067318760371845e-07, + "loss": 0.7744357585906982, + "step": 6862 + }, + { + "epoch": 1.581336405529954, + "grad_norm": 1.3202738468289819, + "learning_rate": 2.304298498565237e-07, + "loss": 0.8871743679046631, + "step": 6863 + }, + { + "epoch": 1.5815668202764976, + "grad_norm": 1.3064493000042272, + "learning_rate": 2.3018662381141717e-07, + "loss": 0.7865666151046753, + "step": 6864 + }, + { + "epoch": 1.5817972350230414, + "grad_norm": 1.4125222114326161, + "learning_rate": 2.2994350950370334e-07, + "loss": 0.8416531682014465, + "step": 6865 + }, + { + "epoch": 1.5820276497695853, + "grad_norm": 1.5275008378701445, + "learning_rate": 2.2970050696866972e-07, + "loss": 0.8443950414657593, + "step": 6866 + }, + { + "epoch": 1.582258064516129, + "grad_norm": 1.4005476364990852, + "learning_rate": 2.2945761624158756e-07, + "loss": 0.7770054340362549, + "step": 6867 + }, + { + "epoch": 1.5824884792626728, + "grad_norm": 1.2159355438440163, + "learning_rate": 2.2921483735771252e-07, + "loss": 0.7263047695159912, + "step": 6868 + }, + { + "epoch": 1.5827188940092167, + "grad_norm": 1.3958985609002883, + "learning_rate": 2.2897217035228312e-07, + "loss": 0.8288376927375793, + "step": 6869 + }, + { + "epoch": 1.5829493087557602, + "grad_norm": 1.2630380344196672, + "learning_rate": 2.2872961526052292e-07, + "loss": 0.8325462937355042, + "step": 6870 + }, + { + "epoch": 1.5831797235023042, + "grad_norm": 1.317005879944655, + "learning_rate": 2.284871721176379e-07, + "loss": 0.7412815093994141, + "step": 6871 + }, + { + "epoch": 1.583410138248848, + "grad_norm": 1.2813974132427688, + "learning_rate": 2.2824484095881823e-07, + "loss": 0.8958117961883545, + "step": 6872 + }, + { + "epoch": 1.5836405529953916, + "grad_norm": 
1.228628782021168, + "learning_rate": 2.2800262181923858e-07, + "loss": 0.8374444246292114, + "step": 6873 + }, + { + "epoch": 1.5838709677419356, + "grad_norm": 1.2394995315660131, + "learning_rate": 2.2776051473405634e-07, + "loss": 0.7900353670120239, + "step": 6874 + }, + { + "epoch": 1.5841013824884793, + "grad_norm": 1.129671125708823, + "learning_rate": 2.2751851973841285e-07, + "loss": 0.7420408725738525, + "step": 6875 + }, + { + "epoch": 1.584331797235023, + "grad_norm": 1.3245275433928243, + "learning_rate": 2.2727663686743382e-07, + "loss": 0.8902314305305481, + "step": 6876 + }, + { + "epoch": 1.5845622119815668, + "grad_norm": 1.2122656586799572, + "learning_rate": 2.27034866156228e-07, + "loss": 0.739869236946106, + "step": 6877 + }, + { + "epoch": 1.5847926267281105, + "grad_norm": 1.169654737499052, + "learning_rate": 2.2679320763988775e-07, + "loss": 0.8340646624565125, + "step": 6878 + }, + { + "epoch": 1.5850230414746544, + "grad_norm": 1.3076425110312813, + "learning_rate": 2.2655166135349013e-07, + "loss": 0.7501030564308167, + "step": 6879 + }, + { + "epoch": 1.5852534562211982, + "grad_norm": 1.0619799072208593, + "learning_rate": 2.2631022733209504e-07, + "loss": 0.722623348236084, + "step": 6880 + }, + { + "epoch": 1.585483870967742, + "grad_norm": 1.4046404033814042, + "learning_rate": 2.260689056107461e-07, + "loss": 0.8319696187973022, + "step": 6881 + }, + { + "epoch": 1.5857142857142859, + "grad_norm": 1.2824383261655956, + "learning_rate": 2.2582769622447107e-07, + "loss": 0.85502028465271, + "step": 6882 + }, + { + "epoch": 1.5859447004608294, + "grad_norm": 1.444500113904039, + "learning_rate": 2.2558659920828095e-07, + "loss": 0.7942626476287842, + "step": 6883 + }, + { + "epoch": 1.5861751152073733, + "grad_norm": 0.9346347634599198, + "learning_rate": 2.253456145971705e-07, + "loss": 0.6731030941009521, + "step": 6884 + }, + { + "epoch": 1.586405529953917, + "grad_norm": 1.2567565363582325, + "learning_rate": 
2.2510474242611887e-07, + "loss": 0.8479423522949219, + "step": 6885 + }, + { + "epoch": 1.5866359447004608, + "grad_norm": 1.0824322707106273, + "learning_rate": 2.2486398273008812e-07, + "loss": 0.7398810386657715, + "step": 6886 + }, + { + "epoch": 1.5868663594470047, + "grad_norm": 1.4531636253389437, + "learning_rate": 2.246233355440238e-07, + "loss": 0.8422881364822388, + "step": 6887 + }, + { + "epoch": 1.5870967741935482, + "grad_norm": 1.4298247398214885, + "learning_rate": 2.2438280090285612e-07, + "loss": 0.8307279944419861, + "step": 6888 + }, + { + "epoch": 1.5873271889400922, + "grad_norm": 1.3280924437525041, + "learning_rate": 2.2414237884149821e-07, + "loss": 0.8329004049301147, + "step": 6889 + }, + { + "epoch": 1.587557603686636, + "grad_norm": 1.196093026387475, + "learning_rate": 2.2390206939484645e-07, + "loss": 0.801641583442688, + "step": 6890 + }, + { + "epoch": 1.5877880184331796, + "grad_norm": 1.359543687074451, + "learning_rate": 2.2366187259778235e-07, + "loss": 0.9850986003875732, + "step": 6891 + }, + { + "epoch": 1.5880184331797236, + "grad_norm": 1.2770195506897435, + "learning_rate": 2.2342178848516935e-07, + "loss": 0.7169715166091919, + "step": 6892 + }, + { + "epoch": 1.5882488479262673, + "grad_norm": 1.1258655345605515, + "learning_rate": 2.2318181709185603e-07, + "loss": 0.7509033679962158, + "step": 6893 + }, + { + "epoch": 1.588479262672811, + "grad_norm": 1.2429319924869415, + "learning_rate": 2.2294195845267348e-07, + "loss": 0.6974655985832214, + "step": 6894 + }, + { + "epoch": 1.588709677419355, + "grad_norm": 1.1949954122245936, + "learning_rate": 2.227022126024367e-07, + "loss": 0.7388278245925903, + "step": 6895 + }, + { + "epoch": 1.5889400921658985, + "grad_norm": 1.1219112420315915, + "learning_rate": 2.2246257957594506e-07, + "loss": 0.6479122638702393, + "step": 6896 + }, + { + "epoch": 1.5891705069124424, + "grad_norm": 1.2556673774557678, + "learning_rate": 2.222230594079807e-07, + "loss": 0.759338915348053, 
+ "step": 6897 + }, + { + "epoch": 1.5894009216589862, + "grad_norm": 1.1747779352742982, + "learning_rate": 2.2198365213330937e-07, + "loss": 0.7299938201904297, + "step": 6898 + }, + { + "epoch": 1.58963133640553, + "grad_norm": 1.2072520940330866, + "learning_rate": 2.2174435778668122e-07, + "loss": 0.707555890083313, + "step": 6899 + }, + { + "epoch": 1.5898617511520738, + "grad_norm": 1.3083069601374675, + "learning_rate": 2.2150517640282918e-07, + "loss": 0.8311065435409546, + "step": 6900 + }, + { + "epoch": 1.5900921658986173, + "grad_norm": 1.1585381591481734, + "learning_rate": 2.2126610801647028e-07, + "loss": 0.6494649648666382, + "step": 6901 + }, + { + "epoch": 1.5903225806451613, + "grad_norm": 1.006735116508423, + "learning_rate": 2.2102715266230486e-07, + "loss": 0.6563294529914856, + "step": 6902 + }, + { + "epoch": 1.590552995391705, + "grad_norm": 1.0365958828861261, + "learning_rate": 2.207883103750171e-07, + "loss": 0.7426891326904297, + "step": 6903 + }, + { + "epoch": 1.5907834101382488, + "grad_norm": 0.9164747480191582, + "learning_rate": 2.2054958118927413e-07, + "loss": 0.7074661254882812, + "step": 6904 + }, + { + "epoch": 1.5910138248847927, + "grad_norm": 1.4657092079572216, + "learning_rate": 2.203109651397279e-07, + "loss": 0.8407880663871765, + "step": 6905 + }, + { + "epoch": 1.5912442396313364, + "grad_norm": 1.014884431152031, + "learning_rate": 2.2007246226101296e-07, + "loss": 0.7228440642356873, + "step": 6906 + }, + { + "epoch": 1.5914746543778802, + "grad_norm": 1.1100543617790197, + "learning_rate": 2.1983407258774733e-07, + "loss": 0.6988812685012817, + "step": 6907 + }, + { + "epoch": 1.591705069124424, + "grad_norm": 1.3237351414434337, + "learning_rate": 2.195957961545335e-07, + "loss": 0.793757438659668, + "step": 6908 + }, + { + "epoch": 1.5919354838709676, + "grad_norm": 1.2485526093365642, + "learning_rate": 2.1935763299595678e-07, + "loss": 0.8621397018432617, + "step": 6909 + }, + { + "epoch": 1.5921658986175116, 
+ "grad_norm": 1.2314950700356975, + "learning_rate": 2.1911958314658598e-07, + "loss": 0.7661364078521729, + "step": 6910 + }, + { + "epoch": 1.5923963133640553, + "grad_norm": 1.1937782252155265, + "learning_rate": 2.1888164664097408e-07, + "loss": 0.9322741031646729, + "step": 6911 + }, + { + "epoch": 1.592626728110599, + "grad_norm": 1.1787479005369867, + "learning_rate": 2.1864382351365717e-07, + "loss": 0.8411989212036133, + "step": 6912 + }, + { + "epoch": 1.592857142857143, + "grad_norm": 1.515351393614885, + "learning_rate": 2.1840611379915464e-07, + "loss": 0.8212479948997498, + "step": 6913 + }, + { + "epoch": 1.5930875576036865, + "grad_norm": 1.5032885798825617, + "learning_rate": 2.181685175319702e-07, + "loss": 0.7875508069992065, + "step": 6914 + }, + { + "epoch": 1.5933179723502304, + "grad_norm": 1.178669163135756, + "learning_rate": 2.1793103474659047e-07, + "loss": 0.8389852046966553, + "step": 6915 + }, + { + "epoch": 1.5935483870967742, + "grad_norm": 1.388906101423199, + "learning_rate": 2.1769366547748546e-07, + "loss": 0.8223046660423279, + "step": 6916 + }, + { + "epoch": 1.5937788018433179, + "grad_norm": 1.0682255683615596, + "learning_rate": 2.1745640975910962e-07, + "loss": 0.8427159786224365, + "step": 6917 + }, + { + "epoch": 1.5940092165898618, + "grad_norm": 1.2770752550871127, + "learning_rate": 2.172192676258996e-07, + "loss": 0.7448060512542725, + "step": 6918 + }, + { + "epoch": 1.5942396313364056, + "grad_norm": 1.1028377529507616, + "learning_rate": 2.1698223911227686e-07, + "loss": 0.7122288346290588, + "step": 6919 + }, + { + "epoch": 1.5944700460829493, + "grad_norm": 1.3801420957349657, + "learning_rate": 2.1674532425264548e-07, + "loss": 0.7712994813919067, + "step": 6920 + }, + { + "epoch": 1.5947004608294932, + "grad_norm": 1.4967308024498271, + "learning_rate": 2.1650852308139355e-07, + "loss": 0.9656664729118347, + "step": 6921 + }, + { + "epoch": 1.5949308755760367, + "grad_norm": 1.3725078407101703, + 
"learning_rate": 2.162718356328922e-07, + "loss": 0.748894214630127, + "step": 6922 + }, + { + "epoch": 1.5951612903225807, + "grad_norm": 1.0191925895935576, + "learning_rate": 2.1603526194149635e-07, + "loss": 0.6875454187393188, + "step": 6923 + }, + { + "epoch": 1.5953917050691244, + "grad_norm": 1.099493651981713, + "learning_rate": 2.1579880204154412e-07, + "loss": 0.8258690237998962, + "step": 6924 + }, + { + "epoch": 1.5956221198156681, + "grad_norm": 1.2228052738114181, + "learning_rate": 2.15562455967358e-07, + "loss": 0.7647902965545654, + "step": 6925 + }, + { + "epoch": 1.595852534562212, + "grad_norm": 1.0716326843288577, + "learning_rate": 2.1532622375324284e-07, + "loss": 0.7004281282424927, + "step": 6926 + }, + { + "epoch": 1.5960829493087556, + "grad_norm": 1.1015601686618846, + "learning_rate": 2.1509010543348726e-07, + "loss": 0.7500345706939697, + "step": 6927 + }, + { + "epoch": 1.5963133640552996, + "grad_norm": 1.1261005927566234, + "learning_rate": 2.148541010423641e-07, + "loss": 0.7300195693969727, + "step": 6928 + }, + { + "epoch": 1.5965437788018433, + "grad_norm": 1.0927883255058508, + "learning_rate": 2.1461821061412876e-07, + "loss": 0.7592284679412842, + "step": 6929 + }, + { + "epoch": 1.596774193548387, + "grad_norm": 1.265065855875648, + "learning_rate": 2.1438243418302016e-07, + "loss": 0.7179796099662781, + "step": 6930 + }, + { + "epoch": 1.597004608294931, + "grad_norm": 1.167267121775029, + "learning_rate": 2.1414677178326157e-07, + "loss": 0.8829631805419922, + "step": 6931 + }, + { + "epoch": 1.5972350230414747, + "grad_norm": 1.4030936435750112, + "learning_rate": 2.1391122344905865e-07, + "loss": 0.8661972880363464, + "step": 6932 + }, + { + "epoch": 1.5974654377880184, + "grad_norm": 1.4609293147197595, + "learning_rate": 2.136757892146007e-07, + "loss": 0.7774989604949951, + "step": 6933 + }, + { + "epoch": 1.5976958525345624, + "grad_norm": 1.2556066222087972, + "learning_rate": 2.1344046911406132e-07, + "loss": 
0.7343888878822327, + "step": 6934 + }, + { + "epoch": 1.5979262672811059, + "grad_norm": 1.1442684268001395, + "learning_rate": 2.132052631815966e-07, + "loss": 0.7810107469558716, + "step": 6935 + }, + { + "epoch": 1.5981566820276498, + "grad_norm": 1.343676205067389, + "learning_rate": 2.12970171451346e-07, + "loss": 0.7585299611091614, + "step": 6936 + }, + { + "epoch": 1.5983870967741935, + "grad_norm": 1.2827689520736418, + "learning_rate": 2.1273519395743344e-07, + "loss": 0.886371910572052, + "step": 6937 + }, + { + "epoch": 1.5986175115207373, + "grad_norm": 0.9767702062550015, + "learning_rate": 2.1250033073396523e-07, + "loss": 0.6986823081970215, + "step": 6938 + }, + { + "epoch": 1.5988479262672812, + "grad_norm": 1.2062052682782651, + "learning_rate": 2.122655818150312e-07, + "loss": 0.7524189352989197, + "step": 6939 + }, + { + "epoch": 1.5990783410138247, + "grad_norm": 1.1473232496595593, + "learning_rate": 2.120309472347055e-07, + "loss": 0.7699365615844727, + "step": 6940 + }, + { + "epoch": 1.5993087557603687, + "grad_norm": 1.188421090787615, + "learning_rate": 2.1179642702704458e-07, + "loss": 0.8112696409225464, + "step": 6941 + }, + { + "epoch": 1.5995391705069124, + "grad_norm": 1.377266755106213, + "learning_rate": 2.115620212260889e-07, + "loss": 0.7067416906356812, + "step": 6942 + }, + { + "epoch": 1.5997695852534561, + "grad_norm": 0.80841875970131, + "learning_rate": 2.1132772986586211e-07, + "loss": 0.787110447883606, + "step": 6943 + }, + { + "epoch": 1.6, + "grad_norm": 1.282613261539406, + "learning_rate": 2.11093552980371e-07, + "loss": 0.7356789112091064, + "step": 6944 + }, + { + "epoch": 1.6002304147465438, + "grad_norm": 1.2747758780049527, + "learning_rate": 2.1085949060360653e-07, + "loss": 0.8057125806808472, + "step": 6945 + }, + { + "epoch": 1.6004608294930875, + "grad_norm": 1.1828340962550294, + "learning_rate": 2.1062554276954225e-07, + "loss": 0.7169399261474609, + "step": 6946 + }, + { + "epoch": 1.6006912442396315, 
+ "grad_norm": 1.2018304676070681, + "learning_rate": 2.1039170951213526e-07, + "loss": 0.7219180464744568, + "step": 6947 + }, + { + "epoch": 1.600921658986175, + "grad_norm": 1.2736335133966967, + "learning_rate": 2.101579908653266e-07, + "loss": 0.7530789375305176, + "step": 6948 + }, + { + "epoch": 1.601152073732719, + "grad_norm": 1.2374620271602483, + "learning_rate": 2.0992438686303993e-07, + "loss": 0.8192377090454102, + "step": 6949 + }, + { + "epoch": 1.6013824884792627, + "grad_norm": 1.0987195977670645, + "learning_rate": 2.0969089753918223e-07, + "loss": 0.6904648542404175, + "step": 6950 + }, + { + "epoch": 1.6016129032258064, + "grad_norm": 1.2558183684284059, + "learning_rate": 2.0945752292764495e-07, + "loss": 0.7289770245552063, + "step": 6951 + }, + { + "epoch": 1.6018433179723504, + "grad_norm": 1.260283902447682, + "learning_rate": 2.0922426306230157e-07, + "loss": 0.8467620611190796, + "step": 6952 + }, + { + "epoch": 1.6020737327188939, + "grad_norm": 1.3650999598924758, + "learning_rate": 2.089911179770093e-07, + "loss": 0.7835153937339783, + "step": 6953 + }, + { + "epoch": 1.6023041474654378, + "grad_norm": 0.847985634813149, + "learning_rate": 2.0875808770560933e-07, + "loss": 0.6696668267250061, + "step": 6954 + }, + { + "epoch": 1.6025345622119815, + "grad_norm": 1.441689312728025, + "learning_rate": 2.0852517228192556e-07, + "loss": 0.8451364636421204, + "step": 6955 + }, + { + "epoch": 1.6027649769585253, + "grad_norm": 1.2628900414882365, + "learning_rate": 2.0829237173976487e-07, + "loss": 0.7917240858078003, + "step": 6956 + }, + { + "epoch": 1.6029953917050692, + "grad_norm": 1.2514606025933794, + "learning_rate": 2.0805968611291867e-07, + "loss": 0.791597843170166, + "step": 6957 + }, + { + "epoch": 1.603225806451613, + "grad_norm": 1.2854657657217543, + "learning_rate": 2.0782711543516063e-07, + "loss": 0.7571247816085815, + "step": 6958 + }, + { + "epoch": 1.6034562211981567, + "grad_norm": 1.1996866839711877, + 
"learning_rate": 2.075946597402478e-07, + "loss": 0.9196302890777588, + "step": 6959 + }, + { + "epoch": 1.6036866359447006, + "grad_norm": 0.9955085341059975, + "learning_rate": 2.0736231906192136e-07, + "loss": 0.7106618881225586, + "step": 6960 + }, + { + "epoch": 1.6039170506912441, + "grad_norm": 0.9090693582601959, + "learning_rate": 2.071300934339051e-07, + "loss": 0.8923465013504028, + "step": 6961 + }, + { + "epoch": 1.604147465437788, + "grad_norm": 1.1524730844586952, + "learning_rate": 2.0689798288990601e-07, + "loss": 0.6929241418838501, + "step": 6962 + }, + { + "epoch": 1.6043778801843318, + "grad_norm": 1.4736872345919192, + "learning_rate": 2.0666598746361487e-07, + "loss": 0.935944676399231, + "step": 6963 + }, + { + "epoch": 1.6046082949308755, + "grad_norm": 1.3002916307222088, + "learning_rate": 2.0643410718870536e-07, + "loss": 0.7442188262939453, + "step": 6964 + }, + { + "epoch": 1.6048387096774195, + "grad_norm": 1.13007905720726, + "learning_rate": 2.0620234209883446e-07, + "loss": 0.7340278625488281, + "step": 6965 + }, + { + "epoch": 1.605069124423963, + "grad_norm": 1.1841454047560163, + "learning_rate": 2.0597069222764297e-07, + "loss": 0.7436190247535706, + "step": 6966 + }, + { + "epoch": 1.605299539170507, + "grad_norm": 1.1998918795301519, + "learning_rate": 2.0573915760875406e-07, + "loss": 0.9109283685684204, + "step": 6967 + }, + { + "epoch": 1.6055299539170507, + "grad_norm": 1.362187790875206, + "learning_rate": 2.0550773827577518e-07, + "loss": 0.86224365234375, + "step": 6968 + }, + { + "epoch": 1.6057603686635944, + "grad_norm": 1.0973288140018649, + "learning_rate": 2.0527643426229636e-07, + "loss": 0.6873685121536255, + "step": 6969 + }, + { + "epoch": 1.6059907834101383, + "grad_norm": 1.2862613183491987, + "learning_rate": 2.0504524560189074e-07, + "loss": 0.7634609937667847, + "step": 6970 + }, + { + "epoch": 1.606221198156682, + "grad_norm": 1.642442078921259, + "learning_rate": 2.0481417232811572e-07, + "loss": 
0.7940595149993896, + "step": 6971 + }, + { + "epoch": 1.6064516129032258, + "grad_norm": 1.0579671129687211, + "learning_rate": 2.0458321447451078e-07, + "loss": 0.7109687924385071, + "step": 6972 + }, + { + "epoch": 1.6066820276497698, + "grad_norm": 1.3780414286693414, + "learning_rate": 2.04352372074599e-07, + "loss": 0.9476398825645447, + "step": 6973 + }, + { + "epoch": 1.6069124423963133, + "grad_norm": 1.3106188238946987, + "learning_rate": 2.0412164516188747e-07, + "loss": 0.7563579678535461, + "step": 6974 + }, + { + "epoch": 1.6071428571428572, + "grad_norm": 1.1912217950342037, + "learning_rate": 2.0389103376986538e-07, + "loss": 0.7928751707077026, + "step": 6975 + }, + { + "epoch": 1.607373271889401, + "grad_norm": 1.1927610489358789, + "learning_rate": 2.0366053793200565e-07, + "loss": 0.776961624622345, + "step": 6976 + }, + { + "epoch": 1.6076036866359447, + "grad_norm": 1.1830668942381175, + "learning_rate": 2.0343015768176496e-07, + "loss": 0.6511167883872986, + "step": 6977 + }, + { + "epoch": 1.6078341013824886, + "grad_norm": 1.3541662729221868, + "learning_rate": 2.0319989305258235e-07, + "loss": 0.6487337350845337, + "step": 6978 + }, + { + "epoch": 1.6080645161290321, + "grad_norm": 1.5271951763204938, + "learning_rate": 2.0296974407788004e-07, + "loss": 0.921454131603241, + "step": 6979 + }, + { + "epoch": 1.608294930875576, + "grad_norm": 1.0476613319531645, + "learning_rate": 2.0273971079106467e-07, + "loss": 0.8145809769630432, + "step": 6980 + }, + { + "epoch": 1.6085253456221198, + "grad_norm": 0.9495439447317249, + "learning_rate": 2.0250979322552474e-07, + "loss": 0.6655904054641724, + "step": 6981 + }, + { + "epoch": 1.6087557603686635, + "grad_norm": 1.1486957458539049, + "learning_rate": 2.0227999141463258e-07, + "loss": 0.777961254119873, + "step": 6982 + }, + { + "epoch": 1.6089861751152075, + "grad_norm": 1.3274428663782127, + "learning_rate": 2.0205030539174361e-07, + "loss": 0.6543164253234863, + "step": 6983 + }, + { + 
"epoch": 1.6092165898617512, + "grad_norm": 1.233780092778412, + "learning_rate": 2.018207351901966e-07, + "loss": 0.7842000722885132, + "step": 6984 + }, + { + "epoch": 1.609447004608295, + "grad_norm": 0.999384175284256, + "learning_rate": 2.0159128084331278e-07, + "loss": 0.7264418005943298, + "step": 6985 + }, + { + "epoch": 1.6096774193548387, + "grad_norm": 1.313414021265448, + "learning_rate": 2.0136194238439795e-07, + "loss": 0.8722596168518066, + "step": 6986 + }, + { + "epoch": 1.6099078341013824, + "grad_norm": 1.3518278161266697, + "learning_rate": 2.0113271984673997e-07, + "loss": 0.8162735104560852, + "step": 6987 + }, + { + "epoch": 1.6101382488479263, + "grad_norm": 1.212757185466248, + "learning_rate": 2.0090361326360982e-07, + "loss": 0.6962481737136841, + "step": 6988 + }, + { + "epoch": 1.61036866359447, + "grad_norm": 1.133716172506403, + "learning_rate": 2.0067462266826264e-07, + "loss": 0.8186852931976318, + "step": 6989 + }, + { + "epoch": 1.6105990783410138, + "grad_norm": 1.505728867210405, + "learning_rate": 2.0044574809393543e-07, + "loss": 0.8935987949371338, + "step": 6990 + }, + { + "epoch": 1.6108294930875577, + "grad_norm": 1.2824355796337807, + "learning_rate": 2.002169895738498e-07, + "loss": 0.9152865409851074, + "step": 6991 + }, + { + "epoch": 1.6110599078341012, + "grad_norm": 1.521529078332145, + "learning_rate": 1.9998834714120928e-07, + "loss": 0.8042874336242676, + "step": 6992 + }, + { + "epoch": 1.6112903225806452, + "grad_norm": 1.3198117612600044, + "learning_rate": 1.9975982082920083e-07, + "loss": 0.9621129035949707, + "step": 6993 + }, + { + "epoch": 1.611520737327189, + "grad_norm": 1.1154614331355635, + "learning_rate": 1.9953141067099533e-07, + "loss": 0.8296995162963867, + "step": 6994 + }, + { + "epoch": 1.6117511520737327, + "grad_norm": 1.0827522335122797, + "learning_rate": 1.9930311669974587e-07, + "loss": 0.8129373788833618, + "step": 6995 + }, + { + "epoch": 1.6119815668202766, + "grad_norm": 
1.359695561767368, + "learning_rate": 1.9907493894858874e-07, + "loss": 0.7450911998748779, + "step": 6996 + }, + { + "epoch": 1.6122119815668203, + "grad_norm": 1.2367503665171555, + "learning_rate": 1.9884687745064422e-07, + "loss": 0.798037052154541, + "step": 6997 + }, + { + "epoch": 1.612442396313364, + "grad_norm": 1.218969884225304, + "learning_rate": 1.9861893223901494e-07, + "loss": 0.8118857145309448, + "step": 6998 + }, + { + "epoch": 1.6126728110599078, + "grad_norm": 1.2176008366956401, + "learning_rate": 1.9839110334678632e-07, + "loss": 0.7954392433166504, + "step": 6999 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 1.2233633618619175, + "learning_rate": 1.9816339080702825e-07, + "loss": 0.8055616617202759, + "step": 7000 + }, + { + "epoch": 1.6131336405529955, + "grad_norm": 1.503254744382692, + "learning_rate": 1.979357946527924e-07, + "loss": 0.8949761986732483, + "step": 7001 + }, + { + "epoch": 1.6133640552995392, + "grad_norm": 1.376056206509758, + "learning_rate": 1.9770831491711427e-07, + "loss": 0.8327617645263672, + "step": 7002 + }, + { + "epoch": 1.613594470046083, + "grad_norm": 1.2867855951178133, + "learning_rate": 1.9748095163301215e-07, + "loss": 0.7593148946762085, + "step": 7003 + }, + { + "epoch": 1.6138248847926269, + "grad_norm": 1.2449007241812073, + "learning_rate": 1.9725370483348737e-07, + "loss": 0.7639665603637695, + "step": 7004 + }, + { + "epoch": 1.6140552995391704, + "grad_norm": 1.2839981076373308, + "learning_rate": 1.9702657455152448e-07, + "loss": 0.8561587929725647, + "step": 7005 + }, + { + "epoch": 1.6142857142857143, + "grad_norm": 1.4345782240891563, + "learning_rate": 1.9679956082009154e-07, + "loss": 0.835313081741333, + "step": 7006 + }, + { + "epoch": 1.614516129032258, + "grad_norm": 1.680229749258956, + "learning_rate": 1.9657266367213898e-07, + "loss": 0.831456184387207, + "step": 7007 + }, + { + "epoch": 1.6147465437788018, + "grad_norm": 1.1797102347566437, + "learning_rate": 
1.963458831406005e-07, + "loss": 0.699436604976654, + "step": 7008 + }, + { + "epoch": 1.6149769585253457, + "grad_norm": 1.2382287230628872, + "learning_rate": 1.9611921925839337e-07, + "loss": 0.7821902632713318, + "step": 7009 + }, + { + "epoch": 1.6152073732718892, + "grad_norm": 1.035873020643515, + "learning_rate": 1.9589267205841742e-07, + "loss": 0.7491241097450256, + "step": 7010 + }, + { + "epoch": 1.6154377880184332, + "grad_norm": 1.3212550422299536, + "learning_rate": 1.956662415735554e-07, + "loss": 0.7299652099609375, + "step": 7011 + }, + { + "epoch": 1.615668202764977, + "grad_norm": 1.2121144450441814, + "learning_rate": 1.9543992783667385e-07, + "loss": 0.692190408706665, + "step": 7012 + }, + { + "epoch": 1.6158986175115206, + "grad_norm": 1.5397188528974992, + "learning_rate": 1.9521373088062166e-07, + "loss": 0.8727273941040039, + "step": 7013 + }, + { + "epoch": 1.6161290322580646, + "grad_norm": 0.9576172656761047, + "learning_rate": 1.9498765073823077e-07, + "loss": 0.6441171169281006, + "step": 7014 + }, + { + "epoch": 1.6163594470046083, + "grad_norm": 1.202013067822893, + "learning_rate": 1.947616874423169e-07, + "loss": 0.6960387229919434, + "step": 7015 + }, + { + "epoch": 1.616589861751152, + "grad_norm": 1.3238157552069112, + "learning_rate": 1.9453584102567788e-07, + "loss": 0.9231700301170349, + "step": 7016 + }, + { + "epoch": 1.616820276497696, + "grad_norm": 1.5395552640428811, + "learning_rate": 1.9431011152109555e-07, + "loss": 0.6957401037216187, + "step": 7017 + }, + { + "epoch": 1.6170506912442395, + "grad_norm": 1.221595091148929, + "learning_rate": 1.9408449896133384e-07, + "loss": 0.6608580350875854, + "step": 7018 + }, + { + "epoch": 1.6172811059907835, + "grad_norm": 1.386134285673899, + "learning_rate": 1.9385900337913997e-07, + "loss": 0.7322397232055664, + "step": 7019 + }, + { + "epoch": 1.6175115207373272, + "grad_norm": 1.1188269604657235, + "learning_rate": 1.9363362480724488e-07, + "loss": 0.6996288299560547, + 
"step": 7020 + }, + { + "epoch": 1.617741935483871, + "grad_norm": 1.022000935531768, + "learning_rate": 1.9340836327836163e-07, + "loss": 0.7928623557090759, + "step": 7021 + }, + { + "epoch": 1.6179723502304149, + "grad_norm": 0.9992379944358776, + "learning_rate": 1.9318321882518674e-07, + "loss": 0.6275026202201843, + "step": 7022 + }, + { + "epoch": 1.6182027649769584, + "grad_norm": 1.26569218150676, + "learning_rate": 1.9295819148039948e-07, + "loss": 0.6660110950469971, + "step": 7023 + }, + { + "epoch": 1.6184331797235023, + "grad_norm": 1.0401535425644861, + "learning_rate": 1.9273328127666232e-07, + "loss": 0.8129480481147766, + "step": 7024 + }, + { + "epoch": 1.618663594470046, + "grad_norm": 1.146646002030878, + "learning_rate": 1.9250848824662046e-07, + "loss": 0.8070700168609619, + "step": 7025 + }, + { + "epoch": 1.6188940092165898, + "grad_norm": 1.4109951707076815, + "learning_rate": 1.922838124229028e-07, + "loss": 0.8123769760131836, + "step": 7026 + }, + { + "epoch": 1.6191244239631337, + "grad_norm": 0.9906397496222884, + "learning_rate": 1.920592538381205e-07, + "loss": 0.6552244424819946, + "step": 7027 + }, + { + "epoch": 1.6193548387096774, + "grad_norm": 1.0749749429025204, + "learning_rate": 1.9183481252486767e-07, + "loss": 0.8764367699623108, + "step": 7028 + }, + { + "epoch": 1.6195852534562212, + "grad_norm": 1.8347518044142406, + "learning_rate": 1.9161048851572215e-07, + "loss": 0.9075809717178345, + "step": 7029 + }, + { + "epoch": 1.6198156682027651, + "grad_norm": 1.1695152473088226, + "learning_rate": 1.9138628184324412e-07, + "loss": 0.7308327555656433, + "step": 7030 + }, + { + "epoch": 1.6200460829493086, + "grad_norm": 1.4269673355519676, + "learning_rate": 1.9116219253997655e-07, + "loss": 0.838142991065979, + "step": 7031 + }, + { + "epoch": 1.6202764976958526, + "grad_norm": 1.5286648636126694, + "learning_rate": 1.9093822063844623e-07, + "loss": 0.7681041359901428, + "step": 7032 + }, + { + "epoch": 1.6205069124423963, 
+ "grad_norm": 1.1858134701081806, + "learning_rate": 1.907143661711621e-07, + "loss": 0.7179980278015137, + "step": 7033 + }, + { + "epoch": 1.62073732718894, + "grad_norm": 1.2400863874788628, + "learning_rate": 1.9049062917061609e-07, + "loss": 0.8688361644744873, + "step": 7034 + }, + { + "epoch": 1.620967741935484, + "grad_norm": 1.0795907835047491, + "learning_rate": 1.9026700966928388e-07, + "loss": 0.6540178656578064, + "step": 7035 + }, + { + "epoch": 1.6211981566820275, + "grad_norm": 0.9042431894176799, + "learning_rate": 1.900435076996233e-07, + "loss": 0.7834869623184204, + "step": 7036 + }, + { + "epoch": 1.6214285714285714, + "grad_norm": 1.4376571546925008, + "learning_rate": 1.8982012329407505e-07, + "loss": 0.8895971775054932, + "step": 7037 + }, + { + "epoch": 1.6216589861751152, + "grad_norm": 1.1211547009425467, + "learning_rate": 1.8959685648506362e-07, + "loss": 0.6625858545303345, + "step": 7038 + }, + { + "epoch": 1.621889400921659, + "grad_norm": 1.4181930826937483, + "learning_rate": 1.893737073049957e-07, + "loss": 0.651193380355835, + "step": 7039 + }, + { + "epoch": 1.6221198156682028, + "grad_norm": 1.49480203283565, + "learning_rate": 1.8915067578626065e-07, + "loss": 0.8716636896133423, + "step": 7040 + }, + { + "epoch": 1.6223502304147466, + "grad_norm": 1.2037531898880258, + "learning_rate": 1.8892776196123196e-07, + "loss": 0.812637984752655, + "step": 7041 + }, + { + "epoch": 1.6225806451612903, + "grad_norm": 1.4952425500537936, + "learning_rate": 1.887049658622648e-07, + "loss": 0.7803184986114502, + "step": 7042 + }, + { + "epoch": 1.6228110599078343, + "grad_norm": 1.4542796613479354, + "learning_rate": 1.8848228752169793e-07, + "loss": 0.7884814739227295, + "step": 7043 + }, + { + "epoch": 1.6230414746543778, + "grad_norm": 1.3474838088832628, + "learning_rate": 1.8825972697185265e-07, + "loss": 0.7250671982765198, + "step": 7044 + }, + { + "epoch": 1.6232718894009217, + "grad_norm": 1.2055929150487366, + "learning_rate": 
1.880372842450332e-07, + "loss": 0.8078780174255371, + "step": 7045 + }, + { + "epoch": 1.6235023041474654, + "grad_norm": 1.2023825853188168, + "learning_rate": 1.878149593735272e-07, + "loss": 0.8523818254470825, + "step": 7046 + }, + { + "epoch": 1.6237327188940092, + "grad_norm": 1.2683431455334386, + "learning_rate": 1.875927523896047e-07, + "loss": 0.8772249221801758, + "step": 7047 + }, + { + "epoch": 1.6239631336405531, + "grad_norm": 1.0815338842817483, + "learning_rate": 1.8737066332551843e-07, + "loss": 0.7906323671340942, + "step": 7048 + }, + { + "epoch": 1.6241935483870966, + "grad_norm": 1.3048529080567755, + "learning_rate": 1.8714869221350492e-07, + "loss": 0.8010337352752686, + "step": 7049 + }, + { + "epoch": 1.6244239631336406, + "grad_norm": 1.365899691735964, + "learning_rate": 1.8692683908578267e-07, + "loss": 0.8978049755096436, + "step": 7050 + }, + { + "epoch": 1.6246543778801843, + "grad_norm": 1.159165616843268, + "learning_rate": 1.8670510397455297e-07, + "loss": 0.6622864007949829, + "step": 7051 + }, + { + "epoch": 1.624884792626728, + "grad_norm": 1.048079119212609, + "learning_rate": 1.8648348691200112e-07, + "loss": 0.7795406579971313, + "step": 7052 + }, + { + "epoch": 1.625115207373272, + "grad_norm": 1.2605630326093136, + "learning_rate": 1.8626198793029423e-07, + "loss": 0.9152054786682129, + "step": 7053 + }, + { + "epoch": 1.6253456221198157, + "grad_norm": 1.1757865506402991, + "learning_rate": 1.860406070615822e-07, + "loss": 0.719946563243866, + "step": 7054 + }, + { + "epoch": 1.6255760368663594, + "grad_norm": 1.2991129477224903, + "learning_rate": 1.8581934433799884e-07, + "loss": 0.782962441444397, + "step": 7055 + }, + { + "epoch": 1.6258064516129034, + "grad_norm": 1.118392005824248, + "learning_rate": 1.855981997916597e-07, + "loss": 0.8119732737541199, + "step": 7056 + }, + { + "epoch": 1.6260368663594469, + "grad_norm": 1.2362407544063627, + "learning_rate": 1.8537717345466351e-07, + "loss": 0.7585981488227844, + 
"step": 7057 + }, + { + "epoch": 1.6262672811059908, + "grad_norm": 1.158465388331893, + "learning_rate": 1.8515626535909258e-07, + "loss": 0.6846082210540771, + "step": 7058 + }, + { + "epoch": 1.6264976958525346, + "grad_norm": 1.230933966400155, + "learning_rate": 1.8493547553701083e-07, + "loss": 0.7355546951293945, + "step": 7059 + }, + { + "epoch": 1.6267281105990783, + "grad_norm": 1.15836260056471, + "learning_rate": 1.847148040204657e-07, + "loss": 0.6828340291976929, + "step": 7060 + }, + { + "epoch": 1.6269585253456222, + "grad_norm": 1.0499975056987365, + "learning_rate": 1.8449425084148763e-07, + "loss": 0.8513988256454468, + "step": 7061 + }, + { + "epoch": 1.6271889400921657, + "grad_norm": 1.0253802645646743, + "learning_rate": 1.8427381603208947e-07, + "loss": 0.6817762851715088, + "step": 7062 + }, + { + "epoch": 1.6274193548387097, + "grad_norm": 0.9793159138955572, + "learning_rate": 1.8405349962426699e-07, + "loss": 0.7314180731773376, + "step": 7063 + }, + { + "epoch": 1.6276497695852534, + "grad_norm": 1.326821994662743, + "learning_rate": 1.8383330164999898e-07, + "loss": 0.8193466663360596, + "step": 7064 + }, + { + "epoch": 1.6278801843317972, + "grad_norm": 1.2511428182189692, + "learning_rate": 1.8361322214124643e-07, + "loss": 0.7469823360443115, + "step": 7065 + }, + { + "epoch": 1.628110599078341, + "grad_norm": 1.4366505105110272, + "learning_rate": 1.8339326112995423e-07, + "loss": 0.8578816652297974, + "step": 7066 + }, + { + "epoch": 1.6283410138248848, + "grad_norm": 1.4615192025781363, + "learning_rate": 1.8317341864804903e-07, + "loss": 0.8384239077568054, + "step": 7067 + }, + { + "epoch": 1.6285714285714286, + "grad_norm": 1.122194991625306, + "learning_rate": 1.829536947274406e-07, + "loss": 0.8707646131515503, + "step": 7068 + }, + { + "epoch": 1.6288018433179725, + "grad_norm": 1.2319397578647793, + "learning_rate": 1.82734089400022e-07, + "loss": 0.6869943141937256, + "step": 7069 + }, + { + "epoch": 1.629032258064516, + 
"grad_norm": 1.3893487386527597, + "learning_rate": 1.8251460269766848e-07, + "loss": 0.7776129245758057, + "step": 7070 + }, + { + "epoch": 1.62926267281106, + "grad_norm": 1.104887091227765, + "learning_rate": 1.8229523465223785e-07, + "loss": 0.8126854300498962, + "step": 7071 + }, + { + "epoch": 1.6294930875576037, + "grad_norm": 1.0317016664034484, + "learning_rate": 1.8207598529557166e-07, + "loss": 0.6570720672607422, + "step": 7072 + }, + { + "epoch": 1.6297235023041474, + "grad_norm": 0.8859395443506812, + "learning_rate": 1.818568546594934e-07, + "loss": 0.6485599875450134, + "step": 7073 + }, + { + "epoch": 1.6299539170506914, + "grad_norm": 1.206554438869518, + "learning_rate": 1.816378427758093e-07, + "loss": 0.9132766723632812, + "step": 7074 + }, + { + "epoch": 1.6301843317972349, + "grad_norm": 1.4945592359199265, + "learning_rate": 1.8141894967630932e-07, + "loss": 0.8277286291122437, + "step": 7075 + }, + { + "epoch": 1.6304147465437788, + "grad_norm": 1.3670934774676884, + "learning_rate": 1.812001753927651e-07, + "loss": 0.7409358024597168, + "step": 7076 + }, + { + "epoch": 1.6306451612903226, + "grad_norm": 1.2664504423738472, + "learning_rate": 1.809815199569311e-07, + "loss": 0.8233339786529541, + "step": 7077 + }, + { + "epoch": 1.6308755760368663, + "grad_norm": 1.3727275296136565, + "learning_rate": 1.8076298340054563e-07, + "loss": 0.8704487085342407, + "step": 7078 + }, + { + "epoch": 1.6311059907834102, + "grad_norm": 1.503472652590263, + "learning_rate": 1.8054456575532862e-07, + "loss": 0.8845789432525635, + "step": 7079 + }, + { + "epoch": 1.631336405529954, + "grad_norm": 1.0523258046250148, + "learning_rate": 1.8032626705298272e-07, + "loss": 0.7241162061691284, + "step": 7080 + }, + { + "epoch": 1.6315668202764977, + "grad_norm": 1.193290512437584, + "learning_rate": 1.8010808732519433e-07, + "loss": 0.7065681219100952, + "step": 7081 + }, + { + "epoch": 1.6317972350230416, + "grad_norm": 1.281102564788521, + "learning_rate": 
1.7989002660363162e-07, + "loss": 0.6492339372634888, + "step": 7082 + }, + { + "epoch": 1.6320276497695851, + "grad_norm": 0.9673694389198546, + "learning_rate": 1.79672084919946e-07, + "loss": 0.7089248895645142, + "step": 7083 + }, + { + "epoch": 1.632258064516129, + "grad_norm": 1.0367687290608978, + "learning_rate": 1.794542623057712e-07, + "loss": 0.7030316591262817, + "step": 7084 + }, + { + "epoch": 1.6324884792626728, + "grad_norm": 1.1008255373775855, + "learning_rate": 1.792365587927239e-07, + "loss": 0.8626528978347778, + "step": 7085 + }, + { + "epoch": 1.6327188940092165, + "grad_norm": 1.1079176271315754, + "learning_rate": 1.7901897441240333e-07, + "loss": 0.8468672037124634, + "step": 7086 + }, + { + "epoch": 1.6329493087557605, + "grad_norm": 1.4611904004596754, + "learning_rate": 1.7880150919639214e-07, + "loss": 0.8546739816665649, + "step": 7087 + }, + { + "epoch": 1.633179723502304, + "grad_norm": 1.1949871550520017, + "learning_rate": 1.7858416317625468e-07, + "loss": 0.9187895655632019, + "step": 7088 + }, + { + "epoch": 1.633410138248848, + "grad_norm": 1.077248232790752, + "learning_rate": 1.7836693638353827e-07, + "loss": 0.7496293783187866, + "step": 7089 + }, + { + "epoch": 1.6336405529953917, + "grad_norm": 1.0517765508552415, + "learning_rate": 1.7814982884977358e-07, + "loss": 0.682653546333313, + "step": 7090 + }, + { + "epoch": 1.6338709677419354, + "grad_norm": 1.5003665522833143, + "learning_rate": 1.7793284060647295e-07, + "loss": 0.8065551519393921, + "step": 7091 + }, + { + "epoch": 1.6341013824884794, + "grad_norm": 1.134711484772771, + "learning_rate": 1.7771597168513263e-07, + "loss": 0.6605588793754578, + "step": 7092 + }, + { + "epoch": 1.634331797235023, + "grad_norm": 1.0012250391371058, + "learning_rate": 1.7749922211723034e-07, + "loss": 0.7257254123687744, + "step": 7093 + }, + { + "epoch": 1.6345622119815668, + "grad_norm": 1.1831263140816395, + "learning_rate": 1.772825919342269e-07, + "loss": 0.7438890933990479, + 
"step": 7094 + }, + { + "epoch": 1.6347926267281108, + "grad_norm": 1.250595895627981, + "learning_rate": 1.770660811675664e-07, + "loss": 0.8546249866485596, + "step": 7095 + }, + { + "epoch": 1.6350230414746543, + "grad_norm": 1.1835928544530323, + "learning_rate": 1.7684968984867466e-07, + "loss": 0.727516770362854, + "step": 7096 + }, + { + "epoch": 1.6352534562211982, + "grad_norm": 1.36586374940823, + "learning_rate": 1.766334180089606e-07, + "loss": 0.7578408718109131, + "step": 7097 + }, + { + "epoch": 1.635483870967742, + "grad_norm": 1.4255838450352876, + "learning_rate": 1.7641726567981606e-07, + "loss": 0.8253650665283203, + "step": 7098 + }, + { + "epoch": 1.6357142857142857, + "grad_norm": 1.3615057524495244, + "learning_rate": 1.7620123289261523e-07, + "loss": 0.8932347297668457, + "step": 7099 + }, + { + "epoch": 1.6359447004608296, + "grad_norm": 1.0770953977682685, + "learning_rate": 1.7598531967871465e-07, + "loss": 0.6661143898963928, + "step": 7100 + }, + { + "epoch": 1.6361751152073731, + "grad_norm": 1.2408264386151553, + "learning_rate": 1.7576952606945415e-07, + "loss": 0.8413572311401367, + "step": 7101 + }, + { + "epoch": 1.636405529953917, + "grad_norm": 1.2084626250429713, + "learning_rate": 1.7555385209615603e-07, + "loss": 0.713816225528717, + "step": 7102 + }, + { + "epoch": 1.6366359447004608, + "grad_norm": 1.67339389064804, + "learning_rate": 1.7533829779012466e-07, + "loss": 0.8588179349899292, + "step": 7103 + }, + { + "epoch": 1.6368663594470045, + "grad_norm": 1.3521357251955939, + "learning_rate": 1.7512286318264778e-07, + "loss": 0.8666437864303589, + "step": 7104 + }, + { + "epoch": 1.6370967741935485, + "grad_norm": 1.340257158830322, + "learning_rate": 1.7490754830499522e-07, + "loss": 0.9219843745231628, + "step": 7105 + }, + { + "epoch": 1.6373271889400922, + "grad_norm": 1.3285275552241094, + "learning_rate": 1.7469235318841956e-07, + "loss": 0.93767249584198, + "step": 7106 + }, + { + "epoch": 1.637557603686636, + 
"grad_norm": 1.2782247944953928, + "learning_rate": 1.7447727786415644e-07, + "loss": 0.7317457795143127, + "step": 7107 + }, + { + "epoch": 1.6377880184331797, + "grad_norm": 1.1023935137429937, + "learning_rate": 1.7426232236342365e-07, + "loss": 0.850578784942627, + "step": 7108 + }, + { + "epoch": 1.6380184331797234, + "grad_norm": 1.1932749051362488, + "learning_rate": 1.7404748671742143e-07, + "loss": 0.7580707669258118, + "step": 7109 + }, + { + "epoch": 1.6382488479262673, + "grad_norm": 1.4967576950530754, + "learning_rate": 1.738327709573333e-07, + "loss": 0.8393806219100952, + "step": 7110 + }, + { + "epoch": 1.638479262672811, + "grad_norm": 1.0170127852420416, + "learning_rate": 1.7361817511432474e-07, + "loss": 0.6641673445701599, + "step": 7111 + }, + { + "epoch": 1.6387096774193548, + "grad_norm": 1.2746608671167614, + "learning_rate": 1.734036992195438e-07, + "loss": 0.7570137977600098, + "step": 7112 + }, + { + "epoch": 1.6389400921658988, + "grad_norm": 1.1366436885649456, + "learning_rate": 1.7318934330412194e-07, + "loss": 0.78557288646698, + "step": 7113 + }, + { + "epoch": 1.6391705069124423, + "grad_norm": 1.3443988626089514, + "learning_rate": 1.729751073991721e-07, + "loss": 0.8309692740440369, + "step": 7114 + }, + { + "epoch": 1.6394009216589862, + "grad_norm": 1.0791152795033432, + "learning_rate": 1.727609915357908e-07, + "loss": 0.6409872770309448, + "step": 7115 + }, + { + "epoch": 1.63963133640553, + "grad_norm": 1.0106967037974632, + "learning_rate": 1.7254699574505648e-07, + "loss": 0.7916153073310852, + "step": 7116 + }, + { + "epoch": 1.6398617511520737, + "grad_norm": 1.5121844712494004, + "learning_rate": 1.7233312005803015e-07, + "loss": 0.7925357818603516, + "step": 7117 + }, + { + "epoch": 1.6400921658986176, + "grad_norm": 1.5493448906965575, + "learning_rate": 1.7211936450575594e-07, + "loss": 0.9124211668968201, + "step": 7118 + }, + { + "epoch": 1.6403225806451613, + "grad_norm": 1.2418161556418856, + "learning_rate": 
1.7190572911925994e-07, + "loss": 0.8905198574066162, + "step": 7119 + }, + { + "epoch": 1.640552995391705, + "grad_norm": 1.0755844253909046, + "learning_rate": 1.716922139295509e-07, + "loss": 0.8139728307723999, + "step": 7120 + }, + { + "epoch": 1.6407834101382488, + "grad_norm": 1.3621014779170746, + "learning_rate": 1.7147881896762074e-07, + "loss": 0.7607166767120361, + "step": 7121 + }, + { + "epoch": 1.6410138248847925, + "grad_norm": 1.282778120557478, + "learning_rate": 1.7126554426444316e-07, + "loss": 0.806864857673645, + "step": 7122 + }, + { + "epoch": 1.6412442396313365, + "grad_norm": 1.352241351446694, + "learning_rate": 1.710523898509747e-07, + "loss": 0.697334885597229, + "step": 7123 + }, + { + "epoch": 1.6414746543778802, + "grad_norm": 1.4205201103890581, + "learning_rate": 1.7083935575815455e-07, + "loss": 0.7313966751098633, + "step": 7124 + }, + { + "epoch": 1.641705069124424, + "grad_norm": 1.3868798260826238, + "learning_rate": 1.7062644201690413e-07, + "loss": 0.8857930898666382, + "step": 7125 + }, + { + "epoch": 1.6419354838709679, + "grad_norm": 1.0686783154078314, + "learning_rate": 1.7041364865812758e-07, + "loss": 0.7451884746551514, + "step": 7126 + }, + { + "epoch": 1.6421658986175114, + "grad_norm": 1.2220777026134708, + "learning_rate": 1.7020097571271186e-07, + "loss": 0.7023841142654419, + "step": 7127 + }, + { + "epoch": 1.6423963133640553, + "grad_norm": 1.2608302557028366, + "learning_rate": 1.6998842321152607e-07, + "loss": 0.708385705947876, + "step": 7128 + }, + { + "epoch": 1.642626728110599, + "grad_norm": 1.3854146642080662, + "learning_rate": 1.697759911854215e-07, + "loss": 0.7885474562644958, + "step": 7129 + }, + { + "epoch": 1.6428571428571428, + "grad_norm": 1.161295661131579, + "learning_rate": 1.695636796652331e-07, + "loss": 0.7054568529129028, + "step": 7130 + }, + { + "epoch": 1.6430875576036867, + "grad_norm": 1.1652742930387396, + "learning_rate": 1.6935148868177718e-07, + "loss": 0.6899726986885071, + 
"step": 7131 + }, + { + "epoch": 1.6433179723502302, + "grad_norm": 1.4011600897250127, + "learning_rate": 1.6913941826585288e-07, + "loss": 0.8558614253997803, + "step": 7132 + }, + { + "epoch": 1.6435483870967742, + "grad_norm": 1.2947217762783314, + "learning_rate": 1.6892746844824223e-07, + "loss": 0.7741858959197998, + "step": 7133 + }, + { + "epoch": 1.643778801843318, + "grad_norm": 1.130755528536183, + "learning_rate": 1.6871563925970943e-07, + "loss": 0.7332532405853271, + "step": 7134 + }, + { + "epoch": 1.6440092165898617, + "grad_norm": 1.4331915051670545, + "learning_rate": 1.6850393073100078e-07, + "loss": 0.8288085460662842, + "step": 7135 + }, + { + "epoch": 1.6442396313364056, + "grad_norm": 1.493040320153856, + "learning_rate": 1.682923428928461e-07, + "loss": 0.9470697641372681, + "step": 7136 + }, + { + "epoch": 1.6444700460829493, + "grad_norm": 1.1093535752232264, + "learning_rate": 1.6808087577595686e-07, + "loss": 0.7123041749000549, + "step": 7137 + }, + { + "epoch": 1.644700460829493, + "grad_norm": 1.3701909416221987, + "learning_rate": 1.6786952941102694e-07, + "loss": 0.8077690005302429, + "step": 7138 + }, + { + "epoch": 1.644930875576037, + "grad_norm": 1.3400770079054931, + "learning_rate": 1.6765830382873348e-07, + "loss": 0.767215371131897, + "step": 7139 + }, + { + "epoch": 1.6451612903225805, + "grad_norm": 1.3723903093182923, + "learning_rate": 1.6744719905973502e-07, + "loss": 0.7488540410995483, + "step": 7140 + }, + { + "epoch": 1.6453917050691245, + "grad_norm": 1.4546211260208752, + "learning_rate": 1.6723621513467378e-07, + "loss": 0.7841323018074036, + "step": 7141 + }, + { + "epoch": 1.6456221198156682, + "grad_norm": 1.2167195095267902, + "learning_rate": 1.6702535208417346e-07, + "loss": 0.65464186668396, + "step": 7142 + }, + { + "epoch": 1.645852534562212, + "grad_norm": 1.3347329400915569, + "learning_rate": 1.6681460993884056e-07, + "loss": 0.8845036029815674, + "step": 7143 + }, + { + "epoch": 1.6460829493087559, 
+ "grad_norm": 1.3318983430245122, + "learning_rate": 1.6660398872926396e-07, + "loss": 0.6741687655448914, + "step": 7144 + }, + { + "epoch": 1.6463133640552994, + "grad_norm": 1.4438874912830426, + "learning_rate": 1.663934884860152e-07, + "loss": 0.8656717538833618, + "step": 7145 + }, + { + "epoch": 1.6465437788018433, + "grad_norm": 1.3298318800949103, + "learning_rate": 1.6618310923964785e-07, + "loss": 0.7588434219360352, + "step": 7146 + }, + { + "epoch": 1.646774193548387, + "grad_norm": 1.3262924093620256, + "learning_rate": 1.6597285102069846e-07, + "loss": 0.7180176973342896, + "step": 7147 + }, + { + "epoch": 1.6470046082949308, + "grad_norm": 1.2551409816382322, + "learning_rate": 1.6576271385968576e-07, + "loss": 0.8253776431083679, + "step": 7148 + }, + { + "epoch": 1.6472350230414747, + "grad_norm": 1.2281736040805922, + "learning_rate": 1.6555269778711046e-07, + "loss": 0.7200941443443298, + "step": 7149 + }, + { + "epoch": 1.6474654377880185, + "grad_norm": 1.1059198918963296, + "learning_rate": 1.653428028334567e-07, + "loss": 0.7076164484024048, + "step": 7150 + }, + { + "epoch": 1.6476958525345622, + "grad_norm": 1.195055160265343, + "learning_rate": 1.6513302902919003e-07, + "loss": 0.8068090677261353, + "step": 7151 + }, + { + "epoch": 1.6479262672811061, + "grad_norm": 1.3947857709427287, + "learning_rate": 1.6492337640475884e-07, + "loss": 0.9712029099464417, + "step": 7152 + }, + { + "epoch": 1.6481566820276496, + "grad_norm": 1.406808701456467, + "learning_rate": 1.6471384499059438e-07, + "loss": 0.8359737992286682, + "step": 7153 + }, + { + "epoch": 1.6483870967741936, + "grad_norm": 1.0570634795327605, + "learning_rate": 1.645044348171094e-07, + "loss": 0.8066359758377075, + "step": 7154 + }, + { + "epoch": 1.6486175115207373, + "grad_norm": 1.3810484659709985, + "learning_rate": 1.642951459146995e-07, + "loss": 0.8717833757400513, + "step": 7155 + }, + { + "epoch": 1.648847926267281, + "grad_norm": 1.0992736543757442, + 
"learning_rate": 1.6408597831374305e-07, + "loss": 0.7335910201072693, + "step": 7156 + }, + { + "epoch": 1.649078341013825, + "grad_norm": 1.2397456033121492, + "learning_rate": 1.6387693204460028e-07, + "loss": 0.816049337387085, + "step": 7157 + }, + { + "epoch": 1.6493087557603685, + "grad_norm": 1.4068842390673124, + "learning_rate": 1.6366800713761364e-07, + "loss": 0.8060640096664429, + "step": 7158 + }, + { + "epoch": 1.6495391705069125, + "grad_norm": 1.2074799471388065, + "learning_rate": 1.6345920362310894e-07, + "loss": 0.8477619886398315, + "step": 7159 + }, + { + "epoch": 1.6497695852534562, + "grad_norm": 1.332601091577715, + "learning_rate": 1.6325052153139329e-07, + "loss": 0.9793992638587952, + "step": 7160 + }, + { + "epoch": 1.65, + "grad_norm": 1.1909988829986036, + "learning_rate": 1.6304196089275658e-07, + "loss": 0.8020002245903015, + "step": 7161 + }, + { + "epoch": 1.6502304147465439, + "grad_norm": 1.3231428787162685, + "learning_rate": 1.6283352173747146e-07, + "loss": 0.8226429224014282, + "step": 7162 + }, + { + "epoch": 1.6504608294930876, + "grad_norm": 1.2483952861501775, + "learning_rate": 1.6262520409579227e-07, + "loss": 0.7029248476028442, + "step": 7163 + }, + { + "epoch": 1.6506912442396313, + "grad_norm": 1.0969129808942812, + "learning_rate": 1.6241700799795631e-07, + "loss": 0.7234015464782715, + "step": 7164 + }, + { + "epoch": 1.6509216589861753, + "grad_norm": 1.3383637969539028, + "learning_rate": 1.6220893347418285e-07, + "loss": 0.854112982749939, + "step": 7165 + }, + { + "epoch": 1.6511520737327188, + "grad_norm": 1.2277405230752314, + "learning_rate": 1.6200098055467325e-07, + "loss": 0.8098663091659546, + "step": 7166 + }, + { + "epoch": 1.6513824884792627, + "grad_norm": 1.286099874995443, + "learning_rate": 1.617931492696123e-07, + "loss": 0.9032876491546631, + "step": 7167 + }, + { + "epoch": 1.6516129032258065, + "grad_norm": 1.0239384348378415, + "learning_rate": 1.6158543964916606e-07, + "loss": 
0.7048916816711426, + "step": 7168 + }, + { + "epoch": 1.6518433179723502, + "grad_norm": 1.2354879671689736, + "learning_rate": 1.6137785172348307e-07, + "loss": 0.879542350769043, + "step": 7169 + }, + { + "epoch": 1.6520737327188941, + "grad_norm": 1.1499858637392877, + "learning_rate": 1.611703855226949e-07, + "loss": 0.7851279377937317, + "step": 7170 + }, + { + "epoch": 1.6523041474654376, + "grad_norm": 1.3219595195357319, + "learning_rate": 1.6096304107691493e-07, + "loss": 0.779682457447052, + "step": 7171 + }, + { + "epoch": 1.6525345622119816, + "grad_norm": 1.2160096597693908, + "learning_rate": 1.6075581841623854e-07, + "loss": 0.7761027812957764, + "step": 7172 + }, + { + "epoch": 1.6527649769585253, + "grad_norm": 1.2474814185415584, + "learning_rate": 1.605487175707443e-07, + "loss": 0.726230263710022, + "step": 7173 + }, + { + "epoch": 1.652995391705069, + "grad_norm": 1.4211290590725025, + "learning_rate": 1.6034173857049238e-07, + "loss": 0.915956437587738, + "step": 7174 + }, + { + "epoch": 1.653225806451613, + "grad_norm": 1.2631109729400856, + "learning_rate": 1.6013488144552534e-07, + "loss": 0.8435969352722168, + "step": 7175 + }, + { + "epoch": 1.6534562211981567, + "grad_norm": 1.4370024530537882, + "learning_rate": 1.599281462258687e-07, + "loss": 0.7775791883468628, + "step": 7176 + }, + { + "epoch": 1.6536866359447004, + "grad_norm": 1.2504716465033257, + "learning_rate": 1.5972153294152945e-07, + "loss": 0.7578383684158325, + "step": 7177 + }, + { + "epoch": 1.6539170506912444, + "grad_norm": 1.25108951979748, + "learning_rate": 1.5951504162249706e-07, + "loss": 0.8378545045852661, + "step": 7178 + }, + { + "epoch": 1.654147465437788, + "grad_norm": 0.8833465476140244, + "learning_rate": 1.59308672298744e-07, + "loss": 0.7071488499641418, + "step": 7179 + }, + { + "epoch": 1.6543778801843319, + "grad_norm": 1.315489910714214, + "learning_rate": 1.591024250002243e-07, + "loss": 0.7424521446228027, + "step": 7180 + }, + { + "epoch": 
1.6546082949308756, + "grad_norm": 1.2002526550771535, + "learning_rate": 1.5889629975687401e-07, + "loss": 0.6503180265426636, + "step": 7181 + }, + { + "epoch": 1.6548387096774193, + "grad_norm": 1.1861762089682637, + "learning_rate": 1.5869029659861265e-07, + "loss": 0.7589888572692871, + "step": 7182 + }, + { + "epoch": 1.6550691244239633, + "grad_norm": 1.2877948406073703, + "learning_rate": 1.5848441555534109e-07, + "loss": 0.7609498500823975, + "step": 7183 + }, + { + "epoch": 1.6552995391705068, + "grad_norm": 1.1756552735153392, + "learning_rate": 1.582786566569425e-07, + "loss": 0.7813476324081421, + "step": 7184 + }, + { + "epoch": 1.6555299539170507, + "grad_norm": 1.1595327374780875, + "learning_rate": 1.5807301993328258e-07, + "loss": 0.7386292219161987, + "step": 7185 + }, + { + "epoch": 1.6557603686635944, + "grad_norm": 1.4106740697965885, + "learning_rate": 1.5786750541420922e-07, + "loss": 1.0402865409851074, + "step": 7186 + }, + { + "epoch": 1.6559907834101382, + "grad_norm": 1.071897744375966, + "learning_rate": 1.5766211312955246e-07, + "loss": 0.7375132441520691, + "step": 7187 + }, + { + "epoch": 1.6562211981566821, + "grad_norm": 1.3721197645813625, + "learning_rate": 1.574568431091251e-07, + "loss": 0.7903615236282349, + "step": 7188 + }, + { + "epoch": 1.6564516129032258, + "grad_norm": 1.1205445704505106, + "learning_rate": 1.5725169538272132e-07, + "loss": 0.6912896633148193, + "step": 7189 + }, + { + "epoch": 1.6566820276497696, + "grad_norm": 1.2659829320834666, + "learning_rate": 1.570466699801185e-07, + "loss": 0.7181826233863831, + "step": 7190 + }, + { + "epoch": 1.6569124423963135, + "grad_norm": 1.3941328099536103, + "learning_rate": 1.5684176693107566e-07, + "loss": 0.8328898549079895, + "step": 7191 + }, + { + "epoch": 1.657142857142857, + "grad_norm": 1.275566962551196, + "learning_rate": 1.5663698626533384e-07, + "loss": 0.7775120735168457, + "step": 7192 + }, + { + "epoch": 1.657373271889401, + "grad_norm": 
1.3683527646177032, + "learning_rate": 1.564323280126173e-07, + "loss": 0.8412137031555176, + "step": 7193 + }, + { + "epoch": 1.6576036866359447, + "grad_norm": 1.4192183215515342, + "learning_rate": 1.562277922026316e-07, + "loss": 0.7046825885772705, + "step": 7194 + }, + { + "epoch": 1.6578341013824884, + "grad_norm": 1.3386632639806328, + "learning_rate": 1.5602337886506468e-07, + "loss": 0.7107498645782471, + "step": 7195 + }, + { + "epoch": 1.6580645161290324, + "grad_norm": 1.1946522893092928, + "learning_rate": 1.558190880295872e-07, + "loss": 0.640724778175354, + "step": 7196 + }, + { + "epoch": 1.658294930875576, + "grad_norm": 1.3093502483074915, + "learning_rate": 1.556149197258515e-07, + "loss": 0.7856858968734741, + "step": 7197 + }, + { + "epoch": 1.6585253456221198, + "grad_norm": 1.4971129714340625, + "learning_rate": 1.554108739834923e-07, + "loss": 0.7956376075744629, + "step": 7198 + }, + { + "epoch": 1.6587557603686636, + "grad_norm": 1.2753834260169075, + "learning_rate": 1.5520695083212675e-07, + "loss": 0.721325159072876, + "step": 7199 + }, + { + "epoch": 1.6589861751152073, + "grad_norm": 1.060032555829029, + "learning_rate": 1.550031503013539e-07, + "loss": 0.7043335437774658, + "step": 7200 + }, + { + "epoch": 1.6592165898617512, + "grad_norm": 1.2269468216437214, + "learning_rate": 1.5479947242075496e-07, + "loss": 0.7154408693313599, + "step": 7201 + }, + { + "epoch": 1.659447004608295, + "grad_norm": 1.0598234159957265, + "learning_rate": 1.5459591721989397e-07, + "loss": 0.7353748083114624, + "step": 7202 + }, + { + "epoch": 1.6596774193548387, + "grad_norm": 1.1815091781809732, + "learning_rate": 1.5439248472831644e-07, + "loss": 0.7404372692108154, + "step": 7203 + }, + { + "epoch": 1.6599078341013827, + "grad_norm": 1.7521749620198364, + "learning_rate": 1.541891749755503e-07, + "loss": 0.8678613305091858, + "step": 7204 + }, + { + "epoch": 1.6601382488479262, + "grad_norm": 1.2663476960491773, + "learning_rate": 
1.5398598799110562e-07, + "loss": 0.7177796363830566, + "step": 7205 + }, + { + "epoch": 1.66036866359447, + "grad_norm": 1.3475911636796425, + "learning_rate": 1.537829238044749e-07, + "loss": 0.7610895037651062, + "step": 7206 + }, + { + "epoch": 1.6605990783410138, + "grad_norm": 1.355013126121341, + "learning_rate": 1.5357998244513227e-07, + "loss": 0.7340127825737, + "step": 7207 + }, + { + "epoch": 1.6608294930875576, + "grad_norm": 1.2008253519594887, + "learning_rate": 1.5337716394253498e-07, + "loss": 0.7060200572013855, + "step": 7208 + }, + { + "epoch": 1.6610599078341015, + "grad_norm": 1.306554098336219, + "learning_rate": 1.5317446832612147e-07, + "loss": 0.8592087030410767, + "step": 7209 + }, + { + "epoch": 1.661290322580645, + "grad_norm": 1.1630740877062444, + "learning_rate": 1.5297189562531264e-07, + "loss": 0.8687897324562073, + "step": 7210 + }, + { + "epoch": 1.661520737327189, + "grad_norm": 1.346256802747815, + "learning_rate": 1.5276944586951202e-07, + "loss": 0.8158563375473022, + "step": 7211 + }, + { + "epoch": 1.6617511520737327, + "grad_norm": 1.2436624388230366, + "learning_rate": 1.5256711908810482e-07, + "loss": 0.7734059691429138, + "step": 7212 + }, + { + "epoch": 1.6619815668202764, + "grad_norm": 1.4006583359216147, + "learning_rate": 1.5236491531045815e-07, + "loss": 0.8302994966506958, + "step": 7213 + }, + { + "epoch": 1.6622119815668204, + "grad_norm": 1.3250021353738068, + "learning_rate": 1.5216283456592216e-07, + "loss": 0.8474830389022827, + "step": 7214 + }, + { + "epoch": 1.662442396313364, + "grad_norm": 1.130266104375724, + "learning_rate": 1.5196087688382808e-07, + "loss": 0.7903469800949097, + "step": 7215 + }, + { + "epoch": 1.6626728110599078, + "grad_norm": 1.1131412296095682, + "learning_rate": 1.5175904229349035e-07, + "loss": 0.7756912708282471, + "step": 7216 + }, + { + "epoch": 1.6629032258064518, + "grad_norm": 1.4164367883683733, + "learning_rate": 1.5155733082420463e-07, + "loss": 0.7495905756950378, + 
"step": 7217 + }, + { + "epoch": 1.6631336405529953, + "grad_norm": 1.3394708776746769, + "learning_rate": 1.5135574250524897e-07, + "loss": 0.8536649942398071, + "step": 7218 + }, + { + "epoch": 1.6633640552995392, + "grad_norm": 1.3243776315844114, + "learning_rate": 1.5115427736588404e-07, + "loss": 0.7301580905914307, + "step": 7219 + }, + { + "epoch": 1.663594470046083, + "grad_norm": 1.324768351380299, + "learning_rate": 1.5095293543535203e-07, + "loss": 0.7131164073944092, + "step": 7220 + }, + { + "epoch": 1.6638248847926267, + "grad_norm": 1.0897989875613177, + "learning_rate": 1.5075171674287712e-07, + "loss": 0.708457350730896, + "step": 7221 + }, + { + "epoch": 1.6640552995391706, + "grad_norm": 1.402833248483696, + "learning_rate": 1.5055062131766662e-07, + "loss": 0.7509758472442627, + "step": 7222 + }, + { + "epoch": 1.6642857142857141, + "grad_norm": 1.1455053593625757, + "learning_rate": 1.503496491889089e-07, + "loss": 0.8401786088943481, + "step": 7223 + }, + { + "epoch": 1.664516129032258, + "grad_norm": 1.3755379329147759, + "learning_rate": 1.5014880038577482e-07, + "loss": 0.8578320741653442, + "step": 7224 + }, + { + "epoch": 1.6647465437788018, + "grad_norm": 1.0530962657504686, + "learning_rate": 1.4994807493741723e-07, + "loss": 0.6890276670455933, + "step": 7225 + }, + { + "epoch": 1.6649769585253456, + "grad_norm": 1.1705604667481366, + "learning_rate": 1.4974747287297128e-07, + "loss": 0.785246729850769, + "step": 7226 + }, + { + "epoch": 1.6652073732718895, + "grad_norm": 1.1145207566800768, + "learning_rate": 1.4954699422155382e-07, + "loss": 0.7826062440872192, + "step": 7227 + }, + { + "epoch": 1.6654377880184332, + "grad_norm": 1.392497287743248, + "learning_rate": 1.4934663901226452e-07, + "loss": 0.807513952255249, + "step": 7228 + }, + { + "epoch": 1.665668202764977, + "grad_norm": 1.0951466978132682, + "learning_rate": 1.4914640727418448e-07, + "loss": 0.8138872385025024, + "step": 7229 + }, + { + "epoch": 1.6658986175115207, 
+ "grad_norm": 1.0721150835685114, + "learning_rate": 1.489462990363768e-07, + "loss": 0.8465121984481812, + "step": 7230 + }, + { + "epoch": 1.6661290322580644, + "grad_norm": 1.2125852838751665, + "learning_rate": 1.4874631432788743e-07, + "loss": 0.7649251222610474, + "step": 7231 + }, + { + "epoch": 1.6663594470046084, + "grad_norm": 1.242983952838099, + "learning_rate": 1.485464531777436e-07, + "loss": 0.8297271132469177, + "step": 7232 + }, + { + "epoch": 1.666589861751152, + "grad_norm": 1.4592304164798606, + "learning_rate": 1.483467156149546e-07, + "loss": 0.7873194217681885, + "step": 7233 + }, + { + "epoch": 1.6668202764976958, + "grad_norm": 1.1529440121296932, + "learning_rate": 1.4814710166851274e-07, + "loss": 0.6924761533737183, + "step": 7234 + }, + { + "epoch": 1.6670506912442398, + "grad_norm": 0.9776015930659686, + "learning_rate": 1.4794761136739132e-07, + "loss": 0.6600887179374695, + "step": 7235 + }, + { + "epoch": 1.6672811059907833, + "grad_norm": 1.0700715817274216, + "learning_rate": 1.477482447405458e-07, + "loss": 0.6552041172981262, + "step": 7236 + }, + { + "epoch": 1.6675115207373272, + "grad_norm": 1.1844260959064823, + "learning_rate": 1.4754900181691465e-07, + "loss": 0.8609327077865601, + "step": 7237 + }, + { + "epoch": 1.667741935483871, + "grad_norm": 0.9877698580103615, + "learning_rate": 1.4734988262541726e-07, + "loss": 0.6970123052597046, + "step": 7238 + }, + { + "epoch": 1.6679723502304147, + "grad_norm": 1.1422057607025191, + "learning_rate": 1.4715088719495573e-07, + "loss": 0.7859683036804199, + "step": 7239 + }, + { + "epoch": 1.6682027649769586, + "grad_norm": 1.102405207717508, + "learning_rate": 1.4695201555441393e-07, + "loss": 0.7448029518127441, + "step": 7240 + }, + { + "epoch": 1.6684331797235024, + "grad_norm": 1.136418636365662, + "learning_rate": 1.4675326773265762e-07, + "loss": 0.7566728591918945, + "step": 7241 + }, + { + "epoch": 1.668663594470046, + "grad_norm": 1.183347797545015, + "learning_rate": 
1.465546437585351e-07, + "loss": 0.7563366889953613, + "step": 7242 + }, + { + "epoch": 1.6688940092165898, + "grad_norm": 1.2270668729431573, + "learning_rate": 1.4635614366087623e-07, + "loss": 0.8580834865570068, + "step": 7243 + }, + { + "epoch": 1.6691244239631335, + "grad_norm": 1.261588467565845, + "learning_rate": 1.4615776746849306e-07, + "loss": 0.6200178861618042, + "step": 7244 + }, + { + "epoch": 1.6693548387096775, + "grad_norm": 1.12353329539602, + "learning_rate": 1.4595951521017958e-07, + "loss": 0.8052491545677185, + "step": 7245 + }, + { + "epoch": 1.6695852534562212, + "grad_norm": 1.7485044689788691, + "learning_rate": 1.4576138691471186e-07, + "loss": 0.7383530735969543, + "step": 7246 + }, + { + "epoch": 1.669815668202765, + "grad_norm": 1.2061617795996018, + "learning_rate": 1.4556338261084776e-07, + "loss": 0.6735742092132568, + "step": 7247 + }, + { + "epoch": 1.670046082949309, + "grad_norm": 1.1671720957777614, + "learning_rate": 1.453655023273277e-07, + "loss": 0.7570016980171204, + "step": 7248 + }, + { + "epoch": 1.6702764976958524, + "grad_norm": 1.1212050061324152, + "learning_rate": 1.4516774609287364e-07, + "loss": 0.7271980047225952, + "step": 7249 + }, + { + "epoch": 1.6705069124423964, + "grad_norm": 1.3773952001351246, + "learning_rate": 1.449701139361894e-07, + "loss": 0.8567354083061218, + "step": 7250 + }, + { + "epoch": 1.67073732718894, + "grad_norm": 1.4372041287717652, + "learning_rate": 1.447726058859614e-07, + "loss": 0.8675428628921509, + "step": 7251 + }, + { + "epoch": 1.6709677419354838, + "grad_norm": 1.6475511282046704, + "learning_rate": 1.4457522197085748e-07, + "loss": 0.9131098389625549, + "step": 7252 + }, + { + "epoch": 1.6711981566820278, + "grad_norm": 0.9228526790942371, + "learning_rate": 1.4437796221952748e-07, + "loss": 0.7921037673950195, + "step": 7253 + }, + { + "epoch": 1.6714285714285713, + "grad_norm": 1.3314958050470875, + "learning_rate": 1.441808266606037e-07, + "loss": 0.7559863328933716, + 
"step": 7254 + }, + { + "epoch": 1.6716589861751152, + "grad_norm": 1.4253402064070324, + "learning_rate": 1.4398381532269998e-07, + "loss": 0.7433857917785645, + "step": 7255 + }, + { + "epoch": 1.671889400921659, + "grad_norm": 1.340982715064525, + "learning_rate": 1.4378692823441207e-07, + "loss": 0.8171184062957764, + "step": 7256 + }, + { + "epoch": 1.6721198156682027, + "grad_norm": 1.4295893582001031, + "learning_rate": 1.4359016542431824e-07, + "loss": 0.7296291589736938, + "step": 7257 + }, + { + "epoch": 1.6723502304147466, + "grad_norm": 1.1566282275472088, + "learning_rate": 1.4339352692097828e-07, + "loss": 0.7397829294204712, + "step": 7258 + }, + { + "epoch": 1.6725806451612903, + "grad_norm": 1.1030928795639288, + "learning_rate": 1.431970127529335e-07, + "loss": 0.6724194884300232, + "step": 7259 + }, + { + "epoch": 1.672811059907834, + "grad_norm": 1.266832602935082, + "learning_rate": 1.430006229487084e-07, + "loss": 0.7711449861526489, + "step": 7260 + }, + { + "epoch": 1.673041474654378, + "grad_norm": 1.0334522746934713, + "learning_rate": 1.428043575368083e-07, + "loss": 0.7581815719604492, + "step": 7261 + }, + { + "epoch": 1.6732718894009215, + "grad_norm": 1.2775574658714877, + "learning_rate": 1.4260821654572063e-07, + "loss": 0.7092517614364624, + "step": 7262 + }, + { + "epoch": 1.6735023041474655, + "grad_norm": 1.116987885688497, + "learning_rate": 1.4241220000391562e-07, + "loss": 0.646745502948761, + "step": 7263 + }, + { + "epoch": 1.6737327188940092, + "grad_norm": 1.0897996116307995, + "learning_rate": 1.4221630793984453e-07, + "loss": 0.7364122867584229, + "step": 7264 + }, + { + "epoch": 1.673963133640553, + "grad_norm": 1.0366138580080708, + "learning_rate": 1.4202054038194068e-07, + "loss": 0.8186795711517334, + "step": 7265 + }, + { + "epoch": 1.6741935483870969, + "grad_norm": 1.178861697439358, + "learning_rate": 1.4182489735861957e-07, + "loss": 0.7172378301620483, + "step": 7266 + }, + { + "epoch": 1.6744239631336404, + 
"grad_norm": 1.6433299949580555, + "learning_rate": 1.416293788982783e-07, + "loss": 0.8780974745750427, + "step": 7267 + }, + { + "epoch": 1.6746543778801843, + "grad_norm": 1.303060213158533, + "learning_rate": 1.4143398502929672e-07, + "loss": 0.9034930467605591, + "step": 7268 + }, + { + "epoch": 1.674884792626728, + "grad_norm": 1.283952582595571, + "learning_rate": 1.4123871578003543e-07, + "loss": 0.7994415760040283, + "step": 7269 + }, + { + "epoch": 1.6751152073732718, + "grad_norm": 1.2332939563797212, + "learning_rate": 1.410435711788376e-07, + "loss": 0.8327854871749878, + "step": 7270 + }, + { + "epoch": 1.6753456221198157, + "grad_norm": 1.3516689374751454, + "learning_rate": 1.408485512540285e-07, + "loss": 0.7667550444602966, + "step": 7271 + }, + { + "epoch": 1.6755760368663595, + "grad_norm": 1.3721126007283877, + "learning_rate": 1.4065365603391478e-07, + "loss": 0.8073924779891968, + "step": 7272 + }, + { + "epoch": 1.6758064516129032, + "grad_norm": 1.2537292403097655, + "learning_rate": 1.4045888554678497e-07, + "loss": 0.7265589237213135, + "step": 7273 + }, + { + "epoch": 1.6760368663594472, + "grad_norm": 1.4008103355507637, + "learning_rate": 1.402642398209104e-07, + "loss": 0.6912035942077637, + "step": 7274 + }, + { + "epoch": 1.6762672811059907, + "grad_norm": 1.4159985968960598, + "learning_rate": 1.400697188845432e-07, + "loss": 0.917953372001648, + "step": 7275 + }, + { + "epoch": 1.6764976958525346, + "grad_norm": 1.1092123664048492, + "learning_rate": 1.3987532276591774e-07, + "loss": 0.6989340782165527, + "step": 7276 + }, + { + "epoch": 1.6767281105990783, + "grad_norm": 1.0530722269060104, + "learning_rate": 1.396810514932507e-07, + "loss": 0.6648346185684204, + "step": 7277 + }, + { + "epoch": 1.676958525345622, + "grad_norm": 1.152242717428616, + "learning_rate": 1.3948690509474014e-07, + "loss": 0.6462730169296265, + "step": 7278 + }, + { + "epoch": 1.677188940092166, + "grad_norm": 1.0559078213581141, + "learning_rate": 
1.3929288359856584e-07, + "loss": 0.6084051132202148, + "step": 7279 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 1.2568155531692753, + "learning_rate": 1.3909898703289037e-07, + "loss": 0.8593035936355591, + "step": 7280 + }, + { + "epoch": 1.6776497695852535, + "grad_norm": 1.432799112874992, + "learning_rate": 1.389052154258572e-07, + "loss": 0.8064925670623779, + "step": 7281 + }, + { + "epoch": 1.6778801843317972, + "grad_norm": 1.3257643730794528, + "learning_rate": 1.3871156880559186e-07, + "loss": 0.7366064786911011, + "step": 7282 + }, + { + "epoch": 1.678110599078341, + "grad_norm": 1.4541745835743052, + "learning_rate": 1.3851804720020233e-07, + "loss": 0.8090124726295471, + "step": 7283 + }, + { + "epoch": 1.6783410138248849, + "grad_norm": 1.3768572400260246, + "learning_rate": 1.3832465063777787e-07, + "loss": 0.7326936721801758, + "step": 7284 + }, + { + "epoch": 1.6785714285714286, + "grad_norm": 1.1036181265329146, + "learning_rate": 1.3813137914638961e-07, + "loss": 0.7142004370689392, + "step": 7285 + }, + { + "epoch": 1.6788018433179723, + "grad_norm": 1.1850699819171153, + "learning_rate": 1.3793823275409066e-07, + "loss": 0.8358181715011597, + "step": 7286 + }, + { + "epoch": 1.6790322580645163, + "grad_norm": 1.341055264970921, + "learning_rate": 1.3774521148891583e-07, + "loss": 0.7337081432342529, + "step": 7287 + }, + { + "epoch": 1.6792626728110598, + "grad_norm": 1.079298746666331, + "learning_rate": 1.3755231537888222e-07, + "loss": 0.8029334545135498, + "step": 7288 + }, + { + "epoch": 1.6794930875576037, + "grad_norm": 1.1362422930327392, + "learning_rate": 1.373595444519884e-07, + "loss": 0.8132611513137817, + "step": 7289 + }, + { + "epoch": 1.6797235023041475, + "grad_norm": 1.2850987320352512, + "learning_rate": 1.3716689873621446e-07, + "loss": 0.7377278804779053, + "step": 7290 + }, + { + "epoch": 1.6799539170506912, + "grad_norm": 1.5545938019119256, + "learning_rate": 1.3697437825952307e-07, + "loss": 
0.788368284702301, + "step": 7291 + }, + { + "epoch": 1.6801843317972351, + "grad_norm": 1.3811107908360538, + "learning_rate": 1.3678198304985822e-07, + "loss": 0.8288586139678955, + "step": 7292 + }, + { + "epoch": 1.6804147465437786, + "grad_norm": 1.2973962244733976, + "learning_rate": 1.3658971313514567e-07, + "loss": 0.8534054160118103, + "step": 7293 + }, + { + "epoch": 1.6806451612903226, + "grad_norm": 1.261356018830994, + "learning_rate": 1.363975685432933e-07, + "loss": 0.8730596303939819, + "step": 7294 + }, + { + "epoch": 1.6808755760368663, + "grad_norm": 1.2262296688166254, + "learning_rate": 1.3620554930219076e-07, + "loss": 0.6891343593597412, + "step": 7295 + }, + { + "epoch": 1.68110599078341, + "grad_norm": 1.4944659665191207, + "learning_rate": 1.360136554397089e-07, + "loss": 0.8575270175933838, + "step": 7296 + }, + { + "epoch": 1.681336405529954, + "grad_norm": 1.1221716147697696, + "learning_rate": 1.3582188698370134e-07, + "loss": 0.82694011926651, + "step": 7297 + }, + { + "epoch": 1.6815668202764977, + "grad_norm": 1.1921152491764102, + "learning_rate": 1.3563024396200296e-07, + "loss": 0.6468113660812378, + "step": 7298 + }, + { + "epoch": 1.6817972350230415, + "grad_norm": 1.1634380991195066, + "learning_rate": 1.3543872640243016e-07, + "loss": 0.6818577647209167, + "step": 7299 + }, + { + "epoch": 1.6820276497695854, + "grad_norm": 1.262155726089824, + "learning_rate": 1.352473343327819e-07, + "loss": 0.7630767822265625, + "step": 7300 + }, + { + "epoch": 1.682258064516129, + "grad_norm": 1.3348546512512276, + "learning_rate": 1.3505606778083832e-07, + "loss": 0.9019678831100464, + "step": 7301 + }, + { + "epoch": 1.6824884792626729, + "grad_norm": 1.1302876731614566, + "learning_rate": 1.3486492677436123e-07, + "loss": 0.821324348449707, + "step": 7302 + }, + { + "epoch": 1.6827188940092166, + "grad_norm": 1.1997119452659193, + "learning_rate": 1.3467391134109495e-07, + "loss": 0.796151876449585, + "step": 7303 + }, + { + "epoch": 
1.6829493087557603, + "grad_norm": 1.298615109914031, + "learning_rate": 1.3448302150876488e-07, + "loss": 0.8020445108413696, + "step": 7304 + }, + { + "epoch": 1.6831797235023043, + "grad_norm": 0.9490183941784253, + "learning_rate": 1.3429225730507843e-07, + "loss": 0.7215749025344849, + "step": 7305 + }, + { + "epoch": 1.6834101382488478, + "grad_norm": 1.2708231250445967, + "learning_rate": 1.3410161875772474e-07, + "loss": 0.920941174030304, + "step": 7306 + }, + { + "epoch": 1.6836405529953917, + "grad_norm": 1.4523260098562263, + "learning_rate": 1.3391110589437494e-07, + "loss": 0.8979494571685791, + "step": 7307 + }, + { + "epoch": 1.6838709677419355, + "grad_norm": 1.3126261706157987, + "learning_rate": 1.337207187426812e-07, + "loss": 0.9125145673751831, + "step": 7308 + }, + { + "epoch": 1.6841013824884792, + "grad_norm": 1.1179697975279568, + "learning_rate": 1.3353045733027858e-07, + "loss": 0.8205714225769043, + "step": 7309 + }, + { + "epoch": 1.6843317972350231, + "grad_norm": 1.0993805126125902, + "learning_rate": 1.3334032168478305e-07, + "loss": 0.6914113759994507, + "step": 7310 + }, + { + "epoch": 1.6845622119815669, + "grad_norm": 1.3165472089957067, + "learning_rate": 1.3315031183379233e-07, + "loss": 0.7355014085769653, + "step": 7311 + }, + { + "epoch": 1.6847926267281106, + "grad_norm": 1.3581792517836289, + "learning_rate": 1.3296042780488637e-07, + "loss": 0.7564182281494141, + "step": 7312 + }, + { + "epoch": 1.6850230414746545, + "grad_norm": 1.197316556809727, + "learning_rate": 1.3277066962562643e-07, + "loss": 0.8091372847557068, + "step": 7313 + }, + { + "epoch": 1.685253456221198, + "grad_norm": 1.131878643977171, + "learning_rate": 1.3258103732355586e-07, + "loss": 0.7457877993583679, + "step": 7314 + }, + { + "epoch": 1.685483870967742, + "grad_norm": 1.2462081986852567, + "learning_rate": 1.3239153092619948e-07, + "loss": 0.861819863319397, + "step": 7315 + }, + { + "epoch": 1.6857142857142857, + "grad_norm": 
1.2291218741883772, + "learning_rate": 1.3220215046106353e-07, + "loss": 0.7698357105255127, + "step": 7316 + }, + { + "epoch": 1.6859447004608294, + "grad_norm": 1.2862793081172317, + "learning_rate": 1.320128959556369e-07, + "loss": 0.7889456152915955, + "step": 7317 + }, + { + "epoch": 1.6861751152073734, + "grad_norm": 1.0926817497008894, + "learning_rate": 1.3182376743738932e-07, + "loss": 0.6467938423156738, + "step": 7318 + }, + { + "epoch": 1.686405529953917, + "grad_norm": 0.962046315570081, + "learning_rate": 1.3163476493377245e-07, + "loss": 0.7202441692352295, + "step": 7319 + }, + { + "epoch": 1.6866359447004609, + "grad_norm": 1.2860571238613498, + "learning_rate": 1.3144588847222004e-07, + "loss": 0.7464008331298828, + "step": 7320 + }, + { + "epoch": 1.6868663594470046, + "grad_norm": 1.3323127704795366, + "learning_rate": 1.3125713808014704e-07, + "loss": 0.8924611806869507, + "step": 7321 + }, + { + "epoch": 1.6870967741935483, + "grad_norm": 1.5027995023789942, + "learning_rate": 1.3106851378495044e-07, + "loss": 0.6943146586418152, + "step": 7322 + }, + { + "epoch": 1.6873271889400923, + "grad_norm": 1.336362656918588, + "learning_rate": 1.308800156140085e-07, + "loss": 0.7335963249206543, + "step": 7323 + }, + { + "epoch": 1.687557603686636, + "grad_norm": 1.1540515039280186, + "learning_rate": 1.30691643594682e-07, + "loss": 0.6900516748428345, + "step": 7324 + }, + { + "epoch": 1.6877880184331797, + "grad_norm": 1.0161083273097216, + "learning_rate": 1.3050339775431262e-07, + "loss": 0.7230286598205566, + "step": 7325 + }, + { + "epoch": 1.6880184331797237, + "grad_norm": 1.3577939883495977, + "learning_rate": 1.3031527812022403e-07, + "loss": 0.8069840669631958, + "step": 7326 + }, + { + "epoch": 1.6882488479262672, + "grad_norm": 1.1850570268151976, + "learning_rate": 1.3012728471972134e-07, + "loss": 0.7598710060119629, + "step": 7327 + }, + { + "epoch": 1.6884792626728111, + "grad_norm": 1.1081098309526143, + "learning_rate": 
1.2993941758009164e-07, + "loss": 0.6817609071731567, + "step": 7328 + }, + { + "epoch": 1.6887096774193548, + "grad_norm": 1.1578322948538884, + "learning_rate": 1.2975167672860387e-07, + "loss": 0.6958975791931152, + "step": 7329 + }, + { + "epoch": 1.6889400921658986, + "grad_norm": 1.3026010781309694, + "learning_rate": 1.2956406219250814e-07, + "loss": 0.8270853757858276, + "step": 7330 + }, + { + "epoch": 1.6891705069124425, + "grad_norm": 1.2716142402347783, + "learning_rate": 1.2937657399903623e-07, + "loss": 0.8045610189437866, + "step": 7331 + }, + { + "epoch": 1.689400921658986, + "grad_norm": 1.3670021400758372, + "learning_rate": 1.2918921217540224e-07, + "loss": 0.6685627698898315, + "step": 7332 + }, + { + "epoch": 1.68963133640553, + "grad_norm": 1.481483528763015, + "learning_rate": 1.2900197674880142e-07, + "loss": 0.8157398700714111, + "step": 7333 + }, + { + "epoch": 1.6898617511520737, + "grad_norm": 1.1922253618562, + "learning_rate": 1.2881486774641025e-07, + "loss": 0.6142218112945557, + "step": 7334 + }, + { + "epoch": 1.6900921658986174, + "grad_norm": 1.2611165552955415, + "learning_rate": 1.2862788519538815e-07, + "loss": 0.7849327921867371, + "step": 7335 + }, + { + "epoch": 1.6903225806451614, + "grad_norm": 1.3074701765125263, + "learning_rate": 1.2844102912287457e-07, + "loss": 0.8035926818847656, + "step": 7336 + }, + { + "epoch": 1.6905529953917051, + "grad_norm": 1.26449405816571, + "learning_rate": 1.2825429955599209e-07, + "loss": 0.8456575870513916, + "step": 7337 + }, + { + "epoch": 1.6907834101382488, + "grad_norm": 1.0994096629111347, + "learning_rate": 1.2806769652184402e-07, + "loss": 0.7436026334762573, + "step": 7338 + }, + { + "epoch": 1.6910138248847926, + "grad_norm": 1.3946687886072922, + "learning_rate": 1.2788122004751522e-07, + "loss": 0.8315454721450806, + "step": 7339 + }, + { + "epoch": 1.6912442396313363, + "grad_norm": 1.1032652805797263, + "learning_rate": 1.2769487016007307e-07, + "loss": 
0.7425665855407715, + "step": 7340 + }, + { + "epoch": 1.6914746543778802, + "grad_norm": 1.210532059455236, + "learning_rate": 1.2750864688656572e-07, + "loss": 0.7899731993675232, + "step": 7341 + }, + { + "epoch": 1.691705069124424, + "grad_norm": 1.2339006903630358, + "learning_rate": 1.2732255025402327e-07, + "loss": 0.7637509703636169, + "step": 7342 + }, + { + "epoch": 1.6919354838709677, + "grad_norm": 1.2301886439270189, + "learning_rate": 1.2713658028945717e-07, + "loss": 0.793779730796814, + "step": 7343 + }, + { + "epoch": 1.6921658986175117, + "grad_norm": 1.2351914671209905, + "learning_rate": 1.2695073701986103e-07, + "loss": 0.7248083353042603, + "step": 7344 + }, + { + "epoch": 1.6923963133640552, + "grad_norm": 1.4318296651769333, + "learning_rate": 1.2676502047220973e-07, + "loss": 0.7506270408630371, + "step": 7345 + }, + { + "epoch": 1.692626728110599, + "grad_norm": 1.248314789497465, + "learning_rate": 1.2657943067345965e-07, + "loss": 0.7921839952468872, + "step": 7346 + }, + { + "epoch": 1.6928571428571428, + "grad_norm": 0.9630256947791611, + "learning_rate": 1.263939676505491e-07, + "loss": 0.7627893686294556, + "step": 7347 + }, + { + "epoch": 1.6930875576036866, + "grad_norm": 1.039168896728356, + "learning_rate": 1.262086314303973e-07, + "loss": 0.788955807685852, + "step": 7348 + }, + { + "epoch": 1.6933179723502305, + "grad_norm": 1.0370858136190912, + "learning_rate": 1.2602342203990612e-07, + "loss": 0.5527241826057434, + "step": 7349 + }, + { + "epoch": 1.6935483870967742, + "grad_norm": 1.344465363325951, + "learning_rate": 1.2583833950595825e-07, + "loss": 0.7324573397636414, + "step": 7350 + }, + { + "epoch": 1.693778801843318, + "grad_norm": 1.0731663336898336, + "learning_rate": 1.256533838554179e-07, + "loss": 0.6588207483291626, + "step": 7351 + }, + { + "epoch": 1.6940092165898617, + "grad_norm": 1.417078203000081, + "learning_rate": 1.2546855511513165e-07, + "loss": 0.7597184181213379, + "step": 7352 + }, + { + "epoch": 
1.6942396313364054, + "grad_norm": 1.1748568881342167, + "learning_rate": 1.2528385331192692e-07, + "loss": 0.7487671375274658, + "step": 7353 + }, + { + "epoch": 1.6944700460829494, + "grad_norm": 1.0203340332958148, + "learning_rate": 1.250992784726126e-07, + "loss": 0.757739245891571, + "step": 7354 + }, + { + "epoch": 1.694700460829493, + "grad_norm": 1.314521719717035, + "learning_rate": 1.249148306239801e-07, + "loss": 0.616966724395752, + "step": 7355 + }, + { + "epoch": 1.6949308755760368, + "grad_norm": 1.506626916778979, + "learning_rate": 1.2473050979280142e-07, + "loss": 0.9415719509124756, + "step": 7356 + }, + { + "epoch": 1.6951612903225808, + "grad_norm": 1.0903568482188648, + "learning_rate": 1.2454631600583044e-07, + "loss": 0.7731447815895081, + "step": 7357 + }, + { + "epoch": 1.6953917050691243, + "grad_norm": 1.2821570786422227, + "learning_rate": 1.2436224928980276e-07, + "loss": 0.800236701965332, + "step": 7358 + }, + { + "epoch": 1.6956221198156682, + "grad_norm": 1.2900334463062004, + "learning_rate": 1.241783096714356e-07, + "loss": 0.8113845586776733, + "step": 7359 + }, + { + "epoch": 1.695852534562212, + "grad_norm": 1.2157051726485628, + "learning_rate": 1.2399449717742706e-07, + "loss": 0.748763382434845, + "step": 7360 + }, + { + "epoch": 1.6960829493087557, + "grad_norm": 1.3769466349570898, + "learning_rate": 1.2381081183445774e-07, + "loss": 0.8595450520515442, + "step": 7361 + }, + { + "epoch": 1.6963133640552996, + "grad_norm": 1.240341465296028, + "learning_rate": 1.2362725366918913e-07, + "loss": 0.7800960540771484, + "step": 7362 + }, + { + "epoch": 1.6965437788018434, + "grad_norm": 1.1951306648014712, + "learning_rate": 1.2344382270826438e-07, + "loss": 0.6549400687217712, + "step": 7363 + }, + { + "epoch": 1.696774193548387, + "grad_norm": 1.1182982438102955, + "learning_rate": 1.2326051897830858e-07, + "loss": 0.7839380502700806, + "step": 7364 + }, + { + "epoch": 1.6970046082949308, + "grad_norm": 1.2576690972053175, + 
"learning_rate": 1.230773425059277e-07, + "loss": 0.8436654806137085, + "step": 7365 + }, + { + "epoch": 1.6972350230414746, + "grad_norm": 0.8415515075804344, + "learning_rate": 1.2289429331770974e-07, + "loss": 0.6517987251281738, + "step": 7366 + }, + { + "epoch": 1.6974654377880185, + "grad_norm": 1.073572916121381, + "learning_rate": 1.2271137144022392e-07, + "loss": 0.7108355760574341, + "step": 7367 + }, + { + "epoch": 1.6976958525345622, + "grad_norm": 1.138464806776697, + "learning_rate": 1.2252857690002094e-07, + "loss": 0.7801471948623657, + "step": 7368 + }, + { + "epoch": 1.697926267281106, + "grad_norm": 0.9980466100193536, + "learning_rate": 1.2234590972363358e-07, + "loss": 0.8240209221839905, + "step": 7369 + }, + { + "epoch": 1.69815668202765, + "grad_norm": 1.5026485017018454, + "learning_rate": 1.2216336993757558e-07, + "loss": 0.8119853138923645, + "step": 7370 + }, + { + "epoch": 1.6983870967741934, + "grad_norm": 0.9448426506131885, + "learning_rate": 1.2198095756834216e-07, + "loss": 0.7685642838478088, + "step": 7371 + }, + { + "epoch": 1.6986175115207374, + "grad_norm": 1.1884615399125027, + "learning_rate": 1.217986726424106e-07, + "loss": 0.7820984125137329, + "step": 7372 + }, + { + "epoch": 1.698847926267281, + "grad_norm": 1.4933868054084445, + "learning_rate": 1.2161651518623916e-07, + "loss": 0.8051085472106934, + "step": 7373 + }, + { + "epoch": 1.6990783410138248, + "grad_norm": 1.16418962691877, + "learning_rate": 1.2143448522626742e-07, + "loss": 0.828999400138855, + "step": 7374 + }, + { + "epoch": 1.6993087557603688, + "grad_norm": 1.513005376638313, + "learning_rate": 1.2125258278891738e-07, + "loss": 0.8215579986572266, + "step": 7375 + }, + { + "epoch": 1.6995391705069123, + "grad_norm": 1.2614405602995598, + "learning_rate": 1.2107080790059156e-07, + "loss": 0.9362014532089233, + "step": 7376 + }, + { + "epoch": 1.6997695852534562, + "grad_norm": 1.014310262155135, + "learning_rate": 1.2088916058767428e-07, + "loss": 
0.7789602279663086, + "step": 7377 + }, + { + "epoch": 1.7, + "grad_norm": 1.322797235291574, + "learning_rate": 1.2070764087653163e-07, + "loss": 0.8371152877807617, + "step": 7378 + }, + { + "epoch": 1.7002304147465437, + "grad_norm": 1.2225532720655308, + "learning_rate": 1.2052624879351103e-07, + "loss": 0.64423668384552, + "step": 7379 + }, + { + "epoch": 1.7004608294930876, + "grad_norm": 1.3442813905677369, + "learning_rate": 1.203449843649409e-07, + "loss": 0.7635257244110107, + "step": 7380 + }, + { + "epoch": 1.7006912442396314, + "grad_norm": 1.15010903043395, + "learning_rate": 1.2016384761713194e-07, + "loss": 0.7859230041503906, + "step": 7381 + }, + { + "epoch": 1.700921658986175, + "grad_norm": 1.0218637195871514, + "learning_rate": 1.199828385763757e-07, + "loss": 0.7066336870193481, + "step": 7382 + }, + { + "epoch": 1.701152073732719, + "grad_norm": 1.1069799499148123, + "learning_rate": 1.198019572689455e-07, + "loss": 0.7190531492233276, + "step": 7383 + }, + { + "epoch": 1.7013824884792625, + "grad_norm": 1.520158585759741, + "learning_rate": 1.1962120372109586e-07, + "loss": 0.7389136552810669, + "step": 7384 + }, + { + "epoch": 1.7016129032258065, + "grad_norm": 1.5406735409523549, + "learning_rate": 1.1944057795906316e-07, + "loss": 0.774425745010376, + "step": 7385 + }, + { + "epoch": 1.7018433179723502, + "grad_norm": 1.0093305285556118, + "learning_rate": 1.1926008000906484e-07, + "loss": 0.7566725015640259, + "step": 7386 + }, + { + "epoch": 1.702073732718894, + "grad_norm": 1.153413777620863, + "learning_rate": 1.1907970989729987e-07, + "loss": 0.6891475915908813, + "step": 7387 + }, + { + "epoch": 1.702304147465438, + "grad_norm": 1.08541401133235, + "learning_rate": 1.1889946764994873e-07, + "loss": 0.6188378930091858, + "step": 7388 + }, + { + "epoch": 1.7025345622119814, + "grad_norm": 1.1534210847497282, + "learning_rate": 1.1871935329317362e-07, + "loss": 0.703027069568634, + "step": 7389 + }, + { + "epoch": 1.7027649769585254, + 
"grad_norm": 1.2738888238498793, + "learning_rate": 1.1853936685311772e-07, + "loss": 0.9253139495849609, + "step": 7390 + }, + { + "epoch": 1.702995391705069, + "grad_norm": 1.015934424294919, + "learning_rate": 1.1835950835590569e-07, + "loss": 0.6504430770874023, + "step": 7391 + }, + { + "epoch": 1.7032258064516128, + "grad_norm": 1.0145240040509695, + "learning_rate": 1.18179777827644e-07, + "loss": 0.6656354665756226, + "step": 7392 + }, + { + "epoch": 1.7034562211981568, + "grad_norm": 1.451290987899464, + "learning_rate": 1.1800017529442019e-07, + "loss": 0.8534063100814819, + "step": 7393 + }, + { + "epoch": 1.7036866359447005, + "grad_norm": 1.1896366783409809, + "learning_rate": 1.178207007823031e-07, + "loss": 0.8315893411636353, + "step": 7394 + }, + { + "epoch": 1.7039170506912442, + "grad_norm": 1.1636407894423468, + "learning_rate": 1.1764135431734367e-07, + "loss": 0.8161677718162537, + "step": 7395 + }, + { + "epoch": 1.7041474654377882, + "grad_norm": 1.418011015190517, + "learning_rate": 1.1746213592557352e-07, + "loss": 0.7942687273025513, + "step": 7396 + }, + { + "epoch": 1.7043778801843317, + "grad_norm": 0.9938387819486493, + "learning_rate": 1.1728304563300584e-07, + "loss": 0.8056384325027466, + "step": 7397 + }, + { + "epoch": 1.7046082949308756, + "grad_norm": 1.3626759695428086, + "learning_rate": 1.1710408346563583e-07, + "loss": 0.8535007238388062, + "step": 7398 + }, + { + "epoch": 1.7048387096774194, + "grad_norm": 1.1491077351100174, + "learning_rate": 1.1692524944943916e-07, + "loss": 0.7729576826095581, + "step": 7399 + }, + { + "epoch": 1.705069124423963, + "grad_norm": 1.2729586784281095, + "learning_rate": 1.1674654361037328e-07, + "loss": 0.7755489349365234, + "step": 7400 + }, + { + "epoch": 1.705299539170507, + "grad_norm": 1.7008944920024607, + "learning_rate": 1.1656796597437757e-07, + "loss": 0.8752193450927734, + "step": 7401 + }, + { + "epoch": 1.7055299539170505, + "grad_norm": 1.0505715773863387, + "learning_rate": 
1.1638951656737217e-07, + "loss": 0.7135917544364929, + "step": 7402 + }, + { + "epoch": 1.7057603686635945, + "grad_norm": 1.1807276735663779, + "learning_rate": 1.1621119541525859e-07, + "loss": 0.7378124594688416, + "step": 7403 + }, + { + "epoch": 1.7059907834101382, + "grad_norm": 1.1699041912496186, + "learning_rate": 1.1603300254391978e-07, + "loss": 0.637479305267334, + "step": 7404 + }, + { + "epoch": 1.706221198156682, + "grad_norm": 0.9107859734790176, + "learning_rate": 1.1585493797922075e-07, + "loss": 0.6162394881248474, + "step": 7405 + }, + { + "epoch": 1.706451612903226, + "grad_norm": 1.0832025296305532, + "learning_rate": 1.1567700174700701e-07, + "loss": 0.7836494445800781, + "step": 7406 + }, + { + "epoch": 1.7066820276497696, + "grad_norm": 1.3117851793296085, + "learning_rate": 1.154991938731057e-07, + "loss": 0.6297281980514526, + "step": 7407 + }, + { + "epoch": 1.7069124423963133, + "grad_norm": 0.9987358693502671, + "learning_rate": 1.1532151438332549e-07, + "loss": 0.7190115451812744, + "step": 7408 + }, + { + "epoch": 1.7071428571428573, + "grad_norm": 1.353324439932077, + "learning_rate": 1.151439633034561e-07, + "loss": 0.7578086853027344, + "step": 7409 + }, + { + "epoch": 1.7073732718894008, + "grad_norm": 0.986158496671175, + "learning_rate": 1.1496654065926925e-07, + "loss": 0.7347216010093689, + "step": 7410 + }, + { + "epoch": 1.7076036866359448, + "grad_norm": 1.2279759650694806, + "learning_rate": 1.1478924647651711e-07, + "loss": 0.7940168380737305, + "step": 7411 + }, + { + "epoch": 1.7078341013824885, + "grad_norm": 1.2336717780625897, + "learning_rate": 1.1461208078093431e-07, + "loss": 0.7625843286514282, + "step": 7412 + }, + { + "epoch": 1.7080645161290322, + "grad_norm": 1.5771280074431184, + "learning_rate": 1.1443504359823585e-07, + "loss": 0.7603492736816406, + "step": 7413 + }, + { + "epoch": 1.7082949308755762, + "grad_norm": 1.1263740749103024, + "learning_rate": 1.1425813495411817e-07, + "loss": 
0.8746018409729004, + "step": 7414 + }, + { + "epoch": 1.7085253456221197, + "grad_norm": 1.2947959548271089, + "learning_rate": 1.1408135487425996e-07, + "loss": 0.72724449634552, + "step": 7415 + }, + { + "epoch": 1.7087557603686636, + "grad_norm": 0.794129708213959, + "learning_rate": 1.1390470338432023e-07, + "loss": 0.6874721646308899, + "step": 7416 + }, + { + "epoch": 1.7089861751152073, + "grad_norm": 0.9673124457868691, + "learning_rate": 1.1372818050993959e-07, + "loss": 0.7129265666007996, + "step": 7417 + }, + { + "epoch": 1.709216589861751, + "grad_norm": 1.3811139782005308, + "learning_rate": 1.1355178627674045e-07, + "loss": 0.7505607008934021, + "step": 7418 + }, + { + "epoch": 1.709447004608295, + "grad_norm": 1.1149863565678992, + "learning_rate": 1.1337552071032608e-07, + "loss": 0.7497769594192505, + "step": 7419 + }, + { + "epoch": 1.7096774193548387, + "grad_norm": 1.342673457996757, + "learning_rate": 1.1319938383628092e-07, + "loss": 0.792352020740509, + "step": 7420 + }, + { + "epoch": 1.7099078341013825, + "grad_norm": 1.1720516000619245, + "learning_rate": 1.1302337568017139e-07, + "loss": 0.780627965927124, + "step": 7421 + }, + { + "epoch": 1.7101382488479264, + "grad_norm": 1.2702279678670012, + "learning_rate": 1.1284749626754464e-07, + "loss": 0.7024368047714233, + "step": 7422 + }, + { + "epoch": 1.71036866359447, + "grad_norm": 1.2880158142162281, + "learning_rate": 1.1267174562392945e-07, + "loss": 0.756782591342926, + "step": 7423 + }, + { + "epoch": 1.7105990783410139, + "grad_norm": 1.2881350167706749, + "learning_rate": 1.1249612377483552e-07, + "loss": 0.8585456609725952, + "step": 7424 + }, + { + "epoch": 1.7108294930875576, + "grad_norm": 1.2079330064248406, + "learning_rate": 1.1232063074575449e-07, + "loss": 0.8610610961914062, + "step": 7425 + }, + { + "epoch": 1.7110599078341013, + "grad_norm": 1.2629835504337044, + "learning_rate": 1.1214526656215872e-07, + "loss": 0.7493829131126404, + "step": 7426 + }, + { + "epoch": 
1.7112903225806453, + "grad_norm": 1.1677189056932475, + "learning_rate": 1.1197003124950222e-07, + "loss": 0.7479410171508789, + "step": 7427 + }, + { + "epoch": 1.7115207373271888, + "grad_norm": 1.2024881147733253, + "learning_rate": 1.1179492483322006e-07, + "loss": 0.8056051135063171, + "step": 7428 + }, + { + "epoch": 1.7117511520737327, + "grad_norm": 1.2393004464149642, + "learning_rate": 1.1161994733872848e-07, + "loss": 0.8448202610015869, + "step": 7429 + }, + { + "epoch": 1.7119815668202765, + "grad_norm": 1.3170634810384778, + "learning_rate": 1.1144509879142571e-07, + "loss": 0.7783033847808838, + "step": 7430 + }, + { + "epoch": 1.7122119815668202, + "grad_norm": 1.2589188548838177, + "learning_rate": 1.1127037921669058e-07, + "loss": 0.6591838598251343, + "step": 7431 + }, + { + "epoch": 1.7124423963133641, + "grad_norm": 1.4141951291447457, + "learning_rate": 1.1109578863988322e-07, + "loss": 0.8508287668228149, + "step": 7432 + }, + { + "epoch": 1.7126728110599079, + "grad_norm": 1.0110596601133535, + "learning_rate": 1.1092132708634549e-07, + "loss": 0.7981588840484619, + "step": 7433 + }, + { + "epoch": 1.7129032258064516, + "grad_norm": 1.1560054105611206, + "learning_rate": 1.1074699458140025e-07, + "loss": 0.7754761576652527, + "step": 7434 + }, + { + "epoch": 1.7131336405529956, + "grad_norm": 1.4234254723014017, + "learning_rate": 1.1057279115035124e-07, + "loss": 0.8487040996551514, + "step": 7435 + }, + { + "epoch": 1.713364055299539, + "grad_norm": 1.2105987237993454, + "learning_rate": 1.1039871681848433e-07, + "loss": 0.8175803422927856, + "step": 7436 + }, + { + "epoch": 1.713594470046083, + "grad_norm": 1.0010434545431337, + "learning_rate": 1.1022477161106591e-07, + "loss": 0.8361574411392212, + "step": 7437 + }, + { + "epoch": 1.7138248847926267, + "grad_norm": 1.1841110354603608, + "learning_rate": 1.1005095555334409e-07, + "loss": 0.6253053545951843, + "step": 7438 + }, + { + "epoch": 1.7140552995391705, + "grad_norm": 
1.5361244402123166, + "learning_rate": 1.0987726867054792e-07, + "loss": 0.8035168647766113, + "step": 7439 + }, + { + "epoch": 1.7142857142857144, + "grad_norm": 1.0148513511065955, + "learning_rate": 1.0970371098788767e-07, + "loss": 0.7352867722511292, + "step": 7440 + }, + { + "epoch": 1.714516129032258, + "grad_norm": 1.1469128257526675, + "learning_rate": 1.0953028253055541e-07, + "loss": 0.7540202140808105, + "step": 7441 + }, + { + "epoch": 1.7147465437788019, + "grad_norm": 1.2653522382652087, + "learning_rate": 1.0935698332372379e-07, + "loss": 0.7883191108703613, + "step": 7442 + }, + { + "epoch": 1.7149769585253456, + "grad_norm": 1.2745739855530656, + "learning_rate": 1.0918381339254701e-07, + "loss": 0.7581819295883179, + "step": 7443 + }, + { + "epoch": 1.7152073732718893, + "grad_norm": 1.1705192956080483, + "learning_rate": 1.090107727621603e-07, + "loss": 0.8066321611404419, + "step": 7444 + }, + { + "epoch": 1.7154377880184333, + "grad_norm": 1.1820593590096908, + "learning_rate": 1.0883786145768037e-07, + "loss": 0.7427937984466553, + "step": 7445 + }, + { + "epoch": 1.715668202764977, + "grad_norm": 1.3132499515834741, + "learning_rate": 1.0866507950420523e-07, + "loss": 0.7736409902572632, + "step": 7446 + }, + { + "epoch": 1.7158986175115207, + "grad_norm": 1.1930714060597967, + "learning_rate": 1.0849242692681382e-07, + "loss": 0.7253416776657104, + "step": 7447 + }, + { + "epoch": 1.7161290322580647, + "grad_norm": 0.9521960056037656, + "learning_rate": 1.0831990375056643e-07, + "loss": 0.7933270931243896, + "step": 7448 + }, + { + "epoch": 1.7163594470046082, + "grad_norm": 1.407227257578247, + "learning_rate": 1.0814751000050437e-07, + "loss": 0.7946739196777344, + "step": 7449 + }, + { + "epoch": 1.7165898617511521, + "grad_norm": 1.2776015375287177, + "learning_rate": 1.0797524570165073e-07, + "loss": 0.7798205614089966, + "step": 7450 + }, + { + "epoch": 1.7168202764976959, + "grad_norm": 1.2558469001082564, + "learning_rate": 
1.078031108790094e-07, + "loss": 0.616565465927124, + "step": 7451 + }, + { + "epoch": 1.7170506912442396, + "grad_norm": 1.2221718815584264, + "learning_rate": 1.0763110555756516e-07, + "loss": 0.8406517505645752, + "step": 7452 + }, + { + "epoch": 1.7172811059907835, + "grad_norm": 1.3773523411720476, + "learning_rate": 1.0745922976228483e-07, + "loss": 0.8827311992645264, + "step": 7453 + }, + { + "epoch": 1.717511520737327, + "grad_norm": 1.2403910104019171, + "learning_rate": 1.0728748351811567e-07, + "loss": 0.585588812828064, + "step": 7454 + }, + { + "epoch": 1.717741935483871, + "grad_norm": 0.9381679846122704, + "learning_rate": 1.0711586684998631e-07, + "loss": 0.6305320858955383, + "step": 7455 + }, + { + "epoch": 1.7179723502304147, + "grad_norm": 1.0634674542520166, + "learning_rate": 1.0694437978280701e-07, + "loss": 0.7982319593429565, + "step": 7456 + }, + { + "epoch": 1.7182027649769585, + "grad_norm": 1.3468349324058282, + "learning_rate": 1.0677302234146879e-07, + "loss": 0.7792943716049194, + "step": 7457 + }, + { + "epoch": 1.7184331797235024, + "grad_norm": 1.308217346349807, + "learning_rate": 1.0660179455084372e-07, + "loss": 0.7019332051277161, + "step": 7458 + }, + { + "epoch": 1.7186635944700461, + "grad_norm": 1.2330257329830192, + "learning_rate": 1.0643069643578562e-07, + "loss": 0.8088894486427307, + "step": 7459 + }, + { + "epoch": 1.7188940092165899, + "grad_norm": 1.5573400915532798, + "learning_rate": 1.0625972802112882e-07, + "loss": 0.799231767654419, + "step": 7460 + }, + { + "epoch": 1.7191244239631336, + "grad_norm": 0.950308854182165, + "learning_rate": 1.0608888933168958e-07, + "loss": 0.7265694737434387, + "step": 7461 + }, + { + "epoch": 1.7193548387096773, + "grad_norm": 1.1717288459308963, + "learning_rate": 1.0591818039226464e-07, + "loss": 0.8566714525222778, + "step": 7462 + }, + { + "epoch": 1.7195852534562213, + "grad_norm": 1.2255123057406947, + "learning_rate": 1.0574760122763216e-07, + "loss": 
0.811874508857727, + "step": 7463 + }, + { + "epoch": 1.719815668202765, + "grad_norm": 1.0493349652228454, + "learning_rate": 1.0557715186255156e-07, + "loss": 0.7990631461143494, + "step": 7464 + }, + { + "epoch": 1.7200460829493087, + "grad_norm": 1.3183681626099089, + "learning_rate": 1.0540683232176307e-07, + "loss": 0.8108334541320801, + "step": 7465 + }, + { + "epoch": 1.7202764976958527, + "grad_norm": 1.8420274096120763, + "learning_rate": 1.0523664262998888e-07, + "loss": 0.8927996158599854, + "step": 7466 + }, + { + "epoch": 1.7205069124423962, + "grad_norm": 1.1733285346989661, + "learning_rate": 1.0506658281193138e-07, + "loss": 0.7277737855911255, + "step": 7467 + }, + { + "epoch": 1.7207373271889401, + "grad_norm": 1.0503912207473127, + "learning_rate": 1.0489665289227467e-07, + "loss": 0.7229233980178833, + "step": 7468 + }, + { + "epoch": 1.7209677419354839, + "grad_norm": 1.298634428768958, + "learning_rate": 1.0472685289568373e-07, + "loss": 0.7211846709251404, + "step": 7469 + }, + { + "epoch": 1.7211981566820276, + "grad_norm": 1.1862135261022106, + "learning_rate": 1.0455718284680504e-07, + "loss": 0.8239504098892212, + "step": 7470 + }, + { + "epoch": 1.7214285714285715, + "grad_norm": 1.2304377847970827, + "learning_rate": 1.0438764277026579e-07, + "loss": 0.7492972612380981, + "step": 7471 + }, + { + "epoch": 1.7216589861751153, + "grad_norm": 1.3060072891774943, + "learning_rate": 1.0421823269067442e-07, + "loss": 0.7658303380012512, + "step": 7472 + }, + { + "epoch": 1.721889400921659, + "grad_norm": 1.0618950256674606, + "learning_rate": 1.0404895263262092e-07, + "loss": 0.708244800567627, + "step": 7473 + }, + { + "epoch": 1.7221198156682027, + "grad_norm": 1.1946101503339825, + "learning_rate": 1.0387980262067575e-07, + "loss": 0.7575969696044922, + "step": 7474 + }, + { + "epoch": 1.7223502304147464, + "grad_norm": 1.3899740319803422, + "learning_rate": 1.0371078267939082e-07, + "loss": 0.7321910262107849, + "step": 7475 + }, + { + 
"epoch": 1.7225806451612904, + "grad_norm": 1.3828231848460977, + "learning_rate": 1.035418928332995e-07, + "loss": 0.7812562584877014, + "step": 7476 + }, + { + "epoch": 1.7228110599078341, + "grad_norm": 1.3136112254743646, + "learning_rate": 1.0337313310691565e-07, + "loss": 0.7272104620933533, + "step": 7477 + }, + { + "epoch": 1.7230414746543778, + "grad_norm": 1.1508289944716614, + "learning_rate": 1.032045035247343e-07, + "loss": 0.7006442546844482, + "step": 7478 + }, + { + "epoch": 1.7232718894009218, + "grad_norm": 1.138231534813956, + "learning_rate": 1.0303600411123226e-07, + "loss": 0.7082154750823975, + "step": 7479 + }, + { + "epoch": 1.7235023041474653, + "grad_norm": 1.4157478972732351, + "learning_rate": 1.0286763489086681e-07, + "loss": 0.7204899191856384, + "step": 7480 + }, + { + "epoch": 1.7237327188940093, + "grad_norm": 1.1954797848768004, + "learning_rate": 1.026993958880763e-07, + "loss": 0.9119626879692078, + "step": 7481 + }, + { + "epoch": 1.723963133640553, + "grad_norm": 1.0923155592461768, + "learning_rate": 1.0253128712728088e-07, + "loss": 0.5961707830429077, + "step": 7482 + }, + { + "epoch": 1.7241935483870967, + "grad_norm": 1.1032837677908203, + "learning_rate": 1.023633086328809e-07, + "loss": 0.7469611167907715, + "step": 7483 + }, + { + "epoch": 1.7244239631336407, + "grad_norm": 1.2394445599695993, + "learning_rate": 1.0219546042925841e-07, + "loss": 0.8353795409202576, + "step": 7484 + }, + { + "epoch": 1.7246543778801844, + "grad_norm": 1.120589163159477, + "learning_rate": 1.0202774254077618e-07, + "loss": 0.6587873101234436, + "step": 7485 + }, + { + "epoch": 1.7248847926267281, + "grad_norm": 1.2182162589741892, + "learning_rate": 1.0186015499177847e-07, + "loss": 0.8595654964447021, + "step": 7486 + }, + { + "epoch": 1.7251152073732718, + "grad_norm": 1.0966229129393803, + "learning_rate": 1.0169269780659028e-07, + "loss": 0.7683298587799072, + "step": 7487 + }, + { + "epoch": 1.7253456221198156, + "grad_norm": 
1.372358134101511, + "learning_rate": 1.0152537100951786e-07, + "loss": 0.888152003288269, + "step": 7488 + }, + { + "epoch": 1.7255760368663595, + "grad_norm": 1.1162191205168919, + "learning_rate": 1.013581746248482e-07, + "loss": 0.7835309505462646, + "step": 7489 + }, + { + "epoch": 1.7258064516129032, + "grad_norm": 1.4079534093347241, + "learning_rate": 1.0119110867684999e-07, + "loss": 0.9744646549224854, + "step": 7490 + }, + { + "epoch": 1.726036866359447, + "grad_norm": 1.109483043922066, + "learning_rate": 1.0102417318977251e-07, + "loss": 0.6842091083526611, + "step": 7491 + }, + { + "epoch": 1.726267281105991, + "grad_norm": 1.2357910065520838, + "learning_rate": 1.0085736818784607e-07, + "loss": 0.7435774207115173, + "step": 7492 + }, + { + "epoch": 1.7264976958525344, + "grad_norm": 1.3316804792215136, + "learning_rate": 1.0069069369528249e-07, + "loss": 0.8430237770080566, + "step": 7493 + }, + { + "epoch": 1.7267281105990784, + "grad_norm": 1.1766330255379311, + "learning_rate": 1.0052414973627421e-07, + "loss": 0.8203141689300537, + "step": 7494 + }, + { + "epoch": 1.726958525345622, + "grad_norm": 1.291685708783942, + "learning_rate": 1.0035773633499456e-07, + "loss": 0.7491584420204163, + "step": 7495 + }, + { + "epoch": 1.7271889400921658, + "grad_norm": 0.9475128549493947, + "learning_rate": 1.0019145351559876e-07, + "loss": 0.6738899946212769, + "step": 7496 + }, + { + "epoch": 1.7274193548387098, + "grad_norm": 1.4107090522911332, + "learning_rate": 1.0002530130222231e-07, + "loss": 0.8628265857696533, + "step": 7497 + }, + { + "epoch": 1.7276497695852533, + "grad_norm": 1.5650622568616335, + "learning_rate": 9.985927971898178e-08, + "loss": 1.0158125162124634, + "step": 7498 + }, + { + "epoch": 1.7278801843317972, + "grad_norm": 1.2981782537446935, + "learning_rate": 9.969338878997535e-08, + "loss": 0.7269070148468018, + "step": 7499 + }, + { + "epoch": 1.728110599078341, + "grad_norm": 1.3106792244331589, + "learning_rate": 
9.952762853928165e-08, + "loss": 0.8769187927246094, + "step": 7500 + }, + { + "epoch": 1.7283410138248847, + "grad_norm": 1.325563750244826, + "learning_rate": 9.936199899096042e-08, + "loss": 0.7841119170188904, + "step": 7501 + }, + { + "epoch": 1.7285714285714286, + "grad_norm": 1.7907234255256992, + "learning_rate": 9.91965001690529e-08, + "loss": 0.9209425449371338, + "step": 7502 + }, + { + "epoch": 1.7288018433179724, + "grad_norm": 1.110414701934764, + "learning_rate": 9.903113209758096e-08, + "loss": 0.7795250415802002, + "step": 7503 + }, + { + "epoch": 1.729032258064516, + "grad_norm": 1.2158163264490913, + "learning_rate": 9.886589480054741e-08, + "loss": 0.7131094932556152, + "step": 7504 + }, + { + "epoch": 1.72926267281106, + "grad_norm": 1.167789931248441, + "learning_rate": 9.870078830193629e-08, + "loss": 0.8090137839317322, + "step": 7505 + }, + { + "epoch": 1.7294930875576036, + "grad_norm": 1.124104241227004, + "learning_rate": 9.853581262571231e-08, + "loss": 0.7797958850860596, + "step": 7506 + }, + { + "epoch": 1.7297235023041475, + "grad_norm": 1.3470491669984355, + "learning_rate": 9.83709677958221e-08, + "loss": 0.6927989721298218, + "step": 7507 + }, + { + "epoch": 1.7299539170506912, + "grad_norm": 1.152565458620573, + "learning_rate": 9.820625383619219e-08, + "loss": 0.8009092807769775, + "step": 7508 + }, + { + "epoch": 1.730184331797235, + "grad_norm": 1.0970285369996284, + "learning_rate": 9.804167077073056e-08, + "loss": 0.761864423751831, + "step": 7509 + }, + { + "epoch": 1.730414746543779, + "grad_norm": 1.5795757660336223, + "learning_rate": 9.787721862332654e-08, + "loss": 0.7459509372711182, + "step": 7510 + }, + { + "epoch": 1.7306451612903224, + "grad_norm": 1.0401744024243509, + "learning_rate": 9.771289741785005e-08, + "loss": 0.8216449022293091, + "step": 7511 + }, + { + "epoch": 1.7308755760368664, + "grad_norm": 1.3924364017238642, + "learning_rate": 9.754870717815177e-08, + "loss": 0.7860604524612427, + "step": 7512 
+ }, + { + "epoch": 1.73110599078341, + "grad_norm": 1.146706612325942, + "learning_rate": 9.738464792806422e-08, + "loss": 0.7727769613265991, + "step": 7513 + }, + { + "epoch": 1.7313364055299538, + "grad_norm": 1.2690787911964316, + "learning_rate": 9.722071969140011e-08, + "loss": 0.874458909034729, + "step": 7514 + }, + { + "epoch": 1.7315668202764978, + "grad_norm": 1.1530798069952481, + "learning_rate": 9.705692249195319e-08, + "loss": 0.840191125869751, + "step": 7515 + }, + { + "epoch": 1.7317972350230415, + "grad_norm": 1.1387350117516357, + "learning_rate": 9.689325635349877e-08, + "loss": 0.7169238924980164, + "step": 7516 + }, + { + "epoch": 1.7320276497695852, + "grad_norm": 1.2478630540284088, + "learning_rate": 9.672972129979273e-08, + "loss": 0.7554492950439453, + "step": 7517 + }, + { + "epoch": 1.7322580645161292, + "grad_norm": 1.2166706454141942, + "learning_rate": 9.656631735457154e-08, + "loss": 0.5734076499938965, + "step": 7518 + }, + { + "epoch": 1.7324884792626727, + "grad_norm": 1.5466370383298045, + "learning_rate": 9.640304454155369e-08, + "loss": 0.7867637872695923, + "step": 7519 + }, + { + "epoch": 1.7327188940092166, + "grad_norm": 1.2704443586099365, + "learning_rate": 9.623990288443773e-08, + "loss": 0.7330230474472046, + "step": 7520 + }, + { + "epoch": 1.7329493087557604, + "grad_norm": 1.1352922714992866, + "learning_rate": 9.607689240690319e-08, + "loss": 0.7880058288574219, + "step": 7521 + }, + { + "epoch": 1.733179723502304, + "grad_norm": 1.0605191939295662, + "learning_rate": 9.591401313261139e-08, + "loss": 0.796575665473938, + "step": 7522 + }, + { + "epoch": 1.733410138248848, + "grad_norm": 1.4376273040997398, + "learning_rate": 9.575126508520359e-08, + "loss": 0.8101698160171509, + "step": 7523 + }, + { + "epoch": 1.7336405529953915, + "grad_norm": 1.0868433692155355, + "learning_rate": 9.55886482883026e-08, + "loss": 0.7811597585678101, + "step": 7524 + }, + { + "epoch": 1.7338709677419355, + "grad_norm": 
1.1754841201094306, + "learning_rate": 9.542616276551208e-08, + "loss": 0.7680011987686157, + "step": 7525 + }, + { + "epoch": 1.7341013824884792, + "grad_norm": 1.3670730603232781, + "learning_rate": 9.526380854041638e-08, + "loss": 0.8018794059753418, + "step": 7526 + }, + { + "epoch": 1.734331797235023, + "grad_norm": 1.1232468645544793, + "learning_rate": 9.510158563658133e-08, + "loss": 0.7770500183105469, + "step": 7527 + }, + { + "epoch": 1.734562211981567, + "grad_norm": 1.1848169541071576, + "learning_rate": 9.493949407755309e-08, + "loss": 0.7622300982475281, + "step": 7528 + }, + { + "epoch": 1.7347926267281106, + "grad_norm": 1.5281654640943847, + "learning_rate": 9.477753388685928e-08, + "loss": 0.831570029258728, + "step": 7529 + }, + { + "epoch": 1.7350230414746544, + "grad_norm": 1.1599086861943149, + "learning_rate": 9.461570508800776e-08, + "loss": 0.7987254858016968, + "step": 7530 + }, + { + "epoch": 1.7352534562211983, + "grad_norm": 1.2752040500202788, + "learning_rate": 9.44540077044883e-08, + "loss": 0.8219848275184631, + "step": 7531 + }, + { + "epoch": 1.7354838709677418, + "grad_norm": 1.298736989691398, + "learning_rate": 9.429244175977092e-08, + "loss": 0.8273369073867798, + "step": 7532 + }, + { + "epoch": 1.7357142857142858, + "grad_norm": 1.2555474610105797, + "learning_rate": 9.413100727730628e-08, + "loss": 0.8241056203842163, + "step": 7533 + }, + { + "epoch": 1.7359447004608295, + "grad_norm": 1.4118150886368108, + "learning_rate": 9.396970428052697e-08, + "loss": 0.6880715489387512, + "step": 7534 + }, + { + "epoch": 1.7361751152073732, + "grad_norm": 1.092011806345561, + "learning_rate": 9.380853279284551e-08, + "loss": 0.7355446815490723, + "step": 7535 + }, + { + "epoch": 1.7364055299539172, + "grad_norm": 1.2700711725839655, + "learning_rate": 9.364749283765604e-08, + "loss": 0.8835841417312622, + "step": 7536 + }, + { + "epoch": 1.7366359447004607, + "grad_norm": 1.1984936737610834, + "learning_rate": 9.348658443833313e-08, 
+ "loss": 0.80763840675354, + "step": 7537 + }, + { + "epoch": 1.7368663594470046, + "grad_norm": 1.2855970061631397, + "learning_rate": 9.332580761823227e-08, + "loss": 0.7473145723342896, + "step": 7538 + }, + { + "epoch": 1.7370967741935484, + "grad_norm": 1.2970951445867331, + "learning_rate": 9.316516240069028e-08, + "loss": 0.6618188619613647, + "step": 7539 + }, + { + "epoch": 1.737327188940092, + "grad_norm": 1.3396426049949766, + "learning_rate": 9.300464880902447e-08, + "loss": 0.7432928085327148, + "step": 7540 + }, + { + "epoch": 1.737557603686636, + "grad_norm": 1.1659381023507147, + "learning_rate": 9.284426686653302e-08, + "loss": 0.7915963530540466, + "step": 7541 + }, + { + "epoch": 1.7377880184331798, + "grad_norm": 1.1552275821682043, + "learning_rate": 9.26840165964955e-08, + "loss": 0.6428440809249878, + "step": 7542 + }, + { + "epoch": 1.7380184331797235, + "grad_norm": 1.1399241166482426, + "learning_rate": 9.252389802217187e-08, + "loss": 0.7142912149429321, + "step": 7543 + }, + { + "epoch": 1.7382488479262674, + "grad_norm": 1.316337246157137, + "learning_rate": 9.236391116680309e-08, + "loss": 0.878044605255127, + "step": 7544 + }, + { + "epoch": 1.738479262672811, + "grad_norm": 1.089416476430598, + "learning_rate": 9.220405605361103e-08, + "loss": 0.6861810684204102, + "step": 7545 + }, + { + "epoch": 1.738709677419355, + "grad_norm": 1.3890455529154517, + "learning_rate": 9.204433270579825e-08, + "loss": 0.7638171911239624, + "step": 7546 + }, + { + "epoch": 1.7389400921658986, + "grad_norm": 1.1532660265349828, + "learning_rate": 9.188474114654876e-08, + "loss": 0.7149873971939087, + "step": 7547 + }, + { + "epoch": 1.7391705069124423, + "grad_norm": 1.1783502444227563, + "learning_rate": 9.172528139902703e-08, + "loss": 0.7249442338943481, + "step": 7548 + }, + { + "epoch": 1.7394009216589863, + "grad_norm": 1.178650320628679, + "learning_rate": 9.156595348637819e-08, + "loss": 0.6846513748168945, + "step": 7549 + }, + { + "epoch": 
1.7396313364055298, + "grad_norm": 1.4706201914955974, + "learning_rate": 9.140675743172843e-08, + "loss": 0.9332281351089478, + "step": 7550 + }, + { + "epoch": 1.7398617511520738, + "grad_norm": 1.1835891939139382, + "learning_rate": 9.124769325818526e-08, + "loss": 0.6878118515014648, + "step": 7551 + }, + { + "epoch": 1.7400921658986175, + "grad_norm": 1.077038469987993, + "learning_rate": 9.108876098883633e-08, + "loss": 0.7695426344871521, + "step": 7552 + }, + { + "epoch": 1.7403225806451612, + "grad_norm": 1.3278288479360603, + "learning_rate": 9.09299606467503e-08, + "loss": 0.7983303666114807, + "step": 7553 + }, + { + "epoch": 1.7405529953917052, + "grad_norm": 1.4656214059917094, + "learning_rate": 9.077129225497726e-08, + "loss": 0.8158761262893677, + "step": 7554 + }, + { + "epoch": 1.7407834101382489, + "grad_norm": 1.1519947124673093, + "learning_rate": 9.061275583654748e-08, + "loss": 0.8064214587211609, + "step": 7555 + }, + { + "epoch": 1.7410138248847926, + "grad_norm": 1.2545881332280804, + "learning_rate": 9.045435141447211e-08, + "loss": 0.9058080911636353, + "step": 7556 + }, + { + "epoch": 1.7412442396313366, + "grad_norm": 1.213639501339424, + "learning_rate": 9.029607901174374e-08, + "loss": 0.7392270565032959, + "step": 7557 + }, + { + "epoch": 1.74147465437788, + "grad_norm": 1.0453486445607982, + "learning_rate": 9.013793865133501e-08, + "loss": 0.7114729881286621, + "step": 7558 + }, + { + "epoch": 1.741705069124424, + "grad_norm": 1.2302263811033798, + "learning_rate": 8.997993035620022e-08, + "loss": 0.8675493597984314, + "step": 7559 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.9934561818451934, + "learning_rate": 8.98220541492738e-08, + "loss": 0.8103020191192627, + "step": 7560 + }, + { + "epoch": 1.7421658986175115, + "grad_norm": 1.2538115734834285, + "learning_rate": 8.966431005347109e-08, + "loss": 0.7339279651641846, + "step": 7561 + }, + { + "epoch": 1.7423963133640554, + "grad_norm": 1.3510829475373114, + 
"learning_rate": 8.950669809168887e-08, + "loss": 0.6971707344055176, + "step": 7562 + }, + { + "epoch": 1.742626728110599, + "grad_norm": 1.105458403928542, + "learning_rate": 8.934921828680408e-08, + "loss": 0.8633124232292175, + "step": 7563 + }, + { + "epoch": 1.7428571428571429, + "grad_norm": 1.3082830118219664, + "learning_rate": 8.919187066167466e-08, + "loss": 0.7704664468765259, + "step": 7564 + }, + { + "epoch": 1.7430875576036866, + "grad_norm": 1.1782653714880955, + "learning_rate": 8.903465523913955e-08, + "loss": 0.7063533067703247, + "step": 7565 + }, + { + "epoch": 1.7433179723502303, + "grad_norm": 1.1177210535700517, + "learning_rate": 8.887757204201817e-08, + "loss": 0.7094486951828003, + "step": 7566 + }, + { + "epoch": 1.7435483870967743, + "grad_norm": 1.4575572123890834, + "learning_rate": 8.872062109311096e-08, + "loss": 0.8743780255317688, + "step": 7567 + }, + { + "epoch": 1.743778801843318, + "grad_norm": 1.5827740898240907, + "learning_rate": 8.856380241519935e-08, + "loss": 0.7282687425613403, + "step": 7568 + }, + { + "epoch": 1.7440092165898617, + "grad_norm": 1.105316538989134, + "learning_rate": 8.840711603104523e-08, + "loss": 0.7507487535476685, + "step": 7569 + }, + { + "epoch": 1.7442396313364057, + "grad_norm": 1.2820028807325874, + "learning_rate": 8.82505619633912e-08, + "loss": 0.807691216468811, + "step": 7570 + }, + { + "epoch": 1.7444700460829492, + "grad_norm": 1.3537034886290398, + "learning_rate": 8.809414023496142e-08, + "loss": 0.8650702238082886, + "step": 7571 + }, + { + "epoch": 1.7447004608294931, + "grad_norm": 0.9602033366804331, + "learning_rate": 8.793785086845984e-08, + "loss": 0.6872273683547974, + "step": 7572 + }, + { + "epoch": 1.7449308755760369, + "grad_norm": 1.0979215212634434, + "learning_rate": 8.778169388657163e-08, + "loss": 0.7242698669433594, + "step": 7573 + }, + { + "epoch": 1.7451612903225806, + "grad_norm": 1.0962988735603825, + "learning_rate": 8.762566931196313e-08, + "loss": 
0.741705060005188, + "step": 7574 + }, + { + "epoch": 1.7453917050691246, + "grad_norm": 1.06231801843056, + "learning_rate": 8.746977716728099e-08, + "loss": 0.7293061017990112, + "step": 7575 + }, + { + "epoch": 1.745622119815668, + "grad_norm": 1.0145801945512316, + "learning_rate": 8.731401747515244e-08, + "loss": 0.8385475277900696, + "step": 7576 + }, + { + "epoch": 1.745852534562212, + "grad_norm": 1.4891647422185605, + "learning_rate": 8.715839025818617e-08, + "loss": 0.8484489917755127, + "step": 7577 + }, + { + "epoch": 1.7460829493087557, + "grad_norm": 1.1930293813449155, + "learning_rate": 8.7002895538971e-08, + "loss": 0.6511530876159668, + "step": 7578 + }, + { + "epoch": 1.7463133640552995, + "grad_norm": 1.4360732745608953, + "learning_rate": 8.684753334007688e-08, + "loss": 0.8274673223495483, + "step": 7579 + }, + { + "epoch": 1.7465437788018434, + "grad_norm": 1.081237944644138, + "learning_rate": 8.669230368405456e-08, + "loss": 0.7367755174636841, + "step": 7580 + }, + { + "epoch": 1.7467741935483871, + "grad_norm": 1.2748877435171337, + "learning_rate": 8.653720659343522e-08, + "loss": 0.80199134349823, + "step": 7581 + }, + { + "epoch": 1.7470046082949309, + "grad_norm": 1.1988639104811598, + "learning_rate": 8.638224209073097e-08, + "loss": 0.7782701253890991, + "step": 7582 + }, + { + "epoch": 1.7472350230414746, + "grad_norm": 1.3660035419508034, + "learning_rate": 8.622741019843504e-08, + "loss": 0.7613752484321594, + "step": 7583 + }, + { + "epoch": 1.7474654377880183, + "grad_norm": 1.3599194483251544, + "learning_rate": 8.60727109390208e-08, + "loss": 0.8213690519332886, + "step": 7584 + }, + { + "epoch": 1.7476958525345623, + "grad_norm": 1.1411507368613496, + "learning_rate": 8.59181443349426e-08, + "loss": 0.7064045667648315, + "step": 7585 + }, + { + "epoch": 1.747926267281106, + "grad_norm": 1.1189241999598565, + "learning_rate": 8.576371040863573e-08, + "loss": 0.6686617136001587, + "step": 7586 + }, + { + "epoch": 
1.7481566820276497, + "grad_norm": 1.0194951619872286, + "learning_rate": 8.560940918251592e-08, + "loss": 0.7520097494125366, + "step": 7587 + }, + { + "epoch": 1.7483870967741937, + "grad_norm": 1.0822685191965165, + "learning_rate": 8.545524067897991e-08, + "loss": 0.8176038265228271, + "step": 7588 + }, + { + "epoch": 1.7486175115207372, + "grad_norm": 1.3408318725531652, + "learning_rate": 8.530120492040505e-08, + "loss": 0.6680614948272705, + "step": 7589 + }, + { + "epoch": 1.7488479262672811, + "grad_norm": 1.3621846138568519, + "learning_rate": 8.514730192914921e-08, + "loss": 0.7421592473983765, + "step": 7590 + }, + { + "epoch": 1.7490783410138249, + "grad_norm": 1.2822263575200588, + "learning_rate": 8.499353172755164e-08, + "loss": 0.8869342803955078, + "step": 7591 + }, + { + "epoch": 1.7493087557603686, + "grad_norm": 1.1206823186662898, + "learning_rate": 8.48398943379316e-08, + "loss": 0.6850584745407104, + "step": 7592 + }, + { + "epoch": 1.7495391705069125, + "grad_norm": 1.0932592535391596, + "learning_rate": 8.468638978258914e-08, + "loss": 0.7433363199234009, + "step": 7593 + }, + { + "epoch": 1.7497695852534563, + "grad_norm": 1.0269953798613225, + "learning_rate": 8.453301808380564e-08, + "loss": 0.7744357585906982, + "step": 7594 + }, + { + "epoch": 1.75, + "grad_norm": 1.382126107142446, + "learning_rate": 8.437977926384277e-08, + "loss": 0.8236217498779297, + "step": 7595 + }, + { + "epoch": 1.7502304147465437, + "grad_norm": 1.3329245666066865, + "learning_rate": 8.422667334494249e-08, + "loss": 0.8552603721618652, + "step": 7596 + }, + { + "epoch": 1.7504608294930875, + "grad_norm": 1.4100651978644374, + "learning_rate": 8.407370034932859e-08, + "loss": 0.7755998373031616, + "step": 7597 + }, + { + "epoch": 1.7506912442396314, + "grad_norm": 1.3033243035055457, + "learning_rate": 8.392086029920442e-08, + "loss": 0.8105130195617676, + "step": 7598 + }, + { + "epoch": 1.7509216589861751, + "grad_norm": 1.290928258750675, + 
"learning_rate": 8.376815321675457e-08, + "loss": 0.8787405490875244, + "step": 7599 + }, + { + "epoch": 1.7511520737327189, + "grad_norm": 1.1296910155342912, + "learning_rate": 8.361557912414441e-08, + "loss": 0.6107788681983948, + "step": 7600 + }, + { + "epoch": 1.7513824884792628, + "grad_norm": 0.9941949428855014, + "learning_rate": 8.34631380435199e-08, + "loss": 0.6825795769691467, + "step": 7601 + }, + { + "epoch": 1.7516129032258063, + "grad_norm": 1.5141115638242784, + "learning_rate": 8.331082999700734e-08, + "loss": 0.7069272994995117, + "step": 7602 + }, + { + "epoch": 1.7518433179723503, + "grad_norm": 1.5687921139560086, + "learning_rate": 8.315865500671449e-08, + "loss": 0.7784801721572876, + "step": 7603 + }, + { + "epoch": 1.752073732718894, + "grad_norm": 1.0771300382051838, + "learning_rate": 8.300661309472912e-08, + "loss": 0.7653795480728149, + "step": 7604 + }, + { + "epoch": 1.7523041474654377, + "grad_norm": 1.5582480598587298, + "learning_rate": 8.285470428311991e-08, + "loss": 0.7386122941970825, + "step": 7605 + }, + { + "epoch": 1.7525345622119817, + "grad_norm": 0.9515219540238303, + "learning_rate": 8.270292859393613e-08, + "loss": 0.7828700542449951, + "step": 7606 + }, + { + "epoch": 1.7527649769585254, + "grad_norm": 1.5500733851956912, + "learning_rate": 8.255128604920792e-08, + "loss": 0.8955565094947815, + "step": 7607 + }, + { + "epoch": 1.7529953917050691, + "grad_norm": 1.2505809950313513, + "learning_rate": 8.2399776670946e-08, + "loss": 0.9071576595306396, + "step": 7608 + }, + { + "epoch": 1.7532258064516129, + "grad_norm": 1.3402860152327503, + "learning_rate": 8.22484004811419e-08, + "loss": 0.752417802810669, + "step": 7609 + }, + { + "epoch": 1.7534562211981566, + "grad_norm": 1.367440429282924, + "learning_rate": 8.209715750176727e-08, + "loss": 0.8611370325088501, + "step": 7610 + }, + { + "epoch": 1.7536866359447005, + "grad_norm": 1.232351895452084, + "learning_rate": 8.19460477547752e-08, + "loss": 
0.745223879814148, + "step": 7611 + }, + { + "epoch": 1.7539170506912443, + "grad_norm": 1.0415704016806513, + "learning_rate": 8.179507126209906e-08, + "loss": 0.7799668908119202, + "step": 7612 + }, + { + "epoch": 1.754147465437788, + "grad_norm": 1.3761849870920217, + "learning_rate": 8.164422804565263e-08, + "loss": 0.8177207708358765, + "step": 7613 + }, + { + "epoch": 1.754377880184332, + "grad_norm": 1.2017347256018391, + "learning_rate": 8.149351812733085e-08, + "loss": 0.7111436128616333, + "step": 7614 + }, + { + "epoch": 1.7546082949308754, + "grad_norm": 1.2253776843179969, + "learning_rate": 8.1342941529009e-08, + "loss": 0.6840728521347046, + "step": 7615 + }, + { + "epoch": 1.7548387096774194, + "grad_norm": 1.02983629791633, + "learning_rate": 8.119249827254281e-08, + "loss": 0.6115491986274719, + "step": 7616 + }, + { + "epoch": 1.7550691244239631, + "grad_norm": 1.3870391302655596, + "learning_rate": 8.104218837976939e-08, + "loss": 0.7149351239204407, + "step": 7617 + }, + { + "epoch": 1.7552995391705069, + "grad_norm": 1.2174150358988711, + "learning_rate": 8.089201187250571e-08, + "loss": 0.688147783279419, + "step": 7618 + }, + { + "epoch": 1.7555299539170508, + "grad_norm": 1.2630937737290178, + "learning_rate": 8.074196877254969e-08, + "loss": 0.8092058300971985, + "step": 7619 + }, + { + "epoch": 1.7557603686635943, + "grad_norm": 0.8375696110242734, + "learning_rate": 8.05920591016801e-08, + "loss": 0.7375935912132263, + "step": 7620 + }, + { + "epoch": 1.7559907834101383, + "grad_norm": 1.1868565460321117, + "learning_rate": 8.044228288165599e-08, + "loss": 0.6793934106826782, + "step": 7621 + }, + { + "epoch": 1.756221198156682, + "grad_norm": 1.2102446264436708, + "learning_rate": 8.0292640134217e-08, + "loss": 0.7395757436752319, + "step": 7622 + }, + { + "epoch": 1.7564516129032257, + "grad_norm": 0.9259939168277553, + "learning_rate": 8.014313088108394e-08, + "loss": 0.546409010887146, + "step": 7623 + }, + { + "epoch": 
1.7566820276497697, + "grad_norm": 1.4575552468425101, + "learning_rate": 7.999375514395778e-08, + "loss": 0.7790534496307373, + "step": 7624 + }, + { + "epoch": 1.7569124423963134, + "grad_norm": 1.0896798964233478, + "learning_rate": 7.984451294452e-08, + "loss": 0.7398231625556946, + "step": 7625 + }, + { + "epoch": 1.7571428571428571, + "grad_norm": 1.2623646343227142, + "learning_rate": 7.969540430443311e-08, + "loss": 0.7414441108703613, + "step": 7626 + }, + { + "epoch": 1.757373271889401, + "grad_norm": 1.1312110923091452, + "learning_rate": 7.954642924533994e-08, + "loss": 0.7548750638961792, + "step": 7627 + }, + { + "epoch": 1.7576036866359446, + "grad_norm": 0.957909042850816, + "learning_rate": 7.939758778886385e-08, + "loss": 0.7546773552894592, + "step": 7628 + }, + { + "epoch": 1.7578341013824885, + "grad_norm": 1.1252175485529645, + "learning_rate": 7.924887995660945e-08, + "loss": 0.7373867630958557, + "step": 7629 + }, + { + "epoch": 1.7580645161290323, + "grad_norm": 0.9815120449405607, + "learning_rate": 7.910030577016113e-08, + "loss": 0.7271026968955994, + "step": 7630 + }, + { + "epoch": 1.758294930875576, + "grad_norm": 1.3179911972781693, + "learning_rate": 7.89518652510841e-08, + "loss": 0.8723413944244385, + "step": 7631 + }, + { + "epoch": 1.75852534562212, + "grad_norm": 1.3060473211580457, + "learning_rate": 7.880355842092468e-08, + "loss": 0.8282548189163208, + "step": 7632 + }, + { + "epoch": 1.7587557603686634, + "grad_norm": 1.1089249458958528, + "learning_rate": 7.865538530120918e-08, + "loss": 0.7436991930007935, + "step": 7633 + }, + { + "epoch": 1.7589861751152074, + "grad_norm": 1.0884201833829175, + "learning_rate": 7.850734591344488e-08, + "loss": 0.7750650644302368, + "step": 7634 + }, + { + "epoch": 1.7592165898617511, + "grad_norm": 1.1544057740235625, + "learning_rate": 7.835944027911957e-08, + "loss": 0.6824958324432373, + "step": 7635 + }, + { + "epoch": 1.7594470046082948, + "grad_norm": 1.1607504467923393, + 
"learning_rate": 7.821166841970107e-08, + "loss": 0.8500322103500366, + "step": 7636 + }, + { + "epoch": 1.7596774193548388, + "grad_norm": 1.3527797330475602, + "learning_rate": 7.806403035663889e-08, + "loss": 0.7111128568649292, + "step": 7637 + }, + { + "epoch": 1.7599078341013825, + "grad_norm": 1.1877365592337052, + "learning_rate": 7.791652611136212e-08, + "loss": 0.7320532202720642, + "step": 7638 + }, + { + "epoch": 1.7601382488479262, + "grad_norm": 1.2292449607917257, + "learning_rate": 7.776915570528076e-08, + "loss": 0.8439149856567383, + "step": 7639 + }, + { + "epoch": 1.7603686635944702, + "grad_norm": 1.0358127598823044, + "learning_rate": 7.762191915978578e-08, + "loss": 0.6489125490188599, + "step": 7640 + }, + { + "epoch": 1.7605990783410137, + "grad_norm": 1.0601387517448153, + "learning_rate": 7.74748164962482e-08, + "loss": 0.8118616342544556, + "step": 7641 + }, + { + "epoch": 1.7608294930875577, + "grad_norm": 1.0728808336951121, + "learning_rate": 7.732784773601953e-08, + "loss": 0.7528102397918701, + "step": 7642 + }, + { + "epoch": 1.7610599078341014, + "grad_norm": 1.0747138773482348, + "learning_rate": 7.718101290043244e-08, + "loss": 0.7856849431991577, + "step": 7643 + }, + { + "epoch": 1.761290322580645, + "grad_norm": 1.3272410706900097, + "learning_rate": 7.703431201079969e-08, + "loss": 0.8001973628997803, + "step": 7644 + }, + { + "epoch": 1.761520737327189, + "grad_norm": 1.2721768499077395, + "learning_rate": 7.688774508841478e-08, + "loss": 0.8332471251487732, + "step": 7645 + }, + { + "epoch": 1.7617511520737326, + "grad_norm": 1.2453092502270793, + "learning_rate": 7.67413121545516e-08, + "loss": 0.7534940242767334, + "step": 7646 + }, + { + "epoch": 1.7619815668202765, + "grad_norm": 1.1709183629535718, + "learning_rate": 7.65950132304647e-08, + "loss": 0.737503170967102, + "step": 7647 + }, + { + "epoch": 1.7622119815668202, + "grad_norm": 1.0899482570396566, + "learning_rate": 7.644884833738896e-08, + "loss": 
0.7407201528549194, + "step": 7648 + }, + { + "epoch": 1.762442396313364, + "grad_norm": 1.0059187913201149, + "learning_rate": 7.630281749654055e-08, + "loss": 0.7589377164840698, + "step": 7649 + }, + { + "epoch": 1.762672811059908, + "grad_norm": 1.2624576054967966, + "learning_rate": 7.615692072911528e-08, + "loss": 0.6586496829986572, + "step": 7650 + }, + { + "epoch": 1.7629032258064516, + "grad_norm": 1.1572434060771926, + "learning_rate": 7.601115805628977e-08, + "loss": 0.705591082572937, + "step": 7651 + }, + { + "epoch": 1.7631336405529954, + "grad_norm": 1.2006722437863475, + "learning_rate": 7.586552949922176e-08, + "loss": 0.7889619469642639, + "step": 7652 + }, + { + "epoch": 1.7633640552995393, + "grad_norm": 1.0348577197525213, + "learning_rate": 7.572003507904868e-08, + "loss": 0.6912282705307007, + "step": 7653 + }, + { + "epoch": 1.7635944700460828, + "grad_norm": 1.1101374555344716, + "learning_rate": 7.557467481688873e-08, + "loss": 0.7374964952468872, + "step": 7654 + }, + { + "epoch": 1.7638248847926268, + "grad_norm": 1.1479262514291408, + "learning_rate": 7.542944873384105e-08, + "loss": 0.7302298545837402, + "step": 7655 + }, + { + "epoch": 1.7640552995391705, + "grad_norm": 1.2653276061660264, + "learning_rate": 7.5284356850985e-08, + "loss": 0.8323671817779541, + "step": 7656 + }, + { + "epoch": 1.7642857142857142, + "grad_norm": 1.0548505840987745, + "learning_rate": 7.513939918938028e-08, + "loss": 0.6654655933380127, + "step": 7657 + }, + { + "epoch": 1.7645161290322582, + "grad_norm": 1.1231001283574193, + "learning_rate": 7.499457577006751e-08, + "loss": 0.6371186375617981, + "step": 7658 + }, + { + "epoch": 1.7647465437788017, + "grad_norm": 1.3299088323872645, + "learning_rate": 7.484988661406733e-08, + "loss": 0.7761695384979248, + "step": 7659 + }, + { + "epoch": 1.7649769585253456, + "grad_norm": 1.1268786347378037, + "learning_rate": 7.470533174238158e-08, + "loss": 0.779335618019104, + "step": 7660 + }, + { + "epoch": 
1.7652073732718894, + "grad_norm": 1.26329747548588, + "learning_rate": 7.456091117599195e-08, + "loss": 0.7642731666564941, + "step": 7661 + }, + { + "epoch": 1.765437788018433, + "grad_norm": 1.417392503393573, + "learning_rate": 7.441662493586076e-08, + "loss": 0.7490801215171814, + "step": 7662 + }, + { + "epoch": 1.765668202764977, + "grad_norm": 1.6109060172749883, + "learning_rate": 7.427247304293139e-08, + "loss": 0.9480686187744141, + "step": 7663 + }, + { + "epoch": 1.7658986175115208, + "grad_norm": 1.243245001745715, + "learning_rate": 7.412845551812707e-08, + "loss": 0.6208070516586304, + "step": 7664 + }, + { + "epoch": 1.7661290322580645, + "grad_norm": 1.2606477635417679, + "learning_rate": 7.398457238235167e-08, + "loss": 0.7782050371170044, + "step": 7665 + }, + { + "epoch": 1.7663594470046085, + "grad_norm": 1.1494295384377444, + "learning_rate": 7.38408236564897e-08, + "loss": 0.6725378632545471, + "step": 7666 + }, + { + "epoch": 1.766589861751152, + "grad_norm": 1.4030647180836417, + "learning_rate": 7.369720936140611e-08, + "loss": 0.8247120380401611, + "step": 7667 + }, + { + "epoch": 1.766820276497696, + "grad_norm": 1.2966757041323174, + "learning_rate": 7.355372951794614e-08, + "loss": 0.7866288423538208, + "step": 7668 + }, + { + "epoch": 1.7670506912442396, + "grad_norm": 1.5029385474750363, + "learning_rate": 7.341038414693613e-08, + "loss": 0.8096400499343872, + "step": 7669 + }, + { + "epoch": 1.7672811059907834, + "grad_norm": 1.5152361583075085, + "learning_rate": 7.326717326918208e-08, + "loss": 0.7799873352050781, + "step": 7670 + }, + { + "epoch": 1.7675115207373273, + "grad_norm": 1.0568101452951337, + "learning_rate": 7.312409690547095e-08, + "loss": 0.809285044670105, + "step": 7671 + }, + { + "epoch": 1.7677419354838708, + "grad_norm": 1.351048640166805, + "learning_rate": 7.298115507657021e-08, + "loss": 0.874248743057251, + "step": 7672 + }, + { + "epoch": 1.7679723502304148, + "grad_norm": 1.1594085684678137, + 
"learning_rate": 7.283834780322761e-08, + "loss": 0.7418022155761719, + "step": 7673 + }, + { + "epoch": 1.7682027649769585, + "grad_norm": 1.2895302232300179, + "learning_rate": 7.269567510617126e-08, + "loss": 0.720660388469696, + "step": 7674 + }, + { + "epoch": 1.7684331797235022, + "grad_norm": 1.241628438381412, + "learning_rate": 7.255313700611032e-08, + "loss": 0.7655429840087891, + "step": 7675 + }, + { + "epoch": 1.7686635944700462, + "grad_norm": 1.125747625986026, + "learning_rate": 7.241073352373361e-08, + "loss": 0.7303705215454102, + "step": 7676 + }, + { + "epoch": 1.76889400921659, + "grad_norm": 1.1695690935051566, + "learning_rate": 7.226846467971093e-08, + "loss": 0.7997909188270569, + "step": 7677 + }, + { + "epoch": 1.7691244239631336, + "grad_norm": 1.261135372954414, + "learning_rate": 7.212633049469264e-08, + "loss": 0.6546763181686401, + "step": 7678 + }, + { + "epoch": 1.7693548387096776, + "grad_norm": 0.9669222373383191, + "learning_rate": 7.1984330989309e-08, + "loss": 0.6374444961547852, + "step": 7679 + }, + { + "epoch": 1.769585253456221, + "grad_norm": 1.2966171484977755, + "learning_rate": 7.184246618417111e-08, + "loss": 0.7092937231063843, + "step": 7680 + }, + { + "epoch": 1.769815668202765, + "grad_norm": 1.3237517845156634, + "learning_rate": 7.17007360998706e-08, + "loss": 0.7702305316925049, + "step": 7681 + }, + { + "epoch": 1.7700460829493088, + "grad_norm": 0.978090031115468, + "learning_rate": 7.155914075697933e-08, + "loss": 0.7763724327087402, + "step": 7682 + }, + { + "epoch": 1.7702764976958525, + "grad_norm": 0.9935287090208255, + "learning_rate": 7.141768017604966e-08, + "loss": 0.6409577131271362, + "step": 7683 + }, + { + "epoch": 1.7705069124423964, + "grad_norm": 1.2265488041489598, + "learning_rate": 7.127635437761459e-08, + "loss": 0.7500795125961304, + "step": 7684 + }, + { + "epoch": 1.77073732718894, + "grad_norm": 1.405023681248552, + "learning_rate": 7.113516338218717e-08, + "loss": 0.7312004566192627, 
+ "step": 7685 + }, + { + "epoch": 1.770967741935484, + "grad_norm": 0.910138776962328, + "learning_rate": 7.099410721026112e-08, + "loss": 0.823514997959137, + "step": 7686 + }, + { + "epoch": 1.7711981566820276, + "grad_norm": 1.4146285511420962, + "learning_rate": 7.085318588231048e-08, + "loss": 0.9504063129425049, + "step": 7687 + }, + { + "epoch": 1.7714285714285714, + "grad_norm": 0.8614868773221174, + "learning_rate": 7.071239941878981e-08, + "loss": 0.7850733399391174, + "step": 7688 + }, + { + "epoch": 1.7716589861751153, + "grad_norm": 1.356738665999072, + "learning_rate": 7.057174784013431e-08, + "loss": 0.9447094798088074, + "step": 7689 + }, + { + "epoch": 1.771889400921659, + "grad_norm": 1.134179637006652, + "learning_rate": 7.04312311667592e-08, + "loss": 0.6675062775611877, + "step": 7690 + }, + { + "epoch": 1.7721198156682028, + "grad_norm": 0.9395193655643466, + "learning_rate": 7.029084941906005e-08, + "loss": 0.6875232458114624, + "step": 7691 + }, + { + "epoch": 1.7723502304147467, + "grad_norm": 1.3573723926231736, + "learning_rate": 7.015060261741357e-08, + "loss": 0.7847919464111328, + "step": 7692 + }, + { + "epoch": 1.7725806451612902, + "grad_norm": 1.300014614678359, + "learning_rate": 7.001049078217613e-08, + "loss": 0.7924584150314331, + "step": 7693 + }, + { + "epoch": 1.7728110599078342, + "grad_norm": 1.4499718780004744, + "learning_rate": 6.987051393368471e-08, + "loss": 0.8802344799041748, + "step": 7694 + }, + { + "epoch": 1.773041474654378, + "grad_norm": 1.425988233405148, + "learning_rate": 6.973067209225692e-08, + "loss": 0.7038631439208984, + "step": 7695 + }, + { + "epoch": 1.7732718894009216, + "grad_norm": 1.1226859696380713, + "learning_rate": 6.959096527819064e-08, + "loss": 0.9016700387001038, + "step": 7696 + }, + { + "epoch": 1.7735023041474656, + "grad_norm": 1.1967072079572705, + "learning_rate": 6.945139351176387e-08, + "loss": 0.7678165435791016, + "step": 7697 + }, + { + "epoch": 1.773732718894009, + 
"grad_norm": 1.1001980127511188, + "learning_rate": 6.931195681323565e-08, + "loss": 0.6612143516540527, + "step": 7698 + }, + { + "epoch": 1.773963133640553, + "grad_norm": 1.3968871696274494, + "learning_rate": 6.917265520284476e-08, + "loss": 0.840233325958252, + "step": 7699 + }, + { + "epoch": 1.7741935483870968, + "grad_norm": 1.3698339080168875, + "learning_rate": 6.90334887008106e-08, + "loss": 0.7913506031036377, + "step": 7700 + }, + { + "epoch": 1.7744239631336405, + "grad_norm": 1.3434994536689218, + "learning_rate": 6.889445732733323e-08, + "loss": 0.7523634433746338, + "step": 7701 + }, + { + "epoch": 1.7746543778801844, + "grad_norm": 1.1357027982798495, + "learning_rate": 6.875556110259273e-08, + "loss": 0.7009792327880859, + "step": 7702 + }, + { + "epoch": 1.7748847926267282, + "grad_norm": 0.9926018792518734, + "learning_rate": 6.861680004674963e-08, + "loss": 0.6533738970756531, + "step": 7703 + }, + { + "epoch": 1.7751152073732719, + "grad_norm": 1.0969556014291875, + "learning_rate": 6.847817417994517e-08, + "loss": 0.860493540763855, + "step": 7704 + }, + { + "epoch": 1.7753456221198156, + "grad_norm": 1.3425565367947665, + "learning_rate": 6.833968352230057e-08, + "loss": 0.810010552406311, + "step": 7705 + }, + { + "epoch": 1.7755760368663593, + "grad_norm": 1.2400741621258158, + "learning_rate": 6.820132809391743e-08, + "loss": 0.8443198204040527, + "step": 7706 + }, + { + "epoch": 1.7758064516129033, + "grad_norm": 1.1086679828690398, + "learning_rate": 6.806310791487813e-08, + "loss": 0.758772611618042, + "step": 7707 + }, + { + "epoch": 1.776036866359447, + "grad_norm": 1.2474164003496853, + "learning_rate": 6.792502300524472e-08, + "loss": 0.8438040614128113, + "step": 7708 + }, + { + "epoch": 1.7762672811059907, + "grad_norm": 1.154420265010753, + "learning_rate": 6.778707338506051e-08, + "loss": 0.7727431058883667, + "step": 7709 + }, + { + "epoch": 1.7764976958525347, + "grad_norm": 1.6420516256349273, + "learning_rate": 
6.764925907434849e-08, + "loss": 0.8118282556533813, + "step": 7710 + }, + { + "epoch": 1.7767281105990782, + "grad_norm": 1.22888062854885, + "learning_rate": 6.75115800931122e-08, + "loss": 0.7667281627655029, + "step": 7711 + }, + { + "epoch": 1.7769585253456222, + "grad_norm": 1.2558357954388057, + "learning_rate": 6.737403646133566e-08, + "loss": 0.7824913263320923, + "step": 7712 + }, + { + "epoch": 1.7771889400921659, + "grad_norm": 1.176254722115087, + "learning_rate": 6.723662819898312e-08, + "loss": 0.7318419218063354, + "step": 7713 + }, + { + "epoch": 1.7774193548387096, + "grad_norm": 1.2059569400095187, + "learning_rate": 6.709935532599897e-08, + "loss": 0.7060009241104126, + "step": 7714 + }, + { + "epoch": 1.7776497695852536, + "grad_norm": 1.3093811884607869, + "learning_rate": 6.69622178623086e-08, + "loss": 0.7367588877677917, + "step": 7715 + }, + { + "epoch": 1.7778801843317973, + "grad_norm": 1.3618967587860527, + "learning_rate": 6.682521582781708e-08, + "loss": 0.7340742349624634, + "step": 7716 + }, + { + "epoch": 1.778110599078341, + "grad_norm": 1.257394780772999, + "learning_rate": 6.668834924240995e-08, + "loss": 0.6655991077423096, + "step": 7717 + }, + { + "epoch": 1.7783410138248847, + "grad_norm": 1.3379718118337083, + "learning_rate": 6.655161812595367e-08, + "loss": 0.7562434673309326, + "step": 7718 + }, + { + "epoch": 1.7785714285714285, + "grad_norm": 1.2416548769934193, + "learning_rate": 6.641502249829423e-08, + "loss": 0.8078730702400208, + "step": 7719 + }, + { + "epoch": 1.7788018433179724, + "grad_norm": 1.1920319583326109, + "learning_rate": 6.627856237925811e-08, + "loss": 0.6285899877548218, + "step": 7720 + }, + { + "epoch": 1.7790322580645161, + "grad_norm": 1.1055337731409536, + "learning_rate": 6.61422377886528e-08, + "loss": 0.6633951663970947, + "step": 7721 + }, + { + "epoch": 1.7792626728110599, + "grad_norm": 1.0697990396462347, + "learning_rate": 6.600604874626548e-08, + "loss": 0.7273050546646118, + "step": 
7722 + }, + { + "epoch": 1.7794930875576038, + "grad_norm": 1.2680575632659172, + "learning_rate": 6.586999527186354e-08, + "loss": 0.6665729284286499, + "step": 7723 + }, + { + "epoch": 1.7797235023041473, + "grad_norm": 1.275935674563519, + "learning_rate": 6.573407738519531e-08, + "loss": 0.7332675457000732, + "step": 7724 + }, + { + "epoch": 1.7799539170506913, + "grad_norm": 1.0778234517601935, + "learning_rate": 6.559829510598892e-08, + "loss": 0.7439071536064148, + "step": 7725 + }, + { + "epoch": 1.780184331797235, + "grad_norm": 1.3635129938987167, + "learning_rate": 6.546264845395299e-08, + "loss": 0.7104752063751221, + "step": 7726 + }, + { + "epoch": 1.7804147465437787, + "grad_norm": 1.2639306988819587, + "learning_rate": 6.53271374487765e-08, + "loss": 0.7792220115661621, + "step": 7727 + }, + { + "epoch": 1.7806451612903227, + "grad_norm": 1.0938522733418012, + "learning_rate": 6.519176211012867e-08, + "loss": 0.6379693746566772, + "step": 7728 + }, + { + "epoch": 1.7808755760368664, + "grad_norm": 1.3289044633653213, + "learning_rate": 6.505652245765881e-08, + "loss": 0.7737444639205933, + "step": 7729 + }, + { + "epoch": 1.7811059907834101, + "grad_norm": 1.1550683939038542, + "learning_rate": 6.49214185109973e-08, + "loss": 0.7681130170822144, + "step": 7730 + }, + { + "epoch": 1.7813364055299539, + "grad_norm": 1.4083081227680676, + "learning_rate": 6.478645028975372e-08, + "loss": 0.8718420267105103, + "step": 7731 + }, + { + "epoch": 1.7815668202764976, + "grad_norm": 1.1823677205039174, + "learning_rate": 6.465161781351914e-08, + "loss": 0.7557366490364075, + "step": 7732 + }, + { + "epoch": 1.7817972350230415, + "grad_norm": 1.1999869902911706, + "learning_rate": 6.45169211018638e-08, + "loss": 0.6794936656951904, + "step": 7733 + }, + { + "epoch": 1.7820276497695853, + "grad_norm": 1.2764239528790797, + "learning_rate": 6.438236017433895e-08, + "loss": 0.8390437364578247, + "step": 7734 + }, + { + "epoch": 1.782258064516129, + "grad_norm": 
1.134383511808464, + "learning_rate": 6.424793505047599e-08, + "loss": 0.8024254441261292, + "step": 7735 + }, + { + "epoch": 1.782488479262673, + "grad_norm": 0.8536836629483899, + "learning_rate": 6.411364574978651e-08, + "loss": 0.6382162570953369, + "step": 7736 + }, + { + "epoch": 1.7827188940092165, + "grad_norm": 1.1757601346145792, + "learning_rate": 6.397949229176225e-08, + "loss": 0.6832011938095093, + "step": 7737 + }, + { + "epoch": 1.7829493087557604, + "grad_norm": 1.653357486541517, + "learning_rate": 6.384547469587564e-08, + "loss": 0.9003958702087402, + "step": 7738 + }, + { + "epoch": 1.7831797235023041, + "grad_norm": 1.1523951728047304, + "learning_rate": 6.371159298157913e-08, + "loss": 0.7030328512191772, + "step": 7739 + }, + { + "epoch": 1.7834101382488479, + "grad_norm": 1.2390057793357907, + "learning_rate": 6.357784716830528e-08, + "loss": 0.8153259754180908, + "step": 7740 + }, + { + "epoch": 1.7836405529953918, + "grad_norm": 1.4244568607420958, + "learning_rate": 6.344423727546744e-08, + "loss": 0.8229082226753235, + "step": 7741 + }, + { + "epoch": 1.7838709677419353, + "grad_norm": 1.3055755817113595, + "learning_rate": 6.331076332245866e-08, + "loss": 0.7306294441223145, + "step": 7742 + }, + { + "epoch": 1.7841013824884793, + "grad_norm": 1.1085692686400792, + "learning_rate": 6.317742532865233e-08, + "loss": 0.7613078951835632, + "step": 7743 + }, + { + "epoch": 1.784331797235023, + "grad_norm": 1.4694829399841158, + "learning_rate": 6.304422331340275e-08, + "loss": 0.9164611101150513, + "step": 7744 + }, + { + "epoch": 1.7845622119815667, + "grad_norm": 1.4076564642652605, + "learning_rate": 6.29111572960439e-08, + "loss": 0.8770956993103027, + "step": 7745 + }, + { + "epoch": 1.7847926267281107, + "grad_norm": 1.3274833988945276, + "learning_rate": 6.277822729588989e-08, + "loss": 0.7482821941375732, + "step": 7746 + }, + { + "epoch": 1.7850230414746544, + "grad_norm": 1.3149565308569835, + "learning_rate": 
6.264543333223549e-08, + "loss": 0.7850298881530762, + "step": 7747 + }, + { + "epoch": 1.7852534562211981, + "grad_norm": 1.0844733877563915, + "learning_rate": 6.251277542435552e-08, + "loss": 0.5781385898590088, + "step": 7748 + }, + { + "epoch": 1.785483870967742, + "grad_norm": 1.2619844590894689, + "learning_rate": 6.238025359150501e-08, + "loss": 0.8217513561248779, + "step": 7749 + }, + { + "epoch": 1.7857142857142856, + "grad_norm": 1.2512912228822737, + "learning_rate": 6.224786785291969e-08, + "loss": 0.8500482439994812, + "step": 7750 + }, + { + "epoch": 1.7859447004608295, + "grad_norm": 1.458025138254964, + "learning_rate": 6.211561822781474e-08, + "loss": 0.8146470785140991, + "step": 7751 + }, + { + "epoch": 1.7861751152073733, + "grad_norm": 1.248354775738917, + "learning_rate": 6.198350473538616e-08, + "loss": 0.7351702451705933, + "step": 7752 + }, + { + "epoch": 1.786405529953917, + "grad_norm": 1.2620887228989164, + "learning_rate": 6.185152739481026e-08, + "loss": 0.7993056774139404, + "step": 7753 + }, + { + "epoch": 1.786635944700461, + "grad_norm": 1.4031526672609798, + "learning_rate": 6.171968622524315e-08, + "loss": 0.8570160865783691, + "step": 7754 + }, + { + "epoch": 1.7868663594470044, + "grad_norm": 1.1948359150749444, + "learning_rate": 6.158798124582143e-08, + "loss": 0.6200212836265564, + "step": 7755 + }, + { + "epoch": 1.7870967741935484, + "grad_norm": 1.2592084852014216, + "learning_rate": 6.145641247566202e-08, + "loss": 0.8196465373039246, + "step": 7756 + }, + { + "epoch": 1.7873271889400921, + "grad_norm": 0.9917037331823602, + "learning_rate": 6.132497993386165e-08, + "loss": 0.7038032412528992, + "step": 7757 + }, + { + "epoch": 1.7875576036866359, + "grad_norm": 1.2428262727857045, + "learning_rate": 6.119368363949806e-08, + "loss": 0.7222307324409485, + "step": 7758 + }, + { + "epoch": 1.7877880184331798, + "grad_norm": 1.2991738769607613, + "learning_rate": 6.106252361162834e-08, + "loss": 0.8457501530647278, + 
"step": 7759 + }, + { + "epoch": 1.7880184331797235, + "grad_norm": 1.5487287329891364, + "learning_rate": 6.093149986929025e-08, + "loss": 0.7543236017227173, + "step": 7760 + }, + { + "epoch": 1.7882488479262673, + "grad_norm": 1.294614145507911, + "learning_rate": 6.080061243150191e-08, + "loss": 0.5728875398635864, + "step": 7761 + }, + { + "epoch": 1.7884792626728112, + "grad_norm": 1.3902935059609232, + "learning_rate": 6.066986131726138e-08, + "loss": 0.6864895820617676, + "step": 7762 + }, + { + "epoch": 1.7887096774193547, + "grad_norm": 1.1640824452811938, + "learning_rate": 6.053924654554687e-08, + "loss": 0.8580472469329834, + "step": 7763 + }, + { + "epoch": 1.7889400921658987, + "grad_norm": 1.358237067906671, + "learning_rate": 6.040876813531714e-08, + "loss": 0.7670924663543701, + "step": 7764 + }, + { + "epoch": 1.7891705069124424, + "grad_norm": 1.2558108988688055, + "learning_rate": 6.027842610551082e-08, + "loss": 0.6558287739753723, + "step": 7765 + }, + { + "epoch": 1.7894009216589861, + "grad_norm": 1.2875975662335684, + "learning_rate": 6.014822047504697e-08, + "loss": 0.8186839818954468, + "step": 7766 + }, + { + "epoch": 1.78963133640553, + "grad_norm": 1.2720662525098447, + "learning_rate": 6.001815126282462e-08, + "loss": 0.7862167358398438, + "step": 7767 + }, + { + "epoch": 1.7898617511520736, + "grad_norm": 1.1119662378593531, + "learning_rate": 5.98882184877233e-08, + "loss": 0.8594048023223877, + "step": 7768 + }, + { + "epoch": 1.7900921658986175, + "grad_norm": 1.3277176558233812, + "learning_rate": 5.975842216860238e-08, + "loss": 0.804019033908844, + "step": 7769 + }, + { + "epoch": 1.7903225806451613, + "grad_norm": 1.1244948347974122, + "learning_rate": 5.962876232430192e-08, + "loss": 0.7404098510742188, + "step": 7770 + }, + { + "epoch": 1.790552995391705, + "grad_norm": 1.3595838567399194, + "learning_rate": 5.949923897364173e-08, + "loss": 0.7726024389266968, + "step": 7771 + }, + { + "epoch": 1.790783410138249, + 
"grad_norm": 1.5060671287860161, + "learning_rate": 5.936985213542178e-08, + "loss": 0.8225048184394836, + "step": 7772 + }, + { + "epoch": 1.7910138248847927, + "grad_norm": 1.4217281972238225, + "learning_rate": 5.924060182842272e-08, + "loss": 0.8485706448554993, + "step": 7773 + }, + { + "epoch": 1.7912442396313364, + "grad_norm": 1.189460803975086, + "learning_rate": 5.9111488071404867e-08, + "loss": 0.6580322980880737, + "step": 7774 + }, + { + "epoch": 1.7914746543778803, + "grad_norm": 1.1783786831629417, + "learning_rate": 5.898251088310879e-08, + "loss": 0.7486656904220581, + "step": 7775 + }, + { + "epoch": 1.7917050691244238, + "grad_norm": 1.5948072851449393, + "learning_rate": 5.885367028225574e-08, + "loss": 0.9068334102630615, + "step": 7776 + }, + { + "epoch": 1.7919354838709678, + "grad_norm": 1.1107745619546634, + "learning_rate": 5.872496628754653e-08, + "loss": 0.7091449499130249, + "step": 7777 + }, + { + "epoch": 1.7921658986175115, + "grad_norm": 1.3473785107334575, + "learning_rate": 5.8596398917662107e-08, + "loss": 0.7248316407203674, + "step": 7778 + }, + { + "epoch": 1.7923963133640552, + "grad_norm": 1.2057819957098448, + "learning_rate": 5.8467968191264315e-08, + "loss": 0.7740335464477539, + "step": 7779 + }, + { + "epoch": 1.7926267281105992, + "grad_norm": 1.267573304949112, + "learning_rate": 5.833967412699448e-08, + "loss": 0.7810479402542114, + "step": 7780 + }, + { + "epoch": 1.7928571428571427, + "grad_norm": 1.004282792701847, + "learning_rate": 5.821151674347435e-08, + "loss": 0.7072443962097168, + "step": 7781 + }, + { + "epoch": 1.7930875576036867, + "grad_norm": 1.1829190770666373, + "learning_rate": 5.808349605930585e-08, + "loss": 0.8218289613723755, + "step": 7782 + }, + { + "epoch": 1.7933179723502304, + "grad_norm": 1.393265214120735, + "learning_rate": 5.795561209307087e-08, + "loss": 0.8928433656692505, + "step": 7783 + }, + { + "epoch": 1.793548387096774, + "grad_norm": 1.455083354855402, + "learning_rate": 
5.7827864863331796e-08, + "loss": 0.765188455581665, + "step": 7784 + }, + { + "epoch": 1.793778801843318, + "grad_norm": 1.0118039506572176, + "learning_rate": 5.7700254388630795e-08, + "loss": 0.7149494886398315, + "step": 7785 + }, + { + "epoch": 1.7940092165898618, + "grad_norm": 1.6638445812749356, + "learning_rate": 5.75727806874905e-08, + "loss": 0.8144164085388184, + "step": 7786 + }, + { + "epoch": 1.7942396313364055, + "grad_norm": 1.1101501647130416, + "learning_rate": 5.744544377841354e-08, + "loss": 0.7549517154693604, + "step": 7787 + }, + { + "epoch": 1.7944700460829495, + "grad_norm": 1.1805002478026116, + "learning_rate": 5.731824367988258e-08, + "loss": 0.7820652723312378, + "step": 7788 + }, + { + "epoch": 1.794700460829493, + "grad_norm": 1.2187125462499315, + "learning_rate": 5.719118041036042e-08, + "loss": 0.8253183364868164, + "step": 7789 + }, + { + "epoch": 1.794930875576037, + "grad_norm": 1.3044045265020685, + "learning_rate": 5.70642539882904e-08, + "loss": 0.8177148103713989, + "step": 7790 + }, + { + "epoch": 1.7951612903225806, + "grad_norm": 1.2453642288062106, + "learning_rate": 5.69374644320958e-08, + "loss": 0.722260594367981, + "step": 7791 + }, + { + "epoch": 1.7953917050691244, + "grad_norm": 1.3322495120015716, + "learning_rate": 5.6810811760179434e-08, + "loss": 0.8128643035888672, + "step": 7792 + }, + { + "epoch": 1.7956221198156683, + "grad_norm": 1.2461980802133077, + "learning_rate": 5.6684295990925394e-08, + "loss": 0.8267233371734619, + "step": 7793 + }, + { + "epoch": 1.7958525345622118, + "grad_norm": 1.1467604985666775, + "learning_rate": 5.655791714269697e-08, + "loss": 0.8385082483291626, + "step": 7794 + }, + { + "epoch": 1.7960829493087558, + "grad_norm": 1.2035138425735283, + "learning_rate": 5.643167523383785e-08, + "loss": 0.8705167770385742, + "step": 7795 + }, + { + "epoch": 1.7963133640552995, + "grad_norm": 1.263928906996047, + "learning_rate": 5.6305570282672024e-08, + "loss": 0.7628496885299683, + 
"step": 7796 + }, + { + "epoch": 1.7965437788018432, + "grad_norm": 1.2993701262886028, + "learning_rate": 5.61796023075034e-08, + "loss": 0.8246536254882812, + "step": 7797 + }, + { + "epoch": 1.7967741935483872, + "grad_norm": 1.2920173759654132, + "learning_rate": 5.6053771326615815e-08, + "loss": 0.7103257179260254, + "step": 7798 + }, + { + "epoch": 1.797004608294931, + "grad_norm": 1.318695367926756, + "learning_rate": 5.5928077358273984e-08, + "loss": 0.614989161491394, + "step": 7799 + }, + { + "epoch": 1.7972350230414746, + "grad_norm": 1.6404840895868877, + "learning_rate": 5.5802520420721866e-08, + "loss": 0.9876137971878052, + "step": 7800 + }, + { + "epoch": 1.7974654377880186, + "grad_norm": 1.2467848598458215, + "learning_rate": 5.5677100532183775e-08, + "loss": 0.7023773193359375, + "step": 7801 + }, + { + "epoch": 1.797695852534562, + "grad_norm": 1.1844278512776936, + "learning_rate": 5.555181771086459e-08, + "loss": 0.6680843830108643, + "step": 7802 + }, + { + "epoch": 1.797926267281106, + "grad_norm": 1.0826933828880965, + "learning_rate": 5.542667197494877e-08, + "loss": 0.7221776843070984, + "step": 7803 + }, + { + "epoch": 1.7981566820276498, + "grad_norm": 1.0071738664190577, + "learning_rate": 5.5301663342601e-08, + "loss": 0.7473262548446655, + "step": 7804 + }, + { + "epoch": 1.7983870967741935, + "grad_norm": 1.2499370802188474, + "learning_rate": 5.517679183196622e-08, + "loss": 0.8690468072891235, + "step": 7805 + }, + { + "epoch": 1.7986175115207375, + "grad_norm": 1.0933317196070476, + "learning_rate": 5.505205746116937e-08, + "loss": 0.8353981971740723, + "step": 7806 + }, + { + "epoch": 1.798847926267281, + "grad_norm": 1.177111485427447, + "learning_rate": 5.4927460248315405e-08, + "loss": 0.7691711187362671, + "step": 7807 + }, + { + "epoch": 1.799078341013825, + "grad_norm": 1.034283547212154, + "learning_rate": 5.480300021148953e-08, + "loss": 0.6732556819915771, + "step": 7808 + }, + { + "epoch": 1.7993087557603686, + 
"grad_norm": 1.1520777556370354, + "learning_rate": 5.467867736875664e-08, + "loss": 0.7273567914962769, + "step": 7809 + }, + { + "epoch": 1.7995391705069124, + "grad_norm": 1.201774068977123, + "learning_rate": 5.455449173816251e-08, + "loss": 0.7951864004135132, + "step": 7810 + }, + { + "epoch": 1.7997695852534563, + "grad_norm": 1.4133736179333027, + "learning_rate": 5.4430443337732276e-08, + "loss": 0.7073169350624084, + "step": 7811 + }, + { + "epoch": 1.8, + "grad_norm": 1.0101637387022209, + "learning_rate": 5.430653218547132e-08, + "loss": 0.682072639465332, + "step": 7812 + }, + { + "epoch": 1.8002304147465438, + "grad_norm": 0.9949453624163476, + "learning_rate": 5.4182758299365364e-08, + "loss": 0.7512049674987793, + "step": 7813 + }, + { + "epoch": 1.8004608294930877, + "grad_norm": 1.2218170088515747, + "learning_rate": 5.405912169738003e-08, + "loss": 0.7470980882644653, + "step": 7814 + }, + { + "epoch": 1.8006912442396312, + "grad_norm": 1.1792295753175266, + "learning_rate": 5.3935622397460634e-08, + "loss": 0.792417049407959, + "step": 7815 + }, + { + "epoch": 1.8009216589861752, + "grad_norm": 1.4508025797803343, + "learning_rate": 5.3812260417533505e-08, + "loss": 0.8600934743881226, + "step": 7816 + }, + { + "epoch": 1.801152073732719, + "grad_norm": 1.2411035382017865, + "learning_rate": 5.36890357755041e-08, + "loss": 0.6931058168411255, + "step": 7817 + }, + { + "epoch": 1.8013824884792626, + "grad_norm": 1.1047587345616248, + "learning_rate": 5.3565948489258216e-08, + "loss": 0.7382420897483826, + "step": 7818 + }, + { + "epoch": 1.8016129032258066, + "grad_norm": 1.5724454012098283, + "learning_rate": 5.344299857666224e-08, + "loss": 0.6811971068382263, + "step": 7819 + }, + { + "epoch": 1.80184331797235, + "grad_norm": 1.3142032735909368, + "learning_rate": 5.332018605556188e-08, + "loss": 0.8551425933837891, + "step": 7820 + }, + { + "epoch": 1.802073732718894, + "grad_norm": 1.298840655183536, + "learning_rate": 5.319751094378322e-08, 
+ "loss": 0.7907109260559082, + "step": 7821 + }, + { + "epoch": 1.8023041474654378, + "grad_norm": 1.462185741805911, + "learning_rate": 5.3074973259132464e-08, + "loss": 0.6995817422866821, + "step": 7822 + }, + { + "epoch": 1.8025345622119815, + "grad_norm": 1.2098230160416081, + "learning_rate": 5.295257301939582e-08, + "loss": 0.9157558679580688, + "step": 7823 + }, + { + "epoch": 1.8027649769585254, + "grad_norm": 1.3503599705143554, + "learning_rate": 5.283031024233942e-08, + "loss": 0.8181086778640747, + "step": 7824 + }, + { + "epoch": 1.8029953917050692, + "grad_norm": 1.061101797749781, + "learning_rate": 5.270818494570961e-08, + "loss": 0.7170151472091675, + "step": 7825 + }, + { + "epoch": 1.803225806451613, + "grad_norm": 1.3415396727620215, + "learning_rate": 5.258619714723278e-08, + "loss": 0.7548947334289551, + "step": 7826 + }, + { + "epoch": 1.8034562211981566, + "grad_norm": 1.309211881034751, + "learning_rate": 5.2464346864615204e-08, + "loss": 0.7482869625091553, + "step": 7827 + }, + { + "epoch": 1.8036866359447004, + "grad_norm": 1.2839346666214595, + "learning_rate": 5.234263411554329e-08, + "loss": 0.6984925270080566, + "step": 7828 + }, + { + "epoch": 1.8039170506912443, + "grad_norm": 1.4972180990250632, + "learning_rate": 5.222105891768347e-08, + "loss": 0.910038948059082, + "step": 7829 + }, + { + "epoch": 1.804147465437788, + "grad_norm": 1.4071380742837927, + "learning_rate": 5.2099621288682174e-08, + "loss": 0.8936711549758911, + "step": 7830 + }, + { + "epoch": 1.8043778801843318, + "grad_norm": 1.2841490446822148, + "learning_rate": 5.197832124616608e-08, + "loss": 0.7376326322555542, + "step": 7831 + }, + { + "epoch": 1.8046082949308757, + "grad_norm": 1.6922079171273652, + "learning_rate": 5.1857158807741554e-08, + "loss": 0.8373547792434692, + "step": 7832 + }, + { + "epoch": 1.8048387096774192, + "grad_norm": 1.1938115721747944, + "learning_rate": 5.17361339909953e-08, + "loss": 0.7018512487411499, + "step": 7833 + }, + { + 
"epoch": 1.8050691244239632, + "grad_norm": 1.0051532014919082, + "learning_rate": 5.161524681349394e-08, + "loss": 0.6111225485801697, + "step": 7834 + }, + { + "epoch": 1.805299539170507, + "grad_norm": 1.1643316930206133, + "learning_rate": 5.149449729278388e-08, + "loss": 0.6961934566497803, + "step": 7835 + }, + { + "epoch": 1.8055299539170506, + "grad_norm": 1.1662486414151942, + "learning_rate": 5.137388544639198e-08, + "loss": 0.677324116230011, + "step": 7836 + }, + { + "epoch": 1.8057603686635946, + "grad_norm": 1.1241341054985654, + "learning_rate": 5.125341129182481e-08, + "loss": 0.7124897837638855, + "step": 7837 + }, + { + "epoch": 1.8059907834101383, + "grad_norm": 1.1858041195501718, + "learning_rate": 5.1133074846568815e-08, + "loss": 0.7474578619003296, + "step": 7838 + }, + { + "epoch": 1.806221198156682, + "grad_norm": 1.0832413753523613, + "learning_rate": 5.101287612809102e-08, + "loss": 0.699856162071228, + "step": 7839 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 1.2510053638983376, + "learning_rate": 5.089281515383803e-08, + "loss": 0.6548302173614502, + "step": 7840 + }, + { + "epoch": 1.8066820276497695, + "grad_norm": 1.4067864996197734, + "learning_rate": 5.077289194123624e-08, + "loss": 0.8376108407974243, + "step": 7841 + }, + { + "epoch": 1.8069124423963134, + "grad_norm": 1.4168917230935398, + "learning_rate": 5.065310650769283e-08, + "loss": 0.741931140422821, + "step": 7842 + }, + { + "epoch": 1.8071428571428572, + "grad_norm": 1.0130617353418785, + "learning_rate": 5.053345887059413e-08, + "loss": 0.7253270149230957, + "step": 7843 + }, + { + "epoch": 1.807373271889401, + "grad_norm": 1.452385981822963, + "learning_rate": 5.0413949047306894e-08, + "loss": 0.8248677849769592, + "step": 7844 + }, + { + "epoch": 1.8076036866359448, + "grad_norm": 1.2182337218961132, + "learning_rate": 5.0294577055177925e-08, + "loss": 0.7571253776550293, + "step": 7845 + }, + { + "epoch": 1.8078341013824883, + "grad_norm": 
1.3374870147899762, + "learning_rate": 5.017534291153391e-08, + "loss": 0.8256274461746216, + "step": 7846 + }, + { + "epoch": 1.8080645161290323, + "grad_norm": 1.0202351482491858, + "learning_rate": 5.0056246633681356e-08, + "loss": 0.8609060049057007, + "step": 7847 + }, + { + "epoch": 1.808294930875576, + "grad_norm": 1.0533455142790622, + "learning_rate": 4.9937288238907196e-08, + "loss": 0.7005047798156738, + "step": 7848 + }, + { + "epoch": 1.8085253456221198, + "grad_norm": 1.508707208071474, + "learning_rate": 4.981846774447784e-08, + "loss": 0.8640049695968628, + "step": 7849 + }, + { + "epoch": 1.8087557603686637, + "grad_norm": 1.2891784390675838, + "learning_rate": 4.969978516763984e-08, + "loss": 0.8385862112045288, + "step": 7850 + }, + { + "epoch": 1.8089861751152074, + "grad_norm": 1.4569260681358536, + "learning_rate": 4.9581240525620184e-08, + "loss": 0.845676064491272, + "step": 7851 + }, + { + "epoch": 1.8092165898617512, + "grad_norm": 1.1553749249891685, + "learning_rate": 4.9462833835625327e-08, + "loss": 0.7638444304466248, + "step": 7852 + }, + { + "epoch": 1.8094470046082949, + "grad_norm": 1.3732641737808478, + "learning_rate": 4.934456511484153e-08, + "loss": 0.813924252986908, + "step": 7853 + }, + { + "epoch": 1.8096774193548386, + "grad_norm": 1.1884602060780909, + "learning_rate": 4.9226434380435835e-08, + "loss": 0.8773660659790039, + "step": 7854 + }, + { + "epoch": 1.8099078341013826, + "grad_norm": 1.1706588526128812, + "learning_rate": 4.91084416495543e-08, + "loss": 0.6703497171401978, + "step": 7855 + }, + { + "epoch": 1.8101382488479263, + "grad_norm": 1.149648913375334, + "learning_rate": 4.8990586939323896e-08, + "loss": 0.760738730430603, + "step": 7856 + }, + { + "epoch": 1.81036866359447, + "grad_norm": 1.0947557024146697, + "learning_rate": 4.887287026685072e-08, + "loss": 0.6466494798660278, + "step": 7857 + }, + { + "epoch": 1.810599078341014, + "grad_norm": 1.2590537115606641, + "learning_rate": 
4.8755291649221206e-08, + "loss": 0.7416050434112549, + "step": 7858 + }, + { + "epoch": 1.8108294930875575, + "grad_norm": 1.200153480710325, + "learning_rate": 4.863785110350205e-08, + "loss": 0.6841444373130798, + "step": 7859 + }, + { + "epoch": 1.8110599078341014, + "grad_norm": 0.9841555488440165, + "learning_rate": 4.8520548646739265e-08, + "loss": 0.7818359732627869, + "step": 7860 + }, + { + "epoch": 1.8112903225806452, + "grad_norm": 1.287409595467329, + "learning_rate": 4.840338429595914e-08, + "loss": 0.8802354335784912, + "step": 7861 + }, + { + "epoch": 1.8115207373271889, + "grad_norm": 1.3745378964034318, + "learning_rate": 4.8286358068168055e-08, + "loss": 0.8105144500732422, + "step": 7862 + }, + { + "epoch": 1.8117511520737328, + "grad_norm": 1.2395900259386188, + "learning_rate": 4.816946998035232e-08, + "loss": 0.6613968014717102, + "step": 7863 + }, + { + "epoch": 1.8119815668202763, + "grad_norm": 1.191330407638407, + "learning_rate": 4.80527200494778e-08, + "loss": 0.7945050001144409, + "step": 7864 + }, + { + "epoch": 1.8122119815668203, + "grad_norm": 1.1914418584370128, + "learning_rate": 4.793610829249084e-08, + "loss": 0.7268643379211426, + "step": 7865 + }, + { + "epoch": 1.812442396313364, + "grad_norm": 1.0567405581917244, + "learning_rate": 4.781963472631745e-08, + "loss": 0.7416445016860962, + "step": 7866 + }, + { + "epoch": 1.8126728110599077, + "grad_norm": 1.246941647908604, + "learning_rate": 4.770329936786355e-08, + "loss": 0.8536533117294312, + "step": 7867 + }, + { + "epoch": 1.8129032258064517, + "grad_norm": 1.2429467538833636, + "learning_rate": 4.7587102234015074e-08, + "loss": 0.8258422017097473, + "step": 7868 + }, + { + "epoch": 1.8131336405529954, + "grad_norm": 1.189598318299626, + "learning_rate": 4.7471043341637874e-08, + "loss": 0.6976941823959351, + "step": 7869 + }, + { + "epoch": 1.8133640552995391, + "grad_norm": 1.222534835599988, + "learning_rate": 4.735512270757758e-08, + "loss": 0.8213087916374207, + 
"step": 7870 + }, + { + "epoch": 1.813594470046083, + "grad_norm": 1.1756801964544004, + "learning_rate": 4.723934034866028e-08, + "loss": 0.8012057542800903, + "step": 7871 + }, + { + "epoch": 1.8138248847926266, + "grad_norm": 1.0419940327131916, + "learning_rate": 4.7123696281691436e-08, + "loss": 0.7802866697311401, + "step": 7872 + }, + { + "epoch": 1.8140552995391706, + "grad_norm": 1.1630887083640626, + "learning_rate": 4.700819052345639e-08, + "loss": 0.8024426698684692, + "step": 7873 + }, + { + "epoch": 1.8142857142857143, + "grad_norm": 0.9709635675133196, + "learning_rate": 4.689282309072107e-08, + "loss": 0.6383114457130432, + "step": 7874 + }, + { + "epoch": 1.814516129032258, + "grad_norm": 1.2768186922012608, + "learning_rate": 4.677759400023085e-08, + "loss": 0.7226015329360962, + "step": 7875 + }, + { + "epoch": 1.814746543778802, + "grad_norm": 1.0424513670531574, + "learning_rate": 4.6662503268710684e-08, + "loss": 0.8390164971351624, + "step": 7876 + }, + { + "epoch": 1.8149769585253455, + "grad_norm": 1.0443665370850939, + "learning_rate": 4.654755091286633e-08, + "loss": 0.8120134472846985, + "step": 7877 + }, + { + "epoch": 1.8152073732718894, + "grad_norm": 1.305111160234168, + "learning_rate": 4.6432736949382656e-08, + "loss": 0.6554470062255859, + "step": 7878 + }, + { + "epoch": 1.8154377880184331, + "grad_norm": 1.1780234915455678, + "learning_rate": 4.631806139492478e-08, + "loss": 0.7268370985984802, + "step": 7879 + }, + { + "epoch": 1.8156682027649769, + "grad_norm": 1.4051894182356444, + "learning_rate": 4.620352426613794e-08, + "loss": 0.7991992831230164, + "step": 7880 + }, + { + "epoch": 1.8158986175115208, + "grad_norm": 1.1268859101296151, + "learning_rate": 4.608912557964673e-08, + "loss": 0.7695842981338501, + "step": 7881 + }, + { + "epoch": 1.8161290322580645, + "grad_norm": 1.9896156470888766, + "learning_rate": 4.59748653520563e-08, + "loss": 0.8633268475532532, + "step": 7882 + }, + { + "epoch": 1.8163594470046083, + 
"grad_norm": 1.1364981478494263, + "learning_rate": 4.586074359995118e-08, + "loss": 0.7018440961837769, + "step": 7883 + }, + { + "epoch": 1.8165898617511522, + "grad_norm": 1.1022691462384118, + "learning_rate": 4.574676033989589e-08, + "loss": 0.7304259538650513, + "step": 7884 + }, + { + "epoch": 1.8168202764976957, + "grad_norm": 1.2520833867580832, + "learning_rate": 4.563291558843518e-08, + "loss": 0.7408654689788818, + "step": 7885 + }, + { + "epoch": 1.8170506912442397, + "grad_norm": 0.8583590816187824, + "learning_rate": 4.55192093620933e-08, + "loss": 0.6378169059753418, + "step": 7886 + }, + { + "epoch": 1.8172811059907834, + "grad_norm": 1.2929203847720665, + "learning_rate": 4.540564167737471e-08, + "loss": 0.8854331374168396, + "step": 7887 + }, + { + "epoch": 1.8175115207373271, + "grad_norm": 1.3325768500609418, + "learning_rate": 4.529221255076343e-08, + "loss": 0.6948372721672058, + "step": 7888 + }, + { + "epoch": 1.817741935483871, + "grad_norm": 1.0169430034347062, + "learning_rate": 4.517892199872364e-08, + "loss": 0.8199236392974854, + "step": 7889 + }, + { + "epoch": 1.8179723502304146, + "grad_norm": 1.2358305635738154, + "learning_rate": 4.506577003769918e-08, + "loss": 0.6967995762825012, + "step": 7890 + }, + { + "epoch": 1.8182027649769585, + "grad_norm": 1.5521492896589208, + "learning_rate": 4.495275668411425e-08, + "loss": 0.848435640335083, + "step": 7891 + }, + { + "epoch": 1.8184331797235023, + "grad_norm": 1.0482582355280439, + "learning_rate": 4.483988195437227e-08, + "loss": 0.7085731029510498, + "step": 7892 + }, + { + "epoch": 1.818663594470046, + "grad_norm": 1.540410469929121, + "learning_rate": 4.472714586485682e-08, + "loss": 0.7400653958320618, + "step": 7893 + }, + { + "epoch": 1.81889400921659, + "grad_norm": 1.3011192141788026, + "learning_rate": 4.461454843193169e-08, + "loss": 0.7636830806732178, + "step": 7894 + }, + { + "epoch": 1.8191244239631337, + "grad_norm": 0.9509851989309867, + "learning_rate": 
4.4502089671940135e-08, + "loss": 0.6902754306793213, + "step": 7895 + }, + { + "epoch": 1.8193548387096774, + "grad_norm": 1.4497717090666749, + "learning_rate": 4.438976960120522e-08, + "loss": 0.8397349119186401, + "step": 7896 + }, + { + "epoch": 1.8195852534562214, + "grad_norm": 1.1317263019718502, + "learning_rate": 4.4277588236030226e-08, + "loss": 0.7505836486816406, + "step": 7897 + }, + { + "epoch": 1.8198156682027649, + "grad_norm": 1.4213425196027163, + "learning_rate": 4.416554559269814e-08, + "loss": 0.9310287833213806, + "step": 7898 + }, + { + "epoch": 1.8200460829493088, + "grad_norm": 1.0910777164101302, + "learning_rate": 4.405364168747161e-08, + "loss": 0.724685549736023, + "step": 7899 + }, + { + "epoch": 1.8202764976958525, + "grad_norm": 0.99356469827684, + "learning_rate": 4.394187653659365e-08, + "loss": 0.6554735898971558, + "step": 7900 + }, + { + "epoch": 1.8205069124423963, + "grad_norm": 1.5629584518265682, + "learning_rate": 4.383025015628661e-08, + "loss": 0.7494597434997559, + "step": 7901 + }, + { + "epoch": 1.8207373271889402, + "grad_norm": 1.3596683636243805, + "learning_rate": 4.371876256275287e-08, + "loss": 0.817386269569397, + "step": 7902 + }, + { + "epoch": 1.8209677419354837, + "grad_norm": 1.2645292088995888, + "learning_rate": 4.3607413772174806e-08, + "loss": 0.8668064475059509, + "step": 7903 + }, + { + "epoch": 1.8211981566820277, + "grad_norm": 1.2001673372629817, + "learning_rate": 4.34962038007145e-08, + "loss": 0.7400633096694946, + "step": 7904 + }, + { + "epoch": 1.8214285714285714, + "grad_norm": 1.018878326746976, + "learning_rate": 4.3385132664514046e-08, + "loss": 0.7273544073104858, + "step": 7905 + }, + { + "epoch": 1.8216589861751151, + "grad_norm": 1.149057253315942, + "learning_rate": 4.3274200379695315e-08, + "loss": 0.7133193016052246, + "step": 7906 + }, + { + "epoch": 1.821889400921659, + "grad_norm": 1.2433089389356335, + "learning_rate": 4.316340696235976e-08, + "loss": 0.9390736222267151, + 
"step": 7907 + }, + { + "epoch": 1.8221198156682028, + "grad_norm": 1.1318410882734156, + "learning_rate": 4.3052752428588966e-08, + "loss": 0.7065613269805908, + "step": 7908 + }, + { + "epoch": 1.8223502304147465, + "grad_norm": 1.2803518971044316, + "learning_rate": 4.294223679444442e-08, + "loss": 0.813999354839325, + "step": 7909 + }, + { + "epoch": 1.8225806451612905, + "grad_norm": 1.616827704611462, + "learning_rate": 4.2831860075966955e-08, + "loss": 0.9234256148338318, + "step": 7910 + }, + { + "epoch": 1.822811059907834, + "grad_norm": 1.4124883659201861, + "learning_rate": 4.272162228917808e-08, + "loss": 0.8630207777023315, + "step": 7911 + }, + { + "epoch": 1.823041474654378, + "grad_norm": 1.382424983437882, + "learning_rate": 4.2611523450078456e-08, + "loss": 0.7827208042144775, + "step": 7912 + }, + { + "epoch": 1.8232718894009217, + "grad_norm": 1.3479238410287269, + "learning_rate": 4.250156357464873e-08, + "loss": 0.884107232093811, + "step": 7913 + }, + { + "epoch": 1.8235023041474654, + "grad_norm": 1.3064700630797408, + "learning_rate": 4.2391742678849484e-08, + "loss": 0.8615697026252747, + "step": 7914 + }, + { + "epoch": 1.8237327188940093, + "grad_norm": 1.4410161390206035, + "learning_rate": 4.2282060778621174e-08, + "loss": 0.8001279830932617, + "step": 7915 + }, + { + "epoch": 1.8239631336405528, + "grad_norm": 1.1016373373524035, + "learning_rate": 4.217251788988374e-08, + "loss": 0.7183214426040649, + "step": 7916 + }, + { + "epoch": 1.8241935483870968, + "grad_norm": 1.2680472029966925, + "learning_rate": 4.206311402853746e-08, + "loss": 0.7751119136810303, + "step": 7917 + }, + { + "epoch": 1.8244239631336405, + "grad_norm": 1.287058032235602, + "learning_rate": 4.195384921046208e-08, + "loss": 0.8073426485061646, + "step": 7918 + }, + { + "epoch": 1.8246543778801843, + "grad_norm": 1.053407718143569, + "learning_rate": 4.1844723451517017e-08, + "loss": 0.7918455600738525, + "step": 7919 + }, + { + "epoch": 1.8248847926267282, + 
"grad_norm": 1.1789390806182918, + "learning_rate": 4.1735736767542054e-08, + "loss": 0.8070017099380493, + "step": 7920 + }, + { + "epoch": 1.825115207373272, + "grad_norm": 1.1456133687492283, + "learning_rate": 4.1626889174356306e-08, + "loss": 0.7202159762382507, + "step": 7921 + }, + { + "epoch": 1.8253456221198157, + "grad_norm": 1.304718816677761, + "learning_rate": 4.15181806877587e-08, + "loss": 0.8412283658981323, + "step": 7922 + }, + { + "epoch": 1.8255760368663596, + "grad_norm": 1.079962569087528, + "learning_rate": 4.140961132352849e-08, + "loss": 0.6230478286743164, + "step": 7923 + }, + { + "epoch": 1.8258064516129031, + "grad_norm": 1.184647211526077, + "learning_rate": 4.1301181097424196e-08, + "loss": 0.6475099921226501, + "step": 7924 + }, + { + "epoch": 1.826036866359447, + "grad_norm": 1.1526955390848261, + "learning_rate": 4.1192890025184223e-08, + "loss": 0.6277462244033813, + "step": 7925 + }, + { + "epoch": 1.8262672811059908, + "grad_norm": 1.048650750687635, + "learning_rate": 4.1084738122527e-08, + "loss": 0.784058690071106, + "step": 7926 + }, + { + "epoch": 1.8264976958525345, + "grad_norm": 1.2758998200943634, + "learning_rate": 4.097672540515063e-08, + "loss": 0.7214534282684326, + "step": 7927 + }, + { + "epoch": 1.8267281105990785, + "grad_norm": 1.3299220547069754, + "learning_rate": 4.086885188873302e-08, + "loss": 0.7504015564918518, + "step": 7928 + }, + { + "epoch": 1.826958525345622, + "grad_norm": 1.3115105618474625, + "learning_rate": 4.076111758893175e-08, + "loss": 0.8837840557098389, + "step": 7929 + }, + { + "epoch": 1.827188940092166, + "grad_norm": 0.9756920709009218, + "learning_rate": 4.065352252138443e-08, + "loss": 0.6903706789016724, + "step": 7930 + }, + { + "epoch": 1.8274193548387097, + "grad_norm": 1.0882078909648618, + "learning_rate": 4.054606670170824e-08, + "loss": 0.6120485067367554, + "step": 7931 + }, + { + "epoch": 1.8276497695852534, + "grad_norm": 1.3933670864132435, + "learning_rate": 
4.043875014550047e-08, + "loss": 0.9566253423690796, + "step": 7932 + }, + { + "epoch": 1.8278801843317973, + "grad_norm": 1.143561158140067, + "learning_rate": 4.033157286833766e-08, + "loss": 0.7702776193618774, + "step": 7933 + }, + { + "epoch": 1.828110599078341, + "grad_norm": 1.3861853644171394, + "learning_rate": 4.0224534885776706e-08, + "loss": 0.7326529026031494, + "step": 7934 + }, + { + "epoch": 1.8283410138248848, + "grad_norm": 1.199651876611857, + "learning_rate": 4.011763621335395e-08, + "loss": 0.8161343336105347, + "step": 7935 + }, + { + "epoch": 1.8285714285714287, + "grad_norm": 1.2385311136965618, + "learning_rate": 4.001087686658544e-08, + "loss": 0.7167537212371826, + "step": 7936 + }, + { + "epoch": 1.8288018433179722, + "grad_norm": 1.5866479195226006, + "learning_rate": 3.9904256860967433e-08, + "loss": 0.9195249080657959, + "step": 7937 + }, + { + "epoch": 1.8290322580645162, + "grad_norm": 1.4492337682663832, + "learning_rate": 3.979777621197544e-08, + "loss": 0.9483609199523926, + "step": 7938 + }, + { + "epoch": 1.82926267281106, + "grad_norm": 1.1520857488925356, + "learning_rate": 3.96914349350651e-08, + "loss": 0.6521364450454712, + "step": 7939 + }, + { + "epoch": 1.8294930875576036, + "grad_norm": 1.1394847291425385, + "learning_rate": 3.958523304567174e-08, + "loss": 0.714328408241272, + "step": 7940 + }, + { + "epoch": 1.8297235023041476, + "grad_norm": 1.2749952242619191, + "learning_rate": 3.9479170559210464e-08, + "loss": 0.705136775970459, + "step": 7941 + }, + { + "epoch": 1.829953917050691, + "grad_norm": 1.2310686937076982, + "learning_rate": 3.937324749107584e-08, + "loss": 0.9096843004226685, + "step": 7942 + }, + { + "epoch": 1.830184331797235, + "grad_norm": 1.1347026880501985, + "learning_rate": 3.9267463856642704e-08, + "loss": 0.7797929048538208, + "step": 7943 + }, + { + "epoch": 1.8304147465437788, + "grad_norm": 1.1418375010830168, + "learning_rate": 3.9161819671265414e-08, + "loss": 0.739689290523529, + 
"step": 7944 + }, + { + "epoch": 1.8306451612903225, + "grad_norm": 1.2414926332489717, + "learning_rate": 3.905631495027795e-08, + "loss": 0.7297589778900146, + "step": 7945 + }, + { + "epoch": 1.8308755760368665, + "grad_norm": 1.1411747974433366, + "learning_rate": 3.895094970899426e-08, + "loss": 0.6632317900657654, + "step": 7946 + }, + { + "epoch": 1.8311059907834102, + "grad_norm": 1.1035263718417188, + "learning_rate": 3.884572396270802e-08, + "loss": 0.8075754642486572, + "step": 7947 + }, + { + "epoch": 1.831336405529954, + "grad_norm": 1.1206981689667126, + "learning_rate": 3.874063772669256e-08, + "loss": 0.879385232925415, + "step": 7948 + }, + { + "epoch": 1.8315668202764976, + "grad_norm": 1.1296410172019098, + "learning_rate": 3.86356910162009e-08, + "loss": 0.7182341814041138, + "step": 7949 + }, + { + "epoch": 1.8317972350230414, + "grad_norm": 1.3256415462362086, + "learning_rate": 3.853088384646608e-08, + "loss": 0.8980770111083984, + "step": 7950 + }, + { + "epoch": 1.8320276497695853, + "grad_norm": 1.2399263879902838, + "learning_rate": 3.8426216232700483e-08, + "loss": 0.7798547744750977, + "step": 7951 + }, + { + "epoch": 1.832258064516129, + "grad_norm": 1.30590072600508, + "learning_rate": 3.832168819009685e-08, + "loss": 0.7545509934425354, + "step": 7952 + }, + { + "epoch": 1.8324884792626728, + "grad_norm": 1.4626138945450415, + "learning_rate": 3.821729973382681e-08, + "loss": 0.7394163608551025, + "step": 7953 + }, + { + "epoch": 1.8327188940092167, + "grad_norm": 1.095086275435991, + "learning_rate": 3.811305087904271e-08, + "loss": 0.7771584987640381, + "step": 7954 + }, + { + "epoch": 1.8329493087557602, + "grad_norm": 1.0772465088176202, + "learning_rate": 3.800894164087587e-08, + "loss": 0.6490596532821655, + "step": 7955 + }, + { + "epoch": 1.8331797235023042, + "grad_norm": 1.6261572682115344, + "learning_rate": 3.7904972034437546e-08, + "loss": 0.8465416431427002, + "step": 7956 + }, + { + "epoch": 1.833410138248848, + 
"grad_norm": 1.1256653812684285, + "learning_rate": 3.780114207481899e-08, + "loss": 0.6769351363182068, + "step": 7957 + }, + { + "epoch": 1.8336405529953916, + "grad_norm": 1.1157448396752008, + "learning_rate": 3.769745177709094e-08, + "loss": 0.8187215328216553, + "step": 7958 + }, + { + "epoch": 1.8338709677419356, + "grad_norm": 0.9478307441179703, + "learning_rate": 3.759390115630356e-08, + "loss": 0.7524763345718384, + "step": 7959 + }, + { + "epoch": 1.8341013824884793, + "grad_norm": 1.3846707864730958, + "learning_rate": 3.749049022748762e-08, + "loss": 0.8019517064094543, + "step": 7960 + }, + { + "epoch": 1.834331797235023, + "grad_norm": 1.2301171101661803, + "learning_rate": 3.738721900565278e-08, + "loss": 0.7732158899307251, + "step": 7961 + }, + { + "epoch": 1.8345622119815668, + "grad_norm": 1.1624945144679932, + "learning_rate": 3.728408750578871e-08, + "loss": 0.7152917385101318, + "step": 7962 + }, + { + "epoch": 1.8347926267281105, + "grad_norm": 1.2249354034345745, + "learning_rate": 3.7181095742864876e-08, + "loss": 0.7117735147476196, + "step": 7963 + }, + { + "epoch": 1.8350230414746544, + "grad_norm": 1.1387667941982393, + "learning_rate": 3.7078243731830436e-08, + "loss": 0.7651360034942627, + "step": 7964 + }, + { + "epoch": 1.8352534562211982, + "grad_norm": 1.103224145154883, + "learning_rate": 3.697553148761412e-08, + "loss": 0.6686996817588806, + "step": 7965 + }, + { + "epoch": 1.835483870967742, + "grad_norm": 1.4148867918515446, + "learning_rate": 3.687295902512455e-08, + "loss": 0.8654145002365112, + "step": 7966 + }, + { + "epoch": 1.8357142857142859, + "grad_norm": 1.2014603088046913, + "learning_rate": 3.6770526359250046e-08, + "loss": 0.7883874177932739, + "step": 7967 + }, + { + "epoch": 1.8359447004608294, + "grad_norm": 1.3036366063511584, + "learning_rate": 3.666823350485848e-08, + "loss": 0.7270755767822266, + "step": 7968 + }, + { + "epoch": 1.8361751152073733, + "grad_norm": 1.2757403346821974, + "learning_rate": 
3.656608047679744e-08, + "loss": 0.654710054397583, + "step": 7969 + }, + { + "epoch": 1.836405529953917, + "grad_norm": 1.3173622827867584, + "learning_rate": 3.6464067289894485e-08, + "loss": 0.688032329082489, + "step": 7970 + }, + { + "epoch": 1.8366359447004608, + "grad_norm": 1.610615012564481, + "learning_rate": 3.6362193958956457e-08, + "loss": 0.901115894317627, + "step": 7971 + }, + { + "epoch": 1.8368663594470047, + "grad_norm": 1.116601972108686, + "learning_rate": 3.6260460498770404e-08, + "loss": 0.7335774302482605, + "step": 7972 + }, + { + "epoch": 1.8370967741935482, + "grad_norm": 1.386903572934919, + "learning_rate": 3.615886692410275e-08, + "loss": 0.8056570291519165, + "step": 7973 + }, + { + "epoch": 1.8373271889400922, + "grad_norm": 1.0398578754417405, + "learning_rate": 3.6057413249699356e-08, + "loss": 0.82081538438797, + "step": 7974 + }, + { + "epoch": 1.837557603686636, + "grad_norm": 1.2589683870881863, + "learning_rate": 3.595609949028655e-08, + "loss": 0.7741475105285645, + "step": 7975 + }, + { + "epoch": 1.8377880184331796, + "grad_norm": 1.4550225731476647, + "learning_rate": 3.5854925660569693e-08, + "loss": 0.9020792245864868, + "step": 7976 + }, + { + "epoch": 1.8380184331797236, + "grad_norm": 1.395018589671643, + "learning_rate": 3.57538917752338e-08, + "loss": 0.759677529335022, + "step": 7977 + }, + { + "epoch": 1.8382488479262673, + "grad_norm": 1.2528132061795532, + "learning_rate": 3.565299784894427e-08, + "loss": 0.6658498644828796, + "step": 7978 + }, + { + "epoch": 1.838479262672811, + "grad_norm": 1.156561409904186, + "learning_rate": 3.5552243896345254e-08, + "loss": 0.8359798192977905, + "step": 7979 + }, + { + "epoch": 1.838709677419355, + "grad_norm": 0.9586985661683237, + "learning_rate": 3.545162993206141e-08, + "loss": 0.656216025352478, + "step": 7980 + }, + { + "epoch": 1.8389400921658985, + "grad_norm": 1.1907827843907386, + "learning_rate": 3.53511559706966e-08, + "loss": 0.7783077359199524, + "step": 7981 
+ }, + { + "epoch": 1.8391705069124424, + "grad_norm": 1.315887741405374, + "learning_rate": 3.525082202683427e-08, + "loss": 0.7726818919181824, + "step": 7982 + }, + { + "epoch": 1.8394009216589862, + "grad_norm": 1.203190333477806, + "learning_rate": 3.5150628115038213e-08, + "loss": 0.6797339916229248, + "step": 7983 + }, + { + "epoch": 1.83963133640553, + "grad_norm": 1.6491537372199485, + "learning_rate": 3.505057424985114e-08, + "loss": 0.818444013595581, + "step": 7984 + }, + { + "epoch": 1.8398617511520738, + "grad_norm": 1.2385444618355612, + "learning_rate": 3.495066044579564e-08, + "loss": 0.716003954410553, + "step": 7985 + }, + { + "epoch": 1.8400921658986173, + "grad_norm": 1.1184726381698433, + "learning_rate": 3.485088671737435e-08, + "loss": 0.8214380741119385, + "step": 7986 + }, + { + "epoch": 1.8403225806451613, + "grad_norm": 1.2891166927609845, + "learning_rate": 3.475125307906923e-08, + "loss": 0.8004239797592163, + "step": 7987 + }, + { + "epoch": 1.840552995391705, + "grad_norm": 1.0064244623457703, + "learning_rate": 3.465175954534183e-08, + "loss": 0.724868655204773, + "step": 7988 + }, + { + "epoch": 1.8407834101382488, + "grad_norm": 1.2194713737299876, + "learning_rate": 3.455240613063359e-08, + "loss": 0.6774435043334961, + "step": 7989 + }, + { + "epoch": 1.8410138248847927, + "grad_norm": 1.2000954990034474, + "learning_rate": 3.445319284936543e-08, + "loss": 0.7618406414985657, + "step": 7990 + }, + { + "epoch": 1.8412442396313364, + "grad_norm": 1.2446761227229344, + "learning_rate": 3.4354119715938154e-08, + "loss": 0.8176794648170471, + "step": 7991 + }, + { + "epoch": 1.8414746543778802, + "grad_norm": 1.3311989323291133, + "learning_rate": 3.4255186744732045e-08, + "loss": 0.7540123462677002, + "step": 7992 + }, + { + "epoch": 1.841705069124424, + "grad_norm": 0.8317940065053944, + "learning_rate": 3.4156393950107164e-08, + "loss": 0.6888976097106934, + "step": 7993 + }, + { + "epoch": 1.8419354838709676, + "grad_norm": 
0.9229557772464766, + "learning_rate": 3.405774134640294e-08, + "loss": 0.6719028949737549, + "step": 7994 + }, + { + "epoch": 1.8421658986175116, + "grad_norm": 1.2216480626353798, + "learning_rate": 3.3959228947938903e-08, + "loss": 0.817806601524353, + "step": 7995 + }, + { + "epoch": 1.8423963133640553, + "grad_norm": 1.176727717908757, + "learning_rate": 3.3860856769013955e-08, + "loss": 0.6681252717971802, + "step": 7996 + }, + { + "epoch": 1.842626728110599, + "grad_norm": 1.261442308873967, + "learning_rate": 3.3762624823906574e-08, + "loss": 0.7965174317359924, + "step": 7997 + }, + { + "epoch": 1.842857142857143, + "grad_norm": 1.163849986057629, + "learning_rate": 3.366453312687512e-08, + "loss": 0.714171826839447, + "step": 7998 + }, + { + "epoch": 1.8430875576036865, + "grad_norm": 1.2077995913515678, + "learning_rate": 3.356658169215743e-08, + "loss": 0.7489287853240967, + "step": 7999 + }, + { + "epoch": 1.8433179723502304, + "grad_norm": 1.270011813451473, + "learning_rate": 3.34687705339709e-08, + "loss": 0.790866494178772, + "step": 8000 + }, + { + "epoch": 1.8435483870967742, + "grad_norm": 0.9665221846950844, + "learning_rate": 3.337109966651297e-08, + "loss": 0.8208349943161011, + "step": 8001 + }, + { + "epoch": 1.8437788018433179, + "grad_norm": 1.1715709525124653, + "learning_rate": 3.3273569103960174e-08, + "loss": 0.7974207401275635, + "step": 8002 + }, + { + "epoch": 1.8440092165898618, + "grad_norm": 1.1483232930238036, + "learning_rate": 3.317617886046908e-08, + "loss": 0.751643180847168, + "step": 8003 + }, + { + "epoch": 1.8442396313364056, + "grad_norm": 1.3210448516681466, + "learning_rate": 3.3078928950175724e-08, + "loss": 0.9231137037277222, + "step": 8004 + }, + { + "epoch": 1.8444700460829493, + "grad_norm": 1.1496984894908708, + "learning_rate": 3.2981819387195683e-08, + "loss": 0.7975907325744629, + "step": 8005 + }, + { + "epoch": 1.8447004608294932, + "grad_norm": 1.1807761173209448, + "learning_rate": 
3.288485018562448e-08, + "loss": 0.7467124462127686, + "step": 8006 + }, + { + "epoch": 1.8449308755760367, + "grad_norm": 1.1558703241619663, + "learning_rate": 3.278802135953706e-08, + "loss": 0.7983080148696899, + "step": 8007 + }, + { + "epoch": 1.8451612903225807, + "grad_norm": 1.2273424689042212, + "learning_rate": 3.269133292298787e-08, + "loss": 0.7991635799407959, + "step": 8008 + }, + { + "epoch": 1.8453917050691244, + "grad_norm": 1.3284825495150037, + "learning_rate": 3.259478489001111e-08, + "loss": 0.9309900403022766, + "step": 8009 + }, + { + "epoch": 1.8456221198156681, + "grad_norm": 1.4898197506974649, + "learning_rate": 3.249837727462068e-08, + "loss": 0.7667444944381714, + "step": 8010 + }, + { + "epoch": 1.845852534562212, + "grad_norm": 1.0693184262343387, + "learning_rate": 3.2402110090809955e-08, + "loss": 0.722775936126709, + "step": 8011 + }, + { + "epoch": 1.8460829493087556, + "grad_norm": 1.2061345728793884, + "learning_rate": 3.230598335255208e-08, + "loss": 0.7049660682678223, + "step": 8012 + }, + { + "epoch": 1.8463133640552996, + "grad_norm": 1.2538545243397632, + "learning_rate": 3.220999707379957e-08, + "loss": 0.7543717622756958, + "step": 8013 + }, + { + "epoch": 1.8465437788018433, + "grad_norm": 1.0254969440317054, + "learning_rate": 3.2114151268484825e-08, + "loss": 0.705594539642334, + "step": 8014 + }, + { + "epoch": 1.846774193548387, + "grad_norm": 1.3381301652737214, + "learning_rate": 3.201844595051972e-08, + "loss": 0.8663946390151978, + "step": 8015 + }, + { + "epoch": 1.847004608294931, + "grad_norm": 1.2931743474180666, + "learning_rate": 3.192288113379582e-08, + "loss": 0.6990827918052673, + "step": 8016 + }, + { + "epoch": 1.8472350230414747, + "grad_norm": 1.3047302382268444, + "learning_rate": 3.182745683218391e-08, + "loss": 0.8494592905044556, + "step": 8017 + }, + { + "epoch": 1.8474654377880184, + "grad_norm": 1.1964557388323078, + "learning_rate": 3.173217305953524e-08, + "loss": 0.7689815163612366, + 
"step": 8018 + }, + { + "epoch": 1.8476958525345624, + "grad_norm": 1.0869127948311592, + "learning_rate": 3.163702982967964e-08, + "loss": 0.7961923480033875, + "step": 8019 + }, + { + "epoch": 1.8479262672811059, + "grad_norm": 1.1859545141002084, + "learning_rate": 3.154202715642729e-08, + "loss": 0.7290681600570679, + "step": 8020 + }, + { + "epoch": 1.8481566820276498, + "grad_norm": 1.2696204436408378, + "learning_rate": 3.1447165053567594e-08, + "loss": 0.7486605048179626, + "step": 8021 + }, + { + "epoch": 1.8483870967741935, + "grad_norm": 1.2409295752272667, + "learning_rate": 3.135244353486977e-08, + "loss": 0.8263967633247375, + "step": 8022 + }, + { + "epoch": 1.8486175115207373, + "grad_norm": 1.3436046094044156, + "learning_rate": 3.1257862614082254e-08, + "loss": 0.7462657690048218, + "step": 8023 + }, + { + "epoch": 1.8488479262672812, + "grad_norm": 1.7105756282592546, + "learning_rate": 3.116342230493374e-08, + "loss": 0.9305819272994995, + "step": 8024 + }, + { + "epoch": 1.8490783410138247, + "grad_norm": 1.1597494849443377, + "learning_rate": 3.1069122621131925e-08, + "loss": 0.7202557325363159, + "step": 8025 + }, + { + "epoch": 1.8493087557603687, + "grad_norm": 1.0985806176068067, + "learning_rate": 3.097496357636409e-08, + "loss": 0.723913311958313, + "step": 8026 + }, + { + "epoch": 1.8495391705069124, + "grad_norm": 1.427360065972912, + "learning_rate": 3.088094518429751e-08, + "loss": 0.7067763805389404, + "step": 8027 + }, + { + "epoch": 1.8497695852534561, + "grad_norm": 1.3110685780585822, + "learning_rate": 3.078706745857884e-08, + "loss": 0.7853527665138245, + "step": 8028 + }, + { + "epoch": 1.85, + "grad_norm": 1.228901367807535, + "learning_rate": 3.0693330412834285e-08, + "loss": 0.7183133363723755, + "step": 8029 + }, + { + "epoch": 1.8502304147465438, + "grad_norm": 1.1077136741228983, + "learning_rate": 3.0599734060669626e-08, + "loss": 0.8041096925735474, + "step": 8030 + }, + { + "epoch": 1.8504608294930875, + "grad_norm": 
1.0495776729925357, + "learning_rate": 3.050627841567022e-08, + "loss": 0.7259166240692139, + "step": 8031 + }, + { + "epoch": 1.8506912442396315, + "grad_norm": 1.5016516908972768, + "learning_rate": 3.041296349140099e-08, + "loss": 0.8844292163848877, + "step": 8032 + }, + { + "epoch": 1.850921658986175, + "grad_norm": 1.2846098007302502, + "learning_rate": 3.031978930140666e-08, + "loss": 0.7566810846328735, + "step": 8033 + }, + { + "epoch": 1.851152073732719, + "grad_norm": 1.4566612706299762, + "learning_rate": 3.0226755859211085e-08, + "loss": 0.8365379571914673, + "step": 8034 + }, + { + "epoch": 1.8513824884792627, + "grad_norm": 1.03909937329538, + "learning_rate": 3.013386317831823e-08, + "loss": 0.6786175966262817, + "step": 8035 + }, + { + "epoch": 1.8516129032258064, + "grad_norm": 0.8445952555360507, + "learning_rate": 3.0041111272211206e-08, + "loss": 0.5450198650360107, + "step": 8036 + }, + { + "epoch": 1.8518433179723504, + "grad_norm": 1.3789732970427235, + "learning_rate": 2.994850015435269e-08, + "loss": 0.8792393207550049, + "step": 8037 + }, + { + "epoch": 1.8520737327188939, + "grad_norm": 1.1270074296152806, + "learning_rate": 2.985602983818525e-08, + "loss": 0.8463287353515625, + "step": 8038 + }, + { + "epoch": 1.8523041474654378, + "grad_norm": 1.2927452986312467, + "learning_rate": 2.9763700337130827e-08, + "loss": 0.77659010887146, + "step": 8039 + }, + { + "epoch": 1.8525345622119815, + "grad_norm": 0.8652026295993711, + "learning_rate": 2.9671511664590698e-08, + "loss": 0.6180428266525269, + "step": 8040 + }, + { + "epoch": 1.8527649769585253, + "grad_norm": 1.2049419514211082, + "learning_rate": 2.9579463833946273e-08, + "loss": 0.7886658906936646, + "step": 8041 + }, + { + "epoch": 1.8529953917050692, + "grad_norm": 1.35078980115234, + "learning_rate": 2.9487556858557972e-08, + "loss": 0.8371871709823608, + "step": 8042 + }, + { + "epoch": 1.853225806451613, + "grad_norm": 1.1555875449847217, + "learning_rate": 
2.9395790751765904e-08, + "loss": 0.7082366347312927, + "step": 8043 + }, + { + "epoch": 1.8534562211981567, + "grad_norm": 1.2745414422252506, + "learning_rate": 2.930416552689008e-08, + "loss": 0.7866584062576294, + "step": 8044 + }, + { + "epoch": 1.8536866359447006, + "grad_norm": 1.229235509048025, + "learning_rate": 2.9212681197229527e-08, + "loss": 0.8789514303207397, + "step": 8045 + }, + { + "epoch": 1.8539170506912441, + "grad_norm": 1.0208282620264577, + "learning_rate": 2.9121337776063072e-08, + "loss": 0.7041239738464355, + "step": 8046 + }, + { + "epoch": 1.854147465437788, + "grad_norm": 1.3204473756112607, + "learning_rate": 2.9030135276649215e-08, + "loss": 0.8290516138076782, + "step": 8047 + }, + { + "epoch": 1.8543778801843318, + "grad_norm": 1.2424965520320617, + "learning_rate": 2.8939073712225813e-08, + "loss": 0.8532444834709167, + "step": 8048 + }, + { + "epoch": 1.8546082949308755, + "grad_norm": 1.375111764710695, + "learning_rate": 2.8848153096010407e-08, + "loss": 0.8635869026184082, + "step": 8049 + }, + { + "epoch": 1.8548387096774195, + "grad_norm": 1.3481674122248803, + "learning_rate": 2.8757373441199885e-08, + "loss": 0.723747730255127, + "step": 8050 + }, + { + "epoch": 1.855069124423963, + "grad_norm": 1.3399875040651272, + "learning_rate": 2.8666734760970925e-08, + "loss": 0.893456220626831, + "step": 8051 + }, + { + "epoch": 1.855299539170507, + "grad_norm": 1.2732338285848108, + "learning_rate": 2.8576237068479335e-08, + "loss": 0.6871381998062134, + "step": 8052 + }, + { + "epoch": 1.8555299539170507, + "grad_norm": 1.0534516506243037, + "learning_rate": 2.848588037686106e-08, + "loss": 0.7820594906806946, + "step": 8053 + }, + { + "epoch": 1.8557603686635944, + "grad_norm": 1.0873243123362593, + "learning_rate": 2.839566469923105e-08, + "loss": 0.7783479690551758, + "step": 8054 + }, + { + "epoch": 1.8559907834101383, + "grad_norm": 1.25602911336094, + "learning_rate": 2.8305590048684268e-08, + "loss": 0.7612866163253784, + 
"step": 8055 + }, + { + "epoch": 1.856221198156682, + "grad_norm": 1.0752346215773687, + "learning_rate": 2.82156564382946e-08, + "loss": 0.7483590841293335, + "step": 8056 + }, + { + "epoch": 1.8564516129032258, + "grad_norm": 1.0547692532993052, + "learning_rate": 2.812586388111582e-08, + "loss": 0.7553579807281494, + "step": 8057 + }, + { + "epoch": 1.8566820276497698, + "grad_norm": 1.0828193353243305, + "learning_rate": 2.80362123901815e-08, + "loss": 0.8895602226257324, + "step": 8058 + }, + { + "epoch": 1.8569124423963133, + "grad_norm": 1.1481937931103232, + "learning_rate": 2.794670197850424e-08, + "loss": 0.7974053621292114, + "step": 8059 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 1.0112292806236838, + "learning_rate": 2.7857332659076193e-08, + "loss": 0.7730135917663574, + "step": 8060 + }, + { + "epoch": 1.857373271889401, + "grad_norm": 1.115608079627536, + "learning_rate": 2.7768104444869434e-08, + "loss": 0.7258738279342651, + "step": 8061 + }, + { + "epoch": 1.8576036866359447, + "grad_norm": 1.3030363105586589, + "learning_rate": 2.7679017348835264e-08, + "loss": 0.7068890333175659, + "step": 8062 + }, + { + "epoch": 1.8578341013824886, + "grad_norm": 1.3041822573340287, + "learning_rate": 2.7590071383904568e-08, + "loss": 0.8741557002067566, + "step": 8063 + }, + { + "epoch": 1.8580645161290321, + "grad_norm": 1.3236368529143523, + "learning_rate": 2.750126656298768e-08, + "loss": 0.8723797798156738, + "step": 8064 + }, + { + "epoch": 1.858294930875576, + "grad_norm": 1.2019235064586495, + "learning_rate": 2.7412602898974514e-08, + "loss": 0.8510957956314087, + "step": 8065 + }, + { + "epoch": 1.8585253456221198, + "grad_norm": 0.8996466342772348, + "learning_rate": 2.732408040473444e-08, + "loss": 0.6875216960906982, + "step": 8066 + }, + { + "epoch": 1.8587557603686635, + "grad_norm": 1.235948717542994, + "learning_rate": 2.7235699093116515e-08, + "loss": 0.8057721257209778, + "step": 8067 + }, + { + "epoch": 1.8589861751152075, + 
"grad_norm": 1.1066694710477807, + "learning_rate": 2.7147458976949145e-08, + "loss": 0.7547335624694824, + "step": 8068 + }, + { + "epoch": 1.8592165898617512, + "grad_norm": 1.2565080056809024, + "learning_rate": 2.7059360069040193e-08, + "loss": 0.8301708102226257, + "step": 8069 + }, + { + "epoch": 1.859447004608295, + "grad_norm": 1.354839024861171, + "learning_rate": 2.69714023821771e-08, + "loss": 0.8313431143760681, + "step": 8070 + }, + { + "epoch": 1.8596774193548387, + "grad_norm": 1.2482736529337517, + "learning_rate": 2.6883585929126872e-08, + "loss": 0.6631792783737183, + "step": 8071 + }, + { + "epoch": 1.8599078341013824, + "grad_norm": 1.342165180678223, + "learning_rate": 2.679591072263576e-08, + "loss": 0.7643609046936035, + "step": 8072 + }, + { + "epoch": 1.8601382488479263, + "grad_norm": 1.5670037508761703, + "learning_rate": 2.670837677543003e-08, + "loss": 0.8543407917022705, + "step": 8073 + }, + { + "epoch": 1.86036866359447, + "grad_norm": 1.0908415634382522, + "learning_rate": 2.662098410021485e-08, + "loss": 0.8051489591598511, + "step": 8074 + }, + { + "epoch": 1.8605990783410138, + "grad_norm": 1.1493604797084143, + "learning_rate": 2.653373270967518e-08, + "loss": 0.7065767645835876, + "step": 8075 + }, + { + "epoch": 1.8608294930875577, + "grad_norm": 0.9852441728403762, + "learning_rate": 2.6446622616475566e-08, + "loss": 0.672603189945221, + "step": 8076 + }, + { + "epoch": 1.8610599078341012, + "grad_norm": 1.2739019796547877, + "learning_rate": 2.6359653833259776e-08, + "loss": 0.7201080918312073, + "step": 8077 + }, + { + "epoch": 1.8612903225806452, + "grad_norm": 1.156933357533599, + "learning_rate": 2.627282637265149e-08, + "loss": 0.7147494554519653, + "step": 8078 + }, + { + "epoch": 1.861520737327189, + "grad_norm": 1.3793116889121875, + "learning_rate": 2.6186140247253297e-08, + "loss": 0.7051082253456116, + "step": 8079 + }, + { + "epoch": 1.8617511520737327, + "grad_norm": 1.2253670327071573, + "learning_rate": 
2.6099595469647683e-08, + "loss": 0.5786069631576538, + "step": 8080 + }, + { + "epoch": 1.8619815668202766, + "grad_norm": 1.2391603364729231, + "learning_rate": 2.6013192052396493e-08, + "loss": 0.8880232572555542, + "step": 8081 + }, + { + "epoch": 1.8622119815668203, + "grad_norm": 1.3577487615179598, + "learning_rate": 2.5926930008041137e-08, + "loss": 0.9295729398727417, + "step": 8082 + }, + { + "epoch": 1.862442396313364, + "grad_norm": 1.1507407274303025, + "learning_rate": 2.5840809349102378e-08, + "loss": 0.6963248252868652, + "step": 8083 + }, + { + "epoch": 1.8626728110599078, + "grad_norm": 1.2547838683138512, + "learning_rate": 2.5754830088080548e-08, + "loss": 0.8788298964500427, + "step": 8084 + }, + { + "epoch": 1.8629032258064515, + "grad_norm": 1.3540782368440085, + "learning_rate": 2.5668992237455334e-08, + "loss": 0.7454242706298828, + "step": 8085 + }, + { + "epoch": 1.8631336405529955, + "grad_norm": 1.1950812039913048, + "learning_rate": 2.558329580968599e-08, + "loss": 0.7659780383110046, + "step": 8086 + }, + { + "epoch": 1.8633640552995392, + "grad_norm": 1.5016734977487585, + "learning_rate": 2.5497740817211456e-08, + "loss": 0.8799881935119629, + "step": 8087 + }, + { + "epoch": 1.863594470046083, + "grad_norm": 0.9825172132169212, + "learning_rate": 2.5412327272449684e-08, + "loss": 0.7319198846817017, + "step": 8088 + }, + { + "epoch": 1.8638248847926269, + "grad_norm": 1.0689400870779366, + "learning_rate": 2.532705518779854e-08, + "loss": 0.6450645923614502, + "step": 8089 + }, + { + "epoch": 1.8640552995391704, + "grad_norm": 1.1783740361717576, + "learning_rate": 2.52419245756349e-08, + "loss": 0.7213672399520874, + "step": 8090 + }, + { + "epoch": 1.8642857142857143, + "grad_norm": 1.3483335750734096, + "learning_rate": 2.515693544831554e-08, + "loss": 0.790163516998291, + "step": 8091 + }, + { + "epoch": 1.864516129032258, + "grad_norm": 1.2871905619529331, + "learning_rate": 2.507208781817638e-08, + "loss": 0.8324074745178223, 
+ "step": 8092 + }, + { + "epoch": 1.8647465437788018, + "grad_norm": 1.4095960145667545, + "learning_rate": 2.4987381697533227e-08, + "loss": 0.879224419593811, + "step": 8093 + }, + { + "epoch": 1.8649769585253457, + "grad_norm": 1.4121148041878757, + "learning_rate": 2.4902817098680807e-08, + "loss": 0.8668204545974731, + "step": 8094 + }, + { + "epoch": 1.8652073732718892, + "grad_norm": 1.1605042845973315, + "learning_rate": 2.481839403389341e-08, + "loss": 0.6737711429595947, + "step": 8095 + }, + { + "epoch": 1.8654377880184332, + "grad_norm": 1.3482506919608122, + "learning_rate": 2.4734112515425343e-08, + "loss": 0.8948237299919128, + "step": 8096 + }, + { + "epoch": 1.865668202764977, + "grad_norm": 1.2927456093148797, + "learning_rate": 2.4649972555509823e-08, + "loss": 0.6866592168807983, + "step": 8097 + }, + { + "epoch": 1.8658986175115206, + "grad_norm": 1.2040358944727056, + "learning_rate": 2.4565974166359416e-08, + "loss": 0.8852076530456543, + "step": 8098 + }, + { + "epoch": 1.8661290322580646, + "grad_norm": 1.1474664367024714, + "learning_rate": 2.44821173601667e-08, + "loss": 0.7402448654174805, + "step": 8099 + }, + { + "epoch": 1.8663594470046083, + "grad_norm": 1.299234544884085, + "learning_rate": 2.439840214910316e-08, + "loss": 0.8536320924758911, + "step": 8100 + }, + { + "epoch": 1.866589861751152, + "grad_norm": 1.1550631938568499, + "learning_rate": 2.4314828545319965e-08, + "loss": 0.6408628225326538, + "step": 8101 + }, + { + "epoch": 1.866820276497696, + "grad_norm": 1.188548223378954, + "learning_rate": 2.4231396560947858e-08, + "loss": 0.9578930735588074, + "step": 8102 + }, + { + "epoch": 1.8670506912442395, + "grad_norm": 1.8289817367376688, + "learning_rate": 2.4148106208096708e-08, + "loss": 0.7606109976768494, + "step": 8103 + }, + { + "epoch": 1.8672811059907835, + "grad_norm": 0.9826738512020193, + "learning_rate": 2.4064957498856177e-08, + "loss": 0.7446529865264893, + "step": 8104 + }, + { + "epoch": 
1.8675115207373272, + "grad_norm": 1.0744366993530696, + "learning_rate": 2.398195044529505e-08, + "loss": 0.6086497902870178, + "step": 8105 + }, + { + "epoch": 1.867741935483871, + "grad_norm": 1.5561440229209103, + "learning_rate": 2.389908505946181e-08, + "loss": 0.9348995685577393, + "step": 8106 + }, + { + "epoch": 1.8679723502304149, + "grad_norm": 1.1497120508700005, + "learning_rate": 2.381636135338405e-08, + "loss": 0.6817007660865784, + "step": 8107 + }, + { + "epoch": 1.8682027649769584, + "grad_norm": 1.0815805532535518, + "learning_rate": 2.373377933906917e-08, + "loss": 0.7228778600692749, + "step": 8108 + }, + { + "epoch": 1.8684331797235023, + "grad_norm": 1.2824972753864794, + "learning_rate": 2.3651339028503913e-08, + "loss": 0.6974154114723206, + "step": 8109 + }, + { + "epoch": 1.868663594470046, + "grad_norm": 1.2746687740486187, + "learning_rate": 2.3569040433654264e-08, + "loss": 0.8025680780410767, + "step": 8110 + }, + { + "epoch": 1.8688940092165898, + "grad_norm": 1.0439186994105132, + "learning_rate": 2.3486883566465777e-08, + "loss": 0.7570391893386841, + "step": 8111 + }, + { + "epoch": 1.8691244239631337, + "grad_norm": 1.1353343636911755, + "learning_rate": 2.3404868438863246e-08, + "loss": 0.7982438802719116, + "step": 8112 + }, + { + "epoch": 1.8693548387096774, + "grad_norm": 0.948053216671403, + "learning_rate": 2.3322995062751372e-08, + "loss": 0.6615588665008545, + "step": 8113 + }, + { + "epoch": 1.8695852534562212, + "grad_norm": 1.1794145616088556, + "learning_rate": 2.324126345001376e-08, + "loss": 0.7748852968215942, + "step": 8114 + }, + { + "epoch": 1.8698156682027651, + "grad_norm": 1.146675047414541, + "learning_rate": 2.3159673612513587e-08, + "loss": 0.7238468527793884, + "step": 8115 + }, + { + "epoch": 1.8700460829493086, + "grad_norm": 1.2843830020573481, + "learning_rate": 2.3078225562093822e-08, + "loss": 0.8146705627441406, + "step": 8116 + }, + { + "epoch": 1.8702764976958526, + "grad_norm": 
1.0747488287412188, + "learning_rate": 2.2996919310576235e-08, + "loss": 0.8393594026565552, + "step": 8117 + }, + { + "epoch": 1.8705069124423963, + "grad_norm": 1.6346887094004536, + "learning_rate": 2.2915754869762384e-08, + "loss": 0.9619652032852173, + "step": 8118 + }, + { + "epoch": 1.87073732718894, + "grad_norm": 1.6641290836048537, + "learning_rate": 2.2834732251433286e-08, + "loss": 0.8301321268081665, + "step": 8119 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.2687107297135523, + "learning_rate": 2.2753851467349206e-08, + "loss": 0.8236079812049866, + "step": 8120 + }, + { + "epoch": 1.8711981566820275, + "grad_norm": 1.430457986003777, + "learning_rate": 2.267311252924975e-08, + "loss": 0.9007565379142761, + "step": 8121 + }, + { + "epoch": 1.8714285714285714, + "grad_norm": 1.1827948115854126, + "learning_rate": 2.2592515448854432e-08, + "loss": 0.7430707216262817, + "step": 8122 + }, + { + "epoch": 1.8716589861751152, + "grad_norm": 1.17432989990484, + "learning_rate": 2.2512060237861452e-08, + "loss": 0.7562465667724609, + "step": 8123 + }, + { + "epoch": 1.871889400921659, + "grad_norm": 1.1839994711227122, + "learning_rate": 2.24317469079488e-08, + "loss": 0.7736096978187561, + "step": 8124 + }, + { + "epoch": 1.8721198156682028, + "grad_norm": 1.1809968020267403, + "learning_rate": 2.2351575470774153e-08, + "loss": 0.7652724981307983, + "step": 8125 + }, + { + "epoch": 1.8723502304147466, + "grad_norm": 1.4664554269524215, + "learning_rate": 2.2271545937973978e-08, + "loss": 0.8034792542457581, + "step": 8126 + }, + { + "epoch": 1.8725806451612903, + "grad_norm": 1.2107856133228136, + "learning_rate": 2.219165832116454e-08, + "loss": 0.6158101558685303, + "step": 8127 + }, + { + "epoch": 1.8728110599078343, + "grad_norm": 1.1984460742665393, + "learning_rate": 2.2111912631941564e-08, + "loss": 0.6514682769775391, + "step": 8128 + }, + { + "epoch": 1.8730414746543778, + "grad_norm": 1.1090676234846621, + "learning_rate": 
2.203230888187979e-08, + "loss": 0.833041787147522, + "step": 8129 + }, + { + "epoch": 1.8732718894009217, + "grad_norm": 1.3944148742352294, + "learning_rate": 2.1952847082533864e-08, + "loss": 0.8033208250999451, + "step": 8130 + }, + { + "epoch": 1.8735023041474654, + "grad_norm": 1.2067904980609332, + "learning_rate": 2.187352724543734e-08, + "loss": 0.742051362991333, + "step": 8131 + }, + { + "epoch": 1.8737327188940092, + "grad_norm": 1.2058964422107643, + "learning_rate": 2.1794349382103337e-08, + "loss": 0.7411169409751892, + "step": 8132 + }, + { + "epoch": 1.8739631336405531, + "grad_norm": 1.3201479261882787, + "learning_rate": 2.171531350402467e-08, + "loss": 0.7517165541648865, + "step": 8133 + }, + { + "epoch": 1.8741935483870966, + "grad_norm": 1.2371172479380752, + "learning_rate": 2.1636419622673263e-08, + "loss": 0.8010021448135376, + "step": 8134 + }, + { + "epoch": 1.8744239631336406, + "grad_norm": 1.2501522956166489, + "learning_rate": 2.1557667749500187e-08, + "loss": 0.7265241742134094, + "step": 8135 + }, + { + "epoch": 1.8746543778801843, + "grad_norm": 1.191380870353666, + "learning_rate": 2.1479057895936403e-08, + "loss": 0.6809227466583252, + "step": 8136 + }, + { + "epoch": 1.874884792626728, + "grad_norm": 1.2737037893770147, + "learning_rate": 2.140059007339201e-08, + "loss": 0.8235769271850586, + "step": 8137 + }, + { + "epoch": 1.875115207373272, + "grad_norm": 1.1356268338575812, + "learning_rate": 2.132226429325634e-08, + "loss": 0.7556289434432983, + "step": 8138 + }, + { + "epoch": 1.8753456221198157, + "grad_norm": 1.257264783564694, + "learning_rate": 2.1244080566898638e-08, + "loss": 0.7765048742294312, + "step": 8139 + }, + { + "epoch": 1.8755760368663594, + "grad_norm": 1.1776465139256578, + "learning_rate": 2.1166038905666816e-08, + "loss": 0.7637666463851929, + "step": 8140 + }, + { + "epoch": 1.8758064516129034, + "grad_norm": 1.2471130614608452, + "learning_rate": 2.10881393208886e-08, + "loss": 0.8413453698158264, + 
"step": 8141 + }, + { + "epoch": 1.8760368663594469, + "grad_norm": 1.443351972543058, + "learning_rate": 2.101038182387105e-08, + "loss": 0.7937475442886353, + "step": 8142 + }, + { + "epoch": 1.8762672811059908, + "grad_norm": 1.1772607773578063, + "learning_rate": 2.0932766425900585e-08, + "loss": 0.7654982805252075, + "step": 8143 + }, + { + "epoch": 1.8764976958525346, + "grad_norm": 1.53397176108589, + "learning_rate": 2.0855293138242968e-08, + "loss": 0.8950663805007935, + "step": 8144 + }, + { + "epoch": 1.8767281105990783, + "grad_norm": 1.250929142335872, + "learning_rate": 2.077796197214332e-08, + "loss": 0.6405420303344727, + "step": 8145 + }, + { + "epoch": 1.8769585253456222, + "grad_norm": 1.085136655013558, + "learning_rate": 2.0700772938826217e-08, + "loss": 0.7724314332008362, + "step": 8146 + }, + { + "epoch": 1.8771889400921657, + "grad_norm": 1.09160242748488, + "learning_rate": 2.0623726049495472e-08, + "loss": 0.7929061651229858, + "step": 8147 + }, + { + "epoch": 1.8774193548387097, + "grad_norm": 1.0975195498555617, + "learning_rate": 2.0546821315334363e-08, + "loss": 0.7207096815109253, + "step": 8148 + }, + { + "epoch": 1.8776497695852534, + "grad_norm": 1.347240880442127, + "learning_rate": 2.0470058747505513e-08, + "loss": 0.9234127402305603, + "step": 8149 + }, + { + "epoch": 1.8778801843317972, + "grad_norm": 1.2189429089634525, + "learning_rate": 2.0393438357150906e-08, + "loss": 0.9006322026252747, + "step": 8150 + }, + { + "epoch": 1.878110599078341, + "grad_norm": 0.9863507376975118, + "learning_rate": 2.0316960155391972e-08, + "loss": 0.6289799809455872, + "step": 8151 + }, + { + "epoch": 1.8783410138248848, + "grad_norm": 1.117182475586666, + "learning_rate": 2.0240624153329168e-08, + "loss": 0.8551793098449707, + "step": 8152 + }, + { + "epoch": 1.8785714285714286, + "grad_norm": 1.1253834649892556, + "learning_rate": 2.016443036204285e-08, + "loss": 0.8065170645713806, + "step": 8153 + }, + { + "epoch": 1.8788018433179725, + 
"grad_norm": 1.0124272640628642, + "learning_rate": 2.0088378792592286e-08, + "loss": 0.6361274719238281, + "step": 8154 + }, + { + "epoch": 1.879032258064516, + "grad_norm": 1.3966308966349001, + "learning_rate": 2.0012469456016312e-08, + "loss": 0.8539700508117676, + "step": 8155 + }, + { + "epoch": 1.87926267281106, + "grad_norm": 1.380681857214056, + "learning_rate": 1.9936702363333115e-08, + "loss": 0.7424989938735962, + "step": 8156 + }, + { + "epoch": 1.8794930875576037, + "grad_norm": 1.0795560964001287, + "learning_rate": 1.9861077525540116e-08, + "loss": 0.5831520557403564, + "step": 8157 + }, + { + "epoch": 1.8797235023041474, + "grad_norm": 1.3034651332513367, + "learning_rate": 1.9785594953614093e-08, + "loss": 0.8080646991729736, + "step": 8158 + }, + { + "epoch": 1.8799539170506914, + "grad_norm": 1.3028494466110516, + "learning_rate": 1.9710254658511392e-08, + "loss": 0.8008537292480469, + "step": 8159 + }, + { + "epoch": 1.8801843317972349, + "grad_norm": 0.7838996508063781, + "learning_rate": 1.9635056651167492e-08, + "loss": 0.7317294478416443, + "step": 8160 + }, + { + "epoch": 1.8804147465437788, + "grad_norm": 1.240068145392807, + "learning_rate": 1.956000094249721e-08, + "loss": 0.803238034248352, + "step": 8161 + }, + { + "epoch": 1.8806451612903226, + "grad_norm": 1.1592302203633778, + "learning_rate": 1.948508754339506e-08, + "loss": 0.7202219367027283, + "step": 8162 + }, + { + "epoch": 1.8808755760368663, + "grad_norm": 1.3406292816176746, + "learning_rate": 1.9410316464734233e-08, + "loss": 0.7691160440444946, + "step": 8163 + }, + { + "epoch": 1.8811059907834102, + "grad_norm": 1.0898220168427848, + "learning_rate": 1.933568771736782e-08, + "loss": 0.7092962265014648, + "step": 8164 + }, + { + "epoch": 1.881336405529954, + "grad_norm": 1.3165421464208054, + "learning_rate": 1.9261201312128274e-08, + "loss": 0.819804310798645, + "step": 8165 + }, + { + "epoch": 1.8815668202764977, + "grad_norm": 1.2278633726487793, + "learning_rate": 
1.918685725982694e-08, + "loss": 0.9127538204193115, + "step": 8166 + }, + { + "epoch": 1.8817972350230416, + "grad_norm": 1.198181344272901, + "learning_rate": 1.9112655571254855e-08, + "loss": 0.8023328185081482, + "step": 8167 + }, + { + "epoch": 1.8820276497695851, + "grad_norm": 1.1150363141436184, + "learning_rate": 1.903859625718218e-08, + "loss": 0.723065972328186, + "step": 8168 + }, + { + "epoch": 1.882258064516129, + "grad_norm": 1.329775802249569, + "learning_rate": 1.896467932835877e-08, + "loss": 0.7838670611381531, + "step": 8169 + }, + { + "epoch": 1.8824884792626728, + "grad_norm": 1.0221481880663403, + "learning_rate": 1.8890904795513475e-08, + "loss": 0.6029871702194214, + "step": 8170 + }, + { + "epoch": 1.8827188940092165, + "grad_norm": 1.1179619592038208, + "learning_rate": 1.8817272669354512e-08, + "loss": 0.7622933387756348, + "step": 8171 + }, + { + "epoch": 1.8829493087557605, + "grad_norm": 1.3471730261003036, + "learning_rate": 1.8743782960569444e-08, + "loss": 0.7702913284301758, + "step": 8172 + }, + { + "epoch": 1.883179723502304, + "grad_norm": 1.1115192812221177, + "learning_rate": 1.867043567982518e-08, + "loss": 0.6385080814361572, + "step": 8173 + }, + { + "epoch": 1.883410138248848, + "grad_norm": 1.1957117872616694, + "learning_rate": 1.8597230837768208e-08, + "loss": 0.6886409521102905, + "step": 8174 + }, + { + "epoch": 1.8836405529953917, + "grad_norm": 1.2615274538141057, + "learning_rate": 1.8524168445023803e-08, + "loss": 0.7697125673294067, + "step": 8175 + }, + { + "epoch": 1.8838709677419354, + "grad_norm": 1.2703572064059772, + "learning_rate": 1.8451248512197148e-08, + "loss": 0.7942332029342651, + "step": 8176 + }, + { + "epoch": 1.8841013824884794, + "grad_norm": 1.2486681210000266, + "learning_rate": 1.8378471049872445e-08, + "loss": 0.7751410007476807, + "step": 8177 + }, + { + "epoch": 1.884331797235023, + "grad_norm": 1.4135289386452112, + "learning_rate": 1.8305836068613023e-08, + "loss": 0.8650992512702942, 
+ "step": 8178 + }, + { + "epoch": 1.8845622119815668, + "grad_norm": 1.255590367160678, + "learning_rate": 1.8233343578962e-08, + "loss": 0.7084495425224304, + "step": 8179 + }, + { + "epoch": 1.8847926267281108, + "grad_norm": 1.2065933395861381, + "learning_rate": 1.8160993591441408e-08, + "loss": 0.7428494691848755, + "step": 8180 + }, + { + "epoch": 1.8850230414746543, + "grad_norm": 1.2721568643853003, + "learning_rate": 1.8088786116552844e-08, + "loss": 0.7431809902191162, + "step": 8181 + }, + { + "epoch": 1.8852534562211982, + "grad_norm": 1.5234831289492186, + "learning_rate": 1.801672116477715e-08, + "loss": 0.8312518000602722, + "step": 8182 + }, + { + "epoch": 1.885483870967742, + "grad_norm": 1.412977003038852, + "learning_rate": 1.7944798746574285e-08, + "loss": 0.8574832081794739, + "step": 8183 + }, + { + "epoch": 1.8857142857142857, + "grad_norm": 1.209006694724365, + "learning_rate": 1.7873018872383793e-08, + "loss": 0.7716966867446899, + "step": 8184 + }, + { + "epoch": 1.8859447004608296, + "grad_norm": 1.1984291768693995, + "learning_rate": 1.780138155262456e-08, + "loss": 0.8536000847816467, + "step": 8185 + }, + { + "epoch": 1.8861751152073731, + "grad_norm": 1.4411910829910872, + "learning_rate": 1.7729886797694606e-08, + "loss": 0.6559889316558838, + "step": 8186 + }, + { + "epoch": 1.886405529953917, + "grad_norm": 1.4146541158068258, + "learning_rate": 1.7658534617971065e-08, + "loss": 0.7371512651443481, + "step": 8187 + }, + { + "epoch": 1.8866359447004608, + "grad_norm": 1.5920989952321163, + "learning_rate": 1.7587325023810773e-08, + "loss": 0.8092008829116821, + "step": 8188 + }, + { + "epoch": 1.8868663594470045, + "grad_norm": 1.1485577131831675, + "learning_rate": 1.751625802554979e-08, + "loss": 0.7793067693710327, + "step": 8189 + }, + { + "epoch": 1.8870967741935485, + "grad_norm": 1.3107398360408737, + "learning_rate": 1.7445333633503312e-08, + "loss": 0.8102752566337585, + "step": 8190 + }, + { + "epoch": 1.8873271889400922, 
+ "grad_norm": 0.9411355693415201, + "learning_rate": 1.737455185796588e-08, + "loss": 0.7141490578651428, + "step": 8191 + }, + { + "epoch": 1.887557603686636, + "grad_norm": 1.3771499753857814, + "learning_rate": 1.7303912709211497e-08, + "loss": 0.8010870218276978, + "step": 8192 + }, + { + "epoch": 1.8877880184331797, + "grad_norm": 1.0040229371574219, + "learning_rate": 1.723341619749319e-08, + "loss": 0.7945431470870972, + "step": 8193 + }, + { + "epoch": 1.8880184331797234, + "grad_norm": 1.5084700431378903, + "learning_rate": 1.7163062333043544e-08, + "loss": 0.765398383140564, + "step": 8194 + }, + { + "epoch": 1.8882488479262673, + "grad_norm": 1.141763186710756, + "learning_rate": 1.709285112607428e-08, + "loss": 0.8645910024642944, + "step": 8195 + }, + { + "epoch": 1.888479262672811, + "grad_norm": 1.4294051802947438, + "learning_rate": 1.7022782586776363e-08, + "loss": 0.7650351524353027, + "step": 8196 + }, + { + "epoch": 1.8887096774193548, + "grad_norm": 1.148441042244908, + "learning_rate": 1.695285672532043e-08, + "loss": 0.8059902191162109, + "step": 8197 + }, + { + "epoch": 1.8889400921658988, + "grad_norm": 1.3019488561633756, + "learning_rate": 1.688307355185592e-08, + "loss": 0.8389305472373962, + "step": 8198 + }, + { + "epoch": 1.8891705069124423, + "grad_norm": 1.3363862822981094, + "learning_rate": 1.681343307651173e-08, + "loss": 0.755578875541687, + "step": 8199 + }, + { + "epoch": 1.8894009216589862, + "grad_norm": 1.2754809499843205, + "learning_rate": 1.6743935309396218e-08, + "loss": 0.822825014591217, + "step": 8200 + }, + { + "epoch": 1.88963133640553, + "grad_norm": 1.2571266177044025, + "learning_rate": 1.667458026059676e-08, + "loss": 0.8229342699050903, + "step": 8201 + }, + { + "epoch": 1.8898617511520737, + "grad_norm": 1.3086181916191966, + "learning_rate": 1.6605367940180303e-08, + "loss": 0.7142254114151001, + "step": 8202 + }, + { + "epoch": 1.8900921658986176, + "grad_norm": 1.1722391698259569, + "learning_rate": 
1.6536298358192812e-08, + "loss": 0.8904600739479065, + "step": 8203 + }, + { + "epoch": 1.8903225806451613, + "grad_norm": 1.151403763105922, + "learning_rate": 1.6467371524659603e-08, + "loss": 0.8758517503738403, + "step": 8204 + }, + { + "epoch": 1.890552995391705, + "grad_norm": 1.3083947750625244, + "learning_rate": 1.6398587449585555e-08, + "loss": 0.7609111666679382, + "step": 8205 + }, + { + "epoch": 1.8907834101382488, + "grad_norm": 0.9406449994318669, + "learning_rate": 1.6329946142954353e-08, + "loss": 0.8177064657211304, + "step": 8206 + }, + { + "epoch": 1.8910138248847925, + "grad_norm": 1.1366142550146048, + "learning_rate": 1.626144761472925e-08, + "loss": 0.6342105865478516, + "step": 8207 + }, + { + "epoch": 1.8912442396313365, + "grad_norm": 0.8903675484312013, + "learning_rate": 1.6193091874852627e-08, + "loss": 0.6025499105453491, + "step": 8208 + }, + { + "epoch": 1.8914746543778802, + "grad_norm": 1.3017839387858507, + "learning_rate": 1.6124878933246543e-08, + "loss": 0.78373783826828, + "step": 8209 + }, + { + "epoch": 1.891705069124424, + "grad_norm": 1.336095893979754, + "learning_rate": 1.605680879981164e-08, + "loss": 0.8072086572647095, + "step": 8210 + }, + { + "epoch": 1.8919354838709679, + "grad_norm": 1.5597980072939257, + "learning_rate": 1.5988881484428453e-08, + "loss": 0.9057372212409973, + "step": 8211 + }, + { + "epoch": 1.8921658986175114, + "grad_norm": 1.2099616448625954, + "learning_rate": 1.592109699695643e-08, + "loss": 0.8235929012298584, + "step": 8212 + }, + { + "epoch": 1.8923963133640553, + "grad_norm": 1.2417707847492958, + "learning_rate": 1.5853455347234366e-08, + "loss": 0.6610825061798096, + "step": 8213 + }, + { + "epoch": 1.892626728110599, + "grad_norm": 1.4158986087253451, + "learning_rate": 1.5785956545080415e-08, + "loss": 0.7152366638183594, + "step": 8214 + }, + { + "epoch": 1.8928571428571428, + "grad_norm": 1.330885873092923, + "learning_rate": 1.5718600600292066e-08, + "loss": 0.7971903085708618, 
+ "step": 8215 + }, + { + "epoch": 1.8930875576036867, + "grad_norm": 1.226467557812747, + "learning_rate": 1.565138752264572e-08, + "loss": 0.7639449238777161, + "step": 8216 + }, + { + "epoch": 1.8933179723502302, + "grad_norm": 1.0517976072639703, + "learning_rate": 1.5584317321897356e-08, + "loss": 0.6396117806434631, + "step": 8217 + }, + { + "epoch": 1.8935483870967742, + "grad_norm": 1.328962567982178, + "learning_rate": 1.5517390007782183e-08, + "loss": 0.790566086769104, + "step": 8218 + }, + { + "epoch": 1.893778801843318, + "grad_norm": 1.6769404862380202, + "learning_rate": 1.5450605590014544e-08, + "loss": 0.7948310971260071, + "step": 8219 + }, + { + "epoch": 1.8940092165898617, + "grad_norm": 1.2378052027269906, + "learning_rate": 1.5383964078288124e-08, + "loss": 0.9425654411315918, + "step": 8220 + }, + { + "epoch": 1.8942396313364056, + "grad_norm": 1.2441112834124675, + "learning_rate": 1.531746548227586e-08, + "loss": 0.8001678586006165, + "step": 8221 + }, + { + "epoch": 1.8944700460829493, + "grad_norm": 0.9072642646135723, + "learning_rate": 1.5251109811629915e-08, + "loss": 0.6636781692504883, + "step": 8222 + }, + { + "epoch": 1.894700460829493, + "grad_norm": 1.0313464437335311, + "learning_rate": 1.5184897075981807e-08, + "loss": 0.7884416580200195, + "step": 8223 + }, + { + "epoch": 1.894930875576037, + "grad_norm": 1.0907885139753422, + "learning_rate": 1.511882728494218e-08, + "loss": 0.6888208389282227, + "step": 8224 + }, + { + "epoch": 1.8951612903225805, + "grad_norm": 1.3461823033287323, + "learning_rate": 1.5052900448100815e-08, + "loss": 0.7253614664077759, + "step": 8225 + }, + { + "epoch": 1.8953917050691245, + "grad_norm": 1.2272377599078015, + "learning_rate": 1.498711657502716e-08, + "loss": 0.7865983843803406, + "step": 8226 + }, + { + "epoch": 1.8956221198156682, + "grad_norm": 1.4908955714231082, + "learning_rate": 1.492147567526947e-08, + "loss": 0.8778063654899597, + "step": 8227 + }, + { + "epoch": 1.895852534562212, 
+ "grad_norm": 1.2263224402103408, + "learning_rate": 1.4855977758355675e-08, + "loss": 0.7812581062316895, + "step": 8228 + }, + { + "epoch": 1.8960829493087559, + "grad_norm": 1.2890011409819144, + "learning_rate": 1.4790622833792287e-08, + "loss": 0.7160226106643677, + "step": 8229 + }, + { + "epoch": 1.8963133640552994, + "grad_norm": 1.1613199880989007, + "learning_rate": 1.472541091106594e-08, + "loss": 0.8187412619590759, + "step": 8230 + }, + { + "epoch": 1.8965437788018433, + "grad_norm": 1.1653251647412382, + "learning_rate": 1.4660341999641834e-08, + "loss": 0.7517846822738647, + "step": 8231 + }, + { + "epoch": 1.896774193548387, + "grad_norm": 1.3673338656755198, + "learning_rate": 1.4595416108964753e-08, + "loss": 0.9230127334594727, + "step": 8232 + }, + { + "epoch": 1.8970046082949308, + "grad_norm": 1.228175308993719, + "learning_rate": 1.4530633248458269e-08, + "loss": 0.6803582906723022, + "step": 8233 + }, + { + "epoch": 1.8972350230414747, + "grad_norm": 1.2890219242119376, + "learning_rate": 1.4465993427525968e-08, + "loss": 0.8444511294364929, + "step": 8234 + }, + { + "epoch": 1.8974654377880185, + "grad_norm": 1.4479761110450609, + "learning_rate": 1.4401496655550016e-08, + "loss": 0.7622519731521606, + "step": 8235 + }, + { + "epoch": 1.8976958525345622, + "grad_norm": 1.20875065982799, + "learning_rate": 1.4337142941892033e-08, + "loss": 0.687129020690918, + "step": 8236 + }, + { + "epoch": 1.8979262672811061, + "grad_norm": 1.1827775538431895, + "learning_rate": 1.4272932295892992e-08, + "loss": 0.6421219110488892, + "step": 8237 + }, + { + "epoch": 1.8981566820276496, + "grad_norm": 1.2669401147896007, + "learning_rate": 1.4208864726872772e-08, + "loss": 0.7829388380050659, + "step": 8238 + }, + { + "epoch": 1.8983870967741936, + "grad_norm": 1.3482974956529734, + "learning_rate": 1.4144940244130821e-08, + "loss": 0.7754424810409546, + "step": 8239 + }, + { + "epoch": 1.8986175115207373, + "grad_norm": 1.1130898544931584, + 
"learning_rate": 1.4081158856945719e-08, + "loss": 0.6544859409332275, + "step": 8240 + }, + { + "epoch": 1.898847926267281, + "grad_norm": 1.0822240775455856, + "learning_rate": 1.4017520574575282e-08, + "loss": 0.8020427227020264, + "step": 8241 + }, + { + "epoch": 1.899078341013825, + "grad_norm": 1.1350657169907092, + "learning_rate": 1.3954025406256343e-08, + "loss": 0.7343212366104126, + "step": 8242 + }, + { + "epoch": 1.8993087557603685, + "grad_norm": 1.2792336145941459, + "learning_rate": 1.3890673361205418e-08, + "loss": 0.7643232345581055, + "step": 8243 + }, + { + "epoch": 1.8995391705069125, + "grad_norm": 1.212662168320899, + "learning_rate": 1.3827464448617709e-08, + "loss": 0.7806165814399719, + "step": 8244 + }, + { + "epoch": 1.8997695852534562, + "grad_norm": 1.6104194734157218, + "learning_rate": 1.3764398677667988e-08, + "loss": 0.8533280491828918, + "step": 8245 + }, + { + "epoch": 1.9, + "grad_norm": 1.1289941083869026, + "learning_rate": 1.3701476057510264e-08, + "loss": 0.773565411567688, + "step": 8246 + }, + { + "epoch": 1.9002304147465439, + "grad_norm": 1.1091300492504157, + "learning_rate": 1.3638696597277677e-08, + "loss": 0.7752503752708435, + "step": 8247 + }, + { + "epoch": 1.9004608294930876, + "grad_norm": 0.9880656776459645, + "learning_rate": 1.3576060306082383e-08, + "loss": 0.7466747760772705, + "step": 8248 + }, + { + "epoch": 1.9006912442396313, + "grad_norm": 1.2177337280417093, + "learning_rate": 1.3513567193016106e-08, + "loss": 0.8103033304214478, + "step": 8249 + }, + { + "epoch": 1.9009216589861753, + "grad_norm": 1.0248826665714235, + "learning_rate": 1.3451217267149595e-08, + "loss": 0.6501287817955017, + "step": 8250 + }, + { + "epoch": 1.9011520737327188, + "grad_norm": 1.210107770730306, + "learning_rate": 1.3389010537532941e-08, + "loss": 0.7329230308532715, + "step": 8251 + }, + { + "epoch": 1.9013824884792627, + "grad_norm": 1.3978474783131303, + "learning_rate": 1.3326947013195255e-08, + "loss": 
0.8413917422294617, + "step": 8252 + }, + { + "epoch": 1.9016129032258065, + "grad_norm": 1.4081927433558092, + "learning_rate": 1.3265026703144999e-08, + "loss": 0.7283090353012085, + "step": 8253 + }, + { + "epoch": 1.9018433179723502, + "grad_norm": 1.2553133709092965, + "learning_rate": 1.3203249616369872e-08, + "loss": 0.8378126621246338, + "step": 8254 + }, + { + "epoch": 1.9020737327188941, + "grad_norm": 1.099276496142028, + "learning_rate": 1.3141615761836811e-08, + "loss": 0.7675777673721313, + "step": 8255 + }, + { + "epoch": 1.9023041474654376, + "grad_norm": 1.6916159414604328, + "learning_rate": 1.308012514849155e-08, + "loss": 0.6448104381561279, + "step": 8256 + }, + { + "epoch": 1.9025345622119816, + "grad_norm": 1.3264486635424506, + "learning_rate": 1.3018777785259838e-08, + "loss": 0.8024395704269409, + "step": 8257 + }, + { + "epoch": 1.9027649769585253, + "grad_norm": 1.1900370575281645, + "learning_rate": 1.2957573681045887e-08, + "loss": 0.8159325122833252, + "step": 8258 + }, + { + "epoch": 1.902995391705069, + "grad_norm": 1.1100937535082447, + "learning_rate": 1.2896512844733365e-08, + "loss": 0.7916233539581299, + "step": 8259 + }, + { + "epoch": 1.903225806451613, + "grad_norm": 1.2408177778484295, + "learning_rate": 1.2835595285185296e-08, + "loss": 0.798140823841095, + "step": 8260 + }, + { + "epoch": 1.9034562211981567, + "grad_norm": 1.2142666252173266, + "learning_rate": 1.277482101124383e-08, + "loss": 0.7881651520729065, + "step": 8261 + }, + { + "epoch": 1.9036866359447004, + "grad_norm": 1.3615775077613546, + "learning_rate": 1.2714190031730021e-08, + "loss": 0.7023189663887024, + "step": 8262 + }, + { + "epoch": 1.9039170506912444, + "grad_norm": 1.2537620544817238, + "learning_rate": 1.2653702355444606e-08, + "loss": 0.8286309242248535, + "step": 8263 + }, + { + "epoch": 1.904147465437788, + "grad_norm": 1.4181409914325045, + "learning_rate": 1.259335799116723e-08, + "loss": 0.7626973986625671, + "step": 8264 + }, + { + 
"epoch": 1.9043778801843319, + "grad_norm": 1.7640804361655256, + "learning_rate": 1.2533156947656665e-08, + "loss": 1.0350267887115479, + "step": 8265 + }, + { + "epoch": 1.9046082949308756, + "grad_norm": 1.0808972871053977, + "learning_rate": 1.2473099233651251e-08, + "loss": 0.6378228664398193, + "step": 8266 + }, + { + "epoch": 1.9048387096774193, + "grad_norm": 1.1012549826430145, + "learning_rate": 1.2413184857868241e-08, + "loss": 0.8265732526779175, + "step": 8267 + }, + { + "epoch": 1.9050691244239633, + "grad_norm": 1.102740322591124, + "learning_rate": 1.23534138290039e-08, + "loss": 0.8545348644256592, + "step": 8268 + }, + { + "epoch": 1.9052995391705068, + "grad_norm": 1.1667419775790697, + "learning_rate": 1.2293786155734176e-08, + "loss": 0.660080075263977, + "step": 8269 + }, + { + "epoch": 1.9055299539170507, + "grad_norm": 1.4258566183231558, + "learning_rate": 1.2234301846713813e-08, + "loss": 0.8409689664840698, + "step": 8270 + }, + { + "epoch": 1.9057603686635944, + "grad_norm": 1.3639053971310304, + "learning_rate": 1.2174960910576904e-08, + "loss": 0.8026434183120728, + "step": 8271 + }, + { + "epoch": 1.9059907834101382, + "grad_norm": 1.1477802786886386, + "learning_rate": 1.2115763355936671e-08, + "loss": 0.8315812945365906, + "step": 8272 + }, + { + "epoch": 1.9062211981566821, + "grad_norm": 1.1488868543504023, + "learning_rate": 1.2056709191385572e-08, + "loss": 0.7373194694519043, + "step": 8273 + }, + { + "epoch": 1.9064516129032258, + "grad_norm": 1.28219548502893, + "learning_rate": 1.1997798425495309e-08, + "loss": 0.7502317428588867, + "step": 8274 + }, + { + "epoch": 1.9066820276497696, + "grad_norm": 1.1940555150789485, + "learning_rate": 1.1939031066816707e-08, + "loss": 0.8208760023117065, + "step": 8275 + }, + { + "epoch": 1.9069124423963135, + "grad_norm": 1.2690336009694645, + "learning_rate": 1.188040712387961e-08, + "loss": 0.7584094405174255, + "step": 8276 + }, + { + "epoch": 1.907142857142857, + "grad_norm": 
1.3136164329476003, + "learning_rate": 1.1821926605193433e-08, + "loss": 0.7776647210121155, + "step": 8277 + }, + { + "epoch": 1.907373271889401, + "grad_norm": 1.0778088332238458, + "learning_rate": 1.1763589519246387e-08, + "loss": 0.7739659547805786, + "step": 8278 + }, + { + "epoch": 1.9076036866359447, + "grad_norm": 1.3752880267959628, + "learning_rate": 1.170539587450603e-08, + "loss": 0.7276068925857544, + "step": 8279 + }, + { + "epoch": 1.9078341013824884, + "grad_norm": 1.1782987713077362, + "learning_rate": 1.1647345679419163e-08, + "loss": 0.624208927154541, + "step": 8280 + }, + { + "epoch": 1.9080645161290324, + "grad_norm": 1.0744404873031923, + "learning_rate": 1.1589438942411712e-08, + "loss": 0.7865229845046997, + "step": 8281 + }, + { + "epoch": 1.908294930875576, + "grad_norm": 1.1655122856650737, + "learning_rate": 1.1531675671888619e-08, + "loss": 0.8290715217590332, + "step": 8282 + }, + { + "epoch": 1.9085253456221198, + "grad_norm": 1.4733922787626827, + "learning_rate": 1.1474055876234289e-08, + "loss": 0.8750064969062805, + "step": 8283 + }, + { + "epoch": 1.9087557603686636, + "grad_norm": 1.0358743027064434, + "learning_rate": 1.1416579563812146e-08, + "loss": 0.7946900129318237, + "step": 8284 + }, + { + "epoch": 1.9089861751152073, + "grad_norm": 1.1260650941834194, + "learning_rate": 1.1359246742964623e-08, + "loss": 0.6673855781555176, + "step": 8285 + }, + { + "epoch": 1.9092165898617512, + "grad_norm": 1.5734371068415847, + "learning_rate": 1.1302057422013734e-08, + "loss": 0.8423609137535095, + "step": 8286 + }, + { + "epoch": 1.909447004608295, + "grad_norm": 1.1774099615686673, + "learning_rate": 1.124501160926039e-08, + "loss": 0.7583299279212952, + "step": 8287 + }, + { + "epoch": 1.9096774193548387, + "grad_norm": 1.3632188021099019, + "learning_rate": 1.1188109312984639e-08, + "loss": 0.8489730358123779, + "step": 8288 + }, + { + "epoch": 1.9099078341013827, + "grad_norm": 1.268317857067217, + "learning_rate": 
1.1131350541445871e-08, + "loss": 0.7460636496543884, + "step": 8289 + }, + { + "epoch": 1.9101382488479262, + "grad_norm": 1.1951667787690143, + "learning_rate": 1.1074735302882387e-08, + "loss": 0.7310905456542969, + "step": 8290 + }, + { + "epoch": 1.91036866359447, + "grad_norm": 1.1692661015812214, + "learning_rate": 1.1018263605511946e-08, + "loss": 0.8411405086517334, + "step": 8291 + }, + { + "epoch": 1.9105990783410138, + "grad_norm": 1.12451343736832, + "learning_rate": 1.0961935457531323e-08, + "loss": 0.7980802059173584, + "step": 8292 + }, + { + "epoch": 1.9108294930875576, + "grad_norm": 1.2914760603674136, + "learning_rate": 1.0905750867116426e-08, + "loss": 0.779492974281311, + "step": 8293 + }, + { + "epoch": 1.9110599078341015, + "grad_norm": 1.0940139924335759, + "learning_rate": 1.0849709842422283e-08, + "loss": 0.7893733978271484, + "step": 8294 + }, + { + "epoch": 1.911290322580645, + "grad_norm": 1.367510888792546, + "learning_rate": 1.07938123915835e-08, + "loss": 0.8281872272491455, + "step": 8295 + }, + { + "epoch": 1.911520737327189, + "grad_norm": 1.3626141199750628, + "learning_rate": 1.0738058522713144e-08, + "loss": 0.721331775188446, + "step": 8296 + }, + { + "epoch": 1.9117511520737327, + "grad_norm": 0.9302233955509024, + "learning_rate": 1.0682448243904073e-08, + "loss": 0.6043491363525391, + "step": 8297 + }, + { + "epoch": 1.9119815668202764, + "grad_norm": 1.002380139729753, + "learning_rate": 1.0626981563227943e-08, + "loss": 0.7737481594085693, + "step": 8298 + }, + { + "epoch": 1.9122119815668204, + "grad_norm": 1.20563258082351, + "learning_rate": 1.0571658488735536e-08, + "loss": 0.771499514579773, + "step": 8299 + }, + { + "epoch": 1.912442396313364, + "grad_norm": 1.1334287395884057, + "learning_rate": 1.0516479028457204e-08, + "loss": 0.6711971759796143, + "step": 8300 + }, + { + "epoch": 1.9126728110599078, + "grad_norm": 1.1514161835446617, + "learning_rate": 1.0461443190402097e-08, + "loss": 0.691685140132904, + 
"step": 8301 + }, + { + "epoch": 1.9129032258064518, + "grad_norm": 1.0627327279898275, + "learning_rate": 1.0406550982558382e-08, + "loss": 0.7339159250259399, + "step": 8302 + }, + { + "epoch": 1.9131336405529953, + "grad_norm": 1.098827920572517, + "learning_rate": 1.0351802412893796e-08, + "loss": 0.7832008600234985, + "step": 8303 + }, + { + "epoch": 1.9133640552995392, + "grad_norm": 1.8976948304927823, + "learning_rate": 1.0297197489355092e-08, + "loss": 0.862671971321106, + "step": 8304 + }, + { + "epoch": 1.913594470046083, + "grad_norm": 1.2340137918284608, + "learning_rate": 1.0242736219867821e-08, + "loss": 0.6442357897758484, + "step": 8305 + }, + { + "epoch": 1.9138248847926267, + "grad_norm": 1.3262423414476558, + "learning_rate": 1.0188418612337102e-08, + "loss": 0.8777452707290649, + "step": 8306 + }, + { + "epoch": 1.9140552995391706, + "grad_norm": 1.2308393583128812, + "learning_rate": 1.0134244674647186e-08, + "loss": 0.7672470808029175, + "step": 8307 + }, + { + "epoch": 1.9142857142857141, + "grad_norm": 0.9277990008899878, + "learning_rate": 1.0080214414661226e-08, + "loss": 0.7338177561759949, + "step": 8308 + }, + { + "epoch": 1.914516129032258, + "grad_norm": 1.3815065909330264, + "learning_rate": 1.0026327840221727e-08, + "loss": 0.7546414136886597, + "step": 8309 + }, + { + "epoch": 1.9147465437788018, + "grad_norm": 1.0116807626508924, + "learning_rate": 9.972584959149988e-09, + "loss": 0.621455192565918, + "step": 8310 + }, + { + "epoch": 1.9149769585253456, + "grad_norm": 1.0385626369203964, + "learning_rate": 9.918985779247102e-09, + "loss": 0.7403131723403931, + "step": 8311 + }, + { + "epoch": 1.9152073732718895, + "grad_norm": 1.1027069898803628, + "learning_rate": 9.865530308292624e-09, + "loss": 0.7924279570579529, + "step": 8312 + }, + { + "epoch": 1.9154377880184332, + "grad_norm": 1.1362295208393791, + "learning_rate": 9.81221855404568e-09, + "loss": 0.8831228017807007, + "step": 8313 + }, + { + "epoch": 1.915668202764977, + 
"grad_norm": 1.1281945792188444, + "learning_rate": 9.759050524244417e-09, + "loss": 0.6786219477653503, + "step": 8314 + }, + { + "epoch": 1.9158986175115207, + "grad_norm": 1.2807157366480393, + "learning_rate": 9.70602622660599e-09, + "loss": 0.7311046123504639, + "step": 8315 + }, + { + "epoch": 1.9161290322580644, + "grad_norm": 1.3847340573145779, + "learning_rate": 9.653145668826912e-09, + "loss": 0.8914301991462708, + "step": 8316 + }, + { + "epoch": 1.9163594470046084, + "grad_norm": 1.4027670914288322, + "learning_rate": 9.600408858582709e-09, + "loss": 0.8144292831420898, + "step": 8317 + }, + { + "epoch": 1.916589861751152, + "grad_norm": 1.1077379444431534, + "learning_rate": 9.547815803528036e-09, + "loss": 0.6670823097229004, + "step": 8318 + }, + { + "epoch": 1.9168202764976958, + "grad_norm": 1.2434106495167774, + "learning_rate": 9.495366511296676e-09, + "loss": 0.6801552772521973, + "step": 8319 + }, + { + "epoch": 1.9170506912442398, + "grad_norm": 1.0098918722618904, + "learning_rate": 9.44306098950165e-09, + "loss": 0.8144240379333496, + "step": 8320 + }, + { + "epoch": 1.9172811059907833, + "grad_norm": 1.0515221920732627, + "learning_rate": 9.390899245734995e-09, + "loss": 0.6352888345718384, + "step": 8321 + }, + { + "epoch": 1.9175115207373272, + "grad_norm": 1.2296941092807456, + "learning_rate": 9.33888128756788e-09, + "loss": 0.7513711452484131, + "step": 8322 + }, + { + "epoch": 1.917741935483871, + "grad_norm": 1.4377668264686976, + "learning_rate": 9.287007122550705e-09, + "loss": 0.7699171304702759, + "step": 8323 + }, + { + "epoch": 1.9179723502304147, + "grad_norm": 1.591632209718944, + "learning_rate": 9.235276758212895e-09, + "loss": 0.8321002721786499, + "step": 8324 + }, + { + "epoch": 1.9182027649769586, + "grad_norm": 1.0453744404830132, + "learning_rate": 9.183690202062999e-09, + "loss": 0.6815298795700073, + "step": 8325 + }, + { + "epoch": 1.9184331797235024, + "grad_norm": 1.0030633247337575, + "learning_rate": 
9.132247461588915e-09, + "loss": 0.7135178446769714, + "step": 8326 + }, + { + "epoch": 1.918663594470046, + "grad_norm": 1.3123190228023687, + "learning_rate": 9.080948544257338e-09, + "loss": 0.8452005982398987, + "step": 8327 + }, + { + "epoch": 1.9188940092165898, + "grad_norm": 1.1270879003396566, + "learning_rate": 9.029793457514312e-09, + "loss": 0.7449440956115723, + "step": 8328 + }, + { + "epoch": 1.9191244239631335, + "grad_norm": 1.2310904327231214, + "learning_rate": 8.978782208784897e-09, + "loss": 0.8172955513000488, + "step": 8329 + }, + { + "epoch": 1.9193548387096775, + "grad_norm": 1.0097624251077932, + "learning_rate": 8.92791480547317e-09, + "loss": 0.6682305335998535, + "step": 8330 + }, + { + "epoch": 1.9195852534562212, + "grad_norm": 1.1974701853493588, + "learning_rate": 8.877191254962779e-09, + "loss": 0.6874973773956299, + "step": 8331 + }, + { + "epoch": 1.919815668202765, + "grad_norm": 1.1728345166861331, + "learning_rate": 8.826611564615949e-09, + "loss": 0.8371694684028625, + "step": 8332 + }, + { + "epoch": 1.920046082949309, + "grad_norm": 1.1837626119929445, + "learning_rate": 8.77617574177425e-09, + "loss": 0.7147493362426758, + "step": 8333 + }, + { + "epoch": 1.9202764976958524, + "grad_norm": 1.2783488550083906, + "learning_rate": 8.725883793758382e-09, + "loss": 0.7444115877151489, + "step": 8334 + }, + { + "epoch": 1.9205069124423964, + "grad_norm": 1.3799268170287549, + "learning_rate": 8.675735727868283e-09, + "loss": 0.7772307395935059, + "step": 8335 + }, + { + "epoch": 1.92073732718894, + "grad_norm": 1.2730237375907167, + "learning_rate": 8.625731551382798e-09, + "loss": 0.702937126159668, + "step": 8336 + }, + { + "epoch": 1.9209677419354838, + "grad_norm": 1.316574939310684, + "learning_rate": 8.575871271559898e-09, + "loss": 0.7404709458351135, + "step": 8337 + }, + { + "epoch": 1.9211981566820278, + "grad_norm": 1.4216605594412726, + "learning_rate": 8.526154895636906e-09, + "loss": 0.7142058610916138, + "step": 
8338 + }, + { + "epoch": 1.9214285714285713, + "grad_norm": 1.381037068322115, + "learning_rate": 8.476582430830048e-09, + "loss": 0.8950545191764832, + "step": 8339 + }, + { + "epoch": 1.9216589861751152, + "grad_norm": 1.2364573338693037, + "learning_rate": 8.42715388433446e-09, + "loss": 0.6939054131507874, + "step": 8340 + }, + { + "epoch": 1.921889400921659, + "grad_norm": 1.3248307922164142, + "learning_rate": 8.377869263324954e-09, + "loss": 0.7916324138641357, + "step": 8341 + }, + { + "epoch": 1.9221198156682027, + "grad_norm": 1.3092539218499513, + "learning_rate": 8.328728574954924e-09, + "loss": 0.8059754371643066, + "step": 8342 + }, + { + "epoch": 1.9223502304147466, + "grad_norm": 1.1195879983393067, + "learning_rate": 8.279731826357105e-09, + "loss": 0.650648295879364, + "step": 8343 + }, + { + "epoch": 1.9225806451612903, + "grad_norm": 0.9135397053997126, + "learning_rate": 8.230879024643478e-09, + "loss": 0.6912552118301392, + "step": 8344 + }, + { + "epoch": 1.922811059907834, + "grad_norm": 0.8588678436998939, + "learning_rate": 8.182170176904702e-09, + "loss": 0.7430927753448486, + "step": 8345 + }, + { + "epoch": 1.923041474654378, + "grad_norm": 1.1000327691208154, + "learning_rate": 8.133605290210898e-09, + "loss": 0.7550772428512573, + "step": 8346 + }, + { + "epoch": 1.9232718894009215, + "grad_norm": 1.1138393113278757, + "learning_rate": 8.08518437161132e-09, + "loss": 0.7235819101333618, + "step": 8347 + }, + { + "epoch": 1.9235023041474655, + "grad_norm": 1.085631464611088, + "learning_rate": 8.036907428134121e-09, + "loss": 0.790582537651062, + "step": 8348 + }, + { + "epoch": 1.9237327188940092, + "grad_norm": 1.2928878399763604, + "learning_rate": 7.988774466786585e-09, + "loss": 0.7350871562957764, + "step": 8349 + }, + { + "epoch": 1.923963133640553, + "grad_norm": 1.3980478677422172, + "learning_rate": 7.940785494555124e-09, + "loss": 0.86177659034729, + "step": 8350 + }, + { + "epoch": 1.9241935483870969, + "grad_norm": 
1.196963381013611, + "learning_rate": 7.892940518405499e-09, + "loss": 0.8039232492446899, + "step": 8351 + }, + { + "epoch": 1.9244239631336404, + "grad_norm": 1.231295549355971, + "learning_rate": 7.845239545282046e-09, + "loss": 0.7130967378616333, + "step": 8352 + }, + { + "epoch": 1.9246543778801843, + "grad_norm": 1.0830506625128473, + "learning_rate": 7.797682582108667e-09, + "loss": 0.7297911047935486, + "step": 8353 + }, + { + "epoch": 1.924884792626728, + "grad_norm": 1.2576048144274934, + "learning_rate": 7.750269635788065e-09, + "loss": 0.7302875518798828, + "step": 8354 + }, + { + "epoch": 1.9251152073732718, + "grad_norm": 1.1228331103171292, + "learning_rate": 7.703000713202401e-09, + "loss": 0.7976555824279785, + "step": 8355 + }, + { + "epoch": 1.9253456221198157, + "grad_norm": 1.1181213613597878, + "learning_rate": 7.65587582121252e-09, + "loss": 0.6747829914093018, + "step": 8356 + }, + { + "epoch": 1.9255760368663595, + "grad_norm": 1.3086474559444063, + "learning_rate": 7.608894966658509e-09, + "loss": 0.7217142581939697, + "step": 8357 + }, + { + "epoch": 1.9258064516129032, + "grad_norm": 1.3893709396765357, + "learning_rate": 7.562058156359685e-09, + "loss": 0.8635888695716858, + "step": 8358 + }, + { + "epoch": 1.9260368663594472, + "grad_norm": 1.3318330118319255, + "learning_rate": 7.515365397114282e-09, + "loss": 0.8435994386672974, + "step": 8359 + }, + { + "epoch": 1.9262672811059907, + "grad_norm": 1.4490671236886896, + "learning_rate": 7.468816695699653e-09, + "loss": 0.8632286787033081, + "step": 8360 + }, + { + "epoch": 1.9264976958525346, + "grad_norm": 1.501498499241499, + "learning_rate": 7.422412058872396e-09, + "loss": 0.7916556596755981, + "step": 8361 + }, + { + "epoch": 1.9267281105990783, + "grad_norm": 1.1808854932681303, + "learning_rate": 7.376151493368121e-09, + "loss": 0.8307663202285767, + "step": 8362 + }, + { + "epoch": 1.926958525345622, + "grad_norm": 1.4156996026964064, + "learning_rate": 7.330035005901236e-09, 
+ "loss": 0.9020388126373291, + "step": 8363 + }, + { + "epoch": 1.927188940092166, + "grad_norm": 1.222606934693838, + "learning_rate": 7.28406260316572e-09, + "loss": 0.7926114797592163, + "step": 8364 + }, + { + "epoch": 1.9274193548387095, + "grad_norm": 1.0417046174216056, + "learning_rate": 7.2382342918343446e-09, + "loss": 0.7609784603118896, + "step": 8365 + }, + { + "epoch": 1.9276497695852535, + "grad_norm": 1.3729827404737949, + "learning_rate": 7.192550078559012e-09, + "loss": 0.6010490655899048, + "step": 8366 + }, + { + "epoch": 1.9278801843317972, + "grad_norm": 1.495271329234438, + "learning_rate": 7.147009969970641e-09, + "loss": 0.8219606876373291, + "step": 8367 + }, + { + "epoch": 1.928110599078341, + "grad_norm": 1.207499145814505, + "learning_rate": 7.101613972679499e-09, + "loss": 0.8688151836395264, + "step": 8368 + }, + { + "epoch": 1.9283410138248849, + "grad_norm": 1.0608698410629562, + "learning_rate": 7.0563620932747595e-09, + "loss": 0.7654411792755127, + "step": 8369 + }, + { + "epoch": 1.9285714285714286, + "grad_norm": 1.0982841652537483, + "learning_rate": 7.01125433832439e-09, + "loss": 0.6878413558006287, + "step": 8370 + }, + { + "epoch": 1.9288018433179723, + "grad_norm": 1.0662803206592244, + "learning_rate": 6.966290714375933e-09, + "loss": 0.6703332662582397, + "step": 8371 + }, + { + "epoch": 1.9290322580645163, + "grad_norm": 1.1405585467491617, + "learning_rate": 6.921471227955833e-09, + "loss": 0.752200722694397, + "step": 8372 + }, + { + "epoch": 1.9292626728110598, + "grad_norm": 1.1122335677850106, + "learning_rate": 6.8767958855695526e-09, + "loss": 0.8107069730758667, + "step": 8373 + }, + { + "epoch": 1.9294930875576037, + "grad_norm": 1.4102834771954489, + "learning_rate": 6.832264693701573e-09, + "loss": 0.8816967010498047, + "step": 8374 + }, + { + "epoch": 1.9297235023041475, + "grad_norm": 1.2593635712728732, + "learning_rate": 6.78787765881561e-09, + "loss": 0.7889697551727295, + "step": 8375 + }, + { + 
"epoch": 1.9299539170506912, + "grad_norm": 1.2377942170623384, + "learning_rate": 6.743634787354291e-09, + "loss": 0.7218060493469238, + "step": 8376 + }, + { + "epoch": 1.9301843317972351, + "grad_norm": 1.2786458190631131, + "learning_rate": 6.699536085739588e-09, + "loss": 0.8061347007751465, + "step": 8377 + }, + { + "epoch": 1.9304147465437786, + "grad_norm": 1.0571211016932303, + "learning_rate": 6.655581560372159e-09, + "loss": 0.7320632934570312, + "step": 8378 + }, + { + "epoch": 1.9306451612903226, + "grad_norm": 1.2201688729332103, + "learning_rate": 6.611771217632123e-09, + "loss": 0.7039695978164673, + "step": 8379 + }, + { + "epoch": 1.9308755760368663, + "grad_norm": 1.0152325785443144, + "learning_rate": 6.568105063878393e-09, + "loss": 0.7056317925453186, + "step": 8380 + }, + { + "epoch": 1.93110599078341, + "grad_norm": 1.3442992098354511, + "learning_rate": 6.524583105449122e-09, + "loss": 0.9265607595443726, + "step": 8381 + }, + { + "epoch": 1.931336405529954, + "grad_norm": 0.9980232024455323, + "learning_rate": 6.481205348661367e-09, + "loss": 0.7249365448951721, + "step": 8382 + }, + { + "epoch": 1.9315668202764977, + "grad_norm": 1.0217670095742197, + "learning_rate": 6.4379717998114256e-09, + "loss": 0.8216372728347778, + "step": 8383 + }, + { + "epoch": 1.9317972350230415, + "grad_norm": 1.0731967820570871, + "learning_rate": 6.394882465174611e-09, + "loss": 0.6750606894493103, + "step": 8384 + }, + { + "epoch": 1.9320276497695854, + "grad_norm": 1.1382732221343326, + "learning_rate": 6.351937351005143e-09, + "loss": 0.8265045285224915, + "step": 8385 + }, + { + "epoch": 1.932258064516129, + "grad_norm": 1.2033626019579449, + "learning_rate": 6.309136463536591e-09, + "loss": 0.5992317795753479, + "step": 8386 + }, + { + "epoch": 1.9324884792626729, + "grad_norm": 1.026760102298627, + "learning_rate": 6.266479808981428e-09, + "loss": 0.6586567163467407, + "step": 8387 + }, + { + "epoch": 1.9327188940092166, + "grad_norm": 
1.1335080912138158, + "learning_rate": 6.223967393531259e-09, + "loss": 0.7496415376663208, + "step": 8388 + }, + { + "epoch": 1.9329493087557603, + "grad_norm": 1.2743344602397095, + "learning_rate": 6.181599223356593e-09, + "loss": 0.8637027740478516, + "step": 8389 + }, + { + "epoch": 1.9331797235023043, + "grad_norm": 1.3348493633535858, + "learning_rate": 6.139375304607064e-09, + "loss": 0.6925984621047974, + "step": 8390 + }, + { + "epoch": 1.9334101382488478, + "grad_norm": 1.3338549311969345, + "learning_rate": 6.0972956434115485e-09, + "loss": 0.8345432877540588, + "step": 8391 + }, + { + "epoch": 1.9336405529953917, + "grad_norm": 1.211546505819517, + "learning_rate": 6.055360245877938e-09, + "loss": 0.797752857208252, + "step": 8392 + }, + { + "epoch": 1.9338709677419355, + "grad_norm": 1.025513773253857, + "learning_rate": 6.013569118092809e-09, + "loss": 0.7460094690322876, + "step": 8393 + }, + { + "epoch": 1.9341013824884792, + "grad_norm": 1.0501792229397418, + "learning_rate": 5.97192226612242e-09, + "loss": 0.7695547342300415, + "step": 8394 + }, + { + "epoch": 1.9343317972350231, + "grad_norm": 1.3341559418127071, + "learning_rate": 5.9304196960113795e-09, + "loss": 0.8372104167938232, + "step": 8395 + }, + { + "epoch": 1.9345622119815669, + "grad_norm": 1.174939684239835, + "learning_rate": 5.889061413784091e-09, + "loss": 0.7647950053215027, + "step": 8396 + }, + { + "epoch": 1.9347926267281106, + "grad_norm": 1.0568987578487792, + "learning_rate": 5.84784742544353e-09, + "loss": 0.6958519220352173, + "step": 8397 + }, + { + "epoch": 1.9350230414746545, + "grad_norm": 1.1905008025272417, + "learning_rate": 5.806777736971691e-09, + "loss": 0.8488763570785522, + "step": 8398 + }, + { + "epoch": 1.935253456221198, + "grad_norm": 1.1975357379056275, + "learning_rate": 5.765852354330025e-09, + "loss": 0.6448318958282471, + "step": 8399 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 1.288117894635522, + "learning_rate": 5.725071283458671e-09, 
+ "loss": 0.7449144124984741, + "step": 8400 + }, + { + "epoch": 1.9357142857142857, + "grad_norm": 1.2060473887345362, + "learning_rate": 5.684434530277005e-09, + "loss": 0.8339489102363586, + "step": 8401 + }, + { + "epoch": 1.9359447004608294, + "grad_norm": 1.355663998015665, + "learning_rate": 5.643942100683308e-09, + "loss": 0.7758409380912781, + "step": 8402 + }, + { + "epoch": 1.9361751152073734, + "grad_norm": 1.2457476365021507, + "learning_rate": 5.60359400055499e-09, + "loss": 0.8604291081428528, + "step": 8403 + }, + { + "epoch": 1.936405529953917, + "grad_norm": 0.9800977546704353, + "learning_rate": 5.5633902357487e-09, + "loss": 0.7379741668701172, + "step": 8404 + }, + { + "epoch": 1.9366359447004609, + "grad_norm": 1.0501931597758303, + "learning_rate": 5.52333081209988e-09, + "loss": 0.6943101286888123, + "step": 8405 + }, + { + "epoch": 1.9368663594470046, + "grad_norm": 1.193280273833338, + "learning_rate": 5.483415735422992e-09, + "loss": 0.7397646903991699, + "step": 8406 + }, + { + "epoch": 1.9370967741935483, + "grad_norm": 1.1298510822998358, + "learning_rate": 5.443645011511844e-09, + "loss": 0.7566234469413757, + "step": 8407 + }, + { + "epoch": 1.9373271889400923, + "grad_norm": 1.322820355956732, + "learning_rate": 5.40401864613893e-09, + "loss": 0.6345827579498291, + "step": 8408 + }, + { + "epoch": 1.937557603686636, + "grad_norm": 1.6653451978671274, + "learning_rate": 5.3645366450560944e-09, + "loss": 0.7259831428527832, + "step": 8409 + }, + { + "epoch": 1.9377880184331797, + "grad_norm": 1.347964952979272, + "learning_rate": 5.325199013993975e-09, + "loss": 0.7897600531578064, + "step": 8410 + }, + { + "epoch": 1.9380184331797237, + "grad_norm": 1.3016062068490681, + "learning_rate": 5.286005758662448e-09, + "loss": 0.8421739339828491, + "step": 8411 + }, + { + "epoch": 1.9382488479262672, + "grad_norm": 1.3347958532899202, + "learning_rate": 5.2469568847504085e-09, + "loss": 0.7652501463890076, + "step": 8412 + }, + { + "epoch": 
1.9384792626728111, + "grad_norm": 1.3105993577298032, + "learning_rate": 5.2080523979256556e-09, + "loss": 0.6397069096565247, + "step": 8413 + }, + { + "epoch": 1.9387096774193548, + "grad_norm": 1.2689574006754154, + "learning_rate": 5.169292303835116e-09, + "loss": 0.840052604675293, + "step": 8414 + }, + { + "epoch": 1.9389400921658986, + "grad_norm": 1.344062608291919, + "learning_rate": 5.130676608104845e-09, + "loss": 0.8453920483589172, + "step": 8415 + }, + { + "epoch": 1.9391705069124425, + "grad_norm": 1.3358429095342716, + "learning_rate": 5.092205316339915e-09, + "loss": 0.8301386833190918, + "step": 8416 + }, + { + "epoch": 1.939400921658986, + "grad_norm": 1.0570862677742232, + "learning_rate": 5.0538784341241924e-09, + "loss": 0.6682429313659668, + "step": 8417 + }, + { + "epoch": 1.93963133640553, + "grad_norm": 1.4370850274204425, + "learning_rate": 5.0156959670208945e-09, + "loss": 0.7881286144256592, + "step": 8418 + }, + { + "epoch": 1.9398617511520737, + "grad_norm": 1.1170749783406635, + "learning_rate": 4.9776579205721424e-09, + "loss": 0.7413277626037598, + "step": 8419 + }, + { + "epoch": 1.9400921658986174, + "grad_norm": 1.2672048797390025, + "learning_rate": 4.939764300299187e-09, + "loss": 0.6718757152557373, + "step": 8420 + }, + { + "epoch": 1.9403225806451614, + "grad_norm": 1.1707673461814823, + "learning_rate": 4.9020151117019625e-09, + "loss": 0.8595068454742432, + "step": 8421 + }, + { + "epoch": 1.9405529953917051, + "grad_norm": 1.0350774696905816, + "learning_rate": 4.864410360260085e-09, + "loss": 0.6985205411911011, + "step": 8422 + }, + { + "epoch": 1.9407834101382488, + "grad_norm": 1.222465370246094, + "learning_rate": 4.826950051431522e-09, + "loss": 0.7148889303207397, + "step": 8423 + }, + { + "epoch": 1.9410138248847926, + "grad_norm": 1.320040251210183, + "learning_rate": 4.789634190653813e-09, + "loss": 0.8109019994735718, + "step": 8424 + }, + { + "epoch": 1.9412442396313363, + "grad_norm": 1.4762486891336946, + 
"learning_rate": 4.752462783343292e-09, + "loss": 0.8268437385559082, + "step": 8425 + }, + { + "epoch": 1.9414746543778802, + "grad_norm": 0.9708535634361853, + "learning_rate": 4.715435834895088e-09, + "loss": 0.7300432920455933, + "step": 8426 + }, + { + "epoch": 1.941705069124424, + "grad_norm": 1.3017508085468754, + "learning_rate": 4.6785533506839005e-09, + "loss": 0.848440408706665, + "step": 8427 + }, + { + "epoch": 1.9419354838709677, + "grad_norm": 1.0873655680994063, + "learning_rate": 4.6418153360630044e-09, + "loss": 0.7526305913925171, + "step": 8428 + }, + { + "epoch": 1.9421658986175117, + "grad_norm": 1.1186105868292944, + "learning_rate": 4.605221796365022e-09, + "loss": 0.6987402439117432, + "step": 8429 + }, + { + "epoch": 1.9423963133640552, + "grad_norm": 1.5889483697201847, + "learning_rate": 4.568772736901261e-09, + "loss": 0.7944519519805908, + "step": 8430 + }, + { + "epoch": 1.942626728110599, + "grad_norm": 1.0443704220390153, + "learning_rate": 4.532468162962378e-09, + "loss": 0.7206175327301025, + "step": 8431 + }, + { + "epoch": 1.9428571428571428, + "grad_norm": 1.332362884391146, + "learning_rate": 4.4963080798179345e-09, + "loss": 0.6892992854118347, + "step": 8432 + }, + { + "epoch": 1.9430875576036866, + "grad_norm": 1.0826330060160456, + "learning_rate": 4.460292492716511e-09, + "loss": 0.696158766746521, + "step": 8433 + }, + { + "epoch": 1.9433179723502305, + "grad_norm": 0.9789941295444919, + "learning_rate": 4.424421406885704e-09, + "loss": 0.8007163405418396, + "step": 8434 + }, + { + "epoch": 1.9435483870967742, + "grad_norm": 1.1286085842961833, + "learning_rate": 4.3886948275320135e-09, + "loss": 0.7969222068786621, + "step": 8435 + }, + { + "epoch": 1.943778801843318, + "grad_norm": 1.2183409512094359, + "learning_rate": 4.353112759841404e-09, + "loss": 0.7752852439880371, + "step": 8436 + }, + { + "epoch": 1.9440092165898617, + "grad_norm": 1.1860536416754315, + "learning_rate": 4.317675208978411e-09, + "loss": 
0.7788258790969849, + "step": 8437 + }, + { + "epoch": 1.9442396313364054, + "grad_norm": 1.1863849018136006, + "learning_rate": 4.2823821800866964e-09, + "loss": 0.838456392288208, + "step": 8438 + }, + { + "epoch": 1.9444700460829494, + "grad_norm": 1.0569456831140607, + "learning_rate": 4.2472336782890525e-09, + "loss": 0.7503675222396851, + "step": 8439 + }, + { + "epoch": 1.944700460829493, + "grad_norm": 0.9808278818485672, + "learning_rate": 4.212229708687287e-09, + "loss": 0.810901403427124, + "step": 8440 + }, + { + "epoch": 1.9449308755760368, + "grad_norm": 1.0050063922171069, + "learning_rate": 4.1773702763621135e-09, + "loss": 0.7551805973052979, + "step": 8441 + }, + { + "epoch": 1.9451612903225808, + "grad_norm": 1.2275039222333026, + "learning_rate": 4.142655386373373e-09, + "loss": 0.9387043714523315, + "step": 8442 + }, + { + "epoch": 1.9453917050691243, + "grad_norm": 1.034577232879954, + "learning_rate": 4.1080850437598124e-09, + "loss": 0.7508292198181152, + "step": 8443 + }, + { + "epoch": 1.9456221198156682, + "grad_norm": 0.9799945991508818, + "learning_rate": 4.073659253539308e-09, + "loss": 0.737107515335083, + "step": 8444 + }, + { + "epoch": 1.945852534562212, + "grad_norm": 1.477967097078984, + "learning_rate": 4.03937802070875e-09, + "loss": 0.86794114112854, + "step": 8445 + }, + { + "epoch": 1.9460829493087557, + "grad_norm": 0.9207750837260967, + "learning_rate": 4.005241350243937e-09, + "loss": 0.7629859447479248, + "step": 8446 + }, + { + "epoch": 1.9463133640552996, + "grad_norm": 1.4180879805115079, + "learning_rate": 3.971249247099906e-09, + "loss": 0.7455410957336426, + "step": 8447 + }, + { + "epoch": 1.9465437788018434, + "grad_norm": 1.1941620926103322, + "learning_rate": 3.937401716210376e-09, + "loss": 0.8322222828865051, + "step": 8448 + }, + { + "epoch": 1.946774193548387, + "grad_norm": 1.510433091637528, + "learning_rate": 3.903698762488528e-09, + "loss": 0.7961260676383972, + "step": 8449 + }, + { + "epoch": 
1.9470046082949308, + "grad_norm": 1.2160569883363423, + "learning_rate": 3.870140390826005e-09, + "loss": 0.8144096732139587, + "step": 8450 + }, + { + "epoch": 1.9472350230414746, + "grad_norm": 1.2123613138822447, + "learning_rate": 3.8367266060939095e-09, + "loss": 0.7973348498344421, + "step": 8451 + }, + { + "epoch": 1.9474654377880185, + "grad_norm": 1.4038735969349747, + "learning_rate": 3.803457413142253e-09, + "loss": 0.8311715126037598, + "step": 8452 + }, + { + "epoch": 1.9476958525345622, + "grad_norm": 0.9815978065709688, + "learning_rate": 3.770332816799948e-09, + "loss": 0.7851812839508057, + "step": 8453 + }, + { + "epoch": 1.947926267281106, + "grad_norm": 1.3820548975058524, + "learning_rate": 3.737352821875039e-09, + "loss": 0.8721193075180054, + "step": 8454 + }, + { + "epoch": 1.94815668202765, + "grad_norm": 1.2337347998012935, + "learning_rate": 3.704517433154364e-09, + "loss": 0.8594118356704712, + "step": 8455 + }, + { + "epoch": 1.9483870967741934, + "grad_norm": 0.9620755666197012, + "learning_rate": 3.671826655404109e-09, + "loss": 0.6526527404785156, + "step": 8456 + }, + { + "epoch": 1.9486175115207374, + "grad_norm": 0.9198704876253201, + "learning_rate": 3.639280493369368e-09, + "loss": 0.7577145099639893, + "step": 8457 + }, + { + "epoch": 1.948847926267281, + "grad_norm": 1.4898349304718468, + "learning_rate": 3.6068789517739173e-09, + "loss": 0.9176833629608154, + "step": 8458 + }, + { + "epoch": 1.9490783410138248, + "grad_norm": 1.5070373914502264, + "learning_rate": 3.5746220353209956e-09, + "loss": 0.8947671055793762, + "step": 8459 + }, + { + "epoch": 1.9493087557603688, + "grad_norm": 1.2654885409411176, + "learning_rate": 3.542509748692524e-09, + "loss": 0.8791666030883789, + "step": 8460 + }, + { + "epoch": 1.9495391705069123, + "grad_norm": 0.9247331783476281, + "learning_rate": 3.5105420965496626e-09, + "loss": 0.7431247234344482, + "step": 8461 + }, + { + "epoch": 1.9497695852534562, + "grad_norm": 1.3437504272827105, 
+ "learning_rate": 3.4787190835324775e-09, + "loss": 0.7998695373535156, + "step": 8462 + }, + { + "epoch": 1.95, + "grad_norm": 1.359553043789141, + "learning_rate": 3.447040714259941e-09, + "loss": 0.8120161294937134, + "step": 8463 + }, + { + "epoch": 1.9502304147465437, + "grad_norm": 1.063781533705899, + "learning_rate": 3.415506993330153e-09, + "loss": 0.8062546849250793, + "step": 8464 + }, + { + "epoch": 1.9504608294930876, + "grad_norm": 1.3290963135655427, + "learning_rate": 3.384117925320229e-09, + "loss": 0.8100919723510742, + "step": 8465 + }, + { + "epoch": 1.9506912442396314, + "grad_norm": 1.410960677080016, + "learning_rate": 3.352873514786303e-09, + "loss": 0.7376535534858704, + "step": 8466 + }, + { + "epoch": 1.950921658986175, + "grad_norm": 1.1333962819853984, + "learning_rate": 3.321773766263303e-09, + "loss": 0.7534361481666565, + "step": 8467 + }, + { + "epoch": 1.951152073732719, + "grad_norm": 0.956942860373484, + "learning_rate": 3.290818684265506e-09, + "loss": 0.6914925575256348, + "step": 8468 + }, + { + "epoch": 1.9513824884792625, + "grad_norm": 1.40322423242457, + "learning_rate": 3.2600082732858746e-09, + "loss": 0.837024450302124, + "step": 8469 + }, + { + "epoch": 1.9516129032258065, + "grad_norm": 1.3077639635125993, + "learning_rate": 3.229342537796609e-09, + "loss": 0.7960337400436401, + "step": 8470 + }, + { + "epoch": 1.9518433179723502, + "grad_norm": 1.1044299774108808, + "learning_rate": 3.1988214822485928e-09, + "loss": 0.6611788868904114, + "step": 8471 + }, + { + "epoch": 1.952073732718894, + "grad_norm": 1.2652589643459276, + "learning_rate": 3.16844511107206e-09, + "loss": 0.8798158168792725, + "step": 8472 + }, + { + "epoch": 1.952304147465438, + "grad_norm": 1.3477135835069336, + "learning_rate": 3.1382134286761506e-09, + "loss": 0.790015459060669, + "step": 8473 + }, + { + "epoch": 1.9525345622119814, + "grad_norm": 1.062422263250462, + "learning_rate": 3.1081264394489103e-09, + "loss": 0.7676407098770142, + 
"step": 8474 + }, + { + "epoch": 1.9527649769585254, + "grad_norm": 1.1707572290080033, + "learning_rate": 3.07818414775729e-09, + "loss": 0.8213051557540894, + "step": 8475 + }, + { + "epoch": 1.952995391705069, + "grad_norm": 1.328203051872804, + "learning_rate": 3.048386557947591e-09, + "loss": 0.8909401893615723, + "step": 8476 + }, + { + "epoch": 1.9532258064516128, + "grad_norm": 1.2206551189591073, + "learning_rate": 3.0187336743446867e-09, + "loss": 0.838227391242981, + "step": 8477 + }, + { + "epoch": 1.9534562211981568, + "grad_norm": 1.1958685930192579, + "learning_rate": 2.9892255012528013e-09, + "loss": 0.7297696471214294, + "step": 8478 + }, + { + "epoch": 1.9536866359447005, + "grad_norm": 1.508389266534061, + "learning_rate": 2.9598620429550636e-09, + "loss": 1.0060585737228394, + "step": 8479 + }, + { + "epoch": 1.9539170506912442, + "grad_norm": 1.1858328009290373, + "learning_rate": 2.9306433037132873e-09, + "loss": 0.7812967300415039, + "step": 8480 + }, + { + "epoch": 1.9541474654377882, + "grad_norm": 1.196629989025656, + "learning_rate": 2.901569287768746e-09, + "loss": 0.7349315881729126, + "step": 8481 + }, + { + "epoch": 1.9543778801843317, + "grad_norm": 1.1580071941270487, + "learning_rate": 2.8726399993415085e-09, + "loss": 0.7083498239517212, + "step": 8482 + }, + { + "epoch": 1.9546082949308756, + "grad_norm": 1.3308451395414542, + "learning_rate": 2.8438554426304386e-09, + "loss": 0.7969732880592346, + "step": 8483 + }, + { + "epoch": 1.9548387096774194, + "grad_norm": 1.405840014033905, + "learning_rate": 2.815215621813749e-09, + "loss": 0.7701122164726257, + "step": 8484 + }, + { + "epoch": 1.955069124423963, + "grad_norm": 1.0487330945577633, + "learning_rate": 2.7867205410484485e-09, + "loss": 0.7323017120361328, + "step": 8485 + }, + { + "epoch": 1.955299539170507, + "grad_norm": 0.9842598310766136, + "learning_rate": 2.7583702044704504e-09, + "loss": 0.8357248306274414, + "step": 8486 + }, + { + "epoch": 1.9555299539170505, + 
"grad_norm": 1.4806137218761686, + "learning_rate": 2.7301646161947966e-09, + "loss": 0.8164674043655396, + "step": 8487 + }, + { + "epoch": 1.9557603686635945, + "grad_norm": 1.2641967325925645, + "learning_rate": 2.7021037803156566e-09, + "loss": 0.7972782850265503, + "step": 8488 + }, + { + "epoch": 1.9559907834101382, + "grad_norm": 1.2417679147004388, + "learning_rate": 2.6741877009058835e-09, + "loss": 0.864342987537384, + "step": 8489 + }, + { + "epoch": 1.956221198156682, + "grad_norm": 1.1067561191492752, + "learning_rate": 2.646416382017458e-09, + "loss": 0.7428402900695801, + "step": 8490 + }, + { + "epoch": 1.956451612903226, + "grad_norm": 1.3211414352422526, + "learning_rate": 2.618789827681378e-09, + "loss": 0.7164437770843506, + "step": 8491 + }, + { + "epoch": 1.9566820276497696, + "grad_norm": 1.153189225005644, + "learning_rate": 2.5913080419075473e-09, + "loss": 0.6997767686843872, + "step": 8492 + }, + { + "epoch": 1.9569124423963133, + "grad_norm": 1.2481992001614755, + "learning_rate": 2.563971028684886e-09, + "loss": 0.6399234533309937, + "step": 8493 + }, + { + "epoch": 1.9571428571428573, + "grad_norm": 1.1639751659112805, + "learning_rate": 2.536778791981553e-09, + "loss": 0.7642914056777954, + "step": 8494 + }, + { + "epoch": 1.9573732718894008, + "grad_norm": 1.218382512158835, + "learning_rate": 2.5097313357442806e-09, + "loss": 0.8284746408462524, + "step": 8495 + }, + { + "epoch": 1.9576036866359448, + "grad_norm": 1.2221524988832009, + "learning_rate": 2.4828286638989282e-09, + "loss": 0.6680238246917725, + "step": 8496 + }, + { + "epoch": 1.9578341013824885, + "grad_norm": 1.2965002342798193, + "learning_rate": 2.4560707803504834e-09, + "loss": 0.7621040344238281, + "step": 8497 + }, + { + "epoch": 1.9580645161290322, + "grad_norm": 1.2947556724815892, + "learning_rate": 2.4294576889827278e-09, + "loss": 0.7326159477233887, + "step": 8498 + }, + { + "epoch": 1.9582949308755762, + "grad_norm": 1.0656455780738308, + "learning_rate": 
2.4029893936586833e-09, + "loss": 0.6496877670288086, + "step": 8499 + }, + { + "epoch": 1.9585253456221197, + "grad_norm": 1.241192579535759, + "learning_rate": 2.376665898220054e-09, + "loss": 0.665170431137085, + "step": 8500 + }, + { + "epoch": 1.9587557603686636, + "grad_norm": 1.2593646350179877, + "learning_rate": 2.3504872064876724e-09, + "loss": 0.7238261699676514, + "step": 8501 + }, + { + "epoch": 1.9589861751152073, + "grad_norm": 1.5709730629781664, + "learning_rate": 2.3244533222613882e-09, + "loss": 0.6696983575820923, + "step": 8502 + }, + { + "epoch": 1.959216589861751, + "grad_norm": 1.6011689537620306, + "learning_rate": 2.2985642493199563e-09, + "loss": 0.8414099216461182, + "step": 8503 + }, + { + "epoch": 1.959447004608295, + "grad_norm": 1.1680069988943498, + "learning_rate": 2.2728199914210377e-09, + "loss": 0.7390140295028687, + "step": 8504 + }, + { + "epoch": 1.9596774193548387, + "grad_norm": 1.1922421298842674, + "learning_rate": 2.247220552301532e-09, + "loss": 0.7910370826721191, + "step": 8505 + }, + { + "epoch": 1.9599078341013825, + "grad_norm": 1.2059164746419144, + "learning_rate": 2.2217659356771334e-09, + "loss": 0.8111266493797302, + "step": 8506 + }, + { + "epoch": 1.9601382488479264, + "grad_norm": 1.382769681983927, + "learning_rate": 2.1964561452425535e-09, + "loss": 0.8748809099197388, + "step": 8507 + }, + { + "epoch": 1.96036866359447, + "grad_norm": 1.4348415171969837, + "learning_rate": 2.1712911846714088e-09, + "loss": 0.726898193359375, + "step": 8508 + }, + { + "epoch": 1.9605990783410139, + "grad_norm": 0.903624770648156, + "learning_rate": 2.1462710576163335e-09, + "loss": 0.5221005082130432, + "step": 8509 + }, + { + "epoch": 1.9608294930875576, + "grad_norm": 0.9979524654583228, + "learning_rate": 2.1213957677090887e-09, + "loss": 0.7336875200271606, + "step": 8510 + }, + { + "epoch": 1.9610599078341013, + "grad_norm": 1.4570574984679434, + "learning_rate": 2.096665318560231e-09, + "loss": 0.9653327465057373, + 
"step": 8511 + }, + { + "epoch": 1.9612903225806453, + "grad_norm": 1.3910033326033395, + "learning_rate": 2.0720797137594448e-09, + "loss": 0.8309473991394043, + "step": 8512 + }, + { + "epoch": 1.9615207373271888, + "grad_norm": 1.250491052702372, + "learning_rate": 2.047638956874986e-09, + "loss": 0.7829124331474304, + "step": 8513 + }, + { + "epoch": 1.9617511520737327, + "grad_norm": 1.6063542888921636, + "learning_rate": 2.0233430514547955e-09, + "loss": 0.8399544358253479, + "step": 8514 + }, + { + "epoch": 1.9619815668202765, + "grad_norm": 1.2304488854915971, + "learning_rate": 1.999192001025163e-09, + "loss": 0.7827579975128174, + "step": 8515 + }, + { + "epoch": 1.9622119815668202, + "grad_norm": 1.7023781342726942, + "learning_rate": 1.9751858090916174e-09, + "loss": 0.8617441654205322, + "step": 8516 + }, + { + "epoch": 1.9624423963133641, + "grad_norm": 1.124873706648068, + "learning_rate": 1.951324479138594e-09, + "loss": 0.758098840713501, + "step": 8517 + }, + { + "epoch": 1.9626728110599079, + "grad_norm": 1.407820551284048, + "learning_rate": 1.927608014629656e-09, + "loss": 0.738059937953949, + "step": 8518 + }, + { + "epoch": 1.9629032258064516, + "grad_norm": 1.2924313700222672, + "learning_rate": 1.9040364190070492e-09, + "loss": 0.6286636590957642, + "step": 8519 + }, + { + "epoch": 1.9631336405529956, + "grad_norm": 1.4040969276884698, + "learning_rate": 1.88060969569237e-09, + "loss": 0.764518141746521, + "step": 8520 + }, + { + "epoch": 1.963364055299539, + "grad_norm": 0.9848782890607348, + "learning_rate": 1.8573278480857878e-09, + "loss": 0.775516152381897, + "step": 8521 + }, + { + "epoch": 1.963594470046083, + "grad_norm": 1.2592904992793421, + "learning_rate": 1.8341908795665994e-09, + "loss": 0.8513185977935791, + "step": 8522 + }, + { + "epoch": 1.9638248847926267, + "grad_norm": 1.4423039825526616, + "learning_rate": 1.8111987934933404e-09, + "loss": 0.7300710082054138, + "step": 8523 + }, + { + "epoch": 1.9640552995391705, + 
"grad_norm": 1.1896167974085796, + "learning_rate": 1.788351593203119e-09, + "loss": 0.7346746921539307, + "step": 8524 + }, + { + "epoch": 1.9642857142857144, + "grad_norm": 1.3610028359172472, + "learning_rate": 1.7656492820121715e-09, + "loss": 0.8231781721115112, + "step": 8525 + }, + { + "epoch": 1.964516129032258, + "grad_norm": 1.2672154264769777, + "learning_rate": 1.743091863215751e-09, + "loss": 0.6972112655639648, + "step": 8526 + }, + { + "epoch": 1.9647465437788019, + "grad_norm": 1.013160541626117, + "learning_rate": 1.720679340088016e-09, + "loss": 0.6512203812599182, + "step": 8527 + }, + { + "epoch": 1.9649769585253456, + "grad_norm": 1.220658103943082, + "learning_rate": 1.698411715882253e-09, + "loss": 0.6755591630935669, + "step": 8528 + }, + { + "epoch": 1.9652073732718893, + "grad_norm": 1.115552383506669, + "learning_rate": 1.6762889938303215e-09, + "loss": 0.6858727335929871, + "step": 8529 + }, + { + "epoch": 1.9654377880184333, + "grad_norm": 1.1810577023934496, + "learning_rate": 1.6543111771434303e-09, + "loss": 0.7820768356323242, + "step": 8530 + }, + { + "epoch": 1.965668202764977, + "grad_norm": 1.512690235242737, + "learning_rate": 1.6324782690116944e-09, + "loss": 0.7841604948043823, + "step": 8531 + }, + { + "epoch": 1.9658986175115207, + "grad_norm": 1.4015300039500524, + "learning_rate": 1.6107902726040234e-09, + "loss": 0.8665674328804016, + "step": 8532 + }, + { + "epoch": 1.9661290322580647, + "grad_norm": 1.1307460450405855, + "learning_rate": 1.5892471910684547e-09, + "loss": 0.6764376163482666, + "step": 8533 + }, + { + "epoch": 1.9663594470046082, + "grad_norm": 1.4229790787582275, + "learning_rate": 1.5678490275319312e-09, + "loss": 0.8453094959259033, + "step": 8534 + }, + { + "epoch": 1.9665898617511521, + "grad_norm": 1.0573142140796512, + "learning_rate": 1.546595785100413e-09, + "loss": 0.7798272371292114, + "step": 8535 + }, + { + "epoch": 1.9668202764976959, + "grad_norm": 1.1791892730982974, + "learning_rate": 
1.5254874668586548e-09, + "loss": 0.7426424026489258, + "step": 8536 + }, + { + "epoch": 1.9670506912442396, + "grad_norm": 1.1309739514060748, + "learning_rate": 1.5045240758706501e-09, + "loss": 0.8443984985351562, + "step": 8537 + }, + { + "epoch": 1.9672811059907835, + "grad_norm": 1.1053257066980806, + "learning_rate": 1.4837056151790762e-09, + "loss": 0.8439072370529175, + "step": 8538 + }, + { + "epoch": 1.967511520737327, + "grad_norm": 1.4135182916864908, + "learning_rate": 1.463032087805849e-09, + "loss": 0.8307704925537109, + "step": 8539 + }, + { + "epoch": 1.967741935483871, + "grad_norm": 1.1593054366438007, + "learning_rate": 1.442503496751568e-09, + "loss": 0.678236722946167, + "step": 8540 + }, + { + "epoch": 1.9679723502304147, + "grad_norm": 1.3372006359269073, + "learning_rate": 1.4221198449960724e-09, + "loss": 0.7072663307189941, + "step": 8541 + }, + { + "epoch": 1.9682027649769585, + "grad_norm": 1.194618240695654, + "learning_rate": 1.4018811354977732e-09, + "loss": 0.7825980186462402, + "step": 8542 + }, + { + "epoch": 1.9684331797235024, + "grad_norm": 1.8366711172437336, + "learning_rate": 1.3817873711945426e-09, + "loss": 0.786361813545227, + "step": 8543 + }, + { + "epoch": 1.9686635944700461, + "grad_norm": 1.6047169504491765, + "learning_rate": 1.3618385550029365e-09, + "loss": 1.00287926197052, + "step": 8544 + }, + { + "epoch": 1.9688940092165899, + "grad_norm": 1.336810745652672, + "learning_rate": 1.3420346898183054e-09, + "loss": 0.7320775389671326, + "step": 8545 + }, + { + "epoch": 1.9691244239631336, + "grad_norm": 1.0018804515064612, + "learning_rate": 1.322375778515461e-09, + "loss": 0.7127507925033569, + "step": 8546 + }, + { + "epoch": 1.9693548387096773, + "grad_norm": 1.4124185296399752, + "learning_rate": 1.3028618239475652e-09, + "loss": 0.818395733833313, + "step": 8547 + }, + { + "epoch": 1.9695852534562213, + "grad_norm": 1.2063998497880193, + "learning_rate": 1.2834928289472413e-09, + "loss": 0.6384972929954529, + 
"step": 8548 + }, + { + "epoch": 1.969815668202765, + "grad_norm": 1.240783999344712, + "learning_rate": 1.2642687963256849e-09, + "loss": 0.7358517646789551, + "step": 8549 + }, + { + "epoch": 1.9700460829493087, + "grad_norm": 1.1083546443376424, + "learning_rate": 1.2451897288734414e-09, + "loss": 0.7311068773269653, + "step": 8550 + }, + { + "epoch": 1.9702764976958527, + "grad_norm": 1.5415338816809878, + "learning_rate": 1.2262556293597403e-09, + "loss": 0.8390932083129883, + "step": 8551 + }, + { + "epoch": 1.9705069124423962, + "grad_norm": 1.2045586519715463, + "learning_rate": 1.2074665005328277e-09, + "loss": 0.8114689588546753, + "step": 8552 + }, + { + "epoch": 1.9707373271889401, + "grad_norm": 1.4445688810441233, + "learning_rate": 1.1888223451199665e-09, + "loss": 1.0044716596603394, + "step": 8553 + }, + { + "epoch": 1.9709677419354839, + "grad_norm": 1.2243432992298795, + "learning_rate": 1.170323165827214e-09, + "loss": 0.7566370368003845, + "step": 8554 + }, + { + "epoch": 1.9711981566820276, + "grad_norm": 1.2230365473762954, + "learning_rate": 1.1519689653397557e-09, + "loss": 0.7543225288391113, + "step": 8555 + }, + { + "epoch": 1.9714285714285715, + "grad_norm": 1.306226883529119, + "learning_rate": 1.1337597463217941e-09, + "loss": 0.8291902542114258, + "step": 8556 + }, + { + "epoch": 1.9716589861751153, + "grad_norm": 1.1360827313333892, + "learning_rate": 1.1156955114162147e-09, + "loss": 0.7363135814666748, + "step": 8557 + }, + { + "epoch": 1.971889400921659, + "grad_norm": 1.102255040931488, + "learning_rate": 1.0977762632451427e-09, + "loss": 0.7180813550949097, + "step": 8558 + }, + { + "epoch": 1.9721198156682027, + "grad_norm": 1.1849465839861355, + "learning_rate": 1.0800020044093861e-09, + "loss": 0.7220569849014282, + "step": 8559 + }, + { + "epoch": 1.9723502304147464, + "grad_norm": 1.2915012101962247, + "learning_rate": 1.0623727374889925e-09, + "loss": 0.8839110136032104, + "step": 8560 + }, + { + "epoch": 
1.9725806451612904, + "grad_norm": 1.2553727673767463, + "learning_rate": 1.0448884650426926e-09, + "loss": 0.7210807800292969, + "step": 8561 + }, + { + "epoch": 1.9728110599078341, + "grad_norm": 1.3474393893445982, + "learning_rate": 1.0275491896084565e-09, + "loss": 0.6993537545204163, + "step": 8562 + }, + { + "epoch": 1.9730414746543778, + "grad_norm": 1.0591927963671788, + "learning_rate": 1.0103549137030486e-09, + "loss": 0.6951562166213989, + "step": 8563 + }, + { + "epoch": 1.9732718894009218, + "grad_norm": 1.0760064093903359, + "learning_rate": 9.933056398220285e-10, + "loss": 0.855778694152832, + "step": 8564 + }, + { + "epoch": 1.9735023041474653, + "grad_norm": 1.3238204379730676, + "learning_rate": 9.76401370440194e-10, + "loss": 0.8461301326751709, + "step": 8565 + }, + { + "epoch": 1.9737327188940093, + "grad_norm": 1.0765880280550415, + "learning_rate": 9.596421080112493e-10, + "loss": 0.6144053936004639, + "step": 8566 + }, + { + "epoch": 1.973963133640553, + "grad_norm": 1.226899728476588, + "learning_rate": 9.430278549675818e-10, + "loss": 0.6623581647872925, + "step": 8567 + }, + { + "epoch": 1.9741935483870967, + "grad_norm": 1.6396403159587711, + "learning_rate": 9.265586137209292e-10, + "loss": 0.9540686011314392, + "step": 8568 + }, + { + "epoch": 1.9744239631336407, + "grad_norm": 1.1121119945854705, + "learning_rate": 9.102343866616014e-10, + "loss": 0.7231987714767456, + "step": 8569 + }, + { + "epoch": 1.9746543778801844, + "grad_norm": 1.3771440446346792, + "learning_rate": 8.940551761592585e-10, + "loss": 0.7759320735931396, + "step": 8570 + }, + { + "epoch": 1.9748847926267281, + "grad_norm": 1.3995632478363096, + "learning_rate": 8.780209845621334e-10, + "loss": 0.8277846574783325, + "step": 8571 + }, + { + "epoch": 1.9751152073732718, + "grad_norm": 1.1615989785178322, + "learning_rate": 8.621318141974754e-10, + "loss": 0.7913431525230408, + "step": 8572 + }, + { + "epoch": 1.9753456221198156, + "grad_norm": 1.1262387789302248, + 
"learning_rate": 8.46387667371773e-10, + "loss": 0.7011829614639282, + "step": 8573 + }, + { + "epoch": 1.9755760368663595, + "grad_norm": 1.358126505769676, + "learning_rate": 8.30788546370198e-10, + "loss": 0.8762087821960449, + "step": 8574 + }, + { + "epoch": 1.9758064516129032, + "grad_norm": 1.5337262034773564, + "learning_rate": 8.153344534569396e-10, + "loss": 0.7944581508636475, + "step": 8575 + }, + { + "epoch": 1.976036866359447, + "grad_norm": 0.9763562202292912, + "learning_rate": 8.00025390875203e-10, + "loss": 0.7086907625198364, + "step": 8576 + }, + { + "epoch": 1.976267281105991, + "grad_norm": 1.3716397771498143, + "learning_rate": 7.848613608468779e-10, + "loss": 0.7263821959495544, + "step": 8577 + }, + { + "epoch": 1.9764976958525344, + "grad_norm": 1.0912146553836337, + "learning_rate": 7.698423655732034e-10, + "loss": 0.714054524898529, + "step": 8578 + }, + { + "epoch": 1.9767281105990784, + "grad_norm": 1.0671768990247028, + "learning_rate": 7.549684072341023e-10, + "loss": 0.817487359046936, + "step": 8579 + }, + { + "epoch": 1.976958525345622, + "grad_norm": 1.3039849886057633, + "learning_rate": 7.402394879885143e-10, + "loss": 0.7933021783828735, + "step": 8580 + }, + { + "epoch": 1.9771889400921658, + "grad_norm": 1.1473238275849764, + "learning_rate": 7.25655609974396e-10, + "loss": 0.8699008822441101, + "step": 8581 + }, + { + "epoch": 1.9774193548387098, + "grad_norm": 1.2250569758639698, + "learning_rate": 7.112167753083876e-10, + "loss": 0.804245114326477, + "step": 8582 + }, + { + "epoch": 1.9776497695852533, + "grad_norm": 1.322132271674899, + "learning_rate": 6.969229860863679e-10, + "loss": 0.8334434628486633, + "step": 8583 + }, + { + "epoch": 1.9778801843317972, + "grad_norm": 1.1368298808414594, + "learning_rate": 6.827742443831219e-10, + "loss": 0.7549147605895996, + "step": 8584 + }, + { + "epoch": 1.978110599078341, + "grad_norm": 0.9036184179111577, + "learning_rate": 6.687705522522291e-10, + "loss": 0.69701087474823, 
+ "step": 8585 + }, + { + "epoch": 1.9783410138248847, + "grad_norm": 1.259028975685209, + "learning_rate": 6.549119117263969e-10, + "loss": 0.727588415145874, + "step": 8586 + }, + { + "epoch": 1.9785714285714286, + "grad_norm": 1.0967653076646233, + "learning_rate": 6.411983248171271e-10, + "loss": 0.7309392094612122, + "step": 8587 + }, + { + "epoch": 1.9788018433179724, + "grad_norm": 1.6515699626026994, + "learning_rate": 6.276297935149388e-10, + "loss": 0.8299658298492432, + "step": 8588 + }, + { + "epoch": 1.979032258064516, + "grad_norm": 1.362481943616663, + "learning_rate": 6.142063197892566e-10, + "loss": 0.9731055498123169, + "step": 8589 + }, + { + "epoch": 1.97926267281106, + "grad_norm": 1.1190783849934713, + "learning_rate": 6.009279055885219e-10, + "loss": 0.6292351484298706, + "step": 8590 + }, + { + "epoch": 1.9794930875576036, + "grad_norm": 1.2981523800262795, + "learning_rate": 5.877945528400818e-10, + "loss": 0.7881810665130615, + "step": 8591 + }, + { + "epoch": 1.9797235023041475, + "grad_norm": 1.2430793849512602, + "learning_rate": 5.748062634501894e-10, + "loss": 0.7910494804382324, + "step": 8592 + }, + { + "epoch": 1.9799539170506912, + "grad_norm": 1.3789958651744842, + "learning_rate": 5.619630393042252e-10, + "loss": 0.8255902528762817, + "step": 8593 + }, + { + "epoch": 1.980184331797235, + "grad_norm": 1.0908110861505123, + "learning_rate": 5.492648822660318e-10, + "loss": 0.788017749786377, + "step": 8594 + }, + { + "epoch": 1.980414746543779, + "grad_norm": 1.2052887418241187, + "learning_rate": 5.367117941791343e-10, + "loss": 0.8717716932296753, + "step": 8595 + }, + { + "epoch": 1.9806451612903224, + "grad_norm": 1.3810911920135494, + "learning_rate": 5.243037768652981e-10, + "loss": 0.7220178246498108, + "step": 8596 + }, + { + "epoch": 1.9808755760368664, + "grad_norm": 1.2221356933031184, + "learning_rate": 5.120408321256376e-10, + "loss": 0.7536830902099609, + "step": 8597 + }, + { + "epoch": 1.98110599078341, + 
"grad_norm": 1.13011497917934, + "learning_rate": 4.999229617401735e-10, + "loss": 0.7480939626693726, + "step": 8598 + }, + { + "epoch": 1.9813364055299538, + "grad_norm": 1.1029404069670388, + "learning_rate": 4.879501674676101e-10, + "loss": 0.7168867588043213, + "step": 8599 + }, + { + "epoch": 1.9815668202764978, + "grad_norm": 1.1019009005346911, + "learning_rate": 4.761224510460016e-10, + "loss": 0.8352792263031006, + "step": 8600 + }, + { + "epoch": 1.9817972350230415, + "grad_norm": 1.2827894099174693, + "learning_rate": 4.644398141919748e-10, + "loss": 0.6987372636795044, + "step": 8601 + }, + { + "epoch": 1.9820276497695852, + "grad_norm": 1.2120343684069002, + "learning_rate": 4.5290225860128426e-10, + "loss": 0.6844612956047058, + "step": 8602 + }, + { + "epoch": 1.9822580645161292, + "grad_norm": 1.4290404101727392, + "learning_rate": 4.4150978594859055e-10, + "loss": 0.7659348249435425, + "step": 8603 + }, + { + "epoch": 1.9824884792626727, + "grad_norm": 1.1012416889537506, + "learning_rate": 4.3026239788757077e-10, + "loss": 0.8163154125213623, + "step": 8604 + }, + { + "epoch": 1.9827188940092166, + "grad_norm": 1.3238497684740367, + "learning_rate": 4.191600960505859e-10, + "loss": 0.8688125610351562, + "step": 8605 + }, + { + "epoch": 1.9829493087557604, + "grad_norm": 1.13771312339099, + "learning_rate": 4.082028820493466e-10, + "loss": 0.8250670433044434, + "step": 8606 + }, + { + "epoch": 1.983179723502304, + "grad_norm": 1.1783821953258633, + "learning_rate": 3.973907574741364e-10, + "loss": 0.9378982782363892, + "step": 8607 + }, + { + "epoch": 1.983410138248848, + "grad_norm": 1.1593506126073094, + "learning_rate": 3.867237238943666e-10, + "loss": 0.8764913082122803, + "step": 8608 + }, + { + "epoch": 1.9836405529953915, + "grad_norm": 1.271012232850208, + "learning_rate": 3.762017828583541e-10, + "loss": 0.7690116763114929, + "step": 8609 + }, + { + "epoch": 1.9838709677419355, + "grad_norm": 1.2648955747200947, + "learning_rate": 
3.6582493589332187e-10, + "loss": 0.6977133750915527, + "step": 8610 + }, + { + "epoch": 1.9841013824884792, + "grad_norm": 1.0674977135329127, + "learning_rate": 3.5559318450539835e-10, + "loss": 0.7362618446350098, + "step": 8611 + }, + { + "epoch": 1.984331797235023, + "grad_norm": 1.4639153789709758, + "learning_rate": 3.455065301798399e-10, + "loss": 0.7065306305885315, + "step": 8612 + }, + { + "epoch": 1.984562211981567, + "grad_norm": 1.2869199371326872, + "learning_rate": 3.355649743805866e-10, + "loss": 0.812393307685852, + "step": 8613 + }, + { + "epoch": 1.9847926267281106, + "grad_norm": 1.1854338312494677, + "learning_rate": 3.2576851855070644e-10, + "loss": 0.6947695016860962, + "step": 8614 + }, + { + "epoch": 1.9850230414746544, + "grad_norm": 1.2703269278379015, + "learning_rate": 3.161171641121729e-10, + "loss": 0.6745340824127197, + "step": 8615 + }, + { + "epoch": 1.9852534562211983, + "grad_norm": 1.2827625316731694, + "learning_rate": 3.0661091246575454e-10, + "loss": 0.7426450848579407, + "step": 8616 + }, + { + "epoch": 1.9854838709677418, + "grad_norm": 1.1091238879569632, + "learning_rate": 2.9724976499134745e-10, + "loss": 0.7769409418106079, + "step": 8617 + }, + { + "epoch": 1.9857142857142858, + "grad_norm": 1.3932431178326243, + "learning_rate": 2.8803372304775365e-10, + "loss": 0.9591978192329407, + "step": 8618 + }, + { + "epoch": 1.9859447004608295, + "grad_norm": 1.0484296609281079, + "learning_rate": 2.789627879725698e-10, + "loss": 0.7504953742027283, + "step": 8619 + }, + { + "epoch": 1.9861751152073732, + "grad_norm": 1.4071650291408113, + "learning_rate": 2.700369610825204e-10, + "loss": 0.8990021347999573, + "step": 8620 + }, + { + "epoch": 1.9864055299539172, + "grad_norm": 1.3278656398693938, + "learning_rate": 2.612562436731247e-10, + "loss": 0.786778450012207, + "step": 8621 + }, + { + "epoch": 1.9866359447004607, + "grad_norm": 1.3996268794778322, + "learning_rate": 2.526206370189188e-10, + "loss": 0.7387717366218567, 
+ "step": 8622 + }, + { + "epoch": 1.9868663594470046, + "grad_norm": 1.1375614144189101, + "learning_rate": 2.4413014237323336e-10, + "loss": 0.7672144174575806, + "step": 8623 + }, + { + "epoch": 1.9870967741935484, + "grad_norm": 1.0342597373081839, + "learning_rate": 2.357847609686381e-10, + "loss": 0.6191907525062561, + "step": 8624 + }, + { + "epoch": 1.987327188940092, + "grad_norm": 1.1782057870810292, + "learning_rate": 2.2758449401638624e-10, + "loss": 0.7257785201072693, + "step": 8625 + }, + { + "epoch": 1.987557603686636, + "grad_norm": 1.0162904765762713, + "learning_rate": 2.195293427066369e-10, + "loss": 0.6997271776199341, + "step": 8626 + }, + { + "epoch": 1.9877880184331798, + "grad_norm": 1.0539587989000714, + "learning_rate": 2.1161930820878804e-10, + "loss": 0.7813891768455505, + "step": 8627 + }, + { + "epoch": 1.9880184331797235, + "grad_norm": 1.4503426709948117, + "learning_rate": 2.0385439167069917e-10, + "loss": 0.8003429174423218, + "step": 8628 + }, + { + "epoch": 1.9882488479262674, + "grad_norm": 1.0912355930233222, + "learning_rate": 1.962345942196908e-10, + "loss": 0.8020645380020142, + "step": 8629 + }, + { + "epoch": 1.988479262672811, + "grad_norm": 1.6862187526303312, + "learning_rate": 1.8875991696165604e-10, + "loss": 0.9189429879188538, + "step": 8630 + }, + { + "epoch": 1.988709677419355, + "grad_norm": 1.2154025001234743, + "learning_rate": 1.8143036098150487e-10, + "loss": 0.7399884462356567, + "step": 8631 + }, + { + "epoch": 1.9889400921658986, + "grad_norm": 1.346746600672021, + "learning_rate": 1.7424592734316402e-10, + "loss": 0.7725361585617065, + "step": 8632 + }, + { + "epoch": 1.9891705069124423, + "grad_norm": 1.2077720906172131, + "learning_rate": 1.6720661708946593e-10, + "loss": 0.7887094020843506, + "step": 8633 + }, + { + "epoch": 1.9894009216589863, + "grad_norm": 1.1656177751476533, + "learning_rate": 1.6031243124203786e-10, + "loss": 0.8007388114929199, + "step": 8634 + }, + { + "epoch": 
1.9896313364055298, + "grad_norm": 1.1324445653667632, + "learning_rate": 1.5356337080174587e-10, + "loss": 0.6478462219238281, + "step": 8635 + }, + { + "epoch": 1.9898617511520738, + "grad_norm": 1.0566661119746916, + "learning_rate": 1.469594367480287e-10, + "loss": 0.8274422287940979, + "step": 8636 + }, + { + "epoch": 1.9900921658986175, + "grad_norm": 1.2865497814691733, + "learning_rate": 1.4050063003956391e-10, + "loss": 0.7919641733169556, + "step": 8637 + }, + { + "epoch": 1.9903225806451612, + "grad_norm": 1.1652783170900007, + "learning_rate": 1.3418695161382388e-10, + "loss": 0.7973719239234924, + "step": 8638 + }, + { + "epoch": 1.9905529953917052, + "grad_norm": 1.2951125509591672, + "learning_rate": 1.280184023870756e-10, + "loss": 0.8002075552940369, + "step": 8639 + }, + { + "epoch": 1.9907834101382489, + "grad_norm": 1.2388157581694845, + "learning_rate": 1.2199498325482506e-10, + "loss": 0.748448371887207, + "step": 8640 + }, + { + "epoch": 1.9910138248847926, + "grad_norm": 1.1203681158314, + "learning_rate": 1.1611669509137278e-10, + "loss": 0.7333977222442627, + "step": 8641 + }, + { + "epoch": 1.9912442396313366, + "grad_norm": 1.2174560450430658, + "learning_rate": 1.1038353874992524e-10, + "loss": 0.7760608196258545, + "step": 8642 + }, + { + "epoch": 1.99147465437788, + "grad_norm": 1.0029836343944154, + "learning_rate": 1.0479551506259455e-10, + "loss": 0.6129526495933533, + "step": 8643 + }, + { + "epoch": 1.991705069124424, + "grad_norm": 1.2201657055802861, + "learning_rate": 9.935262484062068e-11, + "loss": 0.68567955493927, + "step": 8644 + }, + { + "epoch": 1.9919354838709677, + "grad_norm": 1.1159731481420905, + "learning_rate": 9.405486887381631e-11, + "loss": 0.9042092561721802, + "step": 8645 + }, + { + "epoch": 1.9921658986175115, + "grad_norm": 1.1343470197220147, + "learning_rate": 8.890224793123291e-11, + "loss": 0.7143117189407349, + "step": 8646 + }, + { + "epoch": 1.9923963133640554, + "grad_norm": 1.1380121853465122, + 
"learning_rate": 8.389476276071672e-11, + "loss": 0.7486213445663452, + "step": 8647 + }, + { + "epoch": 1.992626728110599, + "grad_norm": 1.0074777840055806, + "learning_rate": 7.903241408924177e-11, + "loss": 0.8554232716560364, + "step": 8648 + }, + { + "epoch": 1.9928571428571429, + "grad_norm": 1.2464259863760472, + "learning_rate": 7.431520262246582e-11, + "loss": 0.6604819297790527, + "step": 8649 + }, + { + "epoch": 1.9930875576036866, + "grad_norm": 1.450236790683031, + "learning_rate": 6.974312904517443e-11, + "loss": 0.8032737970352173, + "step": 8650 + }, + { + "epoch": 1.9933179723502303, + "grad_norm": 1.417412341607897, + "learning_rate": 6.531619402083687e-11, + "loss": 0.7712494730949402, + "step": 8651 + }, + { + "epoch": 1.9935483870967743, + "grad_norm": 1.1501531132473004, + "learning_rate": 6.103439819216127e-11, + "loss": 0.7894617915153503, + "step": 8652 + }, + { + "epoch": 1.993778801843318, + "grad_norm": 1.585587469758744, + "learning_rate": 5.689774218065046e-11, + "loss": 0.8386135697364807, + "step": 8653 + }, + { + "epoch": 1.9940092165898617, + "grad_norm": 1.151150154599754, + "learning_rate": 5.290622658660204e-11, + "loss": 0.744853138923645, + "step": 8654 + }, + { + "epoch": 1.9942396313364057, + "grad_norm": 1.2019290228056547, + "learning_rate": 4.90598519894414e-11, + "loss": 0.7604823112487793, + "step": 8655 + }, + { + "epoch": 1.9944700460829492, + "grad_norm": 1.3088329290176663, + "learning_rate": 4.53586189474997e-11, + "loss": 0.7552424669265747, + "step": 8656 + }, + { + "epoch": 1.9947004608294931, + "grad_norm": 1.1999629002739178, + "learning_rate": 4.180252799801387e-11, + "loss": 0.9652698636054993, + "step": 8657 + }, + { + "epoch": 1.9949308755760369, + "grad_norm": 3.0754205014147553, + "learning_rate": 3.839157965712658e-11, + "loss": 0.9589856266975403, + "step": 8658 + }, + { + "epoch": 1.9951612903225806, + "grad_norm": 1.0149584356506736, + "learning_rate": 3.512577441988629e-11, + "loss": 
0.6802269220352173, + "step": 8659 + }, + { + "epoch": 1.9953917050691246, + "grad_norm": 1.3402861462863225, + "learning_rate": 3.200511276035822e-11, + "loss": 0.8262367248535156, + "step": 8660 + }, + { + "epoch": 1.995622119815668, + "grad_norm": 1.166077707630556, + "learning_rate": 2.9029595131513372e-11, + "loss": 0.8353632688522339, + "step": 8661 + }, + { + "epoch": 1.995852534562212, + "grad_norm": 1.3494376018654042, + "learning_rate": 2.61992219652285e-11, + "loss": 0.8807231187820435, + "step": 8662 + }, + { + "epoch": 1.9960829493087557, + "grad_norm": 1.1520030195581032, + "learning_rate": 2.3513993672397148e-11, + "loss": 0.8394359350204468, + "step": 8663 + }, + { + "epoch": 1.9963133640552995, + "grad_norm": 0.9751046818624397, + "learning_rate": 2.0973910642707592e-11, + "loss": 0.8343399167060852, + "step": 8664 + }, + { + "epoch": 1.9965437788018434, + "grad_norm": 0.9230235584546375, + "learning_rate": 1.857897324475388e-11, + "loss": 0.7168834209442139, + "step": 8665 + }, + { + "epoch": 1.9967741935483871, + "grad_norm": 1.1469086275708407, + "learning_rate": 1.6329181826257866e-11, + "loss": 0.7825703620910645, + "step": 8666 + }, + { + "epoch": 1.9970046082949309, + "grad_norm": 1.2013625464128237, + "learning_rate": 1.4224536713847157e-11, + "loss": 0.6497002840042114, + "step": 8667 + }, + { + "epoch": 1.9972350230414746, + "grad_norm": 1.204271254016415, + "learning_rate": 1.2265038212944112e-11, + "loss": 0.8188776969909668, + "step": 8668 + }, + { + "epoch": 1.9974654377880183, + "grad_norm": 2.0423246677180056, + "learning_rate": 1.0450686607987869e-11, + "loss": 0.898658812046051, + "step": 8669 + }, + { + "epoch": 1.9976958525345623, + "grad_norm": 1.5435875726791675, + "learning_rate": 8.781482162212306e-12, + "loss": 0.8580871820449829, + "step": 8670 + }, + { + "epoch": 1.997926267281106, + "grad_norm": 1.3046658328904006, + "learning_rate": 7.25742511797911e-12, + "loss": 0.7657710313796997, + "step": 8671 + }, + { + "epoch": 
1.9981566820276497, + "grad_norm": 1.4371880227275262, + "learning_rate": 5.87851569655573e-12, + "loss": 0.7881382703781128, + "step": 8672 + }, + { + "epoch": 1.9983870967741937, + "grad_norm": 1.3805751034431293, + "learning_rate": 4.644754098004356e-12, + "loss": 0.8711144924163818, + "step": 8673 + }, + { + "epoch": 1.9986175115207372, + "grad_norm": 1.1130398802574797, + "learning_rate": 3.5561405015149814e-12, + "loss": 0.6993192434310913, + "step": 8674 + }, + { + "epoch": 1.9988479262672811, + "grad_norm": 0.9709196628106886, + "learning_rate": 2.6126750650723452e-12, + "loss": 0.7348669767379761, + "step": 8675 + }, + { + "epoch": 1.9990783410138249, + "grad_norm": 1.0686467754804958, + "learning_rate": 1.8143579254559227e-12, + "loss": 0.7356513142585754, + "step": 8676 + }, + { + "epoch": 1.9993087557603686, + "grad_norm": 1.6789468220081696, + "learning_rate": 1.1611891986840206e-12, + "loss": 0.7969627380371094, + "step": 8677 + }, + { + "epoch": 1.9995391705069125, + "grad_norm": 1.3374302292197147, + "learning_rate": 6.531689795696848e-13, + "loss": 0.7247132062911987, + "step": 8678 + }, + { + "epoch": 1.9997695852534563, + "grad_norm": 1.3757463334176048, + "learning_rate": 2.902973418317245e-13, + "loss": 0.6177656650543213, + "step": 8679 + }, + { + "epoch": 2.0, + "grad_norm": 1.3149906641620008, + "learning_rate": 7.25743380947108e-14, + "loss": 0.8378380537033081, + "step": 8680 + } + ], + "logging_steps": 1, + "max_steps": 8680, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 7249753014763520.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-8680/training_args.bin b/checkpoint-8680/training_args.bin new file mode 100644 index 
0000000000000000000000000000000000000000..e7eb191dd44f853b2edd49aafea231852c267845 --- /dev/null +++ b/checkpoint-8680/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f95b396ac9a3c4ab0d50e403be4c8c0fd191fd2a0aac0b5d95c7c3b72c8501b +size 6968 diff --git a/checkpoint-8680/zero_to_fp32.py b/checkpoint-8680/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-8680/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/processor_config.json b/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": "channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": 
"Qwen2VLVideoProcessor" + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..874b9fe8fde883c45d6df6e99e75cc964841790a --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 7249753014763520.0, + "train_loss": 0.8227015781100444, + "train_runtime": 44224.0669, + "train_samples_per_second": 0.785, + "train_steps_per_second": 0.196 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..661926e50c6b70341eb088a866ca9ad1b234346e --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,8681 @@ +{"current_steps": 1, 
"total_steps": 8680, "loss": 1.1575632095336914, "lr": 0.0, "epoch": 0.0002304147465437788, "percentage": 0.01, "elapsed_time": "0:00:14", "remaining_time": "1 day, 10:10:28"} +{"current_steps": 2, "total_steps": 8680, "loss": 0.9973502159118652, "lr": 4.6082949308755755e-09, "epoch": 0.0004608294930875576, "percentage": 0.02, "elapsed_time": "0:00:18", "remaining_time": "22:32:41"} +{"current_steps": 3, "total_steps": 8680, "loss": 1.0778999328613281, "lr": 9.216589861751151e-09, "epoch": 0.0006912442396313364, "percentage": 0.03, "elapsed_time": "0:00:24", "remaining_time": "19:46:02"} +{"current_steps": 4, "total_steps": 8680, "loss": 1.1912263631820679, "lr": 1.3824884792626728e-08, "epoch": 0.0009216589861751152, "percentage": 0.05, "elapsed_time": "0:00:29", "remaining_time": "17:47:51"} +{"current_steps": 5, "total_steps": 8680, "loss": 1.136031150817871, "lr": 1.8433179723502302e-08, "epoch": 0.001152073732718894, "percentage": 0.06, "elapsed_time": "0:00:35", "remaining_time": "17:17:11"} +{"current_steps": 6, "total_steps": 8680, "loss": 1.1647956371307373, "lr": 2.304147465437788e-08, "epoch": 0.0013824884792626728, "percentage": 0.07, "elapsed_time": "0:00:40", "remaining_time": "16:24:22"} +{"current_steps": 7, "total_steps": 8680, "loss": 1.144924283027649, "lr": 2.7649769585253456e-08, "epoch": 0.0016129032258064516, "percentage": 0.08, "elapsed_time": "0:00:45", "remaining_time": "15:33:43"} +{"current_steps": 8, "total_steps": 8680, "loss": 1.1821019649505615, "lr": 3.225806451612903e-08, "epoch": 0.0018433179723502304, "percentage": 0.09, "elapsed_time": "0:00:50", "remaining_time": "15:05:54"} +{"current_steps": 9, "total_steps": 8680, "loss": 1.0586045980453491, "lr": 3.6866359447004604e-08, "epoch": 0.0020737327188940094, "percentage": 0.1, "elapsed_time": "0:00:56", "remaining_time": "15:06:17"} +{"current_steps": 10, "total_steps": 8680, "loss": 1.2029818296432495, "lr": 4.1474654377880186e-08, "epoch": 0.002304147465437788, "percentage": 
0.12, "elapsed_time": "0:01:02", "remaining_time": "15:03:20"} +{"current_steps": 11, "total_steps": 8680, "loss": 1.1411634683609009, "lr": 4.608294930875576e-08, "epoch": 0.002534562211981567, "percentage": 0.13, "elapsed_time": "0:01:08", "remaining_time": "14:56:33"} +{"current_steps": 12, "total_steps": 8680, "loss": 1.2719087600708008, "lr": 5.069124423963134e-08, "epoch": 0.0027649769585253456, "percentage": 0.14, "elapsed_time": "0:01:12", "remaining_time": "14:30:47"} +{"current_steps": 13, "total_steps": 8680, "loss": 1.1132495403289795, "lr": 5.529953917050691e-08, "epoch": 0.0029953917050691246, "percentage": 0.15, "elapsed_time": "0:01:16", "remaining_time": "14:15:10"} +{"current_steps": 14, "total_steps": 8680, "loss": 1.1900808811187744, "lr": 5.990783410138249e-08, "epoch": 0.0032258064516129032, "percentage": 0.16, "elapsed_time": "0:01:23", "remaining_time": "14:17:45"} +{"current_steps": 15, "total_steps": 8680, "loss": 1.2273608446121216, "lr": 6.451612903225806e-08, "epoch": 0.0034562211981566822, "percentage": 0.17, "elapsed_time": "0:01:28", "remaining_time": "14:10:07"} +{"current_steps": 16, "total_steps": 8680, "loss": 1.2130601406097412, "lr": 6.912442396313364e-08, "epoch": 0.003686635944700461, "percentage": 0.18, "elapsed_time": "0:01:32", "remaining_time": "13:52:38"} +{"current_steps": 17, "total_steps": 8680, "loss": 1.0534124374389648, "lr": 7.373271889400921e-08, "epoch": 0.00391705069124424, "percentage": 0.2, "elapsed_time": "0:01:37", "remaining_time": "13:45:45"} +{"current_steps": 18, "total_steps": 8680, "loss": 1.1796221733093262, "lr": 7.834101382488478e-08, "epoch": 0.004147465437788019, "percentage": 0.21, "elapsed_time": "0:01:42", "remaining_time": "13:43:16"} +{"current_steps": 19, "total_steps": 8680, "loss": 1.118175745010376, "lr": 8.294930875576037e-08, "epoch": 0.004377880184331797, "percentage": 0.22, "elapsed_time": "0:01:48", "remaining_time": "13:43:15"} +{"current_steps": 20, "total_steps": 8680, "loss": 
1.140963077545166, "lr": 8.755760368663594e-08, "epoch": 0.004608294930875576, "percentage": 0.23, "elapsed_time": "0:01:52", "remaining_time": "13:33:41"} +{"current_steps": 21, "total_steps": 8680, "loss": 1.107339859008789, "lr": 9.216589861751152e-08, "epoch": 0.004838709677419355, "percentage": 0.24, "elapsed_time": "0:01:57", "remaining_time": "13:27:17"} +{"current_steps": 22, "total_steps": 8680, "loss": 1.171803593635559, "lr": 9.677419354838709e-08, "epoch": 0.005069124423963134, "percentage": 0.25, "elapsed_time": "0:02:02", "remaining_time": "13:24:51"} +{"current_steps": 23, "total_steps": 8680, "loss": 0.9935251474380493, "lr": 1.0138248847926267e-07, "epoch": 0.005299539170506912, "percentage": 0.26, "elapsed_time": "0:02:07", "remaining_time": "13:22:49"} +{"current_steps": 24, "total_steps": 8680, "loss": 1.0870952606201172, "lr": 1.0599078341013824e-07, "epoch": 0.005529953917050691, "percentage": 0.28, "elapsed_time": "0:02:12", "remaining_time": "13:16:10"} +{"current_steps": 25, "total_steps": 8680, "loss": 1.1520278453826904, "lr": 1.1059907834101383e-07, "epoch": 0.00576036866359447, "percentage": 0.29, "elapsed_time": "0:02:17", "remaining_time": "13:15:07"} +{"current_steps": 26, "total_steps": 8680, "loss": 1.3603750467300415, "lr": 1.152073732718894e-07, "epoch": 0.005990783410138249, "percentage": 0.3, "elapsed_time": "0:02:21", "remaining_time": "13:07:24"} +{"current_steps": 27, "total_steps": 8680, "loss": 1.230550765991211, "lr": 1.1981566820276498e-07, "epoch": 0.006221198156682027, "percentage": 0.31, "elapsed_time": "0:02:25", "remaining_time": "12:57:39"} +{"current_steps": 28, "total_steps": 8680, "loss": 1.267604112625122, "lr": 1.2442396313364054e-07, "epoch": 0.0064516129032258064, "percentage": 0.32, "elapsed_time": "0:02:30", "remaining_time": "12:56:55"} +{"current_steps": 29, "total_steps": 8680, "loss": 1.344348669052124, "lr": 1.2903225806451611e-07, "epoch": 0.0066820276497695855, "percentage": 0.33, "elapsed_time": 
"0:02:35", "remaining_time": "12:50:48"} +{"current_steps": 30, "total_steps": 8680, "loss": 1.2794291973114014, "lr": 1.336405529953917e-07, "epoch": 0.0069124423963133645, "percentage": 0.35, "elapsed_time": "0:02:38", "remaining_time": "12:44:03"} +{"current_steps": 31, "total_steps": 8680, "loss": 1.2841103076934814, "lr": 1.3824884792626728e-07, "epoch": 0.007142857142857143, "percentage": 0.36, "elapsed_time": "0:02:42", "remaining_time": "12:37:48"} +{"current_steps": 32, "total_steps": 8680, "loss": 1.1505224704742432, "lr": 1.4285714285714285e-07, "epoch": 0.007373271889400922, "percentage": 0.37, "elapsed_time": "0:02:47", "remaining_time": "12:33:53"} +{"current_steps": 33, "total_steps": 8680, "loss": 0.9800833463668823, "lr": 1.4746543778801842e-07, "epoch": 0.007603686635944701, "percentage": 0.38, "elapsed_time": "0:02:52", "remaining_time": "12:32:36"} +{"current_steps": 34, "total_steps": 8680, "loss": 1.3712589740753174, "lr": 1.52073732718894e-07, "epoch": 0.00783410138248848, "percentage": 0.39, "elapsed_time": "0:02:57", "remaining_time": "12:30:47"} +{"current_steps": 35, "total_steps": 8680, "loss": 1.2274689674377441, "lr": 1.5668202764976955e-07, "epoch": 0.008064516129032258, "percentage": 0.4, "elapsed_time": "0:03:02", "remaining_time": "12:32:39"} +{"current_steps": 36, "total_steps": 8680, "loss": 1.0673755407333374, "lr": 1.6129032258064515e-07, "epoch": 0.008294930875576038, "percentage": 0.41, "elapsed_time": "0:03:06", "remaining_time": "12:26:23"} +{"current_steps": 37, "total_steps": 8680, "loss": 1.242164134979248, "lr": 1.6589861751152074e-07, "epoch": 0.008525345622119816, "percentage": 0.43, "elapsed_time": "0:03:10", "remaining_time": "12:23:00"} +{"current_steps": 38, "total_steps": 8680, "loss": 1.190554141998291, "lr": 1.705069124423963e-07, "epoch": 0.008755760368663594, "percentage": 0.44, "elapsed_time": "0:03:15", "remaining_time": "12:19:55"} +{"current_steps": 39, "total_steps": 8680, "loss": 1.3119773864746094, 
"lr": 1.7511520737327188e-07, "epoch": 0.008986175115207374, "percentage": 0.45, "elapsed_time": "0:03:20", "remaining_time": "12:19:54"} +{"current_steps": 40, "total_steps": 8680, "loss": 1.2532517910003662, "lr": 1.7972350230414745e-07, "epoch": 0.009216589861751152, "percentage": 0.46, "elapsed_time": "0:03:24", "remaining_time": "12:16:00"} +{"current_steps": 41, "total_steps": 8680, "loss": 1.307154655456543, "lr": 1.8433179723502305e-07, "epoch": 0.00944700460829493, "percentage": 0.47, "elapsed_time": "0:03:29", "remaining_time": "12:17:09"} +{"current_steps": 42, "total_steps": 8680, "loss": 1.1899281740188599, "lr": 1.889400921658986e-07, "epoch": 0.00967741935483871, "percentage": 0.48, "elapsed_time": "0:03:35", "remaining_time": "12:17:31"} +{"current_steps": 43, "total_steps": 8680, "loss": 1.1928249597549438, "lr": 1.9354838709677418e-07, "epoch": 0.009907834101382488, "percentage": 0.5, "elapsed_time": "0:03:40", "remaining_time": "12:19:05"} +{"current_steps": 44, "total_steps": 8680, "loss": 1.1663157939910889, "lr": 1.9815668202764975e-07, "epoch": 0.010138248847926268, "percentage": 0.51, "elapsed_time": "0:03:44", "remaining_time": "12:15:36"} +{"current_steps": 45, "total_steps": 8680, "loss": 1.1806118488311768, "lr": 2.0276497695852535e-07, "epoch": 0.010368663594470046, "percentage": 0.52, "elapsed_time": "0:03:49", "remaining_time": "12:13:30"} +{"current_steps": 46, "total_steps": 8680, "loss": 1.1704952716827393, "lr": 2.073732718894009e-07, "epoch": 0.010599078341013824, "percentage": 0.53, "elapsed_time": "0:03:54", "remaining_time": "12:14:08"} +{"current_steps": 47, "total_steps": 8680, "loss": 1.2124149799346924, "lr": 2.1198156682027649e-07, "epoch": 0.010829493087557604, "percentage": 0.54, "elapsed_time": "0:03:59", "remaining_time": "12:13:19"} +{"current_steps": 48, "total_steps": 8680, "loss": 1.041813850402832, "lr": 2.1658986175115208e-07, "epoch": 0.011059907834101382, "percentage": 0.55, "elapsed_time": "0:04:04", 
"remaining_time": "12:12:26"} +{"current_steps": 49, "total_steps": 8680, "loss": 1.255402684211731, "lr": 2.2119815668202765e-07, "epoch": 0.01129032258064516, "percentage": 0.56, "elapsed_time": "0:04:09", "remaining_time": "12:12:51"} +{"current_steps": 50, "total_steps": 8680, "loss": 1.1115221977233887, "lr": 2.2580645161290322e-07, "epoch": 0.01152073732718894, "percentage": 0.58, "elapsed_time": "0:04:14", "remaining_time": "12:12:35"} +{"current_steps": 51, "total_steps": 8680, "loss": 1.4048426151275635, "lr": 2.304147465437788e-07, "epoch": 0.011751152073732719, "percentage": 0.59, "elapsed_time": "0:04:18", "remaining_time": "12:07:56"} +{"current_steps": 52, "total_steps": 8680, "loss": 1.1690936088562012, "lr": 2.3502304147465438e-07, "epoch": 0.011981566820276499, "percentage": 0.6, "elapsed_time": "0:04:22", "remaining_time": "12:05:22"} +{"current_steps": 53, "total_steps": 8680, "loss": 1.164888620376587, "lr": 2.3963133640552995e-07, "epoch": 0.012211981566820277, "percentage": 0.61, "elapsed_time": "0:04:26", "remaining_time": "12:04:18"} +{"current_steps": 54, "total_steps": 8680, "loss": 1.004424810409546, "lr": 2.442396313364055e-07, "epoch": 0.012442396313364055, "percentage": 0.62, "elapsed_time": "0:04:33", "remaining_time": "12:06:58"} +{"current_steps": 55, "total_steps": 8680, "loss": 0.8810856342315674, "lr": 2.488479262672811e-07, "epoch": 0.012672811059907835, "percentage": 0.63, "elapsed_time": "0:04:38", "remaining_time": "12:07:46"} +{"current_steps": 56, "total_steps": 8680, "loss": 1.300262451171875, "lr": 2.534562211981567e-07, "epoch": 0.012903225806451613, "percentage": 0.65, "elapsed_time": "0:04:42", "remaining_time": "12:05:18"} +{"current_steps": 57, "total_steps": 8680, "loss": 1.3624285459518433, "lr": 2.5806451612903223e-07, "epoch": 0.013133640552995391, "percentage": 0.66, "elapsed_time": "0:04:47", "remaining_time": "12:04:09"} +{"current_steps": 58, "total_steps": 8680, "loss": 1.2133375406265259, "lr": 
2.6267281105990777e-07, "epoch": 0.013364055299539171, "percentage": 0.67, "elapsed_time": "0:04:52", "remaining_time": "12:03:59"} +{"current_steps": 59, "total_steps": 8680, "loss": 1.2203283309936523, "lr": 2.672811059907834e-07, "epoch": 0.013594470046082949, "percentage": 0.68, "elapsed_time": "0:04:56", "remaining_time": "12:01:52"} +{"current_steps": 60, "total_steps": 8680, "loss": 1.291412353515625, "lr": 2.7188940092165896e-07, "epoch": 0.013824884792626729, "percentage": 0.69, "elapsed_time": "0:05:01", "remaining_time": "12:02:08"} +{"current_steps": 61, "total_steps": 8680, "loss": 1.1596577167510986, "lr": 2.7649769585253456e-07, "epoch": 0.014055299539170507, "percentage": 0.7, "elapsed_time": "0:05:08", "remaining_time": "12:05:50"} +{"current_steps": 62, "total_steps": 8680, "loss": 0.9789823889732361, "lr": 2.8110599078341015e-07, "epoch": 0.014285714285714285, "percentage": 0.71, "elapsed_time": "0:05:13", "remaining_time": "12:05:20"} +{"current_steps": 63, "total_steps": 8680, "loss": 1.220383882522583, "lr": 2.857142857142857e-07, "epoch": 0.014516129032258065, "percentage": 0.73, "elapsed_time": "0:05:17", "remaining_time": "12:04:31"} +{"current_steps": 64, "total_steps": 8680, "loss": 1.0961871147155762, "lr": 2.903225806451613e-07, "epoch": 0.014746543778801843, "percentage": 0.74, "elapsed_time": "0:05:21", "remaining_time": "12:01:16"} +{"current_steps": 65, "total_steps": 8680, "loss": 1.2281936407089233, "lr": 2.9493087557603683e-07, "epoch": 0.014976958525345621, "percentage": 0.75, "elapsed_time": "0:05:25", "remaining_time": "11:59:53"} +{"current_steps": 66, "total_steps": 8680, "loss": 1.279728889465332, "lr": 2.9953917050691243e-07, "epoch": 0.015207373271889401, "percentage": 0.76, "elapsed_time": "0:05:30", "remaining_time": "11:58:59"} +{"current_steps": 67, "total_steps": 8680, "loss": 1.0932798385620117, "lr": 3.04147465437788e-07, "epoch": 0.01543778801843318, "percentage": 0.77, "elapsed_time": "0:05:35", "remaining_time": 
"11:57:55"} +{"current_steps": 68, "total_steps": 8680, "loss": 1.1612955331802368, "lr": 3.0875576036866356e-07, "epoch": 0.01566820276497696, "percentage": 0.78, "elapsed_time": "0:05:40", "remaining_time": "11:58:53"} +{"current_steps": 69, "total_steps": 8680, "loss": 1.193152666091919, "lr": 3.133640552995391e-07, "epoch": 0.015898617511520736, "percentage": 0.79, "elapsed_time": "0:05:44", "remaining_time": "11:56:53"} +{"current_steps": 70, "total_steps": 8680, "loss": 1.3303695917129517, "lr": 3.1797235023041476e-07, "epoch": 0.016129032258064516, "percentage": 0.81, "elapsed_time": "0:05:49", "remaining_time": "11:55:47"} +{"current_steps": 71, "total_steps": 8680, "loss": 1.3735731840133667, "lr": 3.225806451612903e-07, "epoch": 0.016359447004608296, "percentage": 0.82, "elapsed_time": "0:05:52", "remaining_time": "11:52:35"} +{"current_steps": 72, "total_steps": 8680, "loss": 1.162925124168396, "lr": 3.271889400921659e-07, "epoch": 0.016589861751152075, "percentage": 0.83, "elapsed_time": "0:05:57", "remaining_time": "11:53:16"} +{"current_steps": 73, "total_steps": 8680, "loss": 1.3879203796386719, "lr": 3.317972350230415e-07, "epoch": 0.016820276497695852, "percentage": 0.84, "elapsed_time": "0:06:01", "remaining_time": "11:50:46"} +{"current_steps": 74, "total_steps": 8680, "loss": 1.2721638679504395, "lr": 3.3640552995391703e-07, "epoch": 0.017050691244239632, "percentage": 0.85, "elapsed_time": "0:06:06", "remaining_time": "11:49:55"} +{"current_steps": 75, "total_steps": 8680, "loss": 1.3997783660888672, "lr": 3.410138248847926e-07, "epoch": 0.01728110599078341, "percentage": 0.86, "elapsed_time": "0:06:09", "remaining_time": "11:47:19"} +{"current_steps": 76, "total_steps": 8680, "loss": 1.1099059581756592, "lr": 3.4562211981566817e-07, "epoch": 0.017511520737327188, "percentage": 0.88, "elapsed_time": "0:06:15", "remaining_time": "11:47:53"} +{"current_steps": 77, "total_steps": 8680, "loss": 1.341759204864502, "lr": 3.5023041474654376e-07, 
"epoch": 0.017741935483870968, "percentage": 0.89, "elapsed_time": "0:06:20", "remaining_time": "11:49:01"} +{"current_steps": 78, "total_steps": 8680, "loss": 1.343479871749878, "lr": 3.5483870967741936e-07, "epoch": 0.017972350230414748, "percentage": 0.9, "elapsed_time": "0:06:26", "remaining_time": "11:50:27"} +{"current_steps": 79, "total_steps": 8680, "loss": 1.2225772142410278, "lr": 3.594470046082949e-07, "epoch": 0.018202764976958524, "percentage": 0.91, "elapsed_time": "0:06:31", "remaining_time": "11:51:00"} +{"current_steps": 80, "total_steps": 8680, "loss": 1.1934573650360107, "lr": 3.6405529953917044e-07, "epoch": 0.018433179723502304, "percentage": 0.92, "elapsed_time": "0:06:38", "remaining_time": "11:53:13"} +{"current_steps": 81, "total_steps": 8680, "loss": 1.099440336227417, "lr": 3.686635944700461e-07, "epoch": 0.018663594470046084, "percentage": 0.93, "elapsed_time": "0:06:43", "remaining_time": "11:54:05"} +{"current_steps": 82, "total_steps": 8680, "loss": 1.0864269733428955, "lr": 3.7327188940092163e-07, "epoch": 0.01889400921658986, "percentage": 0.94, "elapsed_time": "0:06:49", "remaining_time": "11:56:29"} +{"current_steps": 83, "total_steps": 8680, "loss": 1.0706703662872314, "lr": 3.778801843317972e-07, "epoch": 0.01912442396313364, "percentage": 0.96, "elapsed_time": "0:06:55", "remaining_time": "11:56:40"} +{"current_steps": 84, "total_steps": 8680, "loss": 1.1675662994384766, "lr": 3.824884792626728e-07, "epoch": 0.01935483870967742, "percentage": 0.97, "elapsed_time": "0:06:59", "remaining_time": "11:54:49"} +{"current_steps": 85, "total_steps": 8680, "loss": 1.3294553756713867, "lr": 3.8709677419354837e-07, "epoch": 0.019585253456221197, "percentage": 0.98, "elapsed_time": "0:07:03", "remaining_time": "11:53:39"} +{"current_steps": 86, "total_steps": 8680, "loss": 1.050878882408142, "lr": 3.9170506912442396e-07, "epoch": 0.019815668202764977, "percentage": 0.99, "elapsed_time": "0:07:09", "remaining_time": "11:56:07"} 
+{"current_steps": 87, "total_steps": 8680, "loss": 1.3243739604949951, "lr": 3.963133640552995e-07, "epoch": 0.020046082949308756, "percentage": 1.0, "elapsed_time": "0:07:15", "remaining_time": "11:57:13"} +{"current_steps": 88, "total_steps": 8680, "loss": 1.1350429058074951, "lr": 4.009216589861751e-07, "epoch": 0.020276497695852536, "percentage": 1.01, "elapsed_time": "0:07:20", "remaining_time": "11:57:37"} +{"current_steps": 89, "total_steps": 8680, "loss": 1.24526047706604, "lr": 4.055299539170507e-07, "epoch": 0.020506912442396313, "percentage": 1.03, "elapsed_time": "0:07:25", "remaining_time": "11:56:40"} +{"current_steps": 90, "total_steps": 8680, "loss": 1.3459908962249756, "lr": 4.1013824884792624e-07, "epoch": 0.020737327188940093, "percentage": 1.04, "elapsed_time": "0:07:30", "remaining_time": "11:56:26"} +{"current_steps": 91, "total_steps": 8680, "loss": 1.2129223346710205, "lr": 4.147465437788018e-07, "epoch": 0.020967741935483872, "percentage": 1.05, "elapsed_time": "0:07:35", "remaining_time": "11:56:03"} +{"current_steps": 92, "total_steps": 8680, "loss": 1.0522969961166382, "lr": 4.1935483870967743e-07, "epoch": 0.02119815668202765, "percentage": 1.06, "elapsed_time": "0:07:40", "remaining_time": "11:56:17"} +{"current_steps": 93, "total_steps": 8680, "loss": 1.3128937482833862, "lr": 4.2396313364055297e-07, "epoch": 0.02142857142857143, "percentage": 1.07, "elapsed_time": "0:07:46", "remaining_time": "11:57:31"} +{"current_steps": 94, "total_steps": 8680, "loss": 1.3582855463027954, "lr": 4.285714285714285e-07, "epoch": 0.02165898617511521, "percentage": 1.08, "elapsed_time": "0:07:50", "remaining_time": "11:56:46"} +{"current_steps": 95, "total_steps": 8680, "loss": 1.3959028720855713, "lr": 4.3317972350230416e-07, "epoch": 0.021889400921658985, "percentage": 1.09, "elapsed_time": "0:07:54", "remaining_time": "11:55:07"} +{"current_steps": 96, "total_steps": 8680, "loss": 1.149501085281372, "lr": 4.377880184331797e-07, "epoch": 
0.022119815668202765, "percentage": 1.11, "elapsed_time": "0:07:59", "remaining_time": "11:54:52"} +{"current_steps": 97, "total_steps": 8680, "loss": 1.024135708808899, "lr": 4.423963133640553e-07, "epoch": 0.022350230414746545, "percentage": 1.12, "elapsed_time": "0:08:04", "remaining_time": "11:54:06"} +{"current_steps": 98, "total_steps": 8680, "loss": 0.9255483150482178, "lr": 4.4700460829493084e-07, "epoch": 0.02258064516129032, "percentage": 1.13, "elapsed_time": "0:08:09", "remaining_time": "11:54:03"} +{"current_steps": 99, "total_steps": 8680, "loss": 1.1694722175598145, "lr": 4.5161290322580644e-07, "epoch": 0.0228110599078341, "percentage": 1.14, "elapsed_time": "0:08:13", "remaining_time": "11:53:34"} +{"current_steps": 100, "total_steps": 8680, "loss": 1.1588457822799683, "lr": 4.5622119815668203e-07, "epoch": 0.02304147465437788, "percentage": 1.15, "elapsed_time": "0:08:19", "remaining_time": "11:53:36"} +{"current_steps": 101, "total_steps": 8680, "loss": 1.172672986984253, "lr": 4.608294930875576e-07, "epoch": 0.023271889400921657, "percentage": 1.16, "elapsed_time": "0:08:25", "remaining_time": "11:55:20"} +{"current_steps": 102, "total_steps": 8680, "loss": 1.092405915260315, "lr": 4.654377880184331e-07, "epoch": 0.023502304147465437, "percentage": 1.18, "elapsed_time": "0:08:30", "remaining_time": "11:55:42"} +{"current_steps": 103, "total_steps": 8680, "loss": 1.11540687084198, "lr": 4.7004608294930877e-07, "epoch": 0.023732718894009217, "percentage": 1.19, "elapsed_time": "0:08:35", "remaining_time": "11:54:49"} +{"current_steps": 104, "total_steps": 8680, "loss": 1.1380189657211304, "lr": 4.746543778801843e-07, "epoch": 0.023963133640552997, "percentage": 1.2, "elapsed_time": "0:08:39", "remaining_time": "11:54:02"} +{"current_steps": 105, "total_steps": 8680, "loss": 1.1031086444854736, "lr": 4.792626728110599e-07, "epoch": 0.024193548387096774, "percentage": 1.21, "elapsed_time": "0:08:43", "remaining_time": "11:53:09"} +{"current_steps": 
106, "total_steps": 8680, "loss": 1.1988024711608887, "lr": 4.838709677419355e-07, "epoch": 0.024423963133640553, "percentage": 1.22, "elapsed_time": "0:08:49", "remaining_time": "11:54:13"} +{"current_steps": 107, "total_steps": 8680, "loss": 1.0814614295959473, "lr": 4.88479262672811e-07, "epoch": 0.024654377880184333, "percentage": 1.23, "elapsed_time": "0:08:55", "remaining_time": "11:54:42"} +{"current_steps": 108, "total_steps": 8680, "loss": 1.0541695356369019, "lr": 4.930875576036866e-07, "epoch": 0.02488479262672811, "percentage": 1.24, "elapsed_time": "0:09:00", "remaining_time": "11:55:10"} +{"current_steps": 109, "total_steps": 8680, "loss": 1.2281692028045654, "lr": 4.976958525345622e-07, "epoch": 0.02511520737327189, "percentage": 1.26, "elapsed_time": "0:09:05", "remaining_time": "11:55:33"} +{"current_steps": 110, "total_steps": 8680, "loss": 1.2542369365692139, "lr": 5.023041474654378e-07, "epoch": 0.02534562211981567, "percentage": 1.27, "elapsed_time": "0:09:10", "remaining_time": "11:54:50"} +{"current_steps": 111, "total_steps": 8680, "loss": 1.36039137840271, "lr": 5.069124423963134e-07, "epoch": 0.025576036866359446, "percentage": 1.28, "elapsed_time": "0:09:15", "remaining_time": "11:55:04"} +{"current_steps": 112, "total_steps": 8680, "loss": 1.1092976331710815, "lr": 5.11520737327189e-07, "epoch": 0.025806451612903226, "percentage": 1.29, "elapsed_time": "0:09:20", "remaining_time": "11:54:37"} +{"current_steps": 113, "total_steps": 8680, "loss": 1.0634076595306396, "lr": 5.161290322580645e-07, "epoch": 0.026036866359447006, "percentage": 1.3, "elapsed_time": "0:09:25", "remaining_time": "11:54:00"} +{"current_steps": 114, "total_steps": 8680, "loss": 1.0024809837341309, "lr": 5.2073732718894e-07, "epoch": 0.026267281105990782, "percentage": 1.31, "elapsed_time": "0:09:31", "remaining_time": "11:56:00"} +{"current_steps": 115, "total_steps": 8680, "loss": 1.1691724061965942, "lr": 5.253456221198155e-07, "epoch": 0.026497695852534562, 
"percentage": 1.32, "elapsed_time": "0:09:36", "remaining_time": "11:55:17"} +{"current_steps": 116, "total_steps": 8680, "loss": 1.2053219079971313, "lr": 5.299539170506912e-07, "epoch": 0.026728110599078342, "percentage": 1.34, "elapsed_time": "0:09:40", "remaining_time": "11:54:48"} +{"current_steps": 117, "total_steps": 8680, "loss": 1.119420051574707, "lr": 5.345622119815668e-07, "epoch": 0.02695852534562212, "percentage": 1.35, "elapsed_time": "0:09:45", "remaining_time": "11:54:08"} +{"current_steps": 118, "total_steps": 8680, "loss": 1.1640167236328125, "lr": 5.391705069124423e-07, "epoch": 0.027188940092165898, "percentage": 1.36, "elapsed_time": "0:09:50", "remaining_time": "11:53:42"} +{"current_steps": 119, "total_steps": 8680, "loss": 1.180116057395935, "lr": 5.437788018433179e-07, "epoch": 0.027419354838709678, "percentage": 1.37, "elapsed_time": "0:09:54", "remaining_time": "11:52:41"} +{"current_steps": 120, "total_steps": 8680, "loss": 1.0726159811019897, "lr": 5.483870967741935e-07, "epoch": 0.027649769585253458, "percentage": 1.38, "elapsed_time": "0:09:59", "remaining_time": "11:52:43"} +{"current_steps": 121, "total_steps": 8680, "loss": 1.03219473361969, "lr": 5.529953917050691e-07, "epoch": 0.027880184331797234, "percentage": 1.39, "elapsed_time": "0:10:05", "remaining_time": "11:54:23"} +{"current_steps": 122, "total_steps": 8680, "loss": 1.1545735597610474, "lr": 5.576036866359447e-07, "epoch": 0.028110599078341014, "percentage": 1.41, "elapsed_time": "0:10:10", "remaining_time": "11:53:56"} +{"current_steps": 123, "total_steps": 8680, "loss": 1.2409746646881104, "lr": 5.622119815668203e-07, "epoch": 0.028341013824884794, "percentage": 1.42, "elapsed_time": "0:10:14", "remaining_time": "11:52:51"} +{"current_steps": 124, "total_steps": 8680, "loss": 1.2717409133911133, "lr": 5.668202764976958e-07, "epoch": 0.02857142857142857, "percentage": 1.43, "elapsed_time": "0:10:19", "remaining_time": "11:52:02"} +{"current_steps": 125, "total_steps": 
8680, "loss": 1.523846983909607, "lr": 5.714285714285714e-07, "epoch": 0.02880184331797235, "percentage": 1.44, "elapsed_time": "0:10:22", "remaining_time": "11:50:14"} +{"current_steps": 126, "total_steps": 8680, "loss": 1.3386890888214111, "lr": 5.760368663594469e-07, "epoch": 0.02903225806451613, "percentage": 1.45, "elapsed_time": "0:10:28", "remaining_time": "11:50:35"} +{"current_steps": 127, "total_steps": 8680, "loss": 1.3080404996871948, "lr": 5.806451612903226e-07, "epoch": 0.029262672811059907, "percentage": 1.46, "elapsed_time": "0:10:32", "remaining_time": "11:49:51"} +{"current_steps": 128, "total_steps": 8680, "loss": 1.3194537162780762, "lr": 5.852534562211982e-07, "epoch": 0.029493087557603687, "percentage": 1.47, "elapsed_time": "0:10:36", "remaining_time": "11:48:15"} +{"current_steps": 129, "total_steps": 8680, "loss": 1.0546228885650635, "lr": 5.898617511520737e-07, "epoch": 0.029723502304147466, "percentage": 1.49, "elapsed_time": "0:10:41", "remaining_time": "11:48:33"} +{"current_steps": 130, "total_steps": 8680, "loss": 1.3160395622253418, "lr": 5.944700460829493e-07, "epoch": 0.029953917050691243, "percentage": 1.5, "elapsed_time": "0:10:45", "remaining_time": "11:47:51"} +{"current_steps": 131, "total_steps": 8680, "loss": 1.3497555255889893, "lr": 5.990783410138249e-07, "epoch": 0.030184331797235023, "percentage": 1.51, "elapsed_time": "0:10:49", "remaining_time": "11:46:35"} +{"current_steps": 132, "total_steps": 8680, "loss": 1.0863350629806519, "lr": 6.036866359447004e-07, "epoch": 0.030414746543778803, "percentage": 1.52, "elapsed_time": "0:10:55", "remaining_time": "11:47:28"} +{"current_steps": 133, "total_steps": 8680, "loss": 1.1640913486480713, "lr": 6.08294930875576e-07, "epoch": 0.03064516129032258, "percentage": 1.53, "elapsed_time": "0:10:59", "remaining_time": "11:46:14"} +{"current_steps": 134, "total_steps": 8680, "loss": 1.398510217666626, "lr": 6.129032258064516e-07, "epoch": 0.03087557603686636, "percentage": 1.54, 
"elapsed_time": "0:11:05", "remaining_time": "11:47:13"} +{"current_steps": 135, "total_steps": 8680, "loss": 1.3015594482421875, "lr": 6.175115207373271e-07, "epoch": 0.03110599078341014, "percentage": 1.56, "elapsed_time": "0:11:10", "remaining_time": "11:47:26"} +{"current_steps": 136, "total_steps": 8680, "loss": 1.2786016464233398, "lr": 6.221198156682027e-07, "epoch": 0.03133640552995392, "percentage": 1.57, "elapsed_time": "0:11:15", "remaining_time": "11:47:28"} +{"current_steps": 137, "total_steps": 8680, "loss": 1.0863161087036133, "lr": 6.267281105990782e-07, "epoch": 0.031566820276497695, "percentage": 1.58, "elapsed_time": "0:11:21", "remaining_time": "11:48:03"} +{"current_steps": 138, "total_steps": 8680, "loss": 1.1590030193328857, "lr": 6.313364055299539e-07, "epoch": 0.03179723502304147, "percentage": 1.59, "elapsed_time": "0:11:25", "remaining_time": "11:47:06"} +{"current_steps": 139, "total_steps": 8680, "loss": 1.2473185062408447, "lr": 6.359447004608295e-07, "epoch": 0.032027649769585255, "percentage": 1.6, "elapsed_time": "0:11:28", "remaining_time": "11:45:22"} +{"current_steps": 140, "total_steps": 8680, "loss": 1.1982496976852417, "lr": 6.40552995391705e-07, "epoch": 0.03225806451612903, "percentage": 1.61, "elapsed_time": "0:11:32", "remaining_time": "11:44:31"} +{"current_steps": 141, "total_steps": 8680, "loss": 1.078690528869629, "lr": 6.451612903225806e-07, "epoch": 0.03248847926267281, "percentage": 1.62, "elapsed_time": "0:11:37", "remaining_time": "11:44:23"} +{"current_steps": 142, "total_steps": 8680, "loss": 1.1540311574935913, "lr": 6.497695852534562e-07, "epoch": 0.03271889400921659, "percentage": 1.64, "elapsed_time": "0:11:42", "remaining_time": "11:43:51"} +{"current_steps": 143, "total_steps": 8680, "loss": 1.319035530090332, "lr": 6.543778801843318e-07, "epoch": 0.03294930875576037, "percentage": 1.65, "elapsed_time": "0:11:46", "remaining_time": "11:43:11"} +{"current_steps": 144, "total_steps": 8680, "loss": 
1.199448585510254, "lr": 6.589861751152074e-07, "epoch": 0.03317972350230415, "percentage": 1.66, "elapsed_time": "0:11:50", "remaining_time": "11:42:20"} +{"current_steps": 145, "total_steps": 8680, "loss": 1.212646484375, "lr": 6.63594470046083e-07, "epoch": 0.03341013824884793, "percentage": 1.67, "elapsed_time": "0:11:55", "remaining_time": "11:41:57"} +{"current_steps": 146, "total_steps": 8680, "loss": 1.2833064794540405, "lr": 6.682027649769585e-07, "epoch": 0.033640552995391704, "percentage": 1.68, "elapsed_time": "0:12:01", "remaining_time": "11:42:57"} +{"current_steps": 147, "total_steps": 8680, "loss": 1.2852118015289307, "lr": 6.728110599078341e-07, "epoch": 0.03387096774193549, "percentage": 1.69, "elapsed_time": "0:12:05", "remaining_time": "11:42:02"} +{"current_steps": 148, "total_steps": 8680, "loss": 1.0287699699401855, "lr": 6.774193548387096e-07, "epoch": 0.034101382488479264, "percentage": 1.71, "elapsed_time": "0:12:09", "remaining_time": "11:40:48"} +{"current_steps": 149, "total_steps": 8680, "loss": 1.2580914497375488, "lr": 6.820276497695853e-07, "epoch": 0.03433179723502304, "percentage": 1.72, "elapsed_time": "0:12:14", "remaining_time": "11:40:49"} +{"current_steps": 150, "total_steps": 8680, "loss": 1.0557801723480225, "lr": 6.866359447004608e-07, "epoch": 0.03456221198156682, "percentage": 1.73, "elapsed_time": "0:12:19", "remaining_time": "11:40:37"} +{"current_steps": 151, "total_steps": 8680, "loss": 1.1820557117462158, "lr": 6.912442396313363e-07, "epoch": 0.0347926267281106, "percentage": 1.74, "elapsed_time": "0:12:22", "remaining_time": "11:39:18"} +{"current_steps": 152, "total_steps": 8680, "loss": 1.2777981758117676, "lr": 6.958525345622119e-07, "epoch": 0.035023041474654376, "percentage": 1.75, "elapsed_time": "0:12:27", "remaining_time": "11:39:09"} +{"current_steps": 153, "total_steps": 8680, "loss": 1.1465356349945068, "lr": 7.004608294930875e-07, "epoch": 0.03525345622119816, "percentage": 1.76, "elapsed_time": 
"0:12:33", "remaining_time": "11:39:35"} +{"current_steps": 154, "total_steps": 8680, "loss": 1.3553744554519653, "lr": 7.05069124423963e-07, "epoch": 0.035483870967741936, "percentage": 1.77, "elapsed_time": "0:12:39", "remaining_time": "11:40:34"} +{"current_steps": 155, "total_steps": 8680, "loss": 1.176223874092102, "lr": 7.096774193548387e-07, "epoch": 0.03571428571428571, "percentage": 1.79, "elapsed_time": "0:12:43", "remaining_time": "11:39:40"} +{"current_steps": 156, "total_steps": 8680, "loss": 1.1771761178970337, "lr": 7.142857142857143e-07, "epoch": 0.035944700460829496, "percentage": 1.8, "elapsed_time": "0:12:48", "remaining_time": "11:39:32"} +{"current_steps": 157, "total_steps": 8680, "loss": 1.1598860025405884, "lr": 7.188940092165898e-07, "epoch": 0.03617511520737327, "percentage": 1.81, "elapsed_time": "0:12:54", "remaining_time": "11:41:00"} +{"current_steps": 158, "total_steps": 8680, "loss": 1.0689195394515991, "lr": 7.235023041474654e-07, "epoch": 0.03640552995391705, "percentage": 1.82, "elapsed_time": "0:13:00", "remaining_time": "11:41:12"} +{"current_steps": 159, "total_steps": 8680, "loss": 1.1444990634918213, "lr": 7.281105990783409e-07, "epoch": 0.03663594470046083, "percentage": 1.83, "elapsed_time": "0:13:05", "remaining_time": "11:41:33"} +{"current_steps": 160, "total_steps": 8680, "loss": 1.2261321544647217, "lr": 7.327188940092166e-07, "epoch": 0.03686635944700461, "percentage": 1.84, "elapsed_time": "0:13:10", "remaining_time": "11:41:18"} +{"current_steps": 161, "total_steps": 8680, "loss": 0.9325876235961914, "lr": 7.373271889400922e-07, "epoch": 0.037096774193548385, "percentage": 1.85, "elapsed_time": "0:13:15", "remaining_time": "11:41:41"} +{"current_steps": 162, "total_steps": 8680, "loss": 1.071167230606079, "lr": 7.419354838709677e-07, "epoch": 0.03732718894009217, "percentage": 1.87, "elapsed_time": "0:13:20", "remaining_time": "11:41:20"} +{"current_steps": 163, "total_steps": 8680, "loss": 1.1856298446655273, "lr": 
7.465437788018433e-07, "epoch": 0.037557603686635944, "percentage": 1.88, "elapsed_time": "0:13:23", "remaining_time": "11:40:06"} +{"current_steps": 164, "total_steps": 8680, "loss": 1.13883376121521, "lr": 7.511520737327189e-07, "epoch": 0.03778801843317972, "percentage": 1.89, "elapsed_time": "0:13:29", "remaining_time": "11:40:38"} +{"current_steps": 165, "total_steps": 8680, "loss": 1.2896685600280762, "lr": 7.557603686635944e-07, "epoch": 0.038018433179723504, "percentage": 1.9, "elapsed_time": "0:13:33", "remaining_time": "11:40:00"} +{"current_steps": 166, "total_steps": 8680, "loss": 1.3122754096984863, "lr": 7.603686635944701e-07, "epoch": 0.03824884792626728, "percentage": 1.91, "elapsed_time": "0:13:39", "remaining_time": "11:40:22"} +{"current_steps": 167, "total_steps": 8680, "loss": 1.165675163269043, "lr": 7.649769585253457e-07, "epoch": 0.03847926267281106, "percentage": 1.92, "elapsed_time": "0:13:44", "remaining_time": "11:40:16"} +{"current_steps": 168, "total_steps": 8680, "loss": 1.1348214149475098, "lr": 7.695852534562211e-07, "epoch": 0.03870967741935484, "percentage": 1.94, "elapsed_time": "0:13:49", "remaining_time": "11:40:18"} +{"current_steps": 169, "total_steps": 8680, "loss": 1.287818431854248, "lr": 7.741935483870967e-07, "epoch": 0.03894009216589862, "percentage": 1.95, "elapsed_time": "0:13:53", "remaining_time": "11:39:42"} +{"current_steps": 170, "total_steps": 8680, "loss": 1.0723031759262085, "lr": 7.788018433179722e-07, "epoch": 0.03917050691244239, "percentage": 1.96, "elapsed_time": "0:13:59", "remaining_time": "11:40:00"} +{"current_steps": 171, "total_steps": 8680, "loss": 1.29054594039917, "lr": 7.834101382488479e-07, "epoch": 0.03940092165898618, "percentage": 1.97, "elapsed_time": "0:14:03", "remaining_time": "11:39:23"} +{"current_steps": 172, "total_steps": 8680, "loss": 1.201147198677063, "lr": 7.880184331797235e-07, "epoch": 0.03963133640552995, "percentage": 1.98, "elapsed_time": "0:14:07", "remaining_time": 
"11:38:39"} +{"current_steps": 173, "total_steps": 8680, "loss": 1.2529574632644653, "lr": 7.92626728110599e-07, "epoch": 0.03986175115207373, "percentage": 1.99, "elapsed_time": "0:14:11", "remaining_time": "11:38:14"} +{"current_steps": 174, "total_steps": 8680, "loss": 1.3255105018615723, "lr": 7.972350230414746e-07, "epoch": 0.04009216589861751, "percentage": 2.0, "elapsed_time": "0:14:16", "remaining_time": "11:37:43"} +{"current_steps": 175, "total_steps": 8680, "loss": 1.3167433738708496, "lr": 8.018433179723502e-07, "epoch": 0.04032258064516129, "percentage": 2.02, "elapsed_time": "0:14:20", "remaining_time": "11:37:03"} +{"current_steps": 176, "total_steps": 8680, "loss": 1.4780502319335938, "lr": 8.064516129032257e-07, "epoch": 0.04055299539170507, "percentage": 2.03, "elapsed_time": "0:14:25", "remaining_time": "11:36:45"} +{"current_steps": 177, "total_steps": 8680, "loss": 1.3096996545791626, "lr": 8.110599078341014e-07, "epoch": 0.04078341013824885, "percentage": 2.04, "elapsed_time": "0:14:30", "remaining_time": "11:36:38"} +{"current_steps": 178, "total_steps": 8680, "loss": 1.3124895095825195, "lr": 8.15668202764977e-07, "epoch": 0.041013824884792625, "percentage": 2.05, "elapsed_time": "0:14:35", "remaining_time": "11:37:16"} +{"current_steps": 179, "total_steps": 8680, "loss": 1.2589681148529053, "lr": 8.202764976958525e-07, "epoch": 0.04124423963133641, "percentage": 2.06, "elapsed_time": "0:14:39", "remaining_time": "11:36:14"} +{"current_steps": 180, "total_steps": 8680, "loss": 1.0576659440994263, "lr": 8.248847926267281e-07, "epoch": 0.041474654377880185, "percentage": 2.07, "elapsed_time": "0:14:45", "remaining_time": "11:37:09"} +{"current_steps": 181, "total_steps": 8680, "loss": 1.2647404670715332, "lr": 8.294930875576036e-07, "epoch": 0.04170506912442396, "percentage": 2.09, "elapsed_time": "0:14:51", "remaining_time": "11:37:43"} +{"current_steps": 182, "total_steps": 8680, "loss": 1.072542428970337, "lr": 8.341013824884793e-07, 
"epoch": 0.041935483870967745, "percentage": 2.1, "elapsed_time": "0:14:57", "remaining_time": "11:38:22"} +{"current_steps": 183, "total_steps": 8680, "loss": 1.2417643070220947, "lr": 8.387096774193549e-07, "epoch": 0.04216589861751152, "percentage": 2.11, "elapsed_time": "0:15:02", "remaining_time": "11:38:35"} +{"current_steps": 184, "total_steps": 8680, "loss": 1.2490241527557373, "lr": 8.433179723502303e-07, "epoch": 0.0423963133640553, "percentage": 2.12, "elapsed_time": "0:15:06", "remaining_time": "11:37:48"} +{"current_steps": 185, "total_steps": 8680, "loss": 1.1169328689575195, "lr": 8.479262672811059e-07, "epoch": 0.04262672811059908, "percentage": 2.13, "elapsed_time": "0:15:11", "remaining_time": "11:37:52"} +{"current_steps": 186, "total_steps": 8680, "loss": 1.1799774169921875, "lr": 8.525345622119815e-07, "epoch": 0.04285714285714286, "percentage": 2.14, "elapsed_time": "0:15:17", "remaining_time": "11:38:13"} +{"current_steps": 187, "total_steps": 8680, "loss": 0.9655753374099731, "lr": 8.57142857142857e-07, "epoch": 0.043087557603686634, "percentage": 2.15, "elapsed_time": "0:15:22", "remaining_time": "11:38:11"} +{"current_steps": 188, "total_steps": 8680, "loss": 1.2863562107086182, "lr": 8.617511520737327e-07, "epoch": 0.04331797235023042, "percentage": 2.17, "elapsed_time": "0:15:27", "remaining_time": "11:38:21"} +{"current_steps": 189, "total_steps": 8680, "loss": 1.056877613067627, "lr": 8.663594470046083e-07, "epoch": 0.043548387096774194, "percentage": 2.18, "elapsed_time": "0:15:31", "remaining_time": "11:37:48"} +{"current_steps": 190, "total_steps": 8680, "loss": 1.2128019332885742, "lr": 8.709677419354838e-07, "epoch": 0.04377880184331797, "percentage": 2.19, "elapsed_time": "0:15:36", "remaining_time": "11:37:29"} +{"current_steps": 191, "total_steps": 8680, "loss": 1.1397441625595093, "lr": 8.755760368663594e-07, "epoch": 0.044009216589861753, "percentage": 2.2, "elapsed_time": "0:15:41", "remaining_time": "11:37:23"} 
+{"current_steps": 192, "total_steps": 8680, "loss": 1.062232255935669, "lr": 8.801843317972349e-07, "epoch": 0.04423963133640553, "percentage": 2.21, "elapsed_time": "0:15:47", "remaining_time": "11:38:28"} +{"current_steps": 193, "total_steps": 8680, "loss": 1.0102736949920654, "lr": 8.847926267281106e-07, "epoch": 0.044470046082949306, "percentage": 2.22, "elapsed_time": "0:15:53", "remaining_time": "11:38:52"} +{"current_steps": 194, "total_steps": 8680, "loss": 1.155517339706421, "lr": 8.894009216589862e-07, "epoch": 0.04470046082949309, "percentage": 2.24, "elapsed_time": "0:15:57", "remaining_time": "11:38:05"} +{"current_steps": 195, "total_steps": 8680, "loss": 1.042372703552246, "lr": 8.940092165898617e-07, "epoch": 0.044930875576036866, "percentage": 2.25, "elapsed_time": "0:16:03", "remaining_time": "11:39:05"} +{"current_steps": 196, "total_steps": 8680, "loss": 1.1158320903778076, "lr": 8.986175115207373e-07, "epoch": 0.04516129032258064, "percentage": 2.26, "elapsed_time": "0:16:09", "remaining_time": "11:39:41"} +{"current_steps": 197, "total_steps": 8680, "loss": 1.4693050384521484, "lr": 9.032258064516129e-07, "epoch": 0.045391705069124426, "percentage": 2.27, "elapsed_time": "0:16:14", "remaining_time": "11:39:13"} +{"current_steps": 198, "total_steps": 8680, "loss": 1.0990574359893799, "lr": 9.078341013824884e-07, "epoch": 0.0456221198156682, "percentage": 2.28, "elapsed_time": "0:16:20", "remaining_time": "11:39:56"} +{"current_steps": 199, "total_steps": 8680, "loss": 1.0259861946105957, "lr": 9.124423963133641e-07, "epoch": 0.04585253456221198, "percentage": 2.29, "elapsed_time": "0:16:24", "remaining_time": "11:39:28"} +{"current_steps": 200, "total_steps": 8680, "loss": 1.2708477973937988, "lr": 9.170506912442397e-07, "epoch": 0.04608294930875576, "percentage": 2.3, "elapsed_time": "0:16:29", "remaining_time": "11:39:03"} +{"current_steps": 201, "total_steps": 8680, "loss": 1.052978754043579, "lr": 9.216589861751152e-07, "epoch": 
0.04631336405529954, "percentage": 2.32, "elapsed_time": "0:16:37", "remaining_time": "11:41:16"} +{"current_steps": 202, "total_steps": 8680, "loss": 1.3405938148498535, "lr": 9.262672811059907e-07, "epoch": 0.046543778801843315, "percentage": 2.33, "elapsed_time": "0:16:41", "remaining_time": "11:40:17"} +{"current_steps": 203, "total_steps": 8680, "loss": 0.9464558362960815, "lr": 9.308755760368662e-07, "epoch": 0.0467741935483871, "percentage": 2.34, "elapsed_time": "0:16:47", "remaining_time": "11:41:01"} +{"current_steps": 204, "total_steps": 8680, "loss": 1.3019077777862549, "lr": 9.354838709677418e-07, "epoch": 0.047004608294930875, "percentage": 2.35, "elapsed_time": "0:16:51", "remaining_time": "11:40:13"} +{"current_steps": 205, "total_steps": 8680, "loss": 1.146841049194336, "lr": 9.400921658986175e-07, "epoch": 0.04723502304147465, "percentage": 2.36, "elapsed_time": "0:16:55", "remaining_time": "11:39:45"} +{"current_steps": 206, "total_steps": 8680, "loss": 1.106084942817688, "lr": 9.44700460829493e-07, "epoch": 0.047465437788018434, "percentage": 2.37, "elapsed_time": "0:17:01", "remaining_time": "11:40:05"} +{"current_steps": 207, "total_steps": 8680, "loss": 1.2930629253387451, "lr": 9.493087557603686e-07, "epoch": 0.04769585253456221, "percentage": 2.38, "elapsed_time": "0:17:05", "remaining_time": "11:39:31"} +{"current_steps": 208, "total_steps": 8680, "loss": 1.1637842655181885, "lr": 9.539170506912442e-07, "epoch": 0.047926267281105994, "percentage": 2.4, "elapsed_time": "0:17:10", "remaining_time": "11:39:24"} +{"current_steps": 209, "total_steps": 8680, "loss": 1.1753308773040771, "lr": 9.585253456221198e-07, "epoch": 0.04815668202764977, "percentage": 2.41, "elapsed_time": "0:17:14", "remaining_time": "11:39:00"} +{"current_steps": 210, "total_steps": 8680, "loss": 1.2304046154022217, "lr": 9.631336405529954e-07, "epoch": 0.04838709677419355, "percentage": 2.42, "elapsed_time": "0:17:19", "remaining_time": "11:38:59"} +{"current_steps": 
211, "total_steps": 8680, "loss": 1.4326789379119873, "lr": 9.67741935483871e-07, "epoch": 0.04861751152073733, "percentage": 2.43, "elapsed_time": "0:17:24", "remaining_time": "11:38:56"} +{"current_steps": 212, "total_steps": 8680, "loss": 1.0759861469268799, "lr": 9.723502304147466e-07, "epoch": 0.04884792626728111, "percentage": 2.44, "elapsed_time": "0:17:29", "remaining_time": "11:38:57"} +{"current_steps": 213, "total_steps": 8680, "loss": 1.1514811515808105, "lr": 9.76958525345622e-07, "epoch": 0.04907834101382488, "percentage": 2.45, "elapsed_time": "0:17:35", "remaining_time": "11:39:25"} +{"current_steps": 214, "total_steps": 8680, "loss": 1.1618578433990479, "lr": 9.815668202764976e-07, "epoch": 0.04930875576036867, "percentage": 2.47, "elapsed_time": "0:17:40", "remaining_time": "11:39:08"} +{"current_steps": 215, "total_steps": 8680, "loss": 1.0321345329284668, "lr": 9.861751152073732e-07, "epoch": 0.04953917050691244, "percentage": 2.48, "elapsed_time": "0:17:45", "remaining_time": "11:39:06"} +{"current_steps": 216, "total_steps": 8680, "loss": 1.0391438007354736, "lr": 9.907834101382488e-07, "epoch": 0.04976958525345622, "percentage": 2.49, "elapsed_time": "0:17:53", "remaining_time": "11:40:53"} +{"current_steps": 217, "total_steps": 8680, "loss": 1.080418586730957, "lr": 9.953917050691244e-07, "epoch": 0.05, "percentage": 2.5, "elapsed_time": "0:17:58", "remaining_time": "11:41:02"} +{"current_steps": 218, "total_steps": 8680, "loss": 1.2095223665237427, "lr": 1e-06, "epoch": 0.05023041474654378, "percentage": 2.51, "elapsed_time": "0:18:02", "remaining_time": "11:40:30"} +{"current_steps": 219, "total_steps": 8680, "loss": 1.1144485473632812, "lr": 1.0046082949308756e-06, "epoch": 0.050460829493087556, "percentage": 2.52, "elapsed_time": "0:18:07", "remaining_time": "11:40:22"} +{"current_steps": 220, "total_steps": 8680, "loss": 1.2560818195343018, "lr": 1.0092165898617511e-06, "epoch": 0.05069124423963134, "percentage": 2.53, "elapsed_time": 
"0:18:11", "remaining_time": "11:39:40"} +{"current_steps": 221, "total_steps": 8680, "loss": 1.1043426990509033, "lr": 1.0138248847926267e-06, "epoch": 0.050921658986175115, "percentage": 2.55, "elapsed_time": "0:18:16", "remaining_time": "11:39:41"} +{"current_steps": 222, "total_steps": 8680, "loss": 1.0725831985473633, "lr": 1.0184331797235021e-06, "epoch": 0.05115207373271889, "percentage": 2.56, "elapsed_time": "0:18:20", "remaining_time": "11:39:00"} +{"current_steps": 223, "total_steps": 8680, "loss": 0.9764004349708557, "lr": 1.023041474654378e-06, "epoch": 0.051382488479262675, "percentage": 2.57, "elapsed_time": "0:18:25", "remaining_time": "11:39:00"} +{"current_steps": 224, "total_steps": 8680, "loss": 1.2172776460647583, "lr": 1.0276497695852535e-06, "epoch": 0.05161290322580645, "percentage": 2.58, "elapsed_time": "0:18:31", "remaining_time": "11:39:22"} +{"current_steps": 225, "total_steps": 8680, "loss": 1.1065070629119873, "lr": 1.032258064516129e-06, "epoch": 0.05184331797235023, "percentage": 2.59, "elapsed_time": "0:18:35", "remaining_time": "11:38:43"} +{"current_steps": 226, "total_steps": 8680, "loss": 1.0840628147125244, "lr": 1.0368663594470047e-06, "epoch": 0.05207373271889401, "percentage": 2.6, "elapsed_time": "0:18:39", "remaining_time": "11:38:15"} +{"current_steps": 227, "total_steps": 8680, "loss": 1.109276294708252, "lr": 1.04147465437788e-06, "epoch": 0.05230414746543779, "percentage": 2.62, "elapsed_time": "0:18:45", "remaining_time": "11:38:28"} +{"current_steps": 228, "total_steps": 8680, "loss": 1.186352252960205, "lr": 1.0460829493087557e-06, "epoch": 0.052534562211981564, "percentage": 2.63, "elapsed_time": "0:18:51", "remaining_time": "11:39:06"} +{"current_steps": 229, "total_steps": 8680, "loss": 1.1605256795883179, "lr": 1.050691244239631e-06, "epoch": 0.05276497695852535, "percentage": 2.64, "elapsed_time": "0:18:55", "remaining_time": "11:38:42"} +{"current_steps": 230, "total_steps": 8680, "loss": 1.0269646644592285, 
"lr": 1.0552995391705069e-06, "epoch": 0.052995391705069124, "percentage": 2.65, "elapsed_time": "0:19:02", "remaining_time": "11:39:45"} +{"current_steps": 231, "total_steps": 8680, "loss": 0.9595874547958374, "lr": 1.0599078341013825e-06, "epoch": 0.0532258064516129, "percentage": 2.66, "elapsed_time": "0:19:07", "remaining_time": "11:39:44"} +{"current_steps": 232, "total_steps": 8680, "loss": 1.1606154441833496, "lr": 1.0645161290322579e-06, "epoch": 0.053456221198156684, "percentage": 2.67, "elapsed_time": "0:19:11", "remaining_time": "11:39:04"} +{"current_steps": 233, "total_steps": 8680, "loss": 0.9920428991317749, "lr": 1.0691244239631337e-06, "epoch": 0.05368663594470046, "percentage": 2.68, "elapsed_time": "0:19:18", "remaining_time": "11:40:09"} +{"current_steps": 234, "total_steps": 8680, "loss": 1.2124650478363037, "lr": 1.073732718894009e-06, "epoch": 0.05391705069124424, "percentage": 2.7, "elapsed_time": "0:19:23", "remaining_time": "11:39:48"} +{"current_steps": 235, "total_steps": 8680, "loss": 1.2237420082092285, "lr": 1.0783410138248847e-06, "epoch": 0.05414746543778802, "percentage": 2.71, "elapsed_time": "0:19:28", "remaining_time": "11:39:45"} +{"current_steps": 236, "total_steps": 8680, "loss": 1.1484715938568115, "lr": 1.0829493087557605e-06, "epoch": 0.054377880184331796, "percentage": 2.72, "elapsed_time": "0:19:33", "remaining_time": "11:39:56"} +{"current_steps": 237, "total_steps": 8680, "loss": 1.2143291234970093, "lr": 1.0875576036866358e-06, "epoch": 0.05460829493087557, "percentage": 2.73, "elapsed_time": "0:19:38", "remaining_time": "11:39:27"} +{"current_steps": 238, "total_steps": 8680, "loss": 1.1995420455932617, "lr": 1.0921658986175114e-06, "epoch": 0.054838709677419356, "percentage": 2.74, "elapsed_time": "0:19:42", "remaining_time": "11:38:51"} +{"current_steps": 239, "total_steps": 8680, "loss": 1.2577292919158936, "lr": 1.096774193548387e-06, "epoch": 0.05506912442396313, "percentage": 2.75, "elapsed_time": "0:19:47", 
"remaining_time": "11:38:44"} +{"current_steps": 240, "total_steps": 8680, "loss": 1.2170629501342773, "lr": 1.1013824884792626e-06, "epoch": 0.055299539170506916, "percentage": 2.76, "elapsed_time": "0:19:52", "remaining_time": "11:38:51"} +{"current_steps": 241, "total_steps": 8680, "loss": 0.8318669199943542, "lr": 1.1059907834101382e-06, "epoch": 0.05552995391705069, "percentage": 2.78, "elapsed_time": "0:19:58", "remaining_time": "11:39:32"} +{"current_steps": 242, "total_steps": 8680, "loss": 1.0760166645050049, "lr": 1.1105990783410138e-06, "epoch": 0.05576036866359447, "percentage": 2.79, "elapsed_time": "0:20:05", "remaining_time": "11:40:20"} +{"current_steps": 243, "total_steps": 8680, "loss": 1.2437031269073486, "lr": 1.1152073732718894e-06, "epoch": 0.05599078341013825, "percentage": 2.8, "elapsed_time": "0:20:10", "remaining_time": "11:40:25"} +{"current_steps": 244, "total_steps": 8680, "loss": 1.1680852174758911, "lr": 1.1198156682027648e-06, "epoch": 0.05622119815668203, "percentage": 2.81, "elapsed_time": "0:20:14", "remaining_time": "11:40:02"} +{"current_steps": 245, "total_steps": 8680, "loss": 1.051478385925293, "lr": 1.1244239631336406e-06, "epoch": 0.056451612903225805, "percentage": 2.82, "elapsed_time": "0:20:19", "remaining_time": "11:39:54"} +{"current_steps": 246, "total_steps": 8680, "loss": 1.1433100700378418, "lr": 1.1290322580645162e-06, "epoch": 0.05668202764976959, "percentage": 2.83, "elapsed_time": "0:20:25", "remaining_time": "11:40:12"} +{"current_steps": 247, "total_steps": 8680, "loss": 0.9521546363830566, "lr": 1.1336405529953916e-06, "epoch": 0.056912442396313365, "percentage": 2.85, "elapsed_time": "0:20:29", "remaining_time": "11:39:44"} +{"current_steps": 248, "total_steps": 8680, "loss": 1.226189136505127, "lr": 1.1382488479262674e-06, "epoch": 0.05714285714285714, "percentage": 2.86, "elapsed_time": "0:20:34", "remaining_time": "11:39:35"} +{"current_steps": 249, "total_steps": 8680, "loss": 1.108027696609497, "lr": 
1.1428571428571428e-06, "epoch": 0.057373271889400924, "percentage": 2.87, "elapsed_time": "0:20:39", "remaining_time": "11:39:12"} +{"current_steps": 250, "total_steps": 8680, "loss": 1.042288064956665, "lr": 1.1474654377880184e-06, "epoch": 0.0576036866359447, "percentage": 2.88, "elapsed_time": "0:20:44", "remaining_time": "11:39:19"} +{"current_steps": 251, "total_steps": 8680, "loss": 1.193603754043579, "lr": 1.1520737327188938e-06, "epoch": 0.05783410138248848, "percentage": 2.89, "elapsed_time": "0:20:48", "remaining_time": "11:38:46"} +{"current_steps": 252, "total_steps": 8680, "loss": 1.193584680557251, "lr": 1.1566820276497696e-06, "epoch": 0.05806451612903226, "percentage": 2.9, "elapsed_time": "0:20:53", "remaining_time": "11:38:31"} +{"current_steps": 253, "total_steps": 8680, "loss": 1.2318934202194214, "lr": 1.1612903225806452e-06, "epoch": 0.05829493087557604, "percentage": 2.91, "elapsed_time": "0:20:58", "remaining_time": "11:38:46"} +{"current_steps": 254, "total_steps": 8680, "loss": 1.1626521348953247, "lr": 1.1658986175115205e-06, "epoch": 0.05852534562211981, "percentage": 2.93, "elapsed_time": "0:21:02", "remaining_time": "11:38:12"} +{"current_steps": 255, "total_steps": 8680, "loss": 1.2402286529541016, "lr": 1.1705069124423963e-06, "epoch": 0.0587557603686636, "percentage": 2.94, "elapsed_time": "0:21:07", "remaining_time": "11:37:46"} +{"current_steps": 256, "total_steps": 8680, "loss": 1.190323829650879, "lr": 1.1751152073732717e-06, "epoch": 0.05898617511520737, "percentage": 2.95, "elapsed_time": "0:21:12", "remaining_time": "11:37:53"} +{"current_steps": 257, "total_steps": 8680, "loss": 1.121636986732483, "lr": 1.1797235023041473e-06, "epoch": 0.05921658986175115, "percentage": 2.96, "elapsed_time": "0:21:18", "remaining_time": "11:38:33"} +{"current_steps": 258, "total_steps": 8680, "loss": 1.099304437637329, "lr": 1.1843317972350231e-06, "epoch": 0.05944700460829493, "percentage": 2.97, "elapsed_time": "0:21:22", 
"remaining_time": "11:37:52"} +{"current_steps": 259, "total_steps": 8680, "loss": 1.1730690002441406, "lr": 1.1889400921658985e-06, "epoch": 0.05967741935483871, "percentage": 2.98, "elapsed_time": "0:21:27", "remaining_time": "11:37:54"} +{"current_steps": 260, "total_steps": 8680, "loss": 1.1450574398040771, "lr": 1.1935483870967741e-06, "epoch": 0.059907834101382486, "percentage": 3.0, "elapsed_time": "0:21:31", "remaining_time": "11:37:19"} +{"current_steps": 261, "total_steps": 8680, "loss": 1.1435421705245972, "lr": 1.1981566820276497e-06, "epoch": 0.06013824884792627, "percentage": 3.01, "elapsed_time": "0:21:36", "remaining_time": "11:36:48"} +{"current_steps": 262, "total_steps": 8680, "loss": 1.2153000831604004, "lr": 1.2027649769585253e-06, "epoch": 0.060368663594470046, "percentage": 3.02, "elapsed_time": "0:21:40", "remaining_time": "11:36:38"} +{"current_steps": 263, "total_steps": 8680, "loss": 1.0380406379699707, "lr": 1.207373271889401e-06, "epoch": 0.06059907834101382, "percentage": 3.03, "elapsed_time": "0:21:46", "remaining_time": "11:36:48"} +{"current_steps": 264, "total_steps": 8680, "loss": 1.1639207601547241, "lr": 1.2119815668202765e-06, "epoch": 0.060829493087557605, "percentage": 3.04, "elapsed_time": "0:21:51", "remaining_time": "11:36:37"} +{"current_steps": 265, "total_steps": 8680, "loss": 1.1862819194793701, "lr": 1.216589861751152e-06, "epoch": 0.06105990783410138, "percentage": 3.05, "elapsed_time": "0:21:55", "remaining_time": "11:36:02"} +{"current_steps": 266, "total_steps": 8680, "loss": 1.2122020721435547, "lr": 1.2211981566820275e-06, "epoch": 0.06129032258064516, "percentage": 3.06, "elapsed_time": "0:21:59", "remaining_time": "11:35:53"} +{"current_steps": 267, "total_steps": 8680, "loss": 1.1201646327972412, "lr": 1.2258064516129033e-06, "epoch": 0.06152073732718894, "percentage": 3.08, "elapsed_time": "0:22:06", "remaining_time": "11:36:31"} +{"current_steps": 268, "total_steps": 8680, "loss": 0.9520926475524902, "lr": 
1.2304147465437787e-06, "epoch": 0.06175115207373272, "percentage": 3.09, "elapsed_time": "0:22:11", "remaining_time": "11:36:43"} +{"current_steps": 269, "total_steps": 8680, "loss": 1.0426976680755615, "lr": 1.2350230414746543e-06, "epoch": 0.061981566820276494, "percentage": 3.1, "elapsed_time": "0:22:17", "remaining_time": "11:36:54"} +{"current_steps": 270, "total_steps": 8680, "loss": 0.934493899345398, "lr": 1.23963133640553e-06, "epoch": 0.06221198156682028, "percentage": 3.11, "elapsed_time": "0:22:21", "remaining_time": "11:36:24"} +{"current_steps": 271, "total_steps": 8680, "loss": 1.23980712890625, "lr": 1.2442396313364054e-06, "epoch": 0.062442396313364054, "percentage": 3.12, "elapsed_time": "0:22:26", "remaining_time": "11:36:25"} +{"current_steps": 272, "total_steps": 8680, "loss": 1.094742774963379, "lr": 1.248847926267281e-06, "epoch": 0.06267281105990784, "percentage": 3.13, "elapsed_time": "0:22:31", "remaining_time": "11:36:19"} +{"current_steps": 273, "total_steps": 8680, "loss": 1.0271551609039307, "lr": 1.2534562211981564e-06, "epoch": 0.06290322580645161, "percentage": 3.15, "elapsed_time": "0:22:35", "remaining_time": "11:35:48"} +{"current_steps": 274, "total_steps": 8680, "loss": 1.159210205078125, "lr": 1.2580645161290322e-06, "epoch": 0.06313364055299539, "percentage": 3.16, "elapsed_time": "0:22:39", "remaining_time": "11:34:56"} +{"current_steps": 275, "total_steps": 8680, "loss": 1.127510666847229, "lr": 1.2626728110599078e-06, "epoch": 0.06336405529953917, "percentage": 3.17, "elapsed_time": "0:22:43", "remaining_time": "11:34:19"} +{"current_steps": 276, "total_steps": 8680, "loss": 1.1371517181396484, "lr": 1.2672811059907832e-06, "epoch": 0.06359447004608294, "percentage": 3.18, "elapsed_time": "0:22:47", "remaining_time": "11:33:57"} +{"current_steps": 277, "total_steps": 8680, "loss": 1.0296730995178223, "lr": 1.271889400921659e-06, "epoch": 0.06382488479262673, "percentage": 3.19, "elapsed_time": "0:22:52", "remaining_time": 
"11:33:53"} +{"current_steps": 278, "total_steps": 8680, "loss": 1.036975383758545, "lr": 1.2764976958525344e-06, "epoch": 0.06405529953917051, "percentage": 3.2, "elapsed_time": "0:22:58", "remaining_time": "11:34:34"} +{"current_steps": 279, "total_steps": 8680, "loss": 1.2120393514633179, "lr": 1.28110599078341e-06, "epoch": 0.06428571428571428, "percentage": 3.21, "elapsed_time": "0:23:03", "remaining_time": "11:34:19"} +{"current_steps": 280, "total_steps": 8680, "loss": 1.0084068775177002, "lr": 1.2857142857142858e-06, "epoch": 0.06451612903225806, "percentage": 3.23, "elapsed_time": "0:23:07", "remaining_time": "11:33:56"} +{"current_steps": 281, "total_steps": 8680, "loss": 1.2005786895751953, "lr": 1.2903225806451612e-06, "epoch": 0.06474654377880185, "percentage": 3.24, "elapsed_time": "0:23:12", "remaining_time": "11:33:35"} +{"current_steps": 282, "total_steps": 8680, "loss": 1.1506783962249756, "lr": 1.2949308755760368e-06, "epoch": 0.06497695852534562, "percentage": 3.25, "elapsed_time": "0:23:16", "remaining_time": "11:33:10"} +{"current_steps": 283, "total_steps": 8680, "loss": 1.1219947338104248, "lr": 1.2995391705069124e-06, "epoch": 0.0652073732718894, "percentage": 3.26, "elapsed_time": "0:23:20", "remaining_time": "11:32:31"} +{"current_steps": 284, "total_steps": 8680, "loss": 1.2041170597076416, "lr": 1.304147465437788e-06, "epoch": 0.06543778801843318, "percentage": 3.27, "elapsed_time": "0:23:24", "remaining_time": "11:32:11"} +{"current_steps": 285, "total_steps": 8680, "loss": 1.0903037786483765, "lr": 1.3087557603686636e-06, "epoch": 0.06566820276497695, "percentage": 3.28, "elapsed_time": "0:23:30", "remaining_time": "11:32:23"} +{"current_steps": 286, "total_steps": 8680, "loss": 1.2140064239501953, "lr": 1.3133640552995392e-06, "epoch": 0.06589861751152074, "percentage": 3.29, "elapsed_time": "0:23:33", "remaining_time": "11:31:39"} +{"current_steps": 287, "total_steps": 8680, "loss": 1.3026092052459717, "lr": 1.3179723502304148e-06, 
"epoch": 0.06612903225806452, "percentage": 3.31, "elapsed_time": "0:23:38", "remaining_time": "11:31:08"} +{"current_steps": 288, "total_steps": 8680, "loss": 1.0937910079956055, "lr": 1.3225806451612901e-06, "epoch": 0.0663594470046083, "percentage": 3.32, "elapsed_time": "0:23:42", "remaining_time": "11:31:02"} +{"current_steps": 289, "total_steps": 8680, "loss": 1.1768109798431396, "lr": 1.327188940092166e-06, "epoch": 0.06658986175115207, "percentage": 3.33, "elapsed_time": "0:23:46", "remaining_time": "11:30:12"} +{"current_steps": 290, "total_steps": 8680, "loss": 1.0796440839767456, "lr": 1.3317972350230413e-06, "epoch": 0.06682027649769585, "percentage": 3.34, "elapsed_time": "0:23:50", "remaining_time": "11:29:35"} +{"current_steps": 291, "total_steps": 8680, "loss": 0.9972932934761047, "lr": 1.336405529953917e-06, "epoch": 0.06705069124423964, "percentage": 3.35, "elapsed_time": "0:23:55", "remaining_time": "11:29:51"} +{"current_steps": 292, "total_steps": 8680, "loss": 0.9860717058181763, "lr": 1.3410138248847927e-06, "epoch": 0.06728110599078341, "percentage": 3.36, "elapsed_time": "0:24:00", "remaining_time": "11:29:40"} +{"current_steps": 293, "total_steps": 8680, "loss": 1.045119047164917, "lr": 1.3456221198156681e-06, "epoch": 0.06751152073732719, "percentage": 3.38, "elapsed_time": "0:24:06", "remaining_time": "11:30:17"} +{"current_steps": 294, "total_steps": 8680, "loss": 1.2740920782089233, "lr": 1.3502304147465437e-06, "epoch": 0.06774193548387097, "percentage": 3.39, "elapsed_time": "0:24:11", "remaining_time": "11:30:04"} +{"current_steps": 295, "total_steps": 8680, "loss": 1.0860114097595215, "lr": 1.354838709677419e-06, "epoch": 0.06797235023041474, "percentage": 3.4, "elapsed_time": "0:24:15", "remaining_time": "11:29:31"} +{"current_steps": 296, "total_steps": 8680, "loss": 1.111539602279663, "lr": 1.359447004608295e-06, "epoch": 0.06820276497695853, "percentage": 3.41, "elapsed_time": "0:24:20", "remaining_time": "11:29:23"} 
+{"current_steps": 297, "total_steps": 8680, "loss": 1.1628870964050293, "lr": 1.3640552995391705e-06, "epoch": 0.06843317972350231, "percentage": 3.42, "elapsed_time": "0:24:24", "remaining_time": "11:28:58"} +{"current_steps": 298, "total_steps": 8680, "loss": 1.042768955230713, "lr": 1.3686635944700459e-06, "epoch": 0.06866359447004608, "percentage": 3.43, "elapsed_time": "0:24:28", "remaining_time": "11:28:26"} +{"current_steps": 299, "total_steps": 8680, "loss": 0.9970331192016602, "lr": 1.3732718894009217e-06, "epoch": 0.06889400921658986, "percentage": 3.44, "elapsed_time": "0:24:33", "remaining_time": "11:28:13"} +{"current_steps": 300, "total_steps": 8680, "loss": 1.1270179748535156, "lr": 1.377880184331797e-06, "epoch": 0.06912442396313365, "percentage": 3.46, "elapsed_time": "0:24:38", "remaining_time": "11:28:25"} +{"current_steps": 301, "total_steps": 8680, "loss": 0.9505646824836731, "lr": 1.3824884792626727e-06, "epoch": 0.06935483870967742, "percentage": 3.47, "elapsed_time": "0:24:44", "remaining_time": "11:28:51"} +{"current_steps": 302, "total_steps": 8680, "loss": 1.0997588634490967, "lr": 1.3870967741935485e-06, "epoch": 0.0695852534562212, "percentage": 3.48, "elapsed_time": "0:24:49", "remaining_time": "11:28:31"} +{"current_steps": 303, "total_steps": 8680, "loss": 1.1512106657028198, "lr": 1.3917050691244239e-06, "epoch": 0.06981566820276498, "percentage": 3.49, "elapsed_time": "0:24:53", "remaining_time": "11:27:57"} +{"current_steps": 304, "total_steps": 8680, "loss": 1.1340759992599487, "lr": 1.3963133640552995e-06, "epoch": 0.07004608294930875, "percentage": 3.5, "elapsed_time": "0:24:57", "remaining_time": "11:27:40"} +{"current_steps": 305, "total_steps": 8680, "loss": 1.187511682510376, "lr": 1.400921658986175e-06, "epoch": 0.07027649769585254, "percentage": 3.51, "elapsed_time": "0:25:01", "remaining_time": "11:27:18"} +{"current_steps": 306, "total_steps": 8680, "loss": 1.0711122751235962, "lr": 1.4055299539170507e-06, "epoch": 
0.07050691244239632, "percentage": 3.53, "elapsed_time": "0:25:06", "remaining_time": "11:27:13"} +{"current_steps": 307, "total_steps": 8680, "loss": 0.9636896848678589, "lr": 1.410138248847926e-06, "epoch": 0.07073732718894009, "percentage": 3.54, "elapsed_time": "0:25:11", "remaining_time": "11:27:17"} +{"current_steps": 308, "total_steps": 8680, "loss": 1.0506833791732788, "lr": 1.4147465437788018e-06, "epoch": 0.07096774193548387, "percentage": 3.55, "elapsed_time": "0:25:16", "remaining_time": "11:27:07"} +{"current_steps": 309, "total_steps": 8680, "loss": 1.1076349020004272, "lr": 1.4193548387096774e-06, "epoch": 0.07119815668202766, "percentage": 3.56, "elapsed_time": "0:25:22", "remaining_time": "11:27:25"} +{"current_steps": 310, "total_steps": 8680, "loss": 1.0878944396972656, "lr": 1.4239631336405528e-06, "epoch": 0.07142857142857142, "percentage": 3.57, "elapsed_time": "0:25:27", "remaining_time": "11:27:14"} +{"current_steps": 311, "total_steps": 8680, "loss": 1.0808600187301636, "lr": 1.4285714285714286e-06, "epoch": 0.07165898617511521, "percentage": 3.58, "elapsed_time": "0:25:33", "remaining_time": "11:27:44"} +{"current_steps": 312, "total_steps": 8680, "loss": 1.2117588520050049, "lr": 1.433179723502304e-06, "epoch": 0.07188940092165899, "percentage": 3.59, "elapsed_time": "0:25:37", "remaining_time": "11:27:19"} +{"current_steps": 313, "total_steps": 8680, "loss": 1.0899101495742798, "lr": 1.4377880184331796e-06, "epoch": 0.07211981566820276, "percentage": 3.61, "elapsed_time": "0:25:42", "remaining_time": "11:27:21"} +{"current_steps": 314, "total_steps": 8680, "loss": 0.9540426135063171, "lr": 1.4423963133640554e-06, "epoch": 0.07235023041474654, "percentage": 3.62, "elapsed_time": "0:25:48", "remaining_time": "11:27:31"} +{"current_steps": 315, "total_steps": 8680, "loss": 1.1170068979263306, "lr": 1.4470046082949308e-06, "epoch": 0.07258064516129033, "percentage": 3.63, "elapsed_time": "0:25:52", "remaining_time": "11:27:11"} 
+{"current_steps": 316, "total_steps": 8680, "loss": 1.2030160427093506, "lr": 1.4516129032258064e-06, "epoch": 0.0728110599078341, "percentage": 3.64, "elapsed_time": "0:25:57", "remaining_time": "11:26:52"} +{"current_steps": 317, "total_steps": 8680, "loss": 1.1599903106689453, "lr": 1.4562211981566818e-06, "epoch": 0.07304147465437788, "percentage": 3.65, "elapsed_time": "0:26:02", "remaining_time": "11:27:12"} +{"current_steps": 318, "total_steps": 8680, "loss": 1.0777950286865234, "lr": 1.4608294930875576e-06, "epoch": 0.07327188940092166, "percentage": 3.66, "elapsed_time": "0:26:07", "remaining_time": "11:27:05"} +{"current_steps": 319, "total_steps": 8680, "loss": 1.1250553131103516, "lr": 1.4654377880184332e-06, "epoch": 0.07350230414746543, "percentage": 3.68, "elapsed_time": "0:26:12", "remaining_time": "11:26:53"} +{"current_steps": 320, "total_steps": 8680, "loss": 1.10176420211792, "lr": 1.4700460829493086e-06, "epoch": 0.07373271889400922, "percentage": 3.69, "elapsed_time": "0:26:16", "remaining_time": "11:26:33"} +{"current_steps": 321, "total_steps": 8680, "loss": 1.111799716949463, "lr": 1.4746543778801844e-06, "epoch": 0.073963133640553, "percentage": 3.7, "elapsed_time": "0:26:21", "remaining_time": "11:26:14"} +{"current_steps": 322, "total_steps": 8680, "loss": 1.1555054187774658, "lr": 1.4792626728110598e-06, "epoch": 0.07419354838709677, "percentage": 3.71, "elapsed_time": "0:26:27", "remaining_time": "11:26:55"} +{"current_steps": 323, "total_steps": 8680, "loss": 1.0977535247802734, "lr": 1.4838709677419353e-06, "epoch": 0.07442396313364055, "percentage": 3.72, "elapsed_time": "0:26:32", "remaining_time": "11:26:30"} +{"current_steps": 324, "total_steps": 8680, "loss": 0.9058012962341309, "lr": 1.4884792626728112e-06, "epoch": 0.07465437788018434, "percentage": 3.73, "elapsed_time": "0:26:37", "remaining_time": "11:26:27"} +{"current_steps": 325, "total_steps": 8680, "loss": 1.1147960424423218, "lr": 1.4930875576036865e-06, "epoch": 
0.0748847926267281, "percentage": 3.74, "elapsed_time": "0:26:41", "remaining_time": "11:26:14"} +{"current_steps": 326, "total_steps": 8680, "loss": 1.1315648555755615, "lr": 1.4976958525345621e-06, "epoch": 0.07511520737327189, "percentage": 3.76, "elapsed_time": "0:26:47", "remaining_time": "11:26:39"} +{"current_steps": 327, "total_steps": 8680, "loss": 0.9134868383407593, "lr": 1.5023041474654377e-06, "epoch": 0.07534562211981567, "percentage": 3.77, "elapsed_time": "0:26:52", "remaining_time": "11:26:35"} +{"current_steps": 328, "total_steps": 8680, "loss": 1.017493724822998, "lr": 1.5069124423963133e-06, "epoch": 0.07557603686635944, "percentage": 3.78, "elapsed_time": "0:26:57", "remaining_time": "11:26:25"} +{"current_steps": 329, "total_steps": 8680, "loss": 1.220658540725708, "lr": 1.5115207373271887e-06, "epoch": 0.07580645161290323, "percentage": 3.79, "elapsed_time": "0:27:00", "remaining_time": "11:25:44"} +{"current_steps": 330, "total_steps": 8680, "loss": 1.2254307270050049, "lr": 1.5161290322580645e-06, "epoch": 0.07603686635944701, "percentage": 3.8, "elapsed_time": "0:27:04", "remaining_time": "11:25:00"} +{"current_steps": 331, "total_steps": 8680, "loss": 1.2653989791870117, "lr": 1.5207373271889401e-06, "epoch": 0.07626728110599078, "percentage": 3.81, "elapsed_time": "0:27:09", "remaining_time": "11:24:51"} +{"current_steps": 332, "total_steps": 8680, "loss": 1.199981451034546, "lr": 1.5253456221198155e-06, "epoch": 0.07649769585253456, "percentage": 3.82, "elapsed_time": "0:27:14", "remaining_time": "11:25:00"} +{"current_steps": 333, "total_steps": 8680, "loss": 1.1141018867492676, "lr": 1.5299539170506913e-06, "epoch": 0.07672811059907834, "percentage": 3.84, "elapsed_time": "0:27:20", "remaining_time": "11:25:21"} +{"current_steps": 334, "total_steps": 8680, "loss": 1.2139991521835327, "lr": 1.5345622119815667e-06, "epoch": 0.07695852534562211, "percentage": 3.85, "elapsed_time": "0:27:24", "remaining_time": "11:25:03"} 
+{"current_steps": 335, "total_steps": 8680, "loss": 1.0647475719451904, "lr": 1.5391705069124423e-06, "epoch": 0.0771889400921659, "percentage": 3.86, "elapsed_time": "0:27:28", "remaining_time": "11:24:29"} +{"current_steps": 336, "total_steps": 8680, "loss": 0.9740357398986816, "lr": 1.543778801843318e-06, "epoch": 0.07741935483870968, "percentage": 3.87, "elapsed_time": "0:27:33", "remaining_time": "11:24:28"} +{"current_steps": 337, "total_steps": 8680, "loss": 0.877153754234314, "lr": 1.5483870967741935e-06, "epoch": 0.07764976958525345, "percentage": 3.88, "elapsed_time": "0:27:39", "remaining_time": "11:24:34"} +{"current_steps": 338, "total_steps": 8680, "loss": 1.2472789287567139, "lr": 1.552995391705069e-06, "epoch": 0.07788018433179723, "percentage": 3.89, "elapsed_time": "0:27:43", "remaining_time": "11:24:28"} +{"current_steps": 339, "total_steps": 8680, "loss": 1.1873078346252441, "lr": 1.5576036866359445e-06, "epoch": 0.07811059907834102, "percentage": 3.91, "elapsed_time": "0:27:48", "remaining_time": "11:24:11"} +{"current_steps": 340, "total_steps": 8680, "loss": 1.0728449821472168, "lr": 1.5622119815668203e-06, "epoch": 0.07834101382488479, "percentage": 3.92, "elapsed_time": "0:27:52", "remaining_time": "11:23:41"} +{"current_steps": 341, "total_steps": 8680, "loss": 0.9974904656410217, "lr": 1.5668202764976959e-06, "epoch": 0.07857142857142857, "percentage": 3.93, "elapsed_time": "0:27:56", "remaining_time": "11:23:23"} +{"current_steps": 342, "total_steps": 8680, "loss": 1.0591039657592773, "lr": 1.5714285714285712e-06, "epoch": 0.07880184331797235, "percentage": 3.94, "elapsed_time": "0:28:01", "remaining_time": "11:23:25"} +{"current_steps": 343, "total_steps": 8680, "loss": 1.04117751121521, "lr": 1.576036866359447e-06, "epoch": 0.07903225806451612, "percentage": 3.95, "elapsed_time": "0:28:06", "remaining_time": "11:23:06"} +{"current_steps": 344, "total_steps": 8680, "loss": 0.934100866317749, "lr": 1.5806451612903224e-06, "epoch": 
0.0792626728110599, "percentage": 3.96, "elapsed_time": "0:28:12", "remaining_time": "11:23:32"} +{"current_steps": 345, "total_steps": 8680, "loss": 1.0333890914916992, "lr": 1.585253456221198e-06, "epoch": 0.07949308755760369, "percentage": 3.97, "elapsed_time": "0:28:17", "remaining_time": "11:23:31"} +{"current_steps": 346, "total_steps": 8680, "loss": 1.1762741804122925, "lr": 1.5898617511520738e-06, "epoch": 0.07972350230414746, "percentage": 3.99, "elapsed_time": "0:28:23", "remaining_time": "11:23:48"} +{"current_steps": 347, "total_steps": 8680, "loss": 1.081842303276062, "lr": 1.5944700460829492e-06, "epoch": 0.07995391705069124, "percentage": 4.0, "elapsed_time": "0:28:28", "remaining_time": "11:23:49"} +{"current_steps": 348, "total_steps": 8680, "loss": 1.140712022781372, "lr": 1.5990783410138248e-06, "epoch": 0.08018433179723503, "percentage": 4.01, "elapsed_time": "0:28:32", "remaining_time": "11:23:25"} +{"current_steps": 349, "total_steps": 8680, "loss": 1.0155198574066162, "lr": 1.6036866359447004e-06, "epoch": 0.0804147465437788, "percentage": 4.02, "elapsed_time": "0:28:38", "remaining_time": "11:23:34"} +{"current_steps": 350, "total_steps": 8680, "loss": 1.0673280954360962, "lr": 1.608294930875576e-06, "epoch": 0.08064516129032258, "percentage": 4.03, "elapsed_time": "0:28:42", "remaining_time": "11:23:10"} +{"current_steps": 351, "total_steps": 8680, "loss": 1.1061692237854004, "lr": 1.6129032258064514e-06, "epoch": 0.08087557603686636, "percentage": 4.04, "elapsed_time": "0:28:46", "remaining_time": "11:22:47"} +{"current_steps": 352, "total_steps": 8680, "loss": 1.0120354890823364, "lr": 1.6175115207373272e-06, "epoch": 0.08110599078341015, "percentage": 4.06, "elapsed_time": "0:28:50", "remaining_time": "11:22:24"} +{"current_steps": 353, "total_steps": 8680, "loss": 1.1260986328125, "lr": 1.6221198156682028e-06, "epoch": 0.08133640552995391, "percentage": 4.07, "elapsed_time": "0:28:55", "remaining_time": "11:22:08"} +{"current_steps": 
354, "total_steps": 8680, "loss": 1.0376214981079102, "lr": 1.6267281105990782e-06, "epoch": 0.0815668202764977, "percentage": 4.08, "elapsed_time": "0:28:59", "remaining_time": "11:21:47"} +{"current_steps": 355, "total_steps": 8680, "loss": 1.0802130699157715, "lr": 1.631336405529954e-06, "epoch": 0.08179723502304148, "percentage": 4.09, "elapsed_time": "0:29:03", "remaining_time": "11:21:22"} +{"current_steps": 356, "total_steps": 8680, "loss": 1.217378854751587, "lr": 1.6359447004608294e-06, "epoch": 0.08202764976958525, "percentage": 4.1, "elapsed_time": "0:29:07", "remaining_time": "11:20:53"} +{"current_steps": 357, "total_steps": 8680, "loss": 0.9107617139816284, "lr": 1.640552995391705e-06, "epoch": 0.08225806451612903, "percentage": 4.11, "elapsed_time": "0:29:13", "remaining_time": "11:21:25"} +{"current_steps": 358, "total_steps": 8680, "loss": 1.089385986328125, "lr": 1.6451612903225808e-06, "epoch": 0.08248847926267282, "percentage": 4.12, "elapsed_time": "0:29:18", "remaining_time": "11:21:21"} +{"current_steps": 359, "total_steps": 8680, "loss": 1.1420392990112305, "lr": 1.6497695852534561e-06, "epoch": 0.08271889400921659, "percentage": 4.14, "elapsed_time": "0:29:22", "remaining_time": "11:20:53"} +{"current_steps": 360, "total_steps": 8680, "loss": 0.9308648705482483, "lr": 1.6543778801843317e-06, "epoch": 0.08294930875576037, "percentage": 4.15, "elapsed_time": "0:29:29", "remaining_time": "11:21:43"} +{"current_steps": 361, "total_steps": 8680, "loss": 0.9463413953781128, "lr": 1.6589861751152071e-06, "epoch": 0.08317972350230415, "percentage": 4.16, "elapsed_time": "0:29:34", "remaining_time": "11:21:37"} +{"current_steps": 362, "total_steps": 8680, "loss": 1.0364834070205688, "lr": 1.663594470046083e-06, "epoch": 0.08341013824884792, "percentage": 4.17, "elapsed_time": "0:29:40", "remaining_time": "11:22:01"} +{"current_steps": 363, "total_steps": 8680, "loss": 0.9992797374725342, "lr": 1.6682027649769585e-06, "epoch": 0.0836405529953917, 
"percentage": 4.18, "elapsed_time": "0:29:45", "remaining_time": "11:21:46"} +{"current_steps": 364, "total_steps": 8680, "loss": 0.9862687587738037, "lr": 1.672811059907834e-06, "epoch": 0.08387096774193549, "percentage": 4.19, "elapsed_time": "0:29:49", "remaining_time": "11:21:31"} +{"current_steps": 365, "total_steps": 8680, "loss": 1.0882744789123535, "lr": 1.6774193548387097e-06, "epoch": 0.08410138248847926, "percentage": 4.21, "elapsed_time": "0:29:54", "remaining_time": "11:21:23"} +{"current_steps": 366, "total_steps": 8680, "loss": 0.9217149615287781, "lr": 1.682027649769585e-06, "epoch": 0.08433179723502304, "percentage": 4.22, "elapsed_time": "0:30:00", "remaining_time": "11:21:43"} +{"current_steps": 367, "total_steps": 8680, "loss": 1.0384632349014282, "lr": 1.6866359447004607e-06, "epoch": 0.08456221198156683, "percentage": 4.23, "elapsed_time": "0:30:05", "remaining_time": "11:21:46"} +{"current_steps": 368, "total_steps": 8680, "loss": 0.8760565519332886, "lr": 1.6912442396313363e-06, "epoch": 0.0847926267281106, "percentage": 4.24, "elapsed_time": "0:30:11", "remaining_time": "11:21:55"} +{"current_steps": 369, "total_steps": 8680, "loss": 0.9868614077568054, "lr": 1.6958525345622119e-06, "epoch": 0.08502304147465438, "percentage": 4.25, "elapsed_time": "0:30:16", "remaining_time": "11:21:46"} +{"current_steps": 370, "total_steps": 8680, "loss": 1.0386936664581299, "lr": 1.7004608294930875e-06, "epoch": 0.08525345622119816, "percentage": 4.26, "elapsed_time": "0:30:22", "remaining_time": "11:22:10"} +{"current_steps": 371, "total_steps": 8680, "loss": 1.2201364040374756, "lr": 1.705069124423963e-06, "epoch": 0.08548387096774193, "percentage": 4.27, "elapsed_time": "0:30:26", "remaining_time": "11:21:52"} +{"current_steps": 372, "total_steps": 8680, "loss": 0.9892920255661011, "lr": 1.7096774193548387e-06, "epoch": 0.08571428571428572, "percentage": 4.29, "elapsed_time": "0:30:31", "remaining_time": "11:21:47"} +{"current_steps": 373, 
"total_steps": 8680, "loss": 0.9379667639732361, "lr": 1.714285714285714e-06, "epoch": 0.0859447004608295, "percentage": 4.3, "elapsed_time": "0:30:35", "remaining_time": "11:21:20"} +{"current_steps": 374, "total_steps": 8680, "loss": 1.0150624513626099, "lr": 1.7188940092165899e-06, "epoch": 0.08617511520737327, "percentage": 4.31, "elapsed_time": "0:30:41", "remaining_time": "11:21:31"} +{"current_steps": 375, "total_steps": 8680, "loss": 0.8724589943885803, "lr": 1.7235023041474655e-06, "epoch": 0.08640552995391705, "percentage": 4.32, "elapsed_time": "0:30:46", "remaining_time": "11:21:29"} +{"current_steps": 376, "total_steps": 8680, "loss": 1.005715012550354, "lr": 1.7281105990783408e-06, "epoch": 0.08663594470046083, "percentage": 4.33, "elapsed_time": "0:30:51", "remaining_time": "11:21:31"} +{"current_steps": 377, "total_steps": 8680, "loss": 1.0238345861434937, "lr": 1.7327188940092167e-06, "epoch": 0.0868663594470046, "percentage": 4.34, "elapsed_time": "0:30:55", "remaining_time": "11:21:05"} +{"current_steps": 378, "total_steps": 8680, "loss": 1.061020851135254, "lr": 1.737327188940092e-06, "epoch": 0.08709677419354839, "percentage": 4.35, "elapsed_time": "0:31:00", "remaining_time": "11:21:02"} +{"current_steps": 379, "total_steps": 8680, "loss": 0.8607133626937866, "lr": 1.7419354838709676e-06, "epoch": 0.08732718894009217, "percentage": 4.37, "elapsed_time": "0:31:05", "remaining_time": "11:21:01"} +{"current_steps": 380, "total_steps": 8680, "loss": 0.9070740938186646, "lr": 1.7465437788018434e-06, "epoch": 0.08755760368663594, "percentage": 4.38, "elapsed_time": "0:31:11", "remaining_time": "11:21:11"} +{"current_steps": 381, "total_steps": 8680, "loss": 0.993092954158783, "lr": 1.7511520737327188e-06, "epoch": 0.08778801843317972, "percentage": 4.39, "elapsed_time": "0:31:16", "remaining_time": "11:21:11"} +{"current_steps": 382, "total_steps": 8680, "loss": 1.1119567155838013, "lr": 1.7557603686635944e-06, "epoch": 0.08801843317972351, 
"percentage": 4.4, "elapsed_time": "0:31:21", "remaining_time": "11:21:07"} +{"current_steps": 383, "total_steps": 8680, "loss": 1.030786395072937, "lr": 1.7603686635944698e-06, "epoch": 0.08824884792626728, "percentage": 4.41, "elapsed_time": "0:31:26", "remaining_time": "11:21:04"} +{"current_steps": 384, "total_steps": 8680, "loss": 1.0578559637069702, "lr": 1.7649769585253456e-06, "epoch": 0.08847926267281106, "percentage": 4.42, "elapsed_time": "0:31:30", "remaining_time": "11:20:37"} +{"current_steps": 385, "total_steps": 8680, "loss": 1.1282391548156738, "lr": 1.7695852534562212e-06, "epoch": 0.08870967741935484, "percentage": 4.44, "elapsed_time": "0:31:34", "remaining_time": "11:20:16"} +{"current_steps": 386, "total_steps": 8680, "loss": 0.7838784456253052, "lr": 1.7741935483870966e-06, "epoch": 0.08894009216589861, "percentage": 4.45, "elapsed_time": "0:31:40", "remaining_time": "11:20:36"} +{"current_steps": 387, "total_steps": 8680, "loss": 0.9244300127029419, "lr": 1.7788018433179724e-06, "epoch": 0.0891705069124424, "percentage": 4.46, "elapsed_time": "0:31:45", "remaining_time": "11:20:26"} +{"current_steps": 388, "total_steps": 8680, "loss": 0.916866660118103, "lr": 1.7834101382488478e-06, "epoch": 0.08940092165898618, "percentage": 4.47, "elapsed_time": "0:31:51", "remaining_time": "11:20:46"} +{"current_steps": 389, "total_steps": 8680, "loss": 0.9918155670166016, "lr": 1.7880184331797234e-06, "epoch": 0.08963133640552995, "percentage": 4.48, "elapsed_time": "0:31:56", "remaining_time": "11:20:54"} +{"current_steps": 390, "total_steps": 8680, "loss": 0.9879001379013062, "lr": 1.792626728110599e-06, "epoch": 0.08986175115207373, "percentage": 4.49, "elapsed_time": "0:32:00", "remaining_time": "11:20:32"} +{"current_steps": 391, "total_steps": 8680, "loss": 1.0252082347869873, "lr": 1.7972350230414746e-06, "epoch": 0.09009216589861752, "percentage": 4.5, "elapsed_time": "0:32:06", "remaining_time": "11:20:35"} +{"current_steps": 392, "total_steps": 
8680, "loss": 1.0376901626586914, "lr": 1.8018433179723502e-06, "epoch": 0.09032258064516129, "percentage": 4.52, "elapsed_time": "0:32:10", "remaining_time": "11:20:20"} +{"current_steps": 393, "total_steps": 8680, "loss": 1.0237072706222534, "lr": 1.8064516129032258e-06, "epoch": 0.09055299539170507, "percentage": 4.53, "elapsed_time": "0:32:16", "remaining_time": "11:20:30"} +{"current_steps": 394, "total_steps": 8680, "loss": 1.004181146621704, "lr": 1.8110599078341013e-06, "epoch": 0.09078341013824885, "percentage": 4.54, "elapsed_time": "0:32:22", "remaining_time": "11:20:42"} +{"current_steps": 395, "total_steps": 8680, "loss": 1.1162958145141602, "lr": 1.8156682027649767e-06, "epoch": 0.09101382488479262, "percentage": 4.55, "elapsed_time": "0:32:26", "remaining_time": "11:20:33"} +{"current_steps": 396, "total_steps": 8680, "loss": 0.9634548425674438, "lr": 1.8202764976958525e-06, "epoch": 0.0912442396313364, "percentage": 4.56, "elapsed_time": "0:32:31", "remaining_time": "11:20:25"} +{"current_steps": 397, "total_steps": 8680, "loss": 0.9306463599205017, "lr": 1.8248847926267281e-06, "epoch": 0.09147465437788019, "percentage": 4.57, "elapsed_time": "0:32:37", "remaining_time": "11:20:45"} +{"current_steps": 398, "total_steps": 8680, "loss": 1.0243630409240723, "lr": 1.8294930875576035e-06, "epoch": 0.09170506912442396, "percentage": 4.59, "elapsed_time": "0:32:41", "remaining_time": "11:20:16"} +{"current_steps": 399, "total_steps": 8680, "loss": 0.9261370897293091, "lr": 1.8341013824884793e-06, "epoch": 0.09193548387096774, "percentage": 4.6, "elapsed_time": "0:32:47", "remaining_time": "11:20:37"} +{"current_steps": 400, "total_steps": 8680, "loss": 0.9929264783859253, "lr": 1.8387096774193547e-06, "epoch": 0.09216589861751152, "percentage": 4.61, "elapsed_time": "0:32:52", "remaining_time": "11:20:41"} +{"current_steps": 401, "total_steps": 8680, "loss": 1.0245590209960938, "lr": 1.8433179723502303e-06, "epoch": 0.0923963133640553, "percentage": 4.62, 
"elapsed_time": "0:32:59", "remaining_time": "11:21:03"} +{"current_steps": 402, "total_steps": 8680, "loss": 0.9801148176193237, "lr": 1.8479262672811061e-06, "epoch": 0.09262672811059908, "percentage": 4.63, "elapsed_time": "0:33:05", "remaining_time": "11:21:28"} +{"current_steps": 403, "total_steps": 8680, "loss": 1.181383728981018, "lr": 1.8525345622119815e-06, "epoch": 0.09285714285714286, "percentage": 4.64, "elapsed_time": "0:33:10", "remaining_time": "11:21:23"} +{"current_steps": 404, "total_steps": 8680, "loss": 0.9493411779403687, "lr": 1.857142857142857e-06, "epoch": 0.09308755760368663, "percentage": 4.65, "elapsed_time": "0:33:15", "remaining_time": "11:21:08"} +{"current_steps": 405, "total_steps": 8680, "loss": 1.1096491813659668, "lr": 1.8617511520737325e-06, "epoch": 0.09331797235023041, "percentage": 4.67, "elapsed_time": "0:33:19", "remaining_time": "11:20:58"} +{"current_steps": 406, "total_steps": 8680, "loss": 1.1019275188446045, "lr": 1.8663594470046083e-06, "epoch": 0.0935483870967742, "percentage": 4.68, "elapsed_time": "0:33:26", "remaining_time": "11:21:33"} +{"current_steps": 407, "total_steps": 8680, "loss": 0.973988950252533, "lr": 1.8709677419354837e-06, "epoch": 0.09377880184331797, "percentage": 4.69, "elapsed_time": "0:33:30", "remaining_time": "11:21:17"} +{"current_steps": 408, "total_steps": 8680, "loss": 1.1670622825622559, "lr": 1.8755760368663593e-06, "epoch": 0.09400921658986175, "percentage": 4.7, "elapsed_time": "0:33:35", "remaining_time": "11:21:04"} +{"current_steps": 409, "total_steps": 8680, "loss": 0.8550488948822021, "lr": 1.880184331797235e-06, "epoch": 0.09423963133640553, "percentage": 4.71, "elapsed_time": "0:33:41", "remaining_time": "11:21:16"} +{"current_steps": 410, "total_steps": 8680, "loss": 1.0501651763916016, "lr": 1.8847926267281104e-06, "epoch": 0.0944700460829493, "percentage": 4.72, "elapsed_time": "0:33:45", "remaining_time": "11:20:55"} +{"current_steps": 411, "total_steps": 8680, "loss": 
1.1323202848434448, "lr": 1.889400921658986e-06, "epoch": 0.09470046082949309, "percentage": 4.74, "elapsed_time": "0:33:49", "remaining_time": "11:20:31"} +{"current_steps": 412, "total_steps": 8680, "loss": 1.168154001235962, "lr": 1.8940092165898616e-06, "epoch": 0.09493087557603687, "percentage": 4.75, "elapsed_time": "0:33:54", "remaining_time": "11:20:23"} +{"current_steps": 413, "total_steps": 8680, "loss": 1.0667431354522705, "lr": 1.8986175115207372e-06, "epoch": 0.09516129032258064, "percentage": 4.76, "elapsed_time": "0:33:59", "remaining_time": "11:20:24"} +{"current_steps": 414, "total_steps": 8680, "loss": 1.1447162628173828, "lr": 1.9032258064516128e-06, "epoch": 0.09539170506912442, "percentage": 4.77, "elapsed_time": "0:34:02", "remaining_time": "11:19:48"} +{"current_steps": 415, "total_steps": 8680, "loss": 0.9403433799743652, "lr": 1.9078341013824884e-06, "epoch": 0.0956221198156682, "percentage": 4.78, "elapsed_time": "0:34:07", "remaining_time": "11:19:46"} +{"current_steps": 416, "total_steps": 8680, "loss": 0.9837527275085449, "lr": 1.912442396313364e-06, "epoch": 0.09585253456221199, "percentage": 4.79, "elapsed_time": "0:34:12", "remaining_time": "11:19:30"} +{"current_steps": 417, "total_steps": 8680, "loss": 1.071333408355713, "lr": 1.9170506912442396e-06, "epoch": 0.09608294930875576, "percentage": 4.8, "elapsed_time": "0:34:16", "remaining_time": "11:19:03"} +{"current_steps": 418, "total_steps": 8680, "loss": 1.0156168937683105, "lr": 1.921658986175115e-06, "epoch": 0.09631336405529954, "percentage": 4.82, "elapsed_time": "0:34:21", "remaining_time": "11:19:15"} +{"current_steps": 419, "total_steps": 8680, "loss": 0.9705266952514648, "lr": 1.926267281105991e-06, "epoch": 0.09654377880184332, "percentage": 4.83, "elapsed_time": "0:34:26", "remaining_time": "11:19:05"} +{"current_steps": 420, "total_steps": 8680, "loss": 1.0570204257965088, "lr": 1.930875576036866e-06, "epoch": 0.0967741935483871, "percentage": 4.84, "elapsed_time": 
"0:34:33", "remaining_time": "11:19:40"} +{"current_steps": 421, "total_steps": 8680, "loss": 1.141861915588379, "lr": 1.935483870967742e-06, "epoch": 0.09700460829493088, "percentage": 4.85, "elapsed_time": "0:34:38", "remaining_time": "11:19:41"} +{"current_steps": 422, "total_steps": 8680, "loss": 0.9849745631217957, "lr": 1.9400921658986174e-06, "epoch": 0.09723502304147466, "percentage": 4.86, "elapsed_time": "0:34:43", "remaining_time": "11:19:29"} +{"current_steps": 423, "total_steps": 8680, "loss": 1.0279912948608398, "lr": 1.944700460829493e-06, "epoch": 0.09746543778801843, "percentage": 4.87, "elapsed_time": "0:34:47", "remaining_time": "11:18:59"} +{"current_steps": 424, "total_steps": 8680, "loss": 1.0707788467407227, "lr": 1.9493087557603686e-06, "epoch": 0.09769585253456221, "percentage": 4.88, "elapsed_time": "0:34:52", "remaining_time": "11:19:03"} +{"current_steps": 425, "total_steps": 8680, "loss": 0.9391129016876221, "lr": 1.953917050691244e-06, "epoch": 0.097926267281106, "percentage": 4.9, "elapsed_time": "0:34:57", "remaining_time": "11:19:04"} +{"current_steps": 426, "total_steps": 8680, "loss": 0.9792884588241577, "lr": 1.9585253456221198e-06, "epoch": 0.09815668202764977, "percentage": 4.91, "elapsed_time": "0:35:02", "remaining_time": "11:18:52"} +{"current_steps": 427, "total_steps": 8680, "loss": 1.0111792087554932, "lr": 1.963133640552995e-06, "epoch": 0.09838709677419355, "percentage": 4.92, "elapsed_time": "0:35:06", "remaining_time": "11:18:37"} +{"current_steps": 428, "total_steps": 8680, "loss": 1.0020272731781006, "lr": 1.967741935483871e-06, "epoch": 0.09861751152073733, "percentage": 4.93, "elapsed_time": "0:35:11", "remaining_time": "11:18:39"} +{"current_steps": 429, "total_steps": 8680, "loss": 1.1002991199493408, "lr": 1.9723502304147463e-06, "epoch": 0.0988479262672811, "percentage": 4.94, "elapsed_time": "0:35:16", "remaining_time": "11:18:17"} +{"current_steps": 430, "total_steps": 8680, "loss": 0.9656131267547607, "lr": 
1.976958525345622e-06, "epoch": 0.09907834101382489, "percentage": 4.95, "elapsed_time": "0:35:20", "remaining_time": "11:18:11"} +{"current_steps": 431, "total_steps": 8680, "loss": 1.1845166683197021, "lr": 1.9815668202764975e-06, "epoch": 0.09930875576036867, "percentage": 4.97, "elapsed_time": "0:35:25", "remaining_time": "11:17:54"} +{"current_steps": 432, "total_steps": 8680, "loss": 0.8743879795074463, "lr": 1.9861751152073733e-06, "epoch": 0.09953917050691244, "percentage": 4.98, "elapsed_time": "0:35:31", "remaining_time": "11:18:10"} +{"current_steps": 433, "total_steps": 8680, "loss": 1.0800082683563232, "lr": 1.9907834101382487e-06, "epoch": 0.09976958525345622, "percentage": 4.99, "elapsed_time": "0:35:34", "remaining_time": "11:17:41"} +{"current_steps": 434, "total_steps": 8680, "loss": 1.0410808324813843, "lr": 1.995391705069124e-06, "epoch": 0.1, "percentage": 5.0, "elapsed_time": "0:35:39", "remaining_time": "11:17:28"} +{"current_steps": 435, "total_steps": 8680, "loss": 1.0214624404907227, "lr": 2e-06, "epoch": 0.10023041474654378, "percentage": 5.01, "elapsed_time": "0:35:44", "remaining_time": "11:17:29"} +{"current_steps": 436, "total_steps": 8680, "loss": 1.0304028987884521, "lr": 1.9999999274256618e-06, "epoch": 0.10046082949308756, "percentage": 5.02, "elapsed_time": "0:35:48", "remaining_time": "11:17:10"} +{"current_steps": 437, "total_steps": 8680, "loss": 1.0457626581192017, "lr": 1.9999997097026583e-06, "epoch": 0.10069124423963134, "percentage": 5.03, "elapsed_time": "0:35:54", "remaining_time": "11:17:26"} +{"current_steps": 438, "total_steps": 8680, "loss": 0.9837691187858582, "lr": 1.9999993468310205e-06, "epoch": 0.10092165898617511, "percentage": 5.05, "elapsed_time": "0:35:58", "remaining_time": "11:16:59"} +{"current_steps": 439, "total_steps": 8680, "loss": 1.0819612741470337, "lr": 1.9999988388108013e-06, "epoch": 0.1011520737327189, "percentage": 5.06, "elapsed_time": "0:36:02", "remaining_time": "11:16:26"} 
+{"current_steps": 440, "total_steps": 8680, "loss": 1.0417449474334717, "lr": 1.9999981856420743e-06, "epoch": 0.10138248847926268, "percentage": 5.07, "elapsed_time": "0:36:08", "remaining_time": "11:16:47"} +{"current_steps": 441, "total_steps": 8680, "loss": 1.0501068830490112, "lr": 1.999997387324935e-06, "epoch": 0.10161290322580645, "percentage": 5.08, "elapsed_time": "0:36:11", "remaining_time": "11:16:18"} +{"current_steps": 442, "total_steps": 8680, "loss": 1.0635120868682861, "lr": 1.999996443859498e-06, "epoch": 0.10184331797235023, "percentage": 5.09, "elapsed_time": "0:36:17", "remaining_time": "11:16:28"} +{"current_steps": 443, "total_steps": 8680, "loss": 0.9732234477996826, "lr": 1.999995355245902e-06, "epoch": 0.10207373271889401, "percentage": 5.1, "elapsed_time": "0:36:22", "remaining_time": "11:16:21"} +{"current_steps": 444, "total_steps": 8680, "loss": 0.9493811130523682, "lr": 1.9999941214843034e-06, "epoch": 0.10230414746543778, "percentage": 5.12, "elapsed_time": "0:36:27", "remaining_time": "11:16:18"} +{"current_steps": 445, "total_steps": 8680, "loss": 1.1455141305923462, "lr": 1.9999927425748817e-06, "epoch": 0.10253456221198157, "percentage": 5.13, "elapsed_time": "0:36:32", "remaining_time": "11:16:15"} +{"current_steps": 446, "total_steps": 8680, "loss": 0.9341592788696289, "lr": 1.9999912185178374e-06, "epoch": 0.10276497695852535, "percentage": 5.14, "elapsed_time": "0:36:38", "remaining_time": "11:16:33"} +{"current_steps": 447, "total_steps": 8680, "loss": 0.9535608291625977, "lr": 1.9999895493133916e-06, "epoch": 0.10299539170506912, "percentage": 5.15, "elapsed_time": "0:36:44", "remaining_time": "11:16:45"} +{"current_steps": 448, "total_steps": 8680, "loss": 1.1977221965789795, "lr": 1.999987734961787e-06, "epoch": 0.1032258064516129, "percentage": 5.16, "elapsed_time": "0:36:47", "remaining_time": "11:16:11"} +{"current_steps": 449, "total_steps": 8680, "loss": 1.1658375263214111, "lr": 1.999985775463286e-06, "epoch": 
0.10345622119815669, "percentage": 5.17, "elapsed_time": "0:36:52", "remaining_time": "11:15:56"} +{"current_steps": 450, "total_steps": 8680, "loss": 1.1171612739562988, "lr": 1.9999836708181734e-06, "epoch": 0.10368663594470046, "percentage": 5.18, "elapsed_time": "0:36:56", "remaining_time": "11:15:34"} +{"current_steps": 451, "total_steps": 8680, "loss": 1.0864373445510864, "lr": 1.999981421026755e-06, "epoch": 0.10391705069124424, "percentage": 5.2, "elapsed_time": "0:37:00", "remaining_time": "11:15:22"} +{"current_steps": 452, "total_steps": 8680, "loss": 1.1211299896240234, "lr": 1.999979026089357e-06, "epoch": 0.10414746543778802, "percentage": 5.21, "elapsed_time": "0:37:05", "remaining_time": "11:15:04"} +{"current_steps": 453, "total_steps": 8680, "loss": 1.071751594543457, "lr": 1.9999764860063277e-06, "epoch": 0.10437788018433179, "percentage": 5.22, "elapsed_time": "0:37:08", "remaining_time": "11:14:34"} +{"current_steps": 454, "total_steps": 8680, "loss": 1.0377576351165771, "lr": 1.9999738007780347e-06, "epoch": 0.10460829493087558, "percentage": 5.23, "elapsed_time": "0:37:14", "remaining_time": "11:14:47"} +{"current_steps": 455, "total_steps": 8680, "loss": 0.9658410549163818, "lr": 1.9999709704048685e-06, "epoch": 0.10483870967741936, "percentage": 5.24, "elapsed_time": "0:37:20", "remaining_time": "11:14:56"} +{"current_steps": 456, "total_steps": 8680, "loss": 0.9070194959640503, "lr": 1.9999679948872395e-06, "epoch": 0.10506912442396313, "percentage": 5.25, "elapsed_time": "0:37:25", "remaining_time": "11:14:59"} +{"current_steps": 457, "total_steps": 8680, "loss": 1.2197664976119995, "lr": 1.9999648742255803e-06, "epoch": 0.10529953917050691, "percentage": 5.26, "elapsed_time": "0:37:29", "remaining_time": "11:14:43"} +{"current_steps": 458, "total_steps": 8680, "loss": 0.9032889604568481, "lr": 1.9999616084203426e-06, "epoch": 0.1055299539170507, "percentage": 5.28, "elapsed_time": "0:37:35", "remaining_time": "11:14:46"} 
+{"current_steps": 459, "total_steps": 8680, "loss": 0.9458762407302856, "lr": 1.9999581974720017e-06, "epoch": 0.10576036866359446, "percentage": 5.29, "elapsed_time": "0:37:40", "remaining_time": "11:14:48"} +{"current_steps": 460, "total_steps": 8680, "loss": 1.0024757385253906, "lr": 1.9999546413810526e-06, "epoch": 0.10599078341013825, "percentage": 5.3, "elapsed_time": "0:37:44", "remaining_time": "11:14:17"} +{"current_steps": 461, "total_steps": 8680, "loss": 0.9499050378799438, "lr": 1.9999509401480108e-06, "epoch": 0.10622119815668203, "percentage": 5.31, "elapsed_time": "0:37:50", "remaining_time": "11:14:32"} +{"current_steps": 462, "total_steps": 8680, "loss": 1.0764188766479492, "lr": 1.9999470937734132e-06, "epoch": 0.1064516129032258, "percentage": 5.32, "elapsed_time": "0:37:54", "remaining_time": "11:14:12"} +{"current_steps": 463, "total_steps": 8680, "loss": 0.9858300089836121, "lr": 1.9999431022578194e-06, "epoch": 0.10668202764976958, "percentage": 5.33, "elapsed_time": "0:37:58", "remaining_time": "11:13:48"} +{"current_steps": 464, "total_steps": 8680, "loss": 0.8965580463409424, "lr": 1.999938965601808e-06, "epoch": 0.10691244239631337, "percentage": 5.35, "elapsed_time": "0:38:03", "remaining_time": "11:13:57"} +{"current_steps": 465, "total_steps": 8680, "loss": 0.8860410451889038, "lr": 1.9999346838059788e-06, "epoch": 0.10714285714285714, "percentage": 5.36, "elapsed_time": "0:38:08", "remaining_time": "11:13:53"} +{"current_steps": 466, "total_steps": 8680, "loss": 1.0621274709701538, "lr": 1.9999302568709546e-06, "epoch": 0.10737327188940092, "percentage": 5.37, "elapsed_time": "0:38:14", "remaining_time": "11:13:59"} +{"current_steps": 467, "total_steps": 8680, "loss": 0.8894643783569336, "lr": 1.9999256847973774e-06, "epoch": 0.1076036866359447, "percentage": 5.38, "elapsed_time": "0:38:20", "remaining_time": "11:14:14"} +{"current_steps": 468, "total_steps": 8680, "loss": 0.98856520652771, "lr": 1.999920967585911e-06, "epoch": 
0.10783410138248847, "percentage": 5.39, "elapsed_time": "0:38:25", "remaining_time": "11:14:18"} +{"current_steps": 469, "total_steps": 8680, "loss": 0.7885239124298096, "lr": 1.999916105237239e-06, "epoch": 0.10806451612903226, "percentage": 5.4, "elapsed_time": "0:38:31", "remaining_time": "11:14:30"} +{"current_steps": 470, "total_steps": 8680, "loss": 1.0274477005004883, "lr": 1.9999110977520687e-06, "epoch": 0.10829493087557604, "percentage": 5.41, "elapsed_time": "0:38:36", "remaining_time": "11:14:25"} +{"current_steps": 471, "total_steps": 8680, "loss": 0.8672109842300415, "lr": 1.999905945131126e-06, "epoch": 0.10852534562211981, "percentage": 5.43, "elapsed_time": "0:38:42", "remaining_time": "11:14:45"} +{"current_steps": 472, "total_steps": 8680, "loss": 0.852576732635498, "lr": 1.9999006473751594e-06, "epoch": 0.10875576036866359, "percentage": 5.44, "elapsed_time": "0:38:47", "remaining_time": "11:14:35"} +{"current_steps": 473, "total_steps": 8680, "loss": 0.9553557634353638, "lr": 1.9998952044849375e-06, "epoch": 0.10898617511520738, "percentage": 5.45, "elapsed_time": "0:38:51", "remaining_time": "11:14:14"} +{"current_steps": 474, "total_steps": 8680, "loss": 1.1375620365142822, "lr": 1.99988961646125e-06, "epoch": 0.10921658986175115, "percentage": 5.46, "elapsed_time": "0:38:55", "remaining_time": "11:13:49"} +{"current_steps": 475, "total_steps": 8680, "loss": 0.9653681516647339, "lr": 1.9998838833049083e-06, "epoch": 0.10944700460829493, "percentage": 5.47, "elapsed_time": "0:38:59", "remaining_time": "11:13:38"} +{"current_steps": 476, "total_steps": 8680, "loss": 1.1139185428619385, "lr": 1.999878005016745e-06, "epoch": 0.10967741935483871, "percentage": 5.48, "elapsed_time": "0:39:04", "remaining_time": "11:13:25"} +{"current_steps": 477, "total_steps": 8680, "loss": 0.8375418186187744, "lr": 1.9998719815976127e-06, "epoch": 0.10990783410138248, "percentage": 5.5, "elapsed_time": "0:39:09", "remaining_time": "11:13:26"} +{"current_steps": 
478, "total_steps": 8680, "loss": 1.0005979537963867, "lr": 1.999865813048386e-06, "epoch": 0.11013824884792627, "percentage": 5.51, "elapsed_time": "0:39:15", "remaining_time": "11:13:45"} +{"current_steps": 479, "total_steps": 8680, "loss": 0.8499772548675537, "lr": 1.99985949936996e-06, "epoch": 0.11036866359447005, "percentage": 5.52, "elapsed_time": "0:39:21", "remaining_time": "11:13:46"} +{"current_steps": 480, "total_steps": 8680, "loss": 0.9805284738540649, "lr": 1.999853040563252e-06, "epoch": 0.11059907834101383, "percentage": 5.53, "elapsed_time": "0:39:25", "remaining_time": "11:13:34"} +{"current_steps": 481, "total_steps": 8680, "loss": 0.9462177753448486, "lr": 1.9998464366291983e-06, "epoch": 0.1108294930875576, "percentage": 5.54, "elapsed_time": "0:39:30", "remaining_time": "11:13:25"} +{"current_steps": 482, "total_steps": 8680, "loss": 1.1023187637329102, "lr": 1.999839687568758e-06, "epoch": 0.11105990783410138, "percentage": 5.55, "elapsed_time": "0:39:34", "remaining_time": "11:13:11"} +{"current_steps": 483, "total_steps": 8680, "loss": 0.9361279010772705, "lr": 1.9998327933829103e-06, "epoch": 0.11129032258064517, "percentage": 5.56, "elapsed_time": "0:39:39", "remaining_time": "11:13:06"} +{"current_steps": 484, "total_steps": 8680, "loss": 0.9811379909515381, "lr": 1.9998257540726567e-06, "epoch": 0.11152073732718894, "percentage": 5.58, "elapsed_time": "0:39:44", "remaining_time": "11:13:06"} +{"current_steps": 485, "total_steps": 8680, "loss": 1.0246069431304932, "lr": 1.9998185696390184e-06, "epoch": 0.11175115207373272, "percentage": 5.59, "elapsed_time": "0:39:49", "remaining_time": "11:12:52"} +{"current_steps": 486, "total_steps": 8680, "loss": 1.0614899396896362, "lr": 1.9998112400830385e-06, "epoch": 0.1119815668202765, "percentage": 5.6, "elapsed_time": "0:39:52", "remaining_time": "11:12:25"} +{"current_steps": 487, "total_steps": 8680, "loss": 1.02305269241333, "lr": 1.9998037654057803e-06, "epoch": 0.11221198156682027, 
"percentage": 5.61, "elapsed_time": "0:39:56", "remaining_time": "11:12:03"} +{"current_steps": 488, "total_steps": 8680, "loss": 1.044907808303833, "lr": 1.999796145608329e-06, "epoch": 0.11244239631336406, "percentage": 5.62, "elapsed_time": "0:40:02", "remaining_time": "11:12:05"} +{"current_steps": 489, "total_steps": 8680, "loss": 0.9669852256774902, "lr": 1.999788380691791e-06, "epoch": 0.11267281105990784, "percentage": 5.63, "elapsed_time": "0:40:06", "remaining_time": "11:11:49"} +{"current_steps": 490, "total_steps": 8680, "loss": 1.0235236883163452, "lr": 1.9997804706572933e-06, "epoch": 0.11290322580645161, "percentage": 5.65, "elapsed_time": "0:40:12", "remaining_time": "11:11:56"} +{"current_steps": 491, "total_steps": 8680, "loss": 0.8982692360877991, "lr": 1.9997724155059835e-06, "epoch": 0.1131336405529954, "percentage": 5.66, "elapsed_time": "0:40:17", "remaining_time": "11:12:07"} +{"current_steps": 492, "total_steps": 8680, "loss": 0.8390282988548279, "lr": 1.9997642152390312e-06, "epoch": 0.11336405529953918, "percentage": 5.67, "elapsed_time": "0:40:22", "remaining_time": "11:11:57"} +{"current_steps": 493, "total_steps": 8680, "loss": 0.8938695192337036, "lr": 1.9997558698576266e-06, "epoch": 0.11359447004608295, "percentage": 5.68, "elapsed_time": "0:40:26", "remaining_time": "11:11:43"} +{"current_steps": 494, "total_steps": 8680, "loss": 0.9747422933578491, "lr": 1.9997473793629813e-06, "epoch": 0.11382488479262673, "percentage": 5.69, "elapsed_time": "0:40:32", "remaining_time": "11:11:42"} +{"current_steps": 495, "total_steps": 8680, "loss": 1.050918698310852, "lr": 1.999738743756327e-06, "epoch": 0.11405529953917051, "percentage": 5.7, "elapsed_time": "0:40:36", "remaining_time": "11:11:32"} +{"current_steps": 496, "total_steps": 8680, "loss": 0.9169312715530396, "lr": 1.9997299630389174e-06, "epoch": 0.11428571428571428, "percentage": 5.71, "elapsed_time": "0:40:41", "remaining_time": "11:11:32"} +{"current_steps": 497, "total_steps": 
8680, "loss": 1.0258065462112427, "lr": 1.9997210372120272e-06, "epoch": 0.11451612903225807, "percentage": 5.73, "elapsed_time": "0:40:46", "remaining_time": "11:11:16"} +{"current_steps": 498, "total_steps": 8680, "loss": 1.066356897354126, "lr": 1.9997119662769523e-06, "epoch": 0.11474654377880185, "percentage": 5.74, "elapsed_time": "0:40:51", "remaining_time": "11:11:15"} +{"current_steps": 499, "total_steps": 8680, "loss": 1.0336101055145264, "lr": 1.9997027502350086e-06, "epoch": 0.11497695852534562, "percentage": 5.75, "elapsed_time": "0:40:54", "remaining_time": "11:10:48"} +{"current_steps": 500, "total_steps": 8680, "loss": 1.0434989929199219, "lr": 1.9996933890875342e-06, "epoch": 0.1152073732718894, "percentage": 5.76, "elapsed_time": "0:40:59", "remaining_time": "11:10:30"} +{"current_steps": 501, "total_steps": 8680, "loss": 1.0081424713134766, "lr": 1.9996838828358876e-06, "epoch": 0.11543778801843319, "percentage": 5.77, "elapsed_time": "0:41:06", "remaining_time": "11:10:59"} +{"current_steps": 502, "total_steps": 8680, "loss": 1.0998575687408447, "lr": 1.999674231481449e-06, "epoch": 0.11566820276497695, "percentage": 5.78, "elapsed_time": "0:41:11", "remaining_time": "11:10:55"} +{"current_steps": 503, "total_steps": 8680, "loss": 1.0325868129730225, "lr": 1.9996644350256193e-06, "epoch": 0.11589861751152074, "percentage": 5.79, "elapsed_time": "0:41:16", "remaining_time": "11:11:05"} +{"current_steps": 504, "total_steps": 8680, "loss": 1.0520741939544678, "lr": 1.99965449346982e-06, "epoch": 0.11612903225806452, "percentage": 5.81, "elapsed_time": "0:41:21", "remaining_time": "11:11:00"} +{"current_steps": 505, "total_steps": 8680, "loss": 0.9355484247207642, "lr": 1.9996444068154943e-06, "epoch": 0.11635944700460829, "percentage": 5.82, "elapsed_time": "0:41:28", "remaining_time": "11:11:22"} +{"current_steps": 506, "total_steps": 8680, "loss": 1.2088062763214111, "lr": 1.9996341750641067e-06, "epoch": 0.11658986175115207, "percentage": 5.83, 
"elapsed_time": "0:41:31", "remaining_time": "11:10:53"} +{"current_steps": 507, "total_steps": 8680, "loss": 1.007477045059204, "lr": 1.9996237982171416e-06, "epoch": 0.11682027649769586, "percentage": 5.84, "elapsed_time": "0:41:35", "remaining_time": "11:10:23"} +{"current_steps": 508, "total_steps": 8680, "loss": 0.9528911113739014, "lr": 1.9996132762761054e-06, "epoch": 0.11705069124423963, "percentage": 5.85, "elapsed_time": "0:41:40", "remaining_time": "11:10:19"} +{"current_steps": 509, "total_steps": 8680, "loss": 1.0906065702438354, "lr": 1.9996026092425258e-06, "epoch": 0.11728110599078341, "percentage": 5.86, "elapsed_time": "0:41:44", "remaining_time": "11:09:57"} +{"current_steps": 510, "total_steps": 8680, "loss": 1.1328812837600708, "lr": 1.9995917971179507e-06, "epoch": 0.1175115207373272, "percentage": 5.88, "elapsed_time": "0:41:49", "remaining_time": "11:09:59"} +{"current_steps": 511, "total_steps": 8680, "loss": 1.1367099285125732, "lr": 1.9995808399039493e-06, "epoch": 0.11774193548387096, "percentage": 5.89, "elapsed_time": "0:41:52", "remaining_time": "11:09:29"} +{"current_steps": 512, "total_steps": 8680, "loss": 1.22605562210083, "lr": 1.999569737602112e-06, "epoch": 0.11797235023041475, "percentage": 5.9, "elapsed_time": "0:41:56", "remaining_time": "11:09:08"} +{"current_steps": 513, "total_steps": 8680, "loss": 0.8814148306846619, "lr": 1.9995584902140514e-06, "epoch": 0.11820276497695853, "percentage": 5.91, "elapsed_time": "0:42:01", "remaining_time": "11:09:02"} +{"current_steps": 514, "total_steps": 8680, "loss": 0.916766881942749, "lr": 1.9995470977413988e-06, "epoch": 0.1184331797235023, "percentage": 5.92, "elapsed_time": "0:42:07", "remaining_time": "11:09:13"} +{"current_steps": 515, "total_steps": 8680, "loss": 0.8088599443435669, "lr": 1.999535560185808e-06, "epoch": 0.11866359447004608, "percentage": 5.93, "elapsed_time": "0:42:12", "remaining_time": "11:09:13"} +{"current_steps": 516, "total_steps": 8680, "loss": 
1.0029397010803223, "lr": 1.9995238775489538e-06, "epoch": 0.11889400921658987, "percentage": 5.94, "elapsed_time": "0:42:17", "remaining_time": "11:09:08"} +{"current_steps": 517, "total_steps": 8680, "loss": 1.157515287399292, "lr": 1.9995120498325322e-06, "epoch": 0.11912442396313364, "percentage": 5.96, "elapsed_time": "0:42:21", "remaining_time": "11:08:47"} +{"current_steps": 518, "total_steps": 8680, "loss": 0.989453911781311, "lr": 1.99950007703826e-06, "epoch": 0.11935483870967742, "percentage": 5.97, "elapsed_time": "0:42:25", "remaining_time": "11:08:30"} +{"current_steps": 519, "total_steps": 8680, "loss": 0.9791898727416992, "lr": 1.999487959167874e-06, "epoch": 0.1195852534562212, "percentage": 5.98, "elapsed_time": "0:42:29", "remaining_time": "11:08:08"} +{"current_steps": 520, "total_steps": 8680, "loss": 0.9994203448295593, "lr": 1.9994756962231343e-06, "epoch": 0.11981566820276497, "percentage": 5.99, "elapsed_time": "0:42:34", "remaining_time": "11:08:02"} +{"current_steps": 521, "total_steps": 8680, "loss": 0.9096299409866333, "lr": 1.999463288205821e-06, "epoch": 0.12004608294930876, "percentage": 6.0, "elapsed_time": "0:42:38", "remaining_time": "11:07:52"} +{"current_steps": 522, "total_steps": 8680, "loss": 0.9956046342849731, "lr": 1.999450735117734e-06, "epoch": 0.12027649769585254, "percentage": 6.01, "elapsed_time": "0:42:43", "remaining_time": "11:07:50"} +{"current_steps": 523, "total_steps": 8680, "loss": 1.0336079597473145, "lr": 1.9994380369606956e-06, "epoch": 0.12050691244239631, "percentage": 6.03, "elapsed_time": "0:42:49", "remaining_time": "11:07:59"} +{"current_steps": 524, "total_steps": 8680, "loss": 0.8828116655349731, "lr": 1.99942519373655e-06, "epoch": 0.12073732718894009, "percentage": 6.04, "elapsed_time": "0:42:54", "remaining_time": "11:07:56"} +{"current_steps": 525, "total_steps": 8680, "loss": 0.8733093738555908, "lr": 1.9994122054471597e-06, "epoch": 0.12096774193548387, "percentage": 6.05, "elapsed_time": 
"0:42:59", "remaining_time": "11:07:54"} +{"current_steps": 526, "total_steps": 8680, "loss": 1.0312494039535522, "lr": 1.9993990720944114e-06, "epoch": 0.12119815668202764, "percentage": 6.06, "elapsed_time": "0:43:04", "remaining_time": "11:07:45"} +{"current_steps": 527, "total_steps": 8680, "loss": 0.9229701161384583, "lr": 1.9993857936802105e-06, "epoch": 0.12142857142857143, "percentage": 6.07, "elapsed_time": "0:43:09", "remaining_time": "11:07:33"} +{"current_steps": 528, "total_steps": 8680, "loss": 0.8980100154876709, "lr": 1.9993723702064853e-06, "epoch": 0.12165898617511521, "percentage": 6.08, "elapsed_time": "0:43:14", "remaining_time": "11:07:31"} +{"current_steps": 529, "total_steps": 8680, "loss": 0.939933180809021, "lr": 1.999358801675183e-06, "epoch": 0.12188940092165898, "percentage": 6.09, "elapsed_time": "0:43:18", "remaining_time": "11:07:19"} +{"current_steps": 530, "total_steps": 8680, "loss": 1.0014444589614868, "lr": 1.9993450880882733e-06, "epoch": 0.12211981566820276, "percentage": 6.11, "elapsed_time": "0:43:22", "remaining_time": "11:07:02"} +{"current_steps": 531, "total_steps": 8680, "loss": 0.9995889663696289, "lr": 1.9993312294477477e-06, "epoch": 0.12235023041474655, "percentage": 6.12, "elapsed_time": "0:43:26", "remaining_time": "11:06:42"} +{"current_steps": 532, "total_steps": 8680, "loss": 1.0010197162628174, "lr": 1.9993172257556167e-06, "epoch": 0.12258064516129032, "percentage": 6.13, "elapsed_time": "0:43:30", "remaining_time": "11:06:18"} +{"current_steps": 533, "total_steps": 8680, "loss": 0.972966194152832, "lr": 1.9993030770139135e-06, "epoch": 0.1228110599078341, "percentage": 6.14, "elapsed_time": "0:43:35", "remaining_time": "11:06:10"} +{"current_steps": 534, "total_steps": 8680, "loss": 0.8033444881439209, "lr": 1.9992887832246917e-06, "epoch": 0.12304147465437788, "percentage": 6.15, "elapsed_time": "0:43:40", "remaining_time": "11:06:07"} +{"current_steps": 535, "total_steps": 8680, "loss": 0.7532742619514465, 
"lr": 1.9992743443900254e-06, "epoch": 0.12327188940092165, "percentage": 6.16, "elapsed_time": "0:43:45", "remaining_time": "11:06:04"} +{"current_steps": 536, "total_steps": 8680, "loss": 1.058760643005371, "lr": 1.9992597605120113e-06, "epoch": 0.12350230414746544, "percentage": 6.18, "elapsed_time": "0:43:50", "remaining_time": "11:06:00"} +{"current_steps": 537, "total_steps": 8680, "loss": 0.8559634685516357, "lr": 1.9992450315927658e-06, "epoch": 0.12373271889400922, "percentage": 6.19, "elapsed_time": "0:43:55", "remaining_time": "11:06:03"} +{"current_steps": 538, "total_steps": 8680, "loss": 1.053638219833374, "lr": 1.9992301576344267e-06, "epoch": 0.12396313364055299, "percentage": 6.2, "elapsed_time": "0:44:01", "remaining_time": "11:06:23"} +{"current_steps": 539, "total_steps": 8680, "loss": 0.8841970562934875, "lr": 1.9992151386391528e-06, "epoch": 0.12419354838709677, "percentage": 6.21, "elapsed_time": "0:44:06", "remaining_time": "11:06:07"} +{"current_steps": 540, "total_steps": 8680, "loss": 0.9355173110961914, "lr": 1.9991999746091247e-06, "epoch": 0.12442396313364056, "percentage": 6.22, "elapsed_time": "0:44:09", "remaining_time": "11:05:46"} +{"current_steps": 541, "total_steps": 8680, "loss": 0.9978284239768982, "lr": 1.999184665546543e-06, "epoch": 0.12465437788018432, "percentage": 6.23, "elapsed_time": "0:44:14", "remaining_time": "11:05:33"} +{"current_steps": 542, "total_steps": 8680, "loss": 0.8855264782905579, "lr": 1.99916921145363e-06, "epoch": 0.12488479262672811, "percentage": 6.24, "elapsed_time": "0:44:19", "remaining_time": "11:05:31"} +{"current_steps": 543, "total_steps": 8680, "loss": 0.885519802570343, "lr": 1.9991536123326283e-06, "epoch": 0.1251152073732719, "percentage": 6.26, "elapsed_time": "0:44:23", "remaining_time": "11:05:13"} +{"current_steps": 544, "total_steps": 8680, "loss": 0.9772528409957886, "lr": 1.9991378681858024e-06, "epoch": 0.12534562211981568, "percentage": 6.27, "elapsed_time": "0:44:27", 
"remaining_time": "11:04:59"} +{"current_steps": 545, "total_steps": 8680, "loss": 0.8817745447158813, "lr": 1.999121979015438e-06, "epoch": 0.12557603686635946, "percentage": 6.28, "elapsed_time": "0:44:31", "remaining_time": "11:04:32"} +{"current_steps": 546, "total_steps": 8680, "loss": 0.9374080896377563, "lr": 1.9991059448238404e-06, "epoch": 0.12580645161290321, "percentage": 6.29, "elapsed_time": "0:44:37", "remaining_time": "11:04:46"} +{"current_steps": 547, "total_steps": 8680, "loss": 0.9174116253852844, "lr": 1.9990897656133383e-06, "epoch": 0.126036866359447, "percentage": 6.3, "elapsed_time": "0:44:41", "remaining_time": "11:04:27"} +{"current_steps": 548, "total_steps": 8680, "loss": 0.9514039158821106, "lr": 1.999073441386279e-06, "epoch": 0.12626728110599078, "percentage": 6.31, "elapsed_time": "0:44:45", "remaining_time": "11:04:18"} +{"current_steps": 549, "total_steps": 8680, "loss": 1.10535728931427, "lr": 1.999056972145032e-06, "epoch": 0.12649769585253456, "percentage": 6.32, "elapsed_time": "0:44:50", "remaining_time": "11:04:09"} +{"current_steps": 550, "total_steps": 8680, "loss": 1.0641597509384155, "lr": 1.999040357891989e-06, "epoch": 0.12672811059907835, "percentage": 6.34, "elapsed_time": "0:44:56", "remaining_time": "11:04:14"} +{"current_steps": 551, "total_steps": 8680, "loss": 0.9596017599105835, "lr": 1.99902359862956e-06, "epoch": 0.12695852534562213, "percentage": 6.35, "elapsed_time": "0:45:00", "remaining_time": "11:04:07"} +{"current_steps": 552, "total_steps": 8680, "loss": 1.083927869796753, "lr": 1.9990066943601777e-06, "epoch": 0.1271889400921659, "percentage": 6.36, "elapsed_time": "0:45:05", "remaining_time": "11:03:55"} +{"current_steps": 553, "total_steps": 8680, "loss": 0.9146738052368164, "lr": 1.998989645086297e-06, "epoch": 0.12741935483870967, "percentage": 6.37, "elapsed_time": "0:45:09", "remaining_time": "11:03:41"} +{"current_steps": 554, "total_steps": 8680, "loss": 0.9038050770759583, "lr": 
1.998972450810391e-06, "epoch": 0.12764976958525345, "percentage": 6.38, "elapsed_time": "0:45:14", "remaining_time": "11:03:38"} +{"current_steps": 555, "total_steps": 8680, "loss": 0.973220705986023, "lr": 1.9989551115349574e-06, "epoch": 0.12788018433179724, "percentage": 6.39, "elapsed_time": "0:45:18", "remaining_time": "11:03:20"} +{"current_steps": 556, "total_steps": 8680, "loss": 0.8804281949996948, "lr": 1.998937627262511e-06, "epoch": 0.12811059907834102, "percentage": 6.41, "elapsed_time": "0:45:23", "remaining_time": "11:03:07"} +{"current_steps": 557, "total_steps": 8680, "loss": 1.100919485092163, "lr": 1.9989199979955903e-06, "epoch": 0.1283410138248848, "percentage": 6.42, "elapsed_time": "0:45:26", "remaining_time": "11:02:38"} +{"current_steps": 558, "total_steps": 8680, "loss": 1.1152353286743164, "lr": 1.998902223736755e-06, "epoch": 0.12857142857142856, "percentage": 6.43, "elapsed_time": "0:45:31", "remaining_time": "11:02:32"} +{"current_steps": 559, "total_steps": 8680, "loss": 1.0721793174743652, "lr": 1.9988843044885837e-06, "epoch": 0.12880184331797234, "percentage": 6.44, "elapsed_time": "0:45:35", "remaining_time": "11:02:20"} +{"current_steps": 560, "total_steps": 8680, "loss": 0.9035133123397827, "lr": 1.9988662402536783e-06, "epoch": 0.12903225806451613, "percentage": 6.45, "elapsed_time": "0:45:41", "remaining_time": "11:02:32"} +{"current_steps": 561, "total_steps": 8680, "loss": 1.0053937435150146, "lr": 1.9988480310346603e-06, "epoch": 0.1292626728110599, "percentage": 6.46, "elapsed_time": "0:45:47", "remaining_time": "11:02:43"} +{"current_steps": 562, "total_steps": 8680, "loss": 0.8536228537559509, "lr": 1.9988296768341728e-06, "epoch": 0.1294930875576037, "percentage": 6.47, "elapsed_time": "0:45:51", "remaining_time": "11:02:31"} +{"current_steps": 563, "total_steps": 8680, "loss": 0.9673396348953247, "lr": 1.9988111776548797e-06, "epoch": 0.12972350230414748, "percentage": 6.49, "elapsed_time": "0:45:57", 
"remaining_time": "11:02:34"} +{"current_steps": 564, "total_steps": 8680, "loss": 0.9402456879615784, "lr": 1.998792533499467e-06, "epoch": 0.12995391705069123, "percentage": 6.5, "elapsed_time": "0:46:03", "remaining_time": "11:02:46"} +{"current_steps": 565, "total_steps": 8680, "loss": 0.8900678157806396, "lr": 1.99877374437064e-06, "epoch": 0.13018433179723501, "percentage": 6.51, "elapsed_time": "0:46:09", "remaining_time": "11:02:53"} +{"current_steps": 566, "total_steps": 8680, "loss": 0.9112892150878906, "lr": 1.9987548102711264e-06, "epoch": 0.1304147465437788, "percentage": 6.52, "elapsed_time": "0:46:14", "remaining_time": "11:02:58"} +{"current_steps": 567, "total_steps": 8680, "loss": 0.763452410697937, "lr": 1.9987357312036743e-06, "epoch": 0.13064516129032258, "percentage": 6.53, "elapsed_time": "0:46:19", "remaining_time": "11:02:44"} +{"current_steps": 568, "total_steps": 8680, "loss": 1.0410873889923096, "lr": 1.9987165071710527e-06, "epoch": 0.13087557603686636, "percentage": 6.54, "elapsed_time": "0:46:23", "remaining_time": "11:02:26"} +{"current_steps": 569, "total_steps": 8680, "loss": 1.029583215713501, "lr": 1.9986971381760524e-06, "epoch": 0.13110599078341015, "percentage": 6.56, "elapsed_time": "0:46:27", "remaining_time": "11:02:14"} +{"current_steps": 570, "total_steps": 8680, "loss": 0.994928777217865, "lr": 1.9986776242214845e-06, "epoch": 0.1313364055299539, "percentage": 6.57, "elapsed_time": "0:46:32", "remaining_time": "11:02:09"} +{"current_steps": 571, "total_steps": 8680, "loss": 1.001985788345337, "lr": 1.9986579653101817e-06, "epoch": 0.1315668202764977, "percentage": 6.58, "elapsed_time": "0:46:37", "remaining_time": "11:02:09"} +{"current_steps": 572, "total_steps": 8680, "loss": 0.9813050031661987, "lr": 1.998638161444997e-06, "epoch": 0.13179723502304147, "percentage": 6.59, "elapsed_time": "0:46:42", "remaining_time": "11:02:07"} +{"current_steps": 573, "total_steps": 8680, "loss": 0.8678451180458069, "lr": 
1.9986182126288053e-06, "epoch": 0.13202764976958525, "percentage": 6.6, "elapsed_time": "0:46:48", "remaining_time": "11:02:09"} +{"current_steps": 574, "total_steps": 8680, "loss": 1.0393729209899902, "lr": 1.998598118864502e-06, "epoch": 0.13225806451612904, "percentage": 6.61, "elapsed_time": "0:46:52", "remaining_time": "11:02:02"} +{"current_steps": 575, "total_steps": 8680, "loss": 0.9631935358047485, "lr": 1.998577880155004e-06, "epoch": 0.13248847926267282, "percentage": 6.62, "elapsed_time": "0:46:56", "remaining_time": "11:01:44"} +{"current_steps": 576, "total_steps": 8680, "loss": 0.8480437994003296, "lr": 1.9985574965032483e-06, "epoch": 0.1327188940092166, "percentage": 6.64, "elapsed_time": "0:47:02", "remaining_time": "11:01:53"} +{"current_steps": 577, "total_steps": 8680, "loss": 1.0450071096420288, "lr": 1.998536967912194e-06, "epoch": 0.13294930875576036, "percentage": 6.65, "elapsed_time": "0:47:06", "remaining_time": "11:01:38"} +{"current_steps": 578, "total_steps": 8680, "loss": 0.9374763369560242, "lr": 1.9985162943848207e-06, "epoch": 0.13317972350230414, "percentage": 6.66, "elapsed_time": "0:47:12", "remaining_time": "11:01:38"} +{"current_steps": 579, "total_steps": 8680, "loss": 0.9405182600021362, "lr": 1.9984954759241293e-06, "epoch": 0.13341013824884793, "percentage": 6.67, "elapsed_time": "0:47:17", "remaining_time": "11:01:36"} +{"current_steps": 580, "total_steps": 8680, "loss": 0.8406375646591187, "lr": 1.998474512533141e-06, "epoch": 0.1336405529953917, "percentage": 6.68, "elapsed_time": "0:47:22", "remaining_time": "11:01:41"} +{"current_steps": 581, "total_steps": 8680, "loss": 0.9323312044143677, "lr": 1.9984534042148994e-06, "epoch": 0.1338709677419355, "percentage": 6.69, "elapsed_time": "0:47:28", "remaining_time": "11:01:46"} +{"current_steps": 582, "total_steps": 8680, "loss": 1.0704214572906494, "lr": 1.998432150972468e-06, "epoch": 0.13410138248847928, "percentage": 6.71, "elapsed_time": "0:47:33", "remaining_time": 
"11:01:38"} +{"current_steps": 583, "total_steps": 8680, "loss": 0.8636025190353394, "lr": 1.9984107528089315e-06, "epoch": 0.13433179723502303, "percentage": 6.72, "elapsed_time": "0:47:37", "remaining_time": "11:01:29"} +{"current_steps": 584, "total_steps": 8680, "loss": 0.9616303443908691, "lr": 1.998389209727396e-06, "epoch": 0.13456221198156681, "percentage": 6.73, "elapsed_time": "0:47:41", "remaining_time": "11:01:15"} +{"current_steps": 585, "total_steps": 8680, "loss": 1.0457193851470947, "lr": 1.998367521730988e-06, "epoch": 0.1347926267281106, "percentage": 6.74, "elapsed_time": "0:47:46", "remaining_time": "11:01:00"} +{"current_steps": 586, "total_steps": 8680, "loss": 1.0460572242736816, "lr": 1.9983456888228566e-06, "epoch": 0.13502304147465438, "percentage": 6.75, "elapsed_time": "0:47:50", "remaining_time": "11:00:45"} +{"current_steps": 587, "total_steps": 8680, "loss": 0.9693883657455444, "lr": 1.9983237110061695e-06, "epoch": 0.13525345622119817, "percentage": 6.76, "elapsed_time": "0:47:55", "remaining_time": "11:00:38"} +{"current_steps": 588, "total_steps": 8680, "loss": 0.8823472857475281, "lr": 1.9983015882841175e-06, "epoch": 0.13548387096774195, "percentage": 6.77, "elapsed_time": "0:47:58", "remaining_time": "11:00:16"} +{"current_steps": 589, "total_steps": 8680, "loss": 1.1602983474731445, "lr": 1.998279320659912e-06, "epoch": 0.1357142857142857, "percentage": 6.79, "elapsed_time": "0:48:02", "remaining_time": "10:59:58"} +{"current_steps": 590, "total_steps": 8680, "loss": 0.8191353678703308, "lr": 1.9982569081367843e-06, "epoch": 0.1359447004608295, "percentage": 6.8, "elapsed_time": "0:48:07", "remaining_time": "10:59:59"} +{"current_steps": 591, "total_steps": 8680, "loss": 1.141557216644287, "lr": 1.9982343507179876e-06, "epoch": 0.13617511520737327, "percentage": 6.81, "elapsed_time": "0:48:12", "remaining_time": "10:59:53"} +{"current_steps": 592, "total_steps": 8680, "loss": 0.9688570499420166, "lr": 1.998211648406797e-06, 
"epoch": 0.13640552995391705, "percentage": 6.82, "elapsed_time": "0:48:18", "remaining_time": "10:59:55"} +{"current_steps": 593, "total_steps": 8680, "loss": 1.0218561887741089, "lr": 1.9981888012065068e-06, "epoch": 0.13663594470046084, "percentage": 6.83, "elapsed_time": "0:48:23", "remaining_time": "11:00:02"} +{"current_steps": 594, "total_steps": 8680, "loss": 0.9531952142715454, "lr": 1.9981658091204334e-06, "epoch": 0.13686635944700462, "percentage": 6.84, "elapsed_time": "0:48:27", "remaining_time": "10:59:35"} +{"current_steps": 595, "total_steps": 8680, "loss": 1.1421492099761963, "lr": 1.9981426721519143e-06, "epoch": 0.13709677419354838, "percentage": 6.85, "elapsed_time": "0:48:30", "remaining_time": "10:59:14"} +{"current_steps": 596, "total_steps": 8680, "loss": 0.8173041343688965, "lr": 1.9981193903043074e-06, "epoch": 0.13732718894009216, "percentage": 6.87, "elapsed_time": "0:48:36", "remaining_time": "10:59:23"} +{"current_steps": 597, "total_steps": 8680, "loss": 0.8842465877532959, "lr": 1.998095963580993e-06, "epoch": 0.13755760368663594, "percentage": 6.88, "elapsed_time": "0:48:40", "remaining_time": "10:59:03"} +{"current_steps": 598, "total_steps": 8680, "loss": 0.8547788858413696, "lr": 1.9980723919853703e-06, "epoch": 0.13778801843317973, "percentage": 6.89, "elapsed_time": "0:48:46", "remaining_time": "10:59:07"} +{"current_steps": 599, "total_steps": 8680, "loss": 1.0085712671279907, "lr": 1.998048675520861e-06, "epoch": 0.1380184331797235, "percentage": 6.9, "elapsed_time": "0:48:50", "remaining_time": "10:58:50"} +{"current_steps": 600, "total_steps": 8680, "loss": 0.9276378750801086, "lr": 1.9980248141909083e-06, "epoch": 0.1382488479262673, "percentage": 6.91, "elapsed_time": "0:48:55", "remaining_time": "10:58:46"} +{"current_steps": 601, "total_steps": 8680, "loss": 0.9236693382263184, "lr": 1.998000807998975e-06, "epoch": 0.13847926267281105, "percentage": 6.92, "elapsed_time": "0:49:02", "remaining_time": "10:59:15"} 
+{"current_steps": 602, "total_steps": 8680, "loss": 1.0353924036026, "lr": 1.9979766569485454e-06, "epoch": 0.13870967741935483, "percentage": 6.94, "elapsed_time": "0:49:07", "remaining_time": "10:59:06"} +{"current_steps": 603, "total_steps": 8680, "loss": 0.8456567525863647, "lr": 1.9979523610431246e-06, "epoch": 0.13894009216589862, "percentage": 6.95, "elapsed_time": "0:49:11", "remaining_time": "10:58:59"} +{"current_steps": 604, "total_steps": 8680, "loss": 0.997468888759613, "lr": 1.997927920286241e-06, "epoch": 0.1391705069124424, "percentage": 6.96, "elapsed_time": "0:49:17", "remaining_time": "10:59:01"} +{"current_steps": 605, "total_steps": 8680, "loss": 0.8962260484695435, "lr": 1.9979033346814397e-06, "epoch": 0.13940092165898618, "percentage": 6.97, "elapsed_time": "0:49:22", "remaining_time": "10:59:07"} +{"current_steps": 606, "total_steps": 8680, "loss": 0.8586266040802002, "lr": 1.997878604232291e-06, "epoch": 0.13963133640552997, "percentage": 6.98, "elapsed_time": "0:49:27", "remaining_time": "10:58:55"} +{"current_steps": 607, "total_steps": 8680, "loss": 0.9639670848846436, "lr": 1.9978537289423837e-06, "epoch": 0.13986175115207372, "percentage": 6.99, "elapsed_time": "0:49:31", "remaining_time": "10:58:45"} +{"current_steps": 608, "total_steps": 8680, "loss": 1.005727767944336, "lr": 1.9978287088153286e-06, "epoch": 0.1400921658986175, "percentage": 7.0, "elapsed_time": "0:49:36", "remaining_time": "10:58:38"} +{"current_steps": 609, "total_steps": 8680, "loss": 1.148871898651123, "lr": 1.9978035438547575e-06, "epoch": 0.1403225806451613, "percentage": 7.02, "elapsed_time": "0:49:40", "remaining_time": "10:58:25"} +{"current_steps": 610, "total_steps": 8680, "loss": 1.0459539890289307, "lr": 1.9977782340643226e-06, "epoch": 0.14055299539170507, "percentage": 7.03, "elapsed_time": "0:49:46", "remaining_time": "10:58:24"} +{"current_steps": 611, "total_steps": 8680, "loss": 0.92689448595047, "lr": 1.9977527794476985e-06, "epoch": 
0.14078341013824885, "percentage": 7.04, "elapsed_time": "0:49:50", "remaining_time": "10:58:11"} +{"current_steps": 612, "total_steps": 8680, "loss": 0.9735790491104126, "lr": 1.997727180008579e-06, "epoch": 0.14101382488479264, "percentage": 7.05, "elapsed_time": "0:49:53", "remaining_time": "10:57:48"} +{"current_steps": 613, "total_steps": 8680, "loss": 0.8882870674133301, "lr": 1.99770143575068e-06, "epoch": 0.1412442396313364, "percentage": 7.06, "elapsed_time": "0:49:59", "remaining_time": "10:57:54"} +{"current_steps": 614, "total_steps": 8680, "loss": 0.9229795932769775, "lr": 1.9976755466777386e-06, "epoch": 0.14147465437788018, "percentage": 7.07, "elapsed_time": "0:50:03", "remaining_time": "10:57:33"} +{"current_steps": 615, "total_steps": 8680, "loss": 0.9097769260406494, "lr": 1.997649512793512e-06, "epoch": 0.14170506912442396, "percentage": 7.09, "elapsed_time": "0:50:08", "remaining_time": "10:57:26"} +{"current_steps": 616, "total_steps": 8680, "loss": 0.7751711010932922, "lr": 1.9976233341017798e-06, "epoch": 0.14193548387096774, "percentage": 7.1, "elapsed_time": "0:50:13", "remaining_time": "10:57:25"} +{"current_steps": 617, "total_steps": 8680, "loss": 0.9071080684661865, "lr": 1.9975970106063414e-06, "epoch": 0.14216589861751153, "percentage": 7.11, "elapsed_time": "0:50:18", "remaining_time": "10:57:24"} +{"current_steps": 618, "total_steps": 8680, "loss": 0.8444115519523621, "lr": 1.997570542311017e-06, "epoch": 0.1423963133640553, "percentage": 7.12, "elapsed_time": "0:50:24", "remaining_time": "10:57:30"} +{"current_steps": 619, "total_steps": 8680, "loss": 0.9159516096115112, "lr": 1.9975439292196496e-06, "epoch": 0.14262672811059907, "percentage": 7.13, "elapsed_time": "0:50:27", "remaining_time": "10:57:11"} +{"current_steps": 620, "total_steps": 8680, "loss": 0.9697242975234985, "lr": 1.997517171336101e-06, "epoch": 0.14285714285714285, "percentage": 7.14, "elapsed_time": "0:50:32", "remaining_time": "10:56:58"} +{"current_steps": 
621, "total_steps": 8680, "loss": 0.9894170761108398, "lr": 1.9974902686642557e-06, "epoch": 0.14308755760368663, "percentage": 7.15, "elapsed_time": "0:50:37", "remaining_time": "10:57:02"} +{"current_steps": 622, "total_steps": 8680, "loss": 1.0364127159118652, "lr": 1.9974632212080184e-06, "epoch": 0.14331797235023042, "percentage": 7.17, "elapsed_time": "0:50:41", "remaining_time": "10:56:42"} +{"current_steps": 623, "total_steps": 8680, "loss": 0.8980219960212708, "lr": 1.997436028971315e-06, "epoch": 0.1435483870967742, "percentage": 7.18, "elapsed_time": "0:50:47", "remaining_time": "10:56:49"} +{"current_steps": 624, "total_steps": 8680, "loss": 1.0293703079223633, "lr": 1.9974086919580925e-06, "epoch": 0.14377880184331798, "percentage": 7.19, "elapsed_time": "0:50:51", "remaining_time": "10:56:35"} +{"current_steps": 625, "total_steps": 8680, "loss": 1.006148099899292, "lr": 1.9973812101723186e-06, "epoch": 0.14400921658986174, "percentage": 7.2, "elapsed_time": "0:50:55", "remaining_time": "10:56:14"} +{"current_steps": 626, "total_steps": 8680, "loss": 0.9489799737930298, "lr": 1.9973535836179825e-06, "epoch": 0.14423963133640552, "percentage": 7.21, "elapsed_time": "0:50:58", "remaining_time": "10:55:53"} +{"current_steps": 627, "total_steps": 8680, "loss": 0.7601498961448669, "lr": 1.997325812299094e-06, "epoch": 0.1444700460829493, "percentage": 7.22, "elapsed_time": "0:51:04", "remaining_time": "10:56:05"} +{"current_steps": 628, "total_steps": 8680, "loss": 0.8345643281936646, "lr": 1.9972978962196843e-06, "epoch": 0.1447004608294931, "percentage": 7.24, "elapsed_time": "0:51:09", "remaining_time": "10:55:52"} +{"current_steps": 629, "total_steps": 8680, "loss": 0.8705894947052002, "lr": 1.9972698353838053e-06, "epoch": 0.14493087557603687, "percentage": 7.25, "elapsed_time": "0:51:14", "remaining_time": "10:55:50"} +{"current_steps": 630, "total_steps": 8680, "loss": 0.9515185356140137, "lr": 1.9972416297955294e-06, "epoch": 0.14516129032258066, 
"percentage": 7.26, "elapsed_time": "0:51:19", "remaining_time": "10:55:46"} +{"current_steps": 631, "total_steps": 8680, "loss": 0.7616517543792725, "lr": 1.9972132794589514e-06, "epoch": 0.1453917050691244, "percentage": 7.27, "elapsed_time": "0:51:24", "remaining_time": "10:55:46"} +{"current_steps": 632, "total_steps": 8680, "loss": 0.8870444297790527, "lr": 1.9971847843781862e-06, "epoch": 0.1456221198156682, "percentage": 7.28, "elapsed_time": "0:51:29", "remaining_time": "10:55:37"} +{"current_steps": 633, "total_steps": 8680, "loss": 0.8709393739700317, "lr": 1.9971561445573696e-06, "epoch": 0.14585253456221198, "percentage": 7.29, "elapsed_time": "0:51:33", "remaining_time": "10:55:21"} +{"current_steps": 634, "total_steps": 8680, "loss": 0.865444540977478, "lr": 1.997127360000658e-06, "epoch": 0.14608294930875576, "percentage": 7.3, "elapsed_time": "0:51:37", "remaining_time": "10:55:11"} +{"current_steps": 635, "total_steps": 8680, "loss": 0.9560728073120117, "lr": 1.997098430712231e-06, "epoch": 0.14631336405529954, "percentage": 7.32, "elapsed_time": "0:51:41", "remaining_time": "10:54:58"} +{"current_steps": 636, "total_steps": 8680, "loss": 0.7579058408737183, "lr": 1.9970693566962866e-06, "epoch": 0.14654377880184333, "percentage": 7.33, "elapsed_time": "0:51:47", "remaining_time": "10:55:08"} +{"current_steps": 637, "total_steps": 8680, "loss": 1.0709021091461182, "lr": 1.997040137957045e-06, "epoch": 0.14677419354838708, "percentage": 7.34, "elapsed_time": "0:51:52", "remaining_time": "10:54:53"} +{"current_steps": 638, "total_steps": 8680, "loss": 0.9911563396453857, "lr": 1.9970107744987474e-06, "epoch": 0.14700460829493087, "percentage": 7.35, "elapsed_time": "0:51:55", "remaining_time": "10:54:37"} +{"current_steps": 639, "total_steps": 8680, "loss": 0.9673472046852112, "lr": 1.996981266325655e-06, "epoch": 0.14723502304147465, "percentage": 7.36, "elapsed_time": "0:52:00", "remaining_time": "10:54:25"} +{"current_steps": 640, "total_steps": 
8680, "loss": 0.7728441953659058, "lr": 1.9969516134420523e-06, "epoch": 0.14746543778801843, "percentage": 7.37, "elapsed_time": "0:52:06", "remaining_time": "10:54:34"} +{"current_steps": 641, "total_steps": 8680, "loss": 1.0198101997375488, "lr": 1.9969218158522426e-06, "epoch": 0.14769585253456222, "percentage": 7.38, "elapsed_time": "0:52:10", "remaining_time": "10:54:22"} +{"current_steps": 642, "total_steps": 8680, "loss": 0.9710760116577148, "lr": 1.996891873560551e-06, "epoch": 0.147926267281106, "percentage": 7.4, "elapsed_time": "0:52:14", "remaining_time": "10:54:00"} +{"current_steps": 643, "total_steps": 8680, "loss": 0.9956847429275513, "lr": 1.9968617865713237e-06, "epoch": 0.14815668202764978, "percentage": 7.41, "elapsed_time": "0:52:17", "remaining_time": "10:53:37"} +{"current_steps": 644, "total_steps": 8680, "loss": 1.0974771976470947, "lr": 1.996831554888928e-06, "epoch": 0.14838709677419354, "percentage": 7.42, "elapsed_time": "0:52:23", "remaining_time": "10:53:40"} +{"current_steps": 645, "total_steps": 8680, "loss": 0.914455771446228, "lr": 1.9968011785177513e-06, "epoch": 0.14861751152073732, "percentage": 7.43, "elapsed_time": "0:52:27", "remaining_time": "10:53:31"} +{"current_steps": 646, "total_steps": 8680, "loss": 1.1308314800262451, "lr": 1.9967706574622033e-06, "epoch": 0.1488479262672811, "percentage": 7.44, "elapsed_time": "0:52:32", "remaining_time": "10:53:21"} +{"current_steps": 647, "total_steps": 8680, "loss": 0.981814444065094, "lr": 1.9967399917267142e-06, "epoch": 0.1490783410138249, "percentage": 7.45, "elapsed_time": "0:52:35", "remaining_time": "10:53:02"} +{"current_steps": 648, "total_steps": 8680, "loss": 0.874076247215271, "lr": 1.9967091813157345e-06, "epoch": 0.14930875576036867, "percentage": 7.47, "elapsed_time": "0:52:40", "remaining_time": "10:52:54"} +{"current_steps": 649, "total_steps": 8680, "loss": 0.8496171832084656, "lr": 1.9966782262337365e-06, "epoch": 0.14953917050691246, "percentage": 7.48, 
"elapsed_time": "0:52:46", "remaining_time": "10:52:59"} +{"current_steps": 650, "total_steps": 8680, "loss": 0.9395674467086792, "lr": 1.9966471264852136e-06, "epoch": 0.1497695852534562, "percentage": 7.49, "elapsed_time": "0:52:51", "remaining_time": "10:53:00"} +{"current_steps": 651, "total_steps": 8680, "loss": 0.8363018035888672, "lr": 1.99661588207468e-06, "epoch": 0.15, "percentage": 7.5, "elapsed_time": "0:52:56", "remaining_time": "10:52:59"} +{"current_steps": 652, "total_steps": 8680, "loss": 1.0035831928253174, "lr": 1.9965844930066696e-06, "epoch": 0.15023041474654378, "percentage": 7.51, "elapsed_time": "0:53:01", "remaining_time": "10:52:48"} +{"current_steps": 653, "total_steps": 8680, "loss": 1.0316795110702515, "lr": 1.99655295928574e-06, "epoch": 0.15046082949308756, "percentage": 7.52, "elapsed_time": "0:53:05", "remaining_time": "10:52:43"} +{"current_steps": 654, "total_steps": 8680, "loss": 0.9545150995254517, "lr": 1.9965212809164676e-06, "epoch": 0.15069124423963134, "percentage": 7.53, "elapsed_time": "0:53:11", "remaining_time": "10:52:44"} +{"current_steps": 655, "total_steps": 8680, "loss": 0.993801474571228, "lr": 1.99648945790345e-06, "epoch": 0.15092165898617513, "percentage": 7.55, "elapsed_time": "0:53:16", "remaining_time": "10:52:39"} +{"current_steps": 656, "total_steps": 8680, "loss": 0.8666588664054871, "lr": 1.9964574902513075e-06, "epoch": 0.15115207373271888, "percentage": 7.56, "elapsed_time": "0:53:22", "remaining_time": "10:52:48"} +{"current_steps": 657, "total_steps": 8680, "loss": 0.9507651925086975, "lr": 1.9964253779646787e-06, "epoch": 0.15138248847926267, "percentage": 7.57, "elapsed_time": "0:53:26", "remaining_time": "10:52:39"} +{"current_steps": 658, "total_steps": 8680, "loss": 0.8700851202011108, "lr": 1.996393121048226e-06, "epoch": 0.15161290322580645, "percentage": 7.58, "elapsed_time": "0:53:30", "remaining_time": "10:52:19"} +{"current_steps": 659, "total_steps": 8680, "loss": 0.9966975450515747, 
"lr": 1.9963607195066307e-06, "epoch": 0.15184331797235023, "percentage": 7.59, "elapsed_time": "0:53:34", "remaining_time": "10:52:08"} +{"current_steps": 660, "total_steps": 8680, "loss": 0.9552028179168701, "lr": 1.9963281733445957e-06, "epoch": 0.15207373271889402, "percentage": 7.6, "elapsed_time": "0:53:40", "remaining_time": "10:52:10"} +{"current_steps": 661, "total_steps": 8680, "loss": 1.0182740688323975, "lr": 1.9962954825668456e-06, "epoch": 0.1523041474654378, "percentage": 7.62, "elapsed_time": "0:53:45", "remaining_time": "10:52:15"} +{"current_steps": 662, "total_steps": 8680, "loss": 1.0001778602600098, "lr": 1.996262647178125e-06, "epoch": 0.15253456221198156, "percentage": 7.63, "elapsed_time": "0:53:51", "remaining_time": "10:52:14"} +{"current_steps": 663, "total_steps": 8680, "loss": 0.9902865886688232, "lr": 1.9962296671832e-06, "epoch": 0.15276497695852534, "percentage": 7.64, "elapsed_time": "0:53:55", "remaining_time": "10:52:04"} +{"current_steps": 664, "total_steps": 8680, "loss": 0.9272845983505249, "lr": 1.9961965425868575e-06, "epoch": 0.15299539170506912, "percentage": 7.65, "elapsed_time": "0:54:00", "remaining_time": "10:52:01"} +{"current_steps": 665, "total_steps": 8680, "loss": 0.9705777168273926, "lr": 1.996163273393906e-06, "epoch": 0.1532258064516129, "percentage": 7.66, "elapsed_time": "0:54:05", "remaining_time": "10:51:56"} +{"current_steps": 666, "total_steps": 8680, "loss": 0.9472209215164185, "lr": 1.9961298596091736e-06, "epoch": 0.1534562211981567, "percentage": 7.67, "elapsed_time": "0:54:10", "remaining_time": "10:51:58"} +{"current_steps": 667, "total_steps": 8680, "loss": 0.9734043478965759, "lr": 1.9960963012375113e-06, "epoch": 0.15368663594470047, "percentage": 7.68, "elapsed_time": "0:54:15", "remaining_time": "10:51:46"} +{"current_steps": 668, "total_steps": 8680, "loss": 0.8765468001365662, "lr": 1.9960625982837894e-06, "epoch": 0.15391705069124423, "percentage": 7.7, "elapsed_time": "0:54:20", 
"remaining_time": "10:51:43"} +{"current_steps": 669, "total_steps": 8680, "loss": 1.0224063396453857, "lr": 1.9960287507529e-06, "epoch": 0.154147465437788, "percentage": 7.71, "elapsed_time": "0:54:25", "remaining_time": "10:51:40"} +{"current_steps": 670, "total_steps": 8680, "loss": 0.983299970626831, "lr": 1.995994758649756e-06, "epoch": 0.1543778801843318, "percentage": 7.72, "elapsed_time": "0:54:29", "remaining_time": "10:51:28"} +{"current_steps": 671, "total_steps": 8680, "loss": 1.0524147748947144, "lr": 1.9959606219792914e-06, "epoch": 0.15460829493087558, "percentage": 7.73, "elapsed_time": "0:54:33", "remaining_time": "10:51:15"} +{"current_steps": 672, "total_steps": 8680, "loss": 1.1012977361679077, "lr": 1.9959263407464606e-06, "epoch": 0.15483870967741936, "percentage": 7.74, "elapsed_time": "0:54:37", "remaining_time": "10:50:59"} +{"current_steps": 673, "total_steps": 8680, "loss": 0.8494049310684204, "lr": 1.99589191495624e-06, "epoch": 0.15506912442396314, "percentage": 7.75, "elapsed_time": "0:54:43", "remaining_time": "10:51:11"} +{"current_steps": 674, "total_steps": 8680, "loss": 0.8677045106887817, "lr": 1.9958573446136263e-06, "epoch": 0.1552995391705069, "percentage": 7.76, "elapsed_time": "0:54:48", "remaining_time": "10:51:04"} +{"current_steps": 675, "total_steps": 8680, "loss": 1.1034941673278809, "lr": 1.995822629723638e-06, "epoch": 0.15552995391705068, "percentage": 7.78, "elapsed_time": "0:54:52", "remaining_time": "10:50:48"} +{"current_steps": 676, "total_steps": 8680, "loss": 0.8428820371627808, "lr": 1.9957877702913128e-06, "epoch": 0.15576036866359447, "percentage": 7.79, "elapsed_time": "0:54:58", "remaining_time": "10:50:50"} +{"current_steps": 677, "total_steps": 8680, "loss": 0.8584408760070801, "lr": 1.9957527663217107e-06, "epoch": 0.15599078341013825, "percentage": 7.8, "elapsed_time": "0:55:01", "remaining_time": "10:50:32"} +{"current_steps": 678, "total_steps": 8680, "loss": 0.8089514970779419, "lr": 
1.995717617819913e-06, "epoch": 0.15622119815668203, "percentage": 7.81, "elapsed_time": "0:55:06", "remaining_time": "10:50:28"} +{"current_steps": 679, "total_steps": 8680, "loss": 0.8459775447845459, "lr": 1.9956823247910217e-06, "epoch": 0.15645161290322582, "percentage": 7.82, "elapsed_time": "0:55:11", "remaining_time": "10:50:18"} +{"current_steps": 680, "total_steps": 8680, "loss": 1.0583066940307617, "lr": 1.9956468872401583e-06, "epoch": 0.15668202764976957, "percentage": 7.83, "elapsed_time": "0:55:16", "remaining_time": "10:50:22"} +{"current_steps": 681, "total_steps": 8680, "loss": 0.9396135807037354, "lr": 1.995611305172468e-06, "epoch": 0.15691244239631336, "percentage": 7.85, "elapsed_time": "0:55:21", "remaining_time": "10:50:18"} +{"current_steps": 682, "total_steps": 8680, "loss": 1.0143593549728394, "lr": 1.995575578593114e-06, "epoch": 0.15714285714285714, "percentage": 7.86, "elapsed_time": "0:55:27", "remaining_time": "10:50:23"} +{"current_steps": 683, "total_steps": 8680, "loss": 0.8822500109672546, "lr": 1.9955397075072833e-06, "epoch": 0.15737327188940092, "percentage": 7.87, "elapsed_time": "0:55:33", "remaining_time": "10:50:34"} +{"current_steps": 684, "total_steps": 8680, "loss": 0.8841962218284607, "lr": 1.995503691920182e-06, "epoch": 0.1576036866359447, "percentage": 7.88, "elapsed_time": "0:55:38", "remaining_time": "10:50:23"} +{"current_steps": 685, "total_steps": 8680, "loss": 0.8537080883979797, "lr": 1.9954675318370374e-06, "epoch": 0.1578341013824885, "percentage": 7.89, "elapsed_time": "0:55:45", "remaining_time": "10:50:41"} +{"current_steps": 686, "total_steps": 8680, "loss": 1.0292394161224365, "lr": 1.9954312272630985e-06, "epoch": 0.15806451612903225, "percentage": 7.9, "elapsed_time": "0:55:49", "remaining_time": "10:50:26"} +{"current_steps": 687, "total_steps": 8680, "loss": 0.8741706013679504, "lr": 1.995394778203635e-06, "epoch": 0.15829493087557603, "percentage": 7.91, "elapsed_time": "0:55:53", 
"remaining_time": "10:50:17"} +{"current_steps": 688, "total_steps": 8680, "loss": 0.9429572820663452, "lr": 1.995358184663937e-06, "epoch": 0.1585253456221198, "percentage": 7.93, "elapsed_time": "0:55:59", "remaining_time": "10:50:26"} +{"current_steps": 689, "total_steps": 8680, "loss": 0.9522494077682495, "lr": 1.995321446649316e-06, "epoch": 0.1587557603686636, "percentage": 7.94, "elapsed_time": "0:56:03", "remaining_time": "10:50:10"} +{"current_steps": 690, "total_steps": 8680, "loss": 0.9743782877922058, "lr": 1.9952845641651046e-06, "epoch": 0.15898617511520738, "percentage": 7.95, "elapsed_time": "0:56:07", "remaining_time": "10:49:53"} +{"current_steps": 691, "total_steps": 8680, "loss": 0.926364541053772, "lr": 1.995247537216657e-06, "epoch": 0.15921658986175116, "percentage": 7.96, "elapsed_time": "0:56:11", "remaining_time": "10:49:42"} +{"current_steps": 692, "total_steps": 8680, "loss": 0.8355565071105957, "lr": 1.995210365809346e-06, "epoch": 0.15944700460829492, "percentage": 7.97, "elapsed_time": "0:56:16", "remaining_time": "10:49:31"} +{"current_steps": 693, "total_steps": 8680, "loss": 0.9200692772865295, "lr": 1.9951730499485684e-06, "epoch": 0.1596774193548387, "percentage": 7.98, "elapsed_time": "0:56:21", "remaining_time": "10:49:29"} +{"current_steps": 694, "total_steps": 8680, "loss": 0.7571361064910889, "lr": 1.99513558963974e-06, "epoch": 0.15990783410138248, "percentage": 8.0, "elapsed_time": "0:56:25", "remaining_time": "10:49:18"} +{"current_steps": 695, "total_steps": 8680, "loss": 0.935307502746582, "lr": 1.995097984888298e-06, "epoch": 0.16013824884792627, "percentage": 8.01, "elapsed_time": "0:56:30", "remaining_time": "10:49:11"} +{"current_steps": 696, "total_steps": 8680, "loss": 1.1118557453155518, "lr": 1.995060235699701e-06, "epoch": 0.16036866359447005, "percentage": 8.02, "elapsed_time": "0:56:33", "remaining_time": "10:48:52"} +{"current_steps": 697, "total_steps": 8680, "loss": 0.8024749755859375, "lr": 
1.995022342079428e-06, "epoch": 0.16059907834101383, "percentage": 8.03, "elapsed_time": "0:56:39", "remaining_time": "10:48:52"} +{"current_steps": 698, "total_steps": 8680, "loss": 0.9018943309783936, "lr": 1.994984304032979e-06, "epoch": 0.1608294930875576, "percentage": 8.04, "elapsed_time": "0:56:43", "remaining_time": "10:48:42"} +{"current_steps": 699, "total_steps": 8680, "loss": 0.8571128249168396, "lr": 1.9949461215658757e-06, "epoch": 0.16105990783410137, "percentage": 8.05, "elapsed_time": "0:56:47", "remaining_time": "10:48:31"} +{"current_steps": 700, "total_steps": 8680, "loss": 0.9707971215248108, "lr": 1.99490779468366e-06, "epoch": 0.16129032258064516, "percentage": 8.06, "elapsed_time": "0:56:52", "remaining_time": "10:48:27"} +{"current_steps": 701, "total_steps": 8680, "loss": 0.8157618045806885, "lr": 1.994869323391895e-06, "epoch": 0.16152073732718894, "percentage": 8.08, "elapsed_time": "0:57:01", "remaining_time": "10:48:59"} +{"current_steps": 702, "total_steps": 8680, "loss": 0.9009906053543091, "lr": 1.994830707696165e-06, "epoch": 0.16175115207373272, "percentage": 8.09, "elapsed_time": "0:57:05", "remaining_time": "10:48:47"} +{"current_steps": 703, "total_steps": 8680, "loss": 1.0093860626220703, "lr": 1.9947919476020745e-06, "epoch": 0.1619815668202765, "percentage": 8.1, "elapsed_time": "0:57:09", "remaining_time": "10:48:33"} +{"current_steps": 704, "total_steps": 8680, "loss": 1.018160343170166, "lr": 1.9947530431152494e-06, "epoch": 0.1622119815668203, "percentage": 8.11, "elapsed_time": "0:57:14", "remaining_time": "10:48:36"} +{"current_steps": 705, "total_steps": 8680, "loss": 0.9755370616912842, "lr": 1.9947139942413378e-06, "epoch": 0.16244239631336405, "percentage": 8.12, "elapsed_time": "0:57:19", "remaining_time": "10:48:29"} +{"current_steps": 706, "total_steps": 8680, "loss": 0.9406822919845581, "lr": 1.994674800986006e-06, "epoch": 0.16267281105990783, "percentage": 8.13, "elapsed_time": "0:57:25", "remaining_time": 
"10:48:33"} +{"current_steps": 707, "total_steps": 8680, "loss": 0.9128296971321106, "lr": 1.994635463354944e-06, "epoch": 0.1629032258064516, "percentage": 8.15, "elapsed_time": "0:57:29", "remaining_time": "10:48:18"} +{"current_steps": 708, "total_steps": 8680, "loss": 0.929735541343689, "lr": 1.994595981353861e-06, "epoch": 0.1631336405529954, "percentage": 8.16, "elapsed_time": "0:57:34", "remaining_time": "10:48:12"} +{"current_steps": 709, "total_steps": 8680, "loss": 0.9021023511886597, "lr": 1.994556354988488e-06, "epoch": 0.16336405529953918, "percentage": 8.17, "elapsed_time": "0:57:37", "remaining_time": "10:47:53"} +{"current_steps": 710, "total_steps": 8680, "loss": 0.9187623262405396, "lr": 1.994516584264577e-06, "epoch": 0.16359447004608296, "percentage": 8.18, "elapsed_time": "0:57:41", "remaining_time": "10:47:33"} +{"current_steps": 711, "total_steps": 8680, "loss": 0.8283985257148743, "lr": 1.9944766691879e-06, "epoch": 0.16382488479262672, "percentage": 8.19, "elapsed_time": "0:57:45", "remaining_time": "10:47:17"} +{"current_steps": 712, "total_steps": 8680, "loss": 1.0592901706695557, "lr": 1.994436609764251e-06, "epoch": 0.1640552995391705, "percentage": 8.2, "elapsed_time": "0:57:49", "remaining_time": "10:47:05"} +{"current_steps": 713, "total_steps": 8680, "loss": 0.98726487159729, "lr": 1.9943964059994446e-06, "epoch": 0.16428571428571428, "percentage": 8.21, "elapsed_time": "0:57:53", "remaining_time": "10:46:56"} +{"current_steps": 714, "total_steps": 8680, "loss": 0.8761749267578125, "lr": 1.9943560578993165e-06, "epoch": 0.16451612903225807, "percentage": 8.23, "elapsed_time": "0:57:58", "remaining_time": "10:46:53"} +{"current_steps": 715, "total_steps": 8680, "loss": 0.878170371055603, "lr": 1.9943155654697227e-06, "epoch": 0.16474654377880185, "percentage": 8.24, "elapsed_time": "0:58:04", "remaining_time": "10:46:58"} +{"current_steps": 716, "total_steps": 8680, "loss": 0.9444767236709595, "lr": 1.9942749287165414e-06, "epoch": 
0.16497695852534563, "percentage": 8.25, "elapsed_time": "0:58:08", "remaining_time": "10:46:44"} +{"current_steps": 717, "total_steps": 8680, "loss": 0.8270057439804077, "lr": 1.9942341476456697e-06, "epoch": 0.1652073732718894, "percentage": 8.26, "elapsed_time": "0:58:13", "remaining_time": "10:46:38"} +{"current_steps": 718, "total_steps": 8680, "loss": 0.825955867767334, "lr": 1.9941932222630284e-06, "epoch": 0.16543778801843317, "percentage": 8.27, "elapsed_time": "0:58:20", "remaining_time": "10:46:56"} +{"current_steps": 719, "total_steps": 8680, "loss": 0.9384286403656006, "lr": 1.9941521525745564e-06, "epoch": 0.16566820276497696, "percentage": 8.28, "elapsed_time": "0:58:24", "remaining_time": "10:46:47"} +{"current_steps": 720, "total_steps": 8680, "loss": 0.9627010226249695, "lr": 1.994110938586216e-06, "epoch": 0.16589861751152074, "percentage": 8.29, "elapsed_time": "0:58:30", "remaining_time": "10:46:46"} +{"current_steps": 721, "total_steps": 8680, "loss": 0.8436836004257202, "lr": 1.9940695803039886e-06, "epoch": 0.16612903225806452, "percentage": 8.31, "elapsed_time": "0:58:34", "remaining_time": "10:46:35"} +{"current_steps": 722, "total_steps": 8680, "loss": 1.0689928531646729, "lr": 1.994028077733878e-06, "epoch": 0.1663594470046083, "percentage": 8.32, "elapsed_time": "0:58:39", "remaining_time": "10:46:35"} +{"current_steps": 723, "total_steps": 8680, "loss": 0.911309003829956, "lr": 1.993986430881907e-06, "epoch": 0.16658986175115206, "percentage": 8.33, "elapsed_time": "0:58:44", "remaining_time": "10:46:27"} +{"current_steps": 724, "total_steps": 8680, "loss": 0.9897152185440063, "lr": 1.993944639754122e-06, "epoch": 0.16682027649769585, "percentage": 8.34, "elapsed_time": "0:58:49", "remaining_time": "10:46:27"} +{"current_steps": 725, "total_steps": 8680, "loss": 1.0230367183685303, "lr": 1.9939027043565883e-06, "epoch": 0.16705069124423963, "percentage": 8.35, "elapsed_time": "0:58:54", "remaining_time": "10:46:23"} +{"current_steps": 
726, "total_steps": 8680, "loss": 0.8067069053649902, "lr": 1.993860624695393e-06, "epoch": 0.1672811059907834, "percentage": 8.36, "elapsed_time": "0:59:00", "remaining_time": "10:46:33"} +{"current_steps": 727, "total_steps": 8680, "loss": 0.9784343242645264, "lr": 1.9938184007766434e-06, "epoch": 0.1675115207373272, "percentage": 8.38, "elapsed_time": "0:59:04", "remaining_time": "10:46:13"} +{"current_steps": 728, "total_steps": 8680, "loss": 0.8617877960205078, "lr": 1.9937760326064686e-06, "epoch": 0.16774193548387098, "percentage": 8.39, "elapsed_time": "0:59:09", "remaining_time": "10:46:12"} +{"current_steps": 729, "total_steps": 8680, "loss": 1.0390141010284424, "lr": 1.9937335201910183e-06, "epoch": 0.16797235023041474, "percentage": 8.4, "elapsed_time": "0:59:14", "remaining_time": "10:46:12"} +{"current_steps": 730, "total_steps": 8680, "loss": 1.0478965044021606, "lr": 1.9936908635364633e-06, "epoch": 0.16820276497695852, "percentage": 8.41, "elapsed_time": "0:59:19", "remaining_time": "10:46:03"} +{"current_steps": 731, "total_steps": 8680, "loss": 0.8396252393722534, "lr": 1.9936480626489944e-06, "epoch": 0.1684331797235023, "percentage": 8.42, "elapsed_time": "0:59:25", "remaining_time": "10:46:09"} +{"current_steps": 732, "total_steps": 8680, "loss": 0.8690099120140076, "lr": 1.9936051175348256e-06, "epoch": 0.16866359447004609, "percentage": 8.43, "elapsed_time": "0:59:29", "remaining_time": "10:46:00"} +{"current_steps": 733, "total_steps": 8680, "loss": 0.944722056388855, "lr": 1.993562028200189e-06, "epoch": 0.16889400921658987, "percentage": 8.44, "elapsed_time": "0:59:35", "remaining_time": "10:46:00"} +{"current_steps": 734, "total_steps": 8680, "loss": 0.7134733200073242, "lr": 1.9935187946513385e-06, "epoch": 0.16912442396313365, "percentage": 8.46, "elapsed_time": "0:59:40", "remaining_time": "10:46:04"} +{"current_steps": 735, "total_steps": 8680, "loss": 0.8102486729621887, "lr": 1.993475416894551e-06, "epoch": 0.1693548387096774, 
"percentage": 8.47, "elapsed_time": "0:59:44", "remaining_time": "10:45:50"} +{"current_steps": 736, "total_steps": 8680, "loss": 0.924787163734436, "lr": 1.9934318949361215e-06, "epoch": 0.1695852534562212, "percentage": 8.48, "elapsed_time": "0:59:48", "remaining_time": "10:45:36"} +{"current_steps": 737, "total_steps": 8680, "loss": 0.9595087766647339, "lr": 1.993388228782368e-06, "epoch": 0.16981566820276497, "percentage": 8.49, "elapsed_time": "0:59:52", "remaining_time": "10:45:21"} +{"current_steps": 738, "total_steps": 8680, "loss": 0.9949792623519897, "lr": 1.993344418439628e-06, "epoch": 0.17004608294930876, "percentage": 8.5, "elapsed_time": "0:59:56", "remaining_time": "10:45:02"} +{"current_steps": 739, "total_steps": 8680, "loss": 1.0905860662460327, "lr": 1.9933004639142604e-06, "epoch": 0.17027649769585254, "percentage": 8.51, "elapsed_time": "1:00:01", "remaining_time": "10:45:02"} +{"current_steps": 740, "total_steps": 8680, "loss": 0.9638324975967407, "lr": 1.9932563652126455e-06, "epoch": 0.17050691244239632, "percentage": 8.53, "elapsed_time": "1:00:07", "remaining_time": "10:45:06"} +{"current_steps": 741, "total_steps": 8680, "loss": 0.9434946179389954, "lr": 1.9932121223411844e-06, "epoch": 0.17073732718894008, "percentage": 8.54, "elapsed_time": "1:00:12", "remaining_time": "10:45:04"} +{"current_steps": 742, "total_steps": 8680, "loss": 0.9050095081329346, "lr": 1.9931677353062983e-06, "epoch": 0.17096774193548386, "percentage": 8.55, "elapsed_time": "1:00:16", "remaining_time": "10:44:54"} +{"current_steps": 743, "total_steps": 8680, "loss": 1.0698316097259521, "lr": 1.9931232041144303e-06, "epoch": 0.17119815668202765, "percentage": 8.56, "elapsed_time": "1:00:22", "remaining_time": "10:44:57"} +{"current_steps": 744, "total_steps": 8680, "loss": 0.7938296794891357, "lr": 1.993078528772044e-06, "epoch": 0.17142857142857143, "percentage": 8.57, "elapsed_time": "1:00:26", "remaining_time": "10:44:42"} +{"current_steps": 745, "total_steps": 
8680, "loss": 0.8485043048858643, "lr": 1.993033709285624e-06, "epoch": 0.1716589861751152, "percentage": 8.58, "elapsed_time": "1:00:33", "remaining_time": "10:44:58"} +{"current_steps": 746, "total_steps": 8680, "loss": 0.8605694770812988, "lr": 1.9929887456616754e-06, "epoch": 0.171889400921659, "percentage": 8.59, "elapsed_time": "1:00:39", "remaining_time": "10:45:06"} +{"current_steps": 747, "total_steps": 8680, "loss": 0.7101563215255737, "lr": 1.9929436379067253e-06, "epoch": 0.17211981566820275, "percentage": 8.61, "elapsed_time": "1:00:44", "remaining_time": "10:45:08"} +{"current_steps": 748, "total_steps": 8680, "loss": 1.093912959098816, "lr": 1.9928983860273205e-06, "epoch": 0.17235023041474654, "percentage": 8.62, "elapsed_time": "1:00:49", "remaining_time": "10:45:04"} +{"current_steps": 749, "total_steps": 8680, "loss": 0.8099753856658936, "lr": 1.9928529900300294e-06, "epoch": 0.17258064516129032, "percentage": 8.63, "elapsed_time": "1:00:56", "remaining_time": "10:45:12"} +{"current_steps": 750, "total_steps": 8680, "loss": 0.7816359400749207, "lr": 1.992807449921441e-06, "epoch": 0.1728110599078341, "percentage": 8.64, "elapsed_time": "1:01:02", "remaining_time": "10:45:27"} +{"current_steps": 751, "total_steps": 8680, "loss": 0.8887455463409424, "lr": 1.9927617657081656e-06, "epoch": 0.17304147465437789, "percentage": 8.65, "elapsed_time": "1:01:07", "remaining_time": "10:45:22"} +{"current_steps": 752, "total_steps": 8680, "loss": 1.0926017761230469, "lr": 1.992715937396834e-06, "epoch": 0.17327188940092167, "percentage": 8.66, "elapsed_time": "1:01:11", "remaining_time": "10:45:04"} +{"current_steps": 753, "total_steps": 8680, "loss": 0.7657707929611206, "lr": 1.9926699649940985e-06, "epoch": 0.17350230414746542, "percentage": 8.68, "elapsed_time": "1:01:16", "remaining_time": "10:45:02"} +{"current_steps": 754, "total_steps": 8680, "loss": 0.9350340366363525, "lr": 1.992623848506632e-06, "epoch": 0.1737327188940092, "percentage": 8.69, 
"elapsed_time": "1:01:21", "remaining_time": "10:44:58"} +{"current_steps": 755, "total_steps": 8680, "loss": 0.883575439453125, "lr": 1.9925775879411276e-06, "epoch": 0.173963133640553, "percentage": 8.7, "elapsed_time": "1:01:26", "remaining_time": "10:44:52"} +{"current_steps": 756, "total_steps": 8680, "loss": 0.814304769039154, "lr": 1.9925311833043e-06, "epoch": 0.17419354838709677, "percentage": 8.71, "elapsed_time": "1:01:32", "remaining_time": "10:44:57"} +{"current_steps": 757, "total_steps": 8680, "loss": 0.9263690710067749, "lr": 1.992484634602886e-06, "epoch": 0.17442396313364056, "percentage": 8.72, "elapsed_time": "1:01:37", "remaining_time": "10:44:59"} +{"current_steps": 758, "total_steps": 8680, "loss": 1.0321627855300903, "lr": 1.9924379418436402e-06, "epoch": 0.17465437788018434, "percentage": 8.73, "elapsed_time": "1:01:41", "remaining_time": "10:44:47"} +{"current_steps": 759, "total_steps": 8680, "loss": 0.969459056854248, "lr": 1.9923911050333413e-06, "epoch": 0.1748847926267281, "percentage": 8.74, "elapsed_time": "1:01:46", "remaining_time": "10:44:37"} +{"current_steps": 760, "total_steps": 8680, "loss": 0.9926396012306213, "lr": 1.9923441241787874e-06, "epoch": 0.17511520737327188, "percentage": 8.76, "elapsed_time": "1:01:50", "remaining_time": "10:44:26"} +{"current_steps": 761, "total_steps": 8680, "loss": 0.776180624961853, "lr": 1.9922969992867975e-06, "epoch": 0.17534562211981566, "percentage": 8.77, "elapsed_time": "1:01:55", "remaining_time": "10:44:25"} +{"current_steps": 762, "total_steps": 8680, "loss": 0.9413800239562988, "lr": 1.992249730364212e-06, "epoch": 0.17557603686635945, "percentage": 8.78, "elapsed_time": "1:02:00", "remaining_time": "10:44:24"} +{"current_steps": 763, "total_steps": 8680, "loss": 0.8365576267242432, "lr": 1.9922023174178913e-06, "epoch": 0.17580645161290323, "percentage": 8.79, "elapsed_time": "1:02:05", "remaining_time": "10:44:16"} +{"current_steps": 764, "total_steps": 8680, "loss": 
1.023102879524231, "lr": 1.992154760454718e-06, "epoch": 0.17603686635944701, "percentage": 8.8, "elapsed_time": "1:02:10", "remaining_time": "10:44:08"} +{"current_steps": 765, "total_steps": 8680, "loss": 1.079930067062378, "lr": 1.9921070594815944e-06, "epoch": 0.17626728110599077, "percentage": 8.81, "elapsed_time": "1:02:14", "remaining_time": "10:44:03"} +{"current_steps": 766, "total_steps": 8680, "loss": 0.8974392414093018, "lr": 1.9920592145054445e-06, "epoch": 0.17649769585253455, "percentage": 8.82, "elapsed_time": "1:02:19", "remaining_time": "10:43:54"} +{"current_steps": 767, "total_steps": 8680, "loss": 0.9509298205375671, "lr": 1.9920112255332133e-06, "epoch": 0.17672811059907834, "percentage": 8.84, "elapsed_time": "1:02:23", "remaining_time": "10:43:42"} +{"current_steps": 768, "total_steps": 8680, "loss": 0.938835620880127, "lr": 1.991963092571866e-06, "epoch": 0.17695852534562212, "percentage": 8.85, "elapsed_time": "1:02:29", "remaining_time": "10:43:47"} +{"current_steps": 769, "total_steps": 8680, "loss": 0.7918044328689575, "lr": 1.9919148156283888e-06, "epoch": 0.1771889400921659, "percentage": 8.86, "elapsed_time": "1:02:35", "remaining_time": "10:43:56"} +{"current_steps": 770, "total_steps": 8680, "loss": 0.8235958814620972, "lr": 1.9918663947097893e-06, "epoch": 0.1774193548387097, "percentage": 8.87, "elapsed_time": "1:02:39", "remaining_time": "10:43:43"} +{"current_steps": 771, "total_steps": 8680, "loss": 0.9079158902168274, "lr": 1.9918178298230953e-06, "epoch": 0.17764976958525347, "percentage": 8.88, "elapsed_time": "1:02:45", "remaining_time": "10:43:41"} +{"current_steps": 772, "total_steps": 8680, "loss": 0.8548607230186462, "lr": 1.9917691209753563e-06, "epoch": 0.17788018433179723, "percentage": 8.89, "elapsed_time": "1:02:49", "remaining_time": "10:43:30"} +{"current_steps": 773, "total_steps": 8680, "loss": 0.8327757120132446, "lr": 1.9917202681736428e-06, "epoch": 0.178110599078341, "percentage": 8.91, "elapsed_time": 
"1:02:54", "remaining_time": "10:43:30"} +{"current_steps": 774, "total_steps": 8680, "loss": 1.0511503219604492, "lr": 1.991671271425045e-06, "epoch": 0.1783410138248848, "percentage": 8.92, "elapsed_time": "1:02:58", "remaining_time": "10:43:16"} +{"current_steps": 775, "total_steps": 8680, "loss": 0.9168857932090759, "lr": 1.991622130736675e-06, "epoch": 0.17857142857142858, "percentage": 8.93, "elapsed_time": "1:03:04", "remaining_time": "10:43:18"} +{"current_steps": 776, "total_steps": 8680, "loss": 0.8740782737731934, "lr": 1.9915728461156654e-06, "epoch": 0.17880184331797236, "percentage": 8.94, "elapsed_time": "1:03:09", "remaining_time": "10:43:19"} +{"current_steps": 777, "total_steps": 8680, "loss": 0.9706588983535767, "lr": 1.99152341756917e-06, "epoch": 0.17903225806451614, "percentage": 8.95, "elapsed_time": "1:03:13", "remaining_time": "10:43:08"} +{"current_steps": 778, "total_steps": 8680, "loss": 1.144281268119812, "lr": 1.9914738451043627e-06, "epoch": 0.1792626728110599, "percentage": 8.96, "elapsed_time": "1:03:18", "remaining_time": "10:43:04"} +{"current_steps": 779, "total_steps": 8680, "loss": 0.973777174949646, "lr": 1.9914241287284403e-06, "epoch": 0.17949308755760368, "percentage": 8.97, "elapsed_time": "1:03:23", "remaining_time": "10:42:57"} +{"current_steps": 780, "total_steps": 8680, "loss": 0.9002145528793335, "lr": 1.991374268448617e-06, "epoch": 0.17972350230414746, "percentage": 8.99, "elapsed_time": "1:03:27", "remaining_time": "10:42:40"} +{"current_steps": 781, "total_steps": 8680, "loss": 0.9234670400619507, "lr": 1.9913242642721316e-06, "epoch": 0.17995391705069125, "percentage": 9.0, "elapsed_time": "1:03:31", "remaining_time": "10:42:30"} +{"current_steps": 782, "total_steps": 8680, "loss": 0.9552402496337891, "lr": 1.9912741162062415e-06, "epoch": 0.18018433179723503, "percentage": 9.01, "elapsed_time": "1:03:36", "remaining_time": "10:42:29"} +{"current_steps": 783, "total_steps": 8680, "loss": 1.0485708713531494, "lr": 
1.9912238242582257e-06, "epoch": 0.18041474654377881, "percentage": 9.02, "elapsed_time": "1:03:40", "remaining_time": "10:42:13"} +{"current_steps": 784, "total_steps": 8680, "loss": 0.9852809906005859, "lr": 1.991173388435384e-06, "epoch": 0.18064516129032257, "percentage": 9.03, "elapsed_time": "1:03:44", "remaining_time": "10:41:55"} +{"current_steps": 785, "total_steps": 8680, "loss": 0.7824808359146118, "lr": 1.991122808745037e-06, "epoch": 0.18087557603686635, "percentage": 9.04, "elapsed_time": "1:03:49", "remaining_time": "10:41:56"} +{"current_steps": 786, "total_steps": 8680, "loss": 1.0380492210388184, "lr": 1.9910720851945268e-06, "epoch": 0.18110599078341014, "percentage": 9.06, "elapsed_time": "1:03:54", "remaining_time": "10:41:46"} +{"current_steps": 787, "total_steps": 8680, "loss": 0.9808282256126404, "lr": 1.991021217791215e-06, "epoch": 0.18133640552995392, "percentage": 9.07, "elapsed_time": "1:03:58", "remaining_time": "10:41:41"} +{"current_steps": 788, "total_steps": 8680, "loss": 0.8636116981506348, "lr": 1.9909702065424854e-06, "epoch": 0.1815668202764977, "percentage": 9.08, "elapsed_time": "1:04:03", "remaining_time": "10:41:33"} +{"current_steps": 789, "total_steps": 8680, "loss": 0.8179407715797424, "lr": 1.9909190514557427e-06, "epoch": 0.1817972350230415, "percentage": 9.09, "elapsed_time": "1:04:08", "remaining_time": "10:41:29"} +{"current_steps": 790, "total_steps": 8680, "loss": 0.9424594044685364, "lr": 1.990867752538411e-06, "epoch": 0.18202764976958524, "percentage": 9.1, "elapsed_time": "1:04:13", "remaining_time": "10:41:23"} +{"current_steps": 791, "total_steps": 8680, "loss": 0.9429298043251038, "lr": 1.9908163097979366e-06, "epoch": 0.18225806451612903, "percentage": 9.11, "elapsed_time": "1:04:17", "remaining_time": "10:41:10"} +{"current_steps": 792, "total_steps": 8680, "loss": 0.9671716690063477, "lr": 1.990764723241787e-06, "epoch": 0.1824884792626728, "percentage": 9.12, "elapsed_time": "1:04:21", "remaining_time": 
"10:40:57"} +{"current_steps": 793, "total_steps": 8680, "loss": 1.0063345432281494, "lr": 1.9907129928774494e-06, "epoch": 0.1827188940092166, "percentage": 9.14, "elapsed_time": "1:04:26", "remaining_time": "10:40:52"} +{"current_steps": 794, "total_steps": 8680, "loss": 0.9932061433792114, "lr": 1.990661118712432e-06, "epoch": 0.18294930875576038, "percentage": 9.15, "elapsed_time": "1:04:32", "remaining_time": "10:41:03"} +{"current_steps": 795, "total_steps": 8680, "loss": 0.859153151512146, "lr": 1.990609100754265e-06, "epoch": 0.18317972350230416, "percentage": 9.16, "elapsed_time": "1:04:38", "remaining_time": "10:41:07"} +{"current_steps": 796, "total_steps": 8680, "loss": 0.9328111410140991, "lr": 1.9905569390104984e-06, "epoch": 0.18341013824884791, "percentage": 9.17, "elapsed_time": "1:04:44", "remaining_time": "10:41:16"} +{"current_steps": 797, "total_steps": 8680, "loss": 0.9970628619194031, "lr": 1.9905046334887033e-06, "epoch": 0.1836405529953917, "percentage": 9.18, "elapsed_time": "1:04:48", "remaining_time": "10:41:00"} +{"current_steps": 798, "total_steps": 8680, "loss": 1.0347282886505127, "lr": 1.990452184196472e-06, "epoch": 0.18387096774193548, "percentage": 9.19, "elapsed_time": "1:04:53", "remaining_time": "10:40:51"} +{"current_steps": 799, "total_steps": 8680, "loss": 0.9167106747627258, "lr": 1.990399591141417e-06, "epoch": 0.18410138248847926, "percentage": 9.21, "elapsed_time": "1:04:58", "remaining_time": "10:40:50"} +{"current_steps": 800, "total_steps": 8680, "loss": 0.8895610570907593, "lr": 1.990346854331173e-06, "epoch": 0.18433179723502305, "percentage": 9.22, "elapsed_time": "1:05:02", "remaining_time": "10:40:42"} +{"current_steps": 801, "total_steps": 8680, "loss": 0.8525041341781616, "lr": 1.990293973773394e-06, "epoch": 0.18456221198156683, "percentage": 9.23, "elapsed_time": "1:05:09", "remaining_time": "10:40:50"} +{"current_steps": 802, "total_steps": 8680, "loss": 0.8184069395065308, "lr": 1.9902409494757553e-06, 
"epoch": 0.1847926267281106, "percentage": 9.24, "elapsed_time": "1:05:14", "remaining_time": "10:40:50"} +{"current_steps": 803, "total_steps": 8680, "loss": 0.8342509269714355, "lr": 1.9901877814459544e-06, "epoch": 0.18502304147465437, "percentage": 9.25, "elapsed_time": "1:05:19", "remaining_time": "10:40:49"} +{"current_steps": 804, "total_steps": 8680, "loss": 0.9254395365715027, "lr": 1.9901344696917072e-06, "epoch": 0.18525345622119815, "percentage": 9.26, "elapsed_time": "1:05:23", "remaining_time": "10:40:38"} +{"current_steps": 805, "total_steps": 8680, "loss": 0.9537396430969238, "lr": 1.990081014220753e-06, "epoch": 0.18548387096774194, "percentage": 9.27, "elapsed_time": "1:05:28", "remaining_time": "10:40:26"} +{"current_steps": 806, "total_steps": 8680, "loss": 0.871498167514801, "lr": 1.99002741504085e-06, "epoch": 0.18571428571428572, "percentage": 9.29, "elapsed_time": "1:05:32", "remaining_time": "10:40:21"} +{"current_steps": 807, "total_steps": 8680, "loss": 0.879954993724823, "lr": 1.9899736721597786e-06, "epoch": 0.1859447004608295, "percentage": 9.3, "elapsed_time": "1:05:38", "remaining_time": "10:40:19"} +{"current_steps": 808, "total_steps": 8680, "loss": 0.9238240718841553, "lr": 1.9899197855853386e-06, "epoch": 0.18617511520737326, "percentage": 9.31, "elapsed_time": "1:05:42", "remaining_time": "10:40:14"} +{"current_steps": 809, "total_steps": 8680, "loss": 0.7939119935035706, "lr": 1.9898657553253527e-06, "epoch": 0.18640552995391704, "percentage": 9.32, "elapsed_time": "1:05:47", "remaining_time": "10:40:10"} +{"current_steps": 810, "total_steps": 8680, "loss": 0.8536086082458496, "lr": 1.989811581387663e-06, "epoch": 0.18663594470046083, "percentage": 9.33, "elapsed_time": "1:05:53", "remaining_time": "10:40:14"} +{"current_steps": 811, "total_steps": 8680, "loss": 0.8272225856781006, "lr": 1.9897572637801322e-06, "epoch": 0.1868663594470046, "percentage": 9.34, "elapsed_time": "1:05:58", "remaining_time": "10:40:07"} 
+{"current_steps": 812, "total_steps": 8680, "loss": 0.9187904596328735, "lr": 1.989702802510645e-06, "epoch": 0.1870967741935484, "percentage": 9.35, "elapsed_time": "1:06:02", "remaining_time": "10:39:54"} +{"current_steps": 813, "total_steps": 8680, "loss": 0.905516505241394, "lr": 1.989648197587106e-06, "epoch": 0.18732718894009218, "percentage": 9.37, "elapsed_time": "1:06:08", "remaining_time": "10:39:58"} +{"current_steps": 814, "total_steps": 8680, "loss": 0.7548567056655884, "lr": 1.9895934490174415e-06, "epoch": 0.18755760368663593, "percentage": 9.38, "elapsed_time": "1:06:11", "remaining_time": "10:39:42"} +{"current_steps": 815, "total_steps": 8680, "loss": 0.8242576122283936, "lr": 1.9895385568095978e-06, "epoch": 0.18778801843317972, "percentage": 9.39, "elapsed_time": "1:06:16", "remaining_time": "10:39:39"} +{"current_steps": 816, "total_steps": 8680, "loss": 0.9861007928848267, "lr": 1.9894835209715427e-06, "epoch": 0.1880184331797235, "percentage": 9.4, "elapsed_time": "1:06:22", "remaining_time": "10:39:37"} +{"current_steps": 817, "total_steps": 8680, "loss": 0.9705426096916199, "lr": 1.989428341511264e-06, "epoch": 0.18824884792626728, "percentage": 9.41, "elapsed_time": "1:06:27", "remaining_time": "10:39:36"} +{"current_steps": 818, "total_steps": 8680, "loss": 0.9773565530776978, "lr": 1.9893730184367722e-06, "epoch": 0.18847926267281107, "percentage": 9.42, "elapsed_time": "1:06:32", "remaining_time": "10:39:33"} +{"current_steps": 819, "total_steps": 8680, "loss": 0.7929856777191162, "lr": 1.989317551756096e-06, "epoch": 0.18870967741935485, "percentage": 9.44, "elapsed_time": "1:06:37", "remaining_time": "10:39:33"} +{"current_steps": 820, "total_steps": 8680, "loss": 0.9749126434326172, "lr": 1.9892619414772866e-06, "epoch": 0.1889400921658986, "percentage": 9.45, "elapsed_time": "1:06:42", "remaining_time": "10:39:28"} +{"current_steps": 821, "total_steps": 8680, "loss": 0.9945374727249146, "lr": 1.9892061876084166e-06, "epoch": 
0.1891705069124424, "percentage": 9.46, "elapsed_time": "1:06:46", "remaining_time": "10:39:15"} +{"current_steps": 822, "total_steps": 8680, "loss": 0.8016892075538635, "lr": 1.9891502901575776e-06, "epoch": 0.18940092165898617, "percentage": 9.47, "elapsed_time": "1:06:52", "remaining_time": "10:39:13"} +{"current_steps": 823, "total_steps": 8680, "loss": 0.9389557838439941, "lr": 1.9890942491328837e-06, "epoch": 0.18963133640552995, "percentage": 9.48, "elapsed_time": "1:06:56", "remaining_time": "10:39:01"} +{"current_steps": 824, "total_steps": 8680, "loss": 0.724082887172699, "lr": 1.9890380645424686e-06, "epoch": 0.18986175115207374, "percentage": 9.49, "elapsed_time": "1:07:00", "remaining_time": "10:38:51"} +{"current_steps": 825, "total_steps": 8680, "loss": 0.8877915143966675, "lr": 1.988981736394488e-06, "epoch": 0.19009216589861752, "percentage": 9.5, "elapsed_time": "1:07:04", "remaining_time": "10:38:40"} +{"current_steps": 826, "total_steps": 8680, "loss": 1.207446813583374, "lr": 1.9889252646971177e-06, "epoch": 0.19032258064516128, "percentage": 9.52, "elapsed_time": "1:07:08", "remaining_time": "10:38:23"} +{"current_steps": 827, "total_steps": 8680, "loss": 0.9155057668685913, "lr": 1.9888686494585542e-06, "epoch": 0.19055299539170506, "percentage": 9.53, "elapsed_time": "1:07:13", "remaining_time": "10:38:22"} +{"current_steps": 828, "total_steps": 8680, "loss": 1.005772352218628, "lr": 1.9888118906870154e-06, "epoch": 0.19078341013824884, "percentage": 9.54, "elapsed_time": "1:07:18", "remaining_time": "10:38:16"} +{"current_steps": 829, "total_steps": 8680, "loss": 0.9060605764389038, "lr": 1.9887549883907394e-06, "epoch": 0.19101382488479263, "percentage": 9.55, "elapsed_time": "1:07:23", "remaining_time": "10:38:13"} +{"current_steps": 830, "total_steps": 8680, "loss": 0.7652161717414856, "lr": 1.988697942577986e-06, "epoch": 0.1912442396313364, "percentage": 9.56, "elapsed_time": "1:07:28", "remaining_time": "10:38:10"} +{"current_steps": 
831, "total_steps": 8680, "loss": 1.0191380977630615, "lr": 1.9886407532570354e-06, "epoch": 0.1914746543778802, "percentage": 9.57, "elapsed_time": "1:07:32", "remaining_time": "10:37:55"} +{"current_steps": 832, "total_steps": 8680, "loss": 0.9497933387756348, "lr": 1.9885834204361876e-06, "epoch": 0.19170506912442398, "percentage": 9.59, "elapsed_time": "1:07:35", "remaining_time": "10:37:37"} +{"current_steps": 833, "total_steps": 8680, "loss": 0.7728058099746704, "lr": 1.9885259441237657e-06, "epoch": 0.19193548387096773, "percentage": 9.6, "elapsed_time": "1:07:39", "remaining_time": "10:37:25"} +{"current_steps": 834, "total_steps": 8680, "loss": 0.8961999416351318, "lr": 1.9884683243281113e-06, "epoch": 0.19216589861751152, "percentage": 9.61, "elapsed_time": "1:07:45", "remaining_time": "10:37:23"} +{"current_steps": 835, "total_steps": 8680, "loss": 0.9218904972076416, "lr": 1.9884105610575885e-06, "epoch": 0.1923963133640553, "percentage": 9.62, "elapsed_time": "1:07:48", "remaining_time": "10:37:08"} +{"current_steps": 836, "total_steps": 8680, "loss": 0.8411329984664917, "lr": 1.9883526543205807e-06, "epoch": 0.19262672811059908, "percentage": 9.63, "elapsed_time": "1:07:55", "remaining_time": "10:37:14"} +{"current_steps": 837, "total_steps": 8680, "loss": 0.9536285400390625, "lr": 1.988294604125494e-06, "epoch": 0.19285714285714287, "percentage": 9.64, "elapsed_time": "1:08:00", "remaining_time": "10:37:13"} +{"current_steps": 838, "total_steps": 8680, "loss": 0.9404321908950806, "lr": 1.9882364104807535e-06, "epoch": 0.19308755760368665, "percentage": 9.65, "elapsed_time": "1:08:05", "remaining_time": "10:37:14"} +{"current_steps": 839, "total_steps": 8680, "loss": 1.2520880699157715, "lr": 1.9881780733948066e-06, "epoch": 0.1933179723502304, "percentage": 9.67, "elapsed_time": "1:08:09", "remaining_time": "10:37:01"} +{"current_steps": 840, "total_steps": 8680, "loss": 0.8961449861526489, "lr": 1.9881195928761205e-06, "epoch": 0.1935483870967742, 
"percentage": 9.68, "elapsed_time": "1:08:14", "remaining_time": "10:36:57"} +{"current_steps": 841, "total_steps": 8680, "loss": 0.8844394683837891, "lr": 1.9880609689331833e-06, "epoch": 0.19377880184331797, "percentage": 9.69, "elapsed_time": "1:08:20", "remaining_time": "10:36:57"} +{"current_steps": 842, "total_steps": 8680, "loss": 1.1305835247039795, "lr": 1.9880022015745044e-06, "epoch": 0.19400921658986175, "percentage": 9.7, "elapsed_time": "1:08:24", "remaining_time": "10:36:44"} +{"current_steps": 843, "total_steps": 8680, "loss": 0.9980956315994263, "lr": 1.9879432908086143e-06, "epoch": 0.19423963133640554, "percentage": 9.71, "elapsed_time": "1:08:29", "remaining_time": "10:36:44"} +{"current_steps": 844, "total_steps": 8680, "loss": 0.7613730430603027, "lr": 1.987884236644063e-06, "epoch": 0.19447004608294932, "percentage": 9.72, "elapsed_time": "1:08:35", "remaining_time": "10:36:48"} +{"current_steps": 845, "total_steps": 8680, "loss": 0.9742579460144043, "lr": 1.987825039089423e-06, "epoch": 0.19470046082949308, "percentage": 9.74, "elapsed_time": "1:08:40", "remaining_time": "10:36:45"} +{"current_steps": 846, "total_steps": 8680, "loss": 0.7118766903877258, "lr": 1.9877656981532864e-06, "epoch": 0.19493087557603686, "percentage": 9.75, "elapsed_time": "1:08:45", "remaining_time": "10:36:44"} +{"current_steps": 847, "total_steps": 8680, "loss": 0.8657095432281494, "lr": 1.9877062138442657e-06, "epoch": 0.19516129032258064, "percentage": 9.76, "elapsed_time": "1:08:49", "remaining_time": "10:36:28"} +{"current_steps": 848, "total_steps": 8680, "loss": 0.8543902039527893, "lr": 1.987646586170996e-06, "epoch": 0.19539170506912443, "percentage": 9.77, "elapsed_time": "1:08:55", "remaining_time": "10:36:34"} +{"current_steps": 849, "total_steps": 8680, "loss": 0.8896970748901367, "lr": 1.9875868151421317e-06, "epoch": 0.1956221198156682, "percentage": 9.78, "elapsed_time": "1:09:00", "remaining_time": "10:36:31"} +{"current_steps": 850, 
"total_steps": 8680, "loss": 0.8662775754928589, "lr": 1.9875269007663486e-06, "epoch": 0.195852534562212, "percentage": 9.79, "elapsed_time": "1:09:04", "remaining_time": "10:36:21"} +{"current_steps": 851, "total_steps": 8680, "loss": 0.8241516351699829, "lr": 1.9874668430523434e-06, "epoch": 0.19608294930875575, "percentage": 9.8, "elapsed_time": "1:09:09", "remaining_time": "10:36:13"} +{"current_steps": 852, "total_steps": 8680, "loss": 0.973886251449585, "lr": 1.987406642008833e-06, "epoch": 0.19631336405529953, "percentage": 9.82, "elapsed_time": "1:09:14", "remaining_time": "10:36:09"} +{"current_steps": 853, "total_steps": 8680, "loss": 0.8133533000946045, "lr": 1.9873462976445554e-06, "epoch": 0.19654377880184332, "percentage": 9.83, "elapsed_time": "1:09:20", "remaining_time": "10:36:13"} +{"current_steps": 854, "total_steps": 8680, "loss": 1.120869755744934, "lr": 1.9872858099682697e-06, "epoch": 0.1967741935483871, "percentage": 9.84, "elapsed_time": "1:09:23", "remaining_time": "10:35:55"} +{"current_steps": 855, "total_steps": 8680, "loss": 0.9376444816589355, "lr": 1.9872251789887562e-06, "epoch": 0.19700460829493088, "percentage": 9.85, "elapsed_time": "1:09:27", "remaining_time": "10:35:44"} +{"current_steps": 856, "total_steps": 8680, "loss": 0.8763699531555176, "lr": 1.9871644047148148e-06, "epoch": 0.19723502304147467, "percentage": 9.86, "elapsed_time": "1:09:33", "remaining_time": "10:35:44"} +{"current_steps": 857, "total_steps": 8680, "loss": 0.7993260622024536, "lr": 1.9871034871552667e-06, "epoch": 0.19746543778801842, "percentage": 9.87, "elapsed_time": "1:09:38", "remaining_time": "10:35:38"} +{"current_steps": 858, "total_steps": 8680, "loss": 1.0312654972076416, "lr": 1.9870424263189542e-06, "epoch": 0.1976958525345622, "percentage": 9.88, "elapsed_time": "1:09:43", "remaining_time": "10:35:36"} +{"current_steps": 859, "total_steps": 8680, "loss": 1.0784629583358765, "lr": 1.98698122221474e-06, "epoch": 0.197926267281106, 
"percentage": 9.9, "elapsed_time": "1:09:47", "remaining_time": "10:35:26"} +{"current_steps": 860, "total_steps": 8680, "loss": 1.136039137840271, "lr": 1.9869198748515085e-06, "epoch": 0.19815668202764977, "percentage": 9.91, "elapsed_time": "1:09:52", "remaining_time": "10:35:21"} +{"current_steps": 861, "total_steps": 8680, "loss": 0.834873378276825, "lr": 1.986858384238163e-06, "epoch": 0.19838709677419356, "percentage": 9.92, "elapsed_time": "1:09:58", "remaining_time": "10:35:26"} +{"current_steps": 862, "total_steps": 8680, "loss": 0.9705442190170288, "lr": 1.98679675038363e-06, "epoch": 0.19861751152073734, "percentage": 9.93, "elapsed_time": "1:10:01", "remaining_time": "10:35:08"} +{"current_steps": 863, "total_steps": 8680, "loss": 0.9343886375427246, "lr": 1.9867349732968547e-06, "epoch": 0.1988479262672811, "percentage": 9.94, "elapsed_time": "1:10:06", "remaining_time": "10:35:02"} +{"current_steps": 864, "total_steps": 8680, "loss": 0.9140456914901733, "lr": 1.986673052986805e-06, "epoch": 0.19907834101382488, "percentage": 9.95, "elapsed_time": "1:10:11", "remaining_time": "10:34:55"} +{"current_steps": 865, "total_steps": 8680, "loss": 0.9121139049530029, "lr": 1.986610989462467e-06, "epoch": 0.19930875576036866, "percentage": 9.97, "elapsed_time": "1:10:15", "remaining_time": "10:34:42"} +{"current_steps": 866, "total_steps": 8680, "loss": 0.7333672642707825, "lr": 1.9865487827328505e-06, "epoch": 0.19953917050691244, "percentage": 9.98, "elapsed_time": "1:10:20", "remaining_time": "10:34:42"} +{"current_steps": 867, "total_steps": 8680, "loss": 0.8405989408493042, "lr": 1.986486432806984e-06, "epoch": 0.19976958525345623, "percentage": 9.99, "elapsed_time": "1:10:25", "remaining_time": "10:34:40"} +{"current_steps": 868, "total_steps": 8680, "loss": 0.8693375587463379, "lr": 1.9864239396939176e-06, "epoch": 0.2, "percentage": 10.0, "elapsed_time": "1:10:31", "remaining_time": "10:34:40"} +{"current_steps": 869, "total_steps": 8680, "loss": 
1.0137104988098145, "lr": 1.9863613034027223e-06, "epoch": 0.20023041474654377, "percentage": 10.01, "elapsed_time": "1:10:37", "remaining_time": "10:34:46"} +{"current_steps": 870, "total_steps": 8680, "loss": 1.0283832550048828, "lr": 1.9862985239424895e-06, "epoch": 0.20046082949308755, "percentage": 10.02, "elapsed_time": "1:10:41", "remaining_time": "10:34:39"} +{"current_steps": 871, "total_steps": 8680, "loss": 1.117444634437561, "lr": 1.9862356013223316e-06, "epoch": 0.20069124423963133, "percentage": 10.03, "elapsed_time": "1:10:46", "remaining_time": "10:34:35"} +{"current_steps": 872, "total_steps": 8680, "loss": 0.8861427307128906, "lr": 1.986172535551382e-06, "epoch": 0.20092165898617512, "percentage": 10.05, "elapsed_time": "1:10:50", "remaining_time": "10:34:21"} +{"current_steps": 873, "total_steps": 8680, "loss": 1.0273747444152832, "lr": 1.9861093266387946e-06, "epoch": 0.2011520737327189, "percentage": 10.06, "elapsed_time": "1:10:54", "remaining_time": "10:34:08"} +{"current_steps": 874, "total_steps": 8680, "loss": 0.918023943901062, "lr": 1.9860459745937437e-06, "epoch": 0.20138248847926268, "percentage": 10.07, "elapsed_time": "1:10:59", "remaining_time": "10:34:05"} +{"current_steps": 875, "total_steps": 8680, "loss": 0.8983356952667236, "lr": 1.9859824794254246e-06, "epoch": 0.20161290322580644, "percentage": 10.08, "elapsed_time": "1:11:04", "remaining_time": "10:34:01"} +{"current_steps": 876, "total_steps": 8680, "loss": 1.0180974006652832, "lr": 1.985918841143054e-06, "epoch": 0.20184331797235022, "percentage": 10.09, "elapsed_time": "1:11:09", "remaining_time": "10:33:58"} +{"current_steps": 877, "total_steps": 8680, "loss": 0.9656573534011841, "lr": 1.985855059755869e-06, "epoch": 0.202073732718894, "percentage": 10.1, "elapsed_time": "1:11:15", "remaining_time": "10:34:01"} +{"current_steps": 878, "total_steps": 8680, "loss": 0.8522181510925293, "lr": 1.9857911352731273e-06, "epoch": 0.2023041474654378, "percentage": 10.12, 
"elapsed_time": "1:11:20", "remaining_time": "10:33:56"} +{"current_steps": 879, "total_steps": 8680, "loss": 0.9180892705917358, "lr": 1.985727067704107e-06, "epoch": 0.20253456221198157, "percentage": 10.13, "elapsed_time": "1:11:25", "remaining_time": "10:33:55"} +{"current_steps": 880, "total_steps": 8680, "loss": 0.9979432821273804, "lr": 1.985662857058108e-06, "epoch": 0.20276497695852536, "percentage": 10.14, "elapsed_time": "1:11:31", "remaining_time": "10:33:53"} +{"current_steps": 881, "total_steps": 8680, "loss": 0.8916480541229248, "lr": 1.98559850334445e-06, "epoch": 0.2029953917050691, "percentage": 10.15, "elapsed_time": "1:11:35", "remaining_time": "10:33:46"} +{"current_steps": 882, "total_steps": 8680, "loss": 0.8755770921707153, "lr": 1.9855340065724738e-06, "epoch": 0.2032258064516129, "percentage": 10.16, "elapsed_time": "1:11:40", "remaining_time": "10:33:38"} +{"current_steps": 883, "total_steps": 8680, "loss": 1.0200350284576416, "lr": 1.9854693667515418e-06, "epoch": 0.20345622119815668, "percentage": 10.17, "elapsed_time": "1:11:45", "remaining_time": "10:33:41"} +{"current_steps": 884, "total_steps": 8680, "loss": 0.928024172782898, "lr": 1.9854045838910353e-06, "epoch": 0.20368663594470046, "percentage": 10.18, "elapsed_time": "1:11:49", "remaining_time": "10:33:29"} +{"current_steps": 885, "total_steps": 8680, "loss": 0.8617212176322937, "lr": 1.9853396580003582e-06, "epoch": 0.20391705069124424, "percentage": 10.2, "elapsed_time": "1:11:53", "remaining_time": "10:33:17"} +{"current_steps": 886, "total_steps": 8680, "loss": 0.9383209943771362, "lr": 1.985274589088934e-06, "epoch": 0.20414746543778803, "percentage": 10.21, "elapsed_time": "1:11:57", "remaining_time": "10:33:02"} +{"current_steps": 887, "total_steps": 8680, "loss": 0.7217687368392944, "lr": 1.985209377166208e-06, "epoch": 0.20437788018433178, "percentage": 10.22, "elapsed_time": "1:12:02", "remaining_time": "10:32:57"} +{"current_steps": 888, "total_steps": 8680, "loss": 
1.0717028379440308, "lr": 1.9851440222416446e-06, "epoch": 0.20460829493087557, "percentage": 10.23, "elapsed_time": "1:12:06", "remaining_time": "10:32:45"} +{"current_steps": 889, "total_steps": 8680, "loss": 1.0137064456939697, "lr": 1.9850785243247303e-06, "epoch": 0.20483870967741935, "percentage": 10.24, "elapsed_time": "1:12:11", "remaining_time": "10:32:42"} +{"current_steps": 890, "total_steps": 8680, "loss": 0.8569058179855347, "lr": 1.985012883424973e-06, "epoch": 0.20506912442396313, "percentage": 10.25, "elapsed_time": "1:12:16", "remaining_time": "10:32:37"} +{"current_steps": 891, "total_steps": 8680, "loss": 0.9398901462554932, "lr": 1.9849470995518993e-06, "epoch": 0.20529953917050692, "percentage": 10.26, "elapsed_time": "1:12:21", "remaining_time": "10:32:29"} +{"current_steps": 892, "total_steps": 8680, "loss": 0.731800377368927, "lr": 1.9848811727150577e-06, "epoch": 0.2055299539170507, "percentage": 10.28, "elapsed_time": "1:12:26", "remaining_time": "10:32:26"} +{"current_steps": 893, "total_steps": 8680, "loss": 0.8543055653572083, "lr": 1.984815102924018e-06, "epoch": 0.20576036866359446, "percentage": 10.29, "elapsed_time": "1:12:30", "remaining_time": "10:32:18"} +{"current_steps": 894, "total_steps": 8680, "loss": 0.9112114906311035, "lr": 1.98474889018837e-06, "epoch": 0.20599078341013824, "percentage": 10.3, "elapsed_time": "1:12:35", "remaining_time": "10:32:17"} +{"current_steps": 895, "total_steps": 8680, "loss": 0.8272690773010254, "lr": 1.984682534517724e-06, "epoch": 0.20622119815668202, "percentage": 10.31, "elapsed_time": "1:12:40", "remaining_time": "10:32:05"} +{"current_steps": 896, "total_steps": 8680, "loss": 0.9680918455123901, "lr": 1.984616035921712e-06, "epoch": 0.2064516129032258, "percentage": 10.32, "elapsed_time": "1:12:46", "remaining_time": "10:32:11"} +{"current_steps": 897, "total_steps": 8680, "loss": 0.815123438835144, "lr": 1.984549394409985e-06, "epoch": 0.2066820276497696, "percentage": 10.33, 
"elapsed_time": "1:12:51", "remaining_time": "10:32:09"} +{"current_steps": 898, "total_steps": 8680, "loss": 0.8035521507263184, "lr": 1.984482609992218e-06, "epoch": 0.20691244239631337, "percentage": 10.35, "elapsed_time": "1:12:55", "remaining_time": "10:31:58"} +{"current_steps": 899, "total_steps": 8680, "loss": 0.9000132083892822, "lr": 1.9844156826781027e-06, "epoch": 0.20714285714285716, "percentage": 10.36, "elapsed_time": "1:13:00", "remaining_time": "10:31:54"} +{"current_steps": 900, "total_steps": 8680, "loss": 1.06328547000885, "lr": 1.9843486124773543e-06, "epoch": 0.2073732718894009, "percentage": 10.37, "elapsed_time": "1:13:04", "remaining_time": "10:31:44"} +{"current_steps": 901, "total_steps": 8680, "loss": 0.9028425216674805, "lr": 1.9842813993997083e-06, "epoch": 0.2076036866359447, "percentage": 10.38, "elapsed_time": "1:13:11", "remaining_time": "10:31:53"} +{"current_steps": 902, "total_steps": 8680, "loss": 0.7786350250244141, "lr": 1.9842140434549196e-06, "epoch": 0.20783410138248848, "percentage": 10.39, "elapsed_time": "1:13:16", "remaining_time": "10:31:53"} +{"current_steps": 903, "total_steps": 8680, "loss": 0.8041539788246155, "lr": 1.9841465446527656e-06, "epoch": 0.20806451612903226, "percentage": 10.4, "elapsed_time": "1:13:22", "remaining_time": "10:31:53"} +{"current_steps": 904, "total_steps": 8680, "loss": 0.8380184173583984, "lr": 1.9840789030030434e-06, "epoch": 0.20829493087557605, "percentage": 10.41, "elapsed_time": "1:13:26", "remaining_time": "10:31:47"} +{"current_steps": 905, "total_steps": 8680, "loss": 0.8191432952880859, "lr": 1.984011118515572e-06, "epoch": 0.20852534562211983, "percentage": 10.43, "elapsed_time": "1:13:31", "remaining_time": "10:31:40"} +{"current_steps": 906, "total_steps": 8680, "loss": 0.8236384391784668, "lr": 1.9839431912001885e-06, "epoch": 0.20875576036866358, "percentage": 10.44, "elapsed_time": "1:13:37", "remaining_time": "10:31:42"} +{"current_steps": 907, "total_steps": 8680, 
"loss": 0.8218076825141907, "lr": 1.9838751210667534e-06, "epoch": 0.20898617511520737, "percentage": 10.45, "elapsed_time": "1:13:41", "remaining_time": "10:31:28"} +{"current_steps": 908, "total_steps": 8680, "loss": 0.9140353202819824, "lr": 1.983806908125147e-06, "epoch": 0.20921658986175115, "percentage": 10.46, "elapsed_time": "1:13:44", "remaining_time": "10:31:14"} +{"current_steps": 909, "total_steps": 8680, "loss": 0.9179826974868774, "lr": 1.9837385523852706e-06, "epoch": 0.20944700460829493, "percentage": 10.47, "elapsed_time": "1:13:51", "remaining_time": "10:31:20"} +{"current_steps": 910, "total_steps": 8680, "loss": 0.8888909816741943, "lr": 1.9836700538570456e-06, "epoch": 0.20967741935483872, "percentage": 10.48, "elapsed_time": "1:13:56", "remaining_time": "10:31:22"} +{"current_steps": 911, "total_steps": 8680, "loss": 0.8951253890991211, "lr": 1.9836014125504143e-06, "epoch": 0.2099078341013825, "percentage": 10.5, "elapsed_time": "1:14:00", "remaining_time": "10:31:12"} +{"current_steps": 912, "total_steps": 8680, "loss": 1.084958553314209, "lr": 1.98353262847534e-06, "epoch": 0.21013824884792626, "percentage": 10.51, "elapsed_time": "1:14:05", "remaining_time": "10:31:04"} +{"current_steps": 913, "total_steps": 8680, "loss": 0.8590713739395142, "lr": 1.983463701641807e-06, "epoch": 0.21036866359447004, "percentage": 10.52, "elapsed_time": "1:14:10", "remaining_time": "10:30:57"} +{"current_steps": 914, "total_steps": 8680, "loss": 1.0393706560134888, "lr": 1.9833946320598195e-06, "epoch": 0.21059907834101382, "percentage": 10.53, "elapsed_time": "1:14:15", "remaining_time": "10:30:56"} +{"current_steps": 915, "total_steps": 8680, "loss": 0.9403085708618164, "lr": 1.983325419739403e-06, "epoch": 0.2108294930875576, "percentage": 10.54, "elapsed_time": "1:14:20", "remaining_time": "10:30:55"} +{"current_steps": 916, "total_steps": 8680, "loss": 0.8431342244148254, "lr": 1.9832560646906038e-06, "epoch": 0.2110599078341014, "percentage": 10.55, 
"elapsed_time": "1:14:25", "remaining_time": "10:30:51"} +{"current_steps": 917, "total_steps": 8680, "loss": 0.9024044871330261, "lr": 1.9831865669234884e-06, "epoch": 0.21129032258064517, "percentage": 10.56, "elapsed_time": "1:14:30", "remaining_time": "10:30:45"} +{"current_steps": 918, "total_steps": 8680, "loss": 0.747347354888916, "lr": 1.9831169264481443e-06, "epoch": 0.21152073732718893, "percentage": 10.58, "elapsed_time": "1:14:36", "remaining_time": "10:30:50"} +{"current_steps": 919, "total_steps": 8680, "loss": 0.8266197443008423, "lr": 1.9830471432746796e-06, "epoch": 0.2117511520737327, "percentage": 10.59, "elapsed_time": "1:14:41", "remaining_time": "10:30:47"} +{"current_steps": 920, "total_steps": 8680, "loss": 0.8633416295051575, "lr": 1.9829772174132235e-06, "epoch": 0.2119815668202765, "percentage": 10.6, "elapsed_time": "1:14:46", "remaining_time": "10:30:40"} +{"current_steps": 921, "total_steps": 8680, "loss": 1.0290095806121826, "lr": 1.9829071488739256e-06, "epoch": 0.21221198156682028, "percentage": 10.61, "elapsed_time": "1:14:50", "remaining_time": "10:30:34"} +{"current_steps": 922, "total_steps": 8680, "loss": 0.8193448781967163, "lr": 1.9828369376669566e-06, "epoch": 0.21244239631336406, "percentage": 10.62, "elapsed_time": "1:14:55", "remaining_time": "10:30:29"} +{"current_steps": 923, "total_steps": 8680, "loss": 0.8828415870666504, "lr": 1.982766583802507e-06, "epoch": 0.21267281105990785, "percentage": 10.63, "elapsed_time": "1:15:00", "remaining_time": "10:30:26"} +{"current_steps": 924, "total_steps": 8680, "loss": 0.8806191682815552, "lr": 1.9826960872907885e-06, "epoch": 0.2129032258064516, "percentage": 10.65, "elapsed_time": "1:15:05", "remaining_time": "10:30:16"} +{"current_steps": 925, "total_steps": 8680, "loss": 0.8441533446311951, "lr": 1.982625448142034e-06, "epoch": 0.21313364055299538, "percentage": 10.66, "elapsed_time": "1:15:10", "remaining_time": "10:30:16"} +{"current_steps": 926, "total_steps": 8680, 
"loss": 0.9084080457687378, "lr": 1.9825546663664963e-06, "epoch": 0.21336405529953917, "percentage": 10.67, "elapsed_time": "1:15:15", "remaining_time": "10:30:12"} +{"current_steps": 927, "total_steps": 8680, "loss": 0.9005601406097412, "lr": 1.98248374197445e-06, "epoch": 0.21359447004608295, "percentage": 10.68, "elapsed_time": "1:15:19", "remaining_time": "10:29:59"} +{"current_steps": 928, "total_steps": 8680, "loss": 1.0415414571762085, "lr": 1.9824126749761893e-06, "epoch": 0.21382488479262673, "percentage": 10.69, "elapsed_time": "1:15:24", "remaining_time": "10:29:53"} +{"current_steps": 929, "total_steps": 8680, "loss": 0.8130594491958618, "lr": 1.982341465382029e-06, "epoch": 0.21405529953917052, "percentage": 10.7, "elapsed_time": "1:15:28", "remaining_time": "10:29:42"} +{"current_steps": 930, "total_steps": 8680, "loss": 0.9178205728530884, "lr": 1.9822701132023053e-06, "epoch": 0.21428571428571427, "percentage": 10.71, "elapsed_time": "1:15:33", "remaining_time": "10:29:37"} +{"current_steps": 931, "total_steps": 8680, "loss": 0.9927947521209717, "lr": 1.9821986184473754e-06, "epoch": 0.21451612903225806, "percentage": 10.73, "elapsed_time": "1:15:37", "remaining_time": "10:29:28"} +{"current_steps": 932, "total_steps": 8680, "loss": 0.9172670841217041, "lr": 1.982126981127616e-06, "epoch": 0.21474654377880184, "percentage": 10.74, "elapsed_time": "1:15:41", "remaining_time": "10:29:15"} +{"current_steps": 933, "total_steps": 8680, "loss": 0.9513058066368103, "lr": 1.9820552012534255e-06, "epoch": 0.21497695852534562, "percentage": 10.75, "elapsed_time": "1:15:46", "remaining_time": "10:29:12"} +{"current_steps": 934, "total_steps": 8680, "loss": 1.014827013015747, "lr": 1.9819832788352227e-06, "epoch": 0.2152073732718894, "percentage": 10.76, "elapsed_time": "1:15:53", "remaining_time": "10:29:20"} +{"current_steps": 935, "total_steps": 8680, "loss": 1.0225746631622314, "lr": 1.9819112138834473e-06, "epoch": 0.2154377880184332, "percentage": 10.77, 
"elapsed_time": "1:15:58", "remaining_time": "10:29:19"} +{"current_steps": 936, "total_steps": 8680, "loss": 0.8804227113723755, "lr": 1.9818390064085584e-06, "epoch": 0.21566820276497695, "percentage": 10.78, "elapsed_time": "1:16:03", "remaining_time": "10:29:13"} +{"current_steps": 937, "total_steps": 8680, "loss": 0.7215760350227356, "lr": 1.9817666564210376e-06, "epoch": 0.21589861751152073, "percentage": 10.79, "elapsed_time": "1:16:07", "remaining_time": "10:29:03"} +{"current_steps": 938, "total_steps": 8680, "loss": 0.9978986978530884, "lr": 1.981694163931387e-06, "epoch": 0.2161290322580645, "percentage": 10.81, "elapsed_time": "1:16:12", "remaining_time": "10:28:58"} +{"current_steps": 939, "total_steps": 8680, "loss": 0.8646233081817627, "lr": 1.981621528950128e-06, "epoch": 0.2163594470046083, "percentage": 10.82, "elapsed_time": "1:16:17", "remaining_time": "10:28:56"} +{"current_steps": 940, "total_steps": 8680, "loss": 0.9619132876396179, "lr": 1.981548751487803e-06, "epoch": 0.21658986175115208, "percentage": 10.83, "elapsed_time": "1:16:23", "remaining_time": "10:29:02"} +{"current_steps": 941, "total_steps": 8680, "loss": 0.9209504127502441, "lr": 1.981475831554976e-06, "epoch": 0.21682027649769586, "percentage": 10.84, "elapsed_time": "1:16:27", "remaining_time": "10:28:47"} +{"current_steps": 942, "total_steps": 8680, "loss": 0.7629299163818359, "lr": 1.9814027691622318e-06, "epoch": 0.21705069124423962, "percentage": 10.85, "elapsed_time": "1:16:31", "remaining_time": "10:28:37"} +{"current_steps": 943, "total_steps": 8680, "loss": 0.8702583312988281, "lr": 1.9813295643201747e-06, "epoch": 0.2172811059907834, "percentage": 10.86, "elapsed_time": "1:16:36", "remaining_time": "10:28:29"} +{"current_steps": 944, "total_steps": 8680, "loss": 0.9571657180786133, "lr": 1.9812562170394305e-06, "epoch": 0.21751152073732719, "percentage": 10.88, "elapsed_time": "1:16:40", "remaining_time": "10:28:19"} +{"current_steps": 945, "total_steps": 8680, 
"loss": 0.7271617650985718, "lr": 1.9811827273306456e-06, "epoch": 0.21774193548387097, "percentage": 10.89, "elapsed_time": "1:16:46", "remaining_time": "10:28:27"} +{"current_steps": 946, "total_steps": 8680, "loss": 0.8189597725868225, "lr": 1.9811090952044865e-06, "epoch": 0.21797235023041475, "percentage": 10.9, "elapsed_time": "1:16:53", "remaining_time": "10:28:36"} +{"current_steps": 947, "total_steps": 8680, "loss": 0.7933987379074097, "lr": 1.981035320671641e-06, "epoch": 0.21820276497695854, "percentage": 10.91, "elapsed_time": "1:16:58", "remaining_time": "10:28:33"} +{"current_steps": 948, "total_steps": 8680, "loss": 0.9687645435333252, "lr": 1.9809614037428174e-06, "epoch": 0.2184331797235023, "percentage": 10.92, "elapsed_time": "1:17:03", "remaining_time": "10:28:32"} +{"current_steps": 949, "total_steps": 8680, "loss": 0.8293745517730713, "lr": 1.980887344428745e-06, "epoch": 0.21866359447004607, "percentage": 10.93, "elapsed_time": "1:17:08", "remaining_time": "10:28:26"} +{"current_steps": 950, "total_steps": 8680, "loss": 1.0447471141815186, "lr": 1.9808131427401727e-06, "epoch": 0.21889400921658986, "percentage": 10.94, "elapsed_time": "1:17:12", "remaining_time": "10:28:13"} +{"current_steps": 951, "total_steps": 8680, "loss": 0.8916672468185425, "lr": 1.9807387986878715e-06, "epoch": 0.21912442396313364, "percentage": 10.96, "elapsed_time": "1:17:17", "remaining_time": "10:28:08"} +{"current_steps": 952, "total_steps": 8680, "loss": 0.8380981683731079, "lr": 1.980664312282632e-06, "epoch": 0.21935483870967742, "percentage": 10.97, "elapsed_time": "1:17:22", "remaining_time": "10:28:07"} +{"current_steps": 953, "total_steps": 8680, "loss": 0.887790322303772, "lr": 1.9805896835352656e-06, "epoch": 0.2195852534562212, "percentage": 10.98, "elapsed_time": "1:17:27", "remaining_time": "10:28:02"} +{"current_steps": 954, "total_steps": 8680, "loss": 0.8353140950202942, "lr": 1.9805149124566048e-06, "epoch": 0.21981566820276496, "percentage": 
10.99, "elapsed_time": "1:17:32", "remaining_time": "10:27:56"} +{"current_steps": 955, "total_steps": 8680, "loss": 1.0337531566619873, "lr": 1.9804399990575026e-06, "epoch": 0.22004608294930875, "percentage": 11.0, "elapsed_time": "1:17:36", "remaining_time": "10:27:48"} +{"current_steps": 956, "total_steps": 8680, "loss": 0.8845529556274414, "lr": 1.9803649433488324e-06, "epoch": 0.22027649769585253, "percentage": 11.01, "elapsed_time": "1:17:41", "remaining_time": "10:27:44"} +{"current_steps": 957, "total_steps": 8680, "loss": 0.7408445477485657, "lr": 1.9802897453414884e-06, "epoch": 0.2205069124423963, "percentage": 11.03, "elapsed_time": "1:17:47", "remaining_time": "10:27:48"} +{"current_steps": 958, "total_steps": 8680, "loss": 0.873178243637085, "lr": 1.980214405046386e-06, "epoch": 0.2207373271889401, "percentage": 11.04, "elapsed_time": "1:17:53", "remaining_time": "10:27:51"} +{"current_steps": 959, "total_steps": 8680, "loss": 1.0207639932632446, "lr": 1.98013892247446e-06, "epoch": 0.22096774193548388, "percentage": 11.05, "elapsed_time": "1:17:58", "remaining_time": "10:27:43"} +{"current_steps": 960, "total_steps": 8680, "loss": 0.8626997470855713, "lr": 1.980063297636667e-06, "epoch": 0.22119815668202766, "percentage": 11.06, "elapsed_time": "1:18:01", "remaining_time": "10:27:29"} +{"current_steps": 961, "total_steps": 8680, "loss": 0.8961347341537476, "lr": 1.9799875305439836e-06, "epoch": 0.22142857142857142, "percentage": 11.07, "elapsed_time": "1:18:06", "remaining_time": "10:27:24"} +{"current_steps": 962, "total_steps": 8680, "loss": 0.8115944862365723, "lr": 1.9799116212074075e-06, "epoch": 0.2216589861751152, "percentage": 11.08, "elapsed_time": "1:18:11", "remaining_time": "10:27:20"} +{"current_steps": 963, "total_steps": 8680, "loss": 0.8274029493331909, "lr": 1.979835569637957e-06, "epoch": 0.22188940092165899, "percentage": 11.09, "elapsed_time": "1:18:16", "remaining_time": "10:27:15"} +{"current_steps": 964, "total_steps": 8680, 
"loss": 1.020345687866211, "lr": 1.9797593758466706e-06, "epoch": 0.22211981566820277, "percentage": 11.11, "elapsed_time": "1:18:20", "remaining_time": "10:27:02"} +{"current_steps": 965, "total_steps": 8680, "loss": 0.8164723515510559, "lr": 1.979683039844608e-06, "epoch": 0.22235023041474655, "percentage": 11.12, "elapsed_time": "1:18:25", "remaining_time": "10:27:03"} +{"current_steps": 966, "total_steps": 8680, "loss": 0.832849383354187, "lr": 1.979606561642849e-06, "epoch": 0.22258064516129034, "percentage": 11.13, "elapsed_time": "1:18:30", "remaining_time": "10:26:59"} +{"current_steps": 967, "total_steps": 8680, "loss": 0.9765876531600952, "lr": 1.9795299412524945e-06, "epoch": 0.2228110599078341, "percentage": 11.14, "elapsed_time": "1:18:34", "remaining_time": "10:26:46"} +{"current_steps": 968, "total_steps": 8680, "loss": 0.9280411005020142, "lr": 1.9794531786846657e-06, "epoch": 0.22304147465437787, "percentage": 11.15, "elapsed_time": "1:18:38", "remaining_time": "10:26:34"} +{"current_steps": 969, "total_steps": 8680, "loss": 1.122058629989624, "lr": 1.9793762739505042e-06, "epoch": 0.22327188940092166, "percentage": 11.16, "elapsed_time": "1:18:43", "remaining_time": "10:26:26"} +{"current_steps": 970, "total_steps": 8680, "loss": 0.824627161026001, "lr": 1.9792992270611737e-06, "epoch": 0.22350230414746544, "percentage": 11.18, "elapsed_time": "1:18:47", "remaining_time": "10:26:16"} +{"current_steps": 971, "total_steps": 8680, "loss": 1.0583840608596802, "lr": 1.9792220380278565e-06, "epoch": 0.22373271889400922, "percentage": 11.19, "elapsed_time": "1:18:52", "remaining_time": "10:26:15"} +{"current_steps": 972, "total_steps": 8680, "loss": 1.053803563117981, "lr": 1.979144706861757e-06, "epoch": 0.223963133640553, "percentage": 11.2, "elapsed_time": "1:18:56", "remaining_time": "10:26:03"} +{"current_steps": 973, "total_steps": 8680, "loss": 0.8572183847427368, "lr": 1.9790672335740993e-06, "epoch": 0.22419354838709676, "percentage": 11.21, 
"elapsed_time": "1:19:01", "remaining_time": "10:25:56"} +{"current_steps": 974, "total_steps": 8680, "loss": 0.7955416440963745, "lr": 1.978989618176129e-06, "epoch": 0.22442396313364055, "percentage": 11.22, "elapsed_time": "1:19:07", "remaining_time": "10:26:01"} +{"current_steps": 975, "total_steps": 8680, "loss": 0.9455063343048096, "lr": 1.9789118606791113e-06, "epoch": 0.22465437788018433, "percentage": 11.23, "elapsed_time": "1:19:12", "remaining_time": "10:25:58"} +{"current_steps": 976, "total_steps": 8680, "loss": 0.788895845413208, "lr": 1.978833961094333e-06, "epoch": 0.2248847926267281, "percentage": 11.24, "elapsed_time": "1:19:18", "remaining_time": "10:25:58"} +{"current_steps": 977, "total_steps": 8680, "loss": 0.8344719409942627, "lr": 1.9787559194331014e-06, "epoch": 0.2251152073732719, "percentage": 11.26, "elapsed_time": "1:19:22", "remaining_time": "10:25:51"} +{"current_steps": 978, "total_steps": 8680, "loss": 0.85140061378479, "lr": 1.9786777357067436e-06, "epoch": 0.22534562211981568, "percentage": 11.27, "elapsed_time": "1:19:27", "remaining_time": "10:25:47"} +{"current_steps": 979, "total_steps": 8680, "loss": 0.8511399030685425, "lr": 1.978599409926608e-06, "epoch": 0.22557603686635944, "percentage": 11.28, "elapsed_time": "1:19:33", "remaining_time": "10:25:48"} +{"current_steps": 980, "total_steps": 8680, "loss": 0.9243351221084595, "lr": 1.9785209421040636e-06, "epoch": 0.22580645161290322, "percentage": 11.29, "elapsed_time": "1:19:38", "remaining_time": "10:25:42"} +{"current_steps": 981, "total_steps": 8680, "loss": 0.9043580293655396, "lr": 1.9784423322504996e-06, "epoch": 0.226036866359447, "percentage": 11.3, "elapsed_time": "1:19:42", "remaining_time": "10:25:31"} +{"current_steps": 982, "total_steps": 8680, "loss": 0.854049563407898, "lr": 1.978363580377327e-06, "epoch": 0.2262672811059908, "percentage": 11.31, "elapsed_time": "1:19:48", "remaining_time": "10:25:37"} +{"current_steps": 983, "total_steps": 8680, "loss": 
0.7785296440124512, "lr": 1.9782846864959754e-06, "epoch": 0.22649769585253457, "percentage": 11.32, "elapsed_time": "1:19:54", "remaining_time": "10:25:37"} +{"current_steps": 984, "total_steps": 8680, "loss": 0.8464720845222473, "lr": 1.9782056506178965e-06, "epoch": 0.22672811059907835, "percentage": 11.34, "elapsed_time": "1:19:57", "remaining_time": "10:25:22"} +{"current_steps": 985, "total_steps": 8680, "loss": 0.8519179821014404, "lr": 1.9781264727545624e-06, "epoch": 0.2269585253456221, "percentage": 11.35, "elapsed_time": "1:20:02", "remaining_time": "10:25:20"} +{"current_steps": 986, "total_steps": 8680, "loss": 0.956415057182312, "lr": 1.978047152917466e-06, "epoch": 0.2271889400921659, "percentage": 11.36, "elapsed_time": "1:20:07", "remaining_time": "10:25:17"} +{"current_steps": 987, "total_steps": 8680, "loss": 1.028620719909668, "lr": 1.97796769111812e-06, "epoch": 0.22741935483870968, "percentage": 11.37, "elapsed_time": "1:20:12", "remaining_time": "10:25:11"} +{"current_steps": 988, "total_steps": 8680, "loss": 0.8707184195518494, "lr": 1.9778880873680585e-06, "epoch": 0.22764976958525346, "percentage": 11.38, "elapsed_time": "1:20:17", "remaining_time": "10:25:08"} +{"current_steps": 989, "total_steps": 8680, "loss": 0.9842795729637146, "lr": 1.9778083416788355e-06, "epoch": 0.22788018433179724, "percentage": 11.39, "elapsed_time": "1:20:22", "remaining_time": "10:25:02"} +{"current_steps": 990, "total_steps": 8680, "loss": 0.8827522993087769, "lr": 1.977728454062026e-06, "epoch": 0.22811059907834103, "percentage": 11.41, "elapsed_time": "1:20:26", "remaining_time": "10:24:51"} +{"current_steps": 991, "total_steps": 8680, "loss": 0.8608568906784058, "lr": 1.9776484245292256e-06, "epoch": 0.22834101382488478, "percentage": 11.42, "elapsed_time": "1:20:32", "remaining_time": "10:24:55"} +{"current_steps": 992, "total_steps": 8680, "loss": 0.8512595891952515, "lr": 1.977568253092051e-06, "epoch": 0.22857142857142856, "percentage": 11.43, 
"elapsed_time": "1:20:38", "remaining_time": "10:24:56"} +{"current_steps": 993, "total_steps": 8680, "loss": 0.7335344552993774, "lr": 1.9774879397621383e-06, "epoch": 0.22880184331797235, "percentage": 11.44, "elapsed_time": "1:20:42", "remaining_time": "10:24:45"} +{"current_steps": 994, "total_steps": 8680, "loss": 1.0301114320755005, "lr": 1.9774074845511457e-06, "epoch": 0.22903225806451613, "percentage": 11.45, "elapsed_time": "1:20:46", "remaining_time": "10:24:35"} +{"current_steps": 995, "total_steps": 8680, "loss": 0.9011565446853638, "lr": 1.97732688747075e-06, "epoch": 0.22926267281105991, "percentage": 11.46, "elapsed_time": "1:20:50", "remaining_time": "10:24:22"} +{"current_steps": 996, "total_steps": 8680, "loss": 0.8644282221794128, "lr": 1.9772461485326507e-06, "epoch": 0.2294930875576037, "percentage": 11.47, "elapsed_time": "1:20:55", "remaining_time": "10:24:21"} +{"current_steps": 997, "total_steps": 8680, "loss": 0.8107467889785767, "lr": 1.9771652677485664e-06, "epoch": 0.22972350230414745, "percentage": 11.49, "elapsed_time": "1:21:00", "remaining_time": "10:24:18"} +{"current_steps": 998, "total_steps": 8680, "loss": 1.0090508460998535, "lr": 1.9770842451302373e-06, "epoch": 0.22995391705069124, "percentage": 11.5, "elapsed_time": "1:21:05", "remaining_time": "10:24:09"} +{"current_steps": 999, "total_steps": 8680, "loss": 0.8153292536735535, "lr": 1.977003080689424e-06, "epoch": 0.23018433179723502, "percentage": 11.51, "elapsed_time": "1:21:10", "remaining_time": "10:24:05"} +{"current_steps": 1000, "total_steps": 8680, "loss": 0.8446916341781616, "lr": 1.976921774437906e-06, "epoch": 0.2304147465437788, "percentage": 11.52, "elapsed_time": "1:21:15", "remaining_time": "10:24:00"} +{"current_steps": 1001, "total_steps": 8680, "loss": 0.759350597858429, "lr": 1.9768403263874865e-06, "epoch": 0.2306451612903226, "percentage": 11.53, "elapsed_time": "1:21:22", "remaining_time": "10:24:14"} +{"current_steps": 1002, "total_steps": 8680, 
"loss": 0.9181695580482483, "lr": 1.9767587365499862e-06, "epoch": 0.23087557603686637, "percentage": 11.54, "elapsed_time": "1:21:27", "remaining_time": "10:24:12"} +{"current_steps": 1003, "total_steps": 8680, "loss": 0.8450978994369507, "lr": 1.976677004937249e-06, "epoch": 0.23110599078341013, "percentage": 11.56, "elapsed_time": "1:21:32", "remaining_time": "10:24:04"} +{"current_steps": 1004, "total_steps": 8680, "loss": 0.775252640247345, "lr": 1.9765951315611365e-06, "epoch": 0.2313364055299539, "percentage": 11.57, "elapsed_time": "1:21:36", "remaining_time": "10:23:57"} +{"current_steps": 1005, "total_steps": 8680, "loss": 0.8682440519332886, "lr": 1.976513116433534e-06, "epoch": 0.2315668202764977, "percentage": 11.58, "elapsed_time": "1:21:40", "remaining_time": "10:23:47"} +{"current_steps": 1006, "total_steps": 8680, "loss": 1.0701451301574707, "lr": 1.9764309595663457e-06, "epoch": 0.23179723502304148, "percentage": 11.59, "elapsed_time": "1:21:46", "remaining_time": "10:23:50"} +{"current_steps": 1007, "total_steps": 8680, "loss": 0.9381946921348572, "lr": 1.976348660971496e-06, "epoch": 0.23202764976958526, "percentage": 11.6, "elapsed_time": "1:21:51", "remaining_time": "10:23:41"} +{"current_steps": 1008, "total_steps": 8680, "loss": 0.7836539149284363, "lr": 1.976266220660931e-06, "epoch": 0.23225806451612904, "percentage": 11.61, "elapsed_time": "1:21:56", "remaining_time": "10:23:40"} +{"current_steps": 1009, "total_steps": 8680, "loss": 0.9271948337554932, "lr": 1.9761836386466156e-06, "epoch": 0.2324884792626728, "percentage": 11.62, "elapsed_time": "1:22:01", "remaining_time": "10:23:35"} +{"current_steps": 1010, "total_steps": 8680, "loss": 0.8268035650253296, "lr": 1.976100914940538e-06, "epoch": 0.23271889400921658, "percentage": 11.64, "elapsed_time": "1:22:06", "remaining_time": "10:23:33"} +{"current_steps": 1011, "total_steps": 8680, "loss": 0.8266786336898804, "lr": 1.976018049554705e-06, "epoch": 0.23294930875576036, "percentage": 
11.65, "elapsed_time": "1:22:11", "remaining_time": "10:23:29"} +{"current_steps": 1012, "total_steps": 8680, "loss": 0.9437457323074341, "lr": 1.9759350425011435e-06, "epoch": 0.23317972350230415, "percentage": 11.66, "elapsed_time": "1:22:17", "remaining_time": "10:23:33"} +{"current_steps": 1013, "total_steps": 8680, "loss": 0.9078803062438965, "lr": 1.9758518937919033e-06, "epoch": 0.23341013824884793, "percentage": 11.67, "elapsed_time": "1:22:22", "remaining_time": "10:23:24"} +{"current_steps": 1014, "total_steps": 8680, "loss": 0.9873687624931335, "lr": 1.975768603439052e-06, "epoch": 0.23364055299539171, "percentage": 11.68, "elapsed_time": "1:22:25", "remaining_time": "10:23:09"} +{"current_steps": 1015, "total_steps": 8680, "loss": 0.9450196027755737, "lr": 1.97568517145468e-06, "epoch": 0.23387096774193547, "percentage": 11.69, "elapsed_time": "1:22:31", "remaining_time": "10:23:09"} +{"current_steps": 1016, "total_steps": 8680, "loss": 0.8804495334625244, "lr": 1.975601597850897e-06, "epoch": 0.23410138248847925, "percentage": 11.71, "elapsed_time": "1:22:34", "remaining_time": "10:22:55"} +{"current_steps": 1017, "total_steps": 8680, "loss": 0.9646104574203491, "lr": 1.9755178826398333e-06, "epoch": 0.23433179723502304, "percentage": 11.72, "elapsed_time": "1:22:39", "remaining_time": "10:22:45"} +{"current_steps": 1018, "total_steps": 8680, "loss": 0.9829385280609131, "lr": 1.9754340258336403e-06, "epoch": 0.23456221198156682, "percentage": 11.73, "elapsed_time": "1:22:43", "remaining_time": "10:22:39"} +{"current_steps": 1019, "total_steps": 8680, "loss": 0.8433707356452942, "lr": 1.97535002744449e-06, "epoch": 0.2347926267281106, "percentage": 11.74, "elapsed_time": "1:22:49", "remaining_time": "10:22:39"} +{"current_steps": 1020, "total_steps": 8680, "loss": 0.9892767071723938, "lr": 1.9752658874845744e-06, "epoch": 0.2350230414746544, "percentage": 11.75, "elapsed_time": "1:22:52", "remaining_time": "10:22:24"} +{"current_steps": 1021, 
"total_steps": 8680, "loss": 0.8367536664009094, "lr": 1.9751816059661065e-06, "epoch": 0.23525345622119814, "percentage": 11.76, "elapsed_time": "1:22:58", "remaining_time": "10:22:29"} +{"current_steps": 1022, "total_steps": 8680, "loss": 0.8947298526763916, "lr": 1.9750971829013194e-06, "epoch": 0.23548387096774193, "percentage": 11.77, "elapsed_time": "1:23:03", "remaining_time": "10:22:24"} +{"current_steps": 1023, "total_steps": 8680, "loss": 0.9218910336494446, "lr": 1.975012618302467e-06, "epoch": 0.2357142857142857, "percentage": 11.79, "elapsed_time": "1:23:10", "remaining_time": "10:22:33"} +{"current_steps": 1024, "total_steps": 8680, "loss": 0.8744943141937256, "lr": 1.9749279121818236e-06, "epoch": 0.2359447004608295, "percentage": 11.8, "elapsed_time": "1:23:14", "remaining_time": "10:22:24"} +{"current_steps": 1025, "total_steps": 8680, "loss": 0.9023007154464722, "lr": 1.9748430645516845e-06, "epoch": 0.23617511520737328, "percentage": 11.81, "elapsed_time": "1:23:19", "remaining_time": "10:22:17"} +{"current_steps": 1026, "total_steps": 8680, "loss": 0.8475106954574585, "lr": 1.974758075424365e-06, "epoch": 0.23640552995391706, "percentage": 11.82, "elapsed_time": "1:23:24", "remaining_time": "10:22:16"} +{"current_steps": 1027, "total_steps": 8680, "loss": 0.8594635725021362, "lr": 1.9746729448122013e-06, "epoch": 0.23663594470046084, "percentage": 11.83, "elapsed_time": "1:23:28", "remaining_time": "10:22:05"} +{"current_steps": 1028, "total_steps": 8680, "loss": 0.9601756930351257, "lr": 1.97458767272755e-06, "epoch": 0.2368663594470046, "percentage": 11.84, "elapsed_time": "1:23:33", "remaining_time": "10:21:59"} +{"current_steps": 1029, "total_steps": 8680, "loss": 0.9281105399131775, "lr": 1.9745022591827886e-06, "epoch": 0.23709677419354838, "percentage": 11.85, "elapsed_time": "1:23:38", "remaining_time": "10:21:54"} +{"current_steps": 1030, "total_steps": 8680, "loss": 0.8240020275115967, "lr": 1.9744167041903136e-06, "epoch": 
0.23732718894009217, "percentage": 11.87, "elapsed_time": "1:23:45", "remaining_time": "10:22:02"} +{"current_steps": 1031, "total_steps": 8680, "loss": 0.807030200958252, "lr": 1.9743310077625446e-06, "epoch": 0.23755760368663595, "percentage": 11.88, "elapsed_time": "1:23:51", "remaining_time": "10:22:10"} +{"current_steps": 1032, "total_steps": 8680, "loss": 0.8044267892837524, "lr": 1.9742451699119194e-06, "epoch": 0.23778801843317973, "percentage": 11.89, "elapsed_time": "1:23:56", "remaining_time": "10:22:01"} +{"current_steps": 1033, "total_steps": 8680, "loss": 0.9198760390281677, "lr": 1.9741591906508975e-06, "epoch": 0.23801843317972352, "percentage": 11.9, "elapsed_time": "1:24:00", "remaining_time": "10:21:50"} +{"current_steps": 1034, "total_steps": 8680, "loss": 0.7951973676681519, "lr": 1.974073069991959e-06, "epoch": 0.23824884792626727, "percentage": 11.91, "elapsed_time": "1:24:05", "remaining_time": "10:21:51"} +{"current_steps": 1035, "total_steps": 8680, "loss": 0.8366928100585938, "lr": 1.9739868079476035e-06, "epoch": 0.23847926267281105, "percentage": 11.92, "elapsed_time": "1:24:11", "remaining_time": "10:21:49"} +{"current_steps": 1036, "total_steps": 8680, "loss": 0.9644484519958496, "lr": 1.9739004045303524e-06, "epoch": 0.23870967741935484, "percentage": 11.94, "elapsed_time": "1:24:16", "remaining_time": "10:21:46"} +{"current_steps": 1037, "total_steps": 8680, "loss": 0.8332105875015259, "lr": 1.9738138597527464e-06, "epoch": 0.23894009216589862, "percentage": 11.95, "elapsed_time": "1:24:22", "remaining_time": "10:21:51"} +{"current_steps": 1038, "total_steps": 8680, "loss": 0.8923197388648987, "lr": 1.9737271736273482e-06, "epoch": 0.2391705069124424, "percentage": 11.96, "elapsed_time": "1:24:28", "remaining_time": "10:21:57"} +{"current_steps": 1039, "total_steps": 8680, "loss": 0.861129879951477, "lr": 1.97364034616674e-06, "epoch": 0.2394009216589862, "percentage": 11.97, "elapsed_time": "1:24:32", "remaining_time": "10:21:43"} 
+{"current_steps": 1040, "total_steps": 8680, "loss": 0.8042281270027161, "lr": 1.973553377383524e-06, "epoch": 0.23963133640552994, "percentage": 11.98, "elapsed_time": "1:24:38", "remaining_time": "10:21:48"} +{"current_steps": 1041, "total_steps": 8680, "loss": 1.0315792560577393, "lr": 1.9734662672903247e-06, "epoch": 0.23986175115207373, "percentage": 11.99, "elapsed_time": "1:24:42", "remaining_time": "10:21:35"} +{"current_steps": 1042, "total_steps": 8680, "loss": 0.8165839910507202, "lr": 1.973379015899785e-06, "epoch": 0.2400921658986175, "percentage": 12.0, "elapsed_time": "1:24:47", "remaining_time": "10:21:28"} +{"current_steps": 1043, "total_steps": 8680, "loss": 1.0002663135528564, "lr": 1.97329162322457e-06, "epoch": 0.2403225806451613, "percentage": 12.02, "elapsed_time": "1:24:50", "remaining_time": "10:21:15"} +{"current_steps": 1044, "total_steps": 8680, "loss": 0.9340938925743103, "lr": 1.9732040892773642e-06, "epoch": 0.24055299539170508, "percentage": 12.03, "elapsed_time": "1:24:55", "remaining_time": "10:21:12"} +{"current_steps": 1045, "total_steps": 8680, "loss": 0.7457709312438965, "lr": 1.973116414070873e-06, "epoch": 0.24078341013824886, "percentage": 12.04, "elapsed_time": "1:25:02", "remaining_time": "10:21:20"} +{"current_steps": 1046, "total_steps": 8680, "loss": 0.846583366394043, "lr": 1.9730285976178227e-06, "epoch": 0.24101382488479262, "percentage": 12.05, "elapsed_time": "1:25:08", "remaining_time": "10:21:24"} +{"current_steps": 1047, "total_steps": 8680, "loss": 0.9701514840126038, "lr": 1.9729406399309594e-06, "epoch": 0.2412442396313364, "percentage": 12.06, "elapsed_time": "1:25:14", "remaining_time": "10:21:26"} +{"current_steps": 1048, "total_steps": 8680, "loss": 0.7943054437637329, "lr": 1.9728525410230506e-06, "epoch": 0.24147465437788018, "percentage": 12.07, "elapsed_time": "1:25:20", "remaining_time": "10:21:29"} +{"current_steps": 1049, "total_steps": 8680, "loss": 0.8885551691055298, "lr": 
1.972764300906883e-06, "epoch": 0.24170506912442397, "percentage": 12.09, "elapsed_time": "1:25:25", "remaining_time": "10:21:23"} +{"current_steps": 1050, "total_steps": 8680, "loss": 0.8258899450302124, "lr": 1.9726759195952653e-06, "epoch": 0.24193548387096775, "percentage": 12.1, "elapsed_time": "1:25:30", "remaining_time": "10:21:19"} +{"current_steps": 1051, "total_steps": 8680, "loss": 1.0085303783416748, "lr": 1.9725873971010255e-06, "epoch": 0.24216589861751153, "percentage": 12.11, "elapsed_time": "1:25:33", "remaining_time": "10:21:05"} +{"current_steps": 1052, "total_steps": 8680, "loss": 0.814777135848999, "lr": 1.9724987334370124e-06, "epoch": 0.2423963133640553, "percentage": 12.12, "elapsed_time": "1:25:38", "remaining_time": "10:20:59"} +{"current_steps": 1053, "total_steps": 8680, "loss": 0.8328995704650879, "lr": 1.9724099286160953e-06, "epoch": 0.24262672811059907, "percentage": 12.13, "elapsed_time": "1:25:44", "remaining_time": "10:21:03"} +{"current_steps": 1054, "total_steps": 8680, "loss": 0.8699138164520264, "lr": 1.9723209826511645e-06, "epoch": 0.24285714285714285, "percentage": 12.14, "elapsed_time": "1:25:49", "remaining_time": "10:20:57"} +{"current_steps": 1055, "total_steps": 8680, "loss": 0.8298562169075012, "lr": 1.9722318955551303e-06, "epoch": 0.24308755760368664, "percentage": 12.15, "elapsed_time": "1:25:53", "remaining_time": "10:20:44"} +{"current_steps": 1056, "total_steps": 8680, "loss": 0.9470195770263672, "lr": 1.9721426673409236e-06, "epoch": 0.24331797235023042, "percentage": 12.17, "elapsed_time": "1:25:57", "remaining_time": "10:20:34"} +{"current_steps": 1057, "total_steps": 8680, "loss": 0.7733730673789978, "lr": 1.9720532980214955e-06, "epoch": 0.2435483870967742, "percentage": 12.18, "elapsed_time": "1:26:02", "remaining_time": "10:20:34"} +{"current_steps": 1058, "total_steps": 8680, "loss": 0.7761770486831665, "lr": 1.9719637876098184e-06, "epoch": 0.24377880184331796, "percentage": 12.19, "elapsed_time": 
"1:26:07", "remaining_time": "10:20:26"} +{"current_steps": 1059, "total_steps": 8680, "loss": 0.9270585775375366, "lr": 1.971874136118884e-06, "epoch": 0.24400921658986174, "percentage": 12.2, "elapsed_time": "1:26:13", "remaining_time": "10:20:29"} +{"current_steps": 1060, "total_steps": 8680, "loss": 0.906977653503418, "lr": 1.971784343561705e-06, "epoch": 0.24423963133640553, "percentage": 12.21, "elapsed_time": "1:26:18", "remaining_time": "10:20:25"} +{"current_steps": 1061, "total_steps": 8680, "loss": 0.9668625593185425, "lr": 1.971694409951316e-06, "epoch": 0.2444700460829493, "percentage": 12.22, "elapsed_time": "1:26:22", "remaining_time": "10:20:17"} +{"current_steps": 1062, "total_steps": 8680, "loss": 0.8215349316596985, "lr": 1.971604335300769e-06, "epoch": 0.2447004608294931, "percentage": 12.24, "elapsed_time": "1:26:26", "remaining_time": "10:20:07"} +{"current_steps": 1063, "total_steps": 8680, "loss": 0.8351551294326782, "lr": 1.971514119623139e-06, "epoch": 0.24493087557603688, "percentage": 12.25, "elapsed_time": "1:26:31", "remaining_time": "10:19:59"} +{"current_steps": 1064, "total_steps": 8680, "loss": 0.8778517246246338, "lr": 1.9714237629315206e-06, "epoch": 0.24516129032258063, "percentage": 12.26, "elapsed_time": "1:26:36", "remaining_time": "10:19:57"} +{"current_steps": 1065, "total_steps": 8680, "loss": 0.9415761232376099, "lr": 1.9713332652390293e-06, "epoch": 0.24539170506912442, "percentage": 12.27, "elapsed_time": "1:26:41", "remaining_time": "10:19:53"} +{"current_steps": 1066, "total_steps": 8680, "loss": 0.9040292501449585, "lr": 1.9712426265588e-06, "epoch": 0.2456221198156682, "percentage": 12.28, "elapsed_time": "1:26:47", "remaining_time": "10:19:52"} +{"current_steps": 1067, "total_steps": 8680, "loss": 0.8886675834655762, "lr": 1.9711518469039894e-06, "epoch": 0.24585253456221198, "percentage": 12.29, "elapsed_time": "1:26:52", "remaining_time": "10:19:52"} +{"current_steps": 1068, "total_steps": 8680, "loss": 
0.8439750671386719, "lr": 1.971060926287774e-06, "epoch": 0.24608294930875577, "percentage": 12.3, "elapsed_time": "1:26:57", "remaining_time": "10:19:45"} +{"current_steps": 1069, "total_steps": 8680, "loss": 0.8698763251304626, "lr": 1.9709698647233507e-06, "epoch": 0.24631336405529955, "percentage": 12.32, "elapsed_time": "1:27:01", "remaining_time": "10:19:32"} +{"current_steps": 1070, "total_steps": 8680, "loss": 0.7866508364677429, "lr": 1.970878662223937e-06, "epoch": 0.2465437788018433, "percentage": 12.33, "elapsed_time": "1:27:07", "remaining_time": "10:19:35"} +{"current_steps": 1071, "total_steps": 8680, "loss": 0.8652541637420654, "lr": 1.97078731880277e-06, "epoch": 0.2467741935483871, "percentage": 12.34, "elapsed_time": "1:27:12", "remaining_time": "10:19:37"} +{"current_steps": 1072, "total_steps": 8680, "loss": 0.8614386320114136, "lr": 1.97069583447311e-06, "epoch": 0.24700460829493087, "percentage": 12.35, "elapsed_time": "1:27:17", "remaining_time": "10:19:29"} +{"current_steps": 1073, "total_steps": 8680, "loss": 0.9367830753326416, "lr": 1.970604209248234e-06, "epoch": 0.24723502304147466, "percentage": 12.36, "elapsed_time": "1:27:22", "remaining_time": "10:19:27"} +{"current_steps": 1074, "total_steps": 8680, "loss": 0.8851934671401978, "lr": 1.9705124431414417e-06, "epoch": 0.24746543778801844, "percentage": 12.37, "elapsed_time": "1:27:27", "remaining_time": "10:19:19"} +{"current_steps": 1075, "total_steps": 8680, "loss": 0.9619653224945068, "lr": 1.9704205361660534e-06, "epoch": 0.24769585253456222, "percentage": 12.38, "elapsed_time": "1:27:31", "remaining_time": "10:19:10"} +{"current_steps": 1076, "total_steps": 8680, "loss": 0.8826392889022827, "lr": 1.9703284883354094e-06, "epoch": 0.24792626728110598, "percentage": 12.4, "elapsed_time": "1:27:35", "remaining_time": "10:19:03"} +{"current_steps": 1077, "total_steps": 8680, "loss": 0.9075444340705872, "lr": 1.970236299662869e-06, "epoch": 0.24815668202764976, "percentage": 12.41, 
"elapsed_time": "1:27:39", "remaining_time": "10:18:48"} +{"current_steps": 1078, "total_steps": 8680, "loss": 1.048058032989502, "lr": 1.9701439701618147e-06, "epoch": 0.24838709677419354, "percentage": 12.42, "elapsed_time": "1:27:43", "remaining_time": "10:18:40"} +{"current_steps": 1079, "total_steps": 8680, "loss": 0.8460798263549805, "lr": 1.970051499845647e-06, "epoch": 0.24861751152073733, "percentage": 12.43, "elapsed_time": "1:27:49", "remaining_time": "10:18:40"} +{"current_steps": 1080, "total_steps": 8680, "loss": 0.9410982131958008, "lr": 1.9699588887277886e-06, "epoch": 0.2488479262672811, "percentage": 12.44, "elapsed_time": "1:27:54", "remaining_time": "10:18:34"} +{"current_steps": 1081, "total_steps": 8680, "loss": 0.8247401714324951, "lr": 1.9698661368216816e-06, "epoch": 0.2490783410138249, "percentage": 12.45, "elapsed_time": "1:27:58", "remaining_time": "10:18:26"} +{"current_steps": 1082, "total_steps": 8680, "loss": 0.8543484210968018, "lr": 1.969773244140789e-06, "epoch": 0.24930875576036865, "percentage": 12.47, "elapsed_time": "1:28:04", "remaining_time": "10:18:25"} +{"current_steps": 1083, "total_steps": 8680, "loss": 0.9339861273765564, "lr": 1.9696802106985933e-06, "epoch": 0.24953917050691243, "percentage": 12.48, "elapsed_time": "1:28:08", "remaining_time": "10:18:15"} +{"current_steps": 1084, "total_steps": 8680, "loss": 0.8268687725067139, "lr": 1.969587036508599e-06, "epoch": 0.24976958525345622, "percentage": 12.49, "elapsed_time": "1:28:13", "remaining_time": "10:18:15"} +{"current_steps": 1085, "total_steps": 8680, "loss": 0.9990735054016113, "lr": 1.96949372158433e-06, "epoch": 0.25, "percentage": 12.5, "elapsed_time": "1:28:18", "remaining_time": "10:18:12"} +{"current_steps": 1086, "total_steps": 8680, "loss": 0.871169924736023, "lr": 1.9694002659393305e-06, "epoch": 0.2502304147465438, "percentage": 12.51, "elapsed_time": "1:28:22", "remaining_time": "10:18:00"} +{"current_steps": 1087, "total_steps": 8680, "loss": 
0.9275476932525635, "lr": 1.9693066695871657e-06, "epoch": 0.25046082949308757, "percentage": 12.52, "elapsed_time": "1:28:28", "remaining_time": "10:17:57"} +{"current_steps": 1088, "total_steps": 8680, "loss": 0.802006721496582, "lr": 1.969212932541421e-06, "epoch": 0.25069124423963135, "percentage": 12.53, "elapsed_time": "1:28:33", "remaining_time": "10:17:55"} +{"current_steps": 1089, "total_steps": 8680, "loss": 1.158774495124817, "lr": 1.9691190548157023e-06, "epoch": 0.25092165898617513, "percentage": 12.55, "elapsed_time": "1:28:36", "remaining_time": "10:17:41"} +{"current_steps": 1090, "total_steps": 8680, "loss": 0.8979278802871704, "lr": 1.969025036423636e-06, "epoch": 0.2511520737327189, "percentage": 12.56, "elapsed_time": "1:28:41", "remaining_time": "10:17:38"} +{"current_steps": 1091, "total_steps": 8680, "loss": 0.9486579895019531, "lr": 1.968930877378868e-06, "epoch": 0.2513824884792627, "percentage": 12.57, "elapsed_time": "1:28:46", "remaining_time": "10:17:30"} +{"current_steps": 1092, "total_steps": 8680, "loss": 0.8661590814590454, "lr": 1.968836577695066e-06, "epoch": 0.25161290322580643, "percentage": 12.58, "elapsed_time": "1:28:51", "remaining_time": "10:17:24"} +{"current_steps": 1093, "total_steps": 8680, "loss": 0.9224900007247925, "lr": 1.9687421373859173e-06, "epoch": 0.2518433179723502, "percentage": 12.59, "elapsed_time": "1:28:55", "remaining_time": "10:17:17"} +{"current_steps": 1094, "total_steps": 8680, "loss": 0.9563734531402588, "lr": 1.96864755646513e-06, "epoch": 0.252073732718894, "percentage": 12.6, "elapsed_time": "1:28:59", "remaining_time": "10:17:04"} +{"current_steps": 1095, "total_steps": 8680, "loss": 0.7457284927368164, "lr": 1.968552834946432e-06, "epoch": 0.2523041474654378, "percentage": 12.62, "elapsed_time": "1:29:04", "remaining_time": "10:17:03"} +{"current_steps": 1096, "total_steps": 8680, "loss": 0.8763077259063721, "lr": 1.9684579728435727e-06, "epoch": 0.25253456221198156, "percentage": 12.63, 
"elapsed_time": "1:29:11", "remaining_time": "10:17:09"} +{"current_steps": 1097, "total_steps": 8680, "loss": 0.8476013541221619, "lr": 1.9683629701703203e-06, "epoch": 0.25276497695852534, "percentage": 12.64, "elapsed_time": "1:29:15", "remaining_time": "10:17:01"} +{"current_steps": 1098, "total_steps": 8680, "loss": 0.9706464409828186, "lr": 1.9682678269404647e-06, "epoch": 0.25299539170506913, "percentage": 12.65, "elapsed_time": "1:29:20", "remaining_time": "10:16:58"} +{"current_steps": 1099, "total_steps": 8680, "loss": 0.9898370504379272, "lr": 1.968172543167816e-06, "epoch": 0.2532258064516129, "percentage": 12.66, "elapsed_time": "1:29:25", "remaining_time": "10:16:55"} +{"current_steps": 1100, "total_steps": 8680, "loss": 0.9073352813720703, "lr": 1.9680771188662043e-06, "epoch": 0.2534562211981567, "percentage": 12.67, "elapsed_time": "1:29:29", "remaining_time": "10:16:43"} +{"current_steps": 1101, "total_steps": 8680, "loss": 0.698054850101471, "lr": 1.9679815540494805e-06, "epoch": 0.2536866359447005, "percentage": 12.68, "elapsed_time": "1:29:37", "remaining_time": "10:17:00"} +{"current_steps": 1102, "total_steps": 8680, "loss": 0.8755865097045898, "lr": 1.967885848731515e-06, "epoch": 0.25391705069124426, "percentage": 12.7, "elapsed_time": "1:29:42", "remaining_time": "10:16:51"} +{"current_steps": 1103, "total_steps": 8680, "loss": 0.8884447813034058, "lr": 1.9677900029262004e-06, "epoch": 0.25414746543778804, "percentage": 12.71, "elapsed_time": "1:29:45", "remaining_time": "10:16:37"} +{"current_steps": 1104, "total_steps": 8680, "loss": 0.738738477230072, "lr": 1.967694016647448e-06, "epoch": 0.2543778801843318, "percentage": 12.72, "elapsed_time": "1:29:51", "remaining_time": "10:16:36"} +{"current_steps": 1105, "total_steps": 8680, "loss": 0.8024383783340454, "lr": 1.96759788990919e-06, "epoch": 0.25460829493087556, "percentage": 12.73, "elapsed_time": "1:29:55", "remaining_time": "10:16:30"} +{"current_steps": 1106, "total_steps": 8680, 
"loss": 0.8780910968780518, "lr": 1.967501622725379e-06, "epoch": 0.25483870967741934, "percentage": 12.74, "elapsed_time": "1:29:59", "remaining_time": "10:16:18"} +{"current_steps": 1107, "total_steps": 8680, "loss": 0.8709204196929932, "lr": 1.967405215109989e-06, "epoch": 0.2550691244239631, "percentage": 12.75, "elapsed_time": "1:30:05", "remaining_time": "10:16:19"} +{"current_steps": 1108, "total_steps": 8680, "loss": 0.8838910460472107, "lr": 1.9673086670770122e-06, "epoch": 0.2552995391705069, "percentage": 12.76, "elapsed_time": "1:30:10", "remaining_time": "10:16:13"} +{"current_steps": 1109, "total_steps": 8680, "loss": 0.9310617446899414, "lr": 1.967211978640463e-06, "epoch": 0.2555299539170507, "percentage": 12.78, "elapsed_time": "1:30:16", "remaining_time": "10:16:15"} +{"current_steps": 1110, "total_steps": 8680, "loss": 0.8453254699707031, "lr": 1.9671151498143756e-06, "epoch": 0.2557603686635945, "percentage": 12.79, "elapsed_time": "1:30:20", "remaining_time": "10:16:05"} +{"current_steps": 1111, "total_steps": 8680, "loss": 1.0201973915100098, "lr": 1.967018180612804e-06, "epoch": 0.25599078341013826, "percentage": 12.8, "elapsed_time": "1:30:24", "remaining_time": "10:15:56"} +{"current_steps": 1112, "total_steps": 8680, "loss": 0.84140944480896, "lr": 1.9669210710498242e-06, "epoch": 0.25622119815668204, "percentage": 12.81, "elapsed_time": "1:30:30", "remaining_time": "10:16:00"} +{"current_steps": 1113, "total_steps": 8680, "loss": 0.9012273550033569, "lr": 1.9668238211395308e-06, "epoch": 0.2564516129032258, "percentage": 12.82, "elapsed_time": "1:30:34", "remaining_time": "10:15:50"} +{"current_steps": 1114, "total_steps": 8680, "loss": 0.820103645324707, "lr": 1.9667264308960394e-06, "epoch": 0.2566820276497696, "percentage": 12.83, "elapsed_time": "1:30:38", "remaining_time": "10:15:36"} +{"current_steps": 1115, "total_steps": 8680, "loss": 1.0709048509597778, "lr": 1.9666289003334868e-06, "epoch": 0.2569124423963134, "percentage": 
12.85, "elapsed_time": "1:30:43", "remaining_time": "10:15:30"} +{"current_steps": 1116, "total_steps": 8680, "loss": 0.9408602714538574, "lr": 1.966531229466029e-06, "epoch": 0.2571428571428571, "percentage": 12.86, "elapsed_time": "1:30:47", "remaining_time": "10:15:19"} +{"current_steps": 1117, "total_steps": 8680, "loss": 0.967316210269928, "lr": 1.9664334183078425e-06, "epoch": 0.2573732718894009, "percentage": 12.87, "elapsed_time": "1:30:51", "remaining_time": "10:15:13"} +{"current_steps": 1118, "total_steps": 8680, "loss": 0.9483754634857178, "lr": 1.9663354668731248e-06, "epoch": 0.2576036866359447, "percentage": 12.88, "elapsed_time": "1:30:56", "remaining_time": "10:15:05"} +{"current_steps": 1119, "total_steps": 8680, "loss": 0.7978509664535522, "lr": 1.966237375176093e-06, "epoch": 0.25783410138248847, "percentage": 12.89, "elapsed_time": "1:31:01", "remaining_time": "10:15:05"} +{"current_steps": 1120, "total_steps": 8680, "loss": 0.8720531463623047, "lr": 1.9661391432309862e-06, "epoch": 0.25806451612903225, "percentage": 12.9, "elapsed_time": "1:31:06", "remaining_time": "10:14:57"} +{"current_steps": 1121, "total_steps": 8680, "loss": 0.7984024286270142, "lr": 1.966040771052061e-06, "epoch": 0.25829493087557603, "percentage": 12.91, "elapsed_time": "1:31:10", "remaining_time": "10:14:49"} +{"current_steps": 1122, "total_steps": 8680, "loss": 0.9255385398864746, "lr": 1.965942258653597e-06, "epoch": 0.2585253456221198, "percentage": 12.93, "elapsed_time": "1:31:14", "remaining_time": "10:14:36"} +{"current_steps": 1123, "total_steps": 8680, "loss": 0.9028007984161377, "lr": 1.9658436060498927e-06, "epoch": 0.2587557603686636, "percentage": 12.94, "elapsed_time": "1:31:18", "remaining_time": "10:14:29"} +{"current_steps": 1124, "total_steps": 8680, "loss": 0.8773014545440674, "lr": 1.9657448132552677e-06, "epoch": 0.2589861751152074, "percentage": 12.95, "elapsed_time": "1:31:24", "remaining_time": "10:14:26"} +{"current_steps": 1125, "total_steps": 
8680, "loss": 0.9280908107757568, "lr": 1.9656458802840617e-06, "epoch": 0.25921658986175117, "percentage": 12.96, "elapsed_time": "1:31:28", "remaining_time": "10:14:20"} +{"current_steps": 1126, "total_steps": 8680, "loss": 0.820783793926239, "lr": 1.9655468071506344e-06, "epoch": 0.25944700460829495, "percentage": 12.97, "elapsed_time": "1:31:32", "remaining_time": "10:14:07"} +{"current_steps": 1127, "total_steps": 8680, "loss": 0.7832465171813965, "lr": 1.9654475938693663e-06, "epoch": 0.25967741935483873, "percentage": 12.98, "elapsed_time": "1:31:38", "remaining_time": "10:14:12"} +{"current_steps": 1128, "total_steps": 8680, "loss": 0.8824669122695923, "lr": 1.965348240454658e-06, "epoch": 0.25990783410138246, "percentage": 13.0, "elapsed_time": "1:31:43", "remaining_time": "10:14:08"} +{"current_steps": 1129, "total_steps": 8680, "loss": 0.8782131671905518, "lr": 1.9652487469209305e-06, "epoch": 0.26013824884792625, "percentage": 13.01, "elapsed_time": "1:31:47", "remaining_time": "10:13:55"} +{"current_steps": 1130, "total_steps": 8680, "loss": 0.938920259475708, "lr": 1.9651491132826255e-06, "epoch": 0.26036866359447003, "percentage": 13.02, "elapsed_time": "1:31:52", "remaining_time": "10:13:48"} +{"current_steps": 1131, "total_steps": 8680, "loss": 0.8733320236206055, "lr": 1.965049339554204e-06, "epoch": 0.2605990783410138, "percentage": 13.03, "elapsed_time": "1:31:56", "remaining_time": "10:13:43"} +{"current_steps": 1132, "total_steps": 8680, "loss": 0.8688358664512634, "lr": 1.9649494257501485e-06, "epoch": 0.2608294930875576, "percentage": 13.04, "elapsed_time": "1:32:00", "remaining_time": "10:13:28"} +{"current_steps": 1133, "total_steps": 8680, "loss": 0.9250427484512329, "lr": 1.9648493718849617e-06, "epoch": 0.2610599078341014, "percentage": 13.05, "elapsed_time": "1:32:04", "remaining_time": "10:13:17"} +{"current_steps": 1134, "total_steps": 8680, "loss": 0.7890609502792358, "lr": 1.9647491779731655e-06, "epoch": 0.26129032258064516, 
"percentage": 13.06, "elapsed_time": "1:32:08", "remaining_time": "10:13:11"} +{"current_steps": 1135, "total_steps": 8680, "loss": 0.83612060546875, "lr": 1.964648844029303e-06, "epoch": 0.26152073732718895, "percentage": 13.08, "elapsed_time": "1:32:14", "remaining_time": "10:13:12"} +{"current_steps": 1136, "total_steps": 8680, "loss": 0.7951240539550781, "lr": 1.9645483700679387e-06, "epoch": 0.26175115207373273, "percentage": 13.09, "elapsed_time": "1:32:18", "remaining_time": "10:13:02"} +{"current_steps": 1137, "total_steps": 8680, "loss": 0.9746277332305908, "lr": 1.9644477561036546e-06, "epoch": 0.2619815668202765, "percentage": 13.1, "elapsed_time": "1:32:23", "remaining_time": "10:12:58"} +{"current_steps": 1138, "total_steps": 8680, "loss": 0.856966495513916, "lr": 1.9643470021510556e-06, "epoch": 0.2622119815668203, "percentage": 13.11, "elapsed_time": "1:32:28", "remaining_time": "10:12:54"} +{"current_steps": 1139, "total_steps": 8680, "loss": 0.7419042587280273, "lr": 1.9642461082247663e-06, "epoch": 0.2624423963133641, "percentage": 13.12, "elapsed_time": "1:32:33", "remaining_time": "10:12:49"} +{"current_steps": 1140, "total_steps": 8680, "loss": 0.8868693709373474, "lr": 1.9641450743394304e-06, "epoch": 0.2626728110599078, "percentage": 13.13, "elapsed_time": "1:32:37", "remaining_time": "10:12:39"} +{"current_steps": 1141, "total_steps": 8680, "loss": 1.0111520290374756, "lr": 1.9640439005097133e-06, "epoch": 0.2629032258064516, "percentage": 13.15, "elapsed_time": "1:32:43", "remaining_time": "10:12:38"} +{"current_steps": 1142, "total_steps": 8680, "loss": 0.9379187226295471, "lr": 1.9639425867503006e-06, "epoch": 0.2631336405529954, "percentage": 13.16, "elapsed_time": "1:32:47", "remaining_time": "10:12:28"} +{"current_steps": 1143, "total_steps": 8680, "loss": 0.8451071977615356, "lr": 1.9638411330758973e-06, "epoch": 0.26336405529953916, "percentage": 13.17, "elapsed_time": "1:32:51", "remaining_time": "10:12:21"} +{"current_steps": 1144, 
"total_steps": 8680, "loss": 1.0407288074493408, "lr": 1.9637395395012295e-06, "epoch": 0.26359447004608294, "percentage": 13.18, "elapsed_time": "1:32:55", "remaining_time": "10:12:10"} +{"current_steps": 1145, "total_steps": 8680, "loss": 0.9594388008117676, "lr": 1.9636378060410433e-06, "epoch": 0.2638248847926267, "percentage": 13.19, "elapsed_time": "1:33:01", "remaining_time": "10:12:10"} +{"current_steps": 1146, "total_steps": 8680, "loss": 0.7940789461135864, "lr": 1.9635359327101057e-06, "epoch": 0.2640552995391705, "percentage": 13.2, "elapsed_time": "1:33:07", "remaining_time": "10:12:12"} +{"current_steps": 1147, "total_steps": 8680, "loss": 0.9707269668579102, "lr": 1.9634339195232025e-06, "epoch": 0.2642857142857143, "percentage": 13.21, "elapsed_time": "1:33:11", "remaining_time": "10:12:04"} +{"current_steps": 1148, "total_steps": 8680, "loss": 0.9554522037506104, "lr": 1.9633317664951417e-06, "epoch": 0.2645161290322581, "percentage": 13.23, "elapsed_time": "1:33:16", "remaining_time": "10:11:56"} +{"current_steps": 1149, "total_steps": 8680, "loss": 1.009516716003418, "lr": 1.9632294736407497e-06, "epoch": 0.26474654377880186, "percentage": 13.24, "elapsed_time": "1:33:20", "remaining_time": "10:11:44"} +{"current_steps": 1150, "total_steps": 8680, "loss": 0.8337735533714294, "lr": 1.9631270409748754e-06, "epoch": 0.26497695852534564, "percentage": 13.25, "elapsed_time": "1:33:24", "remaining_time": "10:11:39"} +{"current_steps": 1151, "total_steps": 8680, "loss": 0.9103367328643799, "lr": 1.963024468512386e-06, "epoch": 0.2652073732718894, "percentage": 13.26, "elapsed_time": "1:33:28", "remaining_time": "10:11:26"} +{"current_steps": 1152, "total_steps": 8680, "loss": 0.7258249521255493, "lr": 1.9629217562681694e-06, "epoch": 0.2654377880184332, "percentage": 13.27, "elapsed_time": "1:33:33", "remaining_time": "10:11:24"} +{"current_steps": 1153, "total_steps": 8680, "loss": 0.7696776390075684, "lr": 1.962818904257135e-06, "epoch": 
0.26566820276497694, "percentage": 13.28, "elapsed_time": "1:33:37", "remaining_time": "10:11:13"} +{"current_steps": 1154, "total_steps": 8680, "loss": 0.9027894139289856, "lr": 1.962715912494211e-06, "epoch": 0.2658986175115207, "percentage": 13.29, "elapsed_time": "1:33:42", "remaining_time": "10:11:05"} +{"current_steps": 1155, "total_steps": 8680, "loss": 1.0412788391113281, "lr": 1.962612780994347e-06, "epoch": 0.2661290322580645, "percentage": 13.31, "elapsed_time": "1:33:46", "remaining_time": "10:10:56"} +{"current_steps": 1156, "total_steps": 8680, "loss": 0.8656542897224426, "lr": 1.962509509772512e-06, "epoch": 0.2663594470046083, "percentage": 13.32, "elapsed_time": "1:33:50", "remaining_time": "10:10:48"} +{"current_steps": 1157, "total_steps": 8680, "loss": 0.9541186094284058, "lr": 1.9624060988436964e-06, "epoch": 0.26658986175115207, "percentage": 13.33, "elapsed_time": "1:33:57", "remaining_time": "10:10:54"} +{"current_steps": 1158, "total_steps": 8680, "loss": 0.7684942483901978, "lr": 1.962302548222909e-06, "epoch": 0.26682027649769585, "percentage": 13.34, "elapsed_time": "1:34:03", "remaining_time": "10:10:56"} +{"current_steps": 1159, "total_steps": 8680, "loss": 0.8934941291809082, "lr": 1.962198857925181e-06, "epoch": 0.26705069124423964, "percentage": 13.35, "elapsed_time": "1:34:07", "remaining_time": "10:10:44"} +{"current_steps": 1160, "total_steps": 8680, "loss": 0.8674842715263367, "lr": 1.962095027965562e-06, "epoch": 0.2672811059907834, "percentage": 13.36, "elapsed_time": "1:34:11", "remaining_time": "10:10:37"} +{"current_steps": 1161, "total_steps": 8680, "loss": 0.8850778937339783, "lr": 1.9619910583591237e-06, "epoch": 0.2675115207373272, "percentage": 13.38, "elapsed_time": "1:34:16", "remaining_time": "10:10:32"} +{"current_steps": 1162, "total_steps": 8680, "loss": 0.9140915870666504, "lr": 1.961886949120957e-06, "epoch": 0.267741935483871, "percentage": 13.39, "elapsed_time": "1:34:21", "remaining_time": "10:10:27"} 
+{"current_steps": 1163, "total_steps": 8680, "loss": 0.7557287812232971, "lr": 1.9617827002661733e-06, "epoch": 0.26797235023041477, "percentage": 13.4, "elapsed_time": "1:34:26", "remaining_time": "10:10:28"} +{"current_steps": 1164, "total_steps": 8680, "loss": 0.8780542612075806, "lr": 1.9616783118099032e-06, "epoch": 0.26820276497695855, "percentage": 13.41, "elapsed_time": "1:34:31", "remaining_time": "10:10:23"} +{"current_steps": 1165, "total_steps": 8680, "loss": 0.8352043628692627, "lr": 1.9615737837672995e-06, "epoch": 0.2684331797235023, "percentage": 13.42, "elapsed_time": "1:34:36", "remaining_time": "10:10:14"} +{"current_steps": 1166, "total_steps": 8680, "loss": 0.8119357228279114, "lr": 1.961469116153534e-06, "epoch": 0.26866359447004606, "percentage": 13.43, "elapsed_time": "1:34:41", "remaining_time": "10:10:13"} +{"current_steps": 1167, "total_steps": 8680, "loss": 0.8953120708465576, "lr": 1.9613643089837992e-06, "epoch": 0.26889400921658985, "percentage": 13.44, "elapsed_time": "1:34:46", "remaining_time": "10:10:09"} +{"current_steps": 1168, "total_steps": 8680, "loss": 0.9078162908554077, "lr": 1.9612593622733074e-06, "epoch": 0.26912442396313363, "percentage": 13.46, "elapsed_time": "1:34:52", "remaining_time": "10:10:10"} +{"current_steps": 1169, "total_steps": 8680, "loss": 0.9118859767913818, "lr": 1.961154276037292e-06, "epoch": 0.2693548387096774, "percentage": 13.47, "elapsed_time": "1:34:56", "remaining_time": "10:10:00"} +{"current_steps": 1170, "total_steps": 8680, "loss": 0.8456159234046936, "lr": 1.9610490502910056e-06, "epoch": 0.2695852534562212, "percentage": 13.48, "elapsed_time": "1:35:01", "remaining_time": "10:09:54"} +{"current_steps": 1171, "total_steps": 8680, "loss": 0.7860552072525024, "lr": 1.9609436850497222e-06, "epoch": 0.269815668202765, "percentage": 13.49, "elapsed_time": "1:35:06", "remaining_time": "10:09:53"} +{"current_steps": 1172, "total_steps": 8680, "loss": 0.8121567368507385, "lr": 
1.9608381803287343e-06, "epoch": 0.27004608294930876, "percentage": 13.5, "elapsed_time": "1:35:11", "remaining_time": "10:09:47"} +{"current_steps": 1173, "total_steps": 8680, "loss": 0.9212384819984436, "lr": 1.9607325361433574e-06, "epoch": 0.27027649769585255, "percentage": 13.51, "elapsed_time": "1:35:16", "remaining_time": "10:09:42"} +{"current_steps": 1174, "total_steps": 8680, "loss": 0.9528858661651611, "lr": 1.960626752508924e-06, "epoch": 0.27050691244239633, "percentage": 13.53, "elapsed_time": "1:35:20", "remaining_time": "10:09:36"} +{"current_steps": 1175, "total_steps": 8680, "loss": 0.8561227321624756, "lr": 1.9605208294407894e-06, "epoch": 0.2707373271889401, "percentage": 13.54, "elapsed_time": "1:35:24", "remaining_time": "10:09:26"} +{"current_steps": 1176, "total_steps": 8680, "loss": 0.9333669543266296, "lr": 1.960414766954328e-06, "epoch": 0.2709677419354839, "percentage": 13.55, "elapsed_time": "1:35:30", "remaining_time": "10:09:23"} +{"current_steps": 1177, "total_steps": 8680, "loss": 0.8879388570785522, "lr": 1.9603085650649345e-06, "epoch": 0.2711981566820276, "percentage": 13.56, "elapsed_time": "1:35:37", "remaining_time": "10:09:31"} +{"current_steps": 1178, "total_steps": 8680, "loss": 1.0099214315414429, "lr": 1.9602022237880244e-06, "epoch": 0.2714285714285714, "percentage": 13.57, "elapsed_time": "1:35:43", "remaining_time": "10:09:35"} +{"current_steps": 1179, "total_steps": 8680, "loss": 0.9341822862625122, "lr": 1.9600957431390324e-06, "epoch": 0.2716589861751152, "percentage": 13.58, "elapsed_time": "1:35:47", "remaining_time": "10:09:28"} +{"current_steps": 1180, "total_steps": 8680, "loss": 0.7616428136825562, "lr": 1.9599891231334144e-06, "epoch": 0.271889400921659, "percentage": 13.59, "elapsed_time": "1:35:51", "remaining_time": "10:09:16"} +{"current_steps": 1181, "total_steps": 8680, "loss": 0.8270235061645508, "lr": 1.959882363786646e-06, "epoch": 0.27211981566820276, "percentage": 13.61, "elapsed_time": "1:35:56", 
"remaining_time": "10:09:13"} +{"current_steps": 1182, "total_steps": 8680, "loss": 0.8715114593505859, "lr": 1.9597754651142233e-06, "epoch": 0.27235023041474654, "percentage": 13.62, "elapsed_time": "1:36:00", "remaining_time": "10:09:03"} +{"current_steps": 1183, "total_steps": 8680, "loss": 0.6910781860351562, "lr": 1.959668427131662e-06, "epoch": 0.2725806451612903, "percentage": 13.63, "elapsed_time": "1:36:07", "remaining_time": "10:09:08"} +{"current_steps": 1184, "total_steps": 8680, "loss": 0.9158545136451721, "lr": 1.9595612498544997e-06, "epoch": 0.2728110599078341, "percentage": 13.64, "elapsed_time": "1:36:12", "remaining_time": "10:09:08"} +{"current_steps": 1185, "total_steps": 8680, "loss": 0.7129944562911987, "lr": 1.9594539332982917e-06, "epoch": 0.2730414746543779, "percentage": 13.65, "elapsed_time": "1:36:19", "remaining_time": "10:09:11"} +{"current_steps": 1186, "total_steps": 8680, "loss": 0.9487595558166504, "lr": 1.9593464774786155e-06, "epoch": 0.2732718894009217, "percentage": 13.66, "elapsed_time": "1:36:23", "remaining_time": "10:09:06"} +{"current_steps": 1187, "total_steps": 8680, "loss": 0.9455368518829346, "lr": 1.959238882411068e-06, "epoch": 0.27350230414746546, "percentage": 13.68, "elapsed_time": "1:36:28", "remaining_time": "10:08:57"} +{"current_steps": 1188, "total_steps": 8680, "loss": 0.9005390405654907, "lr": 1.959131148111267e-06, "epoch": 0.27373271889400924, "percentage": 13.69, "elapsed_time": "1:36:33", "remaining_time": "10:08:54"} +{"current_steps": 1189, "total_steps": 8680, "loss": 0.91117262840271, "lr": 1.9590232745948494e-06, "epoch": 0.27396313364055297, "percentage": 13.7, "elapsed_time": "1:36:37", "remaining_time": "10:08:43"} +{"current_steps": 1190, "total_steps": 8680, "loss": 0.7940579652786255, "lr": 1.958915261877473e-06, "epoch": 0.27419354838709675, "percentage": 13.71, "elapsed_time": "1:36:42", "remaining_time": "10:08:39"} +{"current_steps": 1191, "total_steps": 8680, "loss": 
1.0705196857452393, "lr": 1.9588071099748155e-06, "epoch": 0.27442396313364054, "percentage": 13.72, "elapsed_time": "1:36:47", "remaining_time": "10:08:34"} +{"current_steps": 1192, "total_steps": 8680, "loss": 0.9311869740486145, "lr": 1.9586988189025756e-06, "epoch": 0.2746543778801843, "percentage": 13.73, "elapsed_time": "1:36:51", "remaining_time": "10:08:25"} +{"current_steps": 1193, "total_steps": 8680, "loss": 0.9400506019592285, "lr": 1.9585903886764715e-06, "epoch": 0.2748847926267281, "percentage": 13.74, "elapsed_time": "1:36:57", "remaining_time": "10:08:27"} +{"current_steps": 1194, "total_steps": 8680, "loss": 0.8282920122146606, "lr": 1.958481819312241e-06, "epoch": 0.2751152073732719, "percentage": 13.76, "elapsed_time": "1:37:01", "remaining_time": "10:08:20"} +{"current_steps": 1195, "total_steps": 8680, "loss": 0.9111119508743286, "lr": 1.9583731108256435e-06, "epoch": 0.27534562211981567, "percentage": 13.77, "elapsed_time": "1:37:06", "remaining_time": "10:08:15"} +{"current_steps": 1196, "total_steps": 8680, "loss": 0.9486548900604248, "lr": 1.9582642632324576e-06, "epoch": 0.27557603686635945, "percentage": 13.78, "elapsed_time": "1:37:10", "remaining_time": "10:08:03"} +{"current_steps": 1197, "total_steps": 8680, "loss": 0.8452764749526978, "lr": 1.9581552765484828e-06, "epoch": 0.27580645161290324, "percentage": 13.79, "elapsed_time": "1:37:14", "remaining_time": "10:07:56"} +{"current_steps": 1198, "total_steps": 8680, "loss": 0.8636663556098938, "lr": 1.958046150789538e-06, "epoch": 0.276036866359447, "percentage": 13.8, "elapsed_time": "1:37:19", "remaining_time": "10:07:49"} +{"current_steps": 1199, "total_steps": 8680, "loss": 0.9819158315658569, "lr": 1.9579368859714623e-06, "epoch": 0.2762672811059908, "percentage": 13.81, "elapsed_time": "1:37:22", "remaining_time": "10:07:35"} +{"current_steps": 1200, "total_steps": 8680, "loss": 0.8010607957839966, "lr": 1.957827482110116e-06, "epoch": 0.2764976958525346, "percentage": 13.82, 
"elapsed_time": "1:37:27", "remaining_time": "10:07:29"} +{"current_steps": 1201, "total_steps": 8680, "loss": 0.7686241865158081, "lr": 1.957717939221379e-06, "epoch": 0.2767281105990783, "percentage": 13.84, "elapsed_time": "1:37:34", "remaining_time": "10:07:34"} +{"current_steps": 1202, "total_steps": 8680, "loss": 0.8548723459243774, "lr": 1.9576082573211507e-06, "epoch": 0.2769585253456221, "percentage": 13.85, "elapsed_time": "1:37:39", "remaining_time": "10:07:33"} +{"current_steps": 1203, "total_steps": 8680, "loss": 0.7866852283477783, "lr": 1.957498436425351e-06, "epoch": 0.2771889400921659, "percentage": 13.86, "elapsed_time": "1:37:45", "remaining_time": "10:07:35"} +{"current_steps": 1204, "total_steps": 8680, "loss": 0.8086235523223877, "lr": 1.9573884765499215e-06, "epoch": 0.27741935483870966, "percentage": 13.87, "elapsed_time": "1:37:51", "remaining_time": "10:07:35"} +{"current_steps": 1205, "total_steps": 8680, "loss": 1.0310871601104736, "lr": 1.9572783777108217e-06, "epoch": 0.27764976958525345, "percentage": 13.88, "elapsed_time": "1:37:56", "remaining_time": "10:07:32"} +{"current_steps": 1206, "total_steps": 8680, "loss": 0.9482970237731934, "lr": 1.957168139924033e-06, "epoch": 0.27788018433179723, "percentage": 13.89, "elapsed_time": "1:38:01", "remaining_time": "10:07:28"} +{"current_steps": 1207, "total_steps": 8680, "loss": 0.809493899345398, "lr": 1.957057763205556e-06, "epoch": 0.278110599078341, "percentage": 13.91, "elapsed_time": "1:38:07", "remaining_time": "10:07:33"} +{"current_steps": 1208, "total_steps": 8680, "loss": 0.8679298162460327, "lr": 1.956947247571411e-06, "epoch": 0.2783410138248848, "percentage": 13.92, "elapsed_time": "1:38:12", "remaining_time": "10:07:29"} +{"current_steps": 1209, "total_steps": 8680, "loss": 0.8870571255683899, "lr": 1.95683659303764e-06, "epoch": 0.2785714285714286, "percentage": 13.93, "elapsed_time": "1:38:17", "remaining_time": "10:07:25"} +{"current_steps": 1210, "total_steps": 8680, 
"loss": 0.8452431559562683, "lr": 1.9567257996203046e-06, "epoch": 0.27880184331797236, "percentage": 13.94, "elapsed_time": "1:38:22", "remaining_time": "10:07:19"} +{"current_steps": 1211, "total_steps": 8680, "loss": 0.8376550674438477, "lr": 1.9566148673354855e-06, "epoch": 0.27903225806451615, "percentage": 13.95, "elapsed_time": "1:38:27", "remaining_time": "10:07:15"} +{"current_steps": 1212, "total_steps": 8680, "loss": 0.7686463594436646, "lr": 1.9565037961992853e-06, "epoch": 0.27926267281105993, "percentage": 13.96, "elapsed_time": "1:38:31", "remaining_time": "10:07:06"} +{"current_steps": 1213, "total_steps": 8680, "loss": 1.0064536333084106, "lr": 1.956392586227825e-06, "epoch": 0.2794930875576037, "percentage": 13.97, "elapsed_time": "1:38:37", "remaining_time": "10:07:04"} +{"current_steps": 1214, "total_steps": 8680, "loss": 0.9087784290313721, "lr": 1.956281237437247e-06, "epoch": 0.27972350230414744, "percentage": 13.99, "elapsed_time": "1:38:41", "remaining_time": "10:06:57"} +{"current_steps": 1215, "total_steps": 8680, "loss": 0.8528383374214172, "lr": 1.9561697498437133e-06, "epoch": 0.2799539170506912, "percentage": 14.0, "elapsed_time": "1:38:46", "remaining_time": "10:06:54"} +{"current_steps": 1216, "total_steps": 8680, "loss": 0.8229737281799316, "lr": 1.9560581234634062e-06, "epoch": 0.280184331797235, "percentage": 14.01, "elapsed_time": "1:38:51", "remaining_time": "10:06:48"} +{"current_steps": 1217, "total_steps": 8680, "loss": 0.8957454562187195, "lr": 1.9559463583125285e-06, "epoch": 0.2804147465437788, "percentage": 14.02, "elapsed_time": "1:38:56", "remaining_time": "10:06:43"} +{"current_steps": 1218, "total_steps": 8680, "loss": 0.8373404741287231, "lr": 1.955834454407302e-06, "epoch": 0.2806451612903226, "percentage": 14.03, "elapsed_time": "1:39:01", "remaining_time": "10:06:39"} +{"current_steps": 1219, "total_steps": 8680, "loss": 0.9117659330368042, "lr": 1.9557224117639698e-06, "epoch": 0.28087557603686636, "percentage": 
14.04, "elapsed_time": "1:39:07", "remaining_time": "10:06:40"} +{"current_steps": 1220, "total_steps": 8680, "loss": 0.9079498052597046, "lr": 1.9556102303987946e-06, "epoch": 0.28110599078341014, "percentage": 14.06, "elapsed_time": "1:39:12", "remaining_time": "10:06:39"} +{"current_steps": 1221, "total_steps": 8680, "loss": 0.8127235174179077, "lr": 1.9554979103280597e-06, "epoch": 0.2813364055299539, "percentage": 14.07, "elapsed_time": "1:39:17", "remaining_time": "10:06:31"} +{"current_steps": 1222, "total_steps": 8680, "loss": 0.6790676712989807, "lr": 1.9553854515680684e-06, "epoch": 0.2815668202764977, "percentage": 14.08, "elapsed_time": "1:39:23", "remaining_time": "10:06:34"} +{"current_steps": 1223, "total_steps": 8680, "loss": 0.93434739112854, "lr": 1.955272854135143e-06, "epoch": 0.2817972350230415, "percentage": 14.09, "elapsed_time": "1:39:27", "remaining_time": "10:06:23"} +{"current_steps": 1224, "total_steps": 8680, "loss": 0.8624403476715088, "lr": 1.9551601180456274e-06, "epoch": 0.2820276497695853, "percentage": 14.1, "elapsed_time": "1:39:32", "remaining_time": "10:06:20"} +{"current_steps": 1225, "total_steps": 8680, "loss": 0.8871273994445801, "lr": 1.9550472433158856e-06, "epoch": 0.28225806451612906, "percentage": 14.11, "elapsed_time": "1:39:38", "remaining_time": "10:06:21"} +{"current_steps": 1226, "total_steps": 8680, "loss": 1.0226445198059082, "lr": 1.9549342299623007e-06, "epoch": 0.2824884792626728, "percentage": 14.12, "elapsed_time": "1:39:42", "remaining_time": "10:06:13"} +{"current_steps": 1227, "total_steps": 8680, "loss": 0.9232503771781921, "lr": 1.9548210780012764e-06, "epoch": 0.28271889400921657, "percentage": 14.14, "elapsed_time": "1:39:46", "remaining_time": "10:06:05"} +{"current_steps": 1228, "total_steps": 8680, "loss": 0.944965124130249, "lr": 1.9547077874492367e-06, "epoch": 0.28294930875576035, "percentage": 14.15, "elapsed_time": "1:39:51", "remaining_time": "10:05:58"} +{"current_steps": 1229, 
"total_steps": 8680, "loss": 0.9491870403289795, "lr": 1.9545943583226255e-06, "epoch": 0.28317972350230414, "percentage": 14.16, "elapsed_time": "1:39:55", "remaining_time": "10:05:48"} +{"current_steps": 1230, "total_steps": 8680, "loss": 0.8477638363838196, "lr": 1.9544807906379065e-06, "epoch": 0.2834101382488479, "percentage": 14.17, "elapsed_time": "1:40:01", "remaining_time": "10:05:53"} +{"current_steps": 1231, "total_steps": 8680, "loss": 0.9733752012252808, "lr": 1.9543670844115647e-06, "epoch": 0.2836405529953917, "percentage": 14.18, "elapsed_time": "1:40:05", "remaining_time": "10:05:40"} +{"current_steps": 1232, "total_steps": 8680, "loss": 0.8158911466598511, "lr": 1.954253239660104e-06, "epoch": 0.2838709677419355, "percentage": 14.19, "elapsed_time": "1:40:10", "remaining_time": "10:05:36"} +{"current_steps": 1233, "total_steps": 8680, "loss": 0.8814271092414856, "lr": 1.9541392564000487e-06, "epoch": 0.28410138248847927, "percentage": 14.21, "elapsed_time": "1:40:17", "remaining_time": "10:05:42"} +{"current_steps": 1234, "total_steps": 8680, "loss": 0.8366897106170654, "lr": 1.9540251346479435e-06, "epoch": 0.28433179723502305, "percentage": 14.22, "elapsed_time": "1:40:23", "remaining_time": "10:05:44"} +{"current_steps": 1235, "total_steps": 8680, "loss": 0.8043497800827026, "lr": 1.953910874420353e-06, "epoch": 0.28456221198156684, "percentage": 14.23, "elapsed_time": "1:40:29", "remaining_time": "10:05:45"} +{"current_steps": 1236, "total_steps": 8680, "loss": 0.904765248298645, "lr": 1.953796475733862e-06, "epoch": 0.2847926267281106, "percentage": 14.24, "elapsed_time": "1:40:32", "remaining_time": "10:05:33"} +{"current_steps": 1237, "total_steps": 8680, "loss": 0.9092245101928711, "lr": 1.953681938605075e-06, "epoch": 0.2850230414746544, "percentage": 14.25, "elapsed_time": "1:40:36", "remaining_time": "10:05:20"} +{"current_steps": 1238, "total_steps": 8680, "loss": 0.9119021892547607, "lr": 1.953567263050617e-06, "epoch": 
0.28525345622119813, "percentage": 14.26, "elapsed_time": "1:40:41", "remaining_time": "10:05:16"} +{"current_steps": 1239, "total_steps": 8680, "loss": 0.8380709886550903, "lr": 1.9534524490871336e-06, "epoch": 0.2854838709677419, "percentage": 14.27, "elapsed_time": "1:40:47", "remaining_time": "10:05:17"} +{"current_steps": 1240, "total_steps": 8680, "loss": 0.9410983324050903, "lr": 1.9533374967312894e-06, "epoch": 0.2857142857142857, "percentage": 14.29, "elapsed_time": "1:40:52", "remaining_time": "10:05:17"} +{"current_steps": 1241, "total_steps": 8680, "loss": 0.882665753364563, "lr": 1.953222405999769e-06, "epoch": 0.2859447004608295, "percentage": 14.3, "elapsed_time": "1:40:58", "remaining_time": "10:05:14"} +{"current_steps": 1242, "total_steps": 8680, "loss": 0.9334039688110352, "lr": 1.953107176909279e-06, "epoch": 0.28617511520737327, "percentage": 14.31, "elapsed_time": "1:41:02", "remaining_time": "10:05:05"} +{"current_steps": 1243, "total_steps": 8680, "loss": 0.8743090033531189, "lr": 1.9529918094765433e-06, "epoch": 0.28640552995391705, "percentage": 14.32, "elapsed_time": "1:41:08", "remaining_time": "10:05:06"} +{"current_steps": 1244, "total_steps": 8680, "loss": 0.9017846584320068, "lr": 1.9528763037183086e-06, "epoch": 0.28663594470046083, "percentage": 14.33, "elapsed_time": "1:41:11", "remaining_time": "10:04:54"} +{"current_steps": 1245, "total_steps": 8680, "loss": 0.9412289261817932, "lr": 1.95276065965134e-06, "epoch": 0.2868663594470046, "percentage": 14.34, "elapsed_time": "1:41:16", "remaining_time": "10:04:50"} +{"current_steps": 1246, "total_steps": 8680, "loss": 0.9008835554122925, "lr": 1.9526448772924222e-06, "epoch": 0.2870967741935484, "percentage": 14.35, "elapsed_time": "1:41:21", "remaining_time": "10:04:43"} +{"current_steps": 1247, "total_steps": 8680, "loss": 0.803752064704895, "lr": 1.9525289566583622e-06, "epoch": 0.2873271889400922, "percentage": 14.37, "elapsed_time": "1:41:27", "remaining_time": "10:04:44"} 
+{"current_steps": 1248, "total_steps": 8680, "loss": 0.8354049921035767, "lr": 1.952412897765985e-06, "epoch": 0.28755760368663597, "percentage": 14.38, "elapsed_time": "1:41:32", "remaining_time": "10:04:44"} +{"current_steps": 1249, "total_steps": 8680, "loss": 1.047461748123169, "lr": 1.9522967006321363e-06, "epoch": 0.28778801843317975, "percentage": 14.39, "elapsed_time": "1:41:37", "remaining_time": "10:04:38"} +{"current_steps": 1250, "total_steps": 8680, "loss": 0.9036056399345398, "lr": 1.9521803652736826e-06, "epoch": 0.2880184331797235, "percentage": 14.4, "elapsed_time": "1:41:41", "remaining_time": "10:04:30"} +{"current_steps": 1251, "total_steps": 8680, "loss": 0.9534894227981567, "lr": 1.952063891707509e-06, "epoch": 0.28824884792626726, "percentage": 14.41, "elapsed_time": "1:41:46", "remaining_time": "10:04:23"} +{"current_steps": 1252, "total_steps": 8680, "loss": 0.9200841188430786, "lr": 1.9519472799505217e-06, "epoch": 0.28847926267281104, "percentage": 14.42, "elapsed_time": "1:41:51", "remaining_time": "10:04:16"} +{"current_steps": 1253, "total_steps": 8680, "loss": 0.8917449712753296, "lr": 1.9518305300196475e-06, "epoch": 0.2887096774193548, "percentage": 14.44, "elapsed_time": "1:41:55", "remaining_time": "10:04:11"} +{"current_steps": 1254, "total_steps": 8680, "loss": 0.92131507396698, "lr": 1.9517136419318317e-06, "epoch": 0.2889400921658986, "percentage": 14.45, "elapsed_time": "1:42:01", "remaining_time": "10:04:13"} +{"current_steps": 1255, "total_steps": 8680, "loss": 0.8862432241439819, "lr": 1.951596615704041e-06, "epoch": 0.2891705069124424, "percentage": 14.46, "elapsed_time": "1:42:06", "remaining_time": "10:04:07"} +{"current_steps": 1256, "total_steps": 8680, "loss": 0.7789605855941772, "lr": 1.951479451353261e-06, "epoch": 0.2894009216589862, "percentage": 14.47, "elapsed_time": "1:42:12", "remaining_time": "10:04:05"} +{"current_steps": 1257, "total_steps": 8680, "loss": 0.8187062740325928, "lr": 1.951362148896498e-06, 
"epoch": 0.28963133640552996, "percentage": 14.48, "elapsed_time": "1:42:16", "remaining_time": "10:03:59"} +{"current_steps": 1258, "total_steps": 8680, "loss": 1.0575072765350342, "lr": 1.9512447083507784e-06, "epoch": 0.28986175115207374, "percentage": 14.49, "elapsed_time": "1:42:20", "remaining_time": "10:03:50"} +{"current_steps": 1259, "total_steps": 8680, "loss": 0.8027279376983643, "lr": 1.9511271297331493e-06, "epoch": 0.2900921658986175, "percentage": 14.5, "elapsed_time": "1:42:28", "remaining_time": "10:03:58"} +{"current_steps": 1260, "total_steps": 8680, "loss": 0.6641743183135986, "lr": 1.951009413060676e-06, "epoch": 0.2903225806451613, "percentage": 14.52, "elapsed_time": "1:42:33", "remaining_time": "10:03:56"} +{"current_steps": 1261, "total_steps": 8680, "loss": 0.7937613725662231, "lr": 1.950891558350446e-06, "epoch": 0.2905529953917051, "percentage": 14.53, "elapsed_time": "1:42:38", "remaining_time": "10:03:53"} +{"current_steps": 1262, "total_steps": 8680, "loss": 0.9600511193275452, "lr": 1.950773565619564e-06, "epoch": 0.2907834101382488, "percentage": 14.54, "elapsed_time": "1:42:42", "remaining_time": "10:03:45"} +{"current_steps": 1263, "total_steps": 8680, "loss": 0.8275980353355408, "lr": 1.9506554348851585e-06, "epoch": 0.2910138248847926, "percentage": 14.55, "elapsed_time": "1:42:46", "remaining_time": "10:03:35"} +{"current_steps": 1264, "total_steps": 8680, "loss": 0.9008789658546448, "lr": 1.950537166164375e-06, "epoch": 0.2912442396313364, "percentage": 14.56, "elapsed_time": "1:42:51", "remaining_time": "10:03:26"} +{"current_steps": 1265, "total_steps": 8680, "loss": 0.8701465129852295, "lr": 1.95041875947438e-06, "epoch": 0.29147465437788017, "percentage": 14.57, "elapsed_time": "1:42:54", "remaining_time": "10:03:13"} +{"current_steps": 1266, "total_steps": 8680, "loss": 0.9313883781433105, "lr": 1.95030021483236e-06, "epoch": 0.29170506912442395, "percentage": 14.59, "elapsed_time": "1:42:59", "remaining_time": 
"10:03:06"} +{"current_steps": 1267, "total_steps": 8680, "loss": 0.883125901222229, "lr": 1.9501815322555222e-06, "epoch": 0.29193548387096774, "percentage": 14.6, "elapsed_time": "1:43:05", "remaining_time": "10:03:08"} +{"current_steps": 1268, "total_steps": 8680, "loss": 0.8856269121170044, "lr": 1.9500627117610927e-06, "epoch": 0.2921658986175115, "percentage": 14.61, "elapsed_time": "1:43:09", "remaining_time": "10:02:58"} +{"current_steps": 1269, "total_steps": 8680, "loss": 0.8817840218544006, "lr": 1.9499437533663184e-06, "epoch": 0.2923963133640553, "percentage": 14.62, "elapsed_time": "1:43:14", "remaining_time": "10:02:55"} +{"current_steps": 1270, "total_steps": 8680, "loss": 0.9911330342292786, "lr": 1.949824657088466e-06, "epoch": 0.2926267281105991, "percentage": 14.63, "elapsed_time": "1:43:18", "remaining_time": "10:02:45"} +{"current_steps": 1271, "total_steps": 8680, "loss": 0.8902890682220459, "lr": 1.949705422944822e-06, "epoch": 0.29285714285714287, "percentage": 14.64, "elapsed_time": "1:43:23", "remaining_time": "10:02:39"} +{"current_steps": 1272, "total_steps": 8680, "loss": 0.6846401691436768, "lr": 1.949586050952693e-06, "epoch": 0.29308755760368665, "percentage": 14.65, "elapsed_time": "1:43:27", "remaining_time": "10:02:34"} +{"current_steps": 1273, "total_steps": 8680, "loss": 0.9186165928840637, "lr": 1.9494665411294057e-06, "epoch": 0.29331797235023044, "percentage": 14.67, "elapsed_time": "1:43:32", "remaining_time": "10:02:30"} +{"current_steps": 1274, "total_steps": 8680, "loss": 0.8614095449447632, "lr": 1.949346893492307e-06, "epoch": 0.29354838709677417, "percentage": 14.68, "elapsed_time": "1:43:37", "remaining_time": "10:02:26"} +{"current_steps": 1275, "total_steps": 8680, "loss": 0.7824405431747437, "lr": 1.9492271080587637e-06, "epoch": 0.29377880184331795, "percentage": 14.69, "elapsed_time": "1:43:42", "remaining_time": "10:02:19"} +{"current_steps": 1276, "total_steps": 8680, "loss": 0.8694697618484497, "lr": 
1.949107184846162e-06, "epoch": 0.29400921658986173, "percentage": 14.7, "elapsed_time": "1:43:46", "remaining_time": "10:02:11"} +{"current_steps": 1277, "total_steps": 8680, "loss": 0.8839597105979919, "lr": 1.948987123871909e-06, "epoch": 0.2942396313364055, "percentage": 14.71, "elapsed_time": "1:43:52", "remaining_time": "10:02:10"} +{"current_steps": 1278, "total_steps": 8680, "loss": 0.832268238067627, "lr": 1.948866925153431e-06, "epoch": 0.2944700460829493, "percentage": 14.72, "elapsed_time": "1:43:58", "remaining_time": "10:02:10"} +{"current_steps": 1279, "total_steps": 8680, "loss": 0.8243123888969421, "lr": 1.948746588708175e-06, "epoch": 0.2947004608294931, "percentage": 14.74, "elapsed_time": "1:44:03", "remaining_time": "10:02:08"} +{"current_steps": 1280, "total_steps": 8680, "loss": 0.99314284324646, "lr": 1.948626114553608e-06, "epoch": 0.29493087557603687, "percentage": 14.75, "elapsed_time": "1:44:08", "remaining_time": "10:02:06"} +{"current_steps": 1281, "total_steps": 8680, "loss": 0.8853542804718018, "lr": 1.948505502707216e-06, "epoch": 0.29516129032258065, "percentage": 14.76, "elapsed_time": "1:44:13", "remaining_time": "10:02:02"} +{"current_steps": 1282, "total_steps": 8680, "loss": 0.9623305797576904, "lr": 1.948384753186506e-06, "epoch": 0.29539170506912443, "percentage": 14.77, "elapsed_time": "1:44:19", "remaining_time": "10:02:01"} +{"current_steps": 1283, "total_steps": 8680, "loss": 0.8321142792701721, "lr": 1.948263866009005e-06, "epoch": 0.2956221198156682, "percentage": 14.78, "elapsed_time": "1:44:23", "remaining_time": "10:01:52"} +{"current_steps": 1284, "total_steps": 8680, "loss": 0.8911606669425964, "lr": 1.948142841192258e-06, "epoch": 0.295852534562212, "percentage": 14.79, "elapsed_time": "1:44:28", "remaining_time": "10:01:49"} +{"current_steps": 1285, "total_steps": 8680, "loss": 0.9501996040344238, "lr": 1.948021678753834e-06, "epoch": 0.2960829493087558, "percentage": 14.8, "elapsed_time": "1:44:33", 
"remaining_time": "10:01:41"} +{"current_steps": 1286, "total_steps": 8680, "loss": 0.8555784225463867, "lr": 1.947900378711318e-06, "epoch": 0.29631336405529957, "percentage": 14.82, "elapsed_time": "1:44:37", "remaining_time": "10:01:35"} +{"current_steps": 1287, "total_steps": 8680, "loss": 0.7703878283500671, "lr": 1.9477789410823163e-06, "epoch": 0.2965437788018433, "percentage": 14.83, "elapsed_time": "1:44:43", "remaining_time": "10:01:35"} +{"current_steps": 1288, "total_steps": 8680, "loss": 1.0072009563446045, "lr": 1.947657365884457e-06, "epoch": 0.2967741935483871, "percentage": 14.84, "elapsed_time": "1:44:48", "remaining_time": "10:01:29"} +{"current_steps": 1289, "total_steps": 8680, "loss": 0.7633493542671204, "lr": 1.9475356531353847e-06, "epoch": 0.29700460829493086, "percentage": 14.85, "elapsed_time": "1:44:53", "remaining_time": "10:01:27"} +{"current_steps": 1290, "total_steps": 8680, "loss": 0.88579261302948, "lr": 1.9474138028527674e-06, "epoch": 0.29723502304147464, "percentage": 14.86, "elapsed_time": "1:44:59", "remaining_time": "10:01:25"} +{"current_steps": 1291, "total_steps": 8680, "loss": 0.8356794118881226, "lr": 1.94729181505429e-06, "epoch": 0.2974654377880184, "percentage": 14.87, "elapsed_time": "1:45:03", "remaining_time": "10:01:15"} +{"current_steps": 1292, "total_steps": 8680, "loss": 0.8330395817756653, "lr": 1.94716968975766e-06, "epoch": 0.2976958525345622, "percentage": 14.88, "elapsed_time": "1:45:07", "remaining_time": "10:01:10"} +{"current_steps": 1293, "total_steps": 8680, "loss": 0.9219698905944824, "lr": 1.947047426980604e-06, "epoch": 0.297926267281106, "percentage": 14.9, "elapsed_time": "1:45:11", "remaining_time": "10:00:58"} +{"current_steps": 1294, "total_steps": 8680, "loss": 0.880803644657135, "lr": 1.9469250267408674e-06, "epoch": 0.2981566820276498, "percentage": 14.91, "elapsed_time": "1:45:15", "remaining_time": "10:00:50"} +{"current_steps": 1295, "total_steps": 8680, "loss": 0.8212012052536011, "lr": 
1.9468024890562165e-06, "epoch": 0.29838709677419356, "percentage": 14.92, "elapsed_time": "1:45:21", "remaining_time": "10:00:48"} +{"current_steps": 1296, "total_steps": 8680, "loss": 1.0118587017059326, "lr": 1.946679813944438e-06, "epoch": 0.29861751152073734, "percentage": 14.93, "elapsed_time": "1:45:25", "remaining_time": "10:00:37"} +{"current_steps": 1297, "total_steps": 8680, "loss": 0.8708915710449219, "lr": 1.9465570014233377e-06, "epoch": 0.2988479262672811, "percentage": 14.94, "elapsed_time": "1:45:30", "remaining_time": "10:00:32"} +{"current_steps": 1298, "total_steps": 8680, "loss": 0.9386067986488342, "lr": 1.9464340515107415e-06, "epoch": 0.2990783410138249, "percentage": 14.95, "elapsed_time": "1:45:34", "remaining_time": "10:00:27"} +{"current_steps": 1299, "total_steps": 8680, "loss": 0.8672319650650024, "lr": 1.9463109642244958e-06, "epoch": 0.29930875576036864, "percentage": 14.97, "elapsed_time": "1:45:39", "remaining_time": "10:00:23"} +{"current_steps": 1300, "total_steps": 8680, "loss": 0.9002958536148071, "lr": 1.9461877395824662e-06, "epoch": 0.2995391705069124, "percentage": 14.98, "elapsed_time": "1:45:44", "remaining_time": "10:00:17"} +{"current_steps": 1301, "total_steps": 8680, "loss": 0.9206029772758484, "lr": 1.946064377602539e-06, "epoch": 0.2997695852534562, "percentage": 14.99, "elapsed_time": "1:45:53", "remaining_time": "10:00:34"} +{"current_steps": 1302, "total_steps": 8680, "loss": 1.0063598155975342, "lr": 1.94594087830262e-06, "epoch": 0.3, "percentage": 15.0, "elapsed_time": "1:45:58", "remaining_time": "10:00:29"} +{"current_steps": 1303, "total_steps": 8680, "loss": 0.7616912126541138, "lr": 1.9458172417006346e-06, "epoch": 0.3002304147465438, "percentage": 15.01, "elapsed_time": "1:46:03", "remaining_time": "10:00:30"} +{"current_steps": 1304, "total_steps": 8680, "loss": 0.8385730385780334, "lr": 1.945693467814529e-06, "epoch": 0.30046082949308756, "percentage": 15.02, "elapsed_time": "1:46:09", 
"remaining_time": "10:00:30"} +{"current_steps": 1305, "total_steps": 8680, "loss": 0.7032216787338257, "lr": 1.9455695566622677e-06, "epoch": 0.30069124423963134, "percentage": 15.03, "elapsed_time": "1:46:16", "remaining_time": "10:00:36"} +{"current_steps": 1306, "total_steps": 8680, "loss": 0.7647181749343872, "lr": 1.9454455082618373e-06, "epoch": 0.3009216589861751, "percentage": 15.05, "elapsed_time": "1:46:20", "remaining_time": "10:00:28"} +{"current_steps": 1307, "total_steps": 8680, "loss": 0.9918918013572693, "lr": 1.945321322631243e-06, "epoch": 0.3011520737327189, "percentage": 15.06, "elapsed_time": "1:46:24", "remaining_time": "10:00:13"} +{"current_steps": 1308, "total_steps": 8680, "loss": 0.838451623916626, "lr": 1.945196999788511e-06, "epoch": 0.3013824884792627, "percentage": 15.07, "elapsed_time": "1:46:29", "remaining_time": "10:00:09"} +{"current_steps": 1309, "total_steps": 8680, "loss": 0.9739303588867188, "lr": 1.945072539751685e-06, "epoch": 0.3016129032258065, "percentage": 15.08, "elapsed_time": "1:46:34", "remaining_time": "10:00:06"} +{"current_steps": 1310, "total_steps": 8680, "loss": 0.8233742713928223, "lr": 1.9449479425388305e-06, "epoch": 0.30184331797235026, "percentage": 15.09, "elapsed_time": "1:46:38", "remaining_time": "10:00:00"} +{"current_steps": 1311, "total_steps": 8680, "loss": 0.9765088558197021, "lr": 1.944823208168034e-06, "epoch": 0.302073732718894, "percentage": 15.1, "elapsed_time": "1:46:43", "remaining_time": "9:59:54"} +{"current_steps": 1312, "total_steps": 8680, "loss": 0.7614048719406128, "lr": 1.944698336657399e-06, "epoch": 0.30230414746543777, "percentage": 15.12, "elapsed_time": "1:46:48", "remaining_time": "9:59:51"} +{"current_steps": 1313, "total_steps": 8680, "loss": 0.760692834854126, "lr": 1.9445733280250512e-06, "epoch": 0.30253456221198155, "percentage": 15.13, "elapsed_time": "1:46:53", "remaining_time": "9:59:44"} +{"current_steps": 1314, "total_steps": 8680, "loss": 0.8484706878662109, 
"lr": 1.944448182289135e-06, "epoch": 0.30276497695852533, "percentage": 15.14, "elapsed_time": "1:46:57", "remaining_time": "9:59:36"} +{"current_steps": 1315, "total_steps": 8680, "loss": 0.8857289552688599, "lr": 1.944322899467816e-06, "epoch": 0.3029953917050691, "percentage": 15.15, "elapsed_time": "1:47:02", "remaining_time": "9:59:29"} +{"current_steps": 1316, "total_steps": 8680, "loss": 0.8375179171562195, "lr": 1.944197479579278e-06, "epoch": 0.3032258064516129, "percentage": 15.16, "elapsed_time": "1:47:06", "remaining_time": "9:59:20"} +{"current_steps": 1317, "total_steps": 8680, "loss": 0.8141925930976868, "lr": 1.9440719226417263e-06, "epoch": 0.3034562211981567, "percentage": 15.17, "elapsed_time": "1:47:12", "remaining_time": "9:59:20"} +{"current_steps": 1318, "total_steps": 8680, "loss": 0.9970111846923828, "lr": 1.943946228673384e-06, "epoch": 0.30368663594470047, "percentage": 15.18, "elapsed_time": "1:47:16", "remaining_time": "9:59:14"} +{"current_steps": 1319, "total_steps": 8680, "loss": 0.9542866349220276, "lr": 1.9438203976924966e-06, "epoch": 0.30391705069124425, "percentage": 15.2, "elapsed_time": "1:47:20", "remaining_time": "9:59:03"} +{"current_steps": 1320, "total_steps": 8680, "loss": 0.8808399438858032, "lr": 1.943694429717328e-06, "epoch": 0.30414746543778803, "percentage": 15.21, "elapsed_time": "1:47:26", "remaining_time": "9:59:02"} +{"current_steps": 1321, "total_steps": 8680, "loss": 0.8541150093078613, "lr": 1.9435683247661623e-06, "epoch": 0.3043778801843318, "percentage": 15.22, "elapsed_time": "1:47:30", "remaining_time": "9:58:52"} +{"current_steps": 1322, "total_steps": 8680, "loss": 0.8887044191360474, "lr": 1.943442082857303e-06, "epoch": 0.3046082949308756, "percentage": 15.23, "elapsed_time": "1:47:33", "remaining_time": "9:58:40"} +{"current_steps": 1323, "total_steps": 8680, "loss": 0.8699131011962891, "lr": 1.9433157040090746e-06, "epoch": 0.30483870967741933, "percentage": 15.24, "elapsed_time": "1:47:39", 
"remaining_time": "9:58:41"} +{"current_steps": 1324, "total_steps": 8680, "loss": 0.7096077799797058, "lr": 1.9431891882398205e-06, "epoch": 0.3050691244239631, "percentage": 15.25, "elapsed_time": "1:47:44", "remaining_time": "9:58:33"} +{"current_steps": 1325, "total_steps": 8680, "loss": 0.8040453195571899, "lr": 1.9430625355679045e-06, "epoch": 0.3052995391705069, "percentage": 15.26, "elapsed_time": "1:47:49", "remaining_time": "9:58:29"} +{"current_steps": 1326, "total_steps": 8680, "loss": 0.8275970220565796, "lr": 1.9429357460117093e-06, "epoch": 0.3055299539170507, "percentage": 15.28, "elapsed_time": "1:47:52", "remaining_time": "9:58:18"} +{"current_steps": 1327, "total_steps": 8680, "loss": 0.9724141359329224, "lr": 1.9428088195896393e-06, "epoch": 0.30576036866359446, "percentage": 15.29, "elapsed_time": "1:47:57", "remaining_time": "9:58:13"} +{"current_steps": 1328, "total_steps": 8680, "loss": 0.9293274879455566, "lr": 1.9426817563201174e-06, "epoch": 0.30599078341013825, "percentage": 15.3, "elapsed_time": "1:48:02", "remaining_time": "9:58:08"} +{"current_steps": 1329, "total_steps": 8680, "loss": 0.9454036951065063, "lr": 1.9425545562215865e-06, "epoch": 0.30622119815668203, "percentage": 15.31, "elapsed_time": "1:48:07", "remaining_time": "9:58:04"} +{"current_steps": 1330, "total_steps": 8680, "loss": 0.7751365900039673, "lr": 1.9424272193125094e-06, "epoch": 0.3064516129032258, "percentage": 15.32, "elapsed_time": "1:48:11", "remaining_time": "9:57:56"} +{"current_steps": 1331, "total_steps": 8680, "loss": 0.8444688320159912, "lr": 1.942299745611369e-06, "epoch": 0.3066820276497696, "percentage": 15.33, "elapsed_time": "1:48:16", "remaining_time": "9:57:48"} +{"current_steps": 1332, "total_steps": 8680, "loss": 0.7751414775848389, "lr": 1.9421721351366684e-06, "epoch": 0.3069124423963134, "percentage": 15.35, "elapsed_time": "1:48:20", "remaining_time": "9:57:41"} +{"current_steps": 1333, "total_steps": 8680, "loss": 0.912209153175354, "lr": 
1.9420443879069287e-06, "epoch": 0.30714285714285716, "percentage": 15.36, "elapsed_time": "1:48:24", "remaining_time": "9:57:32"} +{"current_steps": 1334, "total_steps": 8680, "loss": 0.8897542357444763, "lr": 1.941916503940694e-06, "epoch": 0.30737327188940095, "percentage": 15.37, "elapsed_time": "1:48:29", "remaining_time": "9:57:28"} +{"current_steps": 1335, "total_steps": 8680, "loss": 0.8562046885490417, "lr": 1.9417884832565257e-06, "epoch": 0.3076036866359447, "percentage": 15.38, "elapsed_time": "1:48:34", "remaining_time": "9:57:20"} +{"current_steps": 1336, "total_steps": 8680, "loss": 0.8886401653289795, "lr": 1.9416603258730055e-06, "epoch": 0.30783410138248846, "percentage": 15.39, "elapsed_time": "1:48:40", "remaining_time": "9:57:22"} +{"current_steps": 1337, "total_steps": 8680, "loss": 0.7401903867721558, "lr": 1.9415320318087354e-06, "epoch": 0.30806451612903224, "percentage": 15.4, "elapsed_time": "1:48:46", "remaining_time": "9:57:25"} +{"current_steps": 1338, "total_steps": 8680, "loss": 0.761360764503479, "lr": 1.941403601082338e-06, "epoch": 0.308294930875576, "percentage": 15.41, "elapsed_time": "1:48:51", "remaining_time": "9:57:20"} +{"current_steps": 1339, "total_steps": 8680, "loss": 0.9223028421401978, "lr": 1.9412750337124537e-06, "epoch": 0.3085253456221198, "percentage": 15.43, "elapsed_time": "1:48:56", "remaining_time": "9:57:15"} +{"current_steps": 1340, "total_steps": 8680, "loss": 0.9287113547325134, "lr": 1.9411463297177446e-06, "epoch": 0.3087557603686636, "percentage": 15.44, "elapsed_time": "1:49:01", "remaining_time": "9:57:12"} +{"current_steps": 1341, "total_steps": 8680, "loss": 0.8548502922058105, "lr": 1.941017489116891e-06, "epoch": 0.3089861751152074, "percentage": 15.45, "elapsed_time": "1:49:06", "remaining_time": "9:57:07"} +{"current_steps": 1342, "total_steps": 8680, "loss": 0.8885709643363953, "lr": 1.9408885119285953e-06, "epoch": 0.30921658986175116, "percentage": 15.46, "elapsed_time": "1:49:12", 
"remaining_time": "9:57:06"} +{"current_steps": 1343, "total_steps": 8680, "loss": 0.8970856666564941, "lr": 1.940759398171577e-06, "epoch": 0.30944700460829494, "percentage": 15.47, "elapsed_time": "1:49:17", "remaining_time": "9:57:03"} +{"current_steps": 1344, "total_steps": 8680, "loss": 0.847138524055481, "lr": 1.9406301478645783e-06, "epoch": 0.3096774193548387, "percentage": 15.48, "elapsed_time": "1:49:23", "remaining_time": "9:57:06"} +{"current_steps": 1345, "total_steps": 8680, "loss": 0.7892216444015503, "lr": 1.9405007610263584e-06, "epoch": 0.3099078341013825, "percentage": 15.5, "elapsed_time": "1:49:28", "remaining_time": "9:56:59"} +{"current_steps": 1346, "total_steps": 8680, "loss": 0.8869141340255737, "lr": 1.940371237675698e-06, "epoch": 0.3101382488479263, "percentage": 15.51, "elapsed_time": "1:49:31", "remaining_time": "9:56:46"} +{"current_steps": 1347, "total_steps": 8680, "loss": 0.8405635356903076, "lr": 1.9402415778313976e-06, "epoch": 0.3103686635944701, "percentage": 15.52, "elapsed_time": "1:49:35", "remaining_time": "9:56:34"} +{"current_steps": 1348, "total_steps": 8680, "loss": 0.914352536201477, "lr": 1.9401117815122768e-06, "epoch": 0.3105990783410138, "percentage": 15.53, "elapsed_time": "1:49:38", "remaining_time": "9:56:23"} +{"current_steps": 1349, "total_steps": 8680, "loss": 0.9059416055679321, "lr": 1.9399818487371756e-06, "epoch": 0.3108294930875576, "percentage": 15.54, "elapsed_time": "1:49:44", "remaining_time": "9:56:23"} +{"current_steps": 1350, "total_steps": 8680, "loss": 1.0513644218444824, "lr": 1.939851779524953e-06, "epoch": 0.31105990783410137, "percentage": 15.55, "elapsed_time": "1:49:48", "remaining_time": "9:56:15"} +{"current_steps": 1351, "total_steps": 8680, "loss": 0.8673127889633179, "lr": 1.9397215738944893e-06, "epoch": 0.31129032258064515, "percentage": 15.56, "elapsed_time": "1:49:54", "remaining_time": "9:56:13"} +{"current_steps": 1352, "total_steps": 8680, "loss": 0.7907043695449829, "lr": 
1.9395912318646827e-06, "epoch": 0.31152073732718893, "percentage": 15.58, "elapsed_time": "1:49:58", "remaining_time": "9:56:05"} +{"current_steps": 1353, "total_steps": 8680, "loss": 0.8127990961074829, "lr": 1.9394607534544527e-06, "epoch": 0.3117511520737327, "percentage": 15.59, "elapsed_time": "1:50:04", "remaining_time": "9:56:03"} +{"current_steps": 1354, "total_steps": 8680, "loss": 0.877837061882019, "lr": 1.9393301386827387e-06, "epoch": 0.3119815668202765, "percentage": 15.6, "elapsed_time": "1:50:08", "remaining_time": "9:55:53"} +{"current_steps": 1355, "total_steps": 8680, "loss": 0.8518592715263367, "lr": 1.939199387568498e-06, "epoch": 0.3122119815668203, "percentage": 15.61, "elapsed_time": "1:50:12", "remaining_time": "9:55:47"} +{"current_steps": 1356, "total_steps": 8680, "loss": 0.7151869535446167, "lr": 1.9390685001307093e-06, "epoch": 0.31244239631336407, "percentage": 15.62, "elapsed_time": "1:50:17", "remaining_time": "9:55:41"} +{"current_steps": 1357, "total_steps": 8680, "loss": 0.8325271606445312, "lr": 1.9389374763883716e-06, "epoch": 0.31267281105990785, "percentage": 15.63, "elapsed_time": "1:50:22", "remaining_time": "9:55:36"} +{"current_steps": 1358, "total_steps": 8680, "loss": 0.6875327825546265, "lr": 1.938806316360502e-06, "epoch": 0.31290322580645163, "percentage": 15.65, "elapsed_time": "1:50:28", "remaining_time": "9:55:37"} +{"current_steps": 1359, "total_steps": 8680, "loss": 0.8944000005722046, "lr": 1.9386750200661382e-06, "epoch": 0.3131336405529954, "percentage": 15.66, "elapsed_time": "1:50:32", "remaining_time": "9:55:27"} +{"current_steps": 1360, "total_steps": 8680, "loss": 0.8544747829437256, "lr": 1.938543587524338e-06, "epoch": 0.31336405529953915, "percentage": 15.67, "elapsed_time": "1:50:36", "remaining_time": "9:55:22"} +{"current_steps": 1361, "total_steps": 8680, "loss": 0.9216448068618774, "lr": 1.9384120187541788e-06, "epoch": 0.31359447004608293, "percentage": 15.68, "elapsed_time": "1:50:41", 
"remaining_time": "9:55:14"} +{"current_steps": 1362, "total_steps": 8680, "loss": 0.9239494800567627, "lr": 1.938280313774757e-06, "epoch": 0.3138248847926267, "percentage": 15.69, "elapsed_time": "1:50:47", "remaining_time": "9:55:17"} +{"current_steps": 1363, "total_steps": 8680, "loss": 0.8903297185897827, "lr": 1.9381484726051896e-06, "epoch": 0.3140552995391705, "percentage": 15.7, "elapsed_time": "1:50:51", "remaining_time": "9:55:09"} +{"current_steps": 1364, "total_steps": 8680, "loss": 0.7019625306129456, "lr": 1.9380164952646137e-06, "epoch": 0.3142857142857143, "percentage": 15.71, "elapsed_time": "1:50:57", "remaining_time": "9:55:09"} +{"current_steps": 1365, "total_steps": 8680, "loss": 0.8668909072875977, "lr": 1.9378843817721854e-06, "epoch": 0.31451612903225806, "percentage": 15.73, "elapsed_time": "1:51:02", "remaining_time": "9:55:04"} +{"current_steps": 1366, "total_steps": 8680, "loss": 0.8124282360076904, "lr": 1.9377521321470804e-06, "epoch": 0.31474654377880185, "percentage": 15.74, "elapsed_time": "1:51:07", "remaining_time": "9:55:01"} +{"current_steps": 1367, "total_steps": 8680, "loss": 0.7543507814407349, "lr": 1.937619746408495e-06, "epoch": 0.31497695852534563, "percentage": 15.75, "elapsed_time": "1:51:12", "remaining_time": "9:54:58"} +{"current_steps": 1368, "total_steps": 8680, "loss": 0.8355506062507629, "lr": 1.9374872245756448e-06, "epoch": 0.3152073732718894, "percentage": 15.76, "elapsed_time": "1:51:18", "remaining_time": "9:54:54"} +{"current_steps": 1369, "total_steps": 8680, "loss": 0.9642060399055481, "lr": 1.937354566667765e-06, "epoch": 0.3154377880184332, "percentage": 15.77, "elapsed_time": "1:51:23", "remaining_time": "9:54:51"} +{"current_steps": 1370, "total_steps": 8680, "loss": 0.9044197201728821, "lr": 1.93722177270411e-06, "epoch": 0.315668202764977, "percentage": 15.78, "elapsed_time": "1:51:27", "remaining_time": "9:54:44"} +{"current_steps": 1371, "total_steps": 8680, "loss": 0.8077869415283203, "lr": 
1.937088842703956e-06, "epoch": 0.31589861751152076, "percentage": 15.79, "elapsed_time": "1:51:34", "remaining_time": "9:54:48"} +{"current_steps": 1372, "total_steps": 8680, "loss": 0.8319793939590454, "lr": 1.9369557766865968e-06, "epoch": 0.3161290322580645, "percentage": 15.81, "elapsed_time": "1:51:39", "remaining_time": "9:54:43"} +{"current_steps": 1373, "total_steps": 8680, "loss": 0.8233131170272827, "lr": 1.9368225746713475e-06, "epoch": 0.3163594470046083, "percentage": 15.82, "elapsed_time": "1:51:43", "remaining_time": "9:54:37"} +{"current_steps": 1374, "total_steps": 8680, "loss": 0.7898514270782471, "lr": 1.936689236677541e-06, "epoch": 0.31658986175115206, "percentage": 15.83, "elapsed_time": "1:51:49", "remaining_time": "9:54:38"} +{"current_steps": 1375, "total_steps": 8680, "loss": 0.9243249893188477, "lr": 1.9365557627245326e-06, "epoch": 0.31682027649769584, "percentage": 15.84, "elapsed_time": "1:51:54", "remaining_time": "9:54:32"} +{"current_steps": 1376, "total_steps": 8680, "loss": 0.8153131008148193, "lr": 1.9364221528316946e-06, "epoch": 0.3170506912442396, "percentage": 15.85, "elapsed_time": "1:51:59", "remaining_time": "9:54:26"} +{"current_steps": 1377, "total_steps": 8680, "loss": 0.9203826189041138, "lr": 1.936288407018421e-06, "epoch": 0.3172811059907834, "percentage": 15.86, "elapsed_time": "1:52:04", "remaining_time": "9:54:24"} +{"current_steps": 1378, "total_steps": 8680, "loss": 0.902605414390564, "lr": 1.936154525304124e-06, "epoch": 0.3175115207373272, "percentage": 15.88, "elapsed_time": "1:52:09", "remaining_time": "9:54:18"} +{"current_steps": 1379, "total_steps": 8680, "loss": 0.9504558444023132, "lr": 1.936020507708238e-06, "epoch": 0.317741935483871, "percentage": 15.89, "elapsed_time": "1:52:14", "remaining_time": "9:54:13"} +{"current_steps": 1380, "total_steps": 8680, "loss": 0.8068373203277588, "lr": 1.9358863542502133e-06, "epoch": 0.31797235023041476, "percentage": 15.9, "elapsed_time": "1:52:18", 
"remaining_time": "9:54:03"} +{"current_steps": 1381, "total_steps": 8680, "loss": 1.00711989402771, "lr": 1.935752064949524e-06, "epoch": 0.31820276497695854, "percentage": 15.91, "elapsed_time": "1:52:21", "remaining_time": "9:53:53"} +{"current_steps": 1382, "total_steps": 8680, "loss": 0.8271746039390564, "lr": 1.935617639825661e-06, "epoch": 0.3184331797235023, "percentage": 15.92, "elapsed_time": "1:52:27", "remaining_time": "9:53:50"} +{"current_steps": 1383, "total_steps": 8680, "loss": 0.8478754758834839, "lr": 1.9354830788981363e-06, "epoch": 0.3186635944700461, "percentage": 15.93, "elapsed_time": "1:52:31", "remaining_time": "9:53:43"} +{"current_steps": 1384, "total_steps": 8680, "loss": 0.9240723252296448, "lr": 1.935348382186481e-06, "epoch": 0.31889400921658984, "percentage": 15.94, "elapsed_time": "1:52:35", "remaining_time": "9:53:33"} +{"current_steps": 1385, "total_steps": 8680, "loss": 0.9275645613670349, "lr": 1.935213549710246e-06, "epoch": 0.3191244239631336, "percentage": 15.96, "elapsed_time": "1:52:39", "remaining_time": "9:53:21"} +{"current_steps": 1386, "total_steps": 8680, "loss": 0.9838275909423828, "lr": 1.9350785814890027e-06, "epoch": 0.3193548387096774, "percentage": 15.97, "elapsed_time": "1:52:43", "remaining_time": "9:53:13"} +{"current_steps": 1387, "total_steps": 8680, "loss": 0.9259177446365356, "lr": 1.934943477542341e-06, "epoch": 0.3195852534562212, "percentage": 15.98, "elapsed_time": "1:52:48", "remaining_time": "9:53:07"} +{"current_steps": 1388, "total_steps": 8680, "loss": 0.9252835512161255, "lr": 1.9348082378898714e-06, "epoch": 0.31981566820276497, "percentage": 15.99, "elapsed_time": "1:52:53", "remaining_time": "9:53:06"} +{"current_steps": 1389, "total_steps": 8680, "loss": 0.8929460048675537, "lr": 1.9346728625512235e-06, "epoch": 0.32004608294930875, "percentage": 16.0, "elapsed_time": "1:52:58", "remaining_time": "9:53:02"} +{"current_steps": 1390, "total_steps": 8680, "loss": 0.8909564018249512, "lr": 
1.934537351546047e-06, "epoch": 0.32027649769585254, "percentage": 16.01, "elapsed_time": "1:53:03", "remaining_time": "9:52:55"} +{"current_steps": 1391, "total_steps": 8680, "loss": 0.8745983839035034, "lr": 1.934401704894011e-06, "epoch": 0.3205069124423963, "percentage": 16.03, "elapsed_time": "1:53:07", "remaining_time": "9:52:44"} +{"current_steps": 1392, "total_steps": 8680, "loss": 0.8622266054153442, "lr": 1.934265922614805e-06, "epoch": 0.3207373271889401, "percentage": 16.04, "elapsed_time": "1:53:12", "remaining_time": "9:52:41"} +{"current_steps": 1393, "total_steps": 8680, "loss": 0.6796590089797974, "lr": 1.9341300047281365e-06, "epoch": 0.3209677419354839, "percentage": 16.05, "elapsed_time": "1:53:17", "remaining_time": "9:52:38"} +{"current_steps": 1394, "total_steps": 8680, "loss": 0.9012733697891235, "lr": 1.9339939512537344e-06, "epoch": 0.32119815668202767, "percentage": 16.06, "elapsed_time": "1:53:22", "remaining_time": "9:52:32"} +{"current_steps": 1395, "total_steps": 8680, "loss": 0.9196282625198364, "lr": 1.933857762211347e-06, "epoch": 0.32142857142857145, "percentage": 16.07, "elapsed_time": "1:53:26", "remaining_time": "9:52:26"} +{"current_steps": 1396, "total_steps": 8680, "loss": 0.7717788219451904, "lr": 1.9337214376207417e-06, "epoch": 0.3216589861751152, "percentage": 16.08, "elapsed_time": "1:53:31", "remaining_time": "9:52:19"} +{"current_steps": 1397, "total_steps": 8680, "loss": 0.8516619801521301, "lr": 1.9335849775017057e-06, "epoch": 0.32188940092165896, "percentage": 16.09, "elapsed_time": "1:53:36", "remaining_time": "9:52:14"} +{"current_steps": 1398, "total_steps": 8680, "loss": 0.8089120388031006, "lr": 1.933448381874046e-06, "epoch": 0.32211981566820275, "percentage": 16.11, "elapsed_time": "1:53:41", "remaining_time": "9:52:13"} +{"current_steps": 1399, "total_steps": 8680, "loss": 0.8940925598144531, "lr": 1.9333116507575895e-06, "epoch": 0.32235023041474653, "percentage": 16.12, "elapsed_time": "1:53:46", 
"remaining_time": "9:52:08"} +{"current_steps": 1400, "total_steps": 8680, "loss": 1.0240859985351562, "lr": 1.9331747841721827e-06, "epoch": 0.3225806451612903, "percentage": 16.13, "elapsed_time": "1:53:50", "remaining_time": "9:51:58"} +{"current_steps": 1401, "total_steps": 8680, "loss": 0.742689847946167, "lr": 1.9330377821376916e-06, "epoch": 0.3228110599078341, "percentage": 16.14, "elapsed_time": "1:53:56", "remaining_time": "9:52:01"} +{"current_steps": 1402, "total_steps": 8680, "loss": 0.9843875169754028, "lr": 1.932900644674001e-06, "epoch": 0.3230414746543779, "percentage": 16.15, "elapsed_time": "1:54:00", "remaining_time": "9:51:48"} +{"current_steps": 1403, "total_steps": 8680, "loss": 0.7289329767227173, "lr": 1.932763371801017e-06, "epoch": 0.32327188940092166, "percentage": 16.16, "elapsed_time": "1:54:04", "remaining_time": "9:51:40"} +{"current_steps": 1404, "total_steps": 8680, "loss": 0.7706295251846313, "lr": 1.9326259635386644e-06, "epoch": 0.32350230414746545, "percentage": 16.18, "elapsed_time": "1:54:08", "remaining_time": "9:51:31"} +{"current_steps": 1405, "total_steps": 8680, "loss": 0.87788325548172, "lr": 1.932488419906888e-06, "epoch": 0.32373271889400923, "percentage": 16.19, "elapsed_time": "1:54:13", "remaining_time": "9:51:25"} +{"current_steps": 1406, "total_steps": 8680, "loss": 0.863690972328186, "lr": 1.9323507409256515e-06, "epoch": 0.323963133640553, "percentage": 16.2, "elapsed_time": "1:54:17", "remaining_time": "9:51:19"} +{"current_steps": 1407, "total_steps": 8680, "loss": 0.9333875179290771, "lr": 1.9322129266149396e-06, "epoch": 0.3241935483870968, "percentage": 16.21, "elapsed_time": "1:54:22", "remaining_time": "9:51:11"} +{"current_steps": 1408, "total_steps": 8680, "loss": 0.9170523881912231, "lr": 1.9320749769947555e-06, "epoch": 0.3244239631336406, "percentage": 16.22, "elapsed_time": "1:54:26", "remaining_time": "9:51:05"} +{"current_steps": 1409, "total_steps": 8680, "loss": 0.9337698221206665, "lr": 
1.931936892085122e-06, "epoch": 0.3246543778801843, "percentage": 16.23, "elapsed_time": "1:54:31", "remaining_time": "9:50:57"} +{"current_steps": 1410, "total_steps": 8680, "loss": 0.8436682224273682, "lr": 1.9317986719060824e-06, "epoch": 0.3248847926267281, "percentage": 16.24, "elapsed_time": "1:54:35", "remaining_time": "9:50:48"} +{"current_steps": 1411, "total_steps": 8680, "loss": 0.6652755737304688, "lr": 1.9316603164776996e-06, "epoch": 0.3251152073732719, "percentage": 16.26, "elapsed_time": "1:54:40", "remaining_time": "9:50:47"} +{"current_steps": 1412, "total_steps": 8680, "loss": 0.7966932654380798, "lr": 1.931521825820055e-06, "epoch": 0.32534562211981566, "percentage": 16.27, "elapsed_time": "1:54:45", "remaining_time": "9:50:43"} +{"current_steps": 1413, "total_steps": 8680, "loss": 0.9791682958602905, "lr": 1.93138319995325e-06, "epoch": 0.32557603686635944, "percentage": 16.28, "elapsed_time": "1:54:49", "remaining_time": "9:50:31"} +{"current_steps": 1414, "total_steps": 8680, "loss": 0.7403467297554016, "lr": 1.931244438897407e-06, "epoch": 0.3258064516129032, "percentage": 16.29, "elapsed_time": "1:54:52", "remaining_time": "9:50:20"} +{"current_steps": 1415, "total_steps": 8680, "loss": 0.7758523225784302, "lr": 1.931105542672667e-06, "epoch": 0.326036866359447, "percentage": 16.3, "elapsed_time": "1:54:59", "remaining_time": "9:50:24"} +{"current_steps": 1416, "total_steps": 8680, "loss": 0.8444551229476929, "lr": 1.9309665112991894e-06, "epoch": 0.3262672811059908, "percentage": 16.31, "elapsed_time": "1:55:04", "remaining_time": "9:50:20"} +{"current_steps": 1417, "total_steps": 8680, "loss": 0.8796061277389526, "lr": 1.9308273447971553e-06, "epoch": 0.3264976958525346, "percentage": 16.32, "elapsed_time": "1:55:09", "remaining_time": "9:50:14"} +{"current_steps": 1418, "total_steps": 8680, "loss": 0.8386640548706055, "lr": 1.9306880431867643e-06, "epoch": 0.32672811059907836, "percentage": 16.34, "elapsed_time": "1:55:14", 
"remaining_time": "9:50:09"} +{"current_steps": 1419, "total_steps": 8680, "loss": 0.9229142665863037, "lr": 1.930548606488236e-06, "epoch": 0.32695852534562214, "percentage": 16.35, "elapsed_time": "1:55:18", "remaining_time": "9:49:59"} +{"current_steps": 1420, "total_steps": 8680, "loss": 0.9938615560531616, "lr": 1.9304090347218094e-06, "epoch": 0.3271889400921659, "percentage": 16.36, "elapsed_time": "1:55:21", "remaining_time": "9:49:46"} +{"current_steps": 1421, "total_steps": 8680, "loss": 0.7946186661720276, "lr": 1.930269327907743e-06, "epoch": 0.32741935483870965, "percentage": 16.37, "elapsed_time": "1:55:25", "remaining_time": "9:49:40"} +{"current_steps": 1422, "total_steps": 8680, "loss": 0.9456713199615479, "lr": 1.930129486066315e-06, "epoch": 0.32764976958525344, "percentage": 16.38, "elapsed_time": "1:55:30", "remaining_time": "9:49:32"} +{"current_steps": 1423, "total_steps": 8680, "loss": 0.844656765460968, "lr": 1.929989509217824e-06, "epoch": 0.3278801843317972, "percentage": 16.39, "elapsed_time": "1:55:36", "remaining_time": "9:49:34"} +{"current_steps": 1424, "total_steps": 8680, "loss": 0.7534950971603394, "lr": 1.9298493973825862e-06, "epoch": 0.328110599078341, "percentage": 16.41, "elapsed_time": "1:55:41", "remaining_time": "9:49:31"} +{"current_steps": 1425, "total_steps": 8680, "loss": 0.934429407119751, "lr": 1.92970915058094e-06, "epoch": 0.3283410138248848, "percentage": 16.42, "elapsed_time": "1:55:45", "remaining_time": "9:49:19"} +{"current_steps": 1426, "total_steps": 8680, "loss": 0.9491959810256958, "lr": 1.929568768833241e-06, "epoch": 0.32857142857142857, "percentage": 16.43, "elapsed_time": "1:55:49", "remaining_time": "9:49:13"} +{"current_steps": 1427, "total_steps": 8680, "loss": 0.9739001989364624, "lr": 1.9294282521598657e-06, "epoch": 0.32880184331797235, "percentage": 16.44, "elapsed_time": "1:55:54", "remaining_time": "9:49:05"} +{"current_steps": 1428, "total_steps": 8680, "loss": 0.8159639835357666, "lr": 
1.92928760058121e-06, "epoch": 0.32903225806451614, "percentage": 16.45, "elapsed_time": "1:55:59", "remaining_time": "9:49:01"} +{"current_steps": 1429, "total_steps": 8680, "loss": 0.8752772808074951, "lr": 1.9291468141176894e-06, "epoch": 0.3292626728110599, "percentage": 16.46, "elapsed_time": "1:56:02", "remaining_time": "9:48:49"} +{"current_steps": 1430, "total_steps": 8680, "loss": 0.8543882369995117, "lr": 1.929005892789739e-06, "epoch": 0.3294930875576037, "percentage": 16.47, "elapsed_time": "1:56:07", "remaining_time": "9:48:43"} +{"current_steps": 1431, "total_steps": 8680, "loss": 0.8837493658065796, "lr": 1.928864836617813e-06, "epoch": 0.3297235023041475, "percentage": 16.49, "elapsed_time": "1:56:13", "remaining_time": "9:48:44"} +{"current_steps": 1432, "total_steps": 8680, "loss": 0.9320387840270996, "lr": 1.9287236456223854e-06, "epoch": 0.32995391705069127, "percentage": 16.5, "elapsed_time": "1:56:18", "remaining_time": "9:48:39"} +{"current_steps": 1433, "total_steps": 8680, "loss": 0.8272919654846191, "lr": 1.92858231982395e-06, "epoch": 0.330184331797235, "percentage": 16.51, "elapsed_time": "1:56:22", "remaining_time": "9:48:32"} +{"current_steps": 1434, "total_steps": 8680, "loss": 0.9415527582168579, "lr": 1.9284408592430207e-06, "epoch": 0.3304147465437788, "percentage": 16.52, "elapsed_time": "1:56:27", "remaining_time": "9:48:25"} +{"current_steps": 1435, "total_steps": 8680, "loss": 0.91558837890625, "lr": 1.928299263900129e-06, "epoch": 0.33064516129032256, "percentage": 16.53, "elapsed_time": "1:56:31", "remaining_time": "9:48:18"} +{"current_steps": 1436, "total_steps": 8680, "loss": 0.9333036541938782, "lr": 1.9281575338158287e-06, "epoch": 0.33087557603686635, "percentage": 16.54, "elapsed_time": "1:56:35", "remaining_time": "9:48:08"} +{"current_steps": 1437, "total_steps": 8680, "loss": 0.7823847532272339, "lr": 1.928015669010691e-06, "epoch": 0.33110599078341013, "percentage": 16.56, "elapsed_time": "1:56:40", 
"remaining_time": "9:48:03"} +{"current_steps": 1438, "total_steps": 8680, "loss": 0.8436610102653503, "lr": 1.9278736695053075e-06, "epoch": 0.3313364055299539, "percentage": 16.57, "elapsed_time": "1:56:45", "remaining_time": "9:47:59"} +{"current_steps": 1439, "total_steps": 8680, "loss": 0.8658925890922546, "lr": 1.927731535320289e-06, "epoch": 0.3315668202764977, "percentage": 16.58, "elapsed_time": "1:56:50", "remaining_time": "9:47:54"} +{"current_steps": 1440, "total_steps": 8680, "loss": 0.8510075807571411, "lr": 1.9275892664762665e-06, "epoch": 0.3317972350230415, "percentage": 16.59, "elapsed_time": "1:56:54", "remaining_time": "9:47:47"} +{"current_steps": 1441, "total_steps": 8680, "loss": 0.8002004623413086, "lr": 1.9274468629938897e-06, "epoch": 0.33202764976958526, "percentage": 16.6, "elapsed_time": "1:57:00", "remaining_time": "9:47:48"} +{"current_steps": 1442, "total_steps": 8680, "loss": 1.0030219554901123, "lr": 1.9273043248938287e-06, "epoch": 0.33225806451612905, "percentage": 16.61, "elapsed_time": "1:57:06", "remaining_time": "9:47:48"} +{"current_steps": 1443, "total_steps": 8680, "loss": 0.8415981531143188, "lr": 1.9271616521967723e-06, "epoch": 0.33248847926267283, "percentage": 16.62, "elapsed_time": "1:57:12", "remaining_time": "9:47:47"} +{"current_steps": 1444, "total_steps": 8680, "loss": 0.7704254388809204, "lr": 1.9270188449234295e-06, "epoch": 0.3327188940092166, "percentage": 16.64, "elapsed_time": "1:57:16", "remaining_time": "9:47:40"} +{"current_steps": 1445, "total_steps": 8680, "loss": 0.8350723385810852, "lr": 1.9268759030945294e-06, "epoch": 0.33294930875576034, "percentage": 16.65, "elapsed_time": "1:57:20", "remaining_time": "9:47:32"} +{"current_steps": 1446, "total_steps": 8680, "loss": 0.8729690313339233, "lr": 1.926732826730818e-06, "epoch": 0.3331797235023041, "percentage": 16.66, "elapsed_time": "1:57:25", "remaining_time": "9:47:27"} +{"current_steps": 1447, "total_steps": 8680, "loss": 0.7758746147155762, "lr": 
1.926589615853064e-06, "epoch": 0.3334101382488479, "percentage": 16.67, "elapsed_time": "1:57:31", "remaining_time": "9:47:29"} +{"current_steps": 1448, "total_steps": 8680, "loss": 0.7895134687423706, "lr": 1.926446270482054e-06, "epoch": 0.3336405529953917, "percentage": 16.68, "elapsed_time": "1:57:37", "remaining_time": "9:47:27"} +{"current_steps": 1449, "total_steps": 8680, "loss": 1.0239053964614868, "lr": 1.9263027906385936e-06, "epoch": 0.3338709677419355, "percentage": 16.69, "elapsed_time": "1:57:41", "remaining_time": "9:47:18"} +{"current_steps": 1450, "total_steps": 8680, "loss": 0.9294595122337341, "lr": 1.9261591763435104e-06, "epoch": 0.33410138248847926, "percentage": 16.71, "elapsed_time": "1:57:46", "remaining_time": "9:47:14"} +{"current_steps": 1451, "total_steps": 8680, "loss": 0.9786148071289062, "lr": 1.9260154276176484e-06, "epoch": 0.33433179723502304, "percentage": 16.72, "elapsed_time": "1:57:49", "remaining_time": "9:47:01"} +{"current_steps": 1452, "total_steps": 8680, "loss": 0.8513587117195129, "lr": 1.925871544481873e-06, "epoch": 0.3345622119815668, "percentage": 16.73, "elapsed_time": "1:57:54", "remaining_time": "9:46:57"} +{"current_steps": 1453, "total_steps": 8680, "loss": 0.7737371921539307, "lr": 1.9257275269570686e-06, "epoch": 0.3347926267281106, "percentage": 16.74, "elapsed_time": "1:58:00", "remaining_time": "9:46:56"} +{"current_steps": 1454, "total_steps": 8680, "loss": 0.8567382097244263, "lr": 1.9255833750641392e-06, "epoch": 0.3350230414746544, "percentage": 16.75, "elapsed_time": "1:58:06", "remaining_time": "9:46:55"} +{"current_steps": 1455, "total_steps": 8680, "loss": 0.893741250038147, "lr": 1.9254390888240078e-06, "epoch": 0.3352534562211982, "percentage": 16.76, "elapsed_time": "1:58:10", "remaining_time": "9:46:49"} +{"current_steps": 1456, "total_steps": 8680, "loss": 0.9558119773864746, "lr": 1.9252946682576184e-06, "epoch": 0.33548387096774196, "percentage": 16.77, "elapsed_time": "1:58:16", 
"remaining_time": "9:46:50"} +{"current_steps": 1457, "total_steps": 8680, "loss": 0.7055593729019165, "lr": 1.9251501133859323e-06, "epoch": 0.3357142857142857, "percentage": 16.79, "elapsed_time": "1:58:21", "remaining_time": "9:46:44"} +{"current_steps": 1458, "total_steps": 8680, "loss": 0.8409907817840576, "lr": 1.9250054242299326e-06, "epoch": 0.33594470046082947, "percentage": 16.8, "elapsed_time": "1:58:25", "remaining_time": "9:46:37"} +{"current_steps": 1459, "total_steps": 8680, "loss": 0.9459772109985352, "lr": 1.9248606008106196e-06, "epoch": 0.33617511520737325, "percentage": 16.81, "elapsed_time": "1:58:29", "remaining_time": "9:46:26"} +{"current_steps": 1460, "total_steps": 8680, "loss": 0.7848879098892212, "lr": 1.924715643149015e-06, "epoch": 0.33640552995391704, "percentage": 16.82, "elapsed_time": "1:58:33", "remaining_time": "9:46:16"} +{"current_steps": 1461, "total_steps": 8680, "loss": 1.0365980863571167, "lr": 1.924570551266159e-06, "epoch": 0.3366359447004608, "percentage": 16.83, "elapsed_time": "1:58:37", "remaining_time": "9:46:09"} +{"current_steps": 1462, "total_steps": 8680, "loss": 0.7331318855285645, "lr": 1.924425325183111e-06, "epoch": 0.3368663594470046, "percentage": 16.84, "elapsed_time": "1:58:41", "remaining_time": "9:46:00"} +{"current_steps": 1463, "total_steps": 8680, "loss": 0.8536237478256226, "lr": 1.9242799649209515e-06, "epoch": 0.3370967741935484, "percentage": 16.85, "elapsed_time": "1:58:45", "remaining_time": "9:45:49"} +{"current_steps": 1464, "total_steps": 8680, "loss": 0.9296326637268066, "lr": 1.9241344705007784e-06, "epoch": 0.33732718894009217, "percentage": 16.87, "elapsed_time": "1:58:49", "remaining_time": "9:45:43"} +{"current_steps": 1465, "total_steps": 8680, "loss": 0.9084932804107666, "lr": 1.92398884194371e-06, "epoch": 0.33755760368663595, "percentage": 16.88, "elapsed_time": "1:58:54", "remaining_time": "9:45:38"} +{"current_steps": 1466, "total_steps": 8680, "loss": 0.7426833510398865, "lr": 
1.9238430792708847e-06, "epoch": 0.33778801843317974, "percentage": 16.89, "elapsed_time": "1:59:01", "remaining_time": "9:45:42"} +{"current_steps": 1467, "total_steps": 8680, "loss": 0.7655431032180786, "lr": 1.9236971825034595e-06, "epoch": 0.3380184331797235, "percentage": 16.9, "elapsed_time": "1:59:06", "remaining_time": "9:45:35"} +{"current_steps": 1468, "total_steps": 8680, "loss": 0.9463646411895752, "lr": 1.923551151662611e-06, "epoch": 0.3382488479262673, "percentage": 16.91, "elapsed_time": "1:59:09", "remaining_time": "9:45:25"} +{"current_steps": 1469, "total_steps": 8680, "loss": 0.75661301612854, "lr": 1.9234049867695355e-06, "epoch": 0.3384792626728111, "percentage": 16.92, "elapsed_time": "1:59:15", "remaining_time": "9:45:23"} +{"current_steps": 1470, "total_steps": 8680, "loss": 0.7411723136901855, "lr": 1.9232586878454486e-06, "epoch": 0.3387096774193548, "percentage": 16.94, "elapsed_time": "1:59:20", "remaining_time": "9:45:18"} +{"current_steps": 1471, "total_steps": 8680, "loss": 0.9537360072135925, "lr": 1.9231122549115854e-06, "epoch": 0.3389400921658986, "percentage": 16.95, "elapsed_time": "1:59:24", "remaining_time": "9:45:12"} +{"current_steps": 1472, "total_steps": 8680, "loss": 0.9527197480201721, "lr": 1.9229656879892004e-06, "epoch": 0.3391705069124424, "percentage": 16.96, "elapsed_time": "1:59:28", "remaining_time": "9:45:04"} +{"current_steps": 1473, "total_steps": 8680, "loss": 0.9083822965621948, "lr": 1.9228189870995674e-06, "epoch": 0.33940092165898617, "percentage": 16.97, "elapsed_time": "1:59:32", "remaining_time": "9:44:52"} +{"current_steps": 1474, "total_steps": 8680, "loss": 0.8546823263168335, "lr": 1.9226721522639804e-06, "epoch": 0.33963133640552995, "percentage": 16.98, "elapsed_time": "1:59:37", "remaining_time": "9:44:49"} +{"current_steps": 1475, "total_steps": 8680, "loss": 0.7429832816123962, "lr": 1.922525183503752e-06, "epoch": 0.33986175115207373, "percentage": 16.99, "elapsed_time": "1:59:41", 
"remaining_time": "9:44:40"} +{"current_steps": 1476, "total_steps": 8680, "loss": 0.8805499076843262, "lr": 1.922378080840214e-06, "epoch": 0.3400921658986175, "percentage": 17.0, "elapsed_time": "1:59:46", "remaining_time": "9:44:35"} +{"current_steps": 1477, "total_steps": 8680, "loss": 1.0177074670791626, "lr": 1.9222308442947193e-06, "epoch": 0.3403225806451613, "percentage": 17.02, "elapsed_time": "1:59:50", "remaining_time": "9:44:27"} +{"current_steps": 1478, "total_steps": 8680, "loss": 0.778317391872406, "lr": 1.922083473888638e-06, "epoch": 0.3405529953917051, "percentage": 17.03, "elapsed_time": "1:59:56", "remaining_time": "9:44:25"} +{"current_steps": 1479, "total_steps": 8680, "loss": 0.8461896181106567, "lr": 1.921935969643361e-06, "epoch": 0.34078341013824887, "percentage": 17.04, "elapsed_time": "2:00:00", "remaining_time": "9:44:17"} +{"current_steps": 1480, "total_steps": 8680, "loss": 0.8028895258903503, "lr": 1.921788331580299e-06, "epoch": 0.34101382488479265, "percentage": 17.05, "elapsed_time": "2:00:05", "remaining_time": "9:44:14"} +{"current_steps": 1481, "total_steps": 8680, "loss": 0.9071121215820312, "lr": 1.9216405597208803e-06, "epoch": 0.34124423963133643, "percentage": 17.06, "elapsed_time": "2:00:10", "remaining_time": "9:44:09"} +{"current_steps": 1482, "total_steps": 8680, "loss": 0.7715062499046326, "lr": 1.921492654086555e-06, "epoch": 0.34147465437788016, "percentage": 17.07, "elapsed_time": "2:00:15", "remaining_time": "9:44:04"} +{"current_steps": 1483, "total_steps": 8680, "loss": 0.8446664810180664, "lr": 1.9213446146987907e-06, "epoch": 0.34170506912442394, "percentage": 17.09, "elapsed_time": "2:00:19", "remaining_time": "9:43:58"} +{"current_steps": 1484, "total_steps": 8680, "loss": 0.9835283756256104, "lr": 1.9211964415790754e-06, "epoch": 0.3419354838709677, "percentage": 17.1, "elapsed_time": "2:00:24", "remaining_time": "9:43:49"} +{"current_steps": 1485, "total_steps": 8680, "loss": 1.0630817413330078, "lr": 
1.921048134748916e-06, "epoch": 0.3421658986175115, "percentage": 17.11, "elapsed_time": "2:00:27", "remaining_time": "9:43:40"} +{"current_steps": 1486, "total_steps": 8680, "loss": 0.8514837622642517, "lr": 1.920899694229839e-06, "epoch": 0.3423963133640553, "percentage": 17.12, "elapsed_time": "2:00:32", "remaining_time": "9:43:36"} +{"current_steps": 1487, "total_steps": 8680, "loss": 0.7302432060241699, "lr": 1.920751120043391e-06, "epoch": 0.3426267281105991, "percentage": 17.13, "elapsed_time": "2:00:37", "remaining_time": "9:43:28"} +{"current_steps": 1488, "total_steps": 8680, "loss": 0.778337836265564, "lr": 1.920602412211136e-06, "epoch": 0.34285714285714286, "percentage": 17.14, "elapsed_time": "2:00:42", "remaining_time": "9:43:24"} +{"current_steps": 1489, "total_steps": 8680, "loss": 0.815348207950592, "lr": 1.92045357075466e-06, "epoch": 0.34308755760368664, "percentage": 17.15, "elapsed_time": "2:00:46", "remaining_time": "9:43:17"} +{"current_steps": 1490, "total_steps": 8680, "loss": 0.7844003438949585, "lr": 1.920304595695567e-06, "epoch": 0.3433179723502304, "percentage": 17.17, "elapsed_time": "2:00:53", "remaining_time": "9:43:20"} +{"current_steps": 1491, "total_steps": 8680, "loss": 0.9513435363769531, "lr": 1.92015548705548e-06, "epoch": 0.3435483870967742, "percentage": 17.18, "elapsed_time": "2:00:57", "remaining_time": "9:43:13"} +{"current_steps": 1492, "total_steps": 8680, "loss": 0.7506752610206604, "lr": 1.9200062448560424e-06, "epoch": 0.343778801843318, "percentage": 17.19, "elapsed_time": "2:01:02", "remaining_time": "9:43:09"} +{"current_steps": 1493, "total_steps": 8680, "loss": 0.739554762840271, "lr": 1.919856869118916e-06, "epoch": 0.3440092165898618, "percentage": 17.2, "elapsed_time": "2:01:09", "remaining_time": "9:43:11"} +{"current_steps": 1494, "total_steps": 8680, "loss": 0.8167033791542053, "lr": 1.9197073598657826e-06, "epoch": 0.3442396313364055, "percentage": 17.21, "elapsed_time": "2:01:13", "remaining_time": 
"9:43:04"} +{"current_steps": 1495, "total_steps": 8680, "loss": 0.9308677911758423, "lr": 1.919557717118344e-06, "epoch": 0.3444700460829493, "percentage": 17.22, "elapsed_time": "2:01:17", "remaining_time": "9:42:55"} +{"current_steps": 1496, "total_steps": 8680, "loss": 0.8601467609405518, "lr": 1.9194079408983197e-06, "epoch": 0.34470046082949307, "percentage": 17.24, "elapsed_time": "2:01:22", "remaining_time": "9:42:51"} +{"current_steps": 1497, "total_steps": 8680, "loss": 0.8062653541564941, "lr": 1.91925803122745e-06, "epoch": 0.34493087557603686, "percentage": 17.25, "elapsed_time": "2:01:27", "remaining_time": "9:42:46"} +{"current_steps": 1498, "total_steps": 8680, "loss": 0.8910555839538574, "lr": 1.9191079881274943e-06, "epoch": 0.34516129032258064, "percentage": 17.26, "elapsed_time": "2:01:32", "remaining_time": "9:42:41"} +{"current_steps": 1499, "total_steps": 8680, "loss": 0.8604668378829956, "lr": 1.9189578116202307e-06, "epoch": 0.3453917050691244, "percentage": 17.27, "elapsed_time": "2:01:37", "remaining_time": "9:42:38"} +{"current_steps": 1500, "total_steps": 8680, "loss": 0.7255126237869263, "lr": 1.918807501727457e-06, "epoch": 0.3456221198156682, "percentage": 17.28, "elapsed_time": "2:01:44", "remaining_time": "9:42:41"} +{"current_steps": 1501, "total_steps": 8680, "loss": 0.998108983039856, "lr": 1.9186570584709912e-06, "epoch": 0.345852534562212, "percentage": 17.29, "elapsed_time": "2:01:50", "remaining_time": "9:42:44"} +{"current_steps": 1502, "total_steps": 8680, "loss": 0.7660422325134277, "lr": 1.918506481872669e-06, "epoch": 0.34608294930875577, "percentage": 17.3, "elapsed_time": "2:01:55", "remaining_time": "9:42:40"} +{"current_steps": 1503, "total_steps": 8680, "loss": 0.868739902973175, "lr": 1.9183557719543472e-06, "epoch": 0.34631336405529956, "percentage": 17.32, "elapsed_time": "2:02:00", "remaining_time": "9:42:37"} +{"current_steps": 1504, "total_steps": 8680, "loss": 0.6630350351333618, "lr": 1.918204928737901e-06, 
"epoch": 0.34654377880184334, "percentage": 17.33, "elapsed_time": "2:02:06", "remaining_time": "9:42:35"} +{"current_steps": 1505, "total_steps": 8680, "loss": 0.8651586771011353, "lr": 1.9180539522452247e-06, "epoch": 0.3467741935483871, "percentage": 17.34, "elapsed_time": "2:02:10", "remaining_time": "9:42:29"} +{"current_steps": 1506, "total_steps": 8680, "loss": 0.8584417700767517, "lr": 1.9179028424982326e-06, "epoch": 0.34700460829493085, "percentage": 17.35, "elapsed_time": "2:02:14", "remaining_time": "9:42:21"} +{"current_steps": 1507, "total_steps": 8680, "loss": 0.7793893814086914, "lr": 1.917751599518858e-06, "epoch": 0.34723502304147463, "percentage": 17.36, "elapsed_time": "2:02:20", "remaining_time": "9:42:18"} +{"current_steps": 1508, "total_steps": 8680, "loss": 0.8499815464019775, "lr": 1.9176002233290542e-06, "epoch": 0.3474654377880184, "percentage": 17.37, "elapsed_time": "2:02:24", "remaining_time": "9:42:11"} +{"current_steps": 1509, "total_steps": 8680, "loss": 0.7914199829101562, "lr": 1.917448713950792e-06, "epoch": 0.3476958525345622, "percentage": 17.38, "elapsed_time": "2:02:30", "remaining_time": "9:42:10"} +{"current_steps": 1510, "total_steps": 8680, "loss": 0.942331850528717, "lr": 1.9172970714060637e-06, "epoch": 0.347926267281106, "percentage": 17.4, "elapsed_time": "2:02:35", "remaining_time": "9:42:08"} +{"current_steps": 1511, "total_steps": 8680, "loss": 0.7780032157897949, "lr": 1.9171452957168803e-06, "epoch": 0.34815668202764977, "percentage": 17.41, "elapsed_time": "2:02:40", "remaining_time": "9:42:02"} +{"current_steps": 1512, "total_steps": 8680, "loss": 0.8544708490371704, "lr": 1.916993386905271e-06, "epoch": 0.34838709677419355, "percentage": 17.42, "elapsed_time": "2:02:45", "remaining_time": "9:41:56"} +{"current_steps": 1513, "total_steps": 8680, "loss": 0.798173725605011, "lr": 1.9168413449932855e-06, "epoch": 0.34861751152073733, "percentage": 17.43, "elapsed_time": "2:02:49", "remaining_time": "9:41:49"} 
+{"current_steps": 1514, "total_steps": 8680, "loss": 0.9426852464675903, "lr": 1.9166891700029922e-06, "epoch": 0.3488479262672811, "percentage": 17.44, "elapsed_time": "2:02:54", "remaining_time": "9:41:44"} +{"current_steps": 1515, "total_steps": 8680, "loss": 0.922240138053894, "lr": 1.91653686195648e-06, "epoch": 0.3490783410138249, "percentage": 17.45, "elapsed_time": "2:02:58", "remaining_time": "9:41:34"} +{"current_steps": 1516, "total_steps": 8680, "loss": 0.7997978925704956, "lr": 1.9163844208758556e-06, "epoch": 0.3493087557603687, "percentage": 17.47, "elapsed_time": "2:03:02", "remaining_time": "9:41:28"} +{"current_steps": 1517, "total_steps": 8680, "loss": 1.0597525835037231, "lr": 1.9162318467832455e-06, "epoch": 0.34953917050691247, "percentage": 17.48, "elapsed_time": "2:03:07", "remaining_time": "9:41:22"} +{"current_steps": 1518, "total_steps": 8680, "loss": 0.8211681842803955, "lr": 1.9160791397007957e-06, "epoch": 0.3497695852534562, "percentage": 17.49, "elapsed_time": "2:03:12", "remaining_time": "9:41:18"} +{"current_steps": 1519, "total_steps": 8680, "loss": 0.8078022003173828, "lr": 1.9159262996506716e-06, "epoch": 0.35, "percentage": 17.5, "elapsed_time": "2:03:17", "remaining_time": "9:41:12"} +{"current_steps": 1520, "total_steps": 8680, "loss": 0.9449256658554077, "lr": 1.915773326655057e-06, "epoch": 0.35023041474654376, "percentage": 17.51, "elapsed_time": "2:03:21", "remaining_time": "9:41:05"} +{"current_steps": 1521, "total_steps": 8680, "loss": 0.8744012117385864, "lr": 1.915620220736157e-06, "epoch": 0.35046082949308754, "percentage": 17.52, "elapsed_time": "2:03:25", "remaining_time": "9:40:55"} +{"current_steps": 1522, "total_steps": 8680, "loss": 0.9503095746040344, "lr": 1.9154669819161946e-06, "epoch": 0.35069124423963133, "percentage": 17.53, "elapsed_time": "2:03:30", "remaining_time": "9:40:49"} +{"current_steps": 1523, "total_steps": 8680, "loss": 1.055432915687561, "lr": 1.9153136102174106e-06, "epoch": 
0.3509216589861751, "percentage": 17.55, "elapsed_time": "2:03:34", "remaining_time": "9:40:42"} +{"current_steps": 1524, "total_steps": 8680, "loss": 0.8540226221084595, "lr": 1.9151601056620684e-06, "epoch": 0.3511520737327189, "percentage": 17.56, "elapsed_time": "2:03:39", "remaining_time": "9:40:36"} +{"current_steps": 1525, "total_steps": 8680, "loss": 0.8846266865730286, "lr": 1.915006468272448e-06, "epoch": 0.3513824884792627, "percentage": 17.57, "elapsed_time": "2:03:44", "remaining_time": "9:40:34"} +{"current_steps": 1526, "total_steps": 8680, "loss": 0.8941656947135925, "lr": 1.9148526980708507e-06, "epoch": 0.35161290322580646, "percentage": 17.58, "elapsed_time": "2:03:48", "remaining_time": "9:40:25"} +{"current_steps": 1527, "total_steps": 8680, "loss": 0.868419885635376, "lr": 1.914698795079595e-06, "epoch": 0.35184331797235024, "percentage": 17.59, "elapsed_time": "2:03:52", "remaining_time": "9:40:14"} +{"current_steps": 1528, "total_steps": 8680, "loss": 0.7375580072402954, "lr": 1.91454475932102e-06, "epoch": 0.35207373271889403, "percentage": 17.6, "elapsed_time": "2:03:57", "remaining_time": "9:40:13"} +{"current_steps": 1529, "total_steps": 8680, "loss": 0.9415492415428162, "lr": 1.9143905908174844e-06, "epoch": 0.3523041474654378, "percentage": 17.62, "elapsed_time": "2:04:03", "remaining_time": "9:40:12"} +{"current_steps": 1530, "total_steps": 8680, "loss": 0.8395911455154419, "lr": 1.9142362895913646e-06, "epoch": 0.35253456221198154, "percentage": 17.63, "elapsed_time": "2:04:09", "remaining_time": "9:40:12"} +{"current_steps": 1531, "total_steps": 8680, "loss": 0.831234335899353, "lr": 1.914081855665057e-06, "epoch": 0.3527649769585253, "percentage": 17.64, "elapsed_time": "2:04:14", "remaining_time": "9:40:08"} +{"current_steps": 1532, "total_steps": 8680, "loss": 0.8975566029548645, "lr": 1.9139272890609794e-06, "epoch": 0.3529953917050691, "percentage": 17.65, "elapsed_time": "2:04:18", "remaining_time": "9:40:01"} 
+{"current_steps": 1533, "total_steps": 8680, "loss": 0.8134264945983887, "lr": 1.913772589801565e-06, "epoch": 0.3532258064516129, "percentage": 17.66, "elapsed_time": "2:04:23", "remaining_time": "9:39:54"} +{"current_steps": 1534, "total_steps": 8680, "loss": 0.9507275819778442, "lr": 1.913617757909269e-06, "epoch": 0.3534562211981567, "percentage": 17.67, "elapsed_time": "2:04:27", "remaining_time": "9:39:48"} +{"current_steps": 1535, "total_steps": 8680, "loss": 0.8839038610458374, "lr": 1.913462793406565e-06, "epoch": 0.35368663594470046, "percentage": 17.68, "elapsed_time": "2:04:32", "remaining_time": "9:39:43"} +{"current_steps": 1536, "total_steps": 8680, "loss": 0.8708392381668091, "lr": 1.9133076963159453e-06, "epoch": 0.35391705069124424, "percentage": 17.7, "elapsed_time": "2:04:36", "remaining_time": "9:39:35"} +{"current_steps": 1537, "total_steps": 8680, "loss": 0.7609391212463379, "lr": 1.913152466659923e-06, "epoch": 0.354147465437788, "percentage": 17.71, "elapsed_time": "2:04:42", "remaining_time": "9:39:33"} +{"current_steps": 1538, "total_steps": 8680, "loss": 0.9231283664703369, "lr": 1.912997104461029e-06, "epoch": 0.3543778801843318, "percentage": 17.72, "elapsed_time": "2:04:47", "remaining_time": "9:39:29"} +{"current_steps": 1539, "total_steps": 8680, "loss": 1.0297726392745972, "lr": 1.912841609741814e-06, "epoch": 0.3546082949308756, "percentage": 17.73, "elapsed_time": "2:04:53", "remaining_time": "9:39:31"} +{"current_steps": 1540, "total_steps": 8680, "loss": 0.8798987865447998, "lr": 1.9126859825248475e-06, "epoch": 0.3548387096774194, "percentage": 17.74, "elapsed_time": "2:04:57", "remaining_time": "9:39:21"} +{"current_steps": 1541, "total_steps": 8680, "loss": 0.9104069471359253, "lr": 1.912530222832719e-06, "epoch": 0.35506912442396316, "percentage": 17.75, "elapsed_time": "2:05:02", "remaining_time": "9:39:17"} +{"current_steps": 1542, "total_steps": 8680, "loss": 0.7618073225021362, "lr": 1.9123743306880368e-06, "epoch": 
0.35529953917050694, "percentage": 17.76, "elapsed_time": "2:05:07", "remaining_time": "9:39:14"} +{"current_steps": 1543, "total_steps": 8680, "loss": 0.8397510051727295, "lr": 1.912218306113428e-06, "epoch": 0.35552995391705067, "percentage": 17.78, "elapsed_time": "2:05:13", "remaining_time": "9:39:11"} +{"current_steps": 1544, "total_steps": 8680, "loss": 0.9884299039840698, "lr": 1.91206214913154e-06, "epoch": 0.35576036866359445, "percentage": 17.79, "elapsed_time": "2:05:18", "remaining_time": "9:39:09"} +{"current_steps": 1545, "total_steps": 8680, "loss": 0.9878349304199219, "lr": 1.9119058597650385e-06, "epoch": 0.35599078341013823, "percentage": 17.8, "elapsed_time": "2:05:22", "remaining_time": "9:38:57"} +{"current_steps": 1546, "total_steps": 8680, "loss": 0.8790488243103027, "lr": 1.9117494380366086e-06, "epoch": 0.356221198156682, "percentage": 17.81, "elapsed_time": "2:05:27", "remaining_time": "9:38:53"} +{"current_steps": 1547, "total_steps": 8680, "loss": 0.7390745878219604, "lr": 1.9115928839689546e-06, "epoch": 0.3564516129032258, "percentage": 17.82, "elapsed_time": "2:05:34", "remaining_time": "9:39:02"} +{"current_steps": 1548, "total_steps": 8680, "loss": 0.7354288101196289, "lr": 1.9114361975848004e-06, "epoch": 0.3566820276497696, "percentage": 17.83, "elapsed_time": "2:05:39", "remaining_time": "9:38:56"} +{"current_steps": 1549, "total_steps": 8680, "loss": 0.9234673976898193, "lr": 1.911279378906889e-06, "epoch": 0.35691244239631337, "percentage": 17.85, "elapsed_time": "2:05:43", "remaining_time": "9:38:48"} +{"current_steps": 1550, "total_steps": 8680, "loss": 0.8913710117340088, "lr": 1.911122427957982e-06, "epoch": 0.35714285714285715, "percentage": 17.86, "elapsed_time": "2:05:47", "remaining_time": "9:38:39"} +{"current_steps": 1551, "total_steps": 8680, "loss": 0.754358172416687, "lr": 1.9109653447608605e-06, "epoch": 0.35737327188940093, "percentage": 17.87, "elapsed_time": "2:05:53", "remaining_time": "9:38:36"} 
+{"current_steps": 1552, "total_steps": 8680, "loss": 0.7361906170845032, "lr": 1.910808129338325e-06, "epoch": 0.3576036866359447, "percentage": 17.88, "elapsed_time": "2:05:57", "remaining_time": "9:38:30"} +{"current_steps": 1553, "total_steps": 8680, "loss": 0.8167279362678528, "lr": 1.9106507817131957e-06, "epoch": 0.3578341013824885, "percentage": 17.89, "elapsed_time": "2:06:02", "remaining_time": "9:38:24"} +{"current_steps": 1554, "total_steps": 8680, "loss": 0.7504739761352539, "lr": 1.910493301908311e-06, "epoch": 0.3580645161290323, "percentage": 17.9, "elapsed_time": "2:06:06", "remaining_time": "9:38:15"} +{"current_steps": 1555, "total_steps": 8680, "loss": 0.8452355861663818, "lr": 1.9103356899465287e-06, "epoch": 0.358294930875576, "percentage": 17.91, "elapsed_time": "2:06:11", "remaining_time": "9:38:12"} +{"current_steps": 1556, "total_steps": 8680, "loss": 0.891547679901123, "lr": 1.9101779458507263e-06, "epoch": 0.3585253456221198, "percentage": 17.93, "elapsed_time": "2:06:16", "remaining_time": "9:38:06"} +{"current_steps": 1557, "total_steps": 8680, "loss": 0.8132680654525757, "lr": 1.9100200696438e-06, "epoch": 0.3587557603686636, "percentage": 17.94, "elapsed_time": "2:06:20", "remaining_time": "9:38:01"} +{"current_steps": 1558, "total_steps": 8680, "loss": 0.799482524394989, "lr": 1.9098620613486646e-06, "epoch": 0.35898617511520736, "percentage": 17.95, "elapsed_time": "2:06:25", "remaining_time": "9:37:56"} +{"current_steps": 1559, "total_steps": 8680, "loss": 0.8490267992019653, "lr": 1.909703920988256e-06, "epoch": 0.35921658986175115, "percentage": 17.96, "elapsed_time": "2:06:31", "remaining_time": "9:37:54"} +{"current_steps": 1560, "total_steps": 8680, "loss": 0.8608428239822388, "lr": 1.9095456485855277e-06, "epoch": 0.35944700460829493, "percentage": 17.97, "elapsed_time": "2:06:36", "remaining_time": "9:37:49"} +{"current_steps": 1561, "total_steps": 8680, "loss": 0.8460499048233032, "lr": 1.9093872441634526e-06, "epoch": 
0.3596774193548387, "percentage": 17.98, "elapsed_time": "2:06:41", "remaining_time": "9:37:48"} +{"current_steps": 1562, "total_steps": 8680, "loss": 0.9268433451652527, "lr": 1.9092287077450226e-06, "epoch": 0.3599078341013825, "percentage": 18.0, "elapsed_time": "2:06:46", "remaining_time": "9:37:42"} +{"current_steps": 1563, "total_steps": 8680, "loss": 0.7354154586791992, "lr": 1.90907003935325e-06, "epoch": 0.3601382488479263, "percentage": 18.01, "elapsed_time": "2:06:50", "remaining_time": "9:37:35"} +{"current_steps": 1564, "total_steps": 8680, "loss": 0.87982177734375, "lr": 1.9089112390111637e-06, "epoch": 0.36036866359447006, "percentage": 18.02, "elapsed_time": "2:06:55", "remaining_time": "9:37:30"} +{"current_steps": 1565, "total_steps": 8680, "loss": 0.994953453540802, "lr": 1.9087523067418148e-06, "epoch": 0.36059907834101385, "percentage": 18.03, "elapsed_time": "2:07:00", "remaining_time": "9:37:26"} +{"current_steps": 1566, "total_steps": 8680, "loss": 0.8623256087303162, "lr": 1.9085932425682715e-06, "epoch": 0.36082949308755763, "percentage": 18.04, "elapsed_time": "2:07:06", "remaining_time": "9:37:25"} +{"current_steps": 1567, "total_steps": 8680, "loss": 0.8752846717834473, "lr": 1.908434046513622e-06, "epoch": 0.36105990783410136, "percentage": 18.05, "elapsed_time": "2:07:10", "remaining_time": "9:37:15"} +{"current_steps": 1568, "total_steps": 8680, "loss": 0.9002033472061157, "lr": 1.908274718600973e-06, "epoch": 0.36129032258064514, "percentage": 18.06, "elapsed_time": "2:07:15", "remaining_time": "9:37:14"} +{"current_steps": 1569, "total_steps": 8680, "loss": 0.7290444374084473, "lr": 1.908115258853451e-06, "epoch": 0.3615207373271889, "percentage": 18.08, "elapsed_time": "2:07:20", "remaining_time": "9:37:06"} +{"current_steps": 1570, "total_steps": 8680, "loss": 0.6833889484405518, "lr": 1.9079556672942016e-06, "epoch": 0.3617511520737327, "percentage": 18.09, "elapsed_time": "2:07:24", "remaining_time": "9:37:00"} 
+{"current_steps": 1571, "total_steps": 8680, "loss": 1.0033842325210571, "lr": 1.907795943946389e-06, "epoch": 0.3619815668202765, "percentage": 18.1, "elapsed_time": "2:07:29", "remaining_time": "9:36:55"} +{"current_steps": 1572, "total_steps": 8680, "loss": 0.9590950012207031, "lr": 1.907636088833197e-06, "epoch": 0.3622119815668203, "percentage": 18.11, "elapsed_time": "2:07:33", "remaining_time": "9:36:47"} +{"current_steps": 1573, "total_steps": 8680, "loss": 0.8812122344970703, "lr": 1.907476101977828e-06, "epoch": 0.36244239631336406, "percentage": 18.12, "elapsed_time": "2:07:37", "remaining_time": "9:36:39"} +{"current_steps": 1574, "total_steps": 8680, "loss": 0.7549433708190918, "lr": 1.9073159834035045e-06, "epoch": 0.36267281105990784, "percentage": 18.13, "elapsed_time": "2:07:42", "remaining_time": "9:36:33"} +{"current_steps": 1575, "total_steps": 8680, "loss": 0.9235562086105347, "lr": 1.9071557331334667e-06, "epoch": 0.3629032258064516, "percentage": 18.15, "elapsed_time": "2:07:47", "remaining_time": "9:36:26"} +{"current_steps": 1576, "total_steps": 8680, "loss": 0.8468542098999023, "lr": 1.9069953511909755e-06, "epoch": 0.3631336405529954, "percentage": 18.16, "elapsed_time": "2:07:51", "remaining_time": "9:36:21"} +{"current_steps": 1577, "total_steps": 8680, "loss": 0.8804000616073608, "lr": 1.9068348375993096e-06, "epoch": 0.3633640552995392, "percentage": 18.17, "elapsed_time": "2:07:57", "remaining_time": "9:36:18"} +{"current_steps": 1578, "total_steps": 8680, "loss": 0.762598991394043, "lr": 1.9066741923817676e-06, "epoch": 0.363594470046083, "percentage": 18.18, "elapsed_time": "2:08:01", "remaining_time": "9:36:10"} +{"current_steps": 1579, "total_steps": 8680, "loss": 0.8791940212249756, "lr": 1.9065134155616666e-06, "epoch": 0.3638248847926267, "percentage": 18.19, "elapsed_time": "2:08:05", "remaining_time": "9:36:04"} +{"current_steps": 1580, "total_steps": 8680, "loss": 0.7041842937469482, "lr": 1.9063525071623439e-06, "epoch": 
0.3640552995391705, "percentage": 18.2, "elapsed_time": "2:08:10", "remaining_time": "9:35:57"} +{"current_steps": 1581, "total_steps": 8680, "loss": 0.9526468515396118, "lr": 1.9061914672071543e-06, "epoch": 0.36428571428571427, "percentage": 18.21, "elapsed_time": "2:08:14", "remaining_time": "9:35:51"} +{"current_steps": 1582, "total_steps": 8680, "loss": 0.9388316869735718, "lr": 1.906030295719473e-06, "epoch": 0.36451612903225805, "percentage": 18.23, "elapsed_time": "2:08:18", "remaining_time": "9:35:42"} +{"current_steps": 1583, "total_steps": 8680, "loss": 0.7295777797698975, "lr": 1.9058689927226936e-06, "epoch": 0.36474654377880183, "percentage": 18.24, "elapsed_time": "2:08:23", "remaining_time": "9:35:36"} +{"current_steps": 1584, "total_steps": 8680, "loss": 0.7540932297706604, "lr": 1.905707558240229e-06, "epoch": 0.3649769585253456, "percentage": 18.25, "elapsed_time": "2:08:28", "remaining_time": "9:35:31"} +{"current_steps": 1585, "total_steps": 8680, "loss": 0.9457792639732361, "lr": 1.9055459922955118e-06, "epoch": 0.3652073732718894, "percentage": 18.26, "elapsed_time": "2:08:31", "remaining_time": "9:35:21"} +{"current_steps": 1586, "total_steps": 8680, "loss": 0.9121883511543274, "lr": 1.9053842949119923e-06, "epoch": 0.3654377880184332, "percentage": 18.27, "elapsed_time": "2:08:35", "remaining_time": "9:35:12"} +{"current_steps": 1587, "total_steps": 8680, "loss": 0.8140746355056763, "lr": 1.905222466113141e-06, "epoch": 0.36566820276497697, "percentage": 18.28, "elapsed_time": "2:08:39", "remaining_time": "9:35:01"} +{"current_steps": 1588, "total_steps": 8680, "loss": 0.7403484582901001, "lr": 1.905060505922447e-06, "epoch": 0.36589861751152075, "percentage": 18.29, "elapsed_time": "2:08:43", "remaining_time": "9:34:52"} +{"current_steps": 1589, "total_steps": 8680, "loss": 0.9040734171867371, "lr": 1.9048984143634188e-06, "epoch": 0.36612903225806454, "percentage": 18.31, "elapsed_time": "2:08:47", "remaining_time": "9:34:44"} 
+{"current_steps": 1590, "total_steps": 8680, "loss": 0.9060958623886108, "lr": 1.9047361914595834e-06, "epoch": 0.3663594470046083, "percentage": 18.32, "elapsed_time": "2:08:52", "remaining_time": "9:34:42"} +{"current_steps": 1591, "total_steps": 8680, "loss": 0.6925936937332153, "lr": 1.904573837234488e-06, "epoch": 0.36658986175115205, "percentage": 18.33, "elapsed_time": "2:08:59", "remaining_time": "9:34:43"} +{"current_steps": 1592, "total_steps": 8680, "loss": 0.8120197057723999, "lr": 1.9044113517116973e-06, "epoch": 0.36682027649769583, "percentage": 18.34, "elapsed_time": "2:09:04", "remaining_time": "9:34:40"} +{"current_steps": 1593, "total_steps": 8680, "loss": 0.796414852142334, "lr": 1.9042487349147965e-06, "epoch": 0.3670506912442396, "percentage": 18.35, "elapsed_time": "2:09:09", "remaining_time": "9:34:36"} +{"current_steps": 1594, "total_steps": 8680, "loss": 0.8390822410583496, "lr": 1.9040859868673885e-06, "epoch": 0.3672811059907834, "percentage": 18.36, "elapsed_time": "2:09:14", "remaining_time": "9:34:31"} +{"current_steps": 1595, "total_steps": 8680, "loss": 0.990093469619751, "lr": 1.9039231075930967e-06, "epoch": 0.3675115207373272, "percentage": 18.38, "elapsed_time": "2:09:18", "remaining_time": "9:34:24"} +{"current_steps": 1596, "total_steps": 8680, "loss": 0.8548597097396851, "lr": 1.9037600971155623e-06, "epoch": 0.36774193548387096, "percentage": 18.39, "elapsed_time": "2:09:22", "remaining_time": "9:34:16"} +{"current_steps": 1597, "total_steps": 8680, "loss": 0.687299370765686, "lr": 1.9035969554584464e-06, "epoch": 0.36797235023041475, "percentage": 18.4, "elapsed_time": "2:09:28", "remaining_time": "9:34:16"} +{"current_steps": 1598, "total_steps": 8680, "loss": 0.7857942581176758, "lr": 1.9034336826454282e-06, "epoch": 0.36820276497695853, "percentage": 18.41, "elapsed_time": "2:09:32", "remaining_time": "9:34:08"} +{"current_steps": 1599, "total_steps": 8680, "loss": 0.8836538195610046, "lr": 1.9032702787002072e-06, 
"epoch": 0.3684331797235023, "percentage": 18.42, "elapsed_time": "2:09:38", "remaining_time": "9:34:05"} +{"current_steps": 1600, "total_steps": 8680, "loss": 0.8132715225219727, "lr": 1.9031067436465011e-06, "epoch": 0.3686635944700461, "percentage": 18.43, "elapsed_time": "2:09:43", "remaining_time": "9:34:00"} +{"current_steps": 1601, "total_steps": 8680, "loss": 0.7632347345352173, "lr": 1.9029430775080467e-06, "epoch": 0.3688940092165899, "percentage": 18.44, "elapsed_time": "2:09:50", "remaining_time": "9:34:06"} +{"current_steps": 1602, "total_steps": 8680, "loss": 0.8616297841072083, "lr": 1.9027792803086e-06, "epoch": 0.36912442396313366, "percentage": 18.46, "elapsed_time": "2:09:54", "remaining_time": "9:33:58"} +{"current_steps": 1603, "total_steps": 8680, "loss": 0.8418172597885132, "lr": 1.9026153520719358e-06, "epoch": 0.36935483870967745, "percentage": 18.47, "elapsed_time": "2:10:00", "remaining_time": "9:34:00"} +{"current_steps": 1604, "total_steps": 8680, "loss": 0.7253717184066772, "lr": 1.902451292821848e-06, "epoch": 0.3695852534562212, "percentage": 18.48, "elapsed_time": "2:10:05", "remaining_time": "9:33:54"} +{"current_steps": 1605, "total_steps": 8680, "loss": 0.9746035933494568, "lr": 1.90228710258215e-06, "epoch": 0.36981566820276496, "percentage": 18.49, "elapsed_time": "2:10:09", "remaining_time": "9:33:43"} +{"current_steps": 1606, "total_steps": 8680, "loss": 0.7722853422164917, "lr": 1.9021227813766733e-06, "epoch": 0.37004608294930874, "percentage": 18.5, "elapsed_time": "2:10:13", "remaining_time": "9:33:37"} +{"current_steps": 1607, "total_steps": 8680, "loss": 0.8278614282608032, "lr": 1.9019583292292693e-06, "epoch": 0.3702764976958525, "percentage": 18.51, "elapsed_time": "2:10:18", "remaining_time": "9:33:30"} +{"current_steps": 1608, "total_steps": 8680, "loss": 0.7433085441589355, "lr": 1.9017937461638078e-06, "epoch": 0.3705069124423963, "percentage": 18.53, "elapsed_time": "2:10:23", "remaining_time": "9:33:26"} 
+{"current_steps": 1609, "total_steps": 8680, "loss": 0.9194153547286987, "lr": 1.901629032204178e-06, "epoch": 0.3707373271889401, "percentage": 18.54, "elapsed_time": "2:10:27", "remaining_time": "9:33:20"} +{"current_steps": 1610, "total_steps": 8680, "loss": 0.8502616882324219, "lr": 1.9014641873742877e-06, "epoch": 0.3709677419354839, "percentage": 18.55, "elapsed_time": "2:10:33", "remaining_time": "9:33:17"} +{"current_steps": 1611, "total_steps": 8680, "loss": 0.8494570255279541, "lr": 1.9012992116980637e-06, "epoch": 0.37119815668202766, "percentage": 18.56, "elapsed_time": "2:10:36", "remaining_time": "9:33:08"} +{"current_steps": 1612, "total_steps": 8680, "loss": 0.8567800521850586, "lr": 1.9011341051994526e-06, "epoch": 0.37142857142857144, "percentage": 18.57, "elapsed_time": "2:10:42", "remaining_time": "9:33:06"} +{"current_steps": 1613, "total_steps": 8680, "loss": 0.7739682197570801, "lr": 1.9009688679024189e-06, "epoch": 0.3716589861751152, "percentage": 18.58, "elapsed_time": "2:10:48", "remaining_time": "9:33:04"} +{"current_steps": 1614, "total_steps": 8680, "loss": 0.8548814058303833, "lr": 1.900803499830947e-06, "epoch": 0.371889400921659, "percentage": 18.59, "elapsed_time": "2:10:52", "remaining_time": "9:32:58"} +{"current_steps": 1615, "total_steps": 8680, "loss": 0.7444359064102173, "lr": 1.9006380010090395e-06, "epoch": 0.3721198156682028, "percentage": 18.61, "elapsed_time": "2:10:58", "remaining_time": "9:32:57"} +{"current_steps": 1616, "total_steps": 8680, "loss": 1.0483827590942383, "lr": 1.9004723714607183e-06, "epoch": 0.3723502304147465, "percentage": 18.62, "elapsed_time": "2:11:01", "remaining_time": "9:32:45"} +{"current_steps": 1617, "total_steps": 8680, "loss": 0.7734435200691223, "lr": 1.9003066112100248e-06, "epoch": 0.3725806451612903, "percentage": 18.63, "elapsed_time": "2:11:06", "remaining_time": "9:32:41"} +{"current_steps": 1618, "total_steps": 8680, "loss": 0.856806755065918, "lr": 1.9001407202810181e-06, 
"epoch": 0.3728110599078341, "percentage": 18.64, "elapsed_time": "2:11:13", "remaining_time": "9:32:45"} +{"current_steps": 1619, "total_steps": 8680, "loss": 0.8708832263946533, "lr": 1.8999746986977776e-06, "epoch": 0.37304147465437787, "percentage": 18.65, "elapsed_time": "2:11:19", "remaining_time": "9:32:43"} +{"current_steps": 1620, "total_steps": 8680, "loss": 0.9295653104782104, "lr": 1.899808546484401e-06, "epoch": 0.37327188940092165, "percentage": 18.66, "elapsed_time": "2:11:24", "remaining_time": "9:32:38"} +{"current_steps": 1621, "total_steps": 8680, "loss": 0.8799598217010498, "lr": 1.8996422636650054e-06, "epoch": 0.37350230414746544, "percentage": 18.68, "elapsed_time": "2:11:28", "remaining_time": "9:32:34"} +{"current_steps": 1622, "total_steps": 8680, "loss": 0.8014140725135803, "lr": 1.8994758502637259e-06, "epoch": 0.3737327188940092, "percentage": 18.69, "elapsed_time": "2:11:34", "remaining_time": "9:32:31"} +{"current_steps": 1623, "total_steps": 8680, "loss": 0.8252615928649902, "lr": 1.8993093063047174e-06, "epoch": 0.373963133640553, "percentage": 18.7, "elapsed_time": "2:11:38", "remaining_time": "9:32:23"} +{"current_steps": 1624, "total_steps": 8680, "loss": 0.8617361783981323, "lr": 1.899142631812154e-06, "epoch": 0.3741935483870968, "percentage": 18.71, "elapsed_time": "2:11:42", "remaining_time": "9:32:13"} +{"current_steps": 1625, "total_steps": 8680, "loss": 0.9316745400428772, "lr": 1.8989758268102274e-06, "epoch": 0.37442396313364057, "percentage": 18.72, "elapsed_time": "2:11:46", "remaining_time": "9:32:04"} +{"current_steps": 1626, "total_steps": 8680, "loss": 0.8195457458496094, "lr": 1.89880889132315e-06, "epoch": 0.37465437788018435, "percentage": 18.73, "elapsed_time": "2:11:51", "remaining_time": "9:32:00"} +{"current_steps": 1627, "total_steps": 8680, "loss": 0.7828787565231323, "lr": 1.8986418253751516e-06, "epoch": 0.37488479262672814, "percentage": 18.74, "elapsed_time": "2:11:54", "remaining_time": "9:31:50"} 
+{"current_steps": 1628, "total_steps": 8680, "loss": 0.8130955696105957, "lr": 1.898474628990482e-06, "epoch": 0.37511520737327186, "percentage": 18.76, "elapsed_time": "2:11:58", "remaining_time": "9:31:40"} +{"current_steps": 1629, "total_steps": 8680, "loss": 0.9925695657730103, "lr": 1.8983073021934097e-06, "epoch": 0.37534562211981565, "percentage": 18.77, "elapsed_time": "2:12:04", "remaining_time": "9:31:41"} +{"current_steps": 1630, "total_steps": 8680, "loss": 0.8547999858856201, "lr": 1.8981398450082216e-06, "epoch": 0.37557603686635943, "percentage": 18.78, "elapsed_time": "2:12:09", "remaining_time": "9:31:36"} +{"current_steps": 1631, "total_steps": 8680, "loss": 0.8922954797744751, "lr": 1.897972257459224e-06, "epoch": 0.3758064516129032, "percentage": 18.79, "elapsed_time": "2:12:14", "remaining_time": "9:31:30"} +{"current_steps": 1632, "total_steps": 8680, "loss": 0.8553646802902222, "lr": 1.8978045395707415e-06, "epoch": 0.376036866359447, "percentage": 18.8, "elapsed_time": "2:12:20", "remaining_time": "9:31:31"} +{"current_steps": 1633, "total_steps": 8680, "loss": 0.7854139804840088, "lr": 1.897636691367119e-06, "epoch": 0.3762672811059908, "percentage": 18.81, "elapsed_time": "2:12:26", "remaining_time": "9:31:31"} +{"current_steps": 1634, "total_steps": 8680, "loss": 0.8968626260757446, "lr": 1.897468712872719e-06, "epoch": 0.37649769585253456, "percentage": 18.82, "elapsed_time": "2:12:31", "remaining_time": "9:31:26"} +{"current_steps": 1635, "total_steps": 8680, "loss": 0.8898152112960815, "lr": 1.8973006041119234e-06, "epoch": 0.37672811059907835, "percentage": 18.84, "elapsed_time": "2:12:35", "remaining_time": "9:31:20"} +{"current_steps": 1636, "total_steps": 8680, "loss": 0.8499374389648438, "lr": 1.8971323651091332e-06, "epoch": 0.37695852534562213, "percentage": 18.85, "elapsed_time": "2:12:40", "remaining_time": "9:31:16"} +{"current_steps": 1637, "total_steps": 8680, "loss": 0.7803430557250977, "lr": 1.8969639958887677e-06, 
"epoch": 0.3771889400921659, "percentage": 18.86, "elapsed_time": "2:12:46", "remaining_time": "9:31:13"} +{"current_steps": 1638, "total_steps": 8680, "loss": 0.7669799327850342, "lr": 1.8967954964752657e-06, "epoch": 0.3774193548387097, "percentage": 18.87, "elapsed_time": "2:12:52", "remaining_time": "9:31:15"} +{"current_steps": 1639, "total_steps": 8680, "loss": 0.9085204601287842, "lr": 1.8966268668930845e-06, "epoch": 0.3776497695852535, "percentage": 18.88, "elapsed_time": "2:12:55", "remaining_time": "9:31:03"} +{"current_steps": 1640, "total_steps": 8680, "loss": 0.7793002724647522, "lr": 1.8964581071667005e-06, "epoch": 0.3778801843317972, "percentage": 18.89, "elapsed_time": "2:13:00", "remaining_time": "9:30:56"} +{"current_steps": 1641, "total_steps": 8680, "loss": 0.8649430274963379, "lr": 1.896289217320609e-06, "epoch": 0.378110599078341, "percentage": 18.91, "elapsed_time": "2:13:05", "remaining_time": "9:30:53"} +{"current_steps": 1642, "total_steps": 8680, "loss": 0.856898844242096, "lr": 1.8961201973793243e-06, "epoch": 0.3783410138248848, "percentage": 18.92, "elapsed_time": "2:13:09", "remaining_time": "9:30:46"} +{"current_steps": 1643, "total_steps": 8680, "loss": 0.8221957087516785, "lr": 1.895951047367379e-06, "epoch": 0.37857142857142856, "percentage": 18.93, "elapsed_time": "2:13:15", "remaining_time": "9:30:43"} +{"current_steps": 1644, "total_steps": 8680, "loss": 0.8158079385757446, "lr": 1.8957817673093256e-06, "epoch": 0.37880184331797234, "percentage": 18.94, "elapsed_time": "2:13:19", "remaining_time": "9:30:37"} +{"current_steps": 1645, "total_steps": 8680, "loss": 0.7803312540054321, "lr": 1.8956123572297343e-06, "epoch": 0.3790322580645161, "percentage": 18.95, "elapsed_time": "2:13:25", "remaining_time": "9:30:35"} +{"current_steps": 1646, "total_steps": 8680, "loss": 1.035685420036316, "lr": 1.8954428171531949e-06, "epoch": 0.3792626728110599, "percentage": 18.96, "elapsed_time": "2:13:30", "remaining_time": "9:30:31"} 
+{"current_steps": 1647, "total_steps": 8680, "loss": 0.6871123313903809, "lr": 1.8952731471043161e-06, "epoch": 0.3794930875576037, "percentage": 18.97, "elapsed_time": "2:13:37", "remaining_time": "9:30:34"} +{"current_steps": 1648, "total_steps": 8680, "loss": 0.9651780128479004, "lr": 1.8951033471077253e-06, "epoch": 0.3797235023041475, "percentage": 18.99, "elapsed_time": "2:13:41", "remaining_time": "9:30:26"} +{"current_steps": 1649, "total_steps": 8680, "loss": 1.018349528312683, "lr": 1.8949334171880687e-06, "epoch": 0.37995391705069126, "percentage": 19.0, "elapsed_time": "2:13:46", "remaining_time": "9:30:22"} +{"current_steps": 1650, "total_steps": 8680, "loss": 0.6839278936386108, "lr": 1.894763357370011e-06, "epoch": 0.38018433179723504, "percentage": 19.01, "elapsed_time": "2:13:50", "remaining_time": "9:30:16"} +{"current_steps": 1651, "total_steps": 8680, "loss": 0.8442174196243286, "lr": 1.894593167678237e-06, "epoch": 0.3804147465437788, "percentage": 19.02, "elapsed_time": "2:13:56", "remaining_time": "9:30:14"} +{"current_steps": 1652, "total_steps": 8680, "loss": 0.8224585056304932, "lr": 1.8944228481374484e-06, "epoch": 0.38064516129032255, "percentage": 19.03, "elapsed_time": "2:14:01", "remaining_time": "9:30:09"} +{"current_steps": 1653, "total_steps": 8680, "loss": 0.8570500612258911, "lr": 1.8942523987723678e-06, "epoch": 0.38087557603686634, "percentage": 19.04, "elapsed_time": "2:14:05", "remaining_time": "9:30:02"} +{"current_steps": 1654, "total_steps": 8680, "loss": 0.7696554660797119, "lr": 1.8940818196077354e-06, "epoch": 0.3811059907834101, "percentage": 19.06, "elapsed_time": "2:14:11", "remaining_time": "9:30:02"} +{"current_steps": 1655, "total_steps": 8680, "loss": 0.822563886642456, "lr": 1.8939111106683103e-06, "epoch": 0.3813364055299539, "percentage": 19.07, "elapsed_time": "2:14:16", "remaining_time": "9:29:58"} +{"current_steps": 1656, "total_steps": 8680, "loss": 0.6537219882011414, "lr": 1.8937402719788711e-06, 
"epoch": 0.3815668202764977, "percentage": 19.08, "elapsed_time": "2:14:24", "remaining_time": "9:30:04"} +{"current_steps": 1657, "total_steps": 8680, "loss": 0.9081932306289673, "lr": 1.8935693035642145e-06, "epoch": 0.38179723502304147, "percentage": 19.09, "elapsed_time": "2:14:27", "remaining_time": "9:29:55"} +{"current_steps": 1658, "total_steps": 8680, "loss": 0.6839661598205566, "lr": 1.8933982054491563e-06, "epoch": 0.38202764976958525, "percentage": 19.1, "elapsed_time": "2:14:33", "remaining_time": "9:29:52"} +{"current_steps": 1659, "total_steps": 8680, "loss": 0.9187283515930176, "lr": 1.8932269776585313e-06, "epoch": 0.38225806451612904, "percentage": 19.11, "elapsed_time": "2:14:38", "remaining_time": "9:29:47"} +{"current_steps": 1660, "total_steps": 8680, "loss": 0.9567047357559204, "lr": 1.893055620217193e-06, "epoch": 0.3824884792626728, "percentage": 19.12, "elapsed_time": "2:14:41", "remaining_time": "9:29:37"} +{"current_steps": 1661, "total_steps": 8680, "loss": 0.785561203956604, "lr": 1.8928841331500136e-06, "epoch": 0.3827188940092166, "percentage": 19.14, "elapsed_time": "2:14:48", "remaining_time": "9:29:38"} +{"current_steps": 1662, "total_steps": 8680, "loss": 0.8986088037490845, "lr": 1.8927125164818842e-06, "epoch": 0.3829493087557604, "percentage": 19.15, "elapsed_time": "2:14:53", "remaining_time": "9:29:36"} +{"current_steps": 1663, "total_steps": 8680, "loss": 1.0027087926864624, "lr": 1.892540770237715e-06, "epoch": 0.38317972350230417, "percentage": 19.16, "elapsed_time": "2:14:57", "remaining_time": "9:29:28"} +{"current_steps": 1664, "total_steps": 8680, "loss": 0.8502041697502136, "lr": 1.8923688944424346e-06, "epoch": 0.38341013824884795, "percentage": 19.17, "elapsed_time": "2:15:02", "remaining_time": "9:29:21"} +{"current_steps": 1665, "total_steps": 8680, "loss": 0.8526991605758667, "lr": 1.8921968891209907e-06, "epoch": 0.3836405529953917, "percentage": 19.18, "elapsed_time": "2:15:07", "remaining_time": "9:29:16"} 
+{"current_steps": 1666, "total_steps": 8680, "loss": 0.8084676265716553, "lr": 1.8920247542983492e-06, "epoch": 0.38387096774193546, "percentage": 19.19, "elapsed_time": "2:15:11", "remaining_time": "9:29:08"} +{"current_steps": 1667, "total_steps": 8680, "loss": 0.8922938704490662, "lr": 1.8918524899994957e-06, "epoch": 0.38410138248847925, "percentage": 19.21, "elapsed_time": "2:15:15", "remaining_time": "9:29:00"} +{"current_steps": 1668, "total_steps": 8680, "loss": 0.7965600490570068, "lr": 1.8916800962494337e-06, "epoch": 0.38433179723502303, "percentage": 19.22, "elapsed_time": "2:15:20", "remaining_time": "9:28:55"} +{"current_steps": 1669, "total_steps": 8680, "loss": 0.9505549073219299, "lr": 1.8915075730731865e-06, "epoch": 0.3845622119815668, "percentage": 19.23, "elapsed_time": "2:15:24", "remaining_time": "9:28:50"} +{"current_steps": 1670, "total_steps": 8680, "loss": 0.9459924697875977, "lr": 1.8913349204957947e-06, "epoch": 0.3847926267281106, "percentage": 19.24, "elapsed_time": "2:15:29", "remaining_time": "9:28:42"} +{"current_steps": 1671, "total_steps": 8680, "loss": 0.8433674573898315, "lr": 1.8911621385423195e-06, "epoch": 0.3850230414746544, "percentage": 19.25, "elapsed_time": "2:15:33", "remaining_time": "9:28:35"} +{"current_steps": 1672, "total_steps": 8680, "loss": 0.8945955038070679, "lr": 1.8909892272378398e-06, "epoch": 0.38525345622119817, "percentage": 19.26, "elapsed_time": "2:15:38", "remaining_time": "9:28:30"} +{"current_steps": 1673, "total_steps": 8680, "loss": 0.8580358624458313, "lr": 1.890816186607453e-06, "epoch": 0.38548387096774195, "percentage": 19.27, "elapsed_time": "2:15:42", "remaining_time": "9:28:22"} +{"current_steps": 1674, "total_steps": 8680, "loss": 0.7708698511123657, "lr": 1.8906430166762761e-06, "epoch": 0.38571428571428573, "percentage": 19.29, "elapsed_time": "2:15:46", "remaining_time": "9:28:13"} +{"current_steps": 1675, "total_steps": 8680, "loss": 0.8647153377532959, "lr": 1.8904697174694446e-06, 
"epoch": 0.3859447004608295, "percentage": 19.3, "elapsed_time": "2:15:50", "remaining_time": "9:28:07"} +{"current_steps": 1676, "total_steps": 8680, "loss": 0.9380506277084351, "lr": 1.890296289012112e-06, "epoch": 0.3861751152073733, "percentage": 19.31, "elapsed_time": "2:15:54", "remaining_time": "9:27:58"} +{"current_steps": 1677, "total_steps": 8680, "loss": 0.8814103603363037, "lr": 1.8901227313294519e-06, "epoch": 0.386405529953917, "percentage": 19.32, "elapsed_time": "2:15:59", "remaining_time": "9:27:54"} +{"current_steps": 1678, "total_steps": 8680, "loss": 0.9348419904708862, "lr": 1.8899490444466556e-06, "epoch": 0.3866359447004608, "percentage": 19.33, "elapsed_time": "2:16:04", "remaining_time": "9:27:48"} +{"current_steps": 1679, "total_steps": 8680, "loss": 0.7502046823501587, "lr": 1.8897752283889338e-06, "epoch": 0.3868663594470046, "percentage": 19.34, "elapsed_time": "2:16:08", "remaining_time": "9:27:42"} +{"current_steps": 1680, "total_steps": 8680, "loss": 0.8499769568443298, "lr": 1.8896012831815155e-06, "epoch": 0.3870967741935484, "percentage": 19.35, "elapsed_time": "2:16:14", "remaining_time": "9:27:40"} +{"current_steps": 1681, "total_steps": 8680, "loss": 0.8253993391990662, "lr": 1.8894272088496487e-06, "epoch": 0.38732718894009216, "percentage": 19.37, "elapsed_time": "2:16:19", "remaining_time": "9:27:34"} +{"current_steps": 1682, "total_steps": 8680, "loss": 0.8494073152542114, "lr": 1.8892530054185998e-06, "epoch": 0.38755760368663594, "percentage": 19.38, "elapsed_time": "2:16:24", "remaining_time": "9:27:30"} +{"current_steps": 1683, "total_steps": 8680, "loss": 0.8836106061935425, "lr": 1.8890786729136546e-06, "epoch": 0.3877880184331797, "percentage": 19.39, "elapsed_time": "2:16:29", "remaining_time": "9:27:27"} +{"current_steps": 1684, "total_steps": 8680, "loss": 0.8949145078659058, "lr": 1.8889042113601166e-06, "epoch": 0.3880184331797235, "percentage": 19.4, "elapsed_time": "2:16:35", "remaining_time": "9:27:25"} 
+{"current_steps": 1685, "total_steps": 8680, "loss": 0.6210965514183044, "lr": 1.8887296207833095e-06, "epoch": 0.3882488479262673, "percentage": 19.41, "elapsed_time": "2:16:40", "remaining_time": "9:27:23"} +{"current_steps": 1686, "total_steps": 8680, "loss": 0.9216527938842773, "lr": 1.8885549012085744e-06, "epoch": 0.3884792626728111, "percentage": 19.42, "elapsed_time": "2:16:45", "remaining_time": "9:27:16"} +{"current_steps": 1687, "total_steps": 8680, "loss": 0.9266358613967896, "lr": 1.8883800526612715e-06, "epoch": 0.38870967741935486, "percentage": 19.44, "elapsed_time": "2:16:49", "remaining_time": "9:27:09"} +{"current_steps": 1688, "total_steps": 8680, "loss": 0.8550606966018677, "lr": 1.88820507516678e-06, "epoch": 0.38894009216589864, "percentage": 19.45, "elapsed_time": "2:16:53", "remaining_time": "9:27:02"} +{"current_steps": 1689, "total_steps": 8680, "loss": 0.8632181882858276, "lr": 1.888029968750498e-06, "epoch": 0.38917050691244237, "percentage": 19.46, "elapsed_time": "2:16:58", "remaining_time": "9:26:58"} +{"current_steps": 1690, "total_steps": 8680, "loss": 0.8795493841171265, "lr": 1.8878547334378415e-06, "epoch": 0.38940092165898615, "percentage": 19.47, "elapsed_time": "2:17:04", "remaining_time": "9:26:55"} +{"current_steps": 1691, "total_steps": 8680, "loss": 0.9750456809997559, "lr": 1.8876793692542456e-06, "epoch": 0.38963133640552994, "percentage": 19.48, "elapsed_time": "2:17:07", "remaining_time": "9:26:45"} +{"current_steps": 1692, "total_steps": 8680, "loss": 0.9270161390304565, "lr": 1.8875038762251645e-06, "epoch": 0.3898617511520737, "percentage": 19.49, "elapsed_time": "2:17:12", "remaining_time": "9:26:42"} +{"current_steps": 1693, "total_steps": 8680, "loss": 0.8154089450836182, "lr": 1.8873282543760705e-06, "epoch": 0.3900921658986175, "percentage": 19.5, "elapsed_time": "2:17:16", "remaining_time": "9:26:32"} +{"current_steps": 1694, "total_steps": 8680, "loss": 0.9245043992996216, "lr": 1.887152503732455e-06, 
"epoch": 0.3903225806451613, "percentage": 19.52, "elapsed_time": "2:17:20", "remaining_time": "9:26:23"} +{"current_steps": 1695, "total_steps": 8680, "loss": 0.9218056201934814, "lr": 1.8869766243198284e-06, "epoch": 0.39055299539170507, "percentage": 19.53, "elapsed_time": "2:17:24", "remaining_time": "9:26:15"} +{"current_steps": 1696, "total_steps": 8680, "loss": 0.7753894329071045, "lr": 1.8868006161637192e-06, "epoch": 0.39078341013824885, "percentage": 19.54, "elapsed_time": "2:17:29", "remaining_time": "9:26:09"} +{"current_steps": 1697, "total_steps": 8680, "loss": 0.8455277681350708, "lr": 1.8866244792896739e-06, "epoch": 0.39101382488479264, "percentage": 19.55, "elapsed_time": "2:17:32", "remaining_time": "9:26:00"} +{"current_steps": 1698, "total_steps": 8680, "loss": 0.8301571607589722, "lr": 1.8864482137232596e-06, "epoch": 0.3912442396313364, "percentage": 19.56, "elapsed_time": "2:17:38", "remaining_time": "9:25:56"} +{"current_steps": 1699, "total_steps": 8680, "loss": 0.9768285155296326, "lr": 1.8862718194900602e-06, "epoch": 0.3914746543778802, "percentage": 19.57, "elapsed_time": "2:17:42", "remaining_time": "9:25:49"} +{"current_steps": 1700, "total_steps": 8680, "loss": 0.9659395217895508, "lr": 1.8860952966156798e-06, "epoch": 0.391705069124424, "percentage": 19.59, "elapsed_time": "2:17:47", "remaining_time": "9:25:43"} +{"current_steps": 1701, "total_steps": 8680, "loss": 0.9975444078445435, "lr": 1.8859186451257401e-06, "epoch": 0.3919354838709677, "percentage": 19.6, "elapsed_time": "2:17:53", "remaining_time": "9:25:43"} +{"current_steps": 1702, "total_steps": 8680, "loss": 0.9248796701431274, "lr": 1.8857418650458816e-06, "epoch": 0.3921658986175115, "percentage": 19.61, "elapsed_time": "2:17:59", "remaining_time": "9:25:46"} +{"current_steps": 1703, "total_steps": 8680, "loss": 0.8792428970336914, "lr": 1.8855649564017642e-06, "epoch": 0.3923963133640553, "percentage": 19.62, "elapsed_time": "2:18:05", "remaining_time": "9:25:45"} 
+{"current_steps": 1704, "total_steps": 8680, "loss": 0.8387417197227478, "lr": 1.8853879192190657e-06, "epoch": 0.39262672811059907, "percentage": 19.63, "elapsed_time": "2:18:10", "remaining_time": "9:25:39"} +{"current_steps": 1705, "total_steps": 8680, "loss": 0.7020218372344971, "lr": 1.8852107535234828e-06, "epoch": 0.39285714285714285, "percentage": 19.64, "elapsed_time": "2:18:15", "remaining_time": "9:25:36"} +{"current_steps": 1706, "total_steps": 8680, "loss": 0.7388321161270142, "lr": 1.885033459340731e-06, "epoch": 0.39308755760368663, "percentage": 19.65, "elapsed_time": "2:18:21", "remaining_time": "9:25:35"} +{"current_steps": 1707, "total_steps": 8680, "loss": 0.7536240220069885, "lr": 1.8848560366965441e-06, "epoch": 0.3933179723502304, "percentage": 19.67, "elapsed_time": "2:18:25", "remaining_time": "9:25:27"} +{"current_steps": 1708, "total_steps": 8680, "loss": 0.747667670249939, "lr": 1.8846784856166746e-06, "epoch": 0.3935483870967742, "percentage": 19.68, "elapsed_time": "2:18:30", "remaining_time": "9:25:21"} +{"current_steps": 1709, "total_steps": 8680, "loss": 0.8068975210189819, "lr": 1.8845008061268945e-06, "epoch": 0.393778801843318, "percentage": 19.69, "elapsed_time": "2:18:35", "remaining_time": "9:25:17"} +{"current_steps": 1710, "total_steps": 8680, "loss": 0.7613410949707031, "lr": 1.8843229982529932e-06, "epoch": 0.39400921658986177, "percentage": 19.7, "elapsed_time": "2:18:40", "remaining_time": "9:25:13"} +{"current_steps": 1711, "total_steps": 8680, "loss": 0.8579158782958984, "lr": 1.8841450620207793e-06, "epoch": 0.39423963133640555, "percentage": 19.71, "elapsed_time": "2:18:45", "remaining_time": "9:25:09"} +{"current_steps": 1712, "total_steps": 8680, "loss": 0.8754673004150391, "lr": 1.88396699745608e-06, "epoch": 0.39447004608294933, "percentage": 19.72, "elapsed_time": "2:18:49", "remaining_time": "9:25:01"} +{"current_steps": 1713, "total_steps": 8680, "loss": 0.7988177537918091, "lr": 1.8837888045847415e-06, 
"epoch": 0.39470046082949306, "percentage": 19.74, "elapsed_time": "2:18:53", "remaining_time": "9:24:53"} +{"current_steps": 1714, "total_steps": 8680, "loss": 0.8658367395401001, "lr": 1.8836104834326279e-06, "epoch": 0.39493087557603684, "percentage": 19.75, "elapsed_time": "2:18:59", "remaining_time": "9:24:51"} +{"current_steps": 1715, "total_steps": 8680, "loss": 0.8777489066123962, "lr": 1.8834320340256223e-06, "epoch": 0.3951612903225806, "percentage": 19.76, "elapsed_time": "2:19:03", "remaining_time": "9:24:43"} +{"current_steps": 1716, "total_steps": 8680, "loss": 0.9785901308059692, "lr": 1.8832534563896264e-06, "epoch": 0.3953917050691244, "percentage": 19.77, "elapsed_time": "2:19:07", "remaining_time": "9:24:35"} +{"current_steps": 1717, "total_steps": 8680, "loss": 0.847503125667572, "lr": 1.883074750550561e-06, "epoch": 0.3956221198156682, "percentage": 19.78, "elapsed_time": "2:19:12", "remaining_time": "9:24:30"} +{"current_steps": 1718, "total_steps": 8680, "loss": 1.0159538984298706, "lr": 1.8828959165343643e-06, "epoch": 0.395852534562212, "percentage": 19.79, "elapsed_time": "2:19:15", "remaining_time": "9:24:21"} +{"current_steps": 1719, "total_steps": 8680, "loss": 0.9064888954162598, "lr": 1.882716954366994e-06, "epoch": 0.39608294930875576, "percentage": 19.8, "elapsed_time": "2:19:21", "remaining_time": "9:24:17"} +{"current_steps": 1720, "total_steps": 8680, "loss": 0.956849217414856, "lr": 1.8825378640744264e-06, "epoch": 0.39631336405529954, "percentage": 19.82, "elapsed_time": "2:19:25", "remaining_time": "9:24:11"} +{"current_steps": 1721, "total_steps": 8680, "loss": 0.8983441591262817, "lr": 1.882358645682656e-06, "epoch": 0.3965437788018433, "percentage": 19.83, "elapsed_time": "2:19:30", "remaining_time": "9:24:07"} +{"current_steps": 1722, "total_steps": 8680, "loss": 0.7698956727981567, "lr": 1.8821792992176967e-06, "epoch": 0.3967741935483871, "percentage": 19.84, "elapsed_time": "2:19:35", "remaining_time": "9:24:01"} 
+{"current_steps": 1723, "total_steps": 8680, "loss": 0.9376351833343506, "lr": 1.8819998247055797e-06, "epoch": 0.3970046082949309, "percentage": 19.85, "elapsed_time": "2:19:39", "remaining_time": "9:23:52"} +{"current_steps": 1724, "total_steps": 8680, "loss": 0.8776079416275024, "lr": 1.881820222172356e-06, "epoch": 0.3972350230414747, "percentage": 19.86, "elapsed_time": "2:19:44", "remaining_time": "9:23:50"} +{"current_steps": 1725, "total_steps": 8680, "loss": 0.9776726961135864, "lr": 1.8816404916440942e-06, "epoch": 0.39746543778801846, "percentage": 19.87, "elapsed_time": "2:19:49", "remaining_time": "9:23:44"} +{"current_steps": 1726, "total_steps": 8680, "loss": 0.7699686288833618, "lr": 1.8814606331468822e-06, "epoch": 0.3976958525345622, "percentage": 19.88, "elapsed_time": "2:19:53", "remaining_time": "9:23:38"} +{"current_steps": 1727, "total_steps": 8680, "loss": 0.8256866931915283, "lr": 1.8812806467068265e-06, "epoch": 0.39792626728110597, "percentage": 19.9, "elapsed_time": "2:19:59", "remaining_time": "9:23:35"} +{"current_steps": 1728, "total_steps": 8680, "loss": 0.8493847846984863, "lr": 1.881100532350051e-06, "epoch": 0.39815668202764976, "percentage": 19.91, "elapsed_time": "2:20:02", "remaining_time": "9:23:26"} +{"current_steps": 1729, "total_steps": 8680, "loss": 0.8138688802719116, "lr": 1.8809202901027002e-06, "epoch": 0.39838709677419354, "percentage": 19.92, "elapsed_time": "2:20:08", "remaining_time": "9:23:23"} +{"current_steps": 1730, "total_steps": 8680, "loss": 0.8637882471084595, "lr": 1.880739919990935e-06, "epoch": 0.3986175115207373, "percentage": 19.93, "elapsed_time": "2:20:13", "remaining_time": "9:23:21"} +{"current_steps": 1731, "total_steps": 8680, "loss": 0.8988152742385864, "lr": 1.880559422040937e-06, "epoch": 0.3988479262672811, "percentage": 19.94, "elapsed_time": "2:20:17", "remaining_time": "9:23:12"} +{"current_steps": 1732, "total_steps": 8680, "loss": 0.8247279524803162, "lr": 1.880378796278904e-06, 
"epoch": 0.3990783410138249, "percentage": 19.95, "elapsed_time": "2:20:21", "remaining_time": "9:23:05"} +{"current_steps": 1733, "total_steps": 8680, "loss": 0.9699070453643799, "lr": 1.8801980427310546e-06, "epoch": 0.39930875576036867, "percentage": 19.97, "elapsed_time": "2:20:25", "remaining_time": "9:22:56"} +{"current_steps": 1734, "total_steps": 8680, "loss": 0.9516465663909912, "lr": 1.8800171614236241e-06, "epoch": 0.39953917050691246, "percentage": 19.98, "elapsed_time": "2:20:30", "remaining_time": "9:22:49"} +{"current_steps": 1735, "total_steps": 8680, "loss": 0.9553602933883667, "lr": 1.879836152382868e-06, "epoch": 0.39976958525345624, "percentage": 19.99, "elapsed_time": "2:20:35", "remaining_time": "9:22:46"} +{"current_steps": 1736, "total_steps": 8680, "loss": 0.7805094718933105, "lr": 1.879655015635059e-06, "epoch": 0.4, "percentage": 20.0, "elapsed_time": "2:20:39", "remaining_time": "9:22:36"} +{"current_steps": 1737, "total_steps": 8680, "loss": 0.9509962797164917, "lr": 1.8794737512064888e-06, "epoch": 0.4002304147465438, "percentage": 20.01, "elapsed_time": "2:20:43", "remaining_time": "9:22:30"} +{"current_steps": 1738, "total_steps": 8680, "loss": 0.8663454055786133, "lr": 1.8792923591234683e-06, "epoch": 0.40046082949308753, "percentage": 20.02, "elapsed_time": "2:20:48", "remaining_time": "9:22:27"} +{"current_steps": 1739, "total_steps": 8680, "loss": 0.8773336410522461, "lr": 1.8791108394123257e-06, "epoch": 0.4006912442396313, "percentage": 20.03, "elapsed_time": "2:20:53", "remaining_time": "9:22:22"} +{"current_steps": 1740, "total_steps": 8680, "loss": 0.7201284766197205, "lr": 1.8789291920994086e-06, "epoch": 0.4009216589861751, "percentage": 20.05, "elapsed_time": "2:21:00", "remaining_time": "9:22:23"} +{"current_steps": 1741, "total_steps": 8680, "loss": 0.799161434173584, "lr": 1.8787474172110826e-06, "epoch": 0.4011520737327189, "percentage": 20.06, "elapsed_time": "2:21:06", "remaining_time": "9:22:25"} +{"current_steps": 
1742, "total_steps": 8680, "loss": 0.8987375497817993, "lr": 1.8785655147737326e-06, "epoch": 0.40138248847926267, "percentage": 20.07, "elapsed_time": "2:21:11", "remaining_time": "9:22:20"} +{"current_steps": 1743, "total_steps": 8680, "loss": 0.8553296327590942, "lr": 1.878383484813761e-06, "epoch": 0.40161290322580645, "percentage": 20.08, "elapsed_time": "2:21:16", "remaining_time": "9:22:15"} +{"current_steps": 1744, "total_steps": 8680, "loss": 0.8376551270484924, "lr": 1.8782013273575895e-06, "epoch": 0.40184331797235023, "percentage": 20.09, "elapsed_time": "2:21:21", "remaining_time": "9:22:10"} +{"current_steps": 1745, "total_steps": 8680, "loss": 0.8220775723457336, "lr": 1.8780190424316578e-06, "epoch": 0.402073732718894, "percentage": 20.1, "elapsed_time": "2:21:26", "remaining_time": "9:22:05"} +{"current_steps": 1746, "total_steps": 8680, "loss": 0.8614820241928101, "lr": 1.8778366300624244e-06, "epoch": 0.4023041474654378, "percentage": 20.12, "elapsed_time": "2:21:31", "remaining_time": "9:22:01"} +{"current_steps": 1747, "total_steps": 8680, "loss": 0.9434851408004761, "lr": 1.8776540902763665e-06, "epoch": 0.4025345622119816, "percentage": 20.13, "elapsed_time": "2:21:34", "remaining_time": "9:21:51"} +{"current_steps": 1748, "total_steps": 8680, "loss": 0.8150373101234436, "lr": 1.877471423099979e-06, "epoch": 0.40276497695852537, "percentage": 20.14, "elapsed_time": "2:21:40", "remaining_time": "9:21:48"} +{"current_steps": 1749, "total_steps": 8680, "loss": 0.7660368084907532, "lr": 1.8772886285597762e-06, "epoch": 0.40299539170506915, "percentage": 20.15, "elapsed_time": "2:21:44", "remaining_time": "9:21:43"} +{"current_steps": 1750, "total_steps": 8680, "loss": 0.7647032141685486, "lr": 1.8771057066822903e-06, "epoch": 0.4032258064516129, "percentage": 20.16, "elapsed_time": "2:21:50", "remaining_time": "9:21:42"} +{"current_steps": 1751, "total_steps": 8680, "loss": 0.6034061908721924, "lr": 1.8769226574940723e-06, "epoch": 
0.40345622119815666, "percentage": 20.17, "elapsed_time": "2:21:56", "remaining_time": "9:21:41"} +{"current_steps": 1752, "total_steps": 8680, "loss": 1.0062675476074219, "lr": 1.8767394810216914e-06, "epoch": 0.40368663594470044, "percentage": 20.18, "elapsed_time": "2:22:00", "remaining_time": "9:21:31"} +{"current_steps": 1753, "total_steps": 8680, "loss": 0.9791489839553833, "lr": 1.8765561772917354e-06, "epoch": 0.40391705069124423, "percentage": 20.2, "elapsed_time": "2:22:04", "remaining_time": "9:21:24"} +{"current_steps": 1754, "total_steps": 8680, "loss": 0.9054251909255981, "lr": 1.8763727463308108e-06, "epoch": 0.404147465437788, "percentage": 20.21, "elapsed_time": "2:22:09", "remaining_time": "9:21:18"} +{"current_steps": 1755, "total_steps": 8680, "loss": 0.9156093597412109, "lr": 1.8761891881655423e-06, "epoch": 0.4043778801843318, "percentage": 20.22, "elapsed_time": "2:22:12", "remaining_time": "9:21:08"} +{"current_steps": 1756, "total_steps": 8680, "loss": 0.7525647878646851, "lr": 1.876005502822573e-06, "epoch": 0.4046082949308756, "percentage": 20.23, "elapsed_time": "2:22:16", "remaining_time": "9:21:00"} +{"current_steps": 1757, "total_steps": 8680, "loss": 0.8321493864059448, "lr": 1.8758216903285643e-06, "epoch": 0.40483870967741936, "percentage": 20.24, "elapsed_time": "2:22:21", "remaining_time": "9:20:54"} +{"current_steps": 1758, "total_steps": 8680, "loss": 0.9937042593955994, "lr": 1.8756377507101973e-06, "epoch": 0.40506912442396314, "percentage": 20.25, "elapsed_time": "2:22:27", "remaining_time": "9:20:53"} +{"current_steps": 1759, "total_steps": 8680, "loss": 0.7001460790634155, "lr": 1.8754536839941694e-06, "epoch": 0.40529953917050693, "percentage": 20.26, "elapsed_time": "2:22:32", "remaining_time": "9:20:51"} +{"current_steps": 1760, "total_steps": 8680, "loss": 1.0062569379806519, "lr": 1.8752694902071986e-06, "epoch": 0.4055299539170507, "percentage": 20.28, "elapsed_time": "2:22:37", "remaining_time": "9:20:47"} 
+{"current_steps": 1761, "total_steps": 8680, "loss": 0.7414188385009766, "lr": 1.8750851693760199e-06, "epoch": 0.4057603686635945, "percentage": 20.29, "elapsed_time": "2:22:43", "remaining_time": "9:20:45"} +{"current_steps": 1762, "total_steps": 8680, "loss": 0.7181771397590637, "lr": 1.8749007215273873e-06, "epoch": 0.4059907834101382, "percentage": 20.3, "elapsed_time": "2:22:48", "remaining_time": "9:20:40"} +{"current_steps": 1763, "total_steps": 8680, "loss": 0.8797845244407654, "lr": 1.8747161466880732e-06, "epoch": 0.406221198156682, "percentage": 20.31, "elapsed_time": "2:22:52", "remaining_time": "9:20:35"} +{"current_steps": 1764, "total_steps": 8680, "loss": 0.7774960398674011, "lr": 1.8745314448848684e-06, "epoch": 0.4064516129032258, "percentage": 20.32, "elapsed_time": "2:22:57", "remaining_time": "9:20:27"} +{"current_steps": 1765, "total_steps": 8680, "loss": 0.8499422073364258, "lr": 1.874346616144582e-06, "epoch": 0.4066820276497696, "percentage": 20.33, "elapsed_time": "2:23:01", "remaining_time": "9:20:21"} +{"current_steps": 1766, "total_steps": 8680, "loss": 0.7070250511169434, "lr": 1.874161660494042e-06, "epoch": 0.40691244239631336, "percentage": 20.35, "elapsed_time": "2:23:05", "remaining_time": "9:20:12"} +{"current_steps": 1767, "total_steps": 8680, "loss": 0.8009281158447266, "lr": 1.8739765779600939e-06, "epoch": 0.40714285714285714, "percentage": 20.36, "elapsed_time": "2:23:10", "remaining_time": "9:20:08"} +{"current_steps": 1768, "total_steps": 8680, "loss": 0.6866155862808228, "lr": 1.8737913685696027e-06, "epoch": 0.4073732718894009, "percentage": 20.37, "elapsed_time": "2:23:15", "remaining_time": "9:20:05"} +{"current_steps": 1769, "total_steps": 8680, "loss": 0.8200059533119202, "lr": 1.873606032349451e-06, "epoch": 0.4076036866359447, "percentage": 20.38, "elapsed_time": "2:23:19", "remaining_time": "9:19:57"} +{"current_steps": 1770, "total_steps": 8680, "loss": 0.8413814902305603, "lr": 1.8734205693265404e-06, "epoch": 
0.4078341013824885, "percentage": 20.39, "elapsed_time": "2:23:25", "remaining_time": "9:19:53"} +{"current_steps": 1771, "total_steps": 8680, "loss": 0.9935271143913269, "lr": 1.8732349795277903e-06, "epoch": 0.4080645161290323, "percentage": 20.4, "elapsed_time": "2:23:29", "remaining_time": "9:19:47"} +{"current_steps": 1772, "total_steps": 8680, "loss": 0.8718058466911316, "lr": 1.873049262980139e-06, "epoch": 0.40829493087557606, "percentage": 20.41, "elapsed_time": "2:23:35", "remaining_time": "9:19:46"} +{"current_steps": 1773, "total_steps": 8680, "loss": 0.9009358882904053, "lr": 1.8728634197105428e-06, "epoch": 0.40852534562211984, "percentage": 20.43, "elapsed_time": "2:23:39", "remaining_time": "9:19:39"} +{"current_steps": 1774, "total_steps": 8680, "loss": 0.9128156900405884, "lr": 1.8726774497459768e-06, "epoch": 0.40875576036866357, "percentage": 20.44, "elapsed_time": "2:23:45", "remaining_time": "9:19:37"} +{"current_steps": 1775, "total_steps": 8680, "loss": 0.8524078130722046, "lr": 1.8724913531134342e-06, "epoch": 0.40898617511520735, "percentage": 20.45, "elapsed_time": "2:23:50", "remaining_time": "9:19:34"} +{"current_steps": 1776, "total_steps": 8680, "loss": 0.9431420564651489, "lr": 1.872305129839927e-06, "epoch": 0.40921658986175113, "percentage": 20.46, "elapsed_time": "2:23:55", "remaining_time": "9:19:29"} +{"current_steps": 1777, "total_steps": 8680, "loss": 0.7666694521903992, "lr": 1.8721187799524846e-06, "epoch": 0.4094470046082949, "percentage": 20.47, "elapsed_time": "2:24:00", "remaining_time": "9:19:26"} +{"current_steps": 1778, "total_steps": 8680, "loss": 0.872551679611206, "lr": 1.871932303478156e-06, "epoch": 0.4096774193548387, "percentage": 20.48, "elapsed_time": "2:24:06", "remaining_time": "9:19:23"} +{"current_steps": 1779, "total_steps": 8680, "loss": 0.7596250176429749, "lr": 1.8717457004440079e-06, "epoch": 0.4099078341013825, "percentage": 20.5, "elapsed_time": "2:24:11", "remaining_time": "9:19:19"} 
+{"current_steps": 1780, "total_steps": 8680, "loss": 1.0098414421081543, "lr": 1.8715589708771253e-06, "epoch": 0.41013824884792627, "percentage": 20.51, "elapsed_time": "2:24:15", "remaining_time": "9:19:12"} +{"current_steps": 1781, "total_steps": 8680, "loss": 0.9961523413658142, "lr": 1.871372114804612e-06, "epoch": 0.41036866359447005, "percentage": 20.52, "elapsed_time": "2:24:21", "remaining_time": "9:19:11"} +{"current_steps": 1782, "total_steps": 8680, "loss": 0.9065390825271606, "lr": 1.8711851322535896e-06, "epoch": 0.41059907834101383, "percentage": 20.53, "elapsed_time": "2:24:26", "remaining_time": "9:19:06"} +{"current_steps": 1783, "total_steps": 8680, "loss": 0.7906428575515747, "lr": 1.8709980232511987e-06, "epoch": 0.4108294930875576, "percentage": 20.54, "elapsed_time": "2:24:31", "remaining_time": "9:19:04"} +{"current_steps": 1784, "total_steps": 8680, "loss": 0.798285722732544, "lr": 1.8708107878245976e-06, "epoch": 0.4110599078341014, "percentage": 20.55, "elapsed_time": "2:24:36", "remaining_time": "9:18:57"} +{"current_steps": 1785, "total_steps": 8680, "loss": 0.7809790372848511, "lr": 1.870623426000964e-06, "epoch": 0.4112903225806452, "percentage": 20.56, "elapsed_time": "2:24:40", "remaining_time": "9:18:52"} +{"current_steps": 1786, "total_steps": 8680, "loss": 0.8931630849838257, "lr": 1.8704359378074921e-06, "epoch": 0.4115207373271889, "percentage": 20.58, "elapsed_time": "2:24:44", "remaining_time": "9:18:43"} +{"current_steps": 1787, "total_steps": 8680, "loss": 0.8219889402389526, "lr": 1.870248323271396e-06, "epoch": 0.4117511520737327, "percentage": 20.59, "elapsed_time": "2:24:49", "remaining_time": "9:18:38"} +{"current_steps": 1788, "total_steps": 8680, "loss": 0.8371819257736206, "lr": 1.8700605824199084e-06, "epoch": 0.4119815668202765, "percentage": 20.6, "elapsed_time": "2:24:53", "remaining_time": "9:18:29"} +{"current_steps": 1789, "total_steps": 8680, "loss": 0.951171875, "lr": 1.8698727152802789e-06, "epoch": 
0.41221198156682026, "percentage": 20.61, "elapsed_time": "2:24:58", "remaining_time": "9:18:25"} +{"current_steps": 1790, "total_steps": 8680, "loss": 0.7678385972976685, "lr": 1.8696847218797763e-06, "epoch": 0.41244239631336405, "percentage": 20.62, "elapsed_time": "2:25:03", "remaining_time": "9:18:22"} +{"current_steps": 1791, "total_steps": 8680, "loss": 0.9296993017196655, "lr": 1.8694966022456872e-06, "epoch": 0.41267281105990783, "percentage": 20.63, "elapsed_time": "2:25:07", "remaining_time": "9:18:14"} +{"current_steps": 1792, "total_steps": 8680, "loss": 0.8991763591766357, "lr": 1.8693083564053178e-06, "epoch": 0.4129032258064516, "percentage": 20.65, "elapsed_time": "2:25:13", "remaining_time": "9:18:12"} +{"current_steps": 1793, "total_steps": 8680, "loss": 0.8332901000976562, "lr": 1.8691199843859913e-06, "epoch": 0.4131336405529954, "percentage": 20.66, "elapsed_time": "2:25:19", "remaining_time": "9:18:11"} +{"current_steps": 1794, "total_steps": 8680, "loss": 0.7723548412322998, "lr": 1.8689314862150497e-06, "epoch": 0.4133640552995392, "percentage": 20.67, "elapsed_time": "2:25:24", "remaining_time": "9:18:07"} +{"current_steps": 1795, "total_steps": 8680, "loss": 0.7768993377685547, "lr": 1.868742861919853e-06, "epoch": 0.41359447004608296, "percentage": 20.68, "elapsed_time": "2:25:29", "remaining_time": "9:18:02"} +{"current_steps": 1796, "total_steps": 8680, "loss": 0.6058932542800903, "lr": 1.86855411152778e-06, "epoch": 0.41382488479262675, "percentage": 20.69, "elapsed_time": "2:25:37", "remaining_time": "9:18:08"} +{"current_steps": 1797, "total_steps": 8680, "loss": 0.8711605072021484, "lr": 1.8683652350662274e-06, "epoch": 0.41405529953917053, "percentage": 20.7, "elapsed_time": "2:25:41", "remaining_time": "9:18:02"} +{"current_steps": 1798, "total_steps": 8680, "loss": 0.9023469090461731, "lr": 1.8681762325626104e-06, "epoch": 0.4142857142857143, "percentage": 20.71, "elapsed_time": "2:25:46", "remaining_time": "9:17:58"} 
+{"current_steps": 1799, "total_steps": 8680, "loss": 0.7735910415649414, "lr": 1.867987104044363e-06, "epoch": 0.41451612903225804, "percentage": 20.73, "elapsed_time": "2:25:50", "remaining_time": "9:17:51"} +{"current_steps": 1800, "total_steps": 8680, "loss": 0.6609020829200745, "lr": 1.8677978495389364e-06, "epoch": 0.4147465437788018, "percentage": 20.74, "elapsed_time": "2:25:56", "remaining_time": "9:17:48"} +{"current_steps": 1801, "total_steps": 8680, "loss": 0.7823291420936584, "lr": 1.8676084690738005e-06, "epoch": 0.4149769585253456, "percentage": 20.75, "elapsed_time": "2:26:02", "remaining_time": "9:17:47"} +{"current_steps": 1802, "total_steps": 8680, "loss": 0.9076563715934753, "lr": 1.867418962676444e-06, "epoch": 0.4152073732718894, "percentage": 20.76, "elapsed_time": "2:26:07", "remaining_time": "9:17:42"} +{"current_steps": 1803, "total_steps": 8680, "loss": 0.8645772933959961, "lr": 1.8672293303743735e-06, "epoch": 0.4154377880184332, "percentage": 20.77, "elapsed_time": "2:26:11", "remaining_time": "9:17:37"} +{"current_steps": 1804, "total_steps": 8680, "loss": 0.8071421384811401, "lr": 1.8670395721951135e-06, "epoch": 0.41566820276497696, "percentage": 20.78, "elapsed_time": "2:26:15", "remaining_time": "9:17:29"} +{"current_steps": 1805, "total_steps": 8680, "loss": 0.8459846377372742, "lr": 1.8668496881662077e-06, "epoch": 0.41589861751152074, "percentage": 20.79, "elapsed_time": "2:26:20", "remaining_time": "9:17:22"} +{"current_steps": 1806, "total_steps": 8680, "loss": 0.8467865586280823, "lr": 1.866659678315217e-06, "epoch": 0.4161290322580645, "percentage": 20.81, "elapsed_time": "2:26:25", "remaining_time": "9:17:20"} +{"current_steps": 1807, "total_steps": 8680, "loss": 0.8963291645050049, "lr": 1.8664695426697215e-06, "epoch": 0.4163594470046083, "percentage": 20.82, "elapsed_time": "2:26:32", "remaining_time": "9:17:21"} +{"current_steps": 1808, "total_steps": 8680, "loss": 0.7901826500892639, "lr": 1.8662792812573188e-06, 
"epoch": 0.4165898617511521, "percentage": 20.83, "elapsed_time": "2:26:37", "remaining_time": "9:17:16"} +{"current_steps": 1809, "total_steps": 8680, "loss": 0.807115912437439, "lr": 1.8660888941056252e-06, "epoch": 0.4168202764976959, "percentage": 20.84, "elapsed_time": "2:26:41", "remaining_time": "9:17:09"} +{"current_steps": 1810, "total_steps": 8680, "loss": 0.8439537286758423, "lr": 1.8658983812422753e-06, "epoch": 0.41705069124423966, "percentage": 20.85, "elapsed_time": "2:26:46", "remaining_time": "9:17:06"} +{"current_steps": 1811, "total_steps": 8680, "loss": 0.6920834183692932, "lr": 1.8657077426949214e-06, "epoch": 0.4172811059907834, "percentage": 20.86, "elapsed_time": "2:26:51", "remaining_time": "9:17:01"} +{"current_steps": 1812, "total_steps": 8680, "loss": 0.8712124824523926, "lr": 1.865516978491235e-06, "epoch": 0.41751152073732717, "percentage": 20.88, "elapsed_time": "2:26:55", "remaining_time": "9:16:51"} +{"current_steps": 1813, "total_steps": 8680, "loss": 0.7720927596092224, "lr": 1.865326088658905e-06, "epoch": 0.41774193548387095, "percentage": 20.89, "elapsed_time": "2:26:59", "remaining_time": "9:16:43"} +{"current_steps": 1814, "total_steps": 8680, "loss": 0.8003814220428467, "lr": 1.8651350732256386e-06, "epoch": 0.41797235023041474, "percentage": 20.9, "elapsed_time": "2:27:03", "remaining_time": "9:16:37"} +{"current_steps": 1815, "total_steps": 8680, "loss": 0.8999850749969482, "lr": 1.8649439322191616e-06, "epoch": 0.4182027649769585, "percentage": 20.91, "elapsed_time": "2:27:08", "remaining_time": "9:16:33"} +{"current_steps": 1816, "total_steps": 8680, "loss": 0.6752324104309082, "lr": 1.8647526656672179e-06, "epoch": 0.4184331797235023, "percentage": 20.92, "elapsed_time": "2:27:14", "remaining_time": "9:16:33"} +{"current_steps": 1817, "total_steps": 8680, "loss": 0.8521262407302856, "lr": 1.8645612735975696e-06, "epoch": 0.4186635944700461, "percentage": 20.93, "elapsed_time": "2:27:18", "remaining_time": "9:16:24"} 
+{"current_steps": 1818, "total_steps": 8680, "loss": 0.8813315629959106, "lr": 1.864369756037997e-06, "epoch": 0.41889400921658987, "percentage": 20.94, "elapsed_time": "2:27:22", "remaining_time": "9:16:15"} +{"current_steps": 1819, "total_steps": 8680, "loss": 0.8358273506164551, "lr": 1.8641781130162986e-06, "epoch": 0.41912442396313365, "percentage": 20.96, "elapsed_time": "2:27:27", "remaining_time": "9:16:10"} +{"current_steps": 1820, "total_steps": 8680, "loss": 0.8051023483276367, "lr": 1.863986344560291e-06, "epoch": 0.41935483870967744, "percentage": 20.97, "elapsed_time": "2:27:31", "remaining_time": "9:16:04"} +{"current_steps": 1821, "total_steps": 8680, "loss": 0.768791675567627, "lr": 1.863794450697809e-06, "epoch": 0.4195852534562212, "percentage": 20.98, "elapsed_time": "2:27:37", "remaining_time": "9:16:01"} +{"current_steps": 1822, "total_steps": 8680, "loss": 0.8420040607452393, "lr": 1.8636024314567065e-06, "epoch": 0.419815668202765, "percentage": 20.99, "elapsed_time": "2:27:40", "remaining_time": "9:15:51"} +{"current_steps": 1823, "total_steps": 8680, "loss": 0.7670450806617737, "lr": 1.8634102868648542e-06, "epoch": 0.42004608294930873, "percentage": 21.0, "elapsed_time": "2:27:45", "remaining_time": "9:15:45"} +{"current_steps": 1824, "total_steps": 8680, "loss": 0.8292283415794373, "lr": 1.863218016950142e-06, "epoch": 0.4202764976958525, "percentage": 21.01, "elapsed_time": "2:27:49", "remaining_time": "9:15:39"} +{"current_steps": 1825, "total_steps": 8680, "loss": 0.8005781769752502, "lr": 1.8630256217404767e-06, "epoch": 0.4205069124423963, "percentage": 21.03, "elapsed_time": "2:27:55", "remaining_time": "9:15:37"} +{"current_steps": 1826, "total_steps": 8680, "loss": 0.8214897513389587, "lr": 1.8628331012637854e-06, "epoch": 0.4207373271889401, "percentage": 21.04, "elapsed_time": "2:28:00", "remaining_time": "9:15:34"} +{"current_steps": 1827, "total_steps": 8680, "loss": 0.7938524484634399, "lr": 1.8626404555480118e-06, "epoch": 
0.42096774193548386, "percentage": 21.05, "elapsed_time": "2:28:05", "remaining_time": "9:15:27"} +{"current_steps": 1828, "total_steps": 8680, "loss": 1.0047048330307007, "lr": 1.862447684621118e-06, "epoch": 0.42119815668202765, "percentage": 21.06, "elapsed_time": "2:28:08", "remaining_time": "9:15:18"} +{"current_steps": 1829, "total_steps": 8680, "loss": 0.7660601139068604, "lr": 1.862254788511084e-06, "epoch": 0.42142857142857143, "percentage": 21.07, "elapsed_time": "2:28:15", "remaining_time": "9:15:19"} +{"current_steps": 1830, "total_steps": 8680, "loss": 0.8227912783622742, "lr": 1.8620617672459096e-06, "epoch": 0.4216589861751152, "percentage": 21.08, "elapsed_time": "2:28:19", "remaining_time": "9:15:13"} +{"current_steps": 1831, "total_steps": 8680, "loss": 0.8570956587791443, "lr": 1.8618686208536106e-06, "epoch": 0.421889400921659, "percentage": 21.09, "elapsed_time": "2:28:23", "remaining_time": "9:15:05"} +{"current_steps": 1832, "total_steps": 8680, "loss": 0.7472532987594604, "lr": 1.8616753493622221e-06, "epoch": 0.4221198156682028, "percentage": 21.11, "elapsed_time": "2:28:28", "remaining_time": "9:14:59"} +{"current_steps": 1833, "total_steps": 8680, "loss": 0.812872052192688, "lr": 1.8614819527997976e-06, "epoch": 0.42235023041474656, "percentage": 21.12, "elapsed_time": "2:28:32", "remaining_time": "9:14:50"} +{"current_steps": 1834, "total_steps": 8680, "loss": 0.7801386117935181, "lr": 1.861288431194408e-06, "epoch": 0.42258064516129035, "percentage": 21.13, "elapsed_time": "2:28:36", "remaining_time": "9:14:43"} +{"current_steps": 1835, "total_steps": 8680, "loss": 0.7834687829017639, "lr": 1.8610947845741426e-06, "epoch": 0.4228110599078341, "percentage": 21.14, "elapsed_time": "2:28:41", "remaining_time": "9:14:37"} +{"current_steps": 1836, "total_steps": 8680, "loss": 0.786865234375, "lr": 1.8609010129671097e-06, "epoch": 0.42304147465437786, "percentage": 21.15, "elapsed_time": "2:28:45", "remaining_time": "9:14:31"} 
+{"current_steps": 1837, "total_steps": 8680, "loss": 0.7728738784790039, "lr": 1.860707116401434e-06, "epoch": 0.42327188940092164, "percentage": 21.16, "elapsed_time": "2:28:50", "remaining_time": "9:14:27"} +{"current_steps": 1838, "total_steps": 8680, "loss": 0.6508793830871582, "lr": 1.8605130949052598e-06, "epoch": 0.4235023041474654, "percentage": 21.18, "elapsed_time": "2:28:57", "remaining_time": "9:14:28"} +{"current_steps": 1839, "total_steps": 8680, "loss": 0.7949484586715698, "lr": 1.8603189485067492e-06, "epoch": 0.4237327188940092, "percentage": 21.19, "elapsed_time": "2:29:02", "remaining_time": "9:14:24"} +{"current_steps": 1840, "total_steps": 8680, "loss": 0.7151408195495605, "lr": 1.8601246772340822e-06, "epoch": 0.423963133640553, "percentage": 21.2, "elapsed_time": "2:29:06", "remaining_time": "9:14:19"} +{"current_steps": 1841, "total_steps": 8680, "loss": 0.7678598165512085, "lr": 1.859930281115457e-06, "epoch": 0.4241935483870968, "percentage": 21.21, "elapsed_time": "2:29:12", "remaining_time": "9:14:15"} +{"current_steps": 1842, "total_steps": 8680, "loss": 0.8042058944702148, "lr": 1.8597357601790895e-06, "epoch": 0.42442396313364056, "percentage": 21.22, "elapsed_time": "2:29:17", "remaining_time": "9:14:12"} +{"current_steps": 1843, "total_steps": 8680, "loss": 0.7328081130981445, "lr": 1.859541114453215e-06, "epoch": 0.42465437788018434, "percentage": 21.23, "elapsed_time": "2:29:23", "remaining_time": "9:14:11"} +{"current_steps": 1844, "total_steps": 8680, "loss": 0.7646626234054565, "lr": 1.8593463439660853e-06, "epoch": 0.4248847926267281, "percentage": 21.24, "elapsed_time": "2:29:28", "remaining_time": "9:14:06"} +{"current_steps": 1845, "total_steps": 8680, "loss": 0.8965721726417542, "lr": 1.8591514487459717e-06, "epoch": 0.4251152073732719, "percentage": 21.26, "elapsed_time": "2:29:32", "remaining_time": "9:14:00"} +{"current_steps": 1846, "total_steps": 8680, "loss": 0.8892468810081482, "lr": 1.8589564288211623e-06, 
"epoch": 0.4253456221198157, "percentage": 21.27, "elapsed_time": "2:29:38", "remaining_time": "9:13:58"} +{"current_steps": 1847, "total_steps": 8680, "loss": 0.8314409255981445, "lr": 1.8587612842199648e-06, "epoch": 0.4255760368663594, "percentage": 21.28, "elapsed_time": "2:29:42", "remaining_time": "9:13:50"} +{"current_steps": 1848, "total_steps": 8680, "loss": 0.7713892459869385, "lr": 1.8585660149707034e-06, "epoch": 0.4258064516129032, "percentage": 21.29, "elapsed_time": "2:29:46", "remaining_time": "9:13:44"} +{"current_steps": 1849, "total_steps": 8680, "loss": 0.9349459409713745, "lr": 1.8583706211017216e-06, "epoch": 0.426036866359447, "percentage": 21.3, "elapsed_time": "2:29:51", "remaining_time": "9:13:40"} +{"current_steps": 1850, "total_steps": 8680, "loss": 0.8438700437545776, "lr": 1.8581751026413805e-06, "epoch": 0.42626728110599077, "percentage": 21.31, "elapsed_time": "2:29:56", "remaining_time": "9:13:35"} +{"current_steps": 1851, "total_steps": 8680, "loss": 0.9559776782989502, "lr": 1.8579794596180594e-06, "epoch": 0.42649769585253455, "percentage": 21.32, "elapsed_time": "2:30:01", "remaining_time": "9:13:29"} +{"current_steps": 1852, "total_steps": 8680, "loss": 0.7124872803688049, "lr": 1.8577836920601556e-06, "epoch": 0.42672811059907834, "percentage": 21.34, "elapsed_time": "2:30:07", "remaining_time": "9:13:28"} +{"current_steps": 1853, "total_steps": 8680, "loss": 0.7935503125190735, "lr": 1.8575877999960842e-06, "epoch": 0.4269585253456221, "percentage": 21.35, "elapsed_time": "2:30:12", "remaining_time": "9:13:24"} +{"current_steps": 1854, "total_steps": 8680, "loss": 0.9145890474319458, "lr": 1.8573917834542792e-06, "epoch": 0.4271889400921659, "percentage": 21.36, "elapsed_time": "2:30:16", "remaining_time": "9:13:18"} +{"current_steps": 1855, "total_steps": 8680, "loss": 0.8239228129386902, "lr": 1.8571956424631918e-06, "epoch": 0.4274193548387097, "percentage": 21.37, "elapsed_time": "2:30:21", "remaining_time": "9:13:13"} 
+{"current_steps": 1856, "total_steps": 8680, "loss": 0.8767688274383545, "lr": 1.8569993770512916e-06, "epoch": 0.42764976958525347, "percentage": 21.38, "elapsed_time": "2:30:28", "remaining_time": "9:13:13"} +{"current_steps": 1857, "total_steps": 8680, "loss": 0.7860859632492065, "lr": 1.8568029872470663e-06, "epoch": 0.42788018433179725, "percentage": 21.39, "elapsed_time": "2:30:32", "remaining_time": "9:13:07"} +{"current_steps": 1858, "total_steps": 8680, "loss": 0.8855729103088379, "lr": 1.8566064730790218e-06, "epoch": 0.42811059907834104, "percentage": 21.41, "elapsed_time": "2:30:37", "remaining_time": "9:13:02"} +{"current_steps": 1859, "total_steps": 8680, "loss": 1.023299217224121, "lr": 1.8564098345756815e-06, "epoch": 0.4283410138248848, "percentage": 21.42, "elapsed_time": "2:30:42", "remaining_time": "9:12:57"} +{"current_steps": 1860, "total_steps": 8680, "loss": 0.7665202617645264, "lr": 1.8562130717655878e-06, "epoch": 0.42857142857142855, "percentage": 21.43, "elapsed_time": "2:30:47", "remaining_time": "9:12:52"} +{"current_steps": 1861, "total_steps": 8680, "loss": 0.8456651568412781, "lr": 1.8560161846773e-06, "epoch": 0.42880184331797233, "percentage": 21.44, "elapsed_time": "2:30:51", "remaining_time": "9:12:46"} +{"current_steps": 1862, "total_steps": 8680, "loss": 0.8920061588287354, "lr": 1.8558191733393964e-06, "epoch": 0.4290322580645161, "percentage": 21.45, "elapsed_time": "2:30:57", "remaining_time": "9:12:45"} +{"current_steps": 1863, "total_steps": 8680, "loss": 0.8686853051185608, "lr": 1.8556220377804723e-06, "epoch": 0.4292626728110599, "percentage": 21.46, "elapsed_time": "2:31:01", "remaining_time": "9:12:37"} +{"current_steps": 1864, "total_steps": 8680, "loss": 0.6976242065429688, "lr": 1.8554247780291425e-06, "epoch": 0.4294930875576037, "percentage": 21.47, "elapsed_time": "2:31:08", "remaining_time": "9:12:38"} +{"current_steps": 1865, "total_steps": 8680, "loss": 0.9612032771110535, "lr": 1.8552273941140387e-06, 
"epoch": 0.42972350230414746, "percentage": 21.49, "elapsed_time": "2:31:11", "remaining_time": "9:12:29"} +{"current_steps": 1866, "total_steps": 8680, "loss": 0.9288003444671631, "lr": 1.8550298860638108e-06, "epoch": 0.42995391705069125, "percentage": 21.5, "elapsed_time": "2:31:16", "remaining_time": "9:12:25"} +{"current_steps": 1867, "total_steps": 8680, "loss": 0.8397525548934937, "lr": 1.8548322539071263e-06, "epoch": 0.43018433179723503, "percentage": 21.51, "elapsed_time": "2:31:22", "remaining_time": "9:12:22"} +{"current_steps": 1868, "total_steps": 8680, "loss": 0.6311365365982056, "lr": 1.8546344976726722e-06, "epoch": 0.4304147465437788, "percentage": 21.52, "elapsed_time": "2:31:28", "remaining_time": "9:12:21"} +{"current_steps": 1869, "total_steps": 8680, "loss": 0.7868270874023438, "lr": 1.8544366173891523e-06, "epoch": 0.4306451612903226, "percentage": 21.53, "elapsed_time": "2:31:32", "remaining_time": "9:12:13"} +{"current_steps": 1870, "total_steps": 8680, "loss": 0.9197052717208862, "lr": 1.8542386130852883e-06, "epoch": 0.4308755760368664, "percentage": 21.54, "elapsed_time": "2:31:37", "remaining_time": "9:12:10"} +{"current_steps": 1871, "total_steps": 8680, "loss": 0.7875635027885437, "lr": 1.8540404847898206e-06, "epoch": 0.43110599078341016, "percentage": 21.56, "elapsed_time": "2:31:43", "remaining_time": "9:12:09"} +{"current_steps": 1872, "total_steps": 8680, "loss": 0.9805077910423279, "lr": 1.853842232531507e-06, "epoch": 0.4313364055299539, "percentage": 21.57, "elapsed_time": "2:31:46", "remaining_time": "9:11:59"} +{"current_steps": 1873, "total_steps": 8680, "loss": 0.8906866312026978, "lr": 1.8536438563391236e-06, "epoch": 0.4315668202764977, "percentage": 21.58, "elapsed_time": "2:31:50", "remaining_time": "9:11:50"} +{"current_steps": 1874, "total_steps": 8680, "loss": 0.7506693601608276, "lr": 1.8534453562414649e-06, "epoch": 0.43179723502304146, "percentage": 21.59, "elapsed_time": "2:31:55", "remaining_time": "9:11:47"} 
+{"current_steps": 1875, "total_steps": 8680, "loss": 0.6173181533813477, "lr": 1.8532467322673422e-06, "epoch": 0.43202764976958524, "percentage": 21.6, "elapsed_time": "2:32:01", "remaining_time": "9:11:45"} +{"current_steps": 1876, "total_steps": 8680, "loss": 0.9217972755432129, "lr": 1.853047984445586e-06, "epoch": 0.432258064516129, "percentage": 21.61, "elapsed_time": "2:32:06", "remaining_time": "9:11:42"} +{"current_steps": 1877, "total_steps": 8680, "loss": 0.8300588130950928, "lr": 1.8528491128050442e-06, "epoch": 0.4324884792626728, "percentage": 21.62, "elapsed_time": "2:32:11", "remaining_time": "9:11:36"} +{"current_steps": 1878, "total_steps": 8680, "loss": 0.8109279870986938, "lr": 1.8526501173745826e-06, "epoch": 0.4327188940092166, "percentage": 21.64, "elapsed_time": "2:32:16", "remaining_time": "9:11:30"} +{"current_steps": 1879, "total_steps": 8680, "loss": 0.9243700504302979, "lr": 1.852450998183085e-06, "epoch": 0.4329493087557604, "percentage": 21.65, "elapsed_time": "2:32:21", "remaining_time": "9:11:26"} +{"current_steps": 1880, "total_steps": 8680, "loss": 0.7983531951904297, "lr": 1.8522517552594539e-06, "epoch": 0.43317972350230416, "percentage": 21.66, "elapsed_time": "2:32:26", "remaining_time": "9:11:24"} +{"current_steps": 1881, "total_steps": 8680, "loss": 0.9931240081787109, "lr": 1.8520523886326088e-06, "epoch": 0.43341013824884794, "percentage": 21.67, "elapsed_time": "2:32:30", "remaining_time": "9:11:16"} +{"current_steps": 1882, "total_steps": 8680, "loss": 0.923255443572998, "lr": 1.8518528983314874e-06, "epoch": 0.4336405529953917, "percentage": 21.68, "elapsed_time": "2:32:35", "remaining_time": "9:11:09"} +{"current_steps": 1883, "total_steps": 8680, "loss": 0.8470325469970703, "lr": 1.8516532843850454e-06, "epoch": 0.4338709677419355, "percentage": 21.69, "elapsed_time": "2:32:41", "remaining_time": "9:11:11"} +{"current_steps": 1884, "total_steps": 8680, "loss": 0.9175074696540833, "lr": 1.8514535468222566e-06, 
"epoch": 0.43410138248847924, "percentage": 21.71, "elapsed_time": "2:32:45", "remaining_time": "9:11:03"} +{"current_steps": 1885, "total_steps": 8680, "loss": 0.8617827892303467, "lr": 1.8512536856721126e-06, "epoch": 0.434331797235023, "percentage": 21.72, "elapsed_time": "2:32:50", "remaining_time": "9:10:57"} +{"current_steps": 1886, "total_steps": 8680, "loss": 0.6787248849868774, "lr": 1.8510537009636231e-06, "epoch": 0.4345622119815668, "percentage": 21.73, "elapsed_time": "2:32:54", "remaining_time": "9:10:49"} +{"current_steps": 1887, "total_steps": 8680, "loss": 0.8031569719314575, "lr": 1.8508535927258157e-06, "epoch": 0.4347926267281106, "percentage": 21.74, "elapsed_time": "2:32:58", "remaining_time": "9:10:41"} +{"current_steps": 1888, "total_steps": 8680, "loss": 1.0252577066421509, "lr": 1.8506533609877354e-06, "epoch": 0.43502304147465437, "percentage": 21.75, "elapsed_time": "2:33:02", "remaining_time": "9:10:35"} +{"current_steps": 1889, "total_steps": 8680, "loss": 0.7947444915771484, "lr": 1.850453005778446e-06, "epoch": 0.43525345622119815, "percentage": 21.76, "elapsed_time": "2:33:08", "remaining_time": "9:10:32"} +{"current_steps": 1890, "total_steps": 8680, "loss": 0.817523717880249, "lr": 1.8502525271270288e-06, "epoch": 0.43548387096774194, "percentage": 21.77, "elapsed_time": "2:33:13", "remaining_time": "9:10:29"} +{"current_steps": 1891, "total_steps": 8680, "loss": 0.8029658794403076, "lr": 1.850051925062583e-06, "epoch": 0.4357142857142857, "percentage": 21.79, "elapsed_time": "2:33:18", "remaining_time": "9:10:25"} +{"current_steps": 1892, "total_steps": 8680, "loss": 0.871408224105835, "lr": 1.8498511996142253e-06, "epoch": 0.4359447004608295, "percentage": 21.8, "elapsed_time": "2:33:24", "remaining_time": "9:10:22"} +{"current_steps": 1893, "total_steps": 8680, "loss": 1.0133098363876343, "lr": 1.849650350811091e-06, "epoch": 0.4361751152073733, "percentage": 21.81, "elapsed_time": "2:33:29", "remaining_time": "9:10:19"} 
+{"current_steps": 1894, "total_steps": 8680, "loss": 0.8320624828338623, "lr": 1.8494493786823333e-06, "epoch": 0.43640552995391707, "percentage": 21.82, "elapsed_time": "2:33:35", "remaining_time": "9:10:18"} +{"current_steps": 1895, "total_steps": 8680, "loss": 0.7757631540298462, "lr": 1.8492482832571225e-06, "epoch": 0.43663594470046085, "percentage": 21.83, "elapsed_time": "2:33:40", "remaining_time": "9:10:14"} +{"current_steps": 1896, "total_steps": 8680, "loss": 0.8503100872039795, "lr": 1.8490470645646479e-06, "epoch": 0.4368663594470046, "percentage": 21.84, "elapsed_time": "2:33:44", "remaining_time": "9:10:07"} +{"current_steps": 1897, "total_steps": 8680, "loss": 0.8145939707756042, "lr": 1.8488457226341158e-06, "epoch": 0.43709677419354837, "percentage": 21.85, "elapsed_time": "2:33:50", "remaining_time": "9:10:03"} +{"current_steps": 1898, "total_steps": 8680, "loss": 0.831500232219696, "lr": 1.848644257494751e-06, "epoch": 0.43732718894009215, "percentage": 21.87, "elapsed_time": "2:33:54", "remaining_time": "9:09:58"} +{"current_steps": 1899, "total_steps": 8680, "loss": 0.9340692758560181, "lr": 1.8484426691757956e-06, "epoch": 0.43755760368663593, "percentage": 21.88, "elapsed_time": "2:33:59", "remaining_time": "9:09:53"} +{"current_steps": 1900, "total_steps": 8680, "loss": 1.011988639831543, "lr": 1.8482409577065097e-06, "epoch": 0.4377880184331797, "percentage": 21.89, "elapsed_time": "2:34:04", "remaining_time": "9:09:47"} +{"current_steps": 1901, "total_steps": 8680, "loss": 0.8110378980636597, "lr": 1.848039123116172e-06, "epoch": 0.4380184331797235, "percentage": 21.9, "elapsed_time": "2:34:10", "remaining_time": "9:09:48"} +{"current_steps": 1902, "total_steps": 8680, "loss": 0.8230330944061279, "lr": 1.8478371654340779e-06, "epoch": 0.4382488479262673, "percentage": 21.91, "elapsed_time": "2:34:15", "remaining_time": "9:09:44"} +{"current_steps": 1903, "total_steps": 8680, "loss": 0.875052809715271, "lr": 1.8476350846895419e-06, 
"epoch": 0.43847926267281107, "percentage": 21.92, "elapsed_time": "2:34:21", "remaining_time": "9:09:43"} +{"current_steps": 1904, "total_steps": 8680, "loss": 0.9373071193695068, "lr": 1.8474328809118953e-06, "epoch": 0.43870967741935485, "percentage": 21.94, "elapsed_time": "2:34:26", "remaining_time": "9:09:37"} +{"current_steps": 1905, "total_steps": 8680, "loss": 0.8341633677482605, "lr": 1.847230554130488e-06, "epoch": 0.43894009216589863, "percentage": 21.95, "elapsed_time": "2:34:31", "remaining_time": "9:09:33"} +{"current_steps": 1906, "total_steps": 8680, "loss": 0.8147767782211304, "lr": 1.8470281043746873e-06, "epoch": 0.4391705069124424, "percentage": 21.96, "elapsed_time": "2:34:37", "remaining_time": "9:09:32"} +{"current_steps": 1907, "total_steps": 8680, "loss": 0.740512490272522, "lr": 1.8468255316738785e-06, "epoch": 0.4394009216589862, "percentage": 21.97, "elapsed_time": "2:34:42", "remaining_time": "9:09:27"} +{"current_steps": 1908, "total_steps": 8680, "loss": 0.7754743099212646, "lr": 1.846622836057465e-06, "epoch": 0.4396313364055299, "percentage": 21.98, "elapsed_time": "2:34:47", "remaining_time": "9:09:22"} +{"current_steps": 1909, "total_steps": 8680, "loss": 0.9131484031677246, "lr": 1.8464200175548677e-06, "epoch": 0.4398617511520737, "percentage": 21.99, "elapsed_time": "2:34:51", "remaining_time": "9:09:15"} +{"current_steps": 1910, "total_steps": 8680, "loss": 0.7084713578224182, "lr": 1.8462170761955252e-06, "epoch": 0.4400921658986175, "percentage": 22.0, "elapsed_time": "2:34:56", "remaining_time": "9:09:10"} +{"current_steps": 1911, "total_steps": 8680, "loss": 0.8535224199295044, "lr": 1.8460140120088945e-06, "epoch": 0.4403225806451613, "percentage": 22.02, "elapsed_time": "2:34:59", "remaining_time": "9:09:01"} +{"current_steps": 1912, "total_steps": 8680, "loss": 0.7661323547363281, "lr": 1.8458108250244498e-06, "epoch": 0.44055299539170506, "percentage": 22.03, "elapsed_time": "2:35:03", "remaining_time": "9:08:53"} 
+{"current_steps": 1913, "total_steps": 8680, "loss": 0.8064024448394775, "lr": 1.8456075152716837e-06, "epoch": 0.44078341013824884, "percentage": 22.04, "elapsed_time": "2:35:09", "remaining_time": "9:08:50"} +{"current_steps": 1914, "total_steps": 8680, "loss": 0.7858735918998718, "lr": 1.8454040827801058e-06, "epoch": 0.4410138248847926, "percentage": 22.05, "elapsed_time": "2:35:14", "remaining_time": "9:08:47"} +{"current_steps": 1915, "total_steps": 8680, "loss": 0.9251735210418701, "lr": 1.8452005275792448e-06, "epoch": 0.4412442396313364, "percentage": 22.06, "elapsed_time": "2:35:19", "remaining_time": "9:08:43"} +{"current_steps": 1916, "total_steps": 8680, "loss": 0.7237124443054199, "lr": 1.8449968496986461e-06, "epoch": 0.4414746543778802, "percentage": 22.07, "elapsed_time": "2:35:26", "remaining_time": "9:08:43"} +{"current_steps": 1917, "total_steps": 8680, "loss": 0.8939133882522583, "lr": 1.8447930491678732e-06, "epoch": 0.441705069124424, "percentage": 22.09, "elapsed_time": "2:35:31", "remaining_time": "9:08:39"} +{"current_steps": 1918, "total_steps": 8680, "loss": 0.8815577626228333, "lr": 1.8445891260165076e-06, "epoch": 0.44193548387096776, "percentage": 22.1, "elapsed_time": "2:35:36", "remaining_time": "9:08:34"} +{"current_steps": 1919, "total_steps": 8680, "loss": 0.943426787853241, "lr": 1.8443850802741485e-06, "epoch": 0.44216589861751154, "percentage": 22.11, "elapsed_time": "2:35:39", "remaining_time": "9:08:25"} +{"current_steps": 1920, "total_steps": 8680, "loss": 0.8001632690429688, "lr": 1.8441809119704126e-06, "epoch": 0.4423963133640553, "percentage": 22.12, "elapsed_time": "2:35:44", "remaining_time": "9:08:19"} +{"current_steps": 1921, "total_steps": 8680, "loss": 0.8656308650970459, "lr": 1.8439766211349352e-06, "epoch": 0.44262672811059905, "percentage": 22.13, "elapsed_time": "2:35:49", "remaining_time": "9:08:15"} +{"current_steps": 1922, "total_steps": 8680, "loss": 0.9774024486541748, "lr": 1.8437722077973686e-06, 
"epoch": 0.44285714285714284, "percentage": 22.14, "elapsed_time": "2:35:53", "remaining_time": "9:08:07"} +{"current_steps": 1923, "total_steps": 8680, "loss": 0.7655738592147827, "lr": 1.8435676719873827e-06, "epoch": 0.4430875576036866, "percentage": 22.15, "elapsed_time": "2:35:57", "remaining_time": "9:08:01"} +{"current_steps": 1924, "total_steps": 8680, "loss": 0.6455004811286926, "lr": 1.8433630137346657e-06, "epoch": 0.4433179723502304, "percentage": 22.17, "elapsed_time": "2:36:03", "remaining_time": "9:07:58"} +{"current_steps": 1925, "total_steps": 8680, "loss": 0.8221153020858765, "lr": 1.8431582330689243e-06, "epoch": 0.4435483870967742, "percentage": 22.18, "elapsed_time": "2:36:07", "remaining_time": "9:07:50"} +{"current_steps": 1926, "total_steps": 8680, "loss": 0.7878339886665344, "lr": 1.8429533300198816e-06, "epoch": 0.44377880184331797, "percentage": 22.19, "elapsed_time": "2:36:11", "remaining_time": "9:07:44"} +{"current_steps": 1927, "total_steps": 8680, "loss": 0.8292763829231262, "lr": 1.8427483046172787e-06, "epoch": 0.44400921658986175, "percentage": 22.2, "elapsed_time": "2:36:17", "remaining_time": "9:07:42"} +{"current_steps": 1928, "total_steps": 8680, "loss": 0.7774572372436523, "lr": 1.842543156890875e-06, "epoch": 0.44423963133640554, "percentage": 22.21, "elapsed_time": "2:36:21", "remaining_time": "9:07:34"} +{"current_steps": 1929, "total_steps": 8680, "loss": 0.7327601909637451, "lr": 1.8423378868704476e-06, "epoch": 0.4444700460829493, "percentage": 22.22, "elapsed_time": "2:36:26", "remaining_time": "9:07:30"} +{"current_steps": 1930, "total_steps": 8680, "loss": 0.8067511320114136, "lr": 1.8421324945857909e-06, "epoch": 0.4447004608294931, "percentage": 22.24, "elapsed_time": "2:36:31", "remaining_time": "9:07:26"} +{"current_steps": 1931, "total_steps": 8680, "loss": 0.851010799407959, "lr": 1.8419269800667173e-06, "epoch": 0.4449308755760369, "percentage": 22.25, "elapsed_time": "2:36:37", "remaining_time": "9:07:24"} 
+{"current_steps": 1932, "total_steps": 8680, "loss": 0.8402234315872192, "lr": 1.8417213433430576e-06, "epoch": 0.44516129032258067, "percentage": 22.26, "elapsed_time": "2:36:42", "remaining_time": "9:07:19"} +{"current_steps": 1933, "total_steps": 8680, "loss": 0.8857355117797852, "lr": 1.8415155844446591e-06, "epoch": 0.4453917050691244, "percentage": 22.27, "elapsed_time": "2:36:46", "remaining_time": "9:07:13"} +{"current_steps": 1934, "total_steps": 8680, "loss": 0.7517881393432617, "lr": 1.841309703401387e-06, "epoch": 0.4456221198156682, "percentage": 22.28, "elapsed_time": "2:36:51", "remaining_time": "9:07:07"} +{"current_steps": 1935, "total_steps": 8680, "loss": 0.8583779335021973, "lr": 1.8411037002431257e-06, "epoch": 0.44585253456221197, "percentage": 22.29, "elapsed_time": "2:36:55", "remaining_time": "9:06:59"} +{"current_steps": 1936, "total_steps": 8680, "loss": 0.7691524028778076, "lr": 1.8408975749997758e-06, "epoch": 0.44608294930875575, "percentage": 22.3, "elapsed_time": "2:36:58", "remaining_time": "9:06:50"} +{"current_steps": 1937, "total_steps": 8680, "loss": 0.9164496660232544, "lr": 1.8406913277012558e-06, "epoch": 0.44631336405529953, "percentage": 22.32, "elapsed_time": "2:37:04", "remaining_time": "9:06:47"} +{"current_steps": 1938, "total_steps": 8680, "loss": 0.843226432800293, "lr": 1.8404849583775025e-06, "epoch": 0.4465437788018433, "percentage": 22.33, "elapsed_time": "2:37:09", "remaining_time": "9:06:44"} +{"current_steps": 1939, "total_steps": 8680, "loss": 0.8492633104324341, "lr": 1.8402784670584706e-06, "epoch": 0.4467741935483871, "percentage": 22.34, "elapsed_time": "2:37:13", "remaining_time": "9:06:37"} +{"current_steps": 1940, "total_steps": 8680, "loss": 0.8088324069976807, "lr": 1.8400718537741314e-06, "epoch": 0.4470046082949309, "percentage": 22.35, "elapsed_time": "2:37:19", "remaining_time": "9:06:34"} +{"current_steps": 1941, "total_steps": 8680, "loss": 0.8879667520523071, "lr": 1.8398651185544746e-06, 
"epoch": 0.44723502304147467, "percentage": 22.36, "elapsed_time": "2:37:22", "remaining_time": "9:06:24"} +{"current_steps": 1942, "total_steps": 8680, "loss": 0.8926588892936707, "lr": 1.8396582614295078e-06, "epoch": 0.44746543778801845, "percentage": 22.37, "elapsed_time": "2:37:26", "remaining_time": "9:06:17"} +{"current_steps": 1943, "total_steps": 8680, "loss": 0.8007583618164062, "lr": 1.8394512824292558e-06, "epoch": 0.44769585253456223, "percentage": 22.38, "elapsed_time": "2:37:32", "remaining_time": "9:06:16"} +{"current_steps": 1944, "total_steps": 8680, "loss": 0.7420827746391296, "lr": 1.8392441815837613e-06, "epoch": 0.447926267281106, "percentage": 22.4, "elapsed_time": "2:37:38", "remaining_time": "9:06:13"} +{"current_steps": 1945, "total_steps": 8680, "loss": 0.7653264999389648, "lr": 1.839036958923085e-06, "epoch": 0.44815668202764974, "percentage": 22.41, "elapsed_time": "2:37:42", "remaining_time": "9:06:05"} +{"current_steps": 1946, "total_steps": 8680, "loss": 0.886576771736145, "lr": 1.838829614477305e-06, "epoch": 0.4483870967741935, "percentage": 22.42, "elapsed_time": "2:37:46", "remaining_time": "9:05:59"} +{"current_steps": 1947, "total_steps": 8680, "loss": 0.904376745223999, "lr": 1.8386221482765168e-06, "epoch": 0.4486175115207373, "percentage": 22.43, "elapsed_time": "2:37:51", "remaining_time": "9:05:52"} +{"current_steps": 1948, "total_steps": 8680, "loss": 0.6791579723358154, "lr": 1.838414560350834e-06, "epoch": 0.4488479262672811, "percentage": 22.44, "elapsed_time": "2:37:56", "remaining_time": "9:05:49"} +{"current_steps": 1949, "total_steps": 8680, "loss": 0.9402183294296265, "lr": 1.838206850730388e-06, "epoch": 0.4490783410138249, "percentage": 22.45, "elapsed_time": "2:38:01", "remaining_time": "9:05:43"} +{"current_steps": 1950, "total_steps": 8680, "loss": 0.9756022691726685, "lr": 1.8379990194453265e-06, "epoch": 0.44930875576036866, "percentage": 22.47, "elapsed_time": "2:38:05", "remaining_time": "9:05:35"} 
+{"current_steps": 1951, "total_steps": 8680, "loss": 0.7311051487922668, "lr": 1.8377910665258173e-06, "epoch": 0.44953917050691244, "percentage": 22.48, "elapsed_time": "2:38:09", "remaining_time": "9:05:30"} +{"current_steps": 1952, "total_steps": 8680, "loss": 0.6966956853866577, "lr": 1.8375829920020438e-06, "epoch": 0.4497695852534562, "percentage": 22.49, "elapsed_time": "2:38:15", "remaining_time": "9:05:27"} +{"current_steps": 1953, "total_steps": 8680, "loss": 0.7327426671981812, "lr": 1.8373747959042076e-06, "epoch": 0.45, "percentage": 22.5, "elapsed_time": "2:38:19", "remaining_time": "9:05:21"} +{"current_steps": 1954, "total_steps": 8680, "loss": 0.8650925755500793, "lr": 1.8371664782625285e-06, "epoch": 0.4502304147465438, "percentage": 22.51, "elapsed_time": "2:38:24", "remaining_time": "9:05:16"} +{"current_steps": 1955, "total_steps": 8680, "loss": 0.876739501953125, "lr": 1.8369580391072431e-06, "epoch": 0.4504608294930876, "percentage": 22.52, "elapsed_time": "2:38:28", "remaining_time": "9:05:08"} +{"current_steps": 1956, "total_steps": 8680, "loss": 0.7787455320358276, "lr": 1.8367494784686066e-06, "epoch": 0.45069124423963136, "percentage": 22.53, "elapsed_time": "2:38:32", "remaining_time": "9:05:01"} +{"current_steps": 1957, "total_steps": 8680, "loss": 0.8874029517173767, "lr": 1.836540796376891e-06, "epoch": 0.4509216589861751, "percentage": 22.55, "elapsed_time": "2:38:39", "remaining_time": "9:05:02"} +{"current_steps": 1958, "total_steps": 8680, "loss": 0.8944835662841797, "lr": 1.8363319928623862e-06, "epoch": 0.4511520737327189, "percentage": 22.56, "elapsed_time": "2:38:44", "remaining_time": "9:04:57"} +{"current_steps": 1959, "total_steps": 8680, "loss": 0.7106739282608032, "lr": 1.8361230679553996e-06, "epoch": 0.45138248847926266, "percentage": 22.57, "elapsed_time": "2:38:50", "remaining_time": "9:04:56"} +{"current_steps": 1960, "total_steps": 8680, "loss": 0.8668634295463562, "lr": 1.835914021686257e-06, "epoch": 
0.45161290322580644, "percentage": 22.58, "elapsed_time": "2:38:55", "remaining_time": "9:04:53"} +{"current_steps": 1961, "total_steps": 8680, "loss": 0.8123712539672852, "lr": 1.8357048540853003e-06, "epoch": 0.4518433179723502, "percentage": 22.59, "elapsed_time": "2:39:00", "remaining_time": "9:04:47"} +{"current_steps": 1962, "total_steps": 8680, "loss": 0.865728497505188, "lr": 1.8354955651828907e-06, "epoch": 0.452073732718894, "percentage": 22.6, "elapsed_time": "2:39:05", "remaining_time": "9:04:43"} +{"current_steps": 1963, "total_steps": 8680, "loss": 0.8066651225090027, "lr": 1.8352861550094056e-06, "epoch": 0.4523041474654378, "percentage": 22.62, "elapsed_time": "2:39:10", "remaining_time": "9:04:40"} +{"current_steps": 1964, "total_steps": 8680, "loss": 1.020591139793396, "lr": 1.835076623595241e-06, "epoch": 0.4525345622119816, "percentage": 22.63, "elapsed_time": "2:39:15", "remaining_time": "9:04:34"} +{"current_steps": 1965, "total_steps": 8680, "loss": 0.839346706867218, "lr": 1.83486697097081e-06, "epoch": 0.45276497695852536, "percentage": 22.64, "elapsed_time": "2:39:22", "remaining_time": "9:04:37"} +{"current_steps": 1966, "total_steps": 8680, "loss": 0.7707340121269226, "lr": 1.8346571971665434e-06, "epoch": 0.45299539170506914, "percentage": 22.65, "elapsed_time": "2:39:26", "remaining_time": "9:04:31"} +{"current_steps": 1967, "total_steps": 8680, "loss": 0.7969534397125244, "lr": 1.8344473022128897e-06, "epoch": 0.4532258064516129, "percentage": 22.66, "elapsed_time": "2:39:32", "remaining_time": "9:04:28"} +{"current_steps": 1968, "total_steps": 8680, "loss": 0.9371283650398254, "lr": 1.8342372861403143e-06, "epoch": 0.4534562211981567, "percentage": 22.67, "elapsed_time": "2:39:36", "remaining_time": "9:04:19"} +{"current_steps": 1969, "total_steps": 8680, "loss": 0.7915256023406982, "lr": 1.8340271489793015e-06, "epoch": 0.45368663594470043, "percentage": 22.68, "elapsed_time": "2:39:42", "remaining_time": "9:04:19"} 
+{"current_steps": 1970, "total_steps": 8680, "loss": 0.8394884467124939, "lr": 1.8338168907603522e-06, "epoch": 0.4539170506912442, "percentage": 22.7, "elapsed_time": "2:39:48", "remaining_time": "9:04:18"} +{"current_steps": 1971, "total_steps": 8680, "loss": 0.7786067128181458, "lr": 1.833606511513985e-06, "epoch": 0.454147465437788, "percentage": 22.71, "elapsed_time": "2:39:53", "remaining_time": "9:04:16"} +{"current_steps": 1972, "total_steps": 8680, "loss": 0.9237443208694458, "lr": 1.833396011270736e-06, "epoch": 0.4543778801843318, "percentage": 22.72, "elapsed_time": "2:39:57", "remaining_time": "9:04:07"} +{"current_steps": 1973, "total_steps": 8680, "loss": 0.7530162334442139, "lr": 1.8331853900611596e-06, "epoch": 0.45460829493087557, "percentage": 22.73, "elapsed_time": "2:40:01", "remaining_time": "9:04:00"} +{"current_steps": 1974, "total_steps": 8680, "loss": 0.8349624872207642, "lr": 1.8329746479158263e-06, "epoch": 0.45483870967741935, "percentage": 22.74, "elapsed_time": "2:40:06", "remaining_time": "9:03:53"} +{"current_steps": 1975, "total_steps": 8680, "loss": 0.8748637437820435, "lr": 1.8327637848653259e-06, "epoch": 0.45506912442396313, "percentage": 22.75, "elapsed_time": "2:40:11", "remaining_time": "9:03:50"} +{"current_steps": 1976, "total_steps": 8680, "loss": 0.9111478924751282, "lr": 1.832552800940265e-06, "epoch": 0.4552995391705069, "percentage": 22.76, "elapsed_time": "2:40:17", "remaining_time": "9:03:47"} +{"current_steps": 1977, "total_steps": 8680, "loss": 0.8108797073364258, "lr": 1.8323416961712665e-06, "epoch": 0.4555299539170507, "percentage": 22.78, "elapsed_time": "2:40:23", "remaining_time": "9:03:47"} +{"current_steps": 1978, "total_steps": 8680, "loss": 0.9266520738601685, "lr": 1.832130470588973e-06, "epoch": 0.4557603686635945, "percentage": 22.79, "elapsed_time": "2:40:27", "remaining_time": "9:03:40"} +{"current_steps": 1979, "total_steps": 8680, "loss": 0.9092522859573364, "lr": 1.831919124224043e-06, "epoch": 
0.45599078341013827, "percentage": 22.8, "elapsed_time": "2:40:32", "remaining_time": "9:03:36"} +{"current_steps": 1980, "total_steps": 8680, "loss": 0.8249068260192871, "lr": 1.8317076571071536e-06, "epoch": 0.45622119815668205, "percentage": 22.81, "elapsed_time": "2:40:36", "remaining_time": "9:03:28"} +{"current_steps": 1981, "total_steps": 8680, "loss": 0.7497084140777588, "lr": 1.8314960692689992e-06, "epoch": 0.45645161290322583, "percentage": 22.82, "elapsed_time": "2:40:41", "remaining_time": "9:03:22"} +{"current_steps": 1982, "total_steps": 8680, "loss": 0.7360142469406128, "lr": 1.8312843607402907e-06, "epoch": 0.45668202764976956, "percentage": 22.83, "elapsed_time": "2:40:46", "remaining_time": "9:03:18"} +{"current_steps": 1983, "total_steps": 8680, "loss": 0.8443512320518494, "lr": 1.8310725315517578e-06, "epoch": 0.45691244239631335, "percentage": 22.85, "elapsed_time": "2:40:51", "remaining_time": "9:03:13"} +{"current_steps": 1984, "total_steps": 8680, "loss": 0.7995656728744507, "lr": 1.830860581734147e-06, "epoch": 0.45714285714285713, "percentage": 22.86, "elapsed_time": "2:40:57", "remaining_time": "9:03:13"} +{"current_steps": 1985, "total_steps": 8680, "loss": 0.7396436929702759, "lr": 1.8306485113182229e-06, "epoch": 0.4573732718894009, "percentage": 22.87, "elapsed_time": "2:41:00", "remaining_time": "9:03:04"} +{"current_steps": 1986, "total_steps": 8680, "loss": 0.7415385246276855, "lr": 1.8304363203347668e-06, "epoch": 0.4576036866359447, "percentage": 22.88, "elapsed_time": "2:41:06", "remaining_time": "9:03:02"} +{"current_steps": 1987, "total_steps": 8680, "loss": 0.9316694736480713, "lr": 1.8302240088145784e-06, "epoch": 0.4578341013824885, "percentage": 22.89, "elapsed_time": "2:41:10", "remaining_time": "9:02:54"} +{"current_steps": 1988, "total_steps": 8680, "loss": 0.7692697048187256, "lr": 1.830011576788474e-06, "epoch": 0.45806451612903226, "percentage": 22.9, "elapsed_time": "2:41:14", "remaining_time": "9:02:46"} 
+{"current_steps": 1989, "total_steps": 8680, "loss": 0.8377524614334106, "lr": 1.829799024287288e-06, "epoch": 0.45829493087557605, "percentage": 22.91, "elapsed_time": "2:41:20", "remaining_time": "9:02:43"} +{"current_steps": 1990, "total_steps": 8680, "loss": 0.8005630970001221, "lr": 1.8295863513418724e-06, "epoch": 0.45852534562211983, "percentage": 22.93, "elapsed_time": "2:41:25", "remaining_time": "9:02:40"} +{"current_steps": 1991, "total_steps": 8680, "loss": 0.8609297275543213, "lr": 1.829373557983096e-06, "epoch": 0.4587557603686636, "percentage": 22.94, "elapsed_time": "2:41:29", "remaining_time": "9:02:34"} +{"current_steps": 1992, "total_steps": 8680, "loss": 0.9111521244049072, "lr": 1.8291606442418454e-06, "epoch": 0.4589861751152074, "percentage": 22.95, "elapsed_time": "2:41:34", "remaining_time": "9:02:27"} +{"current_steps": 1993, "total_steps": 8680, "loss": 0.7540388107299805, "lr": 1.8289476101490254e-06, "epoch": 0.4592165898617512, "percentage": 22.96, "elapsed_time": "2:41:39", "remaining_time": "9:02:25"} +{"current_steps": 1994, "total_steps": 8680, "loss": 0.9018936157226562, "lr": 1.8287344557355565e-06, "epoch": 0.4594470046082949, "percentage": 22.97, "elapsed_time": "2:41:44", "remaining_time": "9:02:20"} +{"current_steps": 1995, "total_steps": 8680, "loss": 0.918912947177887, "lr": 1.8285211810323791e-06, "epoch": 0.4596774193548387, "percentage": 22.98, "elapsed_time": "2:41:48", "remaining_time": "9:02:13"} +{"current_steps": 1996, "total_steps": 8680, "loss": 0.7777351140975952, "lr": 1.8283077860704488e-06, "epoch": 0.4599078341013825, "percentage": 23.0, "elapsed_time": "2:41:54", "remaining_time": "9:02:09"} +{"current_steps": 1997, "total_steps": 8680, "loss": 0.9283437132835388, "lr": 1.82809427088074e-06, "epoch": 0.46013824884792626, "percentage": 23.01, "elapsed_time": "2:41:58", "remaining_time": "9:02:03"} +{"current_steps": 1998, "total_steps": 8680, "loss": 0.7032894492149353, "lr": 1.8278806354942442e-06, "epoch": 
0.46036866359447004, "percentage": 23.02, "elapsed_time": "2:42:02", "remaining_time": "9:01:54"} +{"current_steps": 1999, "total_steps": 8680, "loss": 0.8392905592918396, "lr": 1.8276668799419696e-06, "epoch": 0.4605990783410138, "percentage": 23.03, "elapsed_time": "2:42:06", "remaining_time": "9:01:48"} +{"current_steps": 2000, "total_steps": 8680, "loss": 0.8059369325637817, "lr": 1.8274530042549434e-06, "epoch": 0.4608294930875576, "percentage": 23.04, "elapsed_time": "2:42:11", "remaining_time": "9:01:43"} +{"current_steps": 2001, "total_steps": 8680, "loss": 0.7738519906997681, "lr": 1.827239008464209e-06, "epoch": 0.4610599078341014, "percentage": 23.05, "elapsed_time": "2:42:18", "remaining_time": "9:01:45"} +{"current_steps": 2002, "total_steps": 8680, "loss": 0.9189014434814453, "lr": 1.8270248926008275e-06, "epoch": 0.4612903225806452, "percentage": 23.06, "elapsed_time": "2:42:22", "remaining_time": "9:01:37"} +{"current_steps": 2003, "total_steps": 8680, "loss": 0.8878552913665771, "lr": 1.8268106566958782e-06, "epoch": 0.46152073732718896, "percentage": 23.08, "elapsed_time": "2:42:28", "remaining_time": "9:01:35"} +{"current_steps": 2004, "total_steps": 8680, "loss": 0.9786058664321899, "lr": 1.826596300780456e-06, "epoch": 0.46175115207373274, "percentage": 23.09, "elapsed_time": "2:42:34", "remaining_time": "9:01:34"} +{"current_steps": 2005, "total_steps": 8680, "loss": 0.7887653112411499, "lr": 1.8263818248856754e-06, "epoch": 0.4619815668202765, "percentage": 23.1, "elapsed_time": "2:42:38", "remaining_time": "9:01:28"} +{"current_steps": 2006, "total_steps": 8680, "loss": 0.8773549795150757, "lr": 1.8261672290426668e-06, "epoch": 0.46221198156682025, "percentage": 23.11, "elapsed_time": "2:42:43", "remaining_time": "9:01:22"} +{"current_steps": 2007, "total_steps": 8680, "loss": 0.6929831504821777, "lr": 1.8259525132825786e-06, "epoch": 0.46244239631336403, "percentage": 23.12, "elapsed_time": "2:42:49", "remaining_time": "9:01:22"} 
+{"current_steps": 2008, "total_steps": 8680, "loss": 0.9438232183456421, "lr": 1.8257376776365765e-06, "epoch": 0.4626728110599078, "percentage": 23.13, "elapsed_time": "2:42:55", "remaining_time": "9:01:20"} +{"current_steps": 2009, "total_steps": 8680, "loss": 0.7559594511985779, "lr": 1.8255227221358435e-06, "epoch": 0.4629032258064516, "percentage": 23.15, "elapsed_time": "2:43:00", "remaining_time": "9:01:16"} +{"current_steps": 2010, "total_steps": 8680, "loss": 0.8990212678909302, "lr": 1.8253076468115805e-06, "epoch": 0.4631336405529954, "percentage": 23.16, "elapsed_time": "2:43:04", "remaining_time": "9:01:10"} +{"current_steps": 2011, "total_steps": 8680, "loss": 0.8638331890106201, "lr": 1.825092451695005e-06, "epoch": 0.46336405529953917, "percentage": 23.17, "elapsed_time": "2:43:08", "remaining_time": "9:01:01"} +{"current_steps": 2012, "total_steps": 8680, "loss": 0.9262570142745972, "lr": 1.8248771368173522e-06, "epoch": 0.46359447004608295, "percentage": 23.18, "elapsed_time": "2:43:14", "remaining_time": "9:01:00"} +{"current_steps": 2013, "total_steps": 8680, "loss": 0.7412514090538025, "lr": 1.8246617022098754e-06, "epoch": 0.46382488479262673, "percentage": 23.19, "elapsed_time": "2:43:19", "remaining_time": "9:00:57"} +{"current_steps": 2014, "total_steps": 8680, "loss": 0.8680287599563599, "lr": 1.8244461479038437e-06, "epoch": 0.4640552995391705, "percentage": 23.2, "elapsed_time": "2:43:23", "remaining_time": "9:00:48"} +{"current_steps": 2015, "total_steps": 8680, "loss": 0.7774302959442139, "lr": 1.8242304739305457e-06, "epoch": 0.4642857142857143, "percentage": 23.21, "elapsed_time": "2:43:29", "remaining_time": "9:00:47"} +{"current_steps": 2016, "total_steps": 8680, "loss": 0.9278442859649658, "lr": 1.824014680321285e-06, "epoch": 0.4645161290322581, "percentage": 23.23, "elapsed_time": "2:43:35", "remaining_time": "9:00:44"} +{"current_steps": 2017, "total_steps": 8680, "loss": 0.9617106914520264, "lr": 1.8237987671073846e-06, 
"epoch": 0.46474654377880187, "percentage": 23.24, "elapsed_time": "2:43:39", "remaining_time": "9:00:38"} +{"current_steps": 2018, "total_steps": 8680, "loss": 0.7983255386352539, "lr": 1.8235827343201838e-06, "epoch": 0.4649769585253456, "percentage": 23.25, "elapsed_time": "2:43:44", "remaining_time": "9:00:33"} +{"current_steps": 2019, "total_steps": 8680, "loss": 0.7966747283935547, "lr": 1.8233665819910393e-06, "epoch": 0.4652073732718894, "percentage": 23.26, "elapsed_time": "2:43:48", "remaining_time": "9:00:25"} +{"current_steps": 2020, "total_steps": 8680, "loss": 0.8977803587913513, "lr": 1.8231503101513253e-06, "epoch": 0.46543778801843316, "percentage": 23.27, "elapsed_time": "2:43:52", "remaining_time": "9:00:17"} +{"current_steps": 2021, "total_steps": 8680, "loss": 0.7098231911659241, "lr": 1.8229339188324334e-06, "epoch": 0.46566820276497695, "percentage": 23.28, "elapsed_time": "2:43:57", "remaining_time": "9:00:12"} +{"current_steps": 2022, "total_steps": 8680, "loss": 0.6402776837348938, "lr": 1.822717408065773e-06, "epoch": 0.46589861751152073, "percentage": 23.29, "elapsed_time": "2:44:02", "remaining_time": "9:00:10"} +{"current_steps": 2023, "total_steps": 8680, "loss": 0.797479510307312, "lr": 1.8225007778827698e-06, "epoch": 0.4661290322580645, "percentage": 23.31, "elapsed_time": "2:44:08", "remaining_time": "9:00:06"} +{"current_steps": 2024, "total_steps": 8680, "loss": 0.8205317258834839, "lr": 1.8222840283148675e-06, "epoch": 0.4663594470046083, "percentage": 23.32, "elapsed_time": "2:44:13", "remaining_time": "9:00:02"} +{"current_steps": 2025, "total_steps": 8680, "loss": 0.8123108148574829, "lr": 1.822067159393527e-06, "epoch": 0.4665898617511521, "percentage": 23.33, "elapsed_time": "2:44:18", "remaining_time": "8:59:57"} +{"current_steps": 2026, "total_steps": 8680, "loss": 0.9103116989135742, "lr": 1.8218501711502262e-06, "epoch": 0.46682027649769586, "percentage": 23.34, "elapsed_time": "2:44:23", "remaining_time": "8:59:53"} 
+{"current_steps": 2027, "total_steps": 8680, "loss": 0.725040078163147, "lr": 1.8216330636164617e-06, "epoch": 0.46705069124423965, "percentage": 23.35, "elapsed_time": "2:44:28", "remaining_time": "8:59:50"} +{"current_steps": 2028, "total_steps": 8680, "loss": 0.8598217964172363, "lr": 1.8214158368237456e-06, "epoch": 0.46728110599078343, "percentage": 23.36, "elapsed_time": "2:44:33", "remaining_time": "8:59:45"} +{"current_steps": 2029, "total_steps": 8680, "loss": 0.9139465093612671, "lr": 1.821198490803608e-06, "epoch": 0.4675115207373272, "percentage": 23.38, "elapsed_time": "2:44:37", "remaining_time": "8:59:36"} +{"current_steps": 2030, "total_steps": 8680, "loss": 0.8331620097160339, "lr": 1.8209810255875966e-06, "epoch": 0.46774193548387094, "percentage": 23.39, "elapsed_time": "2:44:42", "remaining_time": "8:59:32"} +{"current_steps": 2031, "total_steps": 8680, "loss": 0.7901387810707092, "lr": 1.8207634412072764e-06, "epoch": 0.4679723502304147, "percentage": 23.4, "elapsed_time": "2:44:47", "remaining_time": "8:59:28"} +{"current_steps": 2032, "total_steps": 8680, "loss": 0.7651060819625854, "lr": 1.8205457376942288e-06, "epoch": 0.4682027649769585, "percentage": 23.41, "elapsed_time": "2:44:53", "remaining_time": "8:59:27"} +{"current_steps": 2033, "total_steps": 8680, "loss": 0.7382134199142456, "lr": 1.820327915080054e-06, "epoch": 0.4684331797235023, "percentage": 23.42, "elapsed_time": "2:44:58", "remaining_time": "8:59:23"} +{"current_steps": 2034, "total_steps": 8680, "loss": 0.7851507067680359, "lr": 1.8201099733963682e-06, "epoch": 0.4686635944700461, "percentage": 23.43, "elapsed_time": "2:45:03", "remaining_time": "8:59:20"} +{"current_steps": 2035, "total_steps": 8680, "loss": 0.9357708692550659, "lr": 1.8198919126748056e-06, "epoch": 0.46889400921658986, "percentage": 23.44, "elapsed_time": "2:45:08", "remaining_time": "8:59:16"} +{"current_steps": 2036, "total_steps": 8680, "loss": 0.8188502788543701, "lr": 1.819673732947017e-06, 
"epoch": 0.46912442396313364, "percentage": 23.46, "elapsed_time": "2:45:12", "remaining_time": "8:59:07"} +{"current_steps": 2037, "total_steps": 8680, "loss": 0.81590735912323, "lr": 1.8194554342446712e-06, "epoch": 0.4693548387096774, "percentage": 23.47, "elapsed_time": "2:45:18", "remaining_time": "8:59:04"} +{"current_steps": 2038, "total_steps": 8680, "loss": 0.6879743933677673, "lr": 1.8192370165994544e-06, "epoch": 0.4695852534562212, "percentage": 23.48, "elapsed_time": "2:45:23", "remaining_time": "8:59:02"} +{"current_steps": 2039, "total_steps": 8680, "loss": 0.9287742376327515, "lr": 1.8190184800430686e-06, "epoch": 0.469815668202765, "percentage": 23.49, "elapsed_time": "2:45:27", "remaining_time": "8:58:53"} +{"current_steps": 2040, "total_steps": 8680, "loss": 0.9625484943389893, "lr": 1.818799824607235e-06, "epoch": 0.4700460829493088, "percentage": 23.5, "elapsed_time": "2:45:31", "remaining_time": "8:58:45"} +{"current_steps": 2041, "total_steps": 8680, "loss": 0.8267782926559448, "lr": 1.8185810503236904e-06, "epoch": 0.47027649769585256, "percentage": 23.51, "elapsed_time": "2:45:35", "remaining_time": "8:58:39"} +{"current_steps": 2042, "total_steps": 8680, "loss": 0.8827054500579834, "lr": 1.8183621572241904e-06, "epoch": 0.4705069124423963, "percentage": 23.53, "elapsed_time": "2:45:40", "remaining_time": "8:58:32"} +{"current_steps": 2043, "total_steps": 8680, "loss": 0.7755721807479858, "lr": 1.8181431453405067e-06, "epoch": 0.47073732718894007, "percentage": 23.54, "elapsed_time": "2:45:45", "remaining_time": "8:58:29"} +{"current_steps": 2044, "total_steps": 8680, "loss": 0.8320283889770508, "lr": 1.8179240147044285e-06, "epoch": 0.47096774193548385, "percentage": 23.55, "elapsed_time": "2:45:49", "remaining_time": "8:58:21"} +{"current_steps": 2045, "total_steps": 8680, "loss": 0.8737574815750122, "lr": 1.8177047653477619e-06, "epoch": 0.47119815668202764, "percentage": 23.56, "elapsed_time": "2:45:54", "remaining_time": "8:58:17"} 
+{"current_steps": 2046, "total_steps": 8680, "loss": 0.7007719278335571, "lr": 1.8174853973023317e-06, "epoch": 0.4714285714285714, "percentage": 23.57, "elapsed_time": "2:46:00", "remaining_time": "8:58:15"} +{"current_steps": 2047, "total_steps": 8680, "loss": 0.8062577247619629, "lr": 1.817265910599978e-06, "epoch": 0.4716589861751152, "percentage": 23.58, "elapsed_time": "2:46:04", "remaining_time": "8:58:08"} +{"current_steps": 2048, "total_steps": 8680, "loss": 0.7059667110443115, "lr": 1.8170463052725594e-06, "epoch": 0.471889400921659, "percentage": 23.59, "elapsed_time": "2:46:09", "remaining_time": "8:58:05"} +{"current_steps": 2049, "total_steps": 8680, "loss": 0.9025841951370239, "lr": 1.816826581351951e-06, "epoch": 0.47211981566820277, "percentage": 23.61, "elapsed_time": "2:46:15", "remaining_time": "8:58:01"} +{"current_steps": 2050, "total_steps": 8680, "loss": 0.7534186840057373, "lr": 1.8166067388700458e-06, "epoch": 0.47235023041474655, "percentage": 23.62, "elapsed_time": "2:46:19", "remaining_time": "8:57:53"} +{"current_steps": 2051, "total_steps": 8680, "loss": 0.9447616338729858, "lr": 1.8163867778587534e-06, "epoch": 0.47258064516129034, "percentage": 23.63, "elapsed_time": "2:46:24", "remaining_time": "8:57:50"} +{"current_steps": 2052, "total_steps": 8680, "loss": 0.7092128992080688, "lr": 1.8161666983500012e-06, "epoch": 0.4728110599078341, "percentage": 23.64, "elapsed_time": "2:46:29", "remaining_time": "8:57:46"} +{"current_steps": 2053, "total_steps": 8680, "loss": 0.8689497113227844, "lr": 1.815946500375733e-06, "epoch": 0.4730414746543779, "percentage": 23.65, "elapsed_time": "2:46:34", "remaining_time": "8:57:40"} +{"current_steps": 2054, "total_steps": 8680, "loss": 0.9298638105392456, "lr": 1.8157261839679105e-06, "epoch": 0.4732718894009217, "percentage": 23.66, "elapsed_time": "2:46:40", "remaining_time": "8:57:39"} +{"current_steps": 2055, "total_steps": 8680, "loss": 0.7138030529022217, "lr": 1.8155057491585125e-06, 
"epoch": 0.4735023041474654, "percentage": 23.68, "elapsed_time": "2:46:45", "remaining_time": "8:57:37"} +{"current_steps": 2056, "total_steps": 8680, "loss": 0.825221836566925, "lr": 1.815285195979534e-06, "epoch": 0.4737327188940092, "percentage": 23.69, "elapsed_time": "2:46:49", "remaining_time": "8:57:29"} +{"current_steps": 2057, "total_steps": 8680, "loss": 0.8643208742141724, "lr": 1.8150645244629891e-06, "epoch": 0.473963133640553, "percentage": 23.7, "elapsed_time": "2:46:55", "remaining_time": "8:57:27"} +{"current_steps": 2058, "total_steps": 8680, "loss": 0.9611828327178955, "lr": 1.8148437346409073e-06, "epoch": 0.47419354838709676, "percentage": 23.71, "elapsed_time": "2:46:59", "remaining_time": "8:57:20"} +{"current_steps": 2059, "total_steps": 8680, "loss": 0.8609912991523743, "lr": 1.8146228265453363e-06, "epoch": 0.47442396313364055, "percentage": 23.72, "elapsed_time": "2:47:04", "remaining_time": "8:57:15"} +{"current_steps": 2060, "total_steps": 8680, "loss": 0.8277603387832642, "lr": 1.8144018002083404e-06, "epoch": 0.47465437788018433, "percentage": 23.73, "elapsed_time": "2:47:08", "remaining_time": "8:57:06"} +{"current_steps": 2061, "total_steps": 8680, "loss": 0.8601360321044922, "lr": 1.814180655662001e-06, "epoch": 0.4748847926267281, "percentage": 23.74, "elapsed_time": "2:47:13", "remaining_time": "8:57:02"} +{"current_steps": 2062, "total_steps": 8680, "loss": 0.8454653024673462, "lr": 1.8139593929384178e-06, "epoch": 0.4751152073732719, "percentage": 23.76, "elapsed_time": "2:47:16", "remaining_time": "8:56:53"} +{"current_steps": 2063, "total_steps": 8680, "loss": 0.870082437992096, "lr": 1.8137380120697059e-06, "epoch": 0.4753456221198157, "percentage": 23.77, "elapsed_time": "2:47:21", "remaining_time": "8:56:46"} +{"current_steps": 2064, "total_steps": 8680, "loss": 0.8064073324203491, "lr": 1.8135165130879988e-06, "epoch": 0.47557603686635946, "percentage": 23.78, "elapsed_time": "2:47:25", "remaining_time": "8:56:40"} 
+{"current_steps": 2065, "total_steps": 8680, "loss": 0.829608678817749, "lr": 1.813294896025447e-06, "epoch": 0.47580645161290325, "percentage": 23.79, "elapsed_time": "2:47:31", "remaining_time": "8:56:39"} +{"current_steps": 2066, "total_steps": 8680, "loss": 0.8185791969299316, "lr": 1.8130731609142176e-06, "epoch": 0.47603686635944703, "percentage": 23.8, "elapsed_time": "2:47:36", "remaining_time": "8:56:34"} +{"current_steps": 2067, "total_steps": 8680, "loss": 0.8855293989181519, "lr": 1.812851307786495e-06, "epoch": 0.47626728110599076, "percentage": 23.81, "elapsed_time": "2:47:40", "remaining_time": "8:56:26"} +{"current_steps": 2068, "total_steps": 8680, "loss": 0.7495461106300354, "lr": 1.8126293366744815e-06, "epoch": 0.47649769585253454, "percentage": 23.82, "elapsed_time": "2:47:46", "remaining_time": "8:56:25"} +{"current_steps": 2069, "total_steps": 8680, "loss": 0.9435098171234131, "lr": 1.8124072476103956e-06, "epoch": 0.4767281105990783, "percentage": 23.84, "elapsed_time": "2:47:51", "remaining_time": "8:56:22"} +{"current_steps": 2070, "total_steps": 8680, "loss": 0.9299448728561401, "lr": 1.8121850406264727e-06, "epoch": 0.4769585253456221, "percentage": 23.85, "elapsed_time": "2:47:57", "remaining_time": "8:56:18"} +{"current_steps": 2071, "total_steps": 8680, "loss": 0.9011991024017334, "lr": 1.8119627157549665e-06, "epoch": 0.4771889400921659, "percentage": 23.86, "elapsed_time": "2:48:01", "remaining_time": "8:56:12"} +{"current_steps": 2072, "total_steps": 8680, "loss": 0.7326598167419434, "lr": 1.8117402730281476e-06, "epoch": 0.4774193548387097, "percentage": 23.87, "elapsed_time": "2:48:08", "remaining_time": "8:56:13"} +{"current_steps": 2073, "total_steps": 8680, "loss": 0.8137445449829102, "lr": 1.8115177124783024e-06, "epoch": 0.47764976958525346, "percentage": 23.88, "elapsed_time": "2:48:11", "remaining_time": "8:56:04"} +{"current_steps": 2074, "total_steps": 8680, "loss": 0.8653519153594971, "lr": 1.811295034137735e-06, 
"epoch": 0.47788018433179724, "percentage": 23.89, "elapsed_time": "2:48:15", "remaining_time": "8:55:56"} +{"current_steps": 2075, "total_steps": 8680, "loss": 0.9140677452087402, "lr": 1.811072238038768e-06, "epoch": 0.478110599078341, "percentage": 23.91, "elapsed_time": "2:48:21", "remaining_time": "8:55:53"} +{"current_steps": 2076, "total_steps": 8680, "loss": 0.8878934979438782, "lr": 1.810849324213739e-06, "epoch": 0.4783410138248848, "percentage": 23.92, "elapsed_time": "2:48:25", "remaining_time": "8:55:47"} +{"current_steps": 2077, "total_steps": 8680, "loss": 0.8238190412521362, "lr": 1.8106262926950045e-06, "epoch": 0.4785714285714286, "percentage": 23.93, "elapsed_time": "2:48:31", "remaining_time": "8:55:46"} +{"current_steps": 2078, "total_steps": 8680, "loss": 0.7722488641738892, "lr": 1.8104031435149362e-06, "epoch": 0.4788018433179724, "percentage": 23.94, "elapsed_time": "2:48:37", "remaining_time": "8:55:44"} +{"current_steps": 2079, "total_steps": 8680, "loss": 0.9338192939758301, "lr": 1.8101798767059248e-06, "epoch": 0.4790322580645161, "percentage": 23.95, "elapsed_time": "2:48:41", "remaining_time": "8:55:36"} +{"current_steps": 2080, "total_steps": 8680, "loss": 0.7342168688774109, "lr": 1.8099564923003767e-06, "epoch": 0.4792626728110599, "percentage": 23.96, "elapsed_time": "2:48:47", "remaining_time": "8:55:34"} +{"current_steps": 2081, "total_steps": 8680, "loss": 0.8445772528648376, "lr": 1.809732990330716e-06, "epoch": 0.47949308755760367, "percentage": 23.97, "elapsed_time": "2:48:52", "remaining_time": "8:55:30"} +{"current_steps": 2082, "total_steps": 8680, "loss": 0.825678825378418, "lr": 1.8095093708293839e-06, "epoch": 0.47972350230414745, "percentage": 23.99, "elapsed_time": "2:48:57", "remaining_time": "8:55:27"} +{"current_steps": 2083, "total_steps": 8680, "loss": 0.7995405197143555, "lr": 1.8092856338288381e-06, "epoch": 0.47995391705069124, "percentage": 24.0, "elapsed_time": "2:49:01", "remaining_time": "8:55:19"} 
+{"current_steps": 2084, "total_steps": 8680, "loss": 0.7811745405197144, "lr": 1.8090617793615536e-06, "epoch": 0.480184331797235, "percentage": 24.01, "elapsed_time": "2:49:06", "remaining_time": "8:55:13"} +{"current_steps": 2085, "total_steps": 8680, "loss": 0.842727780342102, "lr": 1.8088378074600231e-06, "epoch": 0.4804147465437788, "percentage": 24.02, "elapsed_time": "2:49:13", "remaining_time": "8:55:16"} +{"current_steps": 2086, "total_steps": 8680, "loss": 0.840941309928894, "lr": 1.808613718156756e-06, "epoch": 0.4806451612903226, "percentage": 24.03, "elapsed_time": "2:49:18", "remaining_time": "8:55:12"} +{"current_steps": 2087, "total_steps": 8680, "loss": 0.9024466872215271, "lr": 1.808389511484278e-06, "epoch": 0.48087557603686637, "percentage": 24.04, "elapsed_time": "2:49:23", "remaining_time": "8:55:08"} +{"current_steps": 2088, "total_steps": 8680, "loss": 0.9112771153450012, "lr": 1.8081651874751325e-06, "epoch": 0.48110599078341015, "percentage": 24.06, "elapsed_time": "2:49:28", "remaining_time": "8:55:04"} +{"current_steps": 2089, "total_steps": 8680, "loss": 0.834719181060791, "lr": 1.8079407461618797e-06, "epoch": 0.48133640552995394, "percentage": 24.07, "elapsed_time": "2:49:34", "remaining_time": "8:55:02"} +{"current_steps": 2090, "total_steps": 8680, "loss": 0.8472555875778198, "lr": 1.8077161875770971e-06, "epoch": 0.4815668202764977, "percentage": 24.08, "elapsed_time": "2:49:39", "remaining_time": "8:54:56"} +{"current_steps": 2091, "total_steps": 8680, "loss": 0.8459140062332153, "lr": 1.8074915117533796e-06, "epoch": 0.48179723502304145, "percentage": 24.09, "elapsed_time": "2:49:45", "remaining_time": "8:54:54"} +{"current_steps": 2092, "total_steps": 8680, "loss": 0.6570066213607788, "lr": 1.807266718723338e-06, "epoch": 0.48202764976958523, "percentage": 24.1, "elapsed_time": "2:49:50", "remaining_time": "8:54:50"} +{"current_steps": 2093, "total_steps": 8680, "loss": 0.8897342681884766, "lr": 1.8070418085196006e-06, "epoch": 
0.482258064516129, "percentage": 24.11, "elapsed_time": "2:49:54", "remaining_time": "8:54:45"} +{"current_steps": 2094, "total_steps": 8680, "loss": 0.8339060544967651, "lr": 1.8068167811748132e-06, "epoch": 0.4824884792626728, "percentage": 24.12, "elapsed_time": "2:49:58", "remaining_time": "8:54:37"} +{"current_steps": 2095, "total_steps": 8680, "loss": 0.7972484827041626, "lr": 1.8065916367216383e-06, "epoch": 0.4827188940092166, "percentage": 24.14, "elapsed_time": "2:50:04", "remaining_time": "8:54:34"} +{"current_steps": 2096, "total_steps": 8680, "loss": 0.7894760966300964, "lr": 1.806366375192755e-06, "epoch": 0.48294930875576036, "percentage": 24.15, "elapsed_time": "2:50:09", "remaining_time": "8:54:29"} +{"current_steps": 2097, "total_steps": 8680, "loss": 0.713944673538208, "lr": 1.8061409966208597e-06, "epoch": 0.48317972350230415, "percentage": 24.16, "elapsed_time": "2:50:14", "remaining_time": "8:54:24"} +{"current_steps": 2098, "total_steps": 8680, "loss": 0.7832180261611938, "lr": 1.8059155010386662e-06, "epoch": 0.48341013824884793, "percentage": 24.17, "elapsed_time": "2:50:18", "remaining_time": "8:54:18"} +{"current_steps": 2099, "total_steps": 8680, "loss": 0.8873809576034546, "lr": 1.8056898884789043e-06, "epoch": 0.4836405529953917, "percentage": 24.18, "elapsed_time": "2:50:24", "remaining_time": "8:54:17"} +{"current_steps": 2100, "total_steps": 8680, "loss": 0.8174929618835449, "lr": 1.8054641589743218e-06, "epoch": 0.4838709677419355, "percentage": 24.19, "elapsed_time": "2:50:29", "remaining_time": "8:54:11"} +{"current_steps": 2101, "total_steps": 8680, "loss": 0.876921534538269, "lr": 1.805238312557683e-06, "epoch": 0.4841013824884793, "percentage": 24.21, "elapsed_time": "2:50:36", "remaining_time": "8:54:13"} +{"current_steps": 2102, "total_steps": 8680, "loss": 0.9455937147140503, "lr": 1.8050123492617693e-06, "epoch": 0.48433179723502306, "percentage": 24.22, "elapsed_time": "2:50:42", "remaining_time": "8:54:11"} 
+{"current_steps": 2103, "total_steps": 8680, "loss": 0.8146508932113647, "lr": 1.8047862691193784e-06, "epoch": 0.4845622119815668, "percentage": 24.23, "elapsed_time": "2:50:47", "remaining_time": "8:54:08"} +{"current_steps": 2104, "total_steps": 8680, "loss": 0.8513495326042175, "lr": 1.8045600721633262e-06, "epoch": 0.4847926267281106, "percentage": 24.24, "elapsed_time": "2:50:52", "remaining_time": "8:54:03"} +{"current_steps": 2105, "total_steps": 8680, "loss": 0.8430027961730957, "lr": 1.8043337584264443e-06, "epoch": 0.48502304147465436, "percentage": 24.25, "elapsed_time": "2:50:57", "remaining_time": "8:53:59"} +{"current_steps": 2106, "total_steps": 8680, "loss": 0.7683960199356079, "lr": 1.8041073279415826e-06, "epoch": 0.48525345622119814, "percentage": 24.26, "elapsed_time": "2:51:03", "remaining_time": "8:53:58"} +{"current_steps": 2107, "total_steps": 8680, "loss": 0.7099664211273193, "lr": 1.8038807807416067e-06, "epoch": 0.4854838709677419, "percentage": 24.27, "elapsed_time": "2:51:09", "remaining_time": "8:53:55"} +{"current_steps": 2108, "total_steps": 8680, "loss": 0.8046330213546753, "lr": 1.8036541168593994e-06, "epoch": 0.4857142857142857, "percentage": 24.29, "elapsed_time": "2:51:15", "remaining_time": "8:53:54"} +{"current_steps": 2109, "total_steps": 8680, "loss": 0.8387504816055298, "lr": 1.803427336327861e-06, "epoch": 0.4859447004608295, "percentage": 24.3, "elapsed_time": "2:51:20", "remaining_time": "8:53:50"} +{"current_steps": 2110, "total_steps": 8680, "loss": 0.883955717086792, "lr": 1.8032004391799085e-06, "epoch": 0.4861751152073733, "percentage": 24.31, "elapsed_time": "2:51:25", "remaining_time": "8:53:44"} +{"current_steps": 2111, "total_steps": 8680, "loss": 0.7622070908546448, "lr": 1.8029734254484756e-06, "epoch": 0.48640552995391706, "percentage": 24.32, "elapsed_time": "2:51:29", "remaining_time": "8:53:38"} +{"current_steps": 2112, "total_steps": 8680, "loss": 0.6625584363937378, "lr": 1.802746295166513e-06, 
"epoch": 0.48663594470046084, "percentage": 24.33, "elapsed_time": "2:51:35", "remaining_time": "8:53:35"} +{"current_steps": 2113, "total_steps": 8680, "loss": 0.8232327699661255, "lr": 1.8025190483669878e-06, "epoch": 0.4868663594470046, "percentage": 24.34, "elapsed_time": "2:51:39", "remaining_time": "8:53:28"} +{"current_steps": 2114, "total_steps": 8680, "loss": 0.9083148241043091, "lr": 1.8022916850828857e-06, "epoch": 0.4870967741935484, "percentage": 24.35, "elapsed_time": "2:51:43", "remaining_time": "8:53:23"} +{"current_steps": 2115, "total_steps": 8680, "loss": 0.8248398303985596, "lr": 1.8020642053472074e-06, "epoch": 0.4873271889400922, "percentage": 24.37, "elapsed_time": "2:51:48", "remaining_time": "8:53:16"} +{"current_steps": 2116, "total_steps": 8680, "loss": 0.8055423498153687, "lr": 1.8018366091929717e-06, "epoch": 0.4875576036866359, "percentage": 24.38, "elapsed_time": "2:51:53", "remaining_time": "8:53:13"} +{"current_steps": 2117, "total_steps": 8680, "loss": 0.8716787695884705, "lr": 1.8016088966532135e-06, "epoch": 0.4877880184331797, "percentage": 24.39, "elapsed_time": "2:51:57", "remaining_time": "8:53:07"} +{"current_steps": 2118, "total_steps": 8680, "loss": 0.8530780673027039, "lr": 1.801381067760985e-06, "epoch": 0.4880184331797235, "percentage": 24.4, "elapsed_time": "2:52:02", "remaining_time": "8:53:00"} +{"current_steps": 2119, "total_steps": 8680, "loss": 0.7958484888076782, "lr": 1.8011531225493557e-06, "epoch": 0.48824884792626727, "percentage": 24.41, "elapsed_time": "2:52:06", "remaining_time": "8:52:53"} +{"current_steps": 2120, "total_steps": 8680, "loss": 0.8312872648239136, "lr": 1.800925061051411e-06, "epoch": 0.48847926267281105, "percentage": 24.42, "elapsed_time": "2:52:11", "remaining_time": "8:52:47"} +{"current_steps": 2121, "total_steps": 8680, "loss": 0.8097391128540039, "lr": 1.8006968833002541e-06, "epoch": 0.48870967741935484, "percentage": 24.44, "elapsed_time": "2:52:14", "remaining_time": "8:52:39"} 
+{"current_steps": 2122, "total_steps": 8680, "loss": 0.8636112213134766, "lr": 1.8004685893290046e-06, "epoch": 0.4889400921658986, "percentage": 24.45, "elapsed_time": "2:52:20", "remaining_time": "8:52:36"} +{"current_steps": 2123, "total_steps": 8680, "loss": 0.9122721552848816, "lr": 1.800240179170799e-06, "epoch": 0.4891705069124424, "percentage": 24.46, "elapsed_time": "2:52:24", "remaining_time": "8:52:28"} +{"current_steps": 2124, "total_steps": 8680, "loss": 0.8172330856323242, "lr": 1.8000116528587907e-06, "epoch": 0.4894009216589862, "percentage": 24.47, "elapsed_time": "2:52:28", "remaining_time": "8:52:22"} +{"current_steps": 2125, "total_steps": 8680, "loss": 0.7377575635910034, "lr": 1.7997830104261502e-06, "epoch": 0.48963133640552997, "percentage": 24.48, "elapsed_time": "2:52:32", "remaining_time": "8:52:14"} +{"current_steps": 2126, "total_steps": 8680, "loss": 0.7278136014938354, "lr": 1.7995542519060644e-06, "epoch": 0.48986175115207375, "percentage": 24.49, "elapsed_time": "2:52:37", "remaining_time": "8:52:08"} +{"current_steps": 2127, "total_steps": 8680, "loss": 0.8977715969085693, "lr": 1.7993253773317374e-06, "epoch": 0.49009216589861754, "percentage": 24.5, "elapsed_time": "2:52:42", "remaining_time": "8:52:04"} +{"current_steps": 2128, "total_steps": 8680, "loss": 0.789979100227356, "lr": 1.7990963867363902e-06, "epoch": 0.49032258064516127, "percentage": 24.52, "elapsed_time": "2:52:46", "remaining_time": "8:51:57"} +{"current_steps": 2129, "total_steps": 8680, "loss": 0.8304328322410583, "lr": 1.7988672801532602e-06, "epoch": 0.49055299539170505, "percentage": 24.53, "elapsed_time": "2:52:51", "remaining_time": "8:51:52"} +{"current_steps": 2130, "total_steps": 8680, "loss": 0.7597516179084778, "lr": 1.7986380576156019e-06, "epoch": 0.49078341013824883, "percentage": 24.54, "elapsed_time": "2:52:55", "remaining_time": "8:51:45"} +{"current_steps": 2131, "total_steps": 8680, "loss": 0.661639928817749, "lr": 1.7984087191566873e-06, 
"epoch": 0.4910138248847926, "percentage": 24.55, "elapsed_time": "2:53:00", "remaining_time": "8:51:40"} +{"current_steps": 2132, "total_steps": 8680, "loss": 0.7871333360671997, "lr": 1.7981792648098035e-06, "epoch": 0.4912442396313364, "percentage": 24.56, "elapsed_time": "2:53:05", "remaining_time": "8:51:35"} +{"current_steps": 2133, "total_steps": 8680, "loss": 0.8166402578353882, "lr": 1.7979496946082565e-06, "epoch": 0.4914746543778802, "percentage": 24.57, "elapsed_time": "2:53:09", "remaining_time": "8:51:28"} +{"current_steps": 2134, "total_steps": 8680, "loss": 0.7112412452697754, "lr": 1.7977200085853674e-06, "epoch": 0.49170506912442397, "percentage": 24.59, "elapsed_time": "2:53:14", "remaining_time": "8:51:24"} +{"current_steps": 2135, "total_steps": 8680, "loss": 0.8358132839202881, "lr": 1.7974902067744752e-06, "epoch": 0.49193548387096775, "percentage": 24.6, "elapsed_time": "2:53:18", "remaining_time": "8:51:18"} +{"current_steps": 2136, "total_steps": 8680, "loss": 0.8544377088546753, "lr": 1.7972602892089353e-06, "epoch": 0.49216589861751153, "percentage": 24.61, "elapsed_time": "2:53:24", "remaining_time": "8:51:14"} +{"current_steps": 2137, "total_steps": 8680, "loss": 1.0105161666870117, "lr": 1.7970302559221197e-06, "epoch": 0.4923963133640553, "percentage": 24.62, "elapsed_time": "2:53:28", "remaining_time": "8:51:06"} +{"current_steps": 2138, "total_steps": 8680, "loss": 0.7666197419166565, "lr": 1.7968001069474176e-06, "epoch": 0.4926267281105991, "percentage": 24.63, "elapsed_time": "2:53:32", "remaining_time": "8:51:01"} +{"current_steps": 2139, "total_steps": 8680, "loss": 0.9250742197036743, "lr": 1.7965698423182349e-06, "epoch": 0.4928571428571429, "percentage": 24.64, "elapsed_time": "2:53:38", "remaining_time": "8:50:58"} +{"current_steps": 2140, "total_steps": 8680, "loss": 0.8269995450973511, "lr": 1.7963394620679942e-06, "epoch": 0.4930875576036866, "percentage": 24.65, "elapsed_time": "2:53:42", "remaining_time": "8:50:52"} 
+{"current_steps": 2141, "total_steps": 8680, "loss": 1.0431339740753174, "lr": 1.7961089662301346e-06, "epoch": 0.4933179723502304, "percentage": 24.67, "elapsed_time": "2:53:47", "remaining_time": "8:50:48"} +{"current_steps": 2142, "total_steps": 8680, "loss": 0.7474809288978577, "lr": 1.7958783548381125e-06, "epoch": 0.4935483870967742, "percentage": 24.68, "elapsed_time": "2:53:52", "remaining_time": "8:50:42"} +{"current_steps": 2143, "total_steps": 8680, "loss": 0.8850520849227905, "lr": 1.7956476279254007e-06, "epoch": 0.49377880184331796, "percentage": 24.69, "elapsed_time": "2:53:57", "remaining_time": "8:50:38"} +{"current_steps": 2144, "total_steps": 8680, "loss": 0.8898880481719971, "lr": 1.7954167855254893e-06, "epoch": 0.49400921658986174, "percentage": 24.7, "elapsed_time": "2:54:01", "remaining_time": "8:50:31"} +{"current_steps": 2145, "total_steps": 8680, "loss": 0.8718239068984985, "lr": 1.7951858276718842e-06, "epoch": 0.4942396313364055, "percentage": 24.71, "elapsed_time": "2:54:05", "remaining_time": "8:50:23"} +{"current_steps": 2146, "total_steps": 8680, "loss": 0.8407484292984009, "lr": 1.794954754398109e-06, "epoch": 0.4944700460829493, "percentage": 24.72, "elapsed_time": "2:54:09", "remaining_time": "8:50:16"} +{"current_steps": 2147, "total_steps": 8680, "loss": 0.8453764915466309, "lr": 1.7947235657377036e-06, "epoch": 0.4947004608294931, "percentage": 24.74, "elapsed_time": "2:54:14", "remaining_time": "8:50:11"} +{"current_steps": 2148, "total_steps": 8680, "loss": 0.5795568227767944, "lr": 1.794492261724225e-06, "epoch": 0.4949308755760369, "percentage": 24.75, "elapsed_time": "2:54:19", "remaining_time": "8:50:06"} +{"current_steps": 2149, "total_steps": 8680, "loss": 0.8601347208023071, "lr": 1.794260842391246e-06, "epoch": 0.49516129032258066, "percentage": 24.76, "elapsed_time": "2:54:23", "remaining_time": "8:49:58"} +{"current_steps": 2150, "total_steps": 8680, "loss": 0.8328324556350708, "lr": 1.7940293077723573e-06, 
"epoch": 0.49539170506912444, "percentage": 24.77, "elapsed_time": "2:54:27", "remaining_time": "8:49:52"} +{"current_steps": 2151, "total_steps": 8680, "loss": 0.8924463391304016, "lr": 1.7937976579011655e-06, "epoch": 0.4956221198156682, "percentage": 24.78, "elapsed_time": "2:54:33", "remaining_time": "8:49:49"} +{"current_steps": 2152, "total_steps": 8680, "loss": 0.9725968837738037, "lr": 1.7935658928112947e-06, "epoch": 0.49585253456221196, "percentage": 24.79, "elapsed_time": "2:54:37", "remaining_time": "8:49:43"} +{"current_steps": 2153, "total_steps": 8680, "loss": 0.7814322710037231, "lr": 1.7933340125363855e-06, "epoch": 0.49608294930875574, "percentage": 24.8, "elapsed_time": "2:54:43", "remaining_time": "8:49:42"} +{"current_steps": 2154, "total_steps": 8680, "loss": 0.8022886514663696, "lr": 1.793102017110094e-06, "epoch": 0.4963133640552995, "percentage": 24.82, "elapsed_time": "2:54:48", "remaining_time": "8:49:36"} +{"current_steps": 2155, "total_steps": 8680, "loss": 0.9747333526611328, "lr": 1.7928699065660951e-06, "epoch": 0.4965437788018433, "percentage": 24.83, "elapsed_time": "2:54:52", "remaining_time": "8:49:31"} +{"current_steps": 2156, "total_steps": 8680, "loss": 0.9039797782897949, "lr": 1.7926376809380783e-06, "epoch": 0.4967741935483871, "percentage": 24.84, "elapsed_time": "2:54:57", "remaining_time": "8:49:26"} +{"current_steps": 2157, "total_steps": 8680, "loss": 0.9444677829742432, "lr": 1.7924053402597518e-06, "epoch": 0.49700460829493087, "percentage": 24.85, "elapsed_time": "2:55:01", "remaining_time": "8:49:18"} +{"current_steps": 2158, "total_steps": 8680, "loss": 0.8442031741142273, "lr": 1.7921728845648393e-06, "epoch": 0.49723502304147466, "percentage": 24.86, "elapsed_time": "2:55:07", "remaining_time": "8:49:17"} +{"current_steps": 2159, "total_steps": 8680, "loss": 0.9410362839698792, "lr": 1.7919403138870813e-06, "epoch": 0.49746543778801844, "percentage": 24.87, "elapsed_time": "2:55:12", "remaining_time": "8:49:10"} 
+{"current_steps": 2160, "total_steps": 8680, "loss": 0.8929172158241272, "lr": 1.791707628260235e-06, "epoch": 0.4976958525345622, "percentage": 24.88, "elapsed_time": "2:55:16", "remaining_time": "8:49:03"} +{"current_steps": 2161, "total_steps": 8680, "loss": 0.9259560108184814, "lr": 1.7914748277180745e-06, "epoch": 0.497926267281106, "percentage": 24.9, "elapsed_time": "2:55:21", "remaining_time": "8:49:00"} +{"current_steps": 2162, "total_steps": 8680, "loss": 0.8201638460159302, "lr": 1.7912419122943904e-06, "epoch": 0.4981566820276498, "percentage": 24.91, "elapsed_time": "2:55:26", "remaining_time": "8:48:54"} +{"current_steps": 2163, "total_steps": 8680, "loss": 0.7554556131362915, "lr": 1.7910088820229907e-06, "epoch": 0.49838709677419357, "percentage": 24.92, "elapsed_time": "2:55:30", "remaining_time": "8:48:47"} +{"current_steps": 2164, "total_steps": 8680, "loss": 0.8206801414489746, "lr": 1.7907757369376984e-06, "epoch": 0.4986175115207373, "percentage": 24.93, "elapsed_time": "2:55:35", "remaining_time": "8:48:44"} +{"current_steps": 2165, "total_steps": 8680, "loss": 0.765400767326355, "lr": 1.7905424770723551e-06, "epoch": 0.4988479262672811, "percentage": 24.94, "elapsed_time": "2:55:41", "remaining_time": "8:48:41"} +{"current_steps": 2166, "total_steps": 8680, "loss": 0.9191527366638184, "lr": 1.7903091024608177e-06, "epoch": 0.49907834101382487, "percentage": 24.95, "elapsed_time": "2:55:44", "remaining_time": "8:48:32"} +{"current_steps": 2167, "total_steps": 8680, "loss": 0.8515042662620544, "lr": 1.7900756131369601e-06, "epoch": 0.49930875576036865, "percentage": 24.97, "elapsed_time": "2:55:48", "remaining_time": "8:48:24"} +{"current_steps": 2168, "total_steps": 8680, "loss": 0.8509752750396729, "lr": 1.7898420091346736e-06, "epoch": 0.49953917050691243, "percentage": 24.98, "elapsed_time": "2:55:54", "remaining_time": "8:48:21"} +{"current_steps": 2169, "total_steps": 8680, "loss": 0.8007084131240845, "lr": 1.7896082904878647e-06, 
"epoch": 0.4997695852534562, "percentage": 24.99, "elapsed_time": "2:55:58", "remaining_time": "8:48:14"} +{"current_steps": 2170, "total_steps": 8680, "loss": 0.8395413756370544, "lr": 1.789374457230458e-06, "epoch": 0.5, "percentage": 25.0, "elapsed_time": "2:56:02", "remaining_time": "8:48:08"} +{"current_steps": 2171, "total_steps": 8680, "loss": 0.8624853491783142, "lr": 1.7891405093963937e-06, "epoch": 0.5002304147465437, "percentage": 25.01, "elapsed_time": "2:56:07", "remaining_time": "8:48:01"} +{"current_steps": 2172, "total_steps": 8680, "loss": 0.8141548037528992, "lr": 1.788906447019629e-06, "epoch": 0.5004608294930876, "percentage": 25.02, "elapsed_time": "2:56:11", "remaining_time": "8:47:55"} +{"current_steps": 2173, "total_steps": 8680, "loss": 0.6764500141143799, "lr": 1.7886722701341382e-06, "epoch": 0.5006912442396313, "percentage": 25.03, "elapsed_time": "2:56:16", "remaining_time": "8:47:51"} +{"current_steps": 2174, "total_steps": 8680, "loss": 0.710756778717041, "lr": 1.7884379787739112e-06, "epoch": 0.5009216589861751, "percentage": 25.05, "elapsed_time": "2:56:20", "remaining_time": "8:47:43"} +{"current_steps": 2175, "total_steps": 8680, "loss": 0.8090574145317078, "lr": 1.7882035729729555e-06, "epoch": 0.5011520737327189, "percentage": 25.06, "elapsed_time": "2:56:23", "remaining_time": "8:47:34"} +{"current_steps": 2176, "total_steps": 8680, "loss": 0.7639138102531433, "lr": 1.7879690527652943e-06, "epoch": 0.5013824884792627, "percentage": 25.07, "elapsed_time": "2:56:28", "remaining_time": "8:47:29"} +{"current_steps": 2177, "total_steps": 8680, "loss": 0.8093903660774231, "lr": 1.7877344181849687e-06, "epoch": 0.5016129032258064, "percentage": 25.08, "elapsed_time": "2:56:33", "remaining_time": "8:47:24"} +{"current_steps": 2178, "total_steps": 8680, "loss": 0.8705824017524719, "lr": 1.7874996692660348e-06, "epoch": 0.5018433179723503, "percentage": 25.09, "elapsed_time": "2:56:39", "remaining_time": "8:47:23"} +{"current_steps": 
2179, "total_steps": 8680, "loss": 0.7365947961807251, "lr": 1.7872648060425666e-06, "epoch": 0.502073732718894, "percentage": 25.1, "elapsed_time": "2:56:44", "remaining_time": "8:47:18"} +{"current_steps": 2180, "total_steps": 8680, "loss": 0.9405299425125122, "lr": 1.787029828548654e-06, "epoch": 0.5023041474654378, "percentage": 25.12, "elapsed_time": "2:56:48", "remaining_time": "8:47:12"} +{"current_steps": 2181, "total_steps": 8680, "loss": 0.9232017993927002, "lr": 1.7867947368184036e-06, "epoch": 0.5025345622119816, "percentage": 25.13, "elapsed_time": "2:56:54", "remaining_time": "8:47:08"} +{"current_steps": 2182, "total_steps": 8680, "loss": 0.9941537380218506, "lr": 1.7865595308859388e-06, "epoch": 0.5027649769585254, "percentage": 25.14, "elapsed_time": "2:56:58", "remaining_time": "8:47:00"} +{"current_steps": 2183, "total_steps": 8680, "loss": 0.6981802582740784, "lr": 1.7863242107853993e-06, "epoch": 0.5029953917050691, "percentage": 25.15, "elapsed_time": "2:57:03", "remaining_time": "8:46:58"} +{"current_steps": 2184, "total_steps": 8680, "loss": 0.8155109882354736, "lr": 1.7860887765509417e-06, "epoch": 0.5032258064516129, "percentage": 25.16, "elapsed_time": "2:57:07", "remaining_time": "8:46:49"} +{"current_steps": 2185, "total_steps": 8680, "loss": 0.7246255874633789, "lr": 1.7858532282167385e-06, "epoch": 0.5034562211981567, "percentage": 25.17, "elapsed_time": "2:57:11", "remaining_time": "8:46:43"} +{"current_steps": 2186, "total_steps": 8680, "loss": 0.7042064666748047, "lr": 1.7856175658169796e-06, "epoch": 0.5036866359447004, "percentage": 25.18, "elapsed_time": "2:57:17", "remaining_time": "8:46:40"} +{"current_steps": 2187, "total_steps": 8680, "loss": 0.7522145509719849, "lr": 1.7853817893858714e-06, "epoch": 0.5039170506912443, "percentage": 25.2, "elapsed_time": "2:57:22", "remaining_time": "8:46:37"} +{"current_steps": 2188, "total_steps": 8680, "loss": 1.0157709121704102, "lr": 1.7851458989576359e-06, "epoch": 0.504147465437788, 
"percentage": 25.21, "elapsed_time": "2:57:26", "remaining_time": "8:46:29"} +{"current_steps": 2189, "total_steps": 8680, "loss": 0.7096433639526367, "lr": 1.7849098945665127e-06, "epoch": 0.5043778801843318, "percentage": 25.22, "elapsed_time": "2:57:31", "remaining_time": "8:46:25"} +{"current_steps": 2190, "total_steps": 8680, "loss": 0.7743037939071655, "lr": 1.7846737762467572e-06, "epoch": 0.5046082949308756, "percentage": 25.23, "elapsed_time": "2:57:36", "remaining_time": "8:46:21"} +{"current_steps": 2191, "total_steps": 8680, "loss": 0.7907241582870483, "lr": 1.784437544032642e-06, "epoch": 0.5048387096774194, "percentage": 25.24, "elapsed_time": "2:57:41", "remaining_time": "8:46:15"} +{"current_steps": 2192, "total_steps": 8680, "loss": 0.8692185878753662, "lr": 1.7842011979584557e-06, "epoch": 0.5050691244239631, "percentage": 25.25, "elapsed_time": "2:57:47", "remaining_time": "8:46:13"} +{"current_steps": 2193, "total_steps": 8680, "loss": 0.9678715467453003, "lr": 1.783964738058504e-06, "epoch": 0.505299539170507, "percentage": 25.26, "elapsed_time": "2:57:50", "remaining_time": "8:46:04"} +{"current_steps": 2194, "total_steps": 8680, "loss": 0.855170726776123, "lr": 1.7837281643671077e-06, "epoch": 0.5055299539170507, "percentage": 25.28, "elapsed_time": "2:57:54", "remaining_time": "8:45:56"} +{"current_steps": 2195, "total_steps": 8680, "loss": 0.8452733755111694, "lr": 1.7834914769186065e-06, "epoch": 0.5057603686635944, "percentage": 25.29, "elapsed_time": "2:57:59", "remaining_time": "8:45:50"} +{"current_steps": 2196, "total_steps": 8680, "loss": 0.7517217397689819, "lr": 1.7832546757473543e-06, "epoch": 0.5059907834101383, "percentage": 25.3, "elapsed_time": "2:58:04", "remaining_time": "8:45:47"} +{"current_steps": 2197, "total_steps": 8680, "loss": 0.6971632838249207, "lr": 1.783017760887723e-06, "epoch": 0.506221198156682, "percentage": 25.31, "elapsed_time": "2:58:10", "remaining_time": "8:45:46"} +{"current_steps": 2198, "total_steps": 
8680, "loss": 0.8638256192207336, "lr": 1.7827807323741002e-06, "epoch": 0.5064516129032258, "percentage": 25.32, "elapsed_time": "2:58:15", "remaining_time": "8:45:42"} +{"current_steps": 2199, "total_steps": 8680, "loss": 0.8410143256187439, "lr": 1.7825435902408903e-06, "epoch": 0.5066820276497696, "percentage": 25.33, "elapsed_time": "2:58:19", "remaining_time": "8:45:34"} +{"current_steps": 2200, "total_steps": 8680, "loss": 0.8127691745758057, "lr": 1.7823063345225143e-06, "epoch": 0.5069124423963134, "percentage": 25.35, "elapsed_time": "2:58:24", "remaining_time": "8:45:29"} +{"current_steps": 2201, "total_steps": 8680, "loss": 0.7351404428482056, "lr": 1.7820689652534096e-06, "epoch": 0.5071428571428571, "percentage": 25.36, "elapsed_time": "2:58:31", "remaining_time": "8:45:29"} +{"current_steps": 2202, "total_steps": 8680, "loss": 0.7258716821670532, "lr": 1.7818314824680298e-06, "epoch": 0.507373271889401, "percentage": 25.37, "elapsed_time": "2:58:37", "remaining_time": "8:45:29"} +{"current_steps": 2203, "total_steps": 8680, "loss": 0.9509599208831787, "lr": 1.7815938862008454e-06, "epoch": 0.5076036866359447, "percentage": 25.38, "elapsed_time": "2:58:42", "remaining_time": "8:45:24"} +{"current_steps": 2204, "total_steps": 8680, "loss": 0.8600929379463196, "lr": 1.7813561764863429e-06, "epoch": 0.5078341013824885, "percentage": 25.39, "elapsed_time": "2:58:46", "remaining_time": "8:45:17"} +{"current_steps": 2205, "total_steps": 8680, "loss": 0.8688119649887085, "lr": 1.7811183533590257e-06, "epoch": 0.5080645161290323, "percentage": 25.4, "elapsed_time": "2:58:51", "remaining_time": "8:45:14"} +{"current_steps": 2206, "total_steps": 8680, "loss": 0.8447986841201782, "lr": 1.780880416853414e-06, "epoch": 0.5082949308755761, "percentage": 25.41, "elapsed_time": "2:58:55", "remaining_time": "8:45:07"} +{"current_steps": 2207, "total_steps": 8680, "loss": 0.8262573480606079, "lr": 1.7806423670040433e-06, "epoch": 0.5085253456221198, "percentage": 
25.43, "elapsed_time": "2:59:00", "remaining_time": "8:45:02"} +{"current_steps": 2208, "total_steps": 8680, "loss": 0.9534487724304199, "lr": 1.7804042038454666e-06, "epoch": 0.5087557603686635, "percentage": 25.44, "elapsed_time": "2:59:04", "remaining_time": "8:44:54"} +{"current_steps": 2209, "total_steps": 8680, "loss": 0.7712565064430237, "lr": 1.7801659274122527e-06, "epoch": 0.5089861751152074, "percentage": 25.45, "elapsed_time": "2:59:09", "remaining_time": "8:44:48"} +{"current_steps": 2210, "total_steps": 8680, "loss": 0.8190760016441345, "lr": 1.7799275377389873e-06, "epoch": 0.5092165898617511, "percentage": 25.46, "elapsed_time": "2:59:13", "remaining_time": "8:44:43"} +{"current_steps": 2211, "total_steps": 8680, "loss": 0.8647592067718506, "lr": 1.7796890348602722e-06, "epoch": 0.509447004608295, "percentage": 25.47, "elapsed_time": "2:59:19", "remaining_time": "8:44:39"} +{"current_steps": 2212, "total_steps": 8680, "loss": 0.7788198590278625, "lr": 1.7794504188107257e-06, "epoch": 0.5096774193548387, "percentage": 25.48, "elapsed_time": "2:59:25", "remaining_time": "8:44:38"} +{"current_steps": 2213, "total_steps": 8680, "loss": 0.8610718250274658, "lr": 1.779211689624983e-06, "epoch": 0.5099078341013825, "percentage": 25.5, "elapsed_time": "2:59:30", "remaining_time": "8:44:34"} +{"current_steps": 2214, "total_steps": 8680, "loss": 0.832200825214386, "lr": 1.7789728473376952e-06, "epoch": 0.5101382488479262, "percentage": 25.51, "elapsed_time": "2:59:35", "remaining_time": "8:44:30"} +{"current_steps": 2215, "total_steps": 8680, "loss": 0.7325488328933716, "lr": 1.7787338919835298e-06, "epoch": 0.5103686635944701, "percentage": 25.52, "elapsed_time": "2:59:39", "remaining_time": "8:44:23"} +{"current_steps": 2216, "total_steps": 8680, "loss": 0.8038203716278076, "lr": 1.7784948235971707e-06, "epoch": 0.5105990783410138, "percentage": 25.53, "elapsed_time": "2:59:44", "remaining_time": "8:44:17"} +{"current_steps": 2217, "total_steps": 8680, 
"loss": 0.7016317248344421, "lr": 1.7782556422133185e-06, "epoch": 0.5108294930875577, "percentage": 25.54, "elapsed_time": "2:59:50", "remaining_time": "8:44:16"} +{"current_steps": 2218, "total_steps": 8680, "loss": 0.7964655160903931, "lr": 1.7780163478666905e-06, "epoch": 0.5110599078341014, "percentage": 25.55, "elapsed_time": "2:59:54", "remaining_time": "8:44:08"} +{"current_steps": 2219, "total_steps": 8680, "loss": 0.6681785583496094, "lr": 1.777776940592019e-06, "epoch": 0.5112903225806451, "percentage": 25.56, "elapsed_time": "3:00:00", "remaining_time": "8:44:07"} +{"current_steps": 2220, "total_steps": 8680, "loss": 0.835777759552002, "lr": 1.7775374204240547e-06, "epoch": 0.511520737327189, "percentage": 25.58, "elapsed_time": "3:00:04", "remaining_time": "8:44:00"} +{"current_steps": 2221, "total_steps": 8680, "loss": 0.9442443251609802, "lr": 1.777297787397563e-06, "epoch": 0.5117511520737327, "percentage": 25.59, "elapsed_time": "3:00:10", "remaining_time": "8:43:57"} +{"current_steps": 2222, "total_steps": 8680, "loss": 0.9351231455802917, "lr": 1.7770580415473267e-06, "epoch": 0.5119815668202765, "percentage": 25.6, "elapsed_time": "3:00:14", "remaining_time": "8:43:51"} +{"current_steps": 2223, "total_steps": 8680, "loss": 0.7238374352455139, "lr": 1.776818182908144e-06, "epoch": 0.5122119815668202, "percentage": 25.61, "elapsed_time": "3:00:19", "remaining_time": "8:43:47"} +{"current_steps": 2224, "total_steps": 8680, "loss": 0.8206230998039246, "lr": 1.7765782115148308e-06, "epoch": 0.5124423963133641, "percentage": 25.62, "elapsed_time": "3:00:25", "remaining_time": "8:43:44"} +{"current_steps": 2225, "total_steps": 8680, "loss": 0.748784065246582, "lr": 1.7763381274022176e-06, "epoch": 0.5126728110599078, "percentage": 25.63, "elapsed_time": "3:00:29", "remaining_time": "8:43:39"} +{"current_steps": 2226, "total_steps": 8680, "loss": 0.7980858087539673, "lr": 1.7760979306051533e-06, "epoch": 0.5129032258064516, "percentage": 25.65, 
"elapsed_time": "3:00:34", "remaining_time": "8:43:34"} +{"current_steps": 2227, "total_steps": 8680, "loss": 0.8631168603897095, "lr": 1.7758576211585018e-06, "epoch": 0.5131336405529954, "percentage": 25.66, "elapsed_time": "3:00:40", "remaining_time": "8:43:30"} +{"current_steps": 2228, "total_steps": 8680, "loss": 0.9405999779701233, "lr": 1.7756171990971441e-06, "epoch": 0.5133640552995392, "percentage": 25.67, "elapsed_time": "3:00:44", "remaining_time": "8:43:24"} +{"current_steps": 2229, "total_steps": 8680, "loss": 0.9055094718933105, "lr": 1.7753766644559763e-06, "epoch": 0.5135944700460829, "percentage": 25.68, "elapsed_time": "3:00:50", "remaining_time": "8:43:22"} +{"current_steps": 2230, "total_steps": 8680, "loss": 0.7583146691322327, "lr": 1.775136017269912e-06, "epoch": 0.5138248847926268, "percentage": 25.69, "elapsed_time": "3:00:54", "remaining_time": "8:43:15"} +{"current_steps": 2231, "total_steps": 8680, "loss": 0.8728743195533752, "lr": 1.7748952575738811e-06, "epoch": 0.5140552995391705, "percentage": 25.7, "elapsed_time": "3:00:58", "remaining_time": "8:43:07"} +{"current_steps": 2232, "total_steps": 8680, "loss": 0.8133460283279419, "lr": 1.7746543854028295e-06, "epoch": 0.5142857142857142, "percentage": 25.71, "elapsed_time": "3:01:02", "remaining_time": "8:43:01"} +{"current_steps": 2233, "total_steps": 8680, "loss": 0.8389721512794495, "lr": 1.7744134007917194e-06, "epoch": 0.5145161290322581, "percentage": 25.73, "elapsed_time": "3:01:07", "remaining_time": "8:42:54"} +{"current_steps": 2234, "total_steps": 8680, "loss": 0.7016798257827759, "lr": 1.774172303775529e-06, "epoch": 0.5147465437788018, "percentage": 25.74, "elapsed_time": "3:01:12", "remaining_time": "8:42:52"} +{"current_steps": 2235, "total_steps": 8680, "loss": 0.7920540571212769, "lr": 1.7739310943892538e-06, "epoch": 0.5149769585253456, "percentage": 25.75, "elapsed_time": "3:01:17", "remaining_time": "8:42:48"} +{"current_steps": 2236, "total_steps": 8680, "loss": 
0.900149405002594, "lr": 1.7736897726679048e-06, "epoch": 0.5152073732718894, "percentage": 25.76, "elapsed_time": "3:01:24", "remaining_time": "8:42:48"} +{"current_steps": 2237, "total_steps": 8680, "loss": 0.8537915349006653, "lr": 1.7734483386465096e-06, "epoch": 0.5154377880184332, "percentage": 25.77, "elapsed_time": "3:01:28", "remaining_time": "8:42:42"} +{"current_steps": 2238, "total_steps": 8680, "loss": 0.7418123483657837, "lr": 1.7732067923601121e-06, "epoch": 0.5156682027649769, "percentage": 25.78, "elapsed_time": "3:01:33", "remaining_time": "8:42:37"} +{"current_steps": 2239, "total_steps": 8680, "loss": 0.8890011310577393, "lr": 1.7729651338437721e-06, "epoch": 0.5158986175115208, "percentage": 25.79, "elapsed_time": "3:01:38", "remaining_time": "8:42:30"} +{"current_steps": 2240, "total_steps": 8680, "loss": 0.9082813262939453, "lr": 1.7727233631325663e-06, "epoch": 0.5161290322580645, "percentage": 25.81, "elapsed_time": "3:01:43", "remaining_time": "8:42:26"} +{"current_steps": 2241, "total_steps": 8680, "loss": 0.8337695598602295, "lr": 1.7724814802615868e-06, "epoch": 0.5163594470046083, "percentage": 25.82, "elapsed_time": "3:01:47", "remaining_time": "8:42:19"} +{"current_steps": 2242, "total_steps": 8680, "loss": 0.8990765810012817, "lr": 1.7722394852659437e-06, "epoch": 0.5165898617511521, "percentage": 25.83, "elapsed_time": "3:01:51", "remaining_time": "8:42:12"} +{"current_steps": 2243, "total_steps": 8680, "loss": 0.720890998840332, "lr": 1.7719973781807614e-06, "epoch": 0.5168202764976959, "percentage": 25.84, "elapsed_time": "3:01:56", "remaining_time": "8:42:09"} +{"current_steps": 2244, "total_steps": 8680, "loss": 0.7966938018798828, "lr": 1.7717551590411817e-06, "epoch": 0.5170506912442396, "percentage": 25.85, "elapsed_time": "3:02:03", "remaining_time": "8:42:08"} +{"current_steps": 2245, "total_steps": 8680, "loss": 0.9290107488632202, "lr": 1.7715128278823622e-06, "epoch": 0.5172811059907834, "percentage": 25.86, 
"elapsed_time": "3:02:07", "remaining_time": "8:42:02"} +{"current_steps": 2246, "total_steps": 8680, "loss": 0.8388533592224121, "lr": 1.771270384739477e-06, "epoch": 0.5175115207373272, "percentage": 25.88, "elapsed_time": "3:02:10", "remaining_time": "8:41:53"} +{"current_steps": 2247, "total_steps": 8680, "loss": 0.8845043182373047, "lr": 1.7710278296477169e-06, "epoch": 0.5177419354838709, "percentage": 25.89, "elapsed_time": "3:02:14", "remaining_time": "8:41:45"} +{"current_steps": 2248, "total_steps": 8680, "loss": 0.879709780216217, "lr": 1.7707851626422875e-06, "epoch": 0.5179723502304148, "percentage": 25.9, "elapsed_time": "3:02:20", "remaining_time": "8:41:41"} +{"current_steps": 2249, "total_steps": 8680, "loss": 0.8215152025222778, "lr": 1.7705423837584123e-06, "epoch": 0.5182027649769585, "percentage": 25.91, "elapsed_time": "3:02:25", "remaining_time": "8:41:38"} +{"current_steps": 2250, "total_steps": 8680, "loss": 0.8108627796173096, "lr": 1.7702994930313305e-06, "epoch": 0.5184331797235023, "percentage": 25.92, "elapsed_time": "3:02:30", "remaining_time": "8:41:32"} +{"current_steps": 2251, "total_steps": 8680, "loss": 0.8391602039337158, "lr": 1.7700564904962966e-06, "epoch": 0.5186635944700461, "percentage": 25.93, "elapsed_time": "3:02:35", "remaining_time": "8:41:30"} +{"current_steps": 2252, "total_steps": 8680, "loss": 0.8664923906326294, "lr": 1.769813376188583e-06, "epoch": 0.5188940092165899, "percentage": 25.94, "elapsed_time": "3:02:39", "remaining_time": "8:41:22"} +{"current_steps": 2253, "total_steps": 8680, "loss": 0.9670882821083069, "lr": 1.7695701501434765e-06, "epoch": 0.5191244239631336, "percentage": 25.96, "elapsed_time": "3:02:43", "remaining_time": "8:41:16"} +{"current_steps": 2254, "total_steps": 8680, "loss": 0.946273684501648, "lr": 1.7693268123962816e-06, "epoch": 0.5193548387096775, "percentage": 25.97, "elapsed_time": "3:02:48", "remaining_time": "8:41:10"} +{"current_steps": 2255, "total_steps": 8680, "loss": 
0.9691795706748962, "lr": 1.7690833629823184e-06, "epoch": 0.5195852534562212, "percentage": 25.98, "elapsed_time": "3:02:51", "remaining_time": "8:41:01"} +{"current_steps": 2256, "total_steps": 8680, "loss": 0.8086103200912476, "lr": 1.7688398019369232e-06, "epoch": 0.5198156682027649, "percentage": 25.99, "elapsed_time": "3:02:57", "remaining_time": "8:40:57"} +{"current_steps": 2257, "total_steps": 8680, "loss": 0.8574277758598328, "lr": 1.7685961292954486e-06, "epoch": 0.5200460829493088, "percentage": 26.0, "elapsed_time": "3:03:02", "remaining_time": "8:40:54"} +{"current_steps": 2258, "total_steps": 8680, "loss": 0.7841963171958923, "lr": 1.7683523450932633e-06, "epoch": 0.5202764976958525, "percentage": 26.01, "elapsed_time": "3:03:06", "remaining_time": "8:40:48"} +{"current_steps": 2259, "total_steps": 8680, "loss": 0.6972980499267578, "lr": 1.7681084493657523e-06, "epoch": 0.5205069124423963, "percentage": 26.03, "elapsed_time": "3:03:12", "remaining_time": "8:40:44"} +{"current_steps": 2260, "total_steps": 8680, "loss": 0.9193723201751709, "lr": 1.7678644421483163e-06, "epoch": 0.5207373271889401, "percentage": 26.04, "elapsed_time": "3:03:17", "remaining_time": "8:40:39"} +{"current_steps": 2261, "total_steps": 8680, "loss": 0.7902654409408569, "lr": 1.7676203234763736e-06, "epoch": 0.5209677419354839, "percentage": 26.05, "elapsed_time": "3:03:20", "remaining_time": "8:40:31"} +{"current_steps": 2262, "total_steps": 8680, "loss": 0.8804734945297241, "lr": 1.767376093385357e-06, "epoch": 0.5211981566820276, "percentage": 26.06, "elapsed_time": "3:03:24", "remaining_time": "8:40:23"} +{"current_steps": 2263, "total_steps": 8680, "loss": 0.7884976863861084, "lr": 1.7671317519107163e-06, "epoch": 0.5214285714285715, "percentage": 26.07, "elapsed_time": "3:03:28", "remaining_time": "8:40:17"} +{"current_steps": 2264, "total_steps": 8680, "loss": 0.8233190774917603, "lr": 1.7668872990879173e-06, "epoch": 0.5216589861751152, "percentage": 26.08, 
"elapsed_time": "3:03:32", "remaining_time": "8:40:09"} +{"current_steps": 2265, "total_steps": 8680, "loss": 0.7985334396362305, "lr": 1.766642734952442e-06, "epoch": 0.521889400921659, "percentage": 26.09, "elapsed_time": "3:03:37", "remaining_time": "8:40:03"} +{"current_steps": 2266, "total_steps": 8680, "loss": 0.7805646657943726, "lr": 1.7663980595397887e-06, "epoch": 0.5221198156682028, "percentage": 26.11, "elapsed_time": "3:03:42", "remaining_time": "8:39:58"} +{"current_steps": 2267, "total_steps": 8680, "loss": 0.8528248071670532, "lr": 1.7661532728854718e-06, "epoch": 0.5223502304147466, "percentage": 26.12, "elapsed_time": "3:03:47", "remaining_time": "8:39:55"} +{"current_steps": 2268, "total_steps": 8680, "loss": 0.7714066505432129, "lr": 1.7659083750250215e-06, "epoch": 0.5225806451612903, "percentage": 26.13, "elapsed_time": "3:03:52", "remaining_time": "8:39:49"} +{"current_steps": 2269, "total_steps": 8680, "loss": 0.8250499963760376, "lr": 1.7656633659939843e-06, "epoch": 0.522811059907834, "percentage": 26.14, "elapsed_time": "3:03:57", "remaining_time": "8:39:46"} +{"current_steps": 2270, "total_steps": 8680, "loss": 0.7878777384757996, "lr": 1.7654182458279231e-06, "epoch": 0.5230414746543779, "percentage": 26.15, "elapsed_time": "3:04:03", "remaining_time": "8:39:44"} +{"current_steps": 2271, "total_steps": 8680, "loss": 0.9080224633216858, "lr": 1.7651730145624174e-06, "epoch": 0.5232718894009216, "percentage": 26.16, "elapsed_time": "3:04:08", "remaining_time": "8:39:39"} +{"current_steps": 2272, "total_steps": 8680, "loss": 0.8010937571525574, "lr": 1.7649276722330607e-06, "epoch": 0.5235023041474655, "percentage": 26.18, "elapsed_time": "3:04:12", "remaining_time": "8:39:32"} +{"current_steps": 2273, "total_steps": 8680, "loss": 0.903404951095581, "lr": 1.7646822188754658e-06, "epoch": 0.5237327188940092, "percentage": 26.19, "elapsed_time": "3:04:15", "remaining_time": "8:39:23"} +{"current_steps": 2274, "total_steps": 8680, "loss": 
0.9009061455726624, "lr": 1.7644366545252589e-06, "epoch": 0.523963133640553, "percentage": 26.2, "elapsed_time": "3:04:20", "remaining_time": "8:39:17"} +{"current_steps": 2275, "total_steps": 8680, "loss": 0.7158697843551636, "lr": 1.7641909792180834e-06, "epoch": 0.5241935483870968, "percentage": 26.21, "elapsed_time": "3:04:23", "remaining_time": "8:39:09"} +{"current_steps": 2276, "total_steps": 8680, "loss": 0.8101463317871094, "lr": 1.763945192989599e-06, "epoch": 0.5244239631336406, "percentage": 26.22, "elapsed_time": "3:04:28", "remaining_time": "8:39:03"} +{"current_steps": 2277, "total_steps": 8680, "loss": 0.758610725402832, "lr": 1.7636992958754812e-06, "epoch": 0.5246543778801843, "percentage": 26.23, "elapsed_time": "3:04:33", "remaining_time": "8:38:58"} +{"current_steps": 2278, "total_steps": 8680, "loss": 0.9469501972198486, "lr": 1.7634532879114216e-06, "epoch": 0.5248847926267282, "percentage": 26.24, "elapsed_time": "3:04:37", "remaining_time": "8:38:52"} +{"current_steps": 2279, "total_steps": 8680, "loss": 0.7528036236763, "lr": 1.7632071691331281e-06, "epoch": 0.5251152073732719, "percentage": 26.26, "elapsed_time": "3:04:42", "remaining_time": "8:38:46"} +{"current_steps": 2280, "total_steps": 8680, "loss": 0.8519324064254761, "lr": 1.7629609395763242e-06, "epoch": 0.5253456221198156, "percentage": 26.27, "elapsed_time": "3:04:49", "remaining_time": "8:38:47"} +{"current_steps": 2281, "total_steps": 8680, "loss": 0.8620004653930664, "lr": 1.7627145992767498e-06, "epoch": 0.5255760368663595, "percentage": 26.28, "elapsed_time": "3:04:54", "remaining_time": "8:38:43"} +{"current_steps": 2282, "total_steps": 8680, "loss": 0.8066067695617676, "lr": 1.762468148270161e-06, "epoch": 0.5258064516129032, "percentage": 26.29, "elapsed_time": "3:04:58", "remaining_time": "8:38:36"} +{"current_steps": 2283, "total_steps": 8680, "loss": 0.865642786026001, "lr": 1.7622215865923301e-06, "epoch": 0.526036866359447, "percentage": 26.3, "elapsed_time": 
"3:05:03", "remaining_time": "8:38:31"} +{"current_steps": 2284, "total_steps": 8680, "loss": 0.8478001356124878, "lr": 1.761974914279045e-06, "epoch": 0.5262672811059907, "percentage": 26.31, "elapsed_time": "3:05:08", "remaining_time": "8:38:27"} +{"current_steps": 2285, "total_steps": 8680, "loss": 0.7984344363212585, "lr": 1.7617281313661098e-06, "epoch": 0.5264976958525346, "percentage": 26.32, "elapsed_time": "3:05:14", "remaining_time": "8:38:25"} +{"current_steps": 2286, "total_steps": 8680, "loss": 0.8480801582336426, "lr": 1.7614812378893444e-06, "epoch": 0.5267281105990783, "percentage": 26.34, "elapsed_time": "3:05:19", "remaining_time": "8:38:21"} +{"current_steps": 2287, "total_steps": 8680, "loss": 0.8667479753494263, "lr": 1.7612342338845859e-06, "epoch": 0.5269585253456222, "percentage": 26.35, "elapsed_time": "3:05:24", "remaining_time": "8:38:16"} +{"current_steps": 2288, "total_steps": 8680, "loss": 0.8431364297866821, "lr": 1.7609871193876854e-06, "epoch": 0.5271889400921659, "percentage": 26.36, "elapsed_time": "3:05:29", "remaining_time": "8:38:12"} +{"current_steps": 2289, "total_steps": 8680, "loss": 0.8544220924377441, "lr": 1.7607398944345127e-06, "epoch": 0.5274193548387097, "percentage": 26.37, "elapsed_time": "3:05:34", "remaining_time": "8:38:07"} +{"current_steps": 2290, "total_steps": 8680, "loss": 0.9298971891403198, "lr": 1.760492559060951e-06, "epoch": 0.5276497695852534, "percentage": 26.38, "elapsed_time": "3:05:39", "remaining_time": "8:38:03"} +{"current_steps": 2291, "total_steps": 8680, "loss": 0.739667534828186, "lr": 1.760245113302901e-06, "epoch": 0.5278801843317973, "percentage": 26.39, "elapsed_time": "3:05:44", "remaining_time": "8:37:59"} +{"current_steps": 2292, "total_steps": 8680, "loss": 0.8981268405914307, "lr": 1.7599975571962796e-06, "epoch": 0.528110599078341, "percentage": 26.41, "elapsed_time": "3:05:50", "remaining_time": "8:37:58"} +{"current_steps": 2293, "total_steps": 8680, "loss": 0.8027834892272949, 
"lr": 1.7597498907770185e-06, "epoch": 0.5283410138248847, "percentage": 26.42, "elapsed_time": "3:05:56", "remaining_time": "8:37:54"} +{"current_steps": 2294, "total_steps": 8680, "loss": 0.7018242478370667, "lr": 1.7595021140810669e-06, "epoch": 0.5285714285714286, "percentage": 26.43, "elapsed_time": "3:06:03", "remaining_time": "8:37:56"} +{"current_steps": 2295, "total_steps": 8680, "loss": 0.7655147910118103, "lr": 1.7592542271443887e-06, "epoch": 0.5288018433179723, "percentage": 26.44, "elapsed_time": "3:06:09", "remaining_time": "8:37:56"} +{"current_steps": 2296, "total_steps": 8680, "loss": 0.8283153772354126, "lr": 1.7590062300029644e-06, "epoch": 0.5290322580645161, "percentage": 26.45, "elapsed_time": "3:06:14", "remaining_time": "8:37:51"} +{"current_steps": 2297, "total_steps": 8680, "loss": 1.0430598258972168, "lr": 1.7587581226927907e-06, "epoch": 0.5292626728110599, "percentage": 26.46, "elapsed_time": "3:06:18", "remaining_time": "8:37:44"} +{"current_steps": 2298, "total_steps": 8680, "loss": 0.6683472990989685, "lr": 1.7585099052498802e-06, "epoch": 0.5294930875576037, "percentage": 26.47, "elapsed_time": "3:06:25", "remaining_time": "8:37:42"} +{"current_steps": 2299, "total_steps": 8680, "loss": 0.7254939079284668, "lr": 1.7582615777102609e-06, "epoch": 0.5297235023041474, "percentage": 26.49, "elapsed_time": "3:06:29", "remaining_time": "8:37:37"} +{"current_steps": 2300, "total_steps": 8680, "loss": 0.7913245558738708, "lr": 1.7580131401099774e-06, "epoch": 0.5299539170506913, "percentage": 26.5, "elapsed_time": "3:06:34", "remaining_time": "8:37:31"} +{"current_steps": 2301, "total_steps": 8680, "loss": 0.7832915782928467, "lr": 1.75776459248509e-06, "epoch": 0.530184331797235, "percentage": 26.51, "elapsed_time": "3:06:40", "remaining_time": "8:37:30"} +{"current_steps": 2302, "total_steps": 8680, "loss": 0.9754987955093384, "lr": 1.7575159348716754e-06, "epoch": 0.5304147465437788, "percentage": 26.52, "elapsed_time": "3:06:44", 
"remaining_time": "8:37:24"} +{"current_steps": 2303, "total_steps": 8680, "loss": 0.8343901634216309, "lr": 1.7572671673058254e-06, "epoch": 0.5306451612903226, "percentage": 26.53, "elapsed_time": "3:06:49", "remaining_time": "8:37:18"} +{"current_steps": 2304, "total_steps": 8680, "loss": 0.9836198091506958, "lr": 1.757018289823649e-06, "epoch": 0.5308755760368664, "percentage": 26.54, "elapsed_time": "3:06:54", "remaining_time": "8:37:13"} +{"current_steps": 2305, "total_steps": 8680, "loss": 0.8258972764015198, "lr": 1.7567693024612695e-06, "epoch": 0.5311059907834101, "percentage": 26.56, "elapsed_time": "3:06:58", "remaining_time": "8:37:07"} +{"current_steps": 2306, "total_steps": 8680, "loss": 0.8822964429855347, "lr": 1.7565202052548277e-06, "epoch": 0.5313364055299539, "percentage": 26.57, "elapsed_time": "3:07:03", "remaining_time": "8:37:02"} +{"current_steps": 2307, "total_steps": 8680, "loss": 0.721222996711731, "lr": 1.7562709982404797e-06, "epoch": 0.5315668202764977, "percentage": 26.58, "elapsed_time": "3:07:08", "remaining_time": "8:36:58"} +{"current_steps": 2308, "total_steps": 8680, "loss": 0.7273069620132446, "lr": 1.7560216814543974e-06, "epoch": 0.5317972350230414, "percentage": 26.59, "elapsed_time": "3:07:15", "remaining_time": "8:36:58"} +{"current_steps": 2309, "total_steps": 8680, "loss": 0.8031520843505859, "lr": 1.755772254932769e-06, "epoch": 0.5320276497695853, "percentage": 26.6, "elapsed_time": "3:07:20", "remaining_time": "8:36:55"} +{"current_steps": 2310, "total_steps": 8680, "loss": 0.8767163157463074, "lr": 1.7555227187117982e-06, "epoch": 0.532258064516129, "percentage": 26.61, "elapsed_time": "3:07:26", "remaining_time": "8:36:53"} +{"current_steps": 2311, "total_steps": 8680, "loss": 0.8018463850021362, "lr": 1.755273072827705e-06, "epoch": 0.5324884792626728, "percentage": 26.62, "elapsed_time": "3:07:32", "remaining_time": "8:36:51"} +{"current_steps": 2312, "total_steps": 8680, "loss": 0.8281232118606567, "lr": 
1.7550233173167252e-06, "epoch": 0.5327188940092166, "percentage": 26.64, "elapsed_time": "3:07:37", "remaining_time": "8:36:46"} +{"current_steps": 2313, "total_steps": 8680, "loss": 0.8802565336227417, "lr": 1.7547734522151103e-06, "epoch": 0.5329493087557604, "percentage": 26.65, "elapsed_time": "3:07:40", "remaining_time": "8:36:37"} +{"current_steps": 2314, "total_steps": 8680, "loss": 0.8055544495582581, "lr": 1.754523477559128e-06, "epoch": 0.5331797235023041, "percentage": 26.66, "elapsed_time": "3:07:46", "remaining_time": "8:36:33"} +{"current_steps": 2315, "total_steps": 8680, "loss": 0.8163481950759888, "lr": 1.754273393385062e-06, "epoch": 0.533410138248848, "percentage": 26.67, "elapsed_time": "3:07:50", "remaining_time": "8:36:29"} +{"current_steps": 2316, "total_steps": 8680, "loss": 0.8308255076408386, "lr": 1.7540231997292111e-06, "epoch": 0.5336405529953917, "percentage": 26.68, "elapsed_time": "3:07:56", "remaining_time": "8:36:24"} +{"current_steps": 2317, "total_steps": 8680, "loss": 0.8387685418128967, "lr": 1.7537728966278913e-06, "epoch": 0.5338709677419354, "percentage": 26.69, "elapsed_time": "3:08:01", "remaining_time": "8:36:22"} +{"current_steps": 2318, "total_steps": 8680, "loss": 0.8668780326843262, "lr": 1.7535224841174333e-06, "epoch": 0.5341013824884793, "percentage": 26.71, "elapsed_time": "3:08:06", "remaining_time": "8:36:17"} +{"current_steps": 2319, "total_steps": 8680, "loss": 0.8394712209701538, "lr": 1.7532719622341842e-06, "epoch": 0.534331797235023, "percentage": 26.72, "elapsed_time": "3:08:11", "remaining_time": "8:36:11"} +{"current_steps": 2320, "total_steps": 8680, "loss": 0.7755688428878784, "lr": 1.7530213310145073e-06, "epoch": 0.5345622119815668, "percentage": 26.73, "elapsed_time": "3:08:15", "remaining_time": "8:36:04"} +{"current_steps": 2321, "total_steps": 8680, "loss": 0.7714632749557495, "lr": 1.7527705904947805e-06, "epoch": 0.5347926267281106, "percentage": 26.74, "elapsed_time": "3:08:20", 
"remaining_time": "8:36:01"} +{"current_steps": 2322, "total_steps": 8680, "loss": 0.8810869455337524, "lr": 1.7525197407113997e-06, "epoch": 0.5350230414746544, "percentage": 26.75, "elapsed_time": "3:08:25", "remaining_time": "8:35:57"} +{"current_steps": 2323, "total_steps": 8680, "loss": 0.8445242643356323, "lr": 1.7522687817007742e-06, "epoch": 0.5352534562211981, "percentage": 26.76, "elapsed_time": "3:08:30", "remaining_time": "8:35:51"} +{"current_steps": 2324, "total_steps": 8680, "loss": 0.9602948427200317, "lr": 1.7520177134993311e-06, "epoch": 0.535483870967742, "percentage": 26.77, "elapsed_time": "3:08:34", "remaining_time": "8:35:43"} +{"current_steps": 2325, "total_steps": 8680, "loss": 0.7865237593650818, "lr": 1.7517665361435126e-06, "epoch": 0.5357142857142857, "percentage": 26.79, "elapsed_time": "3:08:38", "remaining_time": "8:35:38"} +{"current_steps": 2326, "total_steps": 8680, "loss": 0.8062880039215088, "lr": 1.7515152496697763e-06, "epoch": 0.5359447004608295, "percentage": 26.8, "elapsed_time": "3:08:44", "remaining_time": "8:35:36"} +{"current_steps": 2327, "total_steps": 8680, "loss": 0.8386664986610413, "lr": 1.7512638541145966e-06, "epoch": 0.5361751152073733, "percentage": 26.81, "elapsed_time": "3:08:50", "remaining_time": "8:35:33"} +{"current_steps": 2328, "total_steps": 8680, "loss": 0.973692774772644, "lr": 1.7510123495144629e-06, "epoch": 0.5364055299539171, "percentage": 26.82, "elapsed_time": "3:08:55", "remaining_time": "8:35:28"} +{"current_steps": 2329, "total_steps": 8680, "loss": 0.8250089883804321, "lr": 1.7507607359058808e-06, "epoch": 0.5366359447004608, "percentage": 26.83, "elapsed_time": "3:08:59", "remaining_time": "8:35:20"} +{"current_steps": 2330, "total_steps": 8680, "loss": 0.8578102588653564, "lr": 1.750509013325372e-06, "epoch": 0.5368663594470046, "percentage": 26.84, "elapsed_time": "3:09:03", "remaining_time": "8:35:15"} +{"current_steps": 2331, "total_steps": 8680, "loss": 0.916475236415863, "lr": 
1.7502571818094732e-06, "epoch": 0.5370967741935484, "percentage": 26.85, "elapsed_time": "3:09:09", "remaining_time": "8:35:12"} +{"current_steps": 2332, "total_steps": 8680, "loss": 0.8210046291351318, "lr": 1.7500052413947377e-06, "epoch": 0.5373271889400921, "percentage": 26.87, "elapsed_time": "3:09:12", "remaining_time": "8:35:02"} +{"current_steps": 2333, "total_steps": 8680, "loss": 0.816267728805542, "lr": 1.7497531921177344e-06, "epoch": 0.537557603686636, "percentage": 26.88, "elapsed_time": "3:09:17", "remaining_time": "8:34:58"} +{"current_steps": 2334, "total_steps": 8680, "loss": 1.0091882944107056, "lr": 1.7495010340150478e-06, "epoch": 0.5377880184331797, "percentage": 26.89, "elapsed_time": "3:09:21", "remaining_time": "8:34:51"} +{"current_steps": 2335, "total_steps": 8680, "loss": 0.7549277544021606, "lr": 1.7492487671232783e-06, "epoch": 0.5380184331797235, "percentage": 26.9, "elapsed_time": "3:09:26", "remaining_time": "8:34:46"} +{"current_steps": 2336, "total_steps": 8680, "loss": 0.9584934711456299, "lr": 1.7489963914790423e-06, "epoch": 0.5382488479262673, "percentage": 26.91, "elapsed_time": "3:09:30", "remaining_time": "8:34:40"} +{"current_steps": 2337, "total_steps": 8680, "loss": 0.8189069628715515, "lr": 1.7487439071189713e-06, "epoch": 0.5384792626728111, "percentage": 26.92, "elapsed_time": "3:09:37", "remaining_time": "8:34:40"} +{"current_steps": 2338, "total_steps": 8680, "loss": 0.7529993057250977, "lr": 1.7484913140797138e-06, "epoch": 0.5387096774193548, "percentage": 26.94, "elapsed_time": "3:09:41", "remaining_time": "8:34:32"} +{"current_steps": 2339, "total_steps": 8680, "loss": 0.8611496686935425, "lr": 1.7482386123979324e-06, "epoch": 0.5389400921658987, "percentage": 26.95, "elapsed_time": "3:09:44", "remaining_time": "8:34:24"} +{"current_steps": 2340, "total_steps": 8680, "loss": 0.9400241374969482, "lr": 1.7479858021103074e-06, "epoch": 0.5391705069124424, "percentage": 26.96, "elapsed_time": "3:09:48", 
"remaining_time": "8:34:16"} +{"current_steps": 2341, "total_steps": 8680, "loss": 0.6686737537384033, "lr": 1.7477328832535332e-06, "epoch": 0.5394009216589861, "percentage": 26.97, "elapsed_time": "3:09:54", "remaining_time": "8:34:14"} +{"current_steps": 2342, "total_steps": 8680, "loss": 0.864795982837677, "lr": 1.747479855864321e-06, "epoch": 0.53963133640553, "percentage": 26.98, "elapsed_time": "3:09:59", "remaining_time": "8:34:10"} +{"current_steps": 2343, "total_steps": 8680, "loss": 0.9579563140869141, "lr": 1.7472267199793971e-06, "epoch": 0.5398617511520737, "percentage": 26.99, "elapsed_time": "3:10:04", "remaining_time": "8:34:05"} +{"current_steps": 2344, "total_steps": 8680, "loss": 0.7492884397506714, "lr": 1.746973475635504e-06, "epoch": 0.5400921658986175, "percentage": 27.0, "elapsed_time": "3:10:08", "remaining_time": "8:33:58"} +{"current_steps": 2345, "total_steps": 8680, "loss": 1.020420789718628, "lr": 1.7467201228694e-06, "epoch": 0.5403225806451613, "percentage": 27.02, "elapsed_time": "3:10:11", "remaining_time": "8:33:49"} +{"current_steps": 2346, "total_steps": 8680, "loss": 0.8277238011360168, "lr": 1.7464666617178585e-06, "epoch": 0.5405529953917051, "percentage": 27.03, "elapsed_time": "3:10:17", "remaining_time": "8:33:47"} +{"current_steps": 2347, "total_steps": 8680, "loss": 0.8160337209701538, "lr": 1.7462130922176694e-06, "epoch": 0.5407834101382488, "percentage": 27.04, "elapsed_time": "3:10:22", "remaining_time": "8:33:41"} +{"current_steps": 2348, "total_steps": 8680, "loss": 0.8742454648017883, "lr": 1.7459594144056378e-06, "epoch": 0.5410138248847927, "percentage": 27.05, "elapsed_time": "3:10:26", "remaining_time": "8:33:34"} +{"current_steps": 2349, "total_steps": 8680, "loss": 0.7987914085388184, "lr": 1.7457056283185847e-06, "epoch": 0.5412442396313364, "percentage": 27.06, "elapsed_time": "3:10:31", "remaining_time": "8:33:30"} +{"current_steps": 2350, "total_steps": 8680, "loss": 0.6917734146118164, "lr": 
1.7454517339933467e-06, "epoch": 0.5414746543778802, "percentage": 27.07, "elapsed_time": "3:10:36", "remaining_time": "8:33:24"} +{"current_steps": 2351, "total_steps": 8680, "loss": 0.8338258266448975, "lr": 1.7451977314667763e-06, "epoch": 0.541705069124424, "percentage": 27.09, "elapsed_time": "3:10:40", "remaining_time": "8:33:17"} +{"current_steps": 2352, "total_steps": 8680, "loss": 0.8308743238449097, "lr": 1.7449436207757418e-06, "epoch": 0.5419354838709678, "percentage": 27.1, "elapsed_time": "3:10:45", "remaining_time": "8:33:13"} +{"current_steps": 2353, "total_steps": 8680, "loss": 0.7843145728111267, "lr": 1.744689401957127e-06, "epoch": 0.5421658986175115, "percentage": 27.11, "elapsed_time": "3:10:50", "remaining_time": "8:33:10"} +{"current_steps": 2354, "total_steps": 8680, "loss": 0.9088687896728516, "lr": 1.7444350750478314e-06, "epoch": 0.5423963133640552, "percentage": 27.12, "elapsed_time": "3:10:55", "remaining_time": "8:33:05"} +{"current_steps": 2355, "total_steps": 8680, "loss": 0.876841127872467, "lr": 1.74418064008477e-06, "epoch": 0.5426267281105991, "percentage": 27.13, "elapsed_time": "3:11:00", "remaining_time": "8:32:59"} +{"current_steps": 2356, "total_steps": 8680, "loss": 0.7169051170349121, "lr": 1.743926097104874e-06, "epoch": 0.5428571428571428, "percentage": 27.14, "elapsed_time": "3:11:03", "remaining_time": "8:32:51"} +{"current_steps": 2357, "total_steps": 8680, "loss": 0.7979093194007874, "lr": 1.7436714461450897e-06, "epoch": 0.5430875576036867, "percentage": 27.15, "elapsed_time": "3:11:07", "remaining_time": "8:32:44"} +{"current_steps": 2358, "total_steps": 8680, "loss": 0.9152545928955078, "lr": 1.7434166872423795e-06, "epoch": 0.5433179723502304, "percentage": 27.17, "elapsed_time": "3:11:11", "remaining_time": "8:32:36"} +{"current_steps": 2359, "total_steps": 8680, "loss": 0.8968983888626099, "lr": 1.7431618204337212e-06, "epoch": 0.5435483870967742, "percentage": 27.18, "elapsed_time": "3:11:17", 
"remaining_time": "8:32:32"} +{"current_steps": 2360, "total_steps": 8680, "loss": 0.7591085433959961, "lr": 1.7429068457561086e-06, "epoch": 0.543778801843318, "percentage": 27.19, "elapsed_time": "3:11:23", "remaining_time": "8:32:31"} +{"current_steps": 2361, "total_steps": 8680, "loss": 0.6931861639022827, "lr": 1.7426517632465508e-06, "epoch": 0.5440092165898618, "percentage": 27.2, "elapsed_time": "3:11:28", "remaining_time": "8:32:27"} +{"current_steps": 2362, "total_steps": 8680, "loss": 0.7715095281600952, "lr": 1.7423965729420729e-06, "epoch": 0.5442396313364055, "percentage": 27.21, "elapsed_time": "3:11:33", "remaining_time": "8:32:24"} +{"current_steps": 2363, "total_steps": 8680, "loss": 0.8282119035720825, "lr": 1.742141274879715e-06, "epoch": 0.5444700460829494, "percentage": 27.22, "elapsed_time": "3:11:37", "remaining_time": "8:32:16"} +{"current_steps": 2364, "total_steps": 8680, "loss": 0.7595704197883606, "lr": 1.7418858690965337e-06, "epoch": 0.5447004608294931, "percentage": 27.24, "elapsed_time": "3:11:42", "remaining_time": "8:32:12"} +{"current_steps": 2365, "total_steps": 8680, "loss": 0.8738422393798828, "lr": 1.7416303556296005e-06, "epoch": 0.5449308755760369, "percentage": 27.25, "elapsed_time": "3:11:47", "remaining_time": "8:32:07"} +{"current_steps": 2366, "total_steps": 8680, "loss": 0.8399837017059326, "lr": 1.741374734516003e-06, "epoch": 0.5451612903225806, "percentage": 27.26, "elapsed_time": "3:11:51", "remaining_time": "8:32:01"} +{"current_steps": 2367, "total_steps": 8680, "loss": 0.8213151693344116, "lr": 1.7411190057928442e-06, "epoch": 0.5453917050691244, "percentage": 27.27, "elapsed_time": "3:11:56", "remaining_time": "8:31:55"} +{"current_steps": 2368, "total_steps": 8680, "loss": 0.748835563659668, "lr": 1.740863169497243e-06, "epoch": 0.5456221198156682, "percentage": 27.28, "elapsed_time": "3:12:00", "remaining_time": "8:31:48"} +{"current_steps": 2369, "total_steps": 8680, "loss": 0.9222339391708374, "lr": 
1.7406072256663333e-06, "epoch": 0.5458525345622119, "percentage": 27.29, "elapsed_time": "3:12:05", "remaining_time": "8:31:44"} +{"current_steps": 2370, "total_steps": 8680, "loss": 0.6543160676956177, "lr": 1.7403511743372655e-06, "epoch": 0.5460829493087558, "percentage": 27.3, "elapsed_time": "3:12:11", "remaining_time": "8:31:41"} +{"current_steps": 2371, "total_steps": 8680, "loss": 0.9828567504882812, "lr": 1.7400950155472046e-06, "epoch": 0.5463133640552995, "percentage": 27.32, "elapsed_time": "3:12:15", "remaining_time": "8:31:35"} +{"current_steps": 2372, "total_steps": 8680, "loss": 0.95346599817276, "lr": 1.739838749333332e-06, "epoch": 0.5465437788018433, "percentage": 27.33, "elapsed_time": "3:12:20", "remaining_time": "8:31:29"} +{"current_steps": 2373, "total_steps": 8680, "loss": 0.626889705657959, "lr": 1.7395823757328442e-06, "epoch": 0.5467741935483871, "percentage": 27.34, "elapsed_time": "3:12:26", "remaining_time": "8:31:29"} +{"current_steps": 2374, "total_steps": 8680, "loss": 0.8152071833610535, "lr": 1.739325894782954e-06, "epoch": 0.5470046082949309, "percentage": 27.35, "elapsed_time": "3:12:30", "remaining_time": "8:31:22"} +{"current_steps": 2375, "total_steps": 8680, "loss": 0.8244980573654175, "lr": 1.7390693065208889e-06, "epoch": 0.5472350230414746, "percentage": 27.36, "elapsed_time": "3:12:35", "remaining_time": "8:31:16"} +{"current_steps": 2376, "total_steps": 8680, "loss": 0.8234372138977051, "lr": 1.738812610983892e-06, "epoch": 0.5474654377880185, "percentage": 27.37, "elapsed_time": "3:12:40", "remaining_time": "8:31:11"} +{"current_steps": 2377, "total_steps": 8680, "loss": 0.9334712624549866, "lr": 1.7385558082092228e-06, "epoch": 0.5476958525345622, "percentage": 27.38, "elapsed_time": "3:12:45", "remaining_time": "8:31:07"} +{"current_steps": 2378, "total_steps": 8680, "loss": 0.7873882055282593, "lr": 1.7382988982341557e-06, "epoch": 0.5479262672811059, "percentage": 27.4, "elapsed_time": "3:12:49", 
"remaining_time": "8:30:59"} +{"current_steps": 2379, "total_steps": 8680, "loss": 0.7971000671386719, "lr": 1.7380418810959814e-06, "epoch": 0.5481566820276498, "percentage": 27.41, "elapsed_time": "3:12:53", "remaining_time": "8:30:54"} +{"current_steps": 2380, "total_steps": 8680, "loss": 0.8617004156112671, "lr": 1.7377847568320046e-06, "epoch": 0.5483870967741935, "percentage": 27.42, "elapsed_time": "3:13:01", "remaining_time": "8:30:56"} +{"current_steps": 2381, "total_steps": 8680, "loss": 0.6798374056816101, "lr": 1.7375275254795472e-06, "epoch": 0.5486175115207373, "percentage": 27.43, "elapsed_time": "3:13:06", "remaining_time": "8:30:53"} +{"current_steps": 2382, "total_steps": 8680, "loss": 0.8621633052825928, "lr": 1.7372701870759459e-06, "epoch": 0.5488479262672811, "percentage": 27.44, "elapsed_time": "3:13:12", "remaining_time": "8:30:50"} +{"current_steps": 2383, "total_steps": 8680, "loss": 0.6533470153808594, "lr": 1.7370127416585527e-06, "epoch": 0.5490783410138249, "percentage": 27.45, "elapsed_time": "3:13:16", "remaining_time": "8:30:43"} +{"current_steps": 2384, "total_steps": 8680, "loss": 0.8854461908340454, "lr": 1.736755189264736e-06, "epoch": 0.5493087557603686, "percentage": 27.47, "elapsed_time": "3:13:21", "remaining_time": "8:30:38"} +{"current_steps": 2385, "total_steps": 8680, "loss": 0.9461240768432617, "lr": 1.7364975299318786e-06, "epoch": 0.5495391705069125, "percentage": 27.48, "elapsed_time": "3:13:24", "remaining_time": "8:30:29"} +{"current_steps": 2386, "total_steps": 8680, "loss": 0.8936882019042969, "lr": 1.73623976369738e-06, "epoch": 0.5497695852534562, "percentage": 27.49, "elapsed_time": "3:13:28", "remaining_time": "8:30:22"} +{"current_steps": 2387, "total_steps": 8680, "loss": 0.8177640438079834, "lr": 1.7359818905986544e-06, "epoch": 0.55, "percentage": 27.5, "elapsed_time": "3:13:32", "remaining_time": "8:30:14"} +{"current_steps": 2388, "total_steps": 8680, "loss": 0.793328046798706, "lr": 
1.7357239106731317e-06, "epoch": 0.5502304147465438, "percentage": 27.51, "elapsed_time": "3:13:38", "remaining_time": "8:30:13"} +{"current_steps": 2389, "total_steps": 8680, "loss": 0.8837069272994995, "lr": 1.7354658239582572e-06, "epoch": 0.5504608294930876, "percentage": 27.52, "elapsed_time": "3:13:43", "remaining_time": "8:30:09"} +{"current_steps": 2390, "total_steps": 8680, "loss": 0.8801138401031494, "lr": 1.7352076304914918e-06, "epoch": 0.5506912442396313, "percentage": 27.53, "elapsed_time": "3:13:48", "remaining_time": "8:30:03"} +{"current_steps": 2391, "total_steps": 8680, "loss": 0.865073025226593, "lr": 1.7349493303103123e-06, "epoch": 0.5509216589861751, "percentage": 27.55, "elapsed_time": "3:13:54", "remaining_time": "8:30:02"} +{"current_steps": 2392, "total_steps": 8680, "loss": 0.8712339401245117, "lr": 1.7346909234522107e-06, "epoch": 0.5511520737327189, "percentage": 27.56, "elapsed_time": "3:14:00", "remaining_time": "8:30:01"} +{"current_steps": 2393, "total_steps": 8680, "loss": 0.7689294815063477, "lr": 1.7344324099546938e-06, "epoch": 0.5513824884792626, "percentage": 27.57, "elapsed_time": "3:14:06", "remaining_time": "8:29:57"} +{"current_steps": 2394, "total_steps": 8680, "loss": 0.9228999614715576, "lr": 1.7341737898552851e-06, "epoch": 0.5516129032258065, "percentage": 27.58, "elapsed_time": "3:14:11", "remaining_time": "8:29:52"} +{"current_steps": 2395, "total_steps": 8680, "loss": 0.9473327398300171, "lr": 1.7339150631915228e-06, "epoch": 0.5518433179723502, "percentage": 27.59, "elapsed_time": "3:14:15", "remaining_time": "8:29:47"} +{"current_steps": 2396, "total_steps": 8680, "loss": 0.7724621295928955, "lr": 1.7336562300009604e-06, "epoch": 0.552073732718894, "percentage": 27.6, "elapsed_time": "3:14:19", "remaining_time": "8:29:40"} +{"current_steps": 2397, "total_steps": 8680, "loss": 0.8646600246429443, "lr": 1.7333972903211675e-06, "epoch": 0.5523041474654378, "percentage": 27.62, "elapsed_time": "3:14:23", 
"remaining_time": "8:29:33"} +{"current_steps": 2398, "total_steps": 8680, "loss": 0.7143402099609375, "lr": 1.7331382441897286e-06, "epoch": 0.5525345622119816, "percentage": 27.63, "elapsed_time": "3:14:29", "remaining_time": "8:29:30"} +{"current_steps": 2399, "total_steps": 8680, "loss": 0.8229624032974243, "lr": 1.7328790916442446e-06, "epoch": 0.5527649769585253, "percentage": 27.64, "elapsed_time": "3:14:33", "remaining_time": "8:29:23"} +{"current_steps": 2400, "total_steps": 8680, "loss": 0.7244875431060791, "lr": 1.7326198327223303e-06, "epoch": 0.5529953917050692, "percentage": 27.65, "elapsed_time": "3:14:38", "remaining_time": "8:29:17"} +{"current_steps": 2401, "total_steps": 8680, "loss": 0.7797688245773315, "lr": 1.7323604674616173e-06, "epoch": 0.5532258064516129, "percentage": 27.66, "elapsed_time": "3:14:43", "remaining_time": "8:29:14"} +{"current_steps": 2402, "total_steps": 8680, "loss": 0.752421498298645, "lr": 1.7321009958997519e-06, "epoch": 0.5534562211981566, "percentage": 27.67, "elapsed_time": "3:14:48", "remaining_time": "8:29:09"} +{"current_steps": 2403, "total_steps": 8680, "loss": 0.8285892009735107, "lr": 1.7318414180743962e-06, "epoch": 0.5536866359447005, "percentage": 27.68, "elapsed_time": "3:14:53", "remaining_time": "8:29:03"} +{"current_steps": 2404, "total_steps": 8680, "loss": 0.8247888088226318, "lr": 1.7315817340232272e-06, "epoch": 0.5539170506912442, "percentage": 27.7, "elapsed_time": "3:14:58", "remaining_time": "8:28:59"} +{"current_steps": 2405, "total_steps": 8680, "loss": 0.7713418006896973, "lr": 1.7313219437839384e-06, "epoch": 0.554147465437788, "percentage": 27.71, "elapsed_time": "3:15:02", "remaining_time": "8:28:53"} +{"current_steps": 2406, "total_steps": 8680, "loss": 0.8748825788497925, "lr": 1.7310620473942374e-06, "epoch": 0.5543778801843318, "percentage": 27.72, "elapsed_time": "3:15:07", "remaining_time": "8:28:48"} +{"current_steps": 2407, "total_steps": 8680, "loss": 0.9255902767181396, "lr": 
1.730802044891848e-06, "epoch": 0.5546082949308756, "percentage": 27.73, "elapsed_time": "3:15:12", "remaining_time": "8:28:45"} +{"current_steps": 2408, "total_steps": 8680, "loss": 0.7226976156234741, "lr": 1.7305419363145093e-06, "epoch": 0.5548387096774193, "percentage": 27.74, "elapsed_time": "3:15:17", "remaining_time": "8:28:39"} +{"current_steps": 2409, "total_steps": 8680, "loss": 0.9024704694747925, "lr": 1.7302817216999754e-06, "epoch": 0.5550691244239632, "percentage": 27.75, "elapsed_time": "3:15:20", "remaining_time": "8:28:31"} +{"current_steps": 2410, "total_steps": 8680, "loss": 0.7857767343521118, "lr": 1.7300214010860168e-06, "epoch": 0.5552995391705069, "percentage": 27.76, "elapsed_time": "3:15:25", "remaining_time": "8:28:24"} +{"current_steps": 2411, "total_steps": 8680, "loss": 0.7280064821243286, "lr": 1.7297609745104183e-06, "epoch": 0.5555299539170507, "percentage": 27.78, "elapsed_time": "3:15:30", "remaining_time": "8:28:21"} +{"current_steps": 2412, "total_steps": 8680, "loss": 0.8909369111061096, "lr": 1.72950044201098e-06, "epoch": 0.5557603686635945, "percentage": 27.79, "elapsed_time": "3:15:35", "remaining_time": "8:28:16"} +{"current_steps": 2413, "total_steps": 8680, "loss": 0.8543871641159058, "lr": 1.7292398036255183e-06, "epoch": 0.5559907834101383, "percentage": 27.8, "elapsed_time": "3:15:39", "remaining_time": "8:28:10"} +{"current_steps": 2414, "total_steps": 8680, "loss": 0.6934928894042969, "lr": 1.7289790593918648e-06, "epoch": 0.556221198156682, "percentage": 27.81, "elapsed_time": "3:15:44", "remaining_time": "8:28:03"} +{"current_steps": 2415, "total_steps": 8680, "loss": 0.6323058605194092, "lr": 1.7287182093478658e-06, "epoch": 0.5564516129032258, "percentage": 27.82, "elapsed_time": "3:15:49", "remaining_time": "8:28:01"} +{"current_steps": 2416, "total_steps": 8680, "loss": 0.8607437014579773, "lr": 1.7284572535313833e-06, "epoch": 0.5566820276497696, "percentage": 27.83, "elapsed_time": "3:15:54", 
"remaining_time": "8:27:54"} +{"current_steps": 2417, "total_steps": 8680, "loss": 0.932594358921051, "lr": 1.7281961919802948e-06, "epoch": 0.5569124423963133, "percentage": 27.85, "elapsed_time": "3:15:57", "remaining_time": "8:27:47"} +{"current_steps": 2418, "total_steps": 8680, "loss": 0.7239062786102295, "lr": 1.727935024732493e-06, "epoch": 0.5571428571428572, "percentage": 27.86, "elapsed_time": "3:16:02", "remaining_time": "8:27:42"} +{"current_steps": 2419, "total_steps": 8680, "loss": 0.7600498199462891, "lr": 1.727673751825886e-06, "epoch": 0.5573732718894009, "percentage": 27.87, "elapsed_time": "3:16:07", "remaining_time": "8:27:36"} +{"current_steps": 2420, "total_steps": 8680, "loss": 0.6888710260391235, "lr": 1.7274123732983977e-06, "epoch": 0.5576036866359447, "percentage": 27.88, "elapsed_time": "3:16:12", "remaining_time": "8:27:31"} +{"current_steps": 2421, "total_steps": 8680, "loss": 0.9768370389938354, "lr": 1.7271508891879657e-06, "epoch": 0.5578341013824885, "percentage": 27.89, "elapsed_time": "3:16:15", "remaining_time": "8:27:23"} +{"current_steps": 2422, "total_steps": 8680, "loss": 0.7302272319793701, "lr": 1.7268892995325453e-06, "epoch": 0.5580645161290323, "percentage": 27.9, "elapsed_time": "3:16:22", "remaining_time": "8:27:23"} +{"current_steps": 2423, "total_steps": 8680, "loss": 0.7664496898651123, "lr": 1.7266276043701052e-06, "epoch": 0.558294930875576, "percentage": 27.91, "elapsed_time": "3:16:27", "remaining_time": "8:27:18"} +{"current_steps": 2424, "total_steps": 8680, "loss": 0.7672723531723022, "lr": 1.72636580373863e-06, "epoch": 0.5585253456221199, "percentage": 27.93, "elapsed_time": "3:16:33", "remaining_time": "8:27:16"} +{"current_steps": 2425, "total_steps": 8680, "loss": 0.7467625141143799, "lr": 1.7261038976761203e-06, "epoch": 0.5587557603686636, "percentage": 27.94, "elapsed_time": "3:16:38", "remaining_time": "8:27:12"} +{"current_steps": 2426, "total_steps": 8680, "loss": 0.899692177772522, "lr": 
1.7258418862205908e-06, "epoch": 0.5589861751152074, "percentage": 27.95, "elapsed_time": "3:16:43", "remaining_time": "8:27:07"} +{"current_steps": 2427, "total_steps": 8680, "loss": 0.9654138088226318, "lr": 1.7255797694100724e-06, "epoch": 0.5592165898617512, "percentage": 27.96, "elapsed_time": "3:16:47", "remaining_time": "8:27:00"} +{"current_steps": 2428, "total_steps": 8680, "loss": 0.8487396836280823, "lr": 1.725317547282611e-06, "epoch": 0.5594470046082949, "percentage": 27.97, "elapsed_time": "3:16:51", "remaining_time": "8:26:53"} +{"current_steps": 2429, "total_steps": 8680, "loss": 0.7785199284553528, "lr": 1.7250552198762682e-06, "epoch": 0.5596774193548387, "percentage": 27.98, "elapsed_time": "3:16:55", "remaining_time": "8:26:47"} +{"current_steps": 2430, "total_steps": 8680, "loss": 0.9243934750556946, "lr": 1.7247927872291198e-06, "epoch": 0.5599078341013825, "percentage": 28.0, "elapsed_time": "3:16:59", "remaining_time": "8:26:40"} +{"current_steps": 2431, "total_steps": 8680, "loss": 0.8674443960189819, "lr": 1.724530249379258e-06, "epoch": 0.5601382488479263, "percentage": 28.01, "elapsed_time": "3:17:04", "remaining_time": "8:26:36"} +{"current_steps": 2432, "total_steps": 8680, "loss": 0.8022270202636719, "lr": 1.7242676063647895e-06, "epoch": 0.56036866359447, "percentage": 28.02, "elapsed_time": "3:17:11", "remaining_time": "8:26:36"} +{"current_steps": 2433, "total_steps": 8680, "loss": 0.8696796894073486, "lr": 1.7240048582238367e-06, "epoch": 0.5605990783410139, "percentage": 28.03, "elapsed_time": "3:17:16", "remaining_time": "8:26:31"} +{"current_steps": 2434, "total_steps": 8680, "loss": 0.7752439975738525, "lr": 1.7237420049945374e-06, "epoch": 0.5608294930875576, "percentage": 28.04, "elapsed_time": "3:17:21", "remaining_time": "8:26:27"} +{"current_steps": 2435, "total_steps": 8680, "loss": 0.7660201787948608, "lr": 1.723479046715044e-06, "epoch": 0.5610599078341014, "percentage": 28.05, "elapsed_time": "3:17:24", 
"remaining_time": "8:26:18"} +{"current_steps": 2436, "total_steps": 8680, "loss": 0.9319918155670166, "lr": 1.7232159834235249e-06, "epoch": 0.5612903225806452, "percentage": 28.06, "elapsed_time": "3:17:30", "remaining_time": "8:26:14"} +{"current_steps": 2437, "total_steps": 8680, "loss": 0.8175421357154846, "lr": 1.722952815158163e-06, "epoch": 0.561520737327189, "percentage": 28.08, "elapsed_time": "3:17:34", "remaining_time": "8:26:07"} +{"current_steps": 2438, "total_steps": 8680, "loss": 0.6959598064422607, "lr": 1.7226895419571573e-06, "epoch": 0.5617511520737327, "percentage": 28.09, "elapsed_time": "3:17:41", "remaining_time": "8:26:07"} +{"current_steps": 2439, "total_steps": 8680, "loss": 0.8629111051559448, "lr": 1.722426163858721e-06, "epoch": 0.5619815668202764, "percentage": 28.1, "elapsed_time": "3:17:45", "remaining_time": "8:26:00"} +{"current_steps": 2440, "total_steps": 8680, "loss": 0.8222612142562866, "lr": 1.7221626809010833e-06, "epoch": 0.5622119815668203, "percentage": 28.11, "elapsed_time": "3:17:49", "remaining_time": "8:25:54"} +{"current_steps": 2441, "total_steps": 8680, "loss": 0.8329352140426636, "lr": 1.721899093122489e-06, "epoch": 0.562442396313364, "percentage": 28.12, "elapsed_time": "3:17:55", "remaining_time": "8:25:51"} +{"current_steps": 2442, "total_steps": 8680, "loss": 0.8777236938476562, "lr": 1.7216354005611966e-06, "epoch": 0.5626728110599079, "percentage": 28.13, "elapsed_time": "3:17:59", "remaining_time": "8:25:45"} +{"current_steps": 2443, "total_steps": 8680, "loss": 0.8487246036529541, "lr": 1.7213716032554814e-06, "epoch": 0.5629032258064516, "percentage": 28.15, "elapsed_time": "3:18:03", "remaining_time": "8:25:38"} +{"current_steps": 2444, "total_steps": 8680, "loss": 0.8429645299911499, "lr": 1.7211077012436327e-06, "epoch": 0.5631336405529954, "percentage": 28.16, "elapsed_time": "3:18:08", "remaining_time": "8:25:35"} +{"current_steps": 2445, "total_steps": 8680, "loss": 0.7683241367340088, "lr": 
1.720843694563956e-06, "epoch": 0.5633640552995391, "percentage": 28.17, "elapsed_time": "3:18:14", "remaining_time": "8:25:32"} +{"current_steps": 2446, "total_steps": 8680, "loss": 0.8468153476715088, "lr": 1.7205795832547715e-06, "epoch": 0.563594470046083, "percentage": 28.18, "elapsed_time": "3:18:20", "remaining_time": "8:25:29"} +{"current_steps": 2447, "total_steps": 8680, "loss": 0.7957276105880737, "lr": 1.7203153673544136e-06, "epoch": 0.5638248847926267, "percentage": 28.19, "elapsed_time": "3:18:25", "remaining_time": "8:25:24"} +{"current_steps": 2448, "total_steps": 8680, "loss": 0.703586757183075, "lr": 1.7200510469012343e-06, "epoch": 0.5640552995391706, "percentage": 28.2, "elapsed_time": "3:18:30", "remaining_time": "8:25:20"} +{"current_steps": 2449, "total_steps": 8680, "loss": 0.8399583101272583, "lr": 1.7197866219335988e-06, "epoch": 0.5642857142857143, "percentage": 28.21, "elapsed_time": "3:18:35", "remaining_time": "8:25:16"} +{"current_steps": 2450, "total_steps": 8680, "loss": 0.8198127746582031, "lr": 1.7195220924898882e-06, "epoch": 0.5645161290322581, "percentage": 28.23, "elapsed_time": "3:18:39", "remaining_time": "8:25:09"} +{"current_steps": 2451, "total_steps": 8680, "loss": 0.8345620632171631, "lr": 1.7192574586084977e-06, "epoch": 0.5647465437788018, "percentage": 28.24, "elapsed_time": "3:18:44", "remaining_time": "8:25:04"} +{"current_steps": 2452, "total_steps": 8680, "loss": 0.7717207670211792, "lr": 1.71899272032784e-06, "epoch": 0.5649769585253456, "percentage": 28.25, "elapsed_time": "3:18:49", "remaining_time": "8:24:59"} +{"current_steps": 2453, "total_steps": 8680, "loss": 0.9178022146224976, "lr": 1.7187278776863402e-06, "epoch": 0.5652073732718894, "percentage": 28.26, "elapsed_time": "3:18:53", "remaining_time": "8:24:53"} +{"current_steps": 2454, "total_steps": 8680, "loss": 0.802221417427063, "lr": 1.7184629307224405e-06, "epoch": 0.5654377880184331, "percentage": 28.27, "elapsed_time": "3:18:58", 
"remaining_time": "8:24:49"} +{"current_steps": 2455, "total_steps": 8680, "loss": 0.8785420656204224, "lr": 1.718197879474598e-06, "epoch": 0.565668202764977, "percentage": 28.28, "elapsed_time": "3:19:03", "remaining_time": "8:24:44"} +{"current_steps": 2456, "total_steps": 8680, "loss": 0.866797924041748, "lr": 1.7179327239812835e-06, "epoch": 0.5658986175115207, "percentage": 28.29, "elapsed_time": "3:19:08", "remaining_time": "8:24:40"} +{"current_steps": 2457, "total_steps": 8680, "loss": 0.8483223915100098, "lr": 1.7176674642809848e-06, "epoch": 0.5661290322580645, "percentage": 28.31, "elapsed_time": "3:19:14", "remaining_time": "8:24:36"} +{"current_steps": 2458, "total_steps": 8680, "loss": 0.815066933631897, "lr": 1.7174021004122038e-06, "epoch": 0.5663594470046083, "percentage": 28.32, "elapsed_time": "3:19:19", "remaining_time": "8:24:34"} +{"current_steps": 2459, "total_steps": 8680, "loss": 0.8584767580032349, "lr": 1.7171366324134575e-06, "epoch": 0.5665898617511521, "percentage": 28.33, "elapsed_time": "3:19:24", "remaining_time": "8:24:27"} +{"current_steps": 2460, "total_steps": 8680, "loss": 0.8710953593254089, "lr": 1.7168710603232783e-06, "epoch": 0.5668202764976958, "percentage": 28.34, "elapsed_time": "3:19:28", "remaining_time": "8:24:21"} +{"current_steps": 2461, "total_steps": 8680, "loss": 0.8174586892127991, "lr": 1.7166053841802137e-06, "epoch": 0.5670506912442397, "percentage": 28.35, "elapsed_time": "3:19:32", "remaining_time": "8:24:15"} +{"current_steps": 2462, "total_steps": 8680, "loss": 0.7240795493125916, "lr": 1.7163396040228263e-06, "epoch": 0.5672811059907834, "percentage": 28.36, "elapsed_time": "3:19:38", "remaining_time": "8:24:11"} +{"current_steps": 2463, "total_steps": 8680, "loss": 0.8026313781738281, "lr": 1.7160737198896938e-06, "epoch": 0.5675115207373271, "percentage": 28.38, "elapsed_time": "3:19:41", "remaining_time": "8:24:03"} +{"current_steps": 2464, "total_steps": 8680, "loss": 0.8170863389968872, "lr": 
1.7158077318194088e-06, "epoch": 0.567741935483871, "percentage": 28.39, "elapsed_time": "3:19:46", "remaining_time": "8:23:59"} +{"current_steps": 2465, "total_steps": 8680, "loss": 0.7524861097335815, "lr": 1.7155416398505794e-06, "epoch": 0.5679723502304147, "percentage": 28.4, "elapsed_time": "3:19:52", "remaining_time": "8:23:56"} +{"current_steps": 2466, "total_steps": 8680, "loss": 0.9895739555358887, "lr": 1.7152754440218278e-06, "epoch": 0.5682027649769585, "percentage": 28.41, "elapsed_time": "3:19:57", "remaining_time": "8:23:53"} +{"current_steps": 2467, "total_steps": 8680, "loss": 0.840786874294281, "lr": 1.7150091443717924e-06, "epoch": 0.5684331797235023, "percentage": 28.42, "elapsed_time": "3:20:05", "remaining_time": "8:23:54"} +{"current_steps": 2468, "total_steps": 8680, "loss": 0.8896929025650024, "lr": 1.7147427409391265e-06, "epoch": 0.5686635944700461, "percentage": 28.43, "elapsed_time": "3:20:10", "remaining_time": "8:23:50"} +{"current_steps": 2469, "total_steps": 8680, "loss": 0.9940589666366577, "lr": 1.714476233762498e-06, "epoch": 0.5688940092165898, "percentage": 28.44, "elapsed_time": "3:20:15", "remaining_time": "8:23:45"} +{"current_steps": 2470, "total_steps": 8680, "loss": 0.8827046155929565, "lr": 1.7142096228805896e-06, "epoch": 0.5691244239631337, "percentage": 28.46, "elapsed_time": "3:20:20", "remaining_time": "8:23:41"} +{"current_steps": 2471, "total_steps": 8680, "loss": 0.8402417302131653, "lr": 1.7139429083321003e-06, "epoch": 0.5693548387096774, "percentage": 28.47, "elapsed_time": "3:20:25", "remaining_time": "8:23:36"} +{"current_steps": 2472, "total_steps": 8680, "loss": 0.9298208951950073, "lr": 1.7136760901557428e-06, "epoch": 0.5695852534562212, "percentage": 28.48, "elapsed_time": "3:20:29", "remaining_time": "8:23:30"} +{"current_steps": 2473, "total_steps": 8680, "loss": 0.7272841930389404, "lr": 1.7134091683902456e-06, "epoch": 0.569815668202765, "percentage": 28.49, "elapsed_time": "3:20:34", 
"remaining_time": "8:23:25"} +{"current_steps": 2474, "total_steps": 8680, "loss": 0.7767274379730225, "lr": 1.7131421430743522e-06, "epoch": 0.5700460829493088, "percentage": 28.5, "elapsed_time": "3:20:39", "remaining_time": "8:23:20"} +{"current_steps": 2475, "total_steps": 8680, "loss": 0.8381883502006531, "lr": 1.7128750142468205e-06, "epoch": 0.5702764976958525, "percentage": 28.51, "elapsed_time": "3:20:43", "remaining_time": "8:23:12"} +{"current_steps": 2476, "total_steps": 8680, "loss": 0.6917109489440918, "lr": 1.7126077819464247e-06, "epoch": 0.5705069124423963, "percentage": 28.53, "elapsed_time": "3:20:48", "remaining_time": "8:23:10"} +{"current_steps": 2477, "total_steps": 8680, "loss": 0.848122239112854, "lr": 1.712340446211952e-06, "epoch": 0.5707373271889401, "percentage": 28.54, "elapsed_time": "3:20:53", "remaining_time": "8:23:04"} +{"current_steps": 2478, "total_steps": 8680, "loss": 0.7880194187164307, "lr": 1.7120730070822074e-06, "epoch": 0.5709677419354838, "percentage": 28.55, "elapsed_time": "3:20:59", "remaining_time": "8:23:01"} +{"current_steps": 2479, "total_steps": 8680, "loss": 0.8782297372817993, "lr": 1.7118054645960077e-06, "epoch": 0.5711981566820277, "percentage": 28.56, "elapsed_time": "3:21:04", "remaining_time": "8:22:57"} +{"current_steps": 2480, "total_steps": 8680, "loss": 0.9030005931854248, "lr": 1.7115378187921876e-06, "epoch": 0.5714285714285714, "percentage": 28.57, "elapsed_time": "3:21:09", "remaining_time": "8:22:52"} +{"current_steps": 2481, "total_steps": 8680, "loss": 0.9950683116912842, "lr": 1.7112700697095953e-06, "epoch": 0.5716589861751152, "percentage": 28.58, "elapsed_time": "3:21:13", "remaining_time": "8:22:46"} +{"current_steps": 2482, "total_steps": 8680, "loss": 0.8825187683105469, "lr": 1.7110022173870933e-06, "epoch": 0.571889400921659, "percentage": 28.59, "elapsed_time": "3:21:17", "remaining_time": "8:22:39"} +{"current_steps": 2483, "total_steps": 8680, "loss": 0.7918775081634521, "lr": 
1.710734261863561e-06, "epoch": 0.5721198156682028, "percentage": 28.61, "elapsed_time": "3:21:22", "remaining_time": "8:22:36"} +{"current_steps": 2484, "total_steps": 8680, "loss": 1.0219467878341675, "lr": 1.7104662031778916e-06, "epoch": 0.5723502304147465, "percentage": 28.62, "elapsed_time": "3:21:27", "remaining_time": "8:22:30"} +{"current_steps": 2485, "total_steps": 8680, "loss": 0.7633316516876221, "lr": 1.7101980413689931e-06, "epoch": 0.5725806451612904, "percentage": 28.63, "elapsed_time": "3:21:33", "remaining_time": "8:22:28"} +{"current_steps": 2486, "total_steps": 8680, "loss": 0.8972171545028687, "lr": 1.7099297764757891e-06, "epoch": 0.5728110599078341, "percentage": 28.64, "elapsed_time": "3:21:36", "remaining_time": "8:22:20"} +{"current_steps": 2487, "total_steps": 8680, "loss": 0.9467268586158752, "lr": 1.7096614085372183e-06, "epoch": 0.5730414746543778, "percentage": 28.65, "elapsed_time": "3:21:41", "remaining_time": "8:22:14"} +{"current_steps": 2488, "total_steps": 8680, "loss": 0.7688668370246887, "lr": 1.709392937592233e-06, "epoch": 0.5732718894009217, "percentage": 28.66, "elapsed_time": "3:21:45", "remaining_time": "8:22:07"} +{"current_steps": 2489, "total_steps": 8680, "loss": 0.8521163463592529, "lr": 1.7091243636798022e-06, "epoch": 0.5735023041474654, "percentage": 28.68, "elapsed_time": "3:21:50", "remaining_time": "8:22:01"} +{"current_steps": 2490, "total_steps": 8680, "loss": 0.937403678894043, "lr": 1.7088556868389087e-06, "epoch": 0.5737327188940092, "percentage": 28.69, "elapsed_time": "3:21:54", "remaining_time": "8:21:57"} +{"current_steps": 2491, "total_steps": 8680, "loss": 0.929175853729248, "lr": 1.7085869071085507e-06, "epoch": 0.573963133640553, "percentage": 28.7, "elapsed_time": "3:21:59", "remaining_time": "8:21:50"} +{"current_steps": 2492, "total_steps": 8680, "loss": 0.8213154673576355, "lr": 1.708318024527741e-06, "epoch": 0.5741935483870968, "percentage": 28.71, "elapsed_time": "3:22:03", 
"remaining_time": "8:21:43"} +{"current_steps": 2493, "total_steps": 8680, "loss": 0.7666962146759033, "lr": 1.708049039135508e-06, "epoch": 0.5744239631336405, "percentage": 28.72, "elapsed_time": "3:22:08", "remaining_time": "8:21:40"} +{"current_steps": 2494, "total_steps": 8680, "loss": 0.9787846803665161, "lr": 1.707779950970894e-06, "epoch": 0.5746543778801844, "percentage": 28.73, "elapsed_time": "3:22:12", "remaining_time": "8:21:32"} +{"current_steps": 2495, "total_steps": 8680, "loss": 0.9688804149627686, "lr": 1.7075107600729575e-06, "epoch": 0.5748847926267281, "percentage": 28.74, "elapsed_time": "3:22:15", "remaining_time": "8:21:24"} +{"current_steps": 2496, "total_steps": 8680, "loss": 0.7186019420623779, "lr": 1.7072414664807706e-06, "epoch": 0.5751152073732719, "percentage": 28.76, "elapsed_time": "3:22:21", "remaining_time": "8:21:20"} +{"current_steps": 2497, "total_steps": 8680, "loss": 0.814068615436554, "lr": 1.706972070233421e-06, "epoch": 0.5753456221198157, "percentage": 28.77, "elapsed_time": "3:22:25", "remaining_time": "8:21:13"} +{"current_steps": 2498, "total_steps": 8680, "loss": 0.8439940214157104, "lr": 1.7067025713700111e-06, "epoch": 0.5755760368663595, "percentage": 28.78, "elapsed_time": "3:22:30", "remaining_time": "8:21:09"} +{"current_steps": 2499, "total_steps": 8680, "loss": 1.0199556350708008, "lr": 1.706432969929659e-06, "epoch": 0.5758064516129032, "percentage": 28.79, "elapsed_time": "3:22:34", "remaining_time": "8:21:03"} +{"current_steps": 2500, "total_steps": 8680, "loss": 0.9422338008880615, "lr": 1.7061632659514964e-06, "epoch": 0.576036866359447, "percentage": 28.8, "elapsed_time": "3:22:40", "remaining_time": "8:21:00"} +{"current_steps": 2501, "total_steps": 8680, "loss": 0.9307081699371338, "lr": 1.7058934594746704e-06, "epoch": 0.5762672811059908, "percentage": 28.81, "elapsed_time": "3:22:47", "remaining_time": "8:21:01"} +{"current_steps": 2502, "total_steps": 8680, "loss": 0.7202768325805664, "lr": 
1.7056235505383433e-06, "epoch": 0.5764976958525345, "percentage": 28.82, "elapsed_time": "3:22:53", "remaining_time": "8:20:58"} +{"current_steps": 2503, "total_steps": 8680, "loss": 1.0184223651885986, "lr": 1.7053535391816923e-06, "epoch": 0.5767281105990784, "percentage": 28.84, "elapsed_time": "3:22:57", "remaining_time": "8:20:52"} +{"current_steps": 2504, "total_steps": 8680, "loss": 0.7957574129104614, "lr": 1.7050834254439085e-06, "epoch": 0.5769585253456221, "percentage": 28.85, "elapsed_time": "3:23:01", "remaining_time": "8:20:45"} +{"current_steps": 2505, "total_steps": 8680, "loss": 0.9694541096687317, "lr": 1.7048132093641989e-06, "epoch": 0.5771889400921659, "percentage": 28.86, "elapsed_time": "3:23:05", "remaining_time": "8:20:37"} +{"current_steps": 2506, "total_steps": 8680, "loss": 0.8427075147628784, "lr": 1.704542890981785e-06, "epoch": 0.5774193548387097, "percentage": 28.87, "elapsed_time": "3:23:09", "remaining_time": "8:20:31"} +{"current_steps": 2507, "total_steps": 8680, "loss": 0.7745763063430786, "lr": 1.7042724703359032e-06, "epoch": 0.5776497695852535, "percentage": 28.88, "elapsed_time": "3:23:15", "remaining_time": "8:20:30"} +{"current_steps": 2508, "total_steps": 8680, "loss": 0.8179641962051392, "lr": 1.7040019474658047e-06, "epoch": 0.5778801843317972, "percentage": 28.89, "elapsed_time": "3:23:20", "remaining_time": "8:20:23"} +{"current_steps": 2509, "total_steps": 8680, "loss": 0.8118200302124023, "lr": 1.7037313224107557e-06, "epoch": 0.5781105990783411, "percentage": 28.91, "elapsed_time": "3:23:24", "remaining_time": "8:20:16"} +{"current_steps": 2510, "total_steps": 8680, "loss": 0.7892665863037109, "lr": 1.7034605952100364e-06, "epoch": 0.5783410138248848, "percentage": 28.92, "elapsed_time": "3:23:29", "remaining_time": "8:20:12"} +{"current_steps": 2511, "total_steps": 8680, "loss": 0.7442026734352112, "lr": 1.7031897659029434e-06, "epoch": 0.5785714285714286, "percentage": 28.93, "elapsed_time": "3:23:33", 
"remaining_time": "8:20:07"} +{"current_steps": 2512, "total_steps": 8680, "loss": 0.8179585933685303, "lr": 1.7029188345287865e-06, "epoch": 0.5788018433179724, "percentage": 28.94, "elapsed_time": "3:23:40", "remaining_time": "8:20:06"} +{"current_steps": 2513, "total_steps": 8680, "loss": 0.7569797039031982, "lr": 1.7026478011268918e-06, "epoch": 0.5790322580645161, "percentage": 28.95, "elapsed_time": "3:23:45", "remaining_time": "8:20:01"} +{"current_steps": 2514, "total_steps": 8680, "loss": 0.8464581966400146, "lr": 1.7023766657365984e-06, "epoch": 0.5792626728110599, "percentage": 28.96, "elapsed_time": "3:23:50", "remaining_time": "8:19:57"} +{"current_steps": 2515, "total_steps": 8680, "loss": 0.7326645255088806, "lr": 1.702105428397262e-06, "epoch": 0.5794930875576036, "percentage": 28.97, "elapsed_time": "3:23:54", "remaining_time": "8:19:50"} +{"current_steps": 2516, "total_steps": 8680, "loss": 0.7993732690811157, "lr": 1.7018340891482522e-06, "epoch": 0.5797235023041475, "percentage": 28.99, "elapsed_time": "3:23:59", "remaining_time": "8:19:44"} +{"current_steps": 2517, "total_steps": 8680, "loss": 0.8124513626098633, "lr": 1.7015626480289532e-06, "epoch": 0.5799539170506912, "percentage": 29.0, "elapsed_time": "3:24:04", "remaining_time": "8:19:41"} +{"current_steps": 2518, "total_steps": 8680, "loss": 0.9075840711593628, "lr": 1.701291105078765e-06, "epoch": 0.580184331797235, "percentage": 29.01, "elapsed_time": "3:24:09", "remaining_time": "8:19:36"} +{"current_steps": 2519, "total_steps": 8680, "loss": 0.8212069272994995, "lr": 1.7010194603371009e-06, "epoch": 0.5804147465437788, "percentage": 29.02, "elapsed_time": "3:24:13", "remaining_time": "8:19:30"} +{"current_steps": 2520, "total_steps": 8680, "loss": 0.7582074999809265, "lr": 1.7007477138433903e-06, "epoch": 0.5806451612903226, "percentage": 29.03, "elapsed_time": "3:24:19", "remaining_time": "8:19:28"} +{"current_steps": 2521, "total_steps": 8680, "loss": 0.8917636871337891, "lr": 
1.7004758656370769e-06, "epoch": 0.5808755760368663, "percentage": 29.04, "elapsed_time": "3:24:25", "remaining_time": "8:19:26"} +{"current_steps": 2522, "total_steps": 8680, "loss": 0.8919704556465149, "lr": 1.7002039157576186e-06, "epoch": 0.5811059907834102, "percentage": 29.06, "elapsed_time": "3:24:29", "remaining_time": "8:19:18"} +{"current_steps": 2523, "total_steps": 8680, "loss": 0.7474988698959351, "lr": 1.699931864244489e-06, "epoch": 0.5813364055299539, "percentage": 29.07, "elapsed_time": "3:24:34", "remaining_time": "8:19:13"} +{"current_steps": 2524, "total_steps": 8680, "loss": 0.8596241474151611, "lr": 1.6996597111371758e-06, "epoch": 0.5815668202764976, "percentage": 29.08, "elapsed_time": "3:24:38", "remaining_time": "8:19:06"} +{"current_steps": 2525, "total_steps": 8680, "loss": 0.9316335916519165, "lr": 1.699387456475182e-06, "epoch": 0.5817972350230415, "percentage": 29.09, "elapsed_time": "3:24:42", "remaining_time": "8:19:00"} +{"current_steps": 2526, "total_steps": 8680, "loss": 0.7364813089370728, "lr": 1.6991151002980248e-06, "epoch": 0.5820276497695852, "percentage": 29.1, "elapsed_time": "3:24:47", "remaining_time": "8:18:55"} +{"current_steps": 2527, "total_steps": 8680, "loss": 0.789472758769989, "lr": 1.698842642645236e-06, "epoch": 0.582258064516129, "percentage": 29.11, "elapsed_time": "3:24:52", "remaining_time": "8:18:52"} +{"current_steps": 2528, "total_steps": 8680, "loss": 1.024861216545105, "lr": 1.6985700835563627e-06, "epoch": 0.5824884792626728, "percentage": 29.12, "elapsed_time": "3:24:57", "remaining_time": "8:18:46"} +{"current_steps": 2529, "total_steps": 8680, "loss": 0.8465025424957275, "lr": 1.6982974230709667e-06, "epoch": 0.5827188940092166, "percentage": 29.14, "elapsed_time": "3:25:01", "remaining_time": "8:18:39"} +{"current_steps": 2530, "total_steps": 8680, "loss": 0.7502799034118652, "lr": 1.6980246612286244e-06, "epoch": 0.5829493087557603, "percentage": 29.15, "elapsed_time": "3:25:04", 
"remaining_time": "8:18:31"} +{"current_steps": 2531, "total_steps": 8680, "loss": 0.8019870519638062, "lr": 1.6977517980689264e-06, "epoch": 0.5831797235023042, "percentage": 29.16, "elapsed_time": "3:25:09", "remaining_time": "8:18:24"} +{"current_steps": 2532, "total_steps": 8680, "loss": 0.9048774242401123, "lr": 1.6974788336314788e-06, "epoch": 0.5834101382488479, "percentage": 29.17, "elapsed_time": "3:25:13", "remaining_time": "8:18:18"} +{"current_steps": 2533, "total_steps": 8680, "loss": 0.8411067724227905, "lr": 1.6972057679559018e-06, "epoch": 0.5836405529953917, "percentage": 29.18, "elapsed_time": "3:25:18", "remaining_time": "8:18:13"} +{"current_steps": 2534, "total_steps": 8680, "loss": 0.7399133443832397, "lr": 1.6969326010818304e-06, "epoch": 0.5838709677419355, "percentage": 29.19, "elapsed_time": "3:25:23", "remaining_time": "8:18:10"} +{"current_steps": 2535, "total_steps": 8680, "loss": 0.7553995847702026, "lr": 1.6966593330489144e-06, "epoch": 0.5841013824884793, "percentage": 29.21, "elapsed_time": "3:25:28", "remaining_time": "8:18:05"} +{"current_steps": 2536, "total_steps": 8680, "loss": 0.8405054807662964, "lr": 1.6963859638968188e-06, "epoch": 0.584331797235023, "percentage": 29.22, "elapsed_time": "3:25:35", "remaining_time": "8:18:05"} +{"current_steps": 2537, "total_steps": 8680, "loss": 0.7619640231132507, "lr": 1.6961124936652223e-06, "epoch": 0.5845622119815668, "percentage": 29.23, "elapsed_time": "3:25:41", "remaining_time": "8:18:03"} +{"current_steps": 2538, "total_steps": 8680, "loss": 0.7785576581954956, "lr": 1.6958389223938187e-06, "epoch": 0.5847926267281106, "percentage": 29.24, "elapsed_time": "3:25:46", "remaining_time": "8:17:58"} +{"current_steps": 2539, "total_steps": 8680, "loss": 0.9230754375457764, "lr": 1.695565250122317e-06, "epoch": 0.5850230414746543, "percentage": 29.25, "elapsed_time": "3:25:50", "remaining_time": "8:17:52"} +{"current_steps": 2540, "total_steps": 8680, "loss": 0.798599362373352, "lr": 
1.69529147689044e-06, "epoch": 0.5852534562211982, "percentage": 29.26, "elapsed_time": "3:25:54", "remaining_time": "8:17:45"} +{"current_steps": 2541, "total_steps": 8680, "loss": 0.8491491079330444, "lr": 1.6950176027379253e-06, "epoch": 0.5854838709677419, "percentage": 29.27, "elapsed_time": "3:26:00", "remaining_time": "8:17:43"} +{"current_steps": 2542, "total_steps": 8680, "loss": 0.7906054854393005, "lr": 1.694743627704526e-06, "epoch": 0.5857142857142857, "percentage": 29.29, "elapsed_time": "3:26:05", "remaining_time": "8:17:38"} +{"current_steps": 2543, "total_steps": 8680, "loss": 0.8178746700286865, "lr": 1.6944695518300084e-06, "epoch": 0.5859447004608295, "percentage": 29.3, "elapsed_time": "3:26:10", "remaining_time": "8:17:33"} +{"current_steps": 2544, "total_steps": 8680, "loss": 0.867972731590271, "lr": 1.6941953751541552e-06, "epoch": 0.5861751152073733, "percentage": 29.31, "elapsed_time": "3:26:14", "remaining_time": "8:17:25"} +{"current_steps": 2545, "total_steps": 8680, "loss": 0.8000613451004028, "lr": 1.6939210977167622e-06, "epoch": 0.586405529953917, "percentage": 29.32, "elapsed_time": "3:26:18", "remaining_time": "8:17:19"} +{"current_steps": 2546, "total_steps": 8680, "loss": 0.8473562002182007, "lr": 1.6936467195576403e-06, "epoch": 0.5866359447004609, "percentage": 29.33, "elapsed_time": "3:26:22", "remaining_time": "8:17:13"} +{"current_steps": 2547, "total_steps": 8680, "loss": 0.971686065196991, "lr": 1.6933722407166156e-06, "epoch": 0.5868663594470046, "percentage": 29.34, "elapsed_time": "3:26:26", "remaining_time": "8:17:05"} +{"current_steps": 2548, "total_steps": 8680, "loss": 0.6679604053497314, "lr": 1.6930976612335276e-06, "epoch": 0.5870967741935483, "percentage": 29.35, "elapsed_time": "3:26:31", "remaining_time": "8:17:01"} +{"current_steps": 2549, "total_steps": 8680, "loss": 0.81952303647995, "lr": 1.692822981148232e-06, "epoch": 0.5873271889400922, "percentage": 29.37, "elapsed_time": "3:26:35", "remaining_time": 
"8:16:54"} +{"current_steps": 2550, "total_steps": 8680, "loss": 0.8711779713630676, "lr": 1.6925482005005978e-06, "epoch": 0.5875576036866359, "percentage": 29.38, "elapsed_time": "3:26:39", "remaining_time": "8:16:47"} +{"current_steps": 2551, "total_steps": 8680, "loss": 0.930451512336731, "lr": 1.6922733193305093e-06, "epoch": 0.5877880184331797, "percentage": 29.39, "elapsed_time": "3:26:43", "remaining_time": "8:16:41"} +{"current_steps": 2552, "total_steps": 8680, "loss": 0.8435598611831665, "lr": 1.6919983376778647e-06, "epoch": 0.5880184331797235, "percentage": 29.4, "elapsed_time": "3:26:47", "remaining_time": "8:16:34"} +{"current_steps": 2553, "total_steps": 8680, "loss": 0.8868621587753296, "lr": 1.6917232555825774e-06, "epoch": 0.5882488479262673, "percentage": 29.41, "elapsed_time": "3:26:52", "remaining_time": "8:16:28"} +{"current_steps": 2554, "total_steps": 8680, "loss": 0.6821786165237427, "lr": 1.6914480730845752e-06, "epoch": 0.588479262672811, "percentage": 29.42, "elapsed_time": "3:26:58", "remaining_time": "8:16:27"} +{"current_steps": 2555, "total_steps": 8680, "loss": 0.7241402864456177, "lr": 1.691172790223801e-06, "epoch": 0.5887096774193549, "percentage": 29.44, "elapsed_time": "3:27:04", "remaining_time": "8:16:23"} +{"current_steps": 2556, "total_steps": 8680, "loss": 0.7477490305900574, "lr": 1.690897407040211e-06, "epoch": 0.5889400921658986, "percentage": 29.45, "elapsed_time": "3:27:08", "remaining_time": "8:16:18"} +{"current_steps": 2557, "total_steps": 8680, "loss": 0.7881484031677246, "lr": 1.690621923573777e-06, "epoch": 0.5891705069124424, "percentage": 29.46, "elapsed_time": "3:27:14", "remaining_time": "8:16:15"} +{"current_steps": 2558, "total_steps": 8680, "loss": 0.8292979001998901, "lr": 1.6903463398644848e-06, "epoch": 0.5894009216589862, "percentage": 29.47, "elapsed_time": "3:27:20", "remaining_time": "8:16:14"} +{"current_steps": 2559, "total_steps": 8680, "loss": 0.7068917751312256, "lr": 1.690070655952336e-06, 
"epoch": 0.58963133640553, "percentage": 29.48, "elapsed_time": "3:27:25", "remaining_time": "8:16:09"} +{"current_steps": 2560, "total_steps": 8680, "loss": 0.8907356262207031, "lr": 1.6897948718773443e-06, "epoch": 0.5898617511520737, "percentage": 29.49, "elapsed_time": "3:27:30", "remaining_time": "8:16:04"} +{"current_steps": 2561, "total_steps": 8680, "loss": 0.7762824892997742, "lr": 1.6895189876795405e-06, "epoch": 0.5900921658986175, "percentage": 29.5, "elapsed_time": "3:27:34", "remaining_time": "8:15:57"} +{"current_steps": 2562, "total_steps": 8680, "loss": 0.9682759046554565, "lr": 1.6892430033989685e-06, "epoch": 0.5903225806451613, "percentage": 29.52, "elapsed_time": "3:27:38", "remaining_time": "8:15:50"} +{"current_steps": 2563, "total_steps": 8680, "loss": 0.7594735622406006, "lr": 1.6889669190756866e-06, "epoch": 0.590552995391705, "percentage": 29.53, "elapsed_time": "3:27:42", "remaining_time": "8:15:42"} +{"current_steps": 2564, "total_steps": 8680, "loss": 0.8161605000495911, "lr": 1.6886907347497687e-06, "epoch": 0.5907834101382489, "percentage": 29.54, "elapsed_time": "3:27:48", "remaining_time": "8:15:42"} +{"current_steps": 2565, "total_steps": 8680, "loss": 0.9390331506729126, "lr": 1.6884144504613023e-06, "epoch": 0.5910138248847926, "percentage": 29.55, "elapsed_time": "3:27:53", "remaining_time": "8:15:36"} +{"current_steps": 2566, "total_steps": 8680, "loss": 0.8895832300186157, "lr": 1.68813806625039e-06, "epoch": 0.5912442396313364, "percentage": 29.56, "elapsed_time": "3:27:58", "remaining_time": "8:15:32"} +{"current_steps": 2567, "total_steps": 8680, "loss": 0.7779919505119324, "lr": 1.687861582157148e-06, "epoch": 0.5914746543778802, "percentage": 29.57, "elapsed_time": "3:28:04", "remaining_time": "8:15:29"} +{"current_steps": 2568, "total_steps": 8680, "loss": 0.8974252343177795, "lr": 1.687584998221708e-06, "epoch": 0.591705069124424, "percentage": 29.59, "elapsed_time": "3:28:08", "remaining_time": "8:15:22"} 
+{"current_steps": 2569, "total_steps": 8680, "loss": 0.8487393856048584, "lr": 1.687308314484216e-06, "epoch": 0.5919354838709677, "percentage": 29.6, "elapsed_time": "3:28:12", "remaining_time": "8:15:17"} +{"current_steps": 2570, "total_steps": 8680, "loss": 0.8356295824050903, "lr": 1.6870315309848318e-06, "epoch": 0.5921658986175116, "percentage": 29.61, "elapsed_time": "3:28:18", "remaining_time": "8:15:13"} +{"current_steps": 2571, "total_steps": 8680, "loss": 0.8180248737335205, "lr": 1.6867546477637307e-06, "epoch": 0.5923963133640553, "percentage": 29.62, "elapsed_time": "3:28:23", "remaining_time": "8:15:10"} +{"current_steps": 2572, "total_steps": 8680, "loss": 0.8456830978393555, "lr": 1.6864776648611013e-06, "epoch": 0.5926267281105991, "percentage": 29.63, "elapsed_time": "3:28:27", "remaining_time": "8:15:03"} +{"current_steps": 2573, "total_steps": 8680, "loss": 0.8378905057907104, "lr": 1.6862005823171476e-06, "epoch": 0.5928571428571429, "percentage": 29.64, "elapsed_time": "3:28:33", "remaining_time": "8:15:01"} +{"current_steps": 2574, "total_steps": 8680, "loss": 0.8060408234596252, "lr": 1.685923400172088e-06, "epoch": 0.5930875576036866, "percentage": 29.65, "elapsed_time": "3:28:38", "remaining_time": "8:14:57"} +{"current_steps": 2575, "total_steps": 8680, "loss": 0.7550709247589111, "lr": 1.685646118466155e-06, "epoch": 0.5933179723502304, "percentage": 29.67, "elapsed_time": "3:28:43", "remaining_time": "8:14:52"} +{"current_steps": 2576, "total_steps": 8680, "loss": 0.8475208282470703, "lr": 1.6853687372395955e-06, "epoch": 0.5935483870967742, "percentage": 29.68, "elapsed_time": "3:28:49", "remaining_time": "8:14:48"} +{"current_steps": 2577, "total_steps": 8680, "loss": 0.8681533336639404, "lr": 1.6850912565326709e-06, "epoch": 0.593778801843318, "percentage": 29.69, "elapsed_time": "3:28:53", "remaining_time": "8:14:43"} +{"current_steps": 2578, "total_steps": 8680, "loss": 0.7756578922271729, "lr": 1.6848136763856573e-06, "epoch": 
0.5940092165898617, "percentage": 29.7, "elapsed_time": "3:28:59", "remaining_time": "8:14:40"} +{"current_steps": 2579, "total_steps": 8680, "loss": 0.8910564184188843, "lr": 1.6845359968388456e-06, "epoch": 0.5942396313364056, "percentage": 29.71, "elapsed_time": "3:29:03", "remaining_time": "8:14:33"} +{"current_steps": 2580, "total_steps": 8680, "loss": 0.7293382883071899, "lr": 1.6842582179325397e-06, "epoch": 0.5944700460829493, "percentage": 29.72, "elapsed_time": "3:29:10", "remaining_time": "8:14:33"} +{"current_steps": 2581, "total_steps": 8680, "loss": 0.8497427105903625, "lr": 1.6839803397070597e-06, "epoch": 0.5947004608294931, "percentage": 29.74, "elapsed_time": "3:29:16", "remaining_time": "8:14:31"} +{"current_steps": 2582, "total_steps": 8680, "loss": 0.800891637802124, "lr": 1.6837023622027386e-06, "epoch": 0.5949308755760369, "percentage": 29.75, "elapsed_time": "3:29:21", "remaining_time": "8:14:27"} +{"current_steps": 2583, "total_steps": 8680, "loss": 0.889703631401062, "lr": 1.683424285459925e-06, "epoch": 0.5951612903225807, "percentage": 29.76, "elapsed_time": "3:29:25", "remaining_time": "8:14:20"} +{"current_steps": 2584, "total_steps": 8680, "loss": 0.7500913143157959, "lr": 1.6831461095189808e-06, "epoch": 0.5953917050691244, "percentage": 29.77, "elapsed_time": "3:29:30", "remaining_time": "8:14:16"} +{"current_steps": 2585, "total_steps": 8680, "loss": 0.8575263023376465, "lr": 1.6828678344202834e-06, "epoch": 0.5956221198156681, "percentage": 29.78, "elapsed_time": "3:29:36", "remaining_time": "8:14:12"} +{"current_steps": 2586, "total_steps": 8680, "loss": 0.7754372358322144, "lr": 1.6825894602042238e-06, "epoch": 0.595852534562212, "percentage": 29.79, "elapsed_time": "3:29:41", "remaining_time": "8:14:07"} +{"current_steps": 2587, "total_steps": 8680, "loss": 0.8861502408981323, "lr": 1.6823109869112074e-06, "epoch": 0.5960829493087557, "percentage": 29.8, "elapsed_time": "3:29:46", "remaining_time": "8:14:03"} +{"current_steps": 
2588, "total_steps": 8680, "loss": 0.725920557975769, "lr": 1.6820324145816548e-06, "epoch": 0.5963133640552996, "percentage": 29.82, "elapsed_time": "3:29:51", "remaining_time": "8:13:58"} +{"current_steps": 2589, "total_steps": 8680, "loss": 0.6195499897003174, "lr": 1.6817537432559998e-06, "epoch": 0.5965437788018433, "percentage": 29.83, "elapsed_time": "3:29:57", "remaining_time": "8:13:56"} +{"current_steps": 2590, "total_steps": 8680, "loss": 0.8757472038269043, "lr": 1.6814749729746918e-06, "epoch": 0.5967741935483871, "percentage": 29.84, "elapsed_time": "3:30:02", "remaining_time": "8:13:52"} +{"current_steps": 2591, "total_steps": 8680, "loss": 0.8024059534072876, "lr": 1.6811961037781934e-06, "epoch": 0.5970046082949308, "percentage": 29.85, "elapsed_time": "3:30:06", "remaining_time": "8:13:47"} +{"current_steps": 2592, "total_steps": 8680, "loss": 0.8397082090377808, "lr": 1.6809171357069825e-06, "epoch": 0.5972350230414747, "percentage": 29.86, "elapsed_time": "3:30:12", "remaining_time": "8:13:44"} +{"current_steps": 2593, "total_steps": 8680, "loss": 0.7693872451782227, "lr": 1.6806380688015507e-06, "epoch": 0.5974654377880184, "percentage": 29.87, "elapsed_time": "3:30:17", "remaining_time": "8:13:38"} +{"current_steps": 2594, "total_steps": 8680, "loss": 0.7918043732643127, "lr": 1.6803589031024043e-06, "epoch": 0.5976958525345623, "percentage": 29.88, "elapsed_time": "3:30:22", "remaining_time": "8:13:33"} +{"current_steps": 2595, "total_steps": 8680, "loss": 0.8046969175338745, "lr": 1.680079638650064e-06, "epoch": 0.597926267281106, "percentage": 29.9, "elapsed_time": "3:30:28", "remaining_time": "8:13:32"} +{"current_steps": 2596, "total_steps": 8680, "loss": 0.7889789938926697, "lr": 1.6798002754850643e-06, "epoch": 0.5981566820276498, "percentage": 29.91, "elapsed_time": "3:30:33", "remaining_time": "8:13:27"} +{"current_steps": 2597, "total_steps": 8680, "loss": 0.874780535697937, "lr": 1.6795208136479543e-06, "epoch": 0.5983870967741935, 
"percentage": 29.92, "elapsed_time": "3:30:38", "remaining_time": "8:13:23"} +{"current_steps": 2598, "total_steps": 8680, "loss": 0.8728631734848022, "lr": 1.679241253179298e-06, "epoch": 0.5986175115207373, "percentage": 29.93, "elapsed_time": "3:30:43", "remaining_time": "8:13:19"} +{"current_steps": 2599, "total_steps": 8680, "loss": 0.5940345525741577, "lr": 1.678961594119673e-06, "epoch": 0.5988479262672811, "percentage": 29.94, "elapsed_time": "3:30:48", "remaining_time": "8:13:14"} +{"current_steps": 2600, "total_steps": 8680, "loss": 0.8524528741836548, "lr": 1.6786818365096712e-06, "epoch": 0.5990783410138248, "percentage": 29.95, "elapsed_time": "3:30:53", "remaining_time": "8:13:10"} +{"current_steps": 2601, "total_steps": 8680, "loss": 1.0738554000854492, "lr": 1.6784019803899e-06, "epoch": 0.5993087557603687, "percentage": 29.97, "elapsed_time": "3:30:59", "remaining_time": "8:13:08"} +{"current_steps": 2602, "total_steps": 8680, "loss": 0.9146362543106079, "lr": 1.6781220258009787e-06, "epoch": 0.5995391705069124, "percentage": 29.98, "elapsed_time": "3:31:04", "remaining_time": "8:13:02"} +{"current_steps": 2603, "total_steps": 8680, "loss": 0.8846019506454468, "lr": 1.6778419727835434e-06, "epoch": 0.5997695852534562, "percentage": 29.99, "elapsed_time": "3:31:09", "remaining_time": "8:12:57"} +{"current_steps": 2604, "total_steps": 8680, "loss": 0.9564694166183472, "lr": 1.6775618213782427e-06, "epoch": 0.6, "percentage": 30.0, "elapsed_time": "3:31:13", "remaining_time": "8:12:51"} +{"current_steps": 2605, "total_steps": 8680, "loss": 0.7311475276947021, "lr": 1.6772815716257411e-06, "epoch": 0.6002304147465438, "percentage": 30.01, "elapsed_time": "3:31:18", "remaining_time": "8:12:47"} +{"current_steps": 2606, "total_steps": 8680, "loss": 0.8198719024658203, "lr": 1.6770012235667157e-06, "epoch": 0.6004608294930875, "percentage": 30.02, "elapsed_time": "3:31:23", "remaining_time": "8:12:41"} +{"current_steps": 2607, "total_steps": 8680, "loss": 
0.7667897939682007, "lr": 1.676720777241859e-06, "epoch": 0.6006912442396314, "percentage": 30.03, "elapsed_time": "3:31:28", "remaining_time": "8:12:37"} +{"current_steps": 2608, "total_steps": 8680, "loss": 0.8438166379928589, "lr": 1.6764402326918775e-06, "epoch": 0.6009216589861751, "percentage": 30.05, "elapsed_time": "3:31:33", "remaining_time": "8:12:33"} +{"current_steps": 2609, "total_steps": 8680, "loss": 0.801039457321167, "lr": 1.6761595899574913e-06, "epoch": 0.6011520737327188, "percentage": 30.06, "elapsed_time": "3:31:39", "remaining_time": "8:12:32"} +{"current_steps": 2610, "total_steps": 8680, "loss": 0.8063384294509888, "lr": 1.6758788490794362e-06, "epoch": 0.6013824884792627, "percentage": 30.07, "elapsed_time": "3:31:45", "remaining_time": "8:12:29"} +{"current_steps": 2611, "total_steps": 8680, "loss": 0.7574378848075867, "lr": 1.6755980100984609e-06, "epoch": 0.6016129032258064, "percentage": 30.08, "elapsed_time": "3:31:50", "remaining_time": "8:12:24"} +{"current_steps": 2612, "total_steps": 8680, "loss": 0.7640282511711121, "lr": 1.6753170730553285e-06, "epoch": 0.6018433179723502, "percentage": 30.09, "elapsed_time": "3:31:56", "remaining_time": "8:12:21"} +{"current_steps": 2613, "total_steps": 8680, "loss": 0.8366582989692688, "lr": 1.675036037990817e-06, "epoch": 0.602073732718894, "percentage": 30.1, "elapsed_time": "3:32:00", "remaining_time": "8:12:16"} +{"current_steps": 2614, "total_steps": 8680, "loss": 0.851488471031189, "lr": 1.6747549049457184e-06, "epoch": 0.6023041474654378, "percentage": 30.12, "elapsed_time": "3:32:04", "remaining_time": "8:12:08"} +{"current_steps": 2615, "total_steps": 8680, "loss": 0.6821870803833008, "lr": 1.6744736739608385e-06, "epoch": 0.6025345622119815, "percentage": 30.13, "elapsed_time": "3:32:10", "remaining_time": "8:12:05"} +{"current_steps": 2616, "total_steps": 8680, "loss": 0.9263452887535095, "lr": 1.6741923450769977e-06, "epoch": 0.6027649769585254, "percentage": 30.14, "elapsed_time": 
"3:32:15", "remaining_time": "8:12:00"} +{"current_steps": 2617, "total_steps": 8680, "loss": 0.7471155524253845, "lr": 1.6739109183350303e-06, "epoch": 0.6029953917050691, "percentage": 30.15, "elapsed_time": "3:32:20", "remaining_time": "8:11:56"} +{"current_steps": 2618, "total_steps": 8680, "loss": 0.8859940767288208, "lr": 1.6736293937757858e-06, "epoch": 0.603225806451613, "percentage": 30.16, "elapsed_time": "3:32:24", "remaining_time": "8:11:51"} +{"current_steps": 2619, "total_steps": 8680, "loss": 0.8078656792640686, "lr": 1.673347771440126e-06, "epoch": 0.6034562211981567, "percentage": 30.17, "elapsed_time": "3:32:29", "remaining_time": "8:11:44"} +{"current_steps": 2620, "total_steps": 8680, "loss": 0.7663185596466064, "lr": 1.673066051368929e-06, "epoch": 0.6036866359447005, "percentage": 30.18, "elapsed_time": "3:32:34", "remaining_time": "8:11:41"} +{"current_steps": 2621, "total_steps": 8680, "loss": 0.7924770712852478, "lr": 1.6727842336030855e-06, "epoch": 0.6039170506912442, "percentage": 30.2, "elapsed_time": "3:32:39", "remaining_time": "8:11:35"} +{"current_steps": 2622, "total_steps": 8680, "loss": 0.7781439423561096, "lr": 1.672502318183501e-06, "epoch": 0.604147465437788, "percentage": 30.21, "elapsed_time": "3:32:44", "remaining_time": "8:11:31"} +{"current_steps": 2623, "total_steps": 8680, "loss": 0.9342260360717773, "lr": 1.6722203051510953e-06, "epoch": 0.6043778801843318, "percentage": 30.22, "elapsed_time": "3:32:48", "remaining_time": "8:11:25"} +{"current_steps": 2624, "total_steps": 8680, "loss": 0.8589230179786682, "lr": 1.6719381945468024e-06, "epoch": 0.6046082949308755, "percentage": 30.23, "elapsed_time": "3:32:52", "remaining_time": "8:11:17"} +{"current_steps": 2625, "total_steps": 8680, "loss": 0.8692198991775513, "lr": 1.67165598641157e-06, "epoch": 0.6048387096774194, "percentage": 30.24, "elapsed_time": "3:32:56", "remaining_time": "8:11:11"} +{"current_steps": 2626, "total_steps": 8680, "loss": 0.9220771789550781, 
"lr": 1.6713736807863606e-06, "epoch": 0.6050691244239631, "percentage": 30.25, "elapsed_time": "3:33:01", "remaining_time": "8:11:06"} +{"current_steps": 2627, "total_steps": 8680, "loss": 0.670639157295227, "lr": 1.6710912777121497e-06, "epoch": 0.6052995391705069, "percentage": 30.26, "elapsed_time": "3:33:07", "remaining_time": "8:11:04"} +{"current_steps": 2628, "total_steps": 8680, "loss": 0.780914306640625, "lr": 1.6708087772299287e-06, "epoch": 0.6055299539170507, "percentage": 30.28, "elapsed_time": "3:33:11", "remaining_time": "8:10:56"} +{"current_steps": 2629, "total_steps": 8680, "loss": 0.836430549621582, "lr": 1.6705261793807014e-06, "epoch": 0.6057603686635945, "percentage": 30.29, "elapsed_time": "3:33:17", "remaining_time": "8:10:55"} +{"current_steps": 2630, "total_steps": 8680, "loss": 0.84266197681427, "lr": 1.670243484205487e-06, "epoch": 0.6059907834101382, "percentage": 30.3, "elapsed_time": "3:33:23", "remaining_time": "8:10:52"} +{"current_steps": 2631, "total_steps": 8680, "loss": 0.9276752471923828, "lr": 1.6699606917453184e-06, "epoch": 0.6062211981566821, "percentage": 30.31, "elapsed_time": "3:33:26", "remaining_time": "8:10:44"} +{"current_steps": 2632, "total_steps": 8680, "loss": 0.8319100141525269, "lr": 1.6696778020412418e-06, "epoch": 0.6064516129032258, "percentage": 30.32, "elapsed_time": "3:33:32", "remaining_time": "8:10:40"} +{"current_steps": 2633, "total_steps": 8680, "loss": 0.7511987686157227, "lr": 1.669394815134319e-06, "epoch": 0.6066820276497696, "percentage": 30.33, "elapsed_time": "3:33:36", "remaining_time": "8:10:33"} +{"current_steps": 2634, "total_steps": 8680, "loss": 0.7847566604614258, "lr": 1.6691117310656249e-06, "epoch": 0.6069124423963134, "percentage": 30.35, "elapsed_time": "3:33:40", "remaining_time": "8:10:28"} +{"current_steps": 2635, "total_steps": 8680, "loss": 0.8598428964614868, "lr": 1.668828549876249e-06, "epoch": 0.6071428571428571, "percentage": 30.36, "elapsed_time": "3:33:44", 
"remaining_time": "8:10:21"} +{"current_steps": 2636, "total_steps": 8680, "loss": 0.8676267266273499, "lr": 1.6685452716072942e-06, "epoch": 0.6073732718894009, "percentage": 30.37, "elapsed_time": "3:33:49", "remaining_time": "8:10:15"} +{"current_steps": 2637, "total_steps": 8680, "loss": 0.8139858841896057, "lr": 1.6682618962998787e-06, "epoch": 0.6076036866359447, "percentage": 30.38, "elapsed_time": "3:33:53", "remaining_time": "8:10:08"} +{"current_steps": 2638, "total_steps": 8680, "loss": 0.878848671913147, "lr": 1.6679784239951334e-06, "epoch": 0.6078341013824885, "percentage": 30.39, "elapsed_time": "3:33:57", "remaining_time": "8:10:02"} +{"current_steps": 2639, "total_steps": 8680, "loss": 0.7094229459762573, "lr": 1.6676948547342038e-06, "epoch": 0.6080645161290322, "percentage": 30.4, "elapsed_time": "3:34:01", "remaining_time": "8:09:56"} +{"current_steps": 2640, "total_steps": 8680, "loss": 0.7908186912536621, "lr": 1.6674111885582502e-06, "epoch": 0.6082949308755761, "percentage": 30.41, "elapsed_time": "3:34:07", "remaining_time": "8:09:53"} +{"current_steps": 2641, "total_steps": 8680, "loss": 0.7205992341041565, "lr": 1.6671274255084465e-06, "epoch": 0.6085253456221198, "percentage": 30.43, "elapsed_time": "3:34:13", "remaining_time": "8:09:50"} +{"current_steps": 2642, "total_steps": 8680, "loss": 0.8098955750465393, "lr": 1.6668435656259796e-06, "epoch": 0.6087557603686636, "percentage": 30.44, "elapsed_time": "3:34:16", "remaining_time": "8:09:43"} +{"current_steps": 2643, "total_steps": 8680, "loss": 0.9344205856323242, "lr": 1.6665596089520522e-06, "epoch": 0.6089861751152074, "percentage": 30.45, "elapsed_time": "3:34:21", "remaining_time": "8:09:38"} +{"current_steps": 2644, "total_steps": 8680, "loss": 0.6149121522903442, "lr": 1.6662755555278798e-06, "epoch": 0.6092165898617512, "percentage": 30.46, "elapsed_time": "3:34:27", "remaining_time": "8:09:34"} +{"current_steps": 2645, "total_steps": 8680, "loss": 0.790631115436554, "lr": 
1.6659914053946929e-06, "epoch": 0.6094470046082949, "percentage": 30.47, "elapsed_time": "3:34:31", "remaining_time": "8:09:28"} +{"current_steps": 2646, "total_steps": 8680, "loss": 0.7789372801780701, "lr": 1.6657071585937349e-06, "epoch": 0.6096774193548387, "percentage": 30.48, "elapsed_time": "3:34:37", "remaining_time": "8:09:26"} +{"current_steps": 2647, "total_steps": 8680, "loss": 0.9119753837585449, "lr": 1.6654228151662641e-06, "epoch": 0.6099078341013825, "percentage": 30.5, "elapsed_time": "3:34:42", "remaining_time": "8:09:21"} +{"current_steps": 2648, "total_steps": 8680, "loss": 0.827568769454956, "lr": 1.6651383751535526e-06, "epoch": 0.6101382488479262, "percentage": 30.51, "elapsed_time": "3:34:46", "remaining_time": "8:09:15"} +{"current_steps": 2649, "total_steps": 8680, "loss": 0.8862377405166626, "lr": 1.6648538385968865e-06, "epoch": 0.6103686635944701, "percentage": 30.52, "elapsed_time": "3:34:51", "remaining_time": "8:09:10"} +{"current_steps": 2650, "total_steps": 8680, "loss": 0.7765665054321289, "lr": 1.6645692055375658e-06, "epoch": 0.6105990783410138, "percentage": 30.53, "elapsed_time": "3:34:56", "remaining_time": "8:09:06"} +{"current_steps": 2651, "total_steps": 8680, "loss": 0.7673745155334473, "lr": 1.6642844760169048e-06, "epoch": 0.6108294930875576, "percentage": 30.54, "elapsed_time": "3:35:01", "remaining_time": "8:09:01"} +{"current_steps": 2652, "total_steps": 8680, "loss": 0.8539090752601624, "lr": 1.6639996500762313e-06, "epoch": 0.6110599078341014, "percentage": 30.55, "elapsed_time": "3:35:06", "remaining_time": "8:08:56"} +{"current_steps": 2653, "total_steps": 8680, "loss": 0.9146299362182617, "lr": 1.663714727756888e-06, "epoch": 0.6112903225806452, "percentage": 30.56, "elapsed_time": "3:35:09", "remaining_time": "8:08:48"} +{"current_steps": 2654, "total_steps": 8680, "loss": 0.6720675230026245, "lr": 1.6634297091002304e-06, "epoch": 0.6115207373271889, "percentage": 30.58, "elapsed_time": "3:35:14", 
"remaining_time": "8:08:43"} +{"current_steps": 2655, "total_steps": 8680, "loss": 0.876419186592102, "lr": 1.6631445941476287e-06, "epoch": 0.6117511520737328, "percentage": 30.59, "elapsed_time": "3:35:19", "remaining_time": "8:08:37"} +{"current_steps": 2656, "total_steps": 8680, "loss": 0.781826376914978, "lr": 1.6628593829404673e-06, "epoch": 0.6119815668202765, "percentage": 30.6, "elapsed_time": "3:35:23", "remaining_time": "8:08:31"} +{"current_steps": 2657, "total_steps": 8680, "loss": 0.8700725436210632, "lr": 1.662574075520144e-06, "epoch": 0.6122119815668203, "percentage": 30.61, "elapsed_time": "3:35:27", "remaining_time": "8:08:25"} +{"current_steps": 2658, "total_steps": 8680, "loss": 0.7927212715148926, "lr": 1.6622886719280703e-06, "epoch": 0.6124423963133641, "percentage": 30.62, "elapsed_time": "3:35:32", "remaining_time": "8:08:19"} +{"current_steps": 2659, "total_steps": 8680, "loss": 0.8402982354164124, "lr": 1.6620031722056732e-06, "epoch": 0.6126728110599078, "percentage": 30.63, "elapsed_time": "3:35:39", "remaining_time": "8:08:19"} +{"current_steps": 2660, "total_steps": 8680, "loss": 0.844031572341919, "lr": 1.6617175763943916e-06, "epoch": 0.6129032258064516, "percentage": 30.65, "elapsed_time": "3:35:43", "remaining_time": "8:08:12"} +{"current_steps": 2661, "total_steps": 8680, "loss": 0.7927590608596802, "lr": 1.66143188453568e-06, "epoch": 0.6131336405529954, "percentage": 30.66, "elapsed_time": "3:35:47", "remaining_time": "8:08:05"} +{"current_steps": 2662, "total_steps": 8680, "loss": 0.6881238222122192, "lr": 1.6611460966710057e-06, "epoch": 0.6133640552995392, "percentage": 30.67, "elapsed_time": "3:35:52", "remaining_time": "8:08:02"} +{"current_steps": 2663, "total_steps": 8680, "loss": 0.8782250881195068, "lr": 1.6608602128418512e-06, "epoch": 0.6135944700460829, "percentage": 30.68, "elapsed_time": "3:35:58", "remaining_time": "8:07:58"} +{"current_steps": 2664, "total_steps": 8680, "loss": 0.810072124004364, "lr": 
1.6605742330897112e-06, "epoch": 0.6138248847926268, "percentage": 30.69, "elapsed_time": "3:36:03", "remaining_time": "8:07:54"} +{"current_steps": 2665, "total_steps": 8680, "loss": 0.9278649091720581, "lr": 1.660288157456096e-06, "epoch": 0.6140552995391705, "percentage": 30.7, "elapsed_time": "3:36:07", "remaining_time": "8:07:48"} +{"current_steps": 2666, "total_steps": 8680, "loss": 0.7821990251541138, "lr": 1.6600019859825287e-06, "epoch": 0.6142857142857143, "percentage": 30.71, "elapsed_time": "3:36:12", "remaining_time": "8:07:43"} +{"current_steps": 2667, "total_steps": 8680, "loss": 0.7945138216018677, "lr": 1.6597157187105474e-06, "epoch": 0.614516129032258, "percentage": 30.73, "elapsed_time": "3:36:18", "remaining_time": "8:07:41"} +{"current_steps": 2668, "total_steps": 8680, "loss": 0.7796168327331543, "lr": 1.659429355681702e-06, "epoch": 0.6147465437788019, "percentage": 30.74, "elapsed_time": "3:36:22", "remaining_time": "8:07:34"} +{"current_steps": 2669, "total_steps": 8680, "loss": 0.8412867784500122, "lr": 1.659142896937559e-06, "epoch": 0.6149769585253456, "percentage": 30.75, "elapsed_time": "3:36:27", "remaining_time": "8:07:30"} +{"current_steps": 2670, "total_steps": 8680, "loss": 0.8507891893386841, "lr": 1.6588563425196976e-06, "epoch": 0.6152073732718893, "percentage": 30.76, "elapsed_time": "3:36:32", "remaining_time": "8:07:26"} +{"current_steps": 2671, "total_steps": 8680, "loss": 0.7538737654685974, "lr": 1.6585696924697097e-06, "epoch": 0.6154377880184332, "percentage": 30.77, "elapsed_time": "3:36:38", "remaining_time": "8:07:24"} +{"current_steps": 2672, "total_steps": 8680, "loss": 0.7241994142532349, "lr": 1.6582829468292027e-06, "epoch": 0.6156682027649769, "percentage": 30.78, "elapsed_time": "3:36:45", "remaining_time": "8:07:22"} +{"current_steps": 2673, "total_steps": 8680, "loss": 0.8282276391983032, "lr": 1.6579961056397979e-06, "epoch": 0.6158986175115208, "percentage": 30.79, "elapsed_time": "3:36:49", 
"remaining_time": "8:07:16"} +{"current_steps": 2674, "total_steps": 8680, "loss": 0.7823094725608826, "lr": 1.657709168943129e-06, "epoch": 0.6161290322580645, "percentage": 30.81, "elapsed_time": "3:36:54", "remaining_time": "8:07:11"} +{"current_steps": 2675, "total_steps": 8680, "loss": 0.7682117819786072, "lr": 1.6574221367808452e-06, "epoch": 0.6163594470046083, "percentage": 30.82, "elapsed_time": "3:36:58", "remaining_time": "8:07:05"} +{"current_steps": 2676, "total_steps": 8680, "loss": 0.7483188509941101, "lr": 1.6571350091946084e-06, "epoch": 0.616589861751152, "percentage": 30.83, "elapsed_time": "3:37:04", "remaining_time": "8:07:02"} +{"current_steps": 2677, "total_steps": 8680, "loss": 0.8244579434394836, "lr": 1.656847786226095e-06, "epoch": 0.6168202764976959, "percentage": 30.84, "elapsed_time": "3:37:09", "remaining_time": "8:06:57"} +{"current_steps": 2678, "total_steps": 8680, "loss": 0.9741685390472412, "lr": 1.6565604679169951e-06, "epoch": 0.6170506912442396, "percentage": 30.85, "elapsed_time": "3:37:13", "remaining_time": "8:06:50"} +{"current_steps": 2679, "total_steps": 8680, "loss": 1.0004706382751465, "lr": 1.6562730543090122e-06, "epoch": 0.6172811059907835, "percentage": 30.86, "elapsed_time": "3:37:17", "remaining_time": "8:06:43"} +{"current_steps": 2680, "total_steps": 8680, "loss": 0.6897011399269104, "lr": 1.6559855454438644e-06, "epoch": 0.6175115207373272, "percentage": 30.88, "elapsed_time": "3:37:23", "remaining_time": "8:06:41"} +{"current_steps": 2681, "total_steps": 8680, "loss": 0.7250478267669678, "lr": 1.6556979413632833e-06, "epoch": 0.617741935483871, "percentage": 30.89, "elapsed_time": "3:37:29", "remaining_time": "8:06:40"} +{"current_steps": 2682, "total_steps": 8680, "loss": 0.850714385509491, "lr": 1.6554102421090137e-06, "epoch": 0.6179723502304147, "percentage": 30.9, "elapsed_time": "3:37:35", "remaining_time": "8:06:36"} +{"current_steps": 2683, "total_steps": 8680, "loss": 0.8389794230461121, "lr": 
1.6551224477228152e-06, "epoch": 0.6182027649769585, "percentage": 30.91, "elapsed_time": "3:37:39", "remaining_time": "8:06:31"} +{"current_steps": 2684, "total_steps": 8680, "loss": 0.8004277944564819, "lr": 1.6548345582464608e-06, "epoch": 0.6184331797235023, "percentage": 30.92, "elapsed_time": "3:37:45", "remaining_time": "8:06:28"} +{"current_steps": 2685, "total_steps": 8680, "loss": 0.8439298868179321, "lr": 1.654546573721737e-06, "epoch": 0.618663594470046, "percentage": 30.93, "elapsed_time": "3:37:51", "remaining_time": "8:06:24"} +{"current_steps": 2686, "total_steps": 8680, "loss": 0.7715939283370972, "lr": 1.6542584941904448e-06, "epoch": 0.6188940092165899, "percentage": 30.94, "elapsed_time": "3:37:56", "remaining_time": "8:06:21"} +{"current_steps": 2687, "total_steps": 8680, "loss": 0.8521275520324707, "lr": 1.6539703196943982e-06, "epoch": 0.6191244239631336, "percentage": 30.96, "elapsed_time": "3:38:02", "remaining_time": "8:06:17"} +{"current_steps": 2688, "total_steps": 8680, "loss": 0.8773370981216431, "lr": 1.6536820502754249e-06, "epoch": 0.6193548387096774, "percentage": 30.97, "elapsed_time": "3:38:07", "remaining_time": "8:06:13"} +{"current_steps": 2689, "total_steps": 8680, "loss": 0.7613356113433838, "lr": 1.653393685975368e-06, "epoch": 0.6195852534562212, "percentage": 30.98, "elapsed_time": "3:38:11", "remaining_time": "8:06:07"} +{"current_steps": 2690, "total_steps": 8680, "loss": 0.7534692287445068, "lr": 1.6531052268360823e-06, "epoch": 0.619815668202765, "percentage": 30.99, "elapsed_time": "3:38:16", "remaining_time": "8:06:01"} +{"current_steps": 2691, "total_steps": 8680, "loss": 0.861242413520813, "lr": 1.652816672899438e-06, "epoch": 0.6200460829493087, "percentage": 31.0, "elapsed_time": "3:38:22", "remaining_time": "8:05:59"} +{"current_steps": 2692, "total_steps": 8680, "loss": 0.9778954982757568, "lr": 1.652528024207317e-06, "epoch": 0.6202764976958526, "percentage": 31.01, "elapsed_time": "3:38:26", 
"remaining_time": "8:05:53"} +{"current_steps": 2693, "total_steps": 8680, "loss": 0.7874879240989685, "lr": 1.6522392808016176e-06, "epoch": 0.6205069124423963, "percentage": 31.03, "elapsed_time": "3:38:31", "remaining_time": "8:05:49"} +{"current_steps": 2694, "total_steps": 8680, "loss": 0.8306739330291748, "lr": 1.6519504427242503e-06, "epoch": 0.6207373271889401, "percentage": 31.04, "elapsed_time": "3:38:36", "remaining_time": "8:05:44"} +{"current_steps": 2695, "total_steps": 8680, "loss": 0.7617331743240356, "lr": 1.651661510017139e-06, "epoch": 0.6209677419354839, "percentage": 31.05, "elapsed_time": "3:38:41", "remaining_time": "8:05:40"} +{"current_steps": 2696, "total_steps": 8680, "loss": 0.8912776708602905, "lr": 1.6513724827222223e-06, "epoch": 0.6211981566820276, "percentage": 31.06, "elapsed_time": "3:38:46", "remaining_time": "8:05:35"} +{"current_steps": 2697, "total_steps": 8680, "loss": 0.832025945186615, "lr": 1.6510833608814519e-06, "epoch": 0.6214285714285714, "percentage": 31.07, "elapsed_time": "3:38:50", "remaining_time": "8:05:28"} +{"current_steps": 2698, "total_steps": 8680, "loss": 0.7391358613967896, "lr": 1.6507941445367934e-06, "epoch": 0.6216589861751152, "percentage": 31.08, "elapsed_time": "3:38:54", "remaining_time": "8:05:22"} +{"current_steps": 2699, "total_steps": 8680, "loss": 0.7968891263008118, "lr": 1.6505048337302267e-06, "epoch": 0.621889400921659, "percentage": 31.09, "elapsed_time": "3:38:59", "remaining_time": "8:05:17"} +{"current_steps": 2700, "total_steps": 8680, "loss": 0.8268226981163025, "lr": 1.6502154285037446e-06, "epoch": 0.6221198156682027, "percentage": 31.11, "elapsed_time": "3:39:03", "remaining_time": "8:05:10"} +{"current_steps": 2701, "total_steps": 8680, "loss": 0.8727509379386902, "lr": 1.6499259288993536e-06, "epoch": 0.6223502304147466, "percentage": 31.12, "elapsed_time": "3:39:10", "remaining_time": "8:05:10"} +{"current_steps": 2702, "total_steps": 8680, "loss": 0.8419584035873413, "lr": 
1.6496363349590746e-06, "epoch": 0.6225806451612903, "percentage": 31.13, "elapsed_time": "3:39:14", "remaining_time": "8:05:03"} +{"current_steps": 2703, "total_steps": 8680, "loss": 0.7753620743751526, "lr": 1.6493466467249415e-06, "epoch": 0.6228110599078341, "percentage": 31.14, "elapsed_time": "3:39:19", "remaining_time": "8:04:59"} +{"current_steps": 2704, "total_steps": 8680, "loss": 0.7735302448272705, "lr": 1.6490568642390022e-06, "epoch": 0.6230414746543779, "percentage": 31.15, "elapsed_time": "3:39:26", "remaining_time": "8:04:57"} +{"current_steps": 2705, "total_steps": 8680, "loss": 0.8730747699737549, "lr": 1.6487669875433183e-06, "epoch": 0.6232718894009217, "percentage": 31.16, "elapsed_time": "3:39:30", "remaining_time": "8:04:51"} +{"current_steps": 2706, "total_steps": 8680, "loss": 1.026259183883667, "lr": 1.648477016679965e-06, "epoch": 0.6235023041474654, "percentage": 31.18, "elapsed_time": "3:39:34", "remaining_time": "8:04:45"} +{"current_steps": 2707, "total_steps": 8680, "loss": 1.0710067749023438, "lr": 1.6481869516910314e-06, "epoch": 0.6237327188940092, "percentage": 31.19, "elapsed_time": "3:39:38", "remaining_time": "8:04:37"} +{"current_steps": 2708, "total_steps": 8680, "loss": 0.8451842069625854, "lr": 1.6478967926186196e-06, "epoch": 0.623963133640553, "percentage": 31.2, "elapsed_time": "3:39:43", "remaining_time": "8:04:33"} +{"current_steps": 2709, "total_steps": 8680, "loss": 0.8114550113677979, "lr": 1.6476065395048463e-06, "epoch": 0.6241935483870967, "percentage": 31.21, "elapsed_time": "3:39:47", "remaining_time": "8:04:27"} +{"current_steps": 2710, "total_steps": 8680, "loss": 0.9158897399902344, "lr": 1.6473161923918408e-06, "epoch": 0.6244239631336406, "percentage": 31.22, "elapsed_time": "3:39:52", "remaining_time": "8:04:21"} +{"current_steps": 2711, "total_steps": 8680, "loss": 0.8455985188484192, "lr": 1.6470257513217471e-06, "epoch": 0.6246543778801843, "percentage": 31.23, "elapsed_time": "3:39:56", 
"remaining_time": "8:04:15"} +{"current_steps": 2712, "total_steps": 8680, "loss": 0.7869806885719299, "lr": 1.6467352163367224e-06, "epoch": 0.6248847926267281, "percentage": 31.24, "elapsed_time": "3:40:00", "remaining_time": "8:04:09"} +{"current_steps": 2713, "total_steps": 8680, "loss": 0.7813467979431152, "lr": 1.6464445874789369e-06, "epoch": 0.6251152073732719, "percentage": 31.26, "elapsed_time": "3:40:06", "remaining_time": "8:04:06"} +{"current_steps": 2714, "total_steps": 8680, "loss": 0.7607834339141846, "lr": 1.646153864790575e-06, "epoch": 0.6253456221198157, "percentage": 31.27, "elapsed_time": "3:40:11", "remaining_time": "8:04:02"} +{"current_steps": 2715, "total_steps": 8680, "loss": 0.6316394209861755, "lr": 1.6458630483138354e-06, "epoch": 0.6255760368663594, "percentage": 31.28, "elapsed_time": "3:40:16", "remaining_time": "8:03:57"} +{"current_steps": 2716, "total_steps": 8680, "loss": 0.8613089323043823, "lr": 1.6455721380909293e-06, "epoch": 0.6258064516129033, "percentage": 31.29, "elapsed_time": "3:40:20", "remaining_time": "8:03:49"} +{"current_steps": 2717, "total_steps": 8680, "loss": 0.8521597385406494, "lr": 1.6452811341640823e-06, "epoch": 0.626036866359447, "percentage": 31.3, "elapsed_time": "3:40:23", "remaining_time": "8:03:42"} +{"current_steps": 2718, "total_steps": 8680, "loss": 0.7649816870689392, "lr": 1.6449900365755322e-06, "epoch": 0.6262672811059908, "percentage": 31.31, "elapsed_time": "3:40:29", "remaining_time": "8:03:38"} +{"current_steps": 2719, "total_steps": 8680, "loss": 0.669215738773346, "lr": 1.6446988453675327e-06, "epoch": 0.6264976958525346, "percentage": 31.32, "elapsed_time": "3:40:34", "remaining_time": "8:03:34"} +{"current_steps": 2720, "total_steps": 8680, "loss": 0.7795897722244263, "lr": 1.6444075605823491e-06, "epoch": 0.6267281105990783, "percentage": 31.34, "elapsed_time": "3:40:40", "remaining_time": "8:03:31"} +{"current_steps": 2721, "total_steps": 8680, "loss": 0.9773029088973999, "lr": 
1.6441161822622612e-06, "epoch": 0.6269585253456221, "percentage": 31.35, "elapsed_time": "3:40:43", "remaining_time": "8:03:23"} +{"current_steps": 2722, "total_steps": 8680, "loss": 0.8313496112823486, "lr": 1.6438247104495622e-06, "epoch": 0.6271889400921659, "percentage": 31.36, "elapsed_time": "3:40:48", "remaining_time": "8:03:18"} +{"current_steps": 2723, "total_steps": 8680, "loss": 0.822803258895874, "lr": 1.6435331451865589e-06, "epoch": 0.6274193548387097, "percentage": 31.37, "elapsed_time": "3:40:52", "remaining_time": "8:03:12"} +{"current_steps": 2724, "total_steps": 8680, "loss": 0.8933405876159668, "lr": 1.643241486515571e-06, "epoch": 0.6276497695852534, "percentage": 31.38, "elapsed_time": "3:40:57", "remaining_time": "8:03:08"} +{"current_steps": 2725, "total_steps": 8680, "loss": 0.865382194519043, "lr": 1.6429497344789334e-06, "epoch": 0.6278801843317973, "percentage": 31.39, "elapsed_time": "3:41:03", "remaining_time": "8:03:04"} +{"current_steps": 2726, "total_steps": 8680, "loss": 0.5955609679222107, "lr": 1.6426578891189929e-06, "epoch": 0.628110599078341, "percentage": 31.41, "elapsed_time": "3:41:09", "remaining_time": "8:03:02"} +{"current_steps": 2727, "total_steps": 8680, "loss": 0.7832648754119873, "lr": 1.6423659504781102e-06, "epoch": 0.6283410138248848, "percentage": 31.42, "elapsed_time": "3:41:15", "remaining_time": "8:02:59"} +{"current_steps": 2728, "total_steps": 8680, "loss": 0.8939651250839233, "lr": 1.6420739185986606e-06, "epoch": 0.6285714285714286, "percentage": 31.43, "elapsed_time": "3:41:19", "remaining_time": "8:02:53"} +{"current_steps": 2729, "total_steps": 8680, "loss": 0.7950553894042969, "lr": 1.6417817935230316e-06, "epoch": 0.6288018433179724, "percentage": 31.44, "elapsed_time": "3:41:23", "remaining_time": "8:02:46"} +{"current_steps": 2730, "total_steps": 8680, "loss": 0.7011410593986511, "lr": 1.6414895752936247e-06, "epoch": 0.6290322580645161, "percentage": 31.45, "elapsed_time": "3:41:27", 
"remaining_time": "8:02:40"} +{"current_steps": 2731, "total_steps": 8680, "loss": 0.8745814561843872, "lr": 1.6411972639528553e-06, "epoch": 0.6292626728110599, "percentage": 31.46, "elapsed_time": "3:41:32", "remaining_time": "8:02:34"} +{"current_steps": 2732, "total_steps": 8680, "loss": 0.9487906694412231, "lr": 1.640904859543152e-06, "epoch": 0.6294930875576037, "percentage": 31.47, "elapsed_time": "3:41:35", "remaining_time": "8:02:26"} +{"current_steps": 2733, "total_steps": 8680, "loss": 0.8493598103523254, "lr": 1.6406123621069565e-06, "epoch": 0.6297235023041474, "percentage": 31.49, "elapsed_time": "3:41:40", "remaining_time": "8:02:20"} +{"current_steps": 2734, "total_steps": 8680, "loss": 0.8176105618476868, "lr": 1.640319771686725e-06, "epoch": 0.6299539170506913, "percentage": 31.5, "elapsed_time": "3:41:46", "remaining_time": "8:02:19"} +{"current_steps": 2735, "total_steps": 8680, "loss": 0.8331952691078186, "lr": 1.640027088324926e-06, "epoch": 0.630184331797235, "percentage": 31.51, "elapsed_time": "3:41:50", "remaining_time": "8:02:12"} +{"current_steps": 2736, "total_steps": 8680, "loss": 0.7507727146148682, "lr": 1.6397343120640428e-06, "epoch": 0.6304147465437788, "percentage": 31.52, "elapsed_time": "3:41:55", "remaining_time": "8:02:08"} +{"current_steps": 2737, "total_steps": 8680, "loss": 0.7681083679199219, "lr": 1.6394414429465707e-06, "epoch": 0.6306451612903226, "percentage": 31.53, "elapsed_time": "3:42:01", "remaining_time": "8:02:05"} +{"current_steps": 2738, "total_steps": 8680, "loss": 0.86592036485672, "lr": 1.6391484810150197e-06, "epoch": 0.6308755760368664, "percentage": 31.54, "elapsed_time": "3:42:06", "remaining_time": "8:02:00"} +{"current_steps": 2739, "total_steps": 8680, "loss": 0.6561422348022461, "lr": 1.6388554263119133e-06, "epoch": 0.6311059907834101, "percentage": 31.56, "elapsed_time": "3:42:11", "remaining_time": "8:01:57"} +{"current_steps": 2740, "total_steps": 8680, "loss": 1.0149214267730713, "lr": 
1.6385622788797871e-06, "epoch": 0.631336405529954, "percentage": 31.57, "elapsed_time": "3:42:16", "remaining_time": "8:01:50"} +{"current_steps": 2741, "total_steps": 8680, "loss": 0.8542313575744629, "lr": 1.6382690387611912e-06, "epoch": 0.6315668202764977, "percentage": 31.58, "elapsed_time": "3:42:21", "remaining_time": "8:01:46"} +{"current_steps": 2742, "total_steps": 8680, "loss": 0.8561190366744995, "lr": 1.6379757059986898e-06, "epoch": 0.6317972350230415, "percentage": 31.59, "elapsed_time": "3:42:26", "remaining_time": "8:01:43"} +{"current_steps": 2743, "total_steps": 8680, "loss": 0.7487457990646362, "lr": 1.6376822806348591e-06, "epoch": 0.6320276497695853, "percentage": 31.6, "elapsed_time": "3:42:31", "remaining_time": "8:01:38"} +{"current_steps": 2744, "total_steps": 8680, "loss": 0.6169087886810303, "lr": 1.6373887627122894e-06, "epoch": 0.632258064516129, "percentage": 31.61, "elapsed_time": "3:42:37", "remaining_time": "8:01:35"} +{"current_steps": 2745, "total_steps": 8680, "loss": 0.8384301662445068, "lr": 1.6370951522735848e-06, "epoch": 0.6324884792626728, "percentage": 31.62, "elapsed_time": "3:42:41", "remaining_time": "8:01:29"} +{"current_steps": 2746, "total_steps": 8680, "loss": 0.8009958267211914, "lr": 1.636801449361362e-06, "epoch": 0.6327188940092165, "percentage": 31.64, "elapsed_time": "3:42:46", "remaining_time": "8:01:24"} +{"current_steps": 2747, "total_steps": 8680, "loss": 0.7277840375900269, "lr": 1.6365076540182518e-06, "epoch": 0.6329493087557604, "percentage": 31.65, "elapsed_time": "3:42:52", "remaining_time": "8:01:21"} +{"current_steps": 2748, "total_steps": 8680, "loss": 0.7994974255561829, "lr": 1.6362137662868988e-06, "epoch": 0.6331797235023041, "percentage": 31.66, "elapsed_time": "3:42:56", "remaining_time": "8:01:14"} +{"current_steps": 2749, "total_steps": 8680, "loss": 0.9940546751022339, "lr": 1.6359197862099592e-06, "epoch": 0.633410138248848, "percentage": 31.67, "elapsed_time": "3:43:00", 
"remaining_time": "8:01:09"} +{"current_steps": 2750, "total_steps": 8680, "loss": 0.776983916759491, "lr": 1.6356257138301048e-06, "epoch": 0.6336405529953917, "percentage": 31.68, "elapsed_time": "3:43:06", "remaining_time": "8:01:06"} +{"current_steps": 2751, "total_steps": 8680, "loss": 0.8218704462051392, "lr": 1.6353315491900194e-06, "epoch": 0.6338709677419355, "percentage": 31.69, "elapsed_time": "3:43:12", "remaining_time": "8:01:03"} +{"current_steps": 2752, "total_steps": 8680, "loss": 0.8437784910202026, "lr": 1.635037292332401e-06, "epoch": 0.6341013824884792, "percentage": 31.71, "elapsed_time": "3:43:16", "remaining_time": "8:00:56"} +{"current_steps": 2753, "total_steps": 8680, "loss": 0.7774004340171814, "lr": 1.63474294329996e-06, "epoch": 0.6343317972350231, "percentage": 31.72, "elapsed_time": "3:43:20", "remaining_time": "8:00:50"} +{"current_steps": 2754, "total_steps": 8680, "loss": 0.8480523824691772, "lr": 1.634448502135421e-06, "epoch": 0.6345622119815668, "percentage": 31.73, "elapsed_time": "3:43:25", "remaining_time": "8:00:44"} +{"current_steps": 2755, "total_steps": 8680, "loss": 0.838944673538208, "lr": 1.634153968881522e-06, "epoch": 0.6347926267281107, "percentage": 31.74, "elapsed_time": "3:43:29", "remaining_time": "8:00:38"} +{"current_steps": 2756, "total_steps": 8680, "loss": 0.7989159822463989, "lr": 1.633859343581014e-06, "epoch": 0.6350230414746544, "percentage": 31.75, "elapsed_time": "3:43:34", "remaining_time": "8:00:35"} +{"current_steps": 2757, "total_steps": 8680, "loss": 0.8122522234916687, "lr": 1.6335646262766612e-06, "epoch": 0.6352534562211981, "percentage": 31.76, "elapsed_time": "3:43:38", "remaining_time": "8:00:27"} +{"current_steps": 2758, "total_steps": 8680, "loss": 0.7472352981567383, "lr": 1.6332698170112418e-06, "epoch": 0.635483870967742, "percentage": 31.77, "elapsed_time": "3:43:41", "remaining_time": "8:00:19"} +{"current_steps": 2759, "total_steps": 8680, "loss": 0.7160866260528564, "lr": 
1.6329749158275466e-06, "epoch": 0.6357142857142857, "percentage": 31.79, "elapsed_time": "3:43:47", "remaining_time": "8:00:15"} +{"current_steps": 2760, "total_steps": 8680, "loss": 0.850339412689209, "lr": 1.6326799227683803e-06, "epoch": 0.6359447004608295, "percentage": 31.8, "elapsed_time": "3:43:52", "remaining_time": "8:00:12"} +{"current_steps": 2761, "total_steps": 8680, "loss": 0.7683566808700562, "lr": 1.632384837876561e-06, "epoch": 0.6361751152073732, "percentage": 31.81, "elapsed_time": "3:43:57", "remaining_time": "8:00:07"} +{"current_steps": 2762, "total_steps": 8680, "loss": 0.820326030254364, "lr": 1.6320896611949197e-06, "epoch": 0.6364055299539171, "percentage": 31.82, "elapsed_time": "3:44:03", "remaining_time": "8:00:03"} +{"current_steps": 2763, "total_steps": 8680, "loss": 0.9319206476211548, "lr": 1.6317943927663005e-06, "epoch": 0.6366359447004608, "percentage": 31.83, "elapsed_time": "3:44:07", "remaining_time": "7:59:57"} +{"current_steps": 2764, "total_steps": 8680, "loss": 0.8473616242408752, "lr": 1.6314990326335619e-06, "epoch": 0.6368663594470046, "percentage": 31.84, "elapsed_time": "3:44:12", "remaining_time": "7:59:53"} +{"current_steps": 2765, "total_steps": 8680, "loss": 0.7515239715576172, "lr": 1.6312035808395746e-06, "epoch": 0.6370967741935484, "percentage": 31.85, "elapsed_time": "3:44:16", "remaining_time": "7:59:47"} +{"current_steps": 2766, "total_steps": 8680, "loss": 0.8780150413513184, "lr": 1.630908037427223e-06, "epoch": 0.6373271889400922, "percentage": 31.87, "elapsed_time": "3:44:21", "remaining_time": "7:59:42"} +{"current_steps": 2767, "total_steps": 8680, "loss": 0.7502909898757935, "lr": 1.6306124024394051e-06, "epoch": 0.6375576036866359, "percentage": 31.88, "elapsed_time": "3:44:26", "remaining_time": "7:59:36"} +{"current_steps": 2768, "total_steps": 8680, "loss": 0.8440920114517212, "lr": 1.630316675919032e-06, "epoch": 0.6377880184331797, "percentage": 31.89, "elapsed_time": "3:44:31", 
"remaining_time": "7:59:32"} +{"current_steps": 2769, "total_steps": 8680, "loss": 0.7769831418991089, "lr": 1.6300208579090275e-06, "epoch": 0.6380184331797235, "percentage": 31.9, "elapsed_time": "3:44:36", "remaining_time": "7:59:29"} +{"current_steps": 2770, "total_steps": 8680, "loss": 0.6217764616012573, "lr": 1.6297249484523297e-06, "epoch": 0.6382488479262672, "percentage": 31.91, "elapsed_time": "3:44:42", "remaining_time": "7:59:26"} +{"current_steps": 2771, "total_steps": 8680, "loss": 0.8726013898849487, "lr": 1.6294289475918891e-06, "epoch": 0.6384792626728111, "percentage": 31.92, "elapsed_time": "3:44:47", "remaining_time": "7:59:22"} +{"current_steps": 2772, "total_steps": 8680, "loss": 0.9624546766281128, "lr": 1.6291328553706702e-06, "epoch": 0.6387096774193548, "percentage": 31.94, "elapsed_time": "3:44:52", "remaining_time": "7:59:16"} +{"current_steps": 2773, "total_steps": 8680, "loss": 0.733322024345398, "lr": 1.62883667183165e-06, "epoch": 0.6389400921658986, "percentage": 31.95, "elapsed_time": "3:44:56", "remaining_time": "7:59:10"} +{"current_steps": 2774, "total_steps": 8680, "loss": 0.7944040298461914, "lr": 1.6285403970178197e-06, "epoch": 0.6391705069124424, "percentage": 31.96, "elapsed_time": "3:45:01", "remaining_time": "7:59:04"} +{"current_steps": 2775, "total_steps": 8680, "loss": 0.8006964921951294, "lr": 1.6282440309721825e-06, "epoch": 0.6394009216589862, "percentage": 31.97, "elapsed_time": "3:45:06", "remaining_time": "7:59:00"} +{"current_steps": 2776, "total_steps": 8680, "loss": 0.8226393461227417, "lr": 1.6279475737377562e-06, "epoch": 0.6396313364055299, "percentage": 31.98, "elapsed_time": "3:45:11", "remaining_time": "7:58:55"} +{"current_steps": 2777, "total_steps": 8680, "loss": 0.8216049671173096, "lr": 1.6276510253575707e-06, "epoch": 0.6398617511520738, "percentage": 31.99, "elapsed_time": "3:45:16", "remaining_time": "7:58:51"} +{"current_steps": 2778, "total_steps": 8680, "loss": 0.9556760191917419, "lr": 
1.6273543858746698e-06, "epoch": 0.6400921658986175, "percentage": 32.0, "elapsed_time": "3:45:20", "remaining_time": "7:58:45"} +{"current_steps": 2779, "total_steps": 8680, "loss": 0.9736160039901733, "lr": 1.6270576553321103e-06, "epoch": 0.6403225806451613, "percentage": 32.02, "elapsed_time": "3:45:25", "remaining_time": "7:58:41"} +{"current_steps": 2780, "total_steps": 8680, "loss": 0.6930527687072754, "lr": 1.6267608337729622e-06, "epoch": 0.6405529953917051, "percentage": 32.03, "elapsed_time": "3:45:31", "remaining_time": "7:58:38"} +{"current_steps": 2781, "total_steps": 8680, "loss": 0.8047456741333008, "lr": 1.6264639212403089e-06, "epoch": 0.6407834101382488, "percentage": 32.04, "elapsed_time": "3:45:36", "remaining_time": "7:58:34"} +{"current_steps": 2782, "total_steps": 8680, "loss": 0.7278450727462769, "lr": 1.6261669177772465e-06, "epoch": 0.6410138248847926, "percentage": 32.05, "elapsed_time": "3:45:41", "remaining_time": "7:58:28"} +{"current_steps": 2783, "total_steps": 8680, "loss": 0.7768574357032776, "lr": 1.6258698234268852e-06, "epoch": 0.6412442396313364, "percentage": 32.06, "elapsed_time": "3:45:47", "remaining_time": "7:58:26"} +{"current_steps": 2784, "total_steps": 8680, "loss": 0.7621645331382751, "lr": 1.6255726382323475e-06, "epoch": 0.6414746543778802, "percentage": 32.07, "elapsed_time": "3:45:52", "remaining_time": "7:58:21"} +{"current_steps": 2785, "total_steps": 8680, "loss": 0.7566754221916199, "lr": 1.6252753622367695e-06, "epoch": 0.6417050691244239, "percentage": 32.09, "elapsed_time": "3:45:57", "remaining_time": "7:58:17"} +{"current_steps": 2786, "total_steps": 8680, "loss": 0.7609840631484985, "lr": 1.6249779954833005e-06, "epoch": 0.6419354838709678, "percentage": 32.1, "elapsed_time": "3:46:01", "remaining_time": "7:58:10"} +{"current_steps": 2787, "total_steps": 8680, "loss": 0.7360000610351562, "lr": 1.6246805380151028e-06, "epoch": 0.6421658986175115, "percentage": 32.11, "elapsed_time": "3:46:07", 
"remaining_time": "7:58:08"} +{"current_steps": 2788, "total_steps": 8680, "loss": 0.7951081395149231, "lr": 1.624382989875352e-06, "epoch": 0.6423963133640553, "percentage": 32.12, "elapsed_time": "3:46:12", "remaining_time": "7:58:02"} +{"current_steps": 2789, "total_steps": 8680, "loss": 0.7273311614990234, "lr": 1.6240853511072367e-06, "epoch": 0.6426267281105991, "percentage": 32.13, "elapsed_time": "3:46:17", "remaining_time": "7:57:58"} +{"current_steps": 2790, "total_steps": 8680, "loss": 0.9270737171173096, "lr": 1.6237876217539588e-06, "epoch": 0.6428571428571429, "percentage": 32.14, "elapsed_time": "3:46:23", "remaining_time": "7:57:55"} +{"current_steps": 2791, "total_steps": 8680, "loss": 0.7624385356903076, "lr": 1.6234898018587336e-06, "epoch": 0.6430875576036866, "percentage": 32.15, "elapsed_time": "3:46:27", "remaining_time": "7:57:48"} +{"current_steps": 2792, "total_steps": 8680, "loss": 0.8266719579696655, "lr": 1.6231918914647889e-06, "epoch": 0.6433179723502304, "percentage": 32.17, "elapsed_time": "3:46:32", "remaining_time": "7:57:44"} +{"current_steps": 2793, "total_steps": 8680, "loss": 0.7606902122497559, "lr": 1.6228938906153663e-06, "epoch": 0.6435483870967742, "percentage": 32.18, "elapsed_time": "3:46:38", "remaining_time": "7:57:42"} +{"current_steps": 2794, "total_steps": 8680, "loss": 0.8239191174507141, "lr": 1.6225957993537197e-06, "epoch": 0.6437788018433179, "percentage": 32.19, "elapsed_time": "3:46:43", "remaining_time": "7:57:38"} +{"current_steps": 2795, "total_steps": 8680, "loss": 0.8313608169555664, "lr": 1.6222976177231174e-06, "epoch": 0.6440092165898618, "percentage": 32.2, "elapsed_time": "3:46:50", "remaining_time": "7:57:36"} +{"current_steps": 2796, "total_steps": 8680, "loss": 0.7725037932395935, "lr": 1.6219993457668396e-06, "epoch": 0.6442396313364055, "percentage": 32.21, "elapsed_time": "3:46:55", "remaining_time": "7:57:32"} +{"current_steps": 2797, "total_steps": 8680, "loss": 0.8791182041168213, "lr": 
1.6217009835281802e-06, "epoch": 0.6444700460829493, "percentage": 32.22, "elapsed_time": "3:46:59", "remaining_time": "7:57:27"} +{"current_steps": 2798, "total_steps": 8680, "loss": 0.7157453298568726, "lr": 1.621402531050446e-06, "epoch": 0.6447004608294931, "percentage": 32.24, "elapsed_time": "3:47:05", "remaining_time": "7:57:22"} +{"current_steps": 2799, "total_steps": 8680, "loss": 0.8248307704925537, "lr": 1.621103988376957e-06, "epoch": 0.6449308755760369, "percentage": 32.25, "elapsed_time": "3:47:08", "remaining_time": "7:57:15"} +{"current_steps": 2800, "total_steps": 8680, "loss": 0.7094661593437195, "lr": 1.6208053555510467e-06, "epoch": 0.6451612903225806, "percentage": 32.26, "elapsed_time": "3:47:13", "remaining_time": "7:57:11"} +{"current_steps": 2801, "total_steps": 8680, "loss": 0.9130781888961792, "lr": 1.6205066326160605e-06, "epoch": 0.6453917050691245, "percentage": 32.27, "elapsed_time": "3:47:20", "remaining_time": "7:57:10"} +{"current_steps": 2802, "total_steps": 8680, "loss": 0.7140541076660156, "lr": 1.620207819615358e-06, "epoch": 0.6456221198156682, "percentage": 32.28, "elapsed_time": "3:47:24", "remaining_time": "7:57:04"} +{"current_steps": 2803, "total_steps": 8680, "loss": 0.8638602495193481, "lr": 1.6199089165923116e-06, "epoch": 0.645852534562212, "percentage": 32.29, "elapsed_time": "3:47:29", "remaining_time": "7:56:59"} +{"current_steps": 2804, "total_steps": 8680, "loss": 0.9055536389350891, "lr": 1.6196099235903068e-06, "epoch": 0.6460829493087558, "percentage": 32.3, "elapsed_time": "3:47:35", "remaining_time": "7:56:57"} +{"current_steps": 2805, "total_steps": 8680, "loss": 0.7694590091705322, "lr": 1.6193108406527416e-06, "epoch": 0.6463133640552995, "percentage": 32.32, "elapsed_time": "3:47:39", "remaining_time": "7:56:49"} +{"current_steps": 2806, "total_steps": 8680, "loss": 0.7512019872665405, "lr": 1.619011667823028e-06, "epoch": 0.6465437788018433, "percentage": 32.33, "elapsed_time": "3:47:44", 
"remaining_time": "7:56:45"} +{"current_steps": 2807, "total_steps": 8680, "loss": 0.6362565159797668, "lr": 1.6187124051445903e-06, "epoch": 0.646774193548387, "percentage": 32.34, "elapsed_time": "3:47:50", "remaining_time": "7:56:42"} +{"current_steps": 2808, "total_steps": 8680, "loss": 0.885259747505188, "lr": 1.6184130526608656e-06, "epoch": 0.6470046082949309, "percentage": 32.35, "elapsed_time": "3:47:55", "remaining_time": "7:56:37"} +{"current_steps": 2809, "total_steps": 8680, "loss": 0.7868754863739014, "lr": 1.6181136104153054e-06, "epoch": 0.6472350230414746, "percentage": 32.36, "elapsed_time": "3:48:00", "remaining_time": "7:56:32"} +{"current_steps": 2810, "total_steps": 8680, "loss": 0.889660120010376, "lr": 1.6178140784513729e-06, "epoch": 0.6474654377880185, "percentage": 32.37, "elapsed_time": "3:48:05", "remaining_time": "7:56:27"} +{"current_steps": 2811, "total_steps": 8680, "loss": 0.8460343480110168, "lr": 1.6175144568125444e-06, "epoch": 0.6476958525345622, "percentage": 32.38, "elapsed_time": "3:48:09", "remaining_time": "7:56:21"} +{"current_steps": 2812, "total_steps": 8680, "loss": 0.8729731440544128, "lr": 1.6172147455423105e-06, "epoch": 0.647926267281106, "percentage": 32.4, "elapsed_time": "3:48:13", "remaining_time": "7:56:15"} +{"current_steps": 2813, "total_steps": 8680, "loss": 0.7937173843383789, "lr": 1.616914944684173e-06, "epoch": 0.6481566820276498, "percentage": 32.41, "elapsed_time": "3:48:19", "remaining_time": "7:56:12"} +{"current_steps": 2814, "total_steps": 8680, "loss": 0.8764641284942627, "lr": 1.6166150542816483e-06, "epoch": 0.6483870967741936, "percentage": 32.42, "elapsed_time": "3:48:23", "remaining_time": "7:56:05"} +{"current_steps": 2815, "total_steps": 8680, "loss": 0.8078420758247375, "lr": 1.6163150743782645e-06, "epoch": 0.6486175115207373, "percentage": 32.43, "elapsed_time": "3:48:29", "remaining_time": "7:56:04"} +{"current_steps": 2816, "total_steps": 8680, "loss": 0.9124993085861206, "lr": 
1.6160150050175636e-06, "epoch": 0.6488479262672812, "percentage": 32.44, "elapsed_time": "3:48:34", "remaining_time": "7:55:59"} +{"current_steps": 2817, "total_steps": 8680, "loss": 0.9584136009216309, "lr": 1.6157148462431003e-06, "epoch": 0.6490783410138249, "percentage": 32.45, "elapsed_time": "3:48:39", "remaining_time": "7:55:53"} +{"current_steps": 2818, "total_steps": 8680, "loss": 0.8404672145843506, "lr": 1.6154145980984422e-06, "epoch": 0.6493087557603686, "percentage": 32.47, "elapsed_time": "3:48:43", "remaining_time": "7:55:47"} +{"current_steps": 2819, "total_steps": 8680, "loss": 0.7928001880645752, "lr": 1.6151142606271695e-06, "epoch": 0.6495391705069125, "percentage": 32.48, "elapsed_time": "3:48:49", "remaining_time": "7:55:45"} +{"current_steps": 2820, "total_steps": 8680, "loss": 0.7877479791641235, "lr": 1.6148138338728766e-06, "epoch": 0.6497695852534562, "percentage": 32.49, "elapsed_time": "3:48:54", "remaining_time": "7:55:39"} +{"current_steps": 2821, "total_steps": 8680, "loss": 0.9502429366111755, "lr": 1.6145133178791695e-06, "epoch": 0.65, "percentage": 32.5, "elapsed_time": "3:48:58", "remaining_time": "7:55:33"} +{"current_steps": 2822, "total_steps": 8680, "loss": 0.7866412401199341, "lr": 1.6142127126896679e-06, "epoch": 0.6502304147465438, "percentage": 32.51, "elapsed_time": "3:49:03", "remaining_time": "7:55:30"} +{"current_steps": 2823, "total_steps": 8680, "loss": 0.8315345644950867, "lr": 1.613912018348004e-06, "epoch": 0.6504608294930876, "percentage": 32.52, "elapsed_time": "3:49:08", "remaining_time": "7:55:23"} +{"current_steps": 2824, "total_steps": 8680, "loss": 0.9718044400215149, "lr": 1.6136112348978236e-06, "epoch": 0.6506912442396313, "percentage": 32.53, "elapsed_time": "3:49:14", "remaining_time": "7:55:22"} +{"current_steps": 2825, "total_steps": 8680, "loss": 0.5874941349029541, "lr": 1.6133103623827843e-06, "epoch": 0.6509216589861752, "percentage": 32.55, "elapsed_time": "3:49:21", "remaining_time": 
"7:55:21"} +{"current_steps": 2826, "total_steps": 8680, "loss": 0.9498391151428223, "lr": 1.613009400846558e-06, "epoch": 0.6511520737327189, "percentage": 32.56, "elapsed_time": "3:49:25", "remaining_time": "7:55:14"} +{"current_steps": 2827, "total_steps": 8680, "loss": 0.858715295791626, "lr": 1.612708350332829e-06, "epoch": 0.6513824884792627, "percentage": 32.57, "elapsed_time": "3:49:29", "remaining_time": "7:55:08"} +{"current_steps": 2828, "total_steps": 8680, "loss": 0.8618113994598389, "lr": 1.6124072108852938e-06, "epoch": 0.6516129032258065, "percentage": 32.58, "elapsed_time": "3:49:35", "remaining_time": "7:55:04"} +{"current_steps": 2829, "total_steps": 8680, "loss": 0.8024446964263916, "lr": 1.6121059825476628e-06, "epoch": 0.6518433179723502, "percentage": 32.59, "elapsed_time": "3:49:39", "remaining_time": "7:54:59"} +{"current_steps": 2830, "total_steps": 8680, "loss": 0.8021122813224792, "lr": 1.6118046653636586e-06, "epoch": 0.652073732718894, "percentage": 32.6, "elapsed_time": "3:49:44", "remaining_time": "7:54:53"} +{"current_steps": 2831, "total_steps": 8680, "loss": 0.8092107772827148, "lr": 1.6115032593770176e-06, "epoch": 0.6523041474654377, "percentage": 32.62, "elapsed_time": "3:49:49", "remaining_time": "7:54:49"} +{"current_steps": 2832, "total_steps": 8680, "loss": 0.9842641353607178, "lr": 1.6112017646314872e-06, "epoch": 0.6525345622119816, "percentage": 32.63, "elapsed_time": "3:49:54", "remaining_time": "7:54:44"} +{"current_steps": 2833, "total_steps": 8680, "loss": 0.744353175163269, "lr": 1.6109001811708305e-06, "epoch": 0.6527649769585253, "percentage": 32.64, "elapsed_time": "3:50:00", "remaining_time": "7:54:42"} +{"current_steps": 2834, "total_steps": 8680, "loss": 0.7089616060256958, "lr": 1.6105985090388209e-06, "epoch": 0.6529953917050692, "percentage": 32.65, "elapsed_time": "3:50:05", "remaining_time": "7:54:37"} +{"current_steps": 2835, "total_steps": 8680, "loss": 0.9043736457824707, "lr": 1.610296748279246e-06, 
"epoch": 0.6532258064516129, "percentage": 32.66, "elapsed_time": "3:50:10", "remaining_time": "7:54:33"} +{"current_steps": 2836, "total_steps": 8680, "loss": 0.9170948266983032, "lr": 1.6099948989359061e-06, "epoch": 0.6534562211981567, "percentage": 32.67, "elapsed_time": "3:50:14", "remaining_time": "7:54:26"} +{"current_steps": 2837, "total_steps": 8680, "loss": 0.8275802135467529, "lr": 1.6096929610526145e-06, "epoch": 0.6536866359447004, "percentage": 32.68, "elapsed_time": "3:50:18", "remaining_time": "7:54:20"} +{"current_steps": 2838, "total_steps": 8680, "loss": 0.9180251955986023, "lr": 1.6093909346731965e-06, "epoch": 0.6539170506912443, "percentage": 32.7, "elapsed_time": "3:50:22", "remaining_time": "7:54:13"} +{"current_steps": 2839, "total_steps": 8680, "loss": 0.8041235208511353, "lr": 1.6090888198414908e-06, "epoch": 0.654147465437788, "percentage": 32.71, "elapsed_time": "3:50:28", "remaining_time": "7:54:11"} +{"current_steps": 2840, "total_steps": 8680, "loss": 0.7833176851272583, "lr": 1.6087866166013492e-06, "epoch": 0.6543778801843319, "percentage": 32.72, "elapsed_time": "3:50:33", "remaining_time": "7:54:05"} +{"current_steps": 2841, "total_steps": 8680, "loss": 0.838886022567749, "lr": 1.6084843249966364e-06, "epoch": 0.6546082949308756, "percentage": 32.73, "elapsed_time": "3:50:38", "remaining_time": "7:54:02"} +{"current_steps": 2842, "total_steps": 8680, "loss": 0.837687611579895, "lr": 1.6081819450712293e-06, "epoch": 0.6548387096774193, "percentage": 32.74, "elapsed_time": "3:50:42", "remaining_time": "7:53:55"} +{"current_steps": 2843, "total_steps": 8680, "loss": 0.6572843790054321, "lr": 1.607879476869018e-06, "epoch": 0.6550691244239631, "percentage": 32.75, "elapsed_time": "3:50:46", "remaining_time": "7:53:49"} +{"current_steps": 2844, "total_steps": 8680, "loss": 0.7698653936386108, "lr": 1.6075769204339053e-06, "epoch": 0.6552995391705069, "percentage": 32.76, "elapsed_time": "3:50:51", "remaining_time": "7:53:44"} 
+{"current_steps": 2845, "total_steps": 8680, "loss": 0.8639169335365295, "lr": 1.607274275809807e-06, "epoch": 0.6555299539170507, "percentage": 32.78, "elapsed_time": "3:50:56", "remaining_time": "7:53:39"} +{"current_steps": 2846, "total_steps": 8680, "loss": 0.837492823600769, "lr": 1.6069715430406517e-06, "epoch": 0.6557603686635944, "percentage": 32.79, "elapsed_time": "3:51:02", "remaining_time": "7:53:35"} +{"current_steps": 2847, "total_steps": 8680, "loss": 0.8824087381362915, "lr": 1.6066687221703803e-06, "epoch": 0.6559907834101383, "percentage": 32.8, "elapsed_time": "3:51:07", "remaining_time": "7:53:31"} +{"current_steps": 2848, "total_steps": 8680, "loss": 0.8161731958389282, "lr": 1.6063658132429468e-06, "epoch": 0.656221198156682, "percentage": 32.81, "elapsed_time": "3:51:11", "remaining_time": "7:53:25"} +{"current_steps": 2849, "total_steps": 8680, "loss": 0.8365877270698547, "lr": 1.6060628163023183e-06, "epoch": 0.6564516129032258, "percentage": 32.82, "elapsed_time": "3:51:15", "remaining_time": "7:53:19"} +{"current_steps": 2850, "total_steps": 8680, "loss": 0.877829909324646, "lr": 1.6057597313924745e-06, "epoch": 0.6566820276497696, "percentage": 32.83, "elapsed_time": "3:51:22", "remaining_time": "7:53:18"} +{"current_steps": 2851, "total_steps": 8680, "loss": 0.756903886795044, "lr": 1.6054565585574075e-06, "epoch": 0.6569124423963134, "percentage": 32.85, "elapsed_time": "3:51:26", "remaining_time": "7:53:11"} +{"current_steps": 2852, "total_steps": 8680, "loss": 0.7777276039123535, "lr": 1.6051532978411223e-06, "epoch": 0.6571428571428571, "percentage": 32.86, "elapsed_time": "3:51:31", "remaining_time": "7:53:07"} +{"current_steps": 2853, "total_steps": 8680, "loss": 0.9191532135009766, "lr": 1.6048499492876375e-06, "epoch": 0.6573732718894009, "percentage": 32.87, "elapsed_time": "3:51:36", "remaining_time": "7:53:03"} +{"current_steps": 2854, "total_steps": 8680, "loss": 0.7693309783935547, "lr": 1.6045465129409829e-06, "epoch": 
0.6576036866359447, "percentage": 32.88, "elapsed_time": "3:51:41", "remaining_time": "7:52:57"} +{"current_steps": 2855, "total_steps": 8680, "loss": 0.7865023612976074, "lr": 1.6042429888452024e-06, "epoch": 0.6578341013824884, "percentage": 32.89, "elapsed_time": "3:51:46", "remaining_time": "7:52:52"} +{"current_steps": 2856, "total_steps": 8680, "loss": 0.844336748123169, "lr": 1.6039393770443521e-06, "epoch": 0.6580645161290323, "percentage": 32.9, "elapsed_time": "3:51:51", "remaining_time": "7:52:47"} +{"current_steps": 2857, "total_steps": 8680, "loss": 0.9590705633163452, "lr": 1.6036356775825009e-06, "epoch": 0.658294930875576, "percentage": 32.91, "elapsed_time": "3:51:55", "remaining_time": "7:52:42"} +{"current_steps": 2858, "total_steps": 8680, "loss": 0.8687748312950134, "lr": 1.6033318905037297e-06, "epoch": 0.6585253456221198, "percentage": 32.93, "elapsed_time": "3:52:00", "remaining_time": "7:52:37"} +{"current_steps": 2859, "total_steps": 8680, "loss": 0.8669745922088623, "lr": 1.6030280158521336e-06, "epoch": 0.6587557603686636, "percentage": 32.94, "elapsed_time": "3:52:05", "remaining_time": "7:52:32"} +{"current_steps": 2860, "total_steps": 8680, "loss": 0.6929436922073364, "lr": 1.6027240536718191e-06, "epoch": 0.6589861751152074, "percentage": 32.95, "elapsed_time": "3:52:10", "remaining_time": "7:52:27"} +{"current_steps": 2861, "total_steps": 8680, "loss": 0.6965433359146118, "lr": 1.6024200040069065e-06, "epoch": 0.6592165898617511, "percentage": 32.96, "elapsed_time": "3:52:15", "remaining_time": "7:52:23"} +{"current_steps": 2862, "total_steps": 8680, "loss": 0.780353307723999, "lr": 1.6021158669015273e-06, "epoch": 0.659447004608295, "percentage": 32.97, "elapsed_time": "3:52:21", "remaining_time": "7:52:20"} +{"current_steps": 2863, "total_steps": 8680, "loss": 0.685762882232666, "lr": 1.6018116423998277e-06, "epoch": 0.6596774193548387, "percentage": 32.98, "elapsed_time": "3:52:28", "remaining_time": "7:52:20"} +{"current_steps": 
2864, "total_steps": 8680, "loss": 0.8249918222427368, "lr": 1.6015073305459646e-06, "epoch": 0.6599078341013825, "percentage": 33.0, "elapsed_time": "3:52:33", "remaining_time": "7:52:15"} +{"current_steps": 2865, "total_steps": 8680, "loss": 0.7327184677124023, "lr": 1.6012029313841086e-06, "epoch": 0.6601382488479263, "percentage": 33.01, "elapsed_time": "3:52:37", "remaining_time": "7:52:09"} +{"current_steps": 2866, "total_steps": 8680, "loss": 0.7785891890525818, "lr": 1.6008984449584433e-06, "epoch": 0.66036866359447, "percentage": 33.02, "elapsed_time": "3:52:42", "remaining_time": "7:52:03"} +{"current_steps": 2867, "total_steps": 8680, "loss": 0.7307751178741455, "lr": 1.600593871313164e-06, "epoch": 0.6605990783410138, "percentage": 33.03, "elapsed_time": "3:52:47", "remaining_time": "7:52:00"} +{"current_steps": 2868, "total_steps": 8680, "loss": 0.8802257180213928, "lr": 1.6002892104924796e-06, "epoch": 0.6608294930875576, "percentage": 33.04, "elapsed_time": "3:52:53", "remaining_time": "7:51:56"} +{"current_steps": 2869, "total_steps": 8680, "loss": 0.8699140548706055, "lr": 1.5999844625406106e-06, "epoch": 0.6610599078341014, "percentage": 33.05, "elapsed_time": "3:52:57", "remaining_time": "7:51:50"} +{"current_steps": 2870, "total_steps": 8680, "loss": 0.6453604102134705, "lr": 1.5996796275017914e-06, "epoch": 0.6612903225806451, "percentage": 33.06, "elapsed_time": "3:53:01", "remaining_time": "7:51:44"} +{"current_steps": 2871, "total_steps": 8680, "loss": 0.7319324016571045, "lr": 1.5993747054202682e-06, "epoch": 0.661520737327189, "percentage": 33.08, "elapsed_time": "3:53:06", "remaining_time": "7:51:40"} +{"current_steps": 2872, "total_steps": 8680, "loss": 0.8357574343681335, "lr": 1.5990696963402998e-06, "epoch": 0.6617511520737327, "percentage": 33.09, "elapsed_time": "3:53:10", "remaining_time": "7:51:32"} +{"current_steps": 2873, "total_steps": 8680, "loss": 0.7647984027862549, "lr": 1.5987646003061581e-06, "epoch": 0.6619815668202765, 
"percentage": 33.1, "elapsed_time": "3:53:15", "remaining_time": "7:51:27"} +{"current_steps": 2874, "total_steps": 8680, "loss": 0.8542075753211975, "lr": 1.5984594173621274e-06, "epoch": 0.6622119815668203, "percentage": 33.11, "elapsed_time": "3:53:19", "remaining_time": "7:51:22"} +{"current_steps": 2875, "total_steps": 8680, "loss": 0.7689328193664551, "lr": 1.5981541475525044e-06, "epoch": 0.6624423963133641, "percentage": 33.12, "elapsed_time": "3:53:23", "remaining_time": "7:51:15"} +{"current_steps": 2876, "total_steps": 8680, "loss": 0.7459174990653992, "lr": 1.5978487909215987e-06, "epoch": 0.6626728110599078, "percentage": 33.13, "elapsed_time": "3:53:28", "remaining_time": "7:51:09"} +{"current_steps": 2877, "total_steps": 8680, "loss": 0.8268495202064514, "lr": 1.5975433475137329e-06, "epoch": 0.6629032258064517, "percentage": 33.15, "elapsed_time": "3:53:33", "remaining_time": "7:51:06"} +{"current_steps": 2878, "total_steps": 8680, "loss": 0.8254266977310181, "lr": 1.5972378173732406e-06, "epoch": 0.6631336405529954, "percentage": 33.16, "elapsed_time": "3:53:38", "remaining_time": "7:51:01"} +{"current_steps": 2879, "total_steps": 8680, "loss": 0.8552727103233337, "lr": 1.59693220054447e-06, "epoch": 0.6633640552995391, "percentage": 33.17, "elapsed_time": "3:53:43", "remaining_time": "7:50:56"} +{"current_steps": 2880, "total_steps": 8680, "loss": 0.7196269035339355, "lr": 1.596626497071781e-06, "epoch": 0.663594470046083, "percentage": 33.18, "elapsed_time": "3:53:49", "remaining_time": "7:50:53"} +{"current_steps": 2881, "total_steps": 8680, "loss": 0.815540075302124, "lr": 1.5963207069995455e-06, "epoch": 0.6638248847926267, "percentage": 33.19, "elapsed_time": "3:53:54", "remaining_time": "7:50:48"} +{"current_steps": 2882, "total_steps": 8680, "loss": 0.8040128350257874, "lr": 1.596014830372149e-06, "epoch": 0.6640552995391705, "percentage": 33.2, "elapsed_time": "3:53:58", "remaining_time": "7:50:42"} +{"current_steps": 2883, "total_steps": 
8680, "loss": 0.7990812659263611, "lr": 1.5957088672339887e-06, "epoch": 0.6642857142857143, "percentage": 33.21, "elapsed_time": "3:54:01", "remaining_time": "7:50:34"} +{"current_steps": 2884, "total_steps": 8680, "loss": 0.956179141998291, "lr": 1.5954028176294746e-06, "epoch": 0.6645161290322581, "percentage": 33.23, "elapsed_time": "3:54:07", "remaining_time": "7:50:31"} +{"current_steps": 2885, "total_steps": 8680, "loss": 0.7730144262313843, "lr": 1.5950966816030304e-06, "epoch": 0.6647465437788018, "percentage": 33.24, "elapsed_time": "3:54:12", "remaining_time": "7:50:27"} +{"current_steps": 2886, "total_steps": 8680, "loss": 0.902834415435791, "lr": 1.5947904591990904e-06, "epoch": 0.6649769585253457, "percentage": 33.25, "elapsed_time": "3:54:16", "remaining_time": "7:50:20"} +{"current_steps": 2887, "total_steps": 8680, "loss": 0.7234599590301514, "lr": 1.5944841504621027e-06, "epoch": 0.6652073732718894, "percentage": 33.26, "elapsed_time": "3:54:20", "remaining_time": "7:50:13"} +{"current_steps": 2888, "total_steps": 8680, "loss": 1.0267843008041382, "lr": 1.5941777554365271e-06, "epoch": 0.6654377880184332, "percentage": 33.27, "elapsed_time": "3:54:24", "remaining_time": "7:50:06"} +{"current_steps": 2889, "total_steps": 8680, "loss": 0.7431002855300903, "lr": 1.5938712741668376e-06, "epoch": 0.665668202764977, "percentage": 33.28, "elapsed_time": "3:54:29", "remaining_time": "7:50:02"} +{"current_steps": 2890, "total_steps": 8680, "loss": 0.7843111753463745, "lr": 1.5935647066975185e-06, "epoch": 0.6658986175115207, "percentage": 33.29, "elapsed_time": "3:54:35", "remaining_time": "7:49:59"} +{"current_steps": 2891, "total_steps": 8680, "loss": 0.8775256872177124, "lr": 1.593258053073068e-06, "epoch": 0.6661290322580645, "percentage": 33.31, "elapsed_time": "3:54:39", "remaining_time": "7:49:53"} +{"current_steps": 2892, "total_steps": 8680, "loss": 0.7861695289611816, "lr": 1.5929513133379966e-06, "epoch": 0.6663594470046083, "percentage": 33.32, 
"elapsed_time": "3:54:43", "remaining_time": "7:49:46"} +{"current_steps": 2893, "total_steps": 8680, "loss": 0.8721977472305298, "lr": 1.5926444875368267e-06, "epoch": 0.6665898617511521, "percentage": 33.33, "elapsed_time": "3:54:48", "remaining_time": "7:49:42"} +{"current_steps": 2894, "total_steps": 8680, "loss": 0.648263692855835, "lr": 1.5923375757140941e-06, "epoch": 0.6668202764976958, "percentage": 33.34, "elapsed_time": "3:54:53", "remaining_time": "7:49:38"} +{"current_steps": 2895, "total_steps": 8680, "loss": 0.8334729075431824, "lr": 1.592030577914347e-06, "epoch": 0.6670506912442397, "percentage": 33.35, "elapsed_time": "3:54:57", "remaining_time": "7:49:30"} +{"current_steps": 2896, "total_steps": 8680, "loss": 0.6105949878692627, "lr": 1.591723494182145e-06, "epoch": 0.6672811059907834, "percentage": 33.36, "elapsed_time": "3:55:02", "remaining_time": "7:49:26"} +{"current_steps": 2897, "total_steps": 8680, "loss": 0.7895448207855225, "lr": 1.5914163245620608e-06, "epoch": 0.6675115207373272, "percentage": 33.38, "elapsed_time": "3:55:06", "remaining_time": "7:49:19"} +{"current_steps": 2898, "total_steps": 8680, "loss": 0.8728576302528381, "lr": 1.5911090690986805e-06, "epoch": 0.667741935483871, "percentage": 33.39, "elapsed_time": "3:55:10", "remaining_time": "7:49:13"} +{"current_steps": 2899, "total_steps": 8680, "loss": 0.7637856006622314, "lr": 1.590801727836601e-06, "epoch": 0.6679723502304148, "percentage": 33.4, "elapsed_time": "3:55:17", "remaining_time": "7:49:12"} +{"current_steps": 2900, "total_steps": 8680, "loss": 0.8988397717475891, "lr": 1.590494300820433e-06, "epoch": 0.6682027649769585, "percentage": 33.41, "elapsed_time": "3:55:22", "remaining_time": "7:49:06"} +{"current_steps": 2901, "total_steps": 8680, "loss": 0.7486827373504639, "lr": 1.590186788094799e-06, "epoch": 0.6684331797235024, "percentage": 33.42, "elapsed_time": "3:55:28", "remaining_time": "7:49:04"} +{"current_steps": 2902, "total_steps": 8680, "loss": 
0.8212865591049194, "lr": 1.589879189704334e-06, "epoch": 0.6686635944700461, "percentage": 33.43, "elapsed_time": "3:55:32", "remaining_time": "7:48:59"} +{"current_steps": 2903, "total_steps": 8680, "loss": 0.7421284914016724, "lr": 1.5895715056936853e-06, "epoch": 0.6688940092165898, "percentage": 33.44, "elapsed_time": "3:55:37", "remaining_time": "7:48:53"} +{"current_steps": 2904, "total_steps": 8680, "loss": 0.8721676468849182, "lr": 1.5892637361075132e-06, "epoch": 0.6691244239631337, "percentage": 33.46, "elapsed_time": "3:55:41", "remaining_time": "7:48:47"} +{"current_steps": 2905, "total_steps": 8680, "loss": 0.7265836000442505, "lr": 1.58895588099049e-06, "epoch": 0.6693548387096774, "percentage": 33.47, "elapsed_time": "3:55:48", "remaining_time": "7:48:46"} +{"current_steps": 2906, "total_steps": 8680, "loss": 0.863615870475769, "lr": 1.5886479403873e-06, "epoch": 0.6695852534562212, "percentage": 33.48, "elapsed_time": "3:55:52", "remaining_time": "7:48:39"} +{"current_steps": 2907, "total_steps": 8680, "loss": 0.8362177610397339, "lr": 1.588339914342641e-06, "epoch": 0.669815668202765, "percentage": 33.49, "elapsed_time": "3:55:56", "remaining_time": "7:48:32"} +{"current_steps": 2908, "total_steps": 8680, "loss": 0.9076892137527466, "lr": 1.5880318029012223e-06, "epoch": 0.6700460829493088, "percentage": 33.5, "elapsed_time": "3:55:59", "remaining_time": "7:48:25"} +{"current_steps": 2909, "total_steps": 8680, "loss": 0.9149065017700195, "lr": 1.5877236061077658e-06, "epoch": 0.6702764976958525, "percentage": 33.51, "elapsed_time": "3:56:04", "remaining_time": "7:48:19"} +{"current_steps": 2910, "total_steps": 8680, "loss": 0.7761013507843018, "lr": 1.5874153240070062e-06, "epoch": 0.6705069124423964, "percentage": 33.53, "elapsed_time": "3:56:08", "remaining_time": "7:48:14"} +{"current_steps": 2911, "total_steps": 8680, "loss": 0.8671830892562866, "lr": 1.5871069566436894e-06, "epoch": 0.6707373271889401, "percentage": 33.54, "elapsed_time": 
"3:56:14", "remaining_time": "7:48:10"} +{"current_steps": 2912, "total_steps": 8680, "loss": 0.9433870315551758, "lr": 1.5867985040625755e-06, "epoch": 0.6709677419354839, "percentage": 33.55, "elapsed_time": "3:56:17", "remaining_time": "7:48:02"} +{"current_steps": 2913, "total_steps": 8680, "loss": 0.8009352684020996, "lr": 1.5864899663084352e-06, "epoch": 0.6711981566820276, "percentage": 33.56, "elapsed_time": "3:56:23", "remaining_time": "7:47:59"} +{"current_steps": 2914, "total_steps": 8680, "loss": 0.6813808083534241, "lr": 1.5861813434260528e-06, "epoch": 0.6714285714285714, "percentage": 33.57, "elapsed_time": "3:56:29", "remaining_time": "7:47:56"} +{"current_steps": 2915, "total_steps": 8680, "loss": 0.712783932685852, "lr": 1.5858726354602248e-06, "epoch": 0.6716589861751152, "percentage": 33.58, "elapsed_time": "3:56:34", "remaining_time": "7:47:53"} +{"current_steps": 2916, "total_steps": 8680, "loss": 0.7871056795120239, "lr": 1.5855638424557588e-06, "epoch": 0.6718894009216589, "percentage": 33.59, "elapsed_time": "3:56:39", "remaining_time": "7:47:48"} +{"current_steps": 2917, "total_steps": 8680, "loss": 0.8590981960296631, "lr": 1.5852549644574766e-06, "epoch": 0.6721198156682028, "percentage": 33.61, "elapsed_time": "3:56:45", "remaining_time": "7:47:45"} +{"current_steps": 2918, "total_steps": 8680, "loss": 0.7952913641929626, "lr": 1.584946001510211e-06, "epoch": 0.6723502304147465, "percentage": 33.62, "elapsed_time": "3:56:49", "remaining_time": "7:47:39"} +{"current_steps": 2919, "total_steps": 8680, "loss": 0.8567384481430054, "lr": 1.5846369536588078e-06, "epoch": 0.6725806451612903, "percentage": 33.63, "elapsed_time": "3:56:54", "remaining_time": "7:47:33"} +{"current_steps": 2920, "total_steps": 8680, "loss": 0.859541654586792, "lr": 1.5843278209481246e-06, "epoch": 0.6728110599078341, "percentage": 33.64, "elapsed_time": "3:56:59", "remaining_time": "7:47:28"} +{"current_steps": 2921, "total_steps": 8680, "loss": 
0.7843801975250244, "lr": 1.5840186034230318e-06, "epoch": 0.6730414746543779, "percentage": 33.65, "elapsed_time": "3:57:04", "remaining_time": "7:47:24"} +{"current_steps": 2922, "total_steps": 8680, "loss": 0.7448940277099609, "lr": 1.5837093011284118e-06, "epoch": 0.6732718894009216, "percentage": 33.66, "elapsed_time": "3:57:10", "remaining_time": "7:47:22"} +{"current_steps": 2923, "total_steps": 8680, "loss": 0.9325242042541504, "lr": 1.5833999141091593e-06, "epoch": 0.6735023041474655, "percentage": 33.68, "elapsed_time": "3:57:15", "remaining_time": "7:47:17"} +{"current_steps": 2924, "total_steps": 8680, "loss": 0.8005647659301758, "lr": 1.5830904424101816e-06, "epoch": 0.6737327188940092, "percentage": 33.69, "elapsed_time": "3:57:19", "remaining_time": "7:47:11"} +{"current_steps": 2925, "total_steps": 8680, "loss": 0.8897464275360107, "lr": 1.5827808860763984e-06, "epoch": 0.673963133640553, "percentage": 33.7, "elapsed_time": "3:57:24", "remaining_time": "7:47:06"} +{"current_steps": 2926, "total_steps": 8680, "loss": 0.8319039344787598, "lr": 1.5824712451527409e-06, "epoch": 0.6741935483870968, "percentage": 33.71, "elapsed_time": "3:57:28", "remaining_time": "7:47:00"} +{"current_steps": 2927, "total_steps": 8680, "loss": 0.7638111114501953, "lr": 1.5821615196841533e-06, "epoch": 0.6744239631336405, "percentage": 33.72, "elapsed_time": "3:57:33", "remaining_time": "7:46:55"} +{"current_steps": 2928, "total_steps": 8680, "loss": 0.7617092132568359, "lr": 1.581851709715592e-06, "epoch": 0.6746543778801843, "percentage": 33.73, "elapsed_time": "3:57:39", "remaining_time": "7:46:51"} +{"current_steps": 2929, "total_steps": 8680, "loss": 0.813319742679596, "lr": 1.581541815292025e-06, "epoch": 0.6748847926267281, "percentage": 33.74, "elapsed_time": "3:57:43", "remaining_time": "7:46:45"} +{"current_steps": 2930, "total_steps": 8680, "loss": 0.7495343089103699, "lr": 1.5812318364584334e-06, "epoch": 0.6751152073732719, "percentage": 33.76, 
"elapsed_time": "3:57:48", "remaining_time": "7:46:41"} +{"current_steps": 2931, "total_steps": 8680, "loss": 0.9064745306968689, "lr": 1.5809217732598103e-06, "epoch": 0.6753456221198156, "percentage": 33.77, "elapsed_time": "3:57:54", "remaining_time": "7:46:39"} +{"current_steps": 2932, "total_steps": 8680, "loss": 0.699098527431488, "lr": 1.580611625741161e-06, "epoch": 0.6755760368663595, "percentage": 33.78, "elapsed_time": "3:58:00", "remaining_time": "7:46:35"} +{"current_steps": 2933, "total_steps": 8680, "loss": 0.9168096780776978, "lr": 1.5803013939475025e-06, "epoch": 0.6758064516129032, "percentage": 33.79, "elapsed_time": "3:58:05", "remaining_time": "7:46:31"} +{"current_steps": 2934, "total_steps": 8680, "loss": 0.8848644495010376, "lr": 1.5799910779238652e-06, "epoch": 0.676036866359447, "percentage": 33.8, "elapsed_time": "3:58:10", "remaining_time": "7:46:25"} +{"current_steps": 2935, "total_steps": 8680, "loss": 0.7795228958129883, "lr": 1.5796806777152903e-06, "epoch": 0.6762672811059908, "percentage": 33.81, "elapsed_time": "3:58:16", "remaining_time": "7:46:24"} +{"current_steps": 2936, "total_steps": 8680, "loss": 0.9287698268890381, "lr": 1.5793701933668327e-06, "epoch": 0.6764976958525346, "percentage": 33.82, "elapsed_time": "3:58:20", "remaining_time": "7:46:17"} +{"current_steps": 2937, "total_steps": 8680, "loss": 0.8661396503448486, "lr": 1.5790596249235587e-06, "epoch": 0.6767281105990783, "percentage": 33.84, "elapsed_time": "3:58:25", "remaining_time": "7:46:12"} +{"current_steps": 2938, "total_steps": 8680, "loss": 0.7544706463813782, "lr": 1.5787489724305464e-06, "epoch": 0.6769585253456222, "percentage": 33.85, "elapsed_time": "3:58:30", "remaining_time": "7:46:08"} +{"current_steps": 2939, "total_steps": 8680, "loss": 0.8613651990890503, "lr": 1.5784382359328872e-06, "epoch": 0.6771889400921659, "percentage": 33.86, "elapsed_time": "3:58:34", "remaining_time": "7:46:02"} +{"current_steps": 2940, "total_steps": 8680, "loss": 
0.8695065975189209, "lr": 1.5781274154756833e-06, "epoch": 0.6774193548387096, "percentage": 33.87, "elapsed_time": "3:58:39", "remaining_time": "7:45:57"} +{"current_steps": 2941, "total_steps": 8680, "loss": 0.9453287720680237, "lr": 1.577816511104051e-06, "epoch": 0.6776497695852535, "percentage": 33.88, "elapsed_time": "3:58:44", "remaining_time": "7:45:52"} +{"current_steps": 2942, "total_steps": 8680, "loss": 0.8599261045455933, "lr": 1.577505522863117e-06, "epoch": 0.6778801843317972, "percentage": 33.89, "elapsed_time": "3:58:49", "remaining_time": "7:45:47"} +{"current_steps": 2943, "total_steps": 8680, "loss": 0.8143391609191895, "lr": 1.5771944507980205e-06, "epoch": 0.678110599078341, "percentage": 33.91, "elapsed_time": "3:58:54", "remaining_time": "7:45:43"} +{"current_steps": 2944, "total_steps": 8680, "loss": 0.9558438062667847, "lr": 1.576883294953914e-06, "epoch": 0.6783410138248848, "percentage": 33.92, "elapsed_time": "3:58:58", "remaining_time": "7:45:36"} +{"current_steps": 2945, "total_steps": 8680, "loss": 0.7348268628120422, "lr": 1.5765720553759605e-06, "epoch": 0.6785714285714286, "percentage": 33.93, "elapsed_time": "3:59:04", "remaining_time": "7:45:34"} +{"current_steps": 2946, "total_steps": 8680, "loss": 0.9361155033111572, "lr": 1.5762607321093366e-06, "epoch": 0.6788018433179723, "percentage": 33.94, "elapsed_time": "3:59:11", "remaining_time": "7:45:32"} +{"current_steps": 2947, "total_steps": 8680, "loss": 0.8094985485076904, "lr": 1.5759493251992303e-06, "epoch": 0.6790322580645162, "percentage": 33.95, "elapsed_time": "3:59:15", "remaining_time": "7:45:26"} +{"current_steps": 2948, "total_steps": 8680, "loss": 0.8746658563613892, "lr": 1.575637834690842e-06, "epoch": 0.6792626728110599, "percentage": 33.96, "elapsed_time": "3:59:21", "remaining_time": "7:45:23"} +{"current_steps": 2949, "total_steps": 8680, "loss": 0.7433050870895386, "lr": 1.575326260629384e-06, "epoch": 0.6794930875576037, "percentage": 33.97, "elapsed_time": 
"3:59:26", "remaining_time": "7:45:19"} +{"current_steps": 2950, "total_steps": 8680, "loss": 0.8621053695678711, "lr": 1.5750146030600808e-06, "epoch": 0.6797235023041475, "percentage": 33.99, "elapsed_time": "3:59:31", "remaining_time": "7:45:14"} +{"current_steps": 2951, "total_steps": 8680, "loss": 0.7541971206665039, "lr": 1.5747028620281695e-06, "epoch": 0.6799539170506912, "percentage": 34.0, "elapsed_time": "3:59:35", "remaining_time": "7:45:08"} +{"current_steps": 2952, "total_steps": 8680, "loss": 0.9817987680435181, "lr": 1.5743910375788982e-06, "epoch": 0.680184331797235, "percentage": 34.01, "elapsed_time": "3:59:39", "remaining_time": "7:45:01"} +{"current_steps": 2953, "total_steps": 8680, "loss": 0.7763534188270569, "lr": 1.5740791297575283e-06, "epoch": 0.6804147465437788, "percentage": 34.02, "elapsed_time": "3:59:43", "remaining_time": "7:44:54"} +{"current_steps": 2954, "total_steps": 8680, "loss": 0.7482337355613708, "lr": 1.573767138609333e-06, "epoch": 0.6806451612903226, "percentage": 34.03, "elapsed_time": "3:59:48", "remaining_time": "7:44:49"} +{"current_steps": 2955, "total_steps": 8680, "loss": 0.7352473735809326, "lr": 1.5734550641795967e-06, "epoch": 0.6808755760368663, "percentage": 34.04, "elapsed_time": "3:59:52", "remaining_time": "7:44:43"} +{"current_steps": 2956, "total_steps": 8680, "loss": 0.8657293319702148, "lr": 1.573142906513617e-06, "epoch": 0.6811059907834102, "percentage": 34.06, "elapsed_time": "3:59:57", "remaining_time": "7:44:39"} +{"current_steps": 2957, "total_steps": 8680, "loss": 0.8035376667976379, "lr": 1.5728306656567033e-06, "epoch": 0.6813364055299539, "percentage": 34.07, "elapsed_time": "4:00:02", "remaining_time": "7:44:34"} +{"current_steps": 2958, "total_steps": 8680, "loss": 0.8030140399932861, "lr": 1.572518341654177e-06, "epoch": 0.6815668202764977, "percentage": 34.08, "elapsed_time": "4:00:06", "remaining_time": "7:44:27"} +{"current_steps": 2959, "total_steps": 8680, "loss": 0.797377347946167, 
"lr": 1.5722059345513711e-06, "epoch": 0.6817972350230415, "percentage": 34.09, "elapsed_time": "4:00:10", "remaining_time": "7:44:20"} +{"current_steps": 2960, "total_steps": 8680, "loss": 0.7041053175926208, "lr": 1.5718934443936311e-06, "epoch": 0.6820276497695853, "percentage": 34.1, "elapsed_time": "4:00:16", "remaining_time": "7:44:19"} +{"current_steps": 2961, "total_steps": 8680, "loss": 0.7911885976791382, "lr": 1.571580871226315e-06, "epoch": 0.682258064516129, "percentage": 34.11, "elapsed_time": "4:00:21", "remaining_time": "7:44:13"} +{"current_steps": 2962, "total_steps": 8680, "loss": 0.7908599376678467, "lr": 1.5712682150947922e-06, "epoch": 0.6824884792626729, "percentage": 34.12, "elapsed_time": "4:00:26", "remaining_time": "7:44:10"} +{"current_steps": 2963, "total_steps": 8680, "loss": 0.860281229019165, "lr": 1.5709554760444442e-06, "epoch": 0.6827188940092166, "percentage": 34.14, "elapsed_time": "4:00:30", "remaining_time": "7:44:03"} +{"current_steps": 2964, "total_steps": 8680, "loss": 0.6987707018852234, "lr": 1.5706426541206645e-06, "epoch": 0.6829493087557603, "percentage": 34.15, "elapsed_time": "4:00:35", "remaining_time": "7:43:57"} +{"current_steps": 2965, "total_steps": 8680, "loss": 0.7198495864868164, "lr": 1.5703297493688592e-06, "epoch": 0.6831797235023042, "percentage": 34.16, "elapsed_time": "4:00:39", "remaining_time": "7:43:52"} +{"current_steps": 2966, "total_steps": 8680, "loss": 0.8232598304748535, "lr": 1.5700167618344455e-06, "epoch": 0.6834101382488479, "percentage": 34.17, "elapsed_time": "4:00:45", "remaining_time": "7:43:48"} +{"current_steps": 2967, "total_steps": 8680, "loss": 0.8425456285476685, "lr": 1.569703691562854e-06, "epoch": 0.6836405529953917, "percentage": 34.18, "elapsed_time": "4:00:49", "remaining_time": "7:43:42"} +{"current_steps": 2968, "total_steps": 8680, "loss": 0.7758797407150269, "lr": 1.5693905385995252e-06, "epoch": 0.6838709677419355, "percentage": 34.19, "elapsed_time": "4:00:53", 
"remaining_time": "7:43:36"} +{"current_steps": 2969, "total_steps": 8680, "loss": 0.7478910684585571, "lr": 1.569077302989914e-06, "epoch": 0.6841013824884793, "percentage": 34.21, "elapsed_time": "4:00:58", "remaining_time": "7:43:32"} +{"current_steps": 2970, "total_steps": 8680, "loss": 0.8274309635162354, "lr": 1.5687639847794854e-06, "epoch": 0.684331797235023, "percentage": 34.22, "elapsed_time": "4:01:03", "remaining_time": "7:43:26"} +{"current_steps": 2971, "total_steps": 8680, "loss": 0.6800183653831482, "lr": 1.5684505840137173e-06, "epoch": 0.6845622119815669, "percentage": 34.23, "elapsed_time": "4:01:08", "remaining_time": "7:43:23"} +{"current_steps": 2972, "total_steps": 8680, "loss": 0.7768006324768066, "lr": 1.5681371007380996e-06, "epoch": 0.6847926267281106, "percentage": 34.24, "elapsed_time": "4:01:13", "remaining_time": "7:43:18"} +{"current_steps": 2973, "total_steps": 8680, "loss": 0.7462732195854187, "lr": 1.5678235349981338e-06, "epoch": 0.6850230414746544, "percentage": 34.25, "elapsed_time": "4:01:18", "remaining_time": "7:43:12"} +{"current_steps": 2974, "total_steps": 8680, "loss": 0.8461781144142151, "lr": 1.5675098868393335e-06, "epoch": 0.6852534562211982, "percentage": 34.26, "elapsed_time": "4:01:22", "remaining_time": "7:43:06"} +{"current_steps": 2975, "total_steps": 8680, "loss": 0.7968491911888123, "lr": 1.5671961563072244e-06, "epoch": 0.6854838709677419, "percentage": 34.27, "elapsed_time": "4:01:27", "remaining_time": "7:43:01"} +{"current_steps": 2976, "total_steps": 8680, "loss": 0.805394172668457, "lr": 1.5668823434473443e-06, "epoch": 0.6857142857142857, "percentage": 34.29, "elapsed_time": "4:01:32", "remaining_time": "7:42:56"} +{"current_steps": 2977, "total_steps": 8680, "loss": 0.7241736650466919, "lr": 1.5665684483052424e-06, "epoch": 0.6859447004608294, "percentage": 34.3, "elapsed_time": "4:01:36", "remaining_time": "7:42:49"} +{"current_steps": 2978, "total_steps": 8680, "loss": 0.7345866560935974, "lr": 
1.5662544709264801e-06, "epoch": 0.6861751152073733, "percentage": 34.31, "elapsed_time": "4:01:41", "remaining_time": "7:42:47"} +{"current_steps": 2979, "total_steps": 8680, "loss": 0.7605085372924805, "lr": 1.5659404113566312e-06, "epoch": 0.686405529953917, "percentage": 34.32, "elapsed_time": "4:01:46", "remaining_time": "7:42:41"} +{"current_steps": 2980, "total_steps": 8680, "loss": 0.8555188179016113, "lr": 1.5656262696412808e-06, "epoch": 0.6866359447004609, "percentage": 34.33, "elapsed_time": "4:01:51", "remaining_time": "7:42:37"} +{"current_steps": 2981, "total_steps": 8680, "loss": 0.7139542698860168, "lr": 1.5653120458260261e-06, "epoch": 0.6868663594470046, "percentage": 34.34, "elapsed_time": "4:01:55", "remaining_time": "7:42:30"} +{"current_steps": 2982, "total_steps": 8680, "loss": 0.8676587343215942, "lr": 1.564997739956476e-06, "epoch": 0.6870967741935484, "percentage": 34.35, "elapsed_time": "4:02:00", "remaining_time": "7:42:24"} +{"current_steps": 2983, "total_steps": 8680, "loss": 0.8121025562286377, "lr": 1.5646833520782523e-06, "epoch": 0.6873271889400921, "percentage": 34.37, "elapsed_time": "4:02:03", "remaining_time": "7:42:17"} +{"current_steps": 2984, "total_steps": 8680, "loss": 0.7757136821746826, "lr": 1.5643688822369873e-06, "epoch": 0.687557603686636, "percentage": 34.38, "elapsed_time": "4:02:08", "remaining_time": "7:42:13"} +{"current_steps": 2985, "total_steps": 8680, "loss": 0.8357381820678711, "lr": 1.5640543304783264e-06, "epoch": 0.6877880184331797, "percentage": 34.39, "elapsed_time": "4:02:13", "remaining_time": "7:42:08"} +{"current_steps": 2986, "total_steps": 8680, "loss": 0.8635811805725098, "lr": 1.563739696847926e-06, "epoch": 0.6880184331797236, "percentage": 34.4, "elapsed_time": "4:02:17", "remaining_time": "7:42:02"} +{"current_steps": 2987, "total_steps": 8680, "loss": 0.90900057554245, "lr": 1.563424981391455e-06, "epoch": 0.6882488479262673, "percentage": 34.41, "elapsed_time": "4:02:22", 
"remaining_time": "7:41:56"} +{"current_steps": 2988, "total_steps": 8680, "loss": 0.9001314043998718, "lr": 1.563110184154594e-06, "epoch": 0.688479262672811, "percentage": 34.42, "elapsed_time": "4:02:25", "remaining_time": "7:41:49"} +{"current_steps": 2989, "total_steps": 8680, "loss": 0.7482000589370728, "lr": 1.5627953051830353e-06, "epoch": 0.6887096774193548, "percentage": 34.44, "elapsed_time": "4:02:32", "remaining_time": "7:41:46"} +{"current_steps": 2990, "total_steps": 8680, "loss": 0.8504235744476318, "lr": 1.5624803445224829e-06, "epoch": 0.6889400921658986, "percentage": 34.45, "elapsed_time": "4:02:36", "remaining_time": "7:41:40"} +{"current_steps": 2991, "total_steps": 8680, "loss": 0.7887089252471924, "lr": 1.5621653022186526e-06, "epoch": 0.6891705069124424, "percentage": 34.46, "elapsed_time": "4:02:40", "remaining_time": "7:41:34"} +{"current_steps": 2992, "total_steps": 8680, "loss": 0.8745719790458679, "lr": 1.5618501783172735e-06, "epoch": 0.6894009216589861, "percentage": 34.47, "elapsed_time": "4:02:46", "remaining_time": "7:41:31"} +{"current_steps": 2993, "total_steps": 8680, "loss": 0.8269633054733276, "lr": 1.5615349728640848e-06, "epoch": 0.68963133640553, "percentage": 34.48, "elapsed_time": "4:02:50", "remaining_time": "7:41:25"} +{"current_steps": 2994, "total_steps": 8680, "loss": 0.7355072498321533, "lr": 1.5612196859048382e-06, "epoch": 0.6898617511520737, "percentage": 34.49, "elapsed_time": "4:02:55", "remaining_time": "7:41:21"} +{"current_steps": 2995, "total_steps": 8680, "loss": 0.857653021812439, "lr": 1.5609043174852966e-06, "epoch": 0.6900921658986175, "percentage": 34.5, "elapsed_time": "4:02:59", "remaining_time": "7:41:14"} +{"current_steps": 2996, "total_steps": 8680, "loss": 0.8575785160064697, "lr": 1.5605888676512365e-06, "epoch": 0.6903225806451613, "percentage": 34.52, "elapsed_time": "4:03:04", "remaining_time": "7:41:10"} +{"current_steps": 2997, "total_steps": 8680, "loss": 0.8631561994552612, "lr": 
1.560273336448444e-06, "epoch": 0.6905529953917051, "percentage": 34.53, "elapsed_time": "4:03:08", "remaining_time": "7:41:03"} +{"current_steps": 2998, "total_steps": 8680, "loss": 0.7993800044059753, "lr": 1.5599577239227185e-06, "epoch": 0.6907834101382488, "percentage": 34.54, "elapsed_time": "4:03:13", "remaining_time": "7:40:58"} +{"current_steps": 2999, "total_steps": 8680, "loss": 0.7961007356643677, "lr": 1.5596420301198707e-06, "epoch": 0.6910138248847926, "percentage": 34.55, "elapsed_time": "4:03:17", "remaining_time": "7:40:52"} +{"current_steps": 3000, "total_steps": 8680, "loss": 0.7536421418190002, "lr": 1.5593262550857232e-06, "epoch": 0.6912442396313364, "percentage": 34.56, "elapsed_time": "4:03:23", "remaining_time": "7:40:49"} +{"current_steps": 3001, "total_steps": 8680, "loss": 0.70341956615448, "lr": 1.55901039886611e-06, "epoch": 0.6914746543778801, "percentage": 34.57, "elapsed_time": "4:03:30", "remaining_time": "7:40:48"} +{"current_steps": 3002, "total_steps": 8680, "loss": 0.8152127265930176, "lr": 1.5586944615068776e-06, "epoch": 0.691705069124424, "percentage": 34.59, "elapsed_time": "4:03:36", "remaining_time": "7:40:45"} +{"current_steps": 3003, "total_steps": 8680, "loss": 0.6728770732879639, "lr": 1.5583784430538838e-06, "epoch": 0.6919354838709677, "percentage": 34.6, "elapsed_time": "4:03:40", "remaining_time": "7:40:39"} +{"current_steps": 3004, "total_steps": 8680, "loss": 0.8406884670257568, "lr": 1.558062343552998e-06, "epoch": 0.6921658986175115, "percentage": 34.61, "elapsed_time": "4:03:43", "remaining_time": "7:40:31"} +{"current_steps": 3005, "total_steps": 8680, "loss": 0.766754686832428, "lr": 1.5577461630501018e-06, "epoch": 0.6923963133640553, "percentage": 34.62, "elapsed_time": "4:03:47", "remaining_time": "7:40:24"} +{"current_steps": 3006, "total_steps": 8680, "loss": 0.7456642389297485, "lr": 1.5574299015910889e-06, "epoch": 0.6926267281105991, "percentage": 34.63, "elapsed_time": "4:03:52", "remaining_time": 
"7:40:19"} +{"current_steps": 3007, "total_steps": 8680, "loss": 0.7834097743034363, "lr": 1.557113559221863e-06, "epoch": 0.6928571428571428, "percentage": 34.64, "elapsed_time": "4:03:56", "remaining_time": "7:40:13"} +{"current_steps": 3008, "total_steps": 8680, "loss": 0.7425946593284607, "lr": 1.556797135988342e-06, "epoch": 0.6930875576036867, "percentage": 34.65, "elapsed_time": "4:04:00", "remaining_time": "7:40:07"} +{"current_steps": 3009, "total_steps": 8680, "loss": 0.7914093732833862, "lr": 1.5564806319364534e-06, "epoch": 0.6933179723502304, "percentage": 34.67, "elapsed_time": "4:04:05", "remaining_time": "7:40:01"} +{"current_steps": 3010, "total_steps": 8680, "loss": 0.819783091545105, "lr": 1.556164047112138e-06, "epoch": 0.6935483870967742, "percentage": 34.68, "elapsed_time": "4:04:09", "remaining_time": "7:39:56"} +{"current_steps": 3011, "total_steps": 8680, "loss": 0.7147302627563477, "lr": 1.5558473815613474e-06, "epoch": 0.693778801843318, "percentage": 34.69, "elapsed_time": "4:04:13", "remaining_time": "7:39:49"} +{"current_steps": 3012, "total_steps": 8680, "loss": 0.7247470617294312, "lr": 1.5555306353300452e-06, "epoch": 0.6940092165898617, "percentage": 34.7, "elapsed_time": "4:04:18", "remaining_time": "7:39:44"} +{"current_steps": 3013, "total_steps": 8680, "loss": 0.8277294635772705, "lr": 1.5552138084642067e-06, "epoch": 0.6942396313364055, "percentage": 34.71, "elapsed_time": "4:04:22", "remaining_time": "7:39:38"} +{"current_steps": 3014, "total_steps": 8680, "loss": 0.8014394640922546, "lr": 1.554896901009819e-06, "epoch": 0.6944700460829493, "percentage": 34.72, "elapsed_time": "4:04:27", "remaining_time": "7:39:33"} +{"current_steps": 3015, "total_steps": 8680, "loss": 0.7468869686126709, "lr": 1.5545799130128808e-06, "epoch": 0.6947004608294931, "percentage": 34.74, "elapsed_time": "4:04:32", "remaining_time": "7:39:28"} +{"current_steps": 3016, "total_steps": 8680, "loss": 0.7854933142662048, "lr": 1.554262844519402e-06, 
"epoch": 0.6949308755760368, "percentage": 34.75, "elapsed_time": "4:04:36", "remaining_time": "7:39:22"} +{"current_steps": 3017, "total_steps": 8680, "loss": 0.8359543681144714, "lr": 1.5539456955754053e-06, "epoch": 0.6951612903225807, "percentage": 34.76, "elapsed_time": "4:04:41", "remaining_time": "7:39:17"} +{"current_steps": 3018, "total_steps": 8680, "loss": 0.7767773866653442, "lr": 1.5536284662269243e-06, "epoch": 0.6953917050691244, "percentage": 34.77, "elapsed_time": "4:04:45", "remaining_time": "7:39:12"} +{"current_steps": 3019, "total_steps": 8680, "loss": 0.8388162851333618, "lr": 1.5533111565200044e-06, "epoch": 0.6956221198156682, "percentage": 34.78, "elapsed_time": "4:04:52", "remaining_time": "7:39:10"} +{"current_steps": 3020, "total_steps": 8680, "loss": 0.7791208028793335, "lr": 1.5529937665007024e-06, "epoch": 0.695852534562212, "percentage": 34.79, "elapsed_time": "4:04:57", "remaining_time": "7:39:05"} +{"current_steps": 3021, "total_steps": 8680, "loss": 0.8662698864936829, "lr": 1.5526762962150875e-06, "epoch": 0.6960829493087558, "percentage": 34.8, "elapsed_time": "4:05:03", "remaining_time": "7:39:02"} +{"current_steps": 3022, "total_steps": 8680, "loss": 0.737492024898529, "lr": 1.5523587457092394e-06, "epoch": 0.6963133640552995, "percentage": 34.82, "elapsed_time": "4:05:09", "remaining_time": "7:39:00"} +{"current_steps": 3023, "total_steps": 8680, "loss": 0.83610999584198, "lr": 1.552041115029251e-06, "epoch": 0.6965437788018434, "percentage": 34.83, "elapsed_time": "4:05:14", "remaining_time": "7:38:55"} +{"current_steps": 3024, "total_steps": 8680, "loss": 0.930977463722229, "lr": 1.5517234042212254e-06, "epoch": 0.6967741935483871, "percentage": 34.84, "elapsed_time": "4:05:19", "remaining_time": "7:38:50"} +{"current_steps": 3025, "total_steps": 8680, "loss": 0.7587058544158936, "lr": 1.551405613331278e-06, "epoch": 0.6970046082949308, "percentage": 34.85, "elapsed_time": "4:05:24", "remaining_time": "7:38:46"} 
+{"current_steps": 3026, "total_steps": 8680, "loss": 0.7549247741699219, "lr": 1.551087742405536e-06, "epoch": 0.6972350230414747, "percentage": 34.86, "elapsed_time": "4:05:29", "remaining_time": "7:38:41"} +{"current_steps": 3027, "total_steps": 8680, "loss": 0.6906812787055969, "lr": 1.5507697914901376e-06, "epoch": 0.6974654377880184, "percentage": 34.87, "elapsed_time": "4:05:33", "remaining_time": "7:38:35"} +{"current_steps": 3028, "total_steps": 8680, "loss": 0.7806124687194824, "lr": 1.5504517606312332e-06, "epoch": 0.6976958525345622, "percentage": 34.88, "elapsed_time": "4:05:40", "remaining_time": "7:38:33"} +{"current_steps": 3029, "total_steps": 8680, "loss": 0.8091036081314087, "lr": 1.5501336498749846e-06, "epoch": 0.697926267281106, "percentage": 34.9, "elapsed_time": "4:05:44", "remaining_time": "7:38:27"} +{"current_steps": 3030, "total_steps": 8680, "loss": 0.721937894821167, "lr": 1.5498154592675646e-06, "epoch": 0.6981566820276498, "percentage": 34.91, "elapsed_time": "4:05:50", "remaining_time": "7:38:24"} +{"current_steps": 3031, "total_steps": 8680, "loss": 0.712378740310669, "lr": 1.5494971888551587e-06, "epoch": 0.6983870967741935, "percentage": 34.92, "elapsed_time": "4:05:54", "remaining_time": "7:38:17"} +{"current_steps": 3032, "total_steps": 8680, "loss": 0.8106495141983032, "lr": 1.5491788386839635e-06, "epoch": 0.6986175115207374, "percentage": 34.93, "elapsed_time": "4:05:58", "remaining_time": "7:38:12"} +{"current_steps": 3033, "total_steps": 8680, "loss": 0.7886521816253662, "lr": 1.5488604088001866e-06, "epoch": 0.6988479262672811, "percentage": 34.94, "elapsed_time": "4:06:04", "remaining_time": "7:38:09"} +{"current_steps": 3034, "total_steps": 8680, "loss": 0.7483402490615845, "lr": 1.5485418992500479e-06, "epoch": 0.6990783410138249, "percentage": 34.95, "elapsed_time": "4:06:09", "remaining_time": "7:38:05"} +{"current_steps": 3035, "total_steps": 8680, "loss": 0.6236725449562073, "lr": 1.5482233100797788e-06, "epoch": 
0.6993087557603687, "percentage": 34.97, "elapsed_time": "4:06:14", "remaining_time": "7:37:59"} +{"current_steps": 3036, "total_steps": 8680, "loss": 0.9477910995483398, "lr": 1.5479046413356222e-06, "epoch": 0.6995391705069124, "percentage": 34.98, "elapsed_time": "4:06:18", "remaining_time": "7:37:53"} +{"current_steps": 3037, "total_steps": 8680, "loss": 0.8921213746070862, "lr": 1.5475858930638322e-06, "epoch": 0.6997695852534562, "percentage": 34.99, "elapsed_time": "4:06:23", "remaining_time": "7:37:49"} +{"current_steps": 3038, "total_steps": 8680, "loss": 0.7460963726043701, "lr": 1.5472670653106744e-06, "epoch": 0.7, "percentage": 35.0, "elapsed_time": "4:06:29", "remaining_time": "7:37:45"} +{"current_steps": 3039, "total_steps": 8680, "loss": 0.6135849356651306, "lr": 1.5469481581224271e-06, "epoch": 0.7002304147465438, "percentage": 35.01, "elapsed_time": "4:06:34", "remaining_time": "7:37:42"} +{"current_steps": 3040, "total_steps": 8680, "loss": 0.8039313554763794, "lr": 1.546629171545378e-06, "epoch": 0.7004608294930875, "percentage": 35.02, "elapsed_time": "4:06:38", "remaining_time": "7:37:35"} +{"current_steps": 3041, "total_steps": 8680, "loss": 0.8751651048660278, "lr": 1.5463101056258289e-06, "epoch": 0.7006912442396314, "percentage": 35.03, "elapsed_time": "4:06:42", "remaining_time": "7:37:29"} +{"current_steps": 3042, "total_steps": 8680, "loss": 0.7600879669189453, "lr": 1.545990960410091e-06, "epoch": 0.7009216589861751, "percentage": 35.05, "elapsed_time": "4:06:48", "remaining_time": "7:37:25"} +{"current_steps": 3043, "total_steps": 8680, "loss": 0.8118841648101807, "lr": 1.545671735944488e-06, "epoch": 0.7011520737327189, "percentage": 35.06, "elapsed_time": "4:06:52", "remaining_time": "7:37:19"} +{"current_steps": 3044, "total_steps": 8680, "loss": 0.7144184112548828, "lr": 1.5453524322753546e-06, "epoch": 0.7013824884792627, "percentage": 35.07, "elapsed_time": "4:06:58", "remaining_time": "7:37:16"} +{"current_steps": 3045, 
"total_steps": 8680, "loss": 0.9730075001716614, "lr": 1.545033049449038e-06, "epoch": 0.7016129032258065, "percentage": 35.08, "elapsed_time": "4:07:02", "remaining_time": "7:37:10"} +{"current_steps": 3046, "total_steps": 8680, "loss": 0.6930910348892212, "lr": 1.5447135875118957e-06, "epoch": 0.7018433179723502, "percentage": 35.09, "elapsed_time": "4:07:07", "remaining_time": "7:37:05"} +{"current_steps": 3047, "total_steps": 8680, "loss": 0.8517031669616699, "lr": 1.5443940465102973e-06, "epoch": 0.7020737327188941, "percentage": 35.1, "elapsed_time": "4:07:12", "remaining_time": "7:37:00"} +{"current_steps": 3048, "total_steps": 8680, "loss": 0.7939779758453369, "lr": 1.5440744264906237e-06, "epoch": 0.7023041474654378, "percentage": 35.12, "elapsed_time": "4:07:16", "remaining_time": "7:36:54"} +{"current_steps": 3049, "total_steps": 8680, "loss": 0.8946782350540161, "lr": 1.5437547274992672e-06, "epoch": 0.7025345622119815, "percentage": 35.13, "elapsed_time": "4:07:21", "remaining_time": "7:36:48"} +{"current_steps": 3050, "total_steps": 8680, "loss": 0.9273954033851624, "lr": 1.543434949582632e-06, "epoch": 0.7027649769585254, "percentage": 35.14, "elapsed_time": "4:07:24", "remaining_time": "7:36:41"} +{"current_steps": 3051, "total_steps": 8680, "loss": 0.7731457352638245, "lr": 1.5431150927871333e-06, "epoch": 0.7029953917050691, "percentage": 35.15, "elapsed_time": "4:07:31", "remaining_time": "7:36:40"} +{"current_steps": 3052, "total_steps": 8680, "loss": 0.7982608079910278, "lr": 1.542795157159198e-06, "epoch": 0.7032258064516129, "percentage": 35.16, "elapsed_time": "4:07:35", "remaining_time": "7:36:34"} +{"current_steps": 3053, "total_steps": 8680, "loss": 0.8422989845275879, "lr": 1.542475142745264e-06, "epoch": 0.7034562211981567, "percentage": 35.17, "elapsed_time": "4:07:40", "remaining_time": "7:36:29"} +{"current_steps": 3054, "total_steps": 8680, "loss": 0.8344876766204834, "lr": 1.542155049591781e-06, "epoch": 0.7036866359447005, 
"percentage": 35.18, "elapsed_time": "4:07:46", "remaining_time": "7:36:25"} +{"current_steps": 3055, "total_steps": 8680, "loss": 0.8830629587173462, "lr": 1.541834877745211e-06, "epoch": 0.7039170506912442, "percentage": 35.2, "elapsed_time": "4:07:49", "remaining_time": "7:36:19"} +{"current_steps": 3056, "total_steps": 8680, "loss": 0.823864221572876, "lr": 1.5415146272520247e-06, "epoch": 0.7041474654377881, "percentage": 35.21, "elapsed_time": "4:07:54", "remaining_time": "7:36:13"} +{"current_steps": 3057, "total_steps": 8680, "loss": 0.8577016592025757, "lr": 1.5411942981587077e-06, "epoch": 0.7043778801843318, "percentage": 35.22, "elapsed_time": "4:07:57", "remaining_time": "7:36:06"} +{"current_steps": 3058, "total_steps": 8680, "loss": 0.7431750297546387, "lr": 1.540873890511755e-06, "epoch": 0.7046082949308756, "percentage": 35.23, "elapsed_time": "4:08:02", "remaining_time": "7:36:00"} +{"current_steps": 3059, "total_steps": 8680, "loss": 0.8219394683837891, "lr": 1.5405534043576729e-06, "epoch": 0.7048387096774194, "percentage": 35.24, "elapsed_time": "4:08:06", "remaining_time": "7:35:54"} +{"current_steps": 3060, "total_steps": 8680, "loss": 0.706437349319458, "lr": 1.5402328397429795e-06, "epoch": 0.7050691244239631, "percentage": 35.25, "elapsed_time": "4:08:12", "remaining_time": "7:35:52"} +{"current_steps": 3061, "total_steps": 8680, "loss": 0.8669443130493164, "lr": 1.5399121967142051e-06, "epoch": 0.7052995391705069, "percentage": 35.26, "elapsed_time": "4:08:16", "remaining_time": "7:35:45"} +{"current_steps": 3062, "total_steps": 8680, "loss": 0.7995564937591553, "lr": 1.5395914753178897e-06, "epoch": 0.7055299539170506, "percentage": 35.28, "elapsed_time": "4:08:21", "remaining_time": "7:35:40"} +{"current_steps": 3063, "total_steps": 8680, "loss": 0.7840889692306519, "lr": 1.5392706756005862e-06, "epoch": 0.7057603686635945, "percentage": 35.29, "elapsed_time": "4:08:28", "remaining_time": "7:35:39"} +{"current_steps": 3064, 
"total_steps": 8680, "loss": 0.8231604695320129, "lr": 1.5389497976088582e-06, "epoch": 0.7059907834101382, "percentage": 35.3, "elapsed_time": "4:08:33", "remaining_time": "7:35:35"} +{"current_steps": 3065, "total_steps": 8680, "loss": 0.7821571826934814, "lr": 1.5386288413892801e-06, "epoch": 0.706221198156682, "percentage": 35.31, "elapsed_time": "4:08:38", "remaining_time": "7:35:29"} +{"current_steps": 3066, "total_steps": 8680, "loss": 0.736830472946167, "lr": 1.538307806988439e-06, "epoch": 0.7064516129032258, "percentage": 35.32, "elapsed_time": "4:08:42", "remaining_time": "7:35:23"} +{"current_steps": 3067, "total_steps": 8680, "loss": 0.7783113718032837, "lr": 1.537986694452932e-06, "epoch": 0.7066820276497696, "percentage": 35.33, "elapsed_time": "4:08:46", "remaining_time": "7:35:18"} +{"current_steps": 3068, "total_steps": 8680, "loss": 0.8000421524047852, "lr": 1.5376655038293692e-06, "epoch": 0.7069124423963133, "percentage": 35.35, "elapsed_time": "4:08:52", "remaining_time": "7:35:14"} +{"current_steps": 3069, "total_steps": 8680, "loss": 0.7446980476379395, "lr": 1.5373442351643696e-06, "epoch": 0.7071428571428572, "percentage": 35.36, "elapsed_time": "4:08:57", "remaining_time": "7:35:09"} +{"current_steps": 3070, "total_steps": 8680, "loss": 0.7018321752548218, "lr": 1.537022888504566e-06, "epoch": 0.7073732718894009, "percentage": 35.37, "elapsed_time": "4:09:01", "remaining_time": "7:35:04"} +{"current_steps": 3071, "total_steps": 8680, "loss": 0.6903716325759888, "lr": 1.5367014638966008e-06, "epoch": 0.7076036866359448, "percentage": 35.38, "elapsed_time": "4:09:05", "remaining_time": "7:34:57"} +{"current_steps": 3072, "total_steps": 8680, "loss": 0.9635254144668579, "lr": 1.5363799613871289e-06, "epoch": 0.7078341013824885, "percentage": 35.39, "elapsed_time": "4:09:09", "remaining_time": "7:34:50"} +{"current_steps": 3073, "total_steps": 8680, "loss": 0.8612154722213745, "lr": 1.5360583810228156e-06, "epoch": 0.7080645161290322, 
"percentage": 35.4, "elapsed_time": "4:09:14", "remaining_time": "7:34:46"} +{"current_steps": 3074, "total_steps": 8680, "loss": 0.8632407784461975, "lr": 1.5357367228503376e-06, "epoch": 0.708294930875576, "percentage": 35.41, "elapsed_time": "4:09:19", "remaining_time": "7:34:41"} +{"current_steps": 3075, "total_steps": 8680, "loss": 0.8117856979370117, "lr": 1.5354149869163839e-06, "epoch": 0.7085253456221198, "percentage": 35.43, "elapsed_time": "4:09:23", "remaining_time": "7:34:35"} +{"current_steps": 3076, "total_steps": 8680, "loss": 0.8062559366226196, "lr": 1.5350931732676538e-06, "epoch": 0.7087557603686636, "percentage": 35.44, "elapsed_time": "4:09:29", "remaining_time": "7:34:31"} +{"current_steps": 3077, "total_steps": 8680, "loss": 0.7918965816497803, "lr": 1.5347712819508576e-06, "epoch": 0.7089861751152073, "percentage": 35.45, "elapsed_time": "4:09:34", "remaining_time": "7:34:26"} +{"current_steps": 3078, "total_steps": 8680, "loss": 0.7564986944198608, "lr": 1.534449313012718e-06, "epoch": 0.7092165898617512, "percentage": 35.46, "elapsed_time": "4:09:38", "remaining_time": "7:34:21"} +{"current_steps": 3079, "total_steps": 8680, "loss": 0.8261928558349609, "lr": 1.534127266499968e-06, "epoch": 0.7094470046082949, "percentage": 35.47, "elapsed_time": "4:09:44", "remaining_time": "7:34:18"} +{"current_steps": 3080, "total_steps": 8680, "loss": 0.705269455909729, "lr": 1.5338051424593524e-06, "epoch": 0.7096774193548387, "percentage": 35.48, "elapsed_time": "4:09:49", "remaining_time": "7:34:13"} +{"current_steps": 3081, "total_steps": 8680, "loss": 0.823144793510437, "lr": 1.5334829409376271e-06, "epoch": 0.7099078341013825, "percentage": 35.5, "elapsed_time": "4:09:53", "remaining_time": "7:34:08"} +{"current_steps": 3082, "total_steps": 8680, "loss": 0.7772066593170166, "lr": 1.5331606619815588e-06, "epoch": 0.7101382488479263, "percentage": 35.51, "elapsed_time": "4:09:58", "remaining_time": "7:34:02"} +{"current_steps": 3083, "total_steps": 
8680, "loss": 0.8901097178459167, "lr": 1.5328383056379265e-06, "epoch": 0.71036866359447, "percentage": 35.52, "elapsed_time": "4:10:02", "remaining_time": "7:33:56"} +{"current_steps": 3084, "total_steps": 8680, "loss": 0.8454819917678833, "lr": 1.5325158719535196e-06, "epoch": 0.7105990783410139, "percentage": 35.53, "elapsed_time": "4:10:06", "remaining_time": "7:33:48"} +{"current_steps": 3085, "total_steps": 8680, "loss": 0.8444693684577942, "lr": 1.5321933609751388e-06, "epoch": 0.7108294930875576, "percentage": 35.54, "elapsed_time": "4:10:09", "remaining_time": "7:33:42"} +{"current_steps": 3086, "total_steps": 8680, "loss": 0.7893826961517334, "lr": 1.5318707727495964e-06, "epoch": 0.7110599078341013, "percentage": 35.55, "elapsed_time": "4:10:14", "remaining_time": "7:33:37"} +{"current_steps": 3087, "total_steps": 8680, "loss": 0.7536686658859253, "lr": 1.531548107323715e-06, "epoch": 0.7112903225806452, "percentage": 35.56, "elapsed_time": "4:10:20", "remaining_time": "7:33:33"} +{"current_steps": 3088, "total_steps": 8680, "loss": 0.8105358481407166, "lr": 1.53122536474433e-06, "epoch": 0.7115207373271889, "percentage": 35.58, "elapsed_time": "4:10:25", "remaining_time": "7:33:29"} +{"current_steps": 3089, "total_steps": 8680, "loss": 0.8104212284088135, "lr": 1.530902545058286e-06, "epoch": 0.7117511520737327, "percentage": 35.59, "elapsed_time": "4:10:30", "remaining_time": "7:33:25"} +{"current_steps": 3090, "total_steps": 8680, "loss": 0.7738373279571533, "lr": 1.5305796483124405e-06, "epoch": 0.7119815668202765, "percentage": 35.6, "elapsed_time": "4:10:35", "remaining_time": "7:33:20"} +{"current_steps": 3091, "total_steps": 8680, "loss": 0.7583746910095215, "lr": 1.5302566745536618e-06, "epoch": 0.7122119815668203, "percentage": 35.61, "elapsed_time": "4:10:41", "remaining_time": "7:33:16"} +{"current_steps": 3092, "total_steps": 8680, "loss": 0.8370871543884277, "lr": 1.5299336238288286e-06, "epoch": 0.712442396313364, "percentage": 35.62, 
"elapsed_time": "4:10:46", "remaining_time": "7:33:12"} +{"current_steps": 3093, "total_steps": 8680, "loss": 0.7833988666534424, "lr": 1.5296104961848314e-06, "epoch": 0.7126728110599079, "percentage": 35.63, "elapsed_time": "4:10:50", "remaining_time": "7:33:06"} +{"current_steps": 3094, "total_steps": 8680, "loss": 0.8024515509605408, "lr": 1.5292872916685717e-06, "epoch": 0.7129032258064516, "percentage": 35.65, "elapsed_time": "4:10:55", "remaining_time": "7:33:02"} +{"current_steps": 3095, "total_steps": 8680, "loss": 0.8044738173484802, "lr": 1.5289640103269623e-06, "epoch": 0.7131336405529954, "percentage": 35.66, "elapsed_time": "4:11:02", "remaining_time": "7:32:59"} +{"current_steps": 3096, "total_steps": 8680, "loss": 0.7783721685409546, "lr": 1.5286406522069273e-06, "epoch": 0.7133640552995392, "percentage": 35.67, "elapsed_time": "4:11:06", "remaining_time": "7:32:54"} +{"current_steps": 3097, "total_steps": 8680, "loss": 0.693443238735199, "lr": 1.5283172173554014e-06, "epoch": 0.7135944700460829, "percentage": 35.68, "elapsed_time": "4:11:11", "remaining_time": "7:32:49"} +{"current_steps": 3098, "total_steps": 8680, "loss": 0.8142237663269043, "lr": 1.527993705819331e-06, "epoch": 0.7138248847926267, "percentage": 35.69, "elapsed_time": "4:11:17", "remaining_time": "7:32:46"} +{"current_steps": 3099, "total_steps": 8680, "loss": 0.790626049041748, "lr": 1.5276701176456726e-06, "epoch": 0.7140552995391705, "percentage": 35.7, "elapsed_time": "4:11:22", "remaining_time": "7:32:41"} +{"current_steps": 3100, "total_steps": 8680, "loss": 0.9460805654525757, "lr": 1.5273464528813953e-06, "epoch": 0.7142857142857143, "percentage": 35.71, "elapsed_time": "4:11:25", "remaining_time": "7:32:34"} +{"current_steps": 3101, "total_steps": 8680, "loss": 0.6906337738037109, "lr": 1.5270227115734789e-06, "epoch": 0.714516129032258, "percentage": 35.73, "elapsed_time": "4:11:33", "remaining_time": "7:32:35"} +{"current_steps": 3102, "total_steps": 8680, "loss": 
0.8828556537628174, "lr": 1.526698893768913e-06, "epoch": 0.7147465437788019, "percentage": 35.74, "elapsed_time": "4:11:38", "remaining_time": "7:32:30"} +{"current_steps": 3103, "total_steps": 8680, "loss": 0.8395771980285645, "lr": 1.5263749995147004e-06, "epoch": 0.7149769585253456, "percentage": 35.75, "elapsed_time": "4:11:42", "remaining_time": "7:32:24"} +{"current_steps": 3104, "total_steps": 8680, "loss": 0.7103895545005798, "lr": 1.5260510288578535e-06, "epoch": 0.7152073732718894, "percentage": 35.76, "elapsed_time": "4:11:47", "remaining_time": "7:32:18"} +{"current_steps": 3105, "total_steps": 8680, "loss": 0.9780298471450806, "lr": 1.5257269818453956e-06, "epoch": 0.7154377880184332, "percentage": 35.77, "elapsed_time": "4:11:50", "remaining_time": "7:32:11"} +{"current_steps": 3106, "total_steps": 8680, "loss": 0.8176128268241882, "lr": 1.525402858524363e-06, "epoch": 0.715668202764977, "percentage": 35.78, "elapsed_time": "4:11:56", "remaining_time": "7:32:08"} +{"current_steps": 3107, "total_steps": 8680, "loss": 0.6766567230224609, "lr": 1.5250786589418008e-06, "epoch": 0.7158986175115207, "percentage": 35.79, "elapsed_time": "4:12:02", "remaining_time": "7:32:04"} +{"current_steps": 3108, "total_steps": 8680, "loss": 0.7910950183868408, "lr": 1.5247543831447662e-06, "epoch": 0.7161290322580646, "percentage": 35.81, "elapsed_time": "4:12:07", "remaining_time": "7:32:00"} +{"current_steps": 3109, "total_steps": 8680, "loss": 0.8444501161575317, "lr": 1.5244300311803275e-06, "epoch": 0.7163594470046083, "percentage": 35.82, "elapsed_time": "4:12:12", "remaining_time": "7:31:55"} +{"current_steps": 3110, "total_steps": 8680, "loss": 0.7180038690567017, "lr": 1.5241056030955642e-06, "epoch": 0.716589861751152, "percentage": 35.83, "elapsed_time": "4:12:16", "remaining_time": "7:31:49"} +{"current_steps": 3111, "total_steps": 8680, "loss": 0.8563181757926941, "lr": 1.5237810989375663e-06, "epoch": 0.7168202764976959, "percentage": 35.84, 
"elapsed_time": "4:12:23", "remaining_time": "7:31:47"} +{"current_steps": 3112, "total_steps": 8680, "loss": 0.7792840003967285, "lr": 1.5234565187534353e-06, "epoch": 0.7170506912442396, "percentage": 35.85, "elapsed_time": "4:12:27", "remaining_time": "7:31:42"} +{"current_steps": 3113, "total_steps": 8680, "loss": 0.8414837121963501, "lr": 1.5231318625902835e-06, "epoch": 0.7172811059907834, "percentage": 35.86, "elapsed_time": "4:12:32", "remaining_time": "7:31:36"} +{"current_steps": 3114, "total_steps": 8680, "loss": 0.8549888134002686, "lr": 1.5228071304952348e-06, "epoch": 0.7175115207373272, "percentage": 35.88, "elapsed_time": "4:12:35", "remaining_time": "7:31:29"} +{"current_steps": 3115, "total_steps": 8680, "loss": 0.7973321676254272, "lr": 1.5224823225154228e-06, "epoch": 0.717741935483871, "percentage": 35.89, "elapsed_time": "4:12:40", "remaining_time": "7:31:23"} +{"current_steps": 3116, "total_steps": 8680, "loss": 0.7328228950500488, "lr": 1.5221574386979937e-06, "epoch": 0.7179723502304147, "percentage": 35.9, "elapsed_time": "4:12:45", "remaining_time": "7:31:19"} +{"current_steps": 3117, "total_steps": 8680, "loss": 0.8953883051872253, "lr": 1.5218324790901033e-06, "epoch": 0.7182027649769586, "percentage": 35.91, "elapsed_time": "4:12:50", "remaining_time": "7:31:14"} +{"current_steps": 3118, "total_steps": 8680, "loss": 0.7804527282714844, "lr": 1.5215074437389195e-06, "epoch": 0.7184331797235023, "percentage": 35.92, "elapsed_time": "4:12:54", "remaining_time": "7:31:08"} +{"current_steps": 3119, "total_steps": 8680, "loss": 0.7581363320350647, "lr": 1.5211823326916204e-06, "epoch": 0.7186635944700461, "percentage": 35.93, "elapsed_time": "4:12:59", "remaining_time": "7:31:04"} +{"current_steps": 3120, "total_steps": 8680, "loss": 0.7720214128494263, "lr": 1.520857145995396e-06, "epoch": 0.7188940092165899, "percentage": 35.94, "elapsed_time": "4:13:05", "remaining_time": "7:31:00"} +{"current_steps": 3121, "total_steps": 8680, "loss": 
0.7142826914787292, "lr": 1.5205318836974463e-06, "epoch": 0.7191244239631336, "percentage": 35.96, "elapsed_time": "4:13:10", "remaining_time": "7:30:55"} +{"current_steps": 3122, "total_steps": 8680, "loss": 0.715612530708313, "lr": 1.520206545844983e-06, "epoch": 0.7193548387096774, "percentage": 35.97, "elapsed_time": "4:13:15", "remaining_time": "7:30:51"} +{"current_steps": 3123, "total_steps": 8680, "loss": 0.8851219415664673, "lr": 1.5198811324852277e-06, "epoch": 0.7195852534562212, "percentage": 35.98, "elapsed_time": "4:13:19", "remaining_time": "7:30:45"} +{"current_steps": 3124, "total_steps": 8680, "loss": 0.981631875038147, "lr": 1.5195556436654146e-06, "epoch": 0.719815668202765, "percentage": 35.99, "elapsed_time": "4:13:22", "remaining_time": "7:30:38"} +{"current_steps": 3125, "total_steps": 8680, "loss": 0.8586313724517822, "lr": 1.5192300794327876e-06, "epoch": 0.7200460829493087, "percentage": 36.0, "elapsed_time": "4:13:28", "remaining_time": "7:30:34"} +{"current_steps": 3126, "total_steps": 8680, "loss": 0.8863250017166138, "lr": 1.518904439834602e-06, "epoch": 0.7202764976958526, "percentage": 36.01, "elapsed_time": "4:13:33", "remaining_time": "7:30:30"} +{"current_steps": 3127, "total_steps": 8680, "loss": 0.864910900592804, "lr": 1.5185787249181239e-06, "epoch": 0.7205069124423963, "percentage": 36.03, "elapsed_time": "4:13:39", "remaining_time": "7:30:27"} +{"current_steps": 3128, "total_steps": 8680, "loss": 0.8120951652526855, "lr": 1.5182529347306302e-06, "epoch": 0.7207373271889401, "percentage": 36.04, "elapsed_time": "4:13:43", "remaining_time": "7:30:20"} +{"current_steps": 3129, "total_steps": 8680, "loss": 0.7866026163101196, "lr": 1.517927069319409e-06, "epoch": 0.7209677419354839, "percentage": 36.05, "elapsed_time": "4:13:48", "remaining_time": "7:30:15"} +{"current_steps": 3130, "total_steps": 8680, "loss": 0.8610655069351196, "lr": 1.5176011287317598e-06, "epoch": 0.7211981566820277, "percentage": 36.06, "elapsed_time": 
"4:13:53", "remaining_time": "7:30:11"} +{"current_steps": 3131, "total_steps": 8680, "loss": 0.7463846206665039, "lr": 1.5172751130149915e-06, "epoch": 0.7214285714285714, "percentage": 36.07, "elapsed_time": "4:13:59", "remaining_time": "7:30:07"} +{"current_steps": 3132, "total_steps": 8680, "loss": 0.6578936576843262, "lr": 1.5169490222164254e-06, "epoch": 0.7216589861751153, "percentage": 36.08, "elapsed_time": "4:14:03", "remaining_time": "7:30:02"} +{"current_steps": 3133, "total_steps": 8680, "loss": 0.6849668025970459, "lr": 1.516622856383393e-06, "epoch": 0.721889400921659, "percentage": 36.09, "elapsed_time": "4:14:09", "remaining_time": "7:29:59"} +{"current_steps": 3134, "total_steps": 8680, "loss": 0.9549611806869507, "lr": 1.5162966155632372e-06, "epoch": 0.7221198156682027, "percentage": 36.11, "elapsed_time": "4:14:13", "remaining_time": "7:29:53"} +{"current_steps": 3135, "total_steps": 8680, "loss": 0.8005616664886475, "lr": 1.5159702998033113e-06, "epoch": 0.7223502304147466, "percentage": 36.12, "elapsed_time": "4:14:17", "remaining_time": "7:29:45"} +{"current_steps": 3136, "total_steps": 8680, "loss": 0.8980830311775208, "lr": 1.5156439091509793e-06, "epoch": 0.7225806451612903, "percentage": 36.13, "elapsed_time": "4:14:22", "remaining_time": "7:29:42"} +{"current_steps": 3137, "total_steps": 8680, "loss": 0.8247464895248413, "lr": 1.5153174436536166e-06, "epoch": 0.7228110599078341, "percentage": 36.14, "elapsed_time": "4:14:27", "remaining_time": "7:29:36"} +{"current_steps": 3138, "total_steps": 8680, "loss": 0.818629264831543, "lr": 1.5149909033586088e-06, "epoch": 0.7230414746543778, "percentage": 36.15, "elapsed_time": "4:14:33", "remaining_time": "7:29:33"} +{"current_steps": 3139, "total_steps": 8680, "loss": 0.8928704261779785, "lr": 1.5146642883133532e-06, "epoch": 0.7232718894009217, "percentage": 36.16, "elapsed_time": "4:14:38", "remaining_time": "7:29:29"} +{"current_steps": 3140, "total_steps": 8680, "loss": 
0.9330282807350159, "lr": 1.5143375985652576e-06, "epoch": 0.7235023041474654, "percentage": 36.18, "elapsed_time": "4:14:41", "remaining_time": "7:29:22"} +{"current_steps": 3141, "total_steps": 8680, "loss": 0.7961822748184204, "lr": 1.5140108341617405e-06, "epoch": 0.7237327188940093, "percentage": 36.19, "elapsed_time": "4:14:46", "remaining_time": "7:29:16"} +{"current_steps": 3142, "total_steps": 8680, "loss": 0.8073769807815552, "lr": 1.513683995150231e-06, "epoch": 0.723963133640553, "percentage": 36.2, "elapsed_time": "4:14:51", "remaining_time": "7:29:11"} +{"current_steps": 3143, "total_steps": 8680, "loss": 0.946292519569397, "lr": 1.51335708157817e-06, "epoch": 0.7241935483870968, "percentage": 36.21, "elapsed_time": "4:14:55", "remaining_time": "7:29:06"} +{"current_steps": 3144, "total_steps": 8680, "loss": 0.806084156036377, "lr": 1.513030093493008e-06, "epoch": 0.7244239631336405, "percentage": 36.22, "elapsed_time": "4:15:02", "remaining_time": "7:29:04"} +{"current_steps": 3145, "total_steps": 8680, "loss": 0.8804534673690796, "lr": 1.5127030309422072e-06, "epoch": 0.7246543778801844, "percentage": 36.23, "elapsed_time": "4:15:08", "remaining_time": "7:29:01"} +{"current_steps": 3146, "total_steps": 8680, "loss": 0.7489848136901855, "lr": 1.51237589397324e-06, "epoch": 0.7248847926267281, "percentage": 36.24, "elapsed_time": "4:15:11", "remaining_time": "7:28:53"} +{"current_steps": 3147, "total_steps": 8680, "loss": 0.875586986541748, "lr": 1.5120486826335905e-06, "epoch": 0.7251152073732718, "percentage": 36.26, "elapsed_time": "4:15:16", "remaining_time": "7:28:49"} +{"current_steps": 3148, "total_steps": 8680, "loss": 0.8334758281707764, "lr": 1.5117213969707522e-06, "epoch": 0.7253456221198157, "percentage": 36.27, "elapsed_time": "4:15:21", "remaining_time": "7:28:44"} +{"current_steps": 3149, "total_steps": 8680, "loss": 0.8010859489440918, "lr": 1.5113940370322306e-06, "epoch": 0.7255760368663594, "percentage": 36.28, "elapsed_time": 
"4:15:26", "remaining_time": "7:28:39"} +{"current_steps": 3150, "total_steps": 8680, "loss": 0.7907547950744629, "lr": 1.5110666028655417e-06, "epoch": 0.7258064516129032, "percentage": 36.29, "elapsed_time": "4:15:30", "remaining_time": "7:28:32"} +{"current_steps": 3151, "total_steps": 8680, "loss": 0.8922848105430603, "lr": 1.5107390945182117e-06, "epoch": 0.726036866359447, "percentage": 36.3, "elapsed_time": "4:15:33", "remaining_time": "7:28:26"} +{"current_steps": 3152, "total_steps": 8680, "loss": 0.7418628931045532, "lr": 1.5104115120377783e-06, "epoch": 0.7262672811059908, "percentage": 36.31, "elapsed_time": "4:15:40", "remaining_time": "7:28:23"} +{"current_steps": 3153, "total_steps": 8680, "loss": 0.9063338041305542, "lr": 1.51008385547179e-06, "epoch": 0.7264976958525345, "percentage": 36.32, "elapsed_time": "4:15:44", "remaining_time": "7:28:18"} +{"current_steps": 3154, "total_steps": 8680, "loss": 0.8718822002410889, "lr": 1.5097561248678047e-06, "epoch": 0.7267281105990784, "percentage": 36.34, "elapsed_time": "4:15:49", "remaining_time": "7:28:13"} +{"current_steps": 3155, "total_steps": 8680, "loss": 0.950742244720459, "lr": 1.5094283202733934e-06, "epoch": 0.7269585253456221, "percentage": 36.35, "elapsed_time": "4:15:53", "remaining_time": "7:28:06"} +{"current_steps": 3156, "total_steps": 8680, "loss": 0.7963443994522095, "lr": 1.5091004417361353e-06, "epoch": 0.727188940092166, "percentage": 36.36, "elapsed_time": "4:15:57", "remaining_time": "7:28:00"} +{"current_steps": 3157, "total_steps": 8680, "loss": 0.8428621888160706, "lr": 1.5087724893036225e-06, "epoch": 0.7274193548387097, "percentage": 36.37, "elapsed_time": "4:16:02", "remaining_time": "7:27:55"} +{"current_steps": 3158, "total_steps": 8680, "loss": 0.8271539211273193, "lr": 1.508444463023456e-06, "epoch": 0.7276497695852534, "percentage": 36.38, "elapsed_time": "4:16:07", "remaining_time": "7:27:51"} +{"current_steps": 3159, "total_steps": 8680, "loss": 0.7899917364120483, 
"lr": 1.508116362943249e-06, "epoch": 0.7278801843317972, "percentage": 36.39, "elapsed_time": "4:16:13", "remaining_time": "7:27:47"} +{"current_steps": 3160, "total_steps": 8680, "loss": 0.8734809160232544, "lr": 1.5077881891106246e-06, "epoch": 0.728110599078341, "percentage": 36.41, "elapsed_time": "4:16:18", "remaining_time": "7:27:43"} +{"current_steps": 3161, "total_steps": 8680, "loss": 0.7740491628646851, "lr": 1.5074599415732164e-06, "epoch": 0.7283410138248848, "percentage": 36.42, "elapsed_time": "4:16:24", "remaining_time": "7:27:40"} +{"current_steps": 3162, "total_steps": 8680, "loss": 0.7219515442848206, "lr": 1.5071316203786698e-06, "epoch": 0.7285714285714285, "percentage": 36.43, "elapsed_time": "4:16:28", "remaining_time": "7:27:34"} +{"current_steps": 3163, "total_steps": 8680, "loss": 0.8122725486755371, "lr": 1.50680322557464e-06, "epoch": 0.7288018433179724, "percentage": 36.44, "elapsed_time": "4:16:33", "remaining_time": "7:27:29"} +{"current_steps": 3164, "total_steps": 8680, "loss": 0.8280072212219238, "lr": 1.5064747572087923e-06, "epoch": 0.7290322580645161, "percentage": 36.45, "elapsed_time": "4:16:37", "remaining_time": "7:27:23"} +{"current_steps": 3165, "total_steps": 8680, "loss": 0.7287842035293579, "lr": 1.5061462153288047e-06, "epoch": 0.7292626728110599, "percentage": 36.46, "elapsed_time": "4:16:42", "remaining_time": "7:27:18"} +{"current_steps": 3166, "total_steps": 8680, "loss": 0.8404949903488159, "lr": 1.5058175999823639e-06, "epoch": 0.7294930875576037, "percentage": 36.47, "elapsed_time": "4:16:48", "remaining_time": "7:27:16"} +{"current_steps": 3167, "total_steps": 8680, "loss": 0.6572415828704834, "lr": 1.505488911217168e-06, "epoch": 0.7297235023041475, "percentage": 36.49, "elapsed_time": "4:16:54", "remaining_time": "7:27:12"} +{"current_steps": 3168, "total_steps": 8680, "loss": 0.8924484848976135, "lr": 1.5051601490809257e-06, "epoch": 0.7299539170506912, "percentage": 36.5, "elapsed_time": "4:16:58", 
"remaining_time": "7:27:06"} +{"current_steps": 3169, "total_steps": 8680, "loss": 0.8701428174972534, "lr": 1.5048313136213566e-06, "epoch": 0.7301843317972351, "percentage": 36.51, "elapsed_time": "4:17:03", "remaining_time": "7:27:01"} +{"current_steps": 3170, "total_steps": 8680, "loss": 0.8327716588973999, "lr": 1.5045024048861906e-06, "epoch": 0.7304147465437788, "percentage": 36.52, "elapsed_time": "4:17:07", "remaining_time": "7:26:55"} +{"current_steps": 3171, "total_steps": 8680, "loss": 0.8379253149032593, "lr": 1.5041734229231686e-06, "epoch": 0.7306451612903225, "percentage": 36.53, "elapsed_time": "4:17:12", "remaining_time": "7:26:50"} +{"current_steps": 3172, "total_steps": 8680, "loss": 0.7475664019584656, "lr": 1.5038443677800413e-06, "epoch": 0.7308755760368664, "percentage": 36.54, "elapsed_time": "4:17:16", "remaining_time": "7:26:45"} +{"current_steps": 3173, "total_steps": 8680, "loss": 0.9002243280410767, "lr": 1.5035152395045714e-06, "epoch": 0.7311059907834101, "percentage": 36.56, "elapsed_time": "4:17:21", "remaining_time": "7:26:39"} +{"current_steps": 3174, "total_steps": 8680, "loss": 0.6718685626983643, "lr": 1.503186038144531e-06, "epoch": 0.7313364055299539, "percentage": 36.57, "elapsed_time": "4:17:27", "remaining_time": "7:26:36"} +{"current_steps": 3175, "total_steps": 8680, "loss": 0.6836501359939575, "lr": 1.5028567637477033e-06, "epoch": 0.7315668202764977, "percentage": 36.58, "elapsed_time": "4:17:33", "remaining_time": "7:26:33"} +{"current_steps": 3176, "total_steps": 8680, "loss": 0.7548954486846924, "lr": 1.502527416361882e-06, "epoch": 0.7317972350230415, "percentage": 36.59, "elapsed_time": "4:17:38", "remaining_time": "7:26:29"} +{"current_steps": 3177, "total_steps": 8680, "loss": 0.8385212421417236, "lr": 1.5021979960348714e-06, "epoch": 0.7320276497695852, "percentage": 36.6, "elapsed_time": "4:17:43", "remaining_time": "7:26:24"} +{"current_steps": 3178, "total_steps": 8680, "loss": 0.8605425357818604, "lr": 
1.5018685028144864e-06, "epoch": 0.7322580645161291, "percentage": 36.61, "elapsed_time": "4:17:47", "remaining_time": "7:26:19"} +{"current_steps": 3179, "total_steps": 8680, "loss": 0.8831393718719482, "lr": 1.501538936748553e-06, "epoch": 0.7324884792626728, "percentage": 36.62, "elapsed_time": "4:17:53", "remaining_time": "7:26:15"} +{"current_steps": 3180, "total_steps": 8680, "loss": 0.6965172290802002, "lr": 1.5012092978849062e-06, "epoch": 0.7327188940092166, "percentage": 36.64, "elapsed_time": "4:17:59", "remaining_time": "7:26:12"} +{"current_steps": 3181, "total_steps": 8680, "loss": 0.8062859773635864, "lr": 1.500879586271394e-06, "epoch": 0.7329493087557604, "percentage": 36.65, "elapsed_time": "4:18:03", "remaining_time": "7:26:05"} +{"current_steps": 3182, "total_steps": 8680, "loss": 0.8285790681838989, "lr": 1.5005498019558724e-06, "epoch": 0.7331797235023041, "percentage": 36.66, "elapsed_time": "4:18:07", "remaining_time": "7:25:59"} +{"current_steps": 3183, "total_steps": 8680, "loss": 0.612429141998291, "lr": 1.50021994498621e-06, "epoch": 0.7334101382488479, "percentage": 36.67, "elapsed_time": "4:18:13", "remaining_time": "7:25:57"} +{"current_steps": 3184, "total_steps": 8680, "loss": 0.8271423578262329, "lr": 1.4998900154102847e-06, "epoch": 0.7336405529953917, "percentage": 36.68, "elapsed_time": "4:18:18", "remaining_time": "7:25:52"} +{"current_steps": 3185, "total_steps": 8680, "loss": 0.838964581489563, "lr": 1.499560013275986e-06, "epoch": 0.7338709677419355, "percentage": 36.69, "elapsed_time": "4:18:22", "remaining_time": "7:25:46"} +{"current_steps": 3186, "total_steps": 8680, "loss": 0.7902333736419678, "lr": 1.4992299386312119e-06, "epoch": 0.7341013824884792, "percentage": 36.71, "elapsed_time": "4:18:26", "remaining_time": "7:25:40"} +{"current_steps": 3187, "total_steps": 8680, "loss": 0.8520635366439819, "lr": 1.4988997915238735e-06, "epoch": 0.7343317972350231, "percentage": 36.72, "elapsed_time": "4:18:31", 
"remaining_time": "7:25:35"} +{"current_steps": 3188, "total_steps": 8680, "loss": 0.8666567206382751, "lr": 1.4985695720018905e-06, "epoch": 0.7345622119815668, "percentage": 36.73, "elapsed_time": "4:18:36", "remaining_time": "7:25:30"} +{"current_steps": 3189, "total_steps": 8680, "loss": 0.6930691003799438, "lr": 1.4982392801131944e-06, "epoch": 0.7347926267281106, "percentage": 36.74, "elapsed_time": "4:18:40", "remaining_time": "7:25:24"} +{"current_steps": 3190, "total_steps": 8680, "loss": 0.7957722544670105, "lr": 1.4979089159057263e-06, "epoch": 0.7350230414746544, "percentage": 36.75, "elapsed_time": "4:18:46", "remaining_time": "7:25:21"} +{"current_steps": 3191, "total_steps": 8680, "loss": 0.8966697454452515, "lr": 1.4975784794274383e-06, "epoch": 0.7352534562211982, "percentage": 36.76, "elapsed_time": "4:18:52", "remaining_time": "7:25:17"} +{"current_steps": 3192, "total_steps": 8680, "loss": 0.7478537559509277, "lr": 1.4972479707262926e-06, "epoch": 0.7354838709677419, "percentage": 36.77, "elapsed_time": "4:18:57", "remaining_time": "7:25:12"} +{"current_steps": 3193, "total_steps": 8680, "loss": 0.8862416744232178, "lr": 1.4969173898502624e-06, "epoch": 0.7357142857142858, "percentage": 36.79, "elapsed_time": "4:19:02", "remaining_time": "7:25:08"} +{"current_steps": 3194, "total_steps": 8680, "loss": 0.7910712957382202, "lr": 1.4965867368473306e-06, "epoch": 0.7359447004608295, "percentage": 36.8, "elapsed_time": "4:19:07", "remaining_time": "7:25:03"} +{"current_steps": 3195, "total_steps": 8680, "loss": 0.7371944785118103, "lr": 1.4962560117654916e-06, "epoch": 0.7361751152073732, "percentage": 36.81, "elapsed_time": "4:19:13", "remaining_time": "7:25:00"} +{"current_steps": 3196, "total_steps": 8680, "loss": 0.7966737151145935, "lr": 1.4959252146527496e-06, "epoch": 0.7364055299539171, "percentage": 36.82, "elapsed_time": "4:19:17", "remaining_time": "7:24:54"} +{"current_steps": 3197, "total_steps": 8680, "loss": 0.8474653363227844, "lr": 
1.4955943455571188e-06, "epoch": 0.7366359447004608, "percentage": 36.83, "elapsed_time": "4:19:22", "remaining_time": "7:24:51"} +{"current_steps": 3198, "total_steps": 8680, "loss": 1.0197458267211914, "lr": 1.4952634045266249e-06, "epoch": 0.7368663594470046, "percentage": 36.84, "elapsed_time": "4:19:27", "remaining_time": "7:24:45"} +{"current_steps": 3199, "total_steps": 8680, "loss": 0.8813979625701904, "lr": 1.4949323916093036e-06, "epoch": 0.7370967741935484, "percentage": 36.85, "elapsed_time": "4:19:31", "remaining_time": "7:24:38"} +{"current_steps": 3200, "total_steps": 8680, "loss": 0.9323042631149292, "lr": 1.4946013068532008e-06, "epoch": 0.7373271889400922, "percentage": 36.87, "elapsed_time": "4:19:36", "remaining_time": "7:24:34"} +{"current_steps": 3201, "total_steps": 8680, "loss": 0.8637902736663818, "lr": 1.494270150306373e-06, "epoch": 0.7375576036866359, "percentage": 36.88, "elapsed_time": "4:19:44", "remaining_time": "7:24:35"} +{"current_steps": 3202, "total_steps": 8680, "loss": 0.8046854734420776, "lr": 1.4939389220168875e-06, "epoch": 0.7377880184331798, "percentage": 36.89, "elapsed_time": "4:19:48", "remaining_time": "7:24:29"} +{"current_steps": 3203, "total_steps": 8680, "loss": 0.7616177201271057, "lr": 1.4936076220328211e-06, "epoch": 0.7380184331797235, "percentage": 36.9, "elapsed_time": "4:19:54", "remaining_time": "7:24:26"} +{"current_steps": 3204, "total_steps": 8680, "loss": 0.8548959493637085, "lr": 1.4932762504022619e-06, "epoch": 0.7382488479262673, "percentage": 36.91, "elapsed_time": "4:20:00", "remaining_time": "7:24:22"} +{"current_steps": 3205, "total_steps": 8680, "loss": 0.8062562942504883, "lr": 1.492944807173308e-06, "epoch": 0.738479262672811, "percentage": 36.92, "elapsed_time": "4:20:07", "remaining_time": "7:24:21"} +{"current_steps": 3206, "total_steps": 8680, "loss": 0.8776403069496155, "lr": 1.492613292394068e-06, "epoch": 0.7387096774193549, "percentage": 36.94, "elapsed_time": "4:20:11", 
"remaining_time": "7:24:14"} +{"current_steps": 3207, "total_steps": 8680, "loss": 0.7528336048126221, "lr": 1.4922817061126605e-06, "epoch": 0.7389400921658986, "percentage": 36.95, "elapsed_time": "4:20:15", "remaining_time": "7:24:09"} +{"current_steps": 3208, "total_steps": 8680, "loss": 0.7441881895065308, "lr": 1.4919500483772152e-06, "epoch": 0.7391705069124423, "percentage": 36.96, "elapsed_time": "4:20:21", "remaining_time": "7:24:05"} +{"current_steps": 3209, "total_steps": 8680, "loss": 0.8925758004188538, "lr": 1.4916183192358715e-06, "epoch": 0.7394009216589862, "percentage": 36.97, "elapsed_time": "4:20:24", "remaining_time": "7:23:58"} +{"current_steps": 3210, "total_steps": 8680, "loss": 0.7527008652687073, "lr": 1.4912865187367798e-06, "epoch": 0.7396313364055299, "percentage": 36.98, "elapsed_time": "4:20:29", "remaining_time": "7:23:53"} +{"current_steps": 3211, "total_steps": 8680, "loss": 0.753572404384613, "lr": 1.4909546469281e-06, "epoch": 0.7398617511520738, "percentage": 36.99, "elapsed_time": "4:20:33", "remaining_time": "7:23:47"} +{"current_steps": 3212, "total_steps": 8680, "loss": 0.8884274959564209, "lr": 1.4906227038580036e-06, "epoch": 0.7400921658986175, "percentage": 37.0, "elapsed_time": "4:20:38", "remaining_time": "7:23:41"} +{"current_steps": 3213, "total_steps": 8680, "loss": 0.7702244520187378, "lr": 1.4902906895746707e-06, "epoch": 0.7403225806451613, "percentage": 37.02, "elapsed_time": "4:20:43", "remaining_time": "7:23:38"} +{"current_steps": 3214, "total_steps": 8680, "loss": 0.8662835359573364, "lr": 1.4899586041262936e-06, "epoch": 0.740552995391705, "percentage": 37.03, "elapsed_time": "4:20:48", "remaining_time": "7:23:32"} +{"current_steps": 3215, "total_steps": 8680, "loss": 0.9819997549057007, "lr": 1.4896264475610736e-06, "epoch": 0.7407834101382489, "percentage": 37.04, "elapsed_time": "4:20:51", "remaining_time": "7:23:25"} +{"current_steps": 3216, "total_steps": 8680, "loss": 0.9137614965438843, "lr": 
1.4892942199272232e-06, "epoch": 0.7410138248847926, "percentage": 37.05, "elapsed_time": "4:20:55", "remaining_time": "7:23:19"} +{"current_steps": 3217, "total_steps": 8680, "loss": 0.7554785013198853, "lr": 1.488961921272964e-06, "epoch": 0.7412442396313365, "percentage": 37.06, "elapsed_time": "4:21:01", "remaining_time": "7:23:15"} +{"current_steps": 3218, "total_steps": 8680, "loss": 0.8528940677642822, "lr": 1.4886295516465296e-06, "epoch": 0.7414746543778802, "percentage": 37.07, "elapsed_time": "4:21:07", "remaining_time": "7:23:12"} +{"current_steps": 3219, "total_steps": 8680, "loss": 0.7212377786636353, "lr": 1.4882971110961626e-06, "epoch": 0.7417050691244239, "percentage": 37.09, "elapsed_time": "4:21:11", "remaining_time": "7:23:06"} +{"current_steps": 3220, "total_steps": 8680, "loss": 0.7767617702484131, "lr": 1.4879645996701161e-06, "epoch": 0.7419354838709677, "percentage": 37.1, "elapsed_time": "4:21:17", "remaining_time": "7:23:03"} +{"current_steps": 3221, "total_steps": 8680, "loss": 0.8083292245864868, "lr": 1.4876320174166542e-06, "epoch": 0.7421658986175115, "percentage": 37.11, "elapsed_time": "4:21:21", "remaining_time": "7:22:57"} +{"current_steps": 3222, "total_steps": 8680, "loss": 0.8652364015579224, "lr": 1.4872993643840506e-06, "epoch": 0.7423963133640553, "percentage": 37.12, "elapsed_time": "4:21:26", "remaining_time": "7:22:53"} +{"current_steps": 3223, "total_steps": 8680, "loss": 0.7455019950866699, "lr": 1.486966640620589e-06, "epoch": 0.742626728110599, "percentage": 37.13, "elapsed_time": "4:21:32", "remaining_time": "7:22:49"} +{"current_steps": 3224, "total_steps": 8680, "loss": 0.7881917953491211, "lr": 1.4866338461745644e-06, "epoch": 0.7428571428571429, "percentage": 37.14, "elapsed_time": "4:21:37", "remaining_time": "7:22:44"} +{"current_steps": 3225, "total_steps": 8680, "loss": 0.8148372173309326, "lr": 1.4863009810942813e-06, "epoch": 0.7430875576036866, "percentage": 37.15, "elapsed_time": "4:21:42", 
"remaining_time": "7:22:40"} +{"current_steps": 3226, "total_steps": 8680, "loss": 0.6574658751487732, "lr": 1.4859680454280547e-06, "epoch": 0.7433179723502304, "percentage": 37.17, "elapsed_time": "4:21:47", "remaining_time": "7:22:36"} +{"current_steps": 3227, "total_steps": 8680, "loss": 0.7831655740737915, "lr": 1.4856350392242094e-06, "epoch": 0.7435483870967742, "percentage": 37.18, "elapsed_time": "4:21:54", "remaining_time": "7:22:34"} +{"current_steps": 3228, "total_steps": 8680, "loss": 0.7406231164932251, "lr": 1.485301962531081e-06, "epoch": 0.743778801843318, "percentage": 37.19, "elapsed_time": "4:22:00", "remaining_time": "7:22:31"} +{"current_steps": 3229, "total_steps": 8680, "loss": 0.8092324733734131, "lr": 1.4849688153970154e-06, "epoch": 0.7440092165898617, "percentage": 37.2, "elapsed_time": "4:22:05", "remaining_time": "7:22:27"} +{"current_steps": 3230, "total_steps": 8680, "loss": 0.6662560701370239, "lr": 1.4846355978703679e-06, "epoch": 0.7442396313364056, "percentage": 37.21, "elapsed_time": "4:22:09", "remaining_time": "7:22:21"} +{"current_steps": 3231, "total_steps": 8680, "loss": 0.8064731359481812, "lr": 1.4843023099995052e-06, "epoch": 0.7444700460829493, "percentage": 37.22, "elapsed_time": "4:22:13", "remaining_time": "7:22:14"} +{"current_steps": 3232, "total_steps": 8680, "loss": 0.7424519658088684, "lr": 1.4839689518328037e-06, "epoch": 0.744700460829493, "percentage": 37.24, "elapsed_time": "4:22:17", "remaining_time": "7:22:08"} +{"current_steps": 3233, "total_steps": 8680, "loss": 0.7851438522338867, "lr": 1.4836355234186489e-06, "epoch": 0.7449308755760369, "percentage": 37.25, "elapsed_time": "4:22:22", "remaining_time": "7:22:02"} +{"current_steps": 3234, "total_steps": 8680, "loss": 0.896986722946167, "lr": 1.4833020248054381e-06, "epoch": 0.7451612903225806, "percentage": 37.26, "elapsed_time": "4:22:26", "remaining_time": "7:21:56"} +{"current_steps": 3235, "total_steps": 8680, "loss": 0.9469928741455078, "lr": 
1.4829684560415787e-06, "epoch": 0.7453917050691244, "percentage": 37.27, "elapsed_time": "4:22:30", "remaining_time": "7:21:50"} +{"current_steps": 3236, "total_steps": 8680, "loss": 0.7527188062667847, "lr": 1.4826348171754872e-06, "epoch": 0.7456221198156682, "percentage": 37.28, "elapsed_time": "4:22:34", "remaining_time": "7:21:44"} +{"current_steps": 3237, "total_steps": 8680, "loss": 0.7758080959320068, "lr": 1.4823011082555907e-06, "epoch": 0.745852534562212, "percentage": 37.29, "elapsed_time": "4:22:39", "remaining_time": "7:21:39"} +{"current_steps": 3238, "total_steps": 8680, "loss": 0.8359881043434143, "lr": 1.481967329330327e-06, "epoch": 0.7460829493087557, "percentage": 37.3, "elapsed_time": "4:22:44", "remaining_time": "7:21:34"} +{"current_steps": 3239, "total_steps": 8680, "loss": 0.6576982736587524, "lr": 1.4816334804481434e-06, "epoch": 0.7463133640552996, "percentage": 37.32, "elapsed_time": "4:22:49", "remaining_time": "7:21:30"} +{"current_steps": 3240, "total_steps": 8680, "loss": 0.7919917106628418, "lr": 1.4812995616574978e-06, "epoch": 0.7465437788018433, "percentage": 37.33, "elapsed_time": "4:22:53", "remaining_time": "7:21:23"} +{"current_steps": 3241, "total_steps": 8680, "loss": 0.7682263851165771, "lr": 1.480965573006858e-06, "epoch": 0.7467741935483871, "percentage": 37.34, "elapsed_time": "4:22:57", "remaining_time": "7:21:17"} +{"current_steps": 3242, "total_steps": 8680, "loss": 0.8573193550109863, "lr": 1.4806315145447017e-06, "epoch": 0.7470046082949309, "percentage": 37.35, "elapsed_time": "4:23:02", "remaining_time": "7:21:12"} +{"current_steps": 3243, "total_steps": 8680, "loss": 0.8473606109619141, "lr": 1.4802973863195174e-06, "epoch": 0.7472350230414746, "percentage": 37.36, "elapsed_time": "4:23:06", "remaining_time": "7:21:06"} +{"current_steps": 3244, "total_steps": 8680, "loss": 0.8110678195953369, "lr": 1.4799631883798033e-06, "epoch": 0.7474654377880184, "percentage": 37.37, "elapsed_time": "4:23:10", 
"remaining_time": "7:21:00"} +{"current_steps": 3245, "total_steps": 8680, "loss": 0.6624661087989807, "lr": 1.4796289207740681e-06, "epoch": 0.7476958525345622, "percentage": 37.38, "elapsed_time": "4:23:15", "remaining_time": "7:20:54"} +{"current_steps": 3246, "total_steps": 8680, "loss": 0.8145536184310913, "lr": 1.47929458355083e-06, "epoch": 0.747926267281106, "percentage": 37.4, "elapsed_time": "4:23:22", "remaining_time": "7:20:54"} +{"current_steps": 3247, "total_steps": 8680, "loss": 0.7819876074790955, "lr": 1.4789601767586172e-06, "epoch": 0.7481566820276497, "percentage": 37.41, "elapsed_time": "4:23:28", "remaining_time": "7:20:52"} +{"current_steps": 3248, "total_steps": 8680, "loss": 0.7573810815811157, "lr": 1.4786257004459692e-06, "epoch": 0.7483870967741936, "percentage": 37.42, "elapsed_time": "4:23:35", "remaining_time": "7:20:49"} +{"current_steps": 3249, "total_steps": 8680, "loss": 0.8149522542953491, "lr": 1.4782911546614343e-06, "epoch": 0.7486175115207373, "percentage": 37.43, "elapsed_time": "4:23:40", "remaining_time": "7:20:46"} +{"current_steps": 3250, "total_steps": 8680, "loss": 0.9935284852981567, "lr": 1.4779565394535714e-06, "epoch": 0.7488479262672811, "percentage": 37.44, "elapsed_time": "4:23:45", "remaining_time": "7:20:40"} +{"current_steps": 3251, "total_steps": 8680, "loss": 0.8673371076583862, "lr": 1.4776218548709497e-06, "epoch": 0.7490783410138249, "percentage": 37.45, "elapsed_time": "4:23:51", "remaining_time": "7:20:37"} +{"current_steps": 3252, "total_steps": 8680, "loss": 0.8569149374961853, "lr": 1.4772871009621477e-06, "epoch": 0.7493087557603687, "percentage": 37.47, "elapsed_time": "4:23:55", "remaining_time": "7:20:31"} +{"current_steps": 3253, "total_steps": 8680, "loss": 0.7177854776382446, "lr": 1.4769522777757551e-06, "epoch": 0.7495391705069124, "percentage": 37.48, "elapsed_time": "4:24:00", "remaining_time": "7:20:27"} +{"current_steps": 3254, "total_steps": 8680, "loss": 0.8115622997283936, "lr": 
1.4766173853603706e-06, "epoch": 0.7497695852534563, "percentage": 37.49, "elapsed_time": "4:24:06", "remaining_time": "7:20:23"} +{"current_steps": 3255, "total_steps": 8680, "loss": 0.7209019660949707, "lr": 1.4762824237646038e-06, "epoch": 0.75, "percentage": 37.5, "elapsed_time": "4:24:11", "remaining_time": "7:20:18"} +{"current_steps": 3256, "total_steps": 8680, "loss": 0.8433470726013184, "lr": 1.4759473930370736e-06, "epoch": 0.7502304147465437, "percentage": 37.51, "elapsed_time": "4:24:15", "remaining_time": "7:20:12"} +{"current_steps": 3257, "total_steps": 8680, "loss": 0.853674054145813, "lr": 1.4756122932264093e-06, "epoch": 0.7504608294930876, "percentage": 37.52, "elapsed_time": "4:24:20", "remaining_time": "7:20:07"} +{"current_steps": 3258, "total_steps": 8680, "loss": 0.8645769357681274, "lr": 1.4752771243812503e-06, "epoch": 0.7506912442396313, "percentage": 37.53, "elapsed_time": "4:24:25", "remaining_time": "7:20:03"} +{"current_steps": 3259, "total_steps": 8680, "loss": 0.927452564239502, "lr": 1.474941886550246e-06, "epoch": 0.7509216589861751, "percentage": 37.55, "elapsed_time": "4:24:29", "remaining_time": "7:19:57"} +{"current_steps": 3260, "total_steps": 8680, "loss": 0.7461255788803101, "lr": 1.4746065797820552e-06, "epoch": 0.7511520737327189, "percentage": 37.56, "elapsed_time": "4:24:33", "remaining_time": "7:19:51"} +{"current_steps": 3261, "total_steps": 8680, "loss": 0.8737163543701172, "lr": 1.4742712041253481e-06, "epoch": 0.7513824884792627, "percentage": 37.57, "elapsed_time": "4:24:39", "remaining_time": "7:19:48"} +{"current_steps": 3262, "total_steps": 8680, "loss": 0.7148758172988892, "lr": 1.4739357596288036e-06, "epoch": 0.7516129032258064, "percentage": 37.58, "elapsed_time": "4:24:45", "remaining_time": "7:19:44"} +{"current_steps": 3263, "total_steps": 8680, "loss": 0.738334596157074, "lr": 1.4736002463411108e-06, "epoch": 0.7518433179723503, "percentage": 37.59, "elapsed_time": "4:24:51", "remaining_time": 
"7:19:41"} +{"current_steps": 3264, "total_steps": 8680, "loss": 0.7733340263366699, "lr": 1.4732646643109692e-06, "epoch": 0.752073732718894, "percentage": 37.6, "elapsed_time": "4:24:56", "remaining_time": "7:19:37"} +{"current_steps": 3265, "total_steps": 8680, "loss": 0.7882881164550781, "lr": 1.4729290135870883e-06, "epoch": 0.7523041474654378, "percentage": 37.62, "elapsed_time": "4:25:00", "remaining_time": "7:19:31"} +{"current_steps": 3266, "total_steps": 8680, "loss": 0.7908357381820679, "lr": 1.472593294218187e-06, "epoch": 0.7525345622119816, "percentage": 37.63, "elapsed_time": "4:25:06", "remaining_time": "7:19:28"} +{"current_steps": 3267, "total_steps": 8680, "loss": 0.8818062543869019, "lr": 1.4722575062529946e-06, "epoch": 0.7527649769585254, "percentage": 37.64, "elapsed_time": "4:25:11", "remaining_time": "7:19:24"} +{"current_steps": 3268, "total_steps": 8680, "loss": 0.7152599692344666, "lr": 1.4719216497402504e-06, "epoch": 0.7529953917050691, "percentage": 37.65, "elapsed_time": "4:25:17", "remaining_time": "7:19:20"} +{"current_steps": 3269, "total_steps": 8680, "loss": 0.8503165245056152, "lr": 1.4715857247287036e-06, "epoch": 0.7532258064516129, "percentage": 37.66, "elapsed_time": "4:25:23", "remaining_time": "7:19:17"} +{"current_steps": 3270, "total_steps": 8680, "loss": 0.8382623195648193, "lr": 1.4712497312671128e-06, "epoch": 0.7534562211981567, "percentage": 37.67, "elapsed_time": "4:25:27", "remaining_time": "7:19:11"} +{"current_steps": 3271, "total_steps": 8680, "loss": 0.8358533382415771, "lr": 1.4709136694042479e-06, "epoch": 0.7536866359447004, "percentage": 37.68, "elapsed_time": "4:25:31", "remaining_time": "7:19:04"} +{"current_steps": 3272, "total_steps": 8680, "loss": 0.6735624670982361, "lr": 1.4705775391888868e-06, "epoch": 0.7539170506912443, "percentage": 37.7, "elapsed_time": "4:25:37", "remaining_time": "7:19:01"} +{"current_steps": 3273, "total_steps": 8680, "loss": 0.8343949317932129, "lr": 1.470241340669819e-06, 
"epoch": 0.754147465437788, "percentage": 37.71, "elapsed_time": "4:25:41", "remaining_time": "7:18:56"} +{"current_steps": 3274, "total_steps": 8680, "loss": 0.8204318284988403, "lr": 1.4699050738958434e-06, "epoch": 0.7543778801843318, "percentage": 37.72, "elapsed_time": "4:25:47", "remaining_time": "7:18:51"} +{"current_steps": 3275, "total_steps": 8680, "loss": 0.7541854977607727, "lr": 1.4695687389157684e-06, "epoch": 0.7546082949308756, "percentage": 37.73, "elapsed_time": "4:25:51", "remaining_time": "7:18:45"} +{"current_steps": 3276, "total_steps": 8680, "loss": 0.8144943714141846, "lr": 1.4692323357784122e-06, "epoch": 0.7548387096774194, "percentage": 37.74, "elapsed_time": "4:25:54", "remaining_time": "7:18:38"} +{"current_steps": 3277, "total_steps": 8680, "loss": 0.9045677781105042, "lr": 1.468895864532604e-06, "epoch": 0.7550691244239631, "percentage": 37.75, "elapsed_time": "4:25:58", "remaining_time": "7:18:32"} +{"current_steps": 3278, "total_steps": 8680, "loss": 0.8818730115890503, "lr": 1.4685593252271816e-06, "epoch": 0.755299539170507, "percentage": 37.76, "elapsed_time": "4:26:03", "remaining_time": "7:18:26"} +{"current_steps": 3279, "total_steps": 8680, "loss": 0.8582229614257812, "lr": 1.4682227179109932e-06, "epoch": 0.7555299539170507, "percentage": 37.78, "elapsed_time": "4:26:07", "remaining_time": "7:18:21"} +{"current_steps": 3280, "total_steps": 8680, "loss": 0.8769974708557129, "lr": 1.4678860426328977e-06, "epoch": 0.7557603686635944, "percentage": 37.79, "elapsed_time": "4:26:13", "remaining_time": "7:18:17"} +{"current_steps": 3281, "total_steps": 8680, "loss": 0.8034937381744385, "lr": 1.467549299441762e-06, "epoch": 0.7559907834101383, "percentage": 37.8, "elapsed_time": "4:26:17", "remaining_time": "7:18:11"} +{"current_steps": 3282, "total_steps": 8680, "loss": 0.9057378768920898, "lr": 1.4672124883864646e-06, "epoch": 0.756221198156682, "percentage": 37.81, "elapsed_time": "4:26:21", "remaining_time": "7:18:05"} 
+{"current_steps": 3283, "total_steps": 8680, "loss": 0.8039969205856323, "lr": 1.4668756095158929e-06, "epoch": 0.7564516129032258, "percentage": 37.82, "elapsed_time": "4:26:27", "remaining_time": "7:18:01"} +{"current_steps": 3284, "total_steps": 8680, "loss": 0.887493908405304, "lr": 1.4665386628789448e-06, "epoch": 0.7566820276497696, "percentage": 37.83, "elapsed_time": "4:26:31", "remaining_time": "7:17:55"} +{"current_steps": 3285, "total_steps": 8680, "loss": 0.783561646938324, "lr": 1.4662016485245271e-06, "epoch": 0.7569124423963134, "percentage": 37.85, "elapsed_time": "4:26:36", "remaining_time": "7:17:50"} +{"current_steps": 3286, "total_steps": 8680, "loss": 0.7526337504386902, "lr": 1.4658645665015579e-06, "epoch": 0.7571428571428571, "percentage": 37.86, "elapsed_time": "4:26:40", "remaining_time": "7:17:45"} +{"current_steps": 3287, "total_steps": 8680, "loss": 0.8583099842071533, "lr": 1.4655274168589633e-06, "epoch": 0.757373271889401, "percentage": 37.87, "elapsed_time": "4:26:45", "remaining_time": "7:17:40"} +{"current_steps": 3288, "total_steps": 8680, "loss": 0.743253767490387, "lr": 1.4651901996456802e-06, "epoch": 0.7576036866359447, "percentage": 37.88, "elapsed_time": "4:26:51", "remaining_time": "7:17:37"} +{"current_steps": 3289, "total_steps": 8680, "loss": 0.8763987421989441, "lr": 1.4648529149106555e-06, "epoch": 0.7578341013824885, "percentage": 37.89, "elapsed_time": "4:26:56", "remaining_time": "7:17:32"} +{"current_steps": 3290, "total_steps": 8680, "loss": 0.8388645648956299, "lr": 1.4645155627028455e-06, "epoch": 0.7580645161290323, "percentage": 37.9, "elapsed_time": "4:27:01", "remaining_time": "7:17:28"} +{"current_steps": 3291, "total_steps": 8680, "loss": 0.8943589925765991, "lr": 1.4641781430712167e-06, "epoch": 0.7582949308755761, "percentage": 37.91, "elapsed_time": "4:27:06", "remaining_time": "7:17:23"} +{"current_steps": 3292, "total_steps": 8680, "loss": 0.9224259257316589, "lr": 1.463840656064745e-06, "epoch": 
0.7585253456221198, "percentage": 37.93, "elapsed_time": "4:27:10", "remaining_time": "7:17:17"} +{"current_steps": 3293, "total_steps": 8680, "loss": 0.5836232900619507, "lr": 1.463503101732416e-06, "epoch": 0.7587557603686635, "percentage": 37.94, "elapsed_time": "4:27:17", "remaining_time": "7:17:16"} +{"current_steps": 3294, "total_steps": 8680, "loss": 0.6700382828712463, "lr": 1.4631654801232255e-06, "epoch": 0.7589861751152074, "percentage": 37.95, "elapsed_time": "4:27:22", "remaining_time": "7:17:10"} +{"current_steps": 3295, "total_steps": 8680, "loss": 0.7876112461090088, "lr": 1.4628277912861785e-06, "epoch": 0.7592165898617511, "percentage": 37.96, "elapsed_time": "4:27:26", "remaining_time": "7:17:04"} +{"current_steps": 3296, "total_steps": 8680, "loss": 0.8410799503326416, "lr": 1.4624900352702905e-06, "epoch": 0.759447004608295, "percentage": 37.97, "elapsed_time": "4:27:30", "remaining_time": "7:16:58"} +{"current_steps": 3297, "total_steps": 8680, "loss": 0.9615974426269531, "lr": 1.4621522121245859e-06, "epoch": 0.7596774193548387, "percentage": 37.98, "elapsed_time": "4:27:34", "remaining_time": "7:16:51"} +{"current_steps": 3298, "total_steps": 8680, "loss": 0.7973389625549316, "lr": 1.4618143218980996e-06, "epoch": 0.7599078341013825, "percentage": 38.0, "elapsed_time": "4:27:40", "remaining_time": "7:16:49"} +{"current_steps": 3299, "total_steps": 8680, "loss": 0.7734094858169556, "lr": 1.461476364639876e-06, "epoch": 0.7601382488479262, "percentage": 38.01, "elapsed_time": "4:27:45", "remaining_time": "7:16:45"} +{"current_steps": 3300, "total_steps": 8680, "loss": 0.7365939617156982, "lr": 1.461138340398969e-06, "epoch": 0.7603686635944701, "percentage": 38.02, "elapsed_time": "4:27:50", "remaining_time": "7:16:40"} +{"current_steps": 3301, "total_steps": 8680, "loss": 0.822052001953125, "lr": 1.4608002492244421e-06, "epoch": 0.7605990783410138, "percentage": 38.03, "elapsed_time": "4:27:57", "remaining_time": "7:16:38"} +{"current_steps": 
3302, "total_steps": 8680, "loss": 0.7220577001571655, "lr": 1.460462091165369e-06, "epoch": 0.7608294930875577, "percentage": 38.04, "elapsed_time": "4:28:02", "remaining_time": "7:16:33"} +{"current_steps": 3303, "total_steps": 8680, "loss": 0.9795923233032227, "lr": 1.4601238662708332e-06, "epoch": 0.7610599078341014, "percentage": 38.05, "elapsed_time": "4:28:06", "remaining_time": "7:16:27"} +{"current_steps": 3304, "total_steps": 8680, "loss": 0.804523229598999, "lr": 1.4597855745899273e-06, "epoch": 0.7612903225806451, "percentage": 38.06, "elapsed_time": "4:28:10", "remaining_time": "7:16:21"} +{"current_steps": 3305, "total_steps": 8680, "loss": 0.7630297541618347, "lr": 1.4594472161717536e-06, "epoch": 0.761520737327189, "percentage": 38.08, "elapsed_time": "4:28:15", "remaining_time": "7:16:16"} +{"current_steps": 3306, "total_steps": 8680, "loss": 0.7088560461997986, "lr": 1.4591087910654254e-06, "epoch": 0.7617511520737327, "percentage": 38.09, "elapsed_time": "4:28:19", "remaining_time": "7:16:10"} +{"current_steps": 3307, "total_steps": 8680, "loss": 0.6627416014671326, "lr": 1.4587702993200637e-06, "epoch": 0.7619815668202765, "percentage": 38.1, "elapsed_time": "4:28:25", "remaining_time": "7:16:06"} +{"current_steps": 3308, "total_steps": 8680, "loss": 0.7931111454963684, "lr": 1.4584317409848001e-06, "epoch": 0.7622119815668202, "percentage": 38.11, "elapsed_time": "4:28:29", "remaining_time": "7:16:00"} +{"current_steps": 3309, "total_steps": 8680, "loss": 0.8107850551605225, "lr": 1.4580931161087763e-06, "epoch": 0.7624423963133641, "percentage": 38.12, "elapsed_time": "4:28:33", "remaining_time": "7:15:54"} +{"current_steps": 3310, "total_steps": 8680, "loss": 0.8211404085159302, "lr": 1.4577544247411431e-06, "epoch": 0.7626728110599078, "percentage": 38.13, "elapsed_time": "4:28:37", "remaining_time": "7:15:47"} +{"current_steps": 3311, "total_steps": 8680, "loss": 0.9861341714859009, "lr": 1.457415666931061e-06, "epoch": 0.7629032258064516, 
"percentage": 38.15, "elapsed_time": "4:28:40", "remaining_time": "7:15:40"} +{"current_steps": 3312, "total_steps": 8680, "loss": 0.8963409662246704, "lr": 1.4570768427277007e-06, "epoch": 0.7631336405529954, "percentage": 38.16, "elapsed_time": "4:28:46", "remaining_time": "7:15:37"} +{"current_steps": 3313, "total_steps": 8680, "loss": 0.7510147094726562, "lr": 1.4567379521802416e-06, "epoch": 0.7633640552995392, "percentage": 38.17, "elapsed_time": "4:28:50", "remaining_time": "7:15:31"} +{"current_steps": 3314, "total_steps": 8680, "loss": 0.7761805057525635, "lr": 1.4563989953378734e-06, "epoch": 0.7635944700460829, "percentage": 38.18, "elapsed_time": "4:28:55", "remaining_time": "7:15:26"} +{"current_steps": 3315, "total_steps": 8680, "loss": 0.6202781200408936, "lr": 1.4560599722497953e-06, "epoch": 0.7638248847926268, "percentage": 38.19, "elapsed_time": "4:29:02", "remaining_time": "7:15:24"} +{"current_steps": 3316, "total_steps": 8680, "loss": 0.711891770362854, "lr": 1.4557208829652159e-06, "epoch": 0.7640552995391705, "percentage": 38.2, "elapsed_time": "4:29:06", "remaining_time": "7:15:19"} +{"current_steps": 3317, "total_steps": 8680, "loss": 0.8689517974853516, "lr": 1.4553817275333537e-06, "epoch": 0.7642857142857142, "percentage": 38.21, "elapsed_time": "4:29:11", "remaining_time": "7:15:13"} +{"current_steps": 3318, "total_steps": 8680, "loss": 0.7323688268661499, "lr": 1.4550425060034365e-06, "epoch": 0.7645161290322581, "percentage": 38.23, "elapsed_time": "4:29:15", "remaining_time": "7:15:08"} +{"current_steps": 3319, "total_steps": 8680, "loss": 0.8934407234191895, "lr": 1.4547032184247022e-06, "epoch": 0.7647465437788018, "percentage": 38.24, "elapsed_time": "4:29:20", "remaining_time": "7:15:02"} +{"current_steps": 3320, "total_steps": 8680, "loss": 0.7729885578155518, "lr": 1.4543638648463975e-06, "epoch": 0.7649769585253456, "percentage": 38.25, "elapsed_time": "4:29:25", "remaining_time": "7:14:58"} +{"current_steps": 3321, 
"total_steps": 8680, "loss": 0.8962388038635254, "lr": 1.454024445317779e-06, "epoch": 0.7652073732718894, "percentage": 38.26, "elapsed_time": "4:29:30", "remaining_time": "7:14:54"} +{"current_steps": 3322, "total_steps": 8680, "loss": 0.8655213117599487, "lr": 1.4536849598881137e-06, "epoch": 0.7654377880184332, "percentage": 38.27, "elapsed_time": "4:29:35", "remaining_time": "7:14:49"} +{"current_steps": 3323, "total_steps": 8680, "loss": 0.6471779346466064, "lr": 1.453345408606677e-06, "epoch": 0.7656682027649769, "percentage": 38.28, "elapsed_time": "4:29:40", "remaining_time": "7:14:44"} +{"current_steps": 3324, "total_steps": 8680, "loss": 0.8665071129798889, "lr": 1.4530057915227545e-06, "epoch": 0.7658986175115208, "percentage": 38.29, "elapsed_time": "4:29:45", "remaining_time": "7:14:40"} +{"current_steps": 3325, "total_steps": 8680, "loss": 0.9504371285438538, "lr": 1.4526661086856407e-06, "epoch": 0.7661290322580645, "percentage": 38.31, "elapsed_time": "4:29:50", "remaining_time": "7:14:34"} +{"current_steps": 3326, "total_steps": 8680, "loss": 0.8122013807296753, "lr": 1.452326360144641e-06, "epoch": 0.7663594470046083, "percentage": 38.32, "elapsed_time": "4:29:54", "remaining_time": "7:14:29"} +{"current_steps": 3327, "total_steps": 8680, "loss": 0.817001223564148, "lr": 1.4519865459490687e-06, "epoch": 0.7665898617511521, "percentage": 38.33, "elapsed_time": "4:30:00", "remaining_time": "7:14:25"} +{"current_steps": 3328, "total_steps": 8680, "loss": 0.732322096824646, "lr": 1.4516466661482474e-06, "epoch": 0.7668202764976959, "percentage": 38.34, "elapsed_time": "4:30:06", "remaining_time": "7:14:23"} +{"current_steps": 3329, "total_steps": 8680, "loss": 0.7961580157279968, "lr": 1.4513067207915106e-06, "epoch": 0.7670506912442396, "percentage": 38.35, "elapsed_time": "4:30:10", "remaining_time": "7:14:17"} +{"current_steps": 3330, "total_steps": 8680, "loss": 0.7660717368125916, "lr": 1.4509667099282007e-06, "epoch": 0.7672811059907834, 
"percentage": 38.36, "elapsed_time": "4:30:17", "remaining_time": "7:14:14"} +{"current_steps": 3331, "total_steps": 8680, "loss": 0.8279193639755249, "lr": 1.4506266336076698e-06, "epoch": 0.7675115207373272, "percentage": 38.38, "elapsed_time": "4:30:22", "remaining_time": "7:14:10"} +{"current_steps": 3332, "total_steps": 8680, "loss": 0.7050153017044067, "lr": 1.4502864918792796e-06, "epoch": 0.7677419354838709, "percentage": 38.39, "elapsed_time": "4:30:28", "remaining_time": "7:14:07"} +{"current_steps": 3333, "total_steps": 8680, "loss": 0.8146064877510071, "lr": 1.4499462847924013e-06, "epoch": 0.7679723502304148, "percentage": 38.4, "elapsed_time": "4:30:33", "remaining_time": "7:14:02"} +{"current_steps": 3334, "total_steps": 8680, "loss": 0.8300814628601074, "lr": 1.4496060123964153e-06, "epoch": 0.7682027649769585, "percentage": 38.41, "elapsed_time": "4:30:37", "remaining_time": "7:13:57"} +{"current_steps": 3335, "total_steps": 8680, "loss": 0.8240403532981873, "lr": 1.4492656747407117e-06, "epoch": 0.7684331797235023, "percentage": 38.42, "elapsed_time": "4:30:41", "remaining_time": "7:13:50"} +{"current_steps": 3336, "total_steps": 8680, "loss": 0.901625394821167, "lr": 1.4489252718746908e-06, "epoch": 0.7686635944700461, "percentage": 38.43, "elapsed_time": "4:30:45", "remaining_time": "7:13:44"} +{"current_steps": 3337, "total_steps": 8680, "loss": 0.827139675617218, "lr": 1.4485848038477604e-06, "epoch": 0.7688940092165899, "percentage": 38.44, "elapsed_time": "4:30:50", "remaining_time": "7:13:38"} +{"current_steps": 3338, "total_steps": 8680, "loss": 0.7032946348190308, "lr": 1.4482442707093397e-06, "epoch": 0.7691244239631336, "percentage": 38.46, "elapsed_time": "4:30:54", "remaining_time": "7:13:32"} +{"current_steps": 3339, "total_steps": 8680, "loss": 0.6805816888809204, "lr": 1.4479036725088564e-06, "epoch": 0.7693548387096775, "percentage": 38.47, "elapsed_time": "4:30:59", "remaining_time": "7:13:28"} +{"current_steps": 3340, 
"total_steps": 8680, "loss": 0.673591136932373, "lr": 1.447563009295748e-06, "epoch": 0.7695852534562212, "percentage": 38.48, "elapsed_time": "4:31:04", "remaining_time": "7:13:24"} +{"current_steps": 3341, "total_steps": 8680, "loss": 0.6513386964797974, "lr": 1.4472222811194614e-06, "epoch": 0.7698156682027649, "percentage": 38.49, "elapsed_time": "4:31:10", "remaining_time": "7:13:20"} +{"current_steps": 3342, "total_steps": 8680, "loss": 0.7367297410964966, "lr": 1.4468814880294529e-06, "epoch": 0.7700460829493088, "percentage": 38.5, "elapsed_time": "4:31:16", "remaining_time": "7:13:16"} +{"current_steps": 3343, "total_steps": 8680, "loss": 0.7393670082092285, "lr": 1.4465406300751878e-06, "epoch": 0.7702764976958525, "percentage": 38.51, "elapsed_time": "4:31:21", "remaining_time": "7:13:12"} +{"current_steps": 3344, "total_steps": 8680, "loss": 0.7525930404663086, "lr": 1.4461997073061411e-06, "epoch": 0.7705069124423963, "percentage": 38.53, "elapsed_time": "4:31:25", "remaining_time": "7:13:06"} +{"current_steps": 3345, "total_steps": 8680, "loss": 0.6679942011833191, "lr": 1.445858719771798e-06, "epoch": 0.7707373271889401, "percentage": 38.54, "elapsed_time": "4:31:30", "remaining_time": "7:13:01"} +{"current_steps": 3346, "total_steps": 8680, "loss": 0.8440653085708618, "lr": 1.4455176675216518e-06, "epoch": 0.7709677419354839, "percentage": 38.55, "elapsed_time": "4:31:33", "remaining_time": "7:12:54"} +{"current_steps": 3347, "total_steps": 8680, "loss": 0.8765773177146912, "lr": 1.4451765506052063e-06, "epoch": 0.7711981566820276, "percentage": 38.56, "elapsed_time": "4:31:39", "remaining_time": "7:12:51"} +{"current_steps": 3348, "total_steps": 8680, "loss": 0.7309157848358154, "lr": 1.4448353690719732e-06, "epoch": 0.7714285714285715, "percentage": 38.57, "elapsed_time": "4:31:45", "remaining_time": "7:12:47"} +{"current_steps": 3349, "total_steps": 8680, "loss": 0.8043340444564819, "lr": 1.4444941229714758e-06, "epoch": 0.7716589861751152, 
"percentage": 38.58, "elapsed_time": "4:31:52", "remaining_time": "7:12:46"} +{"current_steps": 3350, "total_steps": 8680, "loss": 0.6528831124305725, "lr": 1.4441528123532443e-06, "epoch": 0.771889400921659, "percentage": 38.59, "elapsed_time": "4:31:56", "remaining_time": "7:12:40"} +{"current_steps": 3351, "total_steps": 8680, "loss": 0.7973155975341797, "lr": 1.4438114372668202e-06, "epoch": 0.7721198156682028, "percentage": 38.61, "elapsed_time": "4:32:01", "remaining_time": "7:12:34"} +{"current_steps": 3352, "total_steps": 8680, "loss": 0.940142810344696, "lr": 1.443469997761754e-06, "epoch": 0.7723502304147466, "percentage": 38.62, "elapsed_time": "4:32:05", "remaining_time": "7:12:30"} +{"current_steps": 3353, "total_steps": 8680, "loss": 0.7936829328536987, "lr": 1.443128493887604e-06, "epoch": 0.7725806451612903, "percentage": 38.63, "elapsed_time": "4:32:12", "remaining_time": "7:12:27"} +{"current_steps": 3354, "total_steps": 8680, "loss": 0.8369218111038208, "lr": 1.44278692569394e-06, "epoch": 0.772811059907834, "percentage": 38.64, "elapsed_time": "4:32:16", "remaining_time": "7:12:21"} +{"current_steps": 3355, "total_steps": 8680, "loss": 0.9305802583694458, "lr": 1.4424452932303398e-06, "epoch": 0.7730414746543779, "percentage": 38.65, "elapsed_time": "4:32:20", "remaining_time": "7:12:15"} +{"current_steps": 3356, "total_steps": 8680, "loss": 0.913454532623291, "lr": 1.4421035965463916e-06, "epoch": 0.7732718894009216, "percentage": 38.66, "elapsed_time": "4:32:25", "remaining_time": "7:12:11"} +{"current_steps": 3357, "total_steps": 8680, "loss": 0.8552114963531494, "lr": 1.4417618356916912e-06, "epoch": 0.7735023041474655, "percentage": 38.68, "elapsed_time": "4:32:30", "remaining_time": "7:12:06"} +{"current_steps": 3358, "total_steps": 8680, "loss": 0.8674488067626953, "lr": 1.4414200107158452e-06, "epoch": 0.7737327188940092, "percentage": 38.69, "elapsed_time": "4:32:36", "remaining_time": "7:12:02"} +{"current_steps": 3359, "total_steps": 
8680, "loss": 0.9142898321151733, "lr": 1.441078121668469e-06, "epoch": 0.773963133640553, "percentage": 38.7, "elapsed_time": "4:32:41", "remaining_time": "7:11:57"} +{"current_steps": 3360, "total_steps": 8680, "loss": 0.8258639574050903, "lr": 1.4407361685991872e-06, "epoch": 0.7741935483870968, "percentage": 38.71, "elapsed_time": "4:32:45", "remaining_time": "7:11:51"} +{"current_steps": 3361, "total_steps": 8680, "loss": 0.773646354675293, "lr": 1.4403941515576343e-06, "epoch": 0.7744239631336406, "percentage": 38.72, "elapsed_time": "4:32:49", "remaining_time": "7:11:46"} +{"current_steps": 3362, "total_steps": 8680, "loss": 0.9481985569000244, "lr": 1.440052070593453e-06, "epoch": 0.7746543778801843, "percentage": 38.73, "elapsed_time": "4:32:54", "remaining_time": "7:11:41"} +{"current_steps": 3363, "total_steps": 8680, "loss": 0.7915977239608765, "lr": 1.4397099257562965e-06, "epoch": 0.7748847926267282, "percentage": 38.74, "elapsed_time": "4:33:00", "remaining_time": "7:11:37"} +{"current_steps": 3364, "total_steps": 8680, "loss": 0.887650191783905, "lr": 1.4393677170958261e-06, "epoch": 0.7751152073732719, "percentage": 38.76, "elapsed_time": "4:33:05", "remaining_time": "7:11:33"} +{"current_steps": 3365, "total_steps": 8680, "loss": 0.8516546487808228, "lr": 1.4390254446617137e-06, "epoch": 0.7753456221198156, "percentage": 38.77, "elapsed_time": "4:33:10", "remaining_time": "7:11:28"} +{"current_steps": 3366, "total_steps": 8680, "loss": 0.8076090812683105, "lr": 1.4386831085036386e-06, "epoch": 0.7755760368663595, "percentage": 38.78, "elapsed_time": "4:33:15", "remaining_time": "7:11:24"} +{"current_steps": 3367, "total_steps": 8680, "loss": 0.7480059862136841, "lr": 1.4383407086712913e-06, "epoch": 0.7758064516129032, "percentage": 38.79, "elapsed_time": "4:33:21", "remaining_time": "7:11:20"} +{"current_steps": 3368, "total_steps": 8680, "loss": 0.8586190938949585, "lr": 1.4379982452143704e-06, "epoch": 0.776036866359447, "percentage": 38.8, 
"elapsed_time": "4:33:25", "remaining_time": "7:11:14"} +{"current_steps": 3369, "total_steps": 8680, "loss": 0.7581472396850586, "lr": 1.4376557181825842e-06, "epoch": 0.7762672811059907, "percentage": 38.81, "elapsed_time": "4:33:30", "remaining_time": "7:11:10"} +{"current_steps": 3370, "total_steps": 8680, "loss": 0.7482568621635437, "lr": 1.4373131276256495e-06, "epoch": 0.7764976958525346, "percentage": 38.82, "elapsed_time": "4:33:36", "remaining_time": "7:11:06"} +{"current_steps": 3371, "total_steps": 8680, "loss": 0.8822590112686157, "lr": 1.4369704735932935e-06, "epoch": 0.7767281105990783, "percentage": 38.84, "elapsed_time": "4:33:40", "remaining_time": "7:11:01"} +{"current_steps": 3372, "total_steps": 8680, "loss": 0.7762279510498047, "lr": 1.4366277561352517e-06, "epoch": 0.7769585253456222, "percentage": 38.85, "elapsed_time": "4:33:46", "remaining_time": "7:10:57"} +{"current_steps": 3373, "total_steps": 8680, "loss": 0.8059147596359253, "lr": 1.4362849753012692e-06, "epoch": 0.7771889400921659, "percentage": 38.86, "elapsed_time": "4:33:51", "remaining_time": "7:10:53"} +{"current_steps": 3374, "total_steps": 8680, "loss": 0.778538703918457, "lr": 1.4359421311411e-06, "epoch": 0.7774193548387097, "percentage": 38.87, "elapsed_time": "4:33:56", "remaining_time": "7:10:48"} +{"current_steps": 3375, "total_steps": 8680, "loss": 0.9422975778579712, "lr": 1.4355992237045077e-06, "epoch": 0.7776497695852534, "percentage": 38.88, "elapsed_time": "4:33:59", "remaining_time": "7:10:41"} +{"current_steps": 3376, "total_steps": 8680, "loss": 0.7437118291854858, "lr": 1.4352562530412645e-06, "epoch": 0.7778801843317973, "percentage": 38.89, "elapsed_time": "4:34:05", "remaining_time": "7:10:37"} +{"current_steps": 3377, "total_steps": 8680, "loss": 0.6935930252075195, "lr": 1.4349132192011525e-06, "epoch": 0.778110599078341, "percentage": 38.91, "elapsed_time": "4:34:10", "remaining_time": "7:10:32"} +{"current_steps": 3378, "total_steps": 8680, "loss": 
0.7797117829322815, "lr": 1.4345701222339628e-06, "epoch": 0.7783410138248847, "percentage": 38.92, "elapsed_time": "4:34:14", "remaining_time": "7:10:27"} +{"current_steps": 3379, "total_steps": 8680, "loss": 0.8795931339263916, "lr": 1.434226962189495e-06, "epoch": 0.7785714285714286, "percentage": 38.93, "elapsed_time": "4:34:19", "remaining_time": "7:10:22"} +{"current_steps": 3380, "total_steps": 8680, "loss": 0.8936992287635803, "lr": 1.433883739117558e-06, "epoch": 0.7788018433179723, "percentage": 38.94, "elapsed_time": "4:34:24", "remaining_time": "7:10:17"} +{"current_steps": 3381, "total_steps": 8680, "loss": 0.9142701625823975, "lr": 1.4335404530679708e-06, "epoch": 0.7790322580645161, "percentage": 38.95, "elapsed_time": "4:34:28", "remaining_time": "7:10:10"} +{"current_steps": 3382, "total_steps": 8680, "loss": 0.8996907472610474, "lr": 1.4331971040905613e-06, "epoch": 0.7792626728110599, "percentage": 38.96, "elapsed_time": "4:34:32", "remaining_time": "7:10:04"} +{"current_steps": 3383, "total_steps": 8680, "loss": 0.9645330905914307, "lr": 1.4328536922351654e-06, "epoch": 0.7794930875576037, "percentage": 38.97, "elapsed_time": "4:34:36", "remaining_time": "7:09:58"} +{"current_steps": 3384, "total_steps": 8680, "loss": 0.5122036933898926, "lr": 1.4325102175516289e-06, "epoch": 0.7797235023041474, "percentage": 38.99, "elapsed_time": "4:34:41", "remaining_time": "7:09:53"} +{"current_steps": 3385, "total_steps": 8680, "loss": 0.6556990742683411, "lr": 1.432166680089807e-06, "epoch": 0.7799539170506913, "percentage": 39.0, "elapsed_time": "4:34:46", "remaining_time": "7:09:49"} +{"current_steps": 3386, "total_steps": 8680, "loss": 0.6642920970916748, "lr": 1.4318230798995634e-06, "epoch": 0.780184331797235, "percentage": 39.01, "elapsed_time": "4:34:51", "remaining_time": "7:09:43"} +{"current_steps": 3387, "total_steps": 8680, "loss": 0.9373915195465088, "lr": 1.4314794170307718e-06, "epoch": 0.7804147465437788, "percentage": 39.02, 
"elapsed_time": "4:34:54", "remaining_time": "7:09:36"} +{"current_steps": 3388, "total_steps": 8680, "loss": 0.8295063972473145, "lr": 1.4311356915333139e-06, "epoch": 0.7806451612903226, "percentage": 39.03, "elapsed_time": "4:34:59", "remaining_time": "7:09:32"} +{"current_steps": 3389, "total_steps": 8680, "loss": 0.8167035579681396, "lr": 1.4307919034570809e-06, "epoch": 0.7808755760368664, "percentage": 39.04, "elapsed_time": "4:35:03", "remaining_time": "7:09:25"} +{"current_steps": 3390, "total_steps": 8680, "loss": 0.8444087505340576, "lr": 1.4304480528519736e-06, "epoch": 0.7811059907834101, "percentage": 39.06, "elapsed_time": "4:35:07", "remaining_time": "7:09:19"} +{"current_steps": 3391, "total_steps": 8680, "loss": 0.7753941416740417, "lr": 1.4301041397679012e-06, "epoch": 0.7813364055299539, "percentage": 39.07, "elapsed_time": "4:35:12", "remaining_time": "7:09:15"} +{"current_steps": 3392, "total_steps": 8680, "loss": 0.7885915040969849, "lr": 1.4297601642547824e-06, "epoch": 0.7815668202764977, "percentage": 39.08, "elapsed_time": "4:35:16", "remaining_time": "7:09:08"} +{"current_steps": 3393, "total_steps": 8680, "loss": 0.730733335018158, "lr": 1.4294161263625444e-06, "epoch": 0.7817972350230414, "percentage": 39.09, "elapsed_time": "4:35:23", "remaining_time": "7:09:06"} +{"current_steps": 3394, "total_steps": 8680, "loss": 0.8505427837371826, "lr": 1.4290720261411241e-06, "epoch": 0.7820276497695853, "percentage": 39.1, "elapsed_time": "4:35:27", "remaining_time": "7:09:00"} +{"current_steps": 3395, "total_steps": 8680, "loss": 0.7370787858963013, "lr": 1.4287278636404676e-06, "epoch": 0.782258064516129, "percentage": 39.11, "elapsed_time": "4:35:32", "remaining_time": "7:08:56"} +{"current_steps": 3396, "total_steps": 8680, "loss": 0.6776250600814819, "lr": 1.428383638910529e-06, "epoch": 0.7824884792626728, "percentage": 39.12, "elapsed_time": "4:35:38", "remaining_time": "7:08:52"} +{"current_steps": 3397, "total_steps": 8680, "loss": 
0.8878101706504822, "lr": 1.4280393520012726e-06, "epoch": 0.7827188940092166, "percentage": 39.14, "elapsed_time": "4:35:44", "remaining_time": "7:08:49"} +{"current_steps": 3398, "total_steps": 8680, "loss": 0.789238691329956, "lr": 1.427695002962671e-06, "epoch": 0.7829493087557604, "percentage": 39.15, "elapsed_time": "4:35:50", "remaining_time": "7:08:46"} +{"current_steps": 3399, "total_steps": 8680, "loss": 0.772524356842041, "lr": 1.4273505918447052e-06, "epoch": 0.7831797235023041, "percentage": 39.16, "elapsed_time": "4:35:54", "remaining_time": "7:08:40"} +{"current_steps": 3400, "total_steps": 8680, "loss": 0.682374119758606, "lr": 1.4270061186973673e-06, "epoch": 0.783410138248848, "percentage": 39.17, "elapsed_time": "4:35:59", "remaining_time": "7:08:35"} +{"current_steps": 3401, "total_steps": 8680, "loss": 0.874775767326355, "lr": 1.4266615835706566e-06, "epoch": 0.7836405529953917, "percentage": 39.18, "elapsed_time": "4:36:07", "remaining_time": "7:08:36"} +{"current_steps": 3402, "total_steps": 8680, "loss": 0.9141736626625061, "lr": 1.4263169865145816e-06, "epoch": 0.7838709677419354, "percentage": 39.19, "elapsed_time": "4:36:13", "remaining_time": "7:08:32"} +{"current_steps": 3403, "total_steps": 8680, "loss": 0.8533145189285278, "lr": 1.4259723275791603e-06, "epoch": 0.7841013824884793, "percentage": 39.21, "elapsed_time": "4:36:18", "remaining_time": "7:08:28"} +{"current_steps": 3404, "total_steps": 8680, "loss": 0.7920266389846802, "lr": 1.4256276068144198e-06, "epoch": 0.784331797235023, "percentage": 39.22, "elapsed_time": "4:36:24", "remaining_time": "7:08:24"} +{"current_steps": 3405, "total_steps": 8680, "loss": 0.7822731733322144, "lr": 1.4252828242703957e-06, "epoch": 0.7845622119815668, "percentage": 39.23, "elapsed_time": "4:36:30", "remaining_time": "7:08:21"} +{"current_steps": 3406, "total_steps": 8680, "loss": 0.7103791832923889, "lr": 1.4249379799971324e-06, "epoch": 0.7847926267281106, "percentage": 39.24, "elapsed_time": 
"4:36:36", "remaining_time": "7:08:18"} +{"current_steps": 3407, "total_steps": 8680, "loss": 0.7857639789581299, "lr": 1.4245930740446841e-06, "epoch": 0.7850230414746544, "percentage": 39.25, "elapsed_time": "4:36:41", "remaining_time": "7:08:14"} +{"current_steps": 3408, "total_steps": 8680, "loss": 0.8069730997085571, "lr": 1.4242481064631134e-06, "epoch": 0.7852534562211981, "percentage": 39.26, "elapsed_time": "4:36:47", "remaining_time": "7:08:10"} +{"current_steps": 3409, "total_steps": 8680, "loss": 0.8758031129837036, "lr": 1.4239030773024912e-06, "epoch": 0.785483870967742, "percentage": 39.27, "elapsed_time": "4:36:52", "remaining_time": "7:08:05"} +{"current_steps": 3410, "total_steps": 8680, "loss": 0.895712673664093, "lr": 1.4235579866128983e-06, "epoch": 0.7857142857142857, "percentage": 39.29, "elapsed_time": "4:36:58", "remaining_time": "7:08:03"} +{"current_steps": 3411, "total_steps": 8680, "loss": 0.7904561758041382, "lr": 1.423212834444425e-06, "epoch": 0.7859447004608295, "percentage": 39.3, "elapsed_time": "4:37:03", "remaining_time": "7:07:58"} +{"current_steps": 3412, "total_steps": 8680, "loss": 0.9322203993797302, "lr": 1.4228676208471685e-06, "epoch": 0.7861751152073733, "percentage": 39.31, "elapsed_time": "4:37:09", "remaining_time": "7:07:54"} +{"current_steps": 3413, "total_steps": 8680, "loss": 0.9693628549575806, "lr": 1.422522345871237e-06, "epoch": 0.7864055299539171, "percentage": 39.32, "elapsed_time": "4:37:15", "remaining_time": "7:07:51"} +{"current_steps": 3414, "total_steps": 8680, "loss": 0.6737014651298523, "lr": 1.4221770095667462e-06, "epoch": 0.7866359447004608, "percentage": 39.33, "elapsed_time": "4:37:19", "remaining_time": "7:07:45"} +{"current_steps": 3415, "total_steps": 8680, "loss": 0.8682050108909607, "lr": 1.4218316119838215e-06, "epoch": 0.7868663594470046, "percentage": 39.34, "elapsed_time": "4:37:24", "remaining_time": "7:07:40"} +{"current_steps": 3416, "total_steps": 8680, "loss": 0.7920347452163696, 
"lr": 1.4214861531725966e-06, "epoch": 0.7870967741935484, "percentage": 39.35, "elapsed_time": "4:37:28", "remaining_time": "7:07:34"} +{"current_steps": 3417, "total_steps": 8680, "loss": 0.8330510854721069, "lr": 1.4211406331832144e-06, "epoch": 0.7873271889400921, "percentage": 39.37, "elapsed_time": "4:37:33", "remaining_time": "7:07:29"} +{"current_steps": 3418, "total_steps": 8680, "loss": 0.8314074873924255, "lr": 1.4207950520658272e-06, "epoch": 0.787557603686636, "percentage": 39.38, "elapsed_time": "4:37:37", "remaining_time": "7:07:24"} +{"current_steps": 3419, "total_steps": 8680, "loss": 0.7045331001281738, "lr": 1.420449409870595e-06, "epoch": 0.7877880184331797, "percentage": 39.39, "elapsed_time": "4:37:42", "remaining_time": "7:07:19"} +{"current_steps": 3420, "total_steps": 8680, "loss": 0.7825411558151245, "lr": 1.4201037066476876e-06, "epoch": 0.7880184331797235, "percentage": 39.4, "elapsed_time": "4:37:47", "remaining_time": "7:07:14"} +{"current_steps": 3421, "total_steps": 8680, "loss": 0.6960075497627258, "lr": 1.4197579424472834e-06, "epoch": 0.7882488479262673, "percentage": 39.41, "elapsed_time": "4:37:51", "remaining_time": "7:07:09"} +{"current_steps": 3422, "total_steps": 8680, "loss": 0.8366748094558716, "lr": 1.4194121173195694e-06, "epoch": 0.7884792626728111, "percentage": 39.42, "elapsed_time": "4:37:55", "remaining_time": "7:07:03"} +{"current_steps": 3423, "total_steps": 8680, "loss": 0.8859039545059204, "lr": 1.4190662313147419e-06, "epoch": 0.7887096774193548, "percentage": 39.44, "elapsed_time": "4:38:01", "remaining_time": "7:06:59"} +{"current_steps": 3424, "total_steps": 8680, "loss": 0.7098245620727539, "lr": 1.4187202844830057e-06, "epoch": 0.7889400921658987, "percentage": 39.45, "elapsed_time": "4:38:07", "remaining_time": "7:06:55"} +{"current_steps": 3425, "total_steps": 8680, "loss": 0.7410455942153931, "lr": 1.4183742768745743e-06, "epoch": 0.7891705069124424, "percentage": 39.46, "elapsed_time": "4:38:12", 
"remaining_time": "7:06:51"} +{"current_steps": 3426, "total_steps": 8680, "loss": 0.7414010763168335, "lr": 1.4180282085396706e-06, "epoch": 0.7894009216589861, "percentage": 39.47, "elapsed_time": "4:38:17", "remaining_time": "7:06:47"} +{"current_steps": 3427, "total_steps": 8680, "loss": 0.9043526649475098, "lr": 1.417682079528526e-06, "epoch": 0.78963133640553, "percentage": 39.48, "elapsed_time": "4:38:21", "remaining_time": "7:06:40"} +{"current_steps": 3428, "total_steps": 8680, "loss": 0.7709499597549438, "lr": 1.4173358898913804e-06, "epoch": 0.7898617511520737, "percentage": 39.49, "elapsed_time": "4:38:27", "remaining_time": "7:06:37"} +{"current_steps": 3429, "total_steps": 8680, "loss": 0.7499940395355225, "lr": 1.416989639678483e-06, "epoch": 0.7900921658986175, "percentage": 39.5, "elapsed_time": "4:38:32", "remaining_time": "7:06:33"} +{"current_steps": 3430, "total_steps": 8680, "loss": 0.7401680946350098, "lr": 1.4166433289400911e-06, "epoch": 0.7903225806451613, "percentage": 39.52, "elapsed_time": "4:38:39", "remaining_time": "7:06:31"} +{"current_steps": 3431, "total_steps": 8680, "loss": 1.0132567882537842, "lr": 1.4162969577264718e-06, "epoch": 0.7905529953917051, "percentage": 39.53, "elapsed_time": "4:38:43", "remaining_time": "7:06:25"} +{"current_steps": 3432, "total_steps": 8680, "loss": 0.8438389301300049, "lr": 1.4159505260879004e-06, "epoch": 0.7907834101382488, "percentage": 39.54, "elapsed_time": "4:38:48", "remaining_time": "7:06:20"} +{"current_steps": 3433, "total_steps": 8680, "loss": 0.9149703979492188, "lr": 1.4156040340746603e-06, "epoch": 0.7910138248847927, "percentage": 39.55, "elapsed_time": "4:38:52", "remaining_time": "7:06:14"} +{"current_steps": 3434, "total_steps": 8680, "loss": 0.9141047596931458, "lr": 1.4152574817370451e-06, "epoch": 0.7912442396313364, "percentage": 39.56, "elapsed_time": "4:38:55", "remaining_time": "7:06:06"} +{"current_steps": 3435, "total_steps": 8680, "loss": 0.6896570324897766, "lr": 
1.414910869125356e-06, "epoch": 0.7914746543778802, "percentage": 39.57, "elapsed_time": "4:39:01", "remaining_time": "7:06:03"} +{"current_steps": 3436, "total_steps": 8680, "loss": 0.742916464805603, "lr": 1.4145641962899035e-06, "epoch": 0.791705069124424, "percentage": 39.59, "elapsed_time": "4:39:06", "remaining_time": "7:05:58"} +{"current_steps": 3437, "total_steps": 8680, "loss": 0.9315029382705688, "lr": 1.414217463281007e-06, "epoch": 0.7919354838709678, "percentage": 39.6, "elapsed_time": "4:39:11", "remaining_time": "7:05:53"} +{"current_steps": 3438, "total_steps": 8680, "loss": 0.7645175457000732, "lr": 1.4138706701489942e-06, "epoch": 0.7921658986175115, "percentage": 39.61, "elapsed_time": "4:39:15", "remaining_time": "7:05:48"} +{"current_steps": 3439, "total_steps": 8680, "loss": 0.8253934383392334, "lr": 1.413523816944201e-06, "epoch": 0.7923963133640552, "percentage": 39.62, "elapsed_time": "4:39:20", "remaining_time": "7:05:42"} +{"current_steps": 3440, "total_steps": 8680, "loss": 0.8650136590003967, "lr": 1.4131769037169736e-06, "epoch": 0.7926267281105991, "percentage": 39.63, "elapsed_time": "4:39:25", "remaining_time": "7:05:38"} +{"current_steps": 3441, "total_steps": 8680, "loss": 0.7453975677490234, "lr": 1.4128299305176654e-06, "epoch": 0.7928571428571428, "percentage": 39.64, "elapsed_time": "4:39:30", "remaining_time": "7:05:33"} +{"current_steps": 3442, "total_steps": 8680, "loss": 0.9121813774108887, "lr": 1.4124828973966392e-06, "epoch": 0.7930875576036867, "percentage": 39.65, "elapsed_time": "4:39:35", "remaining_time": "7:05:29"} +{"current_steps": 3443, "total_steps": 8680, "loss": 0.9097952842712402, "lr": 1.4121358044042667e-06, "epoch": 0.7933179723502304, "percentage": 39.67, "elapsed_time": "4:39:40", "remaining_time": "7:05:24"} +{"current_steps": 3444, "total_steps": 8680, "loss": 0.7185770273208618, "lr": 1.4117886515909277e-06, "epoch": 0.7935483870967742, "percentage": 39.68, "elapsed_time": "4:39:45", 
"remaining_time": "7:05:19"} +{"current_steps": 3445, "total_steps": 8680, "loss": 0.8192715644836426, "lr": 1.4114414390070111e-06, "epoch": 0.793778801843318, "percentage": 39.69, "elapsed_time": "4:39:50", "remaining_time": "7:05:14"} +{"current_steps": 3446, "total_steps": 8680, "loss": 0.7864251136779785, "lr": 1.4110941667029143e-06, "epoch": 0.7940092165898618, "percentage": 39.7, "elapsed_time": "4:39:55", "remaining_time": "7:05:10"} +{"current_steps": 3447, "total_steps": 8680, "loss": 0.7433357834815979, "lr": 1.4107468347290431e-06, "epoch": 0.7942396313364055, "percentage": 39.71, "elapsed_time": "4:40:00", "remaining_time": "7:05:05"} +{"current_steps": 3448, "total_steps": 8680, "loss": 0.8196350336074829, "lr": 1.4103994431358133e-06, "epoch": 0.7944700460829494, "percentage": 39.72, "elapsed_time": "4:40:06", "remaining_time": "7:05:01"} +{"current_steps": 3449, "total_steps": 8680, "loss": 0.7698987126350403, "lr": 1.410051991973647e-06, "epoch": 0.7947004608294931, "percentage": 39.74, "elapsed_time": "4:40:10", "remaining_time": "7:04:56"} +{"current_steps": 3450, "total_steps": 8680, "loss": 0.9404128789901733, "lr": 1.4097044812929776e-06, "epoch": 0.7949308755760369, "percentage": 39.75, "elapsed_time": "4:40:14", "remaining_time": "7:04:49"} +{"current_steps": 3451, "total_steps": 8680, "loss": 0.827290952205658, "lr": 1.4093569111442443e-06, "epoch": 0.7951612903225806, "percentage": 39.76, "elapsed_time": "4:40:19", "remaining_time": "7:04:44"} +{"current_steps": 3452, "total_steps": 8680, "loss": 0.8126389384269714, "lr": 1.4090092815778976e-06, "epoch": 0.7953917050691244, "percentage": 39.77, "elapsed_time": "4:40:22", "remaining_time": "7:04:37"} +{"current_steps": 3453, "total_steps": 8680, "loss": 0.7439650297164917, "lr": 1.4086615926443953e-06, "epoch": 0.7956221198156682, "percentage": 39.78, "elapsed_time": "4:40:29", "remaining_time": "7:04:35"} +{"current_steps": 3454, "total_steps": 8680, "loss": 0.7505590915679932, "lr": 
1.4083138443942036e-06, "epoch": 0.7958525345622119, "percentage": 39.79, "elapsed_time": "4:40:33", "remaining_time": "7:04:29"} +{"current_steps": 3455, "total_steps": 8680, "loss": 0.7070168256759644, "lr": 1.407966036877798e-06, "epoch": 0.7960829493087558, "percentage": 39.8, "elapsed_time": "4:40:37", "remaining_time": "7:04:23"} +{"current_steps": 3456, "total_steps": 8680, "loss": 0.8271987438201904, "lr": 1.4076181701456623e-06, "epoch": 0.7963133640552995, "percentage": 39.82, "elapsed_time": "4:40:41", "remaining_time": "7:04:16"} +{"current_steps": 3457, "total_steps": 8680, "loss": 0.72886061668396, "lr": 1.4072702442482886e-06, "epoch": 0.7965437788018433, "percentage": 39.83, "elapsed_time": "4:40:46", "remaining_time": "7:04:13"} +{"current_steps": 3458, "total_steps": 8680, "loss": 0.838603138923645, "lr": 1.4069222592361784e-06, "epoch": 0.7967741935483871, "percentage": 39.84, "elapsed_time": "4:40:50", "remaining_time": "7:04:06"} +{"current_steps": 3459, "total_steps": 8680, "loss": 0.9829634428024292, "lr": 1.4065742151598408e-06, "epoch": 0.7970046082949309, "percentage": 39.85, "elapsed_time": "4:40:55", "remaining_time": "7:04:01"} +{"current_steps": 3460, "total_steps": 8680, "loss": 0.8269632458686829, "lr": 1.406226112069794e-06, "epoch": 0.7972350230414746, "percentage": 39.86, "elapsed_time": "4:41:00", "remaining_time": "7:03:56"} +{"current_steps": 3461, "total_steps": 8680, "loss": 0.7234654426574707, "lr": 1.405877950016565e-06, "epoch": 0.7974654377880185, "percentage": 39.87, "elapsed_time": "4:41:06", "remaining_time": "7:03:54"} +{"current_steps": 3462, "total_steps": 8680, "loss": 0.7843908071517944, "lr": 1.4055297290506887e-06, "epoch": 0.7976958525345622, "percentage": 39.88, "elapsed_time": "4:41:11", "remaining_time": "7:03:48"} +{"current_steps": 3463, "total_steps": 8680, "loss": 0.7294371128082275, "lr": 1.4051814492227094e-06, "epoch": 0.7979262672811059, "percentage": 39.9, "elapsed_time": "4:41:16", 
"remaining_time": "7:03:43"} +{"current_steps": 3464, "total_steps": 8680, "loss": 0.8805780410766602, "lr": 1.4048331105831787e-06, "epoch": 0.7981566820276498, "percentage": 39.91, "elapsed_time": "4:41:20", "remaining_time": "7:03:38"} +{"current_steps": 3465, "total_steps": 8680, "loss": 0.6933708190917969, "lr": 1.404484713182658e-06, "epoch": 0.7983870967741935, "percentage": 39.92, "elapsed_time": "4:41:25", "remaining_time": "7:03:33"} +{"current_steps": 3466, "total_steps": 8680, "loss": 0.8720458745956421, "lr": 1.404136257071717e-06, "epoch": 0.7986175115207373, "percentage": 39.93, "elapsed_time": "4:41:29", "remaining_time": "7:03:27"} +{"current_steps": 3467, "total_steps": 8680, "loss": 0.7675988674163818, "lr": 1.403787742300933e-06, "epoch": 0.7988479262672811, "percentage": 39.94, "elapsed_time": "4:41:34", "remaining_time": "7:03:23"} +{"current_steps": 3468, "total_steps": 8680, "loss": 0.7630051374435425, "lr": 1.403439168920893e-06, "epoch": 0.7990783410138249, "percentage": 39.95, "elapsed_time": "4:41:38", "remaining_time": "7:03:17"} +{"current_steps": 3469, "total_steps": 8680, "loss": 0.9195173978805542, "lr": 1.4030905369821914e-06, "epoch": 0.7993087557603686, "percentage": 39.97, "elapsed_time": "4:41:42", "remaining_time": "7:03:10"} +{"current_steps": 3470, "total_steps": 8680, "loss": 0.9347431659698486, "lr": 1.402741846535432e-06, "epoch": 0.7995391705069125, "percentage": 39.98, "elapsed_time": "4:41:46", "remaining_time": "7:03:03"} +{"current_steps": 3471, "total_steps": 8680, "loss": 0.7812551259994507, "lr": 1.4023930976312271e-06, "epoch": 0.7997695852534562, "percentage": 39.99, "elapsed_time": "4:41:52", "remaining_time": "7:03:01"} +{"current_steps": 3472, "total_steps": 8680, "loss": 0.7655330896377563, "lr": 1.4020442903201963e-06, "epoch": 0.8, "percentage": 40.0, "elapsed_time": "4:41:56", "remaining_time": "7:02:55"} +{"current_steps": 3473, "total_steps": 8680, "loss": 0.7543904185295105, "lr": 
1.4016954246529694e-06, "epoch": 0.8002304147465438, "percentage": 40.01, "elapsed_time": "4:42:01", "remaining_time": "7:02:49"} +{"current_steps": 3474, "total_steps": 8680, "loss": 0.9343980550765991, "lr": 1.4013465006801833e-06, "epoch": 0.8004608294930876, "percentage": 40.02, "elapsed_time": "4:42:06", "remaining_time": "7:02:45"} +{"current_steps": 3475, "total_steps": 8680, "loss": 0.7366182208061218, "lr": 1.4009975184524838e-06, "epoch": 0.8006912442396313, "percentage": 40.03, "elapsed_time": "4:42:11", "remaining_time": "7:02:41"} +{"current_steps": 3476, "total_steps": 8680, "loss": 0.7028899192810059, "lr": 1.4006484780205254e-06, "epoch": 0.8009216589861751, "percentage": 40.05, "elapsed_time": "4:42:17", "remaining_time": "7:02:37"} +{"current_steps": 3477, "total_steps": 8680, "loss": 0.9259153604507446, "lr": 1.4002993794349708e-06, "epoch": 0.8011520737327189, "percentage": 40.06, "elapsed_time": "4:42:21", "remaining_time": "7:02:30"} +{"current_steps": 3478, "total_steps": 8680, "loss": 0.7263842225074768, "lr": 1.3999502227464914e-06, "epoch": 0.8013824884792626, "percentage": 40.07, "elapsed_time": "4:42:25", "remaining_time": "7:02:24"} +{"current_steps": 3479, "total_steps": 8680, "loss": 0.8177748918533325, "lr": 1.3996010080057664e-06, "epoch": 0.8016129032258065, "percentage": 40.08, "elapsed_time": "4:42:29", "remaining_time": "7:02:18"} +{"current_steps": 3480, "total_steps": 8680, "loss": 0.8526895046234131, "lr": 1.3992517352634842e-06, "epoch": 0.8018433179723502, "percentage": 40.09, "elapsed_time": "4:42:33", "remaining_time": "7:02:12"} +{"current_steps": 3481, "total_steps": 8680, "loss": 0.7914575338363647, "lr": 1.398902404570341e-06, "epoch": 0.802073732718894, "percentage": 40.1, "elapsed_time": "4:42:38", "remaining_time": "7:02:08"} +{"current_steps": 3482, "total_steps": 8680, "loss": 0.7546013593673706, "lr": 1.398553015977042e-06, "epoch": 0.8023041474654378, "percentage": 40.12, "elapsed_time": "4:42:42", 
"remaining_time": "7:02:01"} +{"current_steps": 3483, "total_steps": 8680, "loss": 0.7250038385391235, "lr": 1.3982035695343005e-06, "epoch": 0.8025345622119816, "percentage": 40.13, "elapsed_time": "4:42:47", "remaining_time": "7:01:57"} +{"current_steps": 3484, "total_steps": 8680, "loss": 0.8650141954421997, "lr": 1.3978540652928376e-06, "epoch": 0.8027649769585253, "percentage": 40.14, "elapsed_time": "4:42:52", "remaining_time": "7:01:52"} +{"current_steps": 3485, "total_steps": 8680, "loss": 0.8020066022872925, "lr": 1.3975045033033838e-06, "epoch": 0.8029953917050692, "percentage": 40.15, "elapsed_time": "4:42:57", "remaining_time": "7:01:47"} +{"current_steps": 3486, "total_steps": 8680, "loss": 0.7376772165298462, "lr": 1.3971548836166782e-06, "epoch": 0.8032258064516129, "percentage": 40.16, "elapsed_time": "4:43:02", "remaining_time": "7:01:42"} +{"current_steps": 3487, "total_steps": 8680, "loss": 0.8440769910812378, "lr": 1.3968052062834665e-06, "epoch": 0.8034562211981566, "percentage": 40.17, "elapsed_time": "4:43:06", "remaining_time": "7:01:37"} +{"current_steps": 3488, "total_steps": 8680, "loss": 0.7886836528778076, "lr": 1.3964554713545047e-06, "epoch": 0.8036866359447005, "percentage": 40.18, "elapsed_time": "4:43:14", "remaining_time": "7:01:36"} +{"current_steps": 3489, "total_steps": 8680, "loss": 0.9167575836181641, "lr": 1.396105678880556e-06, "epoch": 0.8039170506912442, "percentage": 40.2, "elapsed_time": "4:43:19", "remaining_time": "7:01:31"} +{"current_steps": 3490, "total_steps": 8680, "loss": 0.6761677861213684, "lr": 1.3957558289123922e-06, "epoch": 0.804147465437788, "percentage": 40.21, "elapsed_time": "4:43:24", "remaining_time": "7:01:27"} +{"current_steps": 3491, "total_steps": 8680, "loss": 0.7775592803955078, "lr": 1.3954059215007938e-06, "epoch": 0.8043778801843318, "percentage": 40.22, "elapsed_time": "4:43:29", "remaining_time": "7:01:23"} +{"current_steps": 3492, "total_steps": 8680, "loss": 0.8127217292785645, "lr": 
1.3950559566965494e-06, "epoch": 0.8046082949308756, "percentage": 40.23, "elapsed_time": "4:43:34", "remaining_time": "7:01:17"} +{"current_steps": 3493, "total_steps": 8680, "loss": 0.8134229779243469, "lr": 1.394705934550456e-06, "epoch": 0.8048387096774193, "percentage": 40.24, "elapsed_time": "4:43:39", "remaining_time": "7:01:13"} +{"current_steps": 3494, "total_steps": 8680, "loss": 0.8853167295455933, "lr": 1.3943558551133186e-06, "epoch": 0.8050691244239632, "percentage": 40.25, "elapsed_time": "4:43:43", "remaining_time": "7:01:07"} +{"current_steps": 3495, "total_steps": 8680, "loss": 0.8024332523345947, "lr": 1.3940057184359506e-06, "epoch": 0.8052995391705069, "percentage": 40.26, "elapsed_time": "4:43:48", "remaining_time": "7:01:01"} +{"current_steps": 3496, "total_steps": 8680, "loss": 0.7581099271774292, "lr": 1.3936555245691745e-06, "epoch": 0.8055299539170507, "percentage": 40.28, "elapsed_time": "4:43:54", "remaining_time": "7:00:58"} +{"current_steps": 3497, "total_steps": 8680, "loss": 0.979412317276001, "lr": 1.3933052735638203e-06, "epoch": 0.8057603686635945, "percentage": 40.29, "elapsed_time": "4:43:58", "remaining_time": "7:00:52"} +{"current_steps": 3498, "total_steps": 8680, "loss": 0.7917830944061279, "lr": 1.392954965470726e-06, "epoch": 0.8059907834101383, "percentage": 40.3, "elapsed_time": "4:44:03", "remaining_time": "7:00:47"} +{"current_steps": 3499, "total_steps": 8680, "loss": 0.8565326929092407, "lr": 1.392604600340739e-06, "epoch": 0.806221198156682, "percentage": 40.31, "elapsed_time": "4:44:07", "remaining_time": "7:00:42"} +{"current_steps": 3500, "total_steps": 8680, "loss": 0.7276358604431152, "lr": 1.3922541782247136e-06, "epoch": 0.8064516129032258, "percentage": 40.32, "elapsed_time": "4:44:13", "remaining_time": "7:00:39"} +{"current_steps": 3501, "total_steps": 8680, "loss": 0.734528660774231, "lr": 1.3919036991735138e-06, "epoch": 0.8066820276497696, "percentage": 40.33, "elapsed_time": "4:44:23", 
"remaining_time": "7:00:41"} +{"current_steps": 3502, "total_steps": 8680, "loss": 0.8786039352416992, "lr": 1.391553163238011e-06, "epoch": 0.8069124423963133, "percentage": 40.35, "elapsed_time": "4:44:29", "remaining_time": "7:00:38"} +{"current_steps": 3503, "total_steps": 8680, "loss": 0.9509482383728027, "lr": 1.3912025704690844e-06, "epoch": 0.8071428571428572, "percentage": 40.36, "elapsed_time": "4:44:33", "remaining_time": "7:00:33"} +{"current_steps": 3504, "total_steps": 8680, "loss": 0.7188615202903748, "lr": 1.3908519209176225e-06, "epoch": 0.8073732718894009, "percentage": 40.37, "elapsed_time": "4:44:39", "remaining_time": "7:00:28"} +{"current_steps": 3505, "total_steps": 8680, "loss": 0.7681115865707397, "lr": 1.3905012146345221e-06, "epoch": 0.8076036866359447, "percentage": 40.38, "elapsed_time": "4:44:45", "remaining_time": "7:00:25"} +{"current_steps": 3506, "total_steps": 8680, "loss": 0.8835415840148926, "lr": 1.3901504516706874e-06, "epoch": 0.8078341013824885, "percentage": 40.39, "elapsed_time": "4:44:49", "remaining_time": "7:00:19"} +{"current_steps": 3507, "total_steps": 8680, "loss": 0.8179003000259399, "lr": 1.389799632077031e-06, "epoch": 0.8080645161290323, "percentage": 40.4, "elapsed_time": "4:44:55", "remaining_time": "7:00:16"} +{"current_steps": 3508, "total_steps": 8680, "loss": 0.9690247774124146, "lr": 1.3894487559044742e-06, "epoch": 0.808294930875576, "percentage": 40.41, "elapsed_time": "4:45:00", "remaining_time": "7:00:11"} +{"current_steps": 3509, "total_steps": 8680, "loss": 0.9759812951087952, "lr": 1.389097823203946e-06, "epoch": 0.8085253456221199, "percentage": 40.43, "elapsed_time": "4:45:04", "remaining_time": "7:00:06"} +{"current_steps": 3510, "total_steps": 8680, "loss": 0.6649112105369568, "lr": 1.3887468340263838e-06, "epoch": 0.8087557603686636, "percentage": 40.44, "elapsed_time": "4:45:11", "remaining_time": "7:00:04"} +{"current_steps": 3511, "total_steps": 8680, "loss": 0.7824583053588867, "lr": 
1.388395788422733e-06, "epoch": 0.8089861751152074, "percentage": 40.45, "elapsed_time": "4:45:17", "remaining_time": "7:00:01"} +{"current_steps": 3512, "total_steps": 8680, "loss": 0.8226176500320435, "lr": 1.3880446864439482e-06, "epoch": 0.8092165898617512, "percentage": 40.46, "elapsed_time": "4:45:22", "remaining_time": "6:59:55"} +{"current_steps": 3513, "total_steps": 8680, "loss": 0.7708876729011536, "lr": 1.3876935281409904e-06, "epoch": 0.8094470046082949, "percentage": 40.47, "elapsed_time": "4:45:27", "remaining_time": "6:59:51"} +{"current_steps": 3514, "total_steps": 8680, "loss": 0.7162825465202332, "lr": 1.3873423135648303e-06, "epoch": 0.8096774193548387, "percentage": 40.48, "elapsed_time": "4:45:31", "remaining_time": "6:59:45"} +{"current_steps": 3515, "total_steps": 8680, "loss": 0.815816342830658, "lr": 1.3869910427664464e-06, "epoch": 0.8099078341013825, "percentage": 40.5, "elapsed_time": "4:45:37", "remaining_time": "6:59:42"} +{"current_steps": 3516, "total_steps": 8680, "loss": 0.9166251420974731, "lr": 1.3866397157968248e-06, "epoch": 0.8101382488479263, "percentage": 40.51, "elapsed_time": "4:45:42", "remaining_time": "6:59:37"} +{"current_steps": 3517, "total_steps": 8680, "loss": 0.9193897843360901, "lr": 1.3862883327069606e-06, "epoch": 0.81036866359447, "percentage": 40.52, "elapsed_time": "4:45:47", "remaining_time": "6:59:32"} +{"current_steps": 3518, "total_steps": 8680, "loss": 0.9019489288330078, "lr": 1.3859368935478557e-06, "epoch": 0.8105990783410139, "percentage": 40.53, "elapsed_time": "4:45:51", "remaining_time": "6:59:26"} +{"current_steps": 3519, "total_steps": 8680, "loss": 0.8616153597831726, "lr": 1.3855853983705222e-06, "epoch": 0.8108294930875576, "percentage": 40.54, "elapsed_time": "4:45:56", "remaining_time": "6:59:21"} +{"current_steps": 3520, "total_steps": 8680, "loss": 0.8898462057113647, "lr": 1.3852338472259782e-06, "epoch": 0.8110599078341014, "percentage": 40.55, "elapsed_time": "4:46:02", 
"remaining_time": "6:59:18"} +{"current_steps": 3521, "total_steps": 8680, "loss": 0.770263135433197, "lr": 1.3848822401652513e-06, "epoch": 0.8112903225806452, "percentage": 40.56, "elapsed_time": "4:46:07", "remaining_time": "6:59:14"} +{"current_steps": 3522, "total_steps": 8680, "loss": 0.7524563074111938, "lr": 1.384530577239377e-06, "epoch": 0.811520737327189, "percentage": 40.58, "elapsed_time": "4:46:13", "remaining_time": "6:59:10"} +{"current_steps": 3523, "total_steps": 8680, "loss": 0.776715874671936, "lr": 1.3841788584993981e-06, "epoch": 0.8117511520737327, "percentage": 40.59, "elapsed_time": "4:46:18", "remaining_time": "6:59:06"} +{"current_steps": 3524, "total_steps": 8680, "loss": 0.7165439128875732, "lr": 1.3838270839963666e-06, "epoch": 0.8119815668202764, "percentage": 40.6, "elapsed_time": "4:46:24", "remaining_time": "6:59:03"} +{"current_steps": 3525, "total_steps": 8680, "loss": 0.7641004323959351, "lr": 1.383475253781342e-06, "epoch": 0.8122119815668203, "percentage": 40.61, "elapsed_time": "4:46:29", "remaining_time": "6:58:57"} +{"current_steps": 3526, "total_steps": 8680, "loss": 0.7493933439254761, "lr": 1.3831233679053921e-06, "epoch": 0.812442396313364, "percentage": 40.62, "elapsed_time": "4:46:33", "remaining_time": "6:58:52"} +{"current_steps": 3527, "total_steps": 8680, "loss": 0.7981607913970947, "lr": 1.3827714264195924e-06, "epoch": 0.8126728110599079, "percentage": 40.63, "elapsed_time": "4:46:38", "remaining_time": "6:58:46"} +{"current_steps": 3528, "total_steps": 8680, "loss": 0.9130103588104248, "lr": 1.3824194293750272e-06, "epoch": 0.8129032258064516, "percentage": 40.65, "elapsed_time": "4:46:42", "remaining_time": "6:58:41"} +{"current_steps": 3529, "total_steps": 8680, "loss": 0.7208644151687622, "lr": 1.3820673768227878e-06, "epoch": 0.8131336405529954, "percentage": 40.66, "elapsed_time": "4:46:48", "remaining_time": "6:58:37"} +{"current_steps": 3530, "total_steps": 8680, "loss": 0.9134006500244141, "lr": 
1.3817152688139745e-06, "epoch": 0.8133640552995391, "percentage": 40.67, "elapsed_time": "4:46:54", "remaining_time": "6:58:35"} +{"current_steps": 3531, "total_steps": 8680, "loss": 0.7383376359939575, "lr": 1.381363105399695e-06, "epoch": 0.813594470046083, "percentage": 40.68, "elapsed_time": "4:47:00", "remaining_time": "6:58:31"} +{"current_steps": 3532, "total_steps": 8680, "loss": 0.9143035411834717, "lr": 1.381010886631066e-06, "epoch": 0.8138248847926267, "percentage": 40.69, "elapsed_time": "4:47:05", "remaining_time": "6:58:26"} +{"current_steps": 3533, "total_steps": 8680, "loss": 0.7972506284713745, "lr": 1.3806586125592107e-06, "epoch": 0.8140552995391706, "percentage": 40.7, "elapsed_time": "4:47:10", "remaining_time": "6:58:22"} +{"current_steps": 3534, "total_steps": 8680, "loss": 0.8999859094619751, "lr": 1.380306283235262e-06, "epoch": 0.8142857142857143, "percentage": 40.71, "elapsed_time": "4:47:16", "remaining_time": "6:58:18"} +{"current_steps": 3535, "total_steps": 8680, "loss": 0.7759672999382019, "lr": 1.37995389871036e-06, "epoch": 0.8145161290322581, "percentage": 40.73, "elapsed_time": "4:47:20", "remaining_time": "6:58:12"} +{"current_steps": 3536, "total_steps": 8680, "loss": 0.7915023565292358, "lr": 1.3796014590356522e-06, "epoch": 0.8147465437788018, "percentage": 40.74, "elapsed_time": "4:47:26", "remaining_time": "6:58:09"} +{"current_steps": 3537, "total_steps": 8680, "loss": 0.8259623050689697, "lr": 1.3792489642622956e-06, "epoch": 0.8149769585253456, "percentage": 40.75, "elapsed_time": "4:47:31", "remaining_time": "6:58:05"} +{"current_steps": 3538, "total_steps": 8680, "loss": 0.7786526679992676, "lr": 1.3788964144414534e-06, "epoch": 0.8152073732718894, "percentage": 40.76, "elapsed_time": "4:47:38", "remaining_time": "6:58:02"} +{"current_steps": 3539, "total_steps": 8680, "loss": 0.8655314445495605, "lr": 1.3785438096242987e-06, "epoch": 0.8154377880184331, "percentage": 40.77, "elapsed_time": "4:47:43", 
"remaining_time": "6:57:58"} +{"current_steps": 3540, "total_steps": 8680, "loss": 0.8116016387939453, "lr": 1.3781911498620108e-06, "epoch": 0.815668202764977, "percentage": 40.78, "elapsed_time": "4:47:49", "remaining_time": "6:57:54"} +{"current_steps": 3541, "total_steps": 8680, "loss": 0.712907075881958, "lr": 1.3778384352057781e-06, "epoch": 0.8158986175115207, "percentage": 40.79, "elapsed_time": "4:47:54", "remaining_time": "6:57:50"} +{"current_steps": 3542, "total_steps": 8680, "loss": 0.8271318674087524, "lr": 1.377485665706797e-06, "epoch": 0.8161290322580645, "percentage": 40.81, "elapsed_time": "4:48:01", "remaining_time": "6:57:47"} +{"current_steps": 3543, "total_steps": 8680, "loss": 0.9161353707313538, "lr": 1.3771328414162713e-06, "epoch": 0.8163594470046083, "percentage": 40.82, "elapsed_time": "4:48:06", "remaining_time": "6:57:43"} +{"current_steps": 3544, "total_steps": 8680, "loss": 0.9555908441543579, "lr": 1.3767799623854125e-06, "epoch": 0.8165898617511521, "percentage": 40.83, "elapsed_time": "4:48:10", "remaining_time": "6:57:37"} +{"current_steps": 3545, "total_steps": 8680, "loss": 0.7863249778747559, "lr": 1.3764270286654414e-06, "epoch": 0.8168202764976958, "percentage": 40.84, "elapsed_time": "4:48:14", "remaining_time": "6:57:31"} +{"current_steps": 3546, "total_steps": 8680, "loss": 0.9086883068084717, "lr": 1.3760740403075853e-06, "epoch": 0.8170506912442397, "percentage": 40.85, "elapsed_time": "4:48:19", "remaining_time": "6:57:27"} +{"current_steps": 3547, "total_steps": 8680, "loss": 0.6231412887573242, "lr": 1.37572099736308e-06, "epoch": 0.8172811059907834, "percentage": 40.86, "elapsed_time": "4:48:24", "remaining_time": "6:57:22"} +{"current_steps": 3548, "total_steps": 8680, "loss": 0.8221716284751892, "lr": 1.3753678998831692e-06, "epoch": 0.8175115207373271, "percentage": 40.88, "elapsed_time": "4:48:29", "remaining_time": "6:57:17"} +{"current_steps": 3549, "total_steps": 8680, "loss": 0.8077783584594727, "lr": 
1.375014747919105e-06, "epoch": 0.817741935483871, "percentage": 40.89, "elapsed_time": "4:48:34", "remaining_time": "6:57:13"} +{"current_steps": 3550, "total_steps": 8680, "loss": 0.6882060766220093, "lr": 1.3746615415221463e-06, "epoch": 0.8179723502304147, "percentage": 40.9, "elapsed_time": "4:48:40", "remaining_time": "6:57:09"} +{"current_steps": 3551, "total_steps": 8680, "loss": 0.700161337852478, "lr": 1.3743082807435614e-06, "epoch": 0.8182027649769585, "percentage": 40.91, "elapsed_time": "4:48:45", "remaining_time": "6:57:04"} +{"current_steps": 3552, "total_steps": 8680, "loss": 0.737981915473938, "lr": 1.3739549656346243e-06, "epoch": 0.8184331797235023, "percentage": 40.92, "elapsed_time": "4:48:50", "remaining_time": "6:57:00"} +{"current_steps": 3553, "total_steps": 8680, "loss": 0.8025717735290527, "lr": 1.3736015962466193e-06, "epoch": 0.8186635944700461, "percentage": 40.93, "elapsed_time": "4:48:55", "remaining_time": "6:56:55"} +{"current_steps": 3554, "total_steps": 8680, "loss": 0.8855722546577454, "lr": 1.3732481726308372e-06, "epoch": 0.8188940092165898, "percentage": 40.94, "elapsed_time": "4:49:00", "remaining_time": "6:56:50"} +{"current_steps": 3555, "total_steps": 8680, "loss": 0.819130539894104, "lr": 1.3728946948385768e-06, "epoch": 0.8191244239631337, "percentage": 40.96, "elapsed_time": "4:49:05", "remaining_time": "6:56:45"} +{"current_steps": 3556, "total_steps": 8680, "loss": 0.8419625759124756, "lr": 1.3725411629211454e-06, "epoch": 0.8193548387096774, "percentage": 40.97, "elapsed_time": "4:49:10", "remaining_time": "6:56:40"} +{"current_steps": 3557, "total_steps": 8680, "loss": 0.8478890657424927, "lr": 1.3721875769298575e-06, "epoch": 0.8195852534562212, "percentage": 40.98, "elapsed_time": "4:49:15", "remaining_time": "6:56:36"} +{"current_steps": 3558, "total_steps": 8680, "loss": 0.8654077053070068, "lr": 1.371833936916035e-06, "epoch": 0.819815668202765, "percentage": 40.99, "elapsed_time": "4:49:20", 
"remaining_time": "6:56:31"} +{"current_steps": 3559, "total_steps": 8680, "loss": 0.8898686170578003, "lr": 1.371480242931009e-06, "epoch": 0.8200460829493088, "percentage": 41.0, "elapsed_time": "4:49:24", "remaining_time": "6:56:25"} +{"current_steps": 3560, "total_steps": 8680, "loss": 0.873773455619812, "lr": 1.3711264950261176e-06, "epoch": 0.8202764976958525, "percentage": 41.01, "elapsed_time": "4:49:29", "remaining_time": "6:56:21"} +{"current_steps": 3561, "total_steps": 8680, "loss": 0.6323572397232056, "lr": 1.3707726932527068e-06, "epoch": 0.8205069124423963, "percentage": 41.03, "elapsed_time": "4:49:35", "remaining_time": "6:56:17"} +{"current_steps": 3562, "total_steps": 8680, "loss": 0.7018281817436218, "lr": 1.3704188376621304e-06, "epoch": 0.8207373271889401, "percentage": 41.04, "elapsed_time": "4:49:42", "remaining_time": "6:56:15"} +{"current_steps": 3563, "total_steps": 8680, "loss": 0.8052775859832764, "lr": 1.37006492830575e-06, "epoch": 0.8209677419354838, "percentage": 41.05, "elapsed_time": "4:49:47", "remaining_time": "6:56:11"} +{"current_steps": 3564, "total_steps": 8680, "loss": 0.8057233095169067, "lr": 1.3697109652349352e-06, "epoch": 0.8211981566820277, "percentage": 41.06, "elapsed_time": "4:49:52", "remaining_time": "6:56:06"} +{"current_steps": 3565, "total_steps": 8680, "loss": 0.8647899627685547, "lr": 1.3693569485010633e-06, "epoch": 0.8214285714285714, "percentage": 41.07, "elapsed_time": "4:49:58", "remaining_time": "6:56:02"} +{"current_steps": 3566, "total_steps": 8680, "loss": 0.8022265434265137, "lr": 1.369002878155519e-06, "epoch": 0.8216589861751152, "percentage": 41.08, "elapsed_time": "4:50:03", "remaining_time": "6:55:58"} +{"current_steps": 3567, "total_steps": 8680, "loss": 0.8534140586853027, "lr": 1.368648754249696e-06, "epoch": 0.821889400921659, "percentage": 41.09, "elapsed_time": "4:50:07", "remaining_time": "6:55:52"} +{"current_steps": 3568, "total_steps": 8680, "loss": 0.905183732509613, "lr": 
1.3682945768349935e-06, "epoch": 0.8221198156682028, "percentage": 41.11, "elapsed_time": "4:50:13", "remaining_time": "6:55:49"} +{"current_steps": 3569, "total_steps": 8680, "loss": 0.6096831560134888, "lr": 1.3679403459628215e-06, "epoch": 0.8223502304147465, "percentage": 41.12, "elapsed_time": "4:50:18", "remaining_time": "6:55:44"} +{"current_steps": 3570, "total_steps": 8680, "loss": 0.7220188975334167, "lr": 1.367586061684595e-06, "epoch": 0.8225806451612904, "percentage": 41.13, "elapsed_time": "4:50:24", "remaining_time": "6:55:40"} +{"current_steps": 3571, "total_steps": 8680, "loss": 0.8028903007507324, "lr": 1.3672317240517386e-06, "epoch": 0.8228110599078341, "percentage": 41.14, "elapsed_time": "4:50:29", "remaining_time": "6:55:35"} +{"current_steps": 3572, "total_steps": 8680, "loss": 0.8121141791343689, "lr": 1.3668773331156831e-06, "epoch": 0.8230414746543778, "percentage": 41.15, "elapsed_time": "4:50:34", "remaining_time": "6:55:31"} +{"current_steps": 3573, "total_steps": 8680, "loss": 0.8259282112121582, "lr": 1.3665228889278687e-06, "epoch": 0.8232718894009217, "percentage": 41.16, "elapsed_time": "4:50:39", "remaining_time": "6:55:26"} +{"current_steps": 3574, "total_steps": 8680, "loss": 0.9356029033660889, "lr": 1.3661683915397423e-06, "epoch": 0.8235023041474654, "percentage": 41.18, "elapsed_time": "4:50:45", "remaining_time": "6:55:24"} +{"current_steps": 3575, "total_steps": 8680, "loss": 0.738788366317749, "lr": 1.3658138410027582e-06, "epoch": 0.8237327188940092, "percentage": 41.19, "elapsed_time": "4:50:50", "remaining_time": "6:55:19"} +{"current_steps": 3576, "total_steps": 8680, "loss": 0.7775605320930481, "lr": 1.3654592373683794e-06, "epoch": 0.823963133640553, "percentage": 41.2, "elapsed_time": "4:50:55", "remaining_time": "6:55:14"} +{"current_steps": 3577, "total_steps": 8680, "loss": 0.7645376324653625, "lr": 1.3651045806880766e-06, "epoch": 0.8241935483870968, "percentage": 41.21, "elapsed_time": "4:51:00", 
"remaining_time": "6:55:09"} +{"current_steps": 3578, "total_steps": 8680, "loss": 0.7713958024978638, "lr": 1.3647498710133272e-06, "epoch": 0.8244239631336405, "percentage": 41.22, "elapsed_time": "4:51:06", "remaining_time": "6:55:05"} +{"current_steps": 3579, "total_steps": 8680, "loss": 0.6920947432518005, "lr": 1.3643951083956165e-06, "epoch": 0.8246543778801844, "percentage": 41.23, "elapsed_time": "4:51:10", "remaining_time": "6:55:00"} +{"current_steps": 3580, "total_steps": 8680, "loss": 0.7108405828475952, "lr": 1.3640402928864382e-06, "epoch": 0.8248847926267281, "percentage": 41.24, "elapsed_time": "4:51:14", "remaining_time": "6:54:54"} +{"current_steps": 3581, "total_steps": 8680, "loss": 0.7879295945167542, "lr": 1.3636854245372936e-06, "epoch": 0.8251152073732719, "percentage": 41.26, "elapsed_time": "4:51:19", "remaining_time": "6:54:48"} +{"current_steps": 3582, "total_steps": 8680, "loss": 0.8173119425773621, "lr": 1.3633305033996909e-06, "epoch": 0.8253456221198157, "percentage": 41.27, "elapsed_time": "4:51:25", "remaining_time": "6:54:45"} +{"current_steps": 3583, "total_steps": 8680, "loss": 0.8530454635620117, "lr": 1.3629755295251466e-06, "epoch": 0.8255760368663595, "percentage": 41.28, "elapsed_time": "4:51:29", "remaining_time": "6:54:40"} +{"current_steps": 3584, "total_steps": 8680, "loss": 0.7749553918838501, "lr": 1.3626205029651846e-06, "epoch": 0.8258064516129032, "percentage": 41.29, "elapsed_time": "4:51:34", "remaining_time": "6:54:34"} +{"current_steps": 3585, "total_steps": 8680, "loss": 0.8313847780227661, "lr": 1.362265423771337e-06, "epoch": 0.826036866359447, "percentage": 41.3, "elapsed_time": "4:51:40", "remaining_time": "6:54:31"} +{"current_steps": 3586, "total_steps": 8680, "loss": 0.7285455465316772, "lr": 1.3619102919951424e-06, "epoch": 0.8262672811059908, "percentage": 41.31, "elapsed_time": "4:51:44", "remaining_time": "6:54:25"} +{"current_steps": 3587, "total_steps": 8680, "loss": 0.8084003925323486, "lr": 
1.361555107688148e-06, "epoch": 0.8264976958525345, "percentage": 41.32, "elapsed_time": "4:51:48", "remaining_time": "6:54:20"} +{"current_steps": 3588, "total_steps": 8680, "loss": 0.8506543040275574, "lr": 1.3611998709019088e-06, "epoch": 0.8267281105990784, "percentage": 41.34, "elapsed_time": "4:51:54", "remaining_time": "6:54:16"} +{"current_steps": 3589, "total_steps": 8680, "loss": 0.8320293426513672, "lr": 1.3608445816879864e-06, "epoch": 0.8269585253456221, "percentage": 41.35, "elapsed_time": "4:51:59", "remaining_time": "6:54:11"} +{"current_steps": 3590, "total_steps": 8680, "loss": 0.8116205930709839, "lr": 1.3604892400979501e-06, "epoch": 0.8271889400921659, "percentage": 41.36, "elapsed_time": "4:52:04", "remaining_time": "6:54:06"} +{"current_steps": 3591, "total_steps": 8680, "loss": 0.8317450284957886, "lr": 1.3601338461833785e-06, "epoch": 0.8274193548387097, "percentage": 41.37, "elapsed_time": "4:52:10", "remaining_time": "6:54:02"} +{"current_steps": 3592, "total_steps": 8680, "loss": 0.7348642349243164, "lr": 1.3597783999958553e-06, "epoch": 0.8276497695852535, "percentage": 41.38, "elapsed_time": "4:52:14", "remaining_time": "6:53:57"} +{"current_steps": 3593, "total_steps": 8680, "loss": 0.8087270259857178, "lr": 1.359422901586974e-06, "epoch": 0.8278801843317972, "percentage": 41.39, "elapsed_time": "4:52:19", "remaining_time": "6:53:53"} +{"current_steps": 3594, "total_steps": 8680, "loss": 0.7964637875556946, "lr": 1.3590673510083345e-06, "epoch": 0.8281105990783411, "percentage": 41.41, "elapsed_time": "4:52:24", "remaining_time": "6:53:48"} +{"current_steps": 3595, "total_steps": 8680, "loss": 0.6192176342010498, "lr": 1.358711748311544e-06, "epoch": 0.8283410138248848, "percentage": 41.42, "elapsed_time": "4:52:30", "remaining_time": "6:53:44"} +{"current_steps": 3596, "total_steps": 8680, "loss": 0.7735739946365356, "lr": 1.3583560935482182e-06, "epoch": 0.8285714285714286, "percentage": 41.43, "elapsed_time": "4:52:34", 
"remaining_time": "6:53:39"} +{"current_steps": 3597, "total_steps": 8680, "loss": 0.7965315580368042, "lr": 1.35800038676998e-06, "epoch": 0.8288018433179724, "percentage": 41.44, "elapsed_time": "4:52:39", "remaining_time": "6:53:34"} +{"current_steps": 3598, "total_steps": 8680, "loss": 0.6489244699478149, "lr": 1.3576446280284595e-06, "epoch": 0.8290322580645161, "percentage": 41.45, "elapsed_time": "4:52:46", "remaining_time": "6:53:32"} +{"current_steps": 3599, "total_steps": 8680, "loss": 0.8073695302009583, "lr": 1.3572888173752946e-06, "epoch": 0.8292626728110599, "percentage": 41.46, "elapsed_time": "4:52:51", "remaining_time": "6:53:27"} +{"current_steps": 3600, "total_steps": 8680, "loss": 0.7925900816917419, "lr": 1.3569329548621309e-06, "epoch": 0.8294930875576036, "percentage": 41.47, "elapsed_time": "4:52:57", "remaining_time": "6:53:23"} +{"current_steps": 3601, "total_steps": 8680, "loss": 0.83954918384552, "lr": 1.356577040540621e-06, "epoch": 0.8297235023041475, "percentage": 41.49, "elapsed_time": "4:53:04", "remaining_time": "6:53:21"} +{"current_steps": 3602, "total_steps": 8680, "loss": 0.6384706497192383, "lr": 1.356221074462426e-06, "epoch": 0.8299539170506912, "percentage": 41.5, "elapsed_time": "4:53:10", "remaining_time": "6:53:18"} +{"current_steps": 3603, "total_steps": 8680, "loss": 0.8308184146881104, "lr": 1.3558650566792136e-06, "epoch": 0.830184331797235, "percentage": 41.51, "elapsed_time": "4:53:14", "remaining_time": "6:53:12"} +{"current_steps": 3604, "total_steps": 8680, "loss": 0.7972864508628845, "lr": 1.3555089872426596e-06, "epoch": 0.8304147465437788, "percentage": 41.52, "elapsed_time": "4:53:18", "remaining_time": "6:53:06"} +{"current_steps": 3605, "total_steps": 8680, "loss": 0.8038849830627441, "lr": 1.3551528662044463e-06, "epoch": 0.8306451612903226, "percentage": 41.53, "elapsed_time": "4:53:23", "remaining_time": "6:53:01"} +{"current_steps": 3606, "total_steps": 8680, "loss": 0.7735980749130249, "lr": 
1.3547966936162646e-06, "epoch": 0.8308755760368663, "percentage": 41.54, "elapsed_time": "4:53:29", "remaining_time": "6:52:58"} +{"current_steps": 3607, "total_steps": 8680, "loss": 0.7717504501342773, "lr": 1.354440469529813e-06, "epoch": 0.8311059907834102, "percentage": 41.56, "elapsed_time": "4:53:34", "remaining_time": "6:52:53"} +{"current_steps": 3608, "total_steps": 8680, "loss": 0.9405615329742432, "lr": 1.3540841939967962e-06, "epoch": 0.8313364055299539, "percentage": 41.57, "elapsed_time": "4:53:39", "remaining_time": "6:52:48"} +{"current_steps": 3609, "total_steps": 8680, "loss": 0.7730603814125061, "lr": 1.3537278670689273e-06, "epoch": 0.8315668202764976, "percentage": 41.58, "elapsed_time": "4:53:43", "remaining_time": "6:52:42"} +{"current_steps": 3610, "total_steps": 8680, "loss": 0.8677463531494141, "lr": 1.353371488797927e-06, "epoch": 0.8317972350230415, "percentage": 41.59, "elapsed_time": "4:53:48", "remaining_time": "6:52:37"} +{"current_steps": 3611, "total_steps": 8680, "loss": 0.8261700868606567, "lr": 1.3530150592355227e-06, "epoch": 0.8320276497695852, "percentage": 41.6, "elapsed_time": "4:53:53", "remaining_time": "6:52:33"} +{"current_steps": 3612, "total_steps": 8680, "loss": 0.6799050569534302, "lr": 1.35265857843345e-06, "epoch": 0.832258064516129, "percentage": 41.61, "elapsed_time": "4:53:59", "remaining_time": "6:52:30"} +{"current_steps": 3613, "total_steps": 8680, "loss": 0.9117664098739624, "lr": 1.3523020464434514e-06, "epoch": 0.8324884792626728, "percentage": 41.62, "elapsed_time": "4:54:05", "remaining_time": "6:52:26"} +{"current_steps": 3614, "total_steps": 8680, "loss": 0.8637168407440186, "lr": 1.3519454633172771e-06, "epoch": 0.8327188940092166, "percentage": 41.64, "elapsed_time": "4:54:10", "remaining_time": "6:52:21"} +{"current_steps": 3615, "total_steps": 8680, "loss": 0.8169793486595154, "lr": 1.3515888291066848e-06, "epoch": 0.8329493087557603, "percentage": 41.65, "elapsed_time": "4:54:17", 
"remaining_time": "6:52:19"} +{"current_steps": 3616, "total_steps": 8680, "loss": 0.6901019811630249, "lr": 1.3512321438634392e-06, "epoch": 0.8331797235023042, "percentage": 41.66, "elapsed_time": "4:54:23", "remaining_time": "6:52:16"} +{"current_steps": 3617, "total_steps": 8680, "loss": 0.868461012840271, "lr": 1.3508754076393133e-06, "epoch": 0.8334101382488479, "percentage": 41.67, "elapsed_time": "4:54:28", "remaining_time": "6:52:11"} +{"current_steps": 3618, "total_steps": 8680, "loss": 0.7916195392608643, "lr": 1.3505186204860864e-06, "epoch": 0.8336405529953917, "percentage": 41.68, "elapsed_time": "4:54:33", "remaining_time": "6:52:07"} +{"current_steps": 3619, "total_steps": 8680, "loss": 0.7078498601913452, "lr": 1.3501617824555456e-06, "epoch": 0.8338709677419355, "percentage": 41.69, "elapsed_time": "4:54:38", "remaining_time": "6:52:02"} +{"current_steps": 3620, "total_steps": 8680, "loss": 0.890669584274292, "lr": 1.3498048935994857e-06, "epoch": 0.8341013824884793, "percentage": 41.71, "elapsed_time": "4:54:42", "remaining_time": "6:51:55"} +{"current_steps": 3621, "total_steps": 8680, "loss": 0.8162761926651001, "lr": 1.3494479539697087e-06, "epoch": 0.834331797235023, "percentage": 41.72, "elapsed_time": "4:54:47", "remaining_time": "6:51:51"} +{"current_steps": 3622, "total_steps": 8680, "loss": 0.7743235230445862, "lr": 1.3490909636180233e-06, "epoch": 0.8345622119815668, "percentage": 41.73, "elapsed_time": "4:54:52", "remaining_time": "6:51:46"} +{"current_steps": 3623, "total_steps": 8680, "loss": 0.8297950029373169, "lr": 1.3487339225962472e-06, "epoch": 0.8347926267281106, "percentage": 41.74, "elapsed_time": "4:54:56", "remaining_time": "6:51:40"} +{"current_steps": 3624, "total_steps": 8680, "loss": 0.9550352692604065, "lr": 1.3483768309562035e-06, "epoch": 0.8350230414746543, "percentage": 41.75, "elapsed_time": "4:55:00", "remaining_time": "6:51:34"} +{"current_steps": 3625, "total_steps": 8680, "loss": 0.7343823909759521, "lr": 
1.3480196887497242e-06, "epoch": 0.8352534562211982, "percentage": 41.76, "elapsed_time": "4:55:05", "remaining_time": "6:51:29"} +{"current_steps": 3626, "total_steps": 8680, "loss": 0.8942683935165405, "lr": 1.3476624960286479e-06, "epoch": 0.8354838709677419, "percentage": 41.77, "elapsed_time": "4:55:10", "remaining_time": "6:51:25"} +{"current_steps": 3627, "total_steps": 8680, "loss": 0.778289794921875, "lr": 1.34730525284482e-06, "epoch": 0.8357142857142857, "percentage": 41.79, "elapsed_time": "4:55:13", "remaining_time": "6:51:18"} +{"current_steps": 3628, "total_steps": 8680, "loss": 0.5924088954925537, "lr": 1.3469479592500951e-06, "epoch": 0.8359447004608295, "percentage": 41.8, "elapsed_time": "4:55:19", "remaining_time": "6:51:14"} +{"current_steps": 3629, "total_steps": 8680, "loss": 1.0363706350326538, "lr": 1.3465906152963329e-06, "epoch": 0.8361751152073733, "percentage": 41.81, "elapsed_time": "4:55:22", "remaining_time": "6:51:07"} +{"current_steps": 3630, "total_steps": 8680, "loss": 0.7927669286727905, "lr": 1.346233221035402e-06, "epoch": 0.836405529953917, "percentage": 41.82, "elapsed_time": "4:55:27", "remaining_time": "6:51:01"} +{"current_steps": 3631, "total_steps": 8680, "loss": 0.8428707718849182, "lr": 1.345875776519177e-06, "epoch": 0.8366359447004609, "percentage": 41.83, "elapsed_time": "4:55:32", "remaining_time": "6:50:56"} +{"current_steps": 3632, "total_steps": 8680, "loss": 0.7975403070449829, "lr": 1.345518281799541e-06, "epoch": 0.8368663594470046, "percentage": 41.84, "elapsed_time": "4:55:36", "remaining_time": "6:50:50"} +{"current_steps": 3633, "total_steps": 8680, "loss": 0.8383880853652954, "lr": 1.3451607369283842e-06, "epoch": 0.8370967741935483, "percentage": 41.85, "elapsed_time": "4:55:40", "remaining_time": "6:50:45"} +{"current_steps": 3634, "total_steps": 8680, "loss": 0.9033386707305908, "lr": 1.3448031419576028e-06, "epoch": 0.8373271889400922, "percentage": 41.87, "elapsed_time": "4:55:46", 
"remaining_time": "6:50:42"} +{"current_steps": 3635, "total_steps": 8680, "loss": 0.8913514018058777, "lr": 1.3444454969391021e-06, "epoch": 0.8375576036866359, "percentage": 41.88, "elapsed_time": "4:55:51", "remaining_time": "6:50:37"} +{"current_steps": 3636, "total_steps": 8680, "loss": 0.9051915407180786, "lr": 1.3440878019247936e-06, "epoch": 0.8377880184331797, "percentage": 41.89, "elapsed_time": "4:55:56", "remaining_time": "6:50:32"} +{"current_steps": 3637, "total_steps": 8680, "loss": 0.8240993618965149, "lr": 1.343730056966596e-06, "epoch": 0.8380184331797235, "percentage": 41.9, "elapsed_time": "4:56:01", "remaining_time": "6:50:27"} +{"current_steps": 3638, "total_steps": 8680, "loss": 0.8276345133781433, "lr": 1.3433722621164358e-06, "epoch": 0.8382488479262673, "percentage": 41.91, "elapsed_time": "4:56:06", "remaining_time": "6:50:23"} +{"current_steps": 3639, "total_steps": 8680, "loss": 0.8250508904457092, "lr": 1.343014417426246e-06, "epoch": 0.838479262672811, "percentage": 41.92, "elapsed_time": "4:56:11", "remaining_time": "6:50:18"} +{"current_steps": 3640, "total_steps": 8680, "loss": 0.7872868180274963, "lr": 1.342656522947968e-06, "epoch": 0.8387096774193549, "percentage": 41.94, "elapsed_time": "4:56:16", "remaining_time": "6:50:13"} +{"current_steps": 3641, "total_steps": 8680, "loss": 0.7634146809577942, "lr": 1.3422985787335491e-06, "epoch": 0.8389400921658986, "percentage": 41.95, "elapsed_time": "4:56:20", "remaining_time": "6:50:07"} +{"current_steps": 3642, "total_steps": 8680, "loss": 0.63923180103302, "lr": 1.3419405848349448e-06, "epoch": 0.8391705069124424, "percentage": 41.96, "elapsed_time": "4:56:24", "remaining_time": "6:50:02"} +{"current_steps": 3643, "total_steps": 8680, "loss": 0.900942325592041, "lr": 1.3415825413041173e-06, "epoch": 0.8394009216589862, "percentage": 41.97, "elapsed_time": "4:56:29", "remaining_time": "6:49:57"} +{"current_steps": 3644, "total_steps": 8680, "loss": 0.6415199041366577, "lr": 
1.341224448193036e-06, "epoch": 0.83963133640553, "percentage": 41.98, "elapsed_time": "4:56:36", "remaining_time": "6:49:54"} +{"current_steps": 3645, "total_steps": 8680, "loss": 0.7750275135040283, "lr": 1.3408663055536775e-06, "epoch": 0.8398617511520737, "percentage": 41.99, "elapsed_time": "4:56:41", "remaining_time": "6:49:50"} +{"current_steps": 3646, "total_steps": 8680, "loss": 0.8159983158111572, "lr": 1.3405081134380264e-06, "epoch": 0.8400921658986175, "percentage": 42.0, "elapsed_time": "4:56:47", "remaining_time": "6:49:46"} +{"current_steps": 3647, "total_steps": 8680, "loss": 0.6870952844619751, "lr": 1.3401498718980733e-06, "epoch": 0.8403225806451613, "percentage": 42.02, "elapsed_time": "4:56:53", "remaining_time": "6:49:43"} +{"current_steps": 3648, "total_steps": 8680, "loss": 0.8588749170303345, "lr": 1.3397915809858168e-06, "epoch": 0.840552995391705, "percentage": 42.03, "elapsed_time": "4:56:56", "remaining_time": "6:49:36"} +{"current_steps": 3649, "total_steps": 8680, "loss": 0.6926778554916382, "lr": 1.3394332407532619e-06, "epoch": 0.8407834101382489, "percentage": 42.04, "elapsed_time": "4:57:01", "remaining_time": "6:49:30"} +{"current_steps": 3650, "total_steps": 8680, "loss": 0.7165309190750122, "lr": 1.3390748512524213e-06, "epoch": 0.8410138248847926, "percentage": 42.05, "elapsed_time": "4:57:06", "remaining_time": "6:49:25"} +{"current_steps": 3651, "total_steps": 8680, "loss": 0.7782741189002991, "lr": 1.3387164125353149e-06, "epoch": 0.8412442396313364, "percentage": 42.06, "elapsed_time": "4:57:11", "remaining_time": "6:49:22"} +{"current_steps": 3652, "total_steps": 8680, "loss": 0.9153795838356018, "lr": 1.3383579246539698e-06, "epoch": 0.8414746543778802, "percentage": 42.07, "elapsed_time": "4:57:16", "remaining_time": "6:49:16"} +{"current_steps": 3653, "total_steps": 8680, "loss": 0.8419643044471741, "lr": 1.33799938766042e-06, "epoch": 0.841705069124424, "percentage": 42.09, "elapsed_time": "4:57:20", 
"remaining_time": "6:49:11"} +{"current_steps": 3654, "total_steps": 8680, "loss": 0.6927728652954102, "lr": 1.3376408016067064e-06, "epoch": 0.8419354838709677, "percentage": 42.1, "elapsed_time": "4:57:24", "remaining_time": "6:49:05"} +{"current_steps": 3655, "total_steps": 8680, "loss": 0.7721414566040039, "lr": 1.3372821665448774e-06, "epoch": 0.8421658986175116, "percentage": 42.11, "elapsed_time": "4:57:29", "remaining_time": "6:49:00"} +{"current_steps": 3656, "total_steps": 8680, "loss": 0.7277967929840088, "lr": 1.3369234825269887e-06, "epoch": 0.8423963133640553, "percentage": 42.12, "elapsed_time": "4:57:35", "remaining_time": "6:48:56"} +{"current_steps": 3657, "total_steps": 8680, "loss": 0.7764936089515686, "lr": 1.336564749605102e-06, "epoch": 0.8426267281105991, "percentage": 42.13, "elapsed_time": "4:57:41", "remaining_time": "6:48:52"} +{"current_steps": 3658, "total_steps": 8680, "loss": 0.7445545196533203, "lr": 1.336205967831288e-06, "epoch": 0.8428571428571429, "percentage": 42.14, "elapsed_time": "4:57:45", "remaining_time": "6:48:47"} +{"current_steps": 3659, "total_steps": 8680, "loss": 0.8359465599060059, "lr": 1.3358471372576227e-06, "epoch": 0.8430875576036866, "percentage": 42.15, "elapsed_time": "4:57:50", "remaining_time": "6:48:42"} +{"current_steps": 3660, "total_steps": 8680, "loss": 0.8634141683578491, "lr": 1.33548825793619e-06, "epoch": 0.8433179723502304, "percentage": 42.17, "elapsed_time": "4:57:54", "remaining_time": "6:48:36"} +{"current_steps": 3661, "total_steps": 8680, "loss": 0.7365708351135254, "lr": 1.3351293299190804e-06, "epoch": 0.8435483870967742, "percentage": 42.18, "elapsed_time": "4:58:00", "remaining_time": "6:48:33"} +{"current_steps": 3662, "total_steps": 8680, "loss": 0.7135465145111084, "lr": 1.3347703532583927e-06, "epoch": 0.843778801843318, "percentage": 42.19, "elapsed_time": "4:58:07", "remaining_time": "6:48:30"} +{"current_steps": 3663, "total_steps": 8680, "loss": 0.7411447763442993, "lr": 
1.3344113280062313e-06, "epoch": 0.8440092165898617, "percentage": 42.2, "elapsed_time": "4:58:12", "remaining_time": "6:48:26"} +{"current_steps": 3664, "total_steps": 8680, "loss": 0.7765100002288818, "lr": 1.3340522542147081e-06, "epoch": 0.8442396313364056, "percentage": 42.21, "elapsed_time": "4:58:16", "remaining_time": "6:48:20"} +{"current_steps": 3665, "total_steps": 8680, "loss": 0.7638096809387207, "lr": 1.3336931319359426e-06, "epoch": 0.8444700460829493, "percentage": 42.22, "elapsed_time": "4:58:21", "remaining_time": "6:48:15"} +{"current_steps": 3666, "total_steps": 8680, "loss": 0.7114577889442444, "lr": 1.3333339612220606e-06, "epoch": 0.8447004608294931, "percentage": 42.24, "elapsed_time": "4:58:27", "remaining_time": "6:48:12"} +{"current_steps": 3667, "total_steps": 8680, "loss": 0.8702960014343262, "lr": 1.3329747421251955e-06, "epoch": 0.8449308755760369, "percentage": 42.25, "elapsed_time": "4:58:31", "remaining_time": "6:48:05"} +{"current_steps": 3668, "total_steps": 8680, "loss": 0.7248300313949585, "lr": 1.3326154746974878e-06, "epoch": 0.8451612903225807, "percentage": 42.26, "elapsed_time": "4:58:38", "remaining_time": "6:48:03"} +{"current_steps": 3669, "total_steps": 8680, "loss": 0.7648389339447021, "lr": 1.332256158991084e-06, "epoch": 0.8453917050691244, "percentage": 42.27, "elapsed_time": "4:58:43", "remaining_time": "6:47:58"} +{"current_steps": 3670, "total_steps": 8680, "loss": 0.7075401544570923, "lr": 1.3318967950581383e-06, "epoch": 0.8456221198156681, "percentage": 42.28, "elapsed_time": "4:58:48", "remaining_time": "6:47:53"} +{"current_steps": 3671, "total_steps": 8680, "loss": 0.6923220157623291, "lr": 1.3315373829508122e-06, "epoch": 0.845852534562212, "percentage": 42.29, "elapsed_time": "4:58:54", "remaining_time": "6:47:51"} +{"current_steps": 3672, "total_steps": 8680, "loss": 0.7522361874580383, "lr": 1.3311779227212742e-06, "epoch": 0.8460829493087557, "percentage": 42.3, "elapsed_time": "4:58:59", 
"remaining_time": "6:47:45"} +{"current_steps": 3673, "total_steps": 8680, "loss": 0.7087293863296509, "lr": 1.3308184144216989e-06, "epoch": 0.8463133640552996, "percentage": 42.32, "elapsed_time": "4:59:04", "remaining_time": "6:47:41"} +{"current_steps": 3674, "total_steps": 8680, "loss": 0.782098650932312, "lr": 1.3304588581042688e-06, "epoch": 0.8465437788018433, "percentage": 42.33, "elapsed_time": "4:59:09", "remaining_time": "6:47:36"} +{"current_steps": 3675, "total_steps": 8680, "loss": 0.7671197652816772, "lr": 1.330099253821173e-06, "epoch": 0.8467741935483871, "percentage": 42.34, "elapsed_time": "4:59:13", "remaining_time": "6:47:30"} +{"current_steps": 3676, "total_steps": 8680, "loss": 0.8098698258399963, "lr": 1.3297396016246073e-06, "epoch": 0.8470046082949308, "percentage": 42.35, "elapsed_time": "4:59:18", "remaining_time": "6:47:26"} +{"current_steps": 3677, "total_steps": 8680, "loss": 0.7671023011207581, "lr": 1.3293799015667751e-06, "epoch": 0.8472350230414747, "percentage": 42.36, "elapsed_time": "4:59:23", "remaining_time": "6:47:21"} +{"current_steps": 3678, "total_steps": 8680, "loss": 0.7448668479919434, "lr": 1.3290201536998862e-06, "epoch": 0.8474654377880184, "percentage": 42.37, "elapsed_time": "4:59:28", "remaining_time": "6:47:17"} +{"current_steps": 3679, "total_steps": 8680, "loss": 0.946117639541626, "lr": 1.3286603580761576e-06, "epoch": 0.8476958525345623, "percentage": 42.38, "elapsed_time": "4:59:32", "remaining_time": "6:47:10"} +{"current_steps": 3680, "total_steps": 8680, "loss": 0.8134163618087769, "lr": 1.328300514747813e-06, "epoch": 0.847926267281106, "percentage": 42.4, "elapsed_time": "4:59:37", "remaining_time": "6:47:05"} +{"current_steps": 3681, "total_steps": 8680, "loss": 0.725477933883667, "lr": 1.327940623767083e-06, "epoch": 0.8481566820276498, "percentage": 42.41, "elapsed_time": "4:59:41", "remaining_time": "6:46:59"} +{"current_steps": 3682, "total_steps": 8680, "loss": 0.8278200626373291, "lr": 
1.3275806851862061e-06, "epoch": 0.8483870967741935, "percentage": 42.42, "elapsed_time": "4:59:45", "remaining_time": "6:46:53"} +{"current_steps": 3683, "total_steps": 8680, "loss": 0.8437181711196899, "lr": 1.327220699057426e-06, "epoch": 0.8486175115207373, "percentage": 42.43, "elapsed_time": "4:59:51", "remaining_time": "6:46:50"} +{"current_steps": 3684, "total_steps": 8680, "loss": 0.8921856880187988, "lr": 1.326860665432995e-06, "epoch": 0.8488479262672811, "percentage": 42.44, "elapsed_time": "4:59:57", "remaining_time": "6:46:46"} +{"current_steps": 3685, "total_steps": 8680, "loss": 0.7285119295120239, "lr": 1.326500584365171e-06, "epoch": 0.8490783410138248, "percentage": 42.45, "elapsed_time": "5:00:02", "remaining_time": "6:46:41"} +{"current_steps": 3686, "total_steps": 8680, "loss": 0.8968918323516846, "lr": 1.3261404559062196e-06, "epoch": 0.8493087557603687, "percentage": 42.47, "elapsed_time": "5:00:07", "remaining_time": "6:46:37"} +{"current_steps": 3687, "total_steps": 8680, "loss": 0.6794285774230957, "lr": 1.3257802801084123e-06, "epoch": 0.8495391705069124, "percentage": 42.48, "elapsed_time": "5:00:12", "remaining_time": "6:46:33"} +{"current_steps": 3688, "total_steps": 8680, "loss": 0.869774341583252, "lr": 1.3254200570240291e-06, "epoch": 0.8497695852534562, "percentage": 42.49, "elapsed_time": "5:00:18", "remaining_time": "6:46:29"} +{"current_steps": 3689, "total_steps": 8680, "loss": 0.7862332463264465, "lr": 1.3250597867053553e-06, "epoch": 0.85, "percentage": 42.5, "elapsed_time": "5:00:22", "remaining_time": "6:46:24"} +{"current_steps": 3690, "total_steps": 8680, "loss": 0.8424299955368042, "lr": 1.3246994692046835e-06, "epoch": 0.8502304147465438, "percentage": 42.51, "elapsed_time": "5:00:27", "remaining_time": "6:46:18"} +{"current_steps": 3691, "total_steps": 8680, "loss": 0.6232138276100159, "lr": 1.3243391045743137e-06, "epoch": 0.8504608294930875, "percentage": 42.52, "elapsed_time": "5:00:32", "remaining_time": 
"6:46:13"} +{"current_steps": 3692, "total_steps": 8680, "loss": 0.7108159065246582, "lr": 1.3239786928665523e-06, "epoch": 0.8506912442396314, "percentage": 42.53, "elapsed_time": "5:00:37", "remaining_time": "6:46:09"} +{"current_steps": 3693, "total_steps": 8680, "loss": 0.7282330393791199, "lr": 1.3236182341337126e-06, "epoch": 0.8509216589861751, "percentage": 42.55, "elapsed_time": "5:00:42", "remaining_time": "6:46:04"} +{"current_steps": 3694, "total_steps": 8680, "loss": 0.7864304780960083, "lr": 1.3232577284281147e-06, "epoch": 0.8511520737327188, "percentage": 42.56, "elapsed_time": "5:00:47", "remaining_time": "6:45:59"} +{"current_steps": 3695, "total_steps": 8680, "loss": 0.7826365232467651, "lr": 1.3228971758020852e-06, "epoch": 0.8513824884792627, "percentage": 42.57, "elapsed_time": "5:00:52", "remaining_time": "6:45:55"} +{"current_steps": 3696, "total_steps": 8680, "loss": 0.8429988026618958, "lr": 1.322536576307958e-06, "epoch": 0.8516129032258064, "percentage": 42.58, "elapsed_time": "5:00:56", "remaining_time": "6:45:49"} +{"current_steps": 3697, "total_steps": 8680, "loss": 0.771148145198822, "lr": 1.322175929998074e-06, "epoch": 0.8518433179723502, "percentage": 42.59, "elapsed_time": "5:01:01", "remaining_time": "6:45:44"} +{"current_steps": 3698, "total_steps": 8680, "loss": 0.9610496759414673, "lr": 1.3218152369247804e-06, "epoch": 0.852073732718894, "percentage": 42.6, "elapsed_time": "5:01:05", "remaining_time": "6:45:37"} +{"current_steps": 3699, "total_steps": 8680, "loss": 0.7286547422409058, "lr": 1.321454497140431e-06, "epoch": 0.8523041474654378, "percentage": 42.62, "elapsed_time": "5:01:09", "remaining_time": "6:45:32"} +{"current_steps": 3700, "total_steps": 8680, "loss": 0.7446750402450562, "lr": 1.321093710697387e-06, "epoch": 0.8525345622119815, "percentage": 42.63, "elapsed_time": "5:01:14", "remaining_time": "6:45:27"} +{"current_steps": 3701, "total_steps": 8680, "loss": 0.7211639881134033, "lr": 1.3207328776480156e-06, 
"epoch": 0.8527649769585254, "percentage": 42.64, "elapsed_time": "5:01:23", "remaining_time": "6:45:28"} +{"current_steps": 3702, "total_steps": 8680, "loss": 0.765962541103363, "lr": 1.320371998044692e-06, "epoch": 0.8529953917050691, "percentage": 42.65, "elapsed_time": "5:01:29", "remaining_time": "6:45:24"} +{"current_steps": 3703, "total_steps": 8680, "loss": 0.9090084433555603, "lr": 1.3200110719397967e-06, "epoch": 0.853225806451613, "percentage": 42.66, "elapsed_time": "5:01:33", "remaining_time": "6:45:18"} +{"current_steps": 3704, "total_steps": 8680, "loss": 0.8222901225090027, "lr": 1.319650099385718e-06, "epoch": 0.8534562211981567, "percentage": 42.67, "elapsed_time": "5:01:38", "remaining_time": "6:45:13"} +{"current_steps": 3705, "total_steps": 8680, "loss": 0.7929965853691101, "lr": 1.3192890804348508e-06, "epoch": 0.8536866359447005, "percentage": 42.68, "elapsed_time": "5:01:42", "remaining_time": "6:45:07"} +{"current_steps": 3706, "total_steps": 8680, "loss": 0.89229816198349, "lr": 1.318928015139596e-06, "epoch": 0.8539170506912442, "percentage": 42.7, "elapsed_time": "5:01:46", "remaining_time": "6:45:01"} +{"current_steps": 3707, "total_steps": 8680, "loss": 0.8348276615142822, "lr": 1.3185669035523621e-06, "epoch": 0.854147465437788, "percentage": 42.71, "elapsed_time": "5:01:49", "remaining_time": "6:44:54"} +{"current_steps": 3708, "total_steps": 8680, "loss": 0.9006820917129517, "lr": 1.3182057457255639e-06, "epoch": 0.8543778801843318, "percentage": 42.72, "elapsed_time": "5:01:54", "remaining_time": "6:44:49"} +{"current_steps": 3709, "total_steps": 8680, "loss": 0.665691614151001, "lr": 1.3178445417116233e-06, "epoch": 0.8546082949308755, "percentage": 42.73, "elapsed_time": "5:01:59", "remaining_time": "6:44:45"} +{"current_steps": 3710, "total_steps": 8680, "loss": 0.7073110342025757, "lr": 1.3174832915629677e-06, "epoch": 0.8548387096774194, "percentage": 42.74, "elapsed_time": "5:02:07", "remaining_time": "6:44:43"} 
+{"current_steps": 3711, "total_steps": 8680, "loss": 0.7125800848007202, "lr": 1.317121995332033e-06, "epoch": 0.8550691244239631, "percentage": 42.75, "elapsed_time": "5:02:11", "remaining_time": "6:44:37"} +{"current_steps": 3712, "total_steps": 8680, "loss": 0.847205638885498, "lr": 1.31676065307126e-06, "epoch": 0.8552995391705069, "percentage": 42.76, "elapsed_time": "5:02:16", "remaining_time": "6:44:33"} +{"current_steps": 3713, "total_steps": 8680, "loss": 0.860866904258728, "lr": 1.3163992648330979e-06, "epoch": 0.8555299539170507, "percentage": 42.78, "elapsed_time": "5:02:20", "remaining_time": "6:44:27"} +{"current_steps": 3714, "total_steps": 8680, "loss": 0.811161994934082, "lr": 1.3160378306700014e-06, "epoch": 0.8557603686635945, "percentage": 42.79, "elapsed_time": "5:02:26", "remaining_time": "6:44:24"} +{"current_steps": 3715, "total_steps": 8680, "loss": 1.0276790857315063, "lr": 1.3156763506344318e-06, "epoch": 0.8559907834101382, "percentage": 42.8, "elapsed_time": "5:02:30", "remaining_time": "6:44:17"} +{"current_steps": 3716, "total_steps": 8680, "loss": 0.7462253570556641, "lr": 1.3153148247788584e-06, "epoch": 0.8562211981566821, "percentage": 42.81, "elapsed_time": "5:02:37", "remaining_time": "6:44:15"} +{"current_steps": 3717, "total_steps": 8680, "loss": 0.9181896448135376, "lr": 1.314953253155755e-06, "epoch": 0.8564516129032258, "percentage": 42.82, "elapsed_time": "5:02:42", "remaining_time": "6:44:10"} +{"current_steps": 3718, "total_steps": 8680, "loss": 0.5943678021430969, "lr": 1.3145916358176044e-06, "epoch": 0.8566820276497696, "percentage": 42.83, "elapsed_time": "5:02:49", "remaining_time": "6:44:08"} +{"current_steps": 3719, "total_steps": 8680, "loss": 0.7908656597137451, "lr": 1.3142299728168942e-06, "epoch": 0.8569124423963134, "percentage": 42.85, "elapsed_time": "5:02:54", "remaining_time": "6:44:04"} +{"current_steps": 3720, "total_steps": 8680, "loss": 0.8716393709182739, "lr": 1.3138682642061192e-06, "epoch": 
0.8571428571428571, "percentage": 42.86, "elapsed_time": "5:03:00", "remaining_time": "6:44:00"} +{"current_steps": 3721, "total_steps": 8680, "loss": 0.76909339427948, "lr": 1.3135065100377814e-06, "epoch": 0.8573732718894009, "percentage": 42.87, "elapsed_time": "5:03:06", "remaining_time": "6:43:57"} +{"current_steps": 3722, "total_steps": 8680, "loss": 0.7896728515625, "lr": 1.3131447103643884e-06, "epoch": 0.8576036866359447, "percentage": 42.88, "elapsed_time": "5:03:10", "remaining_time": "6:43:51"} +{"current_steps": 3723, "total_steps": 8680, "loss": 0.8458575010299683, "lr": 1.3127828652384554e-06, "epoch": 0.8578341013824885, "percentage": 42.89, "elapsed_time": "5:03:15", "remaining_time": "6:43:46"} +{"current_steps": 3724, "total_steps": 8680, "loss": 0.7419729232788086, "lr": 1.3124209747125036e-06, "epoch": 0.8580645161290322, "percentage": 42.9, "elapsed_time": "5:03:22", "remaining_time": "6:43:44"} +{"current_steps": 3725, "total_steps": 8680, "loss": 0.8801093697547913, "lr": 1.3120590388390608e-06, "epoch": 0.8582949308755761, "percentage": 42.91, "elapsed_time": "5:03:27", "remaining_time": "6:43:39"} +{"current_steps": 3726, "total_steps": 8680, "loss": 0.6337816715240479, "lr": 1.3116970576706617e-06, "epoch": 0.8585253456221198, "percentage": 42.93, "elapsed_time": "5:03:32", "remaining_time": "6:43:34"} +{"current_steps": 3727, "total_steps": 8680, "loss": 0.8099665641784668, "lr": 1.3113350312598472e-06, "epoch": 0.8587557603686636, "percentage": 42.94, "elapsed_time": "5:03:36", "remaining_time": "6:43:28"} +{"current_steps": 3728, "total_steps": 8680, "loss": 0.7430413961410522, "lr": 1.3109729596591651e-06, "epoch": 0.8589861751152074, "percentage": 42.95, "elapsed_time": "5:03:41", "remaining_time": "6:43:24"} +{"current_steps": 3729, "total_steps": 8680, "loss": 0.7374905347824097, "lr": 1.3106108429211699e-06, "epoch": 0.8592165898617512, "percentage": 42.96, "elapsed_time": "5:03:47", "remaining_time": "6:43:20"} +{"current_steps": 
3730, "total_steps": 8680, "loss": 0.71753990650177, "lr": 1.3102486810984217e-06, "epoch": 0.8594470046082949, "percentage": 42.97, "elapsed_time": "5:03:52", "remaining_time": "6:43:16"} +{"current_steps": 3731, "total_steps": 8680, "loss": 0.9126461744308472, "lr": 1.3098864742434885e-06, "epoch": 0.8596774193548387, "percentage": 42.98, "elapsed_time": "5:03:56", "remaining_time": "6:43:10"} +{"current_steps": 3732, "total_steps": 8680, "loss": 0.846487283706665, "lr": 1.3095242224089434e-06, "epoch": 0.8599078341013825, "percentage": 43.0, "elapsed_time": "5:04:03", "remaining_time": "6:43:07"} +{"current_steps": 3733, "total_steps": 8680, "loss": 0.7026070952415466, "lr": 1.3091619256473671e-06, "epoch": 0.8601382488479262, "percentage": 43.01, "elapsed_time": "5:04:08", "remaining_time": "6:43:02"} +{"current_steps": 3734, "total_steps": 8680, "loss": 1.0044158697128296, "lr": 1.3087995840113471e-06, "epoch": 0.8603686635944701, "percentage": 43.02, "elapsed_time": "5:04:11", "remaining_time": "6:42:55"} +{"current_steps": 3735, "total_steps": 8680, "loss": 0.8061608076095581, "lr": 1.3084371975534759e-06, "epoch": 0.8605990783410138, "percentage": 43.03, "elapsed_time": "5:04:15", "remaining_time": "6:42:49"} +{"current_steps": 3736, "total_steps": 8680, "loss": 0.9189345836639404, "lr": 1.308074766326354e-06, "epoch": 0.8608294930875576, "percentage": 43.04, "elapsed_time": "5:04:20", "remaining_time": "6:42:44"} +{"current_steps": 3737, "total_steps": 8680, "loss": 0.8183290958404541, "lr": 1.3077122903825875e-06, "epoch": 0.8610599078341014, "percentage": 43.05, "elapsed_time": "5:04:25", "remaining_time": "6:42:40"} +{"current_steps": 3738, "total_steps": 8680, "loss": 0.860893726348877, "lr": 1.3073497697747893e-06, "epoch": 0.8612903225806452, "percentage": 43.06, "elapsed_time": "5:04:29", "remaining_time": "6:42:33"} +{"current_steps": 3739, "total_steps": 8680, "loss": 0.6732957363128662, "lr": 1.306987204555579e-06, "epoch": 0.8615207373271889, 
"percentage": 43.08, "elapsed_time": "5:04:36", "remaining_time": "6:42:31"} +{"current_steps": 3740, "total_steps": 8680, "loss": 0.7910758256912231, "lr": 1.3066245947775821e-06, "epoch": 0.8617511520737328, "percentage": 43.09, "elapsed_time": "5:04:41", "remaining_time": "6:42:27"} +{"current_steps": 3741, "total_steps": 8680, "loss": 0.9422181844711304, "lr": 1.3062619404934317e-06, "epoch": 0.8619815668202765, "percentage": 43.1, "elapsed_time": "5:04:47", "remaining_time": "6:42:23"} +{"current_steps": 3742, "total_steps": 8680, "loss": 0.7731142044067383, "lr": 1.3058992417557657e-06, "epoch": 0.8622119815668203, "percentage": 43.11, "elapsed_time": "5:04:51", "remaining_time": "6:42:18"} +{"current_steps": 3743, "total_steps": 8680, "loss": 0.8419089317321777, "lr": 1.3055364986172296e-06, "epoch": 0.8624423963133641, "percentage": 43.12, "elapsed_time": "5:04:56", "remaining_time": "6:42:13"} +{"current_steps": 3744, "total_steps": 8680, "loss": 0.7535419464111328, "lr": 1.3051737111304757e-06, "epoch": 0.8626728110599078, "percentage": 43.13, "elapsed_time": "5:05:01", "remaining_time": "6:42:08"} +{"current_steps": 3745, "total_steps": 8680, "loss": 0.7744847536087036, "lr": 1.3048108793481614e-06, "epoch": 0.8629032258064516, "percentage": 43.15, "elapsed_time": "5:05:06", "remaining_time": "6:42:04"} +{"current_steps": 3746, "total_steps": 8680, "loss": 0.7578398585319519, "lr": 1.3044480033229513e-06, "epoch": 0.8631336405529954, "percentage": 43.16, "elapsed_time": "5:05:12", "remaining_time": "6:41:59"} +{"current_steps": 3747, "total_steps": 8680, "loss": 0.8767418265342712, "lr": 1.3040850831075168e-06, "epoch": 0.8633640552995392, "percentage": 43.17, "elapsed_time": "5:05:16", "remaining_time": "6:41:53"} +{"current_steps": 3748, "total_steps": 8680, "loss": 0.7484671473503113, "lr": 1.303722118754535e-06, "epoch": 0.8635944700460829, "percentage": 43.18, "elapsed_time": "5:05:21", "remaining_time": "6:41:49"} +{"current_steps": 3749, 
"total_steps": 8680, "loss": 0.7231101989746094, "lr": 1.3033591103166897e-06, "epoch": 0.8638248847926268, "percentage": 43.19, "elapsed_time": "5:05:26", "remaining_time": "6:41:44"} +{"current_steps": 3750, "total_steps": 8680, "loss": 0.7626307606697083, "lr": 1.3029960578466709e-06, "epoch": 0.8640552995391705, "percentage": 43.2, "elapsed_time": "5:05:32", "remaining_time": "6:41:41"} +{"current_steps": 3751, "total_steps": 8680, "loss": 0.7244704961776733, "lr": 1.302632961397176e-06, "epoch": 0.8642857142857143, "percentage": 43.21, "elapsed_time": "5:05:36", "remaining_time": "6:41:34"} +{"current_steps": 3752, "total_steps": 8680, "loss": 0.8575884103775024, "lr": 1.3022698210209066e-06, "epoch": 0.864516129032258, "percentage": 43.23, "elapsed_time": "5:05:42", "remaining_time": "6:41:31"} +{"current_steps": 3753, "total_steps": 8680, "loss": 0.7617322206497192, "lr": 1.3019066367705733e-06, "epoch": 0.8647465437788019, "percentage": 43.24, "elapsed_time": "5:05:46", "remaining_time": "6:41:26"} +{"current_steps": 3754, "total_steps": 8680, "loss": 0.7899904251098633, "lr": 1.3015434086988914e-06, "epoch": 0.8649769585253456, "percentage": 43.25, "elapsed_time": "5:05:50", "remaining_time": "6:41:19"} +{"current_steps": 3755, "total_steps": 8680, "loss": 0.6405949592590332, "lr": 1.3011801368585825e-06, "epoch": 0.8652073732718893, "percentage": 43.26, "elapsed_time": "5:05:56", "remaining_time": "6:41:15"} +{"current_steps": 3756, "total_steps": 8680, "loss": 0.8473223447799683, "lr": 1.300816821302376e-06, "epoch": 0.8654377880184332, "percentage": 43.27, "elapsed_time": "5:06:01", "remaining_time": "6:41:11"} +{"current_steps": 3757, "total_steps": 8680, "loss": 0.7843037843704224, "lr": 1.3004534620830059e-06, "epoch": 0.8656682027649769, "percentage": 43.28, "elapsed_time": "5:06:05", "remaining_time": "6:41:05"} +{"current_steps": 3758, "total_steps": 8680, "loss": 0.7418329119682312, "lr": 1.3000900592532134e-06, "epoch": 0.8658986175115208, 
"percentage": 43.29, "elapsed_time": "5:06:12", "remaining_time": "6:41:03"} +{"current_steps": 3759, "total_steps": 8680, "loss": 0.9007542133331299, "lr": 1.2997266128657462e-06, "epoch": 0.8661290322580645, "percentage": 43.31, "elapsed_time": "5:06:18", "remaining_time": "6:41:00"} +{"current_steps": 3760, "total_steps": 8680, "loss": 0.7214536666870117, "lr": 1.2993631229733582e-06, "epoch": 0.8663594470046083, "percentage": 43.32, "elapsed_time": "5:06:23", "remaining_time": "6:40:55"} +{"current_steps": 3761, "total_steps": 8680, "loss": 0.6538300514221191, "lr": 1.2989995896288085e-06, "epoch": 0.866589861751152, "percentage": 43.33, "elapsed_time": "5:06:27", "remaining_time": "6:40:49"} +{"current_steps": 3762, "total_steps": 8680, "loss": 0.8132497668266296, "lr": 1.2986360128848647e-06, "epoch": 0.8668202764976959, "percentage": 43.34, "elapsed_time": "5:06:34", "remaining_time": "6:40:46"} +{"current_steps": 3763, "total_steps": 8680, "loss": 0.8940386176109314, "lr": 1.2982723927942987e-06, "epoch": 0.8670506912442396, "percentage": 43.35, "elapsed_time": "5:06:38", "remaining_time": "6:40:41"} +{"current_steps": 3764, "total_steps": 8680, "loss": 0.7426153421401978, "lr": 1.2979087294098904e-06, "epoch": 0.8672811059907835, "percentage": 43.36, "elapsed_time": "5:06:44", "remaining_time": "6:40:37"} +{"current_steps": 3765, "total_steps": 8680, "loss": 0.8140754103660583, "lr": 1.2975450227844236e-06, "epoch": 0.8675115207373272, "percentage": 43.38, "elapsed_time": "5:06:48", "remaining_time": "6:40:31"} +{"current_steps": 3766, "total_steps": 8680, "loss": 0.9078278541564941, "lr": 1.2971812729706907e-06, "epoch": 0.867741935483871, "percentage": 43.39, "elapsed_time": "5:06:53", "remaining_time": "6:40:25"} +{"current_steps": 3767, "total_steps": 8680, "loss": 0.6632627248764038, "lr": 1.29681748002149e-06, "epoch": 0.8679723502304147, "percentage": 43.4, "elapsed_time": "5:06:58", "remaining_time": "6:40:22"} +{"current_steps": 3768, 
"total_steps": 8680, "loss": 0.913419246673584, "lr": 1.2964536439896245e-06, "epoch": 0.8682027649769585, "percentage": 43.41, "elapsed_time": "5:07:03", "remaining_time": "6:40:17"} +{"current_steps": 3769, "total_steps": 8680, "loss": 0.776391863822937, "lr": 1.2960897649279054e-06, "epoch": 0.8684331797235023, "percentage": 43.42, "elapsed_time": "5:07:09", "remaining_time": "6:40:13"} +{"current_steps": 3770, "total_steps": 8680, "loss": 0.7171014547348022, "lr": 1.2957258428891488e-06, "epoch": 0.868663594470046, "percentage": 43.43, "elapsed_time": "5:07:14", "remaining_time": "6:40:09"} +{"current_steps": 3771, "total_steps": 8680, "loss": 0.8848521709442139, "lr": 1.2953618779261776e-06, "epoch": 0.8688940092165899, "percentage": 43.44, "elapsed_time": "5:07:18", "remaining_time": "6:40:02"} +{"current_steps": 3772, "total_steps": 8680, "loss": 0.6794570684432983, "lr": 1.2949978700918207e-06, "epoch": 0.8691244239631336, "percentage": 43.46, "elapsed_time": "5:07:24", "remaining_time": "6:39:59"} +{"current_steps": 3773, "total_steps": 8680, "loss": 0.7128770351409912, "lr": 1.2946338194389137e-06, "epoch": 0.8693548387096774, "percentage": 43.47, "elapsed_time": "5:07:29", "remaining_time": "6:39:54"} +{"current_steps": 3774, "total_steps": 8680, "loss": 0.7794370651245117, "lr": 1.2942697260202976e-06, "epoch": 0.8695852534562212, "percentage": 43.48, "elapsed_time": "5:07:33", "remaining_time": "6:39:48"} +{"current_steps": 3775, "total_steps": 8680, "loss": 0.7946528196334839, "lr": 1.2939055898888203e-06, "epoch": 0.869815668202765, "percentage": 43.49, "elapsed_time": "5:07:38", "remaining_time": "6:39:44"} +{"current_steps": 3776, "total_steps": 8680, "loss": 0.7052137851715088, "lr": 1.2935414110973357e-06, "epoch": 0.8700460829493087, "percentage": 43.5, "elapsed_time": "5:07:44", "remaining_time": "6:39:40"} +{"current_steps": 3777, "total_steps": 8680, "loss": 0.785929799079895, "lr": 1.293177189698704e-06, "epoch": 0.8702764976958526, 
"percentage": 43.51, "elapsed_time": "5:07:48", "remaining_time": "6:39:34"} +{"current_steps": 3778, "total_steps": 8680, "loss": 0.7907861471176147, "lr": 1.2928129257457915e-06, "epoch": 0.8705069124423963, "percentage": 43.53, "elapsed_time": "5:07:52", "remaining_time": "6:39:28"} +{"current_steps": 3779, "total_steps": 8680, "loss": 0.9145845770835876, "lr": 1.2924486192914704e-06, "epoch": 0.8707373271889401, "percentage": 43.54, "elapsed_time": "5:07:56", "remaining_time": "6:39:21"} +{"current_steps": 3780, "total_steps": 8680, "loss": 0.8332167863845825, "lr": 1.2920842703886191e-06, "epoch": 0.8709677419354839, "percentage": 43.55, "elapsed_time": "5:08:00", "remaining_time": "6:39:16"} +{"current_steps": 3781, "total_steps": 8680, "loss": 0.9593367576599121, "lr": 1.2917198790901229e-06, "epoch": 0.8711981566820276, "percentage": 43.56, "elapsed_time": "5:08:05", "remaining_time": "6:39:11"} +{"current_steps": 3782, "total_steps": 8680, "loss": 0.9269144535064697, "lr": 1.2913554454488723e-06, "epoch": 0.8714285714285714, "percentage": 43.57, "elapsed_time": "5:08:10", "remaining_time": "6:39:06"} +{"current_steps": 3783, "total_steps": 8680, "loss": 0.8474053144454956, "lr": 1.2909909695177645e-06, "epoch": 0.8716589861751152, "percentage": 43.58, "elapsed_time": "5:08:16", "remaining_time": "6:39:02"} +{"current_steps": 3784, "total_steps": 8680, "loss": 0.8098207116127014, "lr": 1.2906264513497027e-06, "epoch": 0.871889400921659, "percentage": 43.59, "elapsed_time": "5:08:21", "remaining_time": "6:38:58"} +{"current_steps": 3785, "total_steps": 8680, "loss": 0.7394517064094543, "lr": 1.2902618909975962e-06, "epoch": 0.8721198156682027, "percentage": 43.61, "elapsed_time": "5:08:26", "remaining_time": "6:38:54"} +{"current_steps": 3786, "total_steps": 8680, "loss": 0.8667110204696655, "lr": 1.2898972885143606e-06, "epoch": 0.8723502304147466, "percentage": 43.62, "elapsed_time": "5:08:32", "remaining_time": "6:38:49"} +{"current_steps": 3787, 
"total_steps": 8680, "loss": 0.826819121837616, "lr": 1.289532643952917e-06, "epoch": 0.8725806451612903, "percentage": 43.63, "elapsed_time": "5:08:36", "remaining_time": "6:38:44"} +{"current_steps": 3788, "total_steps": 8680, "loss": 0.7765695452690125, "lr": 1.2891679573661937e-06, "epoch": 0.8728110599078341, "percentage": 43.64, "elapsed_time": "5:08:40", "remaining_time": "6:38:38"} +{"current_steps": 3789, "total_steps": 8680, "loss": 0.7180448770523071, "lr": 1.2888032288071245e-06, "epoch": 0.8730414746543779, "percentage": 43.65, "elapsed_time": "5:08:45", "remaining_time": "6:38:34"} +{"current_steps": 3790, "total_steps": 8680, "loss": 0.7619662880897522, "lr": 1.2884384583286486e-06, "epoch": 0.8732718894009217, "percentage": 43.66, "elapsed_time": "5:08:51", "remaining_time": "6:38:30"} +{"current_steps": 3791, "total_steps": 8680, "loss": 0.8332309126853943, "lr": 1.2880736459837123e-06, "epoch": 0.8735023041474654, "percentage": 43.68, "elapsed_time": "5:08:56", "remaining_time": "6:38:24"} +{"current_steps": 3792, "total_steps": 8680, "loss": 0.9314864277839661, "lr": 1.2877087918252676e-06, "epoch": 0.8737327188940092, "percentage": 43.69, "elapsed_time": "5:09:01", "remaining_time": "6:38:20"} +{"current_steps": 3793, "total_steps": 8680, "loss": 0.8505650758743286, "lr": 1.287343895906273e-06, "epoch": 0.873963133640553, "percentage": 43.7, "elapsed_time": "5:09:05", "remaining_time": "6:38:14"} +{"current_steps": 3794, "total_steps": 8680, "loss": 0.8086442351341248, "lr": 1.286978958279692e-06, "epoch": 0.8741935483870967, "percentage": 43.71, "elapsed_time": "5:09:10", "remaining_time": "6:38:10"} +{"current_steps": 3795, "total_steps": 8680, "loss": 0.9369934797286987, "lr": 1.2866139789984951e-06, "epoch": 0.8744239631336406, "percentage": 43.72, "elapsed_time": "5:09:14", "remaining_time": "6:38:04"} +{"current_steps": 3796, "total_steps": 8680, "loss": 0.6776204705238342, "lr": 1.2862489581156585e-06, "epoch": 0.8746543778801843, 
"percentage": 43.73, "elapsed_time": "5:09:19", "remaining_time": "6:37:59"} +{"current_steps": 3797, "total_steps": 8680, "loss": 0.8742507100105286, "lr": 1.2858838956841646e-06, "epoch": 0.8748847926267281, "percentage": 43.74, "elapsed_time": "5:09:24", "remaining_time": "6:37:54"} +{"current_steps": 3798, "total_steps": 8680, "loss": 0.6592123508453369, "lr": 1.285518791757002e-06, "epoch": 0.8751152073732719, "percentage": 43.76, "elapsed_time": "5:09:30", "remaining_time": "6:37:50"} +{"current_steps": 3799, "total_steps": 8680, "loss": 0.727974534034729, "lr": 1.2851536463871646e-06, "epoch": 0.8753456221198157, "percentage": 43.77, "elapsed_time": "5:09:35", "remaining_time": "6:37:45"} +{"current_steps": 3800, "total_steps": 8680, "loss": 0.734921395778656, "lr": 1.284788459627653e-06, "epoch": 0.8755760368663594, "percentage": 43.78, "elapsed_time": "5:09:41", "remaining_time": "6:37:42"} +{"current_steps": 3801, "total_steps": 8680, "loss": 0.8848391771316528, "lr": 1.2844232315314734e-06, "epoch": 0.8758064516129033, "percentage": 43.79, "elapsed_time": "5:09:47", "remaining_time": "6:37:38"} +{"current_steps": 3802, "total_steps": 8680, "loss": 0.7014757394790649, "lr": 1.284057962151638e-06, "epoch": 0.876036866359447, "percentage": 43.8, "elapsed_time": "5:09:53", "remaining_time": "6:37:35"} +{"current_steps": 3803, "total_steps": 8680, "loss": 0.9037606716156006, "lr": 1.2836926515411662e-06, "epoch": 0.8762672811059908, "percentage": 43.81, "elapsed_time": "5:09:56", "remaining_time": "6:37:28"} +{"current_steps": 3804, "total_steps": 8680, "loss": 0.7842103242874146, "lr": 1.2833272997530808e-06, "epoch": 0.8764976958525346, "percentage": 43.82, "elapsed_time": "5:10:01", "remaining_time": "6:37:23"} +{"current_steps": 3805, "total_steps": 8680, "loss": 0.7233899831771851, "lr": 1.282961906840413e-06, "epoch": 0.8767281105990783, "percentage": 43.84, "elapsed_time": "5:10:07", "remaining_time": "6:37:20"} +{"current_steps": 3806, "total_steps": 
8680, "loss": 0.8439977169036865, "lr": 1.2825964728561995e-06, "epoch": 0.8769585253456221, "percentage": 43.85, "elapsed_time": "5:10:12", "remaining_time": "6:37:15"} +{"current_steps": 3807, "total_steps": 8680, "loss": 0.6734062433242798, "lr": 1.2822309978534817e-06, "epoch": 0.8771889400921659, "percentage": 43.86, "elapsed_time": "5:10:19", "remaining_time": "6:37:13"} +{"current_steps": 3808, "total_steps": 8680, "loss": 0.8132908344268799, "lr": 1.2818654818853082e-06, "epoch": 0.8774193548387097, "percentage": 43.87, "elapsed_time": "5:10:23", "remaining_time": "6:37:07"} +{"current_steps": 3809, "total_steps": 8680, "loss": 0.7867386341094971, "lr": 1.2814999250047334e-06, "epoch": 0.8776497695852534, "percentage": 43.88, "elapsed_time": "5:10:29", "remaining_time": "6:37:03"} +{"current_steps": 3810, "total_steps": 8680, "loss": 0.7367507219314575, "lr": 1.2811343272648172e-06, "epoch": 0.8778801843317973, "percentage": 43.89, "elapsed_time": "5:10:34", "remaining_time": "6:36:58"} +{"current_steps": 3811, "total_steps": 8680, "loss": 0.8154586553573608, "lr": 1.280768688718625e-06, "epoch": 0.878110599078341, "percentage": 43.91, "elapsed_time": "5:10:40", "remaining_time": "6:36:55"} +{"current_steps": 3812, "total_steps": 8680, "loss": 0.9962621331214905, "lr": 1.2804030094192297e-06, "epoch": 0.8783410138248848, "percentage": 43.92, "elapsed_time": "5:10:44", "remaining_time": "6:36:49"} +{"current_steps": 3813, "total_steps": 8680, "loss": 0.8720508813858032, "lr": 1.280037289419709e-06, "epoch": 0.8785714285714286, "percentage": 43.93, "elapsed_time": "5:10:49", "remaining_time": "6:36:44"} +{"current_steps": 3814, "total_steps": 8680, "loss": 0.7211558818817139, "lr": 1.2796715287731461e-06, "epoch": 0.8788018433179724, "percentage": 43.94, "elapsed_time": "5:10:53", "remaining_time": "6:36:38"} +{"current_steps": 3815, "total_steps": 8680, "loss": 0.8354029059410095, "lr": 1.279305727532631e-06, "epoch": 0.8790322580645161, "percentage": 43.95, 
"elapsed_time": "5:10:58", "remaining_time": "6:36:34"} +{"current_steps": 3816, "total_steps": 8680, "loss": 0.9136772155761719, "lr": 1.2789398857512597e-06, "epoch": 0.8792626728110599, "percentage": 43.96, "elapsed_time": "5:11:04", "remaining_time": "6:36:30"} +{"current_steps": 3817, "total_steps": 8680, "loss": 0.7603391408920288, "lr": 1.2785740034821328e-06, "epoch": 0.8794930875576037, "percentage": 43.97, "elapsed_time": "5:11:09", "remaining_time": "6:36:25"} +{"current_steps": 3818, "total_steps": 8680, "loss": 0.8938640356063843, "lr": 1.2782080807783582e-06, "epoch": 0.8797235023041474, "percentage": 43.99, "elapsed_time": "5:11:13", "remaining_time": "6:36:20"} +{"current_steps": 3819, "total_steps": 8680, "loss": 0.8041675090789795, "lr": 1.2778421176930492e-06, "epoch": 0.8799539170506913, "percentage": 44.0, "elapsed_time": "5:11:19", "remaining_time": "6:36:16"} +{"current_steps": 3820, "total_steps": 8680, "loss": 0.7128704786300659, "lr": 1.2774761142793246e-06, "epoch": 0.880184331797235, "percentage": 44.01, "elapsed_time": "5:11:23", "remaining_time": "6:36:10"} +{"current_steps": 3821, "total_steps": 8680, "loss": 0.7927603721618652, "lr": 1.277110070590309e-06, "epoch": 0.8804147465437788, "percentage": 44.02, "elapsed_time": "5:11:30", "remaining_time": "6:36:07"} +{"current_steps": 3822, "total_steps": 8680, "loss": 0.8294891119003296, "lr": 1.2767439866791342e-06, "epoch": 0.8806451612903226, "percentage": 44.03, "elapsed_time": "5:11:35", "remaining_time": "6:36:03"} +{"current_steps": 3823, "total_steps": 8680, "loss": 0.8058860301971436, "lr": 1.2763778625989354e-06, "epoch": 0.8808755760368664, "percentage": 44.04, "elapsed_time": "5:11:40", "remaining_time": "6:35:58"} +{"current_steps": 3824, "total_steps": 8680, "loss": 0.9073271751403809, "lr": 1.2760116984028559e-06, "epoch": 0.8811059907834101, "percentage": 44.06, "elapsed_time": "5:11:44", "remaining_time": "6:35:52"} +{"current_steps": 3825, "total_steps": 8680, "loss": 
0.755131721496582, "lr": 1.2756454941440439e-06, "epoch": 0.881336405529954, "percentage": 44.07, "elapsed_time": "5:11:50", "remaining_time": "6:35:48"} +{"current_steps": 3826, "total_steps": 8680, "loss": 0.7571133375167847, "lr": 1.2752792498756532e-06, "epoch": 0.8815668202764977, "percentage": 44.08, "elapsed_time": "5:11:54", "remaining_time": "6:35:43"} +{"current_steps": 3827, "total_steps": 8680, "loss": 0.8021755218505859, "lr": 1.2749129656508438e-06, "epoch": 0.8817972350230415, "percentage": 44.09, "elapsed_time": "5:11:59", "remaining_time": "6:35:38"} +{"current_steps": 3828, "total_steps": 8680, "loss": 0.7817519903182983, "lr": 1.2745466415227812e-06, "epoch": 0.8820276497695853, "percentage": 44.1, "elapsed_time": "5:12:04", "remaining_time": "6:35:33"} +{"current_steps": 3829, "total_steps": 8680, "loss": 0.7144416570663452, "lr": 1.2741802775446375e-06, "epoch": 0.882258064516129, "percentage": 44.11, "elapsed_time": "5:12:08", "remaining_time": "6:35:26"} +{"current_steps": 3830, "total_steps": 8680, "loss": 0.8154206275939941, "lr": 1.2738138737695894e-06, "epoch": 0.8824884792626728, "percentage": 44.12, "elapsed_time": "5:12:13", "remaining_time": "6:35:22"} +{"current_steps": 3831, "total_steps": 8680, "loss": 0.7478733062744141, "lr": 1.2734474302508199e-06, "epoch": 0.8827188940092165, "percentage": 44.14, "elapsed_time": "5:12:16", "remaining_time": "6:35:15"} +{"current_steps": 3832, "total_steps": 8680, "loss": 0.7792314291000366, "lr": 1.2730809470415177e-06, "epoch": 0.8829493087557604, "percentage": 44.15, "elapsed_time": "5:12:22", "remaining_time": "6:35:12"} +{"current_steps": 3833, "total_steps": 8680, "loss": 0.8550708293914795, "lr": 1.2727144241948776e-06, "epoch": 0.8831797235023041, "percentage": 44.16, "elapsed_time": "5:12:27", "remaining_time": "6:35:07"} +{"current_steps": 3834, "total_steps": 8680, "loss": 0.9415113925933838, "lr": 1.2723478617641e-06, "epoch": 0.883410138248848, "percentage": 44.17, "elapsed_time": 
"5:12:33", "remaining_time": "6:35:03"} +{"current_steps": 3835, "total_steps": 8680, "loss": 0.8359560370445251, "lr": 1.2719812598023909e-06, "epoch": 0.8836405529953917, "percentage": 44.18, "elapsed_time": "5:12:39", "remaining_time": "6:35:00"} +{"current_steps": 3836, "total_steps": 8680, "loss": 0.9515634775161743, "lr": 1.2716146183629618e-06, "epoch": 0.8838709677419355, "percentage": 44.19, "elapsed_time": "5:12:45", "remaining_time": "6:34:55"} +{"current_steps": 3837, "total_steps": 8680, "loss": 0.9433277249336243, "lr": 1.2712479374990302e-06, "epoch": 0.8841013824884792, "percentage": 44.21, "elapsed_time": "5:12:48", "remaining_time": "6:34:49"} +{"current_steps": 3838, "total_steps": 8680, "loss": 0.809203028678894, "lr": 1.27088121726382e-06, "epoch": 0.8843317972350231, "percentage": 44.22, "elapsed_time": "5:12:54", "remaining_time": "6:34:45"} +{"current_steps": 3839, "total_steps": 8680, "loss": 0.8003803491592407, "lr": 1.2705144577105596e-06, "epoch": 0.8845622119815668, "percentage": 44.23, "elapsed_time": "5:12:59", "remaining_time": "6:34:41"} +{"current_steps": 3840, "total_steps": 8680, "loss": 0.8258087038993835, "lr": 1.2701476588924837e-06, "epoch": 0.8847926267281107, "percentage": 44.24, "elapsed_time": "5:13:04", "remaining_time": "6:34:36"} +{"current_steps": 3841, "total_steps": 8680, "loss": 0.7337249517440796, "lr": 1.2697808208628326e-06, "epoch": 0.8850230414746544, "percentage": 44.25, "elapsed_time": "5:13:10", "remaining_time": "6:34:33"} +{"current_steps": 3842, "total_steps": 8680, "loss": 0.6963306665420532, "lr": 1.269413943674853e-06, "epoch": 0.8852534562211981, "percentage": 44.26, "elapsed_time": "5:13:16", "remaining_time": "6:34:29"} +{"current_steps": 3843, "total_steps": 8680, "loss": 0.8849321603775024, "lr": 1.2690470273817955e-06, "epoch": 0.885483870967742, "percentage": 44.27, "elapsed_time": "5:13:21", "remaining_time": "6:34:24"} +{"current_steps": 3844, "total_steps": 8680, "loss": 0.804117739200592, 
"lr": 1.2686800720369183e-06, "epoch": 0.8857142857142857, "percentage": 44.29, "elapsed_time": "5:13:26", "remaining_time": "6:34:19"} +{"current_steps": 3845, "total_steps": 8680, "loss": 0.7873985767364502, "lr": 1.2683130776934848e-06, "epoch": 0.8859447004608295, "percentage": 44.3, "elapsed_time": "5:13:32", "remaining_time": "6:34:16"} +{"current_steps": 3846, "total_steps": 8680, "loss": 0.7401156425476074, "lr": 1.2679460444047627e-06, "epoch": 0.8861751152073732, "percentage": 44.31, "elapsed_time": "5:13:36", "remaining_time": "6:34:10"} +{"current_steps": 3847, "total_steps": 8680, "loss": 0.8216343522071838, "lr": 1.2675789722240274e-06, "epoch": 0.8864055299539171, "percentage": 44.32, "elapsed_time": "5:13:40", "remaining_time": "6:34:04"} +{"current_steps": 3848, "total_steps": 8680, "loss": 0.9367883205413818, "lr": 1.2672118612045583e-06, "epoch": 0.8866359447004608, "percentage": 44.33, "elapsed_time": "5:13:46", "remaining_time": "6:34:00"} +{"current_steps": 3849, "total_steps": 8680, "loss": 0.959208607673645, "lr": 1.2668447113996411e-06, "epoch": 0.8868663594470046, "percentage": 44.34, "elapsed_time": "5:13:51", "remaining_time": "6:33:55"} +{"current_steps": 3850, "total_steps": 8680, "loss": 0.754011869430542, "lr": 1.2664775228625678e-06, "epoch": 0.8870967741935484, "percentage": 44.35, "elapsed_time": "5:13:56", "remaining_time": "6:33:50"} +{"current_steps": 3851, "total_steps": 8680, "loss": 0.7200918793678284, "lr": 1.2661102956466343e-06, "epoch": 0.8873271889400922, "percentage": 44.37, "elapsed_time": "5:14:01", "remaining_time": "6:33:46"} +{"current_steps": 3852, "total_steps": 8680, "loss": 0.7819997072219849, "lr": 1.2657430298051441e-06, "epoch": 0.8875576036866359, "percentage": 44.38, "elapsed_time": "5:14:06", "remaining_time": "6:33:41"} +{"current_steps": 3853, "total_steps": 8680, "loss": 0.6145305037498474, "lr": 1.2653757253914045e-06, "epoch": 0.8877880184331797, "percentage": 44.39, "elapsed_time": "5:14:11", 
"remaining_time": "6:33:37"} +{"current_steps": 3854, "total_steps": 8680, "loss": 0.8730908036231995, "lr": 1.2650083824587298e-06, "epoch": 0.8880184331797235, "percentage": 44.4, "elapsed_time": "5:14:15", "remaining_time": "6:33:31"} +{"current_steps": 3855, "total_steps": 8680, "loss": 0.7595944404602051, "lr": 1.2646410010604395e-06, "epoch": 0.8882488479262672, "percentage": 44.41, "elapsed_time": "5:14:21", "remaining_time": "6:33:27"} +{"current_steps": 3856, "total_steps": 8680, "loss": 0.8533104658126831, "lr": 1.264273581249858e-06, "epoch": 0.8884792626728111, "percentage": 44.42, "elapsed_time": "5:14:26", "remaining_time": "6:33:23"} +{"current_steps": 3857, "total_steps": 8680, "loss": 0.7239818572998047, "lr": 1.263906123080316e-06, "epoch": 0.8887096774193548, "percentage": 44.44, "elapsed_time": "5:14:31", "remaining_time": "6:33:18"} +{"current_steps": 3858, "total_steps": 8680, "loss": 0.7675650119781494, "lr": 1.2635386266051498e-06, "epoch": 0.8889400921658986, "percentage": 44.45, "elapsed_time": "5:14:36", "remaining_time": "6:33:13"} +{"current_steps": 3859, "total_steps": 8680, "loss": 0.8886630535125732, "lr": 1.2631710918777007e-06, "epoch": 0.8891705069124424, "percentage": 44.46, "elapsed_time": "5:14:41", "remaining_time": "6:33:08"} +{"current_steps": 3860, "total_steps": 8680, "loss": 0.798930287361145, "lr": 1.2628035189513159e-06, "epoch": 0.8894009216589862, "percentage": 44.47, "elapsed_time": "5:14:45", "remaining_time": "6:33:02"} +{"current_steps": 3861, "total_steps": 8680, "loss": 0.7189278602600098, "lr": 1.2624359078793484e-06, "epoch": 0.8896313364055299, "percentage": 44.48, "elapsed_time": "5:14:53", "remaining_time": "6:33:00"} +{"current_steps": 3862, "total_steps": 8680, "loss": 0.8187342882156372, "lr": 1.2620682587151565e-06, "epoch": 0.8898617511520738, "percentage": 44.49, "elapsed_time": "5:14:57", "remaining_time": "6:32:55"} +{"current_steps": 3863, "total_steps": 8680, "loss": 0.880839467048645, "lr": 
1.2617005715121034e-06, "epoch": 0.8900921658986175, "percentage": 44.5, "elapsed_time": "5:15:04", "remaining_time": "6:32:53"} +{"current_steps": 3864, "total_steps": 8680, "loss": 0.84575355052948, "lr": 1.2613328463235586e-06, "epoch": 0.8903225806451613, "percentage": 44.52, "elapsed_time": "5:15:08", "remaining_time": "6:32:47"} +{"current_steps": 3865, "total_steps": 8680, "loss": 0.6823658347129822, "lr": 1.2609650832028978e-06, "epoch": 0.8905529953917051, "percentage": 44.53, "elapsed_time": "5:15:14", "remaining_time": "6:32:43"} +{"current_steps": 3866, "total_steps": 8680, "loss": 0.8295711278915405, "lr": 1.2605972822035e-06, "epoch": 0.8907834101382488, "percentage": 44.54, "elapsed_time": "5:15:19", "remaining_time": "6:32:39"} +{"current_steps": 3867, "total_steps": 8680, "loss": 0.8684213161468506, "lr": 1.2602294433787518e-06, "epoch": 0.8910138248847926, "percentage": 44.55, "elapsed_time": "5:15:23", "remaining_time": "6:32:33"} +{"current_steps": 3868, "total_steps": 8680, "loss": 0.6560889482498169, "lr": 1.2598615667820447e-06, "epoch": 0.8912442396313364, "percentage": 44.56, "elapsed_time": "5:15:30", "remaining_time": "6:32:30"} +{"current_steps": 3869, "total_steps": 8680, "loss": 0.740487277507782, "lr": 1.259493652466775e-06, "epoch": 0.8914746543778802, "percentage": 44.57, "elapsed_time": "5:15:35", "remaining_time": "6:32:25"} +{"current_steps": 3870, "total_steps": 8680, "loss": 0.8167253732681274, "lr": 1.2591257004863453e-06, "epoch": 0.8917050691244239, "percentage": 44.59, "elapsed_time": "5:15:40", "remaining_time": "6:32:21"} +{"current_steps": 3871, "total_steps": 8680, "loss": 0.8521690368652344, "lr": 1.2587577108941634e-06, "epoch": 0.8919354838709678, "percentage": 44.6, "elapsed_time": "5:15:45", "remaining_time": "6:32:16"} +{"current_steps": 3872, "total_steps": 8680, "loss": 0.8830848932266235, "lr": 1.2583896837436418e-06, "epoch": 0.8921658986175115, "percentage": 44.61, "elapsed_time": "5:15:50", "remaining_time": 
"6:32:11"} +{"current_steps": 3873, "total_steps": 8680, "loss": 0.7080649137496948, "lr": 1.2580216190881999e-06, "epoch": 0.8923963133640553, "percentage": 44.62, "elapsed_time": "5:15:55", "remaining_time": "6:32:07"} +{"current_steps": 3874, "total_steps": 8680, "loss": 0.8013911247253418, "lr": 1.2576535169812614e-06, "epoch": 0.8926267281105991, "percentage": 44.63, "elapsed_time": "5:16:01", "remaining_time": "6:32:02"} +{"current_steps": 3875, "total_steps": 8680, "loss": 0.8307033777236938, "lr": 1.2572853774762564e-06, "epoch": 0.8928571428571429, "percentage": 44.64, "elapsed_time": "5:16:06", "remaining_time": "6:31:58"} +{"current_steps": 3876, "total_steps": 8680, "loss": 0.7514123916625977, "lr": 1.256917200626619e-06, "epoch": 0.8930875576036866, "percentage": 44.65, "elapsed_time": "5:16:10", "remaining_time": "6:31:52"} +{"current_steps": 3877, "total_steps": 8680, "loss": 0.7608132362365723, "lr": 1.2565489864857903e-06, "epoch": 0.8933179723502304, "percentage": 44.67, "elapsed_time": "5:16:16", "remaining_time": "6:31:48"} +{"current_steps": 3878, "total_steps": 8680, "loss": 0.8011139631271362, "lr": 1.256180735107216e-06, "epoch": 0.8935483870967742, "percentage": 44.68, "elapsed_time": "5:16:20", "remaining_time": "6:31:43"} +{"current_steps": 3879, "total_steps": 8680, "loss": 0.9760414958000183, "lr": 1.2558124465443467e-06, "epoch": 0.8937788018433179, "percentage": 44.69, "elapsed_time": "5:16:25", "remaining_time": "6:31:38"} +{"current_steps": 3880, "total_steps": 8680, "loss": 0.7292976379394531, "lr": 1.2554441208506399e-06, "epoch": 0.8940092165898618, "percentage": 44.7, "elapsed_time": "5:16:30", "remaining_time": "6:31:33"} +{"current_steps": 3881, "total_steps": 8680, "loss": 0.819061279296875, "lr": 1.255075758079557e-06, "epoch": 0.8942396313364055, "percentage": 44.71, "elapsed_time": "5:16:36", "remaining_time": "6:31:29"} +{"current_steps": 3882, "total_steps": 8680, "loss": 0.8407306671142578, "lr": 1.2547073582845652e-06, 
"epoch": 0.8944700460829493, "percentage": 44.72, "elapsed_time": "5:16:42", "remaining_time": "6:31:26"} +{"current_steps": 3883, "total_steps": 8680, "loss": 0.7452164888381958, "lr": 1.2543389215191379e-06, "epoch": 0.8947004608294931, "percentage": 44.74, "elapsed_time": "5:16:47", "remaining_time": "6:31:21"} +{"current_steps": 3884, "total_steps": 8680, "loss": 0.9001756310462952, "lr": 1.2539704478367525e-06, "epoch": 0.8949308755760369, "percentage": 44.75, "elapsed_time": "5:16:50", "remaining_time": "6:31:14"} +{"current_steps": 3885, "total_steps": 8680, "loss": 0.7006322741508484, "lr": 1.253601937290893e-06, "epoch": 0.8951612903225806, "percentage": 44.76, "elapsed_time": "5:16:55", "remaining_time": "6:31:09"} +{"current_steps": 3886, "total_steps": 8680, "loss": 0.8464070558547974, "lr": 1.253233389935048e-06, "epoch": 0.8953917050691245, "percentage": 44.77, "elapsed_time": "5:17:01", "remaining_time": "6:31:06"} +{"current_steps": 3887, "total_steps": 8680, "loss": 0.8153925538063049, "lr": 1.2528648058227117e-06, "epoch": 0.8956221198156682, "percentage": 44.78, "elapsed_time": "5:17:05", "remaining_time": "6:31:00"} +{"current_steps": 3888, "total_steps": 8680, "loss": 0.7093103528022766, "lr": 1.2524961850073835e-06, "epoch": 0.895852534562212, "percentage": 44.79, "elapsed_time": "5:17:11", "remaining_time": "6:30:56"} +{"current_steps": 3889, "total_steps": 8680, "loss": 0.676047682762146, "lr": 1.2521275275425685e-06, "epoch": 0.8960829493087558, "percentage": 44.8, "elapsed_time": "5:17:16", "remaining_time": "6:30:51"} +{"current_steps": 3890, "total_steps": 8680, "loss": 0.6980170011520386, "lr": 1.2517588334817765e-06, "epoch": 0.8963133640552995, "percentage": 44.82, "elapsed_time": "5:17:22", "remaining_time": "6:30:48"} +{"current_steps": 3891, "total_steps": 8680, "loss": 0.7343952655792236, "lr": 1.2513901028785232e-06, "epoch": 0.8965437788018433, "percentage": 44.83, "elapsed_time": "5:17:28", "remaining_time": "6:30:44"} 
+{"current_steps": 3892, "total_steps": 8680, "loss": 0.6836012005805969, "lr": 1.251021335786329e-06, "epoch": 0.896774193548387, "percentage": 44.84, "elapsed_time": "5:17:33", "remaining_time": "6:30:40"} +{"current_steps": 3893, "total_steps": 8680, "loss": 0.7405731678009033, "lr": 1.2506525322587204e-06, "epoch": 0.8970046082949309, "percentage": 44.85, "elapsed_time": "5:17:40", "remaining_time": "6:30:37"} +{"current_steps": 3894, "total_steps": 8680, "loss": 0.7626791596412659, "lr": 1.2502836923492288e-06, "epoch": 0.8972350230414746, "percentage": 44.86, "elapsed_time": "5:17:45", "remaining_time": "6:30:33"} +{"current_steps": 3895, "total_steps": 8680, "loss": 0.951126754283905, "lr": 1.2499148161113904e-06, "epoch": 0.8974654377880185, "percentage": 44.87, "elapsed_time": "5:17:50", "remaining_time": "6:30:27"} +{"current_steps": 3896, "total_steps": 8680, "loss": 0.8248430490493774, "lr": 1.249545903598747e-06, "epoch": 0.8976958525345622, "percentage": 44.88, "elapsed_time": "5:17:54", "remaining_time": "6:30:22"} +{"current_steps": 3897, "total_steps": 8680, "loss": 0.9306991100311279, "lr": 1.2491769548648466e-06, "epoch": 0.897926267281106, "percentage": 44.9, "elapsed_time": "5:17:59", "remaining_time": "6:30:17"} +{"current_steps": 3898, "total_steps": 8680, "loss": 0.8529196977615356, "lr": 1.2488079699632406e-06, "epoch": 0.8981566820276498, "percentage": 44.91, "elapsed_time": "5:18:04", "remaining_time": "6:30:12"} +{"current_steps": 3899, "total_steps": 8680, "loss": 0.8614317178726196, "lr": 1.2484389489474873e-06, "epoch": 0.8983870967741936, "percentage": 44.92, "elapsed_time": "5:18:09", "remaining_time": "6:30:07"} +{"current_steps": 3900, "total_steps": 8680, "loss": 0.723548173904419, "lr": 1.2480698918711494e-06, "epoch": 0.8986175115207373, "percentage": 44.93, "elapsed_time": "5:18:15", "remaining_time": "6:30:04"} +{"current_steps": 3901, "total_steps": 8680, "loss": 0.9424235820770264, "lr": 1.2477007987877953e-06, "epoch": 
0.8988479262672812, "percentage": 44.94, "elapsed_time": "5:18:24", "remaining_time": "6:30:03"} +{"current_steps": 3902, "total_steps": 8680, "loss": 0.8307658433914185, "lr": 1.2473316697509982e-06, "epoch": 0.8990783410138249, "percentage": 44.95, "elapsed_time": "5:18:28", "remaining_time": "6:29:57"} +{"current_steps": 3903, "total_steps": 8680, "loss": 0.7164772748947144, "lr": 1.2469625048143364e-06, "epoch": 0.8993087557603686, "percentage": 44.97, "elapsed_time": "5:18:32", "remaining_time": "6:29:52"} +{"current_steps": 3904, "total_steps": 8680, "loss": 0.824491024017334, "lr": 1.2465933040313941e-06, "epoch": 0.8995391705069125, "percentage": 44.98, "elapsed_time": "5:18:37", "remaining_time": "6:29:47"} +{"current_steps": 3905, "total_steps": 8680, "loss": 0.7468826770782471, "lr": 1.24622406745576e-06, "epoch": 0.8997695852534562, "percentage": 44.99, "elapsed_time": "5:18:42", "remaining_time": "6:29:43"} +{"current_steps": 3906, "total_steps": 8680, "loss": 0.8049126863479614, "lr": 1.2458547951410285e-06, "epoch": 0.9, "percentage": 45.0, "elapsed_time": "5:18:47", "remaining_time": "6:29:37"} +{"current_steps": 3907, "total_steps": 8680, "loss": 0.658754825592041, "lr": 1.245485487140799e-06, "epoch": 0.9002304147465438, "percentage": 45.01, "elapsed_time": "5:18:52", "remaining_time": "6:29:33"} +{"current_steps": 3908, "total_steps": 8680, "loss": 0.6772202849388123, "lr": 1.245116143508676e-06, "epoch": 0.9004608294930876, "percentage": 45.02, "elapsed_time": "5:18:58", "remaining_time": "6:29:29"} +{"current_steps": 3909, "total_steps": 8680, "loss": 0.8160394430160522, "lr": 1.2447467642982697e-06, "epoch": 0.9006912442396313, "percentage": 45.03, "elapsed_time": "5:19:02", "remaining_time": "6:29:24"} +{"current_steps": 3910, "total_steps": 8680, "loss": 0.8289823532104492, "lr": 1.244377349563194e-06, "epoch": 0.9009216589861752, "percentage": 45.05, "elapsed_time": "5:19:06", "remaining_time": "6:29:17"} +{"current_steps": 3911, 
"total_steps": 8680, "loss": 0.7574084997177124, "lr": 1.24400789935707e-06, "epoch": 0.9011520737327189, "percentage": 45.06, "elapsed_time": "5:19:11", "remaining_time": "6:29:12"} +{"current_steps": 3912, "total_steps": 8680, "loss": 0.8116365671157837, "lr": 1.2436384137335218e-06, "epoch": 0.9013824884792627, "percentage": 45.07, "elapsed_time": "5:19:15", "remaining_time": "6:29:07"} +{"current_steps": 3913, "total_steps": 8680, "loss": 0.814805805683136, "lr": 1.2432688927461808e-06, "epoch": 0.9016129032258065, "percentage": 45.08, "elapsed_time": "5:19:21", "remaining_time": "6:29:03"} +{"current_steps": 3914, "total_steps": 8680, "loss": 0.7947453260421753, "lr": 1.2428993364486822e-06, "epoch": 0.9018433179723502, "percentage": 45.09, "elapsed_time": "5:19:26", "remaining_time": "6:28:59"} +{"current_steps": 3915, "total_steps": 8680, "loss": 0.939562976360321, "lr": 1.2425297448946661e-06, "epoch": 0.902073732718894, "percentage": 45.1, "elapsed_time": "5:19:31", "remaining_time": "6:28:53"} +{"current_steps": 3916, "total_steps": 8680, "loss": 0.9460225105285645, "lr": 1.2421601181377787e-06, "epoch": 0.9023041474654377, "percentage": 45.12, "elapsed_time": "5:19:37", "remaining_time": "6:28:50"} +{"current_steps": 3917, "total_steps": 8680, "loss": 0.9183799028396606, "lr": 1.241790456231671e-06, "epoch": 0.9025345622119816, "percentage": 45.13, "elapsed_time": "5:19:42", "remaining_time": "6:28:45"} +{"current_steps": 3918, "total_steps": 8680, "loss": 0.6793398857116699, "lr": 1.2414207592299984e-06, "epoch": 0.9027649769585253, "percentage": 45.14, "elapsed_time": "5:19:47", "remaining_time": "6:28:41"} +{"current_steps": 3919, "total_steps": 8680, "loss": 0.7796125411987305, "lr": 1.2410510271864222e-06, "epoch": 0.9029953917050692, "percentage": 45.15, "elapsed_time": "5:19:52", "remaining_time": "6:28:35"} +{"current_steps": 3920, "total_steps": 8680, "loss": 0.8164567351341248, "lr": 1.2406812601546085e-06, "epoch": 0.9032258064516129, 
"percentage": 45.16, "elapsed_time": "5:19:57", "remaining_time": "6:28:30"} +{"current_steps": 3921, "total_steps": 8680, "loss": 0.7267879247665405, "lr": 1.2403114581882288e-06, "epoch": 0.9034562211981567, "percentage": 45.17, "elapsed_time": "5:20:01", "remaining_time": "6:28:25"} +{"current_steps": 3922, "total_steps": 8680, "loss": 0.7277103066444397, "lr": 1.2399416213409586e-06, "epoch": 0.9036866359447004, "percentage": 45.18, "elapsed_time": "5:20:07", "remaining_time": "6:28:21"} +{"current_steps": 3923, "total_steps": 8680, "loss": 0.8507979512214661, "lr": 1.23957174966648e-06, "epoch": 0.9039170506912443, "percentage": 45.2, "elapsed_time": "5:20:12", "remaining_time": "6:28:17"} +{"current_steps": 3924, "total_steps": 8680, "loss": 0.9431333541870117, "lr": 1.2392018432184792e-06, "epoch": 0.904147465437788, "percentage": 45.21, "elapsed_time": "5:20:16", "remaining_time": "6:28:11"} +{"current_steps": 3925, "total_steps": 8680, "loss": 0.669041633605957, "lr": 1.2388319020506473e-06, "epoch": 0.9043778801843319, "percentage": 45.22, "elapsed_time": "5:20:23", "remaining_time": "6:28:08"} +{"current_steps": 3926, "total_steps": 8680, "loss": 0.7639964818954468, "lr": 1.2384619262166808e-06, "epoch": 0.9046082949308756, "percentage": 45.23, "elapsed_time": "5:20:29", "remaining_time": "6:28:05"} +{"current_steps": 3927, "total_steps": 8680, "loss": 0.7390594482421875, "lr": 1.2380919157702819e-06, "epoch": 0.9048387096774193, "percentage": 45.24, "elapsed_time": "5:20:33", "remaining_time": "6:27:59"} +{"current_steps": 3928, "total_steps": 8680, "loss": 0.8320105075836182, "lr": 1.2377218707651562e-06, "epoch": 0.9050691244239631, "percentage": 45.25, "elapsed_time": "5:20:39", "remaining_time": "6:27:55"} +{"current_steps": 3929, "total_steps": 8680, "loss": 0.6820249557495117, "lr": 1.237351791255016e-06, "epoch": 0.9052995391705069, "percentage": 45.26, "elapsed_time": "5:20:46", "remaining_time": "6:27:52"} +{"current_steps": 3930, 
"total_steps": 8680, "loss": 0.8548537492752075, "lr": 1.2369816772935773e-06, "epoch": 0.9055299539170507, "percentage": 45.28, "elapsed_time": "5:20:51", "remaining_time": "6:27:48"} +{"current_steps": 3931, "total_steps": 8680, "loss": 0.7226318120956421, "lr": 1.236611528934562e-06, "epoch": 0.9057603686635944, "percentage": 45.29, "elapsed_time": "5:20:55", "remaining_time": "6:27:42"} +{"current_steps": 3932, "total_steps": 8680, "loss": 0.879987359046936, "lr": 1.2362413462316963e-06, "epoch": 0.9059907834101383, "percentage": 45.3, "elapsed_time": "5:20:59", "remaining_time": "6:27:36"} +{"current_steps": 3933, "total_steps": 8680, "loss": 0.7919881343841553, "lr": 1.2358711292387122e-06, "epoch": 0.906221198156682, "percentage": 45.31, "elapsed_time": "5:21:05", "remaining_time": "6:27:32"} +{"current_steps": 3934, "total_steps": 8680, "loss": 0.8232694268226624, "lr": 1.2355008780093456e-06, "epoch": 0.9064516129032258, "percentage": 45.32, "elapsed_time": "5:21:11", "remaining_time": "6:27:29"} +{"current_steps": 3935, "total_steps": 8680, "loss": 0.80347740650177, "lr": 1.2351305925973385e-06, "epoch": 0.9066820276497696, "percentage": 45.33, "elapsed_time": "5:21:15", "remaining_time": "6:27:23"} +{"current_steps": 3936, "total_steps": 8680, "loss": 0.7818408012390137, "lr": 1.234760273056437e-06, "epoch": 0.9069124423963134, "percentage": 45.35, "elapsed_time": "5:21:20", "remaining_time": "6:27:18"} +{"current_steps": 3937, "total_steps": 8680, "loss": 0.8391210436820984, "lr": 1.2343899194403931e-06, "epoch": 0.9071428571428571, "percentage": 45.36, "elapsed_time": "5:21:23", "remaining_time": "6:27:11"} +{"current_steps": 3938, "total_steps": 8680, "loss": 0.7937500476837158, "lr": 1.2340195318029622e-06, "epoch": 0.9073732718894009, "percentage": 45.37, "elapsed_time": "5:21:29", "remaining_time": "6:27:07"} +{"current_steps": 3939, "total_steps": 8680, "loss": 0.7158668041229248, "lr": 1.2336491101979065e-06, "epoch": 0.9076036866359447, 
"percentage": 45.38, "elapsed_time": "5:21:34", "remaining_time": "6:27:02"} +{"current_steps": 3940, "total_steps": 8680, "loss": 0.6956034898757935, "lr": 1.2332786546789915e-06, "epoch": 0.9078341013824884, "percentage": 45.39, "elapsed_time": "5:21:40", "remaining_time": "6:26:59"} +{"current_steps": 3941, "total_steps": 8680, "loss": 0.7252948880195618, "lr": 1.2329081652999887e-06, "epoch": 0.9080645161290323, "percentage": 45.4, "elapsed_time": "5:21:44", "remaining_time": "6:26:53"} +{"current_steps": 3942, "total_steps": 8680, "loss": 0.7131162881851196, "lr": 1.2325376421146739e-06, "epoch": 0.908294930875576, "percentage": 45.41, "elapsed_time": "5:21:49", "remaining_time": "6:26:48"} +{"current_steps": 3943, "total_steps": 8680, "loss": 0.7383663654327393, "lr": 1.2321670851768285e-06, "epoch": 0.9085253456221198, "percentage": 45.43, "elapsed_time": "5:21:54", "remaining_time": "6:26:44"} +{"current_steps": 3944, "total_steps": 8680, "loss": 0.8296892642974854, "lr": 1.2317964945402374e-06, "epoch": 0.9087557603686636, "percentage": 45.44, "elapsed_time": "5:21:59", "remaining_time": "6:26:39"} +{"current_steps": 3945, "total_steps": 8680, "loss": 0.8314273357391357, "lr": 1.2314258702586923e-06, "epoch": 0.9089861751152074, "percentage": 45.45, "elapsed_time": "5:22:04", "remaining_time": "6:26:34"} +{"current_steps": 3946, "total_steps": 8680, "loss": 0.7264384031295776, "lr": 1.2310552123859888e-06, "epoch": 0.9092165898617511, "percentage": 45.46, "elapsed_time": "5:22:10", "remaining_time": "6:26:30"} +{"current_steps": 3947, "total_steps": 8680, "loss": 0.6757937073707581, "lr": 1.230684520975927e-06, "epoch": 0.909447004608295, "percentage": 45.47, "elapsed_time": "5:22:14", "remaining_time": "6:26:24"} +{"current_steps": 3948, "total_steps": 8680, "loss": 0.8318504691123962, "lr": 1.230313796082312e-06, "epoch": 0.9096774193548387, "percentage": 45.48, "elapsed_time": "5:22:20", "remaining_time": "6:26:20"} +{"current_steps": 3949, 
"total_steps": 8680, "loss": 0.7043207883834839, "lr": 1.2299430377589547e-06, "epoch": 0.9099078341013825, "percentage": 45.5, "elapsed_time": "5:22:25", "remaining_time": "6:26:15"} +{"current_steps": 3950, "total_steps": 8680, "loss": 0.8499487638473511, "lr": 1.2295722460596696e-06, "epoch": 0.9101382488479263, "percentage": 45.51, "elapsed_time": "5:22:29", "remaining_time": "6:26:10"} +{"current_steps": 3951, "total_steps": 8680, "loss": 0.8219600319862366, "lr": 1.2292014210382772e-06, "epoch": 0.91036866359447, "percentage": 45.52, "elapsed_time": "5:22:35", "remaining_time": "6:26:06"} +{"current_steps": 3952, "total_steps": 8680, "loss": 0.8136317133903503, "lr": 1.2288305627486017e-06, "epoch": 0.9105990783410138, "percentage": 45.53, "elapsed_time": "5:22:39", "remaining_time": "6:26:00"} +{"current_steps": 3953, "total_steps": 8680, "loss": 0.7858958840370178, "lr": 1.2284596712444735e-06, "epoch": 0.9108294930875576, "percentage": 45.54, "elapsed_time": "5:22:44", "remaining_time": "6:25:56"} +{"current_steps": 3954, "total_steps": 8680, "loss": 0.8108563423156738, "lr": 1.2280887465797259e-06, "epoch": 0.9110599078341014, "percentage": 45.55, "elapsed_time": "5:22:49", "remaining_time": "6:25:51"} +{"current_steps": 3955, "total_steps": 8680, "loss": 0.8061145544052124, "lr": 1.2277177888081987e-06, "epoch": 0.9112903225806451, "percentage": 45.56, "elapsed_time": "5:22:56", "remaining_time": "6:25:48"} +{"current_steps": 3956, "total_steps": 8680, "loss": 0.7769665718078613, "lr": 1.2273467979837361e-06, "epoch": 0.911520737327189, "percentage": 45.58, "elapsed_time": "5:23:01", "remaining_time": "6:25:44"} +{"current_steps": 3957, "total_steps": 8680, "loss": 1.0548570156097412, "lr": 1.2269757741601867e-06, "epoch": 0.9117511520737327, "percentage": 45.59, "elapsed_time": "5:23:06", "remaining_time": "6:25:39"} +{"current_steps": 3958, "total_steps": 8680, "loss": 0.7095952033996582, "lr": 1.226604717391404e-06, "epoch": 0.9119815668202765, 
"percentage": 45.6, "elapsed_time": "5:23:11", "remaining_time": "6:25:34"} +{"current_steps": 3959, "total_steps": 8680, "loss": 0.7330363392829895, "lr": 1.226233627731247e-06, "epoch": 0.9122119815668203, "percentage": 45.61, "elapsed_time": "5:23:16", "remaining_time": "6:25:29"} +{"current_steps": 3960, "total_steps": 8680, "loss": 0.7328442931175232, "lr": 1.225862505233578e-06, "epoch": 0.9124423963133641, "percentage": 45.62, "elapsed_time": "5:23:21", "remaining_time": "6:25:25"} +{"current_steps": 3961, "total_steps": 8680, "loss": 0.7572993040084839, "lr": 1.2254913499522656e-06, "epoch": 0.9126728110599078, "percentage": 45.63, "elapsed_time": "5:23:27", "remaining_time": "6:25:21"} +{"current_steps": 3962, "total_steps": 8680, "loss": 0.7706469297409058, "lr": 1.2251201619411823e-06, "epoch": 0.9129032258064517, "percentage": 45.65, "elapsed_time": "5:23:33", "remaining_time": "6:25:17"} +{"current_steps": 3963, "total_steps": 8680, "loss": 0.7830193042755127, "lr": 1.2247489412542053e-06, "epoch": 0.9131336405529954, "percentage": 45.66, "elapsed_time": "5:23:38", "remaining_time": "6:25:12"} +{"current_steps": 3964, "total_steps": 8680, "loss": 0.8415955901145935, "lr": 1.224377687945217e-06, "epoch": 0.9133640552995391, "percentage": 45.67, "elapsed_time": "5:23:44", "remaining_time": "6:25:09"} +{"current_steps": 3965, "total_steps": 8680, "loss": 0.7383062839508057, "lr": 1.2240064020681044e-06, "epoch": 0.913594470046083, "percentage": 45.68, "elapsed_time": "5:23:49", "remaining_time": "6:25:04"} +{"current_steps": 3966, "total_steps": 8680, "loss": 0.7372882962226868, "lr": 1.2236350836767593e-06, "epoch": 0.9138248847926267, "percentage": 45.69, "elapsed_time": "5:23:53", "remaining_time": "6:24:59"} +{"current_steps": 3967, "total_steps": 8680, "loss": 0.7914254665374756, "lr": 1.2232637328250776e-06, "epoch": 0.9140552995391705, "percentage": 45.7, "elapsed_time": "5:23:57", "remaining_time": "6:24:53"} +{"current_steps": 3968, 
"total_steps": 8680, "loss": 0.8510675430297852, "lr": 1.2228923495669605e-06, "epoch": 0.9142857142857143, "percentage": 45.71, "elapsed_time": "5:24:03", "remaining_time": "6:24:49"} +{"current_steps": 3969, "total_steps": 8680, "loss": 0.7391757369041443, "lr": 1.2225209339563143e-06, "epoch": 0.9145161290322581, "percentage": 45.73, "elapsed_time": "5:24:08", "remaining_time": "6:24:43"} +{"current_steps": 3970, "total_steps": 8680, "loss": 0.69194495677948, "lr": 1.2221494860470491e-06, "epoch": 0.9147465437788018, "percentage": 45.74, "elapsed_time": "5:24:13", "remaining_time": "6:24:39"} +{"current_steps": 3971, "total_steps": 8680, "loss": 0.7593865394592285, "lr": 1.22177800589308e-06, "epoch": 0.9149769585253457, "percentage": 45.75, "elapsed_time": "5:24:16", "remaining_time": "6:24:33"} +{"current_steps": 3972, "total_steps": 8680, "loss": 0.7831966876983643, "lr": 1.2214064935483268e-06, "epoch": 0.9152073732718894, "percentage": 45.76, "elapsed_time": "5:24:22", "remaining_time": "6:24:29"} +{"current_steps": 3973, "total_steps": 8680, "loss": 0.8858723640441895, "lr": 1.2210349490667145e-06, "epoch": 0.9154377880184332, "percentage": 45.77, "elapsed_time": "5:24:26", "remaining_time": "6:24:22"} +{"current_steps": 3974, "total_steps": 8680, "loss": 0.8645567893981934, "lr": 1.2206633725021715e-06, "epoch": 0.915668202764977, "percentage": 45.78, "elapsed_time": "5:24:31", "remaining_time": "6:24:17"} +{"current_steps": 3975, "total_steps": 8680, "loss": 0.7619047164916992, "lr": 1.2202917639086322e-06, "epoch": 0.9158986175115207, "percentage": 45.79, "elapsed_time": "5:24:37", "remaining_time": "6:24:15"} +{"current_steps": 3976, "total_steps": 8680, "loss": 0.8652681112289429, "lr": 1.2199201233400355e-06, "epoch": 0.9161290322580645, "percentage": 45.81, "elapsed_time": "5:24:42", "remaining_time": "6:24:10"} +{"current_steps": 3977, "total_steps": 8680, "loss": 0.6860940456390381, "lr": 1.2195484508503234e-06, "epoch": 0.9163594470046083, 
"percentage": 45.82, "elapsed_time": "5:24:48", "remaining_time": "6:24:05"} +{"current_steps": 3978, "total_steps": 8680, "loss": 0.7372464537620544, "lr": 1.2191767464934444e-06, "epoch": 0.9165898617511521, "percentage": 45.83, "elapsed_time": "5:24:53", "remaining_time": "6:24:01"} +{"current_steps": 3979, "total_steps": 8680, "loss": 0.8719853162765503, "lr": 1.218805010323351e-06, "epoch": 0.9168202764976958, "percentage": 45.84, "elapsed_time": "5:24:58", "remaining_time": "6:23:56"} +{"current_steps": 3980, "total_steps": 8680, "loss": 0.8203779458999634, "lr": 1.2184332423940003e-06, "epoch": 0.9170506912442397, "percentage": 45.85, "elapsed_time": "5:25:02", "remaining_time": "6:23:50"} +{"current_steps": 3981, "total_steps": 8680, "loss": 0.8648861646652222, "lr": 1.218061442759353e-06, "epoch": 0.9172811059907834, "percentage": 45.86, "elapsed_time": "5:25:07", "remaining_time": "6:23:45"} +{"current_steps": 3982, "total_steps": 8680, "loss": 0.7651659250259399, "lr": 1.2176896114733766e-06, "epoch": 0.9175115207373272, "percentage": 45.88, "elapsed_time": "5:25:13", "remaining_time": "6:23:41"} +{"current_steps": 3983, "total_steps": 8680, "loss": 0.8495512008666992, "lr": 1.2173177485900408e-06, "epoch": 0.917741935483871, "percentage": 45.89, "elapsed_time": "5:25:17", "remaining_time": "6:23:36"} +{"current_steps": 3984, "total_steps": 8680, "loss": 0.7997228503227234, "lr": 1.2169458541633216e-06, "epoch": 0.9179723502304148, "percentage": 45.9, "elapsed_time": "5:25:22", "remaining_time": "6:23:31"} +{"current_steps": 3985, "total_steps": 8680, "loss": 0.8353173136711121, "lr": 1.2165739282471987e-06, "epoch": 0.9182027649769585, "percentage": 45.91, "elapsed_time": "5:25:27", "remaining_time": "6:23:26"} +{"current_steps": 3986, "total_steps": 8680, "loss": 0.9039655327796936, "lr": 1.216201970895657e-06, "epoch": 0.9184331797235024, "percentage": 45.92, "elapsed_time": "5:25:32", "remaining_time": "6:23:22"} +{"current_steps": 3987, 
"total_steps": 8680, "loss": 0.8158592581748962, "lr": 1.2158299821626854e-06, "epoch": 0.9186635944700461, "percentage": 45.93, "elapsed_time": "5:25:37", "remaining_time": "6:23:17"} +{"current_steps": 3988, "total_steps": 8680, "loss": 0.8443971872329712, "lr": 1.2154579621022776e-06, "epoch": 0.9188940092165898, "percentage": 45.94, "elapsed_time": "5:25:42", "remaining_time": "6:23:12"} +{"current_steps": 3989, "total_steps": 8680, "loss": 0.7934167385101318, "lr": 1.2150859107684318e-06, "epoch": 0.9191244239631337, "percentage": 45.96, "elapsed_time": "5:25:47", "remaining_time": "6:23:08"} +{"current_steps": 3990, "total_steps": 8680, "loss": 0.750052809715271, "lr": 1.2147138282151512e-06, "epoch": 0.9193548387096774, "percentage": 45.97, "elapsed_time": "5:25:53", "remaining_time": "6:23:04"} +{"current_steps": 3991, "total_steps": 8680, "loss": 0.813056468963623, "lr": 1.2143417144964423e-06, "epoch": 0.9195852534562212, "percentage": 45.98, "elapsed_time": "5:26:00", "remaining_time": "6:23:01"} +{"current_steps": 3992, "total_steps": 8680, "loss": 0.9478945732116699, "lr": 1.2139695696663174e-06, "epoch": 0.919815668202765, "percentage": 45.99, "elapsed_time": "5:26:04", "remaining_time": "6:22:55"} +{"current_steps": 3993, "total_steps": 8680, "loss": 0.687637448310852, "lr": 1.2135973937787927e-06, "epoch": 0.9200460829493088, "percentage": 46.0, "elapsed_time": "5:26:09", "remaining_time": "6:22:51"} +{"current_steps": 3994, "total_steps": 8680, "loss": 0.8073818683624268, "lr": 1.213225186887889e-06, "epoch": 0.9202764976958525, "percentage": 46.01, "elapsed_time": "5:26:16", "remaining_time": "6:22:48"} +{"current_steps": 3995, "total_steps": 8680, "loss": 0.6684166789054871, "lr": 1.2128529490476318e-06, "epoch": 0.9205069124423964, "percentage": 46.03, "elapsed_time": "5:26:22", "remaining_time": "6:22:45"} +{"current_steps": 3996, "total_steps": 8680, "loss": 0.7897466421127319, "lr": 1.2124806803120506e-06, "epoch": 0.9207373271889401, 
"percentage": 46.04, "elapsed_time": "5:26:28", "remaining_time": "6:22:40"} +{"current_steps": 3997, "total_steps": 8680, "loss": 0.832312822341919, "lr": 1.21210838073518e-06, "epoch": 0.9209677419354839, "percentage": 46.05, "elapsed_time": "5:26:32", "remaining_time": "6:22:34"} +{"current_steps": 3998, "total_steps": 8680, "loss": 0.9536067247390747, "lr": 1.2117360503710588e-06, "epoch": 0.9211981566820276, "percentage": 46.06, "elapsed_time": "5:26:36", "remaining_time": "6:22:29"} +{"current_steps": 3999, "total_steps": 8680, "loss": 0.8959759473800659, "lr": 1.2113636892737302e-06, "epoch": 0.9214285714285714, "percentage": 46.07, "elapsed_time": "5:26:41", "remaining_time": "6:22:24"} +{"current_steps": 4000, "total_steps": 8680, "loss": 0.6789166927337646, "lr": 1.2109912974972422e-06, "epoch": 0.9216589861751152, "percentage": 46.08, "elapsed_time": "5:26:48", "remaining_time": "6:22:21"} +{"current_steps": 4001, "total_steps": 8680, "loss": 0.7336491346359253, "lr": 1.2106188750956464e-06, "epoch": 0.9218894009216589, "percentage": 46.09, "elapsed_time": "5:26:54", "remaining_time": "6:22:18"} +{"current_steps": 4002, "total_steps": 8680, "loss": 0.7838259935379028, "lr": 1.2102464221229997e-06, "epoch": 0.9221198156682028, "percentage": 46.11, "elapsed_time": "5:27:00", "remaining_time": "6:22:14"} +{"current_steps": 4003, "total_steps": 8680, "loss": 0.9147623777389526, "lr": 1.2098739386333631e-06, "epoch": 0.9223502304147465, "percentage": 46.12, "elapsed_time": "5:27:05", "remaining_time": "6:22:09"} +{"current_steps": 4004, "total_steps": 8680, "loss": 0.7296491265296936, "lr": 1.2095014246808022e-06, "epoch": 0.9225806451612903, "percentage": 46.13, "elapsed_time": "5:27:10", "remaining_time": "6:22:05"} +{"current_steps": 4005, "total_steps": 8680, "loss": 0.7898432016372681, "lr": 1.2091288803193868e-06, "epoch": 0.9228110599078341, "percentage": 46.14, "elapsed_time": "5:27:14", "remaining_time": "6:21:59"} +{"current_steps": 4006, 
"total_steps": 8680, "loss": 0.8190659284591675, "lr": 1.2087563056031914e-06, "epoch": 0.9230414746543779, "percentage": 46.15, "elapsed_time": "5:27:18", "remaining_time": "6:21:53"} +{"current_steps": 4007, "total_steps": 8680, "loss": 0.8383443355560303, "lr": 1.2083837005862945e-06, "epoch": 0.9232718894009216, "percentage": 46.16, "elapsed_time": "5:27:22", "remaining_time": "6:21:47"} +{"current_steps": 4008, "total_steps": 8680, "loss": 0.5987120866775513, "lr": 1.2080110653227796e-06, "epoch": 0.9235023041474655, "percentage": 46.18, "elapsed_time": "5:27:29", "remaining_time": "6:21:45"} +{"current_steps": 4009, "total_steps": 8680, "loss": 0.8811358213424683, "lr": 1.2076383998667334e-06, "epoch": 0.9237327188940092, "percentage": 46.19, "elapsed_time": "5:27:36", "remaining_time": "6:21:41"} +{"current_steps": 4010, "total_steps": 8680, "loss": 0.7958807349205017, "lr": 1.2072657042722486e-06, "epoch": 0.923963133640553, "percentage": 46.2, "elapsed_time": "5:27:41", "remaining_time": "6:21:37"} +{"current_steps": 4011, "total_steps": 8680, "loss": 0.7192457914352417, "lr": 1.2068929785934215e-06, "epoch": 0.9241935483870968, "percentage": 46.21, "elapsed_time": "5:27:46", "remaining_time": "6:21:32"} +{"current_steps": 4012, "total_steps": 8680, "loss": 0.6854838132858276, "lr": 1.2065202228843523e-06, "epoch": 0.9244239631336405, "percentage": 46.22, "elapsed_time": "5:27:50", "remaining_time": "6:21:26"} +{"current_steps": 4013, "total_steps": 8680, "loss": 0.7334680557250977, "lr": 1.2061474371991457e-06, "epoch": 0.9246543778801843, "percentage": 46.23, "elapsed_time": "5:27:55", "remaining_time": "6:21:22"} +{"current_steps": 4014, "total_steps": 8680, "loss": 0.7614402770996094, "lr": 1.205774621591912e-06, "epoch": 0.9248847926267281, "percentage": 46.24, "elapsed_time": "5:28:02", "remaining_time": "6:21:19"} +{"current_steps": 4015, "total_steps": 8680, "loss": 0.7502505779266357, "lr": 1.2054017761167644e-06, "epoch": 0.9251152073732719, 
"percentage": 46.26, "elapsed_time": "5:28:07", "remaining_time": "6:21:15"} +{"current_steps": 4016, "total_steps": 8680, "loss": 0.7922523021697998, "lr": 1.2050289008278205e-06, "epoch": 0.9253456221198156, "percentage": 46.27, "elapsed_time": "5:28:11", "remaining_time": "6:21:08"} +{"current_steps": 4017, "total_steps": 8680, "loss": 0.7534265518188477, "lr": 1.2046559957792032e-06, "epoch": 0.9255760368663595, "percentage": 46.28, "elapsed_time": "5:28:15", "remaining_time": "6:21:03"} +{"current_steps": 4018, "total_steps": 8680, "loss": 0.6997093558311462, "lr": 1.2042830610250395e-06, "epoch": 0.9258064516129032, "percentage": 46.29, "elapsed_time": "5:28:21", "remaining_time": "6:20:59"} +{"current_steps": 4019, "total_steps": 8680, "loss": 0.7009599208831787, "lr": 1.2039100966194594e-06, "epoch": 0.926036866359447, "percentage": 46.3, "elapsed_time": "5:28:28", "remaining_time": "6:20:56"} +{"current_steps": 4020, "total_steps": 8680, "loss": 0.795873761177063, "lr": 1.203537102616599e-06, "epoch": 0.9262672811059908, "percentage": 46.31, "elapsed_time": "5:28:31", "remaining_time": "6:20:50"} +{"current_steps": 4021, "total_steps": 8680, "loss": 0.7860225439071655, "lr": 1.2031640790705972e-06, "epoch": 0.9264976958525346, "percentage": 46.32, "elapsed_time": "5:28:36", "remaining_time": "6:20:44"} +{"current_steps": 4022, "total_steps": 8680, "loss": 0.7657063007354736, "lr": 1.2027910260355989e-06, "epoch": 0.9267281105990783, "percentage": 46.34, "elapsed_time": "5:28:43", "remaining_time": "6:20:42"} +{"current_steps": 4023, "total_steps": 8680, "loss": 0.782909631729126, "lr": 1.2024179435657512e-06, "epoch": 0.9269585253456222, "percentage": 46.35, "elapsed_time": "5:28:50", "remaining_time": "6:20:39"} +{"current_steps": 4024, "total_steps": 8680, "loss": 0.713431715965271, "lr": 1.202044831715207e-06, "epoch": 0.9271889400921659, "percentage": 46.36, "elapsed_time": "5:28:55", "remaining_time": "6:20:35"} +{"current_steps": 4025, "total_steps": 
8680, "loss": 0.9126790165901184, "lr": 1.201671690538123e-06, "epoch": 0.9274193548387096, "percentage": 46.37, "elapsed_time": "5:29:02", "remaining_time": "6:20:32"} +{"current_steps": 4026, "total_steps": 8680, "loss": 0.8640999794006348, "lr": 1.20129852008866e-06, "epoch": 0.9276497695852535, "percentage": 46.38, "elapsed_time": "5:29:08", "remaining_time": "6:20:28"} +{"current_steps": 4027, "total_steps": 8680, "loss": 0.723473072052002, "lr": 1.2009253204209832e-06, "epoch": 0.9278801843317972, "percentage": 46.39, "elapsed_time": "5:29:15", "remaining_time": "6:20:26"} +{"current_steps": 4028, "total_steps": 8680, "loss": 0.6764041185379028, "lr": 1.2005520915892626e-06, "epoch": 0.928110599078341, "percentage": 46.41, "elapsed_time": "5:29:20", "remaining_time": "6:20:22"} +{"current_steps": 4029, "total_steps": 8680, "loss": 0.8525882959365845, "lr": 1.200178833647671e-06, "epoch": 0.9283410138248848, "percentage": 46.42, "elapsed_time": "5:29:26", "remaining_time": "6:20:18"} +{"current_steps": 4030, "total_steps": 8680, "loss": 0.714957058429718, "lr": 1.1998055466503872e-06, "epoch": 0.9285714285714286, "percentage": 46.43, "elapsed_time": "5:29:30", "remaining_time": "6:20:12"} +{"current_steps": 4031, "total_steps": 8680, "loss": 0.8015910387039185, "lr": 1.1994322306515925e-06, "epoch": 0.9288018433179723, "percentage": 46.44, "elapsed_time": "5:29:36", "remaining_time": "6:20:08"} +{"current_steps": 4032, "total_steps": 8680, "loss": 1.0306739807128906, "lr": 1.1990588857054733e-06, "epoch": 0.9290322580645162, "percentage": 46.45, "elapsed_time": "5:29:43", "remaining_time": "6:20:05"} +{"current_steps": 4033, "total_steps": 8680, "loss": 0.8307464122772217, "lr": 1.1986855118662205e-06, "epoch": 0.9292626728110599, "percentage": 46.46, "elapsed_time": "5:29:48", "remaining_time": "6:20:01"} +{"current_steps": 4034, "total_steps": 8680, "loss": 0.8720347881317139, "lr": 1.1983121091880286e-06, "epoch": 0.9294930875576037, "percentage": 46.47, 
"elapsed_time": "5:29:52", "remaining_time": "6:19:55"} +{"current_steps": 4035, "total_steps": 8680, "loss": 0.7716174721717834, "lr": 1.1979386777250968e-06, "epoch": 0.9297235023041475, "percentage": 46.49, "elapsed_time": "5:29:58", "remaining_time": "6:19:51"} +{"current_steps": 4036, "total_steps": 8680, "loss": 0.8968960046768188, "lr": 1.1975652175316279e-06, "epoch": 0.9299539170506912, "percentage": 46.5, "elapsed_time": "5:30:03", "remaining_time": "6:19:47"} +{"current_steps": 4037, "total_steps": 8680, "loss": 0.7472472786903381, "lr": 1.197191728661829e-06, "epoch": 0.930184331797235, "percentage": 46.51, "elapsed_time": "5:30:08", "remaining_time": "6:19:42"} +{"current_steps": 4038, "total_steps": 8680, "loss": 0.7969691753387451, "lr": 1.196818211169912e-06, "epoch": 0.9304147465437788, "percentage": 46.52, "elapsed_time": "5:30:15", "remaining_time": "6:19:38"} +{"current_steps": 4039, "total_steps": 8680, "loss": 0.6187525987625122, "lr": 1.196444665110092e-06, "epoch": 0.9306451612903226, "percentage": 46.53, "elapsed_time": "5:30:20", "remaining_time": "6:19:34"} +{"current_steps": 4040, "total_steps": 8680, "loss": 0.8715502619743347, "lr": 1.1960710905365893e-06, "epoch": 0.9308755760368663, "percentage": 46.54, "elapsed_time": "5:30:25", "remaining_time": "6:19:30"} +{"current_steps": 4041, "total_steps": 8680, "loss": 0.7174774408340454, "lr": 1.1956974875036273e-06, "epoch": 0.9311059907834102, "percentage": 46.56, "elapsed_time": "5:30:32", "remaining_time": "6:19:27"} +{"current_steps": 4042, "total_steps": 8680, "loss": 0.6546192169189453, "lr": 1.1953238560654337e-06, "epoch": 0.9313364055299539, "percentage": 46.57, "elapsed_time": "5:30:38", "remaining_time": "6:19:23"} +{"current_steps": 4043, "total_steps": 8680, "loss": 0.8688700199127197, "lr": 1.194950196276241e-06, "epoch": 0.9315668202764977, "percentage": 46.58, "elapsed_time": "5:30:42", "remaining_time": "6:19:17"} +{"current_steps": 4044, "total_steps": 8680, "loss": 
0.7679718732833862, "lr": 1.1945765081902856e-06, "epoch": 0.9317972350230415, "percentage": 46.59, "elapsed_time": "5:30:47", "remaining_time": "6:19:13"} +{"current_steps": 4045, "total_steps": 8680, "loss": 0.6335175037384033, "lr": 1.1942027918618073e-06, "epoch": 0.9320276497695853, "percentage": 46.6, "elapsed_time": "5:30:54", "remaining_time": "6:19:10"} +{"current_steps": 4046, "total_steps": 8680, "loss": 0.785153865814209, "lr": 1.1938290473450513e-06, "epoch": 0.932258064516129, "percentage": 46.61, "elapsed_time": "5:30:58", "remaining_time": "6:19:04"} +{"current_steps": 4047, "total_steps": 8680, "loss": 0.6873019337654114, "lr": 1.1934552746942653e-06, "epoch": 0.9324884792626729, "percentage": 46.62, "elapsed_time": "5:31:04", "remaining_time": "6:19:01"} +{"current_steps": 4048, "total_steps": 8680, "loss": 0.7416094541549683, "lr": 1.1930814739637025e-06, "epoch": 0.9327188940092166, "percentage": 46.64, "elapsed_time": "5:31:10", "remaining_time": "6:18:57"} +{"current_steps": 4049, "total_steps": 8680, "loss": 0.7206372618675232, "lr": 1.1927076452076193e-06, "epoch": 0.9329493087557603, "percentage": 46.65, "elapsed_time": "5:31:16", "remaining_time": "6:18:53"} +{"current_steps": 4050, "total_steps": 8680, "loss": 0.8352477550506592, "lr": 1.1923337884802767e-06, "epoch": 0.9331797235023042, "percentage": 46.66, "elapsed_time": "5:31:21", "remaining_time": "6:18:48"} +{"current_steps": 4051, "total_steps": 8680, "loss": 0.8243483304977417, "lr": 1.191959903835939e-06, "epoch": 0.9334101382488479, "percentage": 46.67, "elapsed_time": "5:31:25", "remaining_time": "6:18:43"} +{"current_steps": 4052, "total_steps": 8680, "loss": 0.827987790107727, "lr": 1.1915859913288756e-06, "epoch": 0.9336405529953917, "percentage": 46.68, "elapsed_time": "5:31:31", "remaining_time": "6:18:39"} +{"current_steps": 4053, "total_steps": 8680, "loss": 0.8624123334884644, "lr": 1.1912120510133589e-06, "epoch": 0.9338709677419355, "percentage": 46.69, 
"elapsed_time": "5:31:35", "remaining_time": "6:18:32"} +{"current_steps": 4054, "total_steps": 8680, "loss": 0.8615037202835083, "lr": 1.1908380829436667e-06, "epoch": 0.9341013824884793, "percentage": 46.71, "elapsed_time": "5:31:38", "remaining_time": "6:18:26"} +{"current_steps": 4055, "total_steps": 8680, "loss": 0.9367121458053589, "lr": 1.190464087174079e-06, "epoch": 0.934331797235023, "percentage": 46.72, "elapsed_time": "5:31:43", "remaining_time": "6:18:21"} +{"current_steps": 4056, "total_steps": 8680, "loss": 0.927996039390564, "lr": 1.190090063758881e-06, "epoch": 0.9345622119815669, "percentage": 46.73, "elapsed_time": "5:31:47", "remaining_time": "6:18:15"} +{"current_steps": 4057, "total_steps": 8680, "loss": 0.841314435005188, "lr": 1.1897160127523623e-06, "epoch": 0.9347926267281106, "percentage": 46.74, "elapsed_time": "5:31:51", "remaining_time": "6:18:08"} +{"current_steps": 4058, "total_steps": 8680, "loss": 0.864904522895813, "lr": 1.189341934208815e-06, "epoch": 0.9350230414746544, "percentage": 46.75, "elapsed_time": "5:31:55", "remaining_time": "6:18:03"} +{"current_steps": 4059, "total_steps": 8680, "loss": 0.9505404829978943, "lr": 1.188967828182537e-06, "epoch": 0.9352534562211982, "percentage": 46.76, "elapsed_time": "5:31:59", "remaining_time": "6:17:57"} +{"current_steps": 4060, "total_steps": 8680, "loss": 0.7347132563591003, "lr": 1.188593694727829e-06, "epoch": 0.9354838709677419, "percentage": 46.77, "elapsed_time": "5:32:04", "remaining_time": "6:17:53"} +{"current_steps": 4061, "total_steps": 8680, "loss": 0.6267231106758118, "lr": 1.1882195338989958e-06, "epoch": 0.9357142857142857, "percentage": 46.79, "elapsed_time": "5:32:10", "remaining_time": "6:17:48"} +{"current_steps": 4062, "total_steps": 8680, "loss": 0.8052406907081604, "lr": 1.1878453457503464e-06, "epoch": 0.9359447004608294, "percentage": 46.8, "elapsed_time": "5:32:15", "remaining_time": "6:17:44"} +{"current_steps": 4063, "total_steps": 8680, "loss": 
0.7928211688995361, "lr": 1.1874711303361933e-06, "epoch": 0.9361751152073733, "percentage": 46.81, "elapsed_time": "5:32:20", "remaining_time": "6:17:39"} +{"current_steps": 4064, "total_steps": 8680, "loss": 0.8863959312438965, "lr": 1.1870968877108545e-06, "epoch": 0.936405529953917, "percentage": 46.82, "elapsed_time": "5:32:25", "remaining_time": "6:17:34"} +{"current_steps": 4065, "total_steps": 8680, "loss": 0.8749874830245972, "lr": 1.1867226179286496e-06, "epoch": 0.9366359447004609, "percentage": 46.83, "elapsed_time": "5:32:31", "remaining_time": "6:17:31"} +{"current_steps": 4066, "total_steps": 8680, "loss": 0.7516318559646606, "lr": 1.186348321043904e-06, "epoch": 0.9368663594470046, "percentage": 46.84, "elapsed_time": "5:32:36", "remaining_time": "6:17:26"} +{"current_steps": 4067, "total_steps": 8680, "loss": 0.8435031771659851, "lr": 1.1859739971109467e-06, "epoch": 0.9370967741935484, "percentage": 46.85, "elapsed_time": "5:32:40", "remaining_time": "6:17:20"} +{"current_steps": 4068, "total_steps": 8680, "loss": 0.8766932487487793, "lr": 1.1855996461841093e-06, "epoch": 0.9373271889400921, "percentage": 46.87, "elapsed_time": "5:32:44", "remaining_time": "6:17:14"} +{"current_steps": 4069, "total_steps": 8680, "loss": 0.8748513460159302, "lr": 1.1852252683177293e-06, "epoch": 0.937557603686636, "percentage": 46.88, "elapsed_time": "5:32:48", "remaining_time": "6:17:08"} +{"current_steps": 4070, "total_steps": 8680, "loss": 0.8917855024337769, "lr": 1.184850863566147e-06, "epoch": 0.9377880184331797, "percentage": 46.89, "elapsed_time": "5:32:53", "remaining_time": "6:17:03"} +{"current_steps": 4071, "total_steps": 8680, "loss": 0.7631640434265137, "lr": 1.1844764319837064e-06, "epoch": 0.9380184331797236, "percentage": 46.9, "elapsed_time": "5:33:00", "remaining_time": "6:17:01"} +{"current_steps": 4072, "total_steps": 8680, "loss": 0.8354158401489258, "lr": 1.1841019736247557e-06, "epoch": 0.9382488479262673, "percentage": 46.91, 
"elapsed_time": "5:33:06", "remaining_time": "6:16:56"} +{"current_steps": 4073, "total_steps": 8680, "loss": 0.8122761845588684, "lr": 1.1837274885436473e-06, "epoch": 0.938479262672811, "percentage": 46.92, "elapsed_time": "5:33:10", "remaining_time": "6:16:51"} +{"current_steps": 4074, "total_steps": 8680, "loss": 0.8281430006027222, "lr": 1.1833529767947374e-06, "epoch": 0.9387096774193548, "percentage": 46.94, "elapsed_time": "5:33:14", "remaining_time": "6:16:45"} +{"current_steps": 4075, "total_steps": 8680, "loss": 0.8291982412338257, "lr": 1.1829784384323856e-06, "epoch": 0.9389400921658986, "percentage": 46.95, "elapsed_time": "5:33:19", "remaining_time": "6:16:40"} +{"current_steps": 4076, "total_steps": 8680, "loss": 0.8951852321624756, "lr": 1.1826038735109553e-06, "epoch": 0.9391705069124424, "percentage": 46.96, "elapsed_time": "5:33:24", "remaining_time": "6:16:35"} +{"current_steps": 4077, "total_steps": 8680, "loss": 0.7006446123123169, "lr": 1.182229282084815e-06, "epoch": 0.9394009216589861, "percentage": 46.97, "elapsed_time": "5:33:29", "remaining_time": "6:16:30"} +{"current_steps": 4078, "total_steps": 8680, "loss": 0.8944047689437866, "lr": 1.1818546642083353e-06, "epoch": 0.93963133640553, "percentage": 46.98, "elapsed_time": "5:33:33", "remaining_time": "6:16:25"} +{"current_steps": 4079, "total_steps": 8680, "loss": 0.8252646923065186, "lr": 1.1814800199358919e-06, "epoch": 0.9398617511520737, "percentage": 46.99, "elapsed_time": "5:33:38", "remaining_time": "6:16:20"} +{"current_steps": 4080, "total_steps": 8680, "loss": 0.7852828502655029, "lr": 1.181105349321864e-06, "epoch": 0.9400921658986175, "percentage": 47.0, "elapsed_time": "5:33:44", "remaining_time": "6:16:17"} +{"current_steps": 4081, "total_steps": 8680, "loss": 0.7758563160896301, "lr": 1.1807306524206347e-06, "epoch": 0.9403225806451613, "percentage": 47.02, "elapsed_time": "5:33:50", "remaining_time": "6:16:13"} +{"current_steps": 4082, "total_steps": 8680, "loss": 
0.7297114133834839, "lr": 1.1803559292865899e-06, "epoch": 0.9405529953917051, "percentage": 47.03, "elapsed_time": "5:33:57", "remaining_time": "6:16:10"} +{"current_steps": 4083, "total_steps": 8680, "loss": 0.7974321842193604, "lr": 1.1799811799741209e-06, "epoch": 0.9407834101382488, "percentage": 47.04, "elapsed_time": "5:34:03", "remaining_time": "6:16:06"} +{"current_steps": 4084, "total_steps": 8680, "loss": 0.6406733989715576, "lr": 1.179606404537622e-06, "epoch": 0.9410138248847926, "percentage": 47.05, "elapsed_time": "5:34:08", "remaining_time": "6:16:01"} +{"current_steps": 4085, "total_steps": 8680, "loss": 0.6925486326217651, "lr": 1.179231603031491e-06, "epoch": 0.9412442396313364, "percentage": 47.06, "elapsed_time": "5:34:12", "remaining_time": "6:15:56"} +{"current_steps": 4086, "total_steps": 8680, "loss": 0.792647123336792, "lr": 1.17885677551013e-06, "epoch": 0.9414746543778801, "percentage": 47.07, "elapsed_time": "5:34:19", "remaining_time": "6:15:53"} +{"current_steps": 4087, "total_steps": 8680, "loss": 0.7499191761016846, "lr": 1.1784819220279454e-06, "epoch": 0.941705069124424, "percentage": 47.09, "elapsed_time": "5:34:25", "remaining_time": "6:15:49"} +{"current_steps": 4088, "total_steps": 8680, "loss": 0.8307451009750366, "lr": 1.1781070426393455e-06, "epoch": 0.9419354838709677, "percentage": 47.1, "elapsed_time": "5:34:29", "remaining_time": "6:15:43"} +{"current_steps": 4089, "total_steps": 8680, "loss": 0.7859289646148682, "lr": 1.1777321373987445e-06, "epoch": 0.9421658986175115, "percentage": 47.11, "elapsed_time": "5:34:33", "remaining_time": "6:15:38"} +{"current_steps": 4090, "total_steps": 8680, "loss": 0.761134922504425, "lr": 1.177357206360559e-06, "epoch": 0.9423963133640553, "percentage": 47.12, "elapsed_time": "5:34:38", "remaining_time": "6:15:33"} +{"current_steps": 4091, "total_steps": 8680, "loss": 0.8697078227996826, "lr": 1.1769822495792098e-06, "epoch": 0.9426267281105991, "percentage": 47.13, "elapsed_time": 
"5:34:43", "remaining_time": "6:15:28"} +{"current_steps": 4092, "total_steps": 8680, "loss": 0.731541633605957, "lr": 1.1766072671091212e-06, "epoch": 0.9428571428571428, "percentage": 47.14, "elapsed_time": "5:34:48", "remaining_time": "6:15:23"} +{"current_steps": 4093, "total_steps": 8680, "loss": 0.7501940727233887, "lr": 1.1762322590047219e-06, "epoch": 0.9430875576036867, "percentage": 47.15, "elapsed_time": "5:34:54", "remaining_time": "6:15:19"} +{"current_steps": 4094, "total_steps": 8680, "loss": 0.9448602199554443, "lr": 1.1758572253204431e-06, "epoch": 0.9433179723502304, "percentage": 47.17, "elapsed_time": "5:34:59", "remaining_time": "6:15:14"} +{"current_steps": 4095, "total_steps": 8680, "loss": 0.7704026699066162, "lr": 1.175482166110721e-06, "epoch": 0.9435483870967742, "percentage": 47.18, "elapsed_time": "5:35:04", "remaining_time": "6:15:09"} +{"current_steps": 4096, "total_steps": 8680, "loss": 0.7905057668685913, "lr": 1.1751070814299947e-06, "epoch": 0.943778801843318, "percentage": 47.19, "elapsed_time": "5:35:08", "remaining_time": "6:15:04"} +{"current_steps": 4097, "total_steps": 8680, "loss": 0.8957202434539795, "lr": 1.1747319713327078e-06, "epoch": 0.9440092165898617, "percentage": 47.2, "elapsed_time": "5:35:14", "remaining_time": "6:15:00"} +{"current_steps": 4098, "total_steps": 8680, "loss": 0.7922521233558655, "lr": 1.174356835873306e-06, "epoch": 0.9442396313364055, "percentage": 47.21, "elapsed_time": "5:35:18", "remaining_time": "6:14:54"} +{"current_steps": 4099, "total_steps": 8680, "loss": 0.6501933336257935, "lr": 1.1739816751062404e-06, "epoch": 0.9444700460829493, "percentage": 47.22, "elapsed_time": "5:35:23", "remaining_time": "6:14:50"} +{"current_steps": 4100, "total_steps": 8680, "loss": 0.6743361353874207, "lr": 1.1736064890859654e-06, "epoch": 0.9447004608294931, "percentage": 47.24, "elapsed_time": "5:35:29", "remaining_time": "6:14:46"} +{"current_steps": 4101, "total_steps": 8680, "loss": 0.920632004737854, 
"lr": 1.173231277866938e-06, "epoch": 0.9449308755760368, "percentage": 47.25, "elapsed_time": "5:35:36", "remaining_time": "6:14:43"} +{"current_steps": 4102, "total_steps": 8680, "loss": 0.7498964071273804, "lr": 1.1728560415036199e-06, "epoch": 0.9451612903225807, "percentage": 47.26, "elapsed_time": "5:35:40", "remaining_time": "6:14:37"} +{"current_steps": 4103, "total_steps": 8680, "loss": 0.7665064334869385, "lr": 1.1724807800504765e-06, "epoch": 0.9453917050691244, "percentage": 47.27, "elapsed_time": "5:35:45", "remaining_time": "6:14:32"} +{"current_steps": 4104, "total_steps": 8680, "loss": 0.75946044921875, "lr": 1.172105493561976e-06, "epoch": 0.9456221198156682, "percentage": 47.28, "elapsed_time": "5:35:51", "remaining_time": "6:14:29"} +{"current_steps": 4105, "total_steps": 8680, "loss": 0.7701961398124695, "lr": 1.1717301820925908e-06, "epoch": 0.945852534562212, "percentage": 47.29, "elapsed_time": "5:35:56", "remaining_time": "6:14:23"} +{"current_steps": 4106, "total_steps": 8680, "loss": 0.7775348424911499, "lr": 1.1713548456967974e-06, "epoch": 0.9460829493087558, "percentage": 47.3, "elapsed_time": "5:36:02", "remaining_time": "6:14:20"} +{"current_steps": 4107, "total_steps": 8680, "loss": 0.8149436712265015, "lr": 1.1709794844290745e-06, "epoch": 0.9463133640552995, "percentage": 47.32, "elapsed_time": "5:36:08", "remaining_time": "6:14:17"} +{"current_steps": 4108, "total_steps": 8680, "loss": 0.7136009335517883, "lr": 1.170604098343906e-06, "epoch": 0.9465437788018434, "percentage": 47.33, "elapsed_time": "5:36:14", "remaining_time": "6:14:13"} +{"current_steps": 4109, "total_steps": 8680, "loss": 0.7678873538970947, "lr": 1.1702286874957786e-06, "epoch": 0.9467741935483871, "percentage": 47.34, "elapsed_time": "5:36:19", "remaining_time": "6:14:07"} +{"current_steps": 4110, "total_steps": 8680, "loss": 0.7506710290908813, "lr": 1.1698532519391827e-06, "epoch": 0.9470046082949308, "percentage": 47.35, "elapsed_time": "5:36:24", 
"remaining_time": "6:14:03"} +{"current_steps": 4111, "total_steps": 8680, "loss": 0.6646897792816162, "lr": 1.1694777917286118e-06, "epoch": 0.9472350230414747, "percentage": 47.36, "elapsed_time": "5:36:31", "remaining_time": "6:14:00"} +{"current_steps": 4112, "total_steps": 8680, "loss": 0.820647120475769, "lr": 1.1691023069185639e-06, "epoch": 0.9474654377880184, "percentage": 47.37, "elapsed_time": "5:36:36", "remaining_time": "6:13:55"} +{"current_steps": 4113, "total_steps": 8680, "loss": 0.872378408908844, "lr": 1.1687267975635402e-06, "epoch": 0.9476958525345622, "percentage": 47.38, "elapsed_time": "5:36:41", "remaining_time": "6:13:51"} +{"current_steps": 4114, "total_steps": 8680, "loss": 0.7920655608177185, "lr": 1.168351263718045e-06, "epoch": 0.947926267281106, "percentage": 47.4, "elapsed_time": "5:36:45", "remaining_time": "6:13:45"} +{"current_steps": 4115, "total_steps": 8680, "loss": 0.6593836545944214, "lr": 1.1679757054365866e-06, "epoch": 0.9481566820276498, "percentage": 47.41, "elapsed_time": "5:36:50", "remaining_time": "6:13:40"} +{"current_steps": 4116, "total_steps": 8680, "loss": 0.7473627328872681, "lr": 1.1676001227736772e-06, "epoch": 0.9483870967741935, "percentage": 47.42, "elapsed_time": "5:36:55", "remaining_time": "6:13:35"} +{"current_steps": 4117, "total_steps": 8680, "loss": 0.8001665472984314, "lr": 1.1672245157838317e-06, "epoch": 0.9486175115207374, "percentage": 47.43, "elapsed_time": "5:36:59", "remaining_time": "6:13:29"} +{"current_steps": 4118, "total_steps": 8680, "loss": 0.7342571020126343, "lr": 1.1668488845215689e-06, "epoch": 0.9488479262672811, "percentage": 47.44, "elapsed_time": "5:37:04", "remaining_time": "6:13:25"} +{"current_steps": 4119, "total_steps": 8680, "loss": 0.7616822719573975, "lr": 1.1664732290414118e-06, "epoch": 0.9490783410138249, "percentage": 47.45, "elapsed_time": "5:37:10", "remaining_time": "6:13:20"} +{"current_steps": 4120, "total_steps": 8680, "loss": 0.8885634541511536, "lr": 
1.1660975493978857e-06, "epoch": 0.9493087557603687, "percentage": 47.47, "elapsed_time": "5:37:15", "remaining_time": "6:13:16"} +{"current_steps": 4121, "total_steps": 8680, "loss": 0.7816281318664551, "lr": 1.1657218456455205e-06, "epoch": 0.9495391705069124, "percentage": 47.48, "elapsed_time": "5:37:20", "remaining_time": "6:13:12"} +{"current_steps": 4122, "total_steps": 8680, "loss": 0.7412079572677612, "lr": 1.1653461178388485e-06, "epoch": 0.9497695852534562, "percentage": 47.49, "elapsed_time": "5:37:27", "remaining_time": "6:13:09"} +{"current_steps": 4123, "total_steps": 8680, "loss": 0.8096172213554382, "lr": 1.1649703660324064e-06, "epoch": 0.95, "percentage": 47.5, "elapsed_time": "5:37:32", "remaining_time": "6:13:04"} +{"current_steps": 4124, "total_steps": 8680, "loss": 0.6690856218338013, "lr": 1.164594590280734e-06, "epoch": 0.9502304147465438, "percentage": 47.51, "elapsed_time": "5:37:38", "remaining_time": "6:13:00"} +{"current_steps": 4125, "total_steps": 8680, "loss": 0.7509289979934692, "lr": 1.1642187906383746e-06, "epoch": 0.9504608294930875, "percentage": 47.52, "elapsed_time": "5:37:42", "remaining_time": "6:12:54"} +{"current_steps": 4126, "total_steps": 8680, "loss": 0.6643730401992798, "lr": 1.1638429671598754e-06, "epoch": 0.9506912442396314, "percentage": 47.53, "elapsed_time": "5:37:48", "remaining_time": "6:12:51"} +{"current_steps": 4127, "total_steps": 8680, "loss": 0.8100850582122803, "lr": 1.1634671198997864e-06, "epoch": 0.9509216589861751, "percentage": 47.55, "elapsed_time": "5:37:53", "remaining_time": "6:12:45"} +{"current_steps": 4128, "total_steps": 8680, "loss": 0.919742226600647, "lr": 1.1630912489126612e-06, "epoch": 0.9511520737327189, "percentage": 47.56, "elapsed_time": "5:37:57", "remaining_time": "6:12:40"} +{"current_steps": 4129, "total_steps": 8680, "loss": 0.8953771591186523, "lr": 1.1627153542530571e-06, "epoch": 0.9513824884792627, "percentage": 47.57, "elapsed_time": "5:38:02", "remaining_time": 
"6:12:35"} +{"current_steps": 4130, "total_steps": 8680, "loss": 0.7401770949363708, "lr": 1.162339435975535e-06, "epoch": 0.9516129032258065, "percentage": 47.58, "elapsed_time": "5:38:07", "remaining_time": "6:12:30"} +{"current_steps": 4131, "total_steps": 8680, "loss": 0.7618032097816467, "lr": 1.1619634941346585e-06, "epoch": 0.9518433179723502, "percentage": 47.59, "elapsed_time": "5:38:13", "remaining_time": "6:12:27"} +{"current_steps": 4132, "total_steps": 8680, "loss": 0.9134000539779663, "lr": 1.1615875287849955e-06, "epoch": 0.9520737327188941, "percentage": 47.6, "elapsed_time": "5:38:18", "remaining_time": "6:12:22"} +{"current_steps": 4133, "total_steps": 8680, "loss": 0.7555145025253296, "lr": 1.1612115399811162e-06, "epoch": 0.9523041474654378, "percentage": 47.62, "elapsed_time": "5:38:23", "remaining_time": "6:12:17"} +{"current_steps": 4134, "total_steps": 8680, "loss": 0.9125050902366638, "lr": 1.1608355277775955e-06, "epoch": 0.9525345622119815, "percentage": 47.63, "elapsed_time": "5:38:29", "remaining_time": "6:12:13"} +{"current_steps": 4135, "total_steps": 8680, "loss": 0.6575542688369751, "lr": 1.1604594922290106e-06, "epoch": 0.9527649769585254, "percentage": 47.64, "elapsed_time": "5:38:33", "remaining_time": "6:12:08"} +{"current_steps": 4136, "total_steps": 8680, "loss": 0.7530527114868164, "lr": 1.1600834333899431e-06, "epoch": 0.9529953917050691, "percentage": 47.65, "elapsed_time": "5:38:38", "remaining_time": "6:12:02"} +{"current_steps": 4137, "total_steps": 8680, "loss": 0.8818701505661011, "lr": 1.159707351314977e-06, "epoch": 0.9532258064516129, "percentage": 47.66, "elapsed_time": "5:38:43", "remaining_time": "6:11:58"} +{"current_steps": 4138, "total_steps": 8680, "loss": 0.7172919511795044, "lr": 1.1593312460587003e-06, "epoch": 0.9534562211981567, "percentage": 47.67, "elapsed_time": "5:38:50", "remaining_time": "6:11:55"} +{"current_steps": 4139, "total_steps": 8680, "loss": 0.8701400756835938, "lr": 
1.1589551176757044e-06, "epoch": 0.9536866359447005, "percentage": 47.68, "elapsed_time": "5:38:54", "remaining_time": "6:11:49"} +{"current_steps": 4140, "total_steps": 8680, "loss": 0.867475152015686, "lr": 1.1585789662205834e-06, "epoch": 0.9539170506912442, "percentage": 47.7, "elapsed_time": "5:38:58", "remaining_time": "6:11:44"} +{"current_steps": 4141, "total_steps": 8680, "loss": 0.7809052467346191, "lr": 1.1582027917479356e-06, "epoch": 0.9541474654377881, "percentage": 47.71, "elapsed_time": "5:39:04", "remaining_time": "6:11:40"} +{"current_steps": 4142, "total_steps": 8680, "loss": 0.8589099645614624, "lr": 1.1578265943123619e-06, "epoch": 0.9543778801843318, "percentage": 47.72, "elapsed_time": "5:39:08", "remaining_time": "6:11:33"} +{"current_steps": 4143, "total_steps": 8680, "loss": 0.7826642394065857, "lr": 1.157450373968467e-06, "epoch": 0.9546082949308756, "percentage": 47.73, "elapsed_time": "5:39:14", "remaining_time": "6:11:30"} +{"current_steps": 4144, "total_steps": 8680, "loss": 0.9550029635429382, "lr": 1.1570741307708585e-06, "epoch": 0.9548387096774194, "percentage": 47.74, "elapsed_time": "5:39:18", "remaining_time": "6:11:23"} +{"current_steps": 4145, "total_steps": 8680, "loss": 0.8607431650161743, "lr": 1.1566978647741478e-06, "epoch": 0.9550691244239631, "percentage": 47.75, "elapsed_time": "5:39:23", "remaining_time": "6:11:19"} +{"current_steps": 4146, "total_steps": 8680, "loss": 0.7350449562072754, "lr": 1.15632157603295e-06, "epoch": 0.9552995391705069, "percentage": 47.76, "elapsed_time": "5:39:30", "remaining_time": "6:11:16"} +{"current_steps": 4147, "total_steps": 8680, "loss": 0.853142261505127, "lr": 1.1559452646018818e-06, "epoch": 0.9555299539170506, "percentage": 47.78, "elapsed_time": "5:39:34", "remaining_time": "6:11:11"} +{"current_steps": 4148, "total_steps": 8680, "loss": 0.7137192487716675, "lr": 1.1555689305355651e-06, "epoch": 0.9557603686635945, "percentage": 47.79, "elapsed_time": "5:39:40", 
"remaining_time": "6:11:06"} +{"current_steps": 4149, "total_steps": 8680, "loss": 0.9007513523101807, "lr": 1.1551925738886244e-06, "epoch": 0.9559907834101382, "percentage": 47.8, "elapsed_time": "5:39:45", "remaining_time": "6:11:02"} +{"current_steps": 4150, "total_steps": 8680, "loss": 0.8499083518981934, "lr": 1.1548161947156867e-06, "epoch": 0.956221198156682, "percentage": 47.81, "elapsed_time": "5:39:49", "remaining_time": "6:10:56"} +{"current_steps": 4151, "total_steps": 8680, "loss": 0.8068628311157227, "lr": 1.1544397930713836e-06, "epoch": 0.9564516129032258, "percentage": 47.82, "elapsed_time": "5:39:55", "remaining_time": "6:10:52"} +{"current_steps": 4152, "total_steps": 8680, "loss": 0.8357307314872742, "lr": 1.1540633690103487e-06, "epoch": 0.9566820276497696, "percentage": 47.83, "elapsed_time": "5:40:00", "remaining_time": "6:10:47"} +{"current_steps": 4153, "total_steps": 8680, "loss": 0.7650378942489624, "lr": 1.1536869225872198e-06, "epoch": 0.9569124423963133, "percentage": 47.85, "elapsed_time": "5:40:04", "remaining_time": "6:10:42"} +{"current_steps": 4154, "total_steps": 8680, "loss": 0.8717354536056519, "lr": 1.1533104538566376e-06, "epoch": 0.9571428571428572, "percentage": 47.86, "elapsed_time": "5:40:09", "remaining_time": "6:10:37"} +{"current_steps": 4155, "total_steps": 8680, "loss": 0.6314762830734253, "lr": 1.152933962873246e-06, "epoch": 0.9573732718894009, "percentage": 47.87, "elapsed_time": "5:40:14", "remaining_time": "6:10:32"} +{"current_steps": 4156, "total_steps": 8680, "loss": 0.8949059844017029, "lr": 1.152557449691692e-06, "epoch": 0.9576036866359448, "percentage": 47.88, "elapsed_time": "5:40:20", "remaining_time": "6:10:28"} +{"current_steps": 4157, "total_steps": 8680, "loss": 0.7862699031829834, "lr": 1.1521809143666261e-06, "epoch": 0.9578341013824885, "percentage": 47.89, "elapsed_time": "5:40:26", "remaining_time": "6:10:24"} +{"current_steps": 4158, "total_steps": 8680, "loss": 0.7954641580581665, "lr": 
1.151804356952702e-06, "epoch": 0.9580645161290322, "percentage": 47.9, "elapsed_time": "5:40:29", "remaining_time": "6:10:18"} +{"current_steps": 4159, "total_steps": 8680, "loss": 0.7654163241386414, "lr": 1.1514277775045766e-06, "epoch": 0.958294930875576, "percentage": 47.91, "elapsed_time": "5:40:33", "remaining_time": "6:10:12"} +{"current_steps": 4160, "total_steps": 8680, "loss": 0.7050681114196777, "lr": 1.1510511760769097e-06, "epoch": 0.9585253456221198, "percentage": 47.93, "elapsed_time": "5:40:38", "remaining_time": "6:10:07"} +{"current_steps": 4161, "total_steps": 8680, "loss": 0.8646515607833862, "lr": 1.1506745527243646e-06, "epoch": 0.9587557603686636, "percentage": 47.94, "elapsed_time": "5:40:42", "remaining_time": "6:10:01"} +{"current_steps": 4162, "total_steps": 8680, "loss": 0.7427883148193359, "lr": 1.1502979075016078e-06, "epoch": 0.9589861751152073, "percentage": 47.95, "elapsed_time": "5:40:48", "remaining_time": "6:09:57"} +{"current_steps": 4163, "total_steps": 8680, "loss": 0.7800190448760986, "lr": 1.1499212404633083e-06, "epoch": 0.9592165898617512, "percentage": 47.96, "elapsed_time": "5:40:52", "remaining_time": "6:09:51"} +{"current_steps": 4164, "total_steps": 8680, "loss": 0.789481520652771, "lr": 1.1495445516641394e-06, "epoch": 0.9594470046082949, "percentage": 47.97, "elapsed_time": "5:40:59", "remaining_time": "6:09:48"} +{"current_steps": 4165, "total_steps": 8680, "loss": 0.7975008487701416, "lr": 1.1491678411587768e-06, "epoch": 0.9596774193548387, "percentage": 47.98, "elapsed_time": "5:41:03", "remaining_time": "6:09:43"} +{"current_steps": 4166, "total_steps": 8680, "loss": 0.7964596748352051, "lr": 1.1487911090018994e-06, "epoch": 0.9599078341013825, "percentage": 48.0, "elapsed_time": "5:41:08", "remaining_time": "6:09:38"} +{"current_steps": 4167, "total_steps": 8680, "loss": 0.7008803486824036, "lr": 1.1484143552481895e-06, "epoch": 0.9601382488479263, "percentage": 48.01, "elapsed_time": "5:41:14", 
"remaining_time": "6:09:34"} +{"current_steps": 4168, "total_steps": 8680, "loss": 0.708189070224762, "lr": 1.1480375799523328e-06, "epoch": 0.96036866359447, "percentage": 48.02, "elapsed_time": "5:41:21", "remaining_time": "6:09:31"} +{"current_steps": 4169, "total_steps": 8680, "loss": 0.8207682371139526, "lr": 1.1476607831690167e-06, "epoch": 0.9605990783410139, "percentage": 48.03, "elapsed_time": "5:41:26", "remaining_time": "6:09:27"} +{"current_steps": 4170, "total_steps": 8680, "loss": 0.7682942152023315, "lr": 1.1472839649529337e-06, "epoch": 0.9608294930875576, "percentage": 48.04, "elapsed_time": "5:41:32", "remaining_time": "6:09:22"} +{"current_steps": 4171, "total_steps": 8680, "loss": 0.8435598611831665, "lr": 1.1469071253587785e-06, "epoch": 0.9610599078341013, "percentage": 48.05, "elapsed_time": "5:41:36", "remaining_time": "6:09:17"} +{"current_steps": 4172, "total_steps": 8680, "loss": 0.7516113519668579, "lr": 1.1465302644412483e-06, "epoch": 0.9612903225806452, "percentage": 48.06, "elapsed_time": "5:41:41", "remaining_time": "6:09:13"} +{"current_steps": 4173, "total_steps": 8680, "loss": 0.7125411629676819, "lr": 1.1461533822550442e-06, "epoch": 0.9615207373271889, "percentage": 48.08, "elapsed_time": "5:41:46", "remaining_time": "6:09:08"} +{"current_steps": 4174, "total_steps": 8680, "loss": 0.7560747861862183, "lr": 1.14577647885487e-06, "epoch": 0.9617511520737327, "percentage": 48.09, "elapsed_time": "5:41:52", "remaining_time": "6:09:04"} +{"current_steps": 4175, "total_steps": 8680, "loss": 0.6702673435211182, "lr": 1.1453995542954332e-06, "epoch": 0.9619815668202765, "percentage": 48.1, "elapsed_time": "5:41:58", "remaining_time": "6:08:59"} +{"current_steps": 4176, "total_steps": 8680, "loss": 0.8083088397979736, "lr": 1.1450226086314433e-06, "epoch": 0.9622119815668203, "percentage": 48.11, "elapsed_time": "5:42:03", "remaining_time": "6:08:55"} +{"current_steps": 4177, "total_steps": 8680, "loss": 0.7579925060272217, "lr": 
1.1446456419176135e-06, "epoch": 0.962442396313364, "percentage": 48.12, "elapsed_time": "5:42:10", "remaining_time": "6:08:52"} +{"current_steps": 4178, "total_steps": 8680, "loss": 0.713416576385498, "lr": 1.1442686542086609e-06, "epoch": 0.9626728110599079, "percentage": 48.13, "elapsed_time": "5:42:16", "remaining_time": "6:08:48"} +{"current_steps": 4179, "total_steps": 8680, "loss": 0.7767639756202698, "lr": 1.1438916455593035e-06, "epoch": 0.9629032258064516, "percentage": 48.15, "elapsed_time": "5:42:21", "remaining_time": "6:08:44"} +{"current_steps": 4180, "total_steps": 8680, "loss": 0.7493964433670044, "lr": 1.1435146160242645e-06, "epoch": 0.9631336405529954, "percentage": 48.16, "elapsed_time": "5:42:25", "remaining_time": "6:08:38"} +{"current_steps": 4181, "total_steps": 8680, "loss": 0.8789365291595459, "lr": 1.1431375656582692e-06, "epoch": 0.9633640552995392, "percentage": 48.17, "elapsed_time": "5:42:29", "remaining_time": "6:08:32"} +{"current_steps": 4182, "total_steps": 8680, "loss": 0.7750524878501892, "lr": 1.1427604945160457e-06, "epoch": 0.9635944700460829, "percentage": 48.18, "elapsed_time": "5:42:34", "remaining_time": "6:08:28"} +{"current_steps": 4183, "total_steps": 8680, "loss": 0.9330715537071228, "lr": 1.142383402652325e-06, "epoch": 0.9638248847926267, "percentage": 48.19, "elapsed_time": "5:42:40", "remaining_time": "6:08:23"} +{"current_steps": 4184, "total_steps": 8680, "loss": 0.6845035552978516, "lr": 1.142006290121842e-06, "epoch": 0.9640552995391705, "percentage": 48.2, "elapsed_time": "5:42:45", "remaining_time": "6:08:19"} +{"current_steps": 4185, "total_steps": 8680, "loss": 0.7295390963554382, "lr": 1.1416291569793343e-06, "epoch": 0.9642857142857143, "percentage": 48.21, "elapsed_time": "5:42:50", "remaining_time": "6:08:14"} +{"current_steps": 4186, "total_steps": 8680, "loss": 0.6869080066680908, "lr": 1.1412520032795419e-06, "epoch": 0.964516129032258, "percentage": 48.23, "elapsed_time": "5:42:56", 
"remaining_time": "6:08:10"} +{"current_steps": 4187, "total_steps": 8680, "loss": 1.0916842222213745, "lr": 1.140874829077208e-06, "epoch": 0.9647465437788019, "percentage": 48.24, "elapsed_time": "5:43:01", "remaining_time": "6:08:05"} +{"current_steps": 4188, "total_steps": 8680, "loss": 0.7487984299659729, "lr": 1.1404976344270793e-06, "epoch": 0.9649769585253456, "percentage": 48.25, "elapsed_time": "5:43:06", "remaining_time": "6:08:01"} +{"current_steps": 4189, "total_steps": 8680, "loss": 0.8852604627609253, "lr": 1.140120419383905e-06, "epoch": 0.9652073732718894, "percentage": 48.26, "elapsed_time": "5:43:11", "remaining_time": "6:07:55"} +{"current_steps": 4190, "total_steps": 8680, "loss": 0.7384698987007141, "lr": 1.139743184002437e-06, "epoch": 0.9654377880184332, "percentage": 48.27, "elapsed_time": "5:43:15", "remaining_time": "6:07:50"} +{"current_steps": 4191, "total_steps": 8680, "loss": 0.8033223152160645, "lr": 1.1393659283374312e-06, "epoch": 0.965668202764977, "percentage": 48.28, "elapsed_time": "5:43:19", "remaining_time": "6:07:44"} +{"current_steps": 4192, "total_steps": 8680, "loss": 0.8870355486869812, "lr": 1.1389886524436453e-06, "epoch": 0.9658986175115207, "percentage": 48.29, "elapsed_time": "5:43:24", "remaining_time": "6:07:39"} +{"current_steps": 4193, "total_steps": 8680, "loss": 0.869537353515625, "lr": 1.1386113563758405e-06, "epoch": 0.9661290322580646, "percentage": 48.31, "elapsed_time": "5:43:29", "remaining_time": "6:07:34"} +{"current_steps": 4194, "total_steps": 8680, "loss": 0.8564068675041199, "lr": 1.1382340401887808e-06, "epoch": 0.9663594470046083, "percentage": 48.32, "elapsed_time": "5:43:33", "remaining_time": "6:07:29"} +{"current_steps": 4195, "total_steps": 8680, "loss": 0.7988623380661011, "lr": 1.1378567039372332e-06, "epoch": 0.966589861751152, "percentage": 48.33, "elapsed_time": "5:43:38", "remaining_time": "6:07:24"} +{"current_steps": 4196, "total_steps": 8680, "loss": 0.9405556917190552, "lr": 
1.1374793476759673e-06, "epoch": 0.9668202764976959, "percentage": 48.34, "elapsed_time": "5:43:43", "remaining_time": "6:07:19"} +{"current_steps": 4197, "total_steps": 8680, "loss": 0.6757407188415527, "lr": 1.137101971459756e-06, "epoch": 0.9670506912442396, "percentage": 48.35, "elapsed_time": "5:43:48", "remaining_time": "6:07:14"} +{"current_steps": 4198, "total_steps": 8680, "loss": 0.7521541118621826, "lr": 1.1367245753433757e-06, "epoch": 0.9672811059907834, "percentage": 48.36, "elapsed_time": "5:43:53", "remaining_time": "6:07:09"} +{"current_steps": 4199, "total_steps": 8680, "loss": 0.7306162714958191, "lr": 1.1363471593816037e-06, "epoch": 0.9675115207373272, "percentage": 48.38, "elapsed_time": "5:43:58", "remaining_time": "6:07:04"} +{"current_steps": 4200, "total_steps": 8680, "loss": 0.6884766817092896, "lr": 1.135969723629222e-06, "epoch": 0.967741935483871, "percentage": 48.39, "elapsed_time": "5:44:04", "remaining_time": "6:07:01"} +{"current_steps": 4201, "total_steps": 8680, "loss": 0.8420373201370239, "lr": 1.1355922681410152e-06, "epoch": 0.9679723502304147, "percentage": 48.4, "elapsed_time": "5:44:10", "remaining_time": "6:06:57"} +{"current_steps": 4202, "total_steps": 8680, "loss": 0.7252322435379028, "lr": 1.1352147929717704e-06, "epoch": 0.9682027649769586, "percentage": 48.41, "elapsed_time": "5:44:17", "remaining_time": "6:06:54"} +{"current_steps": 4203, "total_steps": 8680, "loss": 0.6375538110733032, "lr": 1.134837298176277e-06, "epoch": 0.9684331797235023, "percentage": 48.42, "elapsed_time": "5:44:22", "remaining_time": "6:06:49"} +{"current_steps": 4204, "total_steps": 8680, "loss": 0.713671863079071, "lr": 1.1344597838093283e-06, "epoch": 0.9686635944700461, "percentage": 48.43, "elapsed_time": "5:44:27", "remaining_time": "6:06:44"} +{"current_steps": 4205, "total_steps": 8680, "loss": 0.8591479063034058, "lr": 1.1340822499257201e-06, "epoch": 0.9688940092165899, "percentage": 48.44, "elapsed_time": "5:44:32", 
"remaining_time": "6:06:39"} +{"current_steps": 4206, "total_steps": 8680, "loss": 0.7638808488845825, "lr": 1.1337046965802505e-06, "epoch": 0.9691244239631336, "percentage": 48.46, "elapsed_time": "5:44:37", "remaining_time": "6:06:35"} +{"current_steps": 4207, "total_steps": 8680, "loss": 0.8133253455162048, "lr": 1.1333271238277215e-06, "epoch": 0.9693548387096774, "percentage": 48.47, "elapsed_time": "5:44:42", "remaining_time": "6:06:30"} +{"current_steps": 4208, "total_steps": 8680, "loss": 0.6938756704330444, "lr": 1.132949531722937e-06, "epoch": 0.9695852534562212, "percentage": 48.48, "elapsed_time": "5:44:47", "remaining_time": "6:06:25"} +{"current_steps": 4209, "total_steps": 8680, "loss": 0.793639063835144, "lr": 1.132571920320704e-06, "epoch": 0.969815668202765, "percentage": 48.49, "elapsed_time": "5:44:52", "remaining_time": "6:06:20"} +{"current_steps": 4210, "total_steps": 8680, "loss": 0.7188536524772644, "lr": 1.132194289675832e-06, "epoch": 0.9700460829493087, "percentage": 48.5, "elapsed_time": "5:44:57", "remaining_time": "6:06:16"} +{"current_steps": 4211, "total_steps": 8680, "loss": 0.8076587319374084, "lr": 1.1318166398431343e-06, "epoch": 0.9702764976958526, "percentage": 48.51, "elapsed_time": "5:45:02", "remaining_time": "6:06:10"} +{"current_steps": 4212, "total_steps": 8680, "loss": 0.8390023708343506, "lr": 1.1314389708774258e-06, "epoch": 0.9705069124423963, "percentage": 48.53, "elapsed_time": "5:45:07", "remaining_time": "6:06:06"} +{"current_steps": 4213, "total_steps": 8680, "loss": 0.8395706415176392, "lr": 1.1310612828335243e-06, "epoch": 0.9707373271889401, "percentage": 48.54, "elapsed_time": "5:45:11", "remaining_time": "6:06:00"} +{"current_steps": 4214, "total_steps": 8680, "loss": 0.9672995805740356, "lr": 1.1306835757662515e-06, "epoch": 0.9709677419354839, "percentage": 48.55, "elapsed_time": "5:45:16", "remaining_time": "6:05:55"} +{"current_steps": 4215, "total_steps": 8680, "loss": 0.7716202735900879, "lr": 
1.1303058497304303e-06, "epoch": 0.9711981566820277, "percentage": 48.56, "elapsed_time": "5:45:21", "remaining_time": "6:05:50"} +{"current_steps": 4216, "total_steps": 8680, "loss": 0.6318329572677612, "lr": 1.1299281047808876e-06, "epoch": 0.9714285714285714, "percentage": 48.57, "elapsed_time": "5:45:27", "remaining_time": "6:05:46"} +{"current_steps": 4217, "total_steps": 8680, "loss": 0.8287553787231445, "lr": 1.1295503409724525e-06, "epoch": 0.9716589861751153, "percentage": 48.58, "elapsed_time": "5:45:32", "remaining_time": "6:05:42"} +{"current_steps": 4218, "total_steps": 8680, "loss": 0.6903107762336731, "lr": 1.129172558359957e-06, "epoch": 0.971889400921659, "percentage": 48.59, "elapsed_time": "5:45:37", "remaining_time": "6:05:37"} +{"current_steps": 4219, "total_steps": 8680, "loss": 0.684443473815918, "lr": 1.1287947569982355e-06, "epoch": 0.9721198156682027, "percentage": 48.61, "elapsed_time": "5:45:42", "remaining_time": "6:05:32"} +{"current_steps": 4220, "total_steps": 8680, "loss": 0.8566167950630188, "lr": 1.1284169369421254e-06, "epoch": 0.9723502304147466, "percentage": 48.62, "elapsed_time": "5:45:48", "remaining_time": "6:05:28"} +{"current_steps": 4221, "total_steps": 8680, "loss": 0.8103536367416382, "lr": 1.1280390982464673e-06, "epoch": 0.9725806451612903, "percentage": 48.63, "elapsed_time": "5:45:54", "remaining_time": "6:05:25"} +{"current_steps": 4222, "total_steps": 8680, "loss": 0.8027071356773376, "lr": 1.1276612409661036e-06, "epoch": 0.9728110599078341, "percentage": 48.64, "elapsed_time": "5:46:00", "remaining_time": "6:05:21"} +{"current_steps": 4223, "total_steps": 8680, "loss": 0.8251115679740906, "lr": 1.1272833651558796e-06, "epoch": 0.9730414746543778, "percentage": 48.65, "elapsed_time": "5:46:04", "remaining_time": "6:05:15"} +{"current_steps": 4224, "total_steps": 8680, "loss": 0.6468047499656677, "lr": 1.1269054708706437e-06, "epoch": 0.9732718894009217, "percentage": 48.66, "elapsed_time": "5:46:10", 
"remaining_time": "6:05:11"} +{"current_steps": 4225, "total_steps": 8680, "loss": 0.8085706233978271, "lr": 1.1265275581652465e-06, "epoch": 0.9735023041474654, "percentage": 48.68, "elapsed_time": "5:46:16", "remaining_time": "6:05:07"} +{"current_steps": 4226, "total_steps": 8680, "loss": 0.8396503925323486, "lr": 1.1261496270945418e-06, "epoch": 0.9737327188940093, "percentage": 48.69, "elapsed_time": "5:46:21", "remaining_time": "6:05:02"} +{"current_steps": 4227, "total_steps": 8680, "loss": 0.7860006093978882, "lr": 1.1257716777133861e-06, "epoch": 0.973963133640553, "percentage": 48.7, "elapsed_time": "5:46:26", "remaining_time": "6:04:57"} +{"current_steps": 4228, "total_steps": 8680, "loss": 0.8630701303482056, "lr": 1.1253937100766373e-06, "epoch": 0.9741935483870968, "percentage": 48.71, "elapsed_time": "5:46:32", "remaining_time": "6:04:53"} +{"current_steps": 4229, "total_steps": 8680, "loss": 0.8363114595413208, "lr": 1.1250157242391577e-06, "epoch": 0.9744239631336405, "percentage": 48.72, "elapsed_time": "5:46:37", "remaining_time": "6:04:49"} +{"current_steps": 4230, "total_steps": 8680, "loss": 0.7837141156196594, "lr": 1.1246377202558114e-06, "epoch": 0.9746543778801844, "percentage": 48.73, "elapsed_time": "5:46:43", "remaining_time": "6:04:45"} +{"current_steps": 4231, "total_steps": 8680, "loss": 0.8283151984214783, "lr": 1.1242596981814648e-06, "epoch": 0.9748847926267281, "percentage": 48.74, "elapsed_time": "5:46:49", "remaining_time": "6:04:42"} +{"current_steps": 4232, "total_steps": 8680, "loss": 0.9232061505317688, "lr": 1.1238816580709878e-06, "epoch": 0.9751152073732718, "percentage": 48.76, "elapsed_time": "5:46:53", "remaining_time": "6:04:35"} +{"current_steps": 4233, "total_steps": 8680, "loss": 0.8721164464950562, "lr": 1.123503599979252e-06, "epoch": 0.9753456221198157, "percentage": 48.77, "elapsed_time": "5:46:58", "remaining_time": "6:04:31"} +{"current_steps": 4234, "total_steps": 8680, "loss": 0.9398131370544434, "lr": 
1.1231255239611321e-06, "epoch": 0.9755760368663594, "percentage": 48.78, "elapsed_time": "5:47:03", "remaining_time": "6:04:26"} +{"current_steps": 4235, "total_steps": 8680, "loss": 0.8124324083328247, "lr": 1.1227474300715054e-06, "epoch": 0.9758064516129032, "percentage": 48.79, "elapsed_time": "5:47:08", "remaining_time": "6:04:21"} +{"current_steps": 4236, "total_steps": 8680, "loss": 0.8532534837722778, "lr": 1.1223693183652515e-06, "epoch": 0.976036866359447, "percentage": 48.8, "elapsed_time": "5:47:13", "remaining_time": "6:04:16"} +{"current_steps": 4237, "total_steps": 8680, "loss": 0.7547662258148193, "lr": 1.1219911888972536e-06, "epoch": 0.9762672811059908, "percentage": 48.81, "elapsed_time": "5:47:18", "remaining_time": "6:04:11"} +{"current_steps": 4238, "total_steps": 8680, "loss": 0.7407231330871582, "lr": 1.1216130417223956e-06, "epoch": 0.9764976958525345, "percentage": 48.82, "elapsed_time": "5:47:23", "remaining_time": "6:04:07"} +{"current_steps": 4239, "total_steps": 8680, "loss": 0.8190197944641113, "lr": 1.1212348768955657e-06, "epoch": 0.9767281105990784, "percentage": 48.84, "elapsed_time": "5:47:29", "remaining_time": "6:04:02"} +{"current_steps": 4240, "total_steps": 8680, "loss": 0.6641337871551514, "lr": 1.1208566944716542e-06, "epoch": 0.9769585253456221, "percentage": 48.85, "elapsed_time": "5:47:33", "remaining_time": "6:03:57"} +{"current_steps": 4241, "total_steps": 8680, "loss": 0.8953202962875366, "lr": 1.120478494505553e-06, "epoch": 0.977188940092166, "percentage": 48.86, "elapsed_time": "5:47:38", "remaining_time": "6:03:52"} +{"current_steps": 4242, "total_steps": 8680, "loss": 0.7803191542625427, "lr": 1.1201002770521583e-06, "epoch": 0.9774193548387097, "percentage": 48.87, "elapsed_time": "5:47:43", "remaining_time": "6:03:47"} +{"current_steps": 4243, "total_steps": 8680, "loss": 0.6827100515365601, "lr": 1.1197220421663674e-06, "epoch": 0.9776497695852534, "percentage": 48.88, "elapsed_time": "5:47:50", 
"remaining_time": "6:03:44"} +{"current_steps": 4244, "total_steps": 8680, "loss": 0.8513565063476562, "lr": 1.1193437899030802e-06, "epoch": 0.9778801843317972, "percentage": 48.89, "elapsed_time": "5:47:55", "remaining_time": "6:03:40"} +{"current_steps": 4245, "total_steps": 8680, "loss": 0.7196829915046692, "lr": 1.1189655203172e-06, "epoch": 0.978110599078341, "percentage": 48.91, "elapsed_time": "5:48:00", "remaining_time": "6:03:34"} +{"current_steps": 4246, "total_steps": 8680, "loss": 0.7823485136032104, "lr": 1.1185872334636319e-06, "epoch": 0.9783410138248848, "percentage": 48.92, "elapsed_time": "5:48:06", "remaining_time": "6:03:30"} +{"current_steps": 4247, "total_steps": 8680, "loss": 0.7178136110305786, "lr": 1.1182089293972841e-06, "epoch": 0.9785714285714285, "percentage": 48.93, "elapsed_time": "5:48:11", "remaining_time": "6:03:26"} +{"current_steps": 4248, "total_steps": 8680, "loss": 0.7746715545654297, "lr": 1.1178306081730664e-06, "epoch": 0.9788018433179724, "percentage": 48.94, "elapsed_time": "5:48:16", "remaining_time": "6:03:21"} +{"current_steps": 4249, "total_steps": 8680, "loss": 0.8829167485237122, "lr": 1.117452269845892e-06, "epoch": 0.9790322580645161, "percentage": 48.95, "elapsed_time": "5:48:22", "remaining_time": "6:03:17"} +{"current_steps": 4250, "total_steps": 8680, "loss": 0.7592206001281738, "lr": 1.1170739144706764e-06, "epoch": 0.9792626728110599, "percentage": 48.96, "elapsed_time": "5:48:27", "remaining_time": "6:03:12"} +{"current_steps": 4251, "total_steps": 8680, "loss": 0.8107382655143738, "lr": 1.1166955421023368e-06, "epoch": 0.9794930875576037, "percentage": 48.97, "elapsed_time": "5:48:31", "remaining_time": "6:03:06"} +{"current_steps": 4252, "total_steps": 8680, "loss": 0.6807001829147339, "lr": 1.116317152795794e-06, "epoch": 0.9797235023041475, "percentage": 48.99, "elapsed_time": "5:48:36", "remaining_time": "6:03:01"} +{"current_steps": 4253, "total_steps": 8680, "loss": 0.7752517461776733, "lr": 
1.1159387466059705e-06, "epoch": 0.9799539170506912, "percentage": 49.0, "elapsed_time": "5:48:40", "remaining_time": "6:02:56"} +{"current_steps": 4254, "total_steps": 8680, "loss": 0.7484745383262634, "lr": 1.115560323587791e-06, "epoch": 0.9801843317972351, "percentage": 49.01, "elapsed_time": "5:48:44", "remaining_time": "6:02:51"} +{"current_steps": 4255, "total_steps": 8680, "loss": 0.877413809299469, "lr": 1.1151818837961838e-06, "epoch": 0.9804147465437788, "percentage": 49.02, "elapsed_time": "5:48:49", "remaining_time": "6:02:45"} +{"current_steps": 4256, "total_steps": 8680, "loss": 0.7806656360626221, "lr": 1.1148034272860785e-06, "epoch": 0.9806451612903225, "percentage": 49.03, "elapsed_time": "5:48:53", "remaining_time": "6:02:39"} +{"current_steps": 4257, "total_steps": 8680, "loss": 0.6938076019287109, "lr": 1.1144249541124078e-06, "epoch": 0.9808755760368664, "percentage": 49.04, "elapsed_time": "5:48:58", "remaining_time": "6:02:34"} +{"current_steps": 4258, "total_steps": 8680, "loss": 0.8832957148551941, "lr": 1.1140464643301064e-06, "epoch": 0.9811059907834101, "percentage": 49.06, "elapsed_time": "5:49:03", "remaining_time": "6:02:30"} +{"current_steps": 4259, "total_steps": 8680, "loss": 0.7794016003608704, "lr": 1.1136679579941117e-06, "epoch": 0.9813364055299539, "percentage": 49.07, "elapsed_time": "5:49:08", "remaining_time": "6:02:25"} +{"current_steps": 4260, "total_steps": 8680, "loss": 0.6877585053443909, "lr": 1.1132894351593636e-06, "epoch": 0.9815668202764977, "percentage": 49.08, "elapsed_time": "5:49:15", "remaining_time": "6:02:22"} +{"current_steps": 4261, "total_steps": 8680, "loss": 0.8268473148345947, "lr": 1.1129108958808037e-06, "epoch": 0.9817972350230415, "percentage": 49.09, "elapsed_time": "5:49:19", "remaining_time": "6:02:16"} +{"current_steps": 4262, "total_steps": 8680, "loss": 0.6717547178268433, "lr": 1.112532340213377e-06, "epoch": 0.9820276497695852, "percentage": 49.1, "elapsed_time": "5:49:24", 
"remaining_time": "6:02:12"} +{"current_steps": 4263, "total_steps": 8680, "loss": 0.849999725818634, "lr": 1.11215376821203e-06, "epoch": 0.9822580645161291, "percentage": 49.11, "elapsed_time": "5:49:29", "remaining_time": "6:02:07"} +{"current_steps": 4264, "total_steps": 8680, "loss": 0.6562552452087402, "lr": 1.1117751799317118e-06, "epoch": 0.9824884792626728, "percentage": 49.12, "elapsed_time": "5:49:34", "remaining_time": "6:02:02"} +{"current_steps": 4265, "total_steps": 8680, "loss": 0.7734784483909607, "lr": 1.1113965754273743e-06, "epoch": 0.9827188940092166, "percentage": 49.14, "elapsed_time": "5:49:39", "remaining_time": "6:01:56"} +{"current_steps": 4266, "total_steps": 8680, "loss": 0.7580564022064209, "lr": 1.1110179547539717e-06, "epoch": 0.9829493087557604, "percentage": 49.15, "elapsed_time": "5:49:43", "remaining_time": "6:01:51"} +{"current_steps": 4267, "total_steps": 8680, "loss": 0.9207481145858765, "lr": 1.1106393179664595e-06, "epoch": 0.9831797235023041, "percentage": 49.16, "elapsed_time": "5:49:49", "remaining_time": "6:01:47"} +{"current_steps": 4268, "total_steps": 8680, "loss": 0.8987482786178589, "lr": 1.1102606651197968e-06, "epoch": 0.9834101382488479, "percentage": 49.17, "elapsed_time": "5:49:54", "remaining_time": "6:01:43"} +{"current_steps": 4269, "total_steps": 8680, "loss": 0.7486778497695923, "lr": 1.1098819962689445e-06, "epoch": 0.9836405529953917, "percentage": 49.18, "elapsed_time": "5:50:01", "remaining_time": "6:01:39"} +{"current_steps": 4270, "total_steps": 8680, "loss": 0.7387109994888306, "lr": 1.1095033114688662e-06, "epoch": 0.9838709677419355, "percentage": 49.19, "elapsed_time": "5:50:05", "remaining_time": "6:01:34"} +{"current_steps": 4271, "total_steps": 8680, "loss": 0.7337637543678284, "lr": 1.109124610774527e-06, "epoch": 0.9841013824884792, "percentage": 49.21, "elapsed_time": "5:50:11", "remaining_time": "6:01:30"} +{"current_steps": 4272, "total_steps": 8680, "loss": 0.7419463396072388, "lr": 
1.1087458942408952e-06, "epoch": 0.9843317972350231, "percentage": 49.22, "elapsed_time": "5:50:16", "remaining_time": "6:01:25"} +{"current_steps": 4273, "total_steps": 8680, "loss": 0.7525068521499634, "lr": 1.1083671619229407e-06, "epoch": 0.9845622119815668, "percentage": 49.23, "elapsed_time": "5:50:21", "remaining_time": "6:01:20"} +{"current_steps": 4274, "total_steps": 8680, "loss": 0.8593931198120117, "lr": 1.107988413875636e-06, "epoch": 0.9847926267281106, "percentage": 49.24, "elapsed_time": "5:50:25", "remaining_time": "6:01:14"} +{"current_steps": 4275, "total_steps": 8680, "loss": 0.9123519659042358, "lr": 1.107609650153956e-06, "epoch": 0.9850230414746544, "percentage": 49.25, "elapsed_time": "5:50:30", "remaining_time": "6:01:09"} +{"current_steps": 4276, "total_steps": 8680, "loss": 0.7099615335464478, "lr": 1.107230870812878e-06, "epoch": 0.9852534562211982, "percentage": 49.26, "elapsed_time": "5:50:36", "remaining_time": "6:01:05"} +{"current_steps": 4277, "total_steps": 8680, "loss": 0.9525141716003418, "lr": 1.1068520759073807e-06, "epoch": 0.9854838709677419, "percentage": 49.27, "elapsed_time": "5:50:41", "remaining_time": "6:01:01"} +{"current_steps": 4278, "total_steps": 8680, "loss": 0.8360154628753662, "lr": 1.106473265492446e-06, "epoch": 0.9857142857142858, "percentage": 49.29, "elapsed_time": "5:50:46", "remaining_time": "6:00:56"} +{"current_steps": 4279, "total_steps": 8680, "loss": 0.7788960933685303, "lr": 1.106094439623058e-06, "epoch": 0.9859447004608295, "percentage": 49.3, "elapsed_time": "5:50:52", "remaining_time": "6:00:52"} +{"current_steps": 4280, "total_steps": 8680, "loss": 0.76897132396698, "lr": 1.1057155983542024e-06, "epoch": 0.9861751152073732, "percentage": 49.31, "elapsed_time": "5:50:55", "remaining_time": "6:00:46"} +{"current_steps": 4281, "total_steps": 8680, "loss": 0.8062764406204224, "lr": 1.1053367417408678e-06, "epoch": 0.9864055299539171, "percentage": 49.32, "elapsed_time": "5:50:59", 
"remaining_time": "6:00:40"} +{"current_steps": 4282, "total_steps": 8680, "loss": 0.6796555519104004, "lr": 1.1049578698380446e-06, "epoch": 0.9866359447004608, "percentage": 49.33, "elapsed_time": "5:51:04", "remaining_time": "6:00:34"} +{"current_steps": 4283, "total_steps": 8680, "loss": 0.8495693206787109, "lr": 1.1045789827007256e-06, "epoch": 0.9868663594470046, "percentage": 49.34, "elapsed_time": "5:51:10", "remaining_time": "6:00:30"} +{"current_steps": 4284, "total_steps": 8680, "loss": 0.9202588200569153, "lr": 1.1042000803839054e-06, "epoch": 0.9870967741935484, "percentage": 49.35, "elapsed_time": "5:51:16", "remaining_time": "6:00:27"} +{"current_steps": 4285, "total_steps": 8680, "loss": 0.8204039335250854, "lr": 1.1038211629425815e-06, "epoch": 0.9873271889400922, "percentage": 49.37, "elapsed_time": "5:51:21", "remaining_time": "6:00:22"} +{"current_steps": 4286, "total_steps": 8680, "loss": 0.921082615852356, "lr": 1.1034422304317534e-06, "epoch": 0.9875576036866359, "percentage": 49.38, "elapsed_time": "5:51:25", "remaining_time": "6:00:17"} +{"current_steps": 4287, "total_steps": 8680, "loss": 0.8114739656448364, "lr": 1.1030632829064225e-06, "epoch": 0.9877880184331798, "percentage": 49.39, "elapsed_time": "5:51:30", "remaining_time": "6:00:12"} +{"current_steps": 4288, "total_steps": 8680, "loss": 0.7394933700561523, "lr": 1.1026843204215924e-06, "epoch": 0.9880184331797235, "percentage": 49.4, "elapsed_time": "5:51:36", "remaining_time": "6:00:07"} +{"current_steps": 4289, "total_steps": 8680, "loss": 0.9515210390090942, "lr": 1.1023053430322692e-06, "epoch": 0.9882488479262673, "percentage": 49.41, "elapsed_time": "5:51:42", "remaining_time": "6:00:04"} +{"current_steps": 4290, "total_steps": 8680, "loss": 0.6729186773300171, "lr": 1.1019263507934611e-06, "epoch": 0.988479262672811, "percentage": 49.42, "elapsed_time": "5:51:48", "remaining_time": "6:00:00"} +{"current_steps": 4291, "total_steps": 8680, "loss": 0.6455283164978027, "lr": 
1.1015473437601776e-06, "epoch": 0.9887096774193549, "percentage": 49.44, "elapsed_time": "5:51:54", "remaining_time": "5:59:56"} +{"current_steps": 4292, "total_steps": 8680, "loss": 0.8071424961090088, "lr": 1.1011683219874322e-06, "epoch": 0.9889400921658986, "percentage": 49.45, "elapsed_time": "5:51:59", "remaining_time": "5:59:51"} +{"current_steps": 4293, "total_steps": 8680, "loss": 0.7287160754203796, "lr": 1.1007892855302385e-06, "epoch": 0.9891705069124423, "percentage": 49.46, "elapsed_time": "5:52:06", "remaining_time": "5:59:48"} +{"current_steps": 4294, "total_steps": 8680, "loss": 0.7916513681411743, "lr": 1.1004102344436135e-06, "epoch": 0.9894009216589862, "percentage": 49.47, "elapsed_time": "5:52:11", "remaining_time": "5:59:44"} +{"current_steps": 4295, "total_steps": 8680, "loss": 0.8075610399246216, "lr": 1.1000311687825757e-06, "epoch": 0.9896313364055299, "percentage": 49.48, "elapsed_time": "5:52:15", "remaining_time": "5:59:38"} +{"current_steps": 4296, "total_steps": 8680, "loss": 0.6144437193870544, "lr": 1.0996520886021465e-06, "epoch": 0.9898617511520738, "percentage": 49.49, "elapsed_time": "5:52:19", "remaining_time": "5:59:33"} +{"current_steps": 4297, "total_steps": 8680, "loss": 0.830337643623352, "lr": 1.0992729939573482e-06, "epoch": 0.9900921658986175, "percentage": 49.5, "elapsed_time": "5:52:24", "remaining_time": "5:59:28"} +{"current_steps": 4298, "total_steps": 8680, "loss": 0.7104393243789673, "lr": 1.0988938849032063e-06, "epoch": 0.9903225806451613, "percentage": 49.52, "elapsed_time": "5:52:30", "remaining_time": "5:59:23"} +{"current_steps": 4299, "total_steps": 8680, "loss": 0.746238112449646, "lr": 1.0985147614947484e-06, "epoch": 0.990552995391705, "percentage": 49.53, "elapsed_time": "5:52:34", "remaining_time": "5:59:18"} +{"current_steps": 4300, "total_steps": 8680, "loss": 0.7309597730636597, "lr": 1.0981356237870027e-06, "epoch": 0.9907834101382489, "percentage": 49.54, "elapsed_time": "5:52:40", 
"remaining_time": "5:59:14"} +{"current_steps": 4301, "total_steps": 8680, "loss": 0.799136757850647, "lr": 1.0977564718350013e-06, "epoch": 0.9910138248847926, "percentage": 49.55, "elapsed_time": "5:52:46", "remaining_time": "5:59:10"} +{"current_steps": 4302, "total_steps": 8680, "loss": 0.7477747201919556, "lr": 1.0973773056937776e-06, "epoch": 0.9912442396313365, "percentage": 49.56, "elapsed_time": "5:52:51", "remaining_time": "5:59:05"} +{"current_steps": 4303, "total_steps": 8680, "loss": 0.8051053285598755, "lr": 1.0969981254183668e-06, "epoch": 0.9914746543778802, "percentage": 49.57, "elapsed_time": "5:52:56", "remaining_time": "5:59:00"} +{"current_steps": 4304, "total_steps": 8680, "loss": 0.8023163080215454, "lr": 1.0966189310638063e-06, "epoch": 0.9917050691244239, "percentage": 49.59, "elapsed_time": "5:53:01", "remaining_time": "5:58:56"} +{"current_steps": 4305, "total_steps": 8680, "loss": 0.6804348230361938, "lr": 1.096239722685136e-06, "epoch": 0.9919354838709677, "percentage": 49.6, "elapsed_time": "5:53:07", "remaining_time": "5:58:51"} +{"current_steps": 4306, "total_steps": 8680, "loss": 0.8276509046554565, "lr": 1.0958605003373976e-06, "epoch": 0.9921658986175115, "percentage": 49.61, "elapsed_time": "5:53:12", "remaining_time": "5:58:47"} +{"current_steps": 4307, "total_steps": 8680, "loss": 0.9733830690383911, "lr": 1.095481264075634e-06, "epoch": 0.9923963133640553, "percentage": 49.62, "elapsed_time": "5:53:17", "remaining_time": "5:58:42"} +{"current_steps": 4308, "total_steps": 8680, "loss": 0.824803352355957, "lr": 1.0951020139548917e-06, "epoch": 0.992626728110599, "percentage": 49.63, "elapsed_time": "5:53:22", "remaining_time": "5:58:37"} +{"current_steps": 4309, "total_steps": 8680, "loss": 0.8144090175628662, "lr": 1.094722750030218e-06, "epoch": 0.9928571428571429, "percentage": 49.64, "elapsed_time": "5:53:28", "remaining_time": "5:58:33"} +{"current_steps": 4310, "total_steps": 8680, "loss": 0.8394016027450562, "lr": 
1.0943434723566623e-06, "epoch": 0.9930875576036866, "percentage": 49.65, "elapsed_time": "5:53:33", "remaining_time": "5:58:29"} +{"current_steps": 4311, "total_steps": 8680, "loss": 0.7688177824020386, "lr": 1.0939641809892766e-06, "epoch": 0.9933179723502304, "percentage": 49.67, "elapsed_time": "5:53:37", "remaining_time": "5:58:23"} +{"current_steps": 4312, "total_steps": 8680, "loss": 0.8157391548156738, "lr": 1.0935848759831144e-06, "epoch": 0.9935483870967742, "percentage": 49.68, "elapsed_time": "5:53:41", "remaining_time": "5:58:17"} +{"current_steps": 4313, "total_steps": 8680, "loss": 0.7618423700332642, "lr": 1.0932055573932316e-06, "epoch": 0.993778801843318, "percentage": 49.69, "elapsed_time": "5:53:47", "remaining_time": "5:58:13"} +{"current_steps": 4314, "total_steps": 8680, "loss": 0.7404567003250122, "lr": 1.0928262252746848e-06, "epoch": 0.9940092165898617, "percentage": 49.7, "elapsed_time": "5:53:53", "remaining_time": "5:58:09"} +{"current_steps": 4315, "total_steps": 8680, "loss": 0.6825613975524902, "lr": 1.092446879682535e-06, "epoch": 0.9942396313364056, "percentage": 49.71, "elapsed_time": "5:53:58", "remaining_time": "5:58:04"} +{"current_steps": 4316, "total_steps": 8680, "loss": 0.6607732772827148, "lr": 1.0920675206718428e-06, "epoch": 0.9944700460829493, "percentage": 49.72, "elapsed_time": "5:54:04", "remaining_time": "5:58:00"} +{"current_steps": 4317, "total_steps": 8680, "loss": 0.715195894241333, "lr": 1.0916881482976716e-06, "epoch": 0.994700460829493, "percentage": 49.74, "elapsed_time": "5:54:10", "remaining_time": "5:57:57"} +{"current_steps": 4318, "total_steps": 8680, "loss": 0.7593914270401001, "lr": 1.0913087626150872e-06, "epoch": 0.9949308755760369, "percentage": 49.75, "elapsed_time": "5:54:16", "remaining_time": "5:57:52"} +{"current_steps": 4319, "total_steps": 8680, "loss": 0.8368399143218994, "lr": 1.090929363679157e-06, "epoch": 0.9951612903225806, "percentage": 49.76, "elapsed_time": "5:54:21", 
"remaining_time": "5:57:48"} +{"current_steps": 4320, "total_steps": 8680, "loss": 0.7799170613288879, "lr": 1.0905499515449499e-06, "epoch": 0.9953917050691244, "percentage": 49.77, "elapsed_time": "5:54:28", "remaining_time": "5:57:44"} +{"current_steps": 4321, "total_steps": 8680, "loss": 0.8194636702537537, "lr": 1.0901705262675372e-06, "epoch": 0.9956221198156682, "percentage": 49.78, "elapsed_time": "5:54:33", "remaining_time": "5:57:40"} +{"current_steps": 4322, "total_steps": 8680, "loss": 0.7150344848632812, "lr": 1.0897910879019917e-06, "epoch": 0.995852534562212, "percentage": 49.79, "elapsed_time": "5:54:38", "remaining_time": "5:57:35"} +{"current_steps": 4323, "total_steps": 8680, "loss": 0.737568736076355, "lr": 1.089411636503389e-06, "epoch": 0.9960829493087557, "percentage": 49.8, "elapsed_time": "5:54:43", "remaining_time": "5:57:30"} +{"current_steps": 4324, "total_steps": 8680, "loss": 0.7037359476089478, "lr": 1.0890321721268056e-06, "epoch": 0.9963133640552996, "percentage": 49.82, "elapsed_time": "5:54:48", "remaining_time": "5:57:25"} +{"current_steps": 4325, "total_steps": 8680, "loss": 0.7664542198181152, "lr": 1.0886526948273206e-06, "epoch": 0.9965437788018433, "percentage": 49.83, "elapsed_time": "5:54:53", "remaining_time": "5:57:21"} +{"current_steps": 4326, "total_steps": 8680, "loss": 0.7700943946838379, "lr": 1.0882732046600138e-06, "epoch": 0.9967741935483871, "percentage": 49.84, "elapsed_time": "5:54:59", "remaining_time": "5:57:17"} +{"current_steps": 4327, "total_steps": 8680, "loss": 0.7634885311126709, "lr": 1.0878937016799683e-06, "epoch": 0.9970046082949309, "percentage": 49.85, "elapsed_time": "5:55:04", "remaining_time": "5:57:12"} +{"current_steps": 4328, "total_steps": 8680, "loss": 0.6784960031509399, "lr": 1.0875141859422685e-06, "epoch": 0.9972350230414746, "percentage": 49.86, "elapsed_time": "5:55:09", "remaining_time": "5:57:07"} +{"current_steps": 4329, "total_steps": 8680, "loss": 0.7224948406219482, "lr": 
1.0871346575020002e-06, "epoch": 0.9974654377880184, "percentage": 49.87, "elapsed_time": "5:55:14", "remaining_time": "5:57:02"} +{"current_steps": 4330, "total_steps": 8680, "loss": 0.7886664867401123, "lr": 1.086755116414252e-06, "epoch": 0.9976958525345622, "percentage": 49.88, "elapsed_time": "5:55:19", "remaining_time": "5:56:58"} +{"current_steps": 4331, "total_steps": 8680, "loss": 0.7871295809745789, "lr": 1.0863755627341133e-06, "epoch": 0.997926267281106, "percentage": 49.9, "elapsed_time": "5:55:24", "remaining_time": "5:56:53"} +{"current_steps": 4332, "total_steps": 8680, "loss": 0.700717568397522, "lr": 1.085995996516676e-06, "epoch": 0.9981566820276497, "percentage": 49.91, "elapsed_time": "5:55:29", "remaining_time": "5:56:48"} +{"current_steps": 4333, "total_steps": 8680, "loss": 0.9090461730957031, "lr": 1.085616417817034e-06, "epoch": 0.9983870967741936, "percentage": 49.92, "elapsed_time": "5:55:34", "remaining_time": "5:56:43"} +{"current_steps": 4334, "total_steps": 8680, "loss": 0.7697109580039978, "lr": 1.0852368266902818e-06, "epoch": 0.9986175115207373, "percentage": 49.93, "elapsed_time": "5:55:39", "remaining_time": "5:56:38"} +{"current_steps": 4335, "total_steps": 8680, "loss": 0.8135972023010254, "lr": 1.0848572231915177e-06, "epoch": 0.9988479262672811, "percentage": 49.94, "elapsed_time": "5:55:43", "remaining_time": "5:56:32"} +{"current_steps": 4336, "total_steps": 8680, "loss": 0.803811252117157, "lr": 1.0844776073758392e-06, "epoch": 0.9990783410138249, "percentage": 49.95, "elapsed_time": "5:55:48", "remaining_time": "5:56:27"} +{"current_steps": 4337, "total_steps": 8680, "loss": 0.874006986618042, "lr": 1.0840979792983482e-06, "epoch": 0.9993087557603687, "percentage": 49.97, "elapsed_time": "5:55:53", "remaining_time": "5:56:22"} +{"current_steps": 4338, "total_steps": 8680, "loss": 0.7424730062484741, "lr": 1.0837183390141472e-06, "epoch": 0.9995391705069124, "percentage": 49.98, "elapsed_time": "5:55:59", 
"remaining_time": "5:56:19"} +{"current_steps": 4339, "total_steps": 8680, "loss": 0.8219665884971619, "lr": 1.0833386865783393e-06, "epoch": 0.9997695852534563, "percentage": 49.99, "elapsed_time": "5:56:04", "remaining_time": "5:56:13"} +{"current_steps": 4340, "total_steps": 8680, "loss": 0.7065195441246033, "lr": 1.0829590220460319e-06, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "5:56:09", "remaining_time": "5:56:09"} +{"current_steps": 4341, "total_steps": 8680, "loss": 0.7988346219062805, "lr": 1.0825793454723324e-06, "epoch": 1.0002304147465437, "percentage": 50.01, "elapsed_time": "5:56:14", "remaining_time": "5:56:04"} +{"current_steps": 4342, "total_steps": 8680, "loss": 0.6731617450714111, "lr": 1.08219965691235e-06, "epoch": 1.0004608294930875, "percentage": 50.02, "elapsed_time": "5:56:20", "remaining_time": "5:56:00"} +{"current_steps": 4343, "total_steps": 8680, "loss": 0.8058687448501587, "lr": 1.0818199564211964e-06, "epoch": 1.0006912442396314, "percentage": 50.03, "elapsed_time": "5:56:25", "remaining_time": "5:55:55"} +{"current_steps": 4344, "total_steps": 8680, "loss": 0.8351448178291321, "lr": 1.081440244053984e-06, "epoch": 1.0009216589861751, "percentage": 50.05, "elapsed_time": "5:56:31", "remaining_time": "5:55:52"} +{"current_steps": 4345, "total_steps": 8680, "loss": 0.8619185090065002, "lr": 1.0810605198658286e-06, "epoch": 1.0011520737327189, "percentage": 50.06, "elapsed_time": "5:56:35", "remaining_time": "5:55:46"} +{"current_steps": 4346, "total_steps": 8680, "loss": 0.7600966691970825, "lr": 1.0806807839118455e-06, "epoch": 1.0013824884792626, "percentage": 50.07, "elapsed_time": "5:56:41", "remaining_time": "5:55:42"} +{"current_steps": 4347, "total_steps": 8680, "loss": 0.8123422265052795, "lr": 1.0803010362471536e-06, "epoch": 1.0016129032258065, "percentage": 50.08, "elapsed_time": "5:56:44", "remaining_time": "5:55:35"} +{"current_steps": 4348, "total_steps": 8680, "loss": 0.8277603983879089, "lr": 
1.0799212769268727e-06, "epoch": 1.0018433179723503, "percentage": 50.09, "elapsed_time": "5:56:48", "remaining_time": "5:55:30"} +{"current_steps": 4349, "total_steps": 8680, "loss": 0.6666774153709412, "lr": 1.079541506006124e-06, "epoch": 1.002073732718894, "percentage": 50.1, "elapsed_time": "5:56:55", "remaining_time": "5:55:26"} +{"current_steps": 4350, "total_steps": 8680, "loss": 0.8483254909515381, "lr": 1.0791617235400313e-06, "epoch": 1.0023041474654377, "percentage": 50.12, "elapsed_time": "5:57:00", "remaining_time": "5:55:21"} +{"current_steps": 4351, "total_steps": 8680, "loss": 0.6585661172866821, "lr": 1.0787819295837193e-06, "epoch": 1.0025345622119817, "percentage": 50.13, "elapsed_time": "5:57:06", "remaining_time": "5:55:18"} +{"current_steps": 4352, "total_steps": 8680, "loss": 0.7591124773025513, "lr": 1.0784021241923142e-06, "epoch": 1.0027649769585254, "percentage": 50.14, "elapsed_time": "5:57:12", "remaining_time": "5:55:13"} +{"current_steps": 4353, "total_steps": 8680, "loss": 0.7305805683135986, "lr": 1.078022307420945e-06, "epoch": 1.0029953917050691, "percentage": 50.15, "elapsed_time": "5:57:19", "remaining_time": "5:55:10"} +{"current_steps": 4354, "total_steps": 8680, "loss": 0.6558996438980103, "lr": 1.0776424793247407e-06, "epoch": 1.0032258064516129, "percentage": 50.16, "elapsed_time": "5:57:25", "remaining_time": "5:55:07"} +{"current_steps": 4355, "total_steps": 8680, "loss": 0.6837360262870789, "lr": 1.0772626399588336e-06, "epoch": 1.0034562211981566, "percentage": 50.17, "elapsed_time": "5:57:29", "remaining_time": "5:55:01"} +{"current_steps": 4356, "total_steps": 8680, "loss": 0.778124988079071, "lr": 1.0768827893783562e-06, "epoch": 1.0036866359447005, "percentage": 50.18, "elapsed_time": "5:57:34", "remaining_time": "5:54:56"} +{"current_steps": 4357, "total_steps": 8680, "loss": 0.7676408886909485, "lr": 1.0765029276384438e-06, "epoch": 1.0039170506912443, "percentage": 50.2, "elapsed_time": "5:57:40", 
"remaining_time": "5:54:52"} +{"current_steps": 4358, "total_steps": 8680, "loss": 0.854246973991394, "lr": 1.0761230547942333e-06, "epoch": 1.004147465437788, "percentage": 50.21, "elapsed_time": "5:57:44", "remaining_time": "5:54:47"} +{"current_steps": 4359, "total_steps": 8680, "loss": 0.716766893863678, "lr": 1.0757431709008615e-06, "epoch": 1.0043778801843317, "percentage": 50.22, "elapsed_time": "5:57:51", "remaining_time": "5:54:44"} +{"current_steps": 4360, "total_steps": 8680, "loss": 0.6827799081802368, "lr": 1.075363276013469e-06, "epoch": 1.0046082949308757, "percentage": 50.23, "elapsed_time": "5:57:56", "remaining_time": "5:54:39"} +{"current_steps": 4361, "total_steps": 8680, "loss": 0.7977348566055298, "lr": 1.074983370187197e-06, "epoch": 1.0048387096774194, "percentage": 50.24, "elapsed_time": "5:58:02", "remaining_time": "5:54:35"} +{"current_steps": 4362, "total_steps": 8680, "loss": 0.6958035826683044, "lr": 1.0746034534771878e-06, "epoch": 1.0050691244239631, "percentage": 50.25, "elapsed_time": "5:58:06", "remaining_time": "5:54:29"} +{"current_steps": 4363, "total_steps": 8680, "loss": 0.8407979607582092, "lr": 1.0742235259385861e-06, "epoch": 1.0052995391705069, "percentage": 50.26, "elapsed_time": "5:58:11", "remaining_time": "5:54:25"} +{"current_steps": 4364, "total_steps": 8680, "loss": 0.8180495500564575, "lr": 1.073843587626538e-06, "epoch": 1.0055299539170508, "percentage": 50.28, "elapsed_time": "5:58:19", "remaining_time": "5:54:22"} +{"current_steps": 4365, "total_steps": 8680, "loss": 0.7551306486129761, "lr": 1.0734636385961907e-06, "epoch": 1.0057603686635945, "percentage": 50.29, "elapsed_time": "5:58:24", "remaining_time": "5:54:17"} +{"current_steps": 4366, "total_steps": 8680, "loss": 0.6598455309867859, "lr": 1.0730836789026936e-06, "epoch": 1.0059907834101383, "percentage": 50.3, "elapsed_time": "5:58:29", "remaining_time": "5:54:13"} +{"current_steps": 4367, "total_steps": 8680, "loss": 0.9186126589775085, "lr": 
1.0727037086011971e-06, "epoch": 1.006221198156682, "percentage": 50.31, "elapsed_time": "5:58:34", "remaining_time": "5:54:08"} +{"current_steps": 4368, "total_steps": 8680, "loss": 0.8491259813308716, "lr": 1.0723237277468538e-06, "epoch": 1.0064516129032257, "percentage": 50.32, "elapsed_time": "5:58:40", "remaining_time": "5:54:04"} +{"current_steps": 4369, "total_steps": 8680, "loss": 0.6938691139221191, "lr": 1.071943736394817e-06, "epoch": 1.0066820276497697, "percentage": 50.33, "elapsed_time": "5:58:45", "remaining_time": "5:53:59"} +{"current_steps": 4370, "total_steps": 8680, "loss": 0.801313579082489, "lr": 1.0715637346002423e-06, "epoch": 1.0069124423963134, "percentage": 50.35, "elapsed_time": "5:58:50", "remaining_time": "5:53:54"} +{"current_steps": 4371, "total_steps": 8680, "loss": 0.7663706541061401, "lr": 1.071183722418286e-06, "epoch": 1.0071428571428571, "percentage": 50.36, "elapsed_time": "5:58:56", "remaining_time": "5:53:50"} +{"current_steps": 4372, "total_steps": 8680, "loss": 0.7434467077255249, "lr": 1.070803699904107e-06, "epoch": 1.0073732718894008, "percentage": 50.37, "elapsed_time": "5:59:01", "remaining_time": "5:53:46"} +{"current_steps": 4373, "total_steps": 8680, "loss": 0.8366774320602417, "lr": 1.0704236671128643e-06, "epoch": 1.0076036866359448, "percentage": 50.38, "elapsed_time": "5:59:06", "remaining_time": "5:53:41"} +{"current_steps": 4374, "total_steps": 8680, "loss": 0.7027710676193237, "lr": 1.07004362409972e-06, "epoch": 1.0078341013824885, "percentage": 50.39, "elapsed_time": "5:59:11", "remaining_time": "5:53:36"} +{"current_steps": 4375, "total_steps": 8680, "loss": 0.7965548038482666, "lr": 1.0696635709198357e-06, "epoch": 1.0080645161290323, "percentage": 50.4, "elapsed_time": "5:59:16", "remaining_time": "5:53:31"} +{"current_steps": 4376, "total_steps": 8680, "loss": 0.8058432340621948, "lr": 1.0692835076283768e-06, "epoch": 1.008294930875576, "percentage": 50.41, "elapsed_time": "5:59:20", "remaining_time": 
"5:53:26"} +{"current_steps": 4377, "total_steps": 8680, "loss": 0.9056248068809509, "lr": 1.0689034342805085e-06, "epoch": 1.0085253456221197, "percentage": 50.43, "elapsed_time": "5:59:24", "remaining_time": "5:53:19"} +{"current_steps": 4378, "total_steps": 8680, "loss": 0.8407673835754395, "lr": 1.0685233509313979e-06, "epoch": 1.0087557603686637, "percentage": 50.44, "elapsed_time": "5:59:30", "remaining_time": "5:53:16"} +{"current_steps": 4379, "total_steps": 8680, "loss": 0.9138794541358948, "lr": 1.0681432576362133e-06, "epoch": 1.0089861751152074, "percentage": 50.45, "elapsed_time": "5:59:35", "remaining_time": "5:53:11"} +{"current_steps": 4380, "total_steps": 8680, "loss": 0.6640630960464478, "lr": 1.067763154450125e-06, "epoch": 1.0092165898617511, "percentage": 50.46, "elapsed_time": "5:59:40", "remaining_time": "5:53:06"} +{"current_steps": 4381, "total_steps": 8680, "loss": 0.9387146234512329, "lr": 1.0673830414283051e-06, "epoch": 1.0094470046082948, "percentage": 50.47, "elapsed_time": "5:59:45", "remaining_time": "5:53:01"} +{"current_steps": 4382, "total_steps": 8680, "loss": 0.7288271188735962, "lr": 1.067002918625926e-06, "epoch": 1.0096774193548388, "percentage": 50.48, "elapsed_time": "5:59:50", "remaining_time": "5:52:57"} +{"current_steps": 4383, "total_steps": 8680, "loss": 0.7886035442352295, "lr": 1.0666227860981613e-06, "epoch": 1.0099078341013825, "percentage": 50.5, "elapsed_time": "5:59:56", "remaining_time": "5:52:52"} +{"current_steps": 4384, "total_steps": 8680, "loss": 0.6929852962493896, "lr": 1.066242643900188e-06, "epoch": 1.0101382488479262, "percentage": 50.51, "elapsed_time": "6:00:01", "remaining_time": "5:52:47"} +{"current_steps": 4385, "total_steps": 8680, "loss": 0.7709990739822388, "lr": 1.065862492087182e-06, "epoch": 1.01036866359447, "percentage": 50.52, "elapsed_time": "6:00:05", "remaining_time": "5:52:42"} +{"current_steps": 4386, "total_steps": 8680, "loss": 0.811382532119751, "lr": 1.065482330714323e-06, 
"epoch": 1.010599078341014, "percentage": 50.53, "elapsed_time": "6:00:11", "remaining_time": "5:52:38"} +{"current_steps": 4387, "total_steps": 8680, "loss": 0.8274353742599487, "lr": 1.0651021598367905e-06, "epoch": 1.0108294930875577, "percentage": 50.54, "elapsed_time": "6:00:17", "remaining_time": "5:52:34"} +{"current_steps": 4388, "total_steps": 8680, "loss": 0.7449204921722412, "lr": 1.0647219795097651e-06, "epoch": 1.0110599078341014, "percentage": 50.55, "elapsed_time": "6:00:22", "remaining_time": "5:52:29"} +{"current_steps": 4389, "total_steps": 8680, "loss": 0.675945520401001, "lr": 1.0643417897884303e-06, "epoch": 1.011290322580645, "percentage": 50.56, "elapsed_time": "6:00:27", "remaining_time": "5:52:24"} +{"current_steps": 4390, "total_steps": 8680, "loss": 0.7329400777816772, "lr": 1.06396159072797e-06, "epoch": 1.0115207373271888, "percentage": 50.58, "elapsed_time": "6:00:32", "remaining_time": "5:52:19"} +{"current_steps": 4391, "total_steps": 8680, "loss": 0.7809139490127563, "lr": 1.0635813823835692e-06, "epoch": 1.0117511520737328, "percentage": 50.59, "elapsed_time": "6:00:38", "remaining_time": "5:52:15"} +{"current_steps": 4392, "total_steps": 8680, "loss": 0.799081563949585, "lr": 1.0632011648104155e-06, "epoch": 1.0119815668202765, "percentage": 50.6, "elapsed_time": "6:00:42", "remaining_time": "5:52:10"} +{"current_steps": 4393, "total_steps": 8680, "loss": 0.7738279104232788, "lr": 1.062820938063696e-06, "epoch": 1.0122119815668202, "percentage": 50.61, "elapsed_time": "6:00:47", "remaining_time": "5:52:05"} +{"current_steps": 4394, "total_steps": 8680, "loss": 0.895797610282898, "lr": 1.0624407021986007e-06, "epoch": 1.012442396313364, "percentage": 50.62, "elapsed_time": "6:00:51", "remaining_time": "5:51:59"} +{"current_steps": 4395, "total_steps": 8680, "loss": 0.6887848973274231, "lr": 1.0620604572703198e-06, "epoch": 1.012672811059908, "percentage": 50.63, "elapsed_time": "6:00:57", "remaining_time": "5:51:55"} 
+{"current_steps": 4396, "total_steps": 8680, "loss": 0.9540888071060181, "lr": 1.0616802033340457e-06, "epoch": 1.0129032258064516, "percentage": 50.65, "elapsed_time": "6:01:01", "remaining_time": "5:51:49"} +{"current_steps": 4397, "total_steps": 8680, "loss": 0.9047783017158508, "lr": 1.0612999404449721e-06, "epoch": 1.0131336405529954, "percentage": 50.66, "elapsed_time": "6:01:05", "remaining_time": "5:51:43"} +{"current_steps": 4398, "total_steps": 8680, "loss": 0.7030448913574219, "lr": 1.0609196686582931e-06, "epoch": 1.013364055299539, "percentage": 50.67, "elapsed_time": "6:01:09", "remaining_time": "5:51:38"} +{"current_steps": 4399, "total_steps": 8680, "loss": 0.8097348213195801, "lr": 1.0605393880292046e-06, "epoch": 1.013594470046083, "percentage": 50.68, "elapsed_time": "6:01:15", "remaining_time": "5:51:33"} +{"current_steps": 4400, "total_steps": 8680, "loss": 0.7446185350418091, "lr": 1.0601590986129045e-06, "epoch": 1.0138248847926268, "percentage": 50.69, "elapsed_time": "6:01:20", "remaining_time": "5:51:28"} +{"current_steps": 4401, "total_steps": 8680, "loss": 0.7450964450836182, "lr": 1.0597788004645908e-06, "epoch": 1.0140552995391705, "percentage": 50.7, "elapsed_time": "6:01:27", "remaining_time": "5:51:26"} +{"current_steps": 4402, "total_steps": 8680, "loss": 0.8326355218887329, "lr": 1.0593984936394632e-06, "epoch": 1.0142857142857142, "percentage": 50.71, "elapsed_time": "6:01:32", "remaining_time": "5:51:21"} +{"current_steps": 4403, "total_steps": 8680, "loss": 0.7013953924179077, "lr": 1.0590181781927227e-06, "epoch": 1.014516129032258, "percentage": 50.73, "elapsed_time": "6:01:38", "remaining_time": "5:51:17"} +{"current_steps": 4404, "total_steps": 8680, "loss": 0.7806364297866821, "lr": 1.0586378541795723e-06, "epoch": 1.014746543778802, "percentage": 50.74, "elapsed_time": "6:01:44", "remaining_time": "5:51:13"} +{"current_steps": 4405, "total_steps": 8680, "loss": 0.8207389116287231, "lr": 1.0582575216552146e-06, "epoch": 
1.0149769585253456, "percentage": 50.75, "elapsed_time": "6:01:49", "remaining_time": "5:51:08"} +{"current_steps": 4406, "total_steps": 8680, "loss": 0.8042873740196228, "lr": 1.0578771806748545e-06, "epoch": 1.0152073732718894, "percentage": 50.76, "elapsed_time": "6:01:53", "remaining_time": "5:51:02"} +{"current_steps": 4407, "total_steps": 8680, "loss": 0.7225071787834167, "lr": 1.057496831293699e-06, "epoch": 1.015437788018433, "percentage": 50.77, "elapsed_time": "6:01:58", "remaining_time": "5:50:57"} +{"current_steps": 4408, "total_steps": 8680, "loss": 0.7783743143081665, "lr": 1.0571164735669538e-06, "epoch": 1.015668202764977, "percentage": 50.78, "elapsed_time": "6:02:04", "remaining_time": "5:50:53"} +{"current_steps": 4409, "total_steps": 8680, "loss": 0.7455039024353027, "lr": 1.0567361075498286e-06, "epoch": 1.0158986175115208, "percentage": 50.79, "elapsed_time": "6:02:08", "remaining_time": "5:50:48"} +{"current_steps": 4410, "total_steps": 8680, "loss": 0.7819615602493286, "lr": 1.0563557332975322e-06, "epoch": 1.0161290322580645, "percentage": 50.81, "elapsed_time": "6:02:14", "remaining_time": "5:50:44"} +{"current_steps": 4411, "total_steps": 8680, "loss": 0.6466404795646667, "lr": 1.0559753508652758e-06, "epoch": 1.0163594470046082, "percentage": 50.82, "elapsed_time": "6:02:19", "remaining_time": "5:50:39"} +{"current_steps": 4412, "total_steps": 8680, "loss": 0.8728539943695068, "lr": 1.0555949603082715e-06, "epoch": 1.0165898617511522, "percentage": 50.83, "elapsed_time": "6:02:23", "remaining_time": "5:50:34"} +{"current_steps": 4413, "total_steps": 8680, "loss": 0.6082659959793091, "lr": 1.055214561681732e-06, "epoch": 1.016820276497696, "percentage": 50.84, "elapsed_time": "6:02:29", "remaining_time": "5:50:30"} +{"current_steps": 4414, "total_steps": 8680, "loss": 0.8429103493690491, "lr": 1.054834155040872e-06, "epoch": 1.0170506912442396, "percentage": 50.85, "elapsed_time": "6:02:34", "remaining_time": "5:50:25"} +{"current_steps": 
4415, "total_steps": 8680, "loss": 0.7953135967254639, "lr": 1.0544537404409073e-06, "epoch": 1.0172811059907834, "percentage": 50.86, "elapsed_time": "6:02:40", "remaining_time": "5:50:21"} +{"current_steps": 4416, "total_steps": 8680, "loss": 0.7243527173995972, "lr": 1.0540733179370542e-06, "epoch": 1.017511520737327, "percentage": 50.88, "elapsed_time": "6:02:47", "remaining_time": "5:50:18"} +{"current_steps": 4417, "total_steps": 8680, "loss": 0.6882613897323608, "lr": 1.0536928875845303e-06, "epoch": 1.017741935483871, "percentage": 50.89, "elapsed_time": "6:02:52", "remaining_time": "5:50:13"} +{"current_steps": 4418, "total_steps": 8680, "loss": 0.9157286882400513, "lr": 1.053312449438555e-06, "epoch": 1.0179723502304148, "percentage": 50.9, "elapsed_time": "6:02:58", "remaining_time": "5:50:09"} +{"current_steps": 4419, "total_steps": 8680, "loss": 0.7224643230438232, "lr": 1.0529320035543482e-06, "epoch": 1.0182027649769585, "percentage": 50.91, "elapsed_time": "6:03:02", "remaining_time": "5:50:03"} +{"current_steps": 4420, "total_steps": 8680, "loss": 0.874829888343811, "lr": 1.0525515499871311e-06, "epoch": 1.0184331797235022, "percentage": 50.92, "elapsed_time": "6:03:08", "remaining_time": "5:49:59"} +{"current_steps": 4421, "total_steps": 8680, "loss": 0.6911267042160034, "lr": 1.0521710887921262e-06, "epoch": 1.0186635944700462, "percentage": 50.93, "elapsed_time": "6:03:14", "remaining_time": "5:49:55"} +{"current_steps": 4422, "total_steps": 8680, "loss": 0.9065574407577515, "lr": 1.051790620024557e-06, "epoch": 1.01889400921659, "percentage": 50.94, "elapsed_time": "6:03:19", "remaining_time": "5:49:50"} +{"current_steps": 4423, "total_steps": 8680, "loss": 0.7671108245849609, "lr": 1.0514101437396474e-06, "epoch": 1.0191244239631336, "percentage": 50.96, "elapsed_time": "6:03:24", "remaining_time": "5:49:46"} +{"current_steps": 4424, "total_steps": 8680, "loss": 0.8706510066986084, "lr": 1.051029659992624e-06, "epoch": 1.0193548387096774, 
"percentage": 50.97, "elapsed_time": "6:03:31", "remaining_time": "5:49:43"} +{"current_steps": 4425, "total_steps": 8680, "loss": 0.741087794303894, "lr": 1.0506491688387128e-06, "epoch": 1.019585253456221, "percentage": 50.98, "elapsed_time": "6:03:36", "remaining_time": "5:49:38"} +{"current_steps": 4426, "total_steps": 8680, "loss": 0.8045330047607422, "lr": 1.0502686703331419e-06, "epoch": 1.019815668202765, "percentage": 50.99, "elapsed_time": "6:03:41", "remaining_time": "5:49:33"} +{"current_steps": 4427, "total_steps": 8680, "loss": 0.8464969992637634, "lr": 1.0498881645311398e-06, "epoch": 1.0200460829493088, "percentage": 51.0, "elapsed_time": "6:03:45", "remaining_time": "5:49:28"} +{"current_steps": 4428, "total_steps": 8680, "loss": 0.7660650610923767, "lr": 1.0495076514879367e-06, "epoch": 1.0202764976958525, "percentage": 51.01, "elapsed_time": "6:03:50", "remaining_time": "5:49:23"} +{"current_steps": 4429, "total_steps": 8680, "loss": 0.8565669059753418, "lr": 1.0491271312587636e-06, "epoch": 1.0205069124423962, "percentage": 51.03, "elapsed_time": "6:03:55", "remaining_time": "5:49:18"} +{"current_steps": 4430, "total_steps": 8680, "loss": 0.8884295225143433, "lr": 1.0487466038988525e-06, "epoch": 1.0207373271889402, "percentage": 51.04, "elapsed_time": "6:04:00", "remaining_time": "5:49:13"} +{"current_steps": 4431, "total_steps": 8680, "loss": 0.7300036549568176, "lr": 1.0483660694634361e-06, "epoch": 1.020967741935484, "percentage": 51.05, "elapsed_time": "6:04:04", "remaining_time": "5:49:07"} +{"current_steps": 4432, "total_steps": 8680, "loss": 0.7879898548126221, "lr": 1.0479855280077493e-06, "epoch": 1.0211981566820276, "percentage": 51.06, "elapsed_time": "6:04:08", "remaining_time": "5:49:01"} +{"current_steps": 4433, "total_steps": 8680, "loss": 0.9811698198318481, "lr": 1.0476049795870263e-06, "epoch": 1.0214285714285714, "percentage": 51.07, "elapsed_time": "6:04:13", "remaining_time": "5:48:56"} +{"current_steps": 4434, 
"total_steps": 8680, "loss": 0.7706241607666016, "lr": 1.0472244242565034e-06, "epoch": 1.0216589861751153, "percentage": 51.08, "elapsed_time": "6:04:18", "remaining_time": "5:48:52"} +{"current_steps": 4435, "total_steps": 8680, "loss": 0.761093020439148, "lr": 1.046843862071418e-06, "epoch": 1.021889400921659, "percentage": 51.09, "elapsed_time": "6:04:23", "remaining_time": "5:48:46"} +{"current_steps": 4436, "total_steps": 8680, "loss": 0.8306092619895935, "lr": 1.046463293087008e-06, "epoch": 1.0221198156682028, "percentage": 51.11, "elapsed_time": "6:04:27", "remaining_time": "5:48:41"} +{"current_steps": 4437, "total_steps": 8680, "loss": 0.9669788479804993, "lr": 1.0460827173585125e-06, "epoch": 1.0223502304147465, "percentage": 51.12, "elapsed_time": "6:04:32", "remaining_time": "5:48:35"} +{"current_steps": 4438, "total_steps": 8680, "loss": 0.8461639285087585, "lr": 1.0457021349411715e-06, "epoch": 1.0225806451612902, "percentage": 51.13, "elapsed_time": "6:04:36", "remaining_time": "5:48:30"} +{"current_steps": 4439, "total_steps": 8680, "loss": 0.7230383157730103, "lr": 1.0453215458902262e-06, "epoch": 1.0228110599078342, "percentage": 51.14, "elapsed_time": "6:04:42", "remaining_time": "5:48:26"} +{"current_steps": 4440, "total_steps": 8680, "loss": 0.7506514191627502, "lr": 1.0449409502609186e-06, "epoch": 1.023041474654378, "percentage": 51.15, "elapsed_time": "6:04:47", "remaining_time": "5:48:21"} +{"current_steps": 4441, "total_steps": 8680, "loss": 0.7530048489570618, "lr": 1.0445603481084914e-06, "epoch": 1.0232718894009216, "percentage": 51.16, "elapsed_time": "6:04:53", "remaining_time": "5:48:17"} +{"current_steps": 4442, "total_steps": 8680, "loss": 0.8402249813079834, "lr": 1.044179739488189e-06, "epoch": 1.0235023041474653, "percentage": 51.18, "elapsed_time": "6:04:58", "remaining_time": "5:48:12"} +{"current_steps": 4443, "total_steps": 8680, "loss": 0.7661963701248169, "lr": 1.0437991244552557e-06, "epoch": 1.0237327188940093, 
"percentage": 51.19, "elapsed_time": "6:05:02", "remaining_time": "5:48:06"} +{"current_steps": 4444, "total_steps": 8680, "loss": 0.7982668876647949, "lr": 1.043418503064937e-06, "epoch": 1.023963133640553, "percentage": 51.2, "elapsed_time": "6:05:07", "remaining_time": "5:48:01"} +{"current_steps": 4445, "total_steps": 8680, "loss": 0.899538516998291, "lr": 1.0430378753724807e-06, "epoch": 1.0241935483870968, "percentage": 51.21, "elapsed_time": "6:05:11", "remaining_time": "5:47:55"} +{"current_steps": 4446, "total_steps": 8680, "loss": 0.8027441501617432, "lr": 1.0426572414331337e-06, "epoch": 1.0244239631336405, "percentage": 51.22, "elapsed_time": "6:05:16", "remaining_time": "5:47:51"} +{"current_steps": 4447, "total_steps": 8680, "loss": 0.8575221300125122, "lr": 1.0422766013021442e-06, "epoch": 1.0246543778801844, "percentage": 51.23, "elapsed_time": "6:05:20", "remaining_time": "5:47:45"} +{"current_steps": 4448, "total_steps": 8680, "loss": 0.7001699209213257, "lr": 1.0418959550347622e-06, "epoch": 1.0248847926267282, "percentage": 51.24, "elapsed_time": "6:05:26", "remaining_time": "5:47:41"} +{"current_steps": 4449, "total_steps": 8680, "loss": 0.9296507835388184, "lr": 1.041515302686238e-06, "epoch": 1.0251152073732719, "percentage": 51.26, "elapsed_time": "6:05:30", "remaining_time": "5:47:35"} +{"current_steps": 4450, "total_steps": 8680, "loss": 0.8214550018310547, "lr": 1.0411346443118222e-06, "epoch": 1.0253456221198156, "percentage": 51.27, "elapsed_time": "6:05:34", "remaining_time": "5:47:29"} +{"current_steps": 4451, "total_steps": 8680, "loss": 0.7598673701286316, "lr": 1.0407539799667673e-06, "epoch": 1.0255760368663593, "percentage": 51.28, "elapsed_time": "6:05:39", "remaining_time": "5:47:25"} +{"current_steps": 4452, "total_steps": 8680, "loss": 0.8222990036010742, "lr": 1.0403733097063265e-06, "epoch": 1.0258064516129033, "percentage": 51.29, "elapsed_time": "6:05:46", "remaining_time": "5:47:21"} +{"current_steps": 4453, 
"total_steps": 8680, "loss": 0.7860872745513916, "lr": 1.039992633585753e-06, "epoch": 1.026036866359447, "percentage": 51.3, "elapsed_time": "6:05:51", "remaining_time": "5:47:17"} +{"current_steps": 4454, "total_steps": 8680, "loss": 0.6602796912193298, "lr": 1.0396119516603018e-06, "epoch": 1.0262672811059907, "percentage": 51.31, "elapsed_time": "6:05:56", "remaining_time": "5:47:12"} +{"current_steps": 4455, "total_steps": 8680, "loss": 0.554654598236084, "lr": 1.0392312639852278e-06, "epoch": 1.0264976958525345, "percentage": 51.32, "elapsed_time": "6:06:01", "remaining_time": "5:47:07"} +{"current_steps": 4456, "total_steps": 8680, "loss": 0.7977210879325867, "lr": 1.0388505706157885e-06, "epoch": 1.0267281105990784, "percentage": 51.34, "elapsed_time": "6:06:05", "remaining_time": "5:47:02"} +{"current_steps": 4457, "total_steps": 8680, "loss": 0.8770938515663147, "lr": 1.0384698716072398e-06, "epoch": 1.0269585253456222, "percentage": 51.35, "elapsed_time": "6:06:11", "remaining_time": "5:46:58"} +{"current_steps": 4458, "total_steps": 8680, "loss": 0.710452675819397, "lr": 1.0380891670148403e-06, "epoch": 1.0271889400921659, "percentage": 51.36, "elapsed_time": "6:06:17", "remaining_time": "5:46:53"} +{"current_steps": 4459, "total_steps": 8680, "loss": 0.8876768946647644, "lr": 1.0377084568938485e-06, "epoch": 1.0274193548387096, "percentage": 51.37, "elapsed_time": "6:06:21", "remaining_time": "5:46:48"} +{"current_steps": 4460, "total_steps": 8680, "loss": 0.7770971059799194, "lr": 1.0373277412995241e-06, "epoch": 1.0276497695852536, "percentage": 51.38, "elapsed_time": "6:06:25", "remaining_time": "5:46:42"} +{"current_steps": 4461, "total_steps": 8680, "loss": 0.9199050068855286, "lr": 1.0369470202871275e-06, "epoch": 1.0278801843317973, "percentage": 51.39, "elapsed_time": "6:06:30", "remaining_time": "5:46:37"} +{"current_steps": 4462, "total_steps": 8680, "loss": 0.7931548357009888, "lr": 1.0365662939119199e-06, "epoch": 1.028110599078341, 
"percentage": 51.41, "elapsed_time": "6:06:36", "remaining_time": "5:46:33"} +{"current_steps": 4463, "total_steps": 8680, "loss": 0.7484941482543945, "lr": 1.0361855622291636e-06, "epoch": 1.0283410138248847, "percentage": 51.42, "elapsed_time": "6:06:43", "remaining_time": "5:46:30"} +{"current_steps": 4464, "total_steps": 8680, "loss": 0.7639475464820862, "lr": 1.03580482529412e-06, "epoch": 1.0285714285714285, "percentage": 51.43, "elapsed_time": "6:06:48", "remaining_time": "5:46:25"} +{"current_steps": 4465, "total_steps": 8680, "loss": 0.7705268859863281, "lr": 1.035424083162054e-06, "epoch": 1.0288018433179724, "percentage": 51.44, "elapsed_time": "6:06:54", "remaining_time": "5:46:21"} +{"current_steps": 4466, "total_steps": 8680, "loss": 0.7714117169380188, "lr": 1.0350433358882288e-06, "epoch": 1.0290322580645161, "percentage": 51.45, "elapsed_time": "6:07:01", "remaining_time": "5:46:19"} +{"current_steps": 4467, "total_steps": 8680, "loss": 0.851073145866394, "lr": 1.0346625835279102e-06, "epoch": 1.0292626728110599, "percentage": 51.46, "elapsed_time": "6:07:08", "remaining_time": "5:46:15"} +{"current_steps": 4468, "total_steps": 8680, "loss": 0.8001583218574524, "lr": 1.0342818261363631e-06, "epoch": 1.0294930875576036, "percentage": 51.47, "elapsed_time": "6:07:14", "remaining_time": "5:46:11"} +{"current_steps": 4469, "total_steps": 8680, "loss": 0.8352588415145874, "lr": 1.0339010637688547e-06, "epoch": 1.0297235023041476, "percentage": 51.49, "elapsed_time": "6:07:19", "remaining_time": "5:46:07"} +{"current_steps": 4470, "total_steps": 8680, "loss": 0.8136032223701477, "lr": 1.0335202964806515e-06, "epoch": 1.0299539170506913, "percentage": 51.5, "elapsed_time": "6:07:23", "remaining_time": "5:46:01"} +{"current_steps": 4471, "total_steps": 8680, "loss": 0.8041108846664429, "lr": 1.0331395243270215e-06, "epoch": 1.030184331797235, "percentage": 51.51, "elapsed_time": "6:07:28", "remaining_time": "5:45:56"} +{"current_steps": 4472, 
"total_steps": 8680, "loss": 0.6961067914962769, "lr": 1.032758747363234e-06, "epoch": 1.0304147465437787, "percentage": 51.52, "elapsed_time": "6:07:35", "remaining_time": "5:45:53"} +{"current_steps": 4473, "total_steps": 8680, "loss": 0.8063983917236328, "lr": 1.0323779656445572e-06, "epoch": 1.0306451612903227, "percentage": 51.53, "elapsed_time": "6:07:42", "remaining_time": "5:45:50"} +{"current_steps": 4474, "total_steps": 8680, "loss": 0.706061601638794, "lr": 1.0319971792262618e-06, "epoch": 1.0308755760368664, "percentage": 51.54, "elapsed_time": "6:07:47", "remaining_time": "5:45:46"} +{"current_steps": 4475, "total_steps": 8680, "loss": 0.8510581254959106, "lr": 1.0316163881636181e-06, "epoch": 1.0311059907834101, "percentage": 51.56, "elapsed_time": "6:07:53", "remaining_time": "5:45:41"} +{"current_steps": 4476, "total_steps": 8680, "loss": 0.7169028520584106, "lr": 1.0312355925118975e-06, "epoch": 1.0313364055299539, "percentage": 51.57, "elapsed_time": "6:07:57", "remaining_time": "5:45:35"} +{"current_steps": 4477, "total_steps": 8680, "loss": 0.7513360977172852, "lr": 1.0308547923263718e-06, "epoch": 1.0315668202764976, "percentage": 51.58, "elapsed_time": "6:08:04", "remaining_time": "5:45:32"} +{"current_steps": 4478, "total_steps": 8680, "loss": 0.7408783435821533, "lr": 1.030473987662314e-06, "epoch": 1.0317972350230415, "percentage": 51.59, "elapsed_time": "6:08:09", "remaining_time": "5:45:28"} +{"current_steps": 4479, "total_steps": 8680, "loss": 0.8177747130393982, "lr": 1.0300931785749974e-06, "epoch": 1.0320276497695853, "percentage": 51.6, "elapsed_time": "6:08:15", "remaining_time": "5:45:23"} +{"current_steps": 4480, "total_steps": 8680, "loss": 0.7530791759490967, "lr": 1.0297123651196954e-06, "epoch": 1.032258064516129, "percentage": 51.61, "elapsed_time": "6:08:19", "remaining_time": "5:45:18"} +{"current_steps": 4481, "total_steps": 8680, "loss": 0.7958859205245972, "lr": 1.0293315473516832e-06, "epoch": 1.0324884792626727, 
"percentage": 51.62, "elapsed_time": "6:08:24", "remaining_time": "5:45:13"} +{"current_steps": 4482, "total_steps": 8680, "loss": 0.8719943761825562, "lr": 1.0289507253262357e-06, "epoch": 1.0327188940092167, "percentage": 51.64, "elapsed_time": "6:08:29", "remaining_time": "5:45:08"} +{"current_steps": 4483, "total_steps": 8680, "loss": 0.7584139108657837, "lr": 1.028569899098629e-06, "epoch": 1.0329493087557604, "percentage": 51.65, "elapsed_time": "6:08:33", "remaining_time": "5:45:02"} +{"current_steps": 4484, "total_steps": 8680, "loss": 0.852983832359314, "lr": 1.0281890687241387e-06, "epoch": 1.0331797235023041, "percentage": 51.66, "elapsed_time": "6:08:38", "remaining_time": "5:44:57"} +{"current_steps": 4485, "total_steps": 8680, "loss": 0.7455692291259766, "lr": 1.027808234258043e-06, "epoch": 1.0334101382488479, "percentage": 51.67, "elapsed_time": "6:08:43", "remaining_time": "5:44:52"} +{"current_steps": 4486, "total_steps": 8680, "loss": 0.7078343629837036, "lr": 1.0274273957556185e-06, "epoch": 1.0336405529953918, "percentage": 51.68, "elapsed_time": "6:08:47", "remaining_time": "5:44:46"} +{"current_steps": 4487, "total_steps": 8680, "loss": 0.7580842971801758, "lr": 1.027046553272144e-06, "epoch": 1.0338709677419355, "percentage": 51.69, "elapsed_time": "6:08:51", "remaining_time": "5:44:41"} +{"current_steps": 4488, "total_steps": 8680, "loss": 0.7271389961242676, "lr": 1.026665706862898e-06, "epoch": 1.0341013824884793, "percentage": 51.71, "elapsed_time": "6:08:55", "remaining_time": "5:44:35"} +{"current_steps": 4489, "total_steps": 8680, "loss": 0.8271546363830566, "lr": 1.0262848565831599e-06, "epoch": 1.034331797235023, "percentage": 51.72, "elapsed_time": "6:09:01", "remaining_time": "5:44:31"} +{"current_steps": 4490, "total_steps": 8680, "loss": 0.6799920201301575, "lr": 1.0259040024882098e-06, "epoch": 1.0345622119815667, "percentage": 51.73, "elapsed_time": "6:09:05", "remaining_time": "5:44:26"} +{"current_steps": 4491, 
"total_steps": 8680, "loss": 0.6962645053863525, "lr": 1.0255231446333277e-06, "epoch": 1.0347926267281107, "percentage": 51.74, "elapsed_time": "6:09:11", "remaining_time": "5:44:22"} +{"current_steps": 4492, "total_steps": 8680, "loss": 0.8722797632217407, "lr": 1.0251422830737955e-06, "epoch": 1.0350230414746544, "percentage": 51.75, "elapsed_time": "6:09:16", "remaining_time": "5:44:16"} +{"current_steps": 4493, "total_steps": 8680, "loss": 0.8054880499839783, "lr": 1.024761417864894e-06, "epoch": 1.0352534562211981, "percentage": 51.76, "elapsed_time": "6:09:20", "remaining_time": "5:44:11"} +{"current_steps": 4494, "total_steps": 8680, "loss": 0.8196548223495483, "lr": 1.0243805490619053e-06, "epoch": 1.0354838709677419, "percentage": 51.77, "elapsed_time": "6:09:26", "remaining_time": "5:44:07"} +{"current_steps": 4495, "total_steps": 8680, "loss": 0.8197275400161743, "lr": 1.0239996767201122e-06, "epoch": 1.0357142857142858, "percentage": 51.79, "elapsed_time": "6:09:31", "remaining_time": "5:44:02"} +{"current_steps": 4496, "total_steps": 8680, "loss": 0.7704858779907227, "lr": 1.0236188008947978e-06, "epoch": 1.0359447004608295, "percentage": 51.8, "elapsed_time": "6:09:35", "remaining_time": "5:43:56"} +{"current_steps": 4497, "total_steps": 8680, "loss": 0.8296232223510742, "lr": 1.0232379216412459e-06, "epoch": 1.0361751152073733, "percentage": 51.81, "elapsed_time": "6:09:40", "remaining_time": "5:43:52"} +{"current_steps": 4498, "total_steps": 8680, "loss": 0.6546601057052612, "lr": 1.0228570390147404e-06, "epoch": 1.036405529953917, "percentage": 51.82, "elapsed_time": "6:09:45", "remaining_time": "5:43:46"} +{"current_steps": 4499, "total_steps": 8680, "loss": 0.808987021446228, "lr": 1.0224761530705656e-06, "epoch": 1.036635944700461, "percentage": 51.83, "elapsed_time": "6:09:49", "remaining_time": "5:43:41"} +{"current_steps": 4500, "total_steps": 8680, "loss": 0.862627387046814, "lr": 1.0220952638640073e-06, "epoch": 1.0368663594470047, 
"percentage": 51.84, "elapsed_time": "6:09:56", "remaining_time": "5:43:37"} +{"current_steps": 4501, "total_steps": 8680, "loss": 0.781114935874939, "lr": 1.0217143714503507e-06, "epoch": 1.0370967741935484, "percentage": 51.85, "elapsed_time": "6:10:05", "remaining_time": "5:43:37"} +{"current_steps": 4502, "total_steps": 8680, "loss": 0.7186112403869629, "lr": 1.0213334758848814e-06, "epoch": 1.0373271889400921, "percentage": 51.87, "elapsed_time": "6:10:09", "remaining_time": "5:43:31"} +{"current_steps": 4503, "total_steps": 8680, "loss": 0.8112529516220093, "lr": 1.0209525772228868e-06, "epoch": 1.0375576036866359, "percentage": 51.88, "elapsed_time": "6:10:14", "remaining_time": "5:43:26"} +{"current_steps": 4504, "total_steps": 8680, "loss": 0.7364751100540161, "lr": 1.020571675519653e-06, "epoch": 1.0377880184331798, "percentage": 51.89, "elapsed_time": "6:10:19", "remaining_time": "5:43:21"} +{"current_steps": 4505, "total_steps": 8680, "loss": 0.7015886902809143, "lr": 1.0201907708304681e-06, "epoch": 1.0380184331797235, "percentage": 51.9, "elapsed_time": "6:10:23", "remaining_time": "5:43:15"} +{"current_steps": 4506, "total_steps": 8680, "loss": 0.7018470168113708, "lr": 1.0198098632106197e-06, "epoch": 1.0382488479262673, "percentage": 51.91, "elapsed_time": "6:10:29", "remaining_time": "5:43:11"} +{"current_steps": 4507, "total_steps": 8680, "loss": 0.820391058921814, "lr": 1.0194289527153953e-06, "epoch": 1.038479262672811, "percentage": 51.92, "elapsed_time": "6:10:33", "remaining_time": "5:43:06"} +{"current_steps": 4508, "total_steps": 8680, "loss": 0.8341129422187805, "lr": 1.0190480394000844e-06, "epoch": 1.038709677419355, "percentage": 51.94, "elapsed_time": "6:10:38", "remaining_time": "5:43:00"} +{"current_steps": 4509, "total_steps": 8680, "loss": 0.7345695495605469, "lr": 1.0186671233199757e-06, "epoch": 1.0389400921658987, "percentage": 51.95, "elapsed_time": "6:10:42", "remaining_time": "5:42:55"} +{"current_steps": 4510, 
"total_steps": 8680, "loss": 0.8899500370025635, "lr": 1.0182862045303589e-06, "epoch": 1.0391705069124424, "percentage": 51.96, "elapsed_time": "6:10:47", "remaining_time": "5:42:49"} +{"current_steps": 4511, "total_steps": 8680, "loss": 0.8158663511276245, "lr": 1.0179052830865238e-06, "epoch": 1.0394009216589861, "percentage": 51.97, "elapsed_time": "6:10:52", "remaining_time": "5:42:45"} +{"current_steps": 4512, "total_steps": 8680, "loss": 0.734848141670227, "lr": 1.0175243590437604e-06, "epoch": 1.0396313364055298, "percentage": 51.98, "elapsed_time": "6:10:58", "remaining_time": "5:42:41"} +{"current_steps": 4513, "total_steps": 8680, "loss": 0.7920876741409302, "lr": 1.0171434324573596e-06, "epoch": 1.0398617511520738, "percentage": 51.99, "elapsed_time": "6:11:03", "remaining_time": "5:42:36"} +{"current_steps": 4514, "total_steps": 8680, "loss": 0.9224791526794434, "lr": 1.0167625033826122e-06, "epoch": 1.0400921658986175, "percentage": 52.0, "elapsed_time": "6:11:07", "remaining_time": "5:42:30"} +{"current_steps": 4515, "total_steps": 8680, "loss": 0.7086025476455688, "lr": 1.0163815718748096e-06, "epoch": 1.0403225806451613, "percentage": 52.02, "elapsed_time": "6:11:12", "remaining_time": "5:42:25"} +{"current_steps": 4516, "total_steps": 8680, "loss": 0.7657936811447144, "lr": 1.0160006379892434e-06, "epoch": 1.040552995391705, "percentage": 52.03, "elapsed_time": "6:11:17", "remaining_time": "5:42:20"} +{"current_steps": 4517, "total_steps": 8680, "loss": 0.786298394203186, "lr": 1.0156197017812058e-06, "epoch": 1.040783410138249, "percentage": 52.04, "elapsed_time": "6:11:22", "remaining_time": "5:42:15"} +{"current_steps": 4518, "total_steps": 8680, "loss": 0.8667294979095459, "lr": 1.0152387633059895e-06, "epoch": 1.0410138248847927, "percentage": 52.05, "elapsed_time": "6:11:27", "remaining_time": "5:42:11"} +{"current_steps": 4519, "total_steps": 8680, "loss": 0.8479517102241516, "lr": 1.0148578226188866e-06, "epoch": 1.0412442396313364, 
"percentage": 52.06, "elapsed_time": "6:11:32", "remaining_time": "5:42:06"} +{"current_steps": 4520, "total_steps": 8680, "loss": 0.6430692076683044, "lr": 1.0144768797751904e-06, "epoch": 1.0414746543778801, "percentage": 52.07, "elapsed_time": "6:11:37", "remaining_time": "5:42:01"} +{"current_steps": 4521, "total_steps": 8680, "loss": 0.874313473701477, "lr": 1.0140959348301946e-06, "epoch": 1.041705069124424, "percentage": 52.09, "elapsed_time": "6:11:43", "remaining_time": "5:41:57"} +{"current_steps": 4522, "total_steps": 8680, "loss": 0.8439676761627197, "lr": 1.013714987839192e-06, "epoch": 1.0419354838709678, "percentage": 52.1, "elapsed_time": "6:11:47", "remaining_time": "5:41:52"} +{"current_steps": 4523, "total_steps": 8680, "loss": 0.7480089664459229, "lr": 1.0133340388574774e-06, "epoch": 1.0421658986175115, "percentage": 52.11, "elapsed_time": "6:11:53", "remaining_time": "5:41:47"} +{"current_steps": 4524, "total_steps": 8680, "loss": 0.8786139488220215, "lr": 1.012953087940345e-06, "epoch": 1.0423963133640552, "percentage": 52.12, "elapsed_time": "6:11:58", "remaining_time": "5:41:42"} +{"current_steps": 4525, "total_steps": 8680, "loss": 0.8333299160003662, "lr": 1.0125721351430885e-06, "epoch": 1.042626728110599, "percentage": 52.13, "elapsed_time": "6:12:03", "remaining_time": "5:41:38"} +{"current_steps": 4526, "total_steps": 8680, "loss": 0.8201998472213745, "lr": 1.0121911805210032e-06, "epoch": 1.042857142857143, "percentage": 52.14, "elapsed_time": "6:12:08", "remaining_time": "5:41:33"} +{"current_steps": 4527, "total_steps": 8680, "loss": 0.7793110609054565, "lr": 1.0118102241293847e-06, "epoch": 1.0430875576036867, "percentage": 52.15, "elapsed_time": "6:12:14", "remaining_time": "5:41:29"} +{"current_steps": 4528, "total_steps": 8680, "loss": 0.7148817777633667, "lr": 1.0114292660235272e-06, "epoch": 1.0433179723502304, "percentage": 52.17, "elapsed_time": "6:12:19", "remaining_time": "5:41:24"} +{"current_steps": 4529, "total_steps": 
8680, "loss": 0.7945176362991333, "lr": 1.011048306258727e-06, "epoch": 1.043548387096774, "percentage": 52.18, "elapsed_time": "6:12:25", "remaining_time": "5:41:20"} +{"current_steps": 4530, "total_steps": 8680, "loss": 0.7246826887130737, "lr": 1.01066734489028e-06, "epoch": 1.043778801843318, "percentage": 52.19, "elapsed_time": "6:12:30", "remaining_time": "5:41:15"} +{"current_steps": 4531, "total_steps": 8680, "loss": 0.7342358827590942, "lr": 1.0102863819734822e-06, "epoch": 1.0440092165898618, "percentage": 52.2, "elapsed_time": "6:12:34", "remaining_time": "5:41:10"} +{"current_steps": 4532, "total_steps": 8680, "loss": 0.6837234497070312, "lr": 1.0099054175636292e-06, "epoch": 1.0442396313364055, "percentage": 52.21, "elapsed_time": "6:12:39", "remaining_time": "5:41:05"} +{"current_steps": 4533, "total_steps": 8680, "loss": 0.6941408514976501, "lr": 1.0095244517160184e-06, "epoch": 1.0444700460829492, "percentage": 52.22, "elapsed_time": "6:12:46", "remaining_time": "5:41:01"} +{"current_steps": 4534, "total_steps": 8680, "loss": 0.7835201025009155, "lr": 1.009143484485946e-06, "epoch": 1.0447004608294932, "percentage": 52.24, "elapsed_time": "6:12:53", "remaining_time": "5:40:58"} +{"current_steps": 4535, "total_steps": 8680, "loss": 0.7887566089630127, "lr": 1.0087625159287086e-06, "epoch": 1.044930875576037, "percentage": 52.25, "elapsed_time": "6:12:56", "remaining_time": "5:40:52"} +{"current_steps": 4536, "total_steps": 8680, "loss": 0.7106727361679077, "lr": 1.0083815460996036e-06, "epoch": 1.0451612903225806, "percentage": 52.26, "elapsed_time": "6:13:02", "remaining_time": "5:40:48"} +{"current_steps": 4537, "total_steps": 8680, "loss": 0.8316382169723511, "lr": 1.0080005750539287e-06, "epoch": 1.0453917050691244, "percentage": 52.27, "elapsed_time": "6:13:07", "remaining_time": "5:40:43"} +{"current_steps": 4538, "total_steps": 8680, "loss": 0.7535592317581177, "lr": 1.0076196028469805e-06, "epoch": 1.045622119815668, "percentage": 52.28, 
"elapsed_time": "6:13:11", "remaining_time": "5:40:37"} +{"current_steps": 4539, "total_steps": 8680, "loss": 0.9255459308624268, "lr": 1.0072386295340571e-06, "epoch": 1.045852534562212, "percentage": 52.29, "elapsed_time": "6:13:15", "remaining_time": "5:40:31"} +{"current_steps": 4540, "total_steps": 8680, "loss": 0.7415009140968323, "lr": 1.0068576551704561e-06, "epoch": 1.0460829493087558, "percentage": 52.3, "elapsed_time": "6:13:20", "remaining_time": "5:40:26"} +{"current_steps": 4541, "total_steps": 8680, "loss": 0.673210620880127, "lr": 1.0064766798114758e-06, "epoch": 1.0463133640552995, "percentage": 52.32, "elapsed_time": "6:13:25", "remaining_time": "5:40:22"} +{"current_steps": 4542, "total_steps": 8680, "loss": 0.7063118815422058, "lr": 1.006095703512414e-06, "epoch": 1.0465437788018432, "percentage": 52.33, "elapsed_time": "6:13:31", "remaining_time": "5:40:18"} +{"current_steps": 4543, "total_steps": 8680, "loss": 0.73606276512146, "lr": 1.005714726328569e-06, "epoch": 1.0467741935483872, "percentage": 52.34, "elapsed_time": "6:13:38", "remaining_time": "5:40:14"} +{"current_steps": 4544, "total_steps": 8680, "loss": 0.6723713874816895, "lr": 1.005333748315239e-06, "epoch": 1.047004608294931, "percentage": 52.35, "elapsed_time": "6:13:43", "remaining_time": "5:40:10"} +{"current_steps": 4545, "total_steps": 8680, "loss": 0.643845796585083, "lr": 1.0049527695277223e-06, "epoch": 1.0472350230414746, "percentage": 52.36, "elapsed_time": "6:13:49", "remaining_time": "5:40:05"} +{"current_steps": 4546, "total_steps": 8680, "loss": 0.8820847272872925, "lr": 1.0045717900213175e-06, "epoch": 1.0474654377880184, "percentage": 52.37, "elapsed_time": "6:13:53", "remaining_time": "5:39:59"} +{"current_steps": 4547, "total_steps": 8680, "loss": 0.6555176973342896, "lr": 1.0041908098513239e-06, "epoch": 1.047695852534562, "percentage": 52.38, "elapsed_time": "6:13:57", "remaining_time": "5:39:54"} +{"current_steps": 4548, "total_steps": 8680, "loss": 
0.8142974376678467, "lr": 1.0038098290730394e-06, "epoch": 1.047926267281106, "percentage": 52.4, "elapsed_time": "6:14:01", "remaining_time": "5:39:48"} +{"current_steps": 4549, "total_steps": 8680, "loss": 0.8107532262802124, "lr": 1.0034288477417634e-06, "epoch": 1.0481566820276498, "percentage": 52.41, "elapsed_time": "6:14:05", "remaining_time": "5:39:42"} +{"current_steps": 4550, "total_steps": 8680, "loss": 0.7078464031219482, "lr": 1.0030478659127947e-06, "epoch": 1.0483870967741935, "percentage": 52.42, "elapsed_time": "6:14:10", "remaining_time": "5:39:37"} +{"current_steps": 4551, "total_steps": 8680, "loss": 0.9168295860290527, "lr": 1.0026668836414322e-06, "epoch": 1.0486175115207372, "percentage": 52.43, "elapsed_time": "6:14:16", "remaining_time": "5:39:34"} +{"current_steps": 4552, "total_steps": 8680, "loss": 0.7384864091873169, "lr": 1.0022859009829752e-06, "epoch": 1.0488479262672812, "percentage": 52.44, "elapsed_time": "6:14:22", "remaining_time": "5:39:30"} +{"current_steps": 4553, "total_steps": 8680, "loss": 0.6092562675476074, "lr": 1.0019049179927229e-06, "epoch": 1.049078341013825, "percentage": 52.45, "elapsed_time": "6:14:29", "remaining_time": "5:39:27"} +{"current_steps": 4554, "total_steps": 8680, "loss": 0.713464617729187, "lr": 1.001523934725974e-06, "epoch": 1.0493087557603686, "percentage": 52.47, "elapsed_time": "6:14:34", "remaining_time": "5:39:22"} +{"current_steps": 4555, "total_steps": 8680, "loss": 0.7514123916625977, "lr": 1.001142951238028e-06, "epoch": 1.0495391705069124, "percentage": 52.48, "elapsed_time": "6:14:39", "remaining_time": "5:39:17"} +{"current_steps": 4556, "total_steps": 8680, "loss": 0.8092095851898193, "lr": 1.000761967584184e-06, "epoch": 1.0497695852534563, "percentage": 52.49, "elapsed_time": "6:14:43", "remaining_time": "5:39:11"} +{"current_steps": 4557, "total_steps": 8680, "loss": 0.7609254717826843, "lr": 1.000380983819742e-06, "epoch": 1.05, "percentage": 52.5, "elapsed_time": "6:14:49", 
"remaining_time": "5:39:07"} +{"current_steps": 4558, "total_steps": 8680, "loss": 0.8363404273986816, "lr": 1e-06, "epoch": 1.0502304147465438, "percentage": 52.51, "elapsed_time": "6:14:55", "remaining_time": "5:39:03"} +{"current_steps": 4559, "total_steps": 8680, "loss": 0.8139501810073853, "lr": 9.996190161802584e-07, "epoch": 1.0504608294930875, "percentage": 52.52, "elapsed_time": "6:15:01", "remaining_time": "5:38:59"} +{"current_steps": 4560, "total_steps": 8680, "loss": 0.8064978122711182, "lr": 9.992380324158157e-07, "epoch": 1.0506912442396312, "percentage": 52.53, "elapsed_time": "6:15:07", "remaining_time": "5:38:56"} +{"current_steps": 4561, "total_steps": 8680, "loss": 0.7162975072860718, "lr": 9.988570487619721e-07, "epoch": 1.0509216589861752, "percentage": 52.55, "elapsed_time": "6:15:13", "remaining_time": "5:38:51"} +{"current_steps": 4562, "total_steps": 8680, "loss": 0.9298074245452881, "lr": 9.984760652740261e-07, "epoch": 1.051152073732719, "percentage": 52.56, "elapsed_time": "6:15:19", "remaining_time": "5:38:47"} +{"current_steps": 4563, "total_steps": 8680, "loss": 0.6929144859313965, "lr": 9.980950820072773e-07, "epoch": 1.0513824884792626, "percentage": 52.57, "elapsed_time": "6:15:25", "remaining_time": "5:38:43"} +{"current_steps": 4564, "total_steps": 8680, "loss": 0.6516381502151489, "lr": 9.97714099017025e-07, "epoch": 1.0516129032258064, "percentage": 52.58, "elapsed_time": "6:15:32", "remaining_time": "5:38:40"} +{"current_steps": 4565, "total_steps": 8680, "loss": 0.864730715751648, "lr": 9.97333116358568e-07, "epoch": 1.0518433179723503, "percentage": 52.59, "elapsed_time": "6:15:37", "remaining_time": "5:38:36"} +{"current_steps": 4566, "total_steps": 8680, "loss": 0.7911246418952942, "lr": 9.969521340872052e-07, "epoch": 1.052073732718894, "percentage": 52.6, "elapsed_time": "6:15:42", "remaining_time": "5:38:31"} +{"current_steps": 4567, "total_steps": 8680, "loss": 0.7766593098640442, "lr": 9.965711522582367e-07, "epoch": 
1.0523041474654378, "percentage": 52.62, "elapsed_time": "6:15:46", "remaining_time": "5:38:25"} +{"current_steps": 4568, "total_steps": 8680, "loss": 0.7703378200531006, "lr": 9.961901709269607e-07, "epoch": 1.0525345622119815, "percentage": 52.63, "elapsed_time": "6:15:52", "remaining_time": "5:38:21"} +{"current_steps": 4569, "total_steps": 8680, "loss": 0.7068926692008972, "lr": 9.958091901486762e-07, "epoch": 1.0527649769585254, "percentage": 52.64, "elapsed_time": "6:15:57", "remaining_time": "5:38:16"} +{"current_steps": 4570, "total_steps": 8680, "loss": 0.740556538105011, "lr": 9.954282099786824e-07, "epoch": 1.0529953917050692, "percentage": 52.65, "elapsed_time": "6:16:02", "remaining_time": "5:38:11"} +{"current_steps": 4571, "total_steps": 8680, "loss": 0.798403263092041, "lr": 9.950472304722778e-07, "epoch": 1.053225806451613, "percentage": 52.66, "elapsed_time": "6:16:05", "remaining_time": "5:38:04"} +{"current_steps": 4572, "total_steps": 8680, "loss": 0.6945887804031372, "lr": 9.94666251684761e-07, "epoch": 1.0534562211981566, "percentage": 52.67, "elapsed_time": "6:16:10", "remaining_time": "5:37:59"} +{"current_steps": 4573, "total_steps": 8680, "loss": 0.8257915377616882, "lr": 9.942852736714312e-07, "epoch": 1.0536866359447004, "percentage": 52.68, "elapsed_time": "6:16:14", "remaining_time": "5:37:54"} +{"current_steps": 4574, "total_steps": 8680, "loss": 0.751315712928772, "lr": 9.939042964875859e-07, "epoch": 1.0539170506912443, "percentage": 52.7, "elapsed_time": "6:16:19", "remaining_time": "5:37:48"} +{"current_steps": 4575, "total_steps": 8680, "loss": 0.6607721447944641, "lr": 9.935233201885241e-07, "epoch": 1.054147465437788, "percentage": 52.71, "elapsed_time": "6:16:24", "remaining_time": "5:37:44"} +{"current_steps": 4576, "total_steps": 8680, "loss": 0.9135023355484009, "lr": 9.931423448295438e-07, "epoch": 1.0543778801843318, "percentage": 52.72, "elapsed_time": "6:16:29", "remaining_time": "5:37:39"} +{"current_steps": 4577, 
"total_steps": 8680, "loss": 0.8238483667373657, "lr": 9.927613704659428e-07, "epoch": 1.0546082949308755, "percentage": 52.73, "elapsed_time": "6:16:33", "remaining_time": "5:37:33"} +{"current_steps": 4578, "total_steps": 8680, "loss": 0.7657001614570618, "lr": 9.923803971530196e-07, "epoch": 1.0548387096774194, "percentage": 52.74, "elapsed_time": "6:16:38", "remaining_time": "5:37:28"} +{"current_steps": 4579, "total_steps": 8680, "loss": 0.6360250115394592, "lr": 9.919994249460717e-07, "epoch": 1.0550691244239632, "percentage": 52.75, "elapsed_time": "6:16:45", "remaining_time": "5:37:25"} +{"current_steps": 4580, "total_steps": 8680, "loss": 0.6958763003349304, "lr": 9.916184539003963e-07, "epoch": 1.055299539170507, "percentage": 52.76, "elapsed_time": "6:16:49", "remaining_time": "5:37:20"} +{"current_steps": 4581, "total_steps": 8680, "loss": 0.7093038558959961, "lr": 9.912374840712915e-07, "epoch": 1.0555299539170506, "percentage": 52.78, "elapsed_time": "6:16:54", "remaining_time": "5:37:14"} +{"current_steps": 4582, "total_steps": 8680, "loss": 0.7641304731369019, "lr": 9.908565155140544e-07, "epoch": 1.0557603686635946, "percentage": 52.79, "elapsed_time": "6:16:59", "remaining_time": "5:37:09"} +{"current_steps": 4583, "total_steps": 8680, "loss": 0.7976446151733398, "lr": 9.904755482839817e-07, "epoch": 1.0559907834101383, "percentage": 52.8, "elapsed_time": "6:17:04", "remaining_time": "5:37:05"} +{"current_steps": 4584, "total_steps": 8680, "loss": 0.8407114744186401, "lr": 9.900945824363707e-07, "epoch": 1.056221198156682, "percentage": 52.81, "elapsed_time": "6:17:10", "remaining_time": "5:37:01"} +{"current_steps": 4585, "total_steps": 8680, "loss": 0.7988634705543518, "lr": 9.897136180265181e-07, "epoch": 1.0564516129032258, "percentage": 52.82, "elapsed_time": "6:17:15", "remaining_time": "5:36:56"} +{"current_steps": 4586, "total_steps": 8680, "loss": 0.7847359776496887, "lr": 9.893326551097198e-07, "epoch": 1.0566820276497695, "percentage": 
52.83, "elapsed_time": "6:17:20", "remaining_time": "5:36:51"} +{"current_steps": 4587, "total_steps": 8680, "loss": 0.8458963632583618, "lr": 9.889516937412728e-07, "epoch": 1.0569124423963134, "percentage": 52.85, "elapsed_time": "6:17:25", "remaining_time": "5:36:46"} +{"current_steps": 4588, "total_steps": 8680, "loss": 0.8479788899421692, "lr": 9.88570733976473e-07, "epoch": 1.0571428571428572, "percentage": 52.86, "elapsed_time": "6:17:30", "remaining_time": "5:36:42"} +{"current_steps": 4589, "total_steps": 8680, "loss": 0.7467283010482788, "lr": 9.881897758706154e-07, "epoch": 1.057373271889401, "percentage": 52.87, "elapsed_time": "6:17:36", "remaining_time": "5:36:37"} +{"current_steps": 4590, "total_steps": 8680, "loss": 0.9400098323822021, "lr": 9.878088194789967e-07, "epoch": 1.0576036866359446, "percentage": 52.88, "elapsed_time": "6:17:41", "remaining_time": "5:36:32"} +{"current_steps": 4591, "total_steps": 8680, "loss": 0.8901257514953613, "lr": 9.874278648569118e-07, "epoch": 1.0578341013824886, "percentage": 52.89, "elapsed_time": "6:17:47", "remaining_time": "5:36:28"} +{"current_steps": 4592, "total_steps": 8680, "loss": 0.840053379535675, "lr": 9.870469120596552e-07, "epoch": 1.0580645161290323, "percentage": 52.9, "elapsed_time": "6:17:51", "remaining_time": "5:36:23"} +{"current_steps": 4593, "total_steps": 8680, "loss": 0.6825235486030579, "lr": 9.866659611425225e-07, "epoch": 1.058294930875576, "percentage": 52.91, "elapsed_time": "6:17:55", "remaining_time": "5:36:17"} +{"current_steps": 4594, "total_steps": 8680, "loss": 0.7783857583999634, "lr": 9.86285012160808e-07, "epoch": 1.0585253456221198, "percentage": 52.93, "elapsed_time": "6:18:00", "remaining_time": "5:36:12"} +{"current_steps": 4595, "total_steps": 8680, "loss": 0.7901174426078796, "lr": 9.859040651698055e-07, "epoch": 1.0587557603686637, "percentage": 52.94, "elapsed_time": "6:18:05", "remaining_time": "5:36:07"} +{"current_steps": 4596, "total_steps": 8680, "loss": 
0.9475124478340149, "lr": 9.855231202248097e-07, "epoch": 1.0589861751152074, "percentage": 52.95, "elapsed_time": "6:18:08", "remaining_time": "5:36:01"} +{"current_steps": 4597, "total_steps": 8680, "loss": 0.8582692742347717, "lr": 9.851421773811133e-07, "epoch": 1.0592165898617512, "percentage": 52.96, "elapsed_time": "6:18:14", "remaining_time": "5:35:56"} +{"current_steps": 4598, "total_steps": 8680, "loss": 0.7885586023330688, "lr": 9.847612366940106e-07, "epoch": 1.0594470046082949, "percentage": 52.97, "elapsed_time": "6:18:18", "remaining_time": "5:35:51"} +{"current_steps": 4599, "total_steps": 8680, "loss": 0.7981748580932617, "lr": 9.843802982187943e-07, "epoch": 1.0596774193548386, "percentage": 52.98, "elapsed_time": "6:18:22", "remaining_time": "5:35:45"} +{"current_steps": 4600, "total_steps": 8680, "loss": 0.7060403823852539, "lr": 9.839993620107563e-07, "epoch": 1.0599078341013826, "percentage": 53.0, "elapsed_time": "6:18:28", "remaining_time": "5:35:41"} +{"current_steps": 4601, "total_steps": 8680, "loss": 0.7902223467826843, "lr": 9.836184281251905e-07, "epoch": 1.0601382488479263, "percentage": 53.01, "elapsed_time": "6:18:34", "remaining_time": "5:35:37"} +{"current_steps": 4602, "total_steps": 8680, "loss": 0.7074719071388245, "lr": 9.83237496617388e-07, "epoch": 1.06036866359447, "percentage": 53.02, "elapsed_time": "6:18:39", "remaining_time": "5:35:32"} +{"current_steps": 4603, "total_steps": 8680, "loss": 0.7180163264274597, "lr": 9.828565675426405e-07, "epoch": 1.0605990783410137, "percentage": 53.03, "elapsed_time": "6:18:47", "remaining_time": "5:35:30"} +{"current_steps": 4604, "total_steps": 8680, "loss": 0.7040787935256958, "lr": 9.824756409562397e-07, "epoch": 1.0608294930875577, "percentage": 53.04, "elapsed_time": "6:18:53", "remaining_time": "5:35:26"} +{"current_steps": 4605, "total_steps": 8680, "loss": 0.8387063145637512, "lr": 9.820947169134765e-07, "epoch": 1.0610599078341014, "percentage": 53.05, "elapsed_time": 
"6:18:58", "remaining_time": "5:35:21"} +{"current_steps": 4606, "total_steps": 8680, "loss": 0.8587188124656677, "lr": 9.81713795469641e-07, "epoch": 1.0612903225806452, "percentage": 53.06, "elapsed_time": "6:19:02", "remaining_time": "5:35:15"} +{"current_steps": 4607, "total_steps": 8680, "loss": 0.729094386100769, "lr": 9.813328766800242e-07, "epoch": 1.0615207373271889, "percentage": 53.08, "elapsed_time": "6:19:09", "remaining_time": "5:35:12"} +{"current_steps": 4608, "total_steps": 8680, "loss": 1.0576609373092651, "lr": 9.809519605999158e-07, "epoch": 1.0617511520737328, "percentage": 53.09, "elapsed_time": "6:19:13", "remaining_time": "5:35:06"} +{"current_steps": 4609, "total_steps": 8680, "loss": 0.7605572938919067, "lr": 9.805710472846044e-07, "epoch": 1.0619815668202766, "percentage": 53.1, "elapsed_time": "6:19:17", "remaining_time": "5:35:00"} +{"current_steps": 4610, "total_steps": 8680, "loss": 0.722477912902832, "lr": 9.801901367893807e-07, "epoch": 1.0622119815668203, "percentage": 53.11, "elapsed_time": "6:19:21", "remaining_time": "5:34:55"} +{"current_steps": 4611, "total_steps": 8680, "loss": 0.7335925698280334, "lr": 9.79809229169532e-07, "epoch": 1.062442396313364, "percentage": 53.12, "elapsed_time": "6:19:27", "remaining_time": "5:34:51"} +{"current_steps": 4612, "total_steps": 8680, "loss": 0.8116357922554016, "lr": 9.794283244803466e-07, "epoch": 1.0626728110599077, "percentage": 53.13, "elapsed_time": "6:19:31", "remaining_time": "5:34:45"} +{"current_steps": 4613, "total_steps": 8680, "loss": 0.8004311323165894, "lr": 9.79047422777113e-07, "epoch": 1.0629032258064517, "percentage": 53.15, "elapsed_time": "6:19:37", "remaining_time": "5:34:41"} +{"current_steps": 4614, "total_steps": 8680, "loss": 0.8198168277740479, "lr": 9.786665241151185e-07, "epoch": 1.0631336405529954, "percentage": 53.16, "elapsed_time": "6:19:42", "remaining_time": "5:34:37"} +{"current_steps": 4615, "total_steps": 8680, "loss": 0.7031205892562866, "lr": 
9.782856285496494e-07, "epoch": 1.0633640552995391, "percentage": 53.17, "elapsed_time": "6:19:48", "remaining_time": "5:34:32"} +{"current_steps": 4616, "total_steps": 8680, "loss": 0.7303737998008728, "lr": 9.779047361359928e-07, "epoch": 1.0635944700460829, "percentage": 53.18, "elapsed_time": "6:19:53", "remaining_time": "5:34:28"} +{"current_steps": 4617, "total_steps": 8680, "loss": 0.8775424957275391, "lr": 9.775238469294345e-07, "epoch": 1.0638248847926268, "percentage": 53.19, "elapsed_time": "6:19:58", "remaining_time": "5:34:22"} +{"current_steps": 4618, "total_steps": 8680, "loss": 0.7463759183883667, "lr": 9.771429609852597e-07, "epoch": 1.0640552995391706, "percentage": 53.2, "elapsed_time": "6:20:04", "remaining_time": "5:34:18"} +{"current_steps": 4619, "total_steps": 8680, "loss": 0.7200205326080322, "lr": 9.767620783587542e-07, "epoch": 1.0642857142857143, "percentage": 53.21, "elapsed_time": "6:20:09", "remaining_time": "5:34:13"} +{"current_steps": 4620, "total_steps": 8680, "loss": 0.8255786299705505, "lr": 9.763811991052019e-07, "epoch": 1.064516129032258, "percentage": 53.23, "elapsed_time": "6:20:15", "remaining_time": "5:34:10"} +{"current_steps": 4621, "total_steps": 8680, "loss": 0.7975195050239563, "lr": 9.760003232798877e-07, "epoch": 1.064746543778802, "percentage": 53.24, "elapsed_time": "6:20:20", "remaining_time": "5:34:05"} +{"current_steps": 4622, "total_steps": 8680, "loss": 0.6993064880371094, "lr": 9.756194509380948e-07, "epoch": 1.0649769585253457, "percentage": 53.25, "elapsed_time": "6:20:28", "remaining_time": "5:34:02"} +{"current_steps": 4623, "total_steps": 8680, "loss": 0.818634033203125, "lr": 9.752385821351062e-07, "epoch": 1.0652073732718894, "percentage": 53.26, "elapsed_time": "6:20:33", "remaining_time": "5:33:58"} +{"current_steps": 4624, "total_steps": 8680, "loss": 0.707933783531189, "lr": 9.748577169262046e-07, "epoch": 1.0654377880184331, "percentage": 53.27, "elapsed_time": "6:20:38", "remaining_time": 
"5:33:52"} +{"current_steps": 4625, "total_steps": 8680, "loss": 0.8133440017700195, "lr": 9.744768553666723e-07, "epoch": 1.0656682027649769, "percentage": 53.28, "elapsed_time": "6:20:41", "remaining_time": "5:33:46"} +{"current_steps": 4626, "total_steps": 8680, "loss": 0.8818857669830322, "lr": 9.740959975117901e-07, "epoch": 1.0658986175115208, "percentage": 53.29, "elapsed_time": "6:20:46", "remaining_time": "5:33:41"} +{"current_steps": 4627, "total_steps": 8680, "loss": 0.6057544946670532, "lr": 9.737151434168402e-07, "epoch": 1.0661290322580645, "percentage": 53.31, "elapsed_time": "6:20:52", "remaining_time": "5:33:37"} +{"current_steps": 4628, "total_steps": 8680, "loss": 0.7560185194015503, "lr": 9.733342931371023e-07, "epoch": 1.0663594470046083, "percentage": 53.32, "elapsed_time": "6:20:58", "remaining_time": "5:33:33"} +{"current_steps": 4629, "total_steps": 8680, "loss": 0.8196524381637573, "lr": 9.72953446727856e-07, "epoch": 1.066589861751152, "percentage": 53.33, "elapsed_time": "6:21:02", "remaining_time": "5:33:27"} +{"current_steps": 4630, "total_steps": 8680, "loss": 0.8695862889289856, "lr": 9.725726042443814e-07, "epoch": 1.066820276497696, "percentage": 53.34, "elapsed_time": "6:21:06", "remaining_time": "5:33:22"} +{"current_steps": 4631, "total_steps": 8680, "loss": 0.7753207683563232, "lr": 9.721917657419573e-07, "epoch": 1.0670506912442397, "percentage": 53.35, "elapsed_time": "6:21:11", "remaining_time": "5:33:17"} +{"current_steps": 4632, "total_steps": 8680, "loss": 0.8245481252670288, "lr": 9.718109312758612e-07, "epoch": 1.0672811059907834, "percentage": 53.36, "elapsed_time": "6:21:16", "remaining_time": "5:33:12"} +{"current_steps": 4633, "total_steps": 8680, "loss": 0.8654806613922119, "lr": 9.71430100901371e-07, "epoch": 1.0675115207373271, "percentage": 53.38, "elapsed_time": "6:21:22", "remaining_time": "5:33:07"} +{"current_steps": 4634, "total_steps": 8680, "loss": 0.8667370080947876, "lr": 9.710492746737642e-07, "epoch": 
1.067741935483871, "percentage": 53.39, "elapsed_time": "6:21:26", "remaining_time": "5:33:02"} +{"current_steps": 4635, "total_steps": 8680, "loss": 0.7786421775817871, "lr": 9.706684526483167e-07, "epoch": 1.0679723502304148, "percentage": 53.4, "elapsed_time": "6:21:32", "remaining_time": "5:32:58"} +{"current_steps": 4636, "total_steps": 8680, "loss": 0.7788090705871582, "lr": 9.702876348803045e-07, "epoch": 1.0682027649769585, "percentage": 53.41, "elapsed_time": "6:21:38", "remaining_time": "5:32:54"} +{"current_steps": 4637, "total_steps": 8680, "loss": 0.812332034111023, "lr": 9.69906821425003e-07, "epoch": 1.0684331797235023, "percentage": 53.42, "elapsed_time": "6:21:44", "remaining_time": "5:32:50"} +{"current_steps": 4638, "total_steps": 8680, "loss": 0.7884202599525452, "lr": 9.69526012337686e-07, "epoch": 1.068663594470046, "percentage": 53.43, "elapsed_time": "6:21:49", "remaining_time": "5:32:45"} +{"current_steps": 4639, "total_steps": 8680, "loss": 0.725990891456604, "lr": 9.69145207673628e-07, "epoch": 1.06889400921659, "percentage": 53.44, "elapsed_time": "6:21:54", "remaining_time": "5:32:40"} +{"current_steps": 4640, "total_steps": 8680, "loss": 0.7277272343635559, "lr": 9.687644074881028e-07, "epoch": 1.0691244239631337, "percentage": 53.46, "elapsed_time": "6:22:00", "remaining_time": "5:32:36"} +{"current_steps": 4641, "total_steps": 8680, "loss": 0.8081945180892944, "lr": 9.683836118363818e-07, "epoch": 1.0693548387096774, "percentage": 53.47, "elapsed_time": "6:22:05", "remaining_time": "5:32:31"} +{"current_steps": 4642, "total_steps": 8680, "loss": 0.8633503913879395, "lr": 9.680028207737383e-07, "epoch": 1.0695852534562211, "percentage": 53.48, "elapsed_time": "6:22:10", "remaining_time": "5:32:26"} +{"current_steps": 4643, "total_steps": 8680, "loss": 0.7873313426971436, "lr": 9.67622034355443e-07, "epoch": 1.069815668202765, "percentage": 53.49, "elapsed_time": "6:22:14", "remaining_time": "5:32:21"} +{"current_steps": 4644, 
"total_steps": 8680, "loss": 0.7927644848823547, "lr": 9.67241252636766e-07, "epoch": 1.0700460829493088, "percentage": 53.5, "elapsed_time": "6:22:19", "remaining_time": "5:32:16"} +{"current_steps": 4645, "total_steps": 8680, "loss": 0.9458138942718506, "lr": 9.668604756729784e-07, "epoch": 1.0702764976958525, "percentage": 53.51, "elapsed_time": "6:22:25", "remaining_time": "5:32:12"} +{"current_steps": 4646, "total_steps": 8680, "loss": 0.7471280097961426, "lr": 9.664797035193484e-07, "epoch": 1.0705069124423963, "percentage": 53.53, "elapsed_time": "6:22:32", "remaining_time": "5:32:08"} +{"current_steps": 4647, "total_steps": 8680, "loss": 0.7666789293289185, "lr": 9.660989362311455e-07, "epoch": 1.07073732718894, "percentage": 53.54, "elapsed_time": "6:22:37", "remaining_time": "5:32:04"} +{"current_steps": 4648, "total_steps": 8680, "loss": 0.7846331000328064, "lr": 9.65718173863637e-07, "epoch": 1.070967741935484, "percentage": 53.55, "elapsed_time": "6:22:43", "remaining_time": "5:31:59"} +{"current_steps": 4649, "total_steps": 8680, "loss": 0.7790371179580688, "lr": 9.653374164720897e-07, "epoch": 1.0711981566820277, "percentage": 53.56, "elapsed_time": "6:22:47", "remaining_time": "5:31:54"} +{"current_steps": 4650, "total_steps": 8680, "loss": 0.9056169986724854, "lr": 9.64956664111771e-07, "epoch": 1.0714285714285714, "percentage": 53.57, "elapsed_time": "6:22:52", "remaining_time": "5:31:49"} +{"current_steps": 4651, "total_steps": 8680, "loss": 0.6839256286621094, "lr": 9.645759168379461e-07, "epoch": 1.0716589861751151, "percentage": 53.58, "elapsed_time": "6:22:57", "remaining_time": "5:31:44"} +{"current_steps": 4652, "total_steps": 8680, "loss": 0.7071784138679504, "lr": 9.641951747058799e-07, "epoch": 1.071889400921659, "percentage": 53.59, "elapsed_time": "6:23:02", "remaining_time": "5:31:40"} +{"current_steps": 4653, "total_steps": 8680, "loss": 0.8166929483413696, "lr": 9.638144377708366e-07, "epoch": 1.0721198156682028, "percentage": 
53.61, "elapsed_time": "6:23:07", "remaining_time": "5:31:34"} +{"current_steps": 4654, "total_steps": 8680, "loss": 0.8013010621070862, "lr": 9.6343370608808e-07, "epoch": 1.0723502304147465, "percentage": 53.62, "elapsed_time": "6:23:13", "remaining_time": "5:31:30"} +{"current_steps": 4655, "total_steps": 8680, "loss": 0.8157169818878174, "lr": 9.630529797128722e-07, "epoch": 1.0725806451612903, "percentage": 53.63, "elapsed_time": "6:23:18", "remaining_time": "5:31:25"} +{"current_steps": 4656, "total_steps": 8680, "loss": 0.6467397212982178, "lr": 9.626722587004758e-07, "epoch": 1.072811059907834, "percentage": 53.64, "elapsed_time": "6:23:24", "remaining_time": "5:31:21"} +{"current_steps": 4657, "total_steps": 8680, "loss": 0.6623806953430176, "lr": 9.622915431061519e-07, "epoch": 1.073041474654378, "percentage": 53.65, "elapsed_time": "6:23:30", "remaining_time": "5:31:17"} +{"current_steps": 4658, "total_steps": 8680, "loss": 0.8333703279495239, "lr": 9.619108329851596e-07, "epoch": 1.0732718894009217, "percentage": 53.66, "elapsed_time": "6:23:37", "remaining_time": "5:31:14"} +{"current_steps": 4659, "total_steps": 8680, "loss": 0.8798840045928955, "lr": 9.615301283927603e-07, "epoch": 1.0735023041474654, "percentage": 53.68, "elapsed_time": "6:23:42", "remaining_time": "5:31:09"} +{"current_steps": 4660, "total_steps": 8680, "loss": 0.8712242841720581, "lr": 9.611494293842119e-07, "epoch": 1.0737327188940091, "percentage": 53.69, "elapsed_time": "6:23:46", "remaining_time": "5:31:04"} +{"current_steps": 4661, "total_steps": 8680, "loss": 0.720801591873169, "lr": 9.60768736014772e-07, "epoch": 1.073963133640553, "percentage": 53.7, "elapsed_time": "6:23:50", "remaining_time": "5:30:58"} +{"current_steps": 4662, "total_steps": 8680, "loss": 0.7974982857704163, "lr": 9.603880483396983e-07, "epoch": 1.0741935483870968, "percentage": 53.71, "elapsed_time": "6:23:56", "remaining_time": "5:30:54"} +{"current_steps": 4663, "total_steps": 8680, "loss": 
0.7656542062759399, "lr": 9.600073664142471e-07, "epoch": 1.0744239631336405, "percentage": 53.72, "elapsed_time": "6:24:01", "remaining_time": "5:30:49"} +{"current_steps": 4664, "total_steps": 8680, "loss": 0.8274385333061218, "lr": 9.596266902936737e-07, "epoch": 1.0746543778801843, "percentage": 53.73, "elapsed_time": "6:24:06", "remaining_time": "5:30:44"} +{"current_steps": 4665, "total_steps": 8680, "loss": 0.6508798599243164, "lr": 9.592460200332328e-07, "epoch": 1.0748847926267282, "percentage": 53.74, "elapsed_time": "6:24:11", "remaining_time": "5:30:39"} +{"current_steps": 4666, "total_steps": 8680, "loss": 0.6393407583236694, "lr": 9.588653556881781e-07, "epoch": 1.075115207373272, "percentage": 53.76, "elapsed_time": "6:24:17", "remaining_time": "5:30:35"} +{"current_steps": 4667, "total_steps": 8680, "loss": 0.7857781052589417, "lr": 9.58484697313762e-07, "epoch": 1.0753456221198157, "percentage": 53.77, "elapsed_time": "6:24:22", "remaining_time": "5:30:30"} +{"current_steps": 4668, "total_steps": 8680, "loss": 0.7433615922927856, "lr": 9.58104044965238e-07, "epoch": 1.0755760368663594, "percentage": 53.78, "elapsed_time": "6:24:26", "remaining_time": "5:30:25"} +{"current_steps": 4669, "total_steps": 8680, "loss": 0.6694349646568298, "lr": 9.57723398697856e-07, "epoch": 1.0758064516129031, "percentage": 53.79, "elapsed_time": "6:24:32", "remaining_time": "5:30:21"} +{"current_steps": 4670, "total_steps": 8680, "loss": 0.7849506735801697, "lr": 9.573427585668664e-07, "epoch": 1.076036866359447, "percentage": 53.8, "elapsed_time": "6:24:37", "remaining_time": "5:30:16"} +{"current_steps": 4671, "total_steps": 8680, "loss": 0.5924462080001831, "lr": 9.569621246275194e-07, "epoch": 1.0762672811059908, "percentage": 53.81, "elapsed_time": "6:24:44", "remaining_time": "5:30:12"} +{"current_steps": 4672, "total_steps": 8680, "loss": 0.7679359316825867, "lr": 9.565814969350628e-07, "epoch": 1.0764976958525345, "percentage": 53.82, "elapsed_time": 
"6:24:50", "remaining_time": "5:30:08"} +{"current_steps": 4673, "total_steps": 8680, "loss": 0.803286612033844, "lr": 9.562008755447444e-07, "epoch": 1.0767281105990782, "percentage": 53.84, "elapsed_time": "6:24:55", "remaining_time": "5:30:03"} +{"current_steps": 4674, "total_steps": 8680, "loss": 0.6302975416183472, "lr": 9.558202605118112e-07, "epoch": 1.0769585253456222, "percentage": 53.85, "elapsed_time": "6:25:00", "remaining_time": "5:29:59"} +{"current_steps": 4675, "total_steps": 8680, "loss": 0.7441667914390564, "lr": 9.554396518915085e-07, "epoch": 1.077188940092166, "percentage": 53.86, "elapsed_time": "6:25:06", "remaining_time": "5:29:55"} +{"current_steps": 4676, "total_steps": 8680, "loss": 0.805221438407898, "lr": 9.550590497390815e-07, "epoch": 1.0774193548387097, "percentage": 53.87, "elapsed_time": "6:25:11", "remaining_time": "5:29:49"} +{"current_steps": 4677, "total_steps": 8680, "loss": 0.9557743072509766, "lr": 9.54678454109774e-07, "epoch": 1.0776497695852534, "percentage": 53.88, "elapsed_time": "6:25:15", "remaining_time": "5:29:44"} +{"current_steps": 4678, "total_steps": 8680, "loss": 0.7361980080604553, "lr": 9.542978650588284e-07, "epoch": 1.0778801843317973, "percentage": 53.89, "elapsed_time": "6:25:20", "remaining_time": "5:29:39"} +{"current_steps": 4679, "total_steps": 8680, "loss": 0.7474843263626099, "lr": 9.539172826414876e-07, "epoch": 1.078110599078341, "percentage": 53.91, "elapsed_time": "6:25:24", "remaining_time": "5:29:33"} +{"current_steps": 4680, "total_steps": 8680, "loss": 0.595927357673645, "lr": 9.535367069129923e-07, "epoch": 1.0783410138248848, "percentage": 53.92, "elapsed_time": "6:25:30", "remaining_time": "5:29:29"} +{"current_steps": 4681, "total_steps": 8680, "loss": 0.894598126411438, "lr": 9.531561379285818e-07, "epoch": 1.0785714285714285, "percentage": 53.93, "elapsed_time": "6:25:36", "remaining_time": "5:29:25"} +{"current_steps": 4682, "total_steps": 8680, "loss": 0.915902853012085, "lr": 
9.527755757434966e-07, "epoch": 1.0788018433179722, "percentage": 53.94, "elapsed_time": "6:25:41", "remaining_time": "5:29:20"} +{"current_steps": 4683, "total_steps": 8680, "loss": 0.8670432567596436, "lr": 9.523950204129739e-07, "epoch": 1.0790322580645162, "percentage": 53.95, "elapsed_time": "6:25:46", "remaining_time": "5:29:15"} +{"current_steps": 4684, "total_steps": 8680, "loss": 0.7829893231391907, "lr": 9.520144719922508e-07, "epoch": 1.07926267281106, "percentage": 53.96, "elapsed_time": "6:25:50", "remaining_time": "5:29:10"} +{"current_steps": 4685, "total_steps": 8680, "loss": 0.6584970951080322, "lr": 9.516339305365638e-07, "epoch": 1.0794930875576036, "percentage": 53.97, "elapsed_time": "6:25:55", "remaining_time": "5:29:04"} +{"current_steps": 4686, "total_steps": 8680, "loss": 0.7853457927703857, "lr": 9.512533961011478e-07, "epoch": 1.0797235023041474, "percentage": 53.99, "elapsed_time": "6:26:00", "remaining_time": "5:29:00"} +{"current_steps": 4687, "total_steps": 8680, "loss": 0.7890632152557373, "lr": 9.508728687412364e-07, "epoch": 1.0799539170506913, "percentage": 54.0, "elapsed_time": "6:26:06", "remaining_time": "5:28:56"} +{"current_steps": 4688, "total_steps": 8680, "loss": 0.8281408548355103, "lr": 9.504923485120634e-07, "epoch": 1.080184331797235, "percentage": 54.01, "elapsed_time": "6:26:10", "remaining_time": "5:28:50"} +{"current_steps": 4689, "total_steps": 8680, "loss": 0.7878601551055908, "lr": 9.501118354688605e-07, "epoch": 1.0804147465437788, "percentage": 54.02, "elapsed_time": "6:26:16", "remaining_time": "5:28:46"} +{"current_steps": 4690, "total_steps": 8680, "loss": 0.8332592844963074, "lr": 9.497313296668582e-07, "epoch": 1.0806451612903225, "percentage": 54.03, "elapsed_time": "6:26:21", "remaining_time": "5:28:41"} +{"current_steps": 4691, "total_steps": 8680, "loss": 0.7680759429931641, "lr": 9.493508311612874e-07, "epoch": 1.0808755760368665, "percentage": 54.04, "elapsed_time": "6:26:25", "remaining_time": 
"5:28:35"} +{"current_steps": 4692, "total_steps": 8680, "loss": 0.6368690729141235, "lr": 9.489703400073762e-07, "epoch": 1.0811059907834102, "percentage": 54.06, "elapsed_time": "6:26:31", "remaining_time": "5:28:31"} +{"current_steps": 4693, "total_steps": 8680, "loss": 0.7018477916717529, "lr": 9.485898562603525e-07, "epoch": 1.081336405529954, "percentage": 54.07, "elapsed_time": "6:26:38", "remaining_time": "5:28:28"} +{"current_steps": 4694, "total_steps": 8680, "loss": 0.8494987487792969, "lr": 9.482093799754432e-07, "epoch": 1.0815668202764976, "percentage": 54.08, "elapsed_time": "6:26:42", "remaining_time": "5:28:23"} +{"current_steps": 4695, "total_steps": 8680, "loss": 0.8146306276321411, "lr": 9.478289112078736e-07, "epoch": 1.0817972350230414, "percentage": 54.09, "elapsed_time": "6:26:48", "remaining_time": "5:28:18"} +{"current_steps": 4696, "total_steps": 8680, "loss": 0.7832612991333008, "lr": 9.474484500128689e-07, "epoch": 1.0820276497695853, "percentage": 54.1, "elapsed_time": "6:26:53", "remaining_time": "5:28:13"} +{"current_steps": 4697, "total_steps": 8680, "loss": 0.8569360971450806, "lr": 9.470679964456519e-07, "epoch": 1.082258064516129, "percentage": 54.11, "elapsed_time": "6:26:57", "remaining_time": "5:28:08"} +{"current_steps": 4698, "total_steps": 8680, "loss": 0.8145112991333008, "lr": 9.466875505614449e-07, "epoch": 1.0824884792626728, "percentage": 54.12, "elapsed_time": "6:27:03", "remaining_time": "5:28:03"} +{"current_steps": 4699, "total_steps": 8680, "loss": 0.6632689237594604, "lr": 9.463071124154697e-07, "epoch": 1.0827188940092165, "percentage": 54.14, "elapsed_time": "6:27:09", "remaining_time": "5:27:59"} +{"current_steps": 4700, "total_steps": 8680, "loss": 0.6299769878387451, "lr": 9.459266820629461e-07, "epoch": 1.0829493087557605, "percentage": 54.15, "elapsed_time": "6:27:14", "remaining_time": "5:27:54"} +{"current_steps": 4701, "total_steps": 8680, "loss": 0.7722063064575195, "lr": 9.455462595590925e-07, 
"epoch": 1.0831797235023042, "percentage": 54.16, "elapsed_time": "6:27:23", "remaining_time": "5:27:53"} +{"current_steps": 4702, "total_steps": 8680, "loss": 0.8219027519226074, "lr": 9.451658449591278e-07, "epoch": 1.083410138248848, "percentage": 54.17, "elapsed_time": "6:27:29", "remaining_time": "5:27:49"} +{"current_steps": 4703, "total_steps": 8680, "loss": 0.9078400731086731, "lr": 9.44785438318268e-07, "epoch": 1.0836405529953916, "percentage": 54.18, "elapsed_time": "6:27:34", "remaining_time": "5:27:44"} +{"current_steps": 4704, "total_steps": 8680, "loss": 0.8062041997909546, "lr": 9.444050396917286e-07, "epoch": 1.0838709677419356, "percentage": 54.19, "elapsed_time": "6:27:39", "remaining_time": "5:27:39"} +{"current_steps": 4705, "total_steps": 8680, "loss": 0.6379001140594482, "lr": 9.440246491347242e-07, "epoch": 1.0841013824884793, "percentage": 54.21, "elapsed_time": "6:27:45", "remaining_time": "5:27:35"} +{"current_steps": 4706, "total_steps": 8680, "loss": 0.919986367225647, "lr": 9.436442667024679e-07, "epoch": 1.084331797235023, "percentage": 54.22, "elapsed_time": "6:27:50", "remaining_time": "5:27:31"} +{"current_steps": 4707, "total_steps": 8680, "loss": 0.6534138917922974, "lr": 9.432638924501715e-07, "epoch": 1.0845622119815668, "percentage": 54.23, "elapsed_time": "6:27:57", "remaining_time": "5:27:27"} +{"current_steps": 4708, "total_steps": 8680, "loss": 0.8340045809745789, "lr": 9.428835264330462e-07, "epoch": 1.0847926267281105, "percentage": 54.24, "elapsed_time": "6:28:02", "remaining_time": "5:27:22"} +{"current_steps": 4709, "total_steps": 8680, "loss": 0.8347625732421875, "lr": 9.425031687063014e-07, "epoch": 1.0850230414746544, "percentage": 54.25, "elapsed_time": "6:28:08", "remaining_time": "5:27:18"} +{"current_steps": 4710, "total_steps": 8680, "loss": 0.807063639163971, "lr": 9.421228193251452e-07, "epoch": 1.0852534562211982, "percentage": 54.26, "elapsed_time": "6:28:14", "remaining_time": "5:27:14"} 
+{"current_steps": 4711, "total_steps": 8680, "loss": 0.7375985383987427, "lr": 9.417424783447855e-07, "epoch": 1.085483870967742, "percentage": 54.27, "elapsed_time": "6:28:21", "remaining_time": "5:27:11"} +{"current_steps": 4712, "total_steps": 8680, "loss": 0.5723168849945068, "lr": 9.413621458204281e-07, "epoch": 1.0857142857142856, "percentage": 54.29, "elapsed_time": "6:28:27", "remaining_time": "5:27:07"} +{"current_steps": 4713, "total_steps": 8680, "loss": 0.8272668123245239, "lr": 9.409818218072772e-07, "epoch": 1.0859447004608296, "percentage": 54.3, "elapsed_time": "6:28:31", "remaining_time": "5:27:01"} +{"current_steps": 4714, "total_steps": 8680, "loss": 0.6400803327560425, "lr": 9.406015063605368e-07, "epoch": 1.0861751152073733, "percentage": 54.31, "elapsed_time": "6:28:37", "remaining_time": "5:26:57"} +{"current_steps": 4715, "total_steps": 8680, "loss": 0.6829795837402344, "lr": 9.402211995354095e-07, "epoch": 1.086405529953917, "percentage": 54.32, "elapsed_time": "6:28:43", "remaining_time": "5:26:53"} +{"current_steps": 4716, "total_steps": 8680, "loss": 0.8509865999221802, "lr": 9.398409013870954e-07, "epoch": 1.0866359447004608, "percentage": 54.33, "elapsed_time": "6:28:48", "remaining_time": "5:26:48"} +{"current_steps": 4717, "total_steps": 8680, "loss": 0.895818829536438, "lr": 9.394606119707954e-07, "epoch": 1.0868663594470047, "percentage": 54.34, "elapsed_time": "6:28:53", "remaining_time": "5:26:44"} +{"current_steps": 4718, "total_steps": 8680, "loss": 0.8534268140792847, "lr": 9.390803313417072e-07, "epoch": 1.0870967741935484, "percentage": 54.35, "elapsed_time": "6:28:59", "remaining_time": "5:26:40"} +{"current_steps": 4719, "total_steps": 8680, "loss": 0.8603401184082031, "lr": 9.38700059555028e-07, "epoch": 1.0873271889400922, "percentage": 54.37, "elapsed_time": "6:29:04", "remaining_time": "5:26:34"} +{"current_steps": 4720, "total_steps": 8680, "loss": 0.8810417652130127, "lr": 9.383197966659542e-07, "epoch": 
1.087557603686636, "percentage": 54.38, "elapsed_time": "6:29:10", "remaining_time": "5:26:30"} +{"current_steps": 4721, "total_steps": 8680, "loss": 0.7144299149513245, "lr": 9.3793954272968e-07, "epoch": 1.0877880184331796, "percentage": 54.39, "elapsed_time": "6:29:15", "remaining_time": "5:26:25"} +{"current_steps": 4722, "total_steps": 8680, "loss": 0.8780069351196289, "lr": 9.375592978013994e-07, "epoch": 1.0880184331797236, "percentage": 54.4, "elapsed_time": "6:29:21", "remaining_time": "5:26:21"} +{"current_steps": 4723, "total_steps": 8680, "loss": 0.7976780533790588, "lr": 9.371790619363041e-07, "epoch": 1.0882488479262673, "percentage": 54.41, "elapsed_time": "6:29:26", "remaining_time": "5:26:16"} +{"current_steps": 4724, "total_steps": 8680, "loss": 0.9183385372161865, "lr": 9.367988351895846e-07, "epoch": 1.088479262672811, "percentage": 54.42, "elapsed_time": "6:29:32", "remaining_time": "5:26:12"} +{"current_steps": 4725, "total_steps": 8680, "loss": 0.7891188859939575, "lr": 9.364186176164306e-07, "epoch": 1.0887096774193548, "percentage": 54.44, "elapsed_time": "6:29:38", "remaining_time": "5:26:08"} +{"current_steps": 4726, "total_steps": 8680, "loss": 0.7586535215377808, "lr": 9.360384092720301e-07, "epoch": 1.0889400921658987, "percentage": 54.45, "elapsed_time": "6:29:44", "remaining_time": "5:26:04"} +{"current_steps": 4727, "total_steps": 8680, "loss": 0.7915316224098206, "lr": 9.356582102115696e-07, "epoch": 1.0891705069124424, "percentage": 54.46, "elapsed_time": "6:29:50", "remaining_time": "5:26:00"} +{"current_steps": 4728, "total_steps": 8680, "loss": 0.6608257293701172, "lr": 9.352780204902349e-07, "epoch": 1.0894009216589862, "percentage": 54.47, "elapsed_time": "6:29:57", "remaining_time": "5:25:56"} +{"current_steps": 4729, "total_steps": 8680, "loss": 0.8375273942947388, "lr": 9.3489784016321e-07, "epoch": 1.08963133640553, "percentage": 54.48, "elapsed_time": "6:30:03", "remaining_time": "5:25:53"} +{"current_steps": 4730, 
"total_steps": 8680, "loss": 0.7629055976867676, "lr": 9.345176692856768e-07, "epoch": 1.0898617511520738, "percentage": 54.49, "elapsed_time": "6:30:10", "remaining_time": "5:25:50"} +{"current_steps": 4731, "total_steps": 8680, "loss": 0.8037875890731812, "lr": 9.341375079128177e-07, "epoch": 1.0900921658986176, "percentage": 54.5, "elapsed_time": "6:30:17", "remaining_time": "5:25:47"} +{"current_steps": 4732, "total_steps": 8680, "loss": 0.8843437433242798, "lr": 9.337573560998123e-07, "epoch": 1.0903225806451613, "percentage": 54.52, "elapsed_time": "6:30:23", "remaining_time": "5:25:42"} +{"current_steps": 4733, "total_steps": 8680, "loss": 0.7164910435676575, "lr": 9.333772139018387e-07, "epoch": 1.090552995391705, "percentage": 54.53, "elapsed_time": "6:30:29", "remaining_time": "5:25:38"} +{"current_steps": 4734, "total_steps": 8680, "loss": 0.8076978921890259, "lr": 9.329970813740742e-07, "epoch": 1.0907834101382488, "percentage": 54.54, "elapsed_time": "6:30:33", "remaining_time": "5:25:33"} +{"current_steps": 4735, "total_steps": 8680, "loss": 0.7265340089797974, "lr": 9.326169585716949e-07, "epoch": 1.0910138248847927, "percentage": 54.55, "elapsed_time": "6:30:39", "remaining_time": "5:25:28"} +{"current_steps": 4736, "total_steps": 8680, "loss": 0.7438681125640869, "lr": 9.322368455498747e-07, "epoch": 1.0912442396313364, "percentage": 54.56, "elapsed_time": "6:30:45", "remaining_time": "5:25:24"} +{"current_steps": 4737, "total_steps": 8680, "loss": 0.8760604858398438, "lr": 9.318567423637868e-07, "epoch": 1.0914746543778802, "percentage": 54.57, "elapsed_time": "6:30:48", "remaining_time": "5:25:18"} +{"current_steps": 4738, "total_steps": 8680, "loss": 0.7216911315917969, "lr": 9.314766490686026e-07, "epoch": 1.0917050691244239, "percentage": 54.59, "elapsed_time": "6:30:53", "remaining_time": "5:25:13"} +{"current_steps": 4739, "total_steps": 8680, "loss": 0.8003707528114319, "lr": 9.310965657194916e-07, "epoch": 1.0919354838709678, "percentage": 
54.6, "elapsed_time": "6:30:59", "remaining_time": "5:25:08"} +{"current_steps": 4740, "total_steps": 8680, "loss": 0.6496548652648926, "lr": 9.307164923716233e-07, "epoch": 1.0921658986175116, "percentage": 54.61, "elapsed_time": "6:31:03", "remaining_time": "5:25:03"} +{"current_steps": 4741, "total_steps": 8680, "loss": 0.7659108638763428, "lr": 9.303364290801644e-07, "epoch": 1.0923963133640553, "percentage": 54.62, "elapsed_time": "6:31:08", "remaining_time": "5:24:58"} +{"current_steps": 4742, "total_steps": 8680, "loss": 0.7799512147903442, "lr": 9.299563759002802e-07, "epoch": 1.092626728110599, "percentage": 54.63, "elapsed_time": "6:31:13", "remaining_time": "5:24:53"} +{"current_steps": 4743, "total_steps": 8680, "loss": 0.7675691246986389, "lr": 9.295763328871357e-07, "epoch": 1.092857142857143, "percentage": 54.64, "elapsed_time": "6:31:17", "remaining_time": "5:24:48"} +{"current_steps": 4744, "total_steps": 8680, "loss": 0.677080512046814, "lr": 9.291963000958931e-07, "epoch": 1.0930875576036867, "percentage": 54.65, "elapsed_time": "6:31:23", "remaining_time": "5:24:43"} +{"current_steps": 4745, "total_steps": 8680, "loss": 0.7885928153991699, "lr": 9.28816277581714e-07, "epoch": 1.0933179723502304, "percentage": 54.67, "elapsed_time": "6:31:28", "remaining_time": "5:24:38"} +{"current_steps": 4746, "total_steps": 8680, "loss": 0.6568010449409485, "lr": 9.28436265399758e-07, "epoch": 1.0935483870967742, "percentage": 54.68, "elapsed_time": "6:31:31", "remaining_time": "5:24:32"} +{"current_steps": 4747, "total_steps": 8680, "loss": 0.9438225030899048, "lr": 9.280562636051827e-07, "epoch": 1.0937788018433179, "percentage": 54.69, "elapsed_time": "6:31:36", "remaining_time": "5:24:27"} +{"current_steps": 4748, "total_steps": 8680, "loss": 0.8119498491287231, "lr": 9.276762722531461e-07, "epoch": 1.0940092165898618, "percentage": 54.7, "elapsed_time": "6:31:42", "remaining_time": "5:24:22"} +{"current_steps": 4749, "total_steps": 8680, "loss": 
0.7570452690124512, "lr": 9.272962913988029e-07, "epoch": 1.0942396313364056, "percentage": 54.71, "elapsed_time": "6:31:48", "remaining_time": "5:24:19"} +{"current_steps": 4750, "total_steps": 8680, "loss": 0.7541190385818481, "lr": 9.269163210973063e-07, "epoch": 1.0944700460829493, "percentage": 54.72, "elapsed_time": "6:31:54", "remaining_time": "5:24:15"} +{"current_steps": 4751, "total_steps": 8680, "loss": 0.6481921672821045, "lr": 9.265363614038093e-07, "epoch": 1.094700460829493, "percentage": 54.74, "elapsed_time": "6:32:00", "remaining_time": "5:24:10"} +{"current_steps": 4752, "total_steps": 8680, "loss": 0.7997267246246338, "lr": 9.261564123734623e-07, "epoch": 1.094930875576037, "percentage": 54.75, "elapsed_time": "6:32:05", "remaining_time": "5:24:06"} +{"current_steps": 4753, "total_steps": 8680, "loss": 0.9093008637428284, "lr": 9.25776474061414e-07, "epoch": 1.0951612903225807, "percentage": 54.76, "elapsed_time": "6:32:10", "remaining_time": "5:24:01"} +{"current_steps": 4754, "total_steps": 8680, "loss": 0.7609673142433167, "lr": 9.253965465228122e-07, "epoch": 1.0953917050691244, "percentage": 54.77, "elapsed_time": "6:32:14", "remaining_time": "5:23:55"} +{"current_steps": 4755, "total_steps": 8680, "loss": 0.8338878154754639, "lr": 9.250166298128032e-07, "epoch": 1.0956221198156681, "percentage": 54.78, "elapsed_time": "6:32:19", "remaining_time": "5:23:50"} +{"current_steps": 4756, "total_steps": 8680, "loss": 0.7503781318664551, "lr": 9.246367239865308e-07, "epoch": 1.095852534562212, "percentage": 54.79, "elapsed_time": "6:32:23", "remaining_time": "5:23:44"} +{"current_steps": 4757, "total_steps": 8680, "loss": 0.7630816698074341, "lr": 9.242568290991384e-07, "epoch": 1.0960829493087558, "percentage": 54.8, "elapsed_time": "6:32:27", "remaining_time": "5:23:39"} +{"current_steps": 4758, "total_steps": 8680, "loss": 0.8026378154754639, "lr": 9.238769452057671e-07, "epoch": 1.0963133640552996, "percentage": 54.82, "elapsed_time": 
"6:32:32", "remaining_time": "5:23:34"} +{"current_steps": 4759, "total_steps": 8680, "loss": 0.8256090879440308, "lr": 9.234970723615558e-07, "epoch": 1.0965437788018433, "percentage": 54.83, "elapsed_time": "6:32:37", "remaining_time": "5:23:28"} +{"current_steps": 4760, "total_steps": 8680, "loss": 0.7331836223602295, "lr": 9.231172106216437e-07, "epoch": 1.096774193548387, "percentage": 54.84, "elapsed_time": "6:32:42", "remaining_time": "5:23:24"} +{"current_steps": 4761, "total_steps": 8680, "loss": 0.886203944683075, "lr": 9.227373600411667e-07, "epoch": 1.097004608294931, "percentage": 54.85, "elapsed_time": "6:32:47", "remaining_time": "5:23:19"} +{"current_steps": 4762, "total_steps": 8680, "loss": 0.7802814245223999, "lr": 9.223575206752592e-07, "epoch": 1.0972350230414747, "percentage": 54.86, "elapsed_time": "6:32:53", "remaining_time": "5:23:15"} +{"current_steps": 4763, "total_steps": 8680, "loss": 0.9682798385620117, "lr": 9.219776925790552e-07, "epoch": 1.0974654377880184, "percentage": 54.87, "elapsed_time": "6:32:56", "remaining_time": "5:23:09"} +{"current_steps": 4764, "total_steps": 8680, "loss": 0.8733793497085571, "lr": 9.215978758076858e-07, "epoch": 1.0976958525345621, "percentage": 54.88, "elapsed_time": "6:33:02", "remaining_time": "5:23:04"} +{"current_steps": 4765, "total_steps": 8680, "loss": 0.8403818607330322, "lr": 9.212180704162809e-07, "epoch": 1.097926267281106, "percentage": 54.9, "elapsed_time": "6:33:06", "remaining_time": "5:22:59"} +{"current_steps": 4766, "total_steps": 8680, "loss": 0.7957059144973755, "lr": 9.208382764599688e-07, "epoch": 1.0981566820276498, "percentage": 54.91, "elapsed_time": "6:33:11", "remaining_time": "5:22:54"} +{"current_steps": 4767, "total_steps": 8680, "loss": 0.8943477272987366, "lr": 9.204584939938761e-07, "epoch": 1.0983870967741935, "percentage": 54.92, "elapsed_time": "6:33:16", "remaining_time": "5:22:49"} +{"current_steps": 4768, "total_steps": 8680, "loss": 0.7084406018257141, "lr": 
9.200787230731273e-07, "epoch": 1.0986175115207373, "percentage": 54.93, "elapsed_time": "6:33:22", "remaining_time": "5:22:45"} +{"current_steps": 4769, "total_steps": 8680, "loss": 0.8374637365341187, "lr": 9.196989637528465e-07, "epoch": 1.098847926267281, "percentage": 54.94, "elapsed_time": "6:33:26", "remaining_time": "5:22:39"} +{"current_steps": 4770, "total_steps": 8680, "loss": 0.6963578462600708, "lr": 9.193192160881543e-07, "epoch": 1.099078341013825, "percentage": 54.95, "elapsed_time": "6:33:30", "remaining_time": "5:22:34"} +{"current_steps": 4771, "total_steps": 8680, "loss": 0.6732540130615234, "lr": 9.189394801341716e-07, "epoch": 1.0993087557603687, "percentage": 54.97, "elapsed_time": "6:33:36", "remaining_time": "5:22:29"} +{"current_steps": 4772, "total_steps": 8680, "loss": 0.7104849219322205, "lr": 9.185597559460159e-07, "epoch": 1.0995391705069124, "percentage": 54.98, "elapsed_time": "6:33:40", "remaining_time": "5:22:24"} +{"current_steps": 4773, "total_steps": 8680, "loss": 0.8461153507232666, "lr": 9.181800435788037e-07, "epoch": 1.0997695852534561, "percentage": 54.99, "elapsed_time": "6:33:45", "remaining_time": "5:22:18"} +{"current_steps": 4774, "total_steps": 8680, "loss": 0.7120847105979919, "lr": 9.178003430876502e-07, "epoch": 1.1, "percentage": 55.0, "elapsed_time": "6:33:51", "remaining_time": "5:22:15"} +{"current_steps": 4775, "total_steps": 8680, "loss": 0.8108617067337036, "lr": 9.174206545276677e-07, "epoch": 1.1002304147465438, "percentage": 55.01, "elapsed_time": "6:33:56", "remaining_time": "5:22:10"} +{"current_steps": 4776, "total_steps": 8680, "loss": 0.7019558548927307, "lr": 9.170409779539678e-07, "epoch": 1.1004608294930875, "percentage": 55.02, "elapsed_time": "6:34:02", "remaining_time": "5:22:05"} +{"current_steps": 4777, "total_steps": 8680, "loss": 0.7563629150390625, "lr": 9.166613134216605e-07, "epoch": 1.1006912442396313, "percentage": 55.03, "elapsed_time": "6:34:08", "remaining_time": "5:22:01"} 
+{"current_steps": 4778, "total_steps": 8680, "loss": 0.777009129524231, "lr": 9.162816609858533e-07, "epoch": 1.100921658986175, "percentage": 55.05, "elapsed_time": "6:34:14", "remaining_time": "5:21:57"} +{"current_steps": 4779, "total_steps": 8680, "loss": 0.812334418296814, "lr": 9.159020207016516e-07, "epoch": 1.101152073732719, "percentage": 55.06, "elapsed_time": "6:34:20", "remaining_time": "5:21:53"} +{"current_steps": 4780, "total_steps": 8680, "loss": 0.609114408493042, "lr": 9.155223926241608e-07, "epoch": 1.1013824884792627, "percentage": 55.07, "elapsed_time": "6:34:27", "remaining_time": "5:21:50"} +{"current_steps": 4781, "total_steps": 8680, "loss": 0.8277549147605896, "lr": 9.151427768084828e-07, "epoch": 1.1016129032258064, "percentage": 55.08, "elapsed_time": "6:34:33", "remaining_time": "5:21:46"} +{"current_steps": 4782, "total_steps": 8680, "loss": 0.8649400472640991, "lr": 9.147631733097179e-07, "epoch": 1.1018433179723501, "percentage": 55.09, "elapsed_time": "6:34:38", "remaining_time": "5:21:41"} +{"current_steps": 4783, "total_steps": 8680, "loss": 0.7894293665885925, "lr": 9.14383582182966e-07, "epoch": 1.102073732718894, "percentage": 55.1, "elapsed_time": "6:34:44", "remaining_time": "5:21:37"} +{"current_steps": 4784, "total_steps": 8680, "loss": 0.9121778011322021, "lr": 9.14004003483324e-07, "epoch": 1.1023041474654378, "percentage": 55.12, "elapsed_time": "6:34:49", "remaining_time": "5:21:32"} +{"current_steps": 4785, "total_steps": 8680, "loss": 0.7162299156188965, "lr": 9.136244372658867e-07, "epoch": 1.1025345622119815, "percentage": 55.13, "elapsed_time": "6:34:54", "remaining_time": "5:21:27"} +{"current_steps": 4786, "total_steps": 8680, "loss": 0.7059808969497681, "lr": 9.132448835857482e-07, "epoch": 1.1027649769585253, "percentage": 55.14, "elapsed_time": "6:34:59", "remaining_time": "5:21:22"} +{"current_steps": 4787, "total_steps": 8680, "loss": 0.8172405958175659, "lr": 9.128653424979999e-07, "epoch": 
1.1029953917050692, "percentage": 55.15, "elapsed_time": "6:35:04", "remaining_time": "5:21:17"} +{"current_steps": 4788, "total_steps": 8680, "loss": 0.7672706842422485, "lr": 9.124858140577316e-07, "epoch": 1.103225806451613, "percentage": 55.16, "elapsed_time": "6:35:10", "remaining_time": "5:21:13"} +{"current_steps": 4789, "total_steps": 8680, "loss": 0.7054900527000427, "lr": 9.121062983200318e-07, "epoch": 1.1034562211981567, "percentage": 55.17, "elapsed_time": "6:35:15", "remaining_time": "5:21:08"} +{"current_steps": 4790, "total_steps": 8680, "loss": 0.888538122177124, "lr": 9.117267953399865e-07, "epoch": 1.1036866359447004, "percentage": 55.18, "elapsed_time": "6:35:20", "remaining_time": "5:21:03"} +{"current_steps": 4791, "total_steps": 8680, "loss": 0.7918668985366821, "lr": 9.113473051726796e-07, "epoch": 1.1039170506912441, "percentage": 55.2, "elapsed_time": "6:35:26", "remaining_time": "5:20:59"} +{"current_steps": 4792, "total_steps": 8680, "loss": 0.7385697960853577, "lr": 9.109678278731942e-07, "epoch": 1.104147465437788, "percentage": 55.21, "elapsed_time": "6:35:31", "remaining_time": "5:20:54"} +{"current_steps": 4793, "total_steps": 8680, "loss": 0.6394056081771851, "lr": 9.105883634966107e-07, "epoch": 1.1043778801843318, "percentage": 55.22, "elapsed_time": "6:35:36", "remaining_time": "5:20:49"} +{"current_steps": 4794, "total_steps": 8680, "loss": 0.8372077941894531, "lr": 9.102089120980081e-07, "epoch": 1.1046082949308755, "percentage": 55.23, "elapsed_time": "6:35:41", "remaining_time": "5:20:44"} +{"current_steps": 4795, "total_steps": 8680, "loss": 0.6944066286087036, "lr": 9.098294737324628e-07, "epoch": 1.1048387096774193, "percentage": 55.24, "elapsed_time": "6:35:46", "remaining_time": "5:20:40"} +{"current_steps": 4796, "total_steps": 8680, "loss": 0.8480994701385498, "lr": 9.0945004845505e-07, "epoch": 1.1050691244239632, "percentage": 55.25, "elapsed_time": "6:35:50", "remaining_time": "5:20:34"} +{"current_steps": 4797, 
"total_steps": 8680, "loss": 0.837437629699707, "lr": 9.090706363208431e-07, "epoch": 1.105299539170507, "percentage": 55.26, "elapsed_time": "6:35:55", "remaining_time": "5:20:29"} +{"current_steps": 4798, "total_steps": 8680, "loss": 0.8610002398490906, "lr": 9.086912373849128e-07, "epoch": 1.1055299539170507, "percentage": 55.28, "elapsed_time": "6:36:00", "remaining_time": "5:20:24"} +{"current_steps": 4799, "total_steps": 8680, "loss": 0.7323784828186035, "lr": 9.083118517023281e-07, "epoch": 1.1057603686635944, "percentage": 55.29, "elapsed_time": "6:36:06", "remaining_time": "5:20:19"} +{"current_steps": 4800, "total_steps": 8680, "loss": 0.7838932871818542, "lr": 9.079324793281573e-07, "epoch": 1.1059907834101383, "percentage": 55.3, "elapsed_time": "6:36:10", "remaining_time": "5:20:14"} +{"current_steps": 4801, "total_steps": 8680, "loss": 0.7655705213546753, "lr": 9.075531203174651e-07, "epoch": 1.106221198156682, "percentage": 55.31, "elapsed_time": "6:36:17", "remaining_time": "5:20:11"} +{"current_steps": 4802, "total_steps": 8680, "loss": 0.8320151567459106, "lr": 9.071737747253148e-07, "epoch": 1.1064516129032258, "percentage": 55.32, "elapsed_time": "6:36:22", "remaining_time": "5:20:05"} +{"current_steps": 4803, "total_steps": 8680, "loss": 0.7434612512588501, "lr": 9.067944426067687e-07, "epoch": 1.1066820276497695, "percentage": 55.33, "elapsed_time": "6:36:26", "remaining_time": "5:20:00"} +{"current_steps": 4804, "total_steps": 8680, "loss": 0.8351321220397949, "lr": 9.064151240168857e-07, "epoch": 1.1069124423963133, "percentage": 55.35, "elapsed_time": "6:36:30", "remaining_time": "5:19:54"} +{"current_steps": 4805, "total_steps": 8680, "loss": 0.6648053526878357, "lr": 9.060358190107233e-07, "epoch": 1.1071428571428572, "percentage": 55.36, "elapsed_time": "6:36:37", "remaining_time": "5:19:51"} +{"current_steps": 4806, "total_steps": 8680, "loss": 0.7507585287094116, "lr": 9.056565276433377e-07, "epoch": 1.107373271889401, "percentage": 
55.37, "elapsed_time": "6:36:40", "remaining_time": "5:19:45"} +{"current_steps": 4807, "total_steps": 8680, "loss": 0.7638635635375977, "lr": 9.052772499697823e-07, "epoch": 1.1076036866359447, "percentage": 55.38, "elapsed_time": "6:36:46", "remaining_time": "5:19:40"} +{"current_steps": 4808, "total_steps": 8680, "loss": 0.8066626191139221, "lr": 9.048979860451081e-07, "epoch": 1.1078341013824884, "percentage": 55.39, "elapsed_time": "6:36:50", "remaining_time": "5:19:35"} +{"current_steps": 4809, "total_steps": 8680, "loss": 0.7090466022491455, "lr": 9.045187359243659e-07, "epoch": 1.1080645161290323, "percentage": 55.4, "elapsed_time": "6:36:56", "remaining_time": "5:19:31"} +{"current_steps": 4810, "total_steps": 8680, "loss": 0.7071142792701721, "lr": 9.041394996626027e-07, "epoch": 1.108294930875576, "percentage": 55.41, "elapsed_time": "6:37:00", "remaining_time": "5:19:25"} +{"current_steps": 4811, "total_steps": 8680, "loss": 0.7103942036628723, "lr": 9.037602773148638e-07, "epoch": 1.1085253456221198, "percentage": 55.43, "elapsed_time": "6:37:06", "remaining_time": "5:19:20"} +{"current_steps": 4812, "total_steps": 8680, "loss": 0.8408492207527161, "lr": 9.033810689361936e-07, "epoch": 1.1087557603686635, "percentage": 55.44, "elapsed_time": "6:37:12", "remaining_time": "5:19:16"} +{"current_steps": 4813, "total_steps": 8680, "loss": 0.7621495723724365, "lr": 9.030018745816335e-07, "epoch": 1.1089861751152075, "percentage": 55.45, "elapsed_time": "6:37:16", "remaining_time": "5:19:11"} +{"current_steps": 4814, "total_steps": 8680, "loss": 0.7105196714401245, "lr": 9.026226943062225e-07, "epoch": 1.1092165898617512, "percentage": 55.46, "elapsed_time": "6:37:21", "remaining_time": "5:19:06"} +{"current_steps": 4815, "total_steps": 8680, "loss": 0.8733636140823364, "lr": 9.022435281649986e-07, "epoch": 1.109447004608295, "percentage": 55.47, "elapsed_time": "6:37:27", "remaining_time": "5:19:02"} +{"current_steps": 4816, "total_steps": 8680, "loss": 
0.9097845554351807, "lr": 9.018643762129974e-07, "epoch": 1.1096774193548387, "percentage": 55.48, "elapsed_time": "6:37:31", "remaining_time": "5:18:56"} +{"current_steps": 4817, "total_steps": 8680, "loss": 0.8743059635162354, "lr": 9.014852385052519e-07, "epoch": 1.1099078341013824, "percentage": 55.5, "elapsed_time": "6:37:35", "remaining_time": "5:18:50"} +{"current_steps": 4818, "total_steps": 8680, "loss": 0.7898736000061035, "lr": 9.011061150967937e-07, "epoch": 1.1101382488479263, "percentage": 55.51, "elapsed_time": "6:37:40", "remaining_time": "5:18:46"} +{"current_steps": 4819, "total_steps": 8680, "loss": 0.871254563331604, "lr": 9.007270060426516e-07, "epoch": 1.11036866359447, "percentage": 55.52, "elapsed_time": "6:37:44", "remaining_time": "5:18:40"} +{"current_steps": 4820, "total_steps": 8680, "loss": 0.6833579540252686, "lr": 9.003479113978536e-07, "epoch": 1.1105990783410138, "percentage": 55.53, "elapsed_time": "6:37:51", "remaining_time": "5:18:36"} +{"current_steps": 4821, "total_steps": 8680, "loss": 0.8289071321487427, "lr": 8.999688312174243e-07, "epoch": 1.1108294930875575, "percentage": 55.54, "elapsed_time": "6:37:56", "remaining_time": "5:18:32"} +{"current_steps": 4822, "total_steps": 8680, "loss": 0.6798583269119263, "lr": 8.995897655563864e-07, "epoch": 1.1110599078341015, "percentage": 55.55, "elapsed_time": "6:38:02", "remaining_time": "5:18:27"} +{"current_steps": 4823, "total_steps": 8680, "loss": 0.6518250703811646, "lr": 8.992107144697614e-07, "epoch": 1.1112903225806452, "percentage": 55.56, "elapsed_time": "6:38:07", "remaining_time": "5:18:23"} +{"current_steps": 4824, "total_steps": 8680, "loss": 0.9316667318344116, "lr": 8.988316780125679e-07, "epoch": 1.111520737327189, "percentage": 55.58, "elapsed_time": "6:38:11", "remaining_time": "5:18:17"} +{"current_steps": 4825, "total_steps": 8680, "loss": 0.755483865737915, "lr": 8.98452656239822e-07, "epoch": 1.1117511520737327, "percentage": 55.59, "elapsed_time": "6:38:16", 
"remaining_time": "5:18:12"} +{"current_steps": 4826, "total_steps": 8680, "loss": 0.7892755270004272, "lr": 8.980736492065391e-07, "epoch": 1.1119815668202766, "percentage": 55.6, "elapsed_time": "6:38:21", "remaining_time": "5:18:07"} +{"current_steps": 4827, "total_steps": 8680, "loss": 0.703255295753479, "lr": 8.976946569677308e-07, "epoch": 1.1122119815668203, "percentage": 55.61, "elapsed_time": "6:38:26", "remaining_time": "5:18:02"} +{"current_steps": 4828, "total_steps": 8680, "loss": 0.7885171175003052, "lr": 8.973156795784073e-07, "epoch": 1.112442396313364, "percentage": 55.62, "elapsed_time": "6:38:30", "remaining_time": "5:17:56"} +{"current_steps": 4829, "total_steps": 8680, "loss": 0.8035199642181396, "lr": 8.969367170935776e-07, "epoch": 1.1126728110599078, "percentage": 55.63, "elapsed_time": "6:38:36", "remaining_time": "5:17:52"} +{"current_steps": 4830, "total_steps": 8680, "loss": 0.8272112607955933, "lr": 8.965577695682467e-07, "epoch": 1.1129032258064515, "percentage": 55.65, "elapsed_time": "6:38:41", "remaining_time": "5:17:47"} +{"current_steps": 4831, "total_steps": 8680, "loss": 0.8734478950500488, "lr": 8.961788370574182e-07, "epoch": 1.1131336405529955, "percentage": 55.66, "elapsed_time": "6:38:46", "remaining_time": "5:17:42"} +{"current_steps": 4832, "total_steps": 8680, "loss": 0.7487469911575317, "lr": 8.957999196160946e-07, "epoch": 1.1133640552995392, "percentage": 55.67, "elapsed_time": "6:38:50", "remaining_time": "5:17:37"} +{"current_steps": 4833, "total_steps": 8680, "loss": 0.9193693399429321, "lr": 8.954210172992748e-07, "epoch": 1.113594470046083, "percentage": 55.68, "elapsed_time": "6:38:54", "remaining_time": "5:17:31"} +{"current_steps": 4834, "total_steps": 8680, "loss": 0.8228428959846497, "lr": 8.950421301619555e-07, "epoch": 1.1138248847926266, "percentage": 55.69, "elapsed_time": "6:38:59", "remaining_time": "5:17:26"} +{"current_steps": 4835, "total_steps": 8680, "loss": 0.7419015169143677, "lr": 
8.946632582591324e-07, "epoch": 1.1140552995391706, "percentage": 55.7, "elapsed_time": "6:39:04", "remaining_time": "5:17:21"} +{"current_steps": 4836, "total_steps": 8680, "loss": 0.827411949634552, "lr": 8.942844016457975e-07, "epoch": 1.1142857142857143, "percentage": 55.71, "elapsed_time": "6:39:09", "remaining_time": "5:17:16"} +{"current_steps": 4837, "total_steps": 8680, "loss": 0.7066754102706909, "lr": 8.93905560376942e-07, "epoch": 1.114516129032258, "percentage": 55.73, "elapsed_time": "6:39:14", "remaining_time": "5:17:12"} +{"current_steps": 4838, "total_steps": 8680, "loss": 0.7201621532440186, "lr": 8.93526734507554e-07, "epoch": 1.1147465437788018, "percentage": 55.74, "elapsed_time": "6:39:18", "remaining_time": "5:17:06"} +{"current_steps": 4839, "total_steps": 8680, "loss": 0.6363521814346313, "lr": 8.931479240926196e-07, "epoch": 1.1149769585253457, "percentage": 55.75, "elapsed_time": "6:39:25", "remaining_time": "5:17:02"} +{"current_steps": 4840, "total_steps": 8680, "loss": 0.8232909440994263, "lr": 8.927691291871223e-07, "epoch": 1.1152073732718895, "percentage": 55.76, "elapsed_time": "6:39:29", "remaining_time": "5:16:57"} +{"current_steps": 4841, "total_steps": 8680, "loss": 0.7006033658981323, "lr": 8.923903498460441e-07, "epoch": 1.1154377880184332, "percentage": 55.77, "elapsed_time": "6:39:35", "remaining_time": "5:16:53"} +{"current_steps": 4842, "total_steps": 8680, "loss": 0.6982721090316772, "lr": 8.920115861243638e-07, "epoch": 1.115668202764977, "percentage": 55.78, "elapsed_time": "6:39:42", "remaining_time": "5:16:49"} +{"current_steps": 4843, "total_steps": 8680, "loss": 0.7735922336578369, "lr": 8.916328380770593e-07, "epoch": 1.1158986175115206, "percentage": 55.79, "elapsed_time": "6:39:47", "remaining_time": "5:16:44"} +{"current_steps": 4844, "total_steps": 8680, "loss": 0.7430423498153687, "lr": 8.912541057591049e-07, "epoch": 1.1161290322580646, "percentage": 55.81, "elapsed_time": "6:39:52", "remaining_time": 
"5:16:40"} +{"current_steps": 4845, "total_steps": 8680, "loss": 0.7783932685852051, "lr": 8.908753892254729e-07, "epoch": 1.1163594470046083, "percentage": 55.82, "elapsed_time": "6:39:58", "remaining_time": "5:16:35"} +{"current_steps": 4846, "total_steps": 8680, "loss": 0.726211428642273, "lr": 8.904966885311339e-07, "epoch": 1.116589861751152, "percentage": 55.83, "elapsed_time": "6:40:04", "remaining_time": "5:16:31"} +{"current_steps": 4847, "total_steps": 8680, "loss": 0.664351761341095, "lr": 8.901180037310555e-07, "epoch": 1.1168202764976958, "percentage": 55.84, "elapsed_time": "6:40:09", "remaining_time": "5:16:26"} +{"current_steps": 4848, "total_steps": 8680, "loss": 0.8246554136276245, "lr": 8.897393348802031e-07, "epoch": 1.1170506912442397, "percentage": 55.85, "elapsed_time": "6:40:15", "remaining_time": "5:16:22"} +{"current_steps": 4849, "total_steps": 8680, "loss": 0.9435447454452515, "lr": 8.893606820335405e-07, "epoch": 1.1172811059907835, "percentage": 55.86, "elapsed_time": "6:40:18", "remaining_time": "5:16:16"} +{"current_steps": 4850, "total_steps": 8680, "loss": 0.8471171855926514, "lr": 8.889820452460286e-07, "epoch": 1.1175115207373272, "percentage": 55.88, "elapsed_time": "6:40:22", "remaining_time": "5:16:10"} +{"current_steps": 4851, "total_steps": 8680, "loss": 0.6038233041763306, "lr": 8.886034245726254e-07, "epoch": 1.117741935483871, "percentage": 55.89, "elapsed_time": "6:40:28", "remaining_time": "5:16:06"} +{"current_steps": 4852, "total_steps": 8680, "loss": 0.8186997771263123, "lr": 8.882248200682881e-07, "epoch": 1.1179723502304149, "percentage": 55.9, "elapsed_time": "6:40:32", "remaining_time": "5:16:00"} +{"current_steps": 4853, "total_steps": 8680, "loss": 0.789948582649231, "lr": 8.878462317879702e-07, "epoch": 1.1182027649769586, "percentage": 55.91, "elapsed_time": "6:40:37", "remaining_time": "5:15:55"} +{"current_steps": 4854, "total_steps": 8680, "loss": 0.7543652057647705, "lr": 8.87467659786623e-07, "epoch": 
1.1184331797235023, "percentage": 55.92, "elapsed_time": "6:40:42", "remaining_time": "5:15:50"} +{"current_steps": 4855, "total_steps": 8680, "loss": 0.5985269546508789, "lr": 8.870891041191963e-07, "epoch": 1.118663594470046, "percentage": 55.93, "elapsed_time": "6:40:49", "remaining_time": "5:15:47"} +{"current_steps": 4856, "total_steps": 8680, "loss": 0.7676643133163452, "lr": 8.867105648406364e-07, "epoch": 1.1188940092165898, "percentage": 55.94, "elapsed_time": "6:40:54", "remaining_time": "5:15:42"} +{"current_steps": 4857, "total_steps": 8680, "loss": 0.7317303419113159, "lr": 8.863320420058881e-07, "epoch": 1.1191244239631337, "percentage": 55.96, "elapsed_time": "6:40:58", "remaining_time": "5:15:36"} +{"current_steps": 4858, "total_steps": 8680, "loss": 0.8357843160629272, "lr": 8.859535356698936e-07, "epoch": 1.1193548387096774, "percentage": 55.97, "elapsed_time": "6:41:02", "remaining_time": "5:15:31"} +{"current_steps": 4859, "total_steps": 8680, "loss": 0.7149945497512817, "lr": 8.855750458875923e-07, "epoch": 1.1195852534562212, "percentage": 55.98, "elapsed_time": "6:41:08", "remaining_time": "5:15:27"} +{"current_steps": 4860, "total_steps": 8680, "loss": 0.7059169411659241, "lr": 8.851965727139214e-07, "epoch": 1.119815668202765, "percentage": 55.99, "elapsed_time": "6:41:14", "remaining_time": "5:15:22"} +{"current_steps": 4861, "total_steps": 8680, "loss": 0.7530190944671631, "lr": 8.848181162038163e-07, "epoch": 1.1200460829493089, "percentage": 56.0, "elapsed_time": "6:41:20", "remaining_time": "5:15:18"} +{"current_steps": 4862, "total_steps": 8680, "loss": 0.808814287185669, "lr": 8.844396764122092e-07, "epoch": 1.1202764976958526, "percentage": 56.01, "elapsed_time": "6:41:26", "remaining_time": "5:15:14"} +{"current_steps": 4863, "total_steps": 8680, "loss": 0.7205604910850525, "lr": 8.840612533940295e-07, "epoch": 1.1205069124423963, "percentage": 56.03, "elapsed_time": "6:41:30", "remaining_time": "5:15:08"} +{"current_steps": 4864, 
"total_steps": 8680, "loss": 0.7493274211883545, "lr": 8.83682847204206e-07, "epoch": 1.12073732718894, "percentage": 56.04, "elapsed_time": "6:41:35", "remaining_time": "5:15:03"} +{"current_steps": 4865, "total_steps": 8680, "loss": 0.8115849494934082, "lr": 8.833044578976631e-07, "epoch": 1.120967741935484, "percentage": 56.05, "elapsed_time": "6:41:41", "remaining_time": "5:14:59"} +{"current_steps": 4866, "total_steps": 8680, "loss": 0.8188419342041016, "lr": 8.829260855293237e-07, "epoch": 1.1211981566820277, "percentage": 56.06, "elapsed_time": "6:41:47", "remaining_time": "5:14:55"} +{"current_steps": 4867, "total_steps": 8680, "loss": 0.6152349710464478, "lr": 8.82547730154108e-07, "epoch": 1.1214285714285714, "percentage": 56.07, "elapsed_time": "6:41:51", "remaining_time": "5:14:50"} +{"current_steps": 4868, "total_steps": 8680, "loss": 0.7629969120025635, "lr": 8.821693918269333e-07, "epoch": 1.1216589861751152, "percentage": 56.08, "elapsed_time": "6:41:56", "remaining_time": "5:14:45"} +{"current_steps": 4869, "total_steps": 8680, "loss": 0.7063733339309692, "lr": 8.81791070602716e-07, "epoch": 1.121889400921659, "percentage": 56.09, "elapsed_time": "6:42:02", "remaining_time": "5:14:40"} +{"current_steps": 4870, "total_steps": 8680, "loss": 0.729676365852356, "lr": 8.814127665363682e-07, "epoch": 1.1221198156682028, "percentage": 56.11, "elapsed_time": "6:42:07", "remaining_time": "5:14:35"} +{"current_steps": 4871, "total_steps": 8680, "loss": 0.8188877105712891, "lr": 8.810344796827999e-07, "epoch": 1.1223502304147466, "percentage": 56.12, "elapsed_time": "6:42:12", "remaining_time": "5:14:31"} +{"current_steps": 4872, "total_steps": 8680, "loss": 0.70793217420578, "lr": 8.806562100969199e-07, "epoch": 1.1225806451612903, "percentage": 56.13, "elapsed_time": "6:42:18", "remaining_time": "5:14:26"} +{"current_steps": 4873, "total_steps": 8680, "loss": 0.8086484670639038, "lr": 8.802779578336329e-07, "epoch": 1.122811059907834, "percentage": 56.14, 
"elapsed_time": "6:42:23", "remaining_time": "5:14:21"} +{"current_steps": 4874, "total_steps": 8680, "loss": 0.8954081535339355, "lr": 8.798997229478417e-07, "epoch": 1.123041474654378, "percentage": 56.15, "elapsed_time": "6:42:28", "remaining_time": "5:14:16"} +{"current_steps": 4875, "total_steps": 8680, "loss": 0.6615205407142639, "lr": 8.795215054944469e-07, "epoch": 1.1232718894009217, "percentage": 56.16, "elapsed_time": "6:42:33", "remaining_time": "5:14:11"} +{"current_steps": 4876, "total_steps": 8680, "loss": 0.6851116418838501, "lr": 8.79143305528346e-07, "epoch": 1.1235023041474654, "percentage": 56.18, "elapsed_time": "6:42:38", "remaining_time": "5:14:07"} +{"current_steps": 4877, "total_steps": 8680, "loss": 0.7594672441482544, "lr": 8.787651231044342e-07, "epoch": 1.1237327188940092, "percentage": 56.19, "elapsed_time": "6:42:44", "remaining_time": "5:14:02"} +{"current_steps": 4878, "total_steps": 8680, "loss": 0.7170572280883789, "lr": 8.783869582776044e-07, "epoch": 1.123963133640553, "percentage": 56.2, "elapsed_time": "6:42:49", "remaining_time": "5:13:57"} +{"current_steps": 4879, "total_steps": 8680, "loss": 0.9139137864112854, "lr": 8.780088111027467e-07, "epoch": 1.1241935483870968, "percentage": 56.21, "elapsed_time": "6:42:53", "remaining_time": "5:13:52"} +{"current_steps": 4880, "total_steps": 8680, "loss": 0.8716791868209839, "lr": 8.776306816347482e-07, "epoch": 1.1244239631336406, "percentage": 56.22, "elapsed_time": "6:42:59", "remaining_time": "5:13:47"} +{"current_steps": 4881, "total_steps": 8680, "loss": 0.840330958366394, "lr": 8.772525699284946e-07, "epoch": 1.1246543778801843, "percentage": 56.23, "elapsed_time": "6:43:03", "remaining_time": "5:13:42"} +{"current_steps": 4882, "total_steps": 8680, "loss": 0.7713445425033569, "lr": 8.768744760388681e-07, "epoch": 1.124884792626728, "percentage": 56.24, "elapsed_time": "6:43:08", "remaining_time": "5:13:37"} +{"current_steps": 4883, "total_steps": 8680, "loss": 
0.8964767456054688, "lr": 8.764964000207479e-07, "epoch": 1.125115207373272, "percentage": 56.26, "elapsed_time": "6:43:13", "remaining_time": "5:13:32"} +{"current_steps": 4884, "total_steps": 8680, "loss": 0.8038421869277954, "lr": 8.761183419290121e-07, "epoch": 1.1253456221198157, "percentage": 56.27, "elapsed_time": "6:43:18", "remaining_time": "5:13:27"} +{"current_steps": 4885, "total_steps": 8680, "loss": 0.6601011753082275, "lr": 8.757403018185351e-07, "epoch": 1.1255760368663594, "percentage": 56.28, "elapsed_time": "6:43:25", "remaining_time": "5:13:24"} +{"current_steps": 4886, "total_steps": 8680, "loss": 0.8226664066314697, "lr": 8.753622797441885e-07, "epoch": 1.1258064516129032, "percentage": 56.29, "elapsed_time": "6:43:31", "remaining_time": "5:13:20"} +{"current_steps": 4887, "total_steps": 8680, "loss": 0.7062248587608337, "lr": 8.749842757608422e-07, "epoch": 1.1260368663594469, "percentage": 56.3, "elapsed_time": "6:43:36", "remaining_time": "5:13:15"} +{"current_steps": 4888, "total_steps": 8680, "loss": 0.8642051815986633, "lr": 8.746062899233628e-07, "epoch": 1.1262672811059908, "percentage": 56.31, "elapsed_time": "6:43:40", "remaining_time": "5:13:09"} +{"current_steps": 4889, "total_steps": 8680, "loss": 0.8194048404693604, "lr": 8.74228322286614e-07, "epoch": 1.1264976958525346, "percentage": 56.32, "elapsed_time": "6:43:45", "remaining_time": "5:13:05"} +{"current_steps": 4890, "total_steps": 8680, "loss": 0.6957820653915405, "lr": 8.738503729054583e-07, "epoch": 1.1267281105990783, "percentage": 56.34, "elapsed_time": "6:43:52", "remaining_time": "5:13:01"} +{"current_steps": 4891, "total_steps": 8680, "loss": 0.8107770681381226, "lr": 8.734724418347537e-07, "epoch": 1.1269585253456222, "percentage": 56.35, "elapsed_time": "6:43:58", "remaining_time": "5:12:57"} +{"current_steps": 4892, "total_steps": 8680, "loss": 0.7727551460266113, "lr": 8.730945291293563e-07, "epoch": 1.127188940092166, "percentage": 56.36, "elapsed_time": 
"6:44:03", "remaining_time": "5:12:52"} +{"current_steps": 4893, "total_steps": 8680, "loss": 0.7389936447143555, "lr": 8.727166348441207e-07, "epoch": 1.1274193548387097, "percentage": 56.37, "elapsed_time": "6:44:08", "remaining_time": "5:12:47"} +{"current_steps": 4894, "total_steps": 8680, "loss": 0.7666463851928711, "lr": 8.723387590338964e-07, "epoch": 1.1276497695852534, "percentage": 56.38, "elapsed_time": "6:44:14", "remaining_time": "5:12:43"} +{"current_steps": 4895, "total_steps": 8680, "loss": 0.7795453071594238, "lr": 8.719609017535328e-07, "epoch": 1.1278801843317972, "percentage": 56.39, "elapsed_time": "6:44:19", "remaining_time": "5:12:38"} +{"current_steps": 4896, "total_steps": 8680, "loss": 0.8560752272605896, "lr": 8.715830630578746e-07, "epoch": 1.128110599078341, "percentage": 56.41, "elapsed_time": "6:44:24", "remaining_time": "5:12:33"} +{"current_steps": 4897, "total_steps": 8680, "loss": 0.7574455738067627, "lr": 8.712052430017645e-07, "epoch": 1.1283410138248848, "percentage": 56.42, "elapsed_time": "6:44:28", "remaining_time": "5:12:27"} +{"current_steps": 4898, "total_steps": 8680, "loss": 0.8017276525497437, "lr": 8.708274416400432e-07, "epoch": 1.1285714285714286, "percentage": 56.43, "elapsed_time": "6:44:32", "remaining_time": "5:12:22"} +{"current_steps": 4899, "total_steps": 8680, "loss": 0.7046157121658325, "lr": 8.704496590275477e-07, "epoch": 1.1288018433179723, "percentage": 56.44, "elapsed_time": "6:44:38", "remaining_time": "5:12:17"} +{"current_steps": 4900, "total_steps": 8680, "loss": 0.7352035641670227, "lr": 8.700718952191124e-07, "epoch": 1.129032258064516, "percentage": 56.45, "elapsed_time": "6:44:43", "remaining_time": "5:12:12"} +{"current_steps": 4901, "total_steps": 8680, "loss": 0.6444690227508545, "lr": 8.696941502695698e-07, "epoch": 1.12926267281106, "percentage": 56.46, "elapsed_time": "6:44:49", "remaining_time": "5:12:08"} +{"current_steps": 4902, "total_steps": 8680, "loss": 0.7909440994262695, "lr": 
8.69316424233749e-07, "epoch": 1.1294930875576037, "percentage": 56.47, "elapsed_time": "6:44:54", "remaining_time": "5:12:04"} +{"current_steps": 4903, "total_steps": 8680, "loss": 0.646790087223053, "lr": 8.689387171664756e-07, "epoch": 1.1297235023041474, "percentage": 56.49, "elapsed_time": "6:44:59", "remaining_time": "5:11:59"} +{"current_steps": 4904, "total_steps": 8680, "loss": 0.786831796169281, "lr": 8.685610291225744e-07, "epoch": 1.1299539170506911, "percentage": 56.5, "elapsed_time": "6:45:04", "remaining_time": "5:11:53"} +{"current_steps": 4905, "total_steps": 8680, "loss": 0.8004348278045654, "lr": 8.681833601568657e-07, "epoch": 1.130184331797235, "percentage": 56.51, "elapsed_time": "6:45:09", "remaining_time": "5:11:48"} +{"current_steps": 4906, "total_steps": 8680, "loss": 0.6846532821655273, "lr": 8.678057103241677e-07, "epoch": 1.1304147465437788, "percentage": 56.52, "elapsed_time": "6:45:13", "remaining_time": "5:11:43"} +{"current_steps": 4907, "total_steps": 8680, "loss": 0.7555707693099976, "lr": 8.67428079679296e-07, "epoch": 1.1306451612903226, "percentage": 56.53, "elapsed_time": "6:45:18", "remaining_time": "5:11:38"} +{"current_steps": 4908, "total_steps": 8680, "loss": 0.852725625038147, "lr": 8.67050468277063e-07, "epoch": 1.1308755760368663, "percentage": 56.54, "elapsed_time": "6:45:25", "remaining_time": "5:11:35"} +{"current_steps": 4909, "total_steps": 8680, "loss": 0.6990044713020325, "lr": 8.666728761722782e-07, "epoch": 1.1311059907834102, "percentage": 56.56, "elapsed_time": "6:45:31", "remaining_time": "5:11:31"} +{"current_steps": 4910, "total_steps": 8680, "loss": 0.8050999641418457, "lr": 8.662953034197493e-07, "epoch": 1.131336405529954, "percentage": 56.57, "elapsed_time": "6:45:36", "remaining_time": "5:11:26"} +{"current_steps": 4911, "total_steps": 8680, "loss": 0.8169291019439697, "lr": 8.659177500742802e-07, "epoch": 1.1315668202764977, "percentage": 56.58, "elapsed_time": "6:45:43", "remaining_time": 
"5:11:22"} +{"current_steps": 4912, "total_steps": 8680, "loss": 0.7814679145812988, "lr": 8.655402161906716e-07, "epoch": 1.1317972350230414, "percentage": 56.59, "elapsed_time": "6:45:49", "remaining_time": "5:11:18"} +{"current_steps": 4913, "total_steps": 8680, "loss": 0.6734834313392639, "lr": 8.651627018237231e-07, "epoch": 1.1320276497695851, "percentage": 56.6, "elapsed_time": "6:45:54", "remaining_time": "5:11:13"} +{"current_steps": 4914, "total_steps": 8680, "loss": 0.8765416145324707, "lr": 8.647852070282299e-07, "epoch": 1.132258064516129, "percentage": 56.61, "elapsed_time": "6:46:00", "remaining_time": "5:11:09"} +{"current_steps": 4915, "total_steps": 8680, "loss": 1.0023764371871948, "lr": 8.644077318589847e-07, "epoch": 1.1324884792626728, "percentage": 56.62, "elapsed_time": "6:46:04", "remaining_time": "5:11:03"} +{"current_steps": 4916, "total_steps": 8680, "loss": 0.7561393976211548, "lr": 8.64030276370778e-07, "epoch": 1.1327188940092165, "percentage": 56.64, "elapsed_time": "6:46:10", "remaining_time": "5:10:59"} +{"current_steps": 4917, "total_steps": 8680, "loss": 0.8252062797546387, "lr": 8.636528406183961e-07, "epoch": 1.1329493087557603, "percentage": 56.65, "elapsed_time": "6:46:15", "remaining_time": "5:10:54"} +{"current_steps": 4918, "total_steps": 8680, "loss": 0.7598097324371338, "lr": 8.632754246566246e-07, "epoch": 1.1331797235023042, "percentage": 56.66, "elapsed_time": "6:46:21", "remaining_time": "5:10:50"} +{"current_steps": 4919, "total_steps": 8680, "loss": 0.6113640069961548, "lr": 8.628980285402438e-07, "epoch": 1.133410138248848, "percentage": 56.67, "elapsed_time": "6:46:27", "remaining_time": "5:10:46"} +{"current_steps": 4920, "total_steps": 8680, "loss": 0.7457853555679321, "lr": 8.625206523240325e-07, "epoch": 1.1336405529953917, "percentage": 56.68, "elapsed_time": "6:46:31", "remaining_time": "5:10:40"} +{"current_steps": 4921, "total_steps": 8680, "loss": 0.7334161996841431, "lr": 8.62143296062767e-07, "epoch": 
1.1338709677419354, "percentage": 56.69, "elapsed_time": "6:46:36", "remaining_time": "5:10:35"} +{"current_steps": 4922, "total_steps": 8680, "loss": 0.7446962594985962, "lr": 8.617659598112195e-07, "epoch": 1.1341013824884794, "percentage": 56.71, "elapsed_time": "6:46:41", "remaining_time": "5:10:30"} +{"current_steps": 4923, "total_steps": 8680, "loss": 0.7074497938156128, "lr": 8.613886436241594e-07, "epoch": 1.134331797235023, "percentage": 56.72, "elapsed_time": "6:46:47", "remaining_time": "5:10:26"} +{"current_steps": 4924, "total_steps": 8680, "loss": 0.6728851795196533, "lr": 8.610113475563547e-07, "epoch": 1.1345622119815668, "percentage": 56.73, "elapsed_time": "6:46:53", "remaining_time": "5:10:22"} +{"current_steps": 4925, "total_steps": 8680, "loss": 0.7732793092727661, "lr": 8.606340716625689e-07, "epoch": 1.1347926267281105, "percentage": 56.74, "elapsed_time": "6:46:58", "remaining_time": "5:10:17"} +{"current_steps": 4926, "total_steps": 8680, "loss": 0.7514671683311462, "lr": 8.60256815997563e-07, "epoch": 1.1350230414746543, "percentage": 56.75, "elapsed_time": "6:47:02", "remaining_time": "5:10:12"} +{"current_steps": 4927, "total_steps": 8680, "loss": 0.7824795842170715, "lr": 8.598795806160952e-07, "epoch": 1.1352534562211982, "percentage": 56.76, "elapsed_time": "6:47:07", "remaining_time": "5:10:06"} +{"current_steps": 4928, "total_steps": 8680, "loss": 0.789236307144165, "lr": 8.59502365572921e-07, "epoch": 1.135483870967742, "percentage": 56.77, "elapsed_time": "6:47:12", "remaining_time": "5:10:01"} +{"current_steps": 4929, "total_steps": 8680, "loss": 0.7005175948143005, "lr": 8.591251709227919e-07, "epoch": 1.1357142857142857, "percentage": 56.79, "elapsed_time": "6:47:15", "remaining_time": "5:09:55"} +{"current_steps": 4930, "total_steps": 8680, "loss": 0.7851300239562988, "lr": 8.587479967204582e-07, "epoch": 1.1359447004608294, "percentage": 56.8, "elapsed_time": "6:47:19", "remaining_time": "5:09:50"} +{"current_steps": 4931, 
"total_steps": 8680, "loss": 0.8901405334472656, "lr": 8.583708430206658e-07, "epoch": 1.1361751152073734, "percentage": 56.81, "elapsed_time": "6:47:24", "remaining_time": "5:09:45"} +{"current_steps": 4932, "total_steps": 8680, "loss": 0.8118528127670288, "lr": 8.579937098781576e-07, "epoch": 1.136405529953917, "percentage": 56.82, "elapsed_time": "6:47:30", "remaining_time": "5:09:41"} +{"current_steps": 4933, "total_steps": 8680, "loss": 0.6500028371810913, "lr": 8.57616597347675e-07, "epoch": 1.1366359447004608, "percentage": 56.83, "elapsed_time": "6:47:37", "remaining_time": "5:09:37"} +{"current_steps": 4934, "total_steps": 8680, "loss": 0.7752922773361206, "lr": 8.572395054839547e-07, "epoch": 1.1368663594470045, "percentage": 56.84, "elapsed_time": "6:47:41", "remaining_time": "5:09:31"} +{"current_steps": 4935, "total_steps": 8680, "loss": 0.7346245050430298, "lr": 8.568624343417309e-07, "epoch": 1.1370967741935485, "percentage": 56.85, "elapsed_time": "6:47:45", "remaining_time": "5:09:26"} +{"current_steps": 4936, "total_steps": 8680, "loss": 0.9249104261398315, "lr": 8.564853839757356e-07, "epoch": 1.1373271889400922, "percentage": 56.87, "elapsed_time": "6:47:49", "remaining_time": "5:09:20"} +{"current_steps": 4937, "total_steps": 8680, "loss": 0.7407078742980957, "lr": 8.561083544406965e-07, "epoch": 1.137557603686636, "percentage": 56.88, "elapsed_time": "6:47:54", "remaining_time": "5:09:15"} +{"current_steps": 4938, "total_steps": 8680, "loss": 0.7615865468978882, "lr": 8.557313457913393e-07, "epoch": 1.1377880184331797, "percentage": 56.89, "elapsed_time": "6:47:59", "remaining_time": "5:09:10"} +{"current_steps": 4939, "total_steps": 8680, "loss": 0.757561445236206, "lr": 8.553543580823866e-07, "epoch": 1.1380184331797234, "percentage": 56.9, "elapsed_time": "6:48:04", "remaining_time": "5:09:05"} +{"current_steps": 4940, "total_steps": 8680, "loss": 0.7130411863327026, "lr": 8.549773913685572e-07, "epoch": 1.1382488479262673, "percentage": 
56.91, "elapsed_time": "6:48:09", "remaining_time": "5:09:00"} +{"current_steps": 4941, "total_steps": 8680, "loss": 0.7507551312446594, "lr": 8.54600445704567e-07, "epoch": 1.138479262672811, "percentage": 56.92, "elapsed_time": "6:48:15", "remaining_time": "5:08:56"} +{"current_steps": 4942, "total_steps": 8680, "loss": 0.896443247795105, "lr": 8.542235211451301e-07, "epoch": 1.1387096774193548, "percentage": 56.94, "elapsed_time": "6:48:19", "remaining_time": "5:08:50"} +{"current_steps": 4943, "total_steps": 8680, "loss": 0.7530815601348877, "lr": 8.538466177449557e-07, "epoch": 1.1389400921658985, "percentage": 56.95, "elapsed_time": "6:48:23", "remaining_time": "5:08:44"} +{"current_steps": 4944, "total_steps": 8680, "loss": 0.8730431795120239, "lr": 8.534697355587517e-07, "epoch": 1.1391705069124425, "percentage": 56.96, "elapsed_time": "6:48:27", "remaining_time": "5:08:39"} +{"current_steps": 4945, "total_steps": 8680, "loss": 0.6452720165252686, "lr": 8.530928746412216e-07, "epoch": 1.1394009216589862, "percentage": 56.97, "elapsed_time": "6:48:32", "remaining_time": "5:08:34"} +{"current_steps": 4946, "total_steps": 8680, "loss": 0.7679018974304199, "lr": 8.527160350470661e-07, "epoch": 1.13963133640553, "percentage": 56.98, "elapsed_time": "6:48:37", "remaining_time": "5:08:29"} +{"current_steps": 4947, "total_steps": 8680, "loss": 0.8186824321746826, "lr": 8.523392168309832e-07, "epoch": 1.1398617511520737, "percentage": 56.99, "elapsed_time": "6:48:42", "remaining_time": "5:08:24"} +{"current_steps": 4948, "total_steps": 8680, "loss": 0.666642427444458, "lr": 8.519624200476676e-07, "epoch": 1.1400921658986176, "percentage": 57.0, "elapsed_time": "6:48:47", "remaining_time": "5:08:19"} +{"current_steps": 4949, "total_steps": 8680, "loss": 0.7478682994842529, "lr": 8.515856447518104e-07, "epoch": 1.1403225806451613, "percentage": 57.02, "elapsed_time": "6:48:54", "remaining_time": "5:08:16"} +{"current_steps": 4950, "total_steps": 8680, "loss": 
0.7527793645858765, "lr": 8.512088909981007e-07, "epoch": 1.140552995391705, "percentage": 57.03, "elapsed_time": "6:49:01", "remaining_time": "5:08:12"} +{"current_steps": 4951, "total_steps": 8680, "loss": 0.7614094018936157, "lr": 8.508321588412235e-07, "epoch": 1.1407834101382488, "percentage": 57.04, "elapsed_time": "6:49:05", "remaining_time": "5:08:07"} +{"current_steps": 4952, "total_steps": 8680, "loss": 0.8294994831085205, "lr": 8.504554483358605e-07, "epoch": 1.1410138248847925, "percentage": 57.05, "elapsed_time": "6:49:09", "remaining_time": "5:08:01"} +{"current_steps": 4953, "total_steps": 8680, "loss": 0.8900095224380493, "lr": 8.500787595366919e-07, "epoch": 1.1412442396313365, "percentage": 57.06, "elapsed_time": "6:49:14", "remaining_time": "5:07:56"} +{"current_steps": 4954, "total_steps": 8680, "loss": 0.8403744697570801, "lr": 8.497020924983926e-07, "epoch": 1.1414746543778802, "percentage": 57.07, "elapsed_time": "6:49:19", "remaining_time": "5:07:51"} +{"current_steps": 4955, "total_steps": 8680, "loss": 0.7046208381652832, "lr": 8.493254472756355e-07, "epoch": 1.141705069124424, "percentage": 57.09, "elapsed_time": "6:49:24", "remaining_time": "5:07:46"} +{"current_steps": 4956, "total_steps": 8680, "loss": 0.8226789832115173, "lr": 8.489488239230904e-07, "epoch": 1.1419354838709677, "percentage": 57.1, "elapsed_time": "6:49:30", "remaining_time": "5:07:42"} +{"current_steps": 4957, "total_steps": 8680, "loss": 0.7248969674110413, "lr": 8.485722224954236e-07, "epoch": 1.1421658986175116, "percentage": 57.11, "elapsed_time": "6:49:36", "remaining_time": "5:07:38"} +{"current_steps": 4958, "total_steps": 8680, "loss": 0.8116840124130249, "lr": 8.481956430472979e-07, "epoch": 1.1423963133640553, "percentage": 57.12, "elapsed_time": "6:49:41", "remaining_time": "5:07:33"} +{"current_steps": 4959, "total_steps": 8680, "loss": 0.7534138560295105, "lr": 8.478190856333739e-07, "epoch": 1.142626728110599, "percentage": 57.13, "elapsed_time": 
"6:49:47", "remaining_time": "5:07:29"} +{"current_steps": 4960, "total_steps": 8680, "loss": 0.8945306539535522, "lr": 8.474425503083082e-07, "epoch": 1.1428571428571428, "percentage": 57.14, "elapsed_time": "6:49:51", "remaining_time": "5:07:23"} +{"current_steps": 4961, "total_steps": 8680, "loss": 0.7554503083229065, "lr": 8.47066037126754e-07, "epoch": 1.1430875576036867, "percentage": 57.15, "elapsed_time": "6:49:58", "remaining_time": "5:07:20"} +{"current_steps": 4962, "total_steps": 8680, "loss": 0.832726776599884, "lr": 8.466895461433625e-07, "epoch": 1.1433179723502305, "percentage": 57.17, "elapsed_time": "6:50:03", "remaining_time": "5:07:15"} +{"current_steps": 4963, "total_steps": 8680, "loss": 0.8312773704528809, "lr": 8.463130774127804e-07, "epoch": 1.1435483870967742, "percentage": 57.18, "elapsed_time": "6:50:07", "remaining_time": "5:07:09"} +{"current_steps": 4964, "total_steps": 8680, "loss": 0.6484537124633789, "lr": 8.459366309896512e-07, "epoch": 1.143778801843318, "percentage": 57.19, "elapsed_time": "6:50:14", "remaining_time": "5:07:06"} +{"current_steps": 4965, "total_steps": 8680, "loss": 0.9216604828834534, "lr": 8.455602069286165e-07, "epoch": 1.1440092165898617, "percentage": 57.2, "elapsed_time": "6:50:20", "remaining_time": "5:07:01"} +{"current_steps": 4966, "total_steps": 8680, "loss": 0.6213096380233765, "lr": 8.451838052843131e-07, "epoch": 1.1442396313364056, "percentage": 57.21, "elapsed_time": "6:50:27", "remaining_time": "5:06:58"} +{"current_steps": 4967, "total_steps": 8680, "loss": 0.6873677968978882, "lr": 8.448074261113756e-07, "epoch": 1.1444700460829493, "percentage": 57.22, "elapsed_time": "6:50:31", "remaining_time": "5:06:53"} +{"current_steps": 4968, "total_steps": 8680, "loss": 0.7883448600769043, "lr": 8.444310694644348e-07, "epoch": 1.144700460829493, "percentage": 57.24, "elapsed_time": "6:50:36", "remaining_time": "5:06:48"} +{"current_steps": 4969, "total_steps": 8680, "loss": 0.724172830581665, "lr": 
8.440547353981178e-07, "epoch": 1.1449308755760368, "percentage": 57.25, "elapsed_time": "6:50:41", "remaining_time": "5:06:42"} +{"current_steps": 4970, "total_steps": 8680, "loss": 0.7115252017974854, "lr": 8.4367842396705e-07, "epoch": 1.1451612903225807, "percentage": 57.26, "elapsed_time": "6:50:46", "remaining_time": "5:06:37"} +{"current_steps": 4971, "total_steps": 8680, "loss": 0.7165110111236572, "lr": 8.433021352258521e-07, "epoch": 1.1453917050691245, "percentage": 57.27, "elapsed_time": "6:50:52", "remaining_time": "5:06:33"} +{"current_steps": 4972, "total_steps": 8680, "loss": 0.7563315629959106, "lr": 8.429258692291413e-07, "epoch": 1.1456221198156682, "percentage": 57.28, "elapsed_time": "6:50:57", "remaining_time": "5:06:29"} +{"current_steps": 4973, "total_steps": 8680, "loss": 0.7528449892997742, "lr": 8.425496260315331e-07, "epoch": 1.145852534562212, "percentage": 57.29, "elapsed_time": "6:51:02", "remaining_time": "5:06:24"} +{"current_steps": 4974, "total_steps": 8680, "loss": 0.7976171970367432, "lr": 8.421734056876383e-07, "epoch": 1.1460829493087559, "percentage": 57.3, "elapsed_time": "6:51:08", "remaining_time": "5:06:19"} +{"current_steps": 4975, "total_steps": 8680, "loss": 0.7498095035552979, "lr": 8.417972082520644e-07, "epoch": 1.1463133640552996, "percentage": 57.32, "elapsed_time": "6:51:15", "remaining_time": "5:06:16"} +{"current_steps": 4976, "total_steps": 8680, "loss": 0.9568856954574585, "lr": 8.414210337794165e-07, "epoch": 1.1465437788018433, "percentage": 57.33, "elapsed_time": "6:51:19", "remaining_time": "5:06:10"} +{"current_steps": 4977, "total_steps": 8680, "loss": 0.6402908563613892, "lr": 8.410448823242957e-07, "epoch": 1.146774193548387, "percentage": 57.34, "elapsed_time": "6:51:24", "remaining_time": "5:06:06"} +{"current_steps": 4978, "total_steps": 8680, "loss": 0.8224657773971558, "lr": 8.406687539412995e-07, "epoch": 1.1470046082949308, "percentage": 57.35, "elapsed_time": "6:51:29", "remaining_time": 
"5:06:00"} +{"current_steps": 4979, "total_steps": 8680, "loss": 0.7804544568061829, "lr": 8.402926486850229e-07, "epoch": 1.1472350230414747, "percentage": 57.36, "elapsed_time": "6:51:33", "remaining_time": "5:05:55"} +{"current_steps": 4980, "total_steps": 8680, "loss": 0.7920527458190918, "lr": 8.39916566610057e-07, "epoch": 1.1474654377880185, "percentage": 57.37, "elapsed_time": "6:51:37", "remaining_time": "5:05:49"} +{"current_steps": 4981, "total_steps": 8680, "loss": 0.7672078609466553, "lr": 8.395405077709891e-07, "epoch": 1.1476958525345622, "percentage": 57.38, "elapsed_time": "6:51:42", "remaining_time": "5:05:44"} +{"current_steps": 4982, "total_steps": 8680, "loss": 0.6997950077056885, "lr": 8.391644722224047e-07, "epoch": 1.147926267281106, "percentage": 57.4, "elapsed_time": "6:51:48", "remaining_time": "5:05:40"} +{"current_steps": 4983, "total_steps": 8680, "loss": 0.7754349708557129, "lr": 8.38788460018884e-07, "epoch": 1.1481566820276499, "percentage": 57.41, "elapsed_time": "6:51:53", "remaining_time": "5:05:35"} +{"current_steps": 4984, "total_steps": 8680, "loss": 0.706238329410553, "lr": 8.384124712150046e-07, "epoch": 1.1483870967741936, "percentage": 57.42, "elapsed_time": "6:51:57", "remaining_time": "5:05:29"} +{"current_steps": 4985, "total_steps": 8680, "loss": 0.7115224599838257, "lr": 8.380365058653415e-07, "epoch": 1.1486175115207373, "percentage": 57.43, "elapsed_time": "6:52:04", "remaining_time": "5:05:26"} +{"current_steps": 4986, "total_steps": 8680, "loss": 0.9026098847389221, "lr": 8.376605640244652e-07, "epoch": 1.148847926267281, "percentage": 57.44, "elapsed_time": "6:52:10", "remaining_time": "5:05:22"} +{"current_steps": 4987, "total_steps": 8680, "loss": 0.9123632311820984, "lr": 8.372846457469428e-07, "epoch": 1.149078341013825, "percentage": 57.45, "elapsed_time": "6:52:15", "remaining_time": "5:05:17"} +{"current_steps": 4988, "total_steps": 8680, "loss": 0.8365681171417236, "lr": 8.369087510873389e-07, "epoch": 
1.1493087557603687, "percentage": 57.47, "elapsed_time": "6:52:19", "remaining_time": "5:05:11"} +{"current_steps": 4989, "total_steps": 8680, "loss": 0.7506389617919922, "lr": 8.36532880100214e-07, "epoch": 1.1495391705069125, "percentage": 57.48, "elapsed_time": "6:52:23", "remaining_time": "5:05:05"} +{"current_steps": 4990, "total_steps": 8680, "loss": 0.7736936807632446, "lr": 8.361570328401246e-07, "epoch": 1.1497695852534562, "percentage": 57.49, "elapsed_time": "6:52:28", "remaining_time": "5:05:00"} +{"current_steps": 4991, "total_steps": 8680, "loss": 0.7364238500595093, "lr": 8.357812093616254e-07, "epoch": 1.15, "percentage": 57.5, "elapsed_time": "6:52:33", "remaining_time": "5:04:56"} +{"current_steps": 4992, "total_steps": 8680, "loss": 0.8588067293167114, "lr": 8.354054097192659e-07, "epoch": 1.1502304147465439, "percentage": 57.51, "elapsed_time": "6:52:37", "remaining_time": "5:04:50"} +{"current_steps": 4993, "total_steps": 8680, "loss": 0.777319073677063, "lr": 8.350296339675938e-07, "epoch": 1.1504608294930876, "percentage": 57.52, "elapsed_time": "6:52:43", "remaining_time": "5:04:45"} +{"current_steps": 4994, "total_steps": 8680, "loss": 0.6695454716682434, "lr": 8.346538821611517e-07, "epoch": 1.1506912442396313, "percentage": 57.53, "elapsed_time": "6:52:48", "remaining_time": "5:04:41"} +{"current_steps": 4995, "total_steps": 8680, "loss": 0.7785383462905884, "lr": 8.342781543544796e-07, "epoch": 1.150921658986175, "percentage": 57.55, "elapsed_time": "6:52:54", "remaining_time": "5:04:37"} +{"current_steps": 4996, "total_steps": 8680, "loss": 0.7386239767074585, "lr": 8.339024506021143e-07, "epoch": 1.1511520737327188, "percentage": 57.56, "elapsed_time": "6:52:59", "remaining_time": "5:04:31"} +{"current_steps": 4997, "total_steps": 8680, "loss": 0.8044750690460205, "lr": 8.335267709585884e-07, "epoch": 1.1513824884792627, "percentage": 57.57, "elapsed_time": "6:53:04", "remaining_time": "5:04:26"} +{"current_steps": 4998, "total_steps": 
8680, "loss": 0.6925652623176575, "lr": 8.331511154784307e-07, "epoch": 1.1516129032258065, "percentage": 57.58, "elapsed_time": "6:53:09", "remaining_time": "5:04:22"} +{"current_steps": 4999, "total_steps": 8680, "loss": 0.7906935214996338, "lr": 8.327754842161684e-07, "epoch": 1.1518433179723502, "percentage": 57.59, "elapsed_time": "6:53:15", "remaining_time": "5:04:18"} +{"current_steps": 5000, "total_steps": 8680, "loss": 0.7131960988044739, "lr": 8.323998772263231e-07, "epoch": 1.1520737327188941, "percentage": 57.6, "elapsed_time": "6:53:21", "remaining_time": "5:04:13"} +{"current_steps": 5001, "total_steps": 8680, "loss": 0.8412370085716248, "lr": 8.320242945634132e-07, "epoch": 1.1523041474654379, "percentage": 57.62, "elapsed_time": "6:53:28", "remaining_time": "5:04:10"} +{"current_steps": 5002, "total_steps": 8680, "loss": 0.7800952792167664, "lr": 8.316487362819551e-07, "epoch": 1.1525345622119816, "percentage": 57.63, "elapsed_time": "6:53:34", "remaining_time": "5:04:06"} +{"current_steps": 5003, "total_steps": 8680, "loss": 0.8620247840881348, "lr": 8.312732024364602e-07, "epoch": 1.1527649769585253, "percentage": 57.64, "elapsed_time": "6:53:38", "remaining_time": "5:04:00"} +{"current_steps": 5004, "total_steps": 8680, "loss": 0.7551721334457397, "lr": 8.30897693081436e-07, "epoch": 1.152995391705069, "percentage": 57.65, "elapsed_time": "6:53:45", "remaining_time": "5:03:56"} +{"current_steps": 5005, "total_steps": 8680, "loss": 0.8510593175888062, "lr": 8.305222082713882e-07, "epoch": 1.153225806451613, "percentage": 57.66, "elapsed_time": "6:53:50", "remaining_time": "5:03:52"} +{"current_steps": 5006, "total_steps": 8680, "loss": 0.6503845453262329, "lr": 8.301467480608176e-07, "epoch": 1.1534562211981567, "percentage": 57.67, "elapsed_time": "6:53:56", "remaining_time": "5:03:47"} +{"current_steps": 5007, "total_steps": 8680, "loss": 0.7729237079620361, "lr": 8.297713125042212e-07, "epoch": 1.1536866359447004, "percentage": 57.68, 
"elapsed_time": "6:54:02", "remaining_time": "5:03:44"} +{"current_steps": 5008, "total_steps": 8680, "loss": 0.77802574634552, "lr": 8.293959016560939e-07, "epoch": 1.1539170506912442, "percentage": 57.7, "elapsed_time": "6:54:08", "remaining_time": "5:03:39"} +{"current_steps": 5009, "total_steps": 8680, "loss": 0.7977825999259949, "lr": 8.290205155709256e-07, "epoch": 1.154147465437788, "percentage": 57.71, "elapsed_time": "6:54:14", "remaining_time": "5:03:35"} +{"current_steps": 5010, "total_steps": 8680, "loss": 0.7479745149612427, "lr": 8.286451543032027e-07, "epoch": 1.1543778801843319, "percentage": 57.72, "elapsed_time": "6:54:19", "remaining_time": "5:03:30"} +{"current_steps": 5011, "total_steps": 8680, "loss": 0.7631532549858093, "lr": 8.282698179074092e-07, "epoch": 1.1546082949308756, "percentage": 57.73, "elapsed_time": "6:54:25", "remaining_time": "5:03:26"} +{"current_steps": 5012, "total_steps": 8680, "loss": 0.7437061071395874, "lr": 8.278945064380243e-07, "epoch": 1.1548387096774193, "percentage": 57.74, "elapsed_time": "6:54:31", "remaining_time": "5:03:22"} +{"current_steps": 5013, "total_steps": 8680, "loss": 0.9334282875061035, "lr": 8.275192199495236e-07, "epoch": 1.1550691244239633, "percentage": 57.75, "elapsed_time": "6:54:37", "remaining_time": "5:03:17"} +{"current_steps": 5014, "total_steps": 8680, "loss": 0.7119227647781372, "lr": 8.2714395849638e-07, "epoch": 1.155299539170507, "percentage": 57.76, "elapsed_time": "6:54:41", "remaining_time": "5:03:12"} +{"current_steps": 5015, "total_steps": 8680, "loss": 0.8335816860198975, "lr": 8.267687221330619e-07, "epoch": 1.1555299539170507, "percentage": 57.78, "elapsed_time": "6:54:47", "remaining_time": "5:03:07"} +{"current_steps": 5016, "total_steps": 8680, "loss": 0.6130940914154053, "lr": 8.263935109140347e-07, "epoch": 1.1557603686635944, "percentage": 57.79, "elapsed_time": "6:54:52", "remaining_time": "5:03:03"} +{"current_steps": 5017, "total_steps": 8680, "loss": 
0.8223903179168701, "lr": 8.260183248937595e-07, "epoch": 1.1559907834101382, "percentage": 57.8, "elapsed_time": "6:54:57", "remaining_time": "5:02:58"} +{"current_steps": 5018, "total_steps": 8680, "loss": 0.8024790287017822, "lr": 8.256431641266938e-07, "epoch": 1.1562211981566821, "percentage": 57.81, "elapsed_time": "6:55:02", "remaining_time": "5:02:53"} +{"current_steps": 5019, "total_steps": 8680, "loss": 0.7425345182418823, "lr": 8.252680286672924e-07, "epoch": 1.1564516129032258, "percentage": 57.82, "elapsed_time": "6:55:08", "remaining_time": "5:02:48"} +{"current_steps": 5020, "total_steps": 8680, "loss": 0.7729727029800415, "lr": 8.248929185700053e-07, "epoch": 1.1566820276497696, "percentage": 57.83, "elapsed_time": "6:55:15", "remaining_time": "5:02:45"} +{"current_steps": 5021, "total_steps": 8680, "loss": 0.8451874256134033, "lr": 8.245178338892788e-07, "epoch": 1.1569124423963133, "percentage": 57.85, "elapsed_time": "6:55:19", "remaining_time": "5:02:40"} +{"current_steps": 5022, "total_steps": 8680, "loss": 0.8666542768478394, "lr": 8.241427746795569e-07, "epoch": 1.157142857142857, "percentage": 57.86, "elapsed_time": "6:55:24", "remaining_time": "5:02:35"} +{"current_steps": 5023, "total_steps": 8680, "loss": 0.740352988243103, "lr": 8.237677409952784e-07, "epoch": 1.157373271889401, "percentage": 57.87, "elapsed_time": "6:55:29", "remaining_time": "5:02:29"} +{"current_steps": 5024, "total_steps": 8680, "loss": 0.6325985193252563, "lr": 8.233927328908788e-07, "epoch": 1.1576036866359447, "percentage": 57.88, "elapsed_time": "6:55:33", "remaining_time": "5:02:24"} +{"current_steps": 5025, "total_steps": 8680, "loss": 0.8075892925262451, "lr": 8.230177504207901e-07, "epoch": 1.1578341013824884, "percentage": 57.89, "elapsed_time": "6:55:37", "remaining_time": "5:02:18"} +{"current_steps": 5026, "total_steps": 8680, "loss": 0.7176432609558105, "lr": 8.22642793639441e-07, "epoch": 1.1580645161290322, "percentage": 57.9, "elapsed_time": 
"6:55:43", "remaining_time": "5:02:14"} +{"current_steps": 5027, "total_steps": 8680, "loss": 0.7734829187393188, "lr": 8.222678626012554e-07, "epoch": 1.1582949308755761, "percentage": 57.91, "elapsed_time": "6:55:46", "remaining_time": "5:02:08"} +{"current_steps": 5028, "total_steps": 8680, "loss": 0.8642655611038208, "lr": 8.218929573606544e-07, "epoch": 1.1585253456221198, "percentage": 57.93, "elapsed_time": "6:55:51", "remaining_time": "5:02:03"} +{"current_steps": 5029, "total_steps": 8680, "loss": 0.7788450121879578, "lr": 8.215180779720548e-07, "epoch": 1.1587557603686636, "percentage": 57.94, "elapsed_time": "6:55:57", "remaining_time": "5:01:59"} +{"current_steps": 5030, "total_steps": 8680, "loss": 0.7470313310623169, "lr": 8.211432244898696e-07, "epoch": 1.1589861751152073, "percentage": 57.95, "elapsed_time": "6:56:05", "remaining_time": "5:01:56"} +{"current_steps": 5031, "total_steps": 8680, "loss": 0.7691675424575806, "lr": 8.207683969685091e-07, "epoch": 1.1592165898617512, "percentage": 57.96, "elapsed_time": "6:56:11", "remaining_time": "5:01:51"} +{"current_steps": 5032, "total_steps": 8680, "loss": 0.7060209512710571, "lr": 8.203935954623783e-07, "epoch": 1.159447004608295, "percentage": 57.97, "elapsed_time": "6:56:15", "remaining_time": "5:01:46"} +{"current_steps": 5033, "total_steps": 8680, "loss": 0.7617488503456116, "lr": 8.20018820025879e-07, "epoch": 1.1596774193548387, "percentage": 57.98, "elapsed_time": "6:56:21", "remaining_time": "5:01:41"} +{"current_steps": 5034, "total_steps": 8680, "loss": 0.7016350626945496, "lr": 8.196440707134102e-07, "epoch": 1.1599078341013824, "percentage": 58.0, "elapsed_time": "6:56:26", "remaining_time": "5:01:37"} +{"current_steps": 5035, "total_steps": 8680, "loss": 0.8375445604324341, "lr": 8.192693475793657e-07, "epoch": 1.1601382488479262, "percentage": 58.01, "elapsed_time": "6:56:30", "remaining_time": "5:01:31"} +{"current_steps": 5036, "total_steps": 8680, "loss": 0.8903663158416748, "lr": 
8.188946506781359e-07, "epoch": 1.16036866359447, "percentage": 58.02, "elapsed_time": "6:56:36", "remaining_time": "5:01:27"} +{"current_steps": 5037, "total_steps": 8680, "loss": 0.7613073587417603, "lr": 8.18519980064108e-07, "epoch": 1.1605990783410138, "percentage": 58.03, "elapsed_time": "6:56:42", "remaining_time": "5:01:22"} +{"current_steps": 5038, "total_steps": 8680, "loss": 0.7443521022796631, "lr": 8.181453357916649e-07, "epoch": 1.1608294930875576, "percentage": 58.04, "elapsed_time": "6:56:46", "remaining_time": "5:01:17"} +{"current_steps": 5039, "total_steps": 8680, "loss": 0.7986443042755127, "lr": 8.17770717915185e-07, "epoch": 1.1610599078341013, "percentage": 58.05, "elapsed_time": "6:56:52", "remaining_time": "5:01:13"} +{"current_steps": 5040, "total_steps": 8680, "loss": 0.7128815650939941, "lr": 8.173961264890447e-07, "epoch": 1.1612903225806452, "percentage": 58.06, "elapsed_time": "6:56:57", "remaining_time": "5:01:08"} +{"current_steps": 5041, "total_steps": 8680, "loss": 0.7189117074012756, "lr": 8.170215615676144e-07, "epoch": 1.161520737327189, "percentage": 58.08, "elapsed_time": "6:57:01", "remaining_time": "5:01:02"} +{"current_steps": 5042, "total_steps": 8680, "loss": 0.8358731269836426, "lr": 8.166470232052626e-07, "epoch": 1.1617511520737327, "percentage": 58.09, "elapsed_time": "6:57:05", "remaining_time": "5:00:57"} +{"current_steps": 5043, "total_steps": 8680, "loss": 0.7734829187393188, "lr": 8.162725114563527e-07, "epoch": 1.1619815668202764, "percentage": 58.1, "elapsed_time": "6:57:11", "remaining_time": "5:00:52"} +{"current_steps": 5044, "total_steps": 8680, "loss": 0.842268705368042, "lr": 8.158980263752443e-07, "epoch": 1.1622119815668204, "percentage": 58.11, "elapsed_time": "6:57:17", "remaining_time": "5:00:48"} +{"current_steps": 5045, "total_steps": 8680, "loss": 0.7973036766052246, "lr": 8.155235680162937e-07, "epoch": 1.162442396313364, "percentage": 58.12, "elapsed_time": "6:57:21", "remaining_time": 
"5:00:42"} +{"current_steps": 5046, "total_steps": 8680, "loss": 0.743615984916687, "lr": 8.151491364338532e-07, "epoch": 1.1626728110599078, "percentage": 58.13, "elapsed_time": "6:57:26", "remaining_time": "5:00:37"} +{"current_steps": 5047, "total_steps": 8680, "loss": 0.799458384513855, "lr": 8.147747316822705e-07, "epoch": 1.1629032258064516, "percentage": 58.15, "elapsed_time": "6:57:31", "remaining_time": "5:00:32"} +{"current_steps": 5048, "total_steps": 8680, "loss": 0.8368128538131714, "lr": 8.144003538158907e-07, "epoch": 1.1631336405529953, "percentage": 58.16, "elapsed_time": "6:57:35", "remaining_time": "5:00:27"} +{"current_steps": 5049, "total_steps": 8680, "loss": 0.8543322086334229, "lr": 8.140260028890537e-07, "epoch": 1.1633640552995392, "percentage": 58.17, "elapsed_time": "6:57:40", "remaining_time": "5:00:22"} +{"current_steps": 5050, "total_steps": 8680, "loss": 0.9586522579193115, "lr": 8.136516789560957e-07, "epoch": 1.163594470046083, "percentage": 58.18, "elapsed_time": "6:57:44", "remaining_time": "5:00:16"} +{"current_steps": 5051, "total_steps": 8680, "loss": 0.7781316041946411, "lr": 8.132773820713505e-07, "epoch": 1.1638248847926267, "percentage": 58.19, "elapsed_time": "6:57:48", "remaining_time": "5:00:10"} +{"current_steps": 5052, "total_steps": 8680, "loss": 0.7726340293884277, "lr": 8.129031122891459e-07, "epoch": 1.1640552995391704, "percentage": 58.2, "elapsed_time": "6:57:53", "remaining_time": "5:00:05"} +{"current_steps": 5053, "total_steps": 8680, "loss": 0.886093258857727, "lr": 8.125288696638064e-07, "epoch": 1.1642857142857144, "percentage": 58.21, "elapsed_time": "6:57:56", "remaining_time": "4:59:59"} +{"current_steps": 5054, "total_steps": 8680, "loss": 0.7896960973739624, "lr": 8.121546542496538e-07, "epoch": 1.164516129032258, "percentage": 58.23, "elapsed_time": "6:58:00", "remaining_time": "4:59:54"} +{"current_steps": 5055, "total_steps": 8680, "loss": 0.8272452354431152, "lr": 8.117804661010045e-07, "epoch": 
1.1647465437788018, "percentage": 58.24, "elapsed_time": "6:58:04", "remaining_time": "4:59:48"} +{"current_steps": 5056, "total_steps": 8680, "loss": 0.8452264070510864, "lr": 8.11406305272171e-07, "epoch": 1.1649769585253456, "percentage": 58.25, "elapsed_time": "6:58:10", "remaining_time": "4:59:44"} +{"current_steps": 5057, "total_steps": 8680, "loss": 0.7973369359970093, "lr": 8.11032171817463e-07, "epoch": 1.1652073732718895, "percentage": 58.26, "elapsed_time": "6:58:15", "remaining_time": "4:59:39"} +{"current_steps": 5058, "total_steps": 8680, "loss": 0.8045153617858887, "lr": 8.10658065791185e-07, "epoch": 1.1654377880184332, "percentage": 58.27, "elapsed_time": "6:58:20", "remaining_time": "4:59:34"} +{"current_steps": 5059, "total_steps": 8680, "loss": 0.8921254873275757, "lr": 8.102839872476378e-07, "epoch": 1.165668202764977, "percentage": 58.28, "elapsed_time": "6:58:26", "remaining_time": "4:59:29"} +{"current_steps": 5060, "total_steps": 8680, "loss": 0.7633669376373291, "lr": 8.099099362411191e-07, "epoch": 1.1658986175115207, "percentage": 58.29, "elapsed_time": "6:58:31", "remaining_time": "4:59:25"} +{"current_steps": 5061, "total_steps": 8680, "loss": 0.9303205013275146, "lr": 8.095359128259214e-07, "epoch": 1.1661290322580644, "percentage": 58.31, "elapsed_time": "6:58:36", "remaining_time": "4:59:20"} +{"current_steps": 5062, "total_steps": 8680, "loss": 0.867104709148407, "lr": 8.091619170563335e-07, "epoch": 1.1663594470046084, "percentage": 58.32, "elapsed_time": "6:58:41", "remaining_time": "4:59:15"} +{"current_steps": 5063, "total_steps": 8680, "loss": 0.8136844038963318, "lr": 8.087879489866409e-07, "epoch": 1.166589861751152, "percentage": 58.33, "elapsed_time": "6:58:45", "remaining_time": "4:59:09"} +{"current_steps": 5064, "total_steps": 8680, "loss": 0.9016939997673035, "lr": 8.084140086711246e-07, "epoch": 1.1668202764976958, "percentage": 58.34, "elapsed_time": "6:58:49", "remaining_time": "4:59:03"} +{"current_steps": 5065, 
"total_steps": 8680, "loss": 0.8621236085891724, "lr": 8.080400961640608e-07, "epoch": 1.1670506912442395, "percentage": 58.35, "elapsed_time": "6:58:53", "remaining_time": "4:58:58"} +{"current_steps": 5066, "total_steps": 8680, "loss": 0.856648862361908, "lr": 8.076662115197234e-07, "epoch": 1.1672811059907835, "percentage": 58.36, "elapsed_time": "6:58:57", "remaining_time": "4:58:52"} +{"current_steps": 5067, "total_steps": 8680, "loss": 0.7752784490585327, "lr": 8.072923547923805e-07, "epoch": 1.1675115207373272, "percentage": 58.38, "elapsed_time": "6:59:01", "remaining_time": "4:58:47"} +{"current_steps": 5068, "total_steps": 8680, "loss": 0.8573904037475586, "lr": 8.069185260362974e-07, "epoch": 1.167741935483871, "percentage": 58.39, "elapsed_time": "6:59:05", "remaining_time": "4:58:41"} +{"current_steps": 5069, "total_steps": 8680, "loss": 0.724372148513794, "lr": 8.065447253057347e-07, "epoch": 1.1679723502304147, "percentage": 58.4, "elapsed_time": "6:59:10", "remaining_time": "4:58:36"} +{"current_steps": 5070, "total_steps": 8680, "loss": 0.7428436875343323, "lr": 8.061709526549486e-07, "epoch": 1.1682027649769586, "percentage": 58.41, "elapsed_time": "6:59:14", "remaining_time": "4:58:30"} +{"current_steps": 5071, "total_steps": 8680, "loss": 0.8888595104217529, "lr": 8.057972081381925e-07, "epoch": 1.1684331797235024, "percentage": 58.42, "elapsed_time": "6:59:18", "remaining_time": "4:58:25"} +{"current_steps": 5072, "total_steps": 8680, "loss": 0.5753290057182312, "lr": 8.054234918097146e-07, "epoch": 1.168663594470046, "percentage": 58.43, "elapsed_time": "6:59:25", "remaining_time": "4:58:21"} +{"current_steps": 5073, "total_steps": 8680, "loss": 0.6724086999893188, "lr": 8.050498037237589e-07, "epoch": 1.1688940092165898, "percentage": 58.44, "elapsed_time": "6:59:30", "remaining_time": "4:58:16"} +{"current_steps": 5074, "total_steps": 8680, "loss": 0.7410751581192017, "lr": 8.046761439345664e-07, "epoch": 1.1691244239631335, "percentage": 
58.46, "elapsed_time": "6:59:34", "remaining_time": "4:58:11"} +{"current_steps": 5075, "total_steps": 8680, "loss": 0.8522979021072388, "lr": 8.043025124963731e-07, "epoch": 1.1693548387096775, "percentage": 58.47, "elapsed_time": "6:59:39", "remaining_time": "4:58:06"} +{"current_steps": 5076, "total_steps": 8680, "loss": 0.6243441700935364, "lr": 8.039289094634109e-07, "epoch": 1.1695852534562212, "percentage": 58.48, "elapsed_time": "6:59:45", "remaining_time": "4:58:02"} +{"current_steps": 5077, "total_steps": 8680, "loss": 0.9332150220870972, "lr": 8.03555334889908e-07, "epoch": 1.169815668202765, "percentage": 58.49, "elapsed_time": "6:59:51", "remaining_time": "4:57:57"} +{"current_steps": 5078, "total_steps": 8680, "loss": 0.7620645761489868, "lr": 8.031817888300883e-07, "epoch": 1.1700460829493087, "percentage": 58.5, "elapsed_time": "6:59:54", "remaining_time": "4:57:51"} +{"current_steps": 5079, "total_steps": 8680, "loss": 0.6983245015144348, "lr": 8.028082713381708e-07, "epoch": 1.1702764976958526, "percentage": 58.51, "elapsed_time": "6:59:59", "remaining_time": "4:57:46"} +{"current_steps": 5080, "total_steps": 8680, "loss": 0.6220129728317261, "lr": 8.024347824683723e-07, "epoch": 1.1705069124423964, "percentage": 58.53, "elapsed_time": "7:00:04", "remaining_time": "4:57:41"} +{"current_steps": 5081, "total_steps": 8680, "loss": 0.7363810539245605, "lr": 8.020613222749034e-07, "epoch": 1.17073732718894, "percentage": 58.54, "elapsed_time": "7:00:09", "remaining_time": "4:57:36"} +{"current_steps": 5082, "total_steps": 8680, "loss": 0.6864198446273804, "lr": 8.016878908119713e-07, "epoch": 1.1709677419354838, "percentage": 58.55, "elapsed_time": "7:00:15", "remaining_time": "4:57:32"} +{"current_steps": 5083, "total_steps": 8680, "loss": 0.758607029914856, "lr": 8.013144881337795e-07, "epoch": 1.1711981566820278, "percentage": 58.56, "elapsed_time": "7:00:22", "remaining_time": "4:57:28"} +{"current_steps": 5084, "total_steps": 8680, "loss": 
0.7519336938858032, "lr": 8.009411142945269e-07, "epoch": 1.1714285714285715, "percentage": 58.57, "elapsed_time": "7:00:27", "remaining_time": "4:57:23"} +{"current_steps": 5085, "total_steps": 8680, "loss": 0.7681798934936523, "lr": 8.005677693484076e-07, "epoch": 1.1716589861751152, "percentage": 58.58, "elapsed_time": "7:00:32", "remaining_time": "4:57:18"} +{"current_steps": 5086, "total_steps": 8680, "loss": 0.6808522939682007, "lr": 8.00194453349613e-07, "epoch": 1.171889400921659, "percentage": 58.59, "elapsed_time": "7:00:36", "remaining_time": "4:57:13"} +{"current_steps": 5087, "total_steps": 8680, "loss": 0.7373358607292175, "lr": 7.99821166352329e-07, "epoch": 1.1721198156682027, "percentage": 58.61, "elapsed_time": "7:00:42", "remaining_time": "4:57:09"} +{"current_steps": 5088, "total_steps": 8680, "loss": 0.7272510528564453, "lr": 7.994479084107374e-07, "epoch": 1.1723502304147466, "percentage": 58.62, "elapsed_time": "7:00:47", "remaining_time": "4:57:04"} +{"current_steps": 5089, "total_steps": 8680, "loss": 0.845584511756897, "lr": 7.990746795790166e-07, "epoch": 1.1725806451612903, "percentage": 58.63, "elapsed_time": "7:00:52", "remaining_time": "4:56:59"} +{"current_steps": 5090, "total_steps": 8680, "loss": 0.7751157283782959, "lr": 7.987014799113397e-07, "epoch": 1.172811059907834, "percentage": 58.64, "elapsed_time": "7:00:58", "remaining_time": "4:56:54"} +{"current_steps": 5091, "total_steps": 8680, "loss": 0.679701566696167, "lr": 7.98328309461877e-07, "epoch": 1.1730414746543778, "percentage": 58.65, "elapsed_time": "7:01:02", "remaining_time": "4:56:49"} +{"current_steps": 5092, "total_steps": 8680, "loss": 0.7630679607391357, "lr": 7.979551682847932e-07, "epoch": 1.1732718894009218, "percentage": 58.66, "elapsed_time": "7:01:07", "remaining_time": "4:56:44"} +{"current_steps": 5093, "total_steps": 8680, "loss": 0.700912594795227, "lr": 7.975820564342487e-07, "epoch": 1.1735023041474655, "percentage": 58.68, "elapsed_time": "7:01:13", 
"remaining_time": "4:56:39"} +{"current_steps": 5094, "total_steps": 8680, "loss": 0.6789706945419312, "lr": 7.972089739644012e-07, "epoch": 1.1737327188940092, "percentage": 58.69, "elapsed_time": "7:01:18", "remaining_time": "4:56:35"} +{"current_steps": 5095, "total_steps": 8680, "loss": 0.6744855642318726, "lr": 7.968359209294027e-07, "epoch": 1.173963133640553, "percentage": 58.7, "elapsed_time": "7:01:25", "remaining_time": "4:56:31"} +{"current_steps": 5096, "total_steps": 8680, "loss": 0.7551798820495605, "lr": 7.964628973834011e-07, "epoch": 1.1741935483870969, "percentage": 58.71, "elapsed_time": "7:01:29", "remaining_time": "4:56:26"} +{"current_steps": 5097, "total_steps": 8680, "loss": 0.711478054523468, "lr": 7.960899033805407e-07, "epoch": 1.1744239631336406, "percentage": 58.72, "elapsed_time": "7:01:35", "remaining_time": "4:56:21"} +{"current_steps": 5098, "total_steps": 8680, "loss": 0.7464019060134888, "lr": 7.95716938974961e-07, "epoch": 1.1746543778801843, "percentage": 58.73, "elapsed_time": "7:01:40", "remaining_time": "4:56:17"} +{"current_steps": 5099, "total_steps": 8680, "loss": 0.7667930126190186, "lr": 7.953440042207966e-07, "epoch": 1.174884792626728, "percentage": 58.74, "elapsed_time": "7:01:46", "remaining_time": "4:56:12"} +{"current_steps": 5100, "total_steps": 8680, "loss": 0.7574796676635742, "lr": 7.949710991721796e-07, "epoch": 1.1751152073732718, "percentage": 58.76, "elapsed_time": "7:01:50", "remaining_time": "4:56:07"} +{"current_steps": 5101, "total_steps": 8680, "loss": 0.6627304553985596, "lr": 7.945982238832361e-07, "epoch": 1.1753456221198157, "percentage": 58.77, "elapsed_time": "7:01:57", "remaining_time": "4:56:03"} +{"current_steps": 5102, "total_steps": 8680, "loss": 0.6803916692733765, "lr": 7.942253784080879e-07, "epoch": 1.1755760368663595, "percentage": 58.78, "elapsed_time": "7:02:02", "remaining_time": "4:55:58"} +{"current_steps": 5103, "total_steps": 8680, "loss": 0.7107337713241577, "lr": 
7.938525628008541e-07, "epoch": 1.1758064516129032, "percentage": 58.79, "elapsed_time": "7:02:07", "remaining_time": "4:55:53"} +{"current_steps": 5104, "total_steps": 8680, "loss": 0.7669517993927002, "lr": 7.934797771156481e-07, "epoch": 1.176036866359447, "percentage": 58.8, "elapsed_time": "7:02:12", "remaining_time": "4:55:48"} +{"current_steps": 5105, "total_steps": 8680, "loss": 0.7431854605674744, "lr": 7.931070214065787e-07, "epoch": 1.1762672811059907, "percentage": 58.81, "elapsed_time": "7:02:17", "remaining_time": "4:55:43"} +{"current_steps": 5106, "total_steps": 8680, "loss": 0.7778047323226929, "lr": 7.927342957277512e-07, "epoch": 1.1764976958525346, "percentage": 58.82, "elapsed_time": "7:02:20", "remaining_time": "4:55:37"} +{"current_steps": 5107, "total_steps": 8680, "loss": 0.7759886980056763, "lr": 7.923616001332666e-07, "epoch": 1.1767281105990783, "percentage": 58.84, "elapsed_time": "7:02:24", "remaining_time": "4:55:32"} +{"current_steps": 5108, "total_steps": 8680, "loss": 0.8010379076004028, "lr": 7.919889346772206e-07, "epoch": 1.176958525345622, "percentage": 58.85, "elapsed_time": "7:02:30", "remaining_time": "4:55:27"} +{"current_steps": 5109, "total_steps": 8680, "loss": 0.6671626567840576, "lr": 7.916162994137055e-07, "epoch": 1.177188940092166, "percentage": 58.86, "elapsed_time": "7:02:35", "remaining_time": "4:55:22"} +{"current_steps": 5110, "total_steps": 8680, "loss": 0.7521620988845825, "lr": 7.912436943968088e-07, "epoch": 1.1774193548387097, "percentage": 58.87, "elapsed_time": "7:02:40", "remaining_time": "4:55:17"} +{"current_steps": 5111, "total_steps": 8680, "loss": 0.7626729011535645, "lr": 7.908711196806131e-07, "epoch": 1.1776497695852535, "percentage": 58.88, "elapsed_time": "7:02:46", "remaining_time": "4:55:13"} +{"current_steps": 5112, "total_steps": 8680, "loss": 0.8247047066688538, "lr": 7.904985753191979e-07, "epoch": 1.1778801843317972, "percentage": 58.89, "elapsed_time": "7:02:52", "remaining_time": 
"4:55:08"} +{"current_steps": 5113, "total_steps": 8680, "loss": 0.6851831078529358, "lr": 7.901260613666372e-07, "epoch": 1.178110599078341, "percentage": 58.91, "elapsed_time": "7:02:57", "remaining_time": "4:55:04"} +{"current_steps": 5114, "total_steps": 8680, "loss": 0.7752102613449097, "lr": 7.897535778770003e-07, "epoch": 1.1783410138248849, "percentage": 58.92, "elapsed_time": "7:03:01", "remaining_time": "4:54:58"} +{"current_steps": 5115, "total_steps": 8680, "loss": 0.8885148167610168, "lr": 7.893811249043537e-07, "epoch": 1.1785714285714286, "percentage": 58.93, "elapsed_time": "7:03:06", "remaining_time": "4:54:53"} +{"current_steps": 5116, "total_steps": 8680, "loss": 0.7530373334884644, "lr": 7.890087025027579e-07, "epoch": 1.1788018433179723, "percentage": 58.94, "elapsed_time": "7:03:11", "remaining_time": "4:54:48"} +{"current_steps": 5117, "total_steps": 8680, "loss": 0.7795672416687012, "lr": 7.886363107262697e-07, "epoch": 1.179032258064516, "percentage": 58.95, "elapsed_time": "7:03:15", "remaining_time": "4:54:42"} +{"current_steps": 5118, "total_steps": 8680, "loss": 0.7563966512680054, "lr": 7.882639496289413e-07, "epoch": 1.1792626728110598, "percentage": 58.96, "elapsed_time": "7:03:20", "remaining_time": "4:54:37"} +{"current_steps": 5119, "total_steps": 8680, "loss": 0.7218793630599976, "lr": 7.878916192648198e-07, "epoch": 1.1794930875576037, "percentage": 58.97, "elapsed_time": "7:03:26", "remaining_time": "4:54:33"} +{"current_steps": 5120, "total_steps": 8680, "loss": 0.8213250637054443, "lr": 7.875193196879494e-07, "epoch": 1.1797235023041475, "percentage": 58.99, "elapsed_time": "7:03:31", "remaining_time": "4:54:28"} +{"current_steps": 5121, "total_steps": 8680, "loss": 0.8134827613830566, "lr": 7.871470509523685e-07, "epoch": 1.1799539170506912, "percentage": 59.0, "elapsed_time": "7:03:36", "remaining_time": "4:54:24"} +{"current_steps": 5122, "total_steps": 8680, "loss": 0.6135407090187073, "lr": 7.867748131121109e-07, 
"epoch": 1.1801843317972351, "percentage": 59.01, "elapsed_time": "7:03:43", "remaining_time": "4:54:20"} +{"current_steps": 5123, "total_steps": 8680, "loss": 0.8110366463661194, "lr": 7.864026062212073e-07, "epoch": 1.1804147465437789, "percentage": 59.02, "elapsed_time": "7:03:47", "remaining_time": "4:54:14"} +{"current_steps": 5124, "total_steps": 8680, "loss": 0.6723964214324951, "lr": 7.860304303336827e-07, "epoch": 1.1806451612903226, "percentage": 59.03, "elapsed_time": "7:03:53", "remaining_time": "4:54:10"} +{"current_steps": 5125, "total_steps": 8680, "loss": 0.8308886885643005, "lr": 7.856582855035577e-07, "epoch": 1.1808755760368663, "percentage": 59.04, "elapsed_time": "7:03:58", "remaining_time": "4:54:05"} +{"current_steps": 5126, "total_steps": 8680, "loss": 0.7960010766983032, "lr": 7.852861717848488e-07, "epoch": 1.18110599078341, "percentage": 59.06, "elapsed_time": "7:04:03", "remaining_time": "4:54:00"} +{"current_steps": 5127, "total_steps": 8680, "loss": 0.7931640148162842, "lr": 7.84914089231568e-07, "epoch": 1.181336405529954, "percentage": 59.07, "elapsed_time": "7:04:08", "remaining_time": "4:53:55"} +{"current_steps": 5128, "total_steps": 8680, "loss": 0.762995719909668, "lr": 7.845420378977222e-07, "epoch": 1.1815668202764977, "percentage": 59.08, "elapsed_time": "7:04:13", "remaining_time": "4:53:50"} +{"current_steps": 5129, "total_steps": 8680, "loss": 0.9416301250457764, "lr": 7.841700178373146e-07, "epoch": 1.1817972350230415, "percentage": 59.09, "elapsed_time": "7:04:18", "remaining_time": "4:53:45"} +{"current_steps": 5130, "total_steps": 8680, "loss": 0.7666923999786377, "lr": 7.837980291043431e-07, "epoch": 1.1820276497695852, "percentage": 59.1, "elapsed_time": "7:04:24", "remaining_time": "4:53:41"} +{"current_steps": 5131, "total_steps": 8680, "loss": 0.7668861150741577, "lr": 7.834260717528012e-07, "epoch": 1.182258064516129, "percentage": 59.11, "elapsed_time": "7:04:30", "remaining_time": "4:53:37"} +{"current_steps": 
5132, "total_steps": 8680, "loss": 0.7576566934585571, "lr": 7.830541458366786e-07, "epoch": 1.1824884792626729, "percentage": 59.12, "elapsed_time": "7:04:35", "remaining_time": "4:53:32"} +{"current_steps": 5133, "total_steps": 8680, "loss": 0.6288204193115234, "lr": 7.826822514099595e-07, "epoch": 1.1827188940092166, "percentage": 59.14, "elapsed_time": "7:04:40", "remaining_time": "4:53:27"} +{"current_steps": 5134, "total_steps": 8680, "loss": 0.8332630395889282, "lr": 7.823103885266236e-07, "epoch": 1.1829493087557603, "percentage": 59.15, "elapsed_time": "7:04:44", "remaining_time": "4:53:22"} +{"current_steps": 5135, "total_steps": 8680, "loss": 0.9294546246528625, "lr": 7.819385572406469e-07, "epoch": 1.1831797235023043, "percentage": 59.16, "elapsed_time": "7:04:50", "remaining_time": "4:53:17"} +{"current_steps": 5136, "total_steps": 8680, "loss": 0.637617826461792, "lr": 7.81566757606e-07, "epoch": 1.183410138248848, "percentage": 59.17, "elapsed_time": "7:04:55", "remaining_time": "4:53:12"} +{"current_steps": 5137, "total_steps": 8680, "loss": 0.7614878416061401, "lr": 7.81194989676649e-07, "epoch": 1.1836405529953917, "percentage": 59.18, "elapsed_time": "7:04:59", "remaining_time": "4:53:07"} +{"current_steps": 5138, "total_steps": 8680, "loss": 0.8612164258956909, "lr": 7.808232535065556e-07, "epoch": 1.1838709677419355, "percentage": 59.19, "elapsed_time": "7:05:04", "remaining_time": "4:53:02"} +{"current_steps": 5139, "total_steps": 8680, "loss": 0.7530151605606079, "lr": 7.804515491496765e-07, "epoch": 1.1841013824884792, "percentage": 59.21, "elapsed_time": "7:05:09", "remaining_time": "4:52:56"} +{"current_steps": 5140, "total_steps": 8680, "loss": 0.7739782929420471, "lr": 7.800798766599648e-07, "epoch": 1.1843317972350231, "percentage": 59.22, "elapsed_time": "7:05:14", "remaining_time": "4:52:52"} +{"current_steps": 5141, "total_steps": 8680, "loss": 0.7992277145385742, "lr": 7.797082360913678e-07, "epoch": 1.1845622119815669, 
"percentage": 59.23, "elapsed_time": "7:05:20", "remaining_time": "4:52:47"} +{"current_steps": 5142, "total_steps": 8680, "loss": 0.8744574785232544, "lr": 7.793366274978284e-07, "epoch": 1.1847926267281106, "percentage": 59.24, "elapsed_time": "7:05:26", "remaining_time": "4:52:43"} +{"current_steps": 5143, "total_steps": 8680, "loss": 0.7522493600845337, "lr": 7.789650509332857e-07, "epoch": 1.1850230414746543, "percentage": 59.25, "elapsed_time": "7:05:30", "remaining_time": "4:52:38"} +{"current_steps": 5144, "total_steps": 8680, "loss": 0.8811007142066956, "lr": 7.785935064516733e-07, "epoch": 1.185253456221198, "percentage": 59.26, "elapsed_time": "7:05:36", "remaining_time": "4:52:33"} +{"current_steps": 5145, "total_steps": 8680, "loss": 0.8141417503356934, "lr": 7.782219941069201e-07, "epoch": 1.185483870967742, "percentage": 59.27, "elapsed_time": "7:05:42", "remaining_time": "4:52:29"} +{"current_steps": 5146, "total_steps": 8680, "loss": 0.9473680257797241, "lr": 7.778505139529509e-07, "epoch": 1.1857142857142857, "percentage": 59.29, "elapsed_time": "7:05:47", "remaining_time": "4:52:24"} +{"current_steps": 5147, "total_steps": 8680, "loss": 0.740132212638855, "lr": 7.774790660436857e-07, "epoch": 1.1859447004608294, "percentage": 59.3, "elapsed_time": "7:05:52", "remaining_time": "4:52:19"} +{"current_steps": 5148, "total_steps": 8680, "loss": 0.7904594540596008, "lr": 7.771076504330392e-07, "epoch": 1.1861751152073732, "percentage": 59.31, "elapsed_time": "7:05:58", "remaining_time": "4:52:15"} +{"current_steps": 5149, "total_steps": 8680, "loss": 0.8085094690322876, "lr": 7.767362671749224e-07, "epoch": 1.1864055299539171, "percentage": 59.32, "elapsed_time": "7:06:03", "remaining_time": "4:52:10"} +{"current_steps": 5150, "total_steps": 8680, "loss": 0.6954756379127502, "lr": 7.76364916323241e-07, "epoch": 1.1866359447004609, "percentage": 59.33, "elapsed_time": "7:06:08", "remaining_time": "4:52:05"} +{"current_steps": 5151, "total_steps": 8680, 
"loss": 0.8575167059898376, "lr": 7.759935979318953e-07, "epoch": 1.1868663594470046, "percentage": 59.34, "elapsed_time": "7:06:12", "remaining_time": "4:51:59"} +{"current_steps": 5152, "total_steps": 8680, "loss": 0.6125110387802124, "lr": 7.756223120547829e-07, "epoch": 1.1870967741935483, "percentage": 59.35, "elapsed_time": "7:06:17", "remaining_time": "4:51:55"} +{"current_steps": 5153, "total_steps": 8680, "loss": 0.7737400531768799, "lr": 7.752510587457949e-07, "epoch": 1.1873271889400923, "percentage": 59.37, "elapsed_time": "7:06:22", "remaining_time": "4:51:50"} +{"current_steps": 5154, "total_steps": 8680, "loss": 0.7300955653190613, "lr": 7.748798380588177e-07, "epoch": 1.187557603686636, "percentage": 59.38, "elapsed_time": "7:06:28", "remaining_time": "4:51:46"} +{"current_steps": 5155, "total_steps": 8680, "loss": 0.7974356412887573, "lr": 7.745086500477343e-07, "epoch": 1.1877880184331797, "percentage": 59.39, "elapsed_time": "7:06:34", "remaining_time": "4:51:41"} +{"current_steps": 5156, "total_steps": 8680, "loss": 0.8158693313598633, "lr": 7.74137494766422e-07, "epoch": 1.1880184331797234, "percentage": 59.4, "elapsed_time": "7:06:40", "remaining_time": "4:51:37"} +{"current_steps": 5157, "total_steps": 8680, "loss": 0.6656177639961243, "lr": 7.737663722687531e-07, "epoch": 1.1882488479262672, "percentage": 59.41, "elapsed_time": "7:06:46", "remaining_time": "4:51:32"} +{"current_steps": 5158, "total_steps": 8680, "loss": 0.7796640992164612, "lr": 7.733952826085958e-07, "epoch": 1.1884792626728111, "percentage": 59.42, "elapsed_time": "7:06:51", "remaining_time": "4:51:27"} +{"current_steps": 5159, "total_steps": 8680, "loss": 0.9224779009819031, "lr": 7.730242258398135e-07, "epoch": 1.1887096774193548, "percentage": 59.44, "elapsed_time": "7:06:56", "remaining_time": "4:51:22"} +{"current_steps": 5160, "total_steps": 8680, "loss": 0.7105277180671692, "lr": 7.726532020162639e-07, "epoch": 1.1889400921658986, "percentage": 59.45, 
"elapsed_time": "7:07:01", "remaining_time": "4:51:18"} +{"current_steps": 5161, "total_steps": 8680, "loss": 0.5793930292129517, "lr": 7.722822111918012e-07, "epoch": 1.1891705069124423, "percentage": 59.46, "elapsed_time": "7:07:08", "remaining_time": "4:51:14"} +{"current_steps": 5162, "total_steps": 8680, "loss": 0.7319367527961731, "lr": 7.719112534202743e-07, "epoch": 1.1894009216589863, "percentage": 59.47, "elapsed_time": "7:07:14", "remaining_time": "4:51:10"} +{"current_steps": 5163, "total_steps": 8680, "loss": 0.7517954111099243, "lr": 7.715403287555266e-07, "epoch": 1.18963133640553, "percentage": 59.48, "elapsed_time": "7:07:20", "remaining_time": "4:51:05"} +{"current_steps": 5164, "total_steps": 8680, "loss": 0.8633241057395935, "lr": 7.711694372513981e-07, "epoch": 1.1898617511520737, "percentage": 59.49, "elapsed_time": "7:07:25", "remaining_time": "4:51:01"} +{"current_steps": 5165, "total_steps": 8680, "loss": 0.6453210115432739, "lr": 7.707985789617227e-07, "epoch": 1.1900921658986174, "percentage": 59.5, "elapsed_time": "7:07:30", "remaining_time": "4:50:56"} +{"current_steps": 5166, "total_steps": 8680, "loss": 0.7609909772872925, "lr": 7.704277539403303e-07, "epoch": 1.1903225806451614, "percentage": 59.52, "elapsed_time": "7:07:35", "remaining_time": "4:50:51"} +{"current_steps": 5167, "total_steps": 8680, "loss": 0.7419755458831787, "lr": 7.700569622410453e-07, "epoch": 1.1905529953917051, "percentage": 59.53, "elapsed_time": "7:07:40", "remaining_time": "4:50:46"} +{"current_steps": 5168, "total_steps": 8680, "loss": 0.849078357219696, "lr": 7.696862039176879e-07, "epoch": 1.1907834101382488, "percentage": 59.54, "elapsed_time": "7:07:46", "remaining_time": "4:50:42"} +{"current_steps": 5169, "total_steps": 8680, "loss": 0.8147921562194824, "lr": 7.693154790240732e-07, "epoch": 1.1910138248847926, "percentage": 59.55, "elapsed_time": "7:07:50", "remaining_time": "4:50:36"} +{"current_steps": 5170, "total_steps": 8680, "loss": 
0.7660118937492371, "lr": 7.689447876140114e-07, "epoch": 1.1912442396313363, "percentage": 59.56, "elapsed_time": "7:07:54", "remaining_time": "4:50:30"} +{"current_steps": 5171, "total_steps": 8680, "loss": 0.7775185108184814, "lr": 7.685741297413075e-07, "epoch": 1.1914746543778802, "percentage": 59.57, "elapsed_time": "7:07:59", "remaining_time": "4:50:26"} +{"current_steps": 5172, "total_steps": 8680, "loss": 0.7184321880340576, "lr": 7.682035054597624e-07, "epoch": 1.191705069124424, "percentage": 59.59, "elapsed_time": "7:08:04", "remaining_time": "4:50:20"} +{"current_steps": 5173, "total_steps": 8680, "loss": 0.7108585834503174, "lr": 7.678329148231719e-07, "epoch": 1.1919354838709677, "percentage": 59.6, "elapsed_time": "7:08:10", "remaining_time": "4:50:16"} +{"current_steps": 5174, "total_steps": 8680, "loss": 0.7252670526504517, "lr": 7.674623578853259e-07, "epoch": 1.1921658986175114, "percentage": 59.61, "elapsed_time": "7:08:14", "remaining_time": "4:50:10"} +{"current_steps": 5175, "total_steps": 8680, "loss": 0.818352460861206, "lr": 7.670918347000113e-07, "epoch": 1.1923963133640554, "percentage": 59.62, "elapsed_time": "7:08:19", "remaining_time": "4:50:05"} +{"current_steps": 5176, "total_steps": 8680, "loss": 0.6538013815879822, "lr": 7.667213453210086e-07, "epoch": 1.192626728110599, "percentage": 59.63, "elapsed_time": "7:08:26", "remaining_time": "4:50:02"} +{"current_steps": 5177, "total_steps": 8680, "loss": 0.7058148384094238, "lr": 7.663508898020935e-07, "epoch": 1.1928571428571428, "percentage": 59.64, "elapsed_time": "7:08:31", "remaining_time": "4:49:57"} +{"current_steps": 5178, "total_steps": 8680, "loss": 0.7003160715103149, "lr": 7.659804681970377e-07, "epoch": 1.1930875576036866, "percentage": 59.65, "elapsed_time": "7:08:37", "remaining_time": "4:49:53"} +{"current_steps": 5179, "total_steps": 8680, "loss": 0.84567791223526, "lr": 7.656100805596072e-07, "epoch": 1.1933179723502305, "percentage": 59.67, "elapsed_time": 
"7:08:42", "remaining_time": "4:49:48"} +{"current_steps": 5180, "total_steps": 8680, "loss": 0.7994743585586548, "lr": 7.652397269435626e-07, "epoch": 1.1935483870967742, "percentage": 59.68, "elapsed_time": "7:08:49", "remaining_time": "4:49:44"} +{"current_steps": 5181, "total_steps": 8680, "loss": 0.8177791833877563, "lr": 7.648694074026615e-07, "epoch": 1.193778801843318, "percentage": 59.69, "elapsed_time": "7:08:53", "remaining_time": "4:49:39"} +{"current_steps": 5182, "total_steps": 8680, "loss": 0.6663975715637207, "lr": 7.644991219906545e-07, "epoch": 1.1940092165898617, "percentage": 59.7, "elapsed_time": "7:08:59", "remaining_time": "4:49:34"} +{"current_steps": 5183, "total_steps": 8680, "loss": 0.8275883197784424, "lr": 7.641288707612878e-07, "epoch": 1.1942396313364054, "percentage": 59.71, "elapsed_time": "7:09:05", "remaining_time": "4:49:30"} +{"current_steps": 5184, "total_steps": 8680, "loss": 0.7710767388343811, "lr": 7.637586537683036e-07, "epoch": 1.1944700460829494, "percentage": 59.72, "elapsed_time": "7:09:09", "remaining_time": "4:49:25"} +{"current_steps": 5185, "total_steps": 8680, "loss": 0.7628582715988159, "lr": 7.633884710654382e-07, "epoch": 1.194700460829493, "percentage": 59.74, "elapsed_time": "7:09:14", "remaining_time": "4:49:20"} +{"current_steps": 5186, "total_steps": 8680, "loss": 0.7002676725387573, "lr": 7.630183227064227e-07, "epoch": 1.1949308755760368, "percentage": 59.75, "elapsed_time": "7:09:20", "remaining_time": "4:49:15"} +{"current_steps": 5187, "total_steps": 8680, "loss": 0.8272073268890381, "lr": 7.626482087449841e-07, "epoch": 1.1951612903225806, "percentage": 59.76, "elapsed_time": "7:09:26", "remaining_time": "4:49:11"} +{"current_steps": 5188, "total_steps": 8680, "loss": 0.7881417274475098, "lr": 7.622781292348435e-07, "epoch": 1.1953917050691245, "percentage": 59.77, "elapsed_time": "7:09:31", "remaining_time": "4:49:06"} +{"current_steps": 5189, "total_steps": 8680, "loss": 0.797294020652771, "lr": 
7.61908084229718e-07, "epoch": 1.1956221198156682, "percentage": 59.78, "elapsed_time": "7:09:35", "remaining_time": "4:49:01"} +{"current_steps": 5190, "total_steps": 8680, "loss": 0.7752290964126587, "lr": 7.615380737833191e-07, "epoch": 1.195852534562212, "percentage": 59.79, "elapsed_time": "7:09:40", "remaining_time": "4:48:55"} +{"current_steps": 5191, "total_steps": 8680, "loss": 0.7299143075942993, "lr": 7.611680979493525e-07, "epoch": 1.1960829493087557, "percentage": 59.8, "elapsed_time": "7:09:46", "remaining_time": "4:48:51"} +{"current_steps": 5192, "total_steps": 8680, "loss": 0.6749997138977051, "lr": 7.60798156781521e-07, "epoch": 1.1963133640552996, "percentage": 59.82, "elapsed_time": "7:09:51", "remaining_time": "4:48:46"} +{"current_steps": 5193, "total_steps": 8680, "loss": 0.7933796048164368, "lr": 7.6042825033352e-07, "epoch": 1.1965437788018434, "percentage": 59.83, "elapsed_time": "7:09:55", "remaining_time": "4:48:40"} +{"current_steps": 5194, "total_steps": 8680, "loss": 0.7214919328689575, "lr": 7.600583786590411e-07, "epoch": 1.196774193548387, "percentage": 59.84, "elapsed_time": "7:09:59", "remaining_time": "4:48:35"} +{"current_steps": 5195, "total_steps": 8680, "loss": 0.7804256081581116, "lr": 7.596885418117713e-07, "epoch": 1.1970046082949308, "percentage": 59.85, "elapsed_time": "7:10:05", "remaining_time": "4:48:31"} +{"current_steps": 5196, "total_steps": 8680, "loss": 0.7615138292312622, "lr": 7.593187398453915e-07, "epoch": 1.1972350230414746, "percentage": 59.86, "elapsed_time": "7:10:11", "remaining_time": "4:48:27"} +{"current_steps": 5197, "total_steps": 8680, "loss": 0.8473657369613647, "lr": 7.589489728135778e-07, "epoch": 1.1974654377880185, "percentage": 59.87, "elapsed_time": "7:10:18", "remaining_time": "4:48:23"} +{"current_steps": 5198, "total_steps": 8680, "loss": 0.7302027940750122, "lr": 7.585792407700018e-07, "epoch": 1.1976958525345622, "percentage": 59.88, "elapsed_time": "7:10:23", "remaining_time": 
"4:48:18"} +{"current_steps": 5199, "total_steps": 8680, "loss": 0.7631692886352539, "lr": 7.582095437683294e-07, "epoch": 1.197926267281106, "percentage": 59.9, "elapsed_time": "7:10:27", "remaining_time": "4:48:12"} +{"current_steps": 5200, "total_steps": 8680, "loss": 0.7982754707336426, "lr": 7.578398818622211e-07, "epoch": 1.1981566820276497, "percentage": 59.91, "elapsed_time": "7:10:33", "remaining_time": "4:48:08"} +{"current_steps": 5201, "total_steps": 8680, "loss": 0.8445635437965393, "lr": 7.574702551053339e-07, "epoch": 1.1983870967741936, "percentage": 59.92, "elapsed_time": "7:10:40", "remaining_time": "4:48:05"} +{"current_steps": 5202, "total_steps": 8680, "loss": 0.8486276268959045, "lr": 7.571006635513182e-07, "epoch": 1.1986175115207374, "percentage": 59.93, "elapsed_time": "7:10:45", "remaining_time": "4:47:59"} +{"current_steps": 5203, "total_steps": 8680, "loss": 0.8433184623718262, "lr": 7.567311072538191e-07, "epoch": 1.198847926267281, "percentage": 59.94, "elapsed_time": "7:10:49", "remaining_time": "4:47:54"} +{"current_steps": 5204, "total_steps": 8680, "loss": 0.9772260189056396, "lr": 7.56361586266478e-07, "epoch": 1.1990783410138248, "percentage": 59.95, "elapsed_time": "7:10:53", "remaining_time": "4:47:48"} +{"current_steps": 5205, "total_steps": 8680, "loss": 0.8349692821502686, "lr": 7.559921006429304e-07, "epoch": 1.1993087557603688, "percentage": 59.97, "elapsed_time": "7:10:59", "remaining_time": "4:47:44"} +{"current_steps": 5206, "total_steps": 8680, "loss": 0.7454575300216675, "lr": 7.556226504368059e-07, "epoch": 1.1995391705069125, "percentage": 59.98, "elapsed_time": "7:11:03", "remaining_time": "4:47:38"} +{"current_steps": 5207, "total_steps": 8680, "loss": 0.6680991649627686, "lr": 7.552532357017303e-07, "epoch": 1.1997695852534562, "percentage": 59.99, "elapsed_time": "7:11:10", "remaining_time": "4:47:35"} +{"current_steps": 5208, "total_steps": 8680, "loss": 0.6528318524360657, "lr": 7.54883856491324e-07, "epoch": 
1.2, "percentage": 60.0, "elapsed_time": "7:11:15", "remaining_time": "4:47:30"} +{"current_steps": 5209, "total_steps": 8680, "loss": 0.7711834907531738, "lr": 7.545145128592008e-07, "epoch": 1.2002304147465437, "percentage": 60.01, "elapsed_time": "7:11:19", "remaining_time": "4:47:24"} +{"current_steps": 5210, "total_steps": 8680, "loss": 0.6378746628761292, "lr": 7.541452048589714e-07, "epoch": 1.2004608294930876, "percentage": 60.02, "elapsed_time": "7:11:23", "remaining_time": "4:47:19"} +{"current_steps": 5211, "total_steps": 8680, "loss": 0.7489340305328369, "lr": 7.537759325442402e-07, "epoch": 1.2006912442396314, "percentage": 60.03, "elapsed_time": "7:11:30", "remaining_time": "4:47:15"} +{"current_steps": 5212, "total_steps": 8680, "loss": 0.7869534492492676, "lr": 7.53406695968606e-07, "epoch": 1.200921658986175, "percentage": 60.05, "elapsed_time": "7:11:36", "remaining_time": "4:47:11"} +{"current_steps": 5213, "total_steps": 8680, "loss": 0.7252482175827026, "lr": 7.530374951856637e-07, "epoch": 1.2011520737327188, "percentage": 60.06, "elapsed_time": "7:11:41", "remaining_time": "4:47:06"} +{"current_steps": 5214, "total_steps": 8680, "loss": 0.763259768486023, "lr": 7.526683302490018e-07, "epoch": 1.2013824884792628, "percentage": 60.07, "elapsed_time": "7:11:46", "remaining_time": "4:47:01"} +{"current_steps": 5215, "total_steps": 8680, "loss": 0.8135688304901123, "lr": 7.522992012122046e-07, "epoch": 1.2016129032258065, "percentage": 60.08, "elapsed_time": "7:11:51", "remaining_time": "4:46:56"} +{"current_steps": 5216, "total_steps": 8680, "loss": 0.9282290935516357, "lr": 7.519301081288504e-07, "epoch": 1.2018433179723502, "percentage": 60.09, "elapsed_time": "7:11:55", "remaining_time": "4:46:50"} +{"current_steps": 5217, "total_steps": 8680, "loss": 0.7968727946281433, "lr": 7.515610510525125e-07, "epoch": 1.202073732718894, "percentage": 60.1, "elapsed_time": "7:12:00", "remaining_time": "4:46:45"} +{"current_steps": 5218, "total_steps": 
8680, "loss": 0.9495606422424316, "lr": 7.511920300367594e-07, "epoch": 1.202304147465438, "percentage": 60.12, "elapsed_time": "7:12:04", "remaining_time": "4:46:40"} +{"current_steps": 5219, "total_steps": 8680, "loss": 0.6790425181388855, "lr": 7.508230451351537e-07, "epoch": 1.2025345622119816, "percentage": 60.13, "elapsed_time": "7:12:11", "remaining_time": "4:46:36"} +{"current_steps": 5220, "total_steps": 8680, "loss": 0.7269036173820496, "lr": 7.504540964012527e-07, "epoch": 1.2027649769585254, "percentage": 60.14, "elapsed_time": "7:12:16", "remaining_time": "4:46:31"} +{"current_steps": 5221, "total_steps": 8680, "loss": 0.820799708366394, "lr": 7.500851838886097e-07, "epoch": 1.202995391705069, "percentage": 60.15, "elapsed_time": "7:12:20", "remaining_time": "4:46:26"} +{"current_steps": 5222, "total_steps": 8680, "loss": 0.7693401575088501, "lr": 7.497163076507715e-07, "epoch": 1.2032258064516128, "percentage": 60.16, "elapsed_time": "7:12:25", "remaining_time": "4:46:21"} +{"current_steps": 5223, "total_steps": 8680, "loss": 0.7687606811523438, "lr": 7.493474677412793e-07, "epoch": 1.2034562211981568, "percentage": 60.17, "elapsed_time": "7:12:30", "remaining_time": "4:46:16"} +{"current_steps": 5224, "total_steps": 8680, "loss": 0.6858488321304321, "lr": 7.489786642136709e-07, "epoch": 1.2036866359447005, "percentage": 60.18, "elapsed_time": "7:12:37", "remaining_time": "4:46:12"} +{"current_steps": 5225, "total_steps": 8680, "loss": 0.7575044631958008, "lr": 7.486098971214769e-07, "epoch": 1.2039170506912442, "percentage": 60.2, "elapsed_time": "7:12:43", "remaining_time": "4:46:08"} +{"current_steps": 5226, "total_steps": 8680, "loss": 0.6799627542495728, "lr": 7.482411665182236e-07, "epoch": 1.204147465437788, "percentage": 60.21, "elapsed_time": "7:12:49", "remaining_time": "4:46:03"} +{"current_steps": 5227, "total_steps": 8680, "loss": 0.8882759809494019, "lr": 7.478724724574317e-07, "epoch": 1.2043778801843317, "percentage": 60.22, 
"elapsed_time": "7:12:52", "remaining_time": "4:45:57"} +{"current_steps": 5228, "total_steps": 8680, "loss": 0.7835016250610352, "lr": 7.475038149926165e-07, "epoch": 1.2046082949308756, "percentage": 60.23, "elapsed_time": "7:12:57", "remaining_time": "4:45:52"} +{"current_steps": 5229, "total_steps": 8680, "loss": 0.9264512062072754, "lr": 7.471351941772883e-07, "epoch": 1.2048387096774194, "percentage": 60.24, "elapsed_time": "7:13:01", "remaining_time": "4:45:47"} +{"current_steps": 5230, "total_steps": 8680, "loss": 0.8094228506088257, "lr": 7.467666100649521e-07, "epoch": 1.205069124423963, "percentage": 60.25, "elapsed_time": "7:13:05", "remaining_time": "4:45:41"} +{"current_steps": 5231, "total_steps": 8680, "loss": 0.7782102823257446, "lr": 7.463980627091073e-07, "epoch": 1.205299539170507, "percentage": 60.26, "elapsed_time": "7:13:09", "remaining_time": "4:45:35"} +{"current_steps": 5232, "total_steps": 8680, "loss": 0.7946768999099731, "lr": 7.460295521632474e-07, "epoch": 1.2055299539170508, "percentage": 60.28, "elapsed_time": "7:13:13", "remaining_time": "4:45:30"} +{"current_steps": 5233, "total_steps": 8680, "loss": 0.7571625709533691, "lr": 7.456610784808624e-07, "epoch": 1.2057603686635945, "percentage": 60.29, "elapsed_time": "7:13:18", "remaining_time": "4:45:25"} +{"current_steps": 5234, "total_steps": 8680, "loss": 0.9760236144065857, "lr": 7.45292641715435e-07, "epoch": 1.2059907834101382, "percentage": 60.3, "elapsed_time": "7:13:22", "remaining_time": "4:45:19"} +{"current_steps": 5235, "total_steps": 8680, "loss": 0.6370055675506592, "lr": 7.449242419204431e-07, "epoch": 1.206221198156682, "percentage": 60.31, "elapsed_time": "7:13:29", "remaining_time": "4:45:15"} +{"current_steps": 5236, "total_steps": 8680, "loss": 0.7991320490837097, "lr": 7.445558791493603e-07, "epoch": 1.206451612903226, "percentage": 60.32, "elapsed_time": "7:13:35", "remaining_time": "4:45:11"} +{"current_steps": 5237, "total_steps": 8680, "loss": 
0.8840054273605347, "lr": 7.441875534556531e-07, "epoch": 1.2066820276497696, "percentage": 60.33, "elapsed_time": "7:13:40", "remaining_time": "4:45:06"} +{"current_steps": 5238, "total_steps": 8680, "loss": 0.8634533882141113, "lr": 7.438192648927841e-07, "epoch": 1.2069124423963133, "percentage": 60.35, "elapsed_time": "7:13:45", "remaining_time": "4:45:01"} +{"current_steps": 5239, "total_steps": 8680, "loss": 0.7081723213195801, "lr": 7.434510135142098e-07, "epoch": 1.207142857142857, "percentage": 60.36, "elapsed_time": "7:13:49", "remaining_time": "4:44:56"} +{"current_steps": 5240, "total_steps": 8680, "loss": 0.7160249352455139, "lr": 7.430827993733808e-07, "epoch": 1.2073732718894008, "percentage": 60.37, "elapsed_time": "7:13:53", "remaining_time": "4:44:51"} +{"current_steps": 5241, "total_steps": 8680, "loss": 0.5323421955108643, "lr": 7.427146225237438e-07, "epoch": 1.2076036866359448, "percentage": 60.38, "elapsed_time": "7:14:00", "remaining_time": "4:44:46"} +{"current_steps": 5242, "total_steps": 8680, "loss": 0.6439197063446045, "lr": 7.423464830187386e-07, "epoch": 1.2078341013824885, "percentage": 60.39, "elapsed_time": "7:14:06", "remaining_time": "4:44:42"} +{"current_steps": 5243, "total_steps": 8680, "loss": 0.8268016576766968, "lr": 7.419783809117999e-07, "epoch": 1.2080645161290322, "percentage": 60.4, "elapsed_time": "7:14:11", "remaining_time": "4:44:37"} +{"current_steps": 5244, "total_steps": 8680, "loss": 0.8115339279174805, "lr": 7.416103162563582e-07, "epoch": 1.2082949308755762, "percentage": 60.41, "elapsed_time": "7:14:18", "remaining_time": "4:44:33"} +{"current_steps": 5245, "total_steps": 8680, "loss": 0.8677197694778442, "lr": 7.41242289105837e-07, "epoch": 1.2085253456221199, "percentage": 60.43, "elapsed_time": "7:14:23", "remaining_time": "4:44:29"} +{"current_steps": 5246, "total_steps": 8680, "loss": 0.7942948937416077, "lr": 7.408742995136547e-07, "epoch": 1.2087557603686636, "percentage": 60.44, "elapsed_time": 
"7:14:28", "remaining_time": "4:44:24"} +{"current_steps": 5247, "total_steps": 8680, "loss": 0.8457766771316528, "lr": 7.405063475332249e-07, "epoch": 1.2089861751152073, "percentage": 60.45, "elapsed_time": "7:14:33", "remaining_time": "4:44:19"} +{"current_steps": 5248, "total_steps": 8680, "loss": 0.8463923931121826, "lr": 7.401384332179552e-07, "epoch": 1.209216589861751, "percentage": 60.46, "elapsed_time": "7:14:38", "remaining_time": "4:44:14"} +{"current_steps": 5249, "total_steps": 8680, "loss": 0.9192875623703003, "lr": 7.397705566212479e-07, "epoch": 1.209447004608295, "percentage": 60.47, "elapsed_time": "7:14:42", "remaining_time": "4:44:08"} +{"current_steps": 5250, "total_steps": 8680, "loss": 0.7461347579956055, "lr": 7.394027177964999e-07, "epoch": 1.2096774193548387, "percentage": 60.48, "elapsed_time": "7:14:47", "remaining_time": "4:44:03"} +{"current_steps": 5251, "total_steps": 8680, "loss": 0.6953321695327759, "lr": 7.390349167971025e-07, "epoch": 1.2099078341013825, "percentage": 60.5, "elapsed_time": "7:14:53", "remaining_time": "4:43:59"} +{"current_steps": 5252, "total_steps": 8680, "loss": 0.7226089835166931, "lr": 7.38667153676441e-07, "epoch": 1.2101382488479262, "percentage": 60.51, "elapsed_time": "7:15:00", "remaining_time": "4:43:55"} +{"current_steps": 5253, "total_steps": 8680, "loss": 0.6746406555175781, "lr": 7.382994284878967e-07, "epoch": 1.21036866359447, "percentage": 60.52, "elapsed_time": "7:15:04", "remaining_time": "4:43:50"} +{"current_steps": 5254, "total_steps": 8680, "loss": 0.7600215673446655, "lr": 7.379317412848438e-07, "epoch": 1.2105990783410139, "percentage": 60.53, "elapsed_time": "7:15:10", "remaining_time": "4:43:45"} +{"current_steps": 5255, "total_steps": 8680, "loss": 0.7530734539031982, "lr": 7.375640921206514e-07, "epoch": 1.2108294930875576, "percentage": 60.54, "elapsed_time": "7:15:15", "remaining_time": "4:43:40"} +{"current_steps": 5256, "total_steps": 8680, "loss": 0.8103033304214478, "lr": 
7.371964810486839e-07, "epoch": 1.2110599078341013, "percentage": 60.55, "elapsed_time": "7:15:20", "remaining_time": "4:43:36"} +{"current_steps": 5257, "total_steps": 8680, "loss": 0.8916831016540527, "lr": 7.368289081222994e-07, "epoch": 1.2112903225806453, "percentage": 60.56, "elapsed_time": "7:15:24", "remaining_time": "4:43:30"} +{"current_steps": 5258, "total_steps": 8680, "loss": 0.6728129386901855, "lr": 7.364613733948501e-07, "epoch": 1.211520737327189, "percentage": 60.58, "elapsed_time": "7:15:30", "remaining_time": "4:43:26"} +{"current_steps": 5259, "total_steps": 8680, "loss": 0.8609380722045898, "lr": 7.360938769196841e-07, "epoch": 1.2117511520737327, "percentage": 60.59, "elapsed_time": "7:15:34", "remaining_time": "4:43:20"} +{"current_steps": 5260, "total_steps": 8680, "loss": 0.9370373487472534, "lr": 7.357264187501422e-07, "epoch": 1.2119815668202765, "percentage": 60.6, "elapsed_time": "7:15:39", "remaining_time": "4:43:15"} +{"current_steps": 5261, "total_steps": 8680, "loss": 0.6812434196472168, "lr": 7.353589989395604e-07, "epoch": 1.2122119815668202, "percentage": 60.61, "elapsed_time": "7:15:44", "remaining_time": "4:43:10"} +{"current_steps": 5262, "total_steps": 8680, "loss": 0.7661731243133545, "lr": 7.349916175412701e-07, "epoch": 1.2124423963133641, "percentage": 60.62, "elapsed_time": "7:15:48", "remaining_time": "4:43:05"} +{"current_steps": 5263, "total_steps": 8680, "loss": 0.7306643128395081, "lr": 7.346242746085951e-07, "epoch": 1.2126728110599079, "percentage": 60.63, "elapsed_time": "7:15:53", "remaining_time": "4:42:59"} +{"current_steps": 5264, "total_steps": 8680, "loss": 0.7189076542854309, "lr": 7.34256970194856e-07, "epoch": 1.2129032258064516, "percentage": 60.65, "elapsed_time": "7:15:58", "remaining_time": "4:42:55"} +{"current_steps": 5265, "total_steps": 8680, "loss": 0.6935977935791016, "lr": 7.338897043533656e-07, "epoch": 1.2131336405529953, "percentage": 60.66, "elapsed_time": "7:16:04", "remaining_time": 
"4:42:50"} +{"current_steps": 5266, "total_steps": 8680, "loss": 0.8451323509216309, "lr": 7.335224771374323e-07, "epoch": 1.213364055299539, "percentage": 60.67, "elapsed_time": "7:16:10", "remaining_time": "4:42:46"} +{"current_steps": 5267, "total_steps": 8680, "loss": 0.7936843037605286, "lr": 7.331552886003589e-07, "epoch": 1.213594470046083, "percentage": 60.68, "elapsed_time": "7:16:17", "remaining_time": "4:42:42"} +{"current_steps": 5268, "total_steps": 8680, "loss": 0.7989950776100159, "lr": 7.327881387954418e-07, "epoch": 1.2138248847926267, "percentage": 60.69, "elapsed_time": "7:16:21", "remaining_time": "4:42:37"} +{"current_steps": 5269, "total_steps": 8680, "loss": 0.7579236030578613, "lr": 7.324210277759726e-07, "epoch": 1.2140552995391705, "percentage": 60.7, "elapsed_time": "7:16:25", "remaining_time": "4:42:31"} +{"current_steps": 5270, "total_steps": 8680, "loss": 0.7101268768310547, "lr": 7.320539555952372e-07, "epoch": 1.2142857142857142, "percentage": 60.71, "elapsed_time": "7:16:31", "remaining_time": "4:42:27"} +{"current_steps": 5271, "total_steps": 8680, "loss": 0.7920513153076172, "lr": 7.316869223065155e-07, "epoch": 1.2145161290322581, "percentage": 60.73, "elapsed_time": "7:16:36", "remaining_time": "4:42:22"} +{"current_steps": 5272, "total_steps": 8680, "loss": 0.9241428375244141, "lr": 7.313199279630814e-07, "epoch": 1.2147465437788019, "percentage": 60.74, "elapsed_time": "7:16:41", "remaining_time": "4:42:17"} +{"current_steps": 5273, "total_steps": 8680, "loss": 0.8278338313102722, "lr": 7.309529726182044e-07, "epoch": 1.2149769585253456, "percentage": 60.75, "elapsed_time": "7:16:46", "remaining_time": "4:42:12"} +{"current_steps": 5274, "total_steps": 8680, "loss": 0.8230598568916321, "lr": 7.305860563251473e-07, "epoch": 1.2152073732718893, "percentage": 60.76, "elapsed_time": "7:16:52", "remaining_time": "4:42:08"} +{"current_steps": 5275, "total_steps": 8680, "loss": 0.7791799902915955, "lr": 7.302191791371672e-07, 
"epoch": 1.2154377880184333, "percentage": 60.77, "elapsed_time": "7:16:58", "remaining_time": "4:42:03"} +{"current_steps": 5276, "total_steps": 8680, "loss": 0.705475926399231, "lr": 7.298523411075163e-07, "epoch": 1.215668202764977, "percentage": 60.78, "elapsed_time": "7:17:03", "remaining_time": "4:41:58"} +{"current_steps": 5277, "total_steps": 8680, "loss": 0.8078421354293823, "lr": 7.294855422894406e-07, "epoch": 1.2158986175115207, "percentage": 60.79, "elapsed_time": "7:17:07", "remaining_time": "4:41:53"} +{"current_steps": 5278, "total_steps": 8680, "loss": 0.8115853667259216, "lr": 7.2911878273618e-07, "epoch": 1.2161290322580645, "percentage": 60.81, "elapsed_time": "7:17:12", "remaining_time": "4:41:48"} +{"current_steps": 5279, "total_steps": 8680, "loss": 0.6917247772216797, "lr": 7.287520625009698e-07, "epoch": 1.2163594470046082, "percentage": 60.82, "elapsed_time": "7:17:17", "remaining_time": "4:41:43"} +{"current_steps": 5280, "total_steps": 8680, "loss": 0.7131551504135132, "lr": 7.283853816370386e-07, "epoch": 1.2165898617511521, "percentage": 60.83, "elapsed_time": "7:17:21", "remaining_time": "4:41:38"} +{"current_steps": 5281, "total_steps": 8680, "loss": 0.713994562625885, "lr": 7.280187401976093e-07, "epoch": 1.2168202764976959, "percentage": 60.84, "elapsed_time": "7:17:27", "remaining_time": "4:41:33"} +{"current_steps": 5282, "total_steps": 8680, "loss": 0.7123454809188843, "lr": 7.276521382359001e-07, "epoch": 1.2170506912442396, "percentage": 60.85, "elapsed_time": "7:17:31", "remaining_time": "4:41:28"} +{"current_steps": 5283, "total_steps": 8680, "loss": 0.7805770635604858, "lr": 7.272855758051226e-07, "epoch": 1.2172811059907833, "percentage": 60.86, "elapsed_time": "7:17:35", "remaining_time": "4:41:22"} +{"current_steps": 5284, "total_steps": 8680, "loss": 0.756670355796814, "lr": 7.269190529584823e-07, "epoch": 1.2175115207373273, "percentage": 60.88, "elapsed_time": "7:17:41", "remaining_time": "4:41:18"} +{"current_steps": 
5285, "total_steps": 8680, "loss": 0.5992655754089355, "lr": 7.265525697491804e-07, "epoch": 1.217741935483871, "percentage": 60.89, "elapsed_time": "7:17:46", "remaining_time": "4:41:13"} +{"current_steps": 5286, "total_steps": 8680, "loss": 0.7552722692489624, "lr": 7.26186126230411e-07, "epoch": 1.2179723502304147, "percentage": 60.9, "elapsed_time": "7:17:50", "remaining_time": "4:41:07"} +{"current_steps": 5287, "total_steps": 8680, "loss": 0.7189064025878906, "lr": 7.258197224553627e-07, "epoch": 1.2182027649769585, "percentage": 60.91, "elapsed_time": "7:17:55", "remaining_time": "4:41:02"} +{"current_steps": 5288, "total_steps": 8680, "loss": 0.8277319669723511, "lr": 7.254533584772188e-07, "epoch": 1.2184331797235024, "percentage": 60.92, "elapsed_time": "7:17:59", "remaining_time": "4:40:57"} +{"current_steps": 5289, "total_steps": 8680, "loss": 0.6655987501144409, "lr": 7.250870343491561e-07, "epoch": 1.2186635944700461, "percentage": 60.93, "elapsed_time": "7:18:05", "remaining_time": "4:40:52"} +{"current_steps": 5290, "total_steps": 8680, "loss": 0.8654178380966187, "lr": 7.247207501243469e-07, "epoch": 1.2188940092165899, "percentage": 60.94, "elapsed_time": "7:18:10", "remaining_time": "4:40:47"} +{"current_steps": 5291, "total_steps": 8680, "loss": 0.9148486852645874, "lr": 7.243545058559564e-07, "epoch": 1.2191244239631336, "percentage": 60.96, "elapsed_time": "7:18:14", "remaining_time": "4:40:42"} +{"current_steps": 5292, "total_steps": 8680, "loss": 0.8003618717193604, "lr": 7.239883015971439e-07, "epoch": 1.2193548387096773, "percentage": 60.97, "elapsed_time": "7:18:19", "remaining_time": "4:40:37"} +{"current_steps": 5293, "total_steps": 8680, "loss": 0.7290889024734497, "lr": 7.236221374010647e-07, "epoch": 1.2195852534562213, "percentage": 60.98, "elapsed_time": "7:18:25", "remaining_time": "4:40:32"} +{"current_steps": 5294, "total_steps": 8680, "loss": 0.5989147424697876, "lr": 7.232560133208663e-07, "epoch": 1.219815668202765, 
"percentage": 60.99, "elapsed_time": "7:18:30", "remaining_time": "4:40:28"} +{"current_steps": 5295, "total_steps": 8680, "loss": 0.8424522876739502, "lr": 7.228899294096907e-07, "epoch": 1.2200460829493087, "percentage": 61.0, "elapsed_time": "7:18:36", "remaining_time": "4:40:23"} +{"current_steps": 5296, "total_steps": 8680, "loss": 0.7753746509552002, "lr": 7.225238857206754e-07, "epoch": 1.2202764976958524, "percentage": 61.01, "elapsed_time": "7:18:40", "remaining_time": "4:40:18"} +{"current_steps": 5297, "total_steps": 8680, "loss": 0.693191647529602, "lr": 7.221578823069508e-07, "epoch": 1.2205069124423964, "percentage": 61.03, "elapsed_time": "7:18:46", "remaining_time": "4:40:13"} +{"current_steps": 5298, "total_steps": 8680, "loss": 0.7561964988708496, "lr": 7.217919192216417e-07, "epoch": 1.2207373271889401, "percentage": 61.04, "elapsed_time": "7:18:51", "remaining_time": "4:40:08"} +{"current_steps": 5299, "total_steps": 8680, "loss": 0.7721199989318848, "lr": 7.214259965178673e-07, "epoch": 1.2209677419354839, "percentage": 61.05, "elapsed_time": "7:18:59", "remaining_time": "4:40:05"} +{"current_steps": 5300, "total_steps": 8680, "loss": 0.8100659251213074, "lr": 7.210601142487407e-07, "epoch": 1.2211981566820276, "percentage": 61.06, "elapsed_time": "7:19:04", "remaining_time": "4:40:01"} +{"current_steps": 5301, "total_steps": 8680, "loss": 0.6753256916999817, "lr": 7.206942724673688e-07, "epoch": 1.2214285714285715, "percentage": 61.07, "elapsed_time": "7:19:11", "remaining_time": "4:39:56"} +{"current_steps": 5302, "total_steps": 8680, "loss": 0.7534425854682922, "lr": 7.20328471226854e-07, "epoch": 1.2216589861751153, "percentage": 61.08, "elapsed_time": "7:19:17", "remaining_time": "4:39:52"} +{"current_steps": 5303, "total_steps": 8680, "loss": 0.8275027275085449, "lr": 7.199627105802913e-07, "epoch": 1.221889400921659, "percentage": 61.09, "elapsed_time": "7:19:22", "remaining_time": "4:39:47"} +{"current_steps": 5304, "total_steps": 8680, 
"loss": 0.728579580783844, "lr": 7.195969905807702e-07, "epoch": 1.2221198156682027, "percentage": 61.11, "elapsed_time": "7:19:29", "remaining_time": "4:39:44"} +{"current_steps": 5305, "total_steps": 8680, "loss": 0.8221413493156433, "lr": 7.192313112813749e-07, "epoch": 1.2223502304147464, "percentage": 61.12, "elapsed_time": "7:19:34", "remaining_time": "4:39:39"} +{"current_steps": 5306, "total_steps": 8680, "loss": 0.7819123268127441, "lr": 7.188656727351832e-07, "epoch": 1.2225806451612904, "percentage": 61.13, "elapsed_time": "7:19:39", "remaining_time": "4:39:34"} +{"current_steps": 5307, "total_steps": 8680, "loss": 0.7474294900894165, "lr": 7.185000749952666e-07, "epoch": 1.2228110599078341, "percentage": 61.14, "elapsed_time": "7:19:43", "remaining_time": "4:39:28"} +{"current_steps": 5308, "total_steps": 8680, "loss": 0.8072259426116943, "lr": 7.181345181146919e-07, "epoch": 1.2230414746543778, "percentage": 61.15, "elapsed_time": "7:19:47", "remaining_time": "4:39:23"} +{"current_steps": 5309, "total_steps": 8680, "loss": 0.8718069791793823, "lr": 7.177690021465184e-07, "epoch": 1.2232718894009216, "percentage": 61.16, "elapsed_time": "7:19:51", "remaining_time": "4:39:17"} +{"current_steps": 5310, "total_steps": 8680, "loss": 0.8374875783920288, "lr": 7.174035271438006e-07, "epoch": 1.2235023041474655, "percentage": 61.18, "elapsed_time": "7:19:57", "remaining_time": "4:39:12"} +{"current_steps": 5311, "total_steps": 8680, "loss": 0.6669566631317139, "lr": 7.170380931595869e-07, "epoch": 1.2237327188940093, "percentage": 61.19, "elapsed_time": "7:20:01", "remaining_time": "4:39:07"} +{"current_steps": 5312, "total_steps": 8680, "loss": 0.8735665678977966, "lr": 7.16672700246919e-07, "epoch": 1.223963133640553, "percentage": 61.2, "elapsed_time": "7:20:05", "remaining_time": "4:39:02"} +{"current_steps": 5313, "total_steps": 8680, "loss": 0.8312361240386963, "lr": 7.16307348458834e-07, "epoch": 1.2241935483870967, "percentage": 61.21, "elapsed_time": 
"7:20:09", "remaining_time": "4:38:56"} +{"current_steps": 5314, "total_steps": 8680, "loss": 0.7927724123001099, "lr": 7.159420378483619e-07, "epoch": 1.2244239631336407, "percentage": 61.22, "elapsed_time": "7:20:13", "remaining_time": "4:38:50"} +{"current_steps": 5315, "total_steps": 8680, "loss": 0.7641698122024536, "lr": 7.155767684685264e-07, "epoch": 1.2246543778801844, "percentage": 61.23, "elapsed_time": "7:20:18", "remaining_time": "4:38:45"} +{"current_steps": 5316, "total_steps": 8680, "loss": 0.7490028142929077, "lr": 7.15211540372347e-07, "epoch": 1.2248847926267281, "percentage": 61.24, "elapsed_time": "7:20:24", "remaining_time": "4:38:41"} +{"current_steps": 5317, "total_steps": 8680, "loss": 0.7194815874099731, "lr": 7.148463536128354e-07, "epoch": 1.2251152073732718, "percentage": 61.26, "elapsed_time": "7:20:28", "remaining_time": "4:38:36"} +{"current_steps": 5318, "total_steps": 8680, "loss": 0.8328256607055664, "lr": 7.144812082429979e-07, "epoch": 1.2253456221198156, "percentage": 61.27, "elapsed_time": "7:20:33", "remaining_time": "4:38:31"} +{"current_steps": 5319, "total_steps": 8680, "loss": 0.9124876260757446, "lr": 7.141161043158352e-07, "epoch": 1.2255760368663595, "percentage": 61.28, "elapsed_time": "7:20:39", "remaining_time": "4:38:26"} +{"current_steps": 5320, "total_steps": 8680, "loss": 0.8183319568634033, "lr": 7.137510418843416e-07, "epoch": 1.2258064516129032, "percentage": 61.29, "elapsed_time": "7:20:43", "remaining_time": "4:38:20"} +{"current_steps": 5321, "total_steps": 8680, "loss": 0.8423885107040405, "lr": 7.133860210015048e-07, "epoch": 1.226036866359447, "percentage": 61.3, "elapsed_time": "7:20:48", "remaining_time": "4:38:16"} +{"current_steps": 5322, "total_steps": 8680, "loss": 0.8175387382507324, "lr": 7.130210417203082e-07, "epoch": 1.2262672811059907, "percentage": 61.31, "elapsed_time": "7:20:54", "remaining_time": "4:38:11"} +{"current_steps": 5323, "total_steps": 8680, "loss": 0.8415048718452454, "lr": 
7.126561040937274e-07, "epoch": 1.2264976958525347, "percentage": 61.32, "elapsed_time": "7:20:59", "remaining_time": "4:38:07"} +{"current_steps": 5324, "total_steps": 8680, "loss": 0.6891156435012817, "lr": 7.122912081747321e-07, "epoch": 1.2267281105990784, "percentage": 61.34, "elapsed_time": "7:21:05", "remaining_time": "4:38:02"} +{"current_steps": 5325, "total_steps": 8680, "loss": 0.667617678642273, "lr": 7.119263540162876e-07, "epoch": 1.226958525345622, "percentage": 61.35, "elapsed_time": "7:21:10", "remaining_time": "4:37:57"} +{"current_steps": 5326, "total_steps": 8680, "loss": 0.7752082347869873, "lr": 7.115615416713517e-07, "epoch": 1.2271889400921658, "percentage": 61.36, "elapsed_time": "7:21:14", "remaining_time": "4:37:52"} +{"current_steps": 5327, "total_steps": 8680, "loss": 0.6582639813423157, "lr": 7.111967711928757e-07, "epoch": 1.2274193548387098, "percentage": 61.37, "elapsed_time": "7:21:20", "remaining_time": "4:37:47"} +{"current_steps": 5328, "total_steps": 8680, "loss": 0.6996462345123291, "lr": 7.108320426338063e-07, "epoch": 1.2276497695852535, "percentage": 61.38, "elapsed_time": "7:21:26", "remaining_time": "4:37:43"} +{"current_steps": 5329, "total_steps": 8680, "loss": 0.7132028341293335, "lr": 7.104673560470828e-07, "epoch": 1.2278801843317972, "percentage": 61.39, "elapsed_time": "7:21:31", "remaining_time": "4:37:38"} +{"current_steps": 5330, "total_steps": 8680, "loss": 0.7344096899032593, "lr": 7.101027114856395e-07, "epoch": 1.228110599078341, "percentage": 61.41, "elapsed_time": "7:21:35", "remaining_time": "4:37:33"} +{"current_steps": 5331, "total_steps": 8680, "loss": 0.7805585861206055, "lr": 7.097381090024039e-07, "epoch": 1.2283410138248847, "percentage": 61.42, "elapsed_time": "7:21:40", "remaining_time": "4:37:28"} +{"current_steps": 5332, "total_steps": 8680, "loss": 0.6785855889320374, "lr": 7.093735486502976e-07, "epoch": 1.2285714285714286, "percentage": 61.43, "elapsed_time": "7:21:45", "remaining_time": 
"4:37:23"} +{"current_steps": 5333, "total_steps": 8680, "loss": 0.7465041875839233, "lr": 7.090090304822355e-07, "epoch": 1.2288018433179724, "percentage": 61.44, "elapsed_time": "7:21:52", "remaining_time": "4:37:18"} +{"current_steps": 5334, "total_steps": 8680, "loss": 0.7400432825088501, "lr": 7.086445545511278e-07, "epoch": 1.229032258064516, "percentage": 61.45, "elapsed_time": "7:21:57", "remaining_time": "4:37:14"} +{"current_steps": 5335, "total_steps": 8680, "loss": 0.8567768335342407, "lr": 7.082801209098774e-07, "epoch": 1.2292626728110598, "percentage": 61.46, "elapsed_time": "7:22:02", "remaining_time": "4:37:09"} +{"current_steps": 5336, "total_steps": 8680, "loss": 0.7451025247573853, "lr": 7.079157296113807e-07, "epoch": 1.2294930875576038, "percentage": 61.47, "elapsed_time": "7:22:08", "remaining_time": "4:37:05"} +{"current_steps": 5337, "total_steps": 8680, "loss": 0.7178194522857666, "lr": 7.075513807085299e-07, "epoch": 1.2297235023041475, "percentage": 61.49, "elapsed_time": "7:22:13", "remaining_time": "4:37:00"} +{"current_steps": 5338, "total_steps": 8680, "loss": 0.7538058161735535, "lr": 7.071870742542086e-07, "epoch": 1.2299539170506912, "percentage": 61.5, "elapsed_time": "7:22:17", "remaining_time": "4:36:54"} +{"current_steps": 5339, "total_steps": 8680, "loss": 0.7853896021842957, "lr": 7.068228103012959e-07, "epoch": 1.230184331797235, "percentage": 61.51, "elapsed_time": "7:22:22", "remaining_time": "4:36:49"} +{"current_steps": 5340, "total_steps": 8680, "loss": 0.9359887838363647, "lr": 7.064585889026644e-07, "epoch": 1.230414746543779, "percentage": 61.52, "elapsed_time": "7:22:26", "remaining_time": "4:36:44"} +{"current_steps": 5341, "total_steps": 8680, "loss": 0.8590530753135681, "lr": 7.060944101111797e-07, "epoch": 1.2306451612903226, "percentage": 61.53, "elapsed_time": "7:22:31", "remaining_time": "4:36:38"} +{"current_steps": 5342, "total_steps": 8680, "loss": 0.7047204971313477, "lr": 7.057302739797025e-07, "epoch": 
1.2308755760368664, "percentage": 61.54, "elapsed_time": "7:22:36", "remaining_time": "4:36:34"} +{"current_steps": 5343, "total_steps": 8680, "loss": 0.8826072216033936, "lr": 7.053661805610867e-07, "epoch": 1.23110599078341, "percentage": 61.56, "elapsed_time": "7:22:41", "remaining_time": "4:36:29"} +{"current_steps": 5344, "total_steps": 8680, "loss": 0.9394192695617676, "lr": 7.050021299081792e-07, "epoch": 1.2313364055299538, "percentage": 61.57, "elapsed_time": "7:22:46", "remaining_time": "4:36:24"} +{"current_steps": 5345, "total_steps": 8680, "loss": 0.7814885377883911, "lr": 7.046381220738224e-07, "epoch": 1.2315668202764978, "percentage": 61.58, "elapsed_time": "7:22:51", "remaining_time": "4:36:19"} +{"current_steps": 5346, "total_steps": 8680, "loss": 0.781699538230896, "lr": 7.042741571108512e-07, "epoch": 1.2317972350230415, "percentage": 61.59, "elapsed_time": "7:22:57", "remaining_time": "4:36:14"} +{"current_steps": 5347, "total_steps": 8680, "loss": 0.6554632186889648, "lr": 7.039102350720946e-07, "epoch": 1.2320276497695852, "percentage": 61.6, "elapsed_time": "7:23:02", "remaining_time": "4:36:09"} +{"current_steps": 5348, "total_steps": 8680, "loss": 0.6449903249740601, "lr": 7.035463560103753e-07, "epoch": 1.232258064516129, "percentage": 61.61, "elapsed_time": "7:23:07", "remaining_time": "4:36:04"} +{"current_steps": 5349, "total_steps": 8680, "loss": 0.8222958445549011, "lr": 7.031825199785101e-07, "epoch": 1.2324884792626727, "percentage": 61.62, "elapsed_time": "7:23:11", "remaining_time": "4:35:59"} +{"current_steps": 5350, "total_steps": 8680, "loss": 0.8315533399581909, "lr": 7.02818727029309e-07, "epoch": 1.2327188940092166, "percentage": 61.64, "elapsed_time": "7:23:16", "remaining_time": "4:35:54"} +{"current_steps": 5351, "total_steps": 8680, "loss": 0.8065732717514038, "lr": 7.024549772155764e-07, "epoch": 1.2329493087557604, "percentage": 61.65, "elapsed_time": "7:23:23", "remaining_time": "4:35:50"} +{"current_steps": 5352, 
"total_steps": 8680, "loss": 0.7607216835021973, "lr": 7.020912705901101e-07, "epoch": 1.233179723502304, "percentage": 61.66, "elapsed_time": "7:23:29", "remaining_time": "4:35:46"} +{"current_steps": 5353, "total_steps": 8680, "loss": 0.877311110496521, "lr": 7.01727607205701e-07, "epoch": 1.233410138248848, "percentage": 61.67, "elapsed_time": "7:23:33", "remaining_time": "4:35:40"} +{"current_steps": 5354, "total_steps": 8680, "loss": 0.7352526187896729, "lr": 7.013639871151354e-07, "epoch": 1.2336405529953918, "percentage": 61.68, "elapsed_time": "7:23:38", "remaining_time": "4:35:36"} +{"current_steps": 5355, "total_steps": 8680, "loss": 0.7676074504852295, "lr": 7.010004103711915e-07, "epoch": 1.2338709677419355, "percentage": 61.69, "elapsed_time": "7:23:44", "remaining_time": "4:35:31"} +{"current_steps": 5356, "total_steps": 8680, "loss": 0.7802003622055054, "lr": 7.00636877026642e-07, "epoch": 1.2341013824884792, "percentage": 61.71, "elapsed_time": "7:23:48", "remaining_time": "4:35:25"} +{"current_steps": 5357, "total_steps": 8680, "loss": 0.747033953666687, "lr": 7.002733871342537e-07, "epoch": 1.234331797235023, "percentage": 61.72, "elapsed_time": "7:23:53", "remaining_time": "4:35:20"} +{"current_steps": 5358, "total_steps": 8680, "loss": 0.8086956739425659, "lr": 6.999099407467865e-07, "epoch": 1.234562211981567, "percentage": 61.73, "elapsed_time": "7:23:57", "remaining_time": "4:35:15"} +{"current_steps": 5359, "total_steps": 8680, "loss": 0.9362099170684814, "lr": 6.995465379169941e-07, "epoch": 1.2347926267281106, "percentage": 61.74, "elapsed_time": "7:24:03", "remaining_time": "4:35:11"} +{"current_steps": 5360, "total_steps": 8680, "loss": 0.6784812211990356, "lr": 6.991831786976241e-07, "epoch": 1.2350230414746544, "percentage": 61.75, "elapsed_time": "7:24:07", "remaining_time": "4:35:05"} +{"current_steps": 5361, "total_steps": 8680, "loss": 0.7733708620071411, "lr": 6.988198631414171e-07, "epoch": 1.235253456221198, "percentage": 61.76, 
"elapsed_time": "7:24:11", "remaining_time": "4:35:00"} +{"current_steps": 5362, "total_steps": 8680, "loss": 0.8747115135192871, "lr": 6.984565913011087e-07, "epoch": 1.2354838709677418, "percentage": 61.77, "elapsed_time": "7:24:16", "remaining_time": "4:34:55"} +{"current_steps": 5363, "total_steps": 8680, "loss": 0.6947430372238159, "lr": 6.980933632294268e-07, "epoch": 1.2357142857142858, "percentage": 61.79, "elapsed_time": "7:24:22", "remaining_time": "4:34:50"} +{"current_steps": 5364, "total_steps": 8680, "loss": 0.7128404378890991, "lr": 6.97730178979093e-07, "epoch": 1.2359447004608295, "percentage": 61.8, "elapsed_time": "7:24:28", "remaining_time": "4:34:46"} +{"current_steps": 5365, "total_steps": 8680, "loss": 0.7190830707550049, "lr": 6.973670386028242e-07, "epoch": 1.2361751152073732, "percentage": 61.81, "elapsed_time": "7:24:33", "remaining_time": "4:34:41"} +{"current_steps": 5366, "total_steps": 8680, "loss": 0.7625770568847656, "lr": 6.970039421533291e-07, "epoch": 1.2364055299539172, "percentage": 61.82, "elapsed_time": "7:24:38", "remaining_time": "4:34:36"} +{"current_steps": 5367, "total_steps": 8680, "loss": 0.7942707538604736, "lr": 6.966408896833104e-07, "epoch": 1.236635944700461, "percentage": 61.83, "elapsed_time": "7:24:43", "remaining_time": "4:34:31"} +{"current_steps": 5368, "total_steps": 8680, "loss": 0.8329455852508545, "lr": 6.962778812454652e-07, "epoch": 1.2368663594470046, "percentage": 61.84, "elapsed_time": "7:24:48", "remaining_time": "4:34:26"} +{"current_steps": 5369, "total_steps": 8680, "loss": 0.6034290790557861, "lr": 6.959149168924833e-07, "epoch": 1.2370967741935484, "percentage": 61.85, "elapsed_time": "7:24:54", "remaining_time": "4:34:21"} +{"current_steps": 5370, "total_steps": 8680, "loss": 0.8424680233001709, "lr": 6.955519966770486e-07, "epoch": 1.237327188940092, "percentage": 61.87, "elapsed_time": "7:24:58", "remaining_time": "4:34:16"} +{"current_steps": 5371, "total_steps": 8680, "loss": 
0.8670322895050049, "lr": 6.951891206518388e-07, "epoch": 1.237557603686636, "percentage": 61.88, "elapsed_time": "7:25:03", "remaining_time": "4:34:11"} +{"current_steps": 5372, "total_steps": 8680, "loss": 0.7283621430397034, "lr": 6.948262888695244e-07, "epoch": 1.2377880184331798, "percentage": 61.89, "elapsed_time": "7:25:08", "remaining_time": "4:34:06"} +{"current_steps": 5373, "total_steps": 8680, "loss": 0.7990118265151978, "lr": 6.9446350138277e-07, "epoch": 1.2380184331797235, "percentage": 61.9, "elapsed_time": "7:25:13", "remaining_time": "4:34:01"} +{"current_steps": 5374, "total_steps": 8680, "loss": 0.945558488368988, "lr": 6.941007582442342e-07, "epoch": 1.2382488479262672, "percentage": 61.91, "elapsed_time": "7:25:17", "remaining_time": "4:33:55"} +{"current_steps": 5375, "total_steps": 8680, "loss": 0.6905936002731323, "lr": 6.937380595065685e-07, "epoch": 1.238479262672811, "percentage": 61.92, "elapsed_time": "7:25:23", "remaining_time": "4:33:51"} +{"current_steps": 5376, "total_steps": 8680, "loss": 0.7757662534713745, "lr": 6.933754052224176e-07, "epoch": 1.238709677419355, "percentage": 61.94, "elapsed_time": "7:25:29", "remaining_time": "4:33:47"} +{"current_steps": 5377, "total_steps": 8680, "loss": 0.63062584400177, "lr": 6.930127954444209e-07, "epoch": 1.2389400921658986, "percentage": 61.95, "elapsed_time": "7:25:36", "remaining_time": "4:33:43"} +{"current_steps": 5378, "total_steps": 8680, "loss": 0.7341021299362183, "lr": 6.926502302252109e-07, "epoch": 1.2391705069124423, "percentage": 61.96, "elapsed_time": "7:25:41", "remaining_time": "4:33:38"} +{"current_steps": 5379, "total_steps": 8680, "loss": 0.572767972946167, "lr": 6.922877096174127e-07, "epoch": 1.2394009216589863, "percentage": 61.97, "elapsed_time": "7:25:48", "remaining_time": "4:33:34"} +{"current_steps": 5380, "total_steps": 8680, "loss": 0.630276083946228, "lr": 6.919252336736463e-07, "epoch": 1.23963133640553, "percentage": 61.98, "elapsed_time": "7:25:54", 
"remaining_time": "4:33:30"} +{"current_steps": 5381, "total_steps": 8680, "loss": 0.668334424495697, "lr": 6.915628024465244e-07, "epoch": 1.2398617511520738, "percentage": 61.99, "elapsed_time": "7:25:59", "remaining_time": "4:33:25"} +{"current_steps": 5382, "total_steps": 8680, "loss": 0.6766513586044312, "lr": 6.912004159886529e-07, "epoch": 1.2400921658986175, "percentage": 62.0, "elapsed_time": "7:26:04", "remaining_time": "4:33:20"} +{"current_steps": 5383, "total_steps": 8680, "loss": 0.7016473412513733, "lr": 6.908380743526328e-07, "epoch": 1.2403225806451612, "percentage": 62.02, "elapsed_time": "7:26:10", "remaining_time": "4:33:16"} +{"current_steps": 5384, "total_steps": 8680, "loss": 0.8837979435920715, "lr": 6.904757775910568e-07, "epoch": 1.2405529953917052, "percentage": 62.03, "elapsed_time": "7:26:16", "remaining_time": "4:33:12"} +{"current_steps": 5385, "total_steps": 8680, "loss": 0.7187714576721191, "lr": 6.901135257565116e-07, "epoch": 1.2407834101382489, "percentage": 62.04, "elapsed_time": "7:26:22", "remaining_time": "4:33:07"} +{"current_steps": 5386, "total_steps": 8680, "loss": 0.8227157592773438, "lr": 6.897513189015782e-07, "epoch": 1.2410138248847926, "percentage": 62.05, "elapsed_time": "7:26:27", "remaining_time": "4:33:03"} +{"current_steps": 5387, "total_steps": 8680, "loss": 0.8812209367752075, "lr": 6.893891570788301e-07, "epoch": 1.2412442396313363, "percentage": 62.06, "elapsed_time": "7:26:32", "remaining_time": "4:32:57"} +{"current_steps": 5388, "total_steps": 8680, "loss": 0.6702297925949097, "lr": 6.890270403408348e-07, "epoch": 1.24147465437788, "percentage": 62.07, "elapsed_time": "7:26:39", "remaining_time": "4:32:53"} +{"current_steps": 5389, "total_steps": 8680, "loss": 0.646358847618103, "lr": 6.886649687401529e-07, "epoch": 1.241705069124424, "percentage": 62.09, "elapsed_time": "7:26:44", "remaining_time": "4:32:48"} +{"current_steps": 5390, "total_steps": 8680, "loss": 0.6514080762863159, "lr": 
6.883029423293383e-07, "epoch": 1.2419354838709677, "percentage": 62.1, "elapsed_time": "7:26:48", "remaining_time": "4:32:43"} +{"current_steps": 5391, "total_steps": 8680, "loss": 0.6938437819480896, "lr": 6.879409611609393e-07, "epoch": 1.2421658986175115, "percentage": 62.11, "elapsed_time": "7:26:53", "remaining_time": "4:32:38"} +{"current_steps": 5392, "total_steps": 8680, "loss": 0.8601399064064026, "lr": 6.875790252874967e-07, "epoch": 1.2423963133640552, "percentage": 62.12, "elapsed_time": "7:26:59", "remaining_time": "4:32:34"} +{"current_steps": 5393, "total_steps": 8680, "loss": 0.6641080379486084, "lr": 6.872171347615445e-07, "epoch": 1.2426267281105992, "percentage": 62.13, "elapsed_time": "7:27:04", "remaining_time": "4:32:29"} +{"current_steps": 5394, "total_steps": 8680, "loss": 0.7109012603759766, "lr": 6.868552896356117e-07, "epoch": 1.2428571428571429, "percentage": 62.14, "elapsed_time": "7:27:10", "remaining_time": "4:32:24"} +{"current_steps": 5395, "total_steps": 8680, "loss": 0.8558728694915771, "lr": 6.864934899622191e-07, "epoch": 1.2430875576036866, "percentage": 62.15, "elapsed_time": "7:27:14", "remaining_time": "4:32:19"} +{"current_steps": 5396, "total_steps": 8680, "loss": 0.6119382977485657, "lr": 6.861317357938807e-07, "epoch": 1.2433179723502303, "percentage": 62.17, "elapsed_time": "7:27:20", "remaining_time": "4:32:15"} +{"current_steps": 5397, "total_steps": 8680, "loss": 0.7527587413787842, "lr": 6.857700271831059e-07, "epoch": 1.2435483870967743, "percentage": 62.18, "elapsed_time": "7:27:26", "remaining_time": "4:32:10"} +{"current_steps": 5398, "total_steps": 8680, "loss": 0.8082761168479919, "lr": 6.854083641823957e-07, "epoch": 1.243778801843318, "percentage": 62.19, "elapsed_time": "7:27:33", "remaining_time": "4:32:07"} +{"current_steps": 5399, "total_steps": 8680, "loss": 0.7289307117462158, "lr": 6.850467468442447e-07, "epoch": 1.2440092165898617, "percentage": 62.2, "elapsed_time": "7:27:37", "remaining_time": 
"4:32:01"} +{"current_steps": 5400, "total_steps": 8680, "loss": 0.8824148178100586, "lr": 6.846851752211418e-07, "epoch": 1.2442396313364055, "percentage": 62.21, "elapsed_time": "7:27:43", "remaining_time": "4:31:57"} +{"current_steps": 5401, "total_steps": 8680, "loss": 0.7046724557876587, "lr": 6.843236493655682e-07, "epoch": 1.2444700460829492, "percentage": 62.22, "elapsed_time": "7:27:50", "remaining_time": "4:31:53"} +{"current_steps": 5402, "total_steps": 8680, "loss": 0.8192921876907349, "lr": 6.839621693299987e-07, "epoch": 1.2447004608294931, "percentage": 62.24, "elapsed_time": "7:27:55", "remaining_time": "4:31:48"} +{"current_steps": 5403, "total_steps": 8680, "loss": 0.7651070356369019, "lr": 6.83600735166902e-07, "epoch": 1.2449308755760369, "percentage": 62.25, "elapsed_time": "7:28:02", "remaining_time": "4:31:44"} +{"current_steps": 5404, "total_steps": 8680, "loss": 0.7689340114593506, "lr": 6.832393469287401e-07, "epoch": 1.2451612903225806, "percentage": 62.26, "elapsed_time": "7:28:07", "remaining_time": "4:31:39"} +{"current_steps": 5405, "total_steps": 8680, "loss": 0.9214832782745361, "lr": 6.828780046679671e-07, "epoch": 1.2453917050691243, "percentage": 62.27, "elapsed_time": "7:28:14", "remaining_time": "4:31:35"} +{"current_steps": 5406, "total_steps": 8680, "loss": 0.7210682034492493, "lr": 6.825167084370322e-07, "epoch": 1.2456221198156683, "percentage": 62.28, "elapsed_time": "7:28:21", "remaining_time": "4:31:32"} +{"current_steps": 5407, "total_steps": 8680, "loss": 0.871317446231842, "lr": 6.82155458288377e-07, "epoch": 1.245852534562212, "percentage": 62.29, "elapsed_time": "7:28:25", "remaining_time": "4:31:26"} +{"current_steps": 5408, "total_steps": 8680, "loss": 0.7669065594673157, "lr": 6.817942542744359e-07, "epoch": 1.2460829493087557, "percentage": 62.3, "elapsed_time": "7:28:29", "remaining_time": "4:31:20"} +{"current_steps": 5409, "total_steps": 8680, "loss": 0.7317448854446411, "lr": 6.814330964476379e-07, "epoch": 
1.2463133640552995, "percentage": 62.32, "elapsed_time": "7:28:34", "remaining_time": "4:31:15"} +{"current_steps": 5410, "total_steps": 8680, "loss": 0.7873220443725586, "lr": 6.810719848604036e-07, "epoch": 1.2465437788018434, "percentage": 62.33, "elapsed_time": "7:28:40", "remaining_time": "4:31:11"} +{"current_steps": 5411, "total_steps": 8680, "loss": 0.713294267654419, "lr": 6.807109195651492e-07, "epoch": 1.2467741935483871, "percentage": 62.34, "elapsed_time": "7:28:44", "remaining_time": "4:31:06"} +{"current_steps": 5412, "total_steps": 8680, "loss": 0.7592979669570923, "lr": 6.803499006142819e-07, "epoch": 1.2470046082949309, "percentage": 62.35, "elapsed_time": "7:28:48", "remaining_time": "4:31:00"} +{"current_steps": 5413, "total_steps": 8680, "loss": 0.7805737257003784, "lr": 6.79988928060203e-07, "epoch": 1.2472350230414746, "percentage": 62.36, "elapsed_time": "7:28:54", "remaining_time": "4:30:55"} +{"current_steps": 5414, "total_steps": 8680, "loss": 0.7706440687179565, "lr": 6.79628001955308e-07, "epoch": 1.2474654377880183, "percentage": 62.37, "elapsed_time": "7:28:59", "remaining_time": "4:30:51"} +{"current_steps": 5415, "total_steps": 8680, "loss": 0.772534966468811, "lr": 6.792671223519844e-07, "epoch": 1.2476958525345623, "percentage": 62.38, "elapsed_time": "7:29:03", "remaining_time": "4:30:45"} +{"current_steps": 5416, "total_steps": 8680, "loss": 0.7939096093177795, "lr": 6.789062893026129e-07, "epoch": 1.247926267281106, "percentage": 62.4, "elapsed_time": "7:29:08", "remaining_time": "4:30:40"} +{"current_steps": 5417, "total_steps": 8680, "loss": 0.7062902450561523, "lr": 6.78545502859569e-07, "epoch": 1.2481566820276497, "percentage": 62.41, "elapsed_time": "7:29:13", "remaining_time": "4:30:35"} +{"current_steps": 5418, "total_steps": 8680, "loss": 0.8296496868133545, "lr": 6.781847630752197e-07, "epoch": 1.2483870967741935, "percentage": 62.42, "elapsed_time": "7:29:18", "remaining_time": "4:30:30"} +{"current_steps": 5419, 
"total_steps": 8680, "loss": 0.926125168800354, "lr": 6.778240700019258e-07, "epoch": 1.2486175115207374, "percentage": 62.43, "elapsed_time": "7:29:23", "remaining_time": "4:30:25"} +{"current_steps": 5420, "total_steps": 8680, "loss": 0.7301739454269409, "lr": 6.774634236920419e-07, "epoch": 1.2488479262672811, "percentage": 62.44, "elapsed_time": "7:29:28", "remaining_time": "4:30:20"} +{"current_steps": 5421, "total_steps": 8680, "loss": 0.7313426733016968, "lr": 6.771028241979151e-07, "epoch": 1.2490783410138249, "percentage": 62.45, "elapsed_time": "7:29:32", "remaining_time": "4:30:15"} +{"current_steps": 5422, "total_steps": 8680, "loss": 0.7193025946617126, "lr": 6.767422715718853e-07, "epoch": 1.2493087557603686, "percentage": 62.47, "elapsed_time": "7:29:37", "remaining_time": "4:30:10"} +{"current_steps": 5423, "total_steps": 8680, "loss": 0.6544638872146606, "lr": 6.763817658662874e-07, "epoch": 1.2495391705069125, "percentage": 62.48, "elapsed_time": "7:29:42", "remaining_time": "4:30:05"} +{"current_steps": 5424, "total_steps": 8680, "loss": 0.8402822613716125, "lr": 6.760213071334478e-07, "epoch": 1.2497695852534563, "percentage": 62.49, "elapsed_time": "7:29:47", "remaining_time": "4:30:00"} +{"current_steps": 5425, "total_steps": 8680, "loss": 0.6840100288391113, "lr": 6.756608954256861e-07, "epoch": 1.25, "percentage": 62.5, "elapsed_time": "7:29:53", "remaining_time": "4:29:55"} +{"current_steps": 5426, "total_steps": 8680, "loss": 0.7315107583999634, "lr": 6.753005307953165e-07, "epoch": 1.2502304147465437, "percentage": 62.51, "elapsed_time": "7:29:57", "remaining_time": "4:29:50"} +{"current_steps": 5427, "total_steps": 8680, "loss": 0.6369785070419312, "lr": 6.74940213294645e-07, "epoch": 1.2504608294930875, "percentage": 62.52, "elapsed_time": "7:30:01", "remaining_time": "4:29:45"} +{"current_steps": 5428, "total_steps": 8680, "loss": 0.7700424790382385, "lr": 6.745799429759711e-07, "epoch": 1.2506912442396314, "percentage": 62.53, 
"elapsed_time": "7:30:05", "remaining_time": "4:29:39"} +{"current_steps": 5429, "total_steps": 8680, "loss": 0.7436221241950989, "lr": 6.742197198915877e-07, "epoch": 1.2509216589861751, "percentage": 62.55, "elapsed_time": "7:30:10", "remaining_time": "4:29:34"} +{"current_steps": 5430, "total_steps": 8680, "loss": 0.8028342723846436, "lr": 6.738595440937809e-07, "epoch": 1.2511520737327189, "percentage": 62.56, "elapsed_time": "7:30:15", "remaining_time": "4:29:29"} +{"current_steps": 5431, "total_steps": 8680, "loss": 0.7705515623092651, "lr": 6.734994156348288e-07, "epoch": 1.2513824884792628, "percentage": 62.57, "elapsed_time": "7:30:20", "remaining_time": "4:29:24"} +{"current_steps": 5432, "total_steps": 8680, "loss": 0.7110899686813354, "lr": 6.73139334567005e-07, "epoch": 1.2516129032258063, "percentage": 62.58, "elapsed_time": "7:30:25", "remaining_time": "4:29:19"} +{"current_steps": 5433, "total_steps": 8680, "loss": 0.7495337128639221, "lr": 6.727793009425739e-07, "epoch": 1.2518433179723503, "percentage": 62.59, "elapsed_time": "7:30:29", "remaining_time": "4:29:14"} +{"current_steps": 5434, "total_steps": 8680, "loss": 0.7735337018966675, "lr": 6.724193148137938e-07, "epoch": 1.252073732718894, "percentage": 62.6, "elapsed_time": "7:30:35", "remaining_time": "4:29:09"} +{"current_steps": 5435, "total_steps": 8680, "loss": 0.8655617237091064, "lr": 6.720593762329167e-07, "epoch": 1.2523041474654377, "percentage": 62.62, "elapsed_time": "7:30:39", "remaining_time": "4:29:04"} +{"current_steps": 5436, "total_steps": 8680, "loss": 0.7989616394042969, "lr": 6.716994852521871e-07, "epoch": 1.2525345622119817, "percentage": 62.63, "elapsed_time": "7:30:44", "remaining_time": "4:28:58"} +{"current_steps": 5437, "total_steps": 8680, "loss": 0.8090296983718872, "lr": 6.713396419238424e-07, "epoch": 1.2527649769585254, "percentage": 62.64, "elapsed_time": "7:30:48", "remaining_time": "4:28:53"} +{"current_steps": 5438, "total_steps": 8680, "loss": 
0.7150726318359375, "lr": 6.709798463001138e-07, "epoch": 1.2529953917050691, "percentage": 62.65, "elapsed_time": "7:30:53", "remaining_time": "4:28:48"} +{"current_steps": 5439, "total_steps": 8680, "loss": 0.7136287689208984, "lr": 6.706200984332249e-07, "epoch": 1.2532258064516129, "percentage": 62.66, "elapsed_time": "7:30:58", "remaining_time": "4:28:43"} +{"current_steps": 5440, "total_steps": 8680, "loss": 0.8538687229156494, "lr": 6.702603983753927e-07, "epoch": 1.2534562211981566, "percentage": 62.67, "elapsed_time": "7:31:04", "remaining_time": "4:28:39"} +{"current_steps": 5441, "total_steps": 8680, "loss": 0.7960666418075562, "lr": 6.699007461788272e-07, "epoch": 1.2536866359447005, "percentage": 62.68, "elapsed_time": "7:31:10", "remaining_time": "4:28:34"} +{"current_steps": 5442, "total_steps": 8680, "loss": 0.7462595701217651, "lr": 6.695411418957309e-07, "epoch": 1.2539170506912443, "percentage": 62.7, "elapsed_time": "7:31:14", "remaining_time": "4:28:29"} +{"current_steps": 5443, "total_steps": 8680, "loss": 0.795913577079773, "lr": 6.691815855783009e-07, "epoch": 1.254147465437788, "percentage": 62.71, "elapsed_time": "7:31:18", "remaining_time": "4:28:24"} +{"current_steps": 5444, "total_steps": 8680, "loss": 0.7589330077171326, "lr": 6.688220772787258e-07, "epoch": 1.2543778801843317, "percentage": 62.72, "elapsed_time": "7:31:24", "remaining_time": "4:28:19"} +{"current_steps": 5445, "total_steps": 8680, "loss": 0.7719615697860718, "lr": 6.684626170491874e-07, "epoch": 1.2546082949308754, "percentage": 62.73, "elapsed_time": "7:31:30", "remaining_time": "4:28:15"} +{"current_steps": 5446, "total_steps": 8680, "loss": 0.8516664505004883, "lr": 6.681032049418616e-07, "epoch": 1.2548387096774194, "percentage": 62.74, "elapsed_time": "7:31:36", "remaining_time": "4:28:10"} +{"current_steps": 5447, "total_steps": 8680, "loss": 0.8597210049629211, "lr": 6.677438410089163e-07, "epoch": 1.2550691244239631, "percentage": 62.75, "elapsed_time": 
"7:31:39", "remaining_time": "4:28:04"} +{"current_steps": 5448, "total_steps": 8680, "loss": 0.7101171016693115, "lr": 6.673845253025124e-07, "epoch": 1.2552995391705069, "percentage": 62.76, "elapsed_time": "7:31:45", "remaining_time": "4:28:00"} +{"current_steps": 5449, "total_steps": 8680, "loss": 0.6946178078651428, "lr": 6.670252578748044e-07, "epoch": 1.2555299539170508, "percentage": 62.78, "elapsed_time": "7:31:51", "remaining_time": "4:27:55"} +{"current_steps": 5450, "total_steps": 8680, "loss": 0.9912126660346985, "lr": 6.666660387779395e-07, "epoch": 1.2557603686635945, "percentage": 62.79, "elapsed_time": "7:31:54", "remaining_time": "4:27:49"} +{"current_steps": 5451, "total_steps": 8680, "loss": 0.6495379209518433, "lr": 6.663068680640573e-07, "epoch": 1.2559907834101383, "percentage": 62.8, "elapsed_time": "7:31:59", "remaining_time": "4:27:44"} +{"current_steps": 5452, "total_steps": 8680, "loss": 0.6276426315307617, "lr": 6.65947745785292e-07, "epoch": 1.256221198156682, "percentage": 62.81, "elapsed_time": "7:32:05", "remaining_time": "4:27:40"} +{"current_steps": 5453, "total_steps": 8680, "loss": 0.7273461818695068, "lr": 6.655886719937691e-07, "epoch": 1.2564516129032257, "percentage": 62.82, "elapsed_time": "7:32:10", "remaining_time": "4:27:35"} +{"current_steps": 5454, "total_steps": 8680, "loss": 0.8248249292373657, "lr": 6.652296467416073e-07, "epoch": 1.2566820276497697, "percentage": 62.83, "elapsed_time": "7:32:14", "remaining_time": "4:27:29"} +{"current_steps": 5455, "total_steps": 8680, "loss": 0.8709753751754761, "lr": 6.648706700809196e-07, "epoch": 1.2569124423963134, "percentage": 62.85, "elapsed_time": "7:32:19", "remaining_time": "4:27:24"} +{"current_steps": 5456, "total_steps": 8680, "loss": 0.8207283020019531, "lr": 6.645117420638105e-07, "epoch": 1.2571428571428571, "percentage": 62.86, "elapsed_time": "7:32:23", "remaining_time": "4:27:19"} +{"current_steps": 5457, "total_steps": 8680, "loss": 0.8222801685333252, "lr": 
6.641528627423774e-07, "epoch": 1.2573732718894008, "percentage": 62.87, "elapsed_time": "7:32:28", "remaining_time": "4:27:14"} +{"current_steps": 5458, "total_steps": 8680, "loss": 0.7684904336929321, "lr": 6.637940321687121e-07, "epoch": 1.2576036866359446, "percentage": 62.88, "elapsed_time": "7:32:33", "remaining_time": "4:27:09"} +{"current_steps": 5459, "total_steps": 8680, "loss": 0.7930517196655273, "lr": 6.634352503948979e-07, "epoch": 1.2578341013824885, "percentage": 62.89, "elapsed_time": "7:32:37", "remaining_time": "4:27:03"} +{"current_steps": 5460, "total_steps": 8680, "loss": 0.7414563298225403, "lr": 6.630765174730116e-07, "epoch": 1.2580645161290323, "percentage": 62.9, "elapsed_time": "7:32:43", "remaining_time": "4:26:59"} +{"current_steps": 5461, "total_steps": 8680, "loss": 0.7959232926368713, "lr": 6.627178334551227e-07, "epoch": 1.258294930875576, "percentage": 62.91, "elapsed_time": "7:32:47", "remaining_time": "4:26:54"} +{"current_steps": 5462, "total_steps": 8680, "loss": 0.6722866296768188, "lr": 6.623591983932935e-07, "epoch": 1.25852534562212, "percentage": 62.93, "elapsed_time": "7:32:53", "remaining_time": "4:26:49"} +{"current_steps": 5463, "total_steps": 8680, "loss": 0.7688727378845215, "lr": 6.620006123395799e-07, "epoch": 1.2587557603686637, "percentage": 62.94, "elapsed_time": "7:32:59", "remaining_time": "4:26:45"} +{"current_steps": 5464, "total_steps": 8680, "loss": 0.7543724179267883, "lr": 6.616420753460301e-07, "epoch": 1.2589861751152074, "percentage": 62.95, "elapsed_time": "7:33:04", "remaining_time": "4:26:40"} +{"current_steps": 5465, "total_steps": 8680, "loss": 0.7097430229187012, "lr": 6.612835874646847e-07, "epoch": 1.2592165898617511, "percentage": 62.96, "elapsed_time": "7:33:09", "remaining_time": "4:26:34"} +{"current_steps": 5466, "total_steps": 8680, "loss": 0.8640443682670593, "lr": 6.609251487475786e-07, "epoch": 1.2594470046082948, "percentage": 62.97, "elapsed_time": "7:33:15", "remaining_time": 
"4:26:30"} +{"current_steps": 5467, "total_steps": 8680, "loss": 0.7872523069381714, "lr": 6.605667592467384e-07, "epoch": 1.2596774193548388, "percentage": 62.98, "elapsed_time": "7:33:19", "remaining_time": "4:26:25"} +{"current_steps": 5468, "total_steps": 8680, "loss": 0.8647557497024536, "lr": 6.602084190141835e-07, "epoch": 1.2599078341013825, "percentage": 63.0, "elapsed_time": "7:33:23", "remaining_time": "4:26:19"} +{"current_steps": 5469, "total_steps": 8680, "loss": 0.7323553562164307, "lr": 6.598501281019268e-07, "epoch": 1.2601382488479262, "percentage": 63.01, "elapsed_time": "7:33:29", "remaining_time": "4:26:15"} +{"current_steps": 5470, "total_steps": 8680, "loss": 0.8214852809906006, "lr": 6.594918865619739e-07, "epoch": 1.26036866359447, "percentage": 63.02, "elapsed_time": "7:33:33", "remaining_time": "4:26:10"} +{"current_steps": 5471, "total_steps": 8680, "loss": 0.8011265397071838, "lr": 6.591336944463223e-07, "epoch": 1.2605990783410137, "percentage": 63.03, "elapsed_time": "7:33:38", "remaining_time": "4:26:04"} +{"current_steps": 5472, "total_steps": 8680, "loss": 0.798862636089325, "lr": 6.587755518069642e-07, "epoch": 1.2608294930875577, "percentage": 63.04, "elapsed_time": "7:33:43", "remaining_time": "4:25:59"} +{"current_steps": 5473, "total_steps": 8680, "loss": 0.7231202721595764, "lr": 6.58417458695883e-07, "epoch": 1.2610599078341014, "percentage": 63.05, "elapsed_time": "7:33:47", "remaining_time": "4:25:54"} +{"current_steps": 5474, "total_steps": 8680, "loss": 0.8816685676574707, "lr": 6.580594151650551e-07, "epoch": 1.261290322580645, "percentage": 63.06, "elapsed_time": "7:33:51", "remaining_time": "4:25:49"} +{"current_steps": 5475, "total_steps": 8680, "loss": 0.6343427300453186, "lr": 6.577014212664509e-07, "epoch": 1.261520737327189, "percentage": 63.08, "elapsed_time": "7:33:57", "remaining_time": "4:25:44"} +{"current_steps": 5476, "total_steps": 8680, "loss": 0.7785895466804504, "lr": 6.573434770520321e-07, "epoch": 
1.2617511520737328, "percentage": 63.09, "elapsed_time": "7:34:01", "remaining_time": "4:25:39"} +{"current_steps": 5477, "total_steps": 8680, "loss": 0.7408698797225952, "lr": 6.569855825737536e-07, "epoch": 1.2619815668202765, "percentage": 63.1, "elapsed_time": "7:34:06", "remaining_time": "4:25:34"} +{"current_steps": 5478, "total_steps": 8680, "loss": 0.8481286764144897, "lr": 6.566277378835643e-07, "epoch": 1.2622119815668202, "percentage": 63.11, "elapsed_time": "7:34:11", "remaining_time": "4:25:29"} +{"current_steps": 5479, "total_steps": 8680, "loss": 0.8221831917762756, "lr": 6.56269943033404e-07, "epoch": 1.262442396313364, "percentage": 63.12, "elapsed_time": "7:34:18", "remaining_time": "4:25:25"} +{"current_steps": 5480, "total_steps": 8680, "loss": 0.805405855178833, "lr": 6.559121980752065e-07, "epoch": 1.262672811059908, "percentage": 63.13, "elapsed_time": "7:34:23", "remaining_time": "4:25:20"} +{"current_steps": 5481, "total_steps": 8680, "loss": 0.8643565773963928, "lr": 6.55554503060898e-07, "epoch": 1.2629032258064516, "percentage": 63.15, "elapsed_time": "7:34:29", "remaining_time": "4:25:16"} +{"current_steps": 5482, "total_steps": 8680, "loss": 0.7087225914001465, "lr": 6.551968580423973e-07, "epoch": 1.2631336405529954, "percentage": 63.16, "elapsed_time": "7:34:36", "remaining_time": "4:25:12"} +{"current_steps": 5483, "total_steps": 8680, "loss": 0.8401756882667542, "lr": 6.54839263071616e-07, "epoch": 1.263364055299539, "percentage": 63.17, "elapsed_time": "7:34:41", "remaining_time": "4:25:07"} +{"current_steps": 5484, "total_steps": 8680, "loss": 0.76345294713974, "lr": 6.544817182004589e-07, "epoch": 1.2635944700460828, "percentage": 63.18, "elapsed_time": "7:34:47", "remaining_time": "4:25:02"} +{"current_steps": 5485, "total_steps": 8680, "loss": 0.7177271842956543, "lr": 6.541242234808228e-07, "epoch": 1.2638248847926268, "percentage": 63.19, "elapsed_time": "7:34:52", "remaining_time": "4:24:57"} +{"current_steps": 5486, 
"total_steps": 8680, "loss": 0.7436186075210571, "lr": 6.537667789645981e-07, "epoch": 1.2640552995391705, "percentage": 63.2, "elapsed_time": "7:34:56", "remaining_time": "4:24:52"} +{"current_steps": 5487, "total_steps": 8680, "loss": 0.6526673436164856, "lr": 6.53409384703667e-07, "epoch": 1.2642857142857142, "percentage": 63.21, "elapsed_time": "7:35:02", "remaining_time": "4:24:47"} +{"current_steps": 5488, "total_steps": 8680, "loss": 0.879219651222229, "lr": 6.530520407499049e-07, "epoch": 1.2645161290322582, "percentage": 63.23, "elapsed_time": "7:35:06", "remaining_time": "4:24:42"} +{"current_steps": 5489, "total_steps": 8680, "loss": 0.7005003690719604, "lr": 6.526947471551798e-07, "epoch": 1.264746543778802, "percentage": 63.24, "elapsed_time": "7:35:11", "remaining_time": "4:24:37"} +{"current_steps": 5490, "total_steps": 8680, "loss": 0.716349720954895, "lr": 6.523375039713525e-07, "epoch": 1.2649769585253456, "percentage": 63.25, "elapsed_time": "7:35:16", "remaining_time": "4:24:32"} +{"current_steps": 5491, "total_steps": 8680, "loss": 0.8524413704872131, "lr": 6.519803112502758e-07, "epoch": 1.2652073732718894, "percentage": 63.26, "elapsed_time": "7:35:21", "remaining_time": "4:24:27"} +{"current_steps": 5492, "total_steps": 8680, "loss": 0.8032857179641724, "lr": 6.516231690437966e-07, "epoch": 1.265437788018433, "percentage": 63.27, "elapsed_time": "7:35:26", "remaining_time": "4:24:22"} +{"current_steps": 5493, "total_steps": 8680, "loss": 0.8912144899368286, "lr": 6.512660774037531e-07, "epoch": 1.265668202764977, "percentage": 63.28, "elapsed_time": "7:35:30", "remaining_time": "4:24:16"} +{"current_steps": 5494, "total_steps": 8680, "loss": 0.6526974439620972, "lr": 6.509090363819764e-07, "epoch": 1.2658986175115208, "percentage": 63.29, "elapsed_time": "7:35:36", "remaining_time": "4:24:12"} +{"current_steps": 5495, "total_steps": 8680, "loss": 0.7436610460281372, "lr": 6.505520460302916e-07, "epoch": 1.2661290322580645, "percentage": 
63.31, "elapsed_time": "7:35:41", "remaining_time": "4:24:07"} +{"current_steps": 5496, "total_steps": 8680, "loss": 0.7112951874732971, "lr": 6.501951064005145e-07, "epoch": 1.2663594470046082, "percentage": 63.32, "elapsed_time": "7:35:47", "remaining_time": "4:24:03"} +{"current_steps": 5497, "total_steps": 8680, "loss": 0.6908622980117798, "lr": 6.498382175444545e-07, "epoch": 1.266589861751152, "percentage": 63.33, "elapsed_time": "7:35:53", "remaining_time": "4:23:58"} +{"current_steps": 5498, "total_steps": 8680, "loss": 0.8169400691986084, "lr": 6.494813795139137e-07, "epoch": 1.266820276497696, "percentage": 63.34, "elapsed_time": "7:35:58", "remaining_time": "4:23:53"} +{"current_steps": 5499, "total_steps": 8680, "loss": 0.7577871084213257, "lr": 6.491245923606868e-07, "epoch": 1.2670506912442396, "percentage": 63.35, "elapsed_time": "7:36:02", "remaining_time": "4:23:48"} +{"current_steps": 5500, "total_steps": 8680, "loss": 0.7470887303352356, "lr": 6.487678561365606e-07, "epoch": 1.2672811059907834, "percentage": 63.36, "elapsed_time": "7:36:08", "remaining_time": "4:23:43"} +{"current_steps": 5501, "total_steps": 8680, "loss": 0.7862193584442139, "lr": 6.484111708933153e-07, "epoch": 1.2675115207373273, "percentage": 63.38, "elapsed_time": "7:36:14", "remaining_time": "4:23:39"} +{"current_steps": 5502, "total_steps": 8680, "loss": 0.6809444427490234, "lr": 6.48054536682723e-07, "epoch": 1.267741935483871, "percentage": 63.39, "elapsed_time": "7:36:19", "remaining_time": "4:23:34"} +{"current_steps": 5503, "total_steps": 8680, "loss": 0.7560738921165466, "lr": 6.476979535565486e-07, "epoch": 1.2679723502304148, "percentage": 63.4, "elapsed_time": "7:36:24", "remaining_time": "4:23:29"} +{"current_steps": 5504, "total_steps": 8680, "loss": 0.6961003541946411, "lr": 6.473414215665501e-07, "epoch": 1.2682027649769585, "percentage": 63.41, "elapsed_time": "7:36:30", "remaining_time": "4:23:25"} +{"current_steps": 5505, "total_steps": 8680, "loss": 
0.762688159942627, "lr": 6.469849407644775e-07, "epoch": 1.2684331797235022, "percentage": 63.42, "elapsed_time": "7:36:34", "remaining_time": "4:23:19"} +{"current_steps": 5506, "total_steps": 8680, "loss": 0.8735007047653198, "lr": 6.46628511202073e-07, "epoch": 1.2686635944700462, "percentage": 63.43, "elapsed_time": "7:36:40", "remaining_time": "4:23:15"} +{"current_steps": 5507, "total_steps": 8680, "loss": 0.7127432823181152, "lr": 6.462721329310727e-07, "epoch": 1.26889400921659, "percentage": 63.44, "elapsed_time": "7:36:44", "remaining_time": "4:23:09"} +{"current_steps": 5508, "total_steps": 8680, "loss": 0.7720422744750977, "lr": 6.45915806003204e-07, "epoch": 1.2691244239631336, "percentage": 63.46, "elapsed_time": "7:36:48", "remaining_time": "4:23:04"} +{"current_steps": 5509, "total_steps": 8680, "loss": 0.8046890497207642, "lr": 6.455595304701871e-07, "epoch": 1.2693548387096774, "percentage": 63.47, "elapsed_time": "7:36:53", "remaining_time": "4:22:59"} +{"current_steps": 5510, "total_steps": 8680, "loss": 0.8218742609024048, "lr": 6.452033063837354e-07, "epoch": 1.269585253456221, "percentage": 63.48, "elapsed_time": "7:36:58", "remaining_time": "4:22:54"} +{"current_steps": 5511, "total_steps": 8680, "loss": 0.912622332572937, "lr": 6.448471337955536e-07, "epoch": 1.269815668202765, "percentage": 63.49, "elapsed_time": "7:37:01", "remaining_time": "4:22:48"} +{"current_steps": 5512, "total_steps": 8680, "loss": 0.7940733432769775, "lr": 6.444910127573407e-07, "epoch": 1.2700460829493088, "percentage": 63.5, "elapsed_time": "7:37:05", "remaining_time": "4:22:42"} +{"current_steps": 5513, "total_steps": 8680, "loss": 0.7085565328598022, "lr": 6.441349433207864e-07, "epoch": 1.2702764976958525, "percentage": 63.51, "elapsed_time": "7:37:11", "remaining_time": "4:22:38"} +{"current_steps": 5514, "total_steps": 8680, "loss": 0.9316935539245605, "lr": 6.437789255375739e-07, "epoch": 1.2705069124423964, "percentage": 63.53, "elapsed_time": "7:37:15", 
"remaining_time": "4:22:32"} +{"current_steps": 5515, "total_steps": 8680, "loss": 0.7412574291229248, "lr": 6.43422959459379e-07, "epoch": 1.2707373271889402, "percentage": 63.54, "elapsed_time": "7:37:22", "remaining_time": "4:22:28"} +{"current_steps": 5516, "total_steps": 8680, "loss": 0.7476450204849243, "lr": 6.430670451378695e-07, "epoch": 1.270967741935484, "percentage": 63.55, "elapsed_time": "7:37:26", "remaining_time": "4:22:23"} +{"current_steps": 5517, "total_steps": 8680, "loss": 0.8530189990997314, "lr": 6.427111826247056e-07, "epoch": 1.2711981566820276, "percentage": 63.56, "elapsed_time": "7:37:30", "remaining_time": "4:22:18"} +{"current_steps": 5518, "total_steps": 8680, "loss": 0.8193017840385437, "lr": 6.423553719715406e-07, "epoch": 1.2714285714285714, "percentage": 63.57, "elapsed_time": "7:37:36", "remaining_time": "4:22:13"} +{"current_steps": 5519, "total_steps": 8680, "loss": 0.7444974780082703, "lr": 6.419996132300203e-07, "epoch": 1.2716589861751153, "percentage": 63.58, "elapsed_time": "7:37:42", "remaining_time": "4:22:08"} +{"current_steps": 5520, "total_steps": 8680, "loss": 0.7422837018966675, "lr": 6.416439064517818e-07, "epoch": 1.271889400921659, "percentage": 63.59, "elapsed_time": "7:37:47", "remaining_time": "4:22:04"} +{"current_steps": 5521, "total_steps": 8680, "loss": 1.0155640840530396, "lr": 6.412882516884562e-07, "epoch": 1.2721198156682028, "percentage": 63.61, "elapsed_time": "7:37:52", "remaining_time": "4:21:58"} +{"current_steps": 5522, "total_steps": 8680, "loss": 0.8097087144851685, "lr": 6.409326489916658e-07, "epoch": 1.2723502304147465, "percentage": 63.62, "elapsed_time": "7:37:57", "remaining_time": "4:21:54"} +{"current_steps": 5523, "total_steps": 8680, "loss": 0.8545565009117126, "lr": 6.405770984130257e-07, "epoch": 1.2725806451612902, "percentage": 63.63, "elapsed_time": "7:38:03", "remaining_time": "4:21:49"} +{"current_steps": 5524, "total_steps": 8680, "loss": 0.6765652298927307, "lr": 
6.402216000041445e-07, "epoch": 1.2728110599078342, "percentage": 63.64, "elapsed_time": "7:38:08", "remaining_time": "4:21:44"} +{"current_steps": 5525, "total_steps": 8680, "loss": 0.7964426875114441, "lr": 6.398661538166217e-07, "epoch": 1.273041474654378, "percentage": 63.65, "elapsed_time": "7:38:14", "remaining_time": "4:21:40"} +{"current_steps": 5526, "total_steps": 8680, "loss": 0.7449651956558228, "lr": 6.395107599020495e-07, "epoch": 1.2732718894009216, "percentage": 63.66, "elapsed_time": "7:38:18", "remaining_time": "4:21:35"} +{"current_steps": 5527, "total_steps": 8680, "loss": 0.8639888167381287, "lr": 6.391554183120138e-07, "epoch": 1.2735023041474656, "percentage": 63.68, "elapsed_time": "7:38:23", "remaining_time": "4:21:29"} +{"current_steps": 5528, "total_steps": 8680, "loss": 0.7668901681900024, "lr": 6.388001290980914e-07, "epoch": 1.2737327188940093, "percentage": 63.69, "elapsed_time": "7:38:28", "remaining_time": "4:21:24"} +{"current_steps": 5529, "total_steps": 8680, "loss": 0.6461849212646484, "lr": 6.384448923118517e-07, "epoch": 1.273963133640553, "percentage": 63.7, "elapsed_time": "7:38:34", "remaining_time": "4:21:20"} +{"current_steps": 5530, "total_steps": 8680, "loss": 0.7045707702636719, "lr": 6.380897080048576e-07, "epoch": 1.2741935483870968, "percentage": 63.71, "elapsed_time": "7:38:39", "remaining_time": "4:21:15"} +{"current_steps": 5531, "total_steps": 8680, "loss": 0.8303793668746948, "lr": 6.377345762286632e-07, "epoch": 1.2744239631336405, "percentage": 63.72, "elapsed_time": "7:38:44", "remaining_time": "4:21:10"} +{"current_steps": 5532, "total_steps": 8680, "loss": 0.808259129524231, "lr": 6.373794970348152e-07, "epoch": 1.2746543778801844, "percentage": 63.73, "elapsed_time": "7:38:49", "remaining_time": "4:21:05"} +{"current_steps": 5533, "total_steps": 8680, "loss": 0.8224689960479736, "lr": 6.370244704748535e-07, "epoch": 1.2748847926267282, "percentage": 63.74, "elapsed_time": "7:38:53", "remaining_time": 
"4:21:00"} +{"current_steps": 5534, "total_steps": 8680, "loss": 0.8559266328811646, "lr": 6.366694966003089e-07, "epoch": 1.2751152073732719, "percentage": 63.76, "elapsed_time": "7:38:57", "remaining_time": "4:20:54"} +{"current_steps": 5535, "total_steps": 8680, "loss": 0.7972407341003418, "lr": 6.363145754627063e-07, "epoch": 1.2753456221198156, "percentage": 63.77, "elapsed_time": "7:39:03", "remaining_time": "4:20:50"} +{"current_steps": 5536, "total_steps": 8680, "loss": 0.7750328779220581, "lr": 6.359597071135618e-07, "epoch": 1.2755760368663593, "percentage": 63.78, "elapsed_time": "7:39:08", "remaining_time": "4:20:45"} +{"current_steps": 5537, "total_steps": 8680, "loss": 0.807072639465332, "lr": 6.356048916043836e-07, "epoch": 1.2758064516129033, "percentage": 63.79, "elapsed_time": "7:39:12", "remaining_time": "4:20:39"} +{"current_steps": 5538, "total_steps": 8680, "loss": 0.8459323048591614, "lr": 6.35250128986673e-07, "epoch": 1.276036866359447, "percentage": 63.8, "elapsed_time": "7:39:18", "remaining_time": "4:20:35"} +{"current_steps": 5539, "total_steps": 8680, "loss": 0.7874447107315063, "lr": 6.348954193119233e-07, "epoch": 1.2762672811059907, "percentage": 63.81, "elapsed_time": "7:39:23", "remaining_time": "4:20:30"} +{"current_steps": 5540, "total_steps": 8680, "loss": 0.8817394971847534, "lr": 6.345407626316202e-07, "epoch": 1.2764976958525347, "percentage": 63.82, "elapsed_time": "7:39:27", "remaining_time": "4:20:25"} +{"current_steps": 5541, "total_steps": 8680, "loss": 0.7936382293701172, "lr": 6.341861589972417e-07, "epoch": 1.2767281105990782, "percentage": 63.84, "elapsed_time": "7:39:33", "remaining_time": "4:20:20"} +{"current_steps": 5542, "total_steps": 8680, "loss": 0.7301348447799683, "lr": 6.33831608460258e-07, "epoch": 1.2769585253456222, "percentage": 63.85, "elapsed_time": "7:39:37", "remaining_time": "4:20:15"} +{"current_steps": 5543, "total_steps": 8680, "loss": 0.6546784043312073, "lr": 6.334771110721311e-07, "epoch": 
1.2771889400921659, "percentage": 63.86, "elapsed_time": "7:39:44", "remaining_time": "4:20:11"} +{"current_steps": 5544, "total_steps": 8680, "loss": 0.798918604850769, "lr": 6.331226668843168e-07, "epoch": 1.2774193548387096, "percentage": 63.87, "elapsed_time": "7:39:49", "remaining_time": "4:20:06"} +{"current_steps": 5545, "total_steps": 8680, "loss": 0.6275264620780945, "lr": 6.327682759482618e-07, "epoch": 1.2776497695852536, "percentage": 63.88, "elapsed_time": "7:39:53", "remaining_time": "4:20:00"} +{"current_steps": 5546, "total_steps": 8680, "loss": 0.6870732307434082, "lr": 6.324139383154048e-07, "epoch": 1.2778801843317973, "percentage": 63.89, "elapsed_time": "7:39:59", "remaining_time": "4:19:56"} +{"current_steps": 5547, "total_steps": 8680, "loss": 0.8280556201934814, "lr": 6.320596540371785e-07, "epoch": 1.278110599078341, "percentage": 63.91, "elapsed_time": "7:40:04", "remaining_time": "4:19:51"} +{"current_steps": 5548, "total_steps": 8680, "loss": 0.8053648471832275, "lr": 6.317054231650063e-07, "epoch": 1.2783410138248847, "percentage": 63.92, "elapsed_time": "7:40:08", "remaining_time": "4:19:45"} +{"current_steps": 5549, "total_steps": 8680, "loss": 0.7628893852233887, "lr": 6.313512457503043e-07, "epoch": 1.2785714285714285, "percentage": 63.93, "elapsed_time": "7:40:13", "remaining_time": "4:19:41"} +{"current_steps": 5550, "total_steps": 8680, "loss": 0.8075753450393677, "lr": 6.30997121844481e-07, "epoch": 1.2788018433179724, "percentage": 63.94, "elapsed_time": "7:40:18", "remaining_time": "4:19:35"} +{"current_steps": 5551, "total_steps": 8680, "loss": 0.7883275747299194, "lr": 6.306430514989371e-07, "epoch": 1.2790322580645161, "percentage": 63.95, "elapsed_time": "7:40:23", "remaining_time": "4:19:31"} +{"current_steps": 5552, "total_steps": 8680, "loss": 0.7438768744468689, "lr": 6.302890347650648e-07, "epoch": 1.2792626728110599, "percentage": 63.96, "elapsed_time": "7:40:29", "remaining_time": "4:19:26"} +{"current_steps": 5553, 
"total_steps": 8680, "loss": 0.7756023406982422, "lr": 6.299350716942501e-07, "epoch": 1.2794930875576038, "percentage": 63.97, "elapsed_time": "7:40:34", "remaining_time": "4:19:21"} +{"current_steps": 5554, "total_steps": 8680, "loss": 0.7128444910049438, "lr": 6.295811623378698e-07, "epoch": 1.2797235023041473, "percentage": 63.99, "elapsed_time": "7:40:40", "remaining_time": "4:19:16"} +{"current_steps": 5555, "total_steps": 8680, "loss": 0.7611228823661804, "lr": 6.292273067472931e-07, "epoch": 1.2799539170506913, "percentage": 64.0, "elapsed_time": "7:40:45", "remaining_time": "4:19:12"} +{"current_steps": 5556, "total_steps": 8680, "loss": 0.7803670167922974, "lr": 6.288735049738822e-07, "epoch": 1.280184331797235, "percentage": 64.01, "elapsed_time": "7:40:49", "remaining_time": "4:19:06"} +{"current_steps": 5557, "total_steps": 8680, "loss": 0.958204448223114, "lr": 6.28519757068991e-07, "epoch": 1.2804147465437787, "percentage": 64.02, "elapsed_time": "7:40:55", "remaining_time": "4:19:02"} +{"current_steps": 5558, "total_steps": 8680, "loss": 0.7220249772071838, "lr": 6.28166063083965e-07, "epoch": 1.2806451612903227, "percentage": 64.03, "elapsed_time": "7:41:00", "remaining_time": "4:18:57"} +{"current_steps": 5559, "total_steps": 8680, "loss": 0.7396695613861084, "lr": 6.278124230701427e-07, "epoch": 1.2808755760368664, "percentage": 64.04, "elapsed_time": "7:41:05", "remaining_time": "4:18:52"} +{"current_steps": 5560, "total_steps": 8680, "loss": 0.819474458694458, "lr": 6.274588370788545e-07, "epoch": 1.2811059907834101, "percentage": 64.06, "elapsed_time": "7:41:09", "remaining_time": "4:18:47"} +{"current_steps": 5561, "total_steps": 8680, "loss": 0.6997617483139038, "lr": 6.271053051614231e-07, "epoch": 1.2813364055299539, "percentage": 64.07, "elapsed_time": "7:41:14", "remaining_time": "4:18:41"} +{"current_steps": 5562, "total_steps": 8680, "loss": 0.7526183128356934, "lr": 6.26751827369163e-07, "epoch": 1.2815668202764976, "percentage": 
64.08, "elapsed_time": "7:41:18", "remaining_time": "4:18:36"} +{"current_steps": 5563, "total_steps": 8680, "loss": 0.7185813188552856, "lr": 6.263984037533805e-07, "epoch": 1.2817972350230415, "percentage": 64.09, "elapsed_time": "7:41:24", "remaining_time": "4:18:32"} +{"current_steps": 5564, "total_steps": 8680, "loss": 0.7739845514297485, "lr": 6.260450343653757e-07, "epoch": 1.2820276497695853, "percentage": 64.1, "elapsed_time": "7:41:29", "remaining_time": "4:18:27"} +{"current_steps": 5565, "total_steps": 8680, "loss": 0.698557436466217, "lr": 6.25691719256439e-07, "epoch": 1.282258064516129, "percentage": 64.11, "elapsed_time": "7:41:34", "remaining_time": "4:18:21"} +{"current_steps": 5566, "total_steps": 8680, "loss": 0.6946271657943726, "lr": 6.253384584778534e-07, "epoch": 1.2824884792626727, "percentage": 64.12, "elapsed_time": "7:41:41", "remaining_time": "4:18:17"} +{"current_steps": 5567, "total_steps": 8680, "loss": 0.7746025323867798, "lr": 6.24985252080895e-07, "epoch": 1.2827188940092165, "percentage": 64.14, "elapsed_time": "7:41:45", "remaining_time": "4:18:12"} +{"current_steps": 5568, "total_steps": 8680, "loss": 0.8759660720825195, "lr": 6.246321001168306e-07, "epoch": 1.2829493087557604, "percentage": 64.15, "elapsed_time": "7:41:49", "remaining_time": "4:18:07"} +{"current_steps": 5569, "total_steps": 8680, "loss": 0.741111159324646, "lr": 6.2427900263692e-07, "epoch": 1.2831797235023041, "percentage": 64.16, "elapsed_time": "7:41:53", "remaining_time": "4:18:01"} +{"current_steps": 5570, "total_steps": 8680, "loss": 0.8580630421638489, "lr": 6.239259596924149e-07, "epoch": 1.2834101382488479, "percentage": 64.17, "elapsed_time": "7:41:58", "remaining_time": "4:17:56"} +{"current_steps": 5571, "total_steps": 8680, "loss": 0.7139618992805481, "lr": 6.235729713345588e-07, "epoch": 1.2836405529953918, "percentage": 64.18, "elapsed_time": "7:42:02", "remaining_time": "4:17:51"} +{"current_steps": 5572, "total_steps": 8680, "loss": 
0.8300976753234863, "lr": 6.232200376145873e-07, "epoch": 1.2838709677419355, "percentage": 64.19, "elapsed_time": "7:42:08", "remaining_time": "4:17:46"} +{"current_steps": 5573, "total_steps": 8680, "loss": 0.7193114757537842, "lr": 6.228671585837288e-07, "epoch": 1.2841013824884793, "percentage": 64.21, "elapsed_time": "7:42:13", "remaining_time": "4:17:41"} +{"current_steps": 5574, "total_steps": 8680, "loss": 0.8802851438522339, "lr": 6.225143342932031e-07, "epoch": 1.284331797235023, "percentage": 64.22, "elapsed_time": "7:42:19", "remaining_time": "4:17:37"} +{"current_steps": 5575, "total_steps": 8680, "loss": 0.749543309211731, "lr": 6.221615647942217e-07, "epoch": 1.2845622119815667, "percentage": 64.23, "elapsed_time": "7:42:24", "remaining_time": "4:17:32"} +{"current_steps": 5576, "total_steps": 8680, "loss": 0.703508734703064, "lr": 6.218088501379892e-07, "epoch": 1.2847926267281107, "percentage": 64.24, "elapsed_time": "7:42:29", "remaining_time": "4:17:27"} +{"current_steps": 5577, "total_steps": 8680, "loss": 0.7519023418426514, "lr": 6.214561903757017e-07, "epoch": 1.2850230414746544, "percentage": 64.25, "elapsed_time": "7:42:35", "remaining_time": "4:17:23"} +{"current_steps": 5578, "total_steps": 8680, "loss": 0.9525241851806641, "lr": 6.211035855585466e-07, "epoch": 1.2852534562211981, "percentage": 64.26, "elapsed_time": "7:42:39", "remaining_time": "4:17:17"} +{"current_steps": 5579, "total_steps": 8680, "loss": 0.8288872241973877, "lr": 6.207510357377046e-07, "epoch": 1.2854838709677419, "percentage": 64.27, "elapsed_time": "7:42:43", "remaining_time": "4:17:12"} +{"current_steps": 5580, "total_steps": 8680, "loss": 0.8531112670898438, "lr": 6.203985409643478e-07, "epoch": 1.2857142857142856, "percentage": 64.29, "elapsed_time": "7:42:49", "remaining_time": "4:17:07"} +{"current_steps": 5581, "total_steps": 8680, "loss": 0.7106495499610901, "lr": 6.200461012896401e-07, "epoch": 1.2859447004608295, "percentage": 64.3, "elapsed_time": 
"7:42:53", "remaining_time": "4:17:02"} +{"current_steps": 5582, "total_steps": 8680, "loss": 0.714931845664978, "lr": 6.19693716764738e-07, "epoch": 1.2861751152073733, "percentage": 64.31, "elapsed_time": "7:42:58", "remaining_time": "4:16:56"} +{"current_steps": 5583, "total_steps": 8680, "loss": 0.8281360268592834, "lr": 6.19341387440789e-07, "epoch": 1.286405529953917, "percentage": 64.32, "elapsed_time": "7:43:03", "remaining_time": "4:16:51"} +{"current_steps": 5584, "total_steps": 8680, "loss": 0.9155910611152649, "lr": 6.189891133689342e-07, "epoch": 1.286635944700461, "percentage": 64.33, "elapsed_time": "7:43:08", "remaining_time": "4:16:46"} +{"current_steps": 5585, "total_steps": 8680, "loss": 0.7573060989379883, "lr": 6.186368946003051e-07, "epoch": 1.2868663594470047, "percentage": 64.34, "elapsed_time": "7:43:11", "remaining_time": "4:16:41"} +{"current_steps": 5586, "total_steps": 8680, "loss": 0.6994235515594482, "lr": 6.182847311860255e-07, "epoch": 1.2870967741935484, "percentage": 64.35, "elapsed_time": "7:43:15", "remaining_time": "4:16:35"} +{"current_steps": 5587, "total_steps": 8680, "loss": 0.771092414855957, "lr": 6.179326231772123e-07, "epoch": 1.2873271889400921, "percentage": 64.37, "elapsed_time": "7:43:20", "remaining_time": "4:16:30"} +{"current_steps": 5588, "total_steps": 8680, "loss": 0.7470684051513672, "lr": 6.17580570624973e-07, "epoch": 1.2875576036866359, "percentage": 64.38, "elapsed_time": "7:43:26", "remaining_time": "4:16:26"} +{"current_steps": 5589, "total_steps": 8680, "loss": 0.918886125087738, "lr": 6.172285735804075e-07, "epoch": 1.2877880184331798, "percentage": 64.39, "elapsed_time": "7:43:31", "remaining_time": "4:16:21"} +{"current_steps": 5590, "total_steps": 8680, "loss": 0.7232617139816284, "lr": 6.16876632094608e-07, "epoch": 1.2880184331797235, "percentage": 64.4, "elapsed_time": "7:43:37", "remaining_time": "4:16:16"} +{"current_steps": 5591, "total_steps": 8680, "loss": 0.7367006540298462, "lr": 
6.16524746218658e-07, "epoch": 1.2882488479262673, "percentage": 64.41, "elapsed_time": "7:43:41", "remaining_time": "4:16:11"} +{"current_steps": 5592, "total_steps": 8680, "loss": 0.8783999681472778, "lr": 6.161729160036333e-07, "epoch": 1.288479262672811, "percentage": 64.42, "elapsed_time": "7:43:46", "remaining_time": "4:16:06"} +{"current_steps": 5593, "total_steps": 8680, "loss": 0.8266523480415344, "lr": 6.158211415006019e-07, "epoch": 1.2887096774193547, "percentage": 64.44, "elapsed_time": "7:43:50", "remaining_time": "4:16:00"} +{"current_steps": 5594, "total_steps": 8680, "loss": 0.8528730869293213, "lr": 6.154694227606234e-07, "epoch": 1.2889400921658987, "percentage": 64.45, "elapsed_time": "7:43:55", "remaining_time": "4:15:55"} +{"current_steps": 5595, "total_steps": 8680, "loss": 0.7586283683776855, "lr": 6.151177598347485e-07, "epoch": 1.2891705069124424, "percentage": 64.46, "elapsed_time": "7:44:00", "remaining_time": "4:15:51"} +{"current_steps": 5596, "total_steps": 8680, "loss": 0.8671954870223999, "lr": 6.147661527740217e-07, "epoch": 1.2894009216589861, "percentage": 64.47, "elapsed_time": "7:44:04", "remaining_time": "4:15:45"} +{"current_steps": 5597, "total_steps": 8680, "loss": 0.7354376316070557, "lr": 6.14414601629478e-07, "epoch": 1.28963133640553, "percentage": 64.48, "elapsed_time": "7:44:11", "remaining_time": "4:15:41"} +{"current_steps": 5598, "total_steps": 8680, "loss": 0.8515663146972656, "lr": 6.140631064521443e-07, "epoch": 1.2898617511520738, "percentage": 64.49, "elapsed_time": "7:44:14", "remaining_time": "4:15:35"} +{"current_steps": 5599, "total_steps": 8680, "loss": 0.9068351984024048, "lr": 6.137116672930395e-07, "epoch": 1.2900921658986175, "percentage": 64.5, "elapsed_time": "7:44:19", "remaining_time": "4:15:30"} +{"current_steps": 5600, "total_steps": 8680, "loss": 0.7260826230049133, "lr": 6.133602842031752e-07, "epoch": 1.2903225806451613, "percentage": 64.52, "elapsed_time": "7:44:24", "remaining_time": 
"4:15:25"} +{"current_steps": 5601, "total_steps": 8680, "loss": 0.7162504196166992, "lr": 6.130089572335535e-07, "epoch": 1.290552995391705, "percentage": 64.53, "elapsed_time": "7:44:34", "remaining_time": "4:15:23"} +{"current_steps": 5602, "total_steps": 8680, "loss": 0.7625414133071899, "lr": 6.126576864351695e-07, "epoch": 1.290783410138249, "percentage": 64.54, "elapsed_time": "7:44:38", "remaining_time": "4:15:17"} +{"current_steps": 5603, "total_steps": 8680, "loss": 0.787274956703186, "lr": 6.123064718590099e-07, "epoch": 1.2910138248847927, "percentage": 64.55, "elapsed_time": "7:44:42", "remaining_time": "4:15:12"} +{"current_steps": 5604, "total_steps": 8680, "loss": 0.6539326310157776, "lr": 6.119553135560519e-07, "epoch": 1.2912442396313364, "percentage": 64.56, "elapsed_time": "7:44:48", "remaining_time": "4:15:07"} +{"current_steps": 5605, "total_steps": 8680, "loss": 0.8481189012527466, "lr": 6.11604211577267e-07, "epoch": 1.2914746543778801, "percentage": 64.57, "elapsed_time": "7:44:52", "remaining_time": "4:15:02"} +{"current_steps": 5606, "total_steps": 8680, "loss": 0.794892430305481, "lr": 6.112531659736164e-07, "epoch": 1.2917050691244238, "percentage": 64.59, "elapsed_time": "7:44:58", "remaining_time": "4:14:57"} +{"current_steps": 5607, "total_steps": 8680, "loss": 0.6738630533218384, "lr": 6.10902176796054e-07, "epoch": 1.2919354838709678, "percentage": 64.6, "elapsed_time": "7:45:04", "remaining_time": "4:14:53"} +{"current_steps": 5608, "total_steps": 8680, "loss": 0.7220937609672546, "lr": 6.105512440955258e-07, "epoch": 1.2921658986175115, "percentage": 64.61, "elapsed_time": "7:45:08", "remaining_time": "4:14:48"} +{"current_steps": 5609, "total_steps": 8680, "loss": 0.6831785440444946, "lr": 6.102003679229688e-07, "epoch": 1.2923963133640552, "percentage": 64.62, "elapsed_time": "7:45:14", "remaining_time": "4:14:43"} +{"current_steps": 5610, "total_steps": 8680, "loss": 0.7033277750015259, "lr": 6.098495483293125e-07, "epoch": 
1.2926267281105992, "percentage": 64.63, "elapsed_time": "7:45:22", "remaining_time": "4:14:39"} +{"current_steps": 5611, "total_steps": 8680, "loss": 0.7063429355621338, "lr": 6.094987853654779e-07, "epoch": 1.292857142857143, "percentage": 64.64, "elapsed_time": "7:45:27", "remaining_time": "4:14:35"} +{"current_steps": 5612, "total_steps": 8680, "loss": 0.7791472673416138, "lr": 6.091480790823771e-07, "epoch": 1.2930875576036867, "percentage": 64.65, "elapsed_time": "7:45:33", "remaining_time": "4:14:30"} +{"current_steps": 5613, "total_steps": 8680, "loss": 0.8674220442771912, "lr": 6.087974295309157e-07, "epoch": 1.2933179723502304, "percentage": 64.67, "elapsed_time": "7:45:38", "remaining_time": "4:14:25"} +{"current_steps": 5614, "total_steps": 8680, "loss": 0.7878479957580566, "lr": 6.084468367619895e-07, "epoch": 1.293548387096774, "percentage": 64.68, "elapsed_time": "7:45:43", "remaining_time": "4:14:21"} +{"current_steps": 5615, "total_steps": 8680, "loss": 0.7019612789154053, "lr": 6.080963008264861e-07, "epoch": 1.293778801843318, "percentage": 64.69, "elapsed_time": "7:45:49", "remaining_time": "4:14:16"} +{"current_steps": 5616, "total_steps": 8680, "loss": 0.68759685754776, "lr": 6.077458217752863e-07, "epoch": 1.2940092165898618, "percentage": 64.7, "elapsed_time": "7:45:54", "remaining_time": "4:14:11"} +{"current_steps": 5617, "total_steps": 8680, "loss": 0.851733922958374, "lr": 6.073953996592612e-07, "epoch": 1.2942396313364055, "percentage": 64.71, "elapsed_time": "7:45:58", "remaining_time": "4:14:06"} +{"current_steps": 5618, "total_steps": 8680, "loss": 0.699798047542572, "lr": 6.070450345292739e-07, "epoch": 1.2944700460829492, "percentage": 64.72, "elapsed_time": "7:46:04", "remaining_time": "4:14:01"} +{"current_steps": 5619, "total_steps": 8680, "loss": 0.8625125885009766, "lr": 6.066947264361798e-07, "epoch": 1.294700460829493, "percentage": 64.74, "elapsed_time": "7:46:08", "remaining_time": "4:13:56"} +{"current_steps": 5620, 
"total_steps": 8680, "loss": 0.759062647819519, "lr": 6.063444754308253e-07, "epoch": 1.294930875576037, "percentage": 64.75, "elapsed_time": "7:46:13", "remaining_time": "4:13:51"} +{"current_steps": 5621, "total_steps": 8680, "loss": 0.7549973726272583, "lr": 6.059942815640491e-07, "epoch": 1.2951612903225806, "percentage": 64.76, "elapsed_time": "7:46:18", "remaining_time": "4:13:46"} +{"current_steps": 5622, "total_steps": 8680, "loss": 0.8142743110656738, "lr": 6.056441448866816e-07, "epoch": 1.2953917050691244, "percentage": 64.77, "elapsed_time": "7:46:22", "remaining_time": "4:13:40"} +{"current_steps": 5623, "total_steps": 8680, "loss": 0.7881144881248474, "lr": 6.052940654495442e-07, "epoch": 1.2956221198156683, "percentage": 64.78, "elapsed_time": "7:46:27", "remaining_time": "4:13:35"} +{"current_steps": 5624, "total_steps": 8680, "loss": 0.7922053933143616, "lr": 6.049440433034505e-07, "epoch": 1.295852534562212, "percentage": 64.79, "elapsed_time": "7:46:31", "remaining_time": "4:13:30"} +{"current_steps": 5625, "total_steps": 8680, "loss": 0.6808311939239502, "lr": 6.045940784992061e-07, "epoch": 1.2960829493087558, "percentage": 64.8, "elapsed_time": "7:46:36", "remaining_time": "4:13:25"} +{"current_steps": 5626, "total_steps": 8680, "loss": 0.933373749256134, "lr": 6.04244171087608e-07, "epoch": 1.2963133640552995, "percentage": 64.82, "elapsed_time": "7:46:41", "remaining_time": "4:13:20"} +{"current_steps": 5627, "total_steps": 8680, "loss": 0.8077404499053955, "lr": 6.038943211194439e-07, "epoch": 1.2965437788018432, "percentage": 64.83, "elapsed_time": "7:46:45", "remaining_time": "4:13:14"} +{"current_steps": 5628, "total_steps": 8680, "loss": 0.7920867204666138, "lr": 6.035445286454953e-07, "epoch": 1.2967741935483872, "percentage": 64.84, "elapsed_time": "7:46:49", "remaining_time": "4:13:09"} +{"current_steps": 5629, "total_steps": 8680, "loss": 0.5872117280960083, "lr": 6.031947937165335e-07, "epoch": 1.297004608294931, "percentage": 
64.85, "elapsed_time": "7:46:54", "remaining_time": "4:13:04"} +{"current_steps": 5630, "total_steps": 8680, "loss": 0.8593505620956421, "lr": 6.02845116383322e-07, "epoch": 1.2972350230414746, "percentage": 64.86, "elapsed_time": "7:46:58", "remaining_time": "4:12:58"} +{"current_steps": 5631, "total_steps": 8680, "loss": 0.8352359533309937, "lr": 6.02495496696616e-07, "epoch": 1.2974654377880184, "percentage": 64.87, "elapsed_time": "7:47:02", "remaining_time": "4:12:53"} +{"current_steps": 5632, "total_steps": 8680, "loss": 0.7316182255744934, "lr": 6.021459347071623e-07, "epoch": 1.297695852534562, "percentage": 64.88, "elapsed_time": "7:47:08", "remaining_time": "4:12:48"} +{"current_steps": 5633, "total_steps": 8680, "loss": 0.7294400334358215, "lr": 6.017964304656997e-07, "epoch": 1.297926267281106, "percentage": 64.9, "elapsed_time": "7:47:12", "remaining_time": "4:12:43"} +{"current_steps": 5634, "total_steps": 8680, "loss": 0.6595947742462158, "lr": 6.014469840229581e-07, "epoch": 1.2981566820276498, "percentage": 64.91, "elapsed_time": "7:47:18", "remaining_time": "4:12:38"} +{"current_steps": 5635, "total_steps": 8680, "loss": 0.7849195003509521, "lr": 6.010975954296587e-07, "epoch": 1.2983870967741935, "percentage": 64.92, "elapsed_time": "7:47:21", "remaining_time": "4:12:33"} +{"current_steps": 5636, "total_steps": 8680, "loss": 0.6915944218635559, "lr": 6.007482647365159e-07, "epoch": 1.2986175115207375, "percentage": 64.93, "elapsed_time": "7:47:27", "remaining_time": "4:12:28"} +{"current_steps": 5637, "total_steps": 8680, "loss": 0.6821994781494141, "lr": 6.003989919942338e-07, "epoch": 1.2988479262672812, "percentage": 64.94, "elapsed_time": "7:47:33", "remaining_time": "4:12:24"} +{"current_steps": 5638, "total_steps": 8680, "loss": 0.7333718538284302, "lr": 6.000497772535087e-07, "epoch": 1.299078341013825, "percentage": 64.95, "elapsed_time": "7:47:40", "remaining_time": "4:12:19"} +{"current_steps": 5639, "total_steps": 8680, "loss": 
0.8069280385971069, "lr": 5.997006205650292e-07, "epoch": 1.2993087557603686, "percentage": 64.97, "elapsed_time": "7:47:45", "remaining_time": "4:12:15"} +{"current_steps": 5640, "total_steps": 8680, "loss": 0.6989297866821289, "lr": 5.993515219794745e-07, "epoch": 1.2995391705069124, "percentage": 64.98, "elapsed_time": "7:47:52", "remaining_time": "4:12:11"} +{"current_steps": 5641, "total_steps": 8680, "loss": 0.7784403562545776, "lr": 5.990024815475161e-07, "epoch": 1.2997695852534563, "percentage": 64.99, "elapsed_time": "7:47:57", "remaining_time": "4:12:06"} +{"current_steps": 5642, "total_steps": 8680, "loss": 0.6554181575775146, "lr": 5.986534993198168e-07, "epoch": 1.3, "percentage": 65.0, "elapsed_time": "7:48:02", "remaining_time": "4:12:01"} +{"current_steps": 5643, "total_steps": 8680, "loss": 0.7647836208343506, "lr": 5.983045753470307e-07, "epoch": 1.3002304147465438, "percentage": 65.01, "elapsed_time": "7:48:07", "remaining_time": "4:11:56"} +{"current_steps": 5644, "total_steps": 8680, "loss": 0.7787084579467773, "lr": 5.979557096798033e-07, "epoch": 1.3004608294930875, "percentage": 65.02, "elapsed_time": "7:48:12", "remaining_time": "4:11:51"} +{"current_steps": 5645, "total_steps": 8680, "loss": 0.6367940902709961, "lr": 5.97606902368773e-07, "epoch": 1.3006912442396312, "percentage": 65.03, "elapsed_time": "7:48:19", "remaining_time": "4:11:47"} +{"current_steps": 5646, "total_steps": 8680, "loss": 0.7650243043899536, "lr": 5.972581534645679e-07, "epoch": 1.3009216589861752, "percentage": 65.05, "elapsed_time": "7:48:22", "remaining_time": "4:11:41"} +{"current_steps": 5647, "total_steps": 8680, "loss": 0.6506018042564392, "lr": 5.969094630178084e-07, "epoch": 1.301152073732719, "percentage": 65.06, "elapsed_time": "7:48:28", "remaining_time": "4:11:37"} +{"current_steps": 5648, "total_steps": 8680, "loss": 0.7351242303848267, "lr": 5.965608310791071e-07, "epoch": 1.3013824884792626, "percentage": 65.07, "elapsed_time": "7:48:34", 
"remaining_time": "4:11:32"} +{"current_steps": 5649, "total_steps": 8680, "loss": 0.7327077984809875, "lr": 5.96212257699067e-07, "epoch": 1.3016129032258066, "percentage": 65.08, "elapsed_time": "7:48:39", "remaining_time": "4:11:27"} +{"current_steps": 5650, "total_steps": 8680, "loss": 0.6448171138763428, "lr": 5.958637429282831e-07, "epoch": 1.3018433179723503, "percentage": 65.09, "elapsed_time": "7:48:45", "remaining_time": "4:11:23"} +{"current_steps": 5651, "total_steps": 8680, "loss": 0.8347861766815186, "lr": 5.955152868173418e-07, "epoch": 1.302073732718894, "percentage": 65.1, "elapsed_time": "7:48:49", "remaining_time": "4:11:17"} +{"current_steps": 5652, "total_steps": 8680, "loss": 0.736280620098114, "lr": 5.951668894168215e-07, "epoch": 1.3023041474654378, "percentage": 65.12, "elapsed_time": "7:48:53", "remaining_time": "4:11:12"} +{"current_steps": 5653, "total_steps": 8680, "loss": 0.8677594661712646, "lr": 5.948185507772908e-07, "epoch": 1.3025345622119815, "percentage": 65.13, "elapsed_time": "7:48:59", "remaining_time": "4:11:07"} +{"current_steps": 5654, "total_steps": 8680, "loss": 0.6598676443099976, "lr": 5.944702709493113e-07, "epoch": 1.3027649769585254, "percentage": 65.14, "elapsed_time": "7:49:04", "remaining_time": "4:11:02"} +{"current_steps": 5655, "total_steps": 8680, "loss": 0.7795349359512329, "lr": 5.941220499834352e-07, "epoch": 1.3029953917050692, "percentage": 65.15, "elapsed_time": "7:49:09", "remaining_time": "4:10:57"} +{"current_steps": 5656, "total_steps": 8680, "loss": 0.6929318904876709, "lr": 5.937738879302058e-07, "epoch": 1.303225806451613, "percentage": 65.16, "elapsed_time": "7:49:15", "remaining_time": "4:10:53"} +{"current_steps": 5657, "total_steps": 8680, "loss": 0.859328031539917, "lr": 5.934257848401593e-07, "epoch": 1.3034562211981566, "percentage": 65.17, "elapsed_time": "7:49:20", "remaining_time": "4:10:48"} +{"current_steps": 5658, "total_steps": 8680, "loss": 1.0015549659729004, "lr": 
5.930777407638216e-07, "epoch": 1.3036866359447004, "percentage": 65.18, "elapsed_time": "7:49:24", "remaining_time": "4:10:43"} +{"current_steps": 5659, "total_steps": 8680, "loss": 0.6775785088539124, "lr": 5.927297557517115e-07, "epoch": 1.3039170506912443, "percentage": 65.2, "elapsed_time": "7:49:29", "remaining_time": "4:10:38"} +{"current_steps": 5660, "total_steps": 8680, "loss": 0.7228262424468994, "lr": 5.923818298543378e-07, "epoch": 1.304147465437788, "percentage": 65.21, "elapsed_time": "7:49:33", "remaining_time": "4:10:32"} +{"current_steps": 5661, "total_steps": 8680, "loss": 0.6139897108078003, "lr": 5.92033963122202e-07, "epoch": 1.3043778801843318, "percentage": 65.22, "elapsed_time": "7:49:39", "remaining_time": "4:10:28"} +{"current_steps": 5662, "total_steps": 8680, "loss": 0.7336323261260986, "lr": 5.916861556057965e-07, "epoch": 1.3046082949308757, "percentage": 65.23, "elapsed_time": "7:49:43", "remaining_time": "4:10:22"} +{"current_steps": 5663, "total_steps": 8680, "loss": 0.9223559498786926, "lr": 5.913384073556049e-07, "epoch": 1.3048387096774192, "percentage": 65.24, "elapsed_time": "7:49:49", "remaining_time": "4:10:18"} +{"current_steps": 5664, "total_steps": 8680, "loss": 0.7230484485626221, "lr": 5.909907184221023e-07, "epoch": 1.3050691244239632, "percentage": 65.25, "elapsed_time": "7:49:53", "remaining_time": "4:10:12"} +{"current_steps": 5665, "total_steps": 8680, "loss": 0.753510594367981, "lr": 5.906430888557556e-07, "epoch": 1.305299539170507, "percentage": 65.26, "elapsed_time": "7:49:57", "remaining_time": "4:10:07"} +{"current_steps": 5666, "total_steps": 8680, "loss": 0.8960593938827515, "lr": 5.902955187070229e-07, "epoch": 1.3055299539170506, "percentage": 65.28, "elapsed_time": "7:50:02", "remaining_time": "4:10:02"} +{"current_steps": 5667, "total_steps": 8680, "loss": 0.6865993738174438, "lr": 5.899480080263527e-07, "epoch": 1.3057603686635946, "percentage": 65.29, "elapsed_time": "7:50:07", "remaining_time": 
"4:09:57"} +{"current_steps": 5668, "total_steps": 8680, "loss": 0.7748720645904541, "lr": 5.896005568641868e-07, "epoch": 1.3059907834101383, "percentage": 65.3, "elapsed_time": "7:50:13", "remaining_time": "4:09:52"} +{"current_steps": 5669, "total_steps": 8680, "loss": 0.834233283996582, "lr": 5.892531652709567e-07, "epoch": 1.306221198156682, "percentage": 65.31, "elapsed_time": "7:50:17", "remaining_time": "4:09:47"} +{"current_steps": 5670, "total_steps": 8680, "loss": 0.8398417234420776, "lr": 5.889058332970858e-07, "epoch": 1.3064516129032258, "percentage": 65.32, "elapsed_time": "7:50:21", "remaining_time": "4:09:41"} +{"current_steps": 5671, "total_steps": 8680, "loss": 0.6889529228210449, "lr": 5.885585609929891e-07, "epoch": 1.3066820276497695, "percentage": 65.33, "elapsed_time": "7:50:26", "remaining_time": "4:09:36"} +{"current_steps": 5672, "total_steps": 8680, "loss": 0.6625782251358032, "lr": 5.882113484090725e-07, "epoch": 1.3069124423963134, "percentage": 65.35, "elapsed_time": "7:50:31", "remaining_time": "4:09:31"} +{"current_steps": 5673, "total_steps": 8680, "loss": 0.7774407267570496, "lr": 5.878641955957334e-07, "epoch": 1.3071428571428572, "percentage": 65.36, "elapsed_time": "7:50:35", "remaining_time": "4:09:26"} +{"current_steps": 5674, "total_steps": 8680, "loss": 0.7799595594406128, "lr": 5.875171026033608e-07, "epoch": 1.307373271889401, "percentage": 65.37, "elapsed_time": "7:50:41", "remaining_time": "4:09:21"} +{"current_steps": 5675, "total_steps": 8680, "loss": 0.800041913986206, "lr": 5.87170069482335e-07, "epoch": 1.3076036866359446, "percentage": 65.38, "elapsed_time": "7:50:47", "remaining_time": "4:09:17"} +{"current_steps": 5676, "total_steps": 8680, "loss": 0.7478667497634888, "lr": 5.868230962830265e-07, "epoch": 1.3078341013824883, "percentage": 65.39, "elapsed_time": "7:50:52", "remaining_time": "4:09:12"} +{"current_steps": 5677, "total_steps": 8680, "loss": 0.7538981437683105, "lr": 5.86476183055799e-07, "epoch": 
1.3080645161290323, "percentage": 65.4, "elapsed_time": "7:50:56", "remaining_time": "4:09:07"} +{"current_steps": 5678, "total_steps": 8680, "loss": 0.7556810975074768, "lr": 5.861293298510061e-07, "epoch": 1.308294930875576, "percentage": 65.41, "elapsed_time": "7:51:01", "remaining_time": "4:09:02"} +{"current_steps": 5679, "total_steps": 8680, "loss": 0.670037031173706, "lr": 5.85782536718993e-07, "epoch": 1.3085253456221198, "percentage": 65.43, "elapsed_time": "7:51:06", "remaining_time": "4:08:57"} +{"current_steps": 5680, "total_steps": 8680, "loss": 0.6238662600517273, "lr": 5.854358037100964e-07, "epoch": 1.3087557603686637, "percentage": 65.44, "elapsed_time": "7:51:12", "remaining_time": "4:08:52"} +{"current_steps": 5681, "total_steps": 8680, "loss": 0.7972823977470398, "lr": 5.85089130874644e-07, "epoch": 1.3089861751152074, "percentage": 65.45, "elapsed_time": "7:51:16", "remaining_time": "4:08:47"} +{"current_steps": 5682, "total_steps": 8680, "loss": 0.7332338094711304, "lr": 5.847425182629549e-07, "epoch": 1.3092165898617512, "percentage": 65.46, "elapsed_time": "7:51:20", "remaining_time": "4:08:41"} +{"current_steps": 5683, "total_steps": 8680, "loss": 0.8186966180801392, "lr": 5.843959659253398e-07, "epoch": 1.3094470046082949, "percentage": 65.47, "elapsed_time": "7:51:25", "remaining_time": "4:08:36"} +{"current_steps": 5684, "total_steps": 8680, "loss": 0.8207032680511475, "lr": 5.840494739120996e-07, "epoch": 1.3096774193548386, "percentage": 65.48, "elapsed_time": "7:51:31", "remaining_time": "4:08:32"} +{"current_steps": 5685, "total_steps": 8680, "loss": 0.848265528678894, "lr": 5.83703042273528e-07, "epoch": 1.3099078341013826, "percentage": 65.5, "elapsed_time": "7:51:35", "remaining_time": "4:08:26"} +{"current_steps": 5686, "total_steps": 8680, "loss": 0.7766404151916504, "lr": 5.833566710599088e-07, "epoch": 1.3101382488479263, "percentage": 65.51, "elapsed_time": "7:51:40", "remaining_time": "4:08:21"} +{"current_steps": 5687, 
"total_steps": 8680, "loss": 0.7570784687995911, "lr": 5.830103603215168e-07, "epoch": 1.31036866359447, "percentage": 65.52, "elapsed_time": "7:51:45", "remaining_time": "4:08:16"} +{"current_steps": 5688, "total_steps": 8680, "loss": 0.7551493644714355, "lr": 5.826641101086194e-07, "epoch": 1.3105990783410137, "percentage": 65.53, "elapsed_time": "7:51:51", "remaining_time": "4:08:12"} +{"current_steps": 5689, "total_steps": 8680, "loss": 0.8589804172515869, "lr": 5.823179204714739e-07, "epoch": 1.3108294930875575, "percentage": 65.54, "elapsed_time": "7:51:55", "remaining_time": "4:08:07"} +{"current_steps": 5690, "total_steps": 8680, "loss": 0.8252761960029602, "lr": 5.819717914603288e-07, "epoch": 1.3110599078341014, "percentage": 65.55, "elapsed_time": "7:52:00", "remaining_time": "4:08:01"} +{"current_steps": 5691, "total_steps": 8680, "loss": 0.7784370183944702, "lr": 5.816257231254254e-07, "epoch": 1.3112903225806452, "percentage": 65.56, "elapsed_time": "7:52:06", "remaining_time": "4:07:57"} +{"current_steps": 5692, "total_steps": 8680, "loss": 0.8040215969085693, "lr": 5.812797155169942e-07, "epoch": 1.3115207373271889, "percentage": 65.58, "elapsed_time": "7:52:12", "remaining_time": "4:07:53"} +{"current_steps": 5693, "total_steps": 8680, "loss": 0.8355100154876709, "lr": 5.809337686852582e-07, "epoch": 1.3117511520737328, "percentage": 65.59, "elapsed_time": "7:52:19", "remaining_time": "4:07:48"} +{"current_steps": 5694, "total_steps": 8680, "loss": 0.8233312368392944, "lr": 5.805878826804303e-07, "epoch": 1.3119815668202766, "percentage": 65.6, "elapsed_time": "7:52:23", "remaining_time": "4:07:43"} +{"current_steps": 5695, "total_steps": 8680, "loss": 0.7756507992744446, "lr": 5.802420575527165e-07, "epoch": 1.3122119815668203, "percentage": 65.61, "elapsed_time": "7:52:27", "remaining_time": "4:07:38"} +{"current_steps": 5696, "total_steps": 8680, "loss": 0.7503829002380371, "lr": 5.798962933523124e-07, "epoch": 1.312442396313364, "percentage": 
65.62, "elapsed_time": "7:52:33", "remaining_time": "4:07:33"} +{"current_steps": 5697, "total_steps": 8680, "loss": 0.749663770198822, "lr": 5.795505901294051e-07, "epoch": 1.3126728110599077, "percentage": 65.63, "elapsed_time": "7:52:37", "remaining_time": "4:07:28"} +{"current_steps": 5698, "total_steps": 8680, "loss": 0.9003115296363831, "lr": 5.792049479341732e-07, "epoch": 1.3129032258064517, "percentage": 65.65, "elapsed_time": "7:52:41", "remaining_time": "4:07:22"} +{"current_steps": 5699, "total_steps": 8680, "loss": 0.655732274055481, "lr": 5.788593668167854e-07, "epoch": 1.3131336405529954, "percentage": 65.66, "elapsed_time": "7:52:47", "remaining_time": "4:07:18"} +{"current_steps": 5700, "total_steps": 8680, "loss": 0.7318822145462036, "lr": 5.785138468274036e-07, "epoch": 1.3133640552995391, "percentage": 65.67, "elapsed_time": "7:52:53", "remaining_time": "4:07:14"} +{"current_steps": 5701, "total_steps": 8680, "loss": 0.6512752771377563, "lr": 5.781683880161788e-07, "epoch": 1.3135944700460829, "percentage": 65.68, "elapsed_time": "7:53:00", "remaining_time": "4:07:10"} +{"current_steps": 5702, "total_steps": 8680, "loss": 0.7232785820960999, "lr": 5.778229904332537e-07, "epoch": 1.3138248847926266, "percentage": 65.69, "elapsed_time": "7:53:04", "remaining_time": "4:07:04"} +{"current_steps": 5703, "total_steps": 8680, "loss": 0.837032675743103, "lr": 5.77477654128763e-07, "epoch": 1.3140552995391706, "percentage": 65.7, "elapsed_time": "7:53:09", "remaining_time": "4:06:59"} +{"current_steps": 5704, "total_steps": 8680, "loss": 0.926714301109314, "lr": 5.771323791528315e-07, "epoch": 1.3142857142857143, "percentage": 65.71, "elapsed_time": "7:53:13", "remaining_time": "4:06:54"} +{"current_steps": 5705, "total_steps": 8680, "loss": 0.7228986620903015, "lr": 5.76787165555575e-07, "epoch": 1.314516129032258, "percentage": 65.73, "elapsed_time": "7:53:17", "remaining_time": "4:06:48"} +{"current_steps": 5706, "total_steps": 8680, "loss": 
0.8330450057983398, "lr": 5.764420133871015e-07, "epoch": 1.314746543778802, "percentage": 65.74, "elapsed_time": "7:53:23", "remaining_time": "4:06:44"} +{"current_steps": 5707, "total_steps": 8680, "loss": 0.793700098991394, "lr": 5.760969226975088e-07, "epoch": 1.3149769585253457, "percentage": 65.75, "elapsed_time": "7:53:29", "remaining_time": "4:06:39"} +{"current_steps": 5708, "total_steps": 8680, "loss": 0.8797321319580078, "lr": 5.757518935368868e-07, "epoch": 1.3152073732718894, "percentage": 65.76, "elapsed_time": "7:53:35", "remaining_time": "4:06:34"} +{"current_steps": 5709, "total_steps": 8680, "loss": 0.8772039413452148, "lr": 5.754069259553159e-07, "epoch": 1.3154377880184331, "percentage": 65.77, "elapsed_time": "7:53:39", "remaining_time": "4:06:29"} +{"current_steps": 5710, "total_steps": 8680, "loss": 0.5998358726501465, "lr": 5.750620200028672e-07, "epoch": 1.3156682027649769, "percentage": 65.78, "elapsed_time": "7:53:44", "remaining_time": "4:06:24"} +{"current_steps": 5711, "total_steps": 8680, "loss": 0.7694767713546753, "lr": 5.747171757296041e-07, "epoch": 1.3158986175115208, "percentage": 65.79, "elapsed_time": "7:53:51", "remaining_time": "4:06:20"} +{"current_steps": 5712, "total_steps": 8680, "loss": 0.8526760339736938, "lr": 5.7437239318558e-07, "epoch": 1.3161290322580645, "percentage": 65.81, "elapsed_time": "7:53:56", "remaining_time": "4:06:15"} +{"current_steps": 5713, "total_steps": 8680, "loss": 0.8407987356185913, "lr": 5.740276724208396e-07, "epoch": 1.3163594470046083, "percentage": 65.82, "elapsed_time": "7:54:01", "remaining_time": "4:06:11"} +{"current_steps": 5714, "total_steps": 8680, "loss": 0.9731476306915283, "lr": 5.736830134854183e-07, "epoch": 1.316589861751152, "percentage": 65.83, "elapsed_time": "7:54:06", "remaining_time": "4:06:05"} +{"current_steps": 5715, "total_steps": 8680, "loss": 0.7230468988418579, "lr": 5.733384164293434e-07, "epoch": 1.3168202764976957, "percentage": 65.84, "elapsed_time": 
"7:54:10", "remaining_time": "4:06:00"} +{"current_steps": 5716, "total_steps": 8680, "loss": 0.8260238766670227, "lr": 5.729938813026327e-07, "epoch": 1.3170506912442397, "percentage": 65.85, "elapsed_time": "7:54:15", "remaining_time": "4:05:55"} +{"current_steps": 5717, "total_steps": 8680, "loss": 0.7616437673568726, "lr": 5.726494081552948e-07, "epoch": 1.3172811059907834, "percentage": 65.86, "elapsed_time": "7:54:19", "remaining_time": "4:05:50"} +{"current_steps": 5718, "total_steps": 8680, "loss": 0.7628509998321533, "lr": 5.723049970373295e-07, "epoch": 1.3175115207373271, "percentage": 65.88, "elapsed_time": "7:54:25", "remaining_time": "4:05:45"} +{"current_steps": 5719, "total_steps": 8680, "loss": 0.744842529296875, "lr": 5.719606479987273e-07, "epoch": 1.317741935483871, "percentage": 65.89, "elapsed_time": "7:54:30", "remaining_time": "4:05:40"} +{"current_steps": 5720, "total_steps": 8680, "loss": 0.7228065133094788, "lr": 5.716163610894708e-07, "epoch": 1.3179723502304148, "percentage": 65.9, "elapsed_time": "7:54:35", "remaining_time": "4:05:35"} +{"current_steps": 5721, "total_steps": 8680, "loss": 0.8764907121658325, "lr": 5.712721363595325e-07, "epoch": 1.3182027649769585, "percentage": 65.91, "elapsed_time": "7:54:39", "remaining_time": "4:05:30"} +{"current_steps": 5722, "total_steps": 8680, "loss": 0.7966248393058777, "lr": 5.709279738588757e-07, "epoch": 1.3184331797235023, "percentage": 65.92, "elapsed_time": "7:54:44", "remaining_time": "4:05:25"} +{"current_steps": 5723, "total_steps": 8680, "loss": 0.8983157873153687, "lr": 5.705838736374558e-07, "epoch": 1.318663594470046, "percentage": 65.93, "elapsed_time": "7:54:50", "remaining_time": "4:05:20"} +{"current_steps": 5724, "total_steps": 8680, "loss": 0.7349347472190857, "lr": 5.70239835745218e-07, "epoch": 1.31889400921659, "percentage": 65.94, "elapsed_time": "7:54:54", "remaining_time": "4:05:15"} +{"current_steps": 5725, "total_steps": 8680, "loss": 0.9297066926956177, "lr": 
5.698958602320988e-07, "epoch": 1.3191244239631337, "percentage": 65.96, "elapsed_time": "7:54:59", "remaining_time": "4:05:10"} +{"current_steps": 5726, "total_steps": 8680, "loss": 0.7106038331985474, "lr": 5.695519471480266e-07, "epoch": 1.3193548387096774, "percentage": 65.97, "elapsed_time": "7:55:05", "remaining_time": "4:05:05"} +{"current_steps": 5727, "total_steps": 8680, "loss": 0.8759022951126099, "lr": 5.692080965429193e-07, "epoch": 1.3195852534562211, "percentage": 65.98, "elapsed_time": "7:55:10", "remaining_time": "4:05:00"} +{"current_steps": 5728, "total_steps": 8680, "loss": 0.8337300419807434, "lr": 5.688643084666862e-07, "epoch": 1.3198156682027649, "percentage": 65.99, "elapsed_time": "7:55:15", "remaining_time": "4:04:55"} +{"current_steps": 5729, "total_steps": 8680, "loss": 0.8543391227722168, "lr": 5.685205829692283e-07, "epoch": 1.3200460829493088, "percentage": 66.0, "elapsed_time": "7:55:20", "remaining_time": "4:04:50"} +{"current_steps": 5730, "total_steps": 8680, "loss": 0.7497329711914062, "lr": 5.681769201004366e-07, "epoch": 1.3202764976958525, "percentage": 66.01, "elapsed_time": "7:55:27", "remaining_time": "4:04:46"} +{"current_steps": 5731, "total_steps": 8680, "loss": 0.8190964460372925, "lr": 5.678333199101929e-07, "epoch": 1.3205069124423963, "percentage": 66.03, "elapsed_time": "7:55:31", "remaining_time": "4:04:41"} +{"current_steps": 5732, "total_steps": 8680, "loss": 0.8233011960983276, "lr": 5.674897824483711e-07, "epoch": 1.3207373271889402, "percentage": 66.04, "elapsed_time": "7:55:36", "remaining_time": "4:04:36"} +{"current_steps": 5733, "total_steps": 8680, "loss": 0.75257408618927, "lr": 5.671463077648348e-07, "epoch": 1.320967741935484, "percentage": 66.05, "elapsed_time": "7:55:42", "remaining_time": "4:04:32"} +{"current_steps": 5734, "total_steps": 8680, "loss": 0.6468796133995056, "lr": 5.668028959094386e-07, "epoch": 1.3211981566820277, "percentage": 66.06, "elapsed_time": "7:55:49", "remaining_time": 
"4:04:28"} +{"current_steps": 5735, "total_steps": 8680, "loss": 0.6756174564361572, "lr": 5.664595469320288e-07, "epoch": 1.3214285714285714, "percentage": 66.07, "elapsed_time": "7:55:56", "remaining_time": "4:04:24"} +{"current_steps": 5736, "total_steps": 8680, "loss": 0.9040344953536987, "lr": 5.661162608824419e-07, "epoch": 1.3216589861751151, "percentage": 66.08, "elapsed_time": "7:56:02", "remaining_time": "4:04:19"} +{"current_steps": 5737, "total_steps": 8680, "loss": 0.8082150816917419, "lr": 5.657730378105055e-07, "epoch": 1.321889400921659, "percentage": 66.09, "elapsed_time": "7:56:06", "remaining_time": "4:04:14"} +{"current_steps": 5738, "total_steps": 8680, "loss": 0.8760210275650024, "lr": 5.654298777660375e-07, "epoch": 1.3221198156682028, "percentage": 66.11, "elapsed_time": "7:56:10", "remaining_time": "4:04:08"} +{"current_steps": 5739, "total_steps": 8680, "loss": 0.6980990171432495, "lr": 5.650867807988473e-07, "epoch": 1.3223502304147465, "percentage": 66.12, "elapsed_time": "7:56:15", "remaining_time": "4:04:03"} +{"current_steps": 5740, "total_steps": 8680, "loss": 0.6552839279174805, "lr": 5.647437469587355e-07, "epoch": 1.3225806451612903, "percentage": 66.13, "elapsed_time": "7:56:21", "remaining_time": "4:03:59"} +{"current_steps": 5741, "total_steps": 8680, "loss": 0.8304816484451294, "lr": 5.644007762954925e-07, "epoch": 1.322811059907834, "percentage": 66.14, "elapsed_time": "7:56:27", "remaining_time": "4:03:54"} +{"current_steps": 5742, "total_steps": 8680, "loss": 0.7977567315101624, "lr": 5.640578688589e-07, "epoch": 1.323041474654378, "percentage": 66.15, "elapsed_time": "7:56:31", "remaining_time": "4:03:49"} +{"current_steps": 5743, "total_steps": 8680, "loss": 0.7656992673873901, "lr": 5.637150246987308e-07, "epoch": 1.3232718894009217, "percentage": 66.16, "elapsed_time": "7:56:35", "remaining_time": "4:03:43"} +{"current_steps": 5744, "total_steps": 8680, "loss": 0.921256422996521, "lr": 5.633722438647483e-07, "epoch": 
1.3235023041474654, "percentage": 66.18, "elapsed_time": "7:56:39", "remaining_time": "4:03:38"} +{"current_steps": 5745, "total_steps": 8680, "loss": 0.8012785315513611, "lr": 5.630295264067063e-07, "epoch": 1.3237327188940093, "percentage": 66.19, "elapsed_time": "7:56:43", "remaining_time": "4:03:32"} +{"current_steps": 5746, "total_steps": 8680, "loss": 0.613241970539093, "lr": 5.626868723743504e-07, "epoch": 1.323963133640553, "percentage": 66.2, "elapsed_time": "7:56:49", "remaining_time": "4:03:28"} +{"current_steps": 5747, "total_steps": 8680, "loss": 0.7134846448898315, "lr": 5.623442818174161e-07, "epoch": 1.3241935483870968, "percentage": 66.21, "elapsed_time": "7:56:54", "remaining_time": "4:03:23"} +{"current_steps": 5748, "total_steps": 8680, "loss": 0.8963242173194885, "lr": 5.620017547856295e-07, "epoch": 1.3244239631336405, "percentage": 66.22, "elapsed_time": "7:56:59", "remaining_time": "4:03:18"} +{"current_steps": 5749, "total_steps": 8680, "loss": 0.8401378393173218, "lr": 5.616592913287087e-07, "epoch": 1.3246543778801843, "percentage": 66.23, "elapsed_time": "7:57:04", "remaining_time": "4:03:13"} +{"current_steps": 5750, "total_steps": 8680, "loss": 0.6455308198928833, "lr": 5.613168914963615e-07, "epoch": 1.3248847926267282, "percentage": 66.24, "elapsed_time": "7:57:10", "remaining_time": "4:03:09"} +{"current_steps": 5751, "total_steps": 8680, "loss": 0.6920031905174255, "lr": 5.609745553382863e-07, "epoch": 1.325115207373272, "percentage": 66.26, "elapsed_time": "7:57:16", "remaining_time": "4:03:04"} +{"current_steps": 5752, "total_steps": 8680, "loss": 0.9099706411361694, "lr": 5.606322829041737e-07, "epoch": 1.3253456221198157, "percentage": 66.27, "elapsed_time": "7:57:21", "remaining_time": "4:02:59"} +{"current_steps": 5753, "total_steps": 8680, "loss": 0.8034265637397766, "lr": 5.602900742437036e-07, "epoch": 1.3255760368663594, "percentage": 66.28, "elapsed_time": "7:57:25", "remaining_time": "4:02:54"} +{"current_steps": 5754, 
"total_steps": 8680, "loss": 0.7216918468475342, "lr": 5.599479294065471e-07, "epoch": 1.3258064516129031, "percentage": 66.29, "elapsed_time": "7:57:32", "remaining_time": "4:02:50"} +{"current_steps": 5755, "total_steps": 8680, "loss": 0.7428277730941772, "lr": 5.596058484423655e-07, "epoch": 1.326036866359447, "percentage": 66.3, "elapsed_time": "7:57:38", "remaining_time": "4:02:45"} +{"current_steps": 5756, "total_steps": 8680, "loss": 0.7636011838912964, "lr": 5.592638314008127e-07, "epoch": 1.3262672811059908, "percentage": 66.31, "elapsed_time": "7:57:42", "remaining_time": "4:02:40"} +{"current_steps": 5757, "total_steps": 8680, "loss": 0.7765215635299683, "lr": 5.589218783315311e-07, "epoch": 1.3264976958525345, "percentage": 66.32, "elapsed_time": "7:57:46", "remaining_time": "4:02:34"} +{"current_steps": 5758, "total_steps": 8680, "loss": 0.6524033546447754, "lr": 5.585799892841551e-07, "epoch": 1.3267281105990785, "percentage": 66.34, "elapsed_time": "7:57:52", "remaining_time": "4:02:30"} +{"current_steps": 5759, "total_steps": 8680, "loss": 0.8105186223983765, "lr": 5.582381643083087e-07, "epoch": 1.3269585253456222, "percentage": 66.35, "elapsed_time": "7:57:58", "remaining_time": "4:02:25"} +{"current_steps": 5760, "total_steps": 8680, "loss": 0.7654449939727783, "lr": 5.578964034536084e-07, "epoch": 1.327188940092166, "percentage": 66.36, "elapsed_time": "7:58:04", "remaining_time": "4:02:21"} +{"current_steps": 5761, "total_steps": 8680, "loss": 0.6545592546463013, "lr": 5.5755470676966e-07, "epoch": 1.3274193548387097, "percentage": 66.37, "elapsed_time": "7:58:09", "remaining_time": "4:02:16"} +{"current_steps": 5762, "total_steps": 8680, "loss": 0.7116275429725647, "lr": 5.572130743060597e-07, "epoch": 1.3276497695852534, "percentage": 66.38, "elapsed_time": "7:58:14", "remaining_time": "4:02:11"} +{"current_steps": 5763, "total_steps": 8680, "loss": 0.8396822214126587, "lr": 5.568715061123959e-07, "epoch": 1.3278801843317973, "percentage": 
66.39, "elapsed_time": "7:58:19", "remaining_time": "4:02:06"} +{"current_steps": 5764, "total_steps": 8680, "loss": 0.6729685664176941, "lr": 5.565300022382464e-07, "epoch": 1.328110599078341, "percentage": 66.41, "elapsed_time": "7:58:26", "remaining_time": "4:02:02"} +{"current_steps": 5765, "total_steps": 8680, "loss": 0.6891340017318726, "lr": 5.561885627331795e-07, "epoch": 1.3283410138248848, "percentage": 66.42, "elapsed_time": "7:58:32", "remaining_time": "4:01:58"} +{"current_steps": 5766, "total_steps": 8680, "loss": 0.7232956886291504, "lr": 5.558471876467556e-07, "epoch": 1.3285714285714285, "percentage": 66.43, "elapsed_time": "7:58:37", "remaining_time": "4:01:53"} +{"current_steps": 5767, "total_steps": 8680, "loss": 0.7800660133361816, "lr": 5.555058770285246e-07, "epoch": 1.3288018433179722, "percentage": 66.44, "elapsed_time": "7:58:42", "remaining_time": "4:01:48"} +{"current_steps": 5768, "total_steps": 8680, "loss": 0.6794005036354065, "lr": 5.551646309280266e-07, "epoch": 1.3290322580645162, "percentage": 66.45, "elapsed_time": "7:58:48", "remaining_time": "4:01:43"} +{"current_steps": 5769, "total_steps": 8680, "loss": 0.7739551067352295, "lr": 5.548234493947939e-07, "epoch": 1.32926267281106, "percentage": 66.46, "elapsed_time": "7:58:53", "remaining_time": "4:01:38"} +{"current_steps": 5770, "total_steps": 8680, "loss": 0.759978711605072, "lr": 5.544823324783482e-07, "epoch": 1.3294930875576036, "percentage": 66.47, "elapsed_time": "7:58:58", "remaining_time": "4:01:33"} +{"current_steps": 5771, "total_steps": 8680, "loss": 0.7563333511352539, "lr": 5.541412802282017e-07, "epoch": 1.3297235023041476, "percentage": 66.49, "elapsed_time": "7:59:05", "remaining_time": "4:01:29"} +{"current_steps": 5772, "total_steps": 8680, "loss": 0.6705852746963501, "lr": 5.538002926938587e-07, "epoch": 1.3299539170506913, "percentage": 66.5, "elapsed_time": "7:59:10", "remaining_time": "4:01:24"} +{"current_steps": 5773, "total_steps": 8680, "loss": 
0.8343281745910645, "lr": 5.534593699248124e-07, "epoch": 1.330184331797235, "percentage": 66.51, "elapsed_time": "7:59:15", "remaining_time": "4:01:19"} +{"current_steps": 5774, "total_steps": 8680, "loss": 0.7158486843109131, "lr": 5.531185119705474e-07, "epoch": 1.3304147465437788, "percentage": 66.52, "elapsed_time": "7:59:20", "remaining_time": "4:01:14"} +{"current_steps": 5775, "total_steps": 8680, "loss": 0.8888766765594482, "lr": 5.527777188805385e-07, "epoch": 1.3306451612903225, "percentage": 66.53, "elapsed_time": "7:59:25", "remaining_time": "4:01:09"} +{"current_steps": 5776, "total_steps": 8680, "loss": 0.873813271522522, "lr": 5.524369907042519e-07, "epoch": 1.3308755760368665, "percentage": 66.54, "elapsed_time": "7:59:30", "remaining_time": "4:01:04"} +{"current_steps": 5777, "total_steps": 8680, "loss": 0.7654919624328613, "lr": 5.520963274911437e-07, "epoch": 1.3311059907834102, "percentage": 66.56, "elapsed_time": "7:59:36", "remaining_time": "4:01:00"} +{"current_steps": 5778, "total_steps": 8680, "loss": 0.6976190805435181, "lr": 5.517557292906606e-07, "epoch": 1.331336405529954, "percentage": 66.57, "elapsed_time": "7:59:40", "remaining_time": "4:00:55"} +{"current_steps": 5779, "total_steps": 8680, "loss": 0.8356388807296753, "lr": 5.5141519615224e-07, "epoch": 1.3315668202764976, "percentage": 66.58, "elapsed_time": "7:59:45", "remaining_time": "4:00:50"} +{"current_steps": 5780, "total_steps": 8680, "loss": 0.719998836517334, "lr": 5.510747281253094e-07, "epoch": 1.3317972350230414, "percentage": 66.59, "elapsed_time": "7:59:49", "remaining_time": "4:00:44"} +{"current_steps": 5781, "total_steps": 8680, "loss": 0.8432124853134155, "lr": 5.507343252592882e-07, "epoch": 1.3320276497695853, "percentage": 66.6, "elapsed_time": "7:59:55", "remaining_time": "4:00:40"} +{"current_steps": 5782, "total_steps": 8680, "loss": 0.8426402807235718, "lr": 5.503939876035845e-07, "epoch": 1.332258064516129, "percentage": 66.61, "elapsed_time": "8:00:01", 
"remaining_time": "4:00:35"} +{"current_steps": 5783, "total_steps": 8680, "loss": 0.8133292198181152, "lr": 5.500537152075986e-07, "epoch": 1.3324884792626728, "percentage": 66.62, "elapsed_time": "8:00:06", "remaining_time": "4:00:30"} +{"current_steps": 5784, "total_steps": 8680, "loss": 0.8097467422485352, "lr": 5.497135081207205e-07, "epoch": 1.3327188940092167, "percentage": 66.64, "elapsed_time": "8:00:11", "remaining_time": "4:00:25"} +{"current_steps": 5785, "total_steps": 8680, "loss": 0.6943382024765015, "lr": 5.493733663923299e-07, "epoch": 1.3329493087557602, "percentage": 66.65, "elapsed_time": "8:00:16", "remaining_time": "4:00:20"} +{"current_steps": 5786, "total_steps": 8680, "loss": 0.5896245837211609, "lr": 5.490332900717993e-07, "epoch": 1.3331797235023042, "percentage": 66.66, "elapsed_time": "8:00:22", "remaining_time": "4:00:16"} +{"current_steps": 5787, "total_steps": 8680, "loss": 0.6837725639343262, "lr": 5.486932792084895e-07, "epoch": 1.333410138248848, "percentage": 66.67, "elapsed_time": "8:00:28", "remaining_time": "4:00:11"} +{"current_steps": 5788, "total_steps": 8680, "loss": 0.8371915221214294, "lr": 5.483533338517523e-07, "epoch": 1.3336405529953916, "percentage": 66.68, "elapsed_time": "8:00:32", "remaining_time": "4:00:06"} +{"current_steps": 5789, "total_steps": 8680, "loss": 0.8001077175140381, "lr": 5.480134540509313e-07, "epoch": 1.3338709677419356, "percentage": 66.69, "elapsed_time": "8:00:37", "remaining_time": "4:00:01"} +{"current_steps": 5790, "total_steps": 8680, "loss": 0.9070717096328735, "lr": 5.476736398553591e-07, "epoch": 1.3341013824884793, "percentage": 66.71, "elapsed_time": "8:00:41", "remaining_time": "3:59:55"} +{"current_steps": 5791, "total_steps": 8680, "loss": 0.9061849117279053, "lr": 5.473338913143589e-07, "epoch": 1.334331797235023, "percentage": 66.72, "elapsed_time": "8:00:46", "remaining_time": "3:59:50"} +{"current_steps": 5792, "total_steps": 8680, "loss": 0.8465786576271057, "lr": 
5.469942084772454e-07, "epoch": 1.3345622119815668, "percentage": 66.73, "elapsed_time": "8:00:51", "remaining_time": "3:59:45"} +{"current_steps": 5793, "total_steps": 8680, "loss": 0.8221259117126465, "lr": 5.466545913933229e-07, "epoch": 1.3347926267281105, "percentage": 66.74, "elapsed_time": "8:00:57", "remaining_time": "3:59:41"} +{"current_steps": 5794, "total_steps": 8680, "loss": 0.594088077545166, "lr": 5.463150401118864e-07, "epoch": 1.3350230414746544, "percentage": 66.75, "elapsed_time": "8:01:02", "remaining_time": "3:59:36"} +{"current_steps": 5795, "total_steps": 8680, "loss": 0.6983529925346375, "lr": 5.459755546822207e-07, "epoch": 1.3352534562211982, "percentage": 66.76, "elapsed_time": "8:01:08", "remaining_time": "3:59:31"} +{"current_steps": 5796, "total_steps": 8680, "loss": 0.7720709443092346, "lr": 5.456361351536027e-07, "epoch": 1.335483870967742, "percentage": 66.77, "elapsed_time": "8:01:13", "remaining_time": "3:59:26"} +{"current_steps": 5797, "total_steps": 8680, "loss": 0.8087977766990662, "lr": 5.45296781575298e-07, "epoch": 1.3357142857142856, "percentage": 66.79, "elapsed_time": "8:01:17", "remaining_time": "3:59:21"} +{"current_steps": 5798, "total_steps": 8680, "loss": 0.6808000802993774, "lr": 5.449574939965636e-07, "epoch": 1.3359447004608294, "percentage": 66.8, "elapsed_time": "8:01:21", "remaining_time": "3:59:16"} +{"current_steps": 5799, "total_steps": 8680, "loss": 0.7222881317138672, "lr": 5.446182724666466e-07, "epoch": 1.3361751152073733, "percentage": 66.81, "elapsed_time": "8:01:27", "remaining_time": "3:59:11"} +{"current_steps": 5800, "total_steps": 8680, "loss": 0.872687578201294, "lr": 5.44279117034784e-07, "epoch": 1.336405529953917, "percentage": 66.82, "elapsed_time": "8:01:31", "remaining_time": "3:59:06"} +{"current_steps": 5801, "total_steps": 8680, "loss": 0.7728114128112793, "lr": 5.439400277502048e-07, "epoch": 1.3366359447004608, "percentage": 66.83, "elapsed_time": "8:01:38", "remaining_time": 
"3:59:01"} +{"current_steps": 5802, "total_steps": 8680, "loss": 0.807528018951416, "lr": 5.436010046621267e-07, "epoch": 1.3368663594470047, "percentage": 66.84, "elapsed_time": "8:01:43", "remaining_time": "3:58:56"} +{"current_steps": 5803, "total_steps": 8680, "loss": 0.6997063159942627, "lr": 5.432620478197583e-07, "epoch": 1.3370967741935484, "percentage": 66.85, "elapsed_time": "8:01:49", "remaining_time": "3:58:52"} +{"current_steps": 5804, "total_steps": 8680, "loss": 0.797568678855896, "lr": 5.429231572722995e-07, "epoch": 1.3373271889400922, "percentage": 66.87, "elapsed_time": "8:01:54", "remaining_time": "3:58:47"} +{"current_steps": 5805, "total_steps": 8680, "loss": 0.6412359476089478, "lr": 5.425843330689386e-07, "epoch": 1.337557603686636, "percentage": 66.88, "elapsed_time": "8:01:59", "remaining_time": "3:58:42"} +{"current_steps": 5806, "total_steps": 8680, "loss": 0.8605507612228394, "lr": 5.422455752588569e-07, "epoch": 1.3377880184331796, "percentage": 66.89, "elapsed_time": "8:02:04", "remaining_time": "3:58:38"} +{"current_steps": 5807, "total_steps": 8680, "loss": 0.856192946434021, "lr": 5.419068838912238e-07, "epoch": 1.3380184331797236, "percentage": 66.9, "elapsed_time": "8:02:10", "remaining_time": "3:58:33"} +{"current_steps": 5808, "total_steps": 8680, "loss": 0.8614650368690491, "lr": 5.415682590151998e-07, "epoch": 1.3382488479262673, "percentage": 66.91, "elapsed_time": "8:02:14", "remaining_time": "3:58:27"} +{"current_steps": 5809, "total_steps": 8680, "loss": 0.9675840139389038, "lr": 5.412297006799365e-07, "epoch": 1.338479262672811, "percentage": 66.92, "elapsed_time": "8:02:19", "remaining_time": "3:58:23"} +{"current_steps": 5810, "total_steps": 8680, "loss": 0.7333405017852783, "lr": 5.408912089345747e-07, "epoch": 1.3387096774193548, "percentage": 66.94, "elapsed_time": "8:02:25", "remaining_time": "3:58:18"} +{"current_steps": 5811, "total_steps": 8680, "loss": 0.8271909952163696, "lr": 5.405527838282457e-07, "epoch": 
1.3389400921658985, "percentage": 66.95, "elapsed_time": "8:02:31", "remaining_time": "3:58:14"} +{"current_steps": 5812, "total_steps": 8680, "loss": 0.8036069869995117, "lr": 5.402144254100724e-07, "epoch": 1.3391705069124424, "percentage": 66.96, "elapsed_time": "8:02:38", "remaining_time": "3:58:09"} +{"current_steps": 5813, "total_steps": 8680, "loss": 0.855912446975708, "lr": 5.398761337291667e-07, "epoch": 1.3394009216589862, "percentage": 66.97, "elapsed_time": "8:02:42", "remaining_time": "3:58:04"} +{"current_steps": 5814, "total_steps": 8680, "loss": 0.8198536038398743, "lr": 5.395379088346309e-07, "epoch": 1.33963133640553, "percentage": 66.98, "elapsed_time": "8:02:46", "remaining_time": "3:57:58"} +{"current_steps": 5815, "total_steps": 8680, "loss": 0.8931646347045898, "lr": 5.391997507755581e-07, "epoch": 1.3398617511520738, "percentage": 66.99, "elapsed_time": "8:02:50", "remaining_time": "3:57:53"} +{"current_steps": 5816, "total_steps": 8680, "loss": 0.7073954343795776, "lr": 5.388616596010312e-07, "epoch": 1.3400921658986176, "percentage": 67.0, "elapsed_time": "8:02:56", "remaining_time": "3:57:49"} +{"current_steps": 5817, "total_steps": 8680, "loss": 0.7758424282073975, "lr": 5.385236353601241e-07, "epoch": 1.3403225806451613, "percentage": 67.02, "elapsed_time": "8:03:01", "remaining_time": "3:57:44"} +{"current_steps": 5818, "total_steps": 8680, "loss": 0.6805497407913208, "lr": 5.381856781019005e-07, "epoch": 1.340552995391705, "percentage": 67.03, "elapsed_time": "8:03:06", "remaining_time": "3:57:39"} +{"current_steps": 5819, "total_steps": 8680, "loss": 0.8956538438796997, "lr": 5.378477878754144e-07, "epoch": 1.3407834101382488, "percentage": 67.04, "elapsed_time": "8:03:11", "remaining_time": "3:57:33"} +{"current_steps": 5820, "total_steps": 8680, "loss": 0.7819657921791077, "lr": 5.375099647297096e-07, "epoch": 1.3410138248847927, "percentage": 67.05, "elapsed_time": "8:03:16", "remaining_time": "3:57:28"} +{"current_steps": 5821, 
"total_steps": 8680, "loss": 0.5764007568359375, "lr": 5.371722087138217e-07, "epoch": 1.3412442396313364, "percentage": 67.06, "elapsed_time": "8:03:20", "remaining_time": "3:57:23"} +{"current_steps": 5822, "total_steps": 8680, "loss": 0.697022557258606, "lr": 5.368345198767749e-07, "epoch": 1.3414746543778802, "percentage": 67.07, "elapsed_time": "8:03:27", "remaining_time": "3:57:19"} +{"current_steps": 5823, "total_steps": 8680, "loss": 0.7773014307022095, "lr": 5.364968982675839e-07, "epoch": 1.3417050691244239, "percentage": 67.09, "elapsed_time": "8:03:32", "remaining_time": "3:57:14"} +{"current_steps": 5824, "total_steps": 8680, "loss": 0.7395004034042358, "lr": 5.361593439352551e-07, "epoch": 1.3419354838709676, "percentage": 67.1, "elapsed_time": "8:03:37", "remaining_time": "3:57:09"} +{"current_steps": 5825, "total_steps": 8680, "loss": 0.7989716529846191, "lr": 5.358218569287834e-07, "epoch": 1.3421658986175116, "percentage": 67.11, "elapsed_time": "8:03:42", "remaining_time": "3:57:04"} +{"current_steps": 5826, "total_steps": 8680, "loss": 0.8894884586334229, "lr": 5.354844372971543e-07, "epoch": 1.3423963133640553, "percentage": 67.12, "elapsed_time": "8:03:47", "remaining_time": "3:56:59"} +{"current_steps": 5827, "total_steps": 8680, "loss": 0.8415021300315857, "lr": 5.351470850893446e-07, "epoch": 1.342626728110599, "percentage": 67.13, "elapsed_time": "8:03:51", "remaining_time": "3:56:54"} +{"current_steps": 5828, "total_steps": 8680, "loss": 0.9963078498840332, "lr": 5.3480980035432e-07, "epoch": 1.342857142857143, "percentage": 67.14, "elapsed_time": "8:03:55", "remaining_time": "3:56:49"} +{"current_steps": 5829, "total_steps": 8680, "loss": 0.8489943742752075, "lr": 5.344725831410368e-07, "epoch": 1.3430875576036867, "percentage": 67.15, "elapsed_time": "8:04:01", "remaining_time": "3:56:44"} +{"current_steps": 5830, "total_steps": 8680, "loss": 0.6949954032897949, "lr": 5.341354334984422e-07, "epoch": 1.3433179723502304, "percentage": 
67.17, "elapsed_time": "8:04:06", "remaining_time": "3:56:39"} +{"current_steps": 5831, "total_steps": 8680, "loss": 0.878408670425415, "lr": 5.337983514754722e-07, "epoch": 1.3435483870967742, "percentage": 67.18, "elapsed_time": "8:04:12", "remaining_time": "3:56:34"} +{"current_steps": 5832, "total_steps": 8680, "loss": 0.722877025604248, "lr": 5.334613371210549e-07, "epoch": 1.3437788018433179, "percentage": 67.19, "elapsed_time": "8:04:17", "remaining_time": "3:56:29"} +{"current_steps": 5833, "total_steps": 8680, "loss": 0.670013427734375, "lr": 5.331243904841068e-07, "epoch": 1.3440092165898618, "percentage": 67.2, "elapsed_time": "8:04:22", "remaining_time": "3:56:24"} +{"current_steps": 5834, "total_steps": 8680, "loss": 0.8336968421936035, "lr": 5.327875116135354e-07, "epoch": 1.3442396313364056, "percentage": 67.21, "elapsed_time": "8:04:26", "remaining_time": "3:56:19"} +{"current_steps": 5835, "total_steps": 8680, "loss": 0.7917020916938782, "lr": 5.324507005582381e-07, "epoch": 1.3444700460829493, "percentage": 67.22, "elapsed_time": "8:04:30", "remaining_time": "3:56:13"} +{"current_steps": 5836, "total_steps": 8680, "loss": 0.7479217052459717, "lr": 5.321139573671024e-07, "epoch": 1.344700460829493, "percentage": 67.24, "elapsed_time": "8:04:35", "remaining_time": "3:56:08"} +{"current_steps": 5837, "total_steps": 8680, "loss": 0.8059084415435791, "lr": 5.317772820890068e-07, "epoch": 1.3449308755760367, "percentage": 67.25, "elapsed_time": "8:04:40", "remaining_time": "3:56:04"} +{"current_steps": 5838, "total_steps": 8680, "loss": 0.6853187680244446, "lr": 5.314406747728186e-07, "epoch": 1.3451612903225807, "percentage": 67.26, "elapsed_time": "8:04:47", "remaining_time": "3:55:59"} +{"current_steps": 5839, "total_steps": 8680, "loss": 0.7769491672515869, "lr": 5.311041354673964e-07, "epoch": 1.3453917050691244, "percentage": 67.27, "elapsed_time": "8:04:52", "remaining_time": "3:55:55"} +{"current_steps": 5840, "total_steps": 8680, "loss": 
0.6669384241104126, "lr": 5.307676642215877e-07, "epoch": 1.3456221198156681, "percentage": 67.28, "elapsed_time": "8:04:58", "remaining_time": "3:55:50"} +{"current_steps": 5841, "total_steps": 8680, "loss": 0.7884945869445801, "lr": 5.304312610842319e-07, "epoch": 1.345852534562212, "percentage": 67.29, "elapsed_time": "8:05:02", "remaining_time": "3:55:45"} +{"current_steps": 5842, "total_steps": 8680, "loss": 0.8030047416687012, "lr": 5.300949261041567e-07, "epoch": 1.3460829493087558, "percentage": 67.3, "elapsed_time": "8:05:07", "remaining_time": "3:55:40"} +{"current_steps": 5843, "total_steps": 8680, "loss": 0.7792675495147705, "lr": 5.297586593301806e-07, "epoch": 1.3463133640552996, "percentage": 67.32, "elapsed_time": "8:05:13", "remaining_time": "3:55:35"} +{"current_steps": 5844, "total_steps": 8680, "loss": 0.8699119091033936, "lr": 5.29422460811113e-07, "epoch": 1.3465437788018433, "percentage": 67.33, "elapsed_time": "8:05:17", "remaining_time": "3:55:30"} +{"current_steps": 5845, "total_steps": 8680, "loss": 0.8075394630432129, "lr": 5.290863305957523e-07, "epoch": 1.346774193548387, "percentage": 67.34, "elapsed_time": "8:05:22", "remaining_time": "3:55:25"} +{"current_steps": 5846, "total_steps": 8680, "loss": 0.7875077128410339, "lr": 5.287502687328868e-07, "epoch": 1.347004608294931, "percentage": 67.35, "elapsed_time": "8:05:28", "remaining_time": "3:55:20"} +{"current_steps": 5847, "total_steps": 8680, "loss": 0.6799413561820984, "lr": 5.284142752712965e-07, "epoch": 1.3472350230414747, "percentage": 67.36, "elapsed_time": "8:05:32", "remaining_time": "3:55:15"} +{"current_steps": 5848, "total_steps": 8680, "loss": 0.914801299571991, "lr": 5.280783502597496e-07, "epoch": 1.3474654377880184, "percentage": 67.37, "elapsed_time": "8:05:36", "remaining_time": "3:55:09"} +{"current_steps": 5849, "total_steps": 8680, "loss": 0.8591992855072021, "lr": 5.277424937470052e-07, "epoch": 1.3476958525345621, "percentage": 67.38, "elapsed_time": 
"8:05:40", "remaining_time": "3:55:04"} +{"current_steps": 5850, "total_steps": 8680, "loss": 0.7830478549003601, "lr": 5.27406705781813e-07, "epoch": 1.3479262672811059, "percentage": 67.4, "elapsed_time": "8:05:44", "remaining_time": "3:54:58"} +{"current_steps": 5851, "total_steps": 8680, "loss": 0.8365499973297119, "lr": 5.270709864129119e-07, "epoch": 1.3481566820276498, "percentage": 67.41, "elapsed_time": "8:05:49", "remaining_time": "3:54:53"} +{"current_steps": 5852, "total_steps": 8680, "loss": 0.8342669010162354, "lr": 5.267353356890305e-07, "epoch": 1.3483870967741935, "percentage": 67.42, "elapsed_time": "8:05:53", "remaining_time": "3:54:48"} +{"current_steps": 5853, "total_steps": 8680, "loss": 0.7802393436431885, "lr": 5.263997536588891e-07, "epoch": 1.3486175115207373, "percentage": 67.43, "elapsed_time": "8:05:57", "remaining_time": "3:54:43"} +{"current_steps": 5854, "total_steps": 8680, "loss": 0.8245328068733215, "lr": 5.260642403711964e-07, "epoch": 1.3488479262672812, "percentage": 67.44, "elapsed_time": "8:06:02", "remaining_time": "3:54:38"} +{"current_steps": 5855, "total_steps": 8680, "loss": 0.7209265232086182, "lr": 5.257287958746519e-07, "epoch": 1.349078341013825, "percentage": 67.45, "elapsed_time": "8:06:06", "remaining_time": "3:54:32"} +{"current_steps": 5856, "total_steps": 8680, "loss": 0.9258058071136475, "lr": 5.253934202179444e-07, "epoch": 1.3493087557603687, "percentage": 67.47, "elapsed_time": "8:06:10", "remaining_time": "3:54:27"} +{"current_steps": 5857, "total_steps": 8680, "loss": 0.6889467835426331, "lr": 5.25058113449754e-07, "epoch": 1.3495391705069124, "percentage": 67.48, "elapsed_time": "8:06:15", "remaining_time": "3:54:22"} +{"current_steps": 5858, "total_steps": 8680, "loss": 0.8810057640075684, "lr": 5.247228756187498e-07, "epoch": 1.3497695852534561, "percentage": 67.49, "elapsed_time": "8:06:20", "remaining_time": "3:54:17"} +{"current_steps": 5859, "total_steps": 8680, "loss": 0.7236393690109253, "lr": 
5.243877067735909e-07, "epoch": 1.35, "percentage": 67.5, "elapsed_time": "8:06:25", "remaining_time": "3:54:12"} +{"current_steps": 5860, "total_steps": 8680, "loss": 0.8287979364395142, "lr": 5.240526069629264e-07, "epoch": 1.3502304147465438, "percentage": 67.51, "elapsed_time": "8:06:31", "remaining_time": "3:54:07"} +{"current_steps": 5861, "total_steps": 8680, "loss": 0.8268846869468689, "lr": 5.237175762353964e-07, "epoch": 1.3504608294930875, "percentage": 67.52, "elapsed_time": "8:06:36", "remaining_time": "3:54:03"} +{"current_steps": 5862, "total_steps": 8680, "loss": 0.7995575666427612, "lr": 5.233826146396296e-07, "epoch": 1.3506912442396313, "percentage": 67.53, "elapsed_time": "8:06:42", "remaining_time": "3:53:58"} +{"current_steps": 5863, "total_steps": 8680, "loss": 0.7379493713378906, "lr": 5.230477222242449e-07, "epoch": 1.350921658986175, "percentage": 67.55, "elapsed_time": "8:06:48", "remaining_time": "3:53:53"} +{"current_steps": 5864, "total_steps": 8680, "loss": 0.729906439781189, "lr": 5.227128990378524e-07, "epoch": 1.351152073732719, "percentage": 67.56, "elapsed_time": "8:06:52", "remaining_time": "3:53:48"} +{"current_steps": 5865, "total_steps": 8680, "loss": 0.8356789350509644, "lr": 5.223781451290506e-07, "epoch": 1.3513824884792627, "percentage": 67.57, "elapsed_time": "8:06:58", "remaining_time": "3:53:43"} +{"current_steps": 5866, "total_steps": 8680, "loss": 0.8130582571029663, "lr": 5.220434605464285e-07, "epoch": 1.3516129032258064, "percentage": 67.58, "elapsed_time": "8:07:03", "remaining_time": "3:53:38"} +{"current_steps": 5867, "total_steps": 8680, "loss": 0.7686447501182556, "lr": 5.217088453385658e-07, "epoch": 1.3518433179723504, "percentage": 67.59, "elapsed_time": "8:07:08", "remaining_time": "3:53:33"} +{"current_steps": 5868, "total_steps": 8680, "loss": 0.7945844531059265, "lr": 5.213742995540309e-07, "epoch": 1.352073732718894, "percentage": 67.6, "elapsed_time": "8:07:13", "remaining_time": "3:53:29"} 
+{"current_steps": 5869, "total_steps": 8680, "loss": 0.8082837462425232, "lr": 5.210398232413824e-07, "epoch": 1.3523041474654378, "percentage": 67.62, "elapsed_time": "8:07:19", "remaining_time": "3:53:24"} +{"current_steps": 5870, "total_steps": 8680, "loss": 0.7826153039932251, "lr": 5.2070541644917e-07, "epoch": 1.3525345622119815, "percentage": 67.63, "elapsed_time": "8:07:24", "remaining_time": "3:53:19"} +{"current_steps": 5871, "total_steps": 8680, "loss": 0.6853276491165161, "lr": 5.203710792259318e-07, "epoch": 1.3527649769585253, "percentage": 67.64, "elapsed_time": "8:07:29", "remaining_time": "3:53:14"} +{"current_steps": 5872, "total_steps": 8680, "loss": 0.8354780673980713, "lr": 5.200368116201962e-07, "epoch": 1.3529953917050692, "percentage": 67.65, "elapsed_time": "8:07:35", "remaining_time": "3:53:09"} +{"current_steps": 5873, "total_steps": 8680, "loss": 0.7857648134231567, "lr": 5.197026136804823e-07, "epoch": 1.353225806451613, "percentage": 67.66, "elapsed_time": "8:07:39", "remaining_time": "3:53:04"} +{"current_steps": 5874, "total_steps": 8680, "loss": 0.663504958152771, "lr": 5.193684854552982e-07, "epoch": 1.3534562211981567, "percentage": 67.67, "elapsed_time": "8:07:45", "remaining_time": "3:53:00"} +{"current_steps": 5875, "total_steps": 8680, "loss": 0.8192203044891357, "lr": 5.190344269931423e-07, "epoch": 1.3536866359447004, "percentage": 67.68, "elapsed_time": "8:07:51", "remaining_time": "3:52:55"} +{"current_steps": 5876, "total_steps": 8680, "loss": 0.801753044128418, "lr": 5.187004383425024e-07, "epoch": 1.3539170506912441, "percentage": 67.7, "elapsed_time": "8:07:57", "remaining_time": "3:52:51"} +{"current_steps": 5877, "total_steps": 8680, "loss": 0.9427206516265869, "lr": 5.183665195518566e-07, "epoch": 1.354147465437788, "percentage": 67.71, "elapsed_time": "8:08:01", "remaining_time": "3:52:45"} +{"current_steps": 5878, "total_steps": 8680, "loss": 0.7801729440689087, "lr": 5.18032670669673e-07, "epoch": 
1.3543778801843318, "percentage": 67.72, "elapsed_time": "8:08:05", "remaining_time": "3:52:40"} +{"current_steps": 5879, "total_steps": 8680, "loss": 0.8224533796310425, "lr": 5.176988917444094e-07, "epoch": 1.3546082949308755, "percentage": 67.73, "elapsed_time": "8:08:11", "remaining_time": "3:52:35"} +{"current_steps": 5880, "total_steps": 8680, "loss": 0.7800098657608032, "lr": 5.173651828245127e-07, "epoch": 1.3548387096774195, "percentage": 67.74, "elapsed_time": "8:08:17", "remaining_time": "3:52:31"} +{"current_steps": 5881, "total_steps": 8680, "loss": 0.7612746953964233, "lr": 5.170315439584212e-07, "epoch": 1.3550691244239632, "percentage": 67.75, "elapsed_time": "8:08:22", "remaining_time": "3:52:25"} +{"current_steps": 5882, "total_steps": 8680, "loss": 0.8027492761611938, "lr": 5.166979751945617e-07, "epoch": 1.355299539170507, "percentage": 67.76, "elapsed_time": "8:08:27", "remaining_time": "3:52:21"} +{"current_steps": 5883, "total_steps": 8680, "loss": 0.7509280443191528, "lr": 5.163644765813508e-07, "epoch": 1.3555299539170507, "percentage": 67.78, "elapsed_time": "8:08:32", "remaining_time": "3:52:16"} +{"current_steps": 5884, "total_steps": 8680, "loss": 0.7663145661354065, "lr": 5.160310481671966e-07, "epoch": 1.3557603686635944, "percentage": 67.79, "elapsed_time": "8:08:38", "remaining_time": "3:52:11"} +{"current_steps": 5885, "total_steps": 8680, "loss": 0.7598870396614075, "lr": 5.156976900004948e-07, "epoch": 1.3559907834101383, "percentage": 67.8, "elapsed_time": "8:08:43", "remaining_time": "3:52:06"} +{"current_steps": 5886, "total_steps": 8680, "loss": 0.7923038005828857, "lr": 5.153644021296317e-07, "epoch": 1.356221198156682, "percentage": 67.81, "elapsed_time": "8:08:49", "remaining_time": "3:52:02"} +{"current_steps": 5887, "total_steps": 8680, "loss": 0.8711799383163452, "lr": 5.150311846029846e-07, "epoch": 1.3564516129032258, "percentage": 67.82, "elapsed_time": "8:08:53", "remaining_time": "3:51:56"} +{"current_steps": 5888, 
"total_steps": 8680, "loss": 0.7852096557617188, "lr": 5.146980374689191e-07, "epoch": 1.3566820276497695, "percentage": 67.83, "elapsed_time": "8:08:56", "remaining_time": "3:51:51"} +{"current_steps": 5889, "total_steps": 8680, "loss": 0.7259876132011414, "lr": 5.143649607757905e-07, "epoch": 1.3569124423963133, "percentage": 67.85, "elapsed_time": "8:09:04", "remaining_time": "3:51:47"} +{"current_steps": 5890, "total_steps": 8680, "loss": 0.7612321376800537, "lr": 5.140319545719454e-07, "epoch": 1.3571428571428572, "percentage": 67.86, "elapsed_time": "8:09:09", "remaining_time": "3:51:42"} +{"current_steps": 5891, "total_steps": 8680, "loss": 0.7881298661231995, "lr": 5.136990189057187e-07, "epoch": 1.357373271889401, "percentage": 67.87, "elapsed_time": "8:09:14", "remaining_time": "3:51:37"} +{"current_steps": 5892, "total_steps": 8680, "loss": 0.6956340074539185, "lr": 5.133661538254353e-07, "epoch": 1.3576036866359447, "percentage": 67.88, "elapsed_time": "8:09:20", "remaining_time": "3:51:33"} +{"current_steps": 5893, "total_steps": 8680, "loss": 0.7800698280334473, "lr": 5.130333593794107e-07, "epoch": 1.3578341013824886, "percentage": 67.89, "elapsed_time": "8:09:25", "remaining_time": "3:51:27"} +{"current_steps": 5894, "total_steps": 8680, "loss": 0.6920318603515625, "lr": 5.127006356159496e-07, "epoch": 1.3580645161290323, "percentage": 67.9, "elapsed_time": "8:09:29", "remaining_time": "3:51:22"} +{"current_steps": 5895, "total_steps": 8680, "loss": 0.6972872018814087, "lr": 5.123679825833458e-07, "epoch": 1.358294930875576, "percentage": 67.91, "elapsed_time": "8:09:35", "remaining_time": "3:51:17"} +{"current_steps": 5896, "total_steps": 8680, "loss": 0.8820276260375977, "lr": 5.12035400329884e-07, "epoch": 1.3585253456221198, "percentage": 67.93, "elapsed_time": "8:09:40", "remaining_time": "3:51:13"} +{"current_steps": 5897, "total_steps": 8680, "loss": 0.8834109306335449, "lr": 5.117028889038375e-07, "epoch": 1.3587557603686635, "percentage": 
67.94, "elapsed_time": "8:09:45", "remaining_time": "3:51:07"} +{"current_steps": 5898, "total_steps": 8680, "loss": 0.6981096267700195, "lr": 5.113704483534704e-07, "epoch": 1.3589861751152075, "percentage": 67.95, "elapsed_time": "8:09:49", "remaining_time": "3:51:02"} +{"current_steps": 5899, "total_steps": 8680, "loss": 0.7617249488830566, "lr": 5.11038078727036e-07, "epoch": 1.3592165898617512, "percentage": 67.96, "elapsed_time": "8:09:54", "remaining_time": "3:50:57"} +{"current_steps": 5900, "total_steps": 8680, "loss": 0.8373798131942749, "lr": 5.107057800727773e-07, "epoch": 1.359447004608295, "percentage": 67.97, "elapsed_time": "8:09:58", "remaining_time": "3:50:52"} +{"current_steps": 5901, "total_steps": 8680, "loss": 0.7176666855812073, "lr": 5.103735524389264e-07, "epoch": 1.3596774193548387, "percentage": 67.98, "elapsed_time": "8:10:05", "remaining_time": "3:50:48"} +{"current_steps": 5902, "total_steps": 8680, "loss": 0.7872966527938843, "lr": 5.100413958737067e-07, "epoch": 1.3599078341013824, "percentage": 68.0, "elapsed_time": "8:10:10", "remaining_time": "3:50:43"} +{"current_steps": 5903, "total_steps": 8680, "loss": 0.6668897271156311, "lr": 5.097093104253295e-07, "epoch": 1.3601382488479263, "percentage": 68.01, "elapsed_time": "8:10:16", "remaining_time": "3:50:38"} +{"current_steps": 5904, "total_steps": 8680, "loss": 0.8413408994674683, "lr": 5.093772961419967e-07, "epoch": 1.36036866359447, "percentage": 68.02, "elapsed_time": "8:10:21", "remaining_time": "3:50:33"} +{"current_steps": 5905, "total_steps": 8680, "loss": 0.632825493812561, "lr": 5.090453530719e-07, "epoch": 1.3605990783410138, "percentage": 68.03, "elapsed_time": "8:10:27", "remaining_time": "3:50:29"} +{"current_steps": 5906, "total_steps": 8680, "loss": 0.737346887588501, "lr": 5.087134812632201e-07, "epoch": 1.3608294930875577, "percentage": 68.04, "elapsed_time": "8:10:34", "remaining_time": "3:50:25"} +{"current_steps": 5907, "total_steps": 8680, "loss": 
1.00008225440979, "lr": 5.083816807641283e-07, "epoch": 1.3610599078341012, "percentage": 68.05, "elapsed_time": "8:10:39", "remaining_time": "3:50:19"} +{"current_steps": 5908, "total_steps": 8680, "loss": 0.7844079732894897, "lr": 5.08049951622785e-07, "epoch": 1.3612903225806452, "percentage": 68.06, "elapsed_time": "8:10:44", "remaining_time": "3:50:15"} +{"current_steps": 5909, "total_steps": 8680, "loss": 0.8615080118179321, "lr": 5.077182938873393e-07, "epoch": 1.361520737327189, "percentage": 68.08, "elapsed_time": "8:10:49", "remaining_time": "3:50:10"} +{"current_steps": 5910, "total_steps": 8680, "loss": 0.6930621862411499, "lr": 5.073867076059321e-07, "epoch": 1.3617511520737327, "percentage": 68.09, "elapsed_time": "8:10:53", "remaining_time": "3:50:04"} +{"current_steps": 5911, "total_steps": 8680, "loss": 0.7020307183265686, "lr": 5.07055192826692e-07, "epoch": 1.3619815668202766, "percentage": 68.1, "elapsed_time": "8:10:59", "remaining_time": "3:50:00"} +{"current_steps": 5912, "total_steps": 8680, "loss": 0.7281042337417603, "lr": 5.067237495977379e-07, "epoch": 1.3622119815668203, "percentage": 68.11, "elapsed_time": "8:11:04", "remaining_time": "3:49:55"} +{"current_steps": 5913, "total_steps": 8680, "loss": 0.8092719316482544, "lr": 5.063923779671789e-07, "epoch": 1.362442396313364, "percentage": 68.12, "elapsed_time": "8:11:09", "remaining_time": "3:49:50"} +{"current_steps": 5914, "total_steps": 8680, "loss": 0.7323317527770996, "lr": 5.060610779831125e-07, "epoch": 1.3626728110599078, "percentage": 68.13, "elapsed_time": "8:11:13", "remaining_time": "3:49:44"} +{"current_steps": 5915, "total_steps": 8680, "loss": 0.7370069622993469, "lr": 5.05729849693627e-07, "epoch": 1.3629032258064515, "percentage": 68.15, "elapsed_time": "8:11:19", "remaining_time": "3:49:40"} +{"current_steps": 5916, "total_steps": 8680, "loss": 0.7175320386886597, "lr": 5.053986931467994e-07, "epoch": 1.3631336405529955, "percentage": 68.16, "elapsed_time": "8:11:24", 
"remaining_time": "3:49:35"} +{"current_steps": 5917, "total_steps": 8680, "loss": 0.8643501996994019, "lr": 5.050676083906964e-07, "epoch": 1.3633640552995392, "percentage": 68.17, "elapsed_time": "8:11:29", "remaining_time": "3:49:30"} +{"current_steps": 5918, "total_steps": 8680, "loss": 0.9110950827598572, "lr": 5.047365954733752e-07, "epoch": 1.363594470046083, "percentage": 68.18, "elapsed_time": "8:11:36", "remaining_time": "3:49:26"} +{"current_steps": 5919, "total_steps": 8680, "loss": 0.9242197275161743, "lr": 5.044056544428814e-07, "epoch": 1.3638248847926266, "percentage": 68.19, "elapsed_time": "8:11:42", "remaining_time": "3:49:21"} +{"current_steps": 5920, "total_steps": 8680, "loss": 0.9218860864639282, "lr": 5.040747853472509e-07, "epoch": 1.3640552995391704, "percentage": 68.2, "elapsed_time": "8:11:46", "remaining_time": "3:49:16"} +{"current_steps": 5921, "total_steps": 8680, "loss": 0.970054030418396, "lr": 5.037439882345084e-07, "epoch": 1.3642857142857143, "percentage": 68.21, "elapsed_time": "8:11:50", "remaining_time": "3:49:10"} +{"current_steps": 5922, "total_steps": 8680, "loss": 0.7707182168960571, "lr": 5.034132631526695e-07, "epoch": 1.364516129032258, "percentage": 68.23, "elapsed_time": "8:11:55", "remaining_time": "3:49:05"} +{"current_steps": 5923, "total_steps": 8680, "loss": 0.7673811912536621, "lr": 5.03082610149738e-07, "epoch": 1.3647465437788018, "percentage": 68.24, "elapsed_time": "8:11:59", "remaining_time": "3:49:00"} +{"current_steps": 5924, "total_steps": 8680, "loss": 0.7387198209762573, "lr": 5.027520292737073e-07, "epoch": 1.3649769585253457, "percentage": 68.25, "elapsed_time": "8:12:04", "remaining_time": "3:48:55"} +{"current_steps": 5925, "total_steps": 8680, "loss": 0.7803019881248474, "lr": 5.024215205725619e-07, "epoch": 1.3652073732718895, "percentage": 68.26, "elapsed_time": "8:12:09", "remaining_time": "3:48:50"} +{"current_steps": 5926, "total_steps": 8680, "loss": 0.8753018379211426, "lr": 
5.020910840942738e-07, "epoch": 1.3654377880184332, "percentage": 68.27, "elapsed_time": "8:12:14", "remaining_time": "3:48:45"} +{"current_steps": 5927, "total_steps": 8680, "loss": 0.7917389869689941, "lr": 5.017607198868055e-07, "epoch": 1.365668202764977, "percentage": 68.28, "elapsed_time": "8:12:20", "remaining_time": "3:48:41"} +{"current_steps": 5928, "total_steps": 8680, "loss": 0.8393691182136536, "lr": 5.014304279981095e-07, "epoch": 1.3658986175115206, "percentage": 68.29, "elapsed_time": "8:12:24", "remaining_time": "3:48:35"} +{"current_steps": 5929, "total_steps": 8680, "loss": 0.6635205745697021, "lr": 5.011002084761264e-07, "epoch": 1.3661290322580646, "percentage": 68.31, "elapsed_time": "8:12:29", "remaining_time": "3:48:30"} +{"current_steps": 5930, "total_steps": 8680, "loss": 0.7058769464492798, "lr": 5.007700613687879e-07, "epoch": 1.3663594470046083, "percentage": 68.32, "elapsed_time": "8:12:33", "remaining_time": "3:48:25"} +{"current_steps": 5931, "total_steps": 8680, "loss": 0.841168224811554, "lr": 5.004399867240143e-07, "epoch": 1.366589861751152, "percentage": 68.33, "elapsed_time": "8:12:37", "remaining_time": "3:48:19"} +{"current_steps": 5932, "total_steps": 8680, "loss": 0.7385121583938599, "lr": 5.001099845897148e-07, "epoch": 1.3668202764976958, "percentage": 68.34, "elapsed_time": "8:12:42", "remaining_time": "3:48:14"} +{"current_steps": 5933, "total_steps": 8680, "loss": 0.6525158882141113, "lr": 4.997800550137897e-07, "epoch": 1.3670506912442395, "percentage": 68.35, "elapsed_time": "8:12:47", "remaining_time": "3:48:09"} +{"current_steps": 5934, "total_steps": 8680, "loss": 0.7838844060897827, "lr": 4.994501980441274e-07, "epoch": 1.3672811059907835, "percentage": 68.36, "elapsed_time": "8:12:51", "remaining_time": "3:48:04"} +{"current_steps": 5935, "total_steps": 8680, "loss": 0.8831999897956848, "lr": 4.991204137286061e-07, "epoch": 1.3675115207373272, "percentage": 68.38, "elapsed_time": "8:12:54", "remaining_time": 
"3:47:58"} +{"current_steps": 5936, "total_steps": 8680, "loss": 0.8053784966468811, "lr": 4.987907021150938e-07, "epoch": 1.367741935483871, "percentage": 68.39, "elapsed_time": "8:13:01", "remaining_time": "3:47:54"} +{"current_steps": 5937, "total_steps": 8680, "loss": 0.8093301057815552, "lr": 4.984610632514475e-07, "epoch": 1.3679723502304149, "percentage": 68.4, "elapsed_time": "8:13:05", "remaining_time": "3:47:49"} +{"current_steps": 5938, "total_steps": 8680, "loss": 0.7609653472900391, "lr": 4.981314971855136e-07, "epoch": 1.3682027649769586, "percentage": 68.41, "elapsed_time": "8:13:10", "remaining_time": "3:47:44"} +{"current_steps": 5939, "total_steps": 8680, "loss": 0.7131600379943848, "lr": 4.978020039651288e-07, "epoch": 1.3684331797235023, "percentage": 68.42, "elapsed_time": "8:13:17", "remaining_time": "3:47:39"} +{"current_steps": 5940, "total_steps": 8680, "loss": 0.6555063724517822, "lr": 4.974725836381184e-07, "epoch": 1.368663594470046, "percentage": 68.43, "elapsed_time": "8:13:20", "remaining_time": "3:47:34"} +{"current_steps": 5941, "total_steps": 8680, "loss": 0.8349519968032837, "lr": 4.971432362522968e-07, "epoch": 1.3688940092165898, "percentage": 68.44, "elapsed_time": "8:13:25", "remaining_time": "3:47:29"} +{"current_steps": 5942, "total_steps": 8680, "loss": 0.7335611581802368, "lr": 4.968139618554691e-07, "epoch": 1.3691244239631337, "percentage": 68.46, "elapsed_time": "8:13:31", "remaining_time": "3:47:24"} +{"current_steps": 5943, "total_steps": 8680, "loss": 0.8349814414978027, "lr": 4.964847604954287e-07, "epoch": 1.3693548387096774, "percentage": 68.47, "elapsed_time": "8:13:36", "remaining_time": "3:47:19"} +{"current_steps": 5944, "total_steps": 8680, "loss": 0.6816729307174683, "lr": 4.961556322199585e-07, "epoch": 1.3695852534562212, "percentage": 68.48, "elapsed_time": "8:13:43", "remaining_time": "3:47:15"} +{"current_steps": 5945, "total_steps": 8680, "loss": 0.847672164440155, "lr": 4.958265770768315e-07, "epoch": 
1.369815668202765, "percentage": 68.49, "elapsed_time": "8:13:48", "remaining_time": "3:47:10"} +{"current_steps": 5946, "total_steps": 8680, "loss": 0.6674519777297974, "lr": 4.954975951138095e-07, "epoch": 1.3700460829493086, "percentage": 68.5, "elapsed_time": "8:13:54", "remaining_time": "3:47:06"} +{"current_steps": 5947, "total_steps": 8680, "loss": 0.7836427092552185, "lr": 4.951686863786432e-07, "epoch": 1.3702764976958526, "percentage": 68.51, "elapsed_time": "8:14:01", "remaining_time": "3:47:01"} +{"current_steps": 5948, "total_steps": 8680, "loss": 0.640183687210083, "lr": 4.948398509190742e-07, "epoch": 1.3705069124423963, "percentage": 68.53, "elapsed_time": "8:14:05", "remaining_time": "3:46:56"} +{"current_steps": 5949, "total_steps": 8680, "loss": 0.8438451290130615, "lr": 4.945110887828322e-07, "epoch": 1.37073732718894, "percentage": 68.54, "elapsed_time": "8:14:10", "remaining_time": "3:46:51"} +{"current_steps": 5950, "total_steps": 8680, "loss": 0.9311714172363281, "lr": 4.94182400017636e-07, "epoch": 1.370967741935484, "percentage": 68.55, "elapsed_time": "8:14:13", "remaining_time": "3:46:45"} +{"current_steps": 5951, "total_steps": 8680, "loss": 0.7332801818847656, "lr": 4.938537846711952e-07, "epoch": 1.3711981566820277, "percentage": 68.56, "elapsed_time": "8:14:18", "remaining_time": "3:46:40"} +{"current_steps": 5952, "total_steps": 8680, "loss": 0.7189289331436157, "lr": 4.935252427912075e-07, "epoch": 1.3714285714285714, "percentage": 68.57, "elapsed_time": "8:14:24", "remaining_time": "3:46:36"} +{"current_steps": 5953, "total_steps": 8680, "loss": 0.827372670173645, "lr": 4.9319677442536e-07, "epoch": 1.3716589861751152, "percentage": 68.58, "elapsed_time": "8:14:29", "remaining_time": "3:46:31"} +{"current_steps": 5954, "total_steps": 8680, "loss": 0.7607625722885132, "lr": 4.9286837962133e-07, "epoch": 1.371889400921659, "percentage": 68.59, "elapsed_time": "8:14:35", "remaining_time": "3:46:26"} +{"current_steps": 5955, 
"total_steps": 8680, "loss": 0.9420886635780334, "lr": 4.925400584267836e-07, "epoch": 1.3721198156682028, "percentage": 68.61, "elapsed_time": "8:14:40", "remaining_time": "3:46:21"} +{"current_steps": 5956, "total_steps": 8680, "loss": 0.7605317831039429, "lr": 4.922118108893757e-07, "epoch": 1.3723502304147466, "percentage": 68.62, "elapsed_time": "8:14:45", "remaining_time": "3:46:16"} +{"current_steps": 5957, "total_steps": 8680, "loss": 0.8353599309921265, "lr": 4.918836370567513e-07, "epoch": 1.3725806451612903, "percentage": 68.63, "elapsed_time": "8:14:50", "remaining_time": "3:46:11"} +{"current_steps": 5958, "total_steps": 8680, "loss": 0.8540027141571045, "lr": 4.915555369765439e-07, "epoch": 1.372811059907834, "percentage": 68.64, "elapsed_time": "8:14:55", "remaining_time": "3:46:06"} +{"current_steps": 5959, "total_steps": 8680, "loss": 0.6965712308883667, "lr": 4.912275106963778e-07, "epoch": 1.3730414746543778, "percentage": 68.65, "elapsed_time": "8:15:01", "remaining_time": "3:46:02"} +{"current_steps": 5960, "total_steps": 8680, "loss": 0.7460787296295166, "lr": 4.908995582638648e-07, "epoch": 1.3732718894009217, "percentage": 68.66, "elapsed_time": "8:15:06", "remaining_time": "3:45:57"} +{"current_steps": 5961, "total_steps": 8680, "loss": 0.8652873039245605, "lr": 4.905716797266067e-07, "epoch": 1.3735023041474654, "percentage": 68.68, "elapsed_time": "8:15:11", "remaining_time": "3:45:52"} +{"current_steps": 5962, "total_steps": 8680, "loss": 0.7757953405380249, "lr": 4.902438751321952e-07, "epoch": 1.3737327188940092, "percentage": 68.69, "elapsed_time": "8:15:16", "remaining_time": "3:45:47"} +{"current_steps": 5963, "total_steps": 8680, "loss": 0.8842452168464661, "lr": 4.899161445282102e-07, "epoch": 1.3739631336405531, "percentage": 68.7, "elapsed_time": "8:15:21", "remaining_time": "3:45:42"} +{"current_steps": 5964, "total_steps": 8680, "loss": 0.7259113788604736, "lr": 4.895884879622215e-07, "epoch": 1.3741935483870968, "percentage": 
68.71, "elapsed_time": "8:15:27", "remaining_time": "3:45:37"} +{"current_steps": 5965, "total_steps": 8680, "loss": 0.8871402144432068, "lr": 4.892609054817883e-07, "epoch": 1.3744239631336406, "percentage": 68.72, "elapsed_time": "8:15:32", "remaining_time": "3:45:32"} +{"current_steps": 5966, "total_steps": 8680, "loss": 0.7564518451690674, "lr": 4.889333971344586e-07, "epoch": 1.3746543778801843, "percentage": 68.73, "elapsed_time": "8:15:36", "remaining_time": "3:45:27"} +{"current_steps": 5967, "total_steps": 8680, "loss": 0.7886015176773071, "lr": 4.886059629677692e-07, "epoch": 1.374884792626728, "percentage": 68.74, "elapsed_time": "8:15:42", "remaining_time": "3:45:22"} +{"current_steps": 5968, "total_steps": 8680, "loss": 0.8256035447120667, "lr": 4.882786030292479e-07, "epoch": 1.375115207373272, "percentage": 68.76, "elapsed_time": "8:15:48", "remaining_time": "3:45:18"} +{"current_steps": 5969, "total_steps": 8680, "loss": 0.9351227283477783, "lr": 4.879513173664099e-07, "epoch": 1.3753456221198157, "percentage": 68.77, "elapsed_time": "8:15:53", "remaining_time": "3:45:13"} +{"current_steps": 5970, "total_steps": 8680, "loss": 0.7221553921699524, "lr": 4.876241060267598e-07, "epoch": 1.3755760368663594, "percentage": 68.78, "elapsed_time": "8:15:57", "remaining_time": "3:45:07"} +{"current_steps": 5971, "total_steps": 8680, "loss": 0.7451514005661011, "lr": 4.872969690577928e-07, "epoch": 1.3758064516129032, "percentage": 68.79, "elapsed_time": "8:16:02", "remaining_time": "3:45:02"} +{"current_steps": 5972, "total_steps": 8680, "loss": 0.810903787612915, "lr": 4.86969906506992e-07, "epoch": 1.3760368663594469, "percentage": 68.8, "elapsed_time": "8:16:06", "remaining_time": "3:44:57"} +{"current_steps": 5973, "total_steps": 8680, "loss": 0.6279938817024231, "lr": 4.866429184218298e-07, "epoch": 1.3762672811059908, "percentage": 68.81, "elapsed_time": "8:16:13", "remaining_time": "3:44:53"} +{"current_steps": 5974, "total_steps": 8680, "loss": 
0.7742956876754761, "lr": 4.863160048497688e-07, "epoch": 1.3764976958525346, "percentage": 68.82, "elapsed_time": "8:16:18", "remaining_time": "3:44:48"} +{"current_steps": 5975, "total_steps": 8680, "loss": 0.7423844933509827, "lr": 4.859891658382597e-07, "epoch": 1.3767281105990783, "percentage": 68.84, "elapsed_time": "8:16:23", "remaining_time": "3:44:43"} +{"current_steps": 5976, "total_steps": 8680, "loss": 0.8387676477432251, "lr": 4.856624014347426e-07, "epoch": 1.3769585253456222, "percentage": 68.85, "elapsed_time": "8:16:28", "remaining_time": "3:44:38"} +{"current_steps": 5977, "total_steps": 8680, "loss": 0.7959855794906616, "lr": 4.853357116866471e-07, "epoch": 1.377188940092166, "percentage": 68.86, "elapsed_time": "8:16:33", "remaining_time": "3:44:33"} +{"current_steps": 5978, "total_steps": 8680, "loss": 0.7086259722709656, "lr": 4.850090966413913e-07, "epoch": 1.3774193548387097, "percentage": 68.87, "elapsed_time": "8:16:37", "remaining_time": "3:44:28"} +{"current_steps": 5979, "total_steps": 8680, "loss": 0.7219396829605103, "lr": 4.846825563463838e-07, "epoch": 1.3776497695852534, "percentage": 68.88, "elapsed_time": "8:16:43", "remaining_time": "3:44:23"} +{"current_steps": 5980, "total_steps": 8680, "loss": 0.8383582830429077, "lr": 4.84356090849021e-07, "epoch": 1.3778801843317972, "percentage": 68.89, "elapsed_time": "8:16:48", "remaining_time": "3:44:18"} +{"current_steps": 5981, "total_steps": 8680, "loss": 0.7624244689941406, "lr": 4.840297001966887e-07, "epoch": 1.378110599078341, "percentage": 68.91, "elapsed_time": "8:16:53", "remaining_time": "3:44:13"} +{"current_steps": 5982, "total_steps": 8680, "loss": 0.7901623249053955, "lr": 4.837033844367626e-07, "epoch": 1.3783410138248848, "percentage": 68.92, "elapsed_time": "8:16:58", "remaining_time": "3:44:08"} +{"current_steps": 5983, "total_steps": 8680, "loss": 0.7732094526290894, "lr": 4.833771436166068e-07, "epoch": 1.3785714285714286, "percentage": 68.93, "elapsed_time": 
"8:17:03", "remaining_time": "3:44:03"} +{"current_steps": 5984, "total_steps": 8680, "loss": 0.7882228493690491, "lr": 4.830509777835744e-07, "epoch": 1.3788018433179723, "percentage": 68.94, "elapsed_time": "8:17:09", "remaining_time": "3:43:59"} +{"current_steps": 5985, "total_steps": 8680, "loss": 0.8601159453392029, "lr": 4.827248869850086e-07, "epoch": 1.379032258064516, "percentage": 68.95, "elapsed_time": "8:17:12", "remaining_time": "3:43:53"} +{"current_steps": 5986, "total_steps": 8680, "loss": 0.8828538656234741, "lr": 4.823988712682406e-07, "epoch": 1.37926267281106, "percentage": 68.96, "elapsed_time": "8:17:18", "remaining_time": "3:43:48"} +{"current_steps": 5987, "total_steps": 8680, "loss": 0.8586058020591736, "lr": 4.820729306805907e-07, "epoch": 1.3794930875576037, "percentage": 68.97, "elapsed_time": "8:17:23", "remaining_time": "3:43:44"} +{"current_steps": 5988, "total_steps": 8680, "loss": 0.8276243209838867, "lr": 4.8174706526937e-07, "epoch": 1.3797235023041474, "percentage": 68.99, "elapsed_time": "8:17:28", "remaining_time": "3:43:38"} +{"current_steps": 5989, "total_steps": 8680, "loss": 0.837665855884552, "lr": 4.814212750818764e-07, "epoch": 1.3799539170506914, "percentage": 69.0, "elapsed_time": "8:17:33", "remaining_time": "3:43:33"} +{"current_steps": 5990, "total_steps": 8680, "loss": 0.7493194341659546, "lr": 4.810955601653978e-07, "epoch": 1.380184331797235, "percentage": 69.01, "elapsed_time": "8:17:37", "remaining_time": "3:43:28"} +{"current_steps": 5991, "total_steps": 8680, "loss": 0.8382525444030762, "lr": 4.807699205672123e-07, "epoch": 1.3804147465437788, "percentage": 69.02, "elapsed_time": "8:17:43", "remaining_time": "3:43:23"} +{"current_steps": 5992, "total_steps": 8680, "loss": 0.8152645826339722, "lr": 4.804443563345854e-07, "epoch": 1.3806451612903226, "percentage": 69.03, "elapsed_time": "8:17:47", "remaining_time": "3:43:18"} +{"current_steps": 5993, "total_steps": 8680, "loss": 0.7168164849281311, "lr": 
4.801188675147719e-07, "epoch": 1.3808755760368663, "percentage": 69.04, "elapsed_time": "8:17:52", "remaining_time": "3:43:13"} +{"current_steps": 5994, "total_steps": 8680, "loss": 0.883512556552887, "lr": 4.79793454155017e-07, "epoch": 1.3811059907834102, "percentage": 69.06, "elapsed_time": "8:17:56", "remaining_time": "3:43:08"} +{"current_steps": 5995, "total_steps": 8680, "loss": 0.7258438467979431, "lr": 4.794681163025536e-07, "epoch": 1.381336405529954, "percentage": 69.07, "elapsed_time": "8:18:02", "remaining_time": "3:43:03"} +{"current_steps": 5996, "total_steps": 8680, "loss": 0.8408991098403931, "lr": 4.79142854004604e-07, "epoch": 1.3815668202764977, "percentage": 69.08, "elapsed_time": "8:18:06", "remaining_time": "3:42:58"} +{"current_steps": 5997, "total_steps": 8680, "loss": 0.6506227254867554, "lr": 4.788176673083796e-07, "epoch": 1.3817972350230414, "percentage": 69.09, "elapsed_time": "8:18:12", "remaining_time": "3:42:53"} +{"current_steps": 5998, "total_steps": 8680, "loss": 0.6971127986907959, "lr": 4.784925562610809e-07, "epoch": 1.3820276497695851, "percentage": 69.1, "elapsed_time": "8:18:16", "remaining_time": "3:42:48"} +{"current_steps": 5999, "total_steps": 8680, "loss": 0.8399784564971924, "lr": 4.781675209098967e-07, "epoch": 1.382258064516129, "percentage": 69.11, "elapsed_time": "8:18:21", "remaining_time": "3:42:43"} +{"current_steps": 6000, "total_steps": 8680, "loss": 0.6451772451400757, "lr": 4.778425613020067e-07, "epoch": 1.3824884792626728, "percentage": 69.12, "elapsed_time": "8:18:26", "remaining_time": "3:42:38"} +{"current_steps": 6001, "total_steps": 8680, "loss": 0.7794390916824341, "lr": 4.775176774845774e-07, "epoch": 1.3827188940092165, "percentage": 69.14, "elapsed_time": "8:18:36", "remaining_time": "3:42:35"} +{"current_steps": 6002, "total_steps": 8680, "loss": 0.7743663191795349, "lr": 4.771928695047652e-07, "epoch": 1.3829493087557605, "percentage": 69.15, "elapsed_time": "8:18:40", "remaining_time": 
"3:42:30"} +{"current_steps": 6003, "total_steps": 8680, "loss": 0.7654878497123718, "lr": 4.768681374097165e-07, "epoch": 1.3831797235023042, "percentage": 69.16, "elapsed_time": "8:18:44", "remaining_time": "3:42:24"} +{"current_steps": 6004, "total_steps": 8680, "loss": 0.634769082069397, "lr": 4.765434812465645e-07, "epoch": 1.383410138248848, "percentage": 69.17, "elapsed_time": "8:18:50", "remaining_time": "3:42:20"} +{"current_steps": 6005, "total_steps": 8680, "loss": 0.7941944599151611, "lr": 4.762189010624337e-07, "epoch": 1.3836405529953917, "percentage": 69.18, "elapsed_time": "8:18:54", "remaining_time": "3:42:14"} +{"current_steps": 6006, "total_steps": 8680, "loss": 0.7437179088592529, "lr": 4.75894396904436e-07, "epoch": 1.3838709677419354, "percentage": 69.19, "elapsed_time": "8:18:59", "remaining_time": "3:42:09"} +{"current_steps": 6007, "total_steps": 8680, "loss": 0.7854535579681396, "lr": 4.7556996881967236e-07, "epoch": 1.3841013824884794, "percentage": 69.21, "elapsed_time": "8:19:03", "remaining_time": "3:42:04"} +{"current_steps": 6008, "total_steps": 8680, "loss": 0.7506910562515259, "lr": 4.752456168552339e-07, "epoch": 1.384331797235023, "percentage": 69.22, "elapsed_time": "8:19:08", "remaining_time": "3:41:59"} +{"current_steps": 6009, "total_steps": 8680, "loss": 0.8967334032058716, "lr": 4.749213410581995e-07, "epoch": 1.3845622119815668, "percentage": 69.23, "elapsed_time": "8:19:12", "remaining_time": "3:41:54"} +{"current_steps": 6010, "total_steps": 8680, "loss": 0.7053096294403076, "lr": 4.7459714147563677e-07, "epoch": 1.3847926267281105, "percentage": 69.24, "elapsed_time": "8:19:19", "remaining_time": "3:41:49"} +{"current_steps": 6011, "total_steps": 8680, "loss": 0.8759415149688721, "lr": 4.7427301815460396e-07, "epoch": 1.3850230414746543, "percentage": 69.25, "elapsed_time": "8:19:23", "remaining_time": "3:41:44"} +{"current_steps": 6012, "total_steps": 8680, "loss": 0.8827483654022217, "lr": 4.739489711421466e-07, 
"epoch": 1.3852534562211982, "percentage": 69.26, "elapsed_time": "8:19:29", "remaining_time": "3:41:39"} +{"current_steps": 6013, "total_steps": 8680, "loss": 0.7268258929252625, "lr": 4.736250004852993e-07, "epoch": 1.385483870967742, "percentage": 69.27, "elapsed_time": "8:19:33", "remaining_time": "3:41:34"} +{"current_steps": 6014, "total_steps": 8680, "loss": 0.7142586708068848, "lr": 4.7330110623108665e-07, "epoch": 1.3857142857142857, "percentage": 69.29, "elapsed_time": "8:19:37", "remaining_time": "3:41:28"} +{"current_steps": 6015, "total_steps": 8680, "loss": 0.7123303413391113, "lr": 4.7297728842652116e-07, "epoch": 1.3859447004608296, "percentage": 69.3, "elapsed_time": "8:19:41", "remaining_time": "3:41:23"} +{"current_steps": 6016, "total_steps": 8680, "loss": 0.7548067569732666, "lr": 4.726535471186047e-07, "epoch": 1.3861751152073734, "percentage": 69.31, "elapsed_time": "8:19:46", "remaining_time": "3:41:18"} +{"current_steps": 6017, "total_steps": 8680, "loss": 0.7792191505432129, "lr": 4.723298823543277e-07, "epoch": 1.386405529953917, "percentage": 69.32, "elapsed_time": "8:19:52", "remaining_time": "3:41:13"} +{"current_steps": 6018, "total_steps": 8680, "loss": 0.8658785820007324, "lr": 4.7200629418066975e-07, "epoch": 1.3866359447004608, "percentage": 69.33, "elapsed_time": "8:19:56", "remaining_time": "3:41:08"} +{"current_steps": 6019, "total_steps": 8680, "loss": 0.7173904776573181, "lr": 4.716827826445987e-07, "epoch": 1.3868663594470045, "percentage": 69.34, "elapsed_time": "8:20:00", "remaining_time": "3:41:03"} +{"current_steps": 6020, "total_steps": 8680, "loss": 0.6675543785095215, "lr": 4.7135934779307284e-07, "epoch": 1.3870967741935485, "percentage": 69.35, "elapsed_time": "8:20:06", "remaining_time": "3:40:58"} +{"current_steps": 6021, "total_steps": 8680, "loss": 0.8164724111557007, "lr": 4.710359896730378e-07, "epoch": 1.3873271889400922, "percentage": 69.37, "elapsed_time": "8:20:12", "remaining_time": "3:40:54"} 
+{"current_steps": 6022, "total_steps": 8680, "loss": 0.8354332447052002, "lr": 4.707127083314283e-07, "epoch": 1.387557603686636, "percentage": 69.38, "elapsed_time": "8:20:16", "remaining_time": "3:40:48"} +{"current_steps": 6023, "total_steps": 8680, "loss": 0.8414663672447205, "lr": 4.7038950381516885e-07, "epoch": 1.3877880184331797, "percentage": 69.39, "elapsed_time": "8:20:21", "remaining_time": "3:40:43"} +{"current_steps": 6024, "total_steps": 8680, "loss": 0.7693418264389038, "lr": 4.700663761711717e-07, "epoch": 1.3880184331797234, "percentage": 69.4, "elapsed_time": "8:20:27", "remaining_time": "3:40:39"} +{"current_steps": 6025, "total_steps": 8680, "loss": 0.7809267044067383, "lr": 4.697433254463382e-07, "epoch": 1.3882488479262673, "percentage": 69.41, "elapsed_time": "8:20:33", "remaining_time": "3:40:34"} +{"current_steps": 6026, "total_steps": 8680, "loss": 0.7455927133560181, "lr": 4.6942035168755944e-07, "epoch": 1.388479262672811, "percentage": 69.42, "elapsed_time": "8:20:38", "remaining_time": "3:40:29"} +{"current_steps": 6027, "total_steps": 8680, "loss": 0.8217881917953491, "lr": 4.6909745494171383e-07, "epoch": 1.3887096774193548, "percentage": 69.44, "elapsed_time": "8:20:43", "remaining_time": "3:40:24"} +{"current_steps": 6028, "total_steps": 8680, "loss": 0.8138882517814636, "lr": 4.687746352556703e-07, "epoch": 1.3889400921658988, "percentage": 69.45, "elapsed_time": "8:20:48", "remaining_time": "3:40:19"} +{"current_steps": 6029, "total_steps": 8680, "loss": 0.8926469087600708, "lr": 4.6845189267628505e-07, "epoch": 1.3891705069124423, "percentage": 69.46, "elapsed_time": "8:20:52", "remaining_time": "3:40:14"} +{"current_steps": 6030, "total_steps": 8680, "loss": 0.797023355960846, "lr": 4.681292272504036e-07, "epoch": 1.3894009216589862, "percentage": 69.47, "elapsed_time": "8:20:57", "remaining_time": "3:40:09"} +{"current_steps": 6031, "total_steps": 8680, "loss": 0.6767498254776001, "lr": 4.6780663902486104e-07, "epoch": 
1.38963133640553, "percentage": 69.48, "elapsed_time": "8:21:02", "remaining_time": "3:40:04"} +{"current_steps": 6032, "total_steps": 8680, "loss": 0.7514280080795288, "lr": 4.674841280464804e-07, "epoch": 1.3898617511520737, "percentage": 69.49, "elapsed_time": "8:21:07", "remaining_time": "3:39:59"} +{"current_steps": 6033, "total_steps": 8680, "loss": 0.8879726529121399, "lr": 4.671616943620731e-07, "epoch": 1.3900921658986176, "percentage": 69.5, "elapsed_time": "8:21:12", "remaining_time": "3:39:54"} +{"current_steps": 6034, "total_steps": 8680, "loss": 0.6674140095710754, "lr": 4.66839338018441e-07, "epoch": 1.3903225806451613, "percentage": 69.52, "elapsed_time": "8:21:19", "remaining_time": "3:39:50"} +{"current_steps": 6035, "total_steps": 8680, "loss": 0.9094855785369873, "lr": 4.6651705906237307e-07, "epoch": 1.390552995391705, "percentage": 69.53, "elapsed_time": "8:21:23", "remaining_time": "3:39:44"} +{"current_steps": 6036, "total_steps": 8680, "loss": 0.8334506750106812, "lr": 4.661948575406478e-07, "epoch": 1.3907834101382488, "percentage": 69.54, "elapsed_time": "8:21:28", "remaining_time": "3:39:39"} +{"current_steps": 6037, "total_steps": 8680, "loss": 0.6545997858047485, "lr": 4.658727335000323e-07, "epoch": 1.3910138248847925, "percentage": 69.55, "elapsed_time": "8:21:35", "remaining_time": "3:39:35"} +{"current_steps": 6038, "total_steps": 8680, "loss": 0.7810590267181396, "lr": 4.6555068698728237e-07, "epoch": 1.3912442396313365, "percentage": 69.56, "elapsed_time": "8:21:39", "remaining_time": "3:39:30"} +{"current_steps": 6039, "total_steps": 8680, "loss": 0.7581864595413208, "lr": 4.652287180491424e-07, "epoch": 1.3914746543778802, "percentage": 69.57, "elapsed_time": "8:21:44", "remaining_time": "3:39:25"} +{"current_steps": 6040, "total_steps": 8680, "loss": 0.7134817242622375, "lr": 4.649068267323465e-07, "epoch": 1.391705069124424, "percentage": 69.59, "elapsed_time": "8:21:50", "remaining_time": "3:39:20"} +{"current_steps": 6041, 
"total_steps": 8680, "loss": 0.7050445079803467, "lr": 4.645850130836162e-07, "epoch": 1.3919354838709677, "percentage": 69.6, "elapsed_time": "8:21:57", "remaining_time": "3:39:16"} +{"current_steps": 6042, "total_steps": 8680, "loss": 0.8510535955429077, "lr": 4.642632771496622e-07, "epoch": 1.3921658986175114, "percentage": 69.61, "elapsed_time": "8:22:02", "remaining_time": "3:39:11"} +{"current_steps": 6043, "total_steps": 8680, "loss": 0.8627035617828369, "lr": 4.6394161897718454e-07, "epoch": 1.3923963133640553, "percentage": 69.62, "elapsed_time": "8:22:08", "remaining_time": "3:39:07"} +{"current_steps": 6044, "total_steps": 8680, "loss": 0.89891517162323, "lr": 4.6362003861287127e-07, "epoch": 1.392626728110599, "percentage": 69.63, "elapsed_time": "8:22:13", "remaining_time": "3:39:02"} +{"current_steps": 6045, "total_steps": 8680, "loss": 0.7267141342163086, "lr": 4.6329853610339896e-07, "epoch": 1.3928571428571428, "percentage": 69.64, "elapsed_time": "8:22:18", "remaining_time": "3:38:57"} +{"current_steps": 6046, "total_steps": 8680, "loss": 0.8021189570426941, "lr": 4.6297711149543405e-07, "epoch": 1.3930875576036867, "percentage": 69.65, "elapsed_time": "8:22:23", "remaining_time": "3:38:52"} +{"current_steps": 6047, "total_steps": 8680, "loss": 0.7836861610412598, "lr": 4.6265576483563054e-07, "epoch": 1.3933179723502305, "percentage": 69.67, "elapsed_time": "8:22:28", "remaining_time": "3:38:47"} +{"current_steps": 6048, "total_steps": 8680, "loss": 0.816940188407898, "lr": 4.623344961706309e-07, "epoch": 1.3935483870967742, "percentage": 69.68, "elapsed_time": "8:22:34", "remaining_time": "3:38:42"} +{"current_steps": 6049, "total_steps": 8680, "loss": 0.77923583984375, "lr": 4.6201330554706773e-07, "epoch": 1.393778801843318, "percentage": 69.69, "elapsed_time": "8:22:41", "remaining_time": "3:38:38"} +{"current_steps": 6050, "total_steps": 8680, "loss": 0.8017981052398682, "lr": 4.6169219301156117e-07, "epoch": 1.3940092165898617, 
"percentage": 69.7, "elapsed_time": "8:22:46", "remaining_time": "3:38:33"} +{"current_steps": 6051, "total_steps": 8680, "loss": 0.6786847114562988, "lr": 4.6137115861071973e-07, "epoch": 1.3942396313364056, "percentage": 69.71, "elapsed_time": "8:22:52", "remaining_time": "3:38:28"} +{"current_steps": 6052, "total_steps": 8680, "loss": 0.7802412509918213, "lr": 4.61050202391142e-07, "epoch": 1.3944700460829493, "percentage": 69.72, "elapsed_time": "8:22:57", "remaining_time": "3:38:24"} +{"current_steps": 6053, "total_steps": 8680, "loss": 0.7434886693954468, "lr": 4.6072932439941347e-07, "epoch": 1.394700460829493, "percentage": 69.74, "elapsed_time": "8:23:03", "remaining_time": "3:38:19"} +{"current_steps": 6054, "total_steps": 8680, "loss": 0.7590811252593994, "lr": 4.6040852468211e-07, "epoch": 1.3949308755760368, "percentage": 69.75, "elapsed_time": "8:23:08", "remaining_time": "3:38:14"} +{"current_steps": 6055, "total_steps": 8680, "loss": 0.8952670097351074, "lr": 4.600878032857949e-07, "epoch": 1.3951612903225805, "percentage": 69.76, "elapsed_time": "8:23:12", "remaining_time": "3:38:09"} +{"current_steps": 6056, "total_steps": 8680, "loss": 0.8055328130722046, "lr": 4.5976716025702036e-07, "epoch": 1.3953917050691245, "percentage": 69.77, "elapsed_time": "8:23:18", "remaining_time": "3:38:04"} +{"current_steps": 6057, "total_steps": 8680, "loss": 0.8919316530227661, "lr": 4.5944659564232725e-07, "epoch": 1.3956221198156682, "percentage": 69.78, "elapsed_time": "8:23:24", "remaining_time": "3:38:00"} +{"current_steps": 6058, "total_steps": 8680, "loss": 0.701945960521698, "lr": 4.591261094882453e-07, "epoch": 1.395852534562212, "percentage": 69.79, "elapsed_time": "8:23:29", "remaining_time": "3:37:55"} +{"current_steps": 6059, "total_steps": 8680, "loss": 0.7457436323165894, "lr": 4.5880570184129206e-07, "epoch": 1.3960829493087559, "percentage": 69.8, "elapsed_time": "8:23:35", "remaining_time": "3:37:50"} +{"current_steps": 6060, "total_steps": 
8680, "loss": 0.8093513250350952, "lr": 4.5848537274797527e-07, "epoch": 1.3963133640552996, "percentage": 69.82, "elapsed_time": "8:23:41", "remaining_time": "3:37:45"} +{"current_steps": 6061, "total_steps": 8680, "loss": 0.7098822593688965, "lr": 4.5816512225478965e-07, "epoch": 1.3965437788018433, "percentage": 69.83, "elapsed_time": "8:23:47", "remaining_time": "3:37:41"} +{"current_steps": 6062, "total_steps": 8680, "loss": 0.7423167824745178, "lr": 4.578449504082189e-07, "epoch": 1.396774193548387, "percentage": 69.84, "elapsed_time": "8:23:53", "remaining_time": "3:37:36"} +{"current_steps": 6063, "total_steps": 8680, "loss": 0.8730076551437378, "lr": 4.5752485725473624e-07, "epoch": 1.3970046082949308, "percentage": 69.85, "elapsed_time": "8:23:58", "remaining_time": "3:37:31"} +{"current_steps": 6064, "total_steps": 8680, "loss": 0.6914420127868652, "lr": 4.572048428408024e-07, "epoch": 1.3972350230414747, "percentage": 69.86, "elapsed_time": "8:24:05", "remaining_time": "3:37:27"} +{"current_steps": 6065, "total_steps": 8680, "loss": 0.8051402568817139, "lr": 4.5688490721286664e-07, "epoch": 1.3974654377880185, "percentage": 69.87, "elapsed_time": "8:24:12", "remaining_time": "3:37:23"} +{"current_steps": 6066, "total_steps": 8680, "loss": 0.9185452461242676, "lr": 4.5656505041736803e-07, "epoch": 1.3976958525345622, "percentage": 69.88, "elapsed_time": "8:24:19", "remaining_time": "3:37:19"} +{"current_steps": 6067, "total_steps": 8680, "loss": 0.766645073890686, "lr": 4.5624527250073287e-07, "epoch": 1.397926267281106, "percentage": 69.9, "elapsed_time": "8:24:25", "remaining_time": "3:37:15"} +{"current_steps": 6068, "total_steps": 8680, "loss": 0.8005224466323853, "lr": 4.559255735093763e-07, "epoch": 1.3981566820276496, "percentage": 69.91, "elapsed_time": "8:24:31", "remaining_time": "3:37:10"} +{"current_steps": 6069, "total_steps": 8680, "loss": 0.8072810173034668, "lr": 4.5560595348970275e-07, "epoch": 1.3983870967741936, "percentage": 69.92, 
"elapsed_time": "8:24:38", "remaining_time": "3:37:06"} +{"current_steps": 6070, "total_steps": 8680, "loss": 0.7537474632263184, "lr": 4.552864124881045e-07, "epoch": 1.3986175115207373, "percentage": 69.93, "elapsed_time": "8:24:44", "remaining_time": "3:37:01"} +{"current_steps": 6071, "total_steps": 8680, "loss": 0.8396750092506409, "lr": 4.549669505509619e-07, "epoch": 1.398847926267281, "percentage": 69.94, "elapsed_time": "8:24:49", "remaining_time": "3:36:56"} +{"current_steps": 6072, "total_steps": 8680, "loss": 0.8456804752349854, "lr": 4.546475677246453e-07, "epoch": 1.399078341013825, "percentage": 69.95, "elapsed_time": "8:24:54", "remaining_time": "3:36:51"} +{"current_steps": 6073, "total_steps": 8680, "loss": 0.6150076389312744, "lr": 4.543282640555123e-07, "epoch": 1.3993087557603687, "percentage": 69.97, "elapsed_time": "8:25:01", "remaining_time": "3:36:47"} +{"current_steps": 6074, "total_steps": 8680, "loss": 0.667172908782959, "lr": 4.540090395899089e-07, "epoch": 1.3995391705069125, "percentage": 69.98, "elapsed_time": "8:25:08", "remaining_time": "3:36:43"} +{"current_steps": 6075, "total_steps": 8680, "loss": 0.7918317914009094, "lr": 4.5368989437417116e-07, "epoch": 1.3997695852534562, "percentage": 69.99, "elapsed_time": "8:25:15", "remaining_time": "3:36:39"} +{"current_steps": 6076, "total_steps": 8680, "loss": 0.6800580024719238, "lr": 4.5337082845462193e-07, "epoch": 1.4, "percentage": 70.0, "elapsed_time": "8:25:20", "remaining_time": "3:36:34"} +{"current_steps": 6077, "total_steps": 8680, "loss": 0.9205034971237183, "lr": 4.530518418775733e-07, "epoch": 1.4002304147465439, "percentage": 70.01, "elapsed_time": "8:25:24", "remaining_time": "3:36:29"} +{"current_steps": 6078, "total_steps": 8680, "loss": 0.7228822708129883, "lr": 4.5273293468932585e-07, "epoch": 1.4004608294930876, "percentage": 70.02, "elapsed_time": "8:25:30", "remaining_time": "3:36:24"} +{"current_steps": 6079, "total_steps": 8680, "loss": 0.6827987432479858, 
"lr": 4.524141069361679e-07, "epoch": 1.4006912442396313, "percentage": 70.03, "elapsed_time": "8:25:36", "remaining_time": "3:36:19"} +{"current_steps": 6080, "total_steps": 8680, "loss": 0.6272581815719604, "lr": 4.520953586643779e-07, "epoch": 1.400921658986175, "percentage": 70.05, "elapsed_time": "8:25:41", "remaining_time": "3:36:14"} +{"current_steps": 6081, "total_steps": 8680, "loss": 0.8041881322860718, "lr": 4.5177668992022125e-07, "epoch": 1.4011520737327188, "percentage": 70.06, "elapsed_time": "8:25:44", "remaining_time": "3:36:09"} +{"current_steps": 6082, "total_steps": 8680, "loss": 0.7284958362579346, "lr": 4.5145810074995194e-07, "epoch": 1.4013824884792627, "percentage": 70.07, "elapsed_time": "8:25:50", "remaining_time": "3:36:04"} +{"current_steps": 6083, "total_steps": 8680, "loss": 0.7653781175613403, "lr": 4.511395911998135e-07, "epoch": 1.4016129032258065, "percentage": 70.08, "elapsed_time": "8:25:54", "remaining_time": "3:35:59"} +{"current_steps": 6084, "total_steps": 8680, "loss": 0.8037170171737671, "lr": 4.5082116131603677e-07, "epoch": 1.4018433179723502, "percentage": 70.09, "elapsed_time": "8:26:01", "remaining_time": "3:35:55"} +{"current_steps": 6085, "total_steps": 8680, "loss": 0.783043384552002, "lr": 4.505028111448411e-07, "epoch": 1.4020737327188941, "percentage": 70.1, "elapsed_time": "8:26:05", "remaining_time": "3:35:49"} +{"current_steps": 6086, "total_steps": 8680, "loss": 0.6712161302566528, "lr": 4.501845407324354e-07, "epoch": 1.4023041474654379, "percentage": 70.12, "elapsed_time": "8:26:10", "remaining_time": "3:35:44"} +{"current_steps": 6087, "total_steps": 8680, "loss": 0.9537261724472046, "lr": 4.4986635012501575e-07, "epoch": 1.4025345622119816, "percentage": 70.13, "elapsed_time": "8:26:14", "remaining_time": "3:35:39"} +{"current_steps": 6088, "total_steps": 8680, "loss": 0.8984304666519165, "lr": 4.495482393687666e-07, "epoch": 1.4027649769585253, "percentage": 70.14, "elapsed_time": "8:26:18", 
"remaining_time": "3:35:33"} +{"current_steps": 6089, "total_steps": 8680, "loss": 0.6894555687904358, "lr": 4.4923020850986224e-07, "epoch": 1.402995391705069, "percentage": 70.15, "elapsed_time": "8:26:24", "remaining_time": "3:35:29"} +{"current_steps": 6090, "total_steps": 8680, "loss": 0.685502290725708, "lr": 4.489122575944639e-07, "epoch": 1.403225806451613, "percentage": 70.16, "elapsed_time": "8:26:28", "remaining_time": "3:35:23"} +{"current_steps": 6091, "total_steps": 8680, "loss": 0.6794239282608032, "lr": 4.485943866687216e-07, "epoch": 1.4034562211981567, "percentage": 70.17, "elapsed_time": "8:26:33", "remaining_time": "3:35:18"} +{"current_steps": 6092, "total_steps": 8680, "loss": 0.7647888660430908, "lr": 4.482765957787744e-07, "epoch": 1.4036866359447004, "percentage": 70.18, "elapsed_time": "8:26:39", "remaining_time": "3:35:14"} +{"current_steps": 6093, "total_steps": 8680, "loss": 0.798794150352478, "lr": 4.4795888497074896e-07, "epoch": 1.4039170506912442, "percentage": 70.2, "elapsed_time": "8:26:43", "remaining_time": "3:35:08"} +{"current_steps": 6094, "total_steps": 8680, "loss": 0.79430091381073, "lr": 4.4764125429076026e-07, "epoch": 1.404147465437788, "percentage": 70.21, "elapsed_time": "8:26:48", "remaining_time": "3:35:04"} +{"current_steps": 6095, "total_steps": 8680, "loss": 0.9089795351028442, "lr": 4.4732370378491255e-07, "epoch": 1.4043778801843319, "percentage": 70.22, "elapsed_time": "8:26:54", "remaining_time": "3:34:59"} +{"current_steps": 6096, "total_steps": 8680, "loss": 0.8270718455314636, "lr": 4.4700623349929757e-07, "epoch": 1.4046082949308756, "percentage": 70.23, "elapsed_time": "8:27:00", "remaining_time": "3:34:54"} +{"current_steps": 6097, "total_steps": 8680, "loss": 0.7550361156463623, "lr": 4.466888434799958e-07, "epoch": 1.4048387096774193, "percentage": 70.24, "elapsed_time": "8:27:05", "remaining_time": "3:34:49"} +{"current_steps": 6098, "total_steps": 8680, "loss": 0.7406442165374756, "lr": 
4.463715337730759e-07, "epoch": 1.4050691244239633, "percentage": 70.25, "elapsed_time": "8:27:10", "remaining_time": "3:34:44"} +{"current_steps": 6099, "total_steps": 8680, "loss": 0.830552875995636, "lr": 4.460543044245949e-07, "epoch": 1.405299539170507, "percentage": 70.26, "elapsed_time": "8:27:15", "remaining_time": "3:34:39"} +{"current_steps": 6100, "total_steps": 8680, "loss": 0.8961822390556335, "lr": 4.45737155480598e-07, "epoch": 1.4055299539170507, "percentage": 70.28, "elapsed_time": "8:27:20", "remaining_time": "3:34:35"} +{"current_steps": 6101, "total_steps": 8680, "loss": 0.6307489275932312, "lr": 4.454200869871195e-07, "epoch": 1.4057603686635944, "percentage": 70.29, "elapsed_time": "8:27:27", "remaining_time": "3:34:30"} +{"current_steps": 6102, "total_steps": 8680, "loss": 0.8682084083557129, "lr": 4.451030989901808e-07, "epoch": 1.4059907834101382, "percentage": 70.3, "elapsed_time": "8:27:31", "remaining_time": "3:34:25"} +{"current_steps": 6103, "total_steps": 8680, "loss": 0.7157681584358215, "lr": 4.4478619153579323e-07, "epoch": 1.4062211981566821, "percentage": 70.31, "elapsed_time": "8:27:37", "remaining_time": "3:34:20"} +{"current_steps": 6104, "total_steps": 8680, "loss": 0.7267071008682251, "lr": 4.4446936466995486e-07, "epoch": 1.4064516129032258, "percentage": 70.32, "elapsed_time": "8:27:42", "remaining_time": "3:34:15"} +{"current_steps": 6105, "total_steps": 8680, "loss": 0.8435063362121582, "lr": 4.4415261843865246e-07, "epoch": 1.4066820276497696, "percentage": 70.33, "elapsed_time": "8:27:46", "remaining_time": "3:34:10"} +{"current_steps": 6106, "total_steps": 8680, "loss": 0.8895175457000732, "lr": 4.43835952887862e-07, "epoch": 1.4069124423963133, "percentage": 70.35, "elapsed_time": "8:27:51", "remaining_time": "3:34:05"} +{"current_steps": 6107, "total_steps": 8680, "loss": 0.7470073699951172, "lr": 4.435193680635467e-07, "epoch": 1.407142857142857, "percentage": 70.36, "elapsed_time": "8:27:56", "remaining_time": 
"3:34:00"} +{"current_steps": 6108, "total_steps": 8680, "loss": 0.7993630170822144, "lr": 4.432028640116581e-07, "epoch": 1.407373271889401, "percentage": 70.37, "elapsed_time": "8:28:00", "remaining_time": "3:33:55"} +{"current_steps": 6109, "total_steps": 8680, "loss": 0.823069155216217, "lr": 4.4288644077813695e-07, "epoch": 1.4076036866359447, "percentage": 70.38, "elapsed_time": "8:28:05", "remaining_time": "3:33:49"} +{"current_steps": 6110, "total_steps": 8680, "loss": 0.7665367126464844, "lr": 4.4257009840891146e-07, "epoch": 1.4078341013824884, "percentage": 70.39, "elapsed_time": "8:28:09", "remaining_time": "3:33:44"} +{"current_steps": 6111, "total_steps": 8680, "loss": 0.7173991799354553, "lr": 4.422538369498979e-07, "epoch": 1.4080645161290324, "percentage": 70.4, "elapsed_time": "8:28:15", "remaining_time": "3:33:39"} +{"current_steps": 6112, "total_steps": 8680, "loss": 0.8288347125053406, "lr": 4.4193765644700186e-07, "epoch": 1.4082949308755761, "percentage": 70.41, "elapsed_time": "8:28:20", "remaining_time": "3:33:34"} +{"current_steps": 6113, "total_steps": 8680, "loss": 0.8589911460876465, "lr": 4.4162155694611636e-07, "epoch": 1.4085253456221198, "percentage": 70.43, "elapsed_time": "8:28:26", "remaining_time": "3:33:30"} +{"current_steps": 6114, "total_steps": 8680, "loss": 0.8783868551254272, "lr": 4.4130553849312213e-07, "epoch": 1.4087557603686636, "percentage": 70.44, "elapsed_time": "8:28:31", "remaining_time": "3:33:25"} +{"current_steps": 6115, "total_steps": 8680, "loss": 0.7625287771224976, "lr": 4.409896011338898e-07, "epoch": 1.4089861751152073, "percentage": 70.45, "elapsed_time": "8:28:36", "remaining_time": "3:33:20"} +{"current_steps": 6116, "total_steps": 8680, "loss": 0.7412571907043457, "lr": 4.406737449142769e-07, "epoch": 1.4092165898617512, "percentage": 70.46, "elapsed_time": "8:28:40", "remaining_time": "3:33:15"} +{"current_steps": 6117, "total_steps": 8680, "loss": 0.6248455047607422, "lr": 4.4035796988012943e-07, 
"epoch": 1.409447004608295, "percentage": 70.47, "elapsed_time": "8:28:47", "remaining_time": "3:33:10"} +{"current_steps": 6118, "total_steps": 8680, "loss": 0.7970919609069824, "lr": 4.400422760772817e-07, "epoch": 1.4096774193548387, "percentage": 70.48, "elapsed_time": "8:28:51", "remaining_time": "3:33:05"} +{"current_steps": 6119, "total_steps": 8680, "loss": 0.6184223294258118, "lr": 4.397266635515563e-07, "epoch": 1.4099078341013824, "percentage": 70.5, "elapsed_time": "8:28:58", "remaining_time": "3:33:01"} +{"current_steps": 6120, "total_steps": 8680, "loss": 0.9014843702316284, "lr": 4.394111323487637e-07, "epoch": 1.4101382488479262, "percentage": 70.51, "elapsed_time": "8:29:02", "remaining_time": "3:32:56"} +{"current_steps": 6121, "total_steps": 8680, "loss": 0.8468939661979675, "lr": 4.390956825147034e-07, "epoch": 1.41036866359447, "percentage": 70.52, "elapsed_time": "8:29:06", "remaining_time": "3:32:50"} +{"current_steps": 6122, "total_steps": 8680, "loss": 0.7832604646682739, "lr": 4.3878031409516234e-07, "epoch": 1.4105990783410138, "percentage": 70.53, "elapsed_time": "8:29:10", "remaining_time": "3:32:45"} +{"current_steps": 6123, "total_steps": 8680, "loss": 0.7202898263931274, "lr": 4.3846502713591527e-07, "epoch": 1.4108294930875576, "percentage": 70.54, "elapsed_time": "8:29:16", "remaining_time": "3:32:40"} +{"current_steps": 6124, "total_steps": 8680, "loss": 0.6785540580749512, "lr": 4.3814982168272664e-07, "epoch": 1.4110599078341015, "percentage": 70.55, "elapsed_time": "8:29:22", "remaining_time": "3:32:36"} +{"current_steps": 6125, "total_steps": 8680, "loss": 0.795532763004303, "lr": 4.378346977813474e-07, "epoch": 1.4112903225806452, "percentage": 70.56, "elapsed_time": "8:29:27", "remaining_time": "3:32:30"} +{"current_steps": 6126, "total_steps": 8680, "loss": 0.7715259790420532, "lr": 4.3751965547751735e-07, "epoch": 1.411520737327189, "percentage": 70.58, "elapsed_time": "8:29:32", "remaining_time": "3:32:25"} 
+{"current_steps": 6127, "total_steps": 8680, "loss": 0.8657132983207703, "lr": 4.37204694816965e-07, "epoch": 1.4117511520737327, "percentage": 70.59, "elapsed_time": "8:29:38", "remaining_time": "3:32:21"} +{"current_steps": 6128, "total_steps": 8680, "loss": 0.7253363132476807, "lr": 4.3688981584540586e-07, "epoch": 1.4119815668202764, "percentage": 70.6, "elapsed_time": "8:29:43", "remaining_time": "3:32:16"} +{"current_steps": 6129, "total_steps": 8680, "loss": 0.8511998653411865, "lr": 4.365750186085447e-07, "epoch": 1.4122119815668204, "percentage": 70.61, "elapsed_time": "8:29:48", "remaining_time": "3:32:11"} +{"current_steps": 6130, "total_steps": 8680, "loss": 0.7936528921127319, "lr": 4.3626030315207386e-07, "epoch": 1.412442396313364, "percentage": 70.62, "elapsed_time": "8:29:53", "remaining_time": "3:32:06"} +{"current_steps": 6131, "total_steps": 8680, "loss": 0.758521556854248, "lr": 4.3594566952167324e-07, "epoch": 1.4126728110599078, "percentage": 70.63, "elapsed_time": "8:29:59", "remaining_time": "3:32:01"} +{"current_steps": 6132, "total_steps": 8680, "loss": 0.8202048540115356, "lr": 4.3563111776301243e-07, "epoch": 1.4129032258064516, "percentage": 70.65, "elapsed_time": "8:30:04", "remaining_time": "3:31:56"} +{"current_steps": 6133, "total_steps": 8680, "loss": 0.7864067554473877, "lr": 4.3531664792174773e-07, "epoch": 1.4131336405529953, "percentage": 70.66, "elapsed_time": "8:30:10", "remaining_time": "3:31:52"} +{"current_steps": 6134, "total_steps": 8680, "loss": 0.8051233291625977, "lr": 4.350022600435236e-07, "epoch": 1.4133640552995392, "percentage": 70.67, "elapsed_time": "8:30:14", "remaining_time": "3:31:46"} +{"current_steps": 6135, "total_steps": 8680, "loss": 0.7617348432540894, "lr": 4.34687954173974e-07, "epoch": 1.413594470046083, "percentage": 70.68, "elapsed_time": "8:30:19", "remaining_time": "3:31:41"} +{"current_steps": 6136, "total_steps": 8680, "loss": 0.7899652719497681, "lr": 4.3437373035871927e-07, "epoch": 
1.4138248847926267, "percentage": 70.69, "elapsed_time": "8:30:25", "remaining_time": "3:31:37"} +{"current_steps": 6137, "total_steps": 8680, "loss": 0.8467222452163696, "lr": 4.340595886433689e-07, "epoch": 1.4140552995391706, "percentage": 70.7, "elapsed_time": "8:30:30", "remaining_time": "3:31:32"} +{"current_steps": 6138, "total_steps": 8680, "loss": 0.8451426029205322, "lr": 4.3374552907352003e-07, "epoch": 1.4142857142857144, "percentage": 70.71, "elapsed_time": "8:30:36", "remaining_time": "3:31:27"} +{"current_steps": 6139, "total_steps": 8680, "loss": 0.7140414714813232, "lr": 4.3343155169475797e-07, "epoch": 1.414516129032258, "percentage": 70.73, "elapsed_time": "8:30:41", "remaining_time": "3:31:22"} +{"current_steps": 6140, "total_steps": 8680, "loss": 0.7680803537368774, "lr": 4.331176565526558e-07, "epoch": 1.4147465437788018, "percentage": 70.74, "elapsed_time": "8:30:47", "remaining_time": "3:31:18"} +{"current_steps": 6141, "total_steps": 8680, "loss": 0.7262120246887207, "lr": 4.328038436927757e-07, "epoch": 1.4149769585253456, "percentage": 70.75, "elapsed_time": "8:30:52", "remaining_time": "3:31:13"} +{"current_steps": 6142, "total_steps": 8680, "loss": 0.7788687944412231, "lr": 4.3249011316066676e-07, "epoch": 1.4152073732718895, "percentage": 70.76, "elapsed_time": "8:30:59", "remaining_time": "3:31:08"} +{"current_steps": 6143, "total_steps": 8680, "loss": 0.7613503336906433, "lr": 4.321764650018662e-07, "epoch": 1.4154377880184332, "percentage": 70.77, "elapsed_time": "8:31:02", "remaining_time": "3:31:03"} +{"current_steps": 6144, "total_steps": 8680, "loss": 0.6778309345245361, "lr": 4.3186289926190056e-07, "epoch": 1.415668202764977, "percentage": 70.78, "elapsed_time": "8:31:09", "remaining_time": "3:30:59"} +{"current_steps": 6145, "total_steps": 8680, "loss": 0.8626673221588135, "lr": 4.315494159862829e-07, "epoch": 1.4158986175115207, "percentage": 70.79, "elapsed_time": "8:31:14", "remaining_time": "3:30:54"} +{"current_steps": 
6146, "total_steps": 8680, "loss": 0.8321051597595215, "lr": 4.312360152205147e-07, "epoch": 1.4161290322580644, "percentage": 70.81, "elapsed_time": "8:31:18", "remaining_time": "3:30:48"} +{"current_steps": 6147, "total_steps": 8680, "loss": 0.9317119717597961, "lr": 4.309226970100861e-07, "epoch": 1.4163594470046084, "percentage": 70.82, "elapsed_time": "8:31:24", "remaining_time": "3:30:44"} +{"current_steps": 6148, "total_steps": 8680, "loss": 0.9479870200157166, "lr": 4.306094614004748e-07, "epoch": 1.416589861751152, "percentage": 70.83, "elapsed_time": "8:31:30", "remaining_time": "3:30:39"} +{"current_steps": 6149, "total_steps": 8680, "loss": 0.8222699165344238, "lr": 4.3029630843714606e-07, "epoch": 1.4168202764976958, "percentage": 70.84, "elapsed_time": "8:31:35", "remaining_time": "3:30:34"} +{"current_steps": 6150, "total_steps": 8680, "loss": 0.8232519030570984, "lr": 4.2998323816555427e-07, "epoch": 1.4170506912442398, "percentage": 70.85, "elapsed_time": "8:31:40", "remaining_time": "3:30:29"} +{"current_steps": 6151, "total_steps": 8680, "loss": 0.7423735857009888, "lr": 4.2967025063114057e-07, "epoch": 1.4172811059907833, "percentage": 70.86, "elapsed_time": "8:31:45", "remaining_time": "3:30:24"} +{"current_steps": 6152, "total_steps": 8680, "loss": 0.6947557926177979, "lr": 4.2935734587933527e-07, "epoch": 1.4175115207373272, "percentage": 70.88, "elapsed_time": "8:31:49", "remaining_time": "3:30:19"} +{"current_steps": 6153, "total_steps": 8680, "loss": 0.789128303527832, "lr": 4.290445239555558e-07, "epoch": 1.417741935483871, "percentage": 70.89, "elapsed_time": "8:31:56", "remaining_time": "3:30:15"} +{"current_steps": 6154, "total_steps": 8680, "loss": 0.8025885820388794, "lr": 4.2873178490520745e-07, "epoch": 1.4179723502304147, "percentage": 70.9, "elapsed_time": "8:32:01", "remaining_time": "3:30:10"} +{"current_steps": 6155, "total_steps": 8680, "loss": 0.8139045238494873, "lr": 4.284191287736847e-07, "epoch": 1.4182027649769586, 
"percentage": 70.91, "elapsed_time": "8:32:06", "remaining_time": "3:30:05"} +{"current_steps": 6156, "total_steps": 8680, "loss": 0.8154167532920837, "lr": 4.2810655560636864e-07, "epoch": 1.4184331797235024, "percentage": 70.92, "elapsed_time": "8:32:12", "remaining_time": "3:30:00"} +{"current_steps": 6157, "total_steps": 8680, "loss": 0.6383910775184631, "lr": 4.2779406544862896e-07, "epoch": 1.418663594470046, "percentage": 70.93, "elapsed_time": "8:32:19", "remaining_time": "3:29:56"} +{"current_steps": 6158, "total_steps": 8680, "loss": 0.7008179426193237, "lr": 4.2748165834582316e-07, "epoch": 1.4188940092165898, "percentage": 70.94, "elapsed_time": "8:32:24", "remaining_time": "3:29:51"} +{"current_steps": 6159, "total_steps": 8680, "loss": 0.9458012580871582, "lr": 4.2716933434329684e-07, "epoch": 1.4191244239631335, "percentage": 70.96, "elapsed_time": "8:32:28", "remaining_time": "3:29:45"} +{"current_steps": 6160, "total_steps": 8680, "loss": 0.7354133725166321, "lr": 4.268570934863829e-07, "epoch": 1.4193548387096775, "percentage": 70.97, "elapsed_time": "8:32:33", "remaining_time": "3:29:40"} +{"current_steps": 6161, "total_steps": 8680, "loss": 0.7146268486976624, "lr": 4.265449358204034e-07, "epoch": 1.4195852534562212, "percentage": 70.98, "elapsed_time": "8:32:39", "remaining_time": "3:29:36"} +{"current_steps": 6162, "total_steps": 8680, "loss": 0.7357315421104431, "lr": 4.262328613906674e-07, "epoch": 1.419815668202765, "percentage": 70.99, "elapsed_time": "8:32:43", "remaining_time": "3:29:30"} +{"current_steps": 6163, "total_steps": 8680, "loss": 0.8006314039230347, "lr": 4.2592087024247157e-07, "epoch": 1.4200460829493087, "percentage": 71.0, "elapsed_time": "8:32:48", "remaining_time": "3:29:25"} +{"current_steps": 6164, "total_steps": 8680, "loss": 0.8299369812011719, "lr": 4.256089624211018e-07, "epoch": 1.4202764976958524, "percentage": 71.01, "elapsed_time": "8:32:53", "remaining_time": "3:29:21"} +{"current_steps": 6165, "total_steps": 
8680, "loss": 0.7018890380859375, "lr": 4.252971379718308e-07, "epoch": 1.4205069124423964, "percentage": 71.03, "elapsed_time": "8:32:59", "remaining_time": "3:29:16"} +{"current_steps": 6166, "total_steps": 8680, "loss": 0.6501315236091614, "lr": 4.24985396939919e-07, "epoch": 1.42073732718894, "percentage": 71.04, "elapsed_time": "8:33:05", "remaining_time": "3:29:11"} +{"current_steps": 6167, "total_steps": 8680, "loss": 0.8379749059677124, "lr": 4.24673739370616e-07, "epoch": 1.4209677419354838, "percentage": 71.05, "elapsed_time": "8:33:10", "remaining_time": "3:29:06"} +{"current_steps": 6168, "total_steps": 8680, "loss": 0.7996747493743896, "lr": 4.24362165309158e-07, "epoch": 1.4211981566820278, "percentage": 71.06, "elapsed_time": "8:33:17", "remaining_time": "3:29:02"} +{"current_steps": 6169, "total_steps": 8680, "loss": 0.7258181571960449, "lr": 4.240506748007695e-07, "epoch": 1.4214285714285715, "percentage": 71.07, "elapsed_time": "8:33:23", "remaining_time": "3:28:57"} +{"current_steps": 6170, "total_steps": 8680, "loss": 0.6035803556442261, "lr": 4.237392678906633e-07, "epoch": 1.4216589861751152, "percentage": 71.08, "elapsed_time": "8:33:28", "remaining_time": "3:28:53"} +{"current_steps": 6171, "total_steps": 8680, "loss": 0.7668799757957458, "lr": 4.2342794462403954e-07, "epoch": 1.421889400921659, "percentage": 71.09, "elapsed_time": "8:33:33", "remaining_time": "3:28:48"} +{"current_steps": 6172, "total_steps": 8680, "loss": 0.7816733121871948, "lr": 4.23116705046086e-07, "epoch": 1.4221198156682027, "percentage": 71.11, "elapsed_time": "8:33:39", "remaining_time": "3:28:43"} +{"current_steps": 6173, "total_steps": 8680, "loss": 0.8753983974456787, "lr": 4.228055492019793e-07, "epoch": 1.4223502304147466, "percentage": 71.12, "elapsed_time": "8:33:43", "remaining_time": "3:28:37"} +{"current_steps": 6174, "total_steps": 8680, "loss": 0.8319464921951294, "lr": 4.224944771368831e-07, "epoch": 1.4225806451612903, "percentage": 71.13, 
"elapsed_time": "8:33:48", "remaining_time": "3:28:33"} +{"current_steps": 6175, "total_steps": 8680, "loss": 0.6670328378677368, "lr": 4.2218348889594866e-07, "epoch": 1.422811059907834, "percentage": 71.14, "elapsed_time": "8:33:53", "remaining_time": "3:28:28"} +{"current_steps": 6176, "total_steps": 8680, "loss": 0.7879645824432373, "lr": 4.218725845243163e-07, "epoch": 1.4230414746543778, "percentage": 71.15, "elapsed_time": "8:33:59", "remaining_time": "3:28:23"} +{"current_steps": 6177, "total_steps": 8680, "loss": 0.709680438041687, "lr": 4.2156176406711287e-07, "epoch": 1.4232718894009215, "percentage": 71.16, "elapsed_time": "8:34:04", "remaining_time": "3:28:18"} +{"current_steps": 6178, "total_steps": 8680, "loss": 0.7990894317626953, "lr": 4.2125102756945364e-07, "epoch": 1.4235023041474655, "percentage": 71.18, "elapsed_time": "8:34:08", "remaining_time": "3:28:13"} +{"current_steps": 6179, "total_steps": 8680, "loss": 0.7283308506011963, "lr": 4.2094037507644165e-07, "epoch": 1.4237327188940092, "percentage": 71.19, "elapsed_time": "8:34:15", "remaining_time": "3:28:08"} +{"current_steps": 6180, "total_steps": 8680, "loss": 0.8763309717178345, "lr": 4.2062980663316715e-07, "epoch": 1.423963133640553, "percentage": 71.2, "elapsed_time": "8:34:19", "remaining_time": "3:28:03"} +{"current_steps": 6181, "total_steps": 8680, "loss": 0.9370014667510986, "lr": 4.2031932228470966e-07, "epoch": 1.4241935483870969, "percentage": 71.21, "elapsed_time": "8:34:24", "remaining_time": "3:27:58"} +{"current_steps": 6182, "total_steps": 8680, "loss": 0.7883036136627197, "lr": 4.2000892207613526e-07, "epoch": 1.4244239631336406, "percentage": 71.22, "elapsed_time": "8:34:28", "remaining_time": "3:27:53"} +{"current_steps": 6183, "total_steps": 8680, "loss": 0.7021682262420654, "lr": 4.196986060524975e-07, "epoch": 1.4246543778801843, "percentage": 71.23, "elapsed_time": "8:34:35", "remaining_time": "3:27:48"} +{"current_steps": 6184, "total_steps": 8680, "loss": 
0.842636227607727, "lr": 4.193883742588393e-07, "epoch": 1.424884792626728, "percentage": 71.24, "elapsed_time": "8:34:41", "remaining_time": "3:27:44"} +{"current_steps": 6185, "total_steps": 8680, "loss": 0.8003957867622375, "lr": 4.190782267401899e-07, "epoch": 1.4251152073732718, "percentage": 71.26, "elapsed_time": "8:34:45", "remaining_time": "3:27:39"} +{"current_steps": 6186, "total_steps": 8680, "loss": 0.9799495935440063, "lr": 4.1876816354156655e-07, "epoch": 1.4253456221198157, "percentage": 71.27, "elapsed_time": "8:34:49", "remaining_time": "3:27:33"} +{"current_steps": 6187, "total_steps": 8680, "loss": 0.8726102113723755, "lr": 4.184581847079751e-07, "epoch": 1.4255760368663595, "percentage": 71.28, "elapsed_time": "8:34:54", "remaining_time": "3:27:28"} +{"current_steps": 6188, "total_steps": 8680, "loss": 0.8771729469299316, "lr": 4.181482902844082e-07, "epoch": 1.4258064516129032, "percentage": 71.29, "elapsed_time": "8:34:58", "remaining_time": "3:27:23"} +{"current_steps": 6189, "total_steps": 8680, "loss": 0.5891281962394714, "lr": 4.1783848031584644e-07, "epoch": 1.426036866359447, "percentage": 71.3, "elapsed_time": "8:35:04", "remaining_time": "3:27:18"} +{"current_steps": 6190, "total_steps": 8680, "loss": 0.8133054971694946, "lr": 4.1752875484725904e-07, "epoch": 1.4262672811059907, "percentage": 71.31, "elapsed_time": "8:35:09", "remaining_time": "3:27:13"} +{"current_steps": 6191, "total_steps": 8680, "loss": 0.7175684571266174, "lr": 4.1721911392360164e-07, "epoch": 1.4264976958525346, "percentage": 71.32, "elapsed_time": "8:35:13", "remaining_time": "3:27:08"} +{"current_steps": 6192, "total_steps": 8680, "loss": 0.7112927436828613, "lr": 4.16909557589818e-07, "epoch": 1.4267281105990783, "percentage": 71.34, "elapsed_time": "8:35:18", "remaining_time": "3:27:03"} +{"current_steps": 6193, "total_steps": 8680, "loss": 0.8564406037330627, "lr": 4.166000858908406e-07, "epoch": 1.426958525345622, "percentage": 71.35, "elapsed_time": 
"8:35:24", "remaining_time": "3:26:58"} +{"current_steps": 6194, "total_steps": 8680, "loss": 0.7630729675292969, "lr": 4.162906988715883e-07, "epoch": 1.427188940092166, "percentage": 71.36, "elapsed_time": "8:35:29", "remaining_time": "3:26:53"} +{"current_steps": 6195, "total_steps": 8680, "loss": 0.6810768246650696, "lr": 4.1598139657696806e-07, "epoch": 1.4274193548387097, "percentage": 71.37, "elapsed_time": "8:35:32", "remaining_time": "3:26:48"} +{"current_steps": 6196, "total_steps": 8680, "loss": 0.8482312560081482, "lr": 4.1567217905187535e-07, "epoch": 1.4276497695852535, "percentage": 71.38, "elapsed_time": "8:35:37", "remaining_time": "3:26:42"} +{"current_steps": 6197, "total_steps": 8680, "loss": 0.845355749130249, "lr": 4.1536304634119225e-07, "epoch": 1.4278801843317972, "percentage": 71.39, "elapsed_time": "8:35:42", "remaining_time": "3:26:37"} +{"current_steps": 6198, "total_steps": 8680, "loss": 0.8082824349403381, "lr": 4.1505399848978896e-07, "epoch": 1.428110599078341, "percentage": 71.41, "elapsed_time": "8:35:46", "remaining_time": "3:26:32"} +{"current_steps": 6199, "total_steps": 8680, "loss": 0.8141404390335083, "lr": 4.147450355425235e-07, "epoch": 1.4283410138248849, "percentage": 71.42, "elapsed_time": "8:35:51", "remaining_time": "3:26:27"} +{"current_steps": 6200, "total_steps": 8680, "loss": 0.8144549131393433, "lr": 4.14436157544241e-07, "epoch": 1.4285714285714286, "percentage": 71.43, "elapsed_time": "8:35:57", "remaining_time": "3:26:23"} +{"current_steps": 6201, "total_steps": 8680, "loss": 0.6554359793663025, "lr": 4.141273645397754e-07, "epoch": 1.4288018433179723, "percentage": 71.44, "elapsed_time": "8:36:04", "remaining_time": "3:26:18"} +{"current_steps": 6202, "total_steps": 8680, "loss": 0.8035449981689453, "lr": 4.138186565739472e-07, "epoch": 1.429032258064516, "percentage": 71.45, "elapsed_time": "8:36:10", "remaining_time": "3:26:14"} +{"current_steps": 6203, "total_steps": 8680, "loss": 0.7848105430603027, "lr": 
4.1351003369156467e-07, "epoch": 1.4292626728110598, "percentage": 71.46, "elapsed_time": "8:36:15", "remaining_time": "3:26:09"} +{"current_steps": 6204, "total_steps": 8680, "loss": 0.7064214944839478, "lr": 4.132014959374246e-07, "epoch": 1.4294930875576037, "percentage": 71.47, "elapsed_time": "8:36:21", "remaining_time": "3:26:04"} +{"current_steps": 6205, "total_steps": 8680, "loss": 0.7636318802833557, "lr": 4.128930433563107e-07, "epoch": 1.4297235023041475, "percentage": 71.49, "elapsed_time": "8:36:26", "remaining_time": "3:25:59"} +{"current_steps": 6206, "total_steps": 8680, "loss": 0.6839499473571777, "lr": 4.1258467599299395e-07, "epoch": 1.4299539170506912, "percentage": 71.5, "elapsed_time": "8:36:31", "remaining_time": "3:25:54"} +{"current_steps": 6207, "total_steps": 8680, "loss": 0.8355294466018677, "lr": 4.122763938922341e-07, "epoch": 1.4301843317972351, "percentage": 71.51, "elapsed_time": "8:36:37", "remaining_time": "3:25:49"} +{"current_steps": 6208, "total_steps": 8680, "loss": 0.7563334107398987, "lr": 4.1196819709877773e-07, "epoch": 1.4304147465437789, "percentage": 71.52, "elapsed_time": "8:36:42", "remaining_time": "3:25:44"} +{"current_steps": 6209, "total_steps": 8680, "loss": 0.6991991996765137, "lr": 4.116600856573588e-07, "epoch": 1.4306451612903226, "percentage": 71.53, "elapsed_time": "8:36:46", "remaining_time": "3:25:39"} +{"current_steps": 6210, "total_steps": 8680, "loss": 0.7249872088432312, "lr": 4.113520596126998e-07, "epoch": 1.4308755760368663, "percentage": 71.54, "elapsed_time": "8:36:52", "remaining_time": "3:25:34"} +{"current_steps": 6211, "total_steps": 8680, "loss": 0.6570736169815063, "lr": 4.110441190095101e-07, "epoch": 1.43110599078341, "percentage": 71.56, "elapsed_time": "8:36:57", "remaining_time": "3:25:30"} +{"current_steps": 6212, "total_steps": 8680, "loss": 0.7137724161148071, "lr": 4.107362638924865e-07, "epoch": 1.431336405529954, "percentage": 71.57, "elapsed_time": "8:37:02", "remaining_time": 
"3:25:25"} +{"current_steps": 6213, "total_steps": 8680, "loss": 0.7620561122894287, "lr": 4.1042849430631453e-07, "epoch": 1.4315668202764977, "percentage": 71.58, "elapsed_time": "8:37:08", "remaining_time": "3:25:20"} +{"current_steps": 6214, "total_steps": 8680, "loss": 0.8186367750167847, "lr": 4.1012081029566616e-07, "epoch": 1.4317972350230415, "percentage": 71.59, "elapsed_time": "8:37:13", "remaining_time": "3:25:15"} +{"current_steps": 6215, "total_steps": 8680, "loss": 0.8068171739578247, "lr": 4.098132119052008e-07, "epoch": 1.4320276497695852, "percentage": 71.6, "elapsed_time": "8:37:19", "remaining_time": "3:25:10"} +{"current_steps": 6216, "total_steps": 8680, "loss": 0.8640002012252808, "lr": 4.095056991795668e-07, "epoch": 1.432258064516129, "percentage": 71.61, "elapsed_time": "8:37:24", "remaining_time": "3:25:06"} +{"current_steps": 6217, "total_steps": 8680, "loss": 0.8886386156082153, "lr": 4.0919827216339887e-07, "epoch": 1.4324884792626729, "percentage": 71.62, "elapsed_time": "8:37:30", "remaining_time": "3:25:01"} +{"current_steps": 6218, "total_steps": 8680, "loss": 0.6853137016296387, "lr": 4.0889093090131965e-07, "epoch": 1.4327188940092166, "percentage": 71.64, "elapsed_time": "8:37:35", "remaining_time": "3:24:56"} +{"current_steps": 6219, "total_steps": 8680, "loss": 0.7423670291900635, "lr": 4.0858367543793923e-07, "epoch": 1.4329493087557603, "percentage": 71.65, "elapsed_time": "8:37:41", "remaining_time": "3:24:51"} +{"current_steps": 6220, "total_steps": 8680, "loss": 0.7969200611114502, "lr": 4.0827650581785544e-07, "epoch": 1.4331797235023043, "percentage": 71.66, "elapsed_time": "8:37:45", "remaining_time": "3:24:46"} +{"current_steps": 6221, "total_steps": 8680, "loss": 0.8506221771240234, "lr": 4.079694220856531e-07, "epoch": 1.433410138248848, "percentage": 71.67, "elapsed_time": "8:37:50", "remaining_time": "3:24:41"} +{"current_steps": 6222, "total_steps": 8680, "loss": 0.6755083799362183, "lr": 4.076624242859058e-07, 
"epoch": 1.4336405529953917, "percentage": 71.68, "elapsed_time": "8:37:55", "remaining_time": "3:24:36"} +{"current_steps": 6223, "total_steps": 8680, "loss": 0.7734944820404053, "lr": 4.0735551246317333e-07, "epoch": 1.4338709677419355, "percentage": 71.69, "elapsed_time": "8:38:01", "remaining_time": "3:24:31"} +{"current_steps": 6224, "total_steps": 8680, "loss": 0.8564216494560242, "lr": 4.0704868666200345e-07, "epoch": 1.4341013824884792, "percentage": 71.71, "elapsed_time": "8:38:05", "remaining_time": "3:24:26"} +{"current_steps": 6225, "total_steps": 8680, "loss": 0.6858065128326416, "lr": 4.067419469269321e-07, "epoch": 1.4343317972350231, "percentage": 71.72, "elapsed_time": "8:38:10", "remaining_time": "3:24:21"} +{"current_steps": 6226, "total_steps": 8680, "loss": 0.684749960899353, "lr": 4.064352933024813e-07, "epoch": 1.4345622119815669, "percentage": 71.73, "elapsed_time": "8:38:15", "remaining_time": "3:24:16"} +{"current_steps": 6227, "total_steps": 8680, "loss": 0.7648766040802002, "lr": 4.061287258331624e-07, "epoch": 1.4347926267281106, "percentage": 71.74, "elapsed_time": "8:38:22", "remaining_time": "3:24:12"} +{"current_steps": 6228, "total_steps": 8680, "loss": 0.924850583076477, "lr": 4.058222445634727e-07, "epoch": 1.4350230414746543, "percentage": 71.75, "elapsed_time": "8:38:28", "remaining_time": "3:24:07"} +{"current_steps": 6229, "total_steps": 8680, "loss": 0.906406581401825, "lr": 4.055158495378972e-07, "epoch": 1.435253456221198, "percentage": 71.76, "elapsed_time": "8:38:33", "remaining_time": "3:24:02"} +{"current_steps": 6230, "total_steps": 8680, "loss": 0.9169156551361084, "lr": 4.052095408009095e-07, "epoch": 1.435483870967742, "percentage": 71.77, "elapsed_time": "8:38:39", "remaining_time": "3:23:57"} +{"current_steps": 6231, "total_steps": 8680, "loss": 0.7367587685585022, "lr": 4.0490331839696967e-07, "epoch": 1.4357142857142857, "percentage": 71.79, "elapsed_time": "8:38:45", "remaining_time": "3:23:53"} 
+{"current_steps": 6232, "total_steps": 8680, "loss": 0.7608749270439148, "lr": 4.045971823705249e-07, "epoch": 1.4359447004608294, "percentage": 71.8, "elapsed_time": "8:38:51", "remaining_time": "3:23:48"} +{"current_steps": 6233, "total_steps": 8680, "loss": 0.7008038759231567, "lr": 4.0429113276601134e-07, "epoch": 1.4361751152073734, "percentage": 71.81, "elapsed_time": "8:38:57", "remaining_time": "3:23:44"} +{"current_steps": 6234, "total_steps": 8680, "loss": 0.8581372499465942, "lr": 4.039851696278511e-07, "epoch": 1.4364055299539171, "percentage": 71.82, "elapsed_time": "8:39:03", "remaining_time": "3:23:39"} +{"current_steps": 6235, "total_steps": 8680, "loss": 0.6602354049682617, "lr": 4.036792930004542e-07, "epoch": 1.4366359447004609, "percentage": 71.83, "elapsed_time": "8:39:07", "remaining_time": "3:23:34"} +{"current_steps": 6236, "total_steps": 8680, "loss": 0.8560018539428711, "lr": 4.0337350292821893e-07, "epoch": 1.4368663594470046, "percentage": 71.84, "elapsed_time": "8:39:13", "remaining_time": "3:23:29"} +{"current_steps": 6237, "total_steps": 8680, "loss": 0.8837640285491943, "lr": 4.030677994555298e-07, "epoch": 1.4370967741935483, "percentage": 71.85, "elapsed_time": "8:39:17", "remaining_time": "3:23:24"} +{"current_steps": 6238, "total_steps": 8680, "loss": 0.8214797973632812, "lr": 4.027621826267593e-07, "epoch": 1.4373271889400923, "percentage": 71.87, "elapsed_time": "8:39:22", "remaining_time": "3:23:19"} +{"current_steps": 6239, "total_steps": 8680, "loss": 0.7590944766998291, "lr": 4.024566524862675e-07, "epoch": 1.437557603686636, "percentage": 71.88, "elapsed_time": "8:39:27", "remaining_time": "3:23:14"} +{"current_steps": 6240, "total_steps": 8680, "loss": 0.8792011141777039, "lr": 4.021512090784014e-07, "epoch": 1.4377880184331797, "percentage": 71.89, "elapsed_time": "8:39:32", "remaining_time": "3:23:09"} +{"current_steps": 6241, "total_steps": 8680, "loss": 0.8309401273727417, "lr": 4.0184585244749556e-07, "epoch": 
1.4380184331797234, "percentage": 71.9, "elapsed_time": "8:39:36", "remaining_time": "3:23:03"} +{"current_steps": 6242, "total_steps": 8680, "loss": 0.7474797964096069, "lr": 4.015405826378727e-07, "epoch": 1.4382488479262672, "percentage": 71.91, "elapsed_time": "8:39:42", "remaining_time": "3:22:59"} +{"current_steps": 6243, "total_steps": 8680, "loss": 0.7376091480255127, "lr": 4.012353996938421e-07, "epoch": 1.4384792626728111, "percentage": 71.92, "elapsed_time": "8:39:47", "remaining_time": "3:22:54"} +{"current_steps": 6244, "total_steps": 8680, "loss": 0.7809054851531982, "lr": 4.0093030365970014e-07, "epoch": 1.4387096774193548, "percentage": 71.94, "elapsed_time": "8:39:51", "remaining_time": "3:22:48"} +{"current_steps": 6245, "total_steps": 8680, "loss": 0.8551669120788574, "lr": 4.0062529457973194e-07, "epoch": 1.4389400921658986, "percentage": 71.95, "elapsed_time": "8:39:55", "remaining_time": "3:22:43"} +{"current_steps": 6246, "total_steps": 8680, "loss": 0.7874705791473389, "lr": 4.0032037249820874e-07, "epoch": 1.4391705069124425, "percentage": 71.96, "elapsed_time": "8:39:59", "remaining_time": "3:22:38"} +{"current_steps": 6247, "total_steps": 8680, "loss": 0.8032190799713135, "lr": 4.0001553745938923e-07, "epoch": 1.4394009216589863, "percentage": 71.97, "elapsed_time": "8:40:04", "remaining_time": "3:22:33"} +{"current_steps": 6248, "total_steps": 8680, "loss": 0.7600107192993164, "lr": 3.9971078950752057e-07, "epoch": 1.43963133640553, "percentage": 71.98, "elapsed_time": "8:40:09", "remaining_time": "3:22:28"} +{"current_steps": 6249, "total_steps": 8680, "loss": 0.7738933563232422, "lr": 3.994061286868361e-07, "epoch": 1.4398617511520737, "percentage": 71.99, "elapsed_time": "8:40:14", "remaining_time": "3:22:23"} +{"current_steps": 6250, "total_steps": 8680, "loss": 0.701007604598999, "lr": 3.9910155504155665e-07, "epoch": 1.4400921658986174, "percentage": 72.0, "elapsed_time": "8:40:20", "remaining_time": "3:22:18"} +{"current_steps": 
6251, "total_steps": 8680, "loss": 0.8962818384170532, "lr": 3.9879706861589126e-07, "epoch": 1.4403225806451614, "percentage": 72.02, "elapsed_time": "8:40:25", "remaining_time": "3:22:13"} +{"current_steps": 6252, "total_steps": 8680, "loss": 0.7636146545410156, "lr": 3.9849266945403513e-07, "epoch": 1.4405529953917051, "percentage": 72.03, "elapsed_time": "8:40:30", "remaining_time": "3:22:08"} +{"current_steps": 6253, "total_steps": 8680, "loss": 0.8816943168640137, "lr": 3.981883576001722e-07, "epoch": 1.4407834101382488, "percentage": 72.04, "elapsed_time": "8:40:35", "remaining_time": "3:22:03"} +{"current_steps": 6254, "total_steps": 8680, "loss": 0.7252858877182007, "lr": 3.978841330984725e-07, "epoch": 1.4410138248847926, "percentage": 72.05, "elapsed_time": "8:40:40", "remaining_time": "3:21:58"} +{"current_steps": 6255, "total_steps": 8680, "loss": 0.6720175743103027, "lr": 3.975799959930932e-07, "epoch": 1.4412442396313363, "percentage": 72.06, "elapsed_time": "8:40:44", "remaining_time": "3:21:53"} +{"current_steps": 6256, "total_steps": 8680, "loss": 0.8000779151916504, "lr": 3.972759463281805e-07, "epoch": 1.4414746543778802, "percentage": 72.07, "elapsed_time": "8:40:50", "remaining_time": "3:21:48"} +{"current_steps": 6257, "total_steps": 8680, "loss": 0.7356371283531189, "lr": 3.9697198414786626e-07, "epoch": 1.441705069124424, "percentage": 72.09, "elapsed_time": "8:40:56", "remaining_time": "3:21:44"} +{"current_steps": 6258, "total_steps": 8680, "loss": 0.708438515663147, "lr": 3.966681094962703e-07, "epoch": 1.4419354838709677, "percentage": 72.1, "elapsed_time": "8:41:01", "remaining_time": "3:21:39"} +{"current_steps": 6259, "total_steps": 8680, "loss": 0.709287166595459, "lr": 3.963643224174994e-07, "epoch": 1.4421658986175117, "percentage": 72.11, "elapsed_time": "8:41:05", "remaining_time": "3:21:33"} +{"current_steps": 6260, "total_steps": 8680, "loss": 0.743755578994751, "lr": 3.9606062295564813e-07, "epoch": 1.4423963133640554, 
"percentage": 72.12, "elapsed_time": "8:41:12", "remaining_time": "3:21:29"} +{"current_steps": 6261, "total_steps": 8680, "loss": 0.9727948904037476, "lr": 3.9575701115479744e-07, "epoch": 1.442626728110599, "percentage": 72.13, "elapsed_time": "8:41:16", "remaining_time": "3:21:23"} +{"current_steps": 6262, "total_steps": 8680, "loss": 0.9070688486099243, "lr": 3.9545348705901703e-07, "epoch": 1.4428571428571428, "percentage": 72.14, "elapsed_time": "8:41:20", "remaining_time": "3:21:18"} +{"current_steps": 6263, "total_steps": 8680, "loss": 0.8167496919631958, "lr": 3.951500507123627e-07, "epoch": 1.4430875576036866, "percentage": 72.15, "elapsed_time": "8:41:25", "remaining_time": "3:21:13"} +{"current_steps": 6264, "total_steps": 8680, "loss": 0.7691773772239685, "lr": 3.948467021588775e-07, "epoch": 1.4433179723502305, "percentage": 72.17, "elapsed_time": "8:41:29", "remaining_time": "3:21:08"} +{"current_steps": 6265, "total_steps": 8680, "loss": 0.7638411521911621, "lr": 3.945434414425927e-07, "epoch": 1.4435483870967742, "percentage": 72.18, "elapsed_time": "8:41:34", "remaining_time": "3:21:03"} +{"current_steps": 6266, "total_steps": 8680, "loss": 0.8138284683227539, "lr": 3.942402686075258e-07, "epoch": 1.443778801843318, "percentage": 72.19, "elapsed_time": "8:41:39", "remaining_time": "3:20:58"} +{"current_steps": 6267, "total_steps": 8680, "loss": 0.8404628038406372, "lr": 3.939371836976816e-07, "epoch": 1.4440092165898617, "percentage": 72.2, "elapsed_time": "8:41:44", "remaining_time": "3:20:53"} +{"current_steps": 6268, "total_steps": 8680, "loss": 0.7354726791381836, "lr": 3.936341867570533e-07, "epoch": 1.4442396313364054, "percentage": 72.21, "elapsed_time": "8:41:50", "remaining_time": "3:20:48"} +{"current_steps": 6269, "total_steps": 8680, "loss": 0.8607511520385742, "lr": 3.9333127782962003e-07, "epoch": 1.4444700460829494, "percentage": 72.22, "elapsed_time": "8:41:56", "remaining_time": "3:20:43"} +{"current_steps": 6270, "total_steps": 
8680, "loss": 0.7372239232063293, "lr": 3.930284569593483e-07, "epoch": 1.444700460829493, "percentage": 72.24, "elapsed_time": "8:42:01", "remaining_time": "3:20:39"} +{"current_steps": 6271, "total_steps": 8680, "loss": 0.8902593851089478, "lr": 3.927257241901929e-07, "epoch": 1.4449308755760368, "percentage": 72.25, "elapsed_time": "8:42:05", "remaining_time": "3:20:33"} +{"current_steps": 6272, "total_steps": 8680, "loss": 0.7481765747070312, "lr": 3.924230795660947e-07, "epoch": 1.4451612903225808, "percentage": 72.26, "elapsed_time": "8:42:10", "remaining_time": "3:20:28"} +{"current_steps": 6273, "total_steps": 8680, "loss": 0.6868888139724731, "lr": 3.9212052313098177e-07, "epoch": 1.4453917050691243, "percentage": 72.27, "elapsed_time": "8:42:15", "remaining_time": "3:20:23"} +{"current_steps": 6274, "total_steps": 8680, "loss": 0.6867324709892273, "lr": 3.918180549287705e-07, "epoch": 1.4456221198156682, "percentage": 72.28, "elapsed_time": "8:42:21", "remaining_time": "3:20:18"} +{"current_steps": 6275, "total_steps": 8680, "loss": 0.8473105430603027, "lr": 3.9151567500336323e-07, "epoch": 1.445852534562212, "percentage": 72.29, "elapsed_time": "8:42:26", "remaining_time": "3:20:13"} +{"current_steps": 6276, "total_steps": 8680, "loss": 0.7629631757736206, "lr": 3.912133833986504e-07, "epoch": 1.4460829493087557, "percentage": 72.3, "elapsed_time": "8:42:31", "remaining_time": "3:20:08"} +{"current_steps": 6277, "total_steps": 8680, "loss": 0.9501597881317139, "lr": 3.909111801585091e-07, "epoch": 1.4463133640552996, "percentage": 72.32, "elapsed_time": "8:42:36", "remaining_time": "3:20:04"} +{"current_steps": 6278, "total_steps": 8680, "loss": 0.7330536842346191, "lr": 3.906090653268037e-07, "epoch": 1.4465437788018434, "percentage": 72.33, "elapsed_time": "8:42:41", "remaining_time": "3:19:59"} +{"current_steps": 6279, "total_steps": 8680, "loss": 0.907101571559906, "lr": 3.903070389473857e-07, "epoch": 1.446774193548387, "percentage": 72.34, 
"elapsed_time": "8:42:46", "remaining_time": "3:19:53"} +{"current_steps": 6280, "total_steps": 8680, "loss": 0.8177503347396851, "lr": 3.900051010640939e-07, "epoch": 1.4470046082949308, "percentage": 72.35, "elapsed_time": "8:42:51", "remaining_time": "3:19:49"} +{"current_steps": 6281, "total_steps": 8680, "loss": 0.7851059436798096, "lr": 3.897032517207538e-07, "epoch": 1.4472350230414746, "percentage": 72.36, "elapsed_time": "8:42:55", "remaining_time": "3:19:43"} +{"current_steps": 6282, "total_steps": 8680, "loss": 0.7056214809417725, "lr": 3.8940149096117914e-07, "epoch": 1.4474654377880185, "percentage": 72.37, "elapsed_time": "8:43:01", "remaining_time": "3:19:39"} +{"current_steps": 6283, "total_steps": 8680, "loss": 0.784143328666687, "lr": 3.8909981882916975e-07, "epoch": 1.4476958525345622, "percentage": 72.38, "elapsed_time": "8:43:06", "remaining_time": "3:19:34"} +{"current_steps": 6284, "total_steps": 8680, "loss": 0.8157210350036621, "lr": 3.8879823536851253e-07, "epoch": 1.447926267281106, "percentage": 72.4, "elapsed_time": "8:43:13", "remaining_time": "3:19:29"} +{"current_steps": 6285, "total_steps": 8680, "loss": 0.7329680323600769, "lr": 3.884967406229828e-07, "epoch": 1.4481566820276497, "percentage": 72.41, "elapsed_time": "8:43:17", "remaining_time": "3:19:24"} +{"current_steps": 6286, "total_steps": 8680, "loss": 0.9214208722114563, "lr": 3.8819533463634145e-07, "epoch": 1.4483870967741934, "percentage": 72.42, "elapsed_time": "8:43:21", "remaining_time": "3:19:19"} +{"current_steps": 6287, "total_steps": 8680, "loss": 0.8118722438812256, "lr": 3.8789401745233706e-07, "epoch": 1.4486175115207374, "percentage": 72.43, "elapsed_time": "8:43:25", "remaining_time": "3:19:13"} +{"current_steps": 6288, "total_steps": 8680, "loss": 0.7517364025115967, "lr": 3.8759278911470615e-07, "epoch": 1.448847926267281, "percentage": 72.44, "elapsed_time": "8:43:31", "remaining_time": "3:19:09"} +{"current_steps": 6289, "total_steps": 8680, "loss": 
0.8979834318161011, "lr": 3.872916496671711e-07, "epoch": 1.4490783410138248, "percentage": 72.45, "elapsed_time": "8:43:35", "remaining_time": "3:19:03"} +{"current_steps": 6290, "total_steps": 8680, "loss": 0.9159818887710571, "lr": 3.8699059915344166e-07, "epoch": 1.4493087557603688, "percentage": 72.47, "elapsed_time": "8:43:40", "remaining_time": "3:18:58"} +{"current_steps": 6291, "total_steps": 8680, "loss": 0.8176029324531555, "lr": 3.8668963761721563e-07, "epoch": 1.4495391705069125, "percentage": 72.48, "elapsed_time": "8:43:45", "remaining_time": "3:18:53"} +{"current_steps": 6292, "total_steps": 8680, "loss": 0.7077589631080627, "lr": 3.8638876510217666e-07, "epoch": 1.4497695852534562, "percentage": 72.49, "elapsed_time": "8:43:50", "remaining_time": "3:18:48"} +{"current_steps": 6293, "total_steps": 8680, "loss": 0.8107718825340271, "lr": 3.8608798165199585e-07, "epoch": 1.45, "percentage": 72.5, "elapsed_time": "8:43:55", "remaining_time": "3:18:43"} +{"current_steps": 6294, "total_steps": 8680, "loss": 0.9021201133728027, "lr": 3.8578728731033214e-07, "epoch": 1.4502304147465437, "percentage": 72.51, "elapsed_time": "8:44:00", "remaining_time": "3:18:38"} +{"current_steps": 6295, "total_steps": 8680, "loss": 0.9134507179260254, "lr": 3.854866821208306e-07, "epoch": 1.4504608294930876, "percentage": 72.52, "elapsed_time": "8:44:05", "remaining_time": "3:18:33"} +{"current_steps": 6296, "total_steps": 8680, "loss": 0.9081463813781738, "lr": 3.8518616612712317e-07, "epoch": 1.4506912442396314, "percentage": 72.53, "elapsed_time": "8:44:09", "remaining_time": "3:18:28"} +{"current_steps": 6297, "total_steps": 8680, "loss": 0.7892032861709595, "lr": 3.848857393728303e-07, "epoch": 1.450921658986175, "percentage": 72.55, "elapsed_time": "8:44:14", "remaining_time": "3:18:23"} +{"current_steps": 6298, "total_steps": 8680, "loss": 0.753928542137146, "lr": 3.8458540190155796e-07, "epoch": 1.4511520737327188, "percentage": 72.56, "elapsed_time": "8:44:21", 
"remaining_time": "3:18:19"} +{"current_steps": 6299, "total_steps": 8680, "loss": 0.6316792964935303, "lr": 3.8428515375689996e-07, "epoch": 1.4513824884792625, "percentage": 72.57, "elapsed_time": "8:44:26", "remaining_time": "3:18:14"} +{"current_steps": 6300, "total_steps": 8680, "loss": 0.6569210290908813, "lr": 3.8398499498243665e-07, "epoch": 1.4516129032258065, "percentage": 72.58, "elapsed_time": "8:44:31", "remaining_time": "3:18:09"} +{"current_steps": 6301, "total_steps": 8680, "loss": 0.9082256555557251, "lr": 3.836849256217355e-07, "epoch": 1.4518433179723502, "percentage": 72.59, "elapsed_time": "8:44:39", "remaining_time": "3:18:05"} +{"current_steps": 6302, "total_steps": 8680, "loss": 0.6533655524253845, "lr": 3.833849457183519e-07, "epoch": 1.452073732718894, "percentage": 72.6, "elapsed_time": "8:44:43", "remaining_time": "3:18:00"} +{"current_steps": 6303, "total_steps": 8680, "loss": 0.8181168437004089, "lr": 3.830850553158271e-07, "epoch": 1.452304147465438, "percentage": 72.62, "elapsed_time": "8:44:48", "remaining_time": "3:17:55"} +{"current_steps": 6304, "total_steps": 8680, "loss": 0.8258780241012573, "lr": 3.827852544576895e-07, "epoch": 1.4525345622119816, "percentage": 72.63, "elapsed_time": "8:44:52", "remaining_time": "3:17:49"} +{"current_steps": 6305, "total_steps": 8680, "loss": 0.7917114496231079, "lr": 3.824855431874555e-07, "epoch": 1.4527649769585254, "percentage": 72.64, "elapsed_time": "8:44:57", "remaining_time": "3:17:44"} +{"current_steps": 6306, "total_steps": 8680, "loss": 0.7523643970489502, "lr": 3.821859215486274e-07, "epoch": 1.452995391705069, "percentage": 72.65, "elapsed_time": "8:45:01", "remaining_time": "3:17:39"} +{"current_steps": 6307, "total_steps": 8680, "loss": 0.7248106002807617, "lr": 3.818863895846945e-07, "epoch": 1.4532258064516128, "percentage": 72.66, "elapsed_time": "8:45:05", "remaining_time": "3:17:33"} +{"current_steps": 6308, "total_steps": 8680, "loss": 0.6663920879364014, "lr": 
3.815869473391343e-07, "epoch": 1.4534562211981568, "percentage": 72.67, "elapsed_time": "8:45:08", "remaining_time": "3:17:28"} +{"current_steps": 6309, "total_steps": 8680, "loss": 0.887082576751709, "lr": 3.8128759485540995e-07, "epoch": 1.4536866359447005, "percentage": 72.68, "elapsed_time": "8:45:13", "remaining_time": "3:17:23"} +{"current_steps": 6310, "total_steps": 8680, "loss": 0.8491328954696655, "lr": 3.8098833217697193e-07, "epoch": 1.4539170506912442, "percentage": 72.7, "elapsed_time": "8:45:18", "remaining_time": "3:17:18"} +{"current_steps": 6311, "total_steps": 8680, "loss": 0.6749746799468994, "lr": 3.806891593472582e-07, "epoch": 1.454147465437788, "percentage": 72.71, "elapsed_time": "8:45:24", "remaining_time": "3:17:13"} +{"current_steps": 6312, "total_steps": 8680, "loss": 0.7607502937316895, "lr": 3.803900764096932e-07, "epoch": 1.4543778801843317, "percentage": 72.72, "elapsed_time": "8:45:30", "remaining_time": "3:17:08"} +{"current_steps": 6313, "total_steps": 8680, "loss": 0.6713626980781555, "lr": 3.8009108340768804e-07, "epoch": 1.4546082949308756, "percentage": 72.73, "elapsed_time": "8:45:36", "remaining_time": "3:17:04"} +{"current_steps": 6314, "total_steps": 8680, "loss": 0.7031810879707336, "lr": 3.797921803846419e-07, "epoch": 1.4548387096774194, "percentage": 72.74, "elapsed_time": "8:45:40", "remaining_time": "3:16:59"} +{"current_steps": 6315, "total_steps": 8680, "loss": 0.7233775854110718, "lr": 3.7949336738393955e-07, "epoch": 1.455069124423963, "percentage": 72.75, "elapsed_time": "8:45:46", "remaining_time": "3:16:54"} +{"current_steps": 6316, "total_steps": 8680, "loss": 0.7446990013122559, "lr": 3.791946444489532e-07, "epoch": 1.455299539170507, "percentage": 72.76, "elapsed_time": "8:45:51", "remaining_time": "3:16:49"} +{"current_steps": 6317, "total_steps": 8680, "loss": 0.731992244720459, "lr": 3.7889601162304273e-07, "epoch": 1.4555299539170508, "percentage": 72.78, "elapsed_time": "8:45:57", "remaining_time": 
"3:16:44"} +{"current_steps": 6318, "total_steps": 8680, "loss": 0.7167335152626038, "lr": 3.785974689495539e-07, "epoch": 1.4557603686635945, "percentage": 72.79, "elapsed_time": "8:46:03", "remaining_time": "3:16:40"} +{"current_steps": 6319, "total_steps": 8680, "loss": 0.7634297609329224, "lr": 3.7829901647181993e-07, "epoch": 1.4559907834101382, "percentage": 72.8, "elapsed_time": "8:46:10", "remaining_time": "3:16:35"} +{"current_steps": 6320, "total_steps": 8680, "loss": 0.7584050893783569, "lr": 3.7800065423316066e-07, "epoch": 1.456221198156682, "percentage": 72.81, "elapsed_time": "8:46:15", "remaining_time": "3:16:30"} +{"current_steps": 6321, "total_steps": 8680, "loss": 0.7150899171829224, "lr": 3.777023822768829e-07, "epoch": 1.456451612903226, "percentage": 72.82, "elapsed_time": "8:46:21", "remaining_time": "3:16:26"} +{"current_steps": 6322, "total_steps": 8680, "loss": 0.7821052670478821, "lr": 3.7740420064628034e-07, "epoch": 1.4566820276497696, "percentage": 72.83, "elapsed_time": "8:46:25", "remaining_time": "3:16:20"} +{"current_steps": 6323, "total_steps": 8680, "loss": 0.8678094148635864, "lr": 3.7710610938463405e-07, "epoch": 1.4569124423963133, "percentage": 72.85, "elapsed_time": "8:46:29", "remaining_time": "3:16:15"} +{"current_steps": 6324, "total_steps": 8680, "loss": 0.6953635215759277, "lr": 3.7680810853521107e-07, "epoch": 1.457142857142857, "percentage": 72.86, "elapsed_time": "8:46:34", "remaining_time": "3:16:10"} +{"current_steps": 6325, "total_steps": 8680, "loss": 0.765946626663208, "lr": 3.765101981412665e-07, "epoch": 1.4573732718894008, "percentage": 72.87, "elapsed_time": "8:46:38", "remaining_time": "3:16:05"} +{"current_steps": 6326, "total_steps": 8680, "loss": 0.8828680515289307, "lr": 3.7621237824604137e-07, "epoch": 1.4576036866359448, "percentage": 72.88, "elapsed_time": "8:46:43", "remaining_time": "3:16:00"} +{"current_steps": 6327, "total_steps": 8680, "loss": 0.8916178345680237, "lr": 3.7591464889276326e-07, 
"epoch": 1.4578341013824885, "percentage": 72.89, "elapsed_time": "8:46:47", "remaining_time": "3:15:54"} +{"current_steps": 6328, "total_steps": 8680, "loss": 0.7563039064407349, "lr": 3.756170101246481e-07, "epoch": 1.4580645161290322, "percentage": 72.9, "elapsed_time": "8:46:53", "remaining_time": "3:15:49"} +{"current_steps": 6329, "total_steps": 8680, "loss": 0.8548855781555176, "lr": 3.7531946198489725e-07, "epoch": 1.4582949308755762, "percentage": 72.91, "elapsed_time": "8:46:57", "remaining_time": "3:15:44"} +{"current_steps": 6330, "total_steps": 8680, "loss": 0.8337546586990356, "lr": 3.750220045166993e-07, "epoch": 1.4585253456221199, "percentage": 72.93, "elapsed_time": "8:47:03", "remaining_time": "3:15:40"} +{"current_steps": 6331, "total_steps": 8680, "loss": 0.8909939527511597, "lr": 3.7472463776323036e-07, "epoch": 1.4587557603686636, "percentage": 72.94, "elapsed_time": "8:47:07", "remaining_time": "3:15:34"} +{"current_steps": 6332, "total_steps": 8680, "loss": 0.629026472568512, "lr": 3.744273617676524e-07, "epoch": 1.4589861751152073, "percentage": 72.95, "elapsed_time": "8:47:14", "remaining_time": "3:15:30"} +{"current_steps": 6333, "total_steps": 8680, "loss": 0.7264849543571472, "lr": 3.7413017657311454e-07, "epoch": 1.459216589861751, "percentage": 72.96, "elapsed_time": "8:47:18", "remaining_time": "3:15:25"} +{"current_steps": 6334, "total_steps": 8680, "loss": 0.808081865310669, "lr": 3.738330822227532e-07, "epoch": 1.459447004608295, "percentage": 72.97, "elapsed_time": "8:47:23", "remaining_time": "3:15:20"} +{"current_steps": 6335, "total_steps": 8680, "loss": 0.6092932820320129, "lr": 3.7353607875969115e-07, "epoch": 1.4596774193548387, "percentage": 72.98, "elapsed_time": "8:47:29", "remaining_time": "3:15:15"} +{"current_steps": 6336, "total_steps": 8680, "loss": 0.8700584173202515, "lr": 3.7323916622703756e-07, "epoch": 1.4599078341013825, "percentage": 73.0, "elapsed_time": "8:47:33", "remaining_time": "3:15:10"} 
+{"current_steps": 6337, "total_steps": 8680, "loss": 0.8424433469772339, "lr": 3.7294234466788954e-07, "epoch": 1.4601382488479262, "percentage": 73.01, "elapsed_time": "8:47:37", "remaining_time": "3:15:04"} +{"current_steps": 6338, "total_steps": 8680, "loss": 0.8587443828582764, "lr": 3.7264561412533013e-07, "epoch": 1.46036866359447, "percentage": 73.02, "elapsed_time": "8:47:43", "remaining_time": "3:15:00"} +{"current_steps": 6339, "total_steps": 8680, "loss": 0.7708064913749695, "lr": 3.7234897464242934e-07, "epoch": 1.4605990783410139, "percentage": 73.03, "elapsed_time": "8:47:47", "remaining_time": "3:14:54"} +{"current_steps": 6340, "total_steps": 8680, "loss": 0.8226567506790161, "lr": 3.7205242626224395e-07, "epoch": 1.4608294930875576, "percentage": 73.04, "elapsed_time": "8:47:54", "remaining_time": "3:14:50"} +{"current_steps": 6341, "total_steps": 8680, "loss": 0.8414342403411865, "lr": 3.717559690278176e-07, "epoch": 1.4610599078341013, "percentage": 73.05, "elapsed_time": "8:47:58", "remaining_time": "3:14:45"} +{"current_steps": 6342, "total_steps": 8680, "loss": 0.765863299369812, "lr": 3.714596029821804e-07, "epoch": 1.4612903225806453, "percentage": 73.06, "elapsed_time": "8:48:02", "remaining_time": "3:14:39"} +{"current_steps": 6343, "total_steps": 8680, "loss": 0.7253202199935913, "lr": 3.7116332816834997e-07, "epoch": 1.461520737327189, "percentage": 73.08, "elapsed_time": "8:48:07", "remaining_time": "3:14:34"} +{"current_steps": 6344, "total_steps": 8680, "loss": 0.786415696144104, "lr": 3.7086714462933e-07, "epoch": 1.4617511520737327, "percentage": 73.09, "elapsed_time": "8:48:11", "remaining_time": "3:14:29"} +{"current_steps": 6345, "total_steps": 8680, "loss": 0.8382824659347534, "lr": 3.705710524081108e-07, "epoch": 1.4619815668202765, "percentage": 73.1, "elapsed_time": "8:48:18", "remaining_time": "3:14:25"} +{"current_steps": 6346, "total_steps": 8680, "loss": 0.7953319549560547, "lr": 3.702750515476705e-07, "epoch": 
1.4622119815668202, "percentage": 73.11, "elapsed_time": "8:48:22", "remaining_time": "3:14:19"} +{"current_steps": 6347, "total_steps": 8680, "loss": 0.7897430658340454, "lr": 3.699791420909727e-07, "epoch": 1.4624423963133641, "percentage": 73.12, "elapsed_time": "8:48:26", "remaining_time": "3:14:14"} +{"current_steps": 6348, "total_steps": 8680, "loss": 0.7276254892349243, "lr": 3.6968332408096804e-07, "epoch": 1.4626728110599079, "percentage": 73.13, "elapsed_time": "8:48:32", "remaining_time": "3:14:09"} +{"current_steps": 6349, "total_steps": 8680, "loss": 0.7318450212478638, "lr": 3.693875975605949e-07, "epoch": 1.4629032258064516, "percentage": 73.15, "elapsed_time": "8:48:37", "remaining_time": "3:14:05"} +{"current_steps": 6350, "total_steps": 8680, "loss": 0.8438090085983276, "lr": 3.6909196257277676e-07, "epoch": 1.4631336405529953, "percentage": 73.16, "elapsed_time": "8:48:41", "remaining_time": "3:13:59"} +{"current_steps": 6351, "total_steps": 8680, "loss": 0.7977915406227112, "lr": 3.6879641916042534e-07, "epoch": 1.463364055299539, "percentage": 73.17, "elapsed_time": "8:48:47", "remaining_time": "3:13:54"} +{"current_steps": 6352, "total_steps": 8680, "loss": 0.8845348358154297, "lr": 3.685009673664382e-07, "epoch": 1.463594470046083, "percentage": 73.18, "elapsed_time": "8:48:50", "remaining_time": "3:13:49"} +{"current_steps": 6353, "total_steps": 8680, "loss": 0.8971320986747742, "lr": 3.682056072336992e-07, "epoch": 1.4638248847926267, "percentage": 73.19, "elapsed_time": "8:48:54", "remaining_time": "3:13:43"} +{"current_steps": 6354, "total_steps": 8680, "loss": 0.7015302181243896, "lr": 3.679103388050803e-07, "epoch": 1.4640552995391705, "percentage": 73.2, "elapsed_time": "8:49:01", "remaining_time": "3:13:39"} +{"current_steps": 6355, "total_steps": 8680, "loss": 0.5953146815299988, "lr": 3.676151621234389e-07, "epoch": 1.4642857142857144, "percentage": 73.21, "elapsed_time": "8:49:07", "remaining_time": "3:13:34"} +{"current_steps": 
6356, "total_steps": 8680, "loss": 0.5794636011123657, "lr": 3.673200772316193e-07, "epoch": 1.4645161290322581, "percentage": 73.23, "elapsed_time": "8:49:12", "remaining_time": "3:13:29"} +{"current_steps": 6357, "total_steps": 8680, "loss": 0.8272292017936707, "lr": 3.6702508417245324e-07, "epoch": 1.4647465437788019, "percentage": 73.24, "elapsed_time": "8:49:17", "remaining_time": "3:13:24"} +{"current_steps": 6358, "total_steps": 8680, "loss": 0.7239755392074585, "lr": 3.6673018298875826e-07, "epoch": 1.4649769585253456, "percentage": 73.25, "elapsed_time": "8:49:22", "remaining_time": "3:13:19"} +{"current_steps": 6359, "total_steps": 8680, "loss": 0.8597465753555298, "lr": 3.6643537372333886e-07, "epoch": 1.4652073732718893, "percentage": 73.26, "elapsed_time": "8:49:28", "remaining_time": "3:13:15"} +{"current_steps": 6360, "total_steps": 8680, "loss": 0.7540475130081177, "lr": 3.661406564189862e-07, "epoch": 1.4654377880184333, "percentage": 73.27, "elapsed_time": "8:49:32", "remaining_time": "3:13:10"} +{"current_steps": 6361, "total_steps": 8680, "loss": 0.793259859085083, "lr": 3.658460311184782e-07, "epoch": 1.465668202764977, "percentage": 73.28, "elapsed_time": "8:49:38", "remaining_time": "3:13:05"} +{"current_steps": 6362, "total_steps": 8680, "loss": 0.797966718673706, "lr": 3.6555149786457883e-07, "epoch": 1.4658986175115207, "percentage": 73.29, "elapsed_time": "8:49:43", "remaining_time": "3:13:00"} +{"current_steps": 6363, "total_steps": 8680, "loss": 0.7466796636581421, "lr": 3.6525705670004016e-07, "epoch": 1.4661290322580645, "percentage": 73.31, "elapsed_time": "8:49:48", "remaining_time": "3:12:55"} +{"current_steps": 6364, "total_steps": 8680, "loss": 0.7694044709205627, "lr": 3.6496270766759927e-07, "epoch": 1.4663594470046082, "percentage": 73.32, "elapsed_time": "8:49:53", "remaining_time": "3:12:50"} +{"current_steps": 6365, "total_steps": 8680, "loss": 0.7701553106307983, "lr": 3.6466845080998043e-07, "epoch": 1.4665898617511521, 
"percentage": 73.33, "elapsed_time": "8:49:58", "remaining_time": "3:12:45"} +{"current_steps": 6366, "total_steps": 8680, "loss": 0.6718326807022095, "lr": 3.643742861698952e-07, "epoch": 1.4668202764976959, "percentage": 73.34, "elapsed_time": "8:50:02", "remaining_time": "3:12:40"} +{"current_steps": 6367, "total_steps": 8680, "loss": 0.7099052667617798, "lr": 3.6408021379004086e-07, "epoch": 1.4670506912442396, "percentage": 73.35, "elapsed_time": "8:50:08", "remaining_time": "3:12:35"} +{"current_steps": 6368, "total_steps": 8680, "loss": 0.8650654554367065, "lr": 3.6378623371310126e-07, "epoch": 1.4672811059907835, "percentage": 73.36, "elapsed_time": "8:50:13", "remaining_time": "3:12:30"} +{"current_steps": 6369, "total_steps": 8680, "loss": 0.7920950055122375, "lr": 3.6349234598174794e-07, "epoch": 1.4675115207373273, "percentage": 73.38, "elapsed_time": "8:50:18", "remaining_time": "3:12:25"} +{"current_steps": 6370, "total_steps": 8680, "loss": 0.7927969098091125, "lr": 3.63198550638638e-07, "epoch": 1.467741935483871, "percentage": 73.39, "elapsed_time": "8:50:23", "remaining_time": "3:12:20"} +{"current_steps": 6371, "total_steps": 8680, "loss": 0.9403868913650513, "lr": 3.6290484772641514e-07, "epoch": 1.4679723502304147, "percentage": 73.4, "elapsed_time": "8:50:28", "remaining_time": "3:12:15"} +{"current_steps": 6372, "total_steps": 8680, "loss": 0.9157334566116333, "lr": 3.626112372877106e-07, "epoch": 1.4682027649769585, "percentage": 73.41, "elapsed_time": "8:50:33", "remaining_time": "3:12:10"} +{"current_steps": 6373, "total_steps": 8680, "loss": 0.7742066979408264, "lr": 3.6231771936514067e-07, "epoch": 1.4684331797235024, "percentage": 73.42, "elapsed_time": "8:50:38", "remaining_time": "3:12:05"} +{"current_steps": 6374, "total_steps": 8680, "loss": 0.69399094581604, "lr": 3.6202429400131006e-07, "epoch": 1.4686635944700461, "percentage": 73.43, "elapsed_time": "8:50:44", "remaining_time": "3:12:00"} +{"current_steps": 6375, "total_steps": 
8680, "loss": 0.874832272529602, "lr": 3.6173096123880854e-07, "epoch": 1.4688940092165899, "percentage": 73.44, "elapsed_time": "8:50:49", "remaining_time": "3:11:55"} +{"current_steps": 6376, "total_steps": 8680, "loss": 0.6685272455215454, "lr": 3.6143772112021275e-07, "epoch": 1.4691244239631336, "percentage": 73.46, "elapsed_time": "8:50:56", "remaining_time": "3:11:51"} +{"current_steps": 6377, "total_steps": 8680, "loss": 0.7422738671302795, "lr": 3.611445736880867e-07, "epoch": 1.4693548387096773, "percentage": 73.47, "elapsed_time": "8:51:02", "remaining_time": "3:11:46"} +{"current_steps": 6378, "total_steps": 8680, "loss": 0.8208622932434082, "lr": 3.6085151898498e-07, "epoch": 1.4695852534562213, "percentage": 73.48, "elapsed_time": "8:51:07", "remaining_time": "3:11:41"} +{"current_steps": 6379, "total_steps": 8680, "loss": 0.8001033663749695, "lr": 3.605585570534293e-07, "epoch": 1.469815668202765, "percentage": 73.49, "elapsed_time": "8:51:11", "remaining_time": "3:11:36"} +{"current_steps": 6380, "total_steps": 8680, "loss": 0.789332926273346, "lr": 3.6026568793595744e-07, "epoch": 1.4700460829493087, "percentage": 73.5, "elapsed_time": "8:51:16", "remaining_time": "3:11:31"} +{"current_steps": 6381, "total_steps": 8680, "loss": 0.8071820139884949, "lr": 3.599729116750742e-07, "epoch": 1.4702764976958527, "percentage": 73.51, "elapsed_time": "8:51:21", "remaining_time": "3:11:26"} +{"current_steps": 6382, "total_steps": 8680, "loss": 0.8028534054756165, "lr": 3.5968022831327506e-07, "epoch": 1.4705069124423962, "percentage": 73.53, "elapsed_time": "8:51:26", "remaining_time": "3:11:21"} +{"current_steps": 6383, "total_steps": 8680, "loss": 0.6888329982757568, "lr": 3.593876378930435e-07, "epoch": 1.4707373271889401, "percentage": 73.54, "elapsed_time": "8:51:30", "remaining_time": "3:11:16"} +{"current_steps": 6384, "total_steps": 8680, "loss": 0.8176132440567017, "lr": 3.590951404568483e-07, "epoch": 1.4709677419354839, "percentage": 73.55, 
"elapsed_time": "8:51:35", "remaining_time": "3:11:11"} +{"current_steps": 6385, "total_steps": 8680, "loss": 0.6715027689933777, "lr": 3.588027360471446e-07, "epoch": 1.4711981566820276, "percentage": 73.56, "elapsed_time": "8:51:41", "remaining_time": "3:11:06"} +{"current_steps": 6386, "total_steps": 8680, "loss": 0.8622937798500061, "lr": 3.585104247063753e-07, "epoch": 1.4714285714285715, "percentage": 73.57, "elapsed_time": "8:51:47", "remaining_time": "3:11:01"} +{"current_steps": 6387, "total_steps": 8680, "loss": 0.7244299650192261, "lr": 3.5821820647696864e-07, "epoch": 1.4716589861751153, "percentage": 73.58, "elapsed_time": "8:51:51", "remaining_time": "3:10:56"} +{"current_steps": 6388, "total_steps": 8680, "loss": 0.8130464553833008, "lr": 3.579260814013393e-07, "epoch": 1.471889400921659, "percentage": 73.59, "elapsed_time": "8:51:55", "remaining_time": "3:10:51"} +{"current_steps": 6389, "total_steps": 8680, "loss": 0.8563692569732666, "lr": 3.576340495218897e-07, "epoch": 1.4721198156682027, "percentage": 73.61, "elapsed_time": "8:52:02", "remaining_time": "3:10:46"} +{"current_steps": 6390, "total_steps": 8680, "loss": 0.8315908908843994, "lr": 3.573421108810073e-07, "epoch": 1.4723502304147464, "percentage": 73.62, "elapsed_time": "8:52:06", "remaining_time": "3:10:41"} +{"current_steps": 6391, "total_steps": 8680, "loss": 0.653038740158081, "lr": 3.5705026552106645e-07, "epoch": 1.4725806451612904, "percentage": 73.63, "elapsed_time": "8:52:12", "remaining_time": "3:10:36"} +{"current_steps": 6392, "total_steps": 8680, "loss": 0.7511966228485107, "lr": 3.5675851348442876e-07, "epoch": 1.4728110599078341, "percentage": 73.64, "elapsed_time": "8:52:17", "remaining_time": "3:10:32"} +{"current_steps": 6393, "total_steps": 8680, "loss": 0.8675990104675293, "lr": 3.564668548134413e-07, "epoch": 1.4730414746543778, "percentage": 73.65, "elapsed_time": "8:52:23", "remaining_time": "3:10:27"} +{"current_steps": 6394, "total_steps": 8680, "loss": 
0.7574094533920288, "lr": 3.5617528955043765e-07, "epoch": 1.4732718894009218, "percentage": 73.66, "elapsed_time": "8:52:28", "remaining_time": "3:10:22"} +{"current_steps": 6395, "total_steps": 8680, "loss": 0.7004787921905518, "lr": 3.5588381773773866e-07, "epoch": 1.4735023041474653, "percentage": 73.68, "elapsed_time": "8:52:32", "remaining_time": "3:10:16"} +{"current_steps": 6396, "total_steps": 8680, "loss": 0.680101215839386, "lr": 3.555924394176508e-07, "epoch": 1.4737327188940093, "percentage": 73.69, "elapsed_time": "8:52:37", "remaining_time": "3:10:12"} +{"current_steps": 6397, "total_steps": 8680, "loss": 0.8340710401535034, "lr": 3.55301154632467e-07, "epoch": 1.473963133640553, "percentage": 73.7, "elapsed_time": "8:52:42", "remaining_time": "3:10:06"} +{"current_steps": 6398, "total_steps": 8680, "loss": 0.8307079076766968, "lr": 3.5500996342446756e-07, "epoch": 1.4741935483870967, "percentage": 73.71, "elapsed_time": "8:52:47", "remaining_time": "3:10:02"} +{"current_steps": 6399, "total_steps": 8680, "loss": 0.9614958167076111, "lr": 3.547188658359179e-07, "epoch": 1.4744239631336407, "percentage": 73.72, "elapsed_time": "8:52:51", "remaining_time": "3:09:56"} +{"current_steps": 6400, "total_steps": 8680, "loss": 0.782494068145752, "lr": 3.544278619090707e-07, "epoch": 1.4746543778801844, "percentage": 73.73, "elapsed_time": "8:52:55", "remaining_time": "3:09:51"} +{"current_steps": 6401, "total_steps": 8680, "loss": 0.7474460601806641, "lr": 3.5413695168616474e-07, "epoch": 1.4748847926267281, "percentage": 73.74, "elapsed_time": "8:53:03", "remaining_time": "3:09:47"} +{"current_steps": 6402, "total_steps": 8680, "loss": 0.7182635068893433, "lr": 3.5384613520942484e-07, "epoch": 1.4751152073732718, "percentage": 73.76, "elapsed_time": "8:53:07", "remaining_time": "3:09:42"} +{"current_steps": 6403, "total_steps": 8680, "loss": 0.8116436004638672, "lr": 3.5355541252106336e-07, "epoch": 1.4753456221198156, "percentage": 73.77, "elapsed_time": 
"8:53:11", "remaining_time": "3:09:36"} +{"current_steps": 6404, "total_steps": 8680, "loss": 0.8007283210754395, "lr": 3.5326478366327806e-07, "epoch": 1.4755760368663595, "percentage": 73.78, "elapsed_time": "8:53:18", "remaining_time": "3:09:32"} +{"current_steps": 6405, "total_steps": 8680, "loss": 0.7707732915878296, "lr": 3.5297424867825276e-07, "epoch": 1.4758064516129032, "percentage": 73.79, "elapsed_time": "8:53:21", "remaining_time": "3:09:26"} +{"current_steps": 6406, "total_steps": 8680, "loss": 0.8031977415084839, "lr": 3.5268380760815917e-07, "epoch": 1.476036866359447, "percentage": 73.8, "elapsed_time": "8:53:26", "remaining_time": "3:09:21"} +{"current_steps": 6407, "total_steps": 8680, "loss": 0.7113008499145508, "lr": 3.5239346049515397e-07, "epoch": 1.4762672811059907, "percentage": 73.81, "elapsed_time": "8:53:32", "remaining_time": "3:09:16"} +{"current_steps": 6408, "total_steps": 8680, "loss": 0.8069616556167603, "lr": 3.521032073813802e-07, "epoch": 1.4764976958525344, "percentage": 73.82, "elapsed_time": "8:53:37", "remaining_time": "3:09:12"} +{"current_steps": 6409, "total_steps": 8680, "loss": 0.9780417680740356, "lr": 3.518130483089686e-07, "epoch": 1.4767281105990784, "percentage": 73.84, "elapsed_time": "8:53:41", "remaining_time": "3:09:06"} +{"current_steps": 6410, "total_steps": 8680, "loss": 0.765299379825592, "lr": 3.515229833200351e-07, "epoch": 1.476958525345622, "percentage": 73.85, "elapsed_time": "8:53:47", "remaining_time": "3:09:02"} +{"current_steps": 6411, "total_steps": 8680, "loss": 0.7279179096221924, "lr": 3.512330124566816e-07, "epoch": 1.4771889400921658, "percentage": 73.86, "elapsed_time": "8:53:54", "remaining_time": "3:08:57"} +{"current_steps": 6412, "total_steps": 8680, "loss": 0.8429825901985168, "lr": 3.509431357609978e-07, "epoch": 1.4774193548387098, "percentage": 73.87, "elapsed_time": "8:53:59", "remaining_time": "3:08:52"} +{"current_steps": 6413, "total_steps": 8680, "loss": 0.741936206817627, "lr": 
3.506533532750586e-07, "epoch": 1.4776497695852535, "percentage": 73.88, "elapsed_time": "8:54:05", "remaining_time": "3:08:48"} +{"current_steps": 6414, "total_steps": 8680, "loss": 0.6841387748718262, "lr": 3.5036366504092527e-07, "epoch": 1.4778801843317972, "percentage": 73.89, "elapsed_time": "8:54:10", "remaining_time": "3:08:43"} +{"current_steps": 6415, "total_steps": 8680, "loss": 0.7136961221694946, "lr": 3.5007407110064626e-07, "epoch": 1.478110599078341, "percentage": 73.91, "elapsed_time": "8:54:16", "remaining_time": "3:08:38"} +{"current_steps": 6416, "total_steps": 8680, "loss": 0.8483344912528992, "lr": 3.497845714962554e-07, "epoch": 1.4783410138248847, "percentage": 73.92, "elapsed_time": "8:54:21", "remaining_time": "3:08:33"} +{"current_steps": 6417, "total_steps": 8680, "loss": 0.7060235738754272, "lr": 3.4949516626977294e-07, "epoch": 1.4785714285714286, "percentage": 73.93, "elapsed_time": "8:54:26", "remaining_time": "3:08:28"} +{"current_steps": 6418, "total_steps": 8680, "loss": 0.7351587414741516, "lr": 3.4920585546320625e-07, "epoch": 1.4788018433179724, "percentage": 73.94, "elapsed_time": "8:54:30", "remaining_time": "3:08:23"} +{"current_steps": 6419, "total_steps": 8680, "loss": 0.7445269823074341, "lr": 3.489166391185482e-07, "epoch": 1.479032258064516, "percentage": 73.95, "elapsed_time": "8:54:35", "remaining_time": "3:08:18"} +{"current_steps": 6420, "total_steps": 8680, "loss": 0.795128583908081, "lr": 3.4862751727777796e-07, "epoch": 1.4792626728110598, "percentage": 73.96, "elapsed_time": "8:54:39", "remaining_time": "3:08:12"} +{"current_steps": 6421, "total_steps": 8680, "loss": 0.7916193008422852, "lr": 3.4833848998286133e-07, "epoch": 1.4794930875576036, "percentage": 73.97, "elapsed_time": "8:54:44", "remaining_time": "3:08:07"} +{"current_steps": 6422, "total_steps": 8680, "loss": 0.8279474973678589, "lr": 3.480495572757497e-07, "epoch": 1.4797235023041475, "percentage": 73.99, "elapsed_time": "8:54:49", 
"remaining_time": "3:08:02"} +{"current_steps": 6423, "total_steps": 8680, "loss": 0.9339898824691772, "lr": 3.477607191983822e-07, "epoch": 1.4799539170506912, "percentage": 74.0, "elapsed_time": "8:54:56", "remaining_time": "3:07:58"} +{"current_steps": 6424, "total_steps": 8680, "loss": 0.8579660654067993, "lr": 3.4747197579268296e-07, "epoch": 1.480184331797235, "percentage": 74.01, "elapsed_time": "8:55:00", "remaining_time": "3:07:53"} +{"current_steps": 6425, "total_steps": 8680, "loss": 0.7637878060340881, "lr": 3.471833271005622e-07, "epoch": 1.480414746543779, "percentage": 74.02, "elapsed_time": "8:55:07", "remaining_time": "3:07:48"} +{"current_steps": 6426, "total_steps": 8680, "loss": 0.8600465059280396, "lr": 3.4689477316391756e-07, "epoch": 1.4806451612903226, "percentage": 74.03, "elapsed_time": "8:55:12", "remaining_time": "3:07:43"} +{"current_steps": 6427, "total_steps": 8680, "loss": 0.6576759815216064, "lr": 3.46606314024632e-07, "epoch": 1.4808755760368664, "percentage": 74.04, "elapsed_time": "8:55:17", "remaining_time": "3:07:38"} +{"current_steps": 6428, "total_steps": 8680, "loss": 0.7556706666946411, "lr": 3.463179497245747e-07, "epoch": 1.48110599078341, "percentage": 74.06, "elapsed_time": "8:55:24", "remaining_time": "3:07:34"} +{"current_steps": 6429, "total_steps": 8680, "loss": 0.8826701641082764, "lr": 3.4602968030560196e-07, "epoch": 1.4813364055299538, "percentage": 74.07, "elapsed_time": "8:55:28", "remaining_time": "3:07:29"} +{"current_steps": 6430, "total_steps": 8680, "loss": 0.7352213263511658, "lr": 3.457415058095554e-07, "epoch": 1.4815668202764978, "percentage": 74.08, "elapsed_time": "8:55:34", "remaining_time": "3:07:24"} +{"current_steps": 6431, "total_steps": 8680, "loss": 0.8108851909637451, "lr": 3.454534262782628e-07, "epoch": 1.4817972350230415, "percentage": 74.09, "elapsed_time": "8:55:40", "remaining_time": "3:07:19"} +{"current_steps": 6432, "total_steps": 8680, "loss": 0.8595583438873291, "lr": 
3.4516544175353914e-07, "epoch": 1.4820276497695852, "percentage": 74.1, "elapsed_time": "8:55:44", "remaining_time": "3:07:14"} +{"current_steps": 6433, "total_steps": 8680, "loss": 0.7194280028343201, "lr": 3.448775522771847e-07, "epoch": 1.482258064516129, "percentage": 74.11, "elapsed_time": "8:55:50", "remaining_time": "3:07:09"} +{"current_steps": 6434, "total_steps": 8680, "loss": 0.8966056108474731, "lr": 3.445897578909861e-07, "epoch": 1.4824884792626727, "percentage": 74.12, "elapsed_time": "8:55:55", "remaining_time": "3:07:04"} +{"current_steps": 6435, "total_steps": 8680, "loss": 0.8089771270751953, "lr": 3.443020586367167e-07, "epoch": 1.4827188940092166, "percentage": 74.14, "elapsed_time": "8:56:00", "remaining_time": "3:07:00"} +{"current_steps": 6436, "total_steps": 8680, "loss": 0.7835644483566284, "lr": 3.4401445455613555e-07, "epoch": 1.4829493087557604, "percentage": 74.15, "elapsed_time": "8:56:06", "remaining_time": "3:06:55"} +{"current_steps": 6437, "total_steps": 8680, "loss": 0.7285257577896118, "lr": 3.4372694569098746e-07, "epoch": 1.483179723502304, "percentage": 74.16, "elapsed_time": "8:56:10", "remaining_time": "3:06:50"} +{"current_steps": 6438, "total_steps": 8680, "loss": 0.9108592867851257, "lr": 3.434395320830048e-07, "epoch": 1.483410138248848, "percentage": 74.17, "elapsed_time": "8:56:15", "remaining_time": "3:06:44"} +{"current_steps": 6439, "total_steps": 8680, "loss": 0.7154395580291748, "lr": 3.431522137739049e-07, "epoch": 1.4836405529953918, "percentage": 74.18, "elapsed_time": "8:56:19", "remaining_time": "3:06:39"} +{"current_steps": 6440, "total_steps": 8680, "loss": 0.6483602523803711, "lr": 3.428649908053917e-07, "epoch": 1.4838709677419355, "percentage": 74.19, "elapsed_time": "8:56:24", "remaining_time": "3:06:34"} +{"current_steps": 6441, "total_steps": 8680, "loss": 0.8090662956237793, "lr": 3.425778632191551e-07, "epoch": 1.4841013824884792, "percentage": 74.21, "elapsed_time": "8:56:30", "remaining_time": 
"3:06:29"} +{"current_steps": 6442, "total_steps": 8680, "loss": 0.7884642481803894, "lr": 3.422908310568712e-07, "epoch": 1.484331797235023, "percentage": 74.22, "elapsed_time": "8:56:36", "remaining_time": "3:06:25"} +{"current_steps": 6443, "total_steps": 8680, "loss": 0.8628194332122803, "lr": 3.4200389436020225e-07, "epoch": 1.484562211981567, "percentage": 74.23, "elapsed_time": "8:56:41", "remaining_time": "3:06:20"} +{"current_steps": 6444, "total_steps": 8680, "loss": 0.8192269802093506, "lr": 3.4171705317079723e-07, "epoch": 1.4847926267281106, "percentage": 74.24, "elapsed_time": "8:56:45", "remaining_time": "3:06:14"} +{"current_steps": 6445, "total_steps": 8680, "loss": 0.7768012285232544, "lr": 3.4143030753029054e-07, "epoch": 1.4850230414746544, "percentage": 74.25, "elapsed_time": "8:56:51", "remaining_time": "3:06:10"} +{"current_steps": 6446, "total_steps": 8680, "loss": 0.7420791387557983, "lr": 3.411436574803026e-07, "epoch": 1.485253456221198, "percentage": 74.26, "elapsed_time": "8:56:56", "remaining_time": "3:06:05"} +{"current_steps": 6447, "total_steps": 8680, "loss": 0.823938250541687, "lr": 3.4085710306244086e-07, "epoch": 1.4854838709677418, "percentage": 74.27, "elapsed_time": "8:57:01", "remaining_time": "3:06:00"} +{"current_steps": 6448, "total_steps": 8680, "loss": 0.7215089201927185, "lr": 3.405706443182976e-07, "epoch": 1.4857142857142858, "percentage": 74.29, "elapsed_time": "8:57:06", "remaining_time": "3:05:55"} +{"current_steps": 6449, "total_steps": 8680, "loss": 0.8301436901092529, "lr": 3.4028428128945286e-07, "epoch": 1.4859447004608295, "percentage": 74.3, "elapsed_time": "8:57:12", "remaining_time": "3:05:50"} +{"current_steps": 6450, "total_steps": 8680, "loss": 0.6727990508079529, "lr": 3.399980140174712e-07, "epoch": 1.4861751152073732, "percentage": 74.31, "elapsed_time": "8:57:18", "remaining_time": "3:05:45"} +{"current_steps": 6451, "total_steps": 8680, "loss": 0.8364754319190979, "lr": 3.397118425439038e-07, 
"epoch": 1.4864055299539172, "percentage": 74.32, "elapsed_time": "8:57:23", "remaining_time": "3:05:41"} +{"current_steps": 6452, "total_steps": 8680, "loss": 0.7241604328155518, "lr": 3.394257669102887e-07, "epoch": 1.486635944700461, "percentage": 74.33, "elapsed_time": "8:57:29", "remaining_time": "3:05:36"} +{"current_steps": 6453, "total_steps": 8680, "loss": 0.7762489914894104, "lr": 3.3913978715814897e-07, "epoch": 1.4868663594470046, "percentage": 74.34, "elapsed_time": "8:57:34", "remaining_time": "3:05:31"} +{"current_steps": 6454, "total_steps": 8680, "loss": 0.9278200268745422, "lr": 3.38853903328994e-07, "epoch": 1.4870967741935484, "percentage": 74.35, "elapsed_time": "8:57:38", "remaining_time": "3:05:25"} +{"current_steps": 6455, "total_steps": 8680, "loss": 0.693070113658905, "lr": 3.3856811546431994e-07, "epoch": 1.487327188940092, "percentage": 74.37, "elapsed_time": "8:57:43", "remaining_time": "3:05:21"} +{"current_steps": 6456, "total_steps": 8680, "loss": 0.8541949987411499, "lr": 3.382824236056084e-07, "epoch": 1.487557603686636, "percentage": 74.38, "elapsed_time": "8:57:48", "remaining_time": "3:05:15"} +{"current_steps": 6457, "total_steps": 8680, "loss": 0.7638850212097168, "lr": 3.379968277943267e-07, "epoch": 1.4877880184331798, "percentage": 74.39, "elapsed_time": "8:57:52", "remaining_time": "3:05:10"} +{"current_steps": 6458, "total_steps": 8680, "loss": 0.8240739107131958, "lr": 3.377113280719295e-07, "epoch": 1.4880184331797235, "percentage": 74.4, "elapsed_time": "8:57:57", "remaining_time": "3:05:05"} +{"current_steps": 6459, "total_steps": 8680, "loss": 0.7360633015632629, "lr": 3.374259244798562e-07, "epoch": 1.4882488479262672, "percentage": 74.41, "elapsed_time": "8:58:01", "remaining_time": "3:05:00"} +{"current_steps": 6460, "total_steps": 8680, "loss": 0.8626362085342407, "lr": 3.371406170595328e-07, "epoch": 1.488479262672811, "percentage": 74.42, "elapsed_time": "8:58:07", "remaining_time": "3:04:55"} 
+{"current_steps": 6461, "total_steps": 8680, "loss": 0.8499895334243774, "lr": 3.368554058523713e-07, "epoch": 1.488709677419355, "percentage": 74.44, "elapsed_time": "8:58:12", "remaining_time": "3:04:50"} +{"current_steps": 6462, "total_steps": 8680, "loss": 0.8335039615631104, "lr": 3.3657029089976985e-07, "epoch": 1.4889400921658986, "percentage": 74.45, "elapsed_time": "8:58:17", "remaining_time": "3:04:45"} +{"current_steps": 6463, "total_steps": 8680, "loss": 0.8154790997505188, "lr": 3.3628527224311196e-07, "epoch": 1.4891705069124423, "percentage": 74.46, "elapsed_time": "8:58:21", "remaining_time": "3:04:40"} +{"current_steps": 6464, "total_steps": 8680, "loss": 0.7952951192855835, "lr": 3.3600034992376856e-07, "epoch": 1.4894009216589863, "percentage": 74.47, "elapsed_time": "8:58:28", "remaining_time": "3:04:35"} +{"current_steps": 6465, "total_steps": 8680, "loss": 0.7227598428726196, "lr": 3.3571552398309535e-07, "epoch": 1.48963133640553, "percentage": 74.48, "elapsed_time": "8:58:33", "remaining_time": "3:04:30"} +{"current_steps": 6466, "total_steps": 8680, "loss": 0.6703250408172607, "lr": 3.3543079446243404e-07, "epoch": 1.4898617511520738, "percentage": 74.49, "elapsed_time": "8:58:39", "remaining_time": "3:04:26"} +{"current_steps": 6467, "total_steps": 8680, "loss": 0.7468122243881226, "lr": 3.351461614031136e-07, "epoch": 1.4900921658986175, "percentage": 74.5, "elapsed_time": "8:58:45", "remaining_time": "3:04:21"} +{"current_steps": 6468, "total_steps": 8680, "loss": 0.8649178743362427, "lr": 3.348616248464475e-07, "epoch": 1.4903225806451612, "percentage": 74.52, "elapsed_time": "8:58:49", "remaining_time": "3:04:16"} +{"current_steps": 6469, "total_steps": 8680, "loss": 0.8229554295539856, "lr": 3.345771848337359e-07, "epoch": 1.4905529953917052, "percentage": 74.53, "elapsed_time": "8:58:56", "remaining_time": "3:04:12"} +{"current_steps": 6470, "total_steps": 8680, "loss": 0.7275597453117371, "lr": 3.342928414062652e-07, "epoch": 
1.4907834101382489, "percentage": 74.54, "elapsed_time": "8:59:01", "remaining_time": "3:04:07"} +{"current_steps": 6471, "total_steps": 8680, "loss": 0.657899796962738, "lr": 3.3400859460530737e-07, "epoch": 1.4910138248847926, "percentage": 74.55, "elapsed_time": "8:59:07", "remaining_time": "3:04:02"} +{"current_steps": 6472, "total_steps": 8680, "loss": 0.7579425573348999, "lr": 3.3372444447212e-07, "epoch": 1.4912442396313363, "percentage": 74.56, "elapsed_time": "8:59:12", "remaining_time": "3:03:57"} +{"current_steps": 6473, "total_steps": 8680, "loss": 0.8707751631736755, "lr": 3.334403910479479e-07, "epoch": 1.49147465437788, "percentage": 74.57, "elapsed_time": "8:59:17", "remaining_time": "3:03:52"} +{"current_steps": 6474, "total_steps": 8680, "loss": 0.7923752665519714, "lr": 3.331564343740201e-07, "epoch": 1.491705069124424, "percentage": 74.59, "elapsed_time": "8:59:20", "remaining_time": "3:03:46"} +{"current_steps": 6475, "total_steps": 8680, "loss": 0.8308948278427124, "lr": 3.328725744915536e-07, "epoch": 1.4919354838709677, "percentage": 74.6, "elapsed_time": "8:59:25", "remaining_time": "3:03:41"} +{"current_steps": 6476, "total_steps": 8680, "loss": 0.8984559774398804, "lr": 3.3258881144174967e-07, "epoch": 1.4921658986175115, "percentage": 74.61, "elapsed_time": "8:59:30", "remaining_time": "3:03:36"} +{"current_steps": 6477, "total_steps": 8680, "loss": 0.9279792308807373, "lr": 3.3230514526579614e-07, "epoch": 1.4923963133640554, "percentage": 74.62, "elapsed_time": "8:59:36", "remaining_time": "3:03:32"} +{"current_steps": 6478, "total_steps": 8680, "loss": 0.7934520244598389, "lr": 3.3202157600486655e-07, "epoch": 1.4926267281105992, "percentage": 74.63, "elapsed_time": "8:59:42", "remaining_time": "3:03:27"} +{"current_steps": 6479, "total_steps": 8680, "loss": 0.8463613390922546, "lr": 3.3173810370012136e-07, "epoch": 1.4928571428571429, "percentage": 74.64, "elapsed_time": "8:59:47", "remaining_time": "3:03:22"} +{"current_steps": 
6480, "total_steps": 8680, "loss": 0.8087350130081177, "lr": 3.314547283927057e-07, "epoch": 1.4930875576036866, "percentage": 74.65, "elapsed_time": "8:59:51", "remaining_time": "3:03:17"} +{"current_steps": 6481, "total_steps": 8680, "loss": 0.7711254358291626, "lr": 3.3117145012375113e-07, "epoch": 1.4933179723502303, "percentage": 74.67, "elapsed_time": "8:59:57", "remaining_time": "3:03:12"} +{"current_steps": 6482, "total_steps": 8680, "loss": 0.7140679359436035, "lr": 3.3088826893437526e-07, "epoch": 1.4935483870967743, "percentage": 74.68, "elapsed_time": "9:00:01", "remaining_time": "3:03:07"} +{"current_steps": 6483, "total_steps": 8680, "loss": 0.7074463367462158, "lr": 3.3060518486568103e-07, "epoch": 1.493778801843318, "percentage": 74.69, "elapsed_time": "9:00:06", "remaining_time": "3:03:02"} +{"current_steps": 6484, "total_steps": 8680, "loss": 0.7560559511184692, "lr": 3.3032219795875827e-07, "epoch": 1.4940092165898617, "percentage": 74.7, "elapsed_time": "9:00:13", "remaining_time": "3:02:57"} +{"current_steps": 6485, "total_steps": 8680, "loss": 0.7699435353279114, "lr": 3.3003930825468194e-07, "epoch": 1.4942396313364055, "percentage": 74.71, "elapsed_time": "9:00:19", "remaining_time": "3:02:53"} +{"current_steps": 6486, "total_steps": 8680, "loss": 0.817488431930542, "lr": 3.297565157945129e-07, "epoch": 1.4944700460829492, "percentage": 74.72, "elapsed_time": "9:00:23", "remaining_time": "3:02:47"} +{"current_steps": 6487, "total_steps": 8680, "loss": 0.7534141540527344, "lr": 3.294738206192985e-07, "epoch": 1.4947004608294931, "percentage": 74.74, "elapsed_time": "9:00:29", "remaining_time": "3:02:42"} +{"current_steps": 6488, "total_steps": 8680, "loss": 0.7423536777496338, "lr": 3.291912227700715e-07, "epoch": 1.4949308755760369, "percentage": 74.75, "elapsed_time": "9:00:32", "remaining_time": "3:02:37"} +{"current_steps": 6489, "total_steps": 8680, "loss": 0.7181985378265381, "lr": 3.2890872228785003e-07, "epoch": 1.4951612903225806, 
"percentage": 74.76, "elapsed_time": "9:00:37", "remaining_time": "3:02:32"} +{"current_steps": 6490, "total_steps": 8680, "loss": 0.7143938541412354, "lr": 3.286263192136396e-07, "epoch": 1.4953917050691246, "percentage": 74.77, "elapsed_time": "9:00:41", "remaining_time": "3:02:27"} +{"current_steps": 6491, "total_steps": 8680, "loss": 0.8247631788253784, "lr": 3.2834401358843e-07, "epoch": 1.4956221198156683, "percentage": 74.78, "elapsed_time": "9:00:45", "remaining_time": "3:02:21"} +{"current_steps": 6492, "total_steps": 8680, "loss": 0.8627001047134399, "lr": 3.280618054531974e-07, "epoch": 1.495852534562212, "percentage": 74.79, "elapsed_time": "9:00:49", "remaining_time": "3:02:16"} +{"current_steps": 6493, "total_steps": 8680, "loss": 0.813239574432373, "lr": 3.2777969484890456e-07, "epoch": 1.4960829493087557, "percentage": 74.8, "elapsed_time": "9:00:54", "remaining_time": "3:02:11"} +{"current_steps": 6494, "total_steps": 8680, "loss": 0.6633884310722351, "lr": 3.2749768181649904e-07, "epoch": 1.4963133640552995, "percentage": 74.82, "elapsed_time": "9:00:59", "remaining_time": "3:02:06"} +{"current_steps": 6495, "total_steps": 8680, "loss": 0.7760038375854492, "lr": 3.272157663969144e-07, "epoch": 1.4965437788018434, "percentage": 74.83, "elapsed_time": "9:01:03", "remaining_time": "3:02:01"} +{"current_steps": 6496, "total_steps": 8680, "loss": 0.9352993369102478, "lr": 3.2693394863107105e-07, "epoch": 1.4967741935483871, "percentage": 74.84, "elapsed_time": "9:01:07", "remaining_time": "3:01:55"} +{"current_steps": 6497, "total_steps": 8680, "loss": 0.7011485695838928, "lr": 3.2665222855987397e-07, "epoch": 1.4970046082949309, "percentage": 74.85, "elapsed_time": "9:01:13", "remaining_time": "3:01:50"} +{"current_steps": 6498, "total_steps": 8680, "loss": 0.9008398056030273, "lr": 3.263706062242142e-07, "epoch": 1.4972350230414746, "percentage": 74.86, "elapsed_time": "9:01:18", "remaining_time": "3:01:46"} +{"current_steps": 6499, "total_steps": 
8680, "loss": 0.768037736415863, "lr": 3.260890816649694e-07, "epoch": 1.4974654377880183, "percentage": 74.87, "elapsed_time": "9:01:23", "remaining_time": "3:01:41"} +{"current_steps": 6500, "total_steps": 8680, "loss": 0.7603639364242554, "lr": 3.258076549230024e-07, "epoch": 1.4976958525345623, "percentage": 74.88, "elapsed_time": "9:01:28", "remaining_time": "3:01:35"} +{"current_steps": 6501, "total_steps": 8680, "loss": 0.7984024286270142, "lr": 3.2552632603916177e-07, "epoch": 1.497926267281106, "percentage": 74.9, "elapsed_time": "9:01:35", "remaining_time": "3:01:31"} +{"current_steps": 6502, "total_steps": 8680, "loss": 0.8466978073120117, "lr": 3.2524509505428187e-07, "epoch": 1.4981566820276497, "percentage": 74.91, "elapsed_time": "9:01:42", "remaining_time": "3:01:27"} +{"current_steps": 6503, "total_steps": 8680, "loss": 0.7964911460876465, "lr": 3.24963962009183e-07, "epoch": 1.4983870967741937, "percentage": 74.92, "elapsed_time": "9:01:47", "remaining_time": "3:01:22"} +{"current_steps": 6504, "total_steps": 8680, "loss": 0.7551665306091309, "lr": 3.246829269446716e-07, "epoch": 1.4986175115207372, "percentage": 74.93, "elapsed_time": "9:01:51", "remaining_time": "3:01:17"} +{"current_steps": 6505, "total_steps": 8680, "loss": 0.6468057632446289, "lr": 3.2440198990153945e-07, "epoch": 1.4988479262672811, "percentage": 74.94, "elapsed_time": "9:01:56", "remaining_time": "3:01:12"} +{"current_steps": 6506, "total_steps": 8680, "loss": 0.7739330530166626, "lr": 3.241211509205638e-07, "epoch": 1.4990783410138249, "percentage": 74.95, "elapsed_time": "9:02:03", "remaining_time": "3:01:07"} +{"current_steps": 6507, "total_steps": 8680, "loss": 0.8205568790435791, "lr": 3.238404100425085e-07, "epoch": 1.4993087557603686, "percentage": 74.97, "elapsed_time": "9:02:07", "remaining_time": "3:01:02"} +{"current_steps": 6508, "total_steps": 8680, "loss": 0.667822003364563, "lr": 3.235597673081227e-07, "epoch": 1.4995391705069125, "percentage": 74.98, 
"elapsed_time": "9:02:14", "remaining_time": "3:00:58"} +{"current_steps": 6509, "total_steps": 8680, "loss": 0.7829990386962891, "lr": 3.232792227581409e-07, "epoch": 1.4997695852534563, "percentage": 74.99, "elapsed_time": "9:02:20", "remaining_time": "3:00:53"} +{"current_steps": 6510, "total_steps": 8680, "loss": 0.768509566783905, "lr": 3.229987764332843e-07, "epoch": 1.5, "percentage": 75.0, "elapsed_time": "9:02:26", "remaining_time": "3:00:48"} +{"current_steps": 6511, "total_steps": 8680, "loss": 0.8448585867881775, "lr": 3.227184283742591e-07, "epoch": 1.5002304147465437, "percentage": 75.01, "elapsed_time": "9:02:31", "remaining_time": "3:00:43"} +{"current_steps": 6512, "total_steps": 8680, "loss": 0.6929391622543335, "lr": 3.2243817862175705e-07, "epoch": 1.5004608294930875, "percentage": 75.02, "elapsed_time": "9:02:36", "remaining_time": "3:00:38"} +{"current_steps": 6513, "total_steps": 8680, "loss": 0.6453005075454712, "lr": 3.221580272164567e-07, "epoch": 1.5006912442396314, "percentage": 75.03, "elapsed_time": "9:02:42", "remaining_time": "3:00:34"} +{"current_steps": 6514, "total_steps": 8680, "loss": 0.7870811820030212, "lr": 3.2187797419902143e-07, "epoch": 1.5009216589861751, "percentage": 75.05, "elapsed_time": "9:02:48", "remaining_time": "3:00:29"} +{"current_steps": 6515, "total_steps": 8680, "loss": 0.7032002210617065, "lr": 3.2159801961010013e-07, "epoch": 1.5011520737327189, "percentage": 75.06, "elapsed_time": "9:02:53", "remaining_time": "3:00:24"} +{"current_steps": 6516, "total_steps": 8680, "loss": 0.8018448352813721, "lr": 3.213181634903285e-07, "epoch": 1.5013824884792628, "percentage": 75.07, "elapsed_time": "9:02:58", "remaining_time": "3:00:19"} +{"current_steps": 6517, "total_steps": 8680, "loss": 0.7066134810447693, "lr": 3.2103840588032707e-07, "epoch": 1.5016129032258063, "percentage": 75.08, "elapsed_time": "9:03:03", "remaining_time": "3:00:14"} +{"current_steps": 6518, "total_steps": 8680, "loss": 0.6835265159606934, 
"lr": 3.207587468207018e-07, "epoch": 1.5018433179723503, "percentage": 75.09, "elapsed_time": "9:03:09", "remaining_time": "3:00:09"} +{"current_steps": 6519, "total_steps": 8680, "loss": 0.6679749488830566, "lr": 3.204791863520455e-07, "epoch": 1.502073732718894, "percentage": 75.1, "elapsed_time": "9:03:17", "remaining_time": "3:00:05"} +{"current_steps": 6520, "total_steps": 8680, "loss": 0.781232476234436, "lr": 3.201997245149358e-07, "epoch": 1.5023041474654377, "percentage": 75.12, "elapsed_time": "9:03:23", "remaining_time": "3:00:01"} +{"current_steps": 6521, "total_steps": 8680, "loss": 0.7853572368621826, "lr": 3.1992036134993616e-07, "epoch": 1.5025345622119817, "percentage": 75.13, "elapsed_time": "9:03:28", "remaining_time": "2:59:56"} +{"current_steps": 6522, "total_steps": 8680, "loss": 0.8220832943916321, "lr": 3.1964109689759576e-07, "epoch": 1.5027649769585254, "percentage": 75.14, "elapsed_time": "9:03:33", "remaining_time": "2:59:51"} +{"current_steps": 6523, "total_steps": 8680, "loss": 0.8046013116836548, "lr": 3.193619311984491e-07, "epoch": 1.5029953917050691, "percentage": 75.15, "elapsed_time": "9:03:40", "remaining_time": "2:59:46"} +{"current_steps": 6524, "total_steps": 8680, "loss": 0.7123414874076843, "lr": 3.190828642930174e-07, "epoch": 1.5032258064516129, "percentage": 75.16, "elapsed_time": "9:03:45", "remaining_time": "2:59:41"} +{"current_steps": 6525, "total_steps": 8680, "loss": 0.7913625240325928, "lr": 3.188038962218066e-07, "epoch": 1.5034562211981566, "percentage": 75.17, "elapsed_time": "9:03:50", "remaining_time": "2:59:36"} +{"current_steps": 6526, "total_steps": 8680, "loss": 0.7837327718734741, "lr": 3.185250270253081e-07, "epoch": 1.5036866359447005, "percentage": 75.18, "elapsed_time": "9:03:54", "remaining_time": "2:59:31"} +{"current_steps": 6527, "total_steps": 8680, "loss": 0.7799992561340332, "lr": 3.182462567440002e-07, "epoch": 1.5039170506912443, "percentage": 75.2, "elapsed_time": "9:03:59", 
"remaining_time": "2:59:26"} +{"current_steps": 6528, "total_steps": 8680, "loss": 0.8591268062591553, "lr": 3.1796758541834545e-07, "epoch": 1.504147465437788, "percentage": 75.21, "elapsed_time": "9:04:04", "remaining_time": "2:59:21"} +{"current_steps": 6529, "total_steps": 8680, "loss": 0.6886378526687622, "lr": 3.176890130887926e-07, "epoch": 1.504377880184332, "percentage": 75.22, "elapsed_time": "9:04:10", "remaining_time": "2:59:16"} +{"current_steps": 6530, "total_steps": 8680, "loss": 0.8641641139984131, "lr": 3.1741053979577647e-07, "epoch": 1.5046082949308754, "percentage": 75.23, "elapsed_time": "9:04:15", "remaining_time": "2:59:11"} +{"current_steps": 6531, "total_steps": 8680, "loss": 0.8215552568435669, "lr": 3.1713216557971687e-07, "epoch": 1.5048387096774194, "percentage": 75.24, "elapsed_time": "9:04:21", "remaining_time": "2:59:06"} +{"current_steps": 6532, "total_steps": 8680, "loss": 0.8506371974945068, "lr": 3.1685389048101906e-07, "epoch": 1.5050691244239631, "percentage": 75.25, "elapsed_time": "9:04:24", "remaining_time": "2:59:01"} +{"current_steps": 6533, "total_steps": 8680, "loss": 0.740912675857544, "lr": 3.1657571454007515e-07, "epoch": 1.5052995391705069, "percentage": 75.26, "elapsed_time": "9:04:30", "remaining_time": "2:58:56"} +{"current_steps": 6534, "total_steps": 8680, "loss": 0.6458308696746826, "lr": 3.162976377972614e-07, "epoch": 1.5055299539170508, "percentage": 75.28, "elapsed_time": "9:04:36", "remaining_time": "2:58:52"} +{"current_steps": 6535, "total_steps": 8680, "loss": 0.7368316650390625, "lr": 3.1601966029294013e-07, "epoch": 1.5057603686635943, "percentage": 75.29, "elapsed_time": "9:04:42", "remaining_time": "2:58:47"} +{"current_steps": 6536, "total_steps": 8680, "loss": 0.6648637056350708, "lr": 3.1574178206746003e-07, "epoch": 1.5059907834101383, "percentage": 75.3, "elapsed_time": "9:04:49", "remaining_time": "2:58:43"} +{"current_steps": 6537, "total_steps": 8680, "loss": 0.706688404083252, "lr": 
3.154640031611544e-07, "epoch": 1.506221198156682, "percentage": 75.31, "elapsed_time": "9:04:54", "remaining_time": "2:58:38"} +{"current_steps": 6538, "total_steps": 8680, "loss": 0.722059965133667, "lr": 3.1518632361434263e-07, "epoch": 1.5064516129032257, "percentage": 75.32, "elapsed_time": "9:05:01", "remaining_time": "2:58:33"} +{"current_steps": 6539, "total_steps": 8680, "loss": 0.7098807096481323, "lr": 3.14908743467329e-07, "epoch": 1.5066820276497697, "percentage": 75.33, "elapsed_time": "9:05:08", "remaining_time": "2:58:29"} +{"current_steps": 6540, "total_steps": 8680, "loss": 0.7131781578063965, "lr": 3.1463126276040454e-07, "epoch": 1.5069124423963134, "percentage": 75.35, "elapsed_time": "9:05:13", "remaining_time": "2:58:24"} +{"current_steps": 6541, "total_steps": 8680, "loss": 0.7292109727859497, "lr": 3.143538815338451e-07, "epoch": 1.5071428571428571, "percentage": 75.36, "elapsed_time": "9:05:21", "remaining_time": "2:58:20"} +{"current_steps": 6542, "total_steps": 8680, "loss": 0.7305347919464111, "lr": 3.1407659982791204e-07, "epoch": 1.507373271889401, "percentage": 75.37, "elapsed_time": "9:05:25", "remaining_time": "2:58:15"} +{"current_steps": 6543, "total_steps": 8680, "loss": 0.8072094321250916, "lr": 3.1379941768285247e-07, "epoch": 1.5076036866359446, "percentage": 75.38, "elapsed_time": "9:05:29", "remaining_time": "2:58:09"} +{"current_steps": 6544, "total_steps": 8680, "loss": 0.8772450685501099, "lr": 3.135223351388987e-07, "epoch": 1.5078341013824885, "percentage": 75.39, "elapsed_time": "9:05:35", "remaining_time": "2:58:05"} +{"current_steps": 6545, "total_steps": 8680, "loss": 0.8463687896728516, "lr": 3.1324535223626957e-07, "epoch": 1.5080645161290323, "percentage": 75.4, "elapsed_time": "9:05:40", "remaining_time": "2:58:00"} +{"current_steps": 6546, "total_steps": 8680, "loss": 0.6764696836471558, "lr": 3.1296846901516806e-07, "epoch": 1.508294930875576, "percentage": 75.41, "elapsed_time": "9:05:44", "remaining_time": 
"2:57:54"} +{"current_steps": 6547, "total_steps": 8680, "loss": 0.8395411968231201, "lr": 3.126916855157841e-07, "epoch": 1.50852534562212, "percentage": 75.43, "elapsed_time": "9:05:49", "remaining_time": "2:57:49"} +{"current_steps": 6548, "total_steps": 8680, "loss": 0.8227219581604004, "lr": 3.1241500177829195e-07, "epoch": 1.5087557603686634, "percentage": 75.44, "elapsed_time": "9:05:55", "remaining_time": "2:57:45"} +{"current_steps": 6549, "total_steps": 8680, "loss": 0.7079675197601318, "lr": 3.121384178428519e-07, "epoch": 1.5089861751152074, "percentage": 75.45, "elapsed_time": "9:06:01", "remaining_time": "2:57:40"} +{"current_steps": 6550, "total_steps": 8680, "loss": 0.7792578935623169, "lr": 3.1186193374961014e-07, "epoch": 1.5092165898617511, "percentage": 75.46, "elapsed_time": "9:06:06", "remaining_time": "2:57:35"} +{"current_steps": 6551, "total_steps": 8680, "loss": 0.7821195125579834, "lr": 3.1158554953869776e-07, "epoch": 1.5094470046082948, "percentage": 75.47, "elapsed_time": "9:06:11", "remaining_time": "2:57:30"} +{"current_steps": 6552, "total_steps": 8680, "loss": 0.6640183329582214, "lr": 3.1130926525023114e-07, "epoch": 1.5096774193548388, "percentage": 75.48, "elapsed_time": "9:06:16", "remaining_time": "2:57:25"} +{"current_steps": 6553, "total_steps": 8680, "loss": 0.8087342977523804, "lr": 3.110330809243134e-07, "epoch": 1.5099078341013825, "percentage": 75.5, "elapsed_time": "9:06:21", "remaining_time": "2:57:20"} +{"current_steps": 6554, "total_steps": 8680, "loss": 0.7716038227081299, "lr": 3.1075699660103184e-07, "epoch": 1.5101382488479262, "percentage": 75.51, "elapsed_time": "9:06:27", "remaining_time": "2:57:15"} +{"current_steps": 6555, "total_steps": 8680, "loss": 0.8408910632133484, "lr": 3.1048101232045943e-07, "epoch": 1.5103686635944702, "percentage": 75.52, "elapsed_time": "9:06:32", "remaining_time": "2:57:10"} +{"current_steps": 6556, "total_steps": 8680, "loss": 0.8799750804901123, "lr": 3.1020512812265564e-07, 
"epoch": 1.5105990783410137, "percentage": 75.53, "elapsed_time": "9:06:38", "remaining_time": "2:57:05"} +{"current_steps": 6557, "total_steps": 8680, "loss": 0.6534945368766785, "lr": 3.0992934404766426e-07, "epoch": 1.5108294930875577, "percentage": 75.54, "elapsed_time": "9:06:45", "remaining_time": "2:57:01"} +{"current_steps": 6558, "total_steps": 8680, "loss": 0.8293032646179199, "lr": 3.0965366013551507e-07, "epoch": 1.5110599078341014, "percentage": 75.55, "elapsed_time": "9:06:49", "remaining_time": "2:56:56"} +{"current_steps": 6559, "total_steps": 8680, "loss": 0.7486997842788696, "lr": 3.0937807642622295e-07, "epoch": 1.511290322580645, "percentage": 75.56, "elapsed_time": "9:06:53", "remaining_time": "2:56:51"} +{"current_steps": 6560, "total_steps": 8680, "loss": 0.7431119680404663, "lr": 3.0910259295978914e-07, "epoch": 1.511520737327189, "percentage": 75.58, "elapsed_time": "9:07:00", "remaining_time": "2:56:46"} +{"current_steps": 6561, "total_steps": 8680, "loss": 0.945859432220459, "lr": 3.0882720977619927e-07, "epoch": 1.5117511520737326, "percentage": 75.59, "elapsed_time": "9:07:04", "remaining_time": "2:56:41"} +{"current_steps": 6562, "total_steps": 8680, "loss": 0.7894721031188965, "lr": 3.0855192691542487e-07, "epoch": 1.5119815668202765, "percentage": 75.6, "elapsed_time": "9:07:11", "remaining_time": "2:56:36"} +{"current_steps": 6563, "total_steps": 8680, "loss": 0.8593465089797974, "lr": 3.082767444174229e-07, "epoch": 1.5122119815668202, "percentage": 75.61, "elapsed_time": "9:07:16", "remaining_time": "2:56:31"} +{"current_steps": 6564, "total_steps": 8680, "loss": 0.8297405242919922, "lr": 3.080016623221355e-07, "epoch": 1.512442396313364, "percentage": 75.62, "elapsed_time": "9:07:19", "remaining_time": "2:56:26"} +{"current_steps": 6565, "total_steps": 8680, "loss": 0.7372928857803345, "lr": 3.07726680669491e-07, "epoch": 1.512672811059908, "percentage": 75.63, "elapsed_time": "9:07:24", "remaining_time": "2:56:21"} 
+{"current_steps": 6566, "total_steps": 8680, "loss": 0.7031347751617432, "lr": 3.0745179949940235e-07, "epoch": 1.5129032258064516, "percentage": 75.65, "elapsed_time": "9:07:30", "remaining_time": "2:56:16"} +{"current_steps": 6567, "total_steps": 8680, "loss": 0.7086467742919922, "lr": 3.071770188517679e-07, "epoch": 1.5131336405529954, "percentage": 75.66, "elapsed_time": "9:07:35", "remaining_time": "2:56:11"} +{"current_steps": 6568, "total_steps": 8680, "loss": 0.9091345071792603, "lr": 3.069023387664723e-07, "epoch": 1.5133640552995393, "percentage": 75.67, "elapsed_time": "9:07:39", "remaining_time": "2:56:06"} +{"current_steps": 6569, "total_steps": 8680, "loss": 0.7470624446868896, "lr": 3.066277592833847e-07, "epoch": 1.5135944700460828, "percentage": 75.68, "elapsed_time": "9:07:44", "remaining_time": "2:56:01"} +{"current_steps": 6570, "total_steps": 8680, "loss": 0.75694739818573, "lr": 3.0635328044235965e-07, "epoch": 1.5138248847926268, "percentage": 75.69, "elapsed_time": "9:07:49", "remaining_time": "2:55:56"} +{"current_steps": 6571, "total_steps": 8680, "loss": 0.7832024693489075, "lr": 3.0607890228323796e-07, "epoch": 1.5140552995391705, "percentage": 75.7, "elapsed_time": "9:07:54", "remaining_time": "2:55:51"} +{"current_steps": 6572, "total_steps": 8680, "loss": 0.6777220368385315, "lr": 3.0580462484584455e-07, "epoch": 1.5142857142857142, "percentage": 75.71, "elapsed_time": "9:08:00", "remaining_time": "2:55:46"} +{"current_steps": 6573, "total_steps": 8680, "loss": 0.7748236060142517, "lr": 3.055304481699913e-07, "epoch": 1.5145161290322582, "percentage": 75.73, "elapsed_time": "9:08:04", "remaining_time": "2:55:41"} +{"current_steps": 6574, "total_steps": 8680, "loss": 0.7495633363723755, "lr": 3.052563722954741e-07, "epoch": 1.5147465437788017, "percentage": 75.74, "elapsed_time": "9:08:10", "remaining_time": "2:55:36"} +{"current_steps": 6575, "total_steps": 8680, "loss": 0.8011484742164612, "lr": 3.049823972620744e-07, "epoch": 
1.5149769585253456, "percentage": 75.75, "elapsed_time": "9:08:15", "remaining_time": "2:55:31"} +{"current_steps": 6576, "total_steps": 8680, "loss": 0.7480140924453735, "lr": 3.0470852310956e-07, "epoch": 1.5152073732718894, "percentage": 75.76, "elapsed_time": "9:08:19", "remaining_time": "2:55:26"} +{"current_steps": 6577, "total_steps": 8680, "loss": 0.6561319828033447, "lr": 3.0443474987768305e-07, "epoch": 1.515437788018433, "percentage": 75.77, "elapsed_time": "9:08:27", "remaining_time": "2:55:22"} +{"current_steps": 6578, "total_steps": 8680, "loss": 0.7437188029289246, "lr": 3.041610776061813e-07, "epoch": 1.515668202764977, "percentage": 75.78, "elapsed_time": "9:08:31", "remaining_time": "2:55:16"} +{"current_steps": 6579, "total_steps": 8680, "loss": 0.7429096698760986, "lr": 3.0388750633477766e-07, "epoch": 1.5158986175115208, "percentage": 75.79, "elapsed_time": "9:08:36", "remaining_time": "2:55:11"} +{"current_steps": 6580, "total_steps": 8680, "loss": 0.859411358833313, "lr": 3.0361403610318125e-07, "epoch": 1.5161290322580645, "percentage": 75.81, "elapsed_time": "9:08:42", "remaining_time": "2:55:07"} +{"current_steps": 6581, "total_steps": 8680, "loss": 0.7636305093765259, "lr": 3.0334066695108565e-07, "epoch": 1.5163594470046085, "percentage": 75.82, "elapsed_time": "9:08:50", "remaining_time": "2:55:03"} +{"current_steps": 6582, "total_steps": 8680, "loss": 0.8331989049911499, "lr": 3.030673989181699e-07, "epoch": 1.516589861751152, "percentage": 75.83, "elapsed_time": "9:08:57", "remaining_time": "2:54:58"} +{"current_steps": 6583, "total_steps": 8680, "loss": 0.770574688911438, "lr": 3.0279423204409857e-07, "epoch": 1.516820276497696, "percentage": 75.84, "elapsed_time": "9:09:01", "remaining_time": "2:54:53"} +{"current_steps": 6584, "total_steps": 8680, "loss": 0.7470898628234863, "lr": 3.025211663685213e-07, "epoch": 1.5170506912442396, "percentage": 75.85, "elapsed_time": "9:09:06", "remaining_time": "2:54:48"} +{"current_steps": 6585, 
"total_steps": 8680, "loss": 0.7907510995864868, "lr": 3.022482019310736e-07, "epoch": 1.5172811059907834, "percentage": 75.86, "elapsed_time": "9:09:12", "remaining_time": "2:54:43"} +{"current_steps": 6586, "total_steps": 8680, "loss": 0.751417338848114, "lr": 3.019753387713757e-07, "epoch": 1.5175115207373273, "percentage": 75.88, "elapsed_time": "9:09:18", "remaining_time": "2:54:39"} +{"current_steps": 6587, "total_steps": 8680, "loss": 0.8987867832183838, "lr": 3.01702576929033e-07, "epoch": 1.5177419354838708, "percentage": 75.89, "elapsed_time": "9:09:22", "remaining_time": "2:54:33"} +{"current_steps": 6588, "total_steps": 8680, "loss": 0.7618268728256226, "lr": 3.0142991644363714e-07, "epoch": 1.5179723502304148, "percentage": 75.9, "elapsed_time": "9:09:29", "remaining_time": "2:54:29"} +{"current_steps": 6589, "total_steps": 8680, "loss": 0.9358207583427429, "lr": 3.011573573547641e-07, "epoch": 1.5182027649769585, "percentage": 75.91, "elapsed_time": "9:09:34", "remaining_time": "2:54:24"} +{"current_steps": 6590, "total_steps": 8680, "loss": 0.6549144387245178, "lr": 3.008848997019753e-07, "epoch": 1.5184331797235022, "percentage": 75.92, "elapsed_time": "9:09:40", "remaining_time": "2:54:19"} +{"current_steps": 6591, "total_steps": 8680, "loss": 0.8642100095748901, "lr": 3.00612543524818e-07, "epoch": 1.5186635944700462, "percentage": 75.93, "elapsed_time": "9:09:45", "remaining_time": "2:54:14"} +{"current_steps": 6592, "total_steps": 8680, "loss": 0.7348824143409729, "lr": 3.003402888628241e-07, "epoch": 1.51889400921659, "percentage": 75.94, "elapsed_time": "9:09:49", "remaining_time": "2:54:09"} +{"current_steps": 6593, "total_steps": 8680, "loss": 0.8737039566040039, "lr": 3.000681357555108e-07, "epoch": 1.5191244239631336, "percentage": 75.96, "elapsed_time": "9:09:56", "remaining_time": "2:54:04"} +{"current_steps": 6594, "total_steps": 8680, "loss": 0.749860405921936, "lr": 2.9979608424238134e-07, "epoch": 1.5193548387096776, "percentage": 
75.97, "elapsed_time": "9:10:00", "remaining_time": "2:53:59"} +{"current_steps": 6595, "total_steps": 8680, "loss": 0.7583779096603394, "lr": 2.99524134362923e-07, "epoch": 1.519585253456221, "percentage": 75.98, "elapsed_time": "9:10:04", "remaining_time": "2:53:54"} +{"current_steps": 6596, "total_steps": 8680, "loss": 0.7096224427223206, "lr": 2.992522861566095e-07, "epoch": 1.519815668202765, "percentage": 75.99, "elapsed_time": "9:10:08", "remaining_time": "2:53:49"} +{"current_steps": 6597, "total_steps": 8680, "loss": 0.7813585996627808, "lr": 2.9898053966289904e-07, "epoch": 1.5200460829493088, "percentage": 76.0, "elapsed_time": "9:10:14", "remaining_time": "2:53:44"} +{"current_steps": 6598, "total_steps": 8680, "loss": 0.7744605541229248, "lr": 2.9870889492123517e-07, "epoch": 1.5202764976958525, "percentage": 76.01, "elapsed_time": "9:10:20", "remaining_time": "2:53:39"} +{"current_steps": 6599, "total_steps": 8680, "loss": 0.8398552536964417, "lr": 2.984373519710469e-07, "epoch": 1.5205069124423964, "percentage": 76.03, "elapsed_time": "9:10:25", "remaining_time": "2:53:34"} +{"current_steps": 6600, "total_steps": 8680, "loss": 0.6853294372558594, "lr": 2.981659108517478e-07, "epoch": 1.52073732718894, "percentage": 76.04, "elapsed_time": "9:10:31", "remaining_time": "2:53:29"} +{"current_steps": 6601, "total_steps": 8680, "loss": 0.7673987150192261, "lr": 2.97894571602738e-07, "epoch": 1.520967741935484, "percentage": 76.05, "elapsed_time": "9:10:39", "remaining_time": "2:53:25"} +{"current_steps": 6602, "total_steps": 8680, "loss": 0.7000377774238586, "lr": 2.976233342634017e-07, "epoch": 1.5211981566820276, "percentage": 76.06, "elapsed_time": "9:10:45", "remaining_time": "2:53:21"} +{"current_steps": 6603, "total_steps": 8680, "loss": 0.8429346680641174, "lr": 2.9735219887310857e-07, "epoch": 1.5214285714285714, "percentage": 76.07, "elapsed_time": "9:10:48", "remaining_time": "2:53:15"} +{"current_steps": 6604, "total_steps": 8680, "loss": 
0.9118648767471313, "lr": 2.970811654712133e-07, "epoch": 1.5216589861751153, "percentage": 76.08, "elapsed_time": "9:10:52", "remaining_time": "2:53:10"} +{"current_steps": 6605, "total_steps": 8680, "loss": 0.7745784521102905, "lr": 2.9681023409705666e-07, "epoch": 1.521889400921659, "percentage": 76.09, "elapsed_time": "9:10:58", "remaining_time": "2:53:05"} +{"current_steps": 6606, "total_steps": 8680, "loss": 0.8481245040893555, "lr": 2.9653940478996367e-07, "epoch": 1.5221198156682028, "percentage": 76.11, "elapsed_time": "9:11:02", "remaining_time": "2:53:00"} +{"current_steps": 6607, "total_steps": 8680, "loss": 0.8643463850021362, "lr": 2.9626867758924436e-07, "epoch": 1.5223502304147467, "percentage": 76.12, "elapsed_time": "9:11:07", "remaining_time": "2:52:55"} +{"current_steps": 6608, "total_steps": 8680, "loss": 0.9524952173233032, "lr": 2.959980525341953e-07, "epoch": 1.5225806451612902, "percentage": 76.13, "elapsed_time": "9:11:10", "remaining_time": "2:52:49"} +{"current_steps": 6609, "total_steps": 8680, "loss": 0.7153829336166382, "lr": 2.9572752966409686e-07, "epoch": 1.5228110599078342, "percentage": 76.14, "elapsed_time": "9:11:16", "remaining_time": "2:52:44"} +{"current_steps": 6610, "total_steps": 8680, "loss": 0.8332774639129639, "lr": 2.954571090182149e-07, "epoch": 1.523041474654378, "percentage": 76.15, "elapsed_time": "9:11:20", "remaining_time": "2:52:39"} +{"current_steps": 6611, "total_steps": 8680, "loss": 0.7511743307113647, "lr": 2.9518679063580123e-07, "epoch": 1.5232718894009216, "percentage": 76.16, "elapsed_time": "9:11:26", "remaining_time": "2:52:34"} +{"current_steps": 6612, "total_steps": 8680, "loss": 0.715233325958252, "lr": 2.9491657455609175e-07, "epoch": 1.5235023041474656, "percentage": 76.18, "elapsed_time": "9:11:32", "remaining_time": "2:52:30"} +{"current_steps": 6613, "total_steps": 8680, "loss": 0.7386246919631958, "lr": 2.946464608183078e-07, "epoch": 1.523732718894009, "percentage": 76.19, "elapsed_time": 
"9:11:37", "remaining_time": "2:52:25"} +{"current_steps": 6614, "total_steps": 8680, "loss": 0.8337790369987488, "lr": 2.943764494616565e-07, "epoch": 1.523963133640553, "percentage": 76.2, "elapsed_time": "9:11:43", "remaining_time": "2:52:20"} +{"current_steps": 6615, "total_steps": 8680, "loss": 0.8447855710983276, "lr": 2.941065405253296e-07, "epoch": 1.5241935483870968, "percentage": 76.21, "elapsed_time": "9:11:49", "remaining_time": "2:52:15"} +{"current_steps": 6616, "total_steps": 8680, "loss": 0.7430610060691833, "lr": 2.938367340485035e-07, "epoch": 1.5244239631336405, "percentage": 76.22, "elapsed_time": "9:11:54", "remaining_time": "2:52:10"} +{"current_steps": 6617, "total_steps": 8680, "loss": 0.7740806937217712, "lr": 2.9356703007034087e-07, "epoch": 1.5246543778801844, "percentage": 76.23, "elapsed_time": "9:11:59", "remaining_time": "2:52:05"} +{"current_steps": 6618, "total_steps": 8680, "loss": 0.7824152708053589, "lr": 2.9329742862998875e-07, "epoch": 1.5248847926267282, "percentage": 76.24, "elapsed_time": "9:12:04", "remaining_time": "2:52:00"} +{"current_steps": 6619, "total_steps": 8680, "loss": 0.9222463965415955, "lr": 2.930279297665792e-07, "epoch": 1.5251152073732719, "percentage": 76.26, "elapsed_time": "9:12:08", "remaining_time": "2:51:55"} +{"current_steps": 6620, "total_steps": 8680, "loss": 0.9548497200012207, "lr": 2.927585335192294e-07, "epoch": 1.5253456221198156, "percentage": 76.27, "elapsed_time": "9:12:12", "remaining_time": "2:51:50"} +{"current_steps": 6621, "total_steps": 8680, "loss": 0.9007906913757324, "lr": 2.9248923992704255e-07, "epoch": 1.5255760368663593, "percentage": 76.28, "elapsed_time": "9:12:17", "remaining_time": "2:51:45"} +{"current_steps": 6622, "total_steps": 8680, "loss": 0.6932169198989868, "lr": 2.9222004902910593e-07, "epoch": 1.5258064516129033, "percentage": 76.29, "elapsed_time": "9:12:22", "remaining_time": "2:51:40"} +{"current_steps": 6623, "total_steps": 8680, "loss": 0.7327853441238403, 
"lr": 2.919509608644922e-07, "epoch": 1.526036866359447, "percentage": 76.3, "elapsed_time": "9:12:28", "remaining_time": "2:51:35"} +{"current_steps": 6624, "total_steps": 8680, "loss": 0.617963433265686, "lr": 2.916819754722588e-07, "epoch": 1.5262672811059907, "percentage": 76.31, "elapsed_time": "9:12:33", "remaining_time": "2:51:30"} +{"current_steps": 6625, "total_steps": 8680, "loss": 1.0567349195480347, "lr": 2.914130928914493e-07, "epoch": 1.5264976958525347, "percentage": 76.32, "elapsed_time": "9:12:38", "remaining_time": "2:51:25"} +{"current_steps": 6626, "total_steps": 8680, "loss": 0.7362378835678101, "lr": 2.9114431316109145e-07, "epoch": 1.5267281105990782, "percentage": 76.34, "elapsed_time": "9:12:42", "remaining_time": "2:51:19"} +{"current_steps": 6627, "total_steps": 8680, "loss": 0.6879991888999939, "lr": 2.9087563632019774e-07, "epoch": 1.5269585253456222, "percentage": 76.35, "elapsed_time": "9:12:47", "remaining_time": "2:51:15"} +{"current_steps": 6628, "total_steps": 8680, "loss": 0.7804177403450012, "lr": 2.9060706240776686e-07, "epoch": 1.5271889400921659, "percentage": 76.36, "elapsed_time": "9:12:51", "remaining_time": "2:51:09"} +{"current_steps": 6629, "total_steps": 8680, "loss": 0.7459548711776733, "lr": 2.9033859146278197e-07, "epoch": 1.5274193548387096, "percentage": 76.37, "elapsed_time": "9:12:57", "remaining_time": "2:51:05"} +{"current_steps": 6630, "total_steps": 8680, "loss": 0.7392233610153198, "lr": 2.900702235242106e-07, "epoch": 1.5276497695852536, "percentage": 76.38, "elapsed_time": "9:13:01", "remaining_time": "2:50:59"} +{"current_steps": 6631, "total_steps": 8680, "loss": 0.6956135034561157, "lr": 2.8980195863100675e-07, "epoch": 1.5278801843317973, "percentage": 76.39, "elapsed_time": "9:13:08", "remaining_time": "2:50:55"} +{"current_steps": 6632, "total_steps": 8680, "loss": 0.7042561769485474, "lr": 2.8953379682210856e-07, "epoch": 1.528110599078341, "percentage": 76.41, "elapsed_time": "9:13:14", 
"remaining_time": "2:50:50"} +{"current_steps": 6633, "total_steps": 8680, "loss": 0.7114298343658447, "lr": 2.8926573813643884e-07, "epoch": 1.5283410138248847, "percentage": 76.42, "elapsed_time": "9:13:21", "remaining_time": "2:50:46"} +{"current_steps": 6634, "total_steps": 8680, "loss": 0.862826943397522, "lr": 2.8899778261290664e-07, "epoch": 1.5285714285714285, "percentage": 76.43, "elapsed_time": "9:13:27", "remaining_time": "2:50:41"} +{"current_steps": 6635, "total_steps": 8680, "loss": 0.8229889869689941, "lr": 2.8872993029040506e-07, "epoch": 1.5288018433179724, "percentage": 76.44, "elapsed_time": "9:13:32", "remaining_time": "2:50:36"} +{"current_steps": 6636, "total_steps": 8680, "loss": 0.8058778047561646, "lr": 2.884621812078122e-07, "epoch": 1.5290322580645161, "percentage": 76.45, "elapsed_time": "9:13:36", "remaining_time": "2:50:31"} +{"current_steps": 6637, "total_steps": 8680, "loss": 0.8150385618209839, "lr": 2.881945354039921e-07, "epoch": 1.5292626728110599, "percentage": 76.46, "elapsed_time": "9:13:40", "remaining_time": "2:50:26"} +{"current_steps": 6638, "total_steps": 8680, "loss": 0.7067136168479919, "lr": 2.8792699291779276e-07, "epoch": 1.5294930875576038, "percentage": 76.47, "elapsed_time": "9:13:45", "remaining_time": "2:50:20"} +{"current_steps": 6639, "total_steps": 8680, "loss": 0.7725155353546143, "lr": 2.8765955378804784e-07, "epoch": 1.5297235023041473, "percentage": 76.49, "elapsed_time": "9:13:49", "remaining_time": "2:50:15"} +{"current_steps": 6640, "total_steps": 8680, "loss": 0.5956720113754272, "lr": 2.873922180535754e-07, "epoch": 1.5299539170506913, "percentage": 76.5, "elapsed_time": "9:13:56", "remaining_time": "2:50:11"} +{"current_steps": 6641, "total_steps": 8680, "loss": 0.6506170630455017, "lr": 2.8712498575317934e-07, "epoch": 1.530184331797235, "percentage": 76.51, "elapsed_time": "9:14:01", "remaining_time": "2:50:06"} +{"current_steps": 6642, "total_steps": 8680, "loss": 0.7860926985740662, "lr": 
2.86857856925648e-07, "epoch": 1.5304147465437787, "percentage": 76.52, "elapsed_time": "9:14:06", "remaining_time": "2:50:01"} +{"current_steps": 6643, "total_steps": 8680, "loss": 0.7003993391990662, "lr": 2.8659083160975464e-07, "epoch": 1.5306451612903227, "percentage": 76.53, "elapsed_time": "9:14:13", "remaining_time": "2:49:56"} +{"current_steps": 6644, "total_steps": 8680, "loss": 0.6887079477310181, "lr": 2.8632390984425746e-07, "epoch": 1.5308755760368664, "percentage": 76.54, "elapsed_time": "9:14:18", "remaining_time": "2:49:51"} +{"current_steps": 6645, "total_steps": 8680, "loss": 0.788282036781311, "lr": 2.860570916678998e-07, "epoch": 1.5311059907834101, "percentage": 76.56, "elapsed_time": "9:14:24", "remaining_time": "2:49:46"} +{"current_steps": 6646, "total_steps": 8680, "loss": 0.771350085735321, "lr": 2.8579037711941043e-07, "epoch": 1.5313364055299539, "percentage": 76.57, "elapsed_time": "9:14:28", "remaining_time": "2:49:41"} +{"current_steps": 6647, "total_steps": 8680, "loss": 0.6418509483337402, "lr": 2.855237662375021e-07, "epoch": 1.5315668202764976, "percentage": 76.58, "elapsed_time": "9:14:34", "remaining_time": "2:49:37"} +{"current_steps": 6648, "total_steps": 8680, "loss": 0.6606692671775818, "lr": 2.852572590608735e-07, "epoch": 1.5317972350230415, "percentage": 76.59, "elapsed_time": "9:14:41", "remaining_time": "2:49:32"} +{"current_steps": 6649, "total_steps": 8680, "loss": 0.8623934984207153, "lr": 2.849908556282076e-07, "epoch": 1.5320276497695853, "percentage": 76.6, "elapsed_time": "9:14:45", "remaining_time": "2:49:27"} +{"current_steps": 6650, "total_steps": 8680, "loss": 0.848737359046936, "lr": 2.8472455597817215e-07, "epoch": 1.532258064516129, "percentage": 76.61, "elapsed_time": "9:14:50", "remaining_time": "2:49:22"} +{"current_steps": 6651, "total_steps": 8680, "loss": 0.7156505584716797, "lr": 2.844583601494207e-07, "epoch": 1.532488479262673, "percentage": 76.62, "elapsed_time": "9:14:56", "remaining_time": 
"2:49:17"} +{"current_steps": 6652, "total_steps": 8680, "loss": 0.598319411277771, "lr": 2.8419226818059116e-07, "epoch": 1.5327188940092165, "percentage": 76.64, "elapsed_time": "9:15:01", "remaining_time": "2:49:12"} +{"current_steps": 6653, "total_steps": 8680, "loss": 0.6320680379867554, "lr": 2.8392628011030585e-07, "epoch": 1.5329493087557604, "percentage": 76.65, "elapsed_time": "9:15:07", "remaining_time": "2:49:08"} +{"current_steps": 6654, "total_steps": 8680, "loss": 0.8770536184310913, "lr": 2.836603959771734e-07, "epoch": 1.5331797235023041, "percentage": 76.66, "elapsed_time": "9:15:12", "remaining_time": "2:49:03"} +{"current_steps": 6655, "total_steps": 8680, "loss": 0.896265983581543, "lr": 2.833946158197862e-07, "epoch": 1.5334101382488479, "percentage": 76.67, "elapsed_time": "9:15:17", "remaining_time": "2:48:58"} +{"current_steps": 6656, "total_steps": 8680, "loss": 0.7194868326187134, "lr": 2.8312893967672145e-07, "epoch": 1.5336405529953918, "percentage": 76.68, "elapsed_time": "9:15:23", "remaining_time": "2:48:53"} +{"current_steps": 6657, "total_steps": 8680, "loss": 0.7993383407592773, "lr": 2.828633675865425e-07, "epoch": 1.5338709677419353, "percentage": 76.69, "elapsed_time": "9:15:28", "remaining_time": "2:48:48"} +{"current_steps": 6658, "total_steps": 8680, "loss": 0.6808127760887146, "lr": 2.8259789958779635e-07, "epoch": 1.5341013824884793, "percentage": 76.71, "elapsed_time": "9:15:32", "remaining_time": "2:48:43"} +{"current_steps": 6659, "total_steps": 8680, "loss": 0.7348822355270386, "lr": 2.823325357190153e-07, "epoch": 1.534331797235023, "percentage": 76.72, "elapsed_time": "9:15:37", "remaining_time": "2:48:38"} +{"current_steps": 6660, "total_steps": 8680, "loss": 0.7729920744895935, "lr": 2.820672760187166e-07, "epoch": 1.5345622119815667, "percentage": 76.73, "elapsed_time": "9:15:43", "remaining_time": "2:48:33"} +{"current_steps": 6661, "total_steps": 8680, "loss": 0.803922176361084, "lr": 2.818021205254021e-07, 
"epoch": 1.5347926267281107, "percentage": 76.74, "elapsed_time": "9:15:48", "remaining_time": "2:48:28"} +{"current_steps": 6662, "total_steps": 8680, "loss": 0.7931007146835327, "lr": 2.815370692775594e-07, "epoch": 1.5350230414746544, "percentage": 76.75, "elapsed_time": "9:15:54", "remaining_time": "2:48:23"} +{"current_steps": 6663, "total_steps": 8680, "loss": 0.7990511655807495, "lr": 2.8127212231365995e-07, "epoch": 1.5352534562211981, "percentage": 76.76, "elapsed_time": "9:16:00", "remaining_time": "2:48:18"} +{"current_steps": 6664, "total_steps": 8680, "loss": 0.8163471817970276, "lr": 2.8100727967216043e-07, "epoch": 1.535483870967742, "percentage": 76.77, "elapsed_time": "9:16:04", "remaining_time": "2:48:13"} +{"current_steps": 6665, "total_steps": 8680, "loss": 0.7628358602523804, "lr": 2.8074254139150225e-07, "epoch": 1.5357142857142856, "percentage": 76.79, "elapsed_time": "9:16:10", "remaining_time": "2:48:08"} +{"current_steps": 6666, "total_steps": 8680, "loss": 0.8008173704147339, "lr": 2.8047790751011216e-07, "epoch": 1.5359447004608295, "percentage": 76.8, "elapsed_time": "9:16:16", "remaining_time": "2:48:03"} +{"current_steps": 6667, "total_steps": 8680, "loss": 0.9139487743377686, "lr": 2.802133780664013e-07, "epoch": 1.5361751152073733, "percentage": 76.81, "elapsed_time": "9:16:20", "remaining_time": "2:47:58"} +{"current_steps": 6668, "total_steps": 8680, "loss": 0.9436901211738586, "lr": 2.7994895309876555e-07, "epoch": 1.536405529953917, "percentage": 76.82, "elapsed_time": "9:16:25", "remaining_time": "2:47:53"} +{"current_steps": 6669, "total_steps": 8680, "loss": 0.8072221875190735, "lr": 2.7968463264558617e-07, "epoch": 1.536635944700461, "percentage": 76.83, "elapsed_time": "9:16:31", "remaining_time": "2:47:49"} +{"current_steps": 6670, "total_steps": 8680, "loss": 0.7434822916984558, "lr": 2.7942041674522866e-07, "epoch": 1.5368663594470044, "percentage": 76.84, "elapsed_time": "9:16:38", "remaining_time": "2:47:44"} 
+{"current_steps": 6671, "total_steps": 8680, "loss": 0.6729850769042969, "lr": 2.7915630543604394e-07, "epoch": 1.5370967741935484, "percentage": 76.85, "elapsed_time": "9:16:43", "remaining_time": "2:47:39"} +{"current_steps": 6672, "total_steps": 8680, "loss": 0.8752315044403076, "lr": 2.7889229875636723e-07, "epoch": 1.5373271889400921, "percentage": 76.87, "elapsed_time": "9:16:49", "remaining_time": "2:47:34"} +{"current_steps": 6673, "total_steps": 8680, "loss": 0.8519413471221924, "lr": 2.786283967445184e-07, "epoch": 1.5375576036866359, "percentage": 76.88, "elapsed_time": "9:16:53", "remaining_time": "2:47:29"} +{"current_steps": 6674, "total_steps": 8680, "loss": 0.8868448734283447, "lr": 2.783645994388032e-07, "epoch": 1.5377880184331798, "percentage": 76.89, "elapsed_time": "9:16:59", "remaining_time": "2:47:24"} +{"current_steps": 6675, "total_steps": 8680, "loss": 0.9223456978797913, "lr": 2.78100906877511e-07, "epoch": 1.5380184331797235, "percentage": 76.9, "elapsed_time": "9:17:05", "remaining_time": "2:47:20"} +{"current_steps": 6676, "total_steps": 8680, "loss": 0.799191951751709, "lr": 2.7783731909891616e-07, "epoch": 1.5382488479262673, "percentage": 76.91, "elapsed_time": "9:17:09", "remaining_time": "2:47:14"} +{"current_steps": 6677, "total_steps": 8680, "loss": 0.7092995643615723, "lr": 2.775738361412788e-07, "epoch": 1.5384792626728112, "percentage": 76.92, "elapsed_time": "9:17:15", "remaining_time": "2:47:10"} +{"current_steps": 6678, "total_steps": 8680, "loss": 0.674687385559082, "lr": 2.7731045804284283e-07, "epoch": 1.5387096774193547, "percentage": 76.94, "elapsed_time": "9:17:20", "remaining_time": "2:47:05"} +{"current_steps": 6679, "total_steps": 8680, "loss": 0.7366930246353149, "lr": 2.77047184841837e-07, "epoch": 1.5389400921658987, "percentage": 76.95, "elapsed_time": "9:17:25", "remaining_time": "2:46:59"} +{"current_steps": 6680, "total_steps": 8680, "loss": 0.838137149810791, "lr": 2.767840165764753e-07, "epoch": 
1.5391705069124424, "percentage": 76.96, "elapsed_time": "9:17:28", "remaining_time": "2:46:54"} +{"current_steps": 6681, "total_steps": 8680, "loss": 0.7507175803184509, "lr": 2.765209532849558e-07, "epoch": 1.5394009216589861, "percentage": 76.97, "elapsed_time": "9:17:34", "remaining_time": "2:46:49"} +{"current_steps": 6682, "total_steps": 8680, "loss": 0.8157602548599243, "lr": 2.7625799500546267e-07, "epoch": 1.53963133640553, "percentage": 76.98, "elapsed_time": "9:17:38", "remaining_time": "2:46:44"} +{"current_steps": 6683, "total_steps": 8680, "loss": 0.7779219150543213, "lr": 2.7599514177616333e-07, "epoch": 1.5398617511520736, "percentage": 76.99, "elapsed_time": "9:17:45", "remaining_time": "2:46:40"} +{"current_steps": 6684, "total_steps": 8680, "loss": 0.8261638879776001, "lr": 2.757323936352106e-07, "epoch": 1.5400921658986175, "percentage": 77.0, "elapsed_time": "9:17:49", "remaining_time": "2:46:34"} +{"current_steps": 6685, "total_steps": 8680, "loss": 0.6139177680015564, "lr": 2.7546975062074197e-07, "epoch": 1.5403225806451613, "percentage": 77.02, "elapsed_time": "9:17:56", "remaining_time": "2:46:30"} +{"current_steps": 6686, "total_steps": 8680, "loss": 0.744202733039856, "lr": 2.752072127708802e-07, "epoch": 1.540552995391705, "percentage": 77.03, "elapsed_time": "9:18:00", "remaining_time": "2:46:25"} +{"current_steps": 6687, "total_steps": 8680, "loss": 0.7685158848762512, "lr": 2.749447801237319e-07, "epoch": 1.540783410138249, "percentage": 77.04, "elapsed_time": "9:18:07", "remaining_time": "2:46:20"} +{"current_steps": 6688, "total_steps": 8680, "loss": 0.7483633756637573, "lr": 2.7468245271738865e-07, "epoch": 1.5410138248847927, "percentage": 77.05, "elapsed_time": "9:18:13", "remaining_time": "2:46:15"} +{"current_steps": 6689, "total_steps": 8680, "loss": 0.8967286348342896, "lr": 2.7442023058992746e-07, "epoch": 1.5412442396313364, "percentage": 77.06, "elapsed_time": "9:18:17", "remaining_time": "2:46:10"} +{"current_steps": 
6690, "total_steps": 8680, "loss": 0.8035085201263428, "lr": 2.7415811377940933e-07, "epoch": 1.5414746543778803, "percentage": 77.07, "elapsed_time": "9:18:21", "remaining_time": "2:46:05"} +{"current_steps": 6691, "total_steps": 8680, "loss": 0.8504149913787842, "lr": 2.738961023238798e-07, "epoch": 1.5417050691244238, "percentage": 77.09, "elapsed_time": "9:18:25", "remaining_time": "2:46:00"} +{"current_steps": 6692, "total_steps": 8680, "loss": 0.7612431049346924, "lr": 2.736341962613701e-07, "epoch": 1.5419354838709678, "percentage": 77.1, "elapsed_time": "9:18:30", "remaining_time": "2:45:54"} +{"current_steps": 6693, "total_steps": 8680, "loss": 0.6974390745162964, "lr": 2.733723956298951e-07, "epoch": 1.5421658986175115, "percentage": 77.11, "elapsed_time": "9:18:34", "remaining_time": "2:45:49"} +{"current_steps": 6694, "total_steps": 8680, "loss": 0.7946817874908447, "lr": 2.7311070046745476e-07, "epoch": 1.5423963133640552, "percentage": 77.12, "elapsed_time": "9:18:38", "remaining_time": "2:45:44"} +{"current_steps": 6695, "total_steps": 8680, "loss": 0.7801793813705444, "lr": 2.728491108120342e-07, "epoch": 1.5426267281105992, "percentage": 77.13, "elapsed_time": "9:18:42", "remaining_time": "2:45:39"} +{"current_steps": 6696, "total_steps": 8680, "loss": 0.720335066318512, "lr": 2.725876267016023e-07, "epoch": 1.5428571428571427, "percentage": 77.14, "elapsed_time": "9:18:47", "remaining_time": "2:45:34"} +{"current_steps": 6697, "total_steps": 8680, "loss": 0.6820393800735474, "lr": 2.7232624817411376e-07, "epoch": 1.5430875576036867, "percentage": 77.15, "elapsed_time": "9:18:53", "remaining_time": "2:45:29"} +{"current_steps": 6698, "total_steps": 8680, "loss": 0.8217613697052002, "lr": 2.7206497526750694e-07, "epoch": 1.5433179723502304, "percentage": 77.17, "elapsed_time": "9:18:57", "remaining_time": "2:45:24"} +{"current_steps": 6699, "total_steps": 8680, "loss": 0.7600520849227905, "lr": 2.7180380801970525e-07, "epoch": 1.543548387096774, 
"percentage": 77.18, "elapsed_time": "9:19:03", "remaining_time": "2:45:19"} +{"current_steps": 6700, "total_steps": 8680, "loss": 0.9402344226837158, "lr": 2.7154274646861687e-07, "epoch": 1.543778801843318, "percentage": 77.19, "elapsed_time": "9:19:07", "remaining_time": "2:45:14"} +{"current_steps": 6701, "total_steps": 8680, "loss": 0.7470760345458984, "lr": 2.7128179065213417e-07, "epoch": 1.5440092165898618, "percentage": 77.2, "elapsed_time": "9:19:16", "remaining_time": "2:45:10"} +{"current_steps": 6702, "total_steps": 8680, "loss": 0.6915948390960693, "lr": 2.710209406081353e-07, "epoch": 1.5442396313364055, "percentage": 77.21, "elapsed_time": "9:19:21", "remaining_time": "2:45:05"} +{"current_steps": 6703, "total_steps": 8680, "loss": 0.7554904222488403, "lr": 2.707601963744817e-07, "epoch": 1.5444700460829495, "percentage": 77.22, "elapsed_time": "9:19:26", "remaining_time": "2:45:00"} +{"current_steps": 6704, "total_steps": 8680, "loss": 0.8197575807571411, "lr": 2.7049955798902026e-07, "epoch": 1.544700460829493, "percentage": 77.24, "elapsed_time": "9:19:30", "remaining_time": "2:44:54"} +{"current_steps": 6705, "total_steps": 8680, "loss": 0.7106794118881226, "lr": 2.702390254895819e-07, "epoch": 1.544930875576037, "percentage": 77.25, "elapsed_time": "9:19:34", "remaining_time": "2:44:49"} +{"current_steps": 6706, "total_steps": 8680, "loss": 0.6320512294769287, "lr": 2.699785989139832e-07, "epoch": 1.5451612903225806, "percentage": 77.26, "elapsed_time": "9:19:40", "remaining_time": "2:44:44"} +{"current_steps": 6707, "total_steps": 8680, "loss": 0.8327566385269165, "lr": 2.697182783000246e-07, "epoch": 1.5453917050691244, "percentage": 77.27, "elapsed_time": "9:19:44", "remaining_time": "2:44:39"} +{"current_steps": 6708, "total_steps": 8680, "loss": 0.8732178211212158, "lr": 2.6945806368549063e-07, "epoch": 1.5456221198156683, "percentage": 77.28, "elapsed_time": "9:19:49", "remaining_time": "2:44:34"} +{"current_steps": 6709, "total_steps": 
8680, "loss": 0.8709380626678467, "lr": 2.69197955108152e-07, "epoch": 1.5458525345622118, "percentage": 77.29, "elapsed_time": "9:19:53", "remaining_time": "2:44:29"} +{"current_steps": 6710, "total_steps": 8680, "loss": 0.7821739912033081, "lr": 2.689379526057628e-07, "epoch": 1.5460829493087558, "percentage": 77.3, "elapsed_time": "9:19:59", "remaining_time": "2:44:24"} +{"current_steps": 6711, "total_steps": 8680, "loss": 0.8658162355422974, "lr": 2.686780562160615e-07, "epoch": 1.5463133640552995, "percentage": 77.32, "elapsed_time": "9:20:04", "remaining_time": "2:44:19"} +{"current_steps": 6712, "total_steps": 8680, "loss": 0.6354731321334839, "lr": 2.6841826597677274e-07, "epoch": 1.5465437788018432, "percentage": 77.33, "elapsed_time": "9:20:08", "remaining_time": "2:44:14"} +{"current_steps": 6713, "total_steps": 8680, "loss": 0.8000082969665527, "lr": 2.68158581925604e-07, "epoch": 1.5467741935483872, "percentage": 77.34, "elapsed_time": "9:20:12", "remaining_time": "2:44:09"} +{"current_steps": 6714, "total_steps": 8680, "loss": 0.7998030185699463, "lr": 2.6789900410024804e-07, "epoch": 1.547004608294931, "percentage": 77.35, "elapsed_time": "9:20:18", "remaining_time": "2:44:04"} +{"current_steps": 6715, "total_steps": 8680, "loss": 0.861609935760498, "lr": 2.676395325383827e-07, "epoch": 1.5472350230414746, "percentage": 77.36, "elapsed_time": "9:20:23", "remaining_time": "2:43:59"} +{"current_steps": 6716, "total_steps": 8680, "loss": 0.8119577765464783, "lr": 2.6738016727766976e-07, "epoch": 1.5474654377880186, "percentage": 77.37, "elapsed_time": "9:20:27", "remaining_time": "2:43:53"} +{"current_steps": 6717, "total_steps": 8680, "loss": 0.7704594135284424, "lr": 2.671209083557553e-07, "epoch": 1.547695852534562, "percentage": 77.38, "elapsed_time": "9:20:32", "remaining_time": "2:43:48"} +{"current_steps": 6718, "total_steps": 8680, "loss": 0.7577236890792847, "lr": 2.6686175581027114e-07, "epoch": 1.547926267281106, "percentage": 77.4, 
"elapsed_time": "9:20:38", "remaining_time": "2:43:44"} +{"current_steps": 6719, "total_steps": 8680, "loss": 0.8362265825271606, "lr": 2.666027096788326e-07, "epoch": 1.5481566820276498, "percentage": 77.41, "elapsed_time": "9:20:44", "remaining_time": "2:43:39"} +{"current_steps": 6720, "total_steps": 8680, "loss": 0.7604315280914307, "lr": 2.6634376999903984e-07, "epoch": 1.5483870967741935, "percentage": 77.42, "elapsed_time": "9:20:49", "remaining_time": "2:43:34"} +{"current_steps": 6721, "total_steps": 8680, "loss": 0.7181323766708374, "lr": 2.6608493680847757e-07, "epoch": 1.5486175115207375, "percentage": 77.43, "elapsed_time": "9:20:55", "remaining_time": "2:43:29"} +{"current_steps": 6722, "total_steps": 8680, "loss": 0.8613896369934082, "lr": 2.6582621014471495e-07, "epoch": 1.548847926267281, "percentage": 77.44, "elapsed_time": "9:21:00", "remaining_time": "2:43:24"} +{"current_steps": 6723, "total_steps": 8680, "loss": 0.6254151463508606, "lr": 2.6556759004530616e-07, "epoch": 1.549078341013825, "percentage": 77.45, "elapsed_time": "9:21:05", "remaining_time": "2:43:19"} +{"current_steps": 6724, "total_steps": 8680, "loss": 0.7960973381996155, "lr": 2.6530907654778957e-07, "epoch": 1.5493087557603686, "percentage": 77.47, "elapsed_time": "9:21:10", "remaining_time": "2:43:14"} +{"current_steps": 6725, "total_steps": 8680, "loss": 0.7899094820022583, "lr": 2.6505066968968747e-07, "epoch": 1.5495391705069124, "percentage": 77.48, "elapsed_time": "9:21:15", "remaining_time": "2:43:09"} +{"current_steps": 6726, "total_steps": 8680, "loss": 0.6578950881958008, "lr": 2.647923695085081e-07, "epoch": 1.5497695852534563, "percentage": 77.49, "elapsed_time": "9:21:20", "remaining_time": "2:43:04"} +{"current_steps": 6727, "total_steps": 8680, "loss": 0.737798810005188, "lr": 2.64534176041743e-07, "epoch": 1.55, "percentage": 77.5, "elapsed_time": "9:21:25", "remaining_time": "2:42:59"} +{"current_steps": 6728, "total_steps": 8680, "loss": 0.7809627056121826, 
"lr": 2.642760893268684e-07, "epoch": 1.5502304147465438, "percentage": 77.51, "elapsed_time": "9:21:30", "remaining_time": "2:42:54"} +{"current_steps": 6729, "total_steps": 8680, "loss": 0.6693655252456665, "lr": 2.640181094013456e-07, "epoch": 1.5504608294930877, "percentage": 77.52, "elapsed_time": "9:21:37", "remaining_time": "2:42:50"} +{"current_steps": 6730, "total_steps": 8680, "loss": 0.7264609932899475, "lr": 2.6376023630262003e-07, "epoch": 1.5506912442396312, "percentage": 77.53, "elapsed_time": "9:21:43", "remaining_time": "2:42:45"} +{"current_steps": 6731, "total_steps": 8680, "loss": 0.7585712671279907, "lr": 2.635024700681211e-07, "epoch": 1.5509216589861752, "percentage": 77.55, "elapsed_time": "9:21:50", "remaining_time": "2:42:41"} +{"current_steps": 6732, "total_steps": 8680, "loss": 0.7335324287414551, "lr": 2.6324481073526404e-07, "epoch": 1.551152073732719, "percentage": 77.56, "elapsed_time": "9:21:54", "remaining_time": "2:42:35"} +{"current_steps": 6733, "total_steps": 8680, "loss": 0.835372805595398, "lr": 2.629872583414473e-07, "epoch": 1.5513824884792626, "percentage": 77.57, "elapsed_time": "9:22:00", "remaining_time": "2:42:31"} +{"current_steps": 6734, "total_steps": 8680, "loss": 0.8069926500320435, "lr": 2.6272981292405405e-07, "epoch": 1.5516129032258066, "percentage": 77.58, "elapsed_time": "9:22:04", "remaining_time": "2:42:25"} +{"current_steps": 6735, "total_steps": 8680, "loss": 0.7548434138298035, "lr": 2.6247247452045285e-07, "epoch": 1.55184331797235, "percentage": 77.59, "elapsed_time": "9:22:08", "remaining_time": "2:42:20"} +{"current_steps": 6736, "total_steps": 8680, "loss": 0.6907505989074707, "lr": 2.6221524316799546e-07, "epoch": 1.552073732718894, "percentage": 77.6, "elapsed_time": "9:22:14", "remaining_time": "2:42:15"} +{"current_steps": 6737, "total_steps": 8680, "loss": 0.8544988632202148, "lr": 2.619581189040185e-07, "epoch": 1.5523041474654378, "percentage": 77.62, "elapsed_time": "9:22:20", 
"remaining_time": "2:42:10"} +{"current_steps": 6738, "total_steps": 8680, "loss": 0.7176710367202759, "lr": 2.6170110176584404e-07, "epoch": 1.5525345622119815, "percentage": 77.63, "elapsed_time": "9:22:25", "remaining_time": "2:42:05"} +{"current_steps": 6739, "total_steps": 8680, "loss": 0.7160323858261108, "lr": 2.6144419179077715e-07, "epoch": 1.5527649769585254, "percentage": 77.64, "elapsed_time": "9:22:30", "remaining_time": "2:42:00"} +{"current_steps": 6740, "total_steps": 8680, "loss": 0.7749248743057251, "lr": 2.6118738901610806e-07, "epoch": 1.5529953917050692, "percentage": 77.65, "elapsed_time": "9:22:34", "remaining_time": "2:41:55"} +{"current_steps": 6741, "total_steps": 8680, "loss": 0.7701436281204224, "lr": 2.6093069347911145e-07, "epoch": 1.553225806451613, "percentage": 77.66, "elapsed_time": "9:22:39", "remaining_time": "2:41:50"} +{"current_steps": 6742, "total_steps": 8680, "loss": 0.6725181341171265, "lr": 2.606741052170459e-07, "epoch": 1.5534562211981566, "percentage": 77.67, "elapsed_time": "9:22:45", "remaining_time": "2:41:45"} +{"current_steps": 6743, "total_steps": 8680, "loss": 0.7730624675750732, "lr": 2.6041762426715563e-07, "epoch": 1.5536866359447004, "percentage": 77.68, "elapsed_time": "9:22:51", "remaining_time": "2:41:41"} +{"current_steps": 6744, "total_steps": 8680, "loss": 0.7083867788314819, "lr": 2.601612506666682e-07, "epoch": 1.5539170506912443, "percentage": 77.7, "elapsed_time": "9:22:56", "remaining_time": "2:41:36"} +{"current_steps": 6745, "total_steps": 8680, "loss": 0.7680408954620361, "lr": 2.599049844527953e-07, "epoch": 1.554147465437788, "percentage": 77.71, "elapsed_time": "9:23:02", "remaining_time": "2:41:31"} +{"current_steps": 6746, "total_steps": 8680, "loss": 0.7145194411277771, "lr": 2.596488256627346e-07, "epoch": 1.5543778801843318, "percentage": 77.72, "elapsed_time": "9:23:08", "remaining_time": "2:41:26"} +{"current_steps": 6747, "total_steps": 8680, "loss": 0.8626812696456909, "lr": 
2.593927743336667e-07, "epoch": 1.5546082949308757, "percentage": 77.73, "elapsed_time": "9:23:12", "remaining_time": "2:41:21"} +{"current_steps": 6748, "total_steps": 8680, "loss": 0.775201678276062, "lr": 2.591368305027569e-07, "epoch": 1.5548387096774192, "percentage": 77.74, "elapsed_time": "9:23:18", "remaining_time": "2:41:16"} +{"current_steps": 6749, "total_steps": 8680, "loss": 0.9363858699798584, "lr": 2.588809942071557e-07, "epoch": 1.5550691244239632, "percentage": 77.75, "elapsed_time": "9:23:22", "remaining_time": "2:41:11"} +{"current_steps": 6750, "total_steps": 8680, "loss": 0.8079385757446289, "lr": 2.5862526548399697e-07, "epoch": 1.555299539170507, "percentage": 77.76, "elapsed_time": "9:23:27", "remaining_time": "2:41:06"} +{"current_steps": 6751, "total_steps": 8680, "loss": 0.8635082840919495, "lr": 2.5836964437039934e-07, "epoch": 1.5555299539170506, "percentage": 77.78, "elapsed_time": "9:23:33", "remaining_time": "2:41:01"} +{"current_steps": 6752, "total_steps": 8680, "loss": 0.7840827703475952, "lr": 2.581141309034662e-07, "epoch": 1.5557603686635946, "percentage": 77.79, "elapsed_time": "9:23:39", "remaining_time": "2:40:56"} +{"current_steps": 6753, "total_steps": 8680, "loss": 0.7833336591720581, "lr": 2.5785872512028497e-07, "epoch": 1.5559907834101383, "percentage": 77.8, "elapsed_time": "9:23:45", "remaining_time": "2:40:52"} +{"current_steps": 6754, "total_steps": 8680, "loss": 0.7340226173400879, "lr": 2.576034270579269e-07, "epoch": 1.556221198156682, "percentage": 77.81, "elapsed_time": "9:23:51", "remaining_time": "2:40:47"} +{"current_steps": 6755, "total_steps": 8680, "loss": 0.6423541307449341, "lr": 2.5734823675344895e-07, "epoch": 1.5564516129032258, "percentage": 77.82, "elapsed_time": "9:23:56", "remaining_time": "2:40:42"} +{"current_steps": 6756, "total_steps": 8680, "loss": 0.7772454619407654, "lr": 2.570931542438913e-07, "epoch": 1.5566820276497695, "percentage": 77.83, "elapsed_time": "9:24:01", "remaining_time": 
"2:40:37"} +{"current_steps": 6757, "total_steps": 8680, "loss": 0.8113390803337097, "lr": 2.568381795662785e-07, "epoch": 1.5569124423963134, "percentage": 77.85, "elapsed_time": "9:24:05", "remaining_time": "2:40:32"} +{"current_steps": 6758, "total_steps": 8680, "loss": 0.6688467264175415, "lr": 2.5658331275762045e-07, "epoch": 1.5571428571428572, "percentage": 77.86, "elapsed_time": "9:24:12", "remaining_time": "2:40:27"} +{"current_steps": 6759, "total_steps": 8680, "loss": 0.8140766620635986, "lr": 2.5632855385491037e-07, "epoch": 1.557373271889401, "percentage": 77.87, "elapsed_time": "9:24:18", "remaining_time": "2:40:23"} +{"current_steps": 6760, "total_steps": 8680, "loss": 0.7661154270172119, "lr": 2.560739028951262e-07, "epoch": 1.5576036866359448, "percentage": 77.88, "elapsed_time": "9:24:24", "remaining_time": "2:40:18"} +{"current_steps": 6761, "total_steps": 8680, "loss": 0.6781749725341797, "lr": 2.558193599152302e-07, "epoch": 1.5578341013824883, "percentage": 77.89, "elapsed_time": "9:24:28", "remaining_time": "2:40:13"} +{"current_steps": 6762, "total_steps": 8680, "loss": 0.8885331749916077, "lr": 2.5556492495216865e-07, "epoch": 1.5580645161290323, "percentage": 77.9, "elapsed_time": "9:24:33", "remaining_time": "2:40:07"} +{"current_steps": 6763, "total_steps": 8680, "loss": 0.799277663230896, "lr": 2.55310598042873e-07, "epoch": 1.558294930875576, "percentage": 77.91, "elapsed_time": "9:24:37", "remaining_time": "2:40:02"} +{"current_steps": 6764, "total_steps": 8680, "loss": 0.8288404941558838, "lr": 2.550563792242583e-07, "epoch": 1.5585253456221198, "percentage": 77.93, "elapsed_time": "9:24:42", "remaining_time": "2:39:57"} +{"current_steps": 6765, "total_steps": 8680, "loss": 0.9452340602874756, "lr": 2.5480226853322397e-07, "epoch": 1.5587557603686637, "percentage": 77.94, "elapsed_time": "9:24:48", "remaining_time": "2:39:52"} +{"current_steps": 6766, "total_steps": 8680, "loss": 0.6716231107711792, "lr": 2.5454826600665347e-07, 
"epoch": 1.5589861751152074, "percentage": 77.95, "elapsed_time": "9:24:53", "remaining_time": "2:39:48"} +{"current_steps": 6767, "total_steps": 8680, "loss": 0.90239417552948, "lr": 2.542943716814157e-07, "epoch": 1.5592165898617512, "percentage": 77.96, "elapsed_time": "9:25:00", "remaining_time": "2:39:43"} +{"current_steps": 6768, "total_steps": 8680, "loss": 0.7895521521568298, "lr": 2.5404058559436225e-07, "epoch": 1.5594470046082949, "percentage": 77.97, "elapsed_time": "9:25:06", "remaining_time": "2:39:38"} +{"current_steps": 6769, "total_steps": 8680, "loss": 0.8097352385520935, "lr": 2.537869077823307e-07, "epoch": 1.5596774193548386, "percentage": 77.98, "elapsed_time": "9:25:13", "remaining_time": "2:39:34"} +{"current_steps": 6770, "total_steps": 8680, "loss": 0.7599455118179321, "lr": 2.535333382821415e-07, "epoch": 1.5599078341013826, "percentage": 78.0, "elapsed_time": "9:25:18", "remaining_time": "2:39:29"} +{"current_steps": 6771, "total_steps": 8680, "loss": 0.8735921382904053, "lr": 2.5327987713059986e-07, "epoch": 1.5601382488479263, "percentage": 78.01, "elapsed_time": "9:25:23", "remaining_time": "2:39:24"} +{"current_steps": 6772, "total_steps": 8680, "loss": 0.7263825535774231, "lr": 2.530265243644958e-07, "epoch": 1.56036866359447, "percentage": 78.02, "elapsed_time": "9:25:28", "remaining_time": "2:39:19"} +{"current_steps": 6773, "total_steps": 8680, "loss": 0.8642966747283936, "lr": 2.5277328002060296e-07, "epoch": 1.560599078341014, "percentage": 78.03, "elapsed_time": "9:25:33", "remaining_time": "2:39:14"} +{"current_steps": 6774, "total_steps": 8680, "loss": 0.6928948163986206, "lr": 2.525201441356789e-07, "epoch": 1.5608294930875575, "percentage": 78.04, "elapsed_time": "9:25:38", "remaining_time": "2:39:09"} +{"current_steps": 6775, "total_steps": 8680, "loss": 0.7841427326202393, "lr": 2.522671167464667e-07, "epoch": 1.5610599078341014, "percentage": 78.05, "elapsed_time": "9:25:42", "remaining_time": "2:39:04"} 
+{"current_steps": 6776, "total_steps": 8680, "loss": 0.6539766192436218, "lr": 2.5201419788969267e-07, "epoch": 1.5612903225806452, "percentage": 78.06, "elapsed_time": "9:25:49", "remaining_time": "2:38:59"} +{"current_steps": 6777, "total_steps": 8680, "loss": 0.7817956805229187, "lr": 2.5176138760206734e-07, "epoch": 1.5615207373271889, "percentage": 78.08, "elapsed_time": "9:25:54", "remaining_time": "2:38:54"} +{"current_steps": 6778, "total_steps": 8680, "loss": 0.7847198843955994, "lr": 2.5150868592028626e-07, "epoch": 1.5617511520737328, "percentage": 78.09, "elapsed_time": "9:25:59", "remaining_time": "2:38:49"} +{"current_steps": 6779, "total_steps": 8680, "loss": 0.8248952627182007, "lr": 2.5125609288102856e-07, "epoch": 1.5619815668202763, "percentage": 78.1, "elapsed_time": "9:26:05", "remaining_time": "2:38:44"} +{"current_steps": 6780, "total_steps": 8680, "loss": 0.8099820613861084, "lr": 2.510036085209578e-07, "epoch": 1.5622119815668203, "percentage": 78.11, "elapsed_time": "9:26:09", "remaining_time": "2:38:39"} +{"current_steps": 6781, "total_steps": 8680, "loss": 0.7764754295349121, "lr": 2.5075123287672173e-07, "epoch": 1.562442396313364, "percentage": 78.12, "elapsed_time": "9:26:15", "remaining_time": "2:38:34"} +{"current_steps": 6782, "total_steps": 8680, "loss": 0.8055214285850525, "lr": 2.5049896598495234e-07, "epoch": 1.5626728110599077, "percentage": 78.13, "elapsed_time": "9:26:21", "remaining_time": "2:38:29"} +{"current_steps": 6783, "total_steps": 8680, "loss": 0.7536123991012573, "lr": 2.502468078822656e-07, "epoch": 1.5629032258064517, "percentage": 78.15, "elapsed_time": "9:26:27", "remaining_time": "2:38:25"} +{"current_steps": 6784, "total_steps": 8680, "loss": 0.8212461471557617, "lr": 2.499947586052623e-07, "epoch": 1.5631336405529954, "percentage": 78.16, "elapsed_time": "9:26:31", "remaining_time": "2:38:19"} +{"current_steps": 6785, "total_steps": 8680, "loss": 0.7297977209091187, "lr": 2.49742818190527e-07, "epoch": 
1.5633640552995391, "percentage": 78.17, "elapsed_time": "9:26:36", "remaining_time": "2:38:15"} +{"current_steps": 6786, "total_steps": 8680, "loss": 0.752082109451294, "lr": 2.494909866746282e-07, "epoch": 1.563594470046083, "percentage": 78.18, "elapsed_time": "9:26:42", "remaining_time": "2:38:10"} +{"current_steps": 6787, "total_steps": 8680, "loss": 0.9181928634643555, "lr": 2.4923926409411934e-07, "epoch": 1.5638248847926266, "percentage": 78.19, "elapsed_time": "9:26:46", "remaining_time": "2:38:05"} +{"current_steps": 6788, "total_steps": 8680, "loss": 0.8607058525085449, "lr": 2.489876504855374e-07, "epoch": 1.5640552995391706, "percentage": 78.2, "elapsed_time": "9:26:51", "remaining_time": "2:37:59"} +{"current_steps": 6789, "total_steps": 8680, "loss": 0.9659625887870789, "lr": 2.4873614588540347e-07, "epoch": 1.5642857142857143, "percentage": 78.21, "elapsed_time": "9:26:55", "remaining_time": "2:37:54"} +{"current_steps": 6790, "total_steps": 8680, "loss": 0.8357822299003601, "lr": 2.4848475033022377e-07, "epoch": 1.564516129032258, "percentage": 78.23, "elapsed_time": "9:27:02", "remaining_time": "2:37:50"} +{"current_steps": 6791, "total_steps": 8680, "loss": 0.7871281504631042, "lr": 2.482334638564877e-07, "epoch": 1.564746543778802, "percentage": 78.24, "elapsed_time": "9:27:09", "remaining_time": "2:37:45"} +{"current_steps": 6792, "total_steps": 8680, "loss": 0.7221591472625732, "lr": 2.4798228650066874e-07, "epoch": 1.5649769585253455, "percentage": 78.25, "elapsed_time": "9:27:17", "remaining_time": "2:37:41"} +{"current_steps": 6793, "total_steps": 8680, "loss": 0.7399123907089233, "lr": 2.4773121829922586e-07, "epoch": 1.5652073732718894, "percentage": 78.26, "elapsed_time": "9:27:22", "remaining_time": "2:37:36"} +{"current_steps": 6794, "total_steps": 8680, "loss": 0.8159279227256775, "lr": 2.474802592886003e-07, "epoch": 1.5654377880184331, "percentage": 78.27, "elapsed_time": "9:27:26", "remaining_time": "2:37:31"} +{"current_steps": 
6795, "total_steps": 8680, "loss": 0.8222753405570984, "lr": 2.472294095052192e-07, "epoch": 1.5656682027649769, "percentage": 78.28, "elapsed_time": "9:27:32", "remaining_time": "2:37:26"} +{"current_steps": 6796, "total_steps": 8680, "loss": 0.6586673259735107, "lr": 2.469786689854928e-07, "epoch": 1.5658986175115208, "percentage": 78.29, "elapsed_time": "9:27:38", "remaining_time": "2:37:21"} +{"current_steps": 6797, "total_steps": 8680, "loss": 0.8361790180206299, "lr": 2.467280377658154e-07, "epoch": 1.5661290322580645, "percentage": 78.31, "elapsed_time": "9:27:44", "remaining_time": "2:37:17"} +{"current_steps": 6798, "total_steps": 8680, "loss": 0.7669099569320679, "lr": 2.464775158825665e-07, "epoch": 1.5663594470046083, "percentage": 78.32, "elapsed_time": "9:27:49", "remaining_time": "2:37:11"} +{"current_steps": 6799, "total_steps": 8680, "loss": 0.7876452207565308, "lr": 2.462271033721086e-07, "epoch": 1.5665898617511522, "percentage": 78.33, "elapsed_time": "9:27:54", "remaining_time": "2:37:06"} +{"current_steps": 6800, "total_steps": 8680, "loss": 0.7932916879653931, "lr": 2.459768002707887e-07, "epoch": 1.5668202764976957, "percentage": 78.34, "elapsed_time": "9:28:00", "remaining_time": "2:37:02"} +{"current_steps": 6801, "total_steps": 8680, "loss": 0.734020471572876, "lr": 2.457266066149382e-07, "epoch": 1.5670506912442397, "percentage": 78.35, "elapsed_time": "9:28:07", "remaining_time": "2:36:57"} +{"current_steps": 6802, "total_steps": 8680, "loss": 0.6975284814834595, "lr": 2.4547652244087216e-07, "epoch": 1.5672811059907834, "percentage": 78.36, "elapsed_time": "9:28:13", "remaining_time": "2:36:52"} +{"current_steps": 6803, "total_steps": 8680, "loss": 0.7214465737342834, "lr": 2.452265477848896e-07, "epoch": 1.5675115207373271, "percentage": 78.38, "elapsed_time": "9:28:18", "remaining_time": "2:36:48"} +{"current_steps": 6804, "total_steps": 8680, "loss": 0.8645110130310059, "lr": 2.4497668268327485e-07, "epoch": 1.567741935483871, 
"percentage": 78.39, "elapsed_time": "9:28:25", "remaining_time": "2:36:43"} +{"current_steps": 6805, "total_steps": 8680, "loss": 0.7389887571334839, "lr": 2.4472692717229504e-07, "epoch": 1.5679723502304146, "percentage": 78.4, "elapsed_time": "9:28:30", "remaining_time": "2:36:38"} +{"current_steps": 6806, "total_steps": 8680, "loss": 0.8462876081466675, "lr": 2.4447728128820165e-07, "epoch": 1.5682027649769585, "percentage": 78.41, "elapsed_time": "9:28:36", "remaining_time": "2:36:33"} +{"current_steps": 6807, "total_steps": 8680, "loss": 0.824936032295227, "lr": 2.44227745067231e-07, "epoch": 1.5684331797235023, "percentage": 78.42, "elapsed_time": "9:28:40", "remaining_time": "2:36:28"} +{"current_steps": 6808, "total_steps": 8680, "loss": 0.8516823053359985, "lr": 2.439783185456027e-07, "epoch": 1.568663594470046, "percentage": 78.43, "elapsed_time": "9:28:47", "remaining_time": "2:36:24"} +{"current_steps": 6809, "total_steps": 8680, "loss": 0.6154674291610718, "lr": 2.4372900175952015e-07, "epoch": 1.56889400921659, "percentage": 78.44, "elapsed_time": "9:28:55", "remaining_time": "2:36:19"} +{"current_steps": 6810, "total_steps": 8680, "loss": 0.7769260406494141, "lr": 2.434797947451722e-07, "epoch": 1.5691244239631337, "percentage": 78.46, "elapsed_time": "9:29:01", "remaining_time": "2:36:15"} +{"current_steps": 6811, "total_steps": 8680, "loss": 0.9525332450866699, "lr": 2.432306975387306e-07, "epoch": 1.5693548387096774, "percentage": 78.47, "elapsed_time": "9:29:07", "remaining_time": "2:36:10"} +{"current_steps": 6812, "total_steps": 8680, "loss": 0.7537581920623779, "lr": 2.429817101763511e-07, "epoch": 1.5695852534562214, "percentage": 78.48, "elapsed_time": "9:29:14", "remaining_time": "2:36:06"} +{"current_steps": 6813, "total_steps": 8680, "loss": 0.814711332321167, "lr": 2.427328326941744e-07, "epoch": 1.5698156682027649, "percentage": 78.49, "elapsed_time": "9:29:20", "remaining_time": "2:36:01"} +{"current_steps": 6814, "total_steps": 8680, 
"loss": 0.708736777305603, "lr": 2.4248406512832466e-07, "epoch": 1.5700460829493088, "percentage": 78.5, "elapsed_time": "9:29:26", "remaining_time": "2:35:56"} +{"current_steps": 6815, "total_steps": 8680, "loss": 0.6757712960243225, "lr": 2.422354075149098e-07, "epoch": 1.5702764976958525, "percentage": 78.51, "elapsed_time": "9:29:34", "remaining_time": "2:35:52"} +{"current_steps": 6816, "total_steps": 8680, "loss": 0.736266553401947, "lr": 2.4198685989002257e-07, "epoch": 1.5705069124423963, "percentage": 78.53, "elapsed_time": "9:29:39", "remaining_time": "2:35:47"} +{"current_steps": 6817, "total_steps": 8680, "loss": 0.7423173189163208, "lr": 2.417384222897392e-07, "epoch": 1.5707373271889402, "percentage": 78.54, "elapsed_time": "9:29:45", "remaining_time": "2:35:42"} +{"current_steps": 6818, "total_steps": 8680, "loss": 0.7260550260543823, "lr": 2.414900947501197e-07, "epoch": 1.5709677419354837, "percentage": 78.55, "elapsed_time": "9:29:50", "remaining_time": "2:35:37"} +{"current_steps": 6819, "total_steps": 8680, "loss": 0.7125939130783081, "lr": 2.4124187730720915e-07, "epoch": 1.5711981566820277, "percentage": 78.56, "elapsed_time": "9:29:55", "remaining_time": "2:35:32"} +{"current_steps": 6820, "total_steps": 8680, "loss": 0.7429558634757996, "lr": 2.409937699970356e-07, "epoch": 1.5714285714285714, "percentage": 78.57, "elapsed_time": "9:29:59", "remaining_time": "2:35:27"} +{"current_steps": 6821, "total_steps": 8680, "loss": 0.7166736721992493, "lr": 2.407457728556115e-07, "epoch": 1.5716589861751151, "percentage": 78.58, "elapsed_time": "9:30:03", "remaining_time": "2:35:21"} +{"current_steps": 6822, "total_steps": 8680, "loss": 0.7438491582870483, "lr": 2.4049788591893336e-07, "epoch": 1.571889400921659, "percentage": 78.59, "elapsed_time": "9:30:09", "remaining_time": "2:35:16"} +{"current_steps": 6823, "total_steps": 8680, "loss": 0.8031798601150513, "lr": 2.402501092229814e-07, "epoch": 1.5721198156682028, "percentage": 78.61, 
"elapsed_time": "9:30:13", "remaining_time": "2:35:11"} +{"current_steps": 6824, "total_steps": 8680, "loss": 0.7067087888717651, "lr": 2.400024428037206e-07, "epoch": 1.5723502304147465, "percentage": 78.62, "elapsed_time": "9:30:18", "remaining_time": "2:35:06"} +{"current_steps": 6825, "total_steps": 8680, "loss": 0.7147783041000366, "lr": 2.3975488669709906e-07, "epoch": 1.5725806451612905, "percentage": 78.63, "elapsed_time": "9:30:23", "remaining_time": "2:35:01"} +{"current_steps": 6826, "total_steps": 8680, "loss": 0.8534795641899109, "lr": 2.395074409390491e-07, "epoch": 1.572811059907834, "percentage": 78.64, "elapsed_time": "9:30:28", "remaining_time": "2:34:56"} +{"current_steps": 6827, "total_steps": 8680, "loss": 0.7630984783172607, "lr": 2.392601055654875e-07, "epoch": 1.573041474654378, "percentage": 78.65, "elapsed_time": "9:30:34", "remaining_time": "2:34:51"} +{"current_steps": 6828, "total_steps": 8680, "loss": 0.9395428895950317, "lr": 2.390128806123145e-07, "epoch": 1.5732718894009217, "percentage": 78.66, "elapsed_time": "9:30:38", "remaining_time": "2:34:46"} +{"current_steps": 6829, "total_steps": 8680, "loss": 0.7086023092269897, "lr": 2.3876576611541423e-07, "epoch": 1.5735023041474654, "percentage": 78.68, "elapsed_time": "9:30:44", "remaining_time": "2:34:41"} +{"current_steps": 6830, "total_steps": 8680, "loss": 0.6937201619148254, "lr": 2.385187621106555e-07, "epoch": 1.5737327188940093, "percentage": 78.69, "elapsed_time": "9:30:49", "remaining_time": "2:34:37"} +{"current_steps": 6831, "total_steps": 8680, "loss": 0.7339247465133667, "lr": 2.3827186863389037e-07, "epoch": 1.5739631336405528, "percentage": 78.7, "elapsed_time": "9:30:55", "remaining_time": "2:34:32"} +{"current_steps": 6832, "total_steps": 8680, "loss": 0.8453131318092346, "lr": 2.3802508572095493e-07, "epoch": 1.5741935483870968, "percentage": 78.71, "elapsed_time": "9:31:02", "remaining_time": "2:34:27"} +{"current_steps": 6833, "total_steps": 8680, "loss": 
0.7303619384765625, "lr": 2.377784134076698e-07, "epoch": 1.5744239631336405, "percentage": 78.72, "elapsed_time": "9:31:06", "remaining_time": "2:34:22"} +{"current_steps": 6834, "total_steps": 8680, "loss": 0.9635858535766602, "lr": 2.3753185172983893e-07, "epoch": 1.5746543778801843, "percentage": 78.73, "elapsed_time": "9:31:12", "remaining_time": "2:34:17"} +{"current_steps": 6835, "total_steps": 8680, "loss": 0.7174761295318604, "lr": 2.3728540072324998e-07, "epoch": 1.5748847926267282, "percentage": 78.74, "elapsed_time": "9:31:17", "remaining_time": "2:34:12"} +{"current_steps": 6836, "total_steps": 8680, "loss": 0.7375633716583252, "lr": 2.3703906042367584e-07, "epoch": 1.575115207373272, "percentage": 78.76, "elapsed_time": "9:31:23", "remaining_time": "2:34:08"} +{"current_steps": 6837, "total_steps": 8680, "loss": 0.8202652931213379, "lr": 2.3679283086687206e-07, "epoch": 1.5753456221198157, "percentage": 78.77, "elapsed_time": "9:31:29", "remaining_time": "2:34:03"} +{"current_steps": 6838, "total_steps": 8680, "loss": 0.8448499441146851, "lr": 2.3654671208857823e-07, "epoch": 1.5755760368663596, "percentage": 78.78, "elapsed_time": "9:31:33", "remaining_time": "2:33:57"} +{"current_steps": 6839, "total_steps": 8680, "loss": 0.7840893268585205, "lr": 2.3630070412451864e-07, "epoch": 1.5758064516129031, "percentage": 78.79, "elapsed_time": "9:31:39", "remaining_time": "2:33:53"} +{"current_steps": 6840, "total_steps": 8680, "loss": 0.8036940693855286, "lr": 2.3605480701040092e-07, "epoch": 1.576036866359447, "percentage": 78.8, "elapsed_time": "9:31:44", "remaining_time": "2:33:48"} +{"current_steps": 6841, "total_steps": 8680, "loss": 0.8333625793457031, "lr": 2.3580902078191666e-07, "epoch": 1.5762672811059908, "percentage": 78.81, "elapsed_time": "9:31:49", "remaining_time": "2:33:42"} +{"current_steps": 6842, "total_steps": 8680, "loss": 0.804919958114624, "lr": 2.3556334547474133e-07, "epoch": 1.5764976958525345, "percentage": 78.82, 
"elapsed_time": "9:31:55", "remaining_time": "2:33:38"} +{"current_steps": 6843, "total_steps": 8680, "loss": 0.752541720867157, "lr": 2.3531778112453416e-07, "epoch": 1.5767281105990785, "percentage": 78.84, "elapsed_time": "9:31:59", "remaining_time": "2:33:33"} +{"current_steps": 6844, "total_steps": 8680, "loss": 0.647051215171814, "lr": 2.3507232776693896e-07, "epoch": 1.576958525345622, "percentage": 78.85, "elapsed_time": "9:32:04", "remaining_time": "2:33:28"} +{"current_steps": 6845, "total_steps": 8680, "loss": 0.7546517848968506, "lr": 2.3482698543758285e-07, "epoch": 1.577188940092166, "percentage": 78.86, "elapsed_time": "9:32:09", "remaining_time": "2:33:23"} +{"current_steps": 6846, "total_steps": 8680, "loss": 0.8773425817489624, "lr": 2.345817541720766e-07, "epoch": 1.5774193548387097, "percentage": 78.87, "elapsed_time": "9:32:15", "remaining_time": "2:33:18"} +{"current_steps": 6847, "total_steps": 8680, "loss": 0.9538160562515259, "lr": 2.3433663400601567e-07, "epoch": 1.5776497695852534, "percentage": 78.88, "elapsed_time": "9:32:19", "remaining_time": "2:33:13"} +{"current_steps": 6848, "total_steps": 8680, "loss": 0.6275157332420349, "lr": 2.340916249749787e-07, "epoch": 1.5778801843317973, "percentage": 78.89, "elapsed_time": "9:32:26", "remaining_time": "2:33:08"} +{"current_steps": 6849, "total_steps": 8680, "loss": 0.7729284167289734, "lr": 2.3384672711452812e-07, "epoch": 1.578110599078341, "percentage": 78.91, "elapsed_time": "9:32:31", "remaining_time": "2:33:03"} +{"current_steps": 6850, "total_steps": 8680, "loss": 0.8361644148826599, "lr": 2.3360194046021108e-07, "epoch": 1.5783410138248848, "percentage": 78.92, "elapsed_time": "9:32:37", "remaining_time": "2:32:58"} +{"current_steps": 6851, "total_steps": 8680, "loss": 0.6782940626144409, "lr": 2.3335726504755793e-07, "epoch": 1.5785714285714287, "percentage": 78.93, "elapsed_time": "9:32:42", "remaining_time": "2:32:53"} +{"current_steps": 6852, "total_steps": 8680, "loss": 
0.8036615252494812, "lr": 2.3311270091208256e-07, "epoch": 1.5788018433179722, "percentage": 78.94, "elapsed_time": "9:32:46", "remaining_time": "2:32:48"} +{"current_steps": 6853, "total_steps": 8680, "loss": 0.8450125455856323, "lr": 2.3286824808928362e-07, "epoch": 1.5790322580645162, "percentage": 78.95, "elapsed_time": "9:32:52", "remaining_time": "2:32:43"} +{"current_steps": 6854, "total_steps": 8680, "loss": 0.6546198725700378, "lr": 2.3262390661464303e-07, "epoch": 1.57926267281106, "percentage": 78.96, "elapsed_time": "9:32:57", "remaining_time": "2:32:38"} +{"current_steps": 6855, "total_steps": 8680, "loss": 0.8201385140419006, "lr": 2.3237967652362612e-07, "epoch": 1.5794930875576036, "percentage": 78.97, "elapsed_time": "9:33:01", "remaining_time": "2:32:33"} +{"current_steps": 6856, "total_steps": 8680, "loss": 0.8753508925437927, "lr": 2.3213555785168336e-07, "epoch": 1.5797235023041476, "percentage": 78.99, "elapsed_time": "9:33:06", "remaining_time": "2:32:28"} +{"current_steps": 6857, "total_steps": 8680, "loss": 0.5884093642234802, "lr": 2.3189155063424782e-07, "epoch": 1.579953917050691, "percentage": 79.0, "elapsed_time": "9:33:10", "remaining_time": "2:32:22"} +{"current_steps": 6858, "total_steps": 8680, "loss": 0.6494029760360718, "lr": 2.3164765490673654e-07, "epoch": 1.580184331797235, "percentage": 79.01, "elapsed_time": "9:33:15", "remaining_time": "2:32:17"} +{"current_steps": 6859, "total_steps": 8680, "loss": 0.7407097220420837, "lr": 2.3140387070455126e-07, "epoch": 1.5804147465437788, "percentage": 79.02, "elapsed_time": "9:33:19", "remaining_time": "2:32:12"} +{"current_steps": 6860, "total_steps": 8680, "loss": 0.8934177160263062, "lr": 2.3116019806307673e-07, "epoch": 1.5806451612903225, "percentage": 79.03, "elapsed_time": "9:33:23", "remaining_time": "2:32:07"} +{"current_steps": 6861, "total_steps": 8680, "loss": 0.7487956881523132, "lr": 2.309166370176816e-07, "epoch": 1.5808755760368665, "percentage": 79.04, "elapsed_time": 
"9:33:29", "remaining_time": "2:32:02"} +{"current_steps": 6862, "total_steps": 8680, "loss": 0.7744357585906982, "lr": 2.3067318760371845e-07, "epoch": 1.5811059907834102, "percentage": 79.06, "elapsed_time": "9:33:35", "remaining_time": "2:31:58"} +{"current_steps": 6863, "total_steps": 8680, "loss": 0.8871743679046631, "lr": 2.304298498565237e-07, "epoch": 1.581336405529954, "percentage": 79.07, "elapsed_time": "9:33:41", "remaining_time": "2:31:53"} +{"current_steps": 6864, "total_steps": 8680, "loss": 0.7865666151046753, "lr": 2.3018662381141717e-07, "epoch": 1.5815668202764976, "percentage": 79.08, "elapsed_time": "9:33:46", "remaining_time": "2:31:48"} +{"current_steps": 6865, "total_steps": 8680, "loss": 0.8416531682014465, "lr": 2.2994350950370334e-07, "epoch": 1.5817972350230414, "percentage": 79.09, "elapsed_time": "9:33:51", "remaining_time": "2:31:43"} +{"current_steps": 6866, "total_steps": 8680, "loss": 0.8443950414657593, "lr": 2.2970050696866972e-07, "epoch": 1.5820276497695853, "percentage": 79.1, "elapsed_time": "9:33:55", "remaining_time": "2:31:37"} +{"current_steps": 6867, "total_steps": 8680, "loss": 0.7770054340362549, "lr": 2.2945761624158756e-07, "epoch": 1.582258064516129, "percentage": 79.11, "elapsed_time": "9:33:58", "remaining_time": "2:31:32"} +{"current_steps": 6868, "total_steps": 8680, "loss": 0.7263047695159912, "lr": 2.2921483735771252e-07, "epoch": 1.5824884792626728, "percentage": 79.12, "elapsed_time": "9:34:03", "remaining_time": "2:31:27"} +{"current_steps": 6869, "total_steps": 8680, "loss": 0.8288376927375793, "lr": 2.2897217035228312e-07, "epoch": 1.5827188940092167, "percentage": 79.14, "elapsed_time": "9:34:09", "remaining_time": "2:31:22"} +{"current_steps": 6870, "total_steps": 8680, "loss": 0.8325462937355042, "lr": 2.2872961526052292e-07, "epoch": 1.5829493087557602, "percentage": 79.15, "elapsed_time": "9:34:12", "remaining_time": "2:31:17"} +{"current_steps": 6871, "total_steps": 8680, "loss": 0.7412815093994141, 
"lr": 2.284871721176379e-07, "epoch": 1.5831797235023042, "percentage": 79.16, "elapsed_time": "9:34:17", "remaining_time": "2:31:11"} +{"current_steps": 6872, "total_steps": 8680, "loss": 0.8958117961883545, "lr": 2.2824484095881823e-07, "epoch": 1.583410138248848, "percentage": 79.17, "elapsed_time": "9:34:22", "remaining_time": "2:31:07"} +{"current_steps": 6873, "total_steps": 8680, "loss": 0.8374444246292114, "lr": 2.2800262181923858e-07, "epoch": 1.5836405529953916, "percentage": 79.18, "elapsed_time": "9:34:27", "remaining_time": "2:31:01"} +{"current_steps": 6874, "total_steps": 8680, "loss": 0.7900353670120239, "lr": 2.2776051473405634e-07, "epoch": 1.5838709677419356, "percentage": 79.19, "elapsed_time": "9:34:31", "remaining_time": "2:30:56"} +{"current_steps": 6875, "total_steps": 8680, "loss": 0.7420408725738525, "lr": 2.2751851973841285e-07, "epoch": 1.5841013824884793, "percentage": 79.21, "elapsed_time": "9:34:38", "remaining_time": "2:30:52"} +{"current_steps": 6876, "total_steps": 8680, "loss": 0.8902314305305481, "lr": 2.2727663686743382e-07, "epoch": 1.584331797235023, "percentage": 79.22, "elapsed_time": "9:34:43", "remaining_time": "2:30:47"} +{"current_steps": 6877, "total_steps": 8680, "loss": 0.739869236946106, "lr": 2.27034866156228e-07, "epoch": 1.5845622119815668, "percentage": 79.23, "elapsed_time": "9:34:48", "remaining_time": "2:30:42"} +{"current_steps": 6878, "total_steps": 8680, "loss": 0.8340646624565125, "lr": 2.2679320763988775e-07, "epoch": 1.5847926267281105, "percentage": 79.24, "elapsed_time": "9:34:54", "remaining_time": "2:30:37"} +{"current_steps": 6879, "total_steps": 8680, "loss": 0.7501030564308167, "lr": 2.2655166135349013e-07, "epoch": 1.5850230414746544, "percentage": 79.25, "elapsed_time": "9:34:58", "remaining_time": "2:30:32"} +{"current_steps": 6880, "total_steps": 8680, "loss": 0.722623348236084, "lr": 2.2631022733209504e-07, "epoch": 1.5852534562211982, "percentage": 79.26, "elapsed_time": "9:35:04", 
"remaining_time": "2:30:27"} +{"current_steps": 6881, "total_steps": 8680, "loss": 0.8319696187973022, "lr": 2.260689056107461e-07, "epoch": 1.585483870967742, "percentage": 79.27, "elapsed_time": "9:35:09", "remaining_time": "2:30:22"} +{"current_steps": 6882, "total_steps": 8680, "loss": 0.85502028465271, "lr": 2.2582769622447107e-07, "epoch": 1.5857142857142859, "percentage": 79.29, "elapsed_time": "9:35:14", "remaining_time": "2:30:17"} +{"current_steps": 6883, "total_steps": 8680, "loss": 0.7942626476287842, "lr": 2.2558659920828095e-07, "epoch": 1.5859447004608294, "percentage": 79.3, "elapsed_time": "9:35:18", "remaining_time": "2:30:12"} +{"current_steps": 6884, "total_steps": 8680, "loss": 0.6731030941009521, "lr": 2.253456145971705e-07, "epoch": 1.5861751152073733, "percentage": 79.31, "elapsed_time": "9:35:24", "remaining_time": "2:30:07"} +{"current_steps": 6885, "total_steps": 8680, "loss": 0.8479423522949219, "lr": 2.2510474242611887e-07, "epoch": 1.586405529953917, "percentage": 79.32, "elapsed_time": "9:35:28", "remaining_time": "2:30:02"} +{"current_steps": 6886, "total_steps": 8680, "loss": 0.7398810386657715, "lr": 2.2486398273008812e-07, "epoch": 1.5866359447004608, "percentage": 79.33, "elapsed_time": "9:35:34", "remaining_time": "2:29:57"} +{"current_steps": 6887, "total_steps": 8680, "loss": 0.8422881364822388, "lr": 2.246233355440238e-07, "epoch": 1.5868663594470047, "percentage": 79.34, "elapsed_time": "9:35:38", "remaining_time": "2:29:51"} +{"current_steps": 6888, "total_steps": 8680, "loss": 0.8307279944419861, "lr": 2.2438280090285612e-07, "epoch": 1.5870967741935482, "percentage": 79.35, "elapsed_time": "9:35:41", "remaining_time": "2:29:46"} +{"current_steps": 6889, "total_steps": 8680, "loss": 0.8329004049301147, "lr": 2.2414237884149821e-07, "epoch": 1.5873271889400922, "percentage": 79.37, "elapsed_time": "9:35:46", "remaining_time": "2:29:41"} +{"current_steps": 6890, "total_steps": 8680, "loss": 0.801641583442688, "lr": 
2.2390206939484645e-07, "epoch": 1.587557603686636, "percentage": 79.38, "elapsed_time": "9:35:51", "remaining_time": "2:29:36"} +{"current_steps": 6891, "total_steps": 8680, "loss": 0.9850986003875732, "lr": 2.2366187259778235e-07, "epoch": 1.5877880184331796, "percentage": 79.39, "elapsed_time": "9:35:56", "remaining_time": "2:29:31"} +{"current_steps": 6892, "total_steps": 8680, "loss": 0.7169715166091919, "lr": 2.2342178848516935e-07, "epoch": 1.5880184331797236, "percentage": 79.4, "elapsed_time": "9:36:01", "remaining_time": "2:29:26"} +{"current_steps": 6893, "total_steps": 8680, "loss": 0.7509033679962158, "lr": 2.2318181709185603e-07, "epoch": 1.5882488479262673, "percentage": 79.41, "elapsed_time": "9:36:08", "remaining_time": "2:29:21"} +{"current_steps": 6894, "total_steps": 8680, "loss": 0.6974655985832214, "lr": 2.2294195845267348e-07, "epoch": 1.588479262672811, "percentage": 79.42, "elapsed_time": "9:36:14", "remaining_time": "2:29:16"} +{"current_steps": 6895, "total_steps": 8680, "loss": 0.7388278245925903, "lr": 2.227022126024367e-07, "epoch": 1.588709677419355, "percentage": 79.44, "elapsed_time": "9:36:19", "remaining_time": "2:29:12"} +{"current_steps": 6896, "total_steps": 8680, "loss": 0.6479122638702393, "lr": 2.2246257957594506e-07, "epoch": 1.5889400921658985, "percentage": 79.45, "elapsed_time": "9:36:25", "remaining_time": "2:29:07"} +{"current_steps": 6897, "total_steps": 8680, "loss": 0.759338915348053, "lr": 2.222230594079807e-07, "epoch": 1.5891705069124424, "percentage": 79.46, "elapsed_time": "9:36:29", "remaining_time": "2:29:02"} +{"current_steps": 6898, "total_steps": 8680, "loss": 0.7299938201904297, "lr": 2.2198365213330937e-07, "epoch": 1.5894009216589862, "percentage": 79.47, "elapsed_time": "9:36:34", "remaining_time": "2:28:57"} +{"current_steps": 6899, "total_steps": 8680, "loss": 0.707555890083313, "lr": 2.2174435778668122e-07, "epoch": 1.58963133640553, "percentage": 79.48, "elapsed_time": "9:36:39", "remaining_time": 
"2:28:52"} +{"current_steps": 6900, "total_steps": 8680, "loss": 0.8311065435409546, "lr": 2.2150517640282918e-07, "epoch": 1.5898617511520738, "percentage": 79.49, "elapsed_time": "9:36:43", "remaining_time": "2:28:46"} +{"current_steps": 6901, "total_steps": 8680, "loss": 0.6494649648666382, "lr": 2.2126610801647028e-07, "epoch": 1.5900921658986173, "percentage": 79.5, "elapsed_time": "9:36:52", "remaining_time": "2:28:42"} +{"current_steps": 6902, "total_steps": 8680, "loss": 0.6563294529914856, "lr": 2.2102715266230486e-07, "epoch": 1.5903225806451613, "percentage": 79.52, "elapsed_time": "9:36:58", "remaining_time": "2:28:38"} +{"current_steps": 6903, "total_steps": 8680, "loss": 0.7426891326904297, "lr": 2.207883103750171e-07, "epoch": 1.590552995391705, "percentage": 79.53, "elapsed_time": "9:37:04", "remaining_time": "2:28:33"} +{"current_steps": 6904, "total_steps": 8680, "loss": 0.7074661254882812, "lr": 2.2054958118927413e-07, "epoch": 1.5907834101382488, "percentage": 79.54, "elapsed_time": "9:37:10", "remaining_time": "2:28:28"} +{"current_steps": 6905, "total_steps": 8680, "loss": 0.8407880663871765, "lr": 2.203109651397279e-07, "epoch": 1.5910138248847927, "percentage": 79.55, "elapsed_time": "9:37:15", "remaining_time": "2:28:23"} +{"current_steps": 6906, "total_steps": 8680, "loss": 0.7228440642356873, "lr": 2.2007246226101296e-07, "epoch": 1.5912442396313364, "percentage": 79.56, "elapsed_time": "9:37:20", "remaining_time": "2:28:18"} +{"current_steps": 6907, "total_steps": 8680, "loss": 0.6988812685012817, "lr": 2.1983407258774733e-07, "epoch": 1.5914746543778802, "percentage": 79.57, "elapsed_time": "9:37:25", "remaining_time": "2:28:13"} +{"current_steps": 6908, "total_steps": 8680, "loss": 0.793757438659668, "lr": 2.195957961545335e-07, "epoch": 1.591705069124424, "percentage": 79.59, "elapsed_time": "9:37:30", "remaining_time": "2:28:08"} +{"current_steps": 6909, "total_steps": 8680, "loss": 0.8621397018432617, "lr": 2.1935763299595678e-07, 
"epoch": 1.5919354838709676, "percentage": 79.6, "elapsed_time": "9:37:34", "remaining_time": "2:28:03"} +{"current_steps": 6910, "total_steps": 8680, "loss": 0.7661364078521729, "lr": 2.1911958314658598e-07, "epoch": 1.5921658986175116, "percentage": 79.61, "elapsed_time": "9:37:40", "remaining_time": "2:27:58"} +{"current_steps": 6911, "total_steps": 8680, "loss": 0.9322741031646729, "lr": 2.1888164664097408e-07, "epoch": 1.5923963133640553, "percentage": 79.62, "elapsed_time": "9:37:46", "remaining_time": "2:27:53"} +{"current_steps": 6912, "total_steps": 8680, "loss": 0.8411989212036133, "lr": 2.1864382351365717e-07, "epoch": 1.592626728110599, "percentage": 79.63, "elapsed_time": "9:37:50", "remaining_time": "2:27:48"} +{"current_steps": 6913, "total_steps": 8680, "loss": 0.8212479948997498, "lr": 2.1840611379915464e-07, "epoch": 1.592857142857143, "percentage": 79.64, "elapsed_time": "9:37:55", "remaining_time": "2:27:43"} +{"current_steps": 6914, "total_steps": 8680, "loss": 0.7875508069992065, "lr": 2.181685175319702e-07, "epoch": 1.5930875576036865, "percentage": 79.65, "elapsed_time": "9:38:00", "remaining_time": "2:27:38"} +{"current_steps": 6915, "total_steps": 8680, "loss": 0.8389852046966553, "lr": 2.1793103474659047e-07, "epoch": 1.5933179723502304, "percentage": 79.67, "elapsed_time": "9:38:05", "remaining_time": "2:27:33"} +{"current_steps": 6916, "total_steps": 8680, "loss": 0.8223046660423279, "lr": 2.1769366547748546e-07, "epoch": 1.5935483870967742, "percentage": 79.68, "elapsed_time": "9:38:12", "remaining_time": "2:27:28"} +{"current_steps": 6917, "total_steps": 8680, "loss": 0.8427159786224365, "lr": 2.1745640975910962e-07, "epoch": 1.5937788018433179, "percentage": 79.69, "elapsed_time": "9:38:16", "remaining_time": "2:27:23"} +{"current_steps": 6918, "total_steps": 8680, "loss": 0.7448060512542725, "lr": 2.172192676258996e-07, "epoch": 1.5940092165898618, "percentage": 79.7, "elapsed_time": "9:38:21", "remaining_time": "2:27:18"} 
+{"current_steps": 6919, "total_steps": 8680, "loss": 0.7122288346290588, "lr": 2.1698223911227686e-07, "epoch": 1.5942396313364056, "percentage": 79.71, "elapsed_time": "9:38:26", "remaining_time": "2:27:13"} +{"current_steps": 6920, "total_steps": 8680, "loss": 0.7712994813919067, "lr": 2.1674532425264548e-07, "epoch": 1.5944700460829493, "percentage": 79.72, "elapsed_time": "9:38:32", "remaining_time": "2:27:08"} +{"current_steps": 6921, "total_steps": 8680, "loss": 0.9656664729118347, "lr": 2.1650852308139355e-07, "epoch": 1.5947004608294932, "percentage": 79.74, "elapsed_time": "9:38:36", "remaining_time": "2:27:03"} +{"current_steps": 6922, "total_steps": 8680, "loss": 0.748894214630127, "lr": 2.162718356328922e-07, "epoch": 1.5949308755760367, "percentage": 79.75, "elapsed_time": "9:38:40", "remaining_time": "2:26:58"} +{"current_steps": 6923, "total_steps": 8680, "loss": 0.6875454187393188, "lr": 2.1603526194149635e-07, "epoch": 1.5951612903225807, "percentage": 79.76, "elapsed_time": "9:38:46", "remaining_time": "2:26:53"} +{"current_steps": 6924, "total_steps": 8680, "loss": 0.8258690237998962, "lr": 2.1579880204154412e-07, "epoch": 1.5953917050691244, "percentage": 79.77, "elapsed_time": "9:38:52", "remaining_time": "2:26:48"} +{"current_steps": 6925, "total_steps": 8680, "loss": 0.7647902965545654, "lr": 2.15562455967358e-07, "epoch": 1.5956221198156681, "percentage": 79.78, "elapsed_time": "9:38:59", "remaining_time": "2:26:43"} +{"current_steps": 6926, "total_steps": 8680, "loss": 0.7004281282424927, "lr": 2.1532622375324284e-07, "epoch": 1.595852534562212, "percentage": 79.79, "elapsed_time": "9:39:05", "remaining_time": "2:26:39"} +{"current_steps": 6927, "total_steps": 8680, "loss": 0.7500345706939697, "lr": 2.1509010543348726e-07, "epoch": 1.5960829493087556, "percentage": 79.8, "elapsed_time": "9:39:11", "remaining_time": "2:26:34"} +{"current_steps": 6928, "total_steps": 8680, "loss": 0.7300195693969727, "lr": 2.148541010423641e-07, "epoch": 
1.5963133640552996, "percentage": 79.82, "elapsed_time": "9:39:16", "remaining_time": "2:26:29"} +{"current_steps": 6929, "total_steps": 8680, "loss": 0.7592284679412842, "lr": 2.1461821061412876e-07, "epoch": 1.5965437788018433, "percentage": 79.83, "elapsed_time": "9:39:23", "remaining_time": "2:26:24"} +{"current_steps": 6930, "total_steps": 8680, "loss": 0.7179796099662781, "lr": 2.1438243418302016e-07, "epoch": 1.596774193548387, "percentage": 79.84, "elapsed_time": "9:39:28", "remaining_time": "2:26:19"} +{"current_steps": 6931, "total_steps": 8680, "loss": 0.8829631805419922, "lr": 2.1414677178326157e-07, "epoch": 1.597004608294931, "percentage": 79.85, "elapsed_time": "9:39:32", "remaining_time": "2:26:14"} +{"current_steps": 6932, "total_steps": 8680, "loss": 0.8661972880363464, "lr": 2.1391122344905865e-07, "epoch": 1.5972350230414747, "percentage": 79.86, "elapsed_time": "9:39:39", "remaining_time": "2:26:10"} +{"current_steps": 6933, "total_steps": 8680, "loss": 0.7774989604949951, "lr": 2.136757892146007e-07, "epoch": 1.5974654377880184, "percentage": 79.87, "elapsed_time": "9:39:44", "remaining_time": "2:26:05"} +{"current_steps": 6934, "total_steps": 8680, "loss": 0.7343888878822327, "lr": 2.1344046911406132e-07, "epoch": 1.5976958525345624, "percentage": 79.88, "elapsed_time": "9:39:49", "remaining_time": "2:26:00"} +{"current_steps": 6935, "total_steps": 8680, "loss": 0.7810107469558716, "lr": 2.132052631815966e-07, "epoch": 1.5979262672811059, "percentage": 79.9, "elapsed_time": "9:39:56", "remaining_time": "2:25:55"} +{"current_steps": 6936, "total_steps": 8680, "loss": 0.7585299611091614, "lr": 2.12970171451346e-07, "epoch": 1.5981566820276498, "percentage": 79.91, "elapsed_time": "9:40:01", "remaining_time": "2:25:50"} +{"current_steps": 6937, "total_steps": 8680, "loss": 0.886371910572052, "lr": 2.1273519395743344e-07, "epoch": 1.5983870967741935, "percentage": 79.92, "elapsed_time": "9:40:05", "remaining_time": "2:25:45"} +{"current_steps": 
6938, "total_steps": 8680, "loss": 0.6986823081970215, "lr": 2.1250033073396523e-07, "epoch": 1.5986175115207373, "percentage": 79.93, "elapsed_time": "9:40:11", "remaining_time": "2:25:40"} +{"current_steps": 6939, "total_steps": 8680, "loss": 0.7524189352989197, "lr": 2.122655818150312e-07, "epoch": 1.5988479262672812, "percentage": 79.94, "elapsed_time": "9:40:17", "remaining_time": "2:25:35"} +{"current_steps": 6940, "total_steps": 8680, "loss": 0.7699365615844727, "lr": 2.120309472347055e-07, "epoch": 1.5990783410138247, "percentage": 79.95, "elapsed_time": "9:40:22", "remaining_time": "2:25:30"} +{"current_steps": 6941, "total_steps": 8680, "loss": 0.8112696409225464, "lr": 2.1179642702704458e-07, "epoch": 1.5993087557603687, "percentage": 79.97, "elapsed_time": "9:40:28", "remaining_time": "2:25:25"} +{"current_steps": 6942, "total_steps": 8680, "loss": 0.7067416906356812, "lr": 2.115620212260889e-07, "epoch": 1.5995391705069124, "percentage": 79.98, "elapsed_time": "9:40:32", "remaining_time": "2:25:20"} +{"current_steps": 6943, "total_steps": 8680, "loss": 0.787110447883606, "lr": 2.1132772986586211e-07, "epoch": 1.5997695852534561, "percentage": 79.99, "elapsed_time": "9:40:39", "remaining_time": "2:25:16"} +{"current_steps": 6944, "total_steps": 8680, "loss": 0.7356789112091064, "lr": 2.11093552980371e-07, "epoch": 1.6, "percentage": 80.0, "elapsed_time": "9:40:44", "remaining_time": "2:25:11"} +{"current_steps": 6945, "total_steps": 8680, "loss": 0.8057125806808472, "lr": 2.1085949060360653e-07, "epoch": 1.6002304147465438, "percentage": 80.01, "elapsed_time": "9:40:48", "remaining_time": "2:25:05"} +{"current_steps": 6946, "total_steps": 8680, "loss": 0.7169399261474609, "lr": 2.1062554276954225e-07, "epoch": 1.6004608294930875, "percentage": 80.02, "elapsed_time": "9:40:55", "remaining_time": "2:25:01"} +{"current_steps": 6947, "total_steps": 8680, "loss": 0.7219180464744568, "lr": 2.1039170951213526e-07, "epoch": 1.6006912442396315, "percentage": 
80.03, "elapsed_time": "9:41:02", "remaining_time": "2:24:56"} +{"current_steps": 6948, "total_steps": 8680, "loss": 0.7530789375305176, "lr": 2.101579908653266e-07, "epoch": 1.600921658986175, "percentage": 80.05, "elapsed_time": "9:41:07", "remaining_time": "2:24:51"} +{"current_steps": 6949, "total_steps": 8680, "loss": 0.8192377090454102, "lr": 2.0992438686303993e-07, "epoch": 1.601152073732719, "percentage": 80.06, "elapsed_time": "9:41:12", "remaining_time": "2:24:46"} +{"current_steps": 6950, "total_steps": 8680, "loss": 0.6904648542404175, "lr": 2.0969089753918223e-07, "epoch": 1.6013824884792627, "percentage": 80.07, "elapsed_time": "9:41:18", "remaining_time": "2:24:41"} +{"current_steps": 6951, "total_steps": 8680, "loss": 0.7289770245552063, "lr": 2.0945752292764495e-07, "epoch": 1.6016129032258064, "percentage": 80.08, "elapsed_time": "9:41:23", "remaining_time": "2:24:37"} +{"current_steps": 6952, "total_steps": 8680, "loss": 0.8467620611190796, "lr": 2.0922426306230157e-07, "epoch": 1.6018433179723504, "percentage": 80.09, "elapsed_time": "9:41:29", "remaining_time": "2:24:32"} +{"current_steps": 6953, "total_steps": 8680, "loss": 0.7835153937339783, "lr": 2.089911179770093e-07, "epoch": 1.6020737327188939, "percentage": 80.1, "elapsed_time": "9:41:34", "remaining_time": "2:24:27"} +{"current_steps": 6954, "total_steps": 8680, "loss": 0.6696668267250061, "lr": 2.0875808770560933e-07, "epoch": 1.6023041474654378, "percentage": 80.12, "elapsed_time": "9:41:41", "remaining_time": "2:24:22"} +{"current_steps": 6955, "total_steps": 8680, "loss": 0.8451364636421204, "lr": 2.0852517228192556e-07, "epoch": 1.6025345622119815, "percentage": 80.13, "elapsed_time": "9:41:46", "remaining_time": "2:24:17"} +{"current_steps": 6956, "total_steps": 8680, "loss": 0.7917240858078003, "lr": 2.0829237173976487e-07, "epoch": 1.6027649769585253, "percentage": 80.14, "elapsed_time": "9:41:51", "remaining_time": "2:24:12"} +{"current_steps": 6957, "total_steps": 8680, 
"loss": 0.791597843170166, "lr": 2.0805968611291867e-07, "epoch": 1.6029953917050692, "percentage": 80.15, "elapsed_time": "9:41:56", "remaining_time": "2:24:07"} +{"current_steps": 6958, "total_steps": 8680, "loss": 0.7571247816085815, "lr": 2.0782711543516063e-07, "epoch": 1.603225806451613, "percentage": 80.16, "elapsed_time": "9:42:01", "remaining_time": "2:24:02"} +{"current_steps": 6959, "total_steps": 8680, "loss": 0.9196302890777588, "lr": 2.075946597402478e-07, "epoch": 1.6034562211981567, "percentage": 80.17, "elapsed_time": "9:42:05", "remaining_time": "2:23:57"} +{"current_steps": 6960, "total_steps": 8680, "loss": 0.7106618881225586, "lr": 2.0736231906192136e-07, "epoch": 1.6036866359447006, "percentage": 80.18, "elapsed_time": "9:42:11", "remaining_time": "2:23:52"} +{"current_steps": 6961, "total_steps": 8680, "loss": 0.8923465013504028, "lr": 2.071300934339051e-07, "epoch": 1.6039170506912441, "percentage": 80.2, "elapsed_time": "9:42:16", "remaining_time": "2:23:47"} +{"current_steps": 6962, "total_steps": 8680, "loss": 0.6929241418838501, "lr": 2.0689798288990601e-07, "epoch": 1.604147465437788, "percentage": 80.21, "elapsed_time": "9:42:22", "remaining_time": "2:23:42"} +{"current_steps": 6963, "total_steps": 8680, "loss": 0.935944676399231, "lr": 2.0666598746361487e-07, "epoch": 1.6043778801843318, "percentage": 80.22, "elapsed_time": "9:42:27", "remaining_time": "2:23:37"} +{"current_steps": 6964, "total_steps": 8680, "loss": 0.7442188262939453, "lr": 2.0643410718870536e-07, "epoch": 1.6046082949308755, "percentage": 80.23, "elapsed_time": "9:42:32", "remaining_time": "2:23:32"} +{"current_steps": 6965, "total_steps": 8680, "loss": 0.7340278625488281, "lr": 2.0620234209883446e-07, "epoch": 1.6048387096774195, "percentage": 80.24, "elapsed_time": "9:42:38", "remaining_time": "2:23:27"} +{"current_steps": 6966, "total_steps": 8680, "loss": 0.7436190247535706, "lr": 2.0597069222764297e-07, "epoch": 1.605069124423963, "percentage": 80.25, 
"elapsed_time": "9:42:42", "remaining_time": "2:23:22"} +{"current_steps": 6967, "total_steps": 8680, "loss": 0.9109283685684204, "lr": 2.0573915760875406e-07, "epoch": 1.605299539170507, "percentage": 80.26, "elapsed_time": "9:42:47", "remaining_time": "2:23:17"} +{"current_steps": 6968, "total_steps": 8680, "loss": 0.86224365234375, "lr": 2.0550773827577518e-07, "epoch": 1.6055299539170507, "percentage": 80.28, "elapsed_time": "9:42:53", "remaining_time": "2:23:12"} +{"current_steps": 6969, "total_steps": 8680, "loss": 0.6873685121536255, "lr": 2.0527643426229636e-07, "epoch": 1.6057603686635944, "percentage": 80.29, "elapsed_time": "9:43:00", "remaining_time": "2:23:08"} +{"current_steps": 6970, "total_steps": 8680, "loss": 0.7634609937667847, "lr": 2.0504524560189074e-07, "epoch": 1.6059907834101383, "percentage": 80.3, "elapsed_time": "9:43:04", "remaining_time": "2:23:03"} +{"current_steps": 6971, "total_steps": 8680, "loss": 0.7940595149993896, "lr": 2.0481417232811572e-07, "epoch": 1.606221198156682, "percentage": 80.31, "elapsed_time": "9:43:10", "remaining_time": "2:22:58"} +{"current_steps": 6972, "total_steps": 8680, "loss": 0.7109687924385071, "lr": 2.0458321447451078e-07, "epoch": 1.6064516129032258, "percentage": 80.32, "elapsed_time": "9:43:17", "remaining_time": "2:22:53"} +{"current_steps": 6973, "total_steps": 8680, "loss": 0.9476398825645447, "lr": 2.04352372074599e-07, "epoch": 1.6066820276497698, "percentage": 80.33, "elapsed_time": "9:43:21", "remaining_time": "2:22:48"} +{"current_steps": 6974, "total_steps": 8680, "loss": 0.7563579678535461, "lr": 2.0412164516188747e-07, "epoch": 1.6069124423963133, "percentage": 80.35, "elapsed_time": "9:43:25", "remaining_time": "2:22:43"} +{"current_steps": 6975, "total_steps": 8680, "loss": 0.7928751707077026, "lr": 2.0389103376986538e-07, "epoch": 1.6071428571428572, "percentage": 80.36, "elapsed_time": "9:43:29", "remaining_time": "2:22:37"} +{"current_steps": 6976, "total_steps": 8680, "loss": 
0.776961624622345, "lr": 2.0366053793200565e-07, "epoch": 1.607373271889401, "percentage": 80.37, "elapsed_time": "9:43:35", "remaining_time": "2:22:33"} +{"current_steps": 6977, "total_steps": 8680, "loss": 0.6511167883872986, "lr": 2.0343015768176496e-07, "epoch": 1.6076036866359447, "percentage": 80.38, "elapsed_time": "9:43:42", "remaining_time": "2:22:28"} +{"current_steps": 6978, "total_steps": 8680, "loss": 0.6487337350845337, "lr": 2.0319989305258235e-07, "epoch": 1.6078341013824886, "percentage": 80.39, "elapsed_time": "9:43:47", "remaining_time": "2:22:23"} +{"current_steps": 6979, "total_steps": 8680, "loss": 0.921454131603241, "lr": 2.0296974407788004e-07, "epoch": 1.6080645161290321, "percentage": 80.4, "elapsed_time": "9:43:51", "remaining_time": "2:22:18"} +{"current_steps": 6980, "total_steps": 8680, "loss": 0.8145809769630432, "lr": 2.0273971079106467e-07, "epoch": 1.608294930875576, "percentage": 80.41, "elapsed_time": "9:43:56", "remaining_time": "2:22:13"} +{"current_steps": 6981, "total_steps": 8680, "loss": 0.6655904054641724, "lr": 2.0250979322552474e-07, "epoch": 1.6085253456221198, "percentage": 80.43, "elapsed_time": "9:44:01", "remaining_time": "2:22:08"} +{"current_steps": 6982, "total_steps": 8680, "loss": 0.777961254119873, "lr": 2.0227999141463258e-07, "epoch": 1.6087557603686635, "percentage": 80.44, "elapsed_time": "9:44:05", "remaining_time": "2:22:03"} +{"current_steps": 6983, "total_steps": 8680, "loss": 0.6543164253234863, "lr": 2.0205030539174361e-07, "epoch": 1.6089861751152075, "percentage": 80.45, "elapsed_time": "9:44:10", "remaining_time": "2:21:57"} +{"current_steps": 6984, "total_steps": 8680, "loss": 0.7842000722885132, "lr": 2.018207351901966e-07, "epoch": 1.6092165898617512, "percentage": 80.46, "elapsed_time": "9:44:16", "remaining_time": "2:21:53"} +{"current_steps": 6985, "total_steps": 8680, "loss": 0.7264418005943298, "lr": 2.0159128084331278e-07, "epoch": 1.609447004608295, "percentage": 80.47, "elapsed_time": 
"9:44:21", "remaining_time": "2:21:48"} +{"current_steps": 6986, "total_steps": 8680, "loss": 0.8722596168518066, "lr": 2.0136194238439795e-07, "epoch": 1.6096774193548387, "percentage": 80.48, "elapsed_time": "9:44:26", "remaining_time": "2:21:42"} +{"current_steps": 6987, "total_steps": 8680, "loss": 0.8162735104560852, "lr": 2.0113271984673997e-07, "epoch": 1.6099078341013824, "percentage": 80.5, "elapsed_time": "9:44:31", "remaining_time": "2:21:37"} +{"current_steps": 6988, "total_steps": 8680, "loss": 0.6962481737136841, "lr": 2.0090361326360982e-07, "epoch": 1.6101382488479263, "percentage": 80.51, "elapsed_time": "9:44:35", "remaining_time": "2:21:32"} +{"current_steps": 6989, "total_steps": 8680, "loss": 0.8186852931976318, "lr": 2.0067462266826264e-07, "epoch": 1.61036866359447, "percentage": 80.52, "elapsed_time": "9:44:39", "remaining_time": "2:21:27"} +{"current_steps": 6990, "total_steps": 8680, "loss": 0.8935987949371338, "lr": 2.0044574809393543e-07, "epoch": 1.6105990783410138, "percentage": 80.53, "elapsed_time": "9:44:44", "remaining_time": "2:21:22"} +{"current_steps": 6991, "total_steps": 8680, "loss": 0.9152865409851074, "lr": 2.002169895738498e-07, "epoch": 1.6108294930875577, "percentage": 80.54, "elapsed_time": "9:44:51", "remaining_time": "2:21:17"} +{"current_steps": 6992, "total_steps": 8680, "loss": 0.8042874336242676, "lr": 1.9998834714120928e-07, "epoch": 1.6110599078341012, "percentage": 80.55, "elapsed_time": "9:44:55", "remaining_time": "2:21:12"} +{"current_steps": 6993, "total_steps": 8680, "loss": 0.9621129035949707, "lr": 1.9975982082920083e-07, "epoch": 1.6112903225806452, "percentage": 80.56, "elapsed_time": "9:45:00", "remaining_time": "2:21:07"} +{"current_steps": 6994, "total_steps": 8680, "loss": 0.8296995162963867, "lr": 1.9953141067099533e-07, "epoch": 1.611520737327189, "percentage": 80.58, "elapsed_time": "9:45:06", "remaining_time": "2:21:02"} +{"current_steps": 6995, "total_steps": 8680, "loss": 0.8129373788833618, 
"lr": 1.9930311669974587e-07, "epoch": 1.6117511520737327, "percentage": 80.59, "elapsed_time": "9:45:11", "remaining_time": "2:20:57"} +{"current_steps": 6996, "total_steps": 8680, "loss": 0.7450911998748779, "lr": 1.9907493894858874e-07, "epoch": 1.6119815668202766, "percentage": 80.6, "elapsed_time": "9:45:16", "remaining_time": "2:20:52"} +{"current_steps": 6997, "total_steps": 8680, "loss": 0.798037052154541, "lr": 1.9884687745064422e-07, "epoch": 1.6122119815668203, "percentage": 80.61, "elapsed_time": "9:45:22", "remaining_time": "2:20:48"} +{"current_steps": 6998, "total_steps": 8680, "loss": 0.8118857145309448, "lr": 1.9861893223901494e-07, "epoch": 1.612442396313364, "percentage": 80.62, "elapsed_time": "9:45:27", "remaining_time": "2:20:43"} +{"current_steps": 6999, "total_steps": 8680, "loss": 0.7954392433166504, "lr": 1.9839110334678632e-07, "epoch": 1.6126728110599078, "percentage": 80.63, "elapsed_time": "9:45:31", "remaining_time": "2:20:37"} +{"current_steps": 7000, "total_steps": 8680, "loss": 0.8055616617202759, "lr": 1.9816339080702825e-07, "epoch": 1.6129032258064515, "percentage": 80.65, "elapsed_time": "9:45:37", "remaining_time": "2:20:32"} +{"current_steps": 7001, "total_steps": 8680, "loss": 0.8949761986732483, "lr": 1.979357946527924e-07, "epoch": 1.6131336405529955, "percentage": 80.66, "elapsed_time": "9:45:43", "remaining_time": "2:20:28"} +{"current_steps": 7002, "total_steps": 8680, "loss": 0.8327617645263672, "lr": 1.9770831491711427e-07, "epoch": 1.6133640552995392, "percentage": 80.67, "elapsed_time": "9:45:47", "remaining_time": "2:20:23"} +{"current_steps": 7003, "total_steps": 8680, "loss": 0.7593148946762085, "lr": 1.9748095163301215e-07, "epoch": 1.613594470046083, "percentage": 80.68, "elapsed_time": "9:45:52", "remaining_time": "2:20:17"} +{"current_steps": 7004, "total_steps": 8680, "loss": 0.7639665603637695, "lr": 1.9725370483348737e-07, "epoch": 1.6138248847926269, "percentage": 80.69, "elapsed_time": "9:45:56", 
"remaining_time": "2:20:12"} +{"current_steps": 7005, "total_steps": 8680, "loss": 0.8561587929725647, "lr": 1.9702657455152448e-07, "epoch": 1.6140552995391704, "percentage": 80.7, "elapsed_time": "9:46:00", "remaining_time": "2:20:07"} +{"current_steps": 7006, "total_steps": 8680, "loss": 0.835313081741333, "lr": 1.9679956082009154e-07, "epoch": 1.6142857142857143, "percentage": 80.71, "elapsed_time": "9:46:04", "remaining_time": "2:20:02"} +{"current_steps": 7007, "total_steps": 8680, "loss": 0.831456184387207, "lr": 1.9657266367213898e-07, "epoch": 1.614516129032258, "percentage": 80.73, "elapsed_time": "9:46:09", "remaining_time": "2:19:57"} +{"current_steps": 7008, "total_steps": 8680, "loss": 0.699436604976654, "lr": 1.963458831406005e-07, "epoch": 1.6147465437788018, "percentage": 80.74, "elapsed_time": "9:46:14", "remaining_time": "2:19:51"} +{"current_steps": 7009, "total_steps": 8680, "loss": 0.7821902632713318, "lr": 1.9611921925839337e-07, "epoch": 1.6149769585253457, "percentage": 80.75, "elapsed_time": "9:46:18", "remaining_time": "2:19:46"} +{"current_steps": 7010, "total_steps": 8680, "loss": 0.7491241097450256, "lr": 1.9589267205841742e-07, "epoch": 1.6152073732718892, "percentage": 80.76, "elapsed_time": "9:46:23", "remaining_time": "2:19:41"} +{"current_steps": 7011, "total_steps": 8680, "loss": 0.7299652099609375, "lr": 1.956662415735554e-07, "epoch": 1.6154377880184332, "percentage": 80.77, "elapsed_time": "9:46:29", "remaining_time": "2:19:36"} +{"current_steps": 7012, "total_steps": 8680, "loss": 0.692190408706665, "lr": 1.9543992783667385e-07, "epoch": 1.615668202764977, "percentage": 80.78, "elapsed_time": "9:46:34", "remaining_time": "2:19:32"} +{"current_steps": 7013, "total_steps": 8680, "loss": 0.8727273941040039, "lr": 1.9521373088062166e-07, "epoch": 1.6158986175115206, "percentage": 80.79, "elapsed_time": "9:46:39", "remaining_time": "2:19:26"} +{"current_steps": 7014, "total_steps": 8680, "loss": 0.6441171169281006, "lr": 
1.9498765073823077e-07, "epoch": 1.6161290322580646, "percentage": 80.81, "elapsed_time": "9:46:46", "remaining_time": "2:19:22"} +{"current_steps": 7015, "total_steps": 8680, "loss": 0.6960387229919434, "lr": 1.947616874423169e-07, "epoch": 1.6163594470046083, "percentage": 80.82, "elapsed_time": "9:46:51", "remaining_time": "2:19:17"} +{"current_steps": 7016, "total_steps": 8680, "loss": 0.9231700301170349, "lr": 1.9453584102567788e-07, "epoch": 1.616589861751152, "percentage": 80.83, "elapsed_time": "9:46:56", "remaining_time": "2:19:12"} +{"current_steps": 7017, "total_steps": 8680, "loss": 0.6957401037216187, "lr": 1.9431011152109555e-07, "epoch": 1.616820276497696, "percentage": 80.84, "elapsed_time": "9:47:02", "remaining_time": "2:19:07"} +{"current_steps": 7018, "total_steps": 8680, "loss": 0.6608580350875854, "lr": 1.9408449896133384e-07, "epoch": 1.6170506912442395, "percentage": 80.85, "elapsed_time": "9:47:07", "remaining_time": "2:19:02"} +{"current_steps": 7019, "total_steps": 8680, "loss": 0.7322397232055664, "lr": 1.9385900337913997e-07, "epoch": 1.6172811059907835, "percentage": 80.86, "elapsed_time": "9:47:13", "remaining_time": "2:18:57"} +{"current_steps": 7020, "total_steps": 8680, "loss": 0.6996288299560547, "lr": 1.9363362480724488e-07, "epoch": 1.6175115207373272, "percentage": 80.88, "elapsed_time": "9:47:18", "remaining_time": "2:18:52"} +{"current_steps": 7021, "total_steps": 8680, "loss": 0.7928623557090759, "lr": 1.9340836327836163e-07, "epoch": 1.617741935483871, "percentage": 80.89, "elapsed_time": "9:47:23", "remaining_time": "2:18:47"} +{"current_steps": 7022, "total_steps": 8680, "loss": 0.6275026202201843, "lr": 1.9318321882518674e-07, "epoch": 1.6179723502304149, "percentage": 80.9, "elapsed_time": "9:47:28", "remaining_time": "2:18:42"} +{"current_steps": 7023, "total_steps": 8680, "loss": 0.6660110950469971, "lr": 1.9295819148039948e-07, "epoch": 1.6182027649769584, "percentage": 80.91, "elapsed_time": "9:47:32", 
"remaining_time": "2:18:37"} +{"current_steps": 7024, "total_steps": 8680, "loss": 0.8129480481147766, "lr": 1.9273328127666232e-07, "epoch": 1.6184331797235023, "percentage": 80.92, "elapsed_time": "9:47:37", "remaining_time": "2:18:32"} +{"current_steps": 7025, "total_steps": 8680, "loss": 0.8070700168609619, "lr": 1.9250848824662046e-07, "epoch": 1.618663594470046, "percentage": 80.93, "elapsed_time": "9:47:41", "remaining_time": "2:18:27"} +{"current_steps": 7026, "total_steps": 8680, "loss": 0.8123769760131836, "lr": 1.922838124229028e-07, "epoch": 1.6188940092165898, "percentage": 80.94, "elapsed_time": "9:47:45", "remaining_time": "2:18:21"} +{"current_steps": 7027, "total_steps": 8680, "loss": 0.6552244424819946, "lr": 1.920592538381205e-07, "epoch": 1.6191244239631337, "percentage": 80.96, "elapsed_time": "9:47:50", "remaining_time": "2:18:16"} +{"current_steps": 7028, "total_steps": 8680, "loss": 0.8764367699623108, "lr": 1.9183481252486767e-07, "epoch": 1.6193548387096774, "percentage": 80.97, "elapsed_time": "9:47:55", "remaining_time": "2:18:11"} +{"current_steps": 7029, "total_steps": 8680, "loss": 0.9075809717178345, "lr": 1.9161048851572215e-07, "epoch": 1.6195852534562212, "percentage": 80.98, "elapsed_time": "9:47:59", "remaining_time": "2:18:06"} +{"current_steps": 7030, "total_steps": 8680, "loss": 0.7308327555656433, "lr": 1.9138628184324412e-07, "epoch": 1.6198156682027651, "percentage": 80.99, "elapsed_time": "9:48:04", "remaining_time": "2:18:01"} +{"current_steps": 7031, "total_steps": 8680, "loss": 0.838142991065979, "lr": 1.9116219253997655e-07, "epoch": 1.6200460829493086, "percentage": 81.0, "elapsed_time": "9:48:09", "remaining_time": "2:17:56"} +{"current_steps": 7032, "total_steps": 8680, "loss": 0.7681041359901428, "lr": 1.9093822063844623e-07, "epoch": 1.6202764976958526, "percentage": 81.01, "elapsed_time": "9:48:13", "remaining_time": "2:17:51"} +{"current_steps": 7033, "total_steps": 8680, "loss": 0.7179980278015137, "lr": 
1.907143661711621e-07, "epoch": 1.6205069124423963, "percentage": 81.03, "elapsed_time": "9:48:18", "remaining_time": "2:17:46"} +{"current_steps": 7034, "total_steps": 8680, "loss": 0.8688361644744873, "lr": 1.9049062917061609e-07, "epoch": 1.62073732718894, "percentage": 81.04, "elapsed_time": "9:48:24", "remaining_time": "2:17:41"} +{"current_steps": 7035, "total_steps": 8680, "loss": 0.6540178656578064, "lr": 1.9026700966928388e-07, "epoch": 1.620967741935484, "percentage": 81.05, "elapsed_time": "9:48:28", "remaining_time": "2:17:36"} +{"current_steps": 7036, "total_steps": 8680, "loss": 0.7834869623184204, "lr": 1.900435076996233e-07, "epoch": 1.6211981566820275, "percentage": 81.06, "elapsed_time": "9:48:33", "remaining_time": "2:17:31"} +{"current_steps": 7037, "total_steps": 8680, "loss": 0.8895971775054932, "lr": 1.8982012329407505e-07, "epoch": 1.6214285714285714, "percentage": 81.07, "elapsed_time": "9:48:38", "remaining_time": "2:17:26"} +{"current_steps": 7038, "total_steps": 8680, "loss": 0.6625858545303345, "lr": 1.8959685648506362e-07, "epoch": 1.6216589861751152, "percentage": 81.08, "elapsed_time": "9:48:44", "remaining_time": "2:17:21"} +{"current_steps": 7039, "total_steps": 8680, "loss": 0.651193380355835, "lr": 1.893737073049957e-07, "epoch": 1.621889400921659, "percentage": 81.09, "elapsed_time": "9:48:50", "remaining_time": "2:17:16"} +{"current_steps": 7040, "total_steps": 8680, "loss": 0.8716636896133423, "lr": 1.8915067578626065e-07, "epoch": 1.6221198156682028, "percentage": 81.11, "elapsed_time": "9:48:54", "remaining_time": "2:17:11"} +{"current_steps": 7041, "total_steps": 8680, "loss": 0.812637984752655, "lr": 1.8892776196123196e-07, "epoch": 1.6223502304147466, "percentage": 81.12, "elapsed_time": "9:48:58", "remaining_time": "2:17:06"} +{"current_steps": 7042, "total_steps": 8680, "loss": 0.7803184986114502, "lr": 1.887049658622648e-07, "epoch": 1.6225806451612903, "percentage": 81.13, "elapsed_time": "9:49:04", "remaining_time": 
"2:17:01"} +{"current_steps": 7043, "total_steps": 8680, "loss": 0.7884814739227295, "lr": 1.8848228752169793e-07, "epoch": 1.6228110599078343, "percentage": 81.14, "elapsed_time": "9:49:09", "remaining_time": "2:16:56"} +{"current_steps": 7044, "total_steps": 8680, "loss": 0.7250671982765198, "lr": 1.8825972697185265e-07, "epoch": 1.6230414746543778, "percentage": 81.15, "elapsed_time": "9:49:15", "remaining_time": "2:16:51"} +{"current_steps": 7045, "total_steps": 8680, "loss": 0.8078780174255371, "lr": 1.880372842450332e-07, "epoch": 1.6232718894009217, "percentage": 81.16, "elapsed_time": "9:49:20", "remaining_time": "2:16:46"} +{"current_steps": 7046, "total_steps": 8680, "loss": 0.8523818254470825, "lr": 1.878149593735272e-07, "epoch": 1.6235023041474654, "percentage": 81.18, "elapsed_time": "9:49:26", "remaining_time": "2:16:41"} +{"current_steps": 7047, "total_steps": 8680, "loss": 0.8772249221801758, "lr": 1.875927523896047e-07, "epoch": 1.6237327188940092, "percentage": 81.19, "elapsed_time": "9:49:31", "remaining_time": "2:16:36"} +{"current_steps": 7048, "total_steps": 8680, "loss": 0.7906323671340942, "lr": 1.8737066332551843e-07, "epoch": 1.6239631336405531, "percentage": 81.2, "elapsed_time": "9:49:35", "remaining_time": "2:16:31"} +{"current_steps": 7049, "total_steps": 8680, "loss": 0.8010337352752686, "lr": 1.8714869221350492e-07, "epoch": 1.6241935483870966, "percentage": 81.21, "elapsed_time": "9:49:40", "remaining_time": "2:16:26"} +{"current_steps": 7050, "total_steps": 8680, "loss": 0.8978049755096436, "lr": 1.8692683908578267e-07, "epoch": 1.6244239631336406, "percentage": 81.22, "elapsed_time": "9:49:45", "remaining_time": "2:16:21"} +{"current_steps": 7051, "total_steps": 8680, "loss": 0.6622864007949829, "lr": 1.8670510397455297e-07, "epoch": 1.6246543778801843, "percentage": 81.23, "elapsed_time": "9:49:51", "remaining_time": "2:16:16"} +{"current_steps": 7052, "total_steps": 8680, "loss": 0.7795406579971313, "lr": 
1.8648348691200112e-07, "epoch": 1.624884792626728, "percentage": 81.24, "elapsed_time": "9:49:57", "remaining_time": "2:16:11"} +{"current_steps": 7053, "total_steps": 8680, "loss": 0.9152054786682129, "lr": 1.8626198793029423e-07, "epoch": 1.625115207373272, "percentage": 81.26, "elapsed_time": "9:50:05", "remaining_time": "2:16:07"} +{"current_steps": 7054, "total_steps": 8680, "loss": 0.719946563243866, "lr": 1.860406070615822e-07, "epoch": 1.6253456221198157, "percentage": 81.27, "elapsed_time": "9:50:11", "remaining_time": "2:16:02"} +{"current_steps": 7055, "total_steps": 8680, "loss": 0.782962441444397, "lr": 1.8581934433799884e-07, "epoch": 1.6255760368663594, "percentage": 81.28, "elapsed_time": "9:50:17", "remaining_time": "2:15:57"} +{"current_steps": 7056, "total_steps": 8680, "loss": 0.8119732737541199, "lr": 1.855981997916597e-07, "epoch": 1.6258064516129034, "percentage": 81.29, "elapsed_time": "9:50:23", "remaining_time": "2:15:52"} +{"current_steps": 7057, "total_steps": 8680, "loss": 0.7585981488227844, "lr": 1.8537717345466351e-07, "epoch": 1.6260368663594469, "percentage": 81.3, "elapsed_time": "9:50:28", "remaining_time": "2:15:47"} +{"current_steps": 7058, "total_steps": 8680, "loss": 0.6846082210540771, "lr": 1.8515626535909258e-07, "epoch": 1.6262672811059908, "percentage": 81.31, "elapsed_time": "9:50:34", "remaining_time": "2:15:43"} +{"current_steps": 7059, "total_steps": 8680, "loss": 0.7355546951293945, "lr": 1.8493547553701083e-07, "epoch": 1.6264976958525346, "percentage": 81.32, "elapsed_time": "9:50:40", "remaining_time": "2:15:38"} +{"current_steps": 7060, "total_steps": 8680, "loss": 0.6828340291976929, "lr": 1.847148040204657e-07, "epoch": 1.6267281105990783, "percentage": 81.34, "elapsed_time": "9:50:45", "remaining_time": "2:15:33"} +{"current_steps": 7061, "total_steps": 8680, "loss": 0.8513988256454468, "lr": 1.8449425084148763e-07, "epoch": 1.6269585253456222, "percentage": 81.35, "elapsed_time": "9:50:51", 
"remaining_time": "2:15:28"} +{"current_steps": 7062, "total_steps": 8680, "loss": 0.6817762851715088, "lr": 1.8427381603208947e-07, "epoch": 1.6271889400921657, "percentage": 81.36, "elapsed_time": "9:50:59", "remaining_time": "2:15:24"} +{"current_steps": 7063, "total_steps": 8680, "loss": 0.7314180731773376, "lr": 1.8405349962426699e-07, "epoch": 1.6274193548387097, "percentage": 81.37, "elapsed_time": "9:51:04", "remaining_time": "2:15:19"} +{"current_steps": 7064, "total_steps": 8680, "loss": 0.8193466663360596, "lr": 1.8383330164999898e-07, "epoch": 1.6276497695852534, "percentage": 81.38, "elapsed_time": "9:51:11", "remaining_time": "2:15:14"} +{"current_steps": 7065, "total_steps": 8680, "loss": 0.7469823360443115, "lr": 1.8361322214124643e-07, "epoch": 1.6278801843317972, "percentage": 81.39, "elapsed_time": "9:51:18", "remaining_time": "2:15:10"} +{"current_steps": 7066, "total_steps": 8680, "loss": 0.8578816652297974, "lr": 1.8339326112995423e-07, "epoch": 1.628110599078341, "percentage": 81.41, "elapsed_time": "9:51:24", "remaining_time": "2:15:05"} +{"current_steps": 7067, "total_steps": 8680, "loss": 0.8384239077568054, "lr": 1.8317341864804903e-07, "epoch": 1.6283410138248848, "percentage": 81.42, "elapsed_time": "9:51:29", "remaining_time": "2:15:00"} +{"current_steps": 7068, "total_steps": 8680, "loss": 0.8707646131515503, "lr": 1.829536947274406e-07, "epoch": 1.6285714285714286, "percentage": 81.43, "elapsed_time": "9:51:34", "remaining_time": "2:14:55"} +{"current_steps": 7069, "total_steps": 8680, "loss": 0.6869943141937256, "lr": 1.82734089400022e-07, "epoch": 1.6288018433179725, "percentage": 81.44, "elapsed_time": "9:51:40", "remaining_time": "2:14:50"} +{"current_steps": 7070, "total_steps": 8680, "loss": 0.7776129245758057, "lr": 1.8251460269766848e-07, "epoch": 1.629032258064516, "percentage": 81.45, "elapsed_time": "9:51:47", "remaining_time": "2:14:45"} +{"current_steps": 7071, "total_steps": 8680, "loss": 0.8126854300498962, "lr": 
1.8229523465223785e-07, "epoch": 1.62926267281106, "percentage": 81.46, "elapsed_time": "9:51:52", "remaining_time": "2:14:40"} +{"current_steps": 7072, "total_steps": 8680, "loss": 0.6570720672607422, "lr": 1.8207598529557166e-07, "epoch": 1.6294930875576037, "percentage": 81.47, "elapsed_time": "9:51:58", "remaining_time": "2:14:35"} +{"current_steps": 7073, "total_steps": 8680, "loss": 0.6485599875450134, "lr": 1.818568546594934e-07, "epoch": 1.6297235023041474, "percentage": 81.49, "elapsed_time": "9:52:05", "remaining_time": "2:14:31"} +{"current_steps": 7074, "total_steps": 8680, "loss": 0.9132766723632812, "lr": 1.816378427758093e-07, "epoch": 1.6299539170506914, "percentage": 81.5, "elapsed_time": "9:52:10", "remaining_time": "2:14:26"} +{"current_steps": 7075, "total_steps": 8680, "loss": 0.8277286291122437, "lr": 1.8141894967630932e-07, "epoch": 1.6301843317972349, "percentage": 81.51, "elapsed_time": "9:52:15", "remaining_time": "2:14:21"} +{"current_steps": 7076, "total_steps": 8680, "loss": 0.7409358024597168, "lr": 1.812001753927651e-07, "epoch": 1.6304147465437788, "percentage": 81.52, "elapsed_time": "9:52:19", "remaining_time": "2:14:16"} +{"current_steps": 7077, "total_steps": 8680, "loss": 0.8233339786529541, "lr": 1.809815199569311e-07, "epoch": 1.6306451612903226, "percentage": 81.53, "elapsed_time": "9:52:24", "remaining_time": "2:14:11"} +{"current_steps": 7078, "total_steps": 8680, "loss": 0.8704487085342407, "lr": 1.8076298340054563e-07, "epoch": 1.6308755760368663, "percentage": 81.54, "elapsed_time": "9:52:30", "remaining_time": "2:14:06"} +{"current_steps": 7079, "total_steps": 8680, "loss": 0.8845789432525635, "lr": 1.8054456575532862e-07, "epoch": 1.6311059907834102, "percentage": 81.56, "elapsed_time": "9:52:35", "remaining_time": "2:14:01"} +{"current_steps": 7080, "total_steps": 8680, "loss": 0.7241162061691284, "lr": 1.8032626705298272e-07, "epoch": 1.631336405529954, "percentage": 81.57, "elapsed_time": "9:52:40", 
"remaining_time": "2:13:56"} +{"current_steps": 7081, "total_steps": 8680, "loss": 0.7065681219100952, "lr": 1.8010808732519433e-07, "epoch": 1.6315668202764977, "percentage": 81.58, "elapsed_time": "9:52:46", "remaining_time": "2:13:51"} +{"current_steps": 7082, "total_steps": 8680, "loss": 0.6492339372634888, "lr": 1.7989002660363162e-07, "epoch": 1.6317972350230416, "percentage": 81.59, "elapsed_time": "9:52:51", "remaining_time": "2:13:46"} +{"current_steps": 7083, "total_steps": 8680, "loss": 0.7089248895645142, "lr": 1.79672084919946e-07, "epoch": 1.6320276497695851, "percentage": 81.6, "elapsed_time": "9:52:58", "remaining_time": "2:13:41"} +{"current_steps": 7084, "total_steps": 8680, "loss": 0.7030316591262817, "lr": 1.794542623057712e-07, "epoch": 1.632258064516129, "percentage": 81.61, "elapsed_time": "9:53:04", "remaining_time": "2:13:37"} +{"current_steps": 7085, "total_steps": 8680, "loss": 0.8626528978347778, "lr": 1.792365587927239e-07, "epoch": 1.6324884792626728, "percentage": 81.62, "elapsed_time": "9:53:09", "remaining_time": "2:13:31"} +{"current_steps": 7086, "total_steps": 8680, "loss": 0.8468672037124634, "lr": 1.7901897441240333e-07, "epoch": 1.6327188940092165, "percentage": 81.64, "elapsed_time": "9:53:13", "remaining_time": "2:13:26"} +{"current_steps": 7087, "total_steps": 8680, "loss": 0.8546739816665649, "lr": 1.7880150919639214e-07, "epoch": 1.6329493087557605, "percentage": 81.65, "elapsed_time": "9:53:18", "remaining_time": "2:13:21"} +{"current_steps": 7088, "total_steps": 8680, "loss": 0.9187895655632019, "lr": 1.7858416317625468e-07, "epoch": 1.633179723502304, "percentage": 81.66, "elapsed_time": "9:53:24", "remaining_time": "2:13:17"} +{"current_steps": 7089, "total_steps": 8680, "loss": 0.7496293783187866, "lr": 1.7836693638353827e-07, "epoch": 1.633410138248848, "percentage": 81.67, "elapsed_time": "9:53:33", "remaining_time": "2:13:12"} +{"current_steps": 7090, "total_steps": 8680, "loss": 0.682653546333313, "lr": 
1.7814982884977358e-07, "epoch": 1.6336405529953917, "percentage": 81.68, "elapsed_time": "9:53:40", "remaining_time": "2:13:08"} +{"current_steps": 7091, "total_steps": 8680, "loss": 0.8065551519393921, "lr": 1.7793284060647295e-07, "epoch": 1.6338709677419354, "percentage": 81.69, "elapsed_time": "9:53:47", "remaining_time": "2:13:03"} +{"current_steps": 7092, "total_steps": 8680, "loss": 0.6605588793754578, "lr": 1.7771597168513263e-07, "epoch": 1.6341013824884794, "percentage": 81.71, "elapsed_time": "9:53:55", "remaining_time": "2:12:59"} +{"current_steps": 7093, "total_steps": 8680, "loss": 0.7257254123687744, "lr": 1.7749922211723034e-07, "epoch": 1.634331797235023, "percentage": 81.72, "elapsed_time": "9:54:01", "remaining_time": "2:12:54"} +{"current_steps": 7094, "total_steps": 8680, "loss": 0.7438890933990479, "lr": 1.772825919342269e-07, "epoch": 1.6345622119815668, "percentage": 81.73, "elapsed_time": "9:54:07", "remaining_time": "2:12:49"} +{"current_steps": 7095, "total_steps": 8680, "loss": 0.8546249866485596, "lr": 1.770660811675664e-07, "epoch": 1.6347926267281108, "percentage": 81.74, "elapsed_time": "9:54:13", "remaining_time": "2:12:44"} +{"current_steps": 7096, "total_steps": 8680, "loss": 0.727516770362854, "lr": 1.7684968984867466e-07, "epoch": 1.6350230414746543, "percentage": 81.75, "elapsed_time": "9:54:20", "remaining_time": "2:12:40"} +{"current_steps": 7097, "total_steps": 8680, "loss": 0.7578408718109131, "lr": 1.766334180089606e-07, "epoch": 1.6352534562211982, "percentage": 81.76, "elapsed_time": "9:54:26", "remaining_time": "2:12:35"} +{"current_steps": 7098, "total_steps": 8680, "loss": 0.8253650665283203, "lr": 1.7641726567981606e-07, "epoch": 1.635483870967742, "percentage": 81.77, "elapsed_time": "9:54:33", "remaining_time": "2:12:30"} +{"current_steps": 7099, "total_steps": 8680, "loss": 0.8932347297668457, "lr": 1.7620123289261523e-07, "epoch": 1.6357142857142857, "percentage": 81.79, "elapsed_time": "9:54:39", 
"remaining_time": "2:12:26"} +{"current_steps": 7100, "total_steps": 8680, "loss": 0.6661143898963928, "lr": 1.7598531967871465e-07, "epoch": 1.6359447004608296, "percentage": 81.8, "elapsed_time": "9:54:45", "remaining_time": "2:12:21"} +{"current_steps": 7101, "total_steps": 8680, "loss": 0.8413572311401367, "lr": 1.7576952606945415e-07, "epoch": 1.6361751152073731, "percentage": 81.81, "elapsed_time": "9:54:54", "remaining_time": "2:12:17"} +{"current_steps": 7102, "total_steps": 8680, "loss": 0.713816225528717, "lr": 1.7555385209615603e-07, "epoch": 1.636405529953917, "percentage": 81.82, "elapsed_time": "9:55:00", "remaining_time": "2:12:12"} +{"current_steps": 7103, "total_steps": 8680, "loss": 0.8588179349899292, "lr": 1.7533829779012466e-07, "epoch": 1.6366359447004608, "percentage": 81.83, "elapsed_time": "9:55:06", "remaining_time": "2:12:07"} +{"current_steps": 7104, "total_steps": 8680, "loss": 0.8666437864303589, "lr": 1.7512286318264778e-07, "epoch": 1.6368663594470045, "percentage": 81.84, "elapsed_time": "9:55:11", "remaining_time": "2:12:02"} +{"current_steps": 7105, "total_steps": 8680, "loss": 0.9219843745231628, "lr": 1.7490754830499522e-07, "epoch": 1.6370967741935485, "percentage": 81.85, "elapsed_time": "9:55:16", "remaining_time": "2:11:57"} +{"current_steps": 7106, "total_steps": 8680, "loss": 0.93767249584198, "lr": 1.7469235318841956e-07, "epoch": 1.6373271889400922, "percentage": 81.87, "elapsed_time": "9:55:24", "remaining_time": "2:11:53"} +{"current_steps": 7107, "total_steps": 8680, "loss": 0.7317457795143127, "lr": 1.7447727786415644e-07, "epoch": 1.637557603686636, "percentage": 81.88, "elapsed_time": "9:55:32", "remaining_time": "2:11:48"} +{"current_steps": 7108, "total_steps": 8680, "loss": 0.850578784942627, "lr": 1.7426232236342365e-07, "epoch": 1.6377880184331797, "percentage": 81.89, "elapsed_time": "9:55:38", "remaining_time": "2:11:43"} +{"current_steps": 7109, "total_steps": 8680, "loss": 0.7580707669258118, "lr": 
1.7404748671742143e-07, "epoch": 1.6380184331797234, "percentage": 81.9, "elapsed_time": "9:55:46", "remaining_time": "2:11:39"} +{"current_steps": 7110, "total_steps": 8680, "loss": 0.8393806219100952, "lr": 1.738327709573333e-07, "epoch": 1.6382488479262673, "percentage": 81.91, "elapsed_time": "9:55:53", "remaining_time": "2:11:34"} +{"current_steps": 7111, "total_steps": 8680, "loss": 0.6641673445701599, "lr": 1.7361817511432474e-07, "epoch": 1.638479262672811, "percentage": 81.92, "elapsed_time": "9:55:59", "remaining_time": "2:11:30"} +{"current_steps": 7112, "total_steps": 8680, "loss": 0.7570137977600098, "lr": 1.734036992195438e-07, "epoch": 1.6387096774193548, "percentage": 81.94, "elapsed_time": "9:56:06", "remaining_time": "2:11:25"} +{"current_steps": 7113, "total_steps": 8680, "loss": 0.78557288646698, "lr": 1.7318934330412194e-07, "epoch": 1.6389400921658988, "percentage": 81.95, "elapsed_time": "9:56:13", "remaining_time": "2:11:20"} +{"current_steps": 7114, "total_steps": 8680, "loss": 0.8309692740440369, "lr": 1.729751073991721e-07, "epoch": 1.6391705069124423, "percentage": 81.96, "elapsed_time": "9:56:19", "remaining_time": "2:11:16"} +{"current_steps": 7115, "total_steps": 8680, "loss": 0.6409872770309448, "lr": 1.727609915357908e-07, "epoch": 1.6394009216589862, "percentage": 81.97, "elapsed_time": "9:56:25", "remaining_time": "2:11:11"} +{"current_steps": 7116, "total_steps": 8680, "loss": 0.7916153073310852, "lr": 1.7254699574505648e-07, "epoch": 1.63963133640553, "percentage": 81.98, "elapsed_time": "9:56:31", "remaining_time": "2:11:06"} +{"current_steps": 7117, "total_steps": 8680, "loss": 0.7925357818603516, "lr": 1.7233312005803015e-07, "epoch": 1.6398617511520737, "percentage": 81.99, "elapsed_time": "9:56:37", "remaining_time": "2:11:01"} +{"current_steps": 7118, "total_steps": 8680, "loss": 0.9124211668968201, "lr": 1.7211936450575594e-07, "epoch": 1.6400921658986176, "percentage": 82.0, "elapsed_time": "9:56:43", "remaining_time": 
"2:10:56"} +{"current_steps": 7119, "total_steps": 8680, "loss": 0.8905198574066162, "lr": 1.7190572911925994e-07, "epoch": 1.6403225806451613, "percentage": 82.02, "elapsed_time": "9:56:49", "remaining_time": "2:10:51"} +{"current_steps": 7120, "total_steps": 8680, "loss": 0.8139728307723999, "lr": 1.716922139295509e-07, "epoch": 1.640552995391705, "percentage": 82.03, "elapsed_time": "9:56:54", "remaining_time": "2:10:46"} +{"current_steps": 7121, "total_steps": 8680, "loss": 0.7607166767120361, "lr": 1.7147881896762074e-07, "epoch": 1.6407834101382488, "percentage": 82.04, "elapsed_time": "9:57:00", "remaining_time": "2:10:42"} +{"current_steps": 7122, "total_steps": 8680, "loss": 0.806864857673645, "lr": 1.7126554426444316e-07, "epoch": 1.6410138248847925, "percentage": 82.05, "elapsed_time": "9:57:07", "remaining_time": "2:10:37"} +{"current_steps": 7123, "total_steps": 8680, "loss": 0.697334885597229, "lr": 1.710523898509747e-07, "epoch": 1.6412442396313365, "percentage": 82.06, "elapsed_time": "9:57:16", "remaining_time": "2:10:33"} +{"current_steps": 7124, "total_steps": 8680, "loss": 0.7313966751098633, "lr": 1.7083935575815455e-07, "epoch": 1.6414746543778802, "percentage": 82.07, "elapsed_time": "9:57:21", "remaining_time": "2:10:28"} +{"current_steps": 7125, "total_steps": 8680, "loss": 0.8857930898666382, "lr": 1.7062644201690413e-07, "epoch": 1.641705069124424, "percentage": 82.09, "elapsed_time": "9:57:27", "remaining_time": "2:10:23"} +{"current_steps": 7126, "total_steps": 8680, "loss": 0.7451884746551514, "lr": 1.7041364865812758e-07, "epoch": 1.6419354838709679, "percentage": 82.1, "elapsed_time": "9:57:33", "remaining_time": "2:10:18"} +{"current_steps": 7127, "total_steps": 8680, "loss": 0.7023841142654419, "lr": 1.7020097571271186e-07, "epoch": 1.6421658986175114, "percentage": 82.11, "elapsed_time": "9:57:38", "remaining_time": "2:10:13"} +{"current_steps": 7128, "total_steps": 8680, "loss": 0.708385705947876, "lr": 1.6998842321152607e-07, 
"epoch": 1.6423963133640553, "percentage": 82.12, "elapsed_time": "9:57:43", "remaining_time": "2:10:08"} +{"current_steps": 7129, "total_steps": 8680, "loss": 0.7885474562644958, "lr": 1.697759911854215e-07, "epoch": 1.642626728110599, "percentage": 82.13, "elapsed_time": "9:57:50", "remaining_time": "2:10:03"} +{"current_steps": 7130, "total_steps": 8680, "loss": 0.7054568529129028, "lr": 1.695636796652331e-07, "epoch": 1.6428571428571428, "percentage": 82.14, "elapsed_time": "9:57:55", "remaining_time": "2:09:59"} +{"current_steps": 7131, "total_steps": 8680, "loss": 0.6899726986885071, "lr": 1.6935148868177718e-07, "epoch": 1.6430875576036867, "percentage": 82.15, "elapsed_time": "9:58:01", "remaining_time": "2:09:54"} +{"current_steps": 7132, "total_steps": 8680, "loss": 0.8558614253997803, "lr": 1.6913941826585288e-07, "epoch": 1.6433179723502302, "percentage": 82.17, "elapsed_time": "9:58:07", "remaining_time": "2:09:49"} +{"current_steps": 7133, "total_steps": 8680, "loss": 0.7741858959197998, "lr": 1.6892746844824223e-07, "epoch": 1.6435483870967742, "percentage": 82.18, "elapsed_time": "9:58:11", "remaining_time": "2:09:44"} +{"current_steps": 7134, "total_steps": 8680, "loss": 0.7332532405853271, "lr": 1.6871563925970943e-07, "epoch": 1.643778801843318, "percentage": 82.19, "elapsed_time": "9:58:16", "remaining_time": "2:09:39"} +{"current_steps": 7135, "total_steps": 8680, "loss": 0.8288085460662842, "lr": 1.6850393073100078e-07, "epoch": 1.6440092165898617, "percentage": 82.2, "elapsed_time": "9:58:20", "remaining_time": "2:09:33"} +{"current_steps": 7136, "total_steps": 8680, "loss": 0.9470697641372681, "lr": 1.682923428928461e-07, "epoch": 1.6442396313364056, "percentage": 82.21, "elapsed_time": "9:58:24", "remaining_time": "2:09:28"} +{"current_steps": 7137, "total_steps": 8680, "loss": 0.7123041749000549, "lr": 1.6808087577595686e-07, "epoch": 1.6444700460829493, "percentage": 82.22, "elapsed_time": "9:58:31", "remaining_time": "2:09:23"} 
+{"current_steps": 7138, "total_steps": 8680, "loss": 0.8077690005302429, "lr": 1.6786952941102694e-07, "epoch": 1.644700460829493, "percentage": 82.24, "elapsed_time": "9:58:35", "remaining_time": "2:09:18"} +{"current_steps": 7139, "total_steps": 8680, "loss": 0.767215371131897, "lr": 1.6765830382873348e-07, "epoch": 1.644930875576037, "percentage": 82.25, "elapsed_time": "9:58:40", "remaining_time": "2:09:13"} +{"current_steps": 7140, "total_steps": 8680, "loss": 0.7488540410995483, "lr": 1.6744719905973502e-07, "epoch": 1.6451612903225805, "percentage": 82.26, "elapsed_time": "9:58:45", "remaining_time": "2:09:08"} +{"current_steps": 7141, "total_steps": 8680, "loss": 0.7841323018074036, "lr": 1.6723621513467378e-07, "epoch": 1.6453917050691245, "percentage": 82.27, "elapsed_time": "9:58:49", "remaining_time": "2:09:03"} +{"current_steps": 7142, "total_steps": 8680, "loss": 0.65464186668396, "lr": 1.6702535208417346e-07, "epoch": 1.6456221198156682, "percentage": 82.28, "elapsed_time": "9:58:56", "remaining_time": "2:08:58"} +{"current_steps": 7143, "total_steps": 8680, "loss": 0.8845036029815674, "lr": 1.6681460993884056e-07, "epoch": 1.645852534562212, "percentage": 82.29, "elapsed_time": "9:59:00", "remaining_time": "2:08:53"} +{"current_steps": 7144, "total_steps": 8680, "loss": 0.6741687655448914, "lr": 1.6660398872926396e-07, "epoch": 1.6460829493087559, "percentage": 82.3, "elapsed_time": "9:59:06", "remaining_time": "2:08:48"} +{"current_steps": 7145, "total_steps": 8680, "loss": 0.8656717538833618, "lr": 1.663934884860152e-07, "epoch": 1.6463133640552994, "percentage": 82.32, "elapsed_time": "9:59:10", "remaining_time": "2:08:43"} +{"current_steps": 7146, "total_steps": 8680, "loss": 0.7588434219360352, "lr": 1.6618310923964785e-07, "epoch": 1.6465437788018433, "percentage": 82.33, "elapsed_time": "9:59:14", "remaining_time": "2:08:38"} +{"current_steps": 7147, "total_steps": 8680, "loss": 0.7180176973342896, "lr": 1.6597285102069846e-07, "epoch": 
1.646774193548387, "percentage": 82.34, "elapsed_time": "9:59:19", "remaining_time": "2:08:33"} +{"current_steps": 7148, "total_steps": 8680, "loss": 0.8253776431083679, "lr": 1.6576271385968576e-07, "epoch": 1.6470046082949308, "percentage": 82.35, "elapsed_time": "9:59:24", "remaining_time": "2:08:28"} +{"current_steps": 7149, "total_steps": 8680, "loss": 0.7200941443443298, "lr": 1.6555269778711046e-07, "epoch": 1.6472350230414747, "percentage": 82.36, "elapsed_time": "9:59:29", "remaining_time": "2:08:23"} +{"current_steps": 7150, "total_steps": 8680, "loss": 0.7076164484024048, "lr": 1.653428028334567e-07, "epoch": 1.6474654377880185, "percentage": 82.37, "elapsed_time": "9:59:35", "remaining_time": "2:08:18"} +{"current_steps": 7151, "total_steps": 8680, "loss": 0.8068090677261353, "lr": 1.6513302902919003e-07, "epoch": 1.6476958525345622, "percentage": 82.38, "elapsed_time": "9:59:40", "remaining_time": "2:08:13"} +{"current_steps": 7152, "total_steps": 8680, "loss": 0.9712029099464417, "lr": 1.6492337640475884e-07, "epoch": 1.6479262672811061, "percentage": 82.4, "elapsed_time": "9:59:46", "remaining_time": "2:08:08"} +{"current_steps": 7153, "total_steps": 8680, "loss": 0.8359737992286682, "lr": 1.6471384499059438e-07, "epoch": 1.6481566820276496, "percentage": 82.41, "elapsed_time": "9:59:51", "remaining_time": "2:08:03"} +{"current_steps": 7154, "total_steps": 8680, "loss": 0.8066359758377075, "lr": 1.645044348171094e-07, "epoch": 1.6483870967741936, "percentage": 82.42, "elapsed_time": "9:59:57", "remaining_time": "2:07:58"} +{"current_steps": 7155, "total_steps": 8680, "loss": 0.8717833757400513, "lr": 1.642951459146995e-07, "epoch": 1.6486175115207373, "percentage": 82.43, "elapsed_time": "10:00:00", "remaining_time": "2:07:53"} +{"current_steps": 7156, "total_steps": 8680, "loss": 0.7335910201072693, "lr": 1.6408597831374305e-07, "epoch": 1.648847926267281, "percentage": 82.44, "elapsed_time": "10:00:05", "remaining_time": "2:07:48"} 
+{"current_steps": 7157, "total_steps": 8680, "loss": 0.816049337387085, "lr": 1.6387693204460028e-07, "epoch": 1.649078341013825, "percentage": 82.45, "elapsed_time": "10:00:09", "remaining_time": "2:07:42"} +{"current_steps": 7158, "total_steps": 8680, "loss": 0.8060640096664429, "lr": 1.6366800713761364e-07, "epoch": 1.6493087557603685, "percentage": 82.47, "elapsed_time": "10:00:13", "remaining_time": "2:07:37"} +{"current_steps": 7159, "total_steps": 8680, "loss": 0.8477619886398315, "lr": 1.6345920362310894e-07, "epoch": 1.6495391705069125, "percentage": 82.48, "elapsed_time": "10:00:17", "remaining_time": "2:07:32"} +{"current_steps": 7160, "total_steps": 8680, "loss": 0.9793992638587952, "lr": 1.6325052153139329e-07, "epoch": 1.6497695852534562, "percentage": 82.49, "elapsed_time": "10:00:21", "remaining_time": "2:07:27"} +{"current_steps": 7161, "total_steps": 8680, "loss": 0.8020002245903015, "lr": 1.6304196089275658e-07, "epoch": 1.65, "percentage": 82.5, "elapsed_time": "10:00:27", "remaining_time": "2:07:22"} +{"current_steps": 7162, "total_steps": 8680, "loss": 0.8226429224014282, "lr": 1.6283352173747146e-07, "epoch": 1.6502304147465439, "percentage": 82.51, "elapsed_time": "10:00:31", "remaining_time": "2:07:16"} +{"current_steps": 7163, "total_steps": 8680, "loss": 0.7029248476028442, "lr": 1.6262520409579227e-07, "epoch": 1.6504608294930876, "percentage": 82.52, "elapsed_time": "10:00:36", "remaining_time": "2:07:11"} +{"current_steps": 7164, "total_steps": 8680, "loss": 0.7234015464782715, "lr": 1.6241700799795631e-07, "epoch": 1.6506912442396313, "percentage": 82.53, "elapsed_time": "10:00:42", "remaining_time": "2:07:07"} +{"current_steps": 7165, "total_steps": 8680, "loss": 0.854112982749939, "lr": 1.6220893347418285e-07, "epoch": 1.6509216589861753, "percentage": 82.55, "elapsed_time": "10:00:48", "remaining_time": "2:07:02"} +{"current_steps": 7166, "total_steps": 8680, "loss": 0.8098663091659546, "lr": 1.6200098055467325e-07, "epoch": 
1.6511520737327188, "percentage": 82.56, "elapsed_time": "10:00:52", "remaining_time": "2:06:56"} +{"current_steps": 7167, "total_steps": 8680, "loss": 0.9032876491546631, "lr": 1.617931492696123e-07, "epoch": 1.6513824884792627, "percentage": 82.57, "elapsed_time": "10:00:56", "remaining_time": "2:06:51"} +{"current_steps": 7168, "total_steps": 8680, "loss": 0.7048916816711426, "lr": 1.6158543964916606e-07, "epoch": 1.6516129032258065, "percentage": 82.58, "elapsed_time": "10:01:02", "remaining_time": "2:06:46"} +{"current_steps": 7169, "total_steps": 8680, "loss": 0.879542350769043, "lr": 1.6137785172348307e-07, "epoch": 1.6518433179723502, "percentage": 82.59, "elapsed_time": "10:01:07", "remaining_time": "2:06:41"} +{"current_steps": 7170, "total_steps": 8680, "loss": 0.7851279377937317, "lr": 1.611703855226949e-07, "epoch": 1.6520737327188941, "percentage": 82.6, "elapsed_time": "10:01:12", "remaining_time": "2:06:36"} +{"current_steps": 7171, "total_steps": 8680, "loss": 0.779682457447052, "lr": 1.6096304107691493e-07, "epoch": 1.6523041474654376, "percentage": 82.62, "elapsed_time": "10:01:17", "remaining_time": "2:06:31"} +{"current_steps": 7172, "total_steps": 8680, "loss": 0.7761027812957764, "lr": 1.6075581841623854e-07, "epoch": 1.6525345622119816, "percentage": 82.63, "elapsed_time": "10:01:22", "remaining_time": "2:06:26"} +{"current_steps": 7173, "total_steps": 8680, "loss": 0.726230263710022, "lr": 1.605487175707443e-07, "epoch": 1.6527649769585253, "percentage": 82.64, "elapsed_time": "10:01:28", "remaining_time": "2:06:21"} +{"current_steps": 7174, "total_steps": 8680, "loss": 0.915956437587738, "lr": 1.6034173857049238e-07, "epoch": 1.652995391705069, "percentage": 82.65, "elapsed_time": "10:01:33", "remaining_time": "2:06:16"} +{"current_steps": 7175, "total_steps": 8680, "loss": 0.8435969352722168, "lr": 1.6013488144552534e-07, "epoch": 1.653225806451613, "percentage": 82.66, "elapsed_time": "10:01:38", "remaining_time": "2:06:11"} 
+{"current_steps": 7176, "total_steps": 8680, "loss": 0.7775791883468628, "lr": 1.599281462258687e-07, "epoch": 1.6534562211981567, "percentage": 82.67, "elapsed_time": "10:01:43", "remaining_time": "2:06:06"} +{"current_steps": 7177, "total_steps": 8680, "loss": 0.7578383684158325, "lr": 1.5972153294152945e-07, "epoch": 1.6536866359447004, "percentage": 82.68, "elapsed_time": "10:01:49", "remaining_time": "2:06:01"} +{"current_steps": 7178, "total_steps": 8680, "loss": 0.8378545045852661, "lr": 1.5951504162249706e-07, "epoch": 1.6539170506912444, "percentage": 82.7, "elapsed_time": "10:01:55", "remaining_time": "2:05:57"} +{"current_steps": 7179, "total_steps": 8680, "loss": 0.7071488499641418, "lr": 1.59308672298744e-07, "epoch": 1.654147465437788, "percentage": 82.71, "elapsed_time": "10:02:02", "remaining_time": "2:05:52"} +{"current_steps": 7180, "total_steps": 8680, "loss": 0.7424521446228027, "lr": 1.591024250002243e-07, "epoch": 1.6543778801843319, "percentage": 82.72, "elapsed_time": "10:02:08", "remaining_time": "2:05:47"} +{"current_steps": 7181, "total_steps": 8680, "loss": 0.6503180265426636, "lr": 1.5889629975687401e-07, "epoch": 1.6546082949308756, "percentage": 82.73, "elapsed_time": "10:02:13", "remaining_time": "2:05:42"} +{"current_steps": 7182, "total_steps": 8680, "loss": 0.7589888572692871, "lr": 1.5869029659861265e-07, "epoch": 1.6548387096774193, "percentage": 82.74, "elapsed_time": "10:02:19", "remaining_time": "2:05:37"} +{"current_steps": 7183, "total_steps": 8680, "loss": 0.7609498500823975, "lr": 1.5848441555534109e-07, "epoch": 1.6550691244239633, "percentage": 82.75, "elapsed_time": "10:02:25", "remaining_time": "2:05:33"} +{"current_steps": 7184, "total_steps": 8680, "loss": 0.7813476324081421, "lr": 1.582786566569425e-07, "epoch": 1.6552995391705068, "percentage": 82.76, "elapsed_time": "10:02:31", "remaining_time": "2:05:28"} +{"current_steps": 7185, "total_steps": 8680, "loss": 0.7386292219161987, "lr": 1.5807301993328258e-07, 
"epoch": 1.6555299539170507, "percentage": 82.78, "elapsed_time": "10:02:38", "remaining_time": "2:05:23"} +{"current_steps": 7186, "total_steps": 8680, "loss": 1.0402865409851074, "lr": 1.5786750541420922e-07, "epoch": 1.6557603686635944, "percentage": 82.79, "elapsed_time": "10:02:43", "remaining_time": "2:05:18"} +{"current_steps": 7187, "total_steps": 8680, "loss": 0.7375132441520691, "lr": 1.5766211312955246e-07, "epoch": 1.6559907834101382, "percentage": 82.8, "elapsed_time": "10:02:49", "remaining_time": "2:05:13"} +{"current_steps": 7188, "total_steps": 8680, "loss": 0.7903615236282349, "lr": 1.574568431091251e-07, "epoch": 1.6562211981566821, "percentage": 82.81, "elapsed_time": "10:02:54", "remaining_time": "2:05:08"} +{"current_steps": 7189, "total_steps": 8680, "loss": 0.6912896633148193, "lr": 1.5725169538272132e-07, "epoch": 1.6564516129032258, "percentage": 82.82, "elapsed_time": "10:03:00", "remaining_time": "2:05:03"} +{"current_steps": 7190, "total_steps": 8680, "loss": 0.7181826233863831, "lr": 1.570466699801185e-07, "epoch": 1.6566820276497696, "percentage": 82.83, "elapsed_time": "10:03:06", "remaining_time": "2:04:59"} +{"current_steps": 7191, "total_steps": 8680, "loss": 0.8328898549079895, "lr": 1.5684176693107566e-07, "epoch": 1.6569124423963135, "percentage": 82.85, "elapsed_time": "10:03:11", "remaining_time": "2:04:53"} +{"current_steps": 7192, "total_steps": 8680, "loss": 0.7775120735168457, "lr": 1.5663698626533384e-07, "epoch": 1.657142857142857, "percentage": 82.86, "elapsed_time": "10:03:17", "remaining_time": "2:04:49"} +{"current_steps": 7193, "total_steps": 8680, "loss": 0.8412137031555176, "lr": 1.564323280126173e-07, "epoch": 1.657373271889401, "percentage": 82.87, "elapsed_time": "10:03:23", "remaining_time": "2:04:44"} +{"current_steps": 7194, "total_steps": 8680, "loss": 0.7046825885772705, "lr": 1.562277922026316e-07, "epoch": 1.6576036866359447, "percentage": 82.88, "elapsed_time": "10:03:28", "remaining_time": "2:04:39"} 
+{"current_steps": 7195, "total_steps": 8680, "loss": 0.7107498645782471, "lr": 1.5602337886506468e-07, "epoch": 1.6578341013824884, "percentage": 82.89, "elapsed_time": "10:03:34", "remaining_time": "2:04:34"} +{"current_steps": 7196, "total_steps": 8680, "loss": 0.640724778175354, "lr": 1.558190880295872e-07, "epoch": 1.6580645161290324, "percentage": 82.9, "elapsed_time": "10:03:39", "remaining_time": "2:04:29"} +{"current_steps": 7197, "total_steps": 8680, "loss": 0.7856858968734741, "lr": 1.556149197258515e-07, "epoch": 1.658294930875576, "percentage": 82.91, "elapsed_time": "10:03:44", "remaining_time": "2:04:24"} +{"current_steps": 7198, "total_steps": 8680, "loss": 0.7956376075744629, "lr": 1.554108739834923e-07, "epoch": 1.6585253456221198, "percentage": 82.93, "elapsed_time": "10:03:50", "remaining_time": "2:04:19"} +{"current_steps": 7199, "total_steps": 8680, "loss": 0.721325159072876, "lr": 1.5520695083212675e-07, "epoch": 1.6587557603686636, "percentage": 82.94, "elapsed_time": "10:03:56", "remaining_time": "2:04:14"} +{"current_steps": 7200, "total_steps": 8680, "loss": 0.7043335437774658, "lr": 1.550031503013539e-07, "epoch": 1.6589861751152073, "percentage": 82.95, "elapsed_time": "10:04:03", "remaining_time": "2:04:10"} +{"current_steps": 7201, "total_steps": 8680, "loss": 0.7154408693313599, "lr": 1.5479947242075496e-07, "epoch": 1.6592165898617512, "percentage": 82.96, "elapsed_time": "10:04:11", "remaining_time": "2:04:05"} +{"current_steps": 7202, "total_steps": 8680, "loss": 0.7353748083114624, "lr": 1.5459591721989397e-07, "epoch": 1.659447004608295, "percentage": 82.97, "elapsed_time": "10:04:18", "remaining_time": "2:04:00"} +{"current_steps": 7203, "total_steps": 8680, "loss": 0.7404372692108154, "lr": 1.5439248472831644e-07, "epoch": 1.6596774193548387, "percentage": 82.98, "elapsed_time": "10:04:24", "remaining_time": "2:03:56"} +{"current_steps": 7204, "total_steps": 8680, "loss": 0.8678613305091858, "lr": 1.541891749755503e-07, 
"epoch": 1.6599078341013827, "percentage": 83.0, "elapsed_time": "10:04:29", "remaining_time": "2:03:51"} +{"current_steps": 7205, "total_steps": 8680, "loss": 0.7177796363830566, "lr": 1.5398598799110562e-07, "epoch": 1.6601382488479262, "percentage": 83.01, "elapsed_time": "10:04:36", "remaining_time": "2:03:46"} +{"current_steps": 7206, "total_steps": 8680, "loss": 0.7610895037651062, "lr": 1.537829238044749e-07, "epoch": 1.66036866359447, "percentage": 83.02, "elapsed_time": "10:04:41", "remaining_time": "2:03:41"} +{"current_steps": 7207, "total_steps": 8680, "loss": 0.7340127825737, "lr": 1.5357998244513227e-07, "epoch": 1.6605990783410138, "percentage": 83.03, "elapsed_time": "10:04:47", "remaining_time": "2:03:36"} +{"current_steps": 7208, "total_steps": 8680, "loss": 0.7060200572013855, "lr": 1.5337716394253498e-07, "epoch": 1.6608294930875576, "percentage": 83.04, "elapsed_time": "10:04:54", "remaining_time": "2:03:32"} +{"current_steps": 7209, "total_steps": 8680, "loss": 0.8592087030410767, "lr": 1.5317446832612147e-07, "epoch": 1.6610599078341015, "percentage": 83.05, "elapsed_time": "10:05:03", "remaining_time": "2:03:27"} +{"current_steps": 7210, "total_steps": 8680, "loss": 0.8687897324562073, "lr": 1.5297189562531264e-07, "epoch": 1.661290322580645, "percentage": 83.06, "elapsed_time": "10:05:08", "remaining_time": "2:03:22"} +{"current_steps": 7211, "total_steps": 8680, "loss": 0.8158563375473022, "lr": 1.5276944586951202e-07, "epoch": 1.661520737327189, "percentage": 83.08, "elapsed_time": "10:05:14", "remaining_time": "2:03:17"} +{"current_steps": 7212, "total_steps": 8680, "loss": 0.7734059691429138, "lr": 1.5256711908810482e-07, "epoch": 1.6617511520737327, "percentage": 83.09, "elapsed_time": "10:05:20", "remaining_time": "2:03:13"} +{"current_steps": 7213, "total_steps": 8680, "loss": 0.8302994966506958, "lr": 1.5236491531045815e-07, "epoch": 1.6619815668202764, "percentage": 83.1, "elapsed_time": "10:05:26", "remaining_time": "2:03:08"} 
+{"current_steps": 7214, "total_steps": 8680, "loss": 0.8474830389022827, "lr": 1.5216283456592216e-07, "epoch": 1.6622119815668204, "percentage": 83.11, "elapsed_time": "10:05:32", "remaining_time": "2:03:03"} +{"current_steps": 7215, "total_steps": 8680, "loss": 0.7903469800949097, "lr": 1.5196087688382808e-07, "epoch": 1.662442396313364, "percentage": 83.12, "elapsed_time": "10:05:37", "remaining_time": "2:02:58"} +{"current_steps": 7216, "total_steps": 8680, "loss": 0.7756912708282471, "lr": 1.5175904229349035e-07, "epoch": 1.6626728110599078, "percentage": 83.13, "elapsed_time": "10:05:44", "remaining_time": "2:02:53"} +{"current_steps": 7217, "total_steps": 8680, "loss": 0.7495905756950378, "lr": 1.5155733082420463e-07, "epoch": 1.6629032258064518, "percentage": 83.15, "elapsed_time": "10:05:52", "remaining_time": "2:02:49"} +{"current_steps": 7218, "total_steps": 8680, "loss": 0.8536649942398071, "lr": 1.5135574250524897e-07, "epoch": 1.6631336405529953, "percentage": 83.16, "elapsed_time": "10:05:59", "remaining_time": "2:02:44"} +{"current_steps": 7219, "total_steps": 8680, "loss": 0.7301580905914307, "lr": 1.5115427736588404e-07, "epoch": 1.6633640552995392, "percentage": 83.17, "elapsed_time": "10:06:04", "remaining_time": "2:02:39"} +{"current_steps": 7220, "total_steps": 8680, "loss": 0.7131164073944092, "lr": 1.5095293543535203e-07, "epoch": 1.663594470046083, "percentage": 83.18, "elapsed_time": "10:06:08", "remaining_time": "2:02:34"} +{"current_steps": 7221, "total_steps": 8680, "loss": 0.708457350730896, "lr": 1.5075171674287712e-07, "epoch": 1.6638248847926267, "percentage": 83.19, "elapsed_time": "10:06:15", "remaining_time": "2:02:29"} +{"current_steps": 7222, "total_steps": 8680, "loss": 0.7509758472442627, "lr": 1.5055062131766662e-07, "epoch": 1.6640552995391706, "percentage": 83.2, "elapsed_time": "10:06:21", "remaining_time": "2:02:24"} +{"current_steps": 7223, "total_steps": 8680, "loss": 0.8401786088943481, "lr": 1.503496491889089e-07, 
"epoch": 1.6642857142857141, "percentage": 83.21, "elapsed_time": "10:06:25", "remaining_time": "2:02:19"} +{"current_steps": 7224, "total_steps": 8680, "loss": 0.8578320741653442, "lr": 1.5014880038577482e-07, "epoch": 1.664516129032258, "percentage": 83.23, "elapsed_time": "10:06:30", "remaining_time": "2:02:14"} +{"current_steps": 7225, "total_steps": 8680, "loss": 0.6890276670455933, "lr": 1.4994807493741723e-07, "epoch": 1.6647465437788018, "percentage": 83.24, "elapsed_time": "10:06:35", "remaining_time": "2:02:09"} +{"current_steps": 7226, "total_steps": 8680, "loss": 0.785246729850769, "lr": 1.4974747287297128e-07, "epoch": 1.6649769585253456, "percentage": 83.25, "elapsed_time": "10:06:41", "remaining_time": "2:02:04"} +{"current_steps": 7227, "total_steps": 8680, "loss": 0.7826062440872192, "lr": 1.4954699422155382e-07, "epoch": 1.6652073732718895, "percentage": 83.26, "elapsed_time": "10:06:46", "remaining_time": "2:01:59"} +{"current_steps": 7228, "total_steps": 8680, "loss": 0.807513952255249, "lr": 1.4934663901226452e-07, "epoch": 1.6654377880184332, "percentage": 83.27, "elapsed_time": "10:06:51", "remaining_time": "2:01:54"} +{"current_steps": 7229, "total_steps": 8680, "loss": 0.8138872385025024, "lr": 1.4914640727418448e-07, "epoch": 1.665668202764977, "percentage": 83.28, "elapsed_time": "10:06:59", "remaining_time": "2:01:50"} +{"current_steps": 7230, "total_steps": 8680, "loss": 0.8465121984481812, "lr": 1.489462990363768e-07, "epoch": 1.6658986175115207, "percentage": 83.29, "elapsed_time": "10:07:04", "remaining_time": "2:01:45"} +{"current_steps": 7231, "total_steps": 8680, "loss": 0.7649251222610474, "lr": 1.4874631432788743e-07, "epoch": 1.6661290322580644, "percentage": 83.31, "elapsed_time": "10:07:10", "remaining_time": "2:01:40"} +{"current_steps": 7232, "total_steps": 8680, "loss": 0.8297271132469177, "lr": 1.485464531777436e-07, "epoch": 1.6663594470046084, "percentage": 83.32, "elapsed_time": "10:07:15", "remaining_time": "2:01:35"} 
+{"current_steps": 7233, "total_steps": 8680, "loss": 0.7873194217681885, "lr": 1.483467156149546e-07, "epoch": 1.666589861751152, "percentage": 83.33, "elapsed_time": "10:07:22", "remaining_time": "2:01:30"} +{"current_steps": 7234, "total_steps": 8680, "loss": 0.6924761533737183, "lr": 1.4814710166851274e-07, "epoch": 1.6668202764976958, "percentage": 83.34, "elapsed_time": "10:07:28", "remaining_time": "2:01:25"} +{"current_steps": 7235, "total_steps": 8680, "loss": 0.6600887179374695, "lr": 1.4794761136739132e-07, "epoch": 1.6670506912442398, "percentage": 83.35, "elapsed_time": "10:07:36", "remaining_time": "2:01:21"} +{"current_steps": 7236, "total_steps": 8680, "loss": 0.6552041172981262, "lr": 1.477482447405458e-07, "epoch": 1.6672811059907833, "percentage": 83.36, "elapsed_time": "10:07:43", "remaining_time": "2:01:16"} +{"current_steps": 7237, "total_steps": 8680, "loss": 0.8609327077865601, "lr": 1.4754900181691465e-07, "epoch": 1.6675115207373272, "percentage": 83.38, "elapsed_time": "10:07:50", "remaining_time": "2:01:11"} +{"current_steps": 7238, "total_steps": 8680, "loss": 0.6970123052597046, "lr": 1.4734988262541726e-07, "epoch": 1.667741935483871, "percentage": 83.39, "elapsed_time": "10:07:56", "remaining_time": "2:01:07"} +{"current_steps": 7239, "total_steps": 8680, "loss": 0.7859683036804199, "lr": 1.4715088719495573e-07, "epoch": 1.6679723502304147, "percentage": 83.4, "elapsed_time": "10:08:03", "remaining_time": "2:01:02"} +{"current_steps": 7240, "total_steps": 8680, "loss": 0.7448029518127441, "lr": 1.4695201555441393e-07, "epoch": 1.6682027649769586, "percentage": 83.41, "elapsed_time": "10:08:09", "remaining_time": "2:00:57"} +{"current_steps": 7241, "total_steps": 8680, "loss": 0.7566728591918945, "lr": 1.4675326773265762e-07, "epoch": 1.6684331797235024, "percentage": 83.42, "elapsed_time": "10:08:14", "remaining_time": "2:00:52"} +{"current_steps": 7242, "total_steps": 8680, "loss": 0.7563366889953613, "lr": 1.465546437585351e-07, 
"epoch": 1.668663594470046, "percentage": 83.43, "elapsed_time": "10:08:20", "remaining_time": "2:00:47"} +{"current_steps": 7243, "total_steps": 8680, "loss": 0.8580834865570068, "lr": 1.4635614366087623e-07, "epoch": 1.6688940092165898, "percentage": 83.44, "elapsed_time": "10:08:26", "remaining_time": "2:00:42"} +{"current_steps": 7244, "total_steps": 8680, "loss": 0.6200178861618042, "lr": 1.4615776746849306e-07, "epoch": 1.6691244239631335, "percentage": 83.46, "elapsed_time": "10:08:36", "remaining_time": "2:00:38"} +{"current_steps": 7245, "total_steps": 8680, "loss": 0.8052491545677185, "lr": 1.4595951521017958e-07, "epoch": 1.6693548387096775, "percentage": 83.47, "elapsed_time": "10:08:43", "remaining_time": "2:00:34"} +{"current_steps": 7246, "total_steps": 8680, "loss": 0.7383530735969543, "lr": 1.4576138691471186e-07, "epoch": 1.6695852534562212, "percentage": 83.48, "elapsed_time": "10:08:48", "remaining_time": "2:00:29"} +{"current_steps": 7247, "total_steps": 8680, "loss": 0.6735742092132568, "lr": 1.4556338261084776e-07, "epoch": 1.669815668202765, "percentage": 83.49, "elapsed_time": "10:08:55", "remaining_time": "2:00:24"} +{"current_steps": 7248, "total_steps": 8680, "loss": 0.7570016980171204, "lr": 1.453655023273277e-07, "epoch": 1.670046082949309, "percentage": 83.5, "elapsed_time": "10:09:00", "remaining_time": "2:00:19"} +{"current_steps": 7249, "total_steps": 8680, "loss": 0.7271980047225952, "lr": 1.4516774609287364e-07, "epoch": 1.6702764976958524, "percentage": 83.51, "elapsed_time": "10:09:07", "remaining_time": "2:00:14"} +{"current_steps": 7250, "total_steps": 8680, "loss": 0.8567354083061218, "lr": 1.449701139361894e-07, "epoch": 1.6705069124423964, "percentage": 83.53, "elapsed_time": "10:09:12", "remaining_time": "2:00:09"} +{"current_steps": 7251, "total_steps": 8680, "loss": 0.8675428628921509, "lr": 1.447726058859614e-07, "epoch": 1.67073732718894, "percentage": 83.54, "elapsed_time": "10:09:15", "remaining_time": "2:00:04"} 
+{"current_steps": 7252, "total_steps": 8680, "loss": 0.9131098389625549, "lr": 1.4457522197085748e-07, "epoch": 1.6709677419354838, "percentage": 83.55, "elapsed_time": "10:09:20", "remaining_time": "1:59:59"} +{"current_steps": 7253, "total_steps": 8680, "loss": 0.7921037673950195, "lr": 1.4437796221952748e-07, "epoch": 1.6711981566820278, "percentage": 83.56, "elapsed_time": "10:09:25", "remaining_time": "1:59:54"} +{"current_steps": 7254, "total_steps": 8680, "loss": 0.7559863328933716, "lr": 1.441808266606037e-07, "epoch": 1.6714285714285713, "percentage": 83.57, "elapsed_time": "10:09:30", "remaining_time": "1:59:49"} +{"current_steps": 7255, "total_steps": 8680, "loss": 0.7433857917785645, "lr": 1.4398381532269998e-07, "epoch": 1.6716589861751152, "percentage": 83.58, "elapsed_time": "10:09:35", "remaining_time": "1:59:44"} +{"current_steps": 7256, "total_steps": 8680, "loss": 0.8171184062957764, "lr": 1.4378692823441207e-07, "epoch": 1.671889400921659, "percentage": 83.59, "elapsed_time": "10:09:40", "remaining_time": "1:59:38"} +{"current_steps": 7257, "total_steps": 8680, "loss": 0.7296291589736938, "lr": 1.4359016542431824e-07, "epoch": 1.6721198156682027, "percentage": 83.61, "elapsed_time": "10:09:45", "remaining_time": "1:59:33"} +{"current_steps": 7258, "total_steps": 8680, "loss": 0.7397829294204712, "lr": 1.4339352692097828e-07, "epoch": 1.6723502304147466, "percentage": 83.62, "elapsed_time": "10:09:49", "remaining_time": "1:59:28"} +{"current_steps": 7259, "total_steps": 8680, "loss": 0.6724194884300232, "lr": 1.431970127529335e-07, "epoch": 1.6725806451612903, "percentage": 83.63, "elapsed_time": "10:09:54", "remaining_time": "1:59:23"} +{"current_steps": 7260, "total_steps": 8680, "loss": 0.7711449861526489, "lr": 1.430006229487084e-07, "epoch": 1.672811059907834, "percentage": 83.64, "elapsed_time": "10:10:00", "remaining_time": "1:59:18"} +{"current_steps": 7261, "total_steps": 8680, "loss": 0.7581815719604492, "lr": 1.428043575368083e-07, 
"epoch": 1.673041474654378, "percentage": 83.65, "elapsed_time": "10:10:06", "remaining_time": "1:59:13"} +{"current_steps": 7262, "total_steps": 8680, "loss": 0.7092517614364624, "lr": 1.4260821654572063e-07, "epoch": 1.6732718894009215, "percentage": 83.66, "elapsed_time": "10:10:11", "remaining_time": "1:59:08"} +{"current_steps": 7263, "total_steps": 8680, "loss": 0.646745502948761, "lr": 1.4241220000391562e-07, "epoch": 1.6735023041474655, "percentage": 83.68, "elapsed_time": "10:10:16", "remaining_time": "1:59:03"} +{"current_steps": 7264, "total_steps": 8680, "loss": 0.7364122867584229, "lr": 1.4221630793984453e-07, "epoch": 1.6737327188940092, "percentage": 83.69, "elapsed_time": "10:10:20", "remaining_time": "1:58:58"} +{"current_steps": 7265, "total_steps": 8680, "loss": 0.8186795711517334, "lr": 1.4202054038194068e-07, "epoch": 1.673963133640553, "percentage": 83.7, "elapsed_time": "10:10:24", "remaining_time": "1:58:53"} +{"current_steps": 7266, "total_steps": 8680, "loss": 0.7172378301620483, "lr": 1.4182489735861957e-07, "epoch": 1.6741935483870969, "percentage": 83.71, "elapsed_time": "10:10:29", "remaining_time": "1:58:48"} +{"current_steps": 7267, "total_steps": 8680, "loss": 0.8780974745750427, "lr": 1.416293788982783e-07, "epoch": 1.6744239631336404, "percentage": 83.72, "elapsed_time": "10:10:34", "remaining_time": "1:58:43"} +{"current_steps": 7268, "total_steps": 8680, "loss": 0.9034930467605591, "lr": 1.4143398502929672e-07, "epoch": 1.6746543778801843, "percentage": 83.73, "elapsed_time": "10:10:39", "remaining_time": "1:58:38"} +{"current_steps": 7269, "total_steps": 8680, "loss": 0.7994415760040283, "lr": 1.4123871578003543e-07, "epoch": 1.674884792626728, "percentage": 83.74, "elapsed_time": "10:10:44", "remaining_time": "1:58:33"} +{"current_steps": 7270, "total_steps": 8680, "loss": 0.8327854871749878, "lr": 1.410435711788376e-07, "epoch": 1.6751152073732718, "percentage": 83.76, "elapsed_time": "10:10:48", "remaining_time": "1:58:27"} 
+{"current_steps": 7271, "total_steps": 8680, "loss": 0.7667550444602966, "lr": 1.408485512540285e-07, "epoch": 1.6753456221198157, "percentage": 83.77, "elapsed_time": "10:10:53", "remaining_time": "1:58:22"} +{"current_steps": 7272, "total_steps": 8680, "loss": 0.8073924779891968, "lr": 1.4065365603391478e-07, "epoch": 1.6755760368663595, "percentage": 83.78, "elapsed_time": "10:10:58", "remaining_time": "1:58:17"} +{"current_steps": 7273, "total_steps": 8680, "loss": 0.7265589237213135, "lr": 1.4045888554678497e-07, "epoch": 1.6758064516129032, "percentage": 83.79, "elapsed_time": "10:11:04", "remaining_time": "1:58:12"} +{"current_steps": 7274, "total_steps": 8680, "loss": 0.6912035942077637, "lr": 1.402642398209104e-07, "epoch": 1.6760368663594472, "percentage": 83.8, "elapsed_time": "10:11:08", "remaining_time": "1:58:07"} +{"current_steps": 7275, "total_steps": 8680, "loss": 0.917953372001648, "lr": 1.400697188845432e-07, "epoch": 1.6762672811059907, "percentage": 83.81, "elapsed_time": "10:11:14", "remaining_time": "1:58:02"} +{"current_steps": 7276, "total_steps": 8680, "loss": 0.6989340782165527, "lr": 1.3987532276591774e-07, "epoch": 1.6764976958525346, "percentage": 83.82, "elapsed_time": "10:11:19", "remaining_time": "1:57:57"} +{"current_steps": 7277, "total_steps": 8680, "loss": 0.6648346185684204, "lr": 1.396810514932507e-07, "epoch": 1.6767281105990783, "percentage": 83.84, "elapsed_time": "10:11:24", "remaining_time": "1:57:52"} +{"current_steps": 7278, "total_steps": 8680, "loss": 0.6462730169296265, "lr": 1.3948690509474014e-07, "epoch": 1.676958525345622, "percentage": 83.85, "elapsed_time": "10:11:30", "remaining_time": "1:57:47"} +{"current_steps": 7279, "total_steps": 8680, "loss": 0.6084051132202148, "lr": 1.3929288359856584e-07, "epoch": 1.677188940092166, "percentage": 83.86, "elapsed_time": "10:11:36", "remaining_time": "1:57:42"} +{"current_steps": 7280, "total_steps": 8680, "loss": 0.8593035936355591, "lr": 1.3909898703289037e-07, 
"epoch": 1.6774193548387095, "percentage": 83.87, "elapsed_time": "10:11:41", "remaining_time": "1:57:37"} +{"current_steps": 7281, "total_steps": 8680, "loss": 0.8064925670623779, "lr": 1.389052154258572e-07, "epoch": 1.6776497695852535, "percentage": 83.88, "elapsed_time": "10:11:46", "remaining_time": "1:57:32"} +{"current_steps": 7282, "total_steps": 8680, "loss": 0.7366064786911011, "lr": 1.3871156880559186e-07, "epoch": 1.6778801843317972, "percentage": 83.89, "elapsed_time": "10:11:50", "remaining_time": "1:57:27"} +{"current_steps": 7283, "total_steps": 8680, "loss": 0.8090124726295471, "lr": 1.3851804720020233e-07, "epoch": 1.678110599078341, "percentage": 83.91, "elapsed_time": "10:11:55", "remaining_time": "1:57:22"} +{"current_steps": 7284, "total_steps": 8680, "loss": 0.7326936721801758, "lr": 1.3832465063777787e-07, "epoch": 1.6783410138248849, "percentage": 83.92, "elapsed_time": "10:12:00", "remaining_time": "1:57:17"} +{"current_steps": 7285, "total_steps": 8680, "loss": 0.7142004370689392, "lr": 1.3813137914638961e-07, "epoch": 1.6785714285714286, "percentage": 83.93, "elapsed_time": "10:12:05", "remaining_time": "1:57:12"} +{"current_steps": 7286, "total_steps": 8680, "loss": 0.8358181715011597, "lr": 1.3793823275409066e-07, "epoch": 1.6788018433179723, "percentage": 83.94, "elapsed_time": "10:12:12", "remaining_time": "1:57:07"} +{"current_steps": 7287, "total_steps": 8680, "loss": 0.7337081432342529, "lr": 1.3774521148891583e-07, "epoch": 1.6790322580645163, "percentage": 83.95, "elapsed_time": "10:12:19", "remaining_time": "1:57:03"} +{"current_steps": 7288, "total_steps": 8680, "loss": 0.8029334545135498, "lr": 1.3755231537888222e-07, "epoch": 1.6792626728110598, "percentage": 83.96, "elapsed_time": "10:12:26", "remaining_time": "1:56:58"} +{"current_steps": 7289, "total_steps": 8680, "loss": 0.8132611513137817, "lr": 1.373595444519884e-07, "epoch": 1.6794930875576037, "percentage": 83.97, "elapsed_time": "10:12:32", "remaining_time": 
"1:56:53"} +{"current_steps": 7290, "total_steps": 8680, "loss": 0.7377278804779053, "lr": 1.3716689873621446e-07, "epoch": 1.6797235023041475, "percentage": 83.99, "elapsed_time": "10:12:38", "remaining_time": "1:56:48"} +{"current_steps": 7291, "total_steps": 8680, "loss": 0.788368284702301, "lr": 1.3697437825952307e-07, "epoch": 1.6799539170506912, "percentage": 84.0, "elapsed_time": "10:12:43", "remaining_time": "1:56:43"} +{"current_steps": 7292, "total_steps": 8680, "loss": 0.8288586139678955, "lr": 1.3678198304985822e-07, "epoch": 1.6801843317972351, "percentage": 84.01, "elapsed_time": "10:12:50", "remaining_time": "1:56:39"} +{"current_steps": 7293, "total_steps": 8680, "loss": 0.8534054160118103, "lr": 1.3658971313514567e-07, "epoch": 1.6804147465437786, "percentage": 84.02, "elapsed_time": "10:12:54", "remaining_time": "1:56:33"} +{"current_steps": 7294, "total_steps": 8680, "loss": 0.8730596303939819, "lr": 1.363975685432933e-07, "epoch": 1.6806451612903226, "percentage": 84.03, "elapsed_time": "10:13:00", "remaining_time": "1:56:29"} +{"current_steps": 7295, "total_steps": 8680, "loss": 0.6891343593597412, "lr": 1.3620554930219076e-07, "epoch": 1.6808755760368663, "percentage": 84.04, "elapsed_time": "10:13:07", "remaining_time": "1:56:24"} +{"current_steps": 7296, "total_steps": 8680, "loss": 0.8575270175933838, "lr": 1.360136554397089e-07, "epoch": 1.68110599078341, "percentage": 84.06, "elapsed_time": "10:13:13", "remaining_time": "1:56:19"} +{"current_steps": 7297, "total_steps": 8680, "loss": 0.82694011926651, "lr": 1.3582188698370134e-07, "epoch": 1.681336405529954, "percentage": 84.07, "elapsed_time": "10:13:18", "remaining_time": "1:56:14"} +{"current_steps": 7298, "total_steps": 8680, "loss": 0.6468113660812378, "lr": 1.3563024396200296e-07, "epoch": 1.6815668202764977, "percentage": 84.08, "elapsed_time": "10:13:23", "remaining_time": "1:56:09"} +{"current_steps": 7299, "total_steps": 8680, "loss": 0.6818577647209167, "lr": 
1.3543872640243016e-07, "epoch": 1.6817972350230415, "percentage": 84.09, "elapsed_time": "10:13:27", "remaining_time": "1:56:04"} +{"current_steps": 7300, "total_steps": 8680, "loss": 0.7630767822265625, "lr": 1.352473343327819e-07, "epoch": 1.6820276497695854, "percentage": 84.1, "elapsed_time": "10:13:33", "remaining_time": "1:55:59"} +{"current_steps": 7301, "total_steps": 8680, "loss": 0.9019678831100464, "lr": 1.3505606778083832e-07, "epoch": 1.682258064516129, "percentage": 84.11, "elapsed_time": "10:13:42", "remaining_time": "1:55:54"} +{"current_steps": 7302, "total_steps": 8680, "loss": 0.821324348449707, "lr": 1.3486492677436123e-07, "epoch": 1.6824884792626729, "percentage": 84.12, "elapsed_time": "10:13:47", "remaining_time": "1:55:49"} +{"current_steps": 7303, "total_steps": 8680, "loss": 0.796151876449585, "lr": 1.3467391134109495e-07, "epoch": 1.6827188940092166, "percentage": 84.14, "elapsed_time": "10:13:52", "remaining_time": "1:55:44"} +{"current_steps": 7304, "total_steps": 8680, "loss": 0.8020445108413696, "lr": 1.3448302150876488e-07, "epoch": 1.6829493087557603, "percentage": 84.15, "elapsed_time": "10:13:56", "remaining_time": "1:55:39"} +{"current_steps": 7305, "total_steps": 8680, "loss": 0.7215749025344849, "lr": 1.3429225730507843e-07, "epoch": 1.6831797235023043, "percentage": 84.16, "elapsed_time": "10:14:03", "remaining_time": "1:55:34"} +{"current_steps": 7306, "total_steps": 8680, "loss": 0.920941174030304, "lr": 1.3410161875772474e-07, "epoch": 1.6834101382488478, "percentage": 84.17, "elapsed_time": "10:14:10", "remaining_time": "1:55:30"} +{"current_steps": 7307, "total_steps": 8680, "loss": 0.8979494571685791, "lr": 1.3391110589437494e-07, "epoch": 1.6836405529953917, "percentage": 84.18, "elapsed_time": "10:14:15", "remaining_time": "1:55:25"} +{"current_steps": 7308, "total_steps": 8680, "loss": 0.9125145673751831, "lr": 1.337207187426812e-07, "epoch": 1.6838709677419355, "percentage": 84.19, "elapsed_time": "10:14:21", 
"remaining_time": "1:55:20"} +{"current_steps": 7309, "total_steps": 8680, "loss": 0.8205714225769043, "lr": 1.3353045733027858e-07, "epoch": 1.6841013824884792, "percentage": 84.21, "elapsed_time": "10:14:24", "remaining_time": "1:55:15"} +{"current_steps": 7310, "total_steps": 8680, "loss": 0.6914113759994507, "lr": 1.3334032168478305e-07, "epoch": 1.6843317972350231, "percentage": 84.22, "elapsed_time": "10:14:29", "remaining_time": "1:55:09"} +{"current_steps": 7311, "total_steps": 8680, "loss": 0.7355014085769653, "lr": 1.3315031183379233e-07, "epoch": 1.6845622119815669, "percentage": 84.23, "elapsed_time": "10:14:33", "remaining_time": "1:55:04"} +{"current_steps": 7312, "total_steps": 8680, "loss": 0.7564182281494141, "lr": 1.3296042780488637e-07, "epoch": 1.6847926267281106, "percentage": 84.24, "elapsed_time": "10:14:38", "remaining_time": "1:54:59"} +{"current_steps": 7313, "total_steps": 8680, "loss": 0.8091372847557068, "lr": 1.3277066962562643e-07, "epoch": 1.6850230414746545, "percentage": 84.25, "elapsed_time": "10:14:42", "remaining_time": "1:54:54"} +{"current_steps": 7314, "total_steps": 8680, "loss": 0.7457877993583679, "lr": 1.3258103732355586e-07, "epoch": 1.685253456221198, "percentage": 84.26, "elapsed_time": "10:14:47", "remaining_time": "1:54:49"} +{"current_steps": 7315, "total_steps": 8680, "loss": 0.861819863319397, "lr": 1.3239153092619948e-07, "epoch": 1.685483870967742, "percentage": 84.27, "elapsed_time": "10:14:53", "remaining_time": "1:54:44"} +{"current_steps": 7316, "total_steps": 8680, "loss": 0.7698357105255127, "lr": 1.3220215046106353e-07, "epoch": 1.6857142857142857, "percentage": 84.29, "elapsed_time": "10:14:58", "remaining_time": "1:54:39"} +{"current_steps": 7317, "total_steps": 8680, "loss": 0.7889456152915955, "lr": 1.320128959556369e-07, "epoch": 1.6859447004608294, "percentage": 84.3, "elapsed_time": "10:15:03", "remaining_time": "1:54:34"} +{"current_steps": 7318, "total_steps": 8680, "loss": 0.6467938423156738, 
"lr": 1.3182376743738932e-07, "epoch": 1.6861751152073734, "percentage": 84.31, "elapsed_time": "10:15:07", "remaining_time": "1:54:29"} +{"current_steps": 7319, "total_steps": 8680, "loss": 0.7202441692352295, "lr": 1.3163476493377245e-07, "epoch": 1.686405529953917, "percentage": 84.32, "elapsed_time": "10:15:12", "remaining_time": "1:54:24"} +{"current_steps": 7320, "total_steps": 8680, "loss": 0.7464008331298828, "lr": 1.3144588847222004e-07, "epoch": 1.6866359447004609, "percentage": 84.33, "elapsed_time": "10:15:16", "remaining_time": "1:54:18"} +{"current_steps": 7321, "total_steps": 8680, "loss": 0.8924611806869507, "lr": 1.3125713808014704e-07, "epoch": 1.6868663594470046, "percentage": 84.34, "elapsed_time": "10:15:21", "remaining_time": "1:54:13"} +{"current_steps": 7322, "total_steps": 8680, "loss": 0.6943146586418152, "lr": 1.3106851378495044e-07, "epoch": 1.6870967741935483, "percentage": 84.35, "elapsed_time": "10:15:25", "remaining_time": "1:54:08"} +{"current_steps": 7323, "total_steps": 8680, "loss": 0.7335963249206543, "lr": 1.308800156140085e-07, "epoch": 1.6873271889400923, "percentage": 84.37, "elapsed_time": "10:15:29", "remaining_time": "1:54:03"} +{"current_steps": 7324, "total_steps": 8680, "loss": 0.6900516748428345, "lr": 1.30691643594682e-07, "epoch": 1.687557603686636, "percentage": 84.38, "elapsed_time": "10:15:33", "remaining_time": "1:53:58"} +{"current_steps": 7325, "total_steps": 8680, "loss": 0.7230286598205566, "lr": 1.3050339775431262e-07, "epoch": 1.6877880184331797, "percentage": 84.39, "elapsed_time": "10:15:38", "remaining_time": "1:53:53"} +{"current_steps": 7326, "total_steps": 8680, "loss": 0.8069840669631958, "lr": 1.3031527812022403e-07, "epoch": 1.6880184331797237, "percentage": 84.4, "elapsed_time": "10:15:42", "remaining_time": "1:53:47"} +{"current_steps": 7327, "total_steps": 8680, "loss": 0.7598710060119629, "lr": 1.3012728471972134e-07, "epoch": 1.6882488479262672, "percentage": 84.41, "elapsed_time": 
"10:15:47", "remaining_time": "1:53:42"} +{"current_steps": 7328, "total_steps": 8680, "loss": 0.6817609071731567, "lr": 1.2993941758009164e-07, "epoch": 1.6884792626728111, "percentage": 84.42, "elapsed_time": "10:15:52", "remaining_time": "1:53:37"} +{"current_steps": 7329, "total_steps": 8680, "loss": 0.6958975791931152, "lr": 1.2975167672860387e-07, "epoch": 1.6887096774193548, "percentage": 84.44, "elapsed_time": "10:15:56", "remaining_time": "1:53:32"} +{"current_steps": 7330, "total_steps": 8680, "loss": 0.8270853757858276, "lr": 1.2956406219250814e-07, "epoch": 1.6889400921658986, "percentage": 84.45, "elapsed_time": "10:16:02", "remaining_time": "1:53:27"} +{"current_steps": 7331, "total_steps": 8680, "loss": 0.8045610189437866, "lr": 1.2937657399903623e-07, "epoch": 1.6891705069124425, "percentage": 84.46, "elapsed_time": "10:16:08", "remaining_time": "1:53:22"} +{"current_steps": 7332, "total_steps": 8680, "loss": 0.6685627698898315, "lr": 1.2918921217540224e-07, "epoch": 1.689400921658986, "percentage": 84.47, "elapsed_time": "10:16:13", "remaining_time": "1:53:17"} +{"current_steps": 7333, "total_steps": 8680, "loss": 0.8157398700714111, "lr": 1.2900197674880142e-07, "epoch": 1.68963133640553, "percentage": 84.48, "elapsed_time": "10:16:18", "remaining_time": "1:53:12"} +{"current_steps": 7334, "total_steps": 8680, "loss": 0.6142218112945557, "lr": 1.2881486774641025e-07, "epoch": 1.6898617511520737, "percentage": 84.49, "elapsed_time": "10:16:24", "remaining_time": "1:53:07"} +{"current_steps": 7335, "total_steps": 8680, "loss": 0.7849327921867371, "lr": 1.2862788519538815e-07, "epoch": 1.6900921658986174, "percentage": 84.5, "elapsed_time": "10:16:28", "remaining_time": "1:53:02"} +{"current_steps": 7336, "total_steps": 8680, "loss": 0.8035926818847656, "lr": 1.2844102912287457e-07, "epoch": 1.6903225806451614, "percentage": 84.52, "elapsed_time": "10:16:34", "remaining_time": "1:52:57"} +{"current_steps": 7337, "total_steps": 8680, "loss": 
0.8456575870513916, "lr": 1.2825429955599209e-07, "epoch": 1.6905529953917051, "percentage": 84.53, "elapsed_time": "10:16:38", "remaining_time": "1:52:52"} +{"current_steps": 7338, "total_steps": 8680, "loss": 0.7436026334762573, "lr": 1.2806769652184402e-07, "epoch": 1.6907834101382488, "percentage": 84.54, "elapsed_time": "10:16:43", "remaining_time": "1:52:47"} +{"current_steps": 7339, "total_steps": 8680, "loss": 0.8315454721450806, "lr": 1.2788122004751522e-07, "epoch": 1.6910138248847926, "percentage": 84.55, "elapsed_time": "10:16:49", "remaining_time": "1:52:42"} +{"current_steps": 7340, "total_steps": 8680, "loss": 0.7425665855407715, "lr": 1.2769487016007307e-07, "epoch": 1.6912442396313363, "percentage": 84.56, "elapsed_time": "10:16:54", "remaining_time": "1:52:37"} +{"current_steps": 7341, "total_steps": 8680, "loss": 0.7899731993675232, "lr": 1.2750864688656572e-07, "epoch": 1.6914746543778802, "percentage": 84.57, "elapsed_time": "10:17:00", "remaining_time": "1:52:32"} +{"current_steps": 7342, "total_steps": 8680, "loss": 0.7637509703636169, "lr": 1.2732255025402327e-07, "epoch": 1.691705069124424, "percentage": 84.59, "elapsed_time": "10:17:06", "remaining_time": "1:52:27"} +{"current_steps": 7343, "total_steps": 8680, "loss": 0.793779730796814, "lr": 1.2713658028945717e-07, "epoch": 1.6919354838709677, "percentage": 84.6, "elapsed_time": "10:17:10", "remaining_time": "1:52:22"} +{"current_steps": 7344, "total_steps": 8680, "loss": 0.7248083353042603, "lr": 1.2695073701986103e-07, "epoch": 1.6921658986175117, "percentage": 84.61, "elapsed_time": "10:17:14", "remaining_time": "1:52:17"} +{"current_steps": 7345, "total_steps": 8680, "loss": 0.7506270408630371, "lr": 1.2676502047220973e-07, "epoch": 1.6923963133640552, "percentage": 84.62, "elapsed_time": "10:17:18", "remaining_time": "1:52:12"} +{"current_steps": 7346, "total_steps": 8680, "loss": 0.7921839952468872, "lr": 1.2657943067345965e-07, "epoch": 1.692626728110599, "percentage": 84.63, 
"elapsed_time": "10:17:23", "remaining_time": "1:52:06"} +{"current_steps": 7347, "total_steps": 8680, "loss": 0.7627893686294556, "lr": 1.263939676505491e-07, "epoch": 1.6928571428571428, "percentage": 84.64, "elapsed_time": "10:17:28", "remaining_time": "1:52:01"} +{"current_steps": 7348, "total_steps": 8680, "loss": 0.788955807685852, "lr": 1.262086314303973e-07, "epoch": 1.6930875576036866, "percentage": 84.65, "elapsed_time": "10:17:33", "remaining_time": "1:51:56"} +{"current_steps": 7349, "total_steps": 8680, "loss": 0.5527241826057434, "lr": 1.2602342203990612e-07, "epoch": 1.6933179723502305, "percentage": 84.67, "elapsed_time": "10:17:38", "remaining_time": "1:51:51"} +{"current_steps": 7350, "total_steps": 8680, "loss": 0.7324573397636414, "lr": 1.2583833950595825e-07, "epoch": 1.6935483870967742, "percentage": 84.68, "elapsed_time": "10:17:43", "remaining_time": "1:51:46"} +{"current_steps": 7351, "total_steps": 8680, "loss": 0.6588207483291626, "lr": 1.256533838554179e-07, "epoch": 1.693778801843318, "percentage": 84.69, "elapsed_time": "10:17:48", "remaining_time": "1:51:41"} +{"current_steps": 7352, "total_steps": 8680, "loss": 0.7597184181213379, "lr": 1.2546855511513165e-07, "epoch": 1.6940092165898617, "percentage": 84.7, "elapsed_time": "10:17:55", "remaining_time": "1:51:36"} +{"current_steps": 7353, "total_steps": 8680, "loss": 0.7487671375274658, "lr": 1.2528385331192692e-07, "epoch": 1.6942396313364054, "percentage": 84.71, "elapsed_time": "10:17:59", "remaining_time": "1:51:31"} +{"current_steps": 7354, "total_steps": 8680, "loss": 0.757739245891571, "lr": 1.250992784726126e-07, "epoch": 1.6944700460829494, "percentage": 84.72, "elapsed_time": "10:18:05", "remaining_time": "1:51:26"} +{"current_steps": 7355, "total_steps": 8680, "loss": 0.616966724395752, "lr": 1.249148306239801e-07, "epoch": 1.694700460829493, "percentage": 84.74, "elapsed_time": "10:18:12", "remaining_time": "1:51:22"} +{"current_steps": 7356, "total_steps": 8680, "loss": 
0.9415719509124756, "lr": 1.2473050979280142e-07, "epoch": 1.6949308755760368, "percentage": 84.75, "elapsed_time": "10:18:18", "remaining_time": "1:51:17"} +{"current_steps": 7357, "total_steps": 8680, "loss": 0.7731447815895081, "lr": 1.2454631600583044e-07, "epoch": 1.6951612903225808, "percentage": 84.76, "elapsed_time": "10:18:23", "remaining_time": "1:51:12"} +{"current_steps": 7358, "total_steps": 8680, "loss": 0.800236701965332, "lr": 1.2436224928980276e-07, "epoch": 1.6953917050691243, "percentage": 84.77, "elapsed_time": "10:18:29", "remaining_time": "1:51:07"} +{"current_steps": 7359, "total_steps": 8680, "loss": 0.8113845586776733, "lr": 1.241783096714356e-07, "epoch": 1.6956221198156682, "percentage": 84.78, "elapsed_time": "10:18:35", "remaining_time": "1:51:02"} +{"current_steps": 7360, "total_steps": 8680, "loss": 0.748763382434845, "lr": 1.2399449717742706e-07, "epoch": 1.695852534562212, "percentage": 84.79, "elapsed_time": "10:18:40", "remaining_time": "1:50:57"} +{"current_steps": 7361, "total_steps": 8680, "loss": 0.8595450520515442, "lr": 1.2381081183445774e-07, "epoch": 1.6960829493087557, "percentage": 84.8, "elapsed_time": "10:18:45", "remaining_time": "1:50:52"} +{"current_steps": 7362, "total_steps": 8680, "loss": 0.7800960540771484, "lr": 1.2362725366918913e-07, "epoch": 1.6963133640552996, "percentage": 84.82, "elapsed_time": "10:18:50", "remaining_time": "1:50:47"} +{"current_steps": 7363, "total_steps": 8680, "loss": 0.6549400687217712, "lr": 1.2344382270826438e-07, "epoch": 1.6965437788018434, "percentage": 84.83, "elapsed_time": "10:18:56", "remaining_time": "1:50:42"} +{"current_steps": 7364, "total_steps": 8680, "loss": 0.7839380502700806, "lr": 1.2326051897830858e-07, "epoch": 1.696774193548387, "percentage": 84.84, "elapsed_time": "10:19:04", "remaining_time": "1:50:37"} +{"current_steps": 7365, "total_steps": 8680, "loss": 0.8436654806137085, "lr": 1.230773425059277e-07, "epoch": 1.6970046082949308, "percentage": 84.85, 
"elapsed_time": "10:19:08", "remaining_time": "1:50:32"} +{"current_steps": 7366, "total_steps": 8680, "loss": 0.6517987251281738, "lr": 1.2289429331770974e-07, "epoch": 1.6972350230414746, "percentage": 84.86, "elapsed_time": "10:19:16", "remaining_time": "1:50:28"} +{"current_steps": 7367, "total_steps": 8680, "loss": 0.7108355760574341, "lr": 1.2271137144022392e-07, "epoch": 1.6974654377880185, "percentage": 84.87, "elapsed_time": "10:19:23", "remaining_time": "1:50:23"} +{"current_steps": 7368, "total_steps": 8680, "loss": 0.7801471948623657, "lr": 1.2252857690002094e-07, "epoch": 1.6976958525345622, "percentage": 84.88, "elapsed_time": "10:19:28", "remaining_time": "1:50:18"} +{"current_steps": 7369, "total_steps": 8680, "loss": 0.8240209221839905, "lr": 1.2234590972363358e-07, "epoch": 1.697926267281106, "percentage": 84.9, "elapsed_time": "10:19:32", "remaining_time": "1:50:13"} +{"current_steps": 7370, "total_steps": 8680, "loss": 0.8119853138923645, "lr": 1.2216336993757558e-07, "epoch": 1.69815668202765, "percentage": 84.91, "elapsed_time": "10:19:36", "remaining_time": "1:50:08"} +{"current_steps": 7371, "total_steps": 8680, "loss": 0.7685642838478088, "lr": 1.2198095756834216e-07, "epoch": 1.6983870967741934, "percentage": 84.92, "elapsed_time": "10:19:42", "remaining_time": "1:50:03"} +{"current_steps": 7372, "total_steps": 8680, "loss": 0.7820984125137329, "lr": 1.217986726424106e-07, "epoch": 1.6986175115207374, "percentage": 84.93, "elapsed_time": "10:19:47", "remaining_time": "1:49:58"} +{"current_steps": 7373, "total_steps": 8680, "loss": 0.8051085472106934, "lr": 1.2161651518623916e-07, "epoch": 1.698847926267281, "percentage": 84.94, "elapsed_time": "10:19:52", "remaining_time": "1:49:53"} +{"current_steps": 7374, "total_steps": 8680, "loss": 0.828999400138855, "lr": 1.2143448522626742e-07, "epoch": 1.6990783410138248, "percentage": 84.95, "elapsed_time": "10:19:57", "remaining_time": "1:49:47"} +{"current_steps": 7375, "total_steps": 8680, 
"loss": 0.8215579986572266, "lr": 1.2125258278891738e-07, "epoch": 1.6993087557603688, "percentage": 84.97, "elapsed_time": "10:20:02", "remaining_time": "1:49:42"} +{"current_steps": 7376, "total_steps": 8680, "loss": 0.9362014532089233, "lr": 1.2107080790059156e-07, "epoch": 1.6995391705069123, "percentage": 84.98, "elapsed_time": "10:20:07", "remaining_time": "1:49:37"} +{"current_steps": 7377, "total_steps": 8680, "loss": 0.7789602279663086, "lr": 1.2088916058767428e-07, "epoch": 1.6997695852534562, "percentage": 84.99, "elapsed_time": "10:20:13", "remaining_time": "1:49:33"} +{"current_steps": 7378, "total_steps": 8680, "loss": 0.8371152877807617, "lr": 1.2070764087653163e-07, "epoch": 1.7, "percentage": 85.0, "elapsed_time": "10:20:19", "remaining_time": "1:49:28"} +{"current_steps": 7379, "total_steps": 8680, "loss": 0.64423668384552, "lr": 1.2052624879351103e-07, "epoch": 1.7002304147465437, "percentage": 85.01, "elapsed_time": "10:20:24", "remaining_time": "1:49:23"} +{"current_steps": 7380, "total_steps": 8680, "loss": 0.7635257244110107, "lr": 1.203449843649409e-07, "epoch": 1.7004608294930876, "percentage": 85.02, "elapsed_time": "10:20:28", "remaining_time": "1:49:17"} +{"current_steps": 7381, "total_steps": 8680, "loss": 0.7859230041503906, "lr": 1.2016384761713194e-07, "epoch": 1.7006912442396314, "percentage": 85.03, "elapsed_time": "10:20:36", "remaining_time": "1:49:13"} +{"current_steps": 7382, "total_steps": 8680, "loss": 0.7066336870193481, "lr": 1.199828385763757e-07, "epoch": 1.700921658986175, "percentage": 85.05, "elapsed_time": "10:20:40", "remaining_time": "1:49:08"} +{"current_steps": 7383, "total_steps": 8680, "loss": 0.7190531492233276, "lr": 1.198019572689455e-07, "epoch": 1.701152073732719, "percentage": 85.06, "elapsed_time": "10:20:46", "remaining_time": "1:49:03"} +{"current_steps": 7384, "total_steps": 8680, "loss": 0.7389136552810669, "lr": 1.1962120372109586e-07, "epoch": 1.7013824884792625, "percentage": 85.07, "elapsed_time": 
"10:20:51", "remaining_time": "1:48:58"} +{"current_steps": 7385, "total_steps": 8680, "loss": 0.774425745010376, "lr": 1.1944057795906316e-07, "epoch": 1.7016129032258065, "percentage": 85.08, "elapsed_time": "10:20:55", "remaining_time": "1:48:52"} +{"current_steps": 7386, "total_steps": 8680, "loss": 0.7566725015640259, "lr": 1.1926008000906484e-07, "epoch": 1.7018433179723502, "percentage": 85.09, "elapsed_time": "10:21:02", "remaining_time": "1:48:48"} +{"current_steps": 7387, "total_steps": 8680, "loss": 0.6891475915908813, "lr": 1.1907970989729987e-07, "epoch": 1.702073732718894, "percentage": 85.1, "elapsed_time": "10:21:08", "remaining_time": "1:48:43"} +{"current_steps": 7388, "total_steps": 8680, "loss": 0.6188378930091858, "lr": 1.1889946764994873e-07, "epoch": 1.702304147465438, "percentage": 85.12, "elapsed_time": "10:21:16", "remaining_time": "1:48:38"} +{"current_steps": 7389, "total_steps": 8680, "loss": 0.703027069568634, "lr": 1.1871935329317362e-07, "epoch": 1.7025345622119814, "percentage": 85.13, "elapsed_time": "10:21:22", "remaining_time": "1:48:33"} +{"current_steps": 7390, "total_steps": 8680, "loss": 0.9253139495849609, "lr": 1.1853936685311772e-07, "epoch": 1.7027649769585254, "percentage": 85.14, "elapsed_time": "10:21:28", "remaining_time": "1:48:29"} +{"current_steps": 7391, "total_steps": 8680, "loss": 0.6504430770874023, "lr": 1.1835950835590569e-07, "epoch": 1.702995391705069, "percentage": 85.15, "elapsed_time": "10:21:34", "remaining_time": "1:48:24"} +{"current_steps": 7392, "total_steps": 8680, "loss": 0.6656354665756226, "lr": 1.18179777827644e-07, "epoch": 1.7032258064516128, "percentage": 85.16, "elapsed_time": "10:21:42", "remaining_time": "1:48:19"} +{"current_steps": 7393, "total_steps": 8680, "loss": 0.8534063100814819, "lr": 1.1800017529442019e-07, "epoch": 1.7034562211981568, "percentage": 85.17, "elapsed_time": "10:21:48", "remaining_time": "1:48:14"} +{"current_steps": 7394, "total_steps": 8680, "loss": 
0.8315893411636353, "lr": 1.178207007823031e-07, "epoch": 1.7036866359447005, "percentage": 85.18, "elapsed_time": "10:21:54", "remaining_time": "1:48:09"} +{"current_steps": 7395, "total_steps": 8680, "loss": 0.8161677718162537, "lr": 1.1764135431734367e-07, "epoch": 1.7039170506912442, "percentage": 85.2, "elapsed_time": "10:21:59", "remaining_time": "1:48:04"} +{"current_steps": 7396, "total_steps": 8680, "loss": 0.7942687273025513, "lr": 1.1746213592557352e-07, "epoch": 1.7041474654377882, "percentage": 85.21, "elapsed_time": "10:22:04", "remaining_time": "1:47:59"} +{"current_steps": 7397, "total_steps": 8680, "loss": 0.8056384325027466, "lr": 1.1728304563300584e-07, "epoch": 1.7043778801843317, "percentage": 85.22, "elapsed_time": "10:22:09", "remaining_time": "1:47:54"} +{"current_steps": 7398, "total_steps": 8680, "loss": 0.8535007238388062, "lr": 1.1710408346563583e-07, "epoch": 1.7046082949308756, "percentage": 85.23, "elapsed_time": "10:22:16", "remaining_time": "1:47:49"} +{"current_steps": 7399, "total_steps": 8680, "loss": 0.7729576826095581, "lr": 1.1692524944943916e-07, "epoch": 1.7048387096774194, "percentage": 85.24, "elapsed_time": "10:22:22", "remaining_time": "1:47:45"} +{"current_steps": 7400, "total_steps": 8680, "loss": 0.7755489349365234, "lr": 1.1674654361037328e-07, "epoch": 1.705069124423963, "percentage": 85.25, "elapsed_time": "10:22:28", "remaining_time": "1:47:40"} +{"current_steps": 7401, "total_steps": 8680, "loss": 0.8752193450927734, "lr": 1.1656796597437757e-07, "epoch": 1.705299539170507, "percentage": 85.26, "elapsed_time": "10:22:37", "remaining_time": "1:47:35"} +{"current_steps": 7402, "total_steps": 8680, "loss": 0.7135917544364929, "lr": 1.1638951656737217e-07, "epoch": 1.7055299539170505, "percentage": 85.28, "elapsed_time": "10:22:45", "remaining_time": "1:47:31"} +{"current_steps": 7403, "total_steps": 8680, "loss": 0.7378124594688416, "lr": 1.1621119541525859e-07, "epoch": 1.7057603686635945, "percentage": 85.29, 
"elapsed_time": "10:22:52", "remaining_time": "1:47:26"} +{"current_steps": 7404, "total_steps": 8680, "loss": 0.637479305267334, "lr": 1.1603300254391978e-07, "epoch": 1.7059907834101382, "percentage": 85.3, "elapsed_time": "10:22:58", "remaining_time": "1:47:21"} +{"current_steps": 7405, "total_steps": 8680, "loss": 0.6162394881248474, "lr": 1.1585493797922075e-07, "epoch": 1.706221198156682, "percentage": 85.31, "elapsed_time": "10:23:05", "remaining_time": "1:47:17"} +{"current_steps": 7406, "total_steps": 8680, "loss": 0.7836494445800781, "lr": 1.1567700174700701e-07, "epoch": 1.706451612903226, "percentage": 85.32, "elapsed_time": "10:23:10", "remaining_time": "1:47:12"} +{"current_steps": 7407, "total_steps": 8680, "loss": 0.6297281980514526, "lr": 1.154991938731057e-07, "epoch": 1.7066820276497696, "percentage": 85.33, "elapsed_time": "10:23:15", "remaining_time": "1:47:06"} +{"current_steps": 7408, "total_steps": 8680, "loss": 0.7190115451812744, "lr": 1.1532151438332549e-07, "epoch": 1.7069124423963133, "percentage": 85.35, "elapsed_time": "10:23:19", "remaining_time": "1:47:01"} +{"current_steps": 7409, "total_steps": 8680, "loss": 0.7578086853027344, "lr": 1.151439633034561e-07, "epoch": 1.7071428571428573, "percentage": 85.36, "elapsed_time": "10:23:25", "remaining_time": "1:46:56"} +{"current_steps": 7410, "total_steps": 8680, "loss": 0.7347216010093689, "lr": 1.1496654065926925e-07, "epoch": 1.7073732718894008, "percentage": 85.37, "elapsed_time": "10:23:31", "remaining_time": "1:46:51"} +{"current_steps": 7411, "total_steps": 8680, "loss": 0.7940168380737305, "lr": 1.1478924647651711e-07, "epoch": 1.7076036866359448, "percentage": 85.38, "elapsed_time": "10:23:36", "remaining_time": "1:46:46"} +{"current_steps": 7412, "total_steps": 8680, "loss": 0.7625843286514282, "lr": 1.1461208078093431e-07, "epoch": 1.7078341013824885, "percentage": 85.39, "elapsed_time": "10:23:41", "remaining_time": "1:46:41"} +{"current_steps": 7413, "total_steps": 8680, 
"loss": 0.7603492736816406, "lr": 1.1443504359823585e-07, "epoch": 1.7080645161290322, "percentage": 85.4, "elapsed_time": "10:23:46", "remaining_time": "1:46:36"} +{"current_steps": 7414, "total_steps": 8680, "loss": 0.8746018409729004, "lr": 1.1425813495411817e-07, "epoch": 1.7082949308755762, "percentage": 85.41, "elapsed_time": "10:23:51", "remaining_time": "1:46:31"} +{"current_steps": 7415, "total_steps": 8680, "loss": 0.72724449634552, "lr": 1.1408135487425996e-07, "epoch": 1.7085253456221197, "percentage": 85.43, "elapsed_time": "10:23:54", "remaining_time": "1:46:26"} +{"current_steps": 7416, "total_steps": 8680, "loss": 0.6874721646308899, "lr": 1.1390470338432023e-07, "epoch": 1.7087557603686636, "percentage": 85.44, "elapsed_time": "10:24:01", "remaining_time": "1:46:21"} +{"current_steps": 7417, "total_steps": 8680, "loss": 0.7129265666007996, "lr": 1.1372818050993959e-07, "epoch": 1.7089861751152073, "percentage": 85.45, "elapsed_time": "10:24:07", "remaining_time": "1:46:16"} +{"current_steps": 7418, "total_steps": 8680, "loss": 0.7505607008934021, "lr": 1.1355178627674045e-07, "epoch": 1.709216589861751, "percentage": 85.46, "elapsed_time": "10:24:12", "remaining_time": "1:46:11"} +{"current_steps": 7419, "total_steps": 8680, "loss": 0.7497769594192505, "lr": 1.1337552071032608e-07, "epoch": 1.709447004608295, "percentage": 85.47, "elapsed_time": "10:24:17", "remaining_time": "1:46:06"} +{"current_steps": 7420, "total_steps": 8680, "loss": 0.792352020740509, "lr": 1.1319938383628092e-07, "epoch": 1.7096774193548387, "percentage": 85.48, "elapsed_time": "10:24:23", "remaining_time": "1:46:01"} +{"current_steps": 7421, "total_steps": 8680, "loss": 0.780627965927124, "lr": 1.1302337568017139e-07, "epoch": 1.7099078341013825, "percentage": 85.5, "elapsed_time": "10:24:28", "remaining_time": "1:45:56"} +{"current_steps": 7422, "total_steps": 8680, "loss": 0.7024368047714233, "lr": 1.1284749626754464e-07, "epoch": 1.7101382488479264, "percentage": 85.51, 
"elapsed_time": "10:24:33", "remaining_time": "1:45:51"} +{"current_steps": 7423, "total_steps": 8680, "loss": 0.756782591342926, "lr": 1.1267174562392945e-07, "epoch": 1.71036866359447, "percentage": 85.52, "elapsed_time": "10:24:37", "remaining_time": "1:45:46"} +{"current_steps": 7424, "total_steps": 8680, "loss": 0.8585456609725952, "lr": 1.1249612377483552e-07, "epoch": 1.7105990783410139, "percentage": 85.53, "elapsed_time": "10:24:41", "remaining_time": "1:45:41"} +{"current_steps": 7425, "total_steps": 8680, "loss": 0.8610610961914062, "lr": 1.1232063074575449e-07, "epoch": 1.7108294930875576, "percentage": 85.54, "elapsed_time": "10:24:46", "remaining_time": "1:45:36"} +{"current_steps": 7426, "total_steps": 8680, "loss": 0.7493829131126404, "lr": 1.1214526656215872e-07, "epoch": 1.7110599078341013, "percentage": 85.55, "elapsed_time": "10:24:52", "remaining_time": "1:45:31"} +{"current_steps": 7427, "total_steps": 8680, "loss": 0.7479410171508789, "lr": 1.1197003124950222e-07, "epoch": 1.7112903225806453, "percentage": 85.56, "elapsed_time": "10:24:57", "remaining_time": "1:45:26"} +{"current_steps": 7428, "total_steps": 8680, "loss": 0.8056051135063171, "lr": 1.1179492483322006e-07, "epoch": 1.7115207373271888, "percentage": 85.58, "elapsed_time": "10:25:02", "remaining_time": "1:45:21"} +{"current_steps": 7429, "total_steps": 8680, "loss": 0.8448202610015869, "lr": 1.1161994733872848e-07, "epoch": 1.7117511520737327, "percentage": 85.59, "elapsed_time": "10:25:06", "remaining_time": "1:45:15"} +{"current_steps": 7430, "total_steps": 8680, "loss": 0.7783033847808838, "lr": 1.1144509879142571e-07, "epoch": 1.7119815668202765, "percentage": 85.6, "elapsed_time": "10:25:12", "remaining_time": "1:45:11"} +{"current_steps": 7431, "total_steps": 8680, "loss": 0.6591838598251343, "lr": 1.1127037921669058e-07, "epoch": 1.7122119815668202, "percentage": 85.61, "elapsed_time": "10:25:18", "remaining_time": "1:45:06"} +{"current_steps": 7432, "total_steps": 8680, 
"loss": 0.8508287668228149, "lr": 1.1109578863988322e-07, "epoch": 1.7124423963133641, "percentage": 85.62, "elapsed_time": "10:25:22", "remaining_time": "1:45:00"} +{"current_steps": 7433, "total_steps": 8680, "loss": 0.7981588840484619, "lr": 1.1092132708634549e-07, "epoch": 1.7126728110599079, "percentage": 85.63, "elapsed_time": "10:25:26", "remaining_time": "1:44:55"} +{"current_steps": 7434, "total_steps": 8680, "loss": 0.7754761576652527, "lr": 1.1074699458140025e-07, "epoch": 1.7129032258064516, "percentage": 85.65, "elapsed_time": "10:25:31", "remaining_time": "1:44:50"} +{"current_steps": 7435, "total_steps": 8680, "loss": 0.8487040996551514, "lr": 1.1057279115035124e-07, "epoch": 1.7131336405529956, "percentage": 85.66, "elapsed_time": "10:25:35", "remaining_time": "1:44:45"} +{"current_steps": 7436, "total_steps": 8680, "loss": 0.8175803422927856, "lr": 1.1039871681848433e-07, "epoch": 1.713364055299539, "percentage": 85.67, "elapsed_time": "10:25:40", "remaining_time": "1:44:40"} +{"current_steps": 7437, "total_steps": 8680, "loss": 0.8361574411392212, "lr": 1.1022477161106591e-07, "epoch": 1.713594470046083, "percentage": 85.68, "elapsed_time": "10:25:44", "remaining_time": "1:44:35"} +{"current_steps": 7438, "total_steps": 8680, "loss": 0.6253053545951843, "lr": 1.1005095555334409e-07, "epoch": 1.7138248847926267, "percentage": 85.69, "elapsed_time": "10:25:50", "remaining_time": "1:44:30"} +{"current_steps": 7439, "total_steps": 8680, "loss": 0.8035168647766113, "lr": 1.0987726867054792e-07, "epoch": 1.7140552995391705, "percentage": 85.7, "elapsed_time": "10:25:53", "remaining_time": "1:44:24"} +{"current_steps": 7440, "total_steps": 8680, "loss": 0.7352867722511292, "lr": 1.0970371098788767e-07, "epoch": 1.7142857142857144, "percentage": 85.71, "elapsed_time": "10:25:58", "remaining_time": "1:44:19"} +{"current_steps": 7441, "total_steps": 8680, "loss": 0.7540202140808105, "lr": 1.0953028253055541e-07, "epoch": 1.714516129032258, "percentage": 
85.73, "elapsed_time": "10:26:05", "remaining_time": "1:44:14"} +{"current_steps": 7442, "total_steps": 8680, "loss": 0.7883191108703613, "lr": 1.0935698332372379e-07, "epoch": 1.7147465437788019, "percentage": 85.74, "elapsed_time": "10:26:11", "remaining_time": "1:44:10"} +{"current_steps": 7443, "total_steps": 8680, "loss": 0.7581819295883179, "lr": 1.0918381339254701e-07, "epoch": 1.7149769585253456, "percentage": 85.75, "elapsed_time": "10:26:17", "remaining_time": "1:44:05"} +{"current_steps": 7444, "total_steps": 8680, "loss": 0.8066321611404419, "lr": 1.090107727621603e-07, "epoch": 1.7152073732718893, "percentage": 85.76, "elapsed_time": "10:26:22", "remaining_time": "1:44:00"} +{"current_steps": 7445, "total_steps": 8680, "loss": 0.7427937984466553, "lr": 1.0883786145768037e-07, "epoch": 1.7154377880184333, "percentage": 85.77, "elapsed_time": "10:26:28", "remaining_time": "1:43:55"} +{"current_steps": 7446, "total_steps": 8680, "loss": 0.7736409902572632, "lr": 1.0866507950420523e-07, "epoch": 1.715668202764977, "percentage": 85.78, "elapsed_time": "10:26:34", "remaining_time": "1:43:50"} +{"current_steps": 7447, "total_steps": 8680, "loss": 0.7253416776657104, "lr": 1.0849242692681382e-07, "epoch": 1.7158986175115207, "percentage": 85.79, "elapsed_time": "10:26:39", "remaining_time": "1:43:45"} +{"current_steps": 7448, "total_steps": 8680, "loss": 0.7933270931243896, "lr": 1.0831990375056643e-07, "epoch": 1.7161290322580647, "percentage": 85.81, "elapsed_time": "10:26:45", "remaining_time": "1:43:40"} +{"current_steps": 7449, "total_steps": 8680, "loss": 0.7946739196777344, "lr": 1.0814751000050437e-07, "epoch": 1.7163594470046082, "percentage": 85.82, "elapsed_time": "10:26:50", "remaining_time": "1:43:35"} +{"current_steps": 7450, "total_steps": 8680, "loss": 0.7798205614089966, "lr": 1.0797524570165073e-07, "epoch": 1.7165898617511521, "percentage": 85.83, "elapsed_time": "10:26:56", "remaining_time": "1:43:30"} +{"current_steps": 7451, 
"total_steps": 8680, "loss": 0.616565465927124, "lr": 1.078031108790094e-07, "epoch": 1.7168202764976959, "percentage": 85.84, "elapsed_time": "10:27:03", "remaining_time": "1:43:25"} +{"current_steps": 7452, "total_steps": 8680, "loss": 0.8406517505645752, "lr": 1.0763110555756516e-07, "epoch": 1.7170506912442396, "percentage": 85.85, "elapsed_time": "10:27:08", "remaining_time": "1:43:20"} +{"current_steps": 7453, "total_steps": 8680, "loss": 0.8827311992645264, "lr": 1.0745922976228483e-07, "epoch": 1.7172811059907835, "percentage": 85.86, "elapsed_time": "10:27:15", "remaining_time": "1:43:15"} +{"current_steps": 7454, "total_steps": 8680, "loss": 0.585588812828064, "lr": 1.0728748351811567e-07, "epoch": 1.717511520737327, "percentage": 85.88, "elapsed_time": "10:27:20", "remaining_time": "1:43:10"} +{"current_steps": 7455, "total_steps": 8680, "loss": 0.6305320858955383, "lr": 1.0711586684998631e-07, "epoch": 1.717741935483871, "percentage": 85.89, "elapsed_time": "10:27:27", "remaining_time": "1:43:06"} +{"current_steps": 7456, "total_steps": 8680, "loss": 0.7982319593429565, "lr": 1.0694437978280701e-07, "epoch": 1.7179723502304147, "percentage": 85.9, "elapsed_time": "10:27:32", "remaining_time": "1:43:01"} +{"current_steps": 7457, "total_steps": 8680, "loss": 0.7792943716049194, "lr": 1.0677302234146879e-07, "epoch": 1.7182027649769585, "percentage": 85.91, "elapsed_time": "10:27:40", "remaining_time": "1:42:56"} +{"current_steps": 7458, "total_steps": 8680, "loss": 0.7019332051277161, "lr": 1.0660179455084372e-07, "epoch": 1.7184331797235024, "percentage": 85.92, "elapsed_time": "10:27:46", "remaining_time": "1:42:51"} +{"current_steps": 7459, "total_steps": 8680, "loss": 0.8088894486427307, "lr": 1.0643069643578562e-07, "epoch": 1.7186635944700461, "percentage": 85.93, "elapsed_time": "10:27:53", "remaining_time": "1:42:46"} +{"current_steps": 7460, "total_steps": 8680, "loss": 0.799231767654419, "lr": 1.0625972802112882e-07, "epoch": 1.7188940092165899, 
"percentage": 85.94, "elapsed_time": "10:27:59", "remaining_time": "1:42:42"} +{"current_steps": 7461, "total_steps": 8680, "loss": 0.7265694737434387, "lr": 1.0608888933168958e-07, "epoch": 1.7191244239631336, "percentage": 85.96, "elapsed_time": "10:28:06", "remaining_time": "1:42:37"} +{"current_steps": 7462, "total_steps": 8680, "loss": 0.8566714525222778, "lr": 1.0591818039226464e-07, "epoch": 1.7193548387096773, "percentage": 85.97, "elapsed_time": "10:28:10", "remaining_time": "1:42:32"} +{"current_steps": 7463, "total_steps": 8680, "loss": 0.811874508857727, "lr": 1.0574760122763216e-07, "epoch": 1.7195852534562213, "percentage": 85.98, "elapsed_time": "10:28:14", "remaining_time": "1:42:26"} +{"current_steps": 7464, "total_steps": 8680, "loss": 0.7990631461143494, "lr": 1.0557715186255156e-07, "epoch": 1.719815668202765, "percentage": 85.99, "elapsed_time": "10:28:21", "remaining_time": "1:42:22"} +{"current_steps": 7465, "total_steps": 8680, "loss": 0.8108334541320801, "lr": 1.0540683232176307e-07, "epoch": 1.7200460829493087, "percentage": 86.0, "elapsed_time": "10:28:25", "remaining_time": "1:42:16"} +{"current_steps": 7466, "total_steps": 8680, "loss": 0.8927996158599854, "lr": 1.0523664262998888e-07, "epoch": 1.7202764976958527, "percentage": 86.01, "elapsed_time": "10:28:30", "remaining_time": "1:42:11"} +{"current_steps": 7467, "total_steps": 8680, "loss": 0.7277737855911255, "lr": 1.0506658281193138e-07, "epoch": 1.7205069124423962, "percentage": 86.03, "elapsed_time": "10:28:35", "remaining_time": "1:42:06"} +{"current_steps": 7468, "total_steps": 8680, "loss": 0.7229233980178833, "lr": 1.0489665289227467e-07, "epoch": 1.7207373271889401, "percentage": 86.04, "elapsed_time": "10:28:39", "remaining_time": "1:42:01"} +{"current_steps": 7469, "total_steps": 8680, "loss": 0.7211846709251404, "lr": 1.0472685289568373e-07, "epoch": 1.7209677419354839, "percentage": 86.05, "elapsed_time": "10:28:44", "remaining_time": "1:41:56"} +{"current_steps": 7470, 
"total_steps": 8680, "loss": 0.8239504098892212, "lr": 1.0455718284680504e-07, "epoch": 1.7211981566820276, "percentage": 86.06, "elapsed_time": "10:28:48", "remaining_time": "1:41:51"} +{"current_steps": 7471, "total_steps": 8680, "loss": 0.7492972612380981, "lr": 1.0438764277026579e-07, "epoch": 1.7214285714285715, "percentage": 86.07, "elapsed_time": "10:28:52", "remaining_time": "1:41:46"} +{"current_steps": 7472, "total_steps": 8680, "loss": 0.7658303380012512, "lr": 1.0421823269067442e-07, "epoch": 1.7216589861751153, "percentage": 86.08, "elapsed_time": "10:28:56", "remaining_time": "1:41:40"} +{"current_steps": 7473, "total_steps": 8680, "loss": 0.708244800567627, "lr": 1.0404895263262092e-07, "epoch": 1.721889400921659, "percentage": 86.09, "elapsed_time": "10:29:02", "remaining_time": "1:41:36"} +{"current_steps": 7474, "total_steps": 8680, "loss": 0.7575969696044922, "lr": 1.0387980262067575e-07, "epoch": 1.7221198156682027, "percentage": 86.11, "elapsed_time": "10:29:07", "remaining_time": "1:41:30"} +{"current_steps": 7475, "total_steps": 8680, "loss": 0.7321910262107849, "lr": 1.0371078267939082e-07, "epoch": 1.7223502304147464, "percentage": 86.12, "elapsed_time": "10:29:12", "remaining_time": "1:41:25"} +{"current_steps": 7476, "total_steps": 8680, "loss": 0.7812562584877014, "lr": 1.035418928332995e-07, "epoch": 1.7225806451612904, "percentage": 86.13, "elapsed_time": "10:29:17", "remaining_time": "1:41:20"} +{"current_steps": 7477, "total_steps": 8680, "loss": 0.7272104620933533, "lr": 1.0337313310691565e-07, "epoch": 1.7228110599078341, "percentage": 86.14, "elapsed_time": "10:29:23", "remaining_time": "1:41:15"} +{"current_steps": 7478, "total_steps": 8680, "loss": 0.7006442546844482, "lr": 1.032045035247343e-07, "epoch": 1.7230414746543778, "percentage": 86.15, "elapsed_time": "10:29:29", "remaining_time": "1:41:10"} +{"current_steps": 7479, "total_steps": 8680, "loss": 0.7082154750823975, "lr": 1.0303600411123226e-07, "epoch": 
1.7232718894009218, "percentage": 86.16, "elapsed_time": "10:29:33", "remaining_time": "1:41:05"} +{"current_steps": 7480, "total_steps": 8680, "loss": 0.7204899191856384, "lr": 1.0286763489086681e-07, "epoch": 1.7235023041474653, "percentage": 86.18, "elapsed_time": "10:29:38", "remaining_time": "1:41:00"} +{"current_steps": 7481, "total_steps": 8680, "loss": 0.9119626879692078, "lr": 1.026993958880763e-07, "epoch": 1.7237327188940093, "percentage": 86.19, "elapsed_time": "10:29:43", "remaining_time": "1:40:55"} +{"current_steps": 7482, "total_steps": 8680, "loss": 0.5961707830429077, "lr": 1.0253128712728088e-07, "epoch": 1.723963133640553, "percentage": 86.2, "elapsed_time": "10:29:47", "remaining_time": "1:40:50"} +{"current_steps": 7483, "total_steps": 8680, "loss": 0.7469611167907715, "lr": 1.023633086328809e-07, "epoch": 1.7241935483870967, "percentage": 86.21, "elapsed_time": "10:29:53", "remaining_time": "1:40:45"} +{"current_steps": 7484, "total_steps": 8680, "loss": 0.8353795409202576, "lr": 1.0219546042925841e-07, "epoch": 1.7244239631336407, "percentage": 86.22, "elapsed_time": "10:29:59", "remaining_time": "1:40:40"} +{"current_steps": 7485, "total_steps": 8680, "loss": 0.6587873101234436, "lr": 1.0202774254077618e-07, "epoch": 1.7246543778801844, "percentage": 86.23, "elapsed_time": "10:30:06", "remaining_time": "1:40:35"} +{"current_steps": 7486, "total_steps": 8680, "loss": 0.8595654964447021, "lr": 1.0186015499177847e-07, "epoch": 1.7248847926267281, "percentage": 86.24, "elapsed_time": "10:30:10", "remaining_time": "1:40:30"} +{"current_steps": 7487, "total_steps": 8680, "loss": 0.7683298587799072, "lr": 1.0169269780659028e-07, "epoch": 1.7251152073732718, "percentage": 86.26, "elapsed_time": "10:30:15", "remaining_time": "1:40:25"} +{"current_steps": 7488, "total_steps": 8680, "loss": 0.888152003288269, "lr": 1.0152537100951786e-07, "epoch": 1.7253456221198156, "percentage": 86.27, "elapsed_time": "10:30:21", "remaining_time": "1:40:20"} 
+{"current_steps": 7489, "total_steps": 8680, "loss": 0.7835309505462646, "lr": 1.013581746248482e-07, "epoch": 1.7255760368663595, "percentage": 86.28, "elapsed_time": "10:30:26", "remaining_time": "1:40:15"} +{"current_steps": 7490, "total_steps": 8680, "loss": 0.9744646549224854, "lr": 1.0119110867684999e-07, "epoch": 1.7258064516129032, "percentage": 86.29, "elapsed_time": "10:30:30", "remaining_time": "1:40:10"} +{"current_steps": 7491, "total_steps": 8680, "loss": 0.6842091083526611, "lr": 1.0102417318977251e-07, "epoch": 1.726036866359447, "percentage": 86.3, "elapsed_time": "10:30:37", "remaining_time": "1:40:05"} +{"current_steps": 7492, "total_steps": 8680, "loss": 0.7435774207115173, "lr": 1.0085736818784607e-07, "epoch": 1.726267281105991, "percentage": 86.31, "elapsed_time": "10:30:44", "remaining_time": "1:40:01"} +{"current_steps": 7493, "total_steps": 8680, "loss": 0.8430237770080566, "lr": 1.0069069369528249e-07, "epoch": 1.7264976958525344, "percentage": 86.32, "elapsed_time": "10:30:51", "remaining_time": "1:39:56"} +{"current_steps": 7494, "total_steps": 8680, "loss": 0.8203141689300537, "lr": 1.0052414973627421e-07, "epoch": 1.7267281105990784, "percentage": 86.34, "elapsed_time": "10:30:58", "remaining_time": "1:39:51"} +{"current_steps": 7495, "total_steps": 8680, "loss": 0.7491584420204163, "lr": 1.0035773633499456e-07, "epoch": 1.726958525345622, "percentage": 86.35, "elapsed_time": "10:31:02", "remaining_time": "1:39:46"} +{"current_steps": 7496, "total_steps": 8680, "loss": 0.6738899946212769, "lr": 1.0019145351559876e-07, "epoch": 1.7271889400921658, "percentage": 86.36, "elapsed_time": "10:31:06", "remaining_time": "1:39:41"} +{"current_steps": 7497, "total_steps": 8680, "loss": 0.8628265857696533, "lr": 1.0002530130222231e-07, "epoch": 1.7274193548387098, "percentage": 86.37, "elapsed_time": "10:31:11", "remaining_time": "1:39:35"} +{"current_steps": 7498, "total_steps": 8680, "loss": 1.0158125162124634, "lr": 9.985927971898178e-08, 
"epoch": 1.7276497695852533, "percentage": 86.38, "elapsed_time": "10:31:17", "remaining_time": "1:39:31"} +{"current_steps": 7499, "total_steps": 8680, "loss": 0.7269070148468018, "lr": 9.969338878997535e-08, "epoch": 1.7278801843317972, "percentage": 86.39, "elapsed_time": "10:31:22", "remaining_time": "1:39:26"} +{"current_steps": 7500, "total_steps": 8680, "loss": 0.8769187927246094, "lr": 9.952762853928165e-08, "epoch": 1.728110599078341, "percentage": 86.41, "elapsed_time": "10:31:27", "remaining_time": "1:39:20"} +{"current_steps": 7501, "total_steps": 8680, "loss": 0.7841119170188904, "lr": 9.936199899096042e-08, "epoch": 1.7283410138248847, "percentage": 86.42, "elapsed_time": "10:31:36", "remaining_time": "1:39:16"} +{"current_steps": 7502, "total_steps": 8680, "loss": 0.9209425449371338, "lr": 9.91965001690529e-08, "epoch": 1.7285714285714286, "percentage": 86.43, "elapsed_time": "10:31:42", "remaining_time": "1:39:11"} +{"current_steps": 7503, "total_steps": 8680, "loss": 0.7795250415802002, "lr": 9.903113209758096e-08, "epoch": 1.7288018433179724, "percentage": 86.44, "elapsed_time": "10:31:49", "remaining_time": "1:39:06"} +{"current_steps": 7504, "total_steps": 8680, "loss": 0.7131094932556152, "lr": 9.886589480054741e-08, "epoch": 1.729032258064516, "percentage": 86.45, "elapsed_time": "10:31:54", "remaining_time": "1:39:01"} +{"current_steps": 7505, "total_steps": 8680, "loss": 0.8090137839317322, "lr": 9.870078830193629e-08, "epoch": 1.72926267281106, "percentage": 86.46, "elapsed_time": "10:32:01", "remaining_time": "1:38:57"} +{"current_steps": 7506, "total_steps": 8680, "loss": 0.7797958850860596, "lr": 9.853581262571231e-08, "epoch": 1.7294930875576036, "percentage": 86.47, "elapsed_time": "10:32:08", "remaining_time": "1:38:52"} +{"current_steps": 7507, "total_steps": 8680, "loss": 0.6927989721298218, "lr": 9.83709677958221e-08, "epoch": 1.7297235023041475, "percentage": 86.49, "elapsed_time": "10:32:14", "remaining_time": "1:38:47"} 
+{"current_steps": 7508, "total_steps": 8680, "loss": 0.8009092807769775, "lr": 9.820625383619219e-08, "epoch": 1.7299539170506912, "percentage": 86.5, "elapsed_time": "10:32:19", "remaining_time": "1:38:42"} +{"current_steps": 7509, "total_steps": 8680, "loss": 0.761864423751831, "lr": 9.804167077073056e-08, "epoch": 1.730184331797235, "percentage": 86.51, "elapsed_time": "10:32:25", "remaining_time": "1:38:37"} +{"current_steps": 7510, "total_steps": 8680, "loss": 0.7459509372711182, "lr": 9.787721862332654e-08, "epoch": 1.730414746543779, "percentage": 86.52, "elapsed_time": "10:32:29", "remaining_time": "1:38:32"} +{"current_steps": 7511, "total_steps": 8680, "loss": 0.8216449022293091, "lr": 9.771289741785005e-08, "epoch": 1.7306451612903224, "percentage": 86.53, "elapsed_time": "10:32:35", "remaining_time": "1:38:27"} +{"current_steps": 7512, "total_steps": 8680, "loss": 0.7860604524612427, "lr": 9.754870717815177e-08, "epoch": 1.7308755760368664, "percentage": 86.54, "elapsed_time": "10:32:40", "remaining_time": "1:38:22"} +{"current_steps": 7513, "total_steps": 8680, "loss": 0.7727769613265991, "lr": 9.738464792806422e-08, "epoch": 1.73110599078341, "percentage": 86.56, "elapsed_time": "10:32:46", "remaining_time": "1:38:17"} +{"current_steps": 7514, "total_steps": 8680, "loss": 0.874458909034729, "lr": 9.722071969140011e-08, "epoch": 1.7313364055299538, "percentage": 86.57, "elapsed_time": "10:32:51", "remaining_time": "1:38:12"} +{"current_steps": 7515, "total_steps": 8680, "loss": 0.840191125869751, "lr": 9.705692249195319e-08, "epoch": 1.7315668202764978, "percentage": 86.58, "elapsed_time": "10:32:55", "remaining_time": "1:38:07"} +{"current_steps": 7516, "total_steps": 8680, "loss": 0.7169238924980164, "lr": 9.689325635349877e-08, "epoch": 1.7317972350230415, "percentage": 86.59, "elapsed_time": "10:33:02", "remaining_time": "1:38:02"} +{"current_steps": 7517, "total_steps": 8680, "loss": 0.7554492950439453, "lr": 9.672972129979273e-08, "epoch": 
1.7320276497695852, "percentage": 86.6, "elapsed_time": "10:33:08", "remaining_time": "1:37:57"} +{"current_steps": 7518, "total_steps": 8680, "loss": 0.5734076499938965, "lr": 9.656631735457154e-08, "epoch": 1.7322580645161292, "percentage": 86.61, "elapsed_time": "10:33:14", "remaining_time": "1:37:52"} +{"current_steps": 7519, "total_steps": 8680, "loss": 0.7867637872695923, "lr": 9.640304454155369e-08, "epoch": 1.7324884792626727, "percentage": 86.62, "elapsed_time": "10:33:19", "remaining_time": "1:37:47"} +{"current_steps": 7520, "total_steps": 8680, "loss": 0.7330230474472046, "lr": 9.623990288443773e-08, "epoch": 1.7327188940092166, "percentage": 86.64, "elapsed_time": "10:33:23", "remaining_time": "1:37:42"} +{"current_steps": 7521, "total_steps": 8680, "loss": 0.7880058288574219, "lr": 9.607689240690319e-08, "epoch": 1.7329493087557604, "percentage": 86.65, "elapsed_time": "10:33:28", "remaining_time": "1:37:37"} +{"current_steps": 7522, "total_steps": 8680, "loss": 0.796575665473938, "lr": 9.591401313261139e-08, "epoch": 1.733179723502304, "percentage": 86.66, "elapsed_time": "10:33:34", "remaining_time": "1:37:32"} +{"current_steps": 7523, "total_steps": 8680, "loss": 0.8101698160171509, "lr": 9.575126508520359e-08, "epoch": 1.733410138248848, "percentage": 86.67, "elapsed_time": "10:33:41", "remaining_time": "1:37:27"} +{"current_steps": 7524, "total_steps": 8680, "loss": 0.7811597585678101, "lr": 9.55886482883026e-08, "epoch": 1.7336405529953915, "percentage": 86.68, "elapsed_time": "10:33:47", "remaining_time": "1:37:22"} +{"current_steps": 7525, "total_steps": 8680, "loss": 0.7680011987686157, "lr": 9.542616276551208e-08, "epoch": 1.7338709677419355, "percentage": 86.69, "elapsed_time": "10:33:53", "remaining_time": "1:37:17"} +{"current_steps": 7526, "total_steps": 8680, "loss": 0.8018794059753418, "lr": 9.526380854041638e-08, "epoch": 1.7341013824884792, "percentage": 86.71, "elapsed_time": "10:33:59", "remaining_time": "1:37:12"} 
+{"current_steps": 7527, "total_steps": 8680, "loss": 0.7770500183105469, "lr": 9.510158563658133e-08, "epoch": 1.734331797235023, "percentage": 86.72, "elapsed_time": "10:34:03", "remaining_time": "1:37:07"} +{"current_steps": 7528, "total_steps": 8680, "loss": 0.7622300982475281, "lr": 9.493949407755309e-08, "epoch": 1.734562211981567, "percentage": 86.73, "elapsed_time": "10:34:09", "remaining_time": "1:37:02"} +{"current_steps": 7529, "total_steps": 8680, "loss": 0.831570029258728, "lr": 9.477753388685928e-08, "epoch": 1.7347926267281106, "percentage": 86.74, "elapsed_time": "10:34:14", "remaining_time": "1:36:57"} +{"current_steps": 7530, "total_steps": 8680, "loss": 0.7987254858016968, "lr": 9.461570508800776e-08, "epoch": 1.7350230414746544, "percentage": 86.75, "elapsed_time": "10:34:20", "remaining_time": "1:36:52"} +{"current_steps": 7531, "total_steps": 8680, "loss": 0.8219848275184631, "lr": 9.44540077044883e-08, "epoch": 1.7352534562211983, "percentage": 86.76, "elapsed_time": "10:34:25", "remaining_time": "1:36:47"} +{"current_steps": 7532, "total_steps": 8680, "loss": 0.8273369073867798, "lr": 9.429244175977092e-08, "epoch": 1.7354838709677418, "percentage": 86.77, "elapsed_time": "10:34:31", "remaining_time": "1:36:42"} +{"current_steps": 7533, "total_steps": 8680, "loss": 0.8241056203842163, "lr": 9.413100727730628e-08, "epoch": 1.7357142857142858, "percentage": 86.79, "elapsed_time": "10:34:36", "remaining_time": "1:36:37"} +{"current_steps": 7534, "total_steps": 8680, "loss": 0.6880715489387512, "lr": 9.396970428052697e-08, "epoch": 1.7359447004608295, "percentage": 86.8, "elapsed_time": "10:34:41", "remaining_time": "1:36:32"} +{"current_steps": 7535, "total_steps": 8680, "loss": 0.7355446815490723, "lr": 9.380853279284551e-08, "epoch": 1.7361751152073732, "percentage": 86.81, "elapsed_time": "10:34:47", "remaining_time": "1:36:27"} +{"current_steps": 7536, "total_steps": 8680, "loss": 0.8835841417312622, "lr": 9.364749283765604e-08, "epoch": 
1.7364055299539172, "percentage": 86.82, "elapsed_time": "10:34:53", "remaining_time": "1:36:22"} +{"current_steps": 7537, "total_steps": 8680, "loss": 0.80763840675354, "lr": 9.348658443833313e-08, "epoch": 1.7366359447004607, "percentage": 86.83, "elapsed_time": "10:34:59", "remaining_time": "1:36:17"} +{"current_steps": 7538, "total_steps": 8680, "loss": 0.7473145723342896, "lr": 9.332580761823227e-08, "epoch": 1.7368663594470046, "percentage": 86.84, "elapsed_time": "10:35:07", "remaining_time": "1:36:13"} +{"current_steps": 7539, "total_steps": 8680, "loss": 0.6618188619613647, "lr": 9.316516240069028e-08, "epoch": 1.7370967741935484, "percentage": 86.85, "elapsed_time": "10:35:13", "remaining_time": "1:36:08"} +{"current_steps": 7540, "total_steps": 8680, "loss": 0.7432928085327148, "lr": 9.300464880902447e-08, "epoch": 1.737327188940092, "percentage": 86.87, "elapsed_time": "10:35:21", "remaining_time": "1:36:03"} +{"current_steps": 7541, "total_steps": 8680, "loss": 0.7915963530540466, "lr": 9.284426686653302e-08, "epoch": 1.737557603686636, "percentage": 86.88, "elapsed_time": "10:35:27", "remaining_time": "1:35:58"} +{"current_steps": 7542, "total_steps": 8680, "loss": 0.6428440809249878, "lr": 9.26840165964955e-08, "epoch": 1.7377880184331798, "percentage": 86.89, "elapsed_time": "10:35:33", "remaining_time": "1:35:53"} +{"current_steps": 7543, "total_steps": 8680, "loss": 0.7142912149429321, "lr": 9.252389802217187e-08, "epoch": 1.7380184331797235, "percentage": 86.9, "elapsed_time": "10:35:40", "remaining_time": "1:35:49"} +{"current_steps": 7544, "total_steps": 8680, "loss": 0.878044605255127, "lr": 9.236391116680309e-08, "epoch": 1.7382488479262674, "percentage": 86.91, "elapsed_time": "10:35:45", "remaining_time": "1:35:44"} +{"current_steps": 7545, "total_steps": 8680, "loss": 0.6861810684204102, "lr": 9.220405605361103e-08, "epoch": 1.738479262672811, "percentage": 86.92, "elapsed_time": "10:35:52", "remaining_time": "1:35:39"} +{"current_steps": 
7546, "total_steps": 8680, "loss": 0.7638171911239624, "lr": 9.204433270579825e-08, "epoch": 1.738709677419355, "percentage": 86.94, "elapsed_time": "10:35:57", "remaining_time": "1:35:34"} +{"current_steps": 7547, "total_steps": 8680, "loss": 0.7149873971939087, "lr": 9.188474114654876e-08, "epoch": 1.7389400921658986, "percentage": 86.95, "elapsed_time": "10:36:02", "remaining_time": "1:35:29"} +{"current_steps": 7548, "total_steps": 8680, "loss": 0.7249442338943481, "lr": 9.172528139902703e-08, "epoch": 1.7391705069124423, "percentage": 86.96, "elapsed_time": "10:36:07", "remaining_time": "1:35:24"} +{"current_steps": 7549, "total_steps": 8680, "loss": 0.6846513748168945, "lr": 9.156595348637819e-08, "epoch": 1.7394009216589863, "percentage": 86.97, "elapsed_time": "10:36:13", "remaining_time": "1:35:19"} +{"current_steps": 7550, "total_steps": 8680, "loss": 0.9332281351089478, "lr": 9.140675743172843e-08, "epoch": 1.7396313364055298, "percentage": 86.98, "elapsed_time": "10:36:18", "remaining_time": "1:35:14"} +{"current_steps": 7551, "total_steps": 8680, "loss": 0.6878118515014648, "lr": 9.124769325818526e-08, "epoch": 1.7398617511520738, "percentage": 86.99, "elapsed_time": "10:36:24", "remaining_time": "1:35:09"} +{"current_steps": 7552, "total_steps": 8680, "loss": 0.7695426344871521, "lr": 9.108876098883633e-08, "epoch": 1.7400921658986175, "percentage": 87.0, "elapsed_time": "10:36:31", "remaining_time": "1:35:04"} +{"current_steps": 7553, "total_steps": 8680, "loss": 0.7983303666114807, "lr": 9.09299606467503e-08, "epoch": 1.7403225806451612, "percentage": 87.02, "elapsed_time": "10:36:35", "remaining_time": "1:34:59"} +{"current_steps": 7554, "total_steps": 8680, "loss": 0.8158761262893677, "lr": 9.077129225497726e-08, "epoch": 1.7405529953917052, "percentage": 87.03, "elapsed_time": "10:36:43", "remaining_time": "1:34:54"} +{"current_steps": 7555, "total_steps": 8680, "loss": 0.8064214587211609, "lr": 9.061275583654748e-08, "epoch": 1.7407834101382489, 
"percentage": 87.04, "elapsed_time": "10:36:50", "remaining_time": "1:34:49"} +{"current_steps": 7556, "total_steps": 8680, "loss": 0.9058080911636353, "lr": 9.045435141447211e-08, "epoch": 1.7410138248847926, "percentage": 87.05, "elapsed_time": "10:36:58", "remaining_time": "1:34:45"} +{"current_steps": 7557, "total_steps": 8680, "loss": 0.7392270565032959, "lr": 9.029607901174374e-08, "epoch": 1.7412442396313366, "percentage": 87.06, "elapsed_time": "10:37:05", "remaining_time": "1:34:40"} +{"current_steps": 7558, "total_steps": 8680, "loss": 0.7114729881286621, "lr": 9.013793865133501e-08, "epoch": 1.74147465437788, "percentage": 87.07, "elapsed_time": "10:37:10", "remaining_time": "1:34:35"} +{"current_steps": 7559, "total_steps": 8680, "loss": 0.8675493597984314, "lr": 8.997993035620022e-08, "epoch": 1.741705069124424, "percentage": 87.09, "elapsed_time": "10:37:17", "remaining_time": "1:34:30"} +{"current_steps": 7560, "total_steps": 8680, "loss": 0.8103020191192627, "lr": 8.98220541492738e-08, "epoch": 1.7419354838709677, "percentage": 87.1, "elapsed_time": "10:37:24", "remaining_time": "1:34:25"} +{"current_steps": 7561, "total_steps": 8680, "loss": 0.7339279651641846, "lr": 8.966431005347109e-08, "epoch": 1.7421658986175115, "percentage": 87.11, "elapsed_time": "10:37:31", "remaining_time": "1:34:21"} +{"current_steps": 7562, "total_steps": 8680, "loss": 0.6971707344055176, "lr": 8.950669809168887e-08, "epoch": 1.7423963133640554, "percentage": 87.12, "elapsed_time": "10:37:36", "remaining_time": "1:34:15"} +{"current_steps": 7563, "total_steps": 8680, "loss": 0.8633124232292175, "lr": 8.934921828680408e-08, "epoch": 1.742626728110599, "percentage": 87.13, "elapsed_time": "10:37:44", "remaining_time": "1:34:11"} +{"current_steps": 7564, "total_steps": 8680, "loss": 0.7704664468765259, "lr": 8.919187066167466e-08, "epoch": 1.7428571428571429, "percentage": 87.14, "elapsed_time": "10:37:49", "remaining_time": "1:34:06"} +{"current_steps": 7565, 
"total_steps": 8680, "loss": 0.7063533067703247, "lr": 8.903465523913955e-08, "epoch": 1.7430875576036866, "percentage": 87.15, "elapsed_time": "10:37:55", "remaining_time": "1:34:01"} +{"current_steps": 7566, "total_steps": 8680, "loss": 0.7094486951828003, "lr": 8.887757204201817e-08, "epoch": 1.7433179723502303, "percentage": 87.17, "elapsed_time": "10:38:00", "remaining_time": "1:33:56"} +{"current_steps": 7567, "total_steps": 8680, "loss": 0.8743780255317688, "lr": 8.872062109311096e-08, "epoch": 1.7435483870967743, "percentage": 87.18, "elapsed_time": "10:38:05", "remaining_time": "1:33:51"} +{"current_steps": 7568, "total_steps": 8680, "loss": 0.7282687425613403, "lr": 8.856380241519935e-08, "epoch": 1.743778801843318, "percentage": 87.19, "elapsed_time": "10:38:10", "remaining_time": "1:33:46"} +{"current_steps": 7569, "total_steps": 8680, "loss": 0.7507487535476685, "lr": 8.840711603104523e-08, "epoch": 1.7440092165898617, "percentage": 87.2, "elapsed_time": "10:38:14", "remaining_time": "1:33:40"} +{"current_steps": 7570, "total_steps": 8680, "loss": 0.807691216468811, "lr": 8.82505619633912e-08, "epoch": 1.7442396313364057, "percentage": 87.21, "elapsed_time": "10:38:20", "remaining_time": "1:33:36"} +{"current_steps": 7571, "total_steps": 8680, "loss": 0.8650702238082886, "lr": 8.809414023496142e-08, "epoch": 1.7444700460829492, "percentage": 87.22, "elapsed_time": "10:38:25", "remaining_time": "1:33:31"} +{"current_steps": 7572, "total_steps": 8680, "loss": 0.6872273683547974, "lr": 8.793785086845984e-08, "epoch": 1.7447004608294931, "percentage": 87.24, "elapsed_time": "10:38:33", "remaining_time": "1:33:26"} +{"current_steps": 7573, "total_steps": 8680, "loss": 0.7242698669433594, "lr": 8.778169388657163e-08, "epoch": 1.7449308755760369, "percentage": 87.25, "elapsed_time": "10:38:38", "remaining_time": "1:33:21"} +{"current_steps": 7574, "total_steps": 8680, "loss": 0.741705060005188, "lr": 8.762566931196313e-08, "epoch": 1.7451612903225806, 
"percentage": 87.26, "elapsed_time": "10:38:44", "remaining_time": "1:33:16"} +{"current_steps": 7575, "total_steps": 8680, "loss": 0.7293061017990112, "lr": 8.746977716728099e-08, "epoch": 1.7453917050691246, "percentage": 87.27, "elapsed_time": "10:38:49", "remaining_time": "1:33:11"} +{"current_steps": 7576, "total_steps": 8680, "loss": 0.8385475277900696, "lr": 8.731401747515244e-08, "epoch": 1.745622119815668, "percentage": 87.28, "elapsed_time": "10:38:53", "remaining_time": "1:33:06"} +{"current_steps": 7577, "total_steps": 8680, "loss": 0.8484489917755127, "lr": 8.715839025818617e-08, "epoch": 1.745852534562212, "percentage": 87.29, "elapsed_time": "10:38:59", "remaining_time": "1:33:01"} +{"current_steps": 7578, "total_steps": 8680, "loss": 0.6511530876159668, "lr": 8.7002895538971e-08, "epoch": 1.7460829493087557, "percentage": 87.3, "elapsed_time": "10:39:05", "remaining_time": "1:32:56"} +{"current_steps": 7579, "total_steps": 8680, "loss": 0.8274673223495483, "lr": 8.684753334007688e-08, "epoch": 1.7463133640552995, "percentage": 87.32, "elapsed_time": "10:39:09", "remaining_time": "1:32:50"} +{"current_steps": 7580, "total_steps": 8680, "loss": 0.7367755174636841, "lr": 8.669230368405456e-08, "epoch": 1.7465437788018434, "percentage": 87.33, "elapsed_time": "10:39:14", "remaining_time": "1:32:45"} +{"current_steps": 7581, "total_steps": 8680, "loss": 0.80199134349823, "lr": 8.653720659343522e-08, "epoch": 1.7467741935483871, "percentage": 87.34, "elapsed_time": "10:39:19", "remaining_time": "1:32:40"} +{"current_steps": 7582, "total_steps": 8680, "loss": 0.7782701253890991, "lr": 8.638224209073097e-08, "epoch": 1.7470046082949309, "percentage": 87.35, "elapsed_time": "10:39:25", "remaining_time": "1:32:36"} +{"current_steps": 7583, "total_steps": 8680, "loss": 0.7613752484321594, "lr": 8.622741019843504e-08, "epoch": 1.7472350230414746, "percentage": 87.36, "elapsed_time": "10:39:30", "remaining_time": "1:32:30"} +{"current_steps": 7584, 
"total_steps": 8680, "loss": 0.8213690519332886, "lr": 8.60727109390208e-08, "epoch": 1.7474654377880183, "percentage": 87.37, "elapsed_time": "10:39:35", "remaining_time": "1:32:25"} +{"current_steps": 7585, "total_steps": 8680, "loss": 0.7064045667648315, "lr": 8.59181443349426e-08, "epoch": 1.7476958525345623, "percentage": 87.38, "elapsed_time": "10:39:40", "remaining_time": "1:32:20"} +{"current_steps": 7586, "total_steps": 8680, "loss": 0.6686617136001587, "lr": 8.576371040863573e-08, "epoch": 1.747926267281106, "percentage": 87.4, "elapsed_time": "10:39:45", "remaining_time": "1:32:15"} +{"current_steps": 7587, "total_steps": 8680, "loss": 0.7520097494125366, "lr": 8.560940918251592e-08, "epoch": 1.7481566820276497, "percentage": 87.41, "elapsed_time": "10:39:51", "remaining_time": "1:32:10"} +{"current_steps": 7588, "total_steps": 8680, "loss": 0.8176038265228271, "lr": 8.545524067897991e-08, "epoch": 1.7483870967741937, "percentage": 87.42, "elapsed_time": "10:39:58", "remaining_time": "1:32:05"} +{"current_steps": 7589, "total_steps": 8680, "loss": 0.6680614948272705, "lr": 8.530120492040505e-08, "epoch": 1.7486175115207372, "percentage": 87.43, "elapsed_time": "10:40:03", "remaining_time": "1:32:00"} +{"current_steps": 7590, "total_steps": 8680, "loss": 0.7421592473983765, "lr": 8.514730192914921e-08, "epoch": 1.7488479262672811, "percentage": 87.44, "elapsed_time": "10:40:07", "remaining_time": "1:31:55"} +{"current_steps": 7591, "total_steps": 8680, "loss": 0.8869342803955078, "lr": 8.499353172755164e-08, "epoch": 1.7490783410138249, "percentage": 87.45, "elapsed_time": "10:40:12", "remaining_time": "1:31:50"} +{"current_steps": 7592, "total_steps": 8680, "loss": 0.6850584745407104, "lr": 8.48398943379316e-08, "epoch": 1.7493087557603686, "percentage": 87.47, "elapsed_time": "10:40:17", "remaining_time": "1:31:45"} +{"current_steps": 7593, "total_steps": 8680, "loss": 0.7433363199234009, "lr": 8.468638978258914e-08, "epoch": 1.7495391705069125, 
"percentage": 87.48, "elapsed_time": "10:40:23", "remaining_time": "1:31:40"} +{"current_steps": 7594, "total_steps": 8680, "loss": 0.7744357585906982, "lr": 8.453301808380564e-08, "epoch": 1.7497695852534563, "percentage": 87.49, "elapsed_time": "10:40:27", "remaining_time": "1:31:35"} +{"current_steps": 7595, "total_steps": 8680, "loss": 0.8236217498779297, "lr": 8.437977926384277e-08, "epoch": 1.75, "percentage": 87.5, "elapsed_time": "10:40:31", "remaining_time": "1:31:30"} +{"current_steps": 7596, "total_steps": 8680, "loss": 0.8552603721618652, "lr": 8.422667334494249e-08, "epoch": 1.7502304147465437, "percentage": 87.51, "elapsed_time": "10:40:37", "remaining_time": "1:31:25"} +{"current_steps": 7597, "total_steps": 8680, "loss": 0.7755998373031616, "lr": 8.407370034932859e-08, "epoch": 1.7504608294930875, "percentage": 87.52, "elapsed_time": "10:40:42", "remaining_time": "1:31:20"} +{"current_steps": 7598, "total_steps": 8680, "loss": 0.8105130195617676, "lr": 8.392086029920442e-08, "epoch": 1.7506912442396314, "percentage": 87.53, "elapsed_time": "10:40:46", "remaining_time": "1:31:15"} +{"current_steps": 7599, "total_steps": 8680, "loss": 0.8787405490875244, "lr": 8.376815321675457e-08, "epoch": 1.7509216589861751, "percentage": 87.55, "elapsed_time": "10:40:51", "remaining_time": "1:31:09"} +{"current_steps": 7600, "total_steps": 8680, "loss": 0.6107788681983948, "lr": 8.361557912414441e-08, "epoch": 1.7511520737327189, "percentage": 87.56, "elapsed_time": "10:40:56", "remaining_time": "1:31:04"} +{"current_steps": 7601, "total_steps": 8680, "loss": 0.6825795769691467, "lr": 8.34631380435199e-08, "epoch": 1.7513824884792628, "percentage": 87.57, "elapsed_time": "10:41:02", "remaining_time": "1:31:00"} +{"current_steps": 7602, "total_steps": 8680, "loss": 0.7069272994995117, "lr": 8.331082999700734e-08, "epoch": 1.7516129032258063, "percentage": 87.58, "elapsed_time": "10:41:08", "remaining_time": "1:30:55"} +{"current_steps": 7603, "total_steps": 8680, 
"loss": 0.7784801721572876, "lr": 8.315865500671449e-08, "epoch": 1.7518433179723503, "percentage": 87.59, "elapsed_time": "10:41:13", "remaining_time": "1:30:49"} +{"current_steps": 7604, "total_steps": 8680, "loss": 0.7653795480728149, "lr": 8.300661309472912e-08, "epoch": 1.752073732718894, "percentage": 87.6, "elapsed_time": "10:41:17", "remaining_time": "1:30:44"} +{"current_steps": 7605, "total_steps": 8680, "loss": 0.7386122941970825, "lr": 8.285470428311991e-08, "epoch": 1.7523041474654377, "percentage": 87.62, "elapsed_time": "10:41:22", "remaining_time": "1:30:39"} +{"current_steps": 7606, "total_steps": 8680, "loss": 0.7828700542449951, "lr": 8.270292859393613e-08, "epoch": 1.7525345622119817, "percentage": 87.63, "elapsed_time": "10:41:29", "remaining_time": "1:30:34"} +{"current_steps": 7607, "total_steps": 8680, "loss": 0.8955565094947815, "lr": 8.255128604920792e-08, "epoch": 1.7527649769585254, "percentage": 87.64, "elapsed_time": "10:41:33", "remaining_time": "1:30:29"} +{"current_steps": 7608, "total_steps": 8680, "loss": 0.9071576595306396, "lr": 8.2399776670946e-08, "epoch": 1.7529953917050691, "percentage": 87.65, "elapsed_time": "10:41:37", "remaining_time": "1:30:24"} +{"current_steps": 7609, "total_steps": 8680, "loss": 0.752417802810669, "lr": 8.22484004811419e-08, "epoch": 1.7532258064516129, "percentage": 87.66, "elapsed_time": "10:41:42", "remaining_time": "1:30:19"} +{"current_steps": 7610, "total_steps": 8680, "loss": 0.8611370325088501, "lr": 8.209715750176727e-08, "epoch": 1.7534562211981566, "percentage": 87.67, "elapsed_time": "10:41:47", "remaining_time": "1:30:14"} +{"current_steps": 7611, "total_steps": 8680, "loss": 0.745223879814148, "lr": 8.19460477547752e-08, "epoch": 1.7536866359447005, "percentage": 87.68, "elapsed_time": "10:41:52", "remaining_time": "1:30:09"} +{"current_steps": 7612, "total_steps": 8680, "loss": 0.7799668908119202, "lr": 8.179507126209906e-08, "epoch": 1.7539170506912443, "percentage": 87.7, 
"elapsed_time": "10:41:57", "remaining_time": "1:30:04"} +{"current_steps": 7613, "total_steps": 8680, "loss": 0.8177207708358765, "lr": 8.164422804565263e-08, "epoch": 1.754147465437788, "percentage": 87.71, "elapsed_time": "10:42:01", "remaining_time": "1:29:59"} +{"current_steps": 7614, "total_steps": 8680, "loss": 0.7111436128616333, "lr": 8.149351812733085e-08, "epoch": 1.754377880184332, "percentage": 87.72, "elapsed_time": "10:42:07", "remaining_time": "1:29:54"} +{"current_steps": 7615, "total_steps": 8680, "loss": 0.6840728521347046, "lr": 8.1342941529009e-08, "epoch": 1.7546082949308754, "percentage": 87.73, "elapsed_time": "10:42:12", "remaining_time": "1:29:49"} +{"current_steps": 7616, "total_steps": 8680, "loss": 0.6115491986274719, "lr": 8.119249827254281e-08, "epoch": 1.7548387096774194, "percentage": 87.74, "elapsed_time": "10:42:18", "remaining_time": "1:29:43"} +{"current_steps": 7617, "total_steps": 8680, "loss": 0.7149351239204407, "lr": 8.104218837976939e-08, "epoch": 1.7550691244239631, "percentage": 87.75, "elapsed_time": "10:42:23", "remaining_time": "1:29:38"} +{"current_steps": 7618, "total_steps": 8680, "loss": 0.688147783279419, "lr": 8.089201187250571e-08, "epoch": 1.7552995391705069, "percentage": 87.76, "elapsed_time": "10:42:28", "remaining_time": "1:29:33"} +{"current_steps": 7619, "total_steps": 8680, "loss": 0.8092058300971985, "lr": 8.074196877254969e-08, "epoch": 1.7555299539170508, "percentage": 87.78, "elapsed_time": "10:42:34", "remaining_time": "1:29:29"} +{"current_steps": 7620, "total_steps": 8680, "loss": 0.7375935912132263, "lr": 8.05920591016801e-08, "epoch": 1.7557603686635943, "percentage": 87.79, "elapsed_time": "10:42:41", "remaining_time": "1:29:24"} +{"current_steps": 7621, "total_steps": 8680, "loss": 0.6793934106826782, "lr": 8.044228288165599e-08, "epoch": 1.7559907834101383, "percentage": 87.8, "elapsed_time": "10:42:48", "remaining_time": "1:29:19"} +{"current_steps": 7622, "total_steps": 8680, "loss": 
0.7395757436752319, "lr": 8.0292640134217e-08, "epoch": 1.756221198156682, "percentage": 87.81, "elapsed_time": "10:42:55", "remaining_time": "1:29:14"} +{"current_steps": 7623, "total_steps": 8680, "loss": 0.546409010887146, "lr": 8.014313088108394e-08, "epoch": 1.7564516129032257, "percentage": 87.82, "elapsed_time": "10:43:01", "remaining_time": "1:29:09"} +{"current_steps": 7624, "total_steps": 8680, "loss": 0.7790534496307373, "lr": 7.999375514395778e-08, "epoch": 1.7566820276497697, "percentage": 87.83, "elapsed_time": "10:43:06", "remaining_time": "1:29:04"} +{"current_steps": 7625, "total_steps": 8680, "loss": 0.7398231625556946, "lr": 7.984451294452e-08, "epoch": 1.7569124423963134, "percentage": 87.85, "elapsed_time": "10:43:12", "remaining_time": "1:28:59"} +{"current_steps": 7626, "total_steps": 8680, "loss": 0.7414441108703613, "lr": 7.969540430443311e-08, "epoch": 1.7571428571428571, "percentage": 87.86, "elapsed_time": "10:43:17", "remaining_time": "1:28:54"} +{"current_steps": 7627, "total_steps": 8680, "loss": 0.7548750638961792, "lr": 7.954642924533994e-08, "epoch": 1.757373271889401, "percentage": 87.87, "elapsed_time": "10:43:21", "remaining_time": "1:28:49"} +{"current_steps": 7628, "total_steps": 8680, "loss": 0.7546773552894592, "lr": 7.939758778886385e-08, "epoch": 1.7576036866359446, "percentage": 87.88, "elapsed_time": "10:43:28", "remaining_time": "1:28:44"} +{"current_steps": 7629, "total_steps": 8680, "loss": 0.7373867630958557, "lr": 7.924887995660945e-08, "epoch": 1.7578341013824885, "percentage": 87.89, "elapsed_time": "10:43:35", "remaining_time": "1:28:39"} +{"current_steps": 7630, "total_steps": 8680, "loss": 0.7271026968955994, "lr": 7.910030577016113e-08, "epoch": 1.7580645161290323, "percentage": 87.9, "elapsed_time": "10:43:42", "remaining_time": "1:28:35"} +{"current_steps": 7631, "total_steps": 8680, "loss": 0.8723413944244385, "lr": 7.89518652510841e-08, "epoch": 1.758294930875576, "percentage": 87.91, "elapsed_time": 
"10:43:47", "remaining_time": "1:28:29"} +{"current_steps": 7632, "total_steps": 8680, "loss": 0.8282548189163208, "lr": 7.880355842092468e-08, "epoch": 1.75852534562212, "percentage": 87.93, "elapsed_time": "10:43:52", "remaining_time": "1:28:24"} +{"current_steps": 7633, "total_steps": 8680, "loss": 0.7436991930007935, "lr": 7.865538530120918e-08, "epoch": 1.7587557603686634, "percentage": 87.94, "elapsed_time": "10:43:57", "remaining_time": "1:28:19"} +{"current_steps": 7634, "total_steps": 8680, "loss": 0.7750650644302368, "lr": 7.850734591344488e-08, "epoch": 1.7589861751152074, "percentage": 87.95, "elapsed_time": "10:44:02", "remaining_time": "1:28:14"} +{"current_steps": 7635, "total_steps": 8680, "loss": 0.6824958324432373, "lr": 7.835944027911957e-08, "epoch": 1.7592165898617511, "percentage": 87.96, "elapsed_time": "10:44:07", "remaining_time": "1:28:09"} +{"current_steps": 7636, "total_steps": 8680, "loss": 0.8500322103500366, "lr": 7.821166841970107e-08, "epoch": 1.7594470046082948, "percentage": 87.97, "elapsed_time": "10:44:12", "remaining_time": "1:28:04"} +{"current_steps": 7637, "total_steps": 8680, "loss": 0.7111128568649292, "lr": 7.806403035663889e-08, "epoch": 1.7596774193548388, "percentage": 87.98, "elapsed_time": "10:44:18", "remaining_time": "1:27:59"} +{"current_steps": 7638, "total_steps": 8680, "loss": 0.7320532202720642, "lr": 7.791652611136212e-08, "epoch": 1.7599078341013825, "percentage": 88.0, "elapsed_time": "10:44:24", "remaining_time": "1:27:54"} +{"current_steps": 7639, "total_steps": 8680, "loss": 0.8439149856567383, "lr": 7.776915570528076e-08, "epoch": 1.7601382488479262, "percentage": 88.01, "elapsed_time": "10:44:31", "remaining_time": "1:27:49"} +{"current_steps": 7640, "total_steps": 8680, "loss": 0.6489125490188599, "lr": 7.762191915978578e-08, "epoch": 1.7603686635944702, "percentage": 88.02, "elapsed_time": "10:44:37", "remaining_time": "1:27:45"} +{"current_steps": 7641, "total_steps": 8680, "loss": 
0.8118616342544556, "lr": 7.74748164962482e-08, "epoch": 1.7605990783410137, "percentage": 88.03, "elapsed_time": "10:44:44", "remaining_time": "1:27:40"} +{"current_steps": 7642, "total_steps": 8680, "loss": 0.7528102397918701, "lr": 7.732784773601953e-08, "epoch": 1.7608294930875577, "percentage": 88.04, "elapsed_time": "10:44:49", "remaining_time": "1:27:35"} +{"current_steps": 7643, "total_steps": 8680, "loss": 0.7856849431991577, "lr": 7.718101290043244e-08, "epoch": 1.7610599078341014, "percentage": 88.05, "elapsed_time": "10:44:54", "remaining_time": "1:27:30"} +{"current_steps": 7644, "total_steps": 8680, "loss": 0.8001973628997803, "lr": 7.703431201079969e-08, "epoch": 1.761290322580645, "percentage": 88.06, "elapsed_time": "10:44:59", "remaining_time": "1:27:25"} +{"current_steps": 7645, "total_steps": 8680, "loss": 0.8332471251487732, "lr": 7.688774508841478e-08, "epoch": 1.761520737327189, "percentage": 88.08, "elapsed_time": "10:45:04", "remaining_time": "1:27:19"} +{"current_steps": 7646, "total_steps": 8680, "loss": 0.7534940242767334, "lr": 7.67413121545516e-08, "epoch": 1.7617511520737326, "percentage": 88.09, "elapsed_time": "10:45:09", "remaining_time": "1:27:14"} +{"current_steps": 7647, "total_steps": 8680, "loss": 0.737503170967102, "lr": 7.65950132304647e-08, "epoch": 1.7619815668202765, "percentage": 88.1, "elapsed_time": "10:45:15", "remaining_time": "1:27:09"} +{"current_steps": 7648, "total_steps": 8680, "loss": 0.7407201528549194, "lr": 7.644884833738896e-08, "epoch": 1.7622119815668202, "percentage": 88.11, "elapsed_time": "10:45:20", "remaining_time": "1:27:04"} +{"current_steps": 7649, "total_steps": 8680, "loss": 0.7589377164840698, "lr": 7.630281749654055e-08, "epoch": 1.762442396313364, "percentage": 88.12, "elapsed_time": "10:45:26", "remaining_time": "1:26:59"} +{"current_steps": 7650, "total_steps": 8680, "loss": 0.6586496829986572, "lr": 7.615692072911528e-08, "epoch": 1.762672811059908, "percentage": 88.13, "elapsed_time": 
"10:45:32", "remaining_time": "1:26:54"} +{"current_steps": 7651, "total_steps": 8680, "loss": 0.705591082572937, "lr": 7.601115805628977e-08, "epoch": 1.7629032258064516, "percentage": 88.15, "elapsed_time": "10:45:36", "remaining_time": "1:26:49"} +{"current_steps": 7652, "total_steps": 8680, "loss": 0.7889619469642639, "lr": 7.586552949922176e-08, "epoch": 1.7631336405529954, "percentage": 88.16, "elapsed_time": "10:45:41", "remaining_time": "1:26:44"} +{"current_steps": 7653, "total_steps": 8680, "loss": 0.6912282705307007, "lr": 7.572003507904868e-08, "epoch": 1.7633640552995393, "percentage": 88.17, "elapsed_time": "10:45:47", "remaining_time": "1:26:39"} +{"current_steps": 7654, "total_steps": 8680, "loss": 0.7374964952468872, "lr": 7.557467481688873e-08, "epoch": 1.7635944700460828, "percentage": 88.18, "elapsed_time": "10:45:53", "remaining_time": "1:26:34"} +{"current_steps": 7655, "total_steps": 8680, "loss": 0.7302298545837402, "lr": 7.542944873384105e-08, "epoch": 1.7638248847926268, "percentage": 88.19, "elapsed_time": "10:45:58", "remaining_time": "1:26:29"} +{"current_steps": 7656, "total_steps": 8680, "loss": 0.8323671817779541, "lr": 7.5284356850985e-08, "epoch": 1.7640552995391705, "percentage": 88.2, "elapsed_time": "10:46:03", "remaining_time": "1:26:24"} +{"current_steps": 7657, "total_steps": 8680, "loss": 0.6654655933380127, "lr": 7.513939918938028e-08, "epoch": 1.7642857142857142, "percentage": 88.21, "elapsed_time": "10:46:09", "remaining_time": "1:26:19"} +{"current_steps": 7658, "total_steps": 8680, "loss": 0.6371186375617981, "lr": 7.499457577006751e-08, "epoch": 1.7645161290322582, "percentage": 88.23, "elapsed_time": "10:46:14", "remaining_time": "1:26:14"} +{"current_steps": 7659, "total_steps": 8680, "loss": 0.7761695384979248, "lr": 7.484988661406733e-08, "epoch": 1.7647465437788017, "percentage": 88.24, "elapsed_time": "10:46:20", "remaining_time": "1:26:09"} +{"current_steps": 7660, "total_steps": 8680, "loss": 0.779335618019104, 
"lr": 7.470533174238158e-08, "epoch": 1.7649769585253456, "percentage": 88.25, "elapsed_time": "10:46:26", "remaining_time": "1:26:04"} +{"current_steps": 7661, "total_steps": 8680, "loss": 0.7642731666564941, "lr": 7.456091117599195e-08, "epoch": 1.7652073732718894, "percentage": 88.26, "elapsed_time": "10:46:30", "remaining_time": "1:25:59"} +{"current_steps": 7662, "total_steps": 8680, "loss": 0.7490801215171814, "lr": 7.441662493586076e-08, "epoch": 1.765437788018433, "percentage": 88.27, "elapsed_time": "10:46:36", "remaining_time": "1:25:54"} +{"current_steps": 7663, "total_steps": 8680, "loss": 0.9480686187744141, "lr": 7.427247304293139e-08, "epoch": 1.765668202764977, "percentage": 88.28, "elapsed_time": "10:46:41", "remaining_time": "1:25:49"} +{"current_steps": 7664, "total_steps": 8680, "loss": 0.6208070516586304, "lr": 7.412845551812707e-08, "epoch": 1.7658986175115208, "percentage": 88.29, "elapsed_time": "10:46:46", "remaining_time": "1:25:44"} +{"current_steps": 7665, "total_steps": 8680, "loss": 0.7782050371170044, "lr": 7.398457238235167e-08, "epoch": 1.7661290322580645, "percentage": 88.31, "elapsed_time": "10:46:51", "remaining_time": "1:25:39"} +{"current_steps": 7666, "total_steps": 8680, "loss": 0.6725378632545471, "lr": 7.38408236564897e-08, "epoch": 1.7663594470046085, "percentage": 88.32, "elapsed_time": "10:46:58", "remaining_time": "1:25:34"} +{"current_steps": 7667, "total_steps": 8680, "loss": 0.8247120380401611, "lr": 7.369720936140611e-08, "epoch": 1.766589861751152, "percentage": 88.33, "elapsed_time": "10:47:04", "remaining_time": "1:25:29"} +{"current_steps": 7668, "total_steps": 8680, "loss": 0.7866288423538208, "lr": 7.355372951794614e-08, "epoch": 1.766820276497696, "percentage": 88.34, "elapsed_time": "10:47:10", "remaining_time": "1:25:24"} +{"current_steps": 7669, "total_steps": 8680, "loss": 0.8096400499343872, "lr": 7.341038414693613e-08, "epoch": 1.7670506912442396, "percentage": 88.35, "elapsed_time": "10:47:15", 
"remaining_time": "1:25:19"} +{"current_steps": 7670, "total_steps": 8680, "loss": 0.7799873352050781, "lr": 7.326717326918208e-08, "epoch": 1.7672811059907834, "percentage": 88.36, "elapsed_time": "10:47:21", "remaining_time": "1:25:14"} +{"current_steps": 7671, "total_steps": 8680, "loss": 0.809285044670105, "lr": 7.312409690547095e-08, "epoch": 1.7675115207373273, "percentage": 88.38, "elapsed_time": "10:47:26", "remaining_time": "1:25:09"} +{"current_steps": 7672, "total_steps": 8680, "loss": 0.874248743057251, "lr": 7.298115507657021e-08, "epoch": 1.7677419354838708, "percentage": 88.39, "elapsed_time": "10:47:31", "remaining_time": "1:25:04"} +{"current_steps": 7673, "total_steps": 8680, "loss": 0.7418022155761719, "lr": 7.283834780322761e-08, "epoch": 1.7679723502304148, "percentage": 88.4, "elapsed_time": "10:47:36", "remaining_time": "1:24:59"} +{"current_steps": 7674, "total_steps": 8680, "loss": 0.720660388469696, "lr": 7.269567510617126e-08, "epoch": 1.7682027649769585, "percentage": 88.41, "elapsed_time": "10:47:41", "remaining_time": "1:24:54"} +{"current_steps": 7675, "total_steps": 8680, "loss": 0.7655429840087891, "lr": 7.255313700611032e-08, "epoch": 1.7684331797235022, "percentage": 88.42, "elapsed_time": "10:47:48", "remaining_time": "1:24:49"} +{"current_steps": 7676, "total_steps": 8680, "loss": 0.7303705215454102, "lr": 7.241073352373361e-08, "epoch": 1.7686635944700462, "percentage": 88.43, "elapsed_time": "10:47:53", "remaining_time": "1:24:44"} +{"current_steps": 7677, "total_steps": 8680, "loss": 0.7997909188270569, "lr": 7.226846467971093e-08, "epoch": 1.76889400921659, "percentage": 88.44, "elapsed_time": "10:47:59", "remaining_time": "1:24:39"} +{"current_steps": 7678, "total_steps": 8680, "loss": 0.6546763181686401, "lr": 7.212633049469264e-08, "epoch": 1.7691244239631336, "percentage": 88.46, "elapsed_time": "10:48:07", "remaining_time": "1:24:34"} +{"current_steps": 7679, "total_steps": 8680, "loss": 0.6374444961547852, "lr": 
7.1984330989309e-08, "epoch": 1.7693548387096776, "percentage": 88.47, "elapsed_time": "10:48:14", "remaining_time": "1:24:30"} +{"current_steps": 7680, "total_steps": 8680, "loss": 0.7092937231063843, "lr": 7.184246618417111e-08, "epoch": 1.769585253456221, "percentage": 88.48, "elapsed_time": "10:48:19", "remaining_time": "1:24:25"} +{"current_steps": 7681, "total_steps": 8680, "loss": 0.7702305316925049, "lr": 7.17007360998706e-08, "epoch": 1.769815668202765, "percentage": 88.49, "elapsed_time": "10:48:26", "remaining_time": "1:24:20"} +{"current_steps": 7682, "total_steps": 8680, "loss": 0.7763724327087402, "lr": 7.155914075697933e-08, "epoch": 1.7700460829493088, "percentage": 88.5, "elapsed_time": "10:48:33", "remaining_time": "1:24:15"} +{"current_steps": 7683, "total_steps": 8680, "loss": 0.6409577131271362, "lr": 7.141768017604966e-08, "epoch": 1.7702764976958525, "percentage": 88.51, "elapsed_time": "10:48:37", "remaining_time": "1:24:10"} +{"current_steps": 7684, "total_steps": 8680, "loss": 0.7500795125961304, "lr": 7.127635437761459e-08, "epoch": 1.7705069124423964, "percentage": 88.53, "elapsed_time": "10:48:43", "remaining_time": "1:24:05"} +{"current_steps": 7685, "total_steps": 8680, "loss": 0.7312004566192627, "lr": 7.113516338218717e-08, "epoch": 1.77073732718894, "percentage": 88.54, "elapsed_time": "10:48:48", "remaining_time": "1:24:00"} +{"current_steps": 7686, "total_steps": 8680, "loss": 0.823514997959137, "lr": 7.099410721026112e-08, "epoch": 1.770967741935484, "percentage": 88.55, "elapsed_time": "10:48:54", "remaining_time": "1:23:55"} +{"current_steps": 7687, "total_steps": 8680, "loss": 0.9504063129425049, "lr": 7.085318588231048e-08, "epoch": 1.7711981566820276, "percentage": 88.56, "elapsed_time": "10:48:58", "remaining_time": "1:23:50"} +{"current_steps": 7688, "total_steps": 8680, "loss": 0.7850733399391174, "lr": 7.071239941878981e-08, "epoch": 1.7714285714285714, "percentage": 88.57, "elapsed_time": "10:49:04", "remaining_time": 
"1:23:45"} +{"current_steps": 7689, "total_steps": 8680, "loss": 0.9447094798088074, "lr": 7.057174784013431e-08, "epoch": 1.7716589861751153, "percentage": 88.58, "elapsed_time": "10:49:09", "remaining_time": "1:23:40"} +{"current_steps": 7690, "total_steps": 8680, "loss": 0.6675062775611877, "lr": 7.04312311667592e-08, "epoch": 1.771889400921659, "percentage": 88.59, "elapsed_time": "10:49:15", "remaining_time": "1:23:35"} +{"current_steps": 7691, "total_steps": 8680, "loss": 0.6875232458114624, "lr": 7.029084941906005e-08, "epoch": 1.7721198156682028, "percentage": 88.61, "elapsed_time": "10:49:21", "remaining_time": "1:23:30"} +{"current_steps": 7692, "total_steps": 8680, "loss": 0.7847919464111328, "lr": 7.015060261741357e-08, "epoch": 1.7723502304147467, "percentage": 88.62, "elapsed_time": "10:49:27", "remaining_time": "1:23:25"} +{"current_steps": 7693, "total_steps": 8680, "loss": 0.7924584150314331, "lr": 7.001049078217613e-08, "epoch": 1.7725806451612902, "percentage": 88.63, "elapsed_time": "10:49:32", "remaining_time": "1:23:20"} +{"current_steps": 7694, "total_steps": 8680, "loss": 0.8802344799041748, "lr": 6.987051393368471e-08, "epoch": 1.7728110599078342, "percentage": 88.64, "elapsed_time": "10:49:36", "remaining_time": "1:23:14"} +{"current_steps": 7695, "total_steps": 8680, "loss": 0.7038631439208984, "lr": 6.973067209225692e-08, "epoch": 1.773041474654378, "percentage": 88.65, "elapsed_time": "10:49:40", "remaining_time": "1:23:09"} +{"current_steps": 7696, "total_steps": 8680, "loss": 0.9016700387001038, "lr": 6.959096527819064e-08, "epoch": 1.7732718894009216, "percentage": 88.66, "elapsed_time": "10:49:45", "remaining_time": "1:23:04"} +{"current_steps": 7697, "total_steps": 8680, "loss": 0.7678165435791016, "lr": 6.945139351176387e-08, "epoch": 1.7735023041474656, "percentage": 88.68, "elapsed_time": "10:49:49", "remaining_time": "1:22:59"} +{"current_steps": 7698, "total_steps": 8680, "loss": 0.6612143516540527, "lr": 
6.931195681323565e-08, "epoch": 1.773732718894009, "percentage": 88.69, "elapsed_time": "10:49:54", "remaining_time": "1:22:54"} +{"current_steps": 7699, "total_steps": 8680, "loss": 0.840233325958252, "lr": 6.917265520284476e-08, "epoch": 1.773963133640553, "percentage": 88.7, "elapsed_time": "10:49:58", "remaining_time": "1:22:49"} +{"current_steps": 7700, "total_steps": 8680, "loss": 0.7913506031036377, "lr": 6.90334887008106e-08, "epoch": 1.7741935483870968, "percentage": 88.71, "elapsed_time": "10:50:04", "remaining_time": "1:22:44"} +{"current_steps": 7701, "total_steps": 8680, "loss": 0.7523634433746338, "lr": 6.889445732733323e-08, "epoch": 1.7744239631336405, "percentage": 88.72, "elapsed_time": "10:50:12", "remaining_time": "1:22:39"} +{"current_steps": 7702, "total_steps": 8680, "loss": 0.7009792327880859, "lr": 6.875556110259273e-08, "epoch": 1.7746543778801844, "percentage": 88.73, "elapsed_time": "10:50:18", "remaining_time": "1:22:34"} +{"current_steps": 7703, "total_steps": 8680, "loss": 0.6533738970756531, "lr": 6.861680004674963e-08, "epoch": 1.7748847926267282, "percentage": 88.74, "elapsed_time": "10:50:24", "remaining_time": "1:22:29"} +{"current_steps": 7704, "total_steps": 8680, "loss": 0.860493540763855, "lr": 6.847817417994517e-08, "epoch": 1.7751152073732719, "percentage": 88.76, "elapsed_time": "10:50:29", "remaining_time": "1:22:24"} +{"current_steps": 7705, "total_steps": 8680, "loss": 0.810010552406311, "lr": 6.833968352230057e-08, "epoch": 1.7753456221198156, "percentage": 88.77, "elapsed_time": "10:50:35", "remaining_time": "1:22:19"} +{"current_steps": 7706, "total_steps": 8680, "loss": 0.8443198204040527, "lr": 6.820132809391743e-08, "epoch": 1.7755760368663593, "percentage": 88.78, "elapsed_time": "10:50:41", "remaining_time": "1:22:14"} +{"current_steps": 7707, "total_steps": 8680, "loss": 0.758772611618042, "lr": 6.806310791487813e-08, "epoch": 1.7758064516129033, "percentage": 88.79, "elapsed_time": "10:50:46", 
"remaining_time": "1:22:09"} +{"current_steps": 7708, "total_steps": 8680, "loss": 0.8438040614128113, "lr": 6.792502300524472e-08, "epoch": 1.776036866359447, "percentage": 88.8, "elapsed_time": "10:50:51", "remaining_time": "1:22:04"} +{"current_steps": 7709, "total_steps": 8680, "loss": 0.7727431058883667, "lr": 6.778707338506051e-08, "epoch": 1.7762672811059907, "percentage": 88.81, "elapsed_time": "10:50:55", "remaining_time": "1:21:59"} +{"current_steps": 7710, "total_steps": 8680, "loss": 0.8118282556533813, "lr": 6.764925907434849e-08, "epoch": 1.7764976958525347, "percentage": 88.82, "elapsed_time": "10:51:00", "remaining_time": "1:21:54"} +{"current_steps": 7711, "total_steps": 8680, "loss": 0.7667281627655029, "lr": 6.75115800931122e-08, "epoch": 1.7767281105990782, "percentage": 88.84, "elapsed_time": "10:51:05", "remaining_time": "1:21:49"} +{"current_steps": 7712, "total_steps": 8680, "loss": 0.7824913263320923, "lr": 6.737403646133566e-08, "epoch": 1.7769585253456222, "percentage": 88.85, "elapsed_time": "10:51:11", "remaining_time": "1:21:44"} +{"current_steps": 7713, "total_steps": 8680, "loss": 0.7318419218063354, "lr": 6.723662819898312e-08, "epoch": 1.7771889400921659, "percentage": 88.86, "elapsed_time": "10:51:15", "remaining_time": "1:21:39"} +{"current_steps": 7714, "total_steps": 8680, "loss": 0.7060009241104126, "lr": 6.709935532599897e-08, "epoch": 1.7774193548387096, "percentage": 88.87, "elapsed_time": "10:51:20", "remaining_time": "1:21:33"} +{"current_steps": 7715, "total_steps": 8680, "loss": 0.7367588877677917, "lr": 6.69622178623086e-08, "epoch": 1.7776497695852536, "percentage": 88.88, "elapsed_time": "10:51:25", "remaining_time": "1:21:28"} +{"current_steps": 7716, "total_steps": 8680, "loss": 0.7340742349624634, "lr": 6.682521582781708e-08, "epoch": 1.7778801843317973, "percentage": 88.89, "elapsed_time": "10:51:30", "remaining_time": "1:21:23"} +{"current_steps": 7717, "total_steps": 8680, "loss": 0.6655991077423096, "lr": 
6.668834924240995e-08, "epoch": 1.778110599078341, "percentage": 88.91, "elapsed_time": "10:51:37", "remaining_time": "1:21:18"} +{"current_steps": 7718, "total_steps": 8680, "loss": 0.7562434673309326, "lr": 6.655161812595367e-08, "epoch": 1.7783410138248847, "percentage": 88.92, "elapsed_time": "10:51:43", "remaining_time": "1:21:13"} +{"current_steps": 7719, "total_steps": 8680, "loss": 0.8078730702400208, "lr": 6.641502249829423e-08, "epoch": 1.7785714285714285, "percentage": 88.93, "elapsed_time": "10:51:48", "remaining_time": "1:21:08"} +{"current_steps": 7720, "total_steps": 8680, "loss": 0.6285899877548218, "lr": 6.627856237925811e-08, "epoch": 1.7788018433179724, "percentage": 88.94, "elapsed_time": "10:51:53", "remaining_time": "1:21:03"} +{"current_steps": 7721, "total_steps": 8680, "loss": 0.6633951663970947, "lr": 6.61422377886528e-08, "epoch": 1.7790322580645161, "percentage": 88.95, "elapsed_time": "10:51:59", "remaining_time": "1:20:58"} +{"current_steps": 7722, "total_steps": 8680, "loss": 0.7273050546646118, "lr": 6.600604874626548e-08, "epoch": 1.7792626728110599, "percentage": 88.96, "elapsed_time": "10:52:04", "remaining_time": "1:20:53"} +{"current_steps": 7723, "total_steps": 8680, "loss": 0.6665729284286499, "lr": 6.586999527186354e-08, "epoch": 1.7794930875576038, "percentage": 88.97, "elapsed_time": "10:52:10", "remaining_time": "1:20:48"} +{"current_steps": 7724, "total_steps": 8680, "loss": 0.7332675457000732, "lr": 6.573407738519531e-08, "epoch": 1.7797235023041473, "percentage": 88.99, "elapsed_time": "10:52:16", "remaining_time": "1:20:43"} +{"current_steps": 7725, "total_steps": 8680, "loss": 0.7439071536064148, "lr": 6.559829510598892e-08, "epoch": 1.7799539170506913, "percentage": 89.0, "elapsed_time": "10:52:22", "remaining_time": "1:20:38"} +{"current_steps": 7726, "total_steps": 8680, "loss": 0.7104752063751221, "lr": 6.546264845395299e-08, "epoch": 1.780184331797235, "percentage": 89.01, "elapsed_time": "10:52:28", 
"remaining_time": "1:20:33"} +{"current_steps": 7727, "total_steps": 8680, "loss": 0.7792220115661621, "lr": 6.53271374487765e-08, "epoch": 1.7804147465437787, "percentage": 89.02, "elapsed_time": "10:52:34", "remaining_time": "1:20:29"} +{"current_steps": 7728, "total_steps": 8680, "loss": 0.6379693746566772, "lr": 6.519176211012867e-08, "epoch": 1.7806451612903227, "percentage": 89.03, "elapsed_time": "10:52:39", "remaining_time": "1:20:24"} +{"current_steps": 7729, "total_steps": 8680, "loss": 0.7737444639205933, "lr": 6.505652245765881e-08, "epoch": 1.7808755760368664, "percentage": 89.04, "elapsed_time": "10:52:44", "remaining_time": "1:20:18"} +{"current_steps": 7730, "total_steps": 8680, "loss": 0.7681130170822144, "lr": 6.49214185109973e-08, "epoch": 1.7811059907834101, "percentage": 89.06, "elapsed_time": "10:52:49", "remaining_time": "1:20:13"} +{"current_steps": 7731, "total_steps": 8680, "loss": 0.8718420267105103, "lr": 6.478645028975372e-08, "epoch": 1.7813364055299539, "percentage": 89.07, "elapsed_time": "10:52:55", "remaining_time": "1:20:08"} +{"current_steps": 7732, "total_steps": 8680, "loss": 0.7557366490364075, "lr": 6.465161781351914e-08, "epoch": 1.7815668202764976, "percentage": 89.08, "elapsed_time": "10:53:01", "remaining_time": "1:20:03"} +{"current_steps": 7733, "total_steps": 8680, "loss": 0.6794936656951904, "lr": 6.45169211018638e-08, "epoch": 1.7817972350230415, "percentage": 89.09, "elapsed_time": "10:53:06", "remaining_time": "1:19:58"} +{"current_steps": 7734, "total_steps": 8680, "loss": 0.8390437364578247, "lr": 6.438236017433895e-08, "epoch": 1.7820276497695853, "percentage": 89.1, "elapsed_time": "10:53:11", "remaining_time": "1:19:53"} +{"current_steps": 7735, "total_steps": 8680, "loss": 0.8024254441261292, "lr": 6.424793505047599e-08, "epoch": 1.782258064516129, "percentage": 89.11, "elapsed_time": "10:53:17", "remaining_time": "1:19:48"} +{"current_steps": 7736, "total_steps": 8680, "loss": 0.6382162570953369, "lr": 
6.411364574978651e-08, "epoch": 1.782488479262673, "percentage": 89.12, "elapsed_time": "10:53:23", "remaining_time": "1:19:43"} +{"current_steps": 7737, "total_steps": 8680, "loss": 0.6832011938095093, "lr": 6.397949229176225e-08, "epoch": 1.7827188940092165, "percentage": 89.14, "elapsed_time": "10:53:29", "remaining_time": "1:19:38"} +{"current_steps": 7738, "total_steps": 8680, "loss": 0.9003958702087402, "lr": 6.384547469587564e-08, "epoch": 1.7829493087557604, "percentage": 89.15, "elapsed_time": "10:53:33", "remaining_time": "1:19:33"} +{"current_steps": 7739, "total_steps": 8680, "loss": 0.7030328512191772, "lr": 6.371159298157913e-08, "epoch": 1.7831797235023041, "percentage": 89.16, "elapsed_time": "10:53:38", "remaining_time": "1:19:28"} +{"current_steps": 7740, "total_steps": 8680, "loss": 0.8153259754180908, "lr": 6.357784716830528e-08, "epoch": 1.7834101382488479, "percentage": 89.17, "elapsed_time": "10:53:41", "remaining_time": "1:19:23"} +{"current_steps": 7741, "total_steps": 8680, "loss": 0.8229082226753235, "lr": 6.344423727546744e-08, "epoch": 1.7836405529953918, "percentage": 89.18, "elapsed_time": "10:53:46", "remaining_time": "1:19:18"} +{"current_steps": 7742, "total_steps": 8680, "loss": 0.7306294441223145, "lr": 6.331076332245866e-08, "epoch": 1.7838709677419353, "percentage": 89.19, "elapsed_time": "10:53:51", "remaining_time": "1:19:13"} +{"current_steps": 7743, "total_steps": 8680, "loss": 0.7613078951835632, "lr": 6.317742532865233e-08, "epoch": 1.7841013824884793, "percentage": 89.21, "elapsed_time": "10:53:55", "remaining_time": "1:19:08"} +{"current_steps": 7744, "total_steps": 8680, "loss": 0.9164611101150513, "lr": 6.304422331340275e-08, "epoch": 1.784331797235023, "percentage": 89.22, "elapsed_time": "10:54:01", "remaining_time": "1:19:03"} +{"current_steps": 7745, "total_steps": 8680, "loss": 0.8770956993103027, "lr": 6.29111572960439e-08, "epoch": 1.7845622119815667, "percentage": 89.23, "elapsed_time": "10:54:05", 
"remaining_time": "1:18:57"} +{"current_steps": 7746, "total_steps": 8680, "loss": 0.7482821941375732, "lr": 6.277822729588989e-08, "epoch": 1.7847926267281107, "percentage": 89.24, "elapsed_time": "10:54:11", "remaining_time": "1:18:52"} +{"current_steps": 7747, "total_steps": 8680, "loss": 0.7850298881530762, "lr": 6.264543333223549e-08, "epoch": 1.7850230414746544, "percentage": 89.25, "elapsed_time": "10:54:16", "remaining_time": "1:18:47"} +{"current_steps": 7748, "total_steps": 8680, "loss": 0.5781385898590088, "lr": 6.251277542435552e-08, "epoch": 1.7852534562211981, "percentage": 89.26, "elapsed_time": "10:54:22", "remaining_time": "1:18:42"} +{"current_steps": 7749, "total_steps": 8680, "loss": 0.8217513561248779, "lr": 6.238025359150501e-08, "epoch": 1.785483870967742, "percentage": 89.27, "elapsed_time": "10:54:26", "remaining_time": "1:18:37"} +{"current_steps": 7750, "total_steps": 8680, "loss": 0.8500482439994812, "lr": 6.224786785291969e-08, "epoch": 1.7857142857142856, "percentage": 89.29, "elapsed_time": "10:54:31", "remaining_time": "1:18:32"} +{"current_steps": 7751, "total_steps": 8680, "loss": 0.8146470785140991, "lr": 6.211561822781474e-08, "epoch": 1.7859447004608295, "percentage": 89.3, "elapsed_time": "10:54:35", "remaining_time": "1:18:27"} +{"current_steps": 7752, "total_steps": 8680, "loss": 0.7351702451705933, "lr": 6.198350473538616e-08, "epoch": 1.7861751152073733, "percentage": 89.31, "elapsed_time": "10:54:41", "remaining_time": "1:18:22"} +{"current_steps": 7753, "total_steps": 8680, "loss": 0.7993056774139404, "lr": 6.185152739481026e-08, "epoch": 1.786405529953917, "percentage": 89.32, "elapsed_time": "10:54:46", "remaining_time": "1:18:17"} +{"current_steps": 7754, "total_steps": 8680, "loss": 0.8570160865783691, "lr": 6.171968622524315e-08, "epoch": 1.786635944700461, "percentage": 89.33, "elapsed_time": "10:54:50", "remaining_time": "1:18:12"} +{"current_steps": 7755, "total_steps": 8680, "loss": 0.6200212836265564, "lr": 
6.158798124582143e-08, "epoch": 1.7868663594470044, "percentage": 89.34, "elapsed_time": "10:54:56", "remaining_time": "1:18:07"} +{"current_steps": 7756, "total_steps": 8680, "loss": 0.8196465373039246, "lr": 6.145641247566202e-08, "epoch": 1.7870967741935484, "percentage": 89.35, "elapsed_time": "10:55:02", "remaining_time": "1:18:02"} +{"current_steps": 7757, "total_steps": 8680, "loss": 0.7038032412528992, "lr": 6.132497993386165e-08, "epoch": 1.7873271889400921, "percentage": 89.37, "elapsed_time": "10:55:09", "remaining_time": "1:17:57"} +{"current_steps": 7758, "total_steps": 8680, "loss": 0.7222307324409485, "lr": 6.119368363949806e-08, "epoch": 1.7875576036866359, "percentage": 89.38, "elapsed_time": "10:55:14", "remaining_time": "1:17:52"} +{"current_steps": 7759, "total_steps": 8680, "loss": 0.8457501530647278, "lr": 6.106252361162834e-08, "epoch": 1.7877880184331798, "percentage": 89.39, "elapsed_time": "10:55:18", "remaining_time": "1:17:47"} +{"current_steps": 7760, "total_steps": 8680, "loss": 0.7543236017227173, "lr": 6.093149986929025e-08, "epoch": 1.7880184331797235, "percentage": 89.4, "elapsed_time": "10:55:23", "remaining_time": "1:17:42"} +{"current_steps": 7761, "total_steps": 8680, "loss": 0.5728875398635864, "lr": 6.080061243150191e-08, "epoch": 1.7882488479262673, "percentage": 89.41, "elapsed_time": "10:55:30", "remaining_time": "1:17:37"} +{"current_steps": 7762, "total_steps": 8680, "loss": 0.6864895820617676, "lr": 6.066986131726138e-08, "epoch": 1.7884792626728112, "percentage": 89.42, "elapsed_time": "10:55:36", "remaining_time": "1:17:32"} +{"current_steps": 7763, "total_steps": 8680, "loss": 0.8580472469329834, "lr": 6.053924654554687e-08, "epoch": 1.7887096774193547, "percentage": 89.44, "elapsed_time": "10:55:42", "remaining_time": "1:17:27"} +{"current_steps": 7764, "total_steps": 8680, "loss": 0.7670924663543701, "lr": 6.040876813531714e-08, "epoch": 1.7889400921658987, "percentage": 89.45, "elapsed_time": "10:55:47", 
"remaining_time": "1:17:22"} +{"current_steps": 7765, "total_steps": 8680, "loss": 0.6558287739753723, "lr": 6.027842610551082e-08, "epoch": 1.7891705069124424, "percentage": 89.46, "elapsed_time": "10:55:52", "remaining_time": "1:17:17"} +{"current_steps": 7766, "total_steps": 8680, "loss": 0.8186839818954468, "lr": 6.014822047504697e-08, "epoch": 1.7894009216589861, "percentage": 89.47, "elapsed_time": "10:55:57", "remaining_time": "1:17:12"} +{"current_steps": 7767, "total_steps": 8680, "loss": 0.7862167358398438, "lr": 6.001815126282462e-08, "epoch": 1.78963133640553, "percentage": 89.48, "elapsed_time": "10:56:02", "remaining_time": "1:17:07"} +{"current_steps": 7768, "total_steps": 8680, "loss": 0.8594048023223877, "lr": 5.98882184877233e-08, "epoch": 1.7898617511520736, "percentage": 89.49, "elapsed_time": "10:56:07", "remaining_time": "1:17:01"} +{"current_steps": 7769, "total_steps": 8680, "loss": 0.804019033908844, "lr": 5.975842216860238e-08, "epoch": 1.7900921658986175, "percentage": 89.5, "elapsed_time": "10:56:12", "remaining_time": "1:16:56"} +{"current_steps": 7770, "total_steps": 8680, "loss": 0.7404098510742188, "lr": 5.962876232430192e-08, "epoch": 1.7903225806451613, "percentage": 89.52, "elapsed_time": "10:56:18", "remaining_time": "1:16:51"} +{"current_steps": 7771, "total_steps": 8680, "loss": 0.7726024389266968, "lr": 5.949923897364173e-08, "epoch": 1.790552995391705, "percentage": 89.53, "elapsed_time": "10:56:24", "remaining_time": "1:16:46"} +{"current_steps": 7772, "total_steps": 8680, "loss": 0.8225048184394836, "lr": 5.936985213542178e-08, "epoch": 1.790783410138249, "percentage": 89.54, "elapsed_time": "10:56:30", "remaining_time": "1:16:41"} +{"current_steps": 7773, "total_steps": 8680, "loss": 0.8485706448554993, "lr": 5.924060182842272e-08, "epoch": 1.7910138248847927, "percentage": 89.55, "elapsed_time": "10:56:34", "remaining_time": "1:16:36"} +{"current_steps": 7774, "total_steps": 8680, "loss": 0.6580322980880737, "lr": 
5.9111488071404867e-08, "epoch": 1.7912442396313364, "percentage": 89.56, "elapsed_time": "10:56:40", "remaining_time": "1:16:31"} +{"current_steps": 7775, "total_steps": 8680, "loss": 0.7486656904220581, "lr": 5.898251088310879e-08, "epoch": 1.7914746543778803, "percentage": 89.57, "elapsed_time": "10:56:45", "remaining_time": "1:16:26"} +{"current_steps": 7776, "total_steps": 8680, "loss": 0.9068334102630615, "lr": 5.885367028225574e-08, "epoch": 1.7917050691244238, "percentage": 89.59, "elapsed_time": "10:56:49", "remaining_time": "1:16:21"} +{"current_steps": 7777, "total_steps": 8680, "loss": 0.7091449499130249, "lr": 5.872496628754653e-08, "epoch": 1.7919354838709678, "percentage": 89.6, "elapsed_time": "10:56:55", "remaining_time": "1:16:16"} +{"current_steps": 7778, "total_steps": 8680, "loss": 0.7248316407203674, "lr": 5.8596398917662107e-08, "epoch": 1.7921658986175115, "percentage": 89.61, "elapsed_time": "10:57:00", "remaining_time": "1:16:11"} +{"current_steps": 7779, "total_steps": 8680, "loss": 0.7740335464477539, "lr": 5.8467968191264315e-08, "epoch": 1.7923963133640552, "percentage": 89.62, "elapsed_time": "10:57:05", "remaining_time": "1:16:06"} +{"current_steps": 7780, "total_steps": 8680, "loss": 0.7810479402542114, "lr": 5.833967412699448e-08, "epoch": 1.7926267281105992, "percentage": 89.63, "elapsed_time": "10:57:11", "remaining_time": "1:16:01"} +{"current_steps": 7781, "total_steps": 8680, "loss": 0.7072443962097168, "lr": 5.821151674347435e-08, "epoch": 1.7928571428571427, "percentage": 89.64, "elapsed_time": "10:57:16", "remaining_time": "1:15:56"} +{"current_steps": 7782, "total_steps": 8680, "loss": 0.8218289613723755, "lr": 5.808349605930585e-08, "epoch": 1.7930875576036867, "percentage": 89.65, "elapsed_time": "10:57:20", "remaining_time": "1:15:51"} +{"current_steps": 7783, "total_steps": 8680, "loss": 0.8928433656692505, "lr": 5.795561209307087e-08, "epoch": 1.7933179723502304, "percentage": 89.67, "elapsed_time": "10:57:25", 
"remaining_time": "1:15:46"} +{"current_steps": 7784, "total_steps": 8680, "loss": 0.765188455581665, "lr": 5.7827864863331796e-08, "epoch": 1.793548387096774, "percentage": 89.68, "elapsed_time": "10:57:30", "remaining_time": "1:15:41"} +{"current_steps": 7785, "total_steps": 8680, "loss": 0.7149494886398315, "lr": 5.7700254388630795e-08, "epoch": 1.793778801843318, "percentage": 89.69, "elapsed_time": "10:57:36", "remaining_time": "1:15:36"} +{"current_steps": 7786, "total_steps": 8680, "loss": 0.8144164085388184, "lr": 5.75727806874905e-08, "epoch": 1.7940092165898618, "percentage": 89.7, "elapsed_time": "10:57:42", "remaining_time": "1:15:31"} +{"current_steps": 7787, "total_steps": 8680, "loss": 0.7549517154693604, "lr": 5.744544377841354e-08, "epoch": 1.7942396313364055, "percentage": 89.71, "elapsed_time": "10:57:48", "remaining_time": "1:15:26"} +{"current_steps": 7788, "total_steps": 8680, "loss": 0.7820652723312378, "lr": 5.731824367988258e-08, "epoch": 1.7944700460829495, "percentage": 89.72, "elapsed_time": "10:57:55", "remaining_time": "1:15:21"} +{"current_steps": 7789, "total_steps": 8680, "loss": 0.8253183364868164, "lr": 5.719118041036042e-08, "epoch": 1.794700460829493, "percentage": 89.74, "elapsed_time": "10:57:59", "remaining_time": "1:15:16"} +{"current_steps": 7790, "total_steps": 8680, "loss": 0.8177148103713989, "lr": 5.70642539882904e-08, "epoch": 1.794930875576037, "percentage": 89.75, "elapsed_time": "10:58:04", "remaining_time": "1:15:11"} +{"current_steps": 7791, "total_steps": 8680, "loss": 0.722260594367981, "lr": 5.69374644320958e-08, "epoch": 1.7951612903225806, "percentage": 89.76, "elapsed_time": "10:58:09", "remaining_time": "1:15:05"} +{"current_steps": 7792, "total_steps": 8680, "loss": 0.8128643035888672, "lr": 5.6810811760179434e-08, "epoch": 1.7953917050691244, "percentage": 89.77, "elapsed_time": "10:58:15", "remaining_time": "1:15:01"} +{"current_steps": 7793, "total_steps": 8680, "loss": 0.8267233371734619, "lr": 
5.6684295990925394e-08, "epoch": 1.7956221198156683, "percentage": 89.78, "elapsed_time": "10:58:19", "remaining_time": "1:14:55"} +{"current_steps": 7794, "total_steps": 8680, "loss": 0.8385082483291626, "lr": 5.655791714269697e-08, "epoch": 1.7958525345622118, "percentage": 89.79, "elapsed_time": "10:58:24", "remaining_time": "1:14:50"} +{"current_steps": 7795, "total_steps": 8680, "loss": 0.8705167770385742, "lr": 5.643167523383785e-08, "epoch": 1.7960829493087558, "percentage": 89.8, "elapsed_time": "10:58:29", "remaining_time": "1:14:45"} +{"current_steps": 7796, "total_steps": 8680, "loss": 0.7628496885299683, "lr": 5.6305570282672024e-08, "epoch": 1.7963133640552995, "percentage": 89.82, "elapsed_time": "10:58:35", "remaining_time": "1:14:40"} +{"current_steps": 7797, "total_steps": 8680, "loss": 0.8246536254882812, "lr": 5.61796023075034e-08, "epoch": 1.7965437788018432, "percentage": 89.83, "elapsed_time": "10:58:40", "remaining_time": "1:14:35"} +{"current_steps": 7798, "total_steps": 8680, "loss": 0.7103257179260254, "lr": 5.6053771326615815e-08, "epoch": 1.7967741935483872, "percentage": 89.84, "elapsed_time": "10:58:45", "remaining_time": "1:14:30"} +{"current_steps": 7799, "total_steps": 8680, "loss": 0.614989161491394, "lr": 5.5928077358273984e-08, "epoch": 1.797004608294931, "percentage": 89.85, "elapsed_time": "10:58:51", "remaining_time": "1:14:25"} +{"current_steps": 7800, "total_steps": 8680, "loss": 0.9876137971878052, "lr": 5.5802520420721866e-08, "epoch": 1.7972350230414746, "percentage": 89.86, "elapsed_time": "10:58:55", "remaining_time": "1:14:20"} +{"current_steps": 7801, "total_steps": 8680, "loss": 0.7023773193359375, "lr": 5.5677100532183775e-08, "epoch": 1.7974654377880186, "percentage": 89.87, "elapsed_time": "10:59:03", "remaining_time": "1:14:15"} +{"current_steps": 7802, "total_steps": 8680, "loss": 0.6680843830108643, "lr": 5.555181771086459e-08, "epoch": 1.797695852534562, "percentage": 89.88, "elapsed_time": "10:59:07", 
"remaining_time": "1:14:10"} +{"current_steps": 7803, "total_steps": 8680, "loss": 0.7221776843070984, "lr": 5.542667197494877e-08, "epoch": 1.797926267281106, "percentage": 89.9, "elapsed_time": "10:59:13", "remaining_time": "1:14:05"} +{"current_steps": 7804, "total_steps": 8680, "loss": 0.7473262548446655, "lr": 5.5301663342601e-08, "epoch": 1.7981566820276498, "percentage": 89.91, "elapsed_time": "10:59:18", "remaining_time": "1:14:00"} +{"current_steps": 7805, "total_steps": 8680, "loss": 0.8690468072891235, "lr": 5.517679183196622e-08, "epoch": 1.7983870967741935, "percentage": 89.92, "elapsed_time": "10:59:23", "remaining_time": "1:13:55"} +{"current_steps": 7806, "total_steps": 8680, "loss": 0.8353981971740723, "lr": 5.505205746116937e-08, "epoch": 1.7986175115207375, "percentage": 89.93, "elapsed_time": "10:59:28", "remaining_time": "1:13:50"} +{"current_steps": 7807, "total_steps": 8680, "loss": 0.7691711187362671, "lr": 5.4927460248315405e-08, "epoch": 1.798847926267281, "percentage": 89.94, "elapsed_time": "10:59:33", "remaining_time": "1:13:45"} +{"current_steps": 7808, "total_steps": 8680, "loss": 0.6732556819915771, "lr": 5.480300021148953e-08, "epoch": 1.799078341013825, "percentage": 89.95, "elapsed_time": "10:59:39", "remaining_time": "1:13:40"} +{"current_steps": 7809, "total_steps": 8680, "loss": 0.7273567914962769, "lr": 5.467867736875664e-08, "epoch": 1.7993087557603686, "percentage": 89.97, "elapsed_time": "10:59:43", "remaining_time": "1:13:35"} +{"current_steps": 7810, "total_steps": 8680, "loss": 0.7951864004135132, "lr": 5.455449173816251e-08, "epoch": 1.7995391705069124, "percentage": 89.98, "elapsed_time": "10:59:48", "remaining_time": "1:13:29"} +{"current_steps": 7811, "total_steps": 8680, "loss": 0.7073169350624084, "lr": 5.4430443337732276e-08, "epoch": 1.7997695852534563, "percentage": 89.99, "elapsed_time": "10:59:53", "remaining_time": "1:13:24"} +{"current_steps": 7812, "total_steps": 8680, "loss": 0.682072639465332, "lr": 
5.430653218547132e-08, "epoch": 1.8, "percentage": 90.0, "elapsed_time": "10:59:57", "remaining_time": "1:13:19"} +{"current_steps": 7813, "total_steps": 8680, "loss": 0.7512049674987793, "lr": 5.4182758299365364e-08, "epoch": 1.8002304147465438, "percentage": 90.01, "elapsed_time": "11:00:04", "remaining_time": "1:13:14"} +{"current_steps": 7814, "total_steps": 8680, "loss": 0.7470980882644653, "lr": 5.405912169738003e-08, "epoch": 1.8004608294930877, "percentage": 90.02, "elapsed_time": "11:00:10", "remaining_time": "1:13:09"} +{"current_steps": 7815, "total_steps": 8680, "loss": 0.792417049407959, "lr": 5.3935622397460634e-08, "epoch": 1.8006912442396312, "percentage": 90.03, "elapsed_time": "11:00:14", "remaining_time": "1:13:04"} +{"current_steps": 7816, "total_steps": 8680, "loss": 0.8600934743881226, "lr": 5.3812260417533505e-08, "epoch": 1.8009216589861752, "percentage": 90.05, "elapsed_time": "11:00:19", "remaining_time": "1:12:59"} +{"current_steps": 7817, "total_steps": 8680, "loss": 0.6931058168411255, "lr": 5.36890357755041e-08, "epoch": 1.801152073732719, "percentage": 90.06, "elapsed_time": "11:00:25", "remaining_time": "1:12:54"} +{"current_steps": 7818, "total_steps": 8680, "loss": 0.7382420897483826, "lr": 5.3565948489258216e-08, "epoch": 1.8013824884792626, "percentage": 90.07, "elapsed_time": "11:00:31", "remaining_time": "1:12:49"} +{"current_steps": 7819, "total_steps": 8680, "loss": 0.6811971068382263, "lr": 5.344299857666224e-08, "epoch": 1.8016129032258066, "percentage": 90.08, "elapsed_time": "11:00:36", "remaining_time": "1:12:44"} +{"current_steps": 7820, "total_steps": 8680, "loss": 0.8551425933837891, "lr": 5.332018605556188e-08, "epoch": 1.80184331797235, "percentage": 90.09, "elapsed_time": "11:00:41", "remaining_time": "1:12:39"} +{"current_steps": 7821, "total_steps": 8680, "loss": 0.7907109260559082, "lr": 5.319751094378322e-08, "epoch": 1.802073732718894, "percentage": 90.1, "elapsed_time": "11:00:48", "remaining_time": 
"1:12:34"} +{"current_steps": 7822, "total_steps": 8680, "loss": 0.6995817422866821, "lr": 5.3074973259132464e-08, "epoch": 1.8023041474654378, "percentage": 90.12, "elapsed_time": "11:00:53", "remaining_time": "1:12:29"} +{"current_steps": 7823, "total_steps": 8680, "loss": 0.9157558679580688, "lr": 5.295257301939582e-08, "epoch": 1.8025345622119815, "percentage": 90.13, "elapsed_time": "11:00:58", "remaining_time": "1:12:24"} +{"current_steps": 7824, "total_steps": 8680, "loss": 0.8181086778640747, "lr": 5.283031024233942e-08, "epoch": 1.8027649769585254, "percentage": 90.14, "elapsed_time": "11:01:03", "remaining_time": "1:12:19"} +{"current_steps": 7825, "total_steps": 8680, "loss": 0.7170151472091675, "lr": 5.270818494570961e-08, "epoch": 1.8029953917050692, "percentage": 90.15, "elapsed_time": "11:01:09", "remaining_time": "1:12:14"} +{"current_steps": 7826, "total_steps": 8680, "loss": 0.7548947334289551, "lr": 5.258619714723278e-08, "epoch": 1.803225806451613, "percentage": 90.16, "elapsed_time": "11:01:15", "remaining_time": "1:12:09"} +{"current_steps": 7827, "total_steps": 8680, "loss": 0.7482869625091553, "lr": 5.2464346864615204e-08, "epoch": 1.8034562211981566, "percentage": 90.17, "elapsed_time": "11:01:19", "remaining_time": "1:12:04"} +{"current_steps": 7828, "total_steps": 8680, "loss": 0.6984925270080566, "lr": 5.234263411554329e-08, "epoch": 1.8036866359447004, "percentage": 90.18, "elapsed_time": "11:01:25", "remaining_time": "1:11:59"} +{"current_steps": 7829, "total_steps": 8680, "loss": 0.910038948059082, "lr": 5.222105891768347e-08, "epoch": 1.8039170506912443, "percentage": 90.2, "elapsed_time": "11:01:29", "remaining_time": "1:11:54"} +{"current_steps": 7830, "total_steps": 8680, "loss": 0.8936711549758911, "lr": 5.2099621288682174e-08, "epoch": 1.804147465437788, "percentage": 90.21, "elapsed_time": "11:01:34", "remaining_time": "1:11:49"} +{"current_steps": 7831, "total_steps": 8680, "loss": 0.7376326322555542, "lr": 
5.197832124616608e-08, "epoch": 1.8043778801843318, "percentage": 90.22, "elapsed_time": "11:01:38", "remaining_time": "1:11:43"} +{"current_steps": 7832, "total_steps": 8680, "loss": 0.8373547792434692, "lr": 5.1857158807741554e-08, "epoch": 1.8046082949308757, "percentage": 90.23, "elapsed_time": "11:01:42", "remaining_time": "1:11:38"} +{"current_steps": 7833, "total_steps": 8680, "loss": 0.7018512487411499, "lr": 5.17361339909953e-08, "epoch": 1.8048387096774192, "percentage": 90.24, "elapsed_time": "11:01:46", "remaining_time": "1:11:33"} +{"current_steps": 7834, "total_steps": 8680, "loss": 0.6111225485801697, "lr": 5.161524681349394e-08, "epoch": 1.8050691244239632, "percentage": 90.25, "elapsed_time": "11:01:52", "remaining_time": "1:11:28"} +{"current_steps": 7835, "total_steps": 8680, "loss": 0.6961934566497803, "lr": 5.149449729278388e-08, "epoch": 1.805299539170507, "percentage": 90.26, "elapsed_time": "11:01:58", "remaining_time": "1:11:23"} +{"current_steps": 7836, "total_steps": 8680, "loss": 0.677324116230011, "lr": 5.137388544639198e-08, "epoch": 1.8055299539170506, "percentage": 90.28, "elapsed_time": "11:02:04", "remaining_time": "1:11:18"} +{"current_steps": 7837, "total_steps": 8680, "loss": 0.7124897837638855, "lr": 5.125341129182481e-08, "epoch": 1.8057603686635946, "percentage": 90.29, "elapsed_time": "11:02:10", "remaining_time": "1:11:13"} +{"current_steps": 7838, "total_steps": 8680, "loss": 0.7474578619003296, "lr": 5.1133074846568815e-08, "epoch": 1.8059907834101383, "percentage": 90.3, "elapsed_time": "11:02:16", "remaining_time": "1:11:08"} +{"current_steps": 7839, "total_steps": 8680, "loss": 0.699856162071228, "lr": 5.101287612809102e-08, "epoch": 1.806221198156682, "percentage": 90.31, "elapsed_time": "11:02:21", "remaining_time": "1:11:03"} +{"current_steps": 7840, "total_steps": 8680, "loss": 0.6548302173614502, "lr": 5.089281515383803e-08, "epoch": 1.8064516129032258, "percentage": 90.32, "elapsed_time": "11:02:26", 
"remaining_time": "1:10:58"} +{"current_steps": 7841, "total_steps": 8680, "loss": 0.8376108407974243, "lr": 5.077289194123624e-08, "epoch": 1.8066820276497695, "percentage": 90.33, "elapsed_time": "11:02:32", "remaining_time": "1:10:53"} +{"current_steps": 7842, "total_steps": 8680, "loss": 0.741931140422821, "lr": 5.065310650769283e-08, "epoch": 1.8069124423963134, "percentage": 90.35, "elapsed_time": "11:02:36", "remaining_time": "1:10:48"} +{"current_steps": 7843, "total_steps": 8680, "loss": 0.7253270149230957, "lr": 5.053345887059413e-08, "epoch": 1.8071428571428572, "percentage": 90.36, "elapsed_time": "11:02:40", "remaining_time": "1:10:43"} +{"current_steps": 7844, "total_steps": 8680, "loss": 0.8248677849769592, "lr": 5.0413949047306894e-08, "epoch": 1.807373271889401, "percentage": 90.37, "elapsed_time": "11:02:44", "remaining_time": "1:10:38"} +{"current_steps": 7845, "total_steps": 8680, "loss": 0.7571253776550293, "lr": 5.0294577055177925e-08, "epoch": 1.8076036866359448, "percentage": 90.38, "elapsed_time": "11:02:48", "remaining_time": "1:10:32"} +{"current_steps": 7846, "total_steps": 8680, "loss": 0.8256274461746216, "lr": 5.017534291153391e-08, "epoch": 1.8078341013824883, "percentage": 90.39, "elapsed_time": "11:02:52", "remaining_time": "1:10:27"} +{"current_steps": 7847, "total_steps": 8680, "loss": 0.8609060049057007, "lr": 5.0056246633681356e-08, "epoch": 1.8080645161290323, "percentage": 90.4, "elapsed_time": "11:02:57", "remaining_time": "1:10:22"} +{"current_steps": 7848, "total_steps": 8680, "loss": 0.7005047798156738, "lr": 4.9937288238907196e-08, "epoch": 1.808294930875576, "percentage": 90.41, "elapsed_time": "11:03:02", "remaining_time": "1:10:17"} +{"current_steps": 7849, "total_steps": 8680, "loss": 0.8640049695968628, "lr": 4.981846774447784e-08, "epoch": 1.8085253456221198, "percentage": 90.43, "elapsed_time": "11:03:06", "remaining_time": "1:10:12"} +{"current_steps": 7850, "total_steps": 8680, "loss": 0.8385862112045288, "lr": 
4.969978516763984e-08, "epoch": 1.8087557603686637, "percentage": 90.44, "elapsed_time": "11:03:11", "remaining_time": "1:10:07"} +{"current_steps": 7851, "total_steps": 8680, "loss": 0.845676064491272, "lr": 4.9581240525620184e-08, "epoch": 1.8089861751152074, "percentage": 90.45, "elapsed_time": "11:03:17", "remaining_time": "1:10:02"} +{"current_steps": 7852, "total_steps": 8680, "loss": 0.7638444304466248, "lr": 4.9462833835625327e-08, "epoch": 1.8092165898617512, "percentage": 90.46, "elapsed_time": "11:03:22", "remaining_time": "1:09:57"} +{"current_steps": 7853, "total_steps": 8680, "loss": 0.813924252986908, "lr": 4.934456511484153e-08, "epoch": 1.8094470046082949, "percentage": 90.47, "elapsed_time": "11:03:28", "remaining_time": "1:09:52"} +{"current_steps": 7854, "total_steps": 8680, "loss": 0.8773660659790039, "lr": 4.9226434380435835e-08, "epoch": 1.8096774193548386, "percentage": 90.48, "elapsed_time": "11:03:32", "remaining_time": "1:09:47"} +{"current_steps": 7855, "total_steps": 8680, "loss": 0.6703497171401978, "lr": 4.91084416495543e-08, "epoch": 1.8099078341013826, "percentage": 90.5, "elapsed_time": "11:03:38", "remaining_time": "1:09:42"} +{"current_steps": 7856, "total_steps": 8680, "loss": 0.760738730430603, "lr": 4.8990586939323896e-08, "epoch": 1.8101382488479263, "percentage": 90.51, "elapsed_time": "11:03:43", "remaining_time": "1:09:37"} +{"current_steps": 7857, "total_steps": 8680, "loss": 0.6466494798660278, "lr": 4.887287026685072e-08, "epoch": 1.81036866359447, "percentage": 90.52, "elapsed_time": "11:03:49", "remaining_time": "1:09:32"} +{"current_steps": 7858, "total_steps": 8680, "loss": 0.7416050434112549, "lr": 4.8755291649221206e-08, "epoch": 1.810599078341014, "percentage": 90.53, "elapsed_time": "11:03:53", "remaining_time": "1:09:26"} +{"current_steps": 7859, "total_steps": 8680, "loss": 0.6841444373130798, "lr": 4.863785110350205e-08, "epoch": 1.8108294930875575, "percentage": 90.54, "elapsed_time": "11:03:58", 
"remaining_time": "1:09:21"} +{"current_steps": 7860, "total_steps": 8680, "loss": 0.7818359732627869, "lr": 4.8520548646739265e-08, "epoch": 1.8110599078341014, "percentage": 90.55, "elapsed_time": "11:04:04", "remaining_time": "1:09:16"} +{"current_steps": 7861, "total_steps": 8680, "loss": 0.8802354335784912, "lr": 4.840338429595914e-08, "epoch": 1.8112903225806452, "percentage": 90.56, "elapsed_time": "11:04:09", "remaining_time": "1:09:11"} +{"current_steps": 7862, "total_steps": 8680, "loss": 0.8105144500732422, "lr": 4.8286358068168055e-08, "epoch": 1.8115207373271889, "percentage": 90.58, "elapsed_time": "11:04:15", "remaining_time": "1:09:06"} +{"current_steps": 7863, "total_steps": 8680, "loss": 0.6613968014717102, "lr": 4.816946998035232e-08, "epoch": 1.8117511520737328, "percentage": 90.59, "elapsed_time": "11:04:21", "remaining_time": "1:09:01"} +{"current_steps": 7864, "total_steps": 8680, "loss": 0.7945050001144409, "lr": 4.80527200494778e-08, "epoch": 1.8119815668202763, "percentage": 90.6, "elapsed_time": "11:04:27", "remaining_time": "1:08:56"} +{"current_steps": 7865, "total_steps": 8680, "loss": 0.7268643379211426, "lr": 4.793610829249084e-08, "epoch": 1.8122119815668203, "percentage": 90.61, "elapsed_time": "11:04:32", "remaining_time": "1:08:51"} +{"current_steps": 7866, "total_steps": 8680, "loss": 0.7416445016860962, "lr": 4.781963472631745e-08, "epoch": 1.812442396313364, "percentage": 90.62, "elapsed_time": "11:04:38", "remaining_time": "1:08:46"} +{"current_steps": 7867, "total_steps": 8680, "loss": 0.8536533117294312, "lr": 4.770329936786355e-08, "epoch": 1.8126728110599077, "percentage": 90.63, "elapsed_time": "11:04:42", "remaining_time": "1:08:41"} +{"current_steps": 7868, "total_steps": 8680, "loss": 0.8258422017097473, "lr": 4.7587102234015074e-08, "epoch": 1.8129032258064517, "percentage": 90.65, "elapsed_time": "11:04:46", "remaining_time": "1:08:36"} +{"current_steps": 7869, "total_steps": 8680, "loss": 0.6976941823959351, "lr": 
4.7471043341637874e-08, "epoch": 1.8131336405529954, "percentage": 90.66, "elapsed_time": "11:04:50", "remaining_time": "1:08:31"} +{"current_steps": 7870, "total_steps": 8680, "loss": 0.8213087916374207, "lr": 4.735512270757758e-08, "epoch": 1.8133640552995391, "percentage": 90.67, "elapsed_time": "11:04:55", "remaining_time": "1:08:26"} +{"current_steps": 7871, "total_steps": 8680, "loss": 0.8012057542800903, "lr": 4.723934034866028e-08, "epoch": 1.813594470046083, "percentage": 90.68, "elapsed_time": "11:05:01", "remaining_time": "1:08:21"} +{"current_steps": 7872, "total_steps": 8680, "loss": 0.7802866697311401, "lr": 4.7123696281691436e-08, "epoch": 1.8138248847926266, "percentage": 90.69, "elapsed_time": "11:05:05", "remaining_time": "1:08:16"} +{"current_steps": 7873, "total_steps": 8680, "loss": 0.8024426698684692, "lr": 4.700819052345639e-08, "epoch": 1.8140552995391706, "percentage": 90.7, "elapsed_time": "11:05:09", "remaining_time": "1:08:10"} +{"current_steps": 7874, "total_steps": 8680, "loss": 0.6383114457130432, "lr": 4.689282309072107e-08, "epoch": 1.8142857142857143, "percentage": 90.71, "elapsed_time": "11:05:15", "remaining_time": "1:08:05"} +{"current_steps": 7875, "total_steps": 8680, "loss": 0.7226015329360962, "lr": 4.677759400023085e-08, "epoch": 1.814516129032258, "percentage": 90.73, "elapsed_time": "11:05:20", "remaining_time": "1:08:00"} +{"current_steps": 7876, "total_steps": 8680, "loss": 0.8390164971351624, "lr": 4.6662503268710684e-08, "epoch": 1.814746543778802, "percentage": 90.74, "elapsed_time": "11:05:25", "remaining_time": "1:07:55"} +{"current_steps": 7877, "total_steps": 8680, "loss": 0.8120134472846985, "lr": 4.654755091286633e-08, "epoch": 1.8149769585253455, "percentage": 90.75, "elapsed_time": "11:05:29", "remaining_time": "1:07:50"} +{"current_steps": 7878, "total_steps": 8680, "loss": 0.6554470062255859, "lr": 4.6432736949382656e-08, "epoch": 1.8152073732718894, "percentage": 90.76, "elapsed_time": "11:05:34", 
"remaining_time": "1:07:45"} +{"current_steps": 7879, "total_steps": 8680, "loss": 0.7268370985984802, "lr": 4.631806139492478e-08, "epoch": 1.8154377880184331, "percentage": 90.77, "elapsed_time": "11:05:39", "remaining_time": "1:07:40"} +{"current_steps": 7880, "total_steps": 8680, "loss": 0.7991992831230164, "lr": 4.620352426613794e-08, "epoch": 1.8156682027649769, "percentage": 90.78, "elapsed_time": "11:05:45", "remaining_time": "1:07:35"} +{"current_steps": 7881, "total_steps": 8680, "loss": 0.7695842981338501, "lr": 4.608912557964673e-08, "epoch": 1.8158986175115208, "percentage": 90.79, "elapsed_time": "11:05:50", "remaining_time": "1:07:30"} +{"current_steps": 7882, "total_steps": 8680, "loss": 0.8633268475532532, "lr": 4.59748653520563e-08, "epoch": 1.8161290322580645, "percentage": 90.81, "elapsed_time": "11:05:53", "remaining_time": "1:07:25"} +{"current_steps": 7883, "total_steps": 8680, "loss": 0.7018440961837769, "lr": 4.586074359995118e-08, "epoch": 1.8163594470046083, "percentage": 90.82, "elapsed_time": "11:05:58", "remaining_time": "1:07:19"} +{"current_steps": 7884, "total_steps": 8680, "loss": 0.7304259538650513, "lr": 4.574676033989589e-08, "epoch": 1.8165898617511522, "percentage": 90.83, "elapsed_time": "11:06:04", "remaining_time": "1:07:14"} +{"current_steps": 7885, "total_steps": 8680, "loss": 0.7408654689788818, "lr": 4.563291558843518e-08, "epoch": 1.8168202764976957, "percentage": 90.84, "elapsed_time": "11:06:10", "remaining_time": "1:07:09"} +{"current_steps": 7886, "total_steps": 8680, "loss": 0.6378169059753418, "lr": 4.55192093620933e-08, "epoch": 1.8170506912442397, "percentage": 90.85, "elapsed_time": "11:06:16", "remaining_time": "1:07:04"} +{"current_steps": 7887, "total_steps": 8680, "loss": 0.8854331374168396, "lr": 4.540564167737471e-08, "epoch": 1.8172811059907834, "percentage": 90.86, "elapsed_time": "11:06:20", "remaining_time": "1:06:59"} +{"current_steps": 7888, "total_steps": 8680, "loss": 0.6948372721672058, "lr": 
4.529221255076343e-08, "epoch": 1.8175115207373271, "percentage": 90.88, "elapsed_time": "11:06:25", "remaining_time": "1:06:54"} +{"current_steps": 7889, "total_steps": 8680, "loss": 0.8199236392974854, "lr": 4.517892199872364e-08, "epoch": 1.817741935483871, "percentage": 90.89, "elapsed_time": "11:06:30", "remaining_time": "1:06:49"} +{"current_steps": 7890, "total_steps": 8680, "loss": 0.6967995762825012, "lr": 4.506577003769918e-08, "epoch": 1.8179723502304146, "percentage": 90.9, "elapsed_time": "11:06:35", "remaining_time": "1:06:44"} +{"current_steps": 7891, "total_steps": 8680, "loss": 0.848435640335083, "lr": 4.495275668411425e-08, "epoch": 1.8182027649769585, "percentage": 90.91, "elapsed_time": "11:06:39", "remaining_time": "1:06:39"} +{"current_steps": 7892, "total_steps": 8680, "loss": 0.7085731029510498, "lr": 4.483988195437227e-08, "epoch": 1.8184331797235023, "percentage": 90.92, "elapsed_time": "11:06:45", "remaining_time": "1:06:34"} +{"current_steps": 7893, "total_steps": 8680, "loss": 0.7400653958320618, "lr": 4.472714586485682e-08, "epoch": 1.818663594470046, "percentage": 90.93, "elapsed_time": "11:06:49", "remaining_time": "1:06:29"} +{"current_steps": 7894, "total_steps": 8680, "loss": 0.7636830806732178, "lr": 4.461454843193169e-08, "epoch": 1.81889400921659, "percentage": 90.94, "elapsed_time": "11:06:55", "remaining_time": "1:06:24"} +{"current_steps": 7895, "total_steps": 8680, "loss": 0.6902754306793213, "lr": 4.4502089671940135e-08, "epoch": 1.8191244239631337, "percentage": 90.96, "elapsed_time": "11:07:01", "remaining_time": "1:06:19"} +{"current_steps": 7896, "total_steps": 8680, "loss": 0.8397349119186401, "lr": 4.438976960120522e-08, "epoch": 1.8193548387096774, "percentage": 90.97, "elapsed_time": "11:07:07", "remaining_time": "1:06:14"} +{"current_steps": 7897, "total_steps": 8680, "loss": 0.7505836486816406, "lr": 4.4277588236030226e-08, "epoch": 1.8195852534562214, "percentage": 90.98, "elapsed_time": "11:07:12", 
"remaining_time": "1:06:09"} +{"current_steps": 7898, "total_steps": 8680, "loss": 0.9310287833213806, "lr": 4.416554559269814e-08, "epoch": 1.8198156682027649, "percentage": 90.99, "elapsed_time": "11:07:17", "remaining_time": "1:06:04"} +{"current_steps": 7899, "total_steps": 8680, "loss": 0.724685549736023, "lr": 4.405364168747161e-08, "epoch": 1.8200460829493088, "percentage": 91.0, "elapsed_time": "11:07:22", "remaining_time": "1:05:59"} +{"current_steps": 7900, "total_steps": 8680, "loss": 0.6554735898971558, "lr": 4.394187653659365e-08, "epoch": 1.8202764976958525, "percentage": 91.01, "elapsed_time": "11:07:26", "remaining_time": "1:05:53"} +{"current_steps": 7901, "total_steps": 8680, "loss": 0.7494597434997559, "lr": 4.383025015628661e-08, "epoch": 1.8205069124423963, "percentage": 91.03, "elapsed_time": "11:07:33", "remaining_time": "1:05:49"} +{"current_steps": 7902, "total_steps": 8680, "loss": 0.817386269569397, "lr": 4.371876256275287e-08, "epoch": 1.8207373271889402, "percentage": 91.04, "elapsed_time": "11:07:38", "remaining_time": "1:05:43"} +{"current_steps": 7903, "total_steps": 8680, "loss": 0.8668064475059509, "lr": 4.3607413772174806e-08, "epoch": 1.8209677419354837, "percentage": 91.05, "elapsed_time": "11:07:42", "remaining_time": "1:05:38"} +{"current_steps": 7904, "total_steps": 8680, "loss": 0.7400633096694946, "lr": 4.34962038007145e-08, "epoch": 1.8211981566820277, "percentage": 91.06, "elapsed_time": "11:07:48", "remaining_time": "1:05:33"} +{"current_steps": 7905, "total_steps": 8680, "loss": 0.7273544073104858, "lr": 4.3385132664514046e-08, "epoch": 1.8214285714285714, "percentage": 91.07, "elapsed_time": "11:07:54", "remaining_time": "1:05:28"} +{"current_steps": 7906, "total_steps": 8680, "loss": 0.7133193016052246, "lr": 4.3274200379695315e-08, "epoch": 1.8216589861751151, "percentage": 91.08, "elapsed_time": "11:07:59", "remaining_time": "1:05:23"} +{"current_steps": 7907, "total_steps": 8680, "loss": 0.9390736222267151, "lr": 
4.316340696235976e-08, "epoch": 1.821889400921659, "percentage": 91.09, "elapsed_time": "11:08:02", "remaining_time": "1:05:18"} +{"current_steps": 7908, "total_steps": 8680, "loss": 0.7065613269805908, "lr": 4.3052752428588966e-08, "epoch": 1.8221198156682028, "percentage": 91.11, "elapsed_time": "11:08:07", "remaining_time": "1:05:13"} +{"current_steps": 7909, "total_steps": 8680, "loss": 0.813999354839325, "lr": 4.294223679444442e-08, "epoch": 1.8223502304147465, "percentage": 91.12, "elapsed_time": "11:08:13", "remaining_time": "1:05:08"} +{"current_steps": 7910, "total_steps": 8680, "loss": 0.9234256148338318, "lr": 4.2831860075966955e-08, "epoch": 1.8225806451612905, "percentage": 91.13, "elapsed_time": "11:08:18", "remaining_time": "1:05:03"} +{"current_steps": 7911, "total_steps": 8680, "loss": 0.8630207777023315, "lr": 4.272162228917808e-08, "epoch": 1.822811059907834, "percentage": 91.14, "elapsed_time": "11:08:22", "remaining_time": "1:04:58"} +{"current_steps": 7912, "total_steps": 8680, "loss": 0.7827208042144775, "lr": 4.2611523450078456e-08, "epoch": 1.823041474654378, "percentage": 91.15, "elapsed_time": "11:08:27", "remaining_time": "1:04:53"} +{"current_steps": 7913, "total_steps": 8680, "loss": 0.884107232093811, "lr": 4.250156357464873e-08, "epoch": 1.8232718894009217, "percentage": 91.16, "elapsed_time": "11:08:32", "remaining_time": "1:04:48"} +{"current_steps": 7914, "total_steps": 8680, "loss": 0.8615697026252747, "lr": 4.2391742678849484e-08, "epoch": 1.8235023041474654, "percentage": 91.18, "elapsed_time": "11:08:37", "remaining_time": "1:04:42"} +{"current_steps": 7915, "total_steps": 8680, "loss": 0.8001279830932617, "lr": 4.2282060778621174e-08, "epoch": 1.8237327188940093, "percentage": 91.19, "elapsed_time": "11:08:42", "remaining_time": "1:04:37"} +{"current_steps": 7916, "total_steps": 8680, "loss": 0.7183214426040649, "lr": 4.217251788988374e-08, "epoch": 1.8239631336405528, "percentage": 91.2, "elapsed_time": "11:08:47", 
"remaining_time": "1:04:32"} +{"current_steps": 7917, "total_steps": 8680, "loss": 0.7751119136810303, "lr": 4.206311402853746e-08, "epoch": 1.8241935483870968, "percentage": 91.21, "elapsed_time": "11:08:53", "remaining_time": "1:04:27"} +{"current_steps": 7918, "total_steps": 8680, "loss": 0.8073426485061646, "lr": 4.195384921046208e-08, "epoch": 1.8244239631336405, "percentage": 91.22, "elapsed_time": "11:08:57", "remaining_time": "1:04:22"} +{"current_steps": 7919, "total_steps": 8680, "loss": 0.7918455600738525, "lr": 4.1844723451517017e-08, "epoch": 1.8246543778801843, "percentage": 91.23, "elapsed_time": "11:09:02", "remaining_time": "1:04:17"} +{"current_steps": 7920, "total_steps": 8680, "loss": 0.8070017099380493, "lr": 4.1735736767542054e-08, "epoch": 1.8248847926267282, "percentage": 91.24, "elapsed_time": "11:09:07", "remaining_time": "1:04:12"} +{"current_steps": 7921, "total_steps": 8680, "loss": 0.7202159762382507, "lr": 4.1626889174356306e-08, "epoch": 1.825115207373272, "percentage": 91.26, "elapsed_time": "11:09:12", "remaining_time": "1:04:07"} +{"current_steps": 7922, "total_steps": 8680, "loss": 0.8412283658981323, "lr": 4.15181806877587e-08, "epoch": 1.8253456221198157, "percentage": 91.27, "elapsed_time": "11:09:17", "remaining_time": "1:04:02"} +{"current_steps": 7923, "total_steps": 8680, "loss": 0.6230478286743164, "lr": 4.140961132352849e-08, "epoch": 1.8255760368663596, "percentage": 91.28, "elapsed_time": "11:09:23", "remaining_time": "1:03:57"} +{"current_steps": 7924, "total_steps": 8680, "loss": 0.6475099921226501, "lr": 4.1301181097424196e-08, "epoch": 1.8258064516129031, "percentage": 91.29, "elapsed_time": "11:09:29", "remaining_time": "1:03:52"} +{"current_steps": 7925, "total_steps": 8680, "loss": 0.6277462244033813, "lr": 4.1192890025184223e-08, "epoch": 1.826036866359447, "percentage": 91.3, "elapsed_time": "11:09:34", "remaining_time": "1:03:47"} +{"current_steps": 7926, "total_steps": 8680, "loss": 0.784058690071106, "lr": 
4.1084738122527e-08, "epoch": 1.8262672811059908, "percentage": 91.31, "elapsed_time": "11:09:39", "remaining_time": "1:03:42"} +{"current_steps": 7927, "total_steps": 8680, "loss": 0.7214534282684326, "lr": 4.097672540515063e-08, "epoch": 1.8264976958525345, "percentage": 91.32, "elapsed_time": "11:09:44", "remaining_time": "1:03:37"} +{"current_steps": 7928, "total_steps": 8680, "loss": 0.7504015564918518, "lr": 4.086885188873302e-08, "epoch": 1.8267281105990785, "percentage": 91.34, "elapsed_time": "11:09:49", "remaining_time": "1:03:32"} +{"current_steps": 7929, "total_steps": 8680, "loss": 0.8837840557098389, "lr": 4.076111758893175e-08, "epoch": 1.826958525345622, "percentage": 91.35, "elapsed_time": "11:09:55", "remaining_time": "1:03:27"} +{"current_steps": 7930, "total_steps": 8680, "loss": 0.6903706789016724, "lr": 4.065352252138443e-08, "epoch": 1.827188940092166, "percentage": 91.36, "elapsed_time": "11:10:00", "remaining_time": "1:03:22"} +{"current_steps": 7931, "total_steps": 8680, "loss": 0.6120485067367554, "lr": 4.054606670170824e-08, "epoch": 1.8274193548387097, "percentage": 91.37, "elapsed_time": "11:10:07", "remaining_time": "1:03:17"} +{"current_steps": 7932, "total_steps": 8680, "loss": 0.9566253423690796, "lr": 4.043875014550047e-08, "epoch": 1.8276497695852534, "percentage": 91.38, "elapsed_time": "11:10:11", "remaining_time": "1:03:12"} +{"current_steps": 7933, "total_steps": 8680, "loss": 0.7702776193618774, "lr": 4.033157286833766e-08, "epoch": 1.8278801843317973, "percentage": 91.39, "elapsed_time": "11:10:16", "remaining_time": "1:03:06"} +{"current_steps": 7934, "total_steps": 8680, "loss": 0.7326529026031494, "lr": 4.0224534885776706e-08, "epoch": 1.828110599078341, "percentage": 91.41, "elapsed_time": "11:10:22", "remaining_time": "1:03:01"} +{"current_steps": 7935, "total_steps": 8680, "loss": 0.8161343336105347, "lr": 4.011763621335395e-08, "epoch": 1.8283410138248848, "percentage": 91.42, "elapsed_time": "11:10:26", 
"remaining_time": "1:02:56"} +{"current_steps": 7936, "total_steps": 8680, "loss": 0.7167537212371826, "lr": 4.001087686658544e-08, "epoch": 1.8285714285714287, "percentage": 91.43, "elapsed_time": "11:10:31", "remaining_time": "1:02:51"} +{"current_steps": 7937, "total_steps": 8680, "loss": 0.9195249080657959, "lr": 3.9904256860967433e-08, "epoch": 1.8288018433179722, "percentage": 91.44, "elapsed_time": "11:10:36", "remaining_time": "1:02:46"} +{"current_steps": 7938, "total_steps": 8680, "loss": 0.9483609199523926, "lr": 3.979777621197544e-08, "epoch": 1.8290322580645162, "percentage": 91.45, "elapsed_time": "11:10:42", "remaining_time": "1:02:41"} +{"current_steps": 7939, "total_steps": 8680, "loss": 0.6521364450454712, "lr": 3.96914349350651e-08, "epoch": 1.82926267281106, "percentage": 91.46, "elapsed_time": "11:10:46", "remaining_time": "1:02:36"} +{"current_steps": 7940, "total_steps": 8680, "loss": 0.714328408241272, "lr": 3.958523304567174e-08, "epoch": 1.8294930875576036, "percentage": 91.47, "elapsed_time": "11:10:53", "remaining_time": "1:02:31"} +{"current_steps": 7941, "total_steps": 8680, "loss": 0.705136775970459, "lr": 3.9479170559210464e-08, "epoch": 1.8297235023041476, "percentage": 91.49, "elapsed_time": "11:10:58", "remaining_time": "1:02:26"} +{"current_steps": 7942, "total_steps": 8680, "loss": 0.9096843004226685, "lr": 3.937324749107584e-08, "epoch": 1.829953917050691, "percentage": 91.5, "elapsed_time": "11:11:02", "remaining_time": "1:02:21"} +{"current_steps": 7943, "total_steps": 8680, "loss": 0.7797929048538208, "lr": 3.9267463856642704e-08, "epoch": 1.830184331797235, "percentage": 91.51, "elapsed_time": "11:11:07", "remaining_time": "1:02:16"} +{"current_steps": 7944, "total_steps": 8680, "loss": 0.739689290523529, "lr": 3.9161819671265414e-08, "epoch": 1.8304147465437788, "percentage": 91.52, "elapsed_time": "11:11:13", "remaining_time": "1:02:11"} +{"current_steps": 7945, "total_steps": 8680, "loss": 0.7297589778900146, "lr": 
3.905631495027795e-08, "epoch": 1.8306451612903225, "percentage": 91.53, "elapsed_time": "11:11:18", "remaining_time": "1:02:06"} +{"current_steps": 7946, "total_steps": 8680, "loss": 0.6632317900657654, "lr": 3.895094970899426e-08, "epoch": 1.8308755760368665, "percentage": 91.54, "elapsed_time": "11:11:23", "remaining_time": "1:02:01"} +{"current_steps": 7947, "total_steps": 8680, "loss": 0.8075754642486572, "lr": 3.884572396270802e-08, "epoch": 1.8311059907834102, "percentage": 91.56, "elapsed_time": "11:11:30", "remaining_time": "1:01:56"} +{"current_steps": 7948, "total_steps": 8680, "loss": 0.879385232925415, "lr": 3.874063772669256e-08, "epoch": 1.831336405529954, "percentage": 91.57, "elapsed_time": "11:11:36", "remaining_time": "1:01:51"} +{"current_steps": 7949, "total_steps": 8680, "loss": 0.7182341814041138, "lr": 3.86356910162009e-08, "epoch": 1.8315668202764976, "percentage": 91.58, "elapsed_time": "11:11:42", "remaining_time": "1:01:46"} +{"current_steps": 7950, "total_steps": 8680, "loss": 0.8980770111083984, "lr": 3.853088384646608e-08, "epoch": 1.8317972350230414, "percentage": 91.59, "elapsed_time": "11:11:47", "remaining_time": "1:01:41"} +{"current_steps": 7951, "total_steps": 8680, "loss": 0.7798547744750977, "lr": 3.8426216232700483e-08, "epoch": 1.8320276497695853, "percentage": 91.6, "elapsed_time": "11:11:52", "remaining_time": "1:01:36"} +{"current_steps": 7952, "total_steps": 8680, "loss": 0.7545509934425354, "lr": 3.832168819009685e-08, "epoch": 1.832258064516129, "percentage": 91.61, "elapsed_time": "11:11:58", "remaining_time": "1:01:31"} +{"current_steps": 7953, "total_steps": 8680, "loss": 0.7394163608551025, "lr": 3.821729973382681e-08, "epoch": 1.8324884792626728, "percentage": 91.62, "elapsed_time": "11:12:05", "remaining_time": "1:01:26"} +{"current_steps": 7954, "total_steps": 8680, "loss": 0.7771584987640381, "lr": 3.811305087904271e-08, "epoch": 1.8327188940092167, "percentage": 91.64, "elapsed_time": "11:12:10", 
"remaining_time": "1:01:21"} +{"current_steps": 7955, "total_steps": 8680, "loss": 0.6490596532821655, "lr": 3.800894164087587e-08, "epoch": 1.8329493087557602, "percentage": 91.65, "elapsed_time": "11:12:14", "remaining_time": "1:01:16"} +{"current_steps": 7956, "total_steps": 8680, "loss": 0.8465416431427002, "lr": 3.7904972034437546e-08, "epoch": 1.8331797235023042, "percentage": 91.66, "elapsed_time": "11:12:20", "remaining_time": "1:01:10"} +{"current_steps": 7957, "total_steps": 8680, "loss": 0.6769351363182068, "lr": 3.780114207481899e-08, "epoch": 1.833410138248848, "percentage": 91.67, "elapsed_time": "11:12:25", "remaining_time": "1:01:05"} +{"current_steps": 7958, "total_steps": 8680, "loss": 0.8187215328216553, "lr": 3.769745177709094e-08, "epoch": 1.8336405529953916, "percentage": 91.68, "elapsed_time": "11:12:29", "remaining_time": "1:01:00"} +{"current_steps": 7959, "total_steps": 8680, "loss": 0.7524763345718384, "lr": 3.759390115630356e-08, "epoch": 1.8338709677419356, "percentage": 91.69, "elapsed_time": "11:12:34", "remaining_time": "1:00:55"} +{"current_steps": 7960, "total_steps": 8680, "loss": 0.8019517064094543, "lr": 3.749049022748762e-08, "epoch": 1.8341013824884793, "percentage": 91.71, "elapsed_time": "11:12:38", "remaining_time": "1:00:50"} +{"current_steps": 7961, "total_steps": 8680, "loss": 0.7732158899307251, "lr": 3.738721900565278e-08, "epoch": 1.834331797235023, "percentage": 91.72, "elapsed_time": "11:12:44", "remaining_time": "1:00:45"} +{"current_steps": 7962, "total_steps": 8680, "loss": 0.7152917385101318, "lr": 3.728408750578871e-08, "epoch": 1.8345622119815668, "percentage": 91.73, "elapsed_time": "11:12:49", "remaining_time": "1:00:40"} +{"current_steps": 7963, "total_steps": 8680, "loss": 0.7117735147476196, "lr": 3.7181095742864876e-08, "epoch": 1.8347926267281105, "percentage": 91.74, "elapsed_time": "11:12:54", "remaining_time": "1:00:35"} +{"current_steps": 7964, "total_steps": 8680, "loss": 0.7651360034942627, "lr": 
3.7078243731830436e-08, "epoch": 1.8350230414746544, "percentage": 91.75, "elapsed_time": "11:13:00", "remaining_time": "1:00:30"} +{"current_steps": 7965, "total_steps": 8680, "loss": 0.6686996817588806, "lr": 3.697553148761412e-08, "epoch": 1.8352534562211982, "percentage": 91.76, "elapsed_time": "11:13:05", "remaining_time": "1:00:25"} +{"current_steps": 7966, "total_steps": 8680, "loss": 0.8654145002365112, "lr": 3.687295902512455e-08, "epoch": 1.835483870967742, "percentage": 91.77, "elapsed_time": "11:13:09", "remaining_time": "1:00:20"} +{"current_steps": 7967, "total_steps": 8680, "loss": 0.7883874177932739, "lr": 3.6770526359250046e-08, "epoch": 1.8357142857142859, "percentage": 91.79, "elapsed_time": "11:13:14", "remaining_time": "1:00:15"} +{"current_steps": 7968, "total_steps": 8680, "loss": 0.7270755767822266, "lr": 3.666823350485848e-08, "epoch": 1.8359447004608294, "percentage": 91.8, "elapsed_time": "11:13:19", "remaining_time": "1:00:09"} +{"current_steps": 7969, "total_steps": 8680, "loss": 0.654710054397583, "lr": 3.656608047679744e-08, "epoch": 1.8361751152073733, "percentage": 91.81, "elapsed_time": "11:13:24", "remaining_time": "1:00:04"} +{"current_steps": 7970, "total_steps": 8680, "loss": 0.688032329082489, "lr": 3.6464067289894485e-08, "epoch": 1.836405529953917, "percentage": 91.82, "elapsed_time": "11:13:29", "remaining_time": "0:59:59"} +{"current_steps": 7971, "total_steps": 8680, "loss": 0.901115894317627, "lr": 3.6362193958956457e-08, "epoch": 1.8366359447004608, "percentage": 91.83, "elapsed_time": "11:13:34", "remaining_time": "0:59:54"} +{"current_steps": 7972, "total_steps": 8680, "loss": 0.7335774302482605, "lr": 3.6260460498770404e-08, "epoch": 1.8368663594470047, "percentage": 91.84, "elapsed_time": "11:13:40", "remaining_time": "0:59:49"} +{"current_steps": 7973, "total_steps": 8680, "loss": 0.8056570291519165, "lr": 3.615886692410275e-08, "epoch": 1.8370967741935482, "percentage": 91.85, "elapsed_time": "11:13:45", 
"remaining_time": "0:59:44"} +{"current_steps": 7974, "total_steps": 8680, "loss": 0.82081538438797, "lr": 3.6057413249699356e-08, "epoch": 1.8373271889400922, "percentage": 91.87, "elapsed_time": "11:13:49", "remaining_time": "0:59:39"} +{"current_steps": 7975, "total_steps": 8680, "loss": 0.7741475105285645, "lr": 3.595609949028655e-08, "epoch": 1.837557603686636, "percentage": 91.88, "elapsed_time": "11:13:55", "remaining_time": "0:59:34"} +{"current_steps": 7976, "total_steps": 8680, "loss": 0.9020792245864868, "lr": 3.5854925660569693e-08, "epoch": 1.8377880184331796, "percentage": 91.89, "elapsed_time": "11:14:00", "remaining_time": "0:59:29"} +{"current_steps": 7977, "total_steps": 8680, "loss": 0.759677529335022, "lr": 3.57538917752338e-08, "epoch": 1.8380184331797236, "percentage": 91.9, "elapsed_time": "11:14:06", "remaining_time": "0:59:24"} +{"current_steps": 7978, "total_steps": 8680, "loss": 0.6658498644828796, "lr": 3.565299784894427e-08, "epoch": 1.8382488479262673, "percentage": 91.91, "elapsed_time": "11:14:13", "remaining_time": "0:59:19"} +{"current_steps": 7979, "total_steps": 8680, "loss": 0.8359798192977905, "lr": 3.5552243896345254e-08, "epoch": 1.838479262672811, "percentage": 91.92, "elapsed_time": "11:14:18", "remaining_time": "0:59:14"} +{"current_steps": 7980, "total_steps": 8680, "loss": 0.656216025352478, "lr": 3.545162993206141e-08, "epoch": 1.838709677419355, "percentage": 91.94, "elapsed_time": "11:14:24", "remaining_time": "0:59:09"} +{"current_steps": 7981, "total_steps": 8680, "loss": 0.7783077359199524, "lr": 3.53511559706966e-08, "epoch": 1.8389400921658985, "percentage": 91.95, "elapsed_time": "11:14:29", "remaining_time": "0:59:04"} +{"current_steps": 7982, "total_steps": 8680, "loss": 0.7726818919181824, "lr": 3.525082202683427e-08, "epoch": 1.8391705069124424, "percentage": 91.96, "elapsed_time": "11:14:34", "remaining_time": "0:58:59"} +{"current_steps": 7983, "total_steps": 8680, "loss": 0.6797339916229248, "lr": 
3.5150628115038213e-08, "epoch": 1.8394009216589862, "percentage": 91.97, "elapsed_time": "11:14:39", "remaining_time": "0:58:54"} +{"current_steps": 7984, "total_steps": 8680, "loss": 0.818444013595581, "lr": 3.505057424985114e-08, "epoch": 1.83963133640553, "percentage": 91.98, "elapsed_time": "11:14:42", "remaining_time": "0:58:49"} +{"current_steps": 7985, "total_steps": 8680, "loss": 0.716003954410553, "lr": 3.495066044579564e-08, "epoch": 1.8398617511520738, "percentage": 91.99, "elapsed_time": "11:14:46", "remaining_time": "0:58:43"} +{"current_steps": 7986, "total_steps": 8680, "loss": 0.8214380741119385, "lr": 3.485088671737435e-08, "epoch": 1.8400921658986173, "percentage": 92.0, "elapsed_time": "11:14:51", "remaining_time": "0:58:38"} +{"current_steps": 7987, "total_steps": 8680, "loss": 0.8004239797592163, "lr": 3.475125307906923e-08, "epoch": 1.8403225806451613, "percentage": 92.02, "elapsed_time": "11:14:56", "remaining_time": "0:58:33"} +{"current_steps": 7988, "total_steps": 8680, "loss": 0.724868655204773, "lr": 3.465175954534183e-08, "epoch": 1.840552995391705, "percentage": 92.03, "elapsed_time": "11:15:02", "remaining_time": "0:58:28"} +{"current_steps": 7989, "total_steps": 8680, "loss": 0.6774435043334961, "lr": 3.455240613063359e-08, "epoch": 1.8407834101382488, "percentage": 92.04, "elapsed_time": "11:15:08", "remaining_time": "0:58:23"} +{"current_steps": 7990, "total_steps": 8680, "loss": 0.7618406414985657, "lr": 3.445319284936543e-08, "epoch": 1.8410138248847927, "percentage": 92.05, "elapsed_time": "11:15:15", "remaining_time": "0:58:18"} +{"current_steps": 7991, "total_steps": 8680, "loss": 0.8176794648170471, "lr": 3.4354119715938154e-08, "epoch": 1.8412442396313364, "percentage": 92.06, "elapsed_time": "11:15:20", "remaining_time": "0:58:13"} +{"current_steps": 7992, "total_steps": 8680, "loss": 0.7540123462677002, "lr": 3.4255186744732045e-08, "epoch": 1.8414746543778802, "percentage": 92.07, "elapsed_time": "11:15:25", 
"remaining_time": "0:58:08"} +{"current_steps": 7993, "total_steps": 8680, "loss": 0.6888976097106934, "lr": 3.4156393950107164e-08, "epoch": 1.841705069124424, "percentage": 92.09, "elapsed_time": "11:15:31", "remaining_time": "0:58:03"} +{"current_steps": 7994, "total_steps": 8680, "loss": 0.6719028949737549, "lr": 3.405774134640294e-08, "epoch": 1.8419354838709676, "percentage": 92.1, "elapsed_time": "11:15:37", "remaining_time": "0:57:58"} +{"current_steps": 7995, "total_steps": 8680, "loss": 0.817806601524353, "lr": 3.3959228947938903e-08, "epoch": 1.8421658986175116, "percentage": 92.11, "elapsed_time": "11:15:42", "remaining_time": "0:57:53"} +{"current_steps": 7996, "total_steps": 8680, "loss": 0.6681252717971802, "lr": 3.3860856769013955e-08, "epoch": 1.8423963133640553, "percentage": 92.12, "elapsed_time": "11:15:48", "remaining_time": "0:57:48"} +{"current_steps": 7997, "total_steps": 8680, "loss": 0.7965174317359924, "lr": 3.3762624823906574e-08, "epoch": 1.842626728110599, "percentage": 92.13, "elapsed_time": "11:15:54", "remaining_time": "0:57:43"} +{"current_steps": 7998, "total_steps": 8680, "loss": 0.714171826839447, "lr": 3.366453312687512e-08, "epoch": 1.842857142857143, "percentage": 92.14, "elapsed_time": "11:15:59", "remaining_time": "0:57:38"} +{"current_steps": 7999, "total_steps": 8680, "loss": 0.7489287853240967, "lr": 3.356658169215743e-08, "epoch": 1.8430875576036865, "percentage": 92.15, "elapsed_time": "11:16:05", "remaining_time": "0:57:33"} +{"current_steps": 8000, "total_steps": 8680, "loss": 0.790866494178772, "lr": 3.34687705339709e-08, "epoch": 1.8433179723502304, "percentage": 92.17, "elapsed_time": "11:16:10", "remaining_time": "0:57:28"} +{"current_steps": 8001, "total_steps": 8680, "loss": 0.8208349943161011, "lr": 3.337109966651297e-08, "epoch": 1.8435483870967742, "percentage": 92.18, "elapsed_time": "11:16:16", "remaining_time": "0:57:23"} +{"current_steps": 8002, "total_steps": 8680, "loss": 0.7974207401275635, "lr": 
3.3273569103960174e-08, "epoch": 1.8437788018433179, "percentage": 92.19, "elapsed_time": "11:16:20", "remaining_time": "0:57:18"} +{"current_steps": 8003, "total_steps": 8680, "loss": 0.751643180847168, "lr": 3.317617886046908e-08, "epoch": 1.8440092165898618, "percentage": 92.2, "elapsed_time": "11:16:26", "remaining_time": "0:57:13"} +{"current_steps": 8004, "total_steps": 8680, "loss": 0.9231137037277222, "lr": 3.3078928950175724e-08, "epoch": 1.8442396313364056, "percentage": 92.21, "elapsed_time": "11:16:31", "remaining_time": "0:57:08"} +{"current_steps": 8005, "total_steps": 8680, "loss": 0.7975907325744629, "lr": 3.2981819387195683e-08, "epoch": 1.8444700460829493, "percentage": 92.22, "elapsed_time": "11:16:37", "remaining_time": "0:57:03"} +{"current_steps": 8006, "total_steps": 8680, "loss": 0.7467124462127686, "lr": 3.288485018562448e-08, "epoch": 1.8447004608294932, "percentage": 92.24, "elapsed_time": "11:16:41", "remaining_time": "0:56:58"} +{"current_steps": 8007, "total_steps": 8680, "loss": 0.7983080148696899, "lr": 3.278802135953706e-08, "epoch": 1.8449308755760367, "percentage": 92.25, "elapsed_time": "11:16:46", "remaining_time": "0:56:53"} +{"current_steps": 8008, "total_steps": 8680, "loss": 0.7991635799407959, "lr": 3.269133292298787e-08, "epoch": 1.8451612903225807, "percentage": 92.26, "elapsed_time": "11:16:52", "remaining_time": "0:56:48"} +{"current_steps": 8009, "total_steps": 8680, "loss": 0.9309900403022766, "lr": 3.259478489001111e-08, "epoch": 1.8453917050691244, "percentage": 92.27, "elapsed_time": "11:16:57", "remaining_time": "0:56:42"} +{"current_steps": 8010, "total_steps": 8680, "loss": 0.7667444944381714, "lr": 3.249837727462068e-08, "epoch": 1.8456221198156681, "percentage": 92.28, "elapsed_time": "11:17:02", "remaining_time": "0:56:37"} +{"current_steps": 8011, "total_steps": 8680, "loss": 0.722775936126709, "lr": 3.2402110090809955e-08, "epoch": 1.845852534562212, "percentage": 92.29, "elapsed_time": "11:17:07", 
"remaining_time": "0:56:32"} +{"current_steps": 8012, "total_steps": 8680, "loss": 0.7049660682678223, "lr": 3.230598335255208e-08, "epoch": 1.8460829493087556, "percentage": 92.3, "elapsed_time": "11:17:12", "remaining_time": "0:56:27"} +{"current_steps": 8013, "total_steps": 8680, "loss": 0.7543717622756958, "lr": 3.220999707379957e-08, "epoch": 1.8463133640552996, "percentage": 92.32, "elapsed_time": "11:17:17", "remaining_time": "0:56:22"} +{"current_steps": 8014, "total_steps": 8680, "loss": 0.705594539642334, "lr": 3.2114151268484825e-08, "epoch": 1.8465437788018433, "percentage": 92.33, "elapsed_time": "11:17:23", "remaining_time": "0:56:17"} +{"current_steps": 8015, "total_steps": 8680, "loss": 0.8663946390151978, "lr": 3.201844595051972e-08, "epoch": 1.846774193548387, "percentage": 92.34, "elapsed_time": "11:17:27", "remaining_time": "0:56:12"} +{"current_steps": 8016, "total_steps": 8680, "loss": 0.6990827918052673, "lr": 3.192288113379582e-08, "epoch": 1.847004608294931, "percentage": 92.35, "elapsed_time": "11:17:33", "remaining_time": "0:56:07"} +{"current_steps": 8017, "total_steps": 8680, "loss": 0.8494592905044556, "lr": 3.182745683218391e-08, "epoch": 1.8472350230414747, "percentage": 92.36, "elapsed_time": "11:17:37", "remaining_time": "0:56:02"} +{"current_steps": 8018, "total_steps": 8680, "loss": 0.7689815163612366, "lr": 3.173217305953524e-08, "epoch": 1.8474654377880184, "percentage": 92.37, "elapsed_time": "11:17:43", "remaining_time": "0:55:57"} +{"current_steps": 8019, "total_steps": 8680, "loss": 0.7961923480033875, "lr": 3.163702982967964e-08, "epoch": 1.8476958525345624, "percentage": 92.38, "elapsed_time": "11:17:50", "remaining_time": "0:55:52"} +{"current_steps": 8020, "total_steps": 8680, "loss": 0.7290681600570679, "lr": 3.154202715642729e-08, "epoch": 1.8479262672811059, "percentage": 92.4, "elapsed_time": "11:17:55", "remaining_time": "0:55:47"} +{"current_steps": 8021, "total_steps": 8680, "loss": 0.7486605048179626, "lr": 
3.1447165053567594e-08, "epoch": 1.8481566820276498, "percentage": 92.41, "elapsed_time": "11:18:00", "remaining_time": "0:55:42"} +{"current_steps": 8022, "total_steps": 8680, "loss": 0.8263967633247375, "lr": 3.135244353486977e-08, "epoch": 1.8483870967741935, "percentage": 92.42, "elapsed_time": "11:18:04", "remaining_time": "0:55:37"} +{"current_steps": 8023, "total_steps": 8680, "loss": 0.7462657690048218, "lr": 3.1257862614082254e-08, "epoch": 1.8486175115207373, "percentage": 92.43, "elapsed_time": "11:18:08", "remaining_time": "0:55:31"} +{"current_steps": 8024, "total_steps": 8680, "loss": 0.9305819272994995, "lr": 3.116342230493374e-08, "epoch": 1.8488479262672812, "percentage": 92.44, "elapsed_time": "11:18:12", "remaining_time": "0:55:26"} +{"current_steps": 8025, "total_steps": 8680, "loss": 0.7202557325363159, "lr": 3.1069122621131925e-08, "epoch": 1.8490783410138247, "percentage": 92.45, "elapsed_time": "11:18:17", "remaining_time": "0:55:21"} +{"current_steps": 8026, "total_steps": 8680, "loss": 0.723913311958313, "lr": 3.097496357636409e-08, "epoch": 1.8493087557603687, "percentage": 92.47, "elapsed_time": "11:18:23", "remaining_time": "0:55:16"} +{"current_steps": 8027, "total_steps": 8680, "loss": 0.7067763805389404, "lr": 3.088094518429751e-08, "epoch": 1.8495391705069124, "percentage": 92.48, "elapsed_time": "11:18:27", "remaining_time": "0:55:11"} +{"current_steps": 8028, "total_steps": 8680, "loss": 0.7853527665138245, "lr": 3.078706745857884e-08, "epoch": 1.8497695852534561, "percentage": 92.49, "elapsed_time": "11:18:32", "remaining_time": "0:55:06"} +{"current_steps": 8029, "total_steps": 8680, "loss": 0.7183133363723755, "lr": 3.0693330412834285e-08, "epoch": 1.85, "percentage": 92.5, "elapsed_time": "11:18:38", "remaining_time": "0:55:01"} +{"current_steps": 8030, "total_steps": 8680, "loss": 0.8041096925735474, "lr": 3.0599734060669626e-08, "epoch": 1.8502304147465438, "percentage": 92.51, "elapsed_time": "11:18:44", "remaining_time": 
"0:54:56"} +{"current_steps": 8031, "total_steps": 8680, "loss": 0.7259166240692139, "lr": 3.050627841567022e-08, "epoch": 1.8504608294930875, "percentage": 92.52, "elapsed_time": "11:18:49", "remaining_time": "0:54:51"} +{"current_steps": 8032, "total_steps": 8680, "loss": 0.8844292163848877, "lr": 3.041296349140099e-08, "epoch": 1.8506912442396315, "percentage": 92.53, "elapsed_time": "11:18:53", "remaining_time": "0:54:46"} +{"current_steps": 8033, "total_steps": 8680, "loss": 0.7566810846328735, "lr": 3.031978930140666e-08, "epoch": 1.850921658986175, "percentage": 92.55, "elapsed_time": "11:18:57", "remaining_time": "0:54:41"} +{"current_steps": 8034, "total_steps": 8680, "loss": 0.8365379571914673, "lr": 3.0226755859211085e-08, "epoch": 1.851152073732719, "percentage": 92.56, "elapsed_time": "11:19:01", "remaining_time": "0:54:35"} +{"current_steps": 8035, "total_steps": 8680, "loss": 0.6786175966262817, "lr": 3.013386317831823e-08, "epoch": 1.8513824884792627, "percentage": 92.57, "elapsed_time": "11:19:08", "remaining_time": "0:54:31"} +{"current_steps": 8036, "total_steps": 8680, "loss": 0.5450198650360107, "lr": 3.0041111272211206e-08, "epoch": 1.8516129032258064, "percentage": 92.58, "elapsed_time": "11:19:14", "remaining_time": "0:54:26"} +{"current_steps": 8037, "total_steps": 8680, "loss": 0.8792393207550049, "lr": 2.994850015435269e-08, "epoch": 1.8518433179723504, "percentage": 92.59, "elapsed_time": "11:19:19", "remaining_time": "0:54:20"} +{"current_steps": 8038, "total_steps": 8680, "loss": 0.8463287353515625, "lr": 2.985602983818525e-08, "epoch": 1.8520737327188939, "percentage": 92.6, "elapsed_time": "11:19:24", "remaining_time": "0:54:15"} +{"current_steps": 8039, "total_steps": 8680, "loss": 0.77659010887146, "lr": 2.9763700337130827e-08, "epoch": 1.8523041474654378, "percentage": 92.62, "elapsed_time": "11:19:29", "remaining_time": "0:54:10"} +{"current_steps": 8040, "total_steps": 8680, "loss": 0.6180428266525269, "lr": 
2.9671511664590698e-08, "epoch": 1.8525345622119815, "percentage": 92.63, "elapsed_time": "11:19:36", "remaining_time": "0:54:05"} +{"current_steps": 8041, "total_steps": 8680, "loss": 0.7886658906936646, "lr": 2.9579463833946273e-08, "epoch": 1.8527649769585253, "percentage": 92.64, "elapsed_time": "11:19:41", "remaining_time": "0:54:00"} +{"current_steps": 8042, "total_steps": 8680, "loss": 0.8371871709823608, "lr": 2.9487556858557972e-08, "epoch": 1.8529953917050692, "percentage": 92.65, "elapsed_time": "11:19:46", "remaining_time": "0:53:55"} +{"current_steps": 8043, "total_steps": 8680, "loss": 0.7082366347312927, "lr": 2.9395790751765904e-08, "epoch": 1.853225806451613, "percentage": 92.66, "elapsed_time": "11:19:51", "remaining_time": "0:53:50"} +{"current_steps": 8044, "total_steps": 8680, "loss": 0.7866584062576294, "lr": 2.930416552689008e-08, "epoch": 1.8534562211981567, "percentage": 92.67, "elapsed_time": "11:19:57", "remaining_time": "0:53:45"} +{"current_steps": 8045, "total_steps": 8680, "loss": 0.8789514303207397, "lr": 2.9212681197229527e-08, "epoch": 1.8536866359447006, "percentage": 92.68, "elapsed_time": "11:20:02", "remaining_time": "0:53:40"} +{"current_steps": 8046, "total_steps": 8680, "loss": 0.7041239738464355, "lr": 2.9121337776063072e-08, "epoch": 1.8539170506912441, "percentage": 92.7, "elapsed_time": "11:20:07", "remaining_time": "0:53:35"} +{"current_steps": 8047, "total_steps": 8680, "loss": 0.8290516138076782, "lr": 2.9030135276649215e-08, "epoch": 1.854147465437788, "percentage": 92.71, "elapsed_time": "11:20:12", "remaining_time": "0:53:30"} +{"current_steps": 8048, "total_steps": 8680, "loss": 0.8532444834709167, "lr": 2.8939073712225813e-08, "epoch": 1.8543778801843318, "percentage": 92.72, "elapsed_time": "11:20:17", "remaining_time": "0:53:25"} +{"current_steps": 8049, "total_steps": 8680, "loss": 0.8635869026184082, "lr": 2.8848153096010407e-08, "epoch": 1.8546082949308755, "percentage": 92.73, "elapsed_time": "11:20:21", 
"remaining_time": "0:53:20"} +{"current_steps": 8050, "total_steps": 8680, "loss": 0.723747730255127, "lr": 2.8757373441199885e-08, "epoch": 1.8548387096774195, "percentage": 92.74, "elapsed_time": "11:20:27", "remaining_time": "0:53:15"} +{"current_steps": 8051, "total_steps": 8680, "loss": 0.893456220626831, "lr": 2.8666734760970925e-08, "epoch": 1.855069124423963, "percentage": 92.75, "elapsed_time": "11:20:33", "remaining_time": "0:53:10"} +{"current_steps": 8052, "total_steps": 8680, "loss": 0.6871381998062134, "lr": 2.8576237068479335e-08, "epoch": 1.855299539170507, "percentage": 92.76, "elapsed_time": "11:20:39", "remaining_time": "0:53:05"} +{"current_steps": 8053, "total_steps": 8680, "loss": 0.7820594906806946, "lr": 2.848588037686106e-08, "epoch": 1.8555299539170507, "percentage": 92.78, "elapsed_time": "11:20:46", "remaining_time": "0:53:00"} +{"current_steps": 8054, "total_steps": 8680, "loss": 0.7783479690551758, "lr": 2.839566469923105e-08, "epoch": 1.8557603686635944, "percentage": 92.79, "elapsed_time": "11:20:50", "remaining_time": "0:52:55"} +{"current_steps": 8055, "total_steps": 8680, "loss": 0.7612866163253784, "lr": 2.8305590048684268e-08, "epoch": 1.8559907834101383, "percentage": 92.8, "elapsed_time": "11:20:54", "remaining_time": "0:52:49"} +{"current_steps": 8056, "total_steps": 8680, "loss": 0.7483590841293335, "lr": 2.82156564382946e-08, "epoch": 1.856221198156682, "percentage": 92.81, "elapsed_time": "11:21:00", "remaining_time": "0:52:44"} +{"current_steps": 8057, "total_steps": 8680, "loss": 0.7553579807281494, "lr": 2.812586388111582e-08, "epoch": 1.8564516129032258, "percentage": 92.82, "elapsed_time": "11:21:05", "remaining_time": "0:52:39"} +{"current_steps": 8058, "total_steps": 8680, "loss": 0.8895602226257324, "lr": 2.80362123901815e-08, "epoch": 1.8566820276497698, "percentage": 92.83, "elapsed_time": "11:21:09", "remaining_time": "0:52:34"} +{"current_steps": 8059, "total_steps": 8680, "loss": 0.7974053621292114, "lr": 
2.794670197850424e-08, "epoch": 1.8569124423963133, "percentage": 92.85, "elapsed_time": "11:21:15", "remaining_time": "0:52:29"} +{"current_steps": 8060, "total_steps": 8680, "loss": 0.7730135917663574, "lr": 2.7857332659076193e-08, "epoch": 1.8571428571428572, "percentage": 92.86, "elapsed_time": "11:21:21", "remaining_time": "0:52:24"} +{"current_steps": 8061, "total_steps": 8680, "loss": 0.7258738279342651, "lr": 2.7768104444869434e-08, "epoch": 1.857373271889401, "percentage": 92.87, "elapsed_time": "11:21:27", "remaining_time": "0:52:19"} +{"current_steps": 8062, "total_steps": 8680, "loss": 0.7068890333175659, "lr": 2.7679017348835264e-08, "epoch": 1.8576036866359447, "percentage": 92.88, "elapsed_time": "11:21:31", "remaining_time": "0:52:14"} +{"current_steps": 8063, "total_steps": 8680, "loss": 0.8741557002067566, "lr": 2.7590071383904568e-08, "epoch": 1.8578341013824886, "percentage": 92.89, "elapsed_time": "11:21:36", "remaining_time": "0:52:09"} +{"current_steps": 8064, "total_steps": 8680, "loss": 0.8723797798156738, "lr": 2.750126656298768e-08, "epoch": 1.8580645161290321, "percentage": 92.9, "elapsed_time": "11:21:41", "remaining_time": "0:52:04"} +{"current_steps": 8065, "total_steps": 8680, "loss": 0.8510957956314087, "lr": 2.7412602898974514e-08, "epoch": 1.858294930875576, "percentage": 92.91, "elapsed_time": "11:21:46", "remaining_time": "0:51:59"} +{"current_steps": 8066, "total_steps": 8680, "loss": 0.6875216960906982, "lr": 2.732408040473444e-08, "epoch": 1.8585253456221198, "percentage": 92.93, "elapsed_time": "11:21:51", "remaining_time": "0:51:54"} +{"current_steps": 8067, "total_steps": 8680, "loss": 0.8057721257209778, "lr": 2.7235699093116515e-08, "epoch": 1.8587557603686635, "percentage": 92.94, "elapsed_time": "11:21:56", "remaining_time": "0:51:49"} +{"current_steps": 8068, "total_steps": 8680, "loss": 0.7547335624694824, "lr": 2.7147458976949145e-08, "epoch": 1.8589861751152075, "percentage": 92.95, "elapsed_time": "11:22:00", 
"remaining_time": "0:51:44"} +{"current_steps": 8069, "total_steps": 8680, "loss": 0.8301708102226257, "lr": 2.7059360069040193e-08, "epoch": 1.8592165898617512, "percentage": 92.96, "elapsed_time": "11:22:05", "remaining_time": "0:51:38"} +{"current_steps": 8070, "total_steps": 8680, "loss": 0.8313431143760681, "lr": 2.69714023821771e-08, "epoch": 1.859447004608295, "percentage": 92.97, "elapsed_time": "11:22:09", "remaining_time": "0:51:33"} +{"current_steps": 8071, "total_steps": 8680, "loss": 0.6631792783737183, "lr": 2.6883585929126872e-08, "epoch": 1.8596774193548387, "percentage": 92.98, "elapsed_time": "11:22:16", "remaining_time": "0:51:28"} +{"current_steps": 8072, "total_steps": 8680, "loss": 0.7643609046936035, "lr": 2.679591072263576e-08, "epoch": 1.8599078341013824, "percentage": 93.0, "elapsed_time": "11:22:20", "remaining_time": "0:51:23"} +{"current_steps": 8073, "total_steps": 8680, "loss": 0.8543407917022705, "lr": 2.670837677543003e-08, "epoch": 1.8601382488479263, "percentage": 93.01, "elapsed_time": "11:22:26", "remaining_time": "0:51:18"} +{"current_steps": 8074, "total_steps": 8680, "loss": 0.8051489591598511, "lr": 2.662098410021485e-08, "epoch": 1.86036866359447, "percentage": 93.02, "elapsed_time": "11:22:31", "remaining_time": "0:51:13"} +{"current_steps": 8075, "total_steps": 8680, "loss": 0.7065767645835876, "lr": 2.653373270967518e-08, "epoch": 1.8605990783410138, "percentage": 93.03, "elapsed_time": "11:22:36", "remaining_time": "0:51:08"} +{"current_steps": 8076, "total_steps": 8680, "loss": 0.672603189945221, "lr": 2.6446622616475566e-08, "epoch": 1.8608294930875577, "percentage": 93.04, "elapsed_time": "11:22:42", "remaining_time": "0:51:03"} +{"current_steps": 8077, "total_steps": 8680, "loss": 0.7201080918312073, "lr": 2.6359653833259776e-08, "epoch": 1.8610599078341012, "percentage": 93.05, "elapsed_time": "11:22:47", "remaining_time": "0:50:58"} +{"current_steps": 8078, "total_steps": 8680, "loss": 0.7147494554519653, "lr": 
2.627282637265149e-08, "epoch": 1.8612903225806452, "percentage": 93.06, "elapsed_time": "11:22:51", "remaining_time": "0:50:53"} +{"current_steps": 8079, "total_steps": 8680, "loss": 0.7051082253456116, "lr": 2.6186140247253297e-08, "epoch": 1.861520737327189, "percentage": 93.08, "elapsed_time": "11:22:56", "remaining_time": "0:50:48"} +{"current_steps": 8080, "total_steps": 8680, "loss": 0.5786069631576538, "lr": 2.6099595469647683e-08, "epoch": 1.8617511520737327, "percentage": 93.09, "elapsed_time": "11:23:02", "remaining_time": "0:50:43"} +{"current_steps": 8081, "total_steps": 8680, "loss": 0.8880232572555542, "lr": 2.6013192052396493e-08, "epoch": 1.8619815668202766, "percentage": 93.1, "elapsed_time": "11:23:06", "remaining_time": "0:50:38"} +{"current_steps": 8082, "total_steps": 8680, "loss": 0.9295729398727417, "lr": 2.5926930008041137e-08, "epoch": 1.8622119815668203, "percentage": 93.11, "elapsed_time": "11:23:12", "remaining_time": "0:50:33"} +{"current_steps": 8083, "total_steps": 8680, "loss": 0.6963248252868652, "lr": 2.5840809349102378e-08, "epoch": 1.862442396313364, "percentage": 93.12, "elapsed_time": "11:23:17", "remaining_time": "0:50:28"} +{"current_steps": 8084, "total_steps": 8680, "loss": 0.8788298964500427, "lr": 2.5754830088080548e-08, "epoch": 1.8626728110599078, "percentage": 93.13, "elapsed_time": "11:23:22", "remaining_time": "0:50:22"} +{"current_steps": 8085, "total_steps": 8680, "loss": 0.7454242706298828, "lr": 2.5668992237455334e-08, "epoch": 1.8629032258064515, "percentage": 93.15, "elapsed_time": "11:23:28", "remaining_time": "0:50:17"} +{"current_steps": 8086, "total_steps": 8680, "loss": 0.7659780383110046, "lr": 2.558329580968599e-08, "epoch": 1.8631336405529955, "percentage": 93.16, "elapsed_time": "11:23:31", "remaining_time": "0:50:12"} +{"current_steps": 8087, "total_steps": 8680, "loss": 0.8799881935119629, "lr": 2.5497740817211456e-08, "epoch": 1.8633640552995392, "percentage": 93.17, "elapsed_time": "11:23:35", 
"remaining_time": "0:50:07"} +{"current_steps": 8088, "total_steps": 8680, "loss": 0.7319198846817017, "lr": 2.5412327272449684e-08, "epoch": 1.863594470046083, "percentage": 93.18, "elapsed_time": "11:23:40", "remaining_time": "0:50:02"} +{"current_steps": 8089, "total_steps": 8680, "loss": 0.6450645923614502, "lr": 2.532705518779854e-08, "epoch": 1.8638248847926269, "percentage": 93.19, "elapsed_time": "11:23:46", "remaining_time": "0:49:57"} +{"current_steps": 8090, "total_steps": 8680, "loss": 0.7213672399520874, "lr": 2.52419245756349e-08, "epoch": 1.8640552995391704, "percentage": 93.2, "elapsed_time": "11:23:52", "remaining_time": "0:49:52"} +{"current_steps": 8091, "total_steps": 8680, "loss": 0.790163516998291, "lr": 2.515693544831554e-08, "epoch": 1.8642857142857143, "percentage": 93.21, "elapsed_time": "11:23:57", "remaining_time": "0:49:47"} +{"current_steps": 8092, "total_steps": 8680, "loss": 0.8324074745178223, "lr": 2.507208781817638e-08, "epoch": 1.864516129032258, "percentage": 93.23, "elapsed_time": "11:24:03", "remaining_time": "0:49:42"} +{"current_steps": 8093, "total_steps": 8680, "loss": 0.879224419593811, "lr": 2.4987381697533227e-08, "epoch": 1.8647465437788018, "percentage": 93.24, "elapsed_time": "11:24:07", "remaining_time": "0:49:37"} +{"current_steps": 8094, "total_steps": 8680, "loss": 0.8668204545974731, "lr": 2.4902817098680807e-08, "epoch": 1.8649769585253457, "percentage": 93.25, "elapsed_time": "11:24:11", "remaining_time": "0:49:32"} +{"current_steps": 8095, "total_steps": 8680, "loss": 0.6737711429595947, "lr": 2.481839403389341e-08, "epoch": 1.8652073732718892, "percentage": 93.26, "elapsed_time": "11:24:17", "remaining_time": "0:49:27"} +{"current_steps": 8096, "total_steps": 8680, "loss": 0.8948237299919128, "lr": 2.4734112515425343e-08, "epoch": 1.8654377880184332, "percentage": 93.27, "elapsed_time": "11:24:21", "remaining_time": "0:49:21"} +{"current_steps": 8097, "total_steps": 8680, "loss": 0.6866592168807983, "lr": 
2.4649972555509823e-08, "epoch": 1.865668202764977, "percentage": 93.28, "elapsed_time": "11:24:27", "remaining_time": "0:49:16"} +{"current_steps": 8098, "total_steps": 8680, "loss": 0.8852076530456543, "lr": 2.4565974166359416e-08, "epoch": 1.8658986175115206, "percentage": 93.29, "elapsed_time": "11:24:32", "remaining_time": "0:49:11"} +{"current_steps": 8099, "total_steps": 8680, "loss": 0.7402448654174805, "lr": 2.44821173601667e-08, "epoch": 1.8661290322580646, "percentage": 93.31, "elapsed_time": "11:24:38", "remaining_time": "0:49:06"} +{"current_steps": 8100, "total_steps": 8680, "loss": 0.8536320924758911, "lr": 2.439840214910316e-08, "epoch": 1.8663594470046083, "percentage": 93.32, "elapsed_time": "11:24:43", "remaining_time": "0:49:01"} +{"current_steps": 8101, "total_steps": 8680, "loss": 0.6408628225326538, "lr": 2.4314828545319965e-08, "epoch": 1.866589861751152, "percentage": 93.33, "elapsed_time": "11:24:58", "remaining_time": "0:48:57"} +{"current_steps": 8102, "total_steps": 8680, "loss": 0.9578930735588074, "lr": 2.4231396560947858e-08, "epoch": 1.866820276497696, "percentage": 93.34, "elapsed_time": "11:25:03", "remaining_time": "0:48:52"} +{"current_steps": 8103, "total_steps": 8680, "loss": 0.7606109976768494, "lr": 2.4148106208096708e-08, "epoch": 1.8670506912442395, "percentage": 93.35, "elapsed_time": "11:25:09", "remaining_time": "0:48:47"} +{"current_steps": 8104, "total_steps": 8680, "loss": 0.7446529865264893, "lr": 2.4064957498856177e-08, "epoch": 1.8672811059907835, "percentage": 93.36, "elapsed_time": "11:25:13", "remaining_time": "0:48:42"} +{"current_steps": 8105, "total_steps": 8680, "loss": 0.6086497902870178, "lr": 2.398195044529505e-08, "epoch": 1.8675115207373272, "percentage": 93.38, "elapsed_time": "11:25:19", "remaining_time": "0:48:37"} +{"current_steps": 8106, "total_steps": 8680, "loss": 0.9348995685577393, "lr": 2.389908505946181e-08, "epoch": 1.867741935483871, "percentage": 93.39, "elapsed_time": "11:25:22", 
"remaining_time": "0:48:31"} +{"current_steps": 8107, "total_steps": 8680, "loss": 0.6817007660865784, "lr": 2.381636135338405e-08, "epoch": 1.8679723502304149, "percentage": 93.4, "elapsed_time": "11:25:28", "remaining_time": "0:48:26"} +{"current_steps": 8108, "total_steps": 8680, "loss": 0.7228778600692749, "lr": 2.373377933906917e-08, "epoch": 1.8682027649769584, "percentage": 93.41, "elapsed_time": "11:25:34", "remaining_time": "0:48:21"} +{"current_steps": 8109, "total_steps": 8680, "loss": 0.6974154114723206, "lr": 2.3651339028503913e-08, "epoch": 1.8684331797235023, "percentage": 93.42, "elapsed_time": "11:25:41", "remaining_time": "0:48:17"} +{"current_steps": 8110, "total_steps": 8680, "loss": 0.8025680780410767, "lr": 2.3569040433654264e-08, "epoch": 1.868663594470046, "percentage": 93.43, "elapsed_time": "11:25:46", "remaining_time": "0:48:11"} +{"current_steps": 8111, "total_steps": 8680, "loss": 0.7570391893386841, "lr": 2.3486883566465777e-08, "epoch": 1.8688940092165898, "percentage": 93.44, "elapsed_time": "11:25:54", "remaining_time": "0:48:07"} +{"current_steps": 8112, "total_steps": 8680, "loss": 0.7982438802719116, "lr": 2.3404868438863246e-08, "epoch": 1.8691244239631337, "percentage": 93.46, "elapsed_time": "11:25:59", "remaining_time": "0:48:01"} +{"current_steps": 8113, "total_steps": 8680, "loss": 0.6615588665008545, "lr": 2.3322995062751372e-08, "epoch": 1.8693548387096774, "percentage": 93.47, "elapsed_time": "11:26:03", "remaining_time": "0:47:56"} +{"current_steps": 8114, "total_steps": 8680, "loss": 0.7748852968215942, "lr": 2.324126345001376e-08, "epoch": 1.8695852534562212, "percentage": 93.48, "elapsed_time": "11:26:09", "remaining_time": "0:47:51"} +{"current_steps": 8115, "total_steps": 8680, "loss": 0.7238468527793884, "lr": 2.3159673612513587e-08, "epoch": 1.8698156682027651, "percentage": 93.49, "elapsed_time": "11:26:15", "remaining_time": "0:47:46"} +{"current_steps": 8116, "total_steps": 8680, "loss": 0.8146705627441406, 
"lr": 2.3078225562093822e-08, "epoch": 1.8700460829493086, "percentage": 93.5, "elapsed_time": "11:26:20", "remaining_time": "0:47:41"} +{"current_steps": 8117, "total_steps": 8680, "loss": 0.8393594026565552, "lr": 2.2996919310576235e-08, "epoch": 1.8702764976958526, "percentage": 93.51, "elapsed_time": "11:26:25", "remaining_time": "0:47:36"} +{"current_steps": 8118, "total_steps": 8680, "loss": 0.9619652032852173, "lr": 2.2915754869762384e-08, "epoch": 1.8705069124423963, "percentage": 93.53, "elapsed_time": "11:26:30", "remaining_time": "0:47:31"} +{"current_steps": 8119, "total_steps": 8680, "loss": 0.8301321268081665, "lr": 2.2834732251433286e-08, "epoch": 1.87073732718894, "percentage": 93.54, "elapsed_time": "11:26:35", "remaining_time": "0:47:26"} +{"current_steps": 8120, "total_steps": 8680, "loss": 0.8236079812049866, "lr": 2.2753851467349206e-08, "epoch": 1.870967741935484, "percentage": 93.55, "elapsed_time": "11:26:40", "remaining_time": "0:47:21"} +{"current_steps": 8121, "total_steps": 8680, "loss": 0.9007565379142761, "lr": 2.267311252924975e-08, "epoch": 1.8711981566820275, "percentage": 93.56, "elapsed_time": "11:26:44", "remaining_time": "0:47:16"} +{"current_steps": 8122, "total_steps": 8680, "loss": 0.7430707216262817, "lr": 2.2592515448854432e-08, "epoch": 1.8714285714285714, "percentage": 93.57, "elapsed_time": "11:26:50", "remaining_time": "0:47:11"} +{"current_steps": 8123, "total_steps": 8680, "loss": 0.7562465667724609, "lr": 2.2512060237861452e-08, "epoch": 1.8716589861751152, "percentage": 93.58, "elapsed_time": "11:26:55", "remaining_time": "0:47:06"} +{"current_steps": 8124, "total_steps": 8680, "loss": 0.7736096978187561, "lr": 2.24317469079488e-08, "epoch": 1.871889400921659, "percentage": 93.59, "elapsed_time": "11:27:00", "remaining_time": "0:47:01"} +{"current_steps": 8125, "total_steps": 8680, "loss": 0.7652724981307983, "lr": 2.2351575470774153e-08, "epoch": 1.8721198156682028, "percentage": 93.61, "elapsed_time": "11:27:05", 
"remaining_time": "0:46:56"} +{"current_steps": 8126, "total_steps": 8680, "loss": 0.8034792542457581, "lr": 2.2271545937973978e-08, "epoch": 1.8723502304147466, "percentage": 93.62, "elapsed_time": "11:27:10", "remaining_time": "0:46:50"} +{"current_steps": 8127, "total_steps": 8680, "loss": 0.6158101558685303, "lr": 2.219165832116454e-08, "epoch": 1.8725806451612903, "percentage": 93.63, "elapsed_time": "11:27:15", "remaining_time": "0:46:45"} +{"current_steps": 8128, "total_steps": 8680, "loss": 0.6514682769775391, "lr": 2.2111912631941564e-08, "epoch": 1.8728110599078343, "percentage": 93.64, "elapsed_time": "11:27:21", "remaining_time": "0:46:40"} +{"current_steps": 8129, "total_steps": 8680, "loss": 0.833041787147522, "lr": 2.203230888187979e-08, "epoch": 1.8730414746543778, "percentage": 93.65, "elapsed_time": "11:27:25", "remaining_time": "0:46:35"} +{"current_steps": 8130, "total_steps": 8680, "loss": 0.8033208250999451, "lr": 2.1952847082533864e-08, "epoch": 1.8732718894009217, "percentage": 93.66, "elapsed_time": "11:27:31", "remaining_time": "0:46:30"} +{"current_steps": 8131, "total_steps": 8680, "loss": 0.742051362991333, "lr": 2.187352724543734e-08, "epoch": 1.8735023041474654, "percentage": 93.68, "elapsed_time": "11:27:35", "remaining_time": "0:46:25"} +{"current_steps": 8132, "total_steps": 8680, "loss": 0.7411169409751892, "lr": 2.1794349382103337e-08, "epoch": 1.8737327188940092, "percentage": 93.69, "elapsed_time": "11:27:40", "remaining_time": "0:46:20"} +{"current_steps": 8133, "total_steps": 8680, "loss": 0.7517165541648865, "lr": 2.171531350402467e-08, "epoch": 1.8739631336405531, "percentage": 93.7, "elapsed_time": "11:27:45", "remaining_time": "0:46:15"} +{"current_steps": 8134, "total_steps": 8680, "loss": 0.8010021448135376, "lr": 2.1636419622673263e-08, "epoch": 1.8741935483870966, "percentage": 93.71, "elapsed_time": "11:27:50", "remaining_time": "0:46:10"} +{"current_steps": 8135, "total_steps": 8680, "loss": 0.7265241742134094, 
"lr": 2.1557667749500187e-08, "epoch": 1.8744239631336406, "percentage": 93.72, "elapsed_time": "11:27:55", "remaining_time": "0:46:05"} +{"current_steps": 8136, "total_steps": 8680, "loss": 0.6809227466583252, "lr": 2.1479057895936403e-08, "epoch": 1.8746543778801843, "percentage": 93.73, "elapsed_time": "11:28:01", "remaining_time": "0:46:00"} +{"current_steps": 8137, "total_steps": 8680, "loss": 0.8235769271850586, "lr": 2.140059007339201e-08, "epoch": 1.874884792626728, "percentage": 93.74, "elapsed_time": "11:28:05", "remaining_time": "0:45:55"} +{"current_steps": 8138, "total_steps": 8680, "loss": 0.7556289434432983, "lr": 2.132226429325634e-08, "epoch": 1.875115207373272, "percentage": 93.76, "elapsed_time": "11:28:11", "remaining_time": "0:45:50"} +{"current_steps": 8139, "total_steps": 8680, "loss": 0.7765048742294312, "lr": 2.1244080566898638e-08, "epoch": 1.8753456221198157, "percentage": 93.77, "elapsed_time": "11:28:17", "remaining_time": "0:45:45"} +{"current_steps": 8140, "total_steps": 8680, "loss": 0.7637666463851929, "lr": 2.1166038905666816e-08, "epoch": 1.8755760368663594, "percentage": 93.78, "elapsed_time": "11:28:22", "remaining_time": "0:45:39"} +{"current_steps": 8141, "total_steps": 8680, "loss": 0.8413453698158264, "lr": 2.10881393208886e-08, "epoch": 1.8758064516129034, "percentage": 93.79, "elapsed_time": "11:28:28", "remaining_time": "0:45:34"} +{"current_steps": 8142, "total_steps": 8680, "loss": 0.7937475442886353, "lr": 2.101038182387105e-08, "epoch": 1.8760368663594469, "percentage": 93.8, "elapsed_time": "11:28:33", "remaining_time": "0:45:29"} +{"current_steps": 8143, "total_steps": 8680, "loss": 0.7654982805252075, "lr": 2.0932766425900585e-08, "epoch": 1.8762672811059908, "percentage": 93.81, "elapsed_time": "11:28:37", "remaining_time": "0:45:24"} +{"current_steps": 8144, "total_steps": 8680, "loss": 0.8950663805007935, "lr": 2.0855293138242968e-08, "epoch": 1.8764976958525346, "percentage": 93.82, "elapsed_time": "11:28:41", 
"remaining_time": "0:45:19"} +{"current_steps": 8145, "total_steps": 8680, "loss": 0.6405420303344727, "lr": 2.077796197214332e-08, "epoch": 1.8767281105990783, "percentage": 93.84, "elapsed_time": "11:28:46", "remaining_time": "0:45:14"} +{"current_steps": 8146, "total_steps": 8680, "loss": 0.7724314332008362, "lr": 2.0700772938826217e-08, "epoch": 1.8769585253456222, "percentage": 93.85, "elapsed_time": "11:28:50", "remaining_time": "0:45:09"} +{"current_steps": 8147, "total_steps": 8680, "loss": 0.7929061651229858, "lr": 2.0623726049495472e-08, "epoch": 1.8771889400921657, "percentage": 93.86, "elapsed_time": "11:28:56", "remaining_time": "0:45:04"} +{"current_steps": 8148, "total_steps": 8680, "loss": 0.7207096815109253, "lr": 2.0546821315334363e-08, "epoch": 1.8774193548387097, "percentage": 93.87, "elapsed_time": "11:29:01", "remaining_time": "0:44:59"} +{"current_steps": 8149, "total_steps": 8680, "loss": 0.9234127402305603, "lr": 2.0470058747505513e-08, "epoch": 1.8776497695852534, "percentage": 93.88, "elapsed_time": "11:29:06", "remaining_time": "0:44:54"} +{"current_steps": 8150, "total_steps": 8680, "loss": 0.9006322026252747, "lr": 2.0393438357150906e-08, "epoch": 1.8778801843317972, "percentage": 93.89, "elapsed_time": "11:29:10", "remaining_time": "0:44:49"} +{"current_steps": 8151, "total_steps": 8680, "loss": 0.6289799809455872, "lr": 2.0316960155391972e-08, "epoch": 1.878110599078341, "percentage": 93.91, "elapsed_time": "11:29:16", "remaining_time": "0:44:44"} +{"current_steps": 8152, "total_steps": 8680, "loss": 0.8551793098449707, "lr": 2.0240624153329168e-08, "epoch": 1.8783410138248848, "percentage": 93.92, "elapsed_time": "11:29:21", "remaining_time": "0:44:38"} +{"current_steps": 8153, "total_steps": 8680, "loss": 0.8065170645713806, "lr": 2.016443036204285e-08, "epoch": 1.8785714285714286, "percentage": 93.93, "elapsed_time": "11:29:26", "remaining_time": "0:44:33"} +{"current_steps": 8154, "total_steps": 8680, "loss": 0.6361274719238281, 
"lr": 2.0088378792592286e-08, "epoch": 1.8788018433179725, "percentage": 93.94, "elapsed_time": "11:29:33", "remaining_time": "0:44:28"} +{"current_steps": 8155, "total_steps": 8680, "loss": 0.8539700508117676, "lr": 2.0012469456016312e-08, "epoch": 1.879032258064516, "percentage": 93.95, "elapsed_time": "11:29:36", "remaining_time": "0:44:23"} +{"current_steps": 8156, "total_steps": 8680, "loss": 0.7424989938735962, "lr": 1.9936702363333115e-08, "epoch": 1.87926267281106, "percentage": 93.96, "elapsed_time": "11:29:42", "remaining_time": "0:44:18"} +{"current_steps": 8157, "total_steps": 8680, "loss": 0.5831520557403564, "lr": 1.9861077525540116e-08, "epoch": 1.8794930875576037, "percentage": 93.97, "elapsed_time": "11:29:47", "remaining_time": "0:44:13"} +{"current_steps": 8158, "total_steps": 8680, "loss": 0.8080646991729736, "lr": 1.9785594953614093e-08, "epoch": 1.8797235023041474, "percentage": 93.99, "elapsed_time": "11:29:52", "remaining_time": "0:44:08"} +{"current_steps": 8159, "total_steps": 8680, "loss": 0.8008537292480469, "lr": 1.9710254658511392e-08, "epoch": 1.8799539170506914, "percentage": 94.0, "elapsed_time": "11:29:58", "remaining_time": "0:44:03"} +{"current_steps": 8160, "total_steps": 8680, "loss": 0.7317294478416443, "lr": 1.9635056651167492e-08, "epoch": 1.8801843317972349, "percentage": 94.01, "elapsed_time": "11:30:05", "remaining_time": "0:43:58"} +{"current_steps": 8161, "total_steps": 8680, "loss": 0.803238034248352, "lr": 1.956000094249721e-08, "epoch": 1.8804147465437788, "percentage": 94.02, "elapsed_time": "11:30:10", "remaining_time": "0:43:53"} +{"current_steps": 8162, "total_steps": 8680, "loss": 0.7202219367027283, "lr": 1.948508754339506e-08, "epoch": 1.8806451612903226, "percentage": 94.03, "elapsed_time": "11:30:15", "remaining_time": "0:43:48"} +{"current_steps": 8163, "total_steps": 8680, "loss": 0.7691160440444946, "lr": 1.9410316464734233e-08, "epoch": 1.8808755760368663, "percentage": 94.04, "elapsed_time": "11:30:21", 
"remaining_time": "0:43:43"} +{"current_steps": 8164, "total_steps": 8680, "loss": 0.7092962265014648, "lr": 1.933568771736782e-08, "epoch": 1.8811059907834102, "percentage": 94.06, "elapsed_time": "11:30:28", "remaining_time": "0:43:38"} +{"current_steps": 8165, "total_steps": 8680, "loss": 0.819804310798645, "lr": 1.9261201312128274e-08, "epoch": 1.881336405529954, "percentage": 94.07, "elapsed_time": "11:30:31", "remaining_time": "0:43:33"} +{"current_steps": 8166, "total_steps": 8680, "loss": 0.9127538204193115, "lr": 1.918685725982694e-08, "epoch": 1.8815668202764977, "percentage": 94.08, "elapsed_time": "11:30:36", "remaining_time": "0:43:28"} +{"current_steps": 8167, "total_steps": 8680, "loss": 0.8023328185081482, "lr": 1.9112655571254855e-08, "epoch": 1.8817972350230416, "percentage": 94.09, "elapsed_time": "11:30:41", "remaining_time": "0:43:23"} +{"current_steps": 8168, "total_steps": 8680, "loss": 0.723065972328186, "lr": 1.903859625718218e-08, "epoch": 1.8820276497695851, "percentage": 94.1, "elapsed_time": "11:30:47", "remaining_time": "0:43:18"} +{"current_steps": 8169, "total_steps": 8680, "loss": 0.7838670611381531, "lr": 1.896467932835877e-08, "epoch": 1.882258064516129, "percentage": 94.11, "elapsed_time": "11:30:53", "remaining_time": "0:43:13"} +{"current_steps": 8170, "total_steps": 8680, "loss": 0.6029871702194214, "lr": 1.8890904795513475e-08, "epoch": 1.8824884792626728, "percentage": 94.12, "elapsed_time": "11:30:57", "remaining_time": "0:43:07"} +{"current_steps": 8171, "total_steps": 8680, "loss": 0.7622933387756348, "lr": 1.8817272669354512e-08, "epoch": 1.8827188940092165, "percentage": 94.14, "elapsed_time": "11:31:03", "remaining_time": "0:43:02"} +{"current_steps": 8172, "total_steps": 8680, "loss": 0.7702913284301758, "lr": 1.8743782960569444e-08, "epoch": 1.8829493087557605, "percentage": 94.15, "elapsed_time": "11:31:08", "remaining_time": "0:42:57"} +{"current_steps": 8173, "total_steps": 8680, "loss": 0.6385080814361572, "lr": 
1.867043567982518e-08, "epoch": 1.883179723502304, "percentage": 94.16, "elapsed_time": "11:31:14", "remaining_time": "0:42:52"} +{"current_steps": 8174, "total_steps": 8680, "loss": 0.6886409521102905, "lr": 1.8597230837768208e-08, "epoch": 1.883410138248848, "percentage": 94.17, "elapsed_time": "11:31:19", "remaining_time": "0:42:47"} +{"current_steps": 8175, "total_steps": 8680, "loss": 0.7697125673294067, "lr": 1.8524168445023803e-08, "epoch": 1.8836405529953917, "percentage": 94.18, "elapsed_time": "11:31:23", "remaining_time": "0:42:42"} +{"current_steps": 8176, "total_steps": 8680, "loss": 0.7942332029342651, "lr": 1.8451248512197148e-08, "epoch": 1.8838709677419354, "percentage": 94.19, "elapsed_time": "11:31:28", "remaining_time": "0:42:37"} +{"current_steps": 8177, "total_steps": 8680, "loss": 0.7751410007476807, "lr": 1.8378471049872445e-08, "epoch": 1.8841013824884794, "percentage": 94.21, "elapsed_time": "11:31:34", "remaining_time": "0:42:32"} +{"current_steps": 8178, "total_steps": 8680, "loss": 0.8650992512702942, "lr": 1.8305836068613023e-08, "epoch": 1.884331797235023, "percentage": 94.22, "elapsed_time": "11:31:38", "remaining_time": "0:42:27"} +{"current_steps": 8179, "total_steps": 8680, "loss": 0.7084495425224304, "lr": 1.8233343578962e-08, "epoch": 1.8845622119815668, "percentage": 94.23, "elapsed_time": "11:31:43", "remaining_time": "0:42:22"} +{"current_steps": 8180, "total_steps": 8680, "loss": 0.7428494691848755, "lr": 1.8160993591441408e-08, "epoch": 1.8847926267281108, "percentage": 94.24, "elapsed_time": "11:31:49", "remaining_time": "0:42:17"} +{"current_steps": 8181, "total_steps": 8680, "loss": 0.7431809902191162, "lr": 1.8088786116552844e-08, "epoch": 1.8850230414746543, "percentage": 94.25, "elapsed_time": "11:31:54", "remaining_time": "0:42:12"} +{"current_steps": 8182, "total_steps": 8680, "loss": 0.8312518000602722, "lr": 1.801672116477715e-08, "epoch": 1.8852534562211982, "percentage": 94.26, "elapsed_time": "11:31:59", 
"remaining_time": "0:42:07"} +{"current_steps": 8183, "total_steps": 8680, "loss": 0.8574832081794739, "lr": 1.7944798746574285e-08, "epoch": 1.885483870967742, "percentage": 94.27, "elapsed_time": "11:32:04", "remaining_time": "0:42:02"} +{"current_steps": 8184, "total_steps": 8680, "loss": 0.7716966867446899, "lr": 1.7873018872383793e-08, "epoch": 1.8857142857142857, "percentage": 94.29, "elapsed_time": "11:32:09", "remaining_time": "0:41:56"} +{"current_steps": 8185, "total_steps": 8680, "loss": 0.8536000847816467, "lr": 1.780138155262456e-08, "epoch": 1.8859447004608296, "percentage": 94.3, "elapsed_time": "11:32:12", "remaining_time": "0:41:51"} +{"current_steps": 8186, "total_steps": 8680, "loss": 0.6559889316558838, "lr": 1.7729886797694606e-08, "epoch": 1.8861751152073731, "percentage": 94.31, "elapsed_time": "11:32:18", "remaining_time": "0:41:46"} +{"current_steps": 8187, "total_steps": 8680, "loss": 0.7371512651443481, "lr": 1.7658534617971065e-08, "epoch": 1.886405529953917, "percentage": 94.32, "elapsed_time": "11:32:24", "remaining_time": "0:41:41"} +{"current_steps": 8188, "total_steps": 8680, "loss": 0.8092008829116821, "lr": 1.7587325023810773e-08, "epoch": 1.8866359447004608, "percentage": 94.33, "elapsed_time": "11:32:28", "remaining_time": "0:41:36"} +{"current_steps": 8189, "total_steps": 8680, "loss": 0.7793067693710327, "lr": 1.751625802554979e-08, "epoch": 1.8868663594470045, "percentage": 94.34, "elapsed_time": "11:32:33", "remaining_time": "0:41:31"} +{"current_steps": 8190, "total_steps": 8680, "loss": 0.8102752566337585, "lr": 1.7445333633503312e-08, "epoch": 1.8870967741935485, "percentage": 94.35, "elapsed_time": "11:32:38", "remaining_time": "0:41:26"} +{"current_steps": 8191, "total_steps": 8680, "loss": 0.7141490578651428, "lr": 1.737455185796588e-08, "epoch": 1.8873271889400922, "percentage": 94.37, "elapsed_time": "11:32:46", "remaining_time": "0:41:21"} +{"current_steps": 8192, "total_steps": 8680, "loss": 0.8010870218276978, 
"lr": 1.7303912709211497e-08, "epoch": 1.887557603686636, "percentage": 94.38, "elapsed_time": "11:32:50", "remaining_time": "0:41:16"} +{"current_steps": 8193, "total_steps": 8680, "loss": 0.7945431470870972, "lr": 1.723341619749319e-08, "epoch": 1.8877880184331797, "percentage": 94.39, "elapsed_time": "11:32:54", "remaining_time": "0:41:11"} +{"current_steps": 8194, "total_steps": 8680, "loss": 0.765398383140564, "lr": 1.7163062333043544e-08, "epoch": 1.8880184331797234, "percentage": 94.4, "elapsed_time": "11:33:01", "remaining_time": "0:41:06"} +{"current_steps": 8195, "total_steps": 8680, "loss": 0.8645910024642944, "lr": 1.709285112607428e-08, "epoch": 1.8882488479262673, "percentage": 94.41, "elapsed_time": "11:33:05", "remaining_time": "0:41:01"} +{"current_steps": 8196, "total_steps": 8680, "loss": 0.7650351524353027, "lr": 1.7022782586776363e-08, "epoch": 1.888479262672811, "percentage": 94.42, "elapsed_time": "11:33:11", "remaining_time": "0:40:56"} +{"current_steps": 8197, "total_steps": 8680, "loss": 0.8059902191162109, "lr": 1.695285672532043e-08, "epoch": 1.8887096774193548, "percentage": 94.44, "elapsed_time": "11:33:16", "remaining_time": "0:40:51"} +{"current_steps": 8198, "total_steps": 8680, "loss": 0.8389305472373962, "lr": 1.688307355185592e-08, "epoch": 1.8889400921658988, "percentage": 94.45, "elapsed_time": "11:33:20", "remaining_time": "0:40:45"} +{"current_steps": 8199, "total_steps": 8680, "loss": 0.755578875541687, "lr": 1.681343307651173e-08, "epoch": 1.8891705069124423, "percentage": 94.46, "elapsed_time": "11:33:25", "remaining_time": "0:40:40"} +{"current_steps": 8200, "total_steps": 8680, "loss": 0.822825014591217, "lr": 1.6743935309396218e-08, "epoch": 1.8894009216589862, "percentage": 94.47, "elapsed_time": "11:33:29", "remaining_time": "0:40:35"} +{"current_steps": 8201, "total_steps": 8680, "loss": 0.8229342699050903, "lr": 1.667458026059676e-08, "epoch": 1.88963133640553, "percentage": 94.48, "elapsed_time": "11:33:37", 
"remaining_time": "0:40:30"} +{"current_steps": 8202, "total_steps": 8680, "loss": 0.7142254114151001, "lr": 1.6605367940180303e-08, "epoch": 1.8898617511520737, "percentage": 94.49, "elapsed_time": "11:33:42", "remaining_time": "0:40:25"} +{"current_steps": 8203, "total_steps": 8680, "loss": 0.8904600739479065, "lr": 1.6536298358192812e-08, "epoch": 1.8900921658986176, "percentage": 94.5, "elapsed_time": "11:33:46", "remaining_time": "0:40:20"} +{"current_steps": 8204, "total_steps": 8680, "loss": 0.8758517503738403, "lr": 1.6467371524659603e-08, "epoch": 1.8903225806451613, "percentage": 94.52, "elapsed_time": "11:33:51", "remaining_time": "0:40:15"} +{"current_steps": 8205, "total_steps": 8680, "loss": 0.7609111666679382, "lr": 1.6398587449585555e-08, "epoch": 1.890552995391705, "percentage": 94.53, "elapsed_time": "11:33:55", "remaining_time": "0:40:10"} +{"current_steps": 8206, "total_steps": 8680, "loss": 0.8177064657211304, "lr": 1.6329946142954353e-08, "epoch": 1.8907834101382488, "percentage": 94.54, "elapsed_time": "11:34:01", "remaining_time": "0:40:05"} +{"current_steps": 8207, "total_steps": 8680, "loss": 0.6342105865478516, "lr": 1.626144761472925e-08, "epoch": 1.8910138248847925, "percentage": 94.55, "elapsed_time": "11:34:06", "remaining_time": "0:40:00"} +{"current_steps": 8208, "total_steps": 8680, "loss": 0.6025499105453491, "lr": 1.6193091874852627e-08, "epoch": 1.8912442396313365, "percentage": 94.56, "elapsed_time": "11:34:11", "remaining_time": "0:39:55"} +{"current_steps": 8209, "total_steps": 8680, "loss": 0.78373783826828, "lr": 1.6124878933246543e-08, "epoch": 1.8914746543778802, "percentage": 94.57, "elapsed_time": "11:34:16", "remaining_time": "0:39:50"} +{"current_steps": 8210, "total_steps": 8680, "loss": 0.8072086572647095, "lr": 1.605680879981164e-08, "epoch": 1.891705069124424, "percentage": 94.59, "elapsed_time": "11:34:21", "remaining_time": "0:39:45"} +{"current_steps": 8211, "total_steps": 8680, "loss": 0.9057372212409973, 
"lr": 1.5988881484428453e-08, "epoch": 1.8919354838709679, "percentage": 94.6, "elapsed_time": "11:34:27", "remaining_time": "0:39:39"} +{"current_steps": 8212, "total_steps": 8680, "loss": 0.8235929012298584, "lr": 1.592109699695643e-08, "epoch": 1.8921658986175114, "percentage": 94.61, "elapsed_time": "11:34:31", "remaining_time": "0:39:34"} +{"current_steps": 8213, "total_steps": 8680, "loss": 0.6610825061798096, "lr": 1.5853455347234366e-08, "epoch": 1.8923963133640553, "percentage": 94.62, "elapsed_time": "11:34:35", "remaining_time": "0:39:29"} +{"current_steps": 8214, "total_steps": 8680, "loss": 0.7152366638183594, "lr": 1.5785956545080415e-08, "epoch": 1.892626728110599, "percentage": 94.63, "elapsed_time": "11:34:41", "remaining_time": "0:39:24"} +{"current_steps": 8215, "total_steps": 8680, "loss": 0.7971903085708618, "lr": 1.5718600600292066e-08, "epoch": 1.8928571428571428, "percentage": 94.64, "elapsed_time": "11:34:46", "remaining_time": "0:39:19"} +{"current_steps": 8216, "total_steps": 8680, "loss": 0.7639449238777161, "lr": 1.565138752264572e-08, "epoch": 1.8930875576036867, "percentage": 94.65, "elapsed_time": "11:34:50", "remaining_time": "0:39:14"} +{"current_steps": 8217, "total_steps": 8680, "loss": 0.6396117806434631, "lr": 1.5584317321897356e-08, "epoch": 1.8933179723502302, "percentage": 94.67, "elapsed_time": "11:34:55", "remaining_time": "0:39:09"} +{"current_steps": 8218, "total_steps": 8680, "loss": 0.790566086769104, "lr": 1.5517390007782183e-08, "epoch": 1.8935483870967742, "percentage": 94.68, "elapsed_time": "11:35:00", "remaining_time": "0:39:04"} +{"current_steps": 8219, "total_steps": 8680, "loss": 0.7948310971260071, "lr": 1.5450605590014544e-08, "epoch": 1.893778801843318, "percentage": 94.69, "elapsed_time": "11:35:04", "remaining_time": "0:38:59"} +{"current_steps": 8220, "total_steps": 8680, "loss": 0.9425654411315918, "lr": 1.5383964078288124e-08, "epoch": 1.8940092165898617, "percentage": 94.7, "elapsed_time": "11:35:08", 
"remaining_time": "0:38:54"} +{"current_steps": 8221, "total_steps": 8680, "loss": 0.8001678586006165, "lr": 1.531746548227586e-08, "epoch": 1.8942396313364056, "percentage": 94.71, "elapsed_time": "11:35:11", "remaining_time": "0:38:48"} +{"current_steps": 8222, "total_steps": 8680, "loss": 0.6636781692504883, "lr": 1.5251109811629915e-08, "epoch": 1.8944700460829493, "percentage": 94.72, "elapsed_time": "11:35:18", "remaining_time": "0:38:43"} +{"current_steps": 8223, "total_steps": 8680, "loss": 0.7884416580200195, "lr": 1.5184897075981807e-08, "epoch": 1.894700460829493, "percentage": 94.74, "elapsed_time": "11:35:23", "remaining_time": "0:38:38"} +{"current_steps": 8224, "total_steps": 8680, "loss": 0.6888208389282227, "lr": 1.511882728494218e-08, "epoch": 1.894930875576037, "percentage": 94.75, "elapsed_time": "11:35:28", "remaining_time": "0:38:33"} +{"current_steps": 8225, "total_steps": 8680, "loss": 0.7253614664077759, "lr": 1.5052900448100815e-08, "epoch": 1.8951612903225805, "percentage": 94.76, "elapsed_time": "11:35:33", "remaining_time": "0:38:28"} +{"current_steps": 8226, "total_steps": 8680, "loss": 0.7865983843803406, "lr": 1.498711657502716e-08, "epoch": 1.8953917050691245, "percentage": 94.77, "elapsed_time": "11:35:37", "remaining_time": "0:38:23"} +{"current_steps": 8227, "total_steps": 8680, "loss": 0.8778063654899597, "lr": 1.492147567526947e-08, "epoch": 1.8956221198156682, "percentage": 94.78, "elapsed_time": "11:35:42", "remaining_time": "0:38:18"} +{"current_steps": 8228, "total_steps": 8680, "loss": 0.7812581062316895, "lr": 1.4855977758355675e-08, "epoch": 1.895852534562212, "percentage": 94.79, "elapsed_time": "11:35:45", "remaining_time": "0:38:13"} +{"current_steps": 8229, "total_steps": 8680, "loss": 0.7160226106643677, "lr": 1.4790622833792287e-08, "epoch": 1.8960829493087559, "percentage": 94.8, "elapsed_time": "11:35:50", "remaining_time": "0:38:08"} +{"current_steps": 8230, "total_steps": 8680, "loss": 0.8187412619590759, "lr": 
1.472541091106594e-08, "epoch": 1.8963133640552994, "percentage": 94.82, "elapsed_time": "11:35:55", "remaining_time": "0:38:03"} +{"current_steps": 8231, "total_steps": 8680, "loss": 0.7517846822738647, "lr": 1.4660341999641834e-08, "epoch": 1.8965437788018433, "percentage": 94.83, "elapsed_time": "11:35:59", "remaining_time": "0:37:57"} +{"current_steps": 8232, "total_steps": 8680, "loss": 0.9230127334594727, "lr": 1.4595416108964753e-08, "epoch": 1.896774193548387, "percentage": 94.84, "elapsed_time": "11:36:04", "remaining_time": "0:37:52"} +{"current_steps": 8233, "total_steps": 8680, "loss": 0.6803582906723022, "lr": 1.4530633248458269e-08, "epoch": 1.8970046082949308, "percentage": 94.85, "elapsed_time": "11:36:08", "remaining_time": "0:37:47"} +{"current_steps": 8234, "total_steps": 8680, "loss": 0.8444511294364929, "lr": 1.4465993427525968e-08, "epoch": 1.8972350230414747, "percentage": 94.86, "elapsed_time": "11:36:15", "remaining_time": "0:37:42"} +{"current_steps": 8235, "total_steps": 8680, "loss": 0.7622519731521606, "lr": 1.4401496655550016e-08, "epoch": 1.8974654377880185, "percentage": 94.87, "elapsed_time": "11:36:19", "remaining_time": "0:37:37"} +{"current_steps": 8236, "total_steps": 8680, "loss": 0.687129020690918, "lr": 1.4337142941892033e-08, "epoch": 1.8976958525345622, "percentage": 94.88, "elapsed_time": "11:36:24", "remaining_time": "0:37:32"} +{"current_steps": 8237, "total_steps": 8680, "loss": 0.6421219110488892, "lr": 1.4272932295892992e-08, "epoch": 1.8979262672811061, "percentage": 94.9, "elapsed_time": "11:36:29", "remaining_time": "0:37:27"} +{"current_steps": 8238, "total_steps": 8680, "loss": 0.7829388380050659, "lr": 1.4208864726872772e-08, "epoch": 1.8981566820276496, "percentage": 94.91, "elapsed_time": "11:36:34", "remaining_time": "0:37:22"} +{"current_steps": 8239, "total_steps": 8680, "loss": 0.7754424810409546, "lr": 1.4144940244130821e-08, "epoch": 1.8983870967741936, "percentage": 94.92, "elapsed_time": "11:36:40", 
"remaining_time": "0:37:17"} +{"current_steps": 8240, "total_steps": 8680, "loss": 0.6544859409332275, "lr": 1.4081158856945719e-08, "epoch": 1.8986175115207373, "percentage": 94.93, "elapsed_time": "11:36:45", "remaining_time": "0:37:12"} +{"current_steps": 8241, "total_steps": 8680, "loss": 0.8020427227020264, "lr": 1.4017520574575282e-08, "epoch": 1.898847926267281, "percentage": 94.94, "elapsed_time": "11:36:49", "remaining_time": "0:37:07"} +{"current_steps": 8242, "total_steps": 8680, "loss": 0.7343212366104126, "lr": 1.3954025406256343e-08, "epoch": 1.899078341013825, "percentage": 94.95, "elapsed_time": "11:36:53", "remaining_time": "0:37:02"} +{"current_steps": 8243, "total_steps": 8680, "loss": 0.7643232345581055, "lr": 1.3890673361205418e-08, "epoch": 1.8993087557603685, "percentage": 94.97, "elapsed_time": "11:36:57", "remaining_time": "0:36:56"} +{"current_steps": 8244, "total_steps": 8680, "loss": 0.7806165814399719, "lr": 1.3827464448617709e-08, "epoch": 1.8995391705069125, "percentage": 94.98, "elapsed_time": "11:37:03", "remaining_time": "0:36:51"} +{"current_steps": 8245, "total_steps": 8680, "loss": 0.8533280491828918, "lr": 1.3764398677667988e-08, "epoch": 1.8997695852534562, "percentage": 94.99, "elapsed_time": "11:37:08", "remaining_time": "0:36:46"} +{"current_steps": 8246, "total_steps": 8680, "loss": 0.773565411567688, "lr": 1.3701476057510264e-08, "epoch": 1.9, "percentage": 95.0, "elapsed_time": "11:37:12", "remaining_time": "0:36:41"} +{"current_steps": 8247, "total_steps": 8680, "loss": 0.7752503752708435, "lr": 1.3638696597277677e-08, "epoch": 1.9002304147465439, "percentage": 95.01, "elapsed_time": "11:37:17", "remaining_time": "0:36:36"} +{"current_steps": 8248, "total_steps": 8680, "loss": 0.7466747760772705, "lr": 1.3576060306082383e-08, "epoch": 1.9004608294930876, "percentage": 95.02, "elapsed_time": "11:37:23", "remaining_time": "0:36:31"} +{"current_steps": 8249, "total_steps": 8680, "loss": 0.8103033304214478, "lr": 
1.3513567193016106e-08, "epoch": 1.9006912442396313, "percentage": 95.03, "elapsed_time": "11:37:27", "remaining_time": "0:36:26"} +{"current_steps": 8250, "total_steps": 8680, "loss": 0.6501287817955017, "lr": 1.3451217267149595e-08, "epoch": 1.9009216589861753, "percentage": 95.05, "elapsed_time": "11:37:32", "remaining_time": "0:36:21"} +{"current_steps": 8251, "total_steps": 8680, "loss": 0.7329230308532715, "lr": 1.3389010537532941e-08, "epoch": 1.9011520737327188, "percentage": 95.06, "elapsed_time": "11:37:37", "remaining_time": "0:36:16"} +{"current_steps": 8252, "total_steps": 8680, "loss": 0.8413917422294617, "lr": 1.3326947013195255e-08, "epoch": 1.9013824884792627, "percentage": 95.07, "elapsed_time": "11:37:41", "remaining_time": "0:36:11"} +{"current_steps": 8253, "total_steps": 8680, "loss": 0.7283090353012085, "lr": 1.3265026703144999e-08, "epoch": 1.9016129032258065, "percentage": 95.08, "elapsed_time": "11:37:46", "remaining_time": "0:36:06"} +{"current_steps": 8254, "total_steps": 8680, "loss": 0.8378126621246338, "lr": 1.3203249616369872e-08, "epoch": 1.9018433179723502, "percentage": 95.09, "elapsed_time": "11:37:51", "remaining_time": "0:36:01"} +{"current_steps": 8255, "total_steps": 8680, "loss": 0.7675777673721313, "lr": 1.3141615761836811e-08, "epoch": 1.9020737327188941, "percentage": 95.1, "elapsed_time": "11:37:56", "remaining_time": "0:35:55"} +{"current_steps": 8256, "total_steps": 8680, "loss": 0.6448104381561279, "lr": 1.308012514849155e-08, "epoch": 1.9023041474654376, "percentage": 95.12, "elapsed_time": "11:38:01", "remaining_time": "0:35:50"} +{"current_steps": 8257, "total_steps": 8680, "loss": 0.8024395704269409, "lr": 1.3018777785259838e-08, "epoch": 1.9025345622119816, "percentage": 95.13, "elapsed_time": "11:38:07", "remaining_time": "0:35:45"} +{"current_steps": 8258, "total_steps": 8680, "loss": 0.8159325122833252, "lr": 1.2957573681045887e-08, "epoch": 1.9027649769585253, "percentage": 95.14, "elapsed_time": "11:38:11", 
"remaining_time": "0:35:40"} +{"current_steps": 8259, "total_steps": 8680, "loss": 0.7916233539581299, "lr": 1.2896512844733365e-08, "epoch": 1.902995391705069, "percentage": 95.15, "elapsed_time": "11:38:16", "remaining_time": "0:35:35"} +{"current_steps": 8260, "total_steps": 8680, "loss": 0.798140823841095, "lr": 1.2835595285185296e-08, "epoch": 1.903225806451613, "percentage": 95.16, "elapsed_time": "11:38:21", "remaining_time": "0:35:30"} +{"current_steps": 8261, "total_steps": 8680, "loss": 0.7881651520729065, "lr": 1.277482101124383e-08, "epoch": 1.9034562211981567, "percentage": 95.17, "elapsed_time": "11:38:26", "remaining_time": "0:35:25"} +{"current_steps": 8262, "total_steps": 8680, "loss": 0.7023189663887024, "lr": 1.2714190031730021e-08, "epoch": 1.9036866359447004, "percentage": 95.18, "elapsed_time": "11:38:31", "remaining_time": "0:35:20"} +{"current_steps": 8263, "total_steps": 8680, "loss": 0.8286309242248535, "lr": 1.2653702355444606e-08, "epoch": 1.9039170506912444, "percentage": 95.2, "elapsed_time": "11:38:35", "remaining_time": "0:35:15"} +{"current_steps": 8264, "total_steps": 8680, "loss": 0.7626973986625671, "lr": 1.259335799116723e-08, "epoch": 1.904147465437788, "percentage": 95.21, "elapsed_time": "11:38:40", "remaining_time": "0:35:10"} +{"current_steps": 8265, "total_steps": 8680, "loss": 1.0350267887115479, "lr": 1.2533156947656665e-08, "epoch": 1.9043778801843319, "percentage": 95.22, "elapsed_time": "11:38:43", "remaining_time": "0:35:05"} +{"current_steps": 8266, "total_steps": 8680, "loss": 0.6378228664398193, "lr": 1.2473099233651251e-08, "epoch": 1.9046082949308756, "percentage": 95.23, "elapsed_time": "11:38:49", "remaining_time": "0:35:00"} +{"current_steps": 8267, "total_steps": 8680, "loss": 0.8265732526779175, "lr": 1.2413184857868241e-08, "epoch": 1.9048387096774193, "percentage": 95.24, "elapsed_time": "11:38:56", "remaining_time": "0:34:55"} +{"current_steps": 8268, "total_steps": 8680, "loss": 0.8545348644256592, 
"lr": 1.23534138290039e-08, "epoch": 1.9050691244239633, "percentage": 95.25, "elapsed_time": "11:39:02", "remaining_time": "0:34:50"} +{"current_steps": 8269, "total_steps": 8680, "loss": 0.660080075263977, "lr": 1.2293786155734176e-08, "epoch": 1.9052995391705068, "percentage": 95.26, "elapsed_time": "11:39:06", "remaining_time": "0:34:44"} +{"current_steps": 8270, "total_steps": 8680, "loss": 0.8409689664840698, "lr": 1.2234301846713813e-08, "epoch": 1.9055299539170507, "percentage": 95.28, "elapsed_time": "11:39:10", "remaining_time": "0:34:39"} +{"current_steps": 8271, "total_steps": 8680, "loss": 0.8026434183120728, "lr": 1.2174960910576904e-08, "epoch": 1.9057603686635944, "percentage": 95.29, "elapsed_time": "11:39:15", "remaining_time": "0:34:34"} +{"current_steps": 8272, "total_steps": 8680, "loss": 0.8315812945365906, "lr": 1.2115763355936671e-08, "epoch": 1.9059907834101382, "percentage": 95.3, "elapsed_time": "11:39:20", "remaining_time": "0:34:29"} +{"current_steps": 8273, "total_steps": 8680, "loss": 0.7373194694519043, "lr": 1.2056709191385572e-08, "epoch": 1.9062211981566821, "percentage": 95.31, "elapsed_time": "11:39:24", "remaining_time": "0:34:24"} +{"current_steps": 8274, "total_steps": 8680, "loss": 0.7502317428588867, "lr": 1.1997798425495309e-08, "epoch": 1.9064516129032258, "percentage": 95.32, "elapsed_time": "11:39:29", "remaining_time": "0:34:19"} +{"current_steps": 8275, "total_steps": 8680, "loss": 0.8208760023117065, "lr": 1.1939031066816707e-08, "epoch": 1.9066820276497696, "percentage": 95.33, "elapsed_time": "11:39:34", "remaining_time": "0:34:14"} +{"current_steps": 8276, "total_steps": 8680, "loss": 0.7584094405174255, "lr": 1.188040712387961e-08, "epoch": 1.9069124423963135, "percentage": 95.35, "elapsed_time": "11:39:38", "remaining_time": "0:34:09"} +{"current_steps": 8277, "total_steps": 8680, "loss": 0.7776647210121155, "lr": 1.1821926605193433e-08, "epoch": 1.907142857142857, "percentage": 95.36, "elapsed_time": 
"11:39:43", "remaining_time": "0:34:04"} +{"current_steps": 8278, "total_steps": 8680, "loss": 0.7739659547805786, "lr": 1.1763589519246387e-08, "epoch": 1.907373271889401, "percentage": 95.37, "elapsed_time": "11:39:47", "remaining_time": "0:33:59"} +{"current_steps": 8279, "total_steps": 8680, "loss": 0.7276068925857544, "lr": 1.170539587450603e-08, "epoch": 1.9076036866359447, "percentage": 95.38, "elapsed_time": "11:39:52", "remaining_time": "0:33:53"} +{"current_steps": 8280, "total_steps": 8680, "loss": 0.624208927154541, "lr": 1.1647345679419163e-08, "epoch": 1.9078341013824884, "percentage": 95.39, "elapsed_time": "11:39:59", "remaining_time": "0:33:48"} +{"current_steps": 8281, "total_steps": 8680, "loss": 0.7865229845046997, "lr": 1.1589438942411712e-08, "epoch": 1.9080645161290324, "percentage": 95.4, "elapsed_time": "11:40:03", "remaining_time": "0:33:43"} +{"current_steps": 8282, "total_steps": 8680, "loss": 0.8290715217590332, "lr": 1.1531675671888619e-08, "epoch": 1.908294930875576, "percentage": 95.41, "elapsed_time": "11:40:08", "remaining_time": "0:33:38"} +{"current_steps": 8283, "total_steps": 8680, "loss": 0.8750064969062805, "lr": 1.1474055876234289e-08, "epoch": 1.9085253456221198, "percentage": 95.43, "elapsed_time": "11:40:13", "remaining_time": "0:33:33"} +{"current_steps": 8284, "total_steps": 8680, "loss": 0.7946900129318237, "lr": 1.1416579563812146e-08, "epoch": 1.9087557603686636, "percentage": 95.44, "elapsed_time": "11:40:18", "remaining_time": "0:33:28"} +{"current_steps": 8285, "total_steps": 8680, "loss": 0.6673855781555176, "lr": 1.1359246742964623e-08, "epoch": 1.9089861751152073, "percentage": 95.45, "elapsed_time": "11:40:23", "remaining_time": "0:33:23"} +{"current_steps": 8286, "total_steps": 8680, "loss": 0.8423609137535095, "lr": 1.1302057422013734e-08, "epoch": 1.9092165898617512, "percentage": 95.46, "elapsed_time": "11:40:27", "remaining_time": "0:33:18"} +{"current_steps": 8287, "total_steps": 8680, "loss": 
0.7583299279212952, "lr": 1.124501160926039e-08, "epoch": 1.909447004608295, "percentage": 95.47, "elapsed_time": "11:40:31", "remaining_time": "0:33:13"} +{"current_steps": 8288, "total_steps": 8680, "loss": 0.8489730358123779, "lr": 1.1188109312984639e-08, "epoch": 1.9096774193548387, "percentage": 95.48, "elapsed_time": "11:40:36", "remaining_time": "0:33:08"} +{"current_steps": 8289, "total_steps": 8680, "loss": 0.7460636496543884, "lr": 1.1131350541445871e-08, "epoch": 1.9099078341013827, "percentage": 95.5, "elapsed_time": "11:40:41", "remaining_time": "0:33:03"} +{"current_steps": 8290, "total_steps": 8680, "loss": 0.7310905456542969, "lr": 1.1074735302882387e-08, "epoch": 1.9101382488479262, "percentage": 95.51, "elapsed_time": "11:40:46", "remaining_time": "0:32:58"} +{"current_steps": 8291, "total_steps": 8680, "loss": 0.8411405086517334, "lr": 1.1018263605511946e-08, "epoch": 1.91036866359447, "percentage": 95.52, "elapsed_time": "11:40:50", "remaining_time": "0:32:52"} +{"current_steps": 8292, "total_steps": 8680, "loss": 0.7980802059173584, "lr": 1.0961935457531323e-08, "epoch": 1.9105990783410138, "percentage": 95.53, "elapsed_time": "11:40:56", "remaining_time": "0:32:47"} +{"current_steps": 8293, "total_steps": 8680, "loss": 0.779492974281311, "lr": 1.0905750867116426e-08, "epoch": 1.9108294930875576, "percentage": 95.54, "elapsed_time": "11:41:02", "remaining_time": "0:32:42"} +{"current_steps": 8294, "total_steps": 8680, "loss": 0.7893733978271484, "lr": 1.0849709842422283e-08, "epoch": 1.9110599078341015, "percentage": 95.55, "elapsed_time": "11:41:07", "remaining_time": "0:32:37"} +{"current_steps": 8295, "total_steps": 8680, "loss": 0.8281872272491455, "lr": 1.07938123915835e-08, "epoch": 1.911290322580645, "percentage": 95.56, "elapsed_time": "11:41:12", "remaining_time": "0:32:32"} +{"current_steps": 8296, "total_steps": 8680, "loss": 0.721331775188446, "lr": 1.0738058522713144e-08, "epoch": 1.911520737327189, "percentage": 95.58, 
"elapsed_time": "11:41:17", "remaining_time": "0:32:27"} +{"current_steps": 8297, "total_steps": 8680, "loss": 0.6043491363525391, "lr": 1.0682448243904073e-08, "epoch": 1.9117511520737327, "percentage": 95.59, "elapsed_time": "11:41:24", "remaining_time": "0:32:22"} +{"current_steps": 8298, "total_steps": 8680, "loss": 0.7737481594085693, "lr": 1.0626981563227943e-08, "epoch": 1.9119815668202764, "percentage": 95.6, "elapsed_time": "11:41:30", "remaining_time": "0:32:17"} +{"current_steps": 8299, "total_steps": 8680, "loss": 0.771499514579773, "lr": 1.0571658488735536e-08, "epoch": 1.9122119815668204, "percentage": 95.61, "elapsed_time": "11:41:34", "remaining_time": "0:32:12"} +{"current_steps": 8300, "total_steps": 8680, "loss": 0.6711971759796143, "lr": 1.0516479028457204e-08, "epoch": 1.912442396313364, "percentage": 95.62, "elapsed_time": "11:41:40", "remaining_time": "0:32:07"} +{"current_steps": 8301, "total_steps": 8680, "loss": 0.691685140132904, "lr": 1.0461443190402097e-08, "epoch": 1.9126728110599078, "percentage": 95.63, "elapsed_time": "11:41:50", "remaining_time": "0:32:02"} +{"current_steps": 8302, "total_steps": 8680, "loss": 0.7339159250259399, "lr": 1.0406550982558382e-08, "epoch": 1.9129032258064518, "percentage": 95.65, "elapsed_time": "11:41:57", "remaining_time": "0:31:57"} +{"current_steps": 8303, "total_steps": 8680, "loss": 0.7832008600234985, "lr": 1.0351802412893796e-08, "epoch": 1.9131336405529953, "percentage": 95.66, "elapsed_time": "11:42:04", "remaining_time": "0:31:52"} +{"current_steps": 8304, "total_steps": 8680, "loss": 0.862671971321106, "lr": 1.0297197489355092e-08, "epoch": 1.9133640552995392, "percentage": 95.67, "elapsed_time": "11:42:08", "remaining_time": "0:31:47"} +{"current_steps": 8305, "total_steps": 8680, "loss": 0.6442357897758484, "lr": 1.0242736219867821e-08, "epoch": 1.913594470046083, "percentage": 95.68, "elapsed_time": "11:42:14", "remaining_time": "0:31:42"} +{"current_steps": 8306, "total_steps": 8680, 
"loss": 0.8777452707290649, "lr": 1.0188418612337102e-08, "epoch": 1.9138248847926267, "percentage": 95.69, "elapsed_time": "11:42:19", "remaining_time": "0:31:37"} +{"current_steps": 8307, "total_steps": 8680, "loss": 0.7672470808029175, "lr": 1.0134244674647186e-08, "epoch": 1.9140552995391706, "percentage": 95.7, "elapsed_time": "11:42:25", "remaining_time": "0:31:32"} +{"current_steps": 8308, "total_steps": 8680, "loss": 0.7338177561759949, "lr": 1.0080214414661226e-08, "epoch": 1.9142857142857141, "percentage": 95.71, "elapsed_time": "11:42:29", "remaining_time": "0:31:27"} +{"current_steps": 8309, "total_steps": 8680, "loss": 0.7546414136886597, "lr": 1.0026327840221727e-08, "epoch": 1.914516129032258, "percentage": 95.73, "elapsed_time": "11:42:35", "remaining_time": "0:31:22"} +{"current_steps": 8310, "total_steps": 8680, "loss": 0.621455192565918, "lr": 9.972584959149988e-09, "epoch": 1.9147465437788018, "percentage": 95.74, "elapsed_time": "11:42:40", "remaining_time": "0:31:17"} +{"current_steps": 8311, "total_steps": 8680, "loss": 0.7403131723403931, "lr": 9.918985779247102e-09, "epoch": 1.9149769585253456, "percentage": 95.75, "elapsed_time": "11:42:45", "remaining_time": "0:31:12"} +{"current_steps": 8312, "total_steps": 8680, "loss": 0.7924279570579529, "lr": 9.865530308292624e-09, "epoch": 1.9152073732718895, "percentage": 95.76, "elapsed_time": "11:42:51", "remaining_time": "0:31:07"} +{"current_steps": 8313, "total_steps": 8680, "loss": 0.8831228017807007, "lr": 9.81221855404568e-09, "epoch": 1.9154377880184332, "percentage": 95.77, "elapsed_time": "11:42:56", "remaining_time": "0:31:02"} +{"current_steps": 8314, "total_steps": 8680, "loss": 0.6786219477653503, "lr": 9.759050524244417e-09, "epoch": 1.915668202764977, "percentage": 95.78, "elapsed_time": "11:43:02", "remaining_time": "0:30:56"} +{"current_steps": 8315, "total_steps": 8680, "loss": 0.7311046123504639, "lr": 9.70602622660599e-09, "epoch": 1.9158986175115207, "percentage": 95.79, 
"elapsed_time": "11:43:08", "remaining_time": "0:30:51"} +{"current_steps": 8316, "total_steps": 8680, "loss": 0.8914301991462708, "lr": 9.653145668826912e-09, "epoch": 1.9161290322580644, "percentage": 95.81, "elapsed_time": "11:43:12", "remaining_time": "0:30:46"} +{"current_steps": 8317, "total_steps": 8680, "loss": 0.8144292831420898, "lr": 9.600408858582709e-09, "epoch": 1.9163594470046084, "percentage": 95.82, "elapsed_time": "11:43:17", "remaining_time": "0:30:41"} +{"current_steps": 8318, "total_steps": 8680, "loss": 0.6670823097229004, "lr": 9.547815803528036e-09, "epoch": 1.916589861751152, "percentage": 95.83, "elapsed_time": "11:43:23", "remaining_time": "0:30:36"} +{"current_steps": 8319, "total_steps": 8680, "loss": 0.6801552772521973, "lr": 9.495366511296676e-09, "epoch": 1.9168202764976958, "percentage": 95.84, "elapsed_time": "11:43:27", "remaining_time": "0:30:31"} +{"current_steps": 8320, "total_steps": 8680, "loss": 0.8144240379333496, "lr": 9.44306098950165e-09, "epoch": 1.9170506912442398, "percentage": 95.85, "elapsed_time": "11:43:32", "remaining_time": "0:30:26"} +{"current_steps": 8321, "total_steps": 8680, "loss": 0.6352888345718384, "lr": 9.390899245734995e-09, "epoch": 1.9172811059907833, "percentage": 95.86, "elapsed_time": "11:43:36", "remaining_time": "0:30:21"} +{"current_steps": 8322, "total_steps": 8680, "loss": 0.7513711452484131, "lr": 9.33888128756788e-09, "epoch": 1.9175115207373272, "percentage": 95.88, "elapsed_time": "11:43:42", "remaining_time": "0:30:16"} +{"current_steps": 8323, "total_steps": 8680, "loss": 0.7699171304702759, "lr": 9.287007122550705e-09, "epoch": 1.917741935483871, "percentage": 95.89, "elapsed_time": "11:43:46", "remaining_time": "0:30:11"} +{"current_steps": 8324, "total_steps": 8680, "loss": 0.8321002721786499, "lr": 9.235276758212895e-09, "epoch": 1.9179723502304147, "percentage": 95.9, "elapsed_time": "11:43:50", "remaining_time": "0:30:06"} +{"current_steps": 8325, "total_steps": 8680, "loss": 
0.6815298795700073, "lr": 9.183690202062999e-09, "epoch": 1.9182027649769586, "percentage": 95.91, "elapsed_time": "11:43:55", "remaining_time": "0:30:01"} +{"current_steps": 8326, "total_steps": 8680, "loss": 0.7135178446769714, "lr": 9.132247461588915e-09, "epoch": 1.9184331797235024, "percentage": 95.92, "elapsed_time": "11:44:01", "remaining_time": "0:29:56"} +{"current_steps": 8327, "total_steps": 8680, "loss": 0.8452005982398987, "lr": 9.080948544257338e-09, "epoch": 1.918663594470046, "percentage": 95.93, "elapsed_time": "11:44:05", "remaining_time": "0:29:50"} +{"current_steps": 8328, "total_steps": 8680, "loss": 0.7449440956115723, "lr": 9.029793457514312e-09, "epoch": 1.9188940092165898, "percentage": 95.94, "elapsed_time": "11:44:11", "remaining_time": "0:29:45"} +{"current_steps": 8329, "total_steps": 8680, "loss": 0.8172955513000488, "lr": 8.978782208784897e-09, "epoch": 1.9191244239631335, "percentage": 95.96, "elapsed_time": "11:44:16", "remaining_time": "0:29:40"} +{"current_steps": 8330, "total_steps": 8680, "loss": 0.6682305335998535, "lr": 8.92791480547317e-09, "epoch": 1.9193548387096775, "percentage": 95.97, "elapsed_time": "11:44:22", "remaining_time": "0:29:35"} +{"current_steps": 8331, "total_steps": 8680, "loss": 0.6874973773956299, "lr": 8.877191254962779e-09, "epoch": 1.9195852534562212, "percentage": 95.98, "elapsed_time": "11:44:27", "remaining_time": "0:29:30"} +{"current_steps": 8332, "total_steps": 8680, "loss": 0.8371694684028625, "lr": 8.826611564615949e-09, "epoch": 1.919815668202765, "percentage": 95.99, "elapsed_time": "11:44:32", "remaining_time": "0:29:25"} +{"current_steps": 8333, "total_steps": 8680, "loss": 0.7147493362426758, "lr": 8.77617574177425e-09, "epoch": 1.920046082949309, "percentage": 96.0, "elapsed_time": "11:44:37", "remaining_time": "0:29:20"} +{"current_steps": 8334, "total_steps": 8680, "loss": 0.7444115877151489, "lr": 8.725883793758382e-09, "epoch": 1.9202764976958524, "percentage": 96.01, "elapsed_time": 
"11:44:42", "remaining_time": "0:29:15"} +{"current_steps": 8335, "total_steps": 8680, "loss": 0.7772307395935059, "lr": 8.675735727868283e-09, "epoch": 1.9205069124423964, "percentage": 96.03, "elapsed_time": "11:44:48", "remaining_time": "0:29:10"} +{"current_steps": 8336, "total_steps": 8680, "loss": 0.702937126159668, "lr": 8.625731551382798e-09, "epoch": 1.92073732718894, "percentage": 96.04, "elapsed_time": "11:44:52", "remaining_time": "0:29:05"} +{"current_steps": 8337, "total_steps": 8680, "loss": 0.7404709458351135, "lr": 8.575871271559898e-09, "epoch": 1.9209677419354838, "percentage": 96.05, "elapsed_time": "11:44:56", "remaining_time": "0:29:00"} +{"current_steps": 8338, "total_steps": 8680, "loss": 0.7142058610916138, "lr": 8.526154895636906e-09, "epoch": 1.9211981566820278, "percentage": 96.06, "elapsed_time": "11:45:01", "remaining_time": "0:28:55"} +{"current_steps": 8339, "total_steps": 8680, "loss": 0.8950545191764832, "lr": 8.476582430830048e-09, "epoch": 1.9214285714285713, "percentage": 96.07, "elapsed_time": "11:45:06", "remaining_time": "0:28:49"} +{"current_steps": 8340, "total_steps": 8680, "loss": 0.6939054131507874, "lr": 8.42715388433446e-09, "epoch": 1.9216589861751152, "percentage": 96.08, "elapsed_time": "11:45:10", "remaining_time": "0:28:44"} +{"current_steps": 8341, "total_steps": 8680, "loss": 0.7916324138641357, "lr": 8.377869263324954e-09, "epoch": 1.921889400921659, "percentage": 96.09, "elapsed_time": "11:45:15", "remaining_time": "0:28:39"} +{"current_steps": 8342, "total_steps": 8680, "loss": 0.8059754371643066, "lr": 8.328728574954924e-09, "epoch": 1.9221198156682027, "percentage": 96.11, "elapsed_time": "11:45:20", "remaining_time": "0:28:34"} +{"current_steps": 8343, "total_steps": 8680, "loss": 0.650648295879364, "lr": 8.279731826357105e-09, "epoch": 1.9223502304147466, "percentage": 96.12, "elapsed_time": "11:45:25", "remaining_time": "0:28:29"} +{"current_steps": 8344, "total_steps": 8680, "loss": 0.6912552118301392, 
"lr": 8.230879024643478e-09, "epoch": 1.9225806451612903, "percentage": 96.13, "elapsed_time": "11:45:32", "remaining_time": "0:28:24"} +{"current_steps": 8345, "total_steps": 8680, "loss": 0.7430927753448486, "lr": 8.182170176904702e-09, "epoch": 1.922811059907834, "percentage": 96.14, "elapsed_time": "11:45:37", "remaining_time": "0:28:19"} +{"current_steps": 8346, "total_steps": 8680, "loss": 0.7550772428512573, "lr": 8.133605290210898e-09, "epoch": 1.923041474654378, "percentage": 96.15, "elapsed_time": "11:45:42", "remaining_time": "0:28:14"} +{"current_steps": 8347, "total_steps": 8680, "loss": 0.7235819101333618, "lr": 8.08518437161132e-09, "epoch": 1.9232718894009215, "percentage": 96.16, "elapsed_time": "11:45:47", "remaining_time": "0:28:09"} +{"current_steps": 8348, "total_steps": 8680, "loss": 0.790582537651062, "lr": 8.036907428134121e-09, "epoch": 1.9235023041474655, "percentage": 96.18, "elapsed_time": "11:45:53", "remaining_time": "0:28:04"} +{"current_steps": 8349, "total_steps": 8680, "loss": 0.7350871562957764, "lr": 7.988774466786585e-09, "epoch": 1.9237327188940092, "percentage": 96.19, "elapsed_time": "11:45:59", "remaining_time": "0:27:59"} +{"current_steps": 8350, "total_steps": 8680, "loss": 0.86177659034729, "lr": 7.940785494555124e-09, "epoch": 1.923963133640553, "percentage": 96.2, "elapsed_time": "11:46:03", "remaining_time": "0:27:54"} +{"current_steps": 8351, "total_steps": 8680, "loss": 0.8039232492446899, "lr": 7.892940518405499e-09, "epoch": 1.9241935483870969, "percentage": 96.21, "elapsed_time": "11:46:07", "remaining_time": "0:27:49"} +{"current_steps": 8352, "total_steps": 8680, "loss": 0.7130967378616333, "lr": 7.845239545282046e-09, "epoch": 1.9244239631336404, "percentage": 96.22, "elapsed_time": "11:46:12", "remaining_time": "0:27:44"} +{"current_steps": 8353, "total_steps": 8680, "loss": 0.7297911047935486, "lr": 7.797682582108667e-09, "epoch": 1.9246543778801843, "percentage": 96.23, "elapsed_time": "11:46:19", 
"remaining_time": "0:27:39"} +{"current_steps": 8354, "total_steps": 8680, "loss": 0.7302875518798828, "lr": 7.750269635788065e-09, "epoch": 1.924884792626728, "percentage": 96.24, "elapsed_time": "11:46:25", "remaining_time": "0:27:34"} +{"current_steps": 8355, "total_steps": 8680, "loss": 0.7976555824279785, "lr": 7.703000713202401e-09, "epoch": 1.9251152073732718, "percentage": 96.26, "elapsed_time": "11:46:32", "remaining_time": "0:27:29"} +{"current_steps": 8356, "total_steps": 8680, "loss": 0.6747829914093018, "lr": 7.65587582121252e-09, "epoch": 1.9253456221198157, "percentage": 96.27, "elapsed_time": "11:46:38", "remaining_time": "0:27:23"} +{"current_steps": 8357, "total_steps": 8680, "loss": 0.7217142581939697, "lr": 7.608894966658509e-09, "epoch": 1.9255760368663595, "percentage": 96.28, "elapsed_time": "11:46:45", "remaining_time": "0:27:18"} +{"current_steps": 8358, "total_steps": 8680, "loss": 0.8635888695716858, "lr": 7.562058156359685e-09, "epoch": 1.9258064516129032, "percentage": 96.29, "elapsed_time": "11:46:50", "remaining_time": "0:27:13"} +{"current_steps": 8359, "total_steps": 8680, "loss": 0.8435994386672974, "lr": 7.515365397114282e-09, "epoch": 1.9260368663594472, "percentage": 96.3, "elapsed_time": "11:46:55", "remaining_time": "0:27:08"} +{"current_steps": 8360, "total_steps": 8680, "loss": 0.8632286787033081, "lr": 7.468816695699653e-09, "epoch": 1.9262672811059907, "percentage": 96.31, "elapsed_time": "11:47:01", "remaining_time": "0:27:03"} +{"current_steps": 8361, "total_steps": 8680, "loss": 0.7916556596755981, "lr": 7.422412058872396e-09, "epoch": 1.9264976958525346, "percentage": 96.32, "elapsed_time": "11:47:07", "remaining_time": "0:26:58"} +{"current_steps": 8362, "total_steps": 8680, "loss": 0.8307663202285767, "lr": 7.376151493368121e-09, "epoch": 1.9267281105990783, "percentage": 96.34, "elapsed_time": "11:47:12", "remaining_time": "0:26:53"} +{"current_steps": 8363, "total_steps": 8680, "loss": 0.9020388126373291, "lr": 
7.330035005901236e-09, "epoch": 1.926958525345622, "percentage": 96.35, "elapsed_time": "11:47:18", "remaining_time": "0:26:48"} +{"current_steps": 8364, "total_steps": 8680, "loss": 0.7926114797592163, "lr": 7.28406260316572e-09, "epoch": 1.927188940092166, "percentage": 96.36, "elapsed_time": "11:47:23", "remaining_time": "0:26:43"} +{"current_steps": 8365, "total_steps": 8680, "loss": 0.7609784603118896, "lr": 7.2382342918343446e-09, "epoch": 1.9274193548387095, "percentage": 96.37, "elapsed_time": "11:47:28", "remaining_time": "0:26:38"} +{"current_steps": 8366, "total_steps": 8680, "loss": 0.6010490655899048, "lr": 7.192550078559012e-09, "epoch": 1.9276497695852535, "percentage": 96.38, "elapsed_time": "11:47:34", "remaining_time": "0:26:33"} +{"current_steps": 8367, "total_steps": 8680, "loss": 0.8219606876373291, "lr": 7.147009969970641e-09, "epoch": 1.9278801843317972, "percentage": 96.39, "elapsed_time": "11:47:41", "remaining_time": "0:26:28"} +{"current_steps": 8368, "total_steps": 8680, "loss": 0.8688151836395264, "lr": 7.101613972679499e-09, "epoch": 1.928110599078341, "percentage": 96.41, "elapsed_time": "11:47:46", "remaining_time": "0:26:23"} +{"current_steps": 8369, "total_steps": 8680, "loss": 0.7654411792755127, "lr": 7.0563620932747595e-09, "epoch": 1.9283410138248849, "percentage": 96.42, "elapsed_time": "11:47:53", "remaining_time": "0:26:18"} +{"current_steps": 8370, "total_steps": 8680, "loss": 0.6878413558006287, "lr": 7.01125433832439e-09, "epoch": 1.9285714285714286, "percentage": 96.43, "elapsed_time": "11:47:59", "remaining_time": "0:26:13"} +{"current_steps": 8371, "total_steps": 8680, "loss": 0.6703332662582397, "lr": 6.966290714375933e-09, "epoch": 1.9288018433179723, "percentage": 96.44, "elapsed_time": "11:48:05", "remaining_time": "0:26:08"} +{"current_steps": 8372, "total_steps": 8680, "loss": 0.752200722694397, "lr": 6.921471227955833e-09, "epoch": 1.9290322580645163, "percentage": 96.45, "elapsed_time": "11:48:11", 
"remaining_time": "0:26:03"} +{"current_steps": 8373, "total_steps": 8680, "loss": 0.8107069730758667, "lr": 6.8767958855695526e-09, "epoch": 1.9292626728110598, "percentage": 96.46, "elapsed_time": "11:48:17", "remaining_time": "0:25:58"} +{"current_steps": 8374, "total_steps": 8680, "loss": 0.8816967010498047, "lr": 6.832264693701573e-09, "epoch": 1.9294930875576037, "percentage": 96.47, "elapsed_time": "11:48:23", "remaining_time": "0:25:53"} +{"current_steps": 8375, "total_steps": 8680, "loss": 0.7889697551727295, "lr": 6.78787765881561e-09, "epoch": 1.9297235023041475, "percentage": 96.49, "elapsed_time": "11:48:29", "remaining_time": "0:25:48"} +{"current_steps": 8376, "total_steps": 8680, "loss": 0.7218060493469238, "lr": 6.743634787354291e-09, "epoch": 1.9299539170506912, "percentage": 96.5, "elapsed_time": "11:48:35", "remaining_time": "0:25:43"} +{"current_steps": 8377, "total_steps": 8680, "loss": 0.8061347007751465, "lr": 6.699536085739588e-09, "epoch": 1.9301843317972351, "percentage": 96.51, "elapsed_time": "11:48:42", "remaining_time": "0:25:38"} +{"current_steps": 8378, "total_steps": 8680, "loss": 0.7320632934570312, "lr": 6.655581560372159e-09, "epoch": 1.9304147465437786, "percentage": 96.52, "elapsed_time": "11:48:49", "remaining_time": "0:25:33"} +{"current_steps": 8379, "total_steps": 8680, "loss": 0.7039695978164673, "lr": 6.611771217632123e-09, "epoch": 1.9306451612903226, "percentage": 96.53, "elapsed_time": "11:48:56", "remaining_time": "0:25:28"} +{"current_steps": 8380, "total_steps": 8680, "loss": 0.7056317925453186, "lr": 6.568105063878393e-09, "epoch": 1.9308755760368663, "percentage": 96.54, "elapsed_time": "11:49:03", "remaining_time": "0:25:23"} +{"current_steps": 8381, "total_steps": 8680, "loss": 0.9265607595443726, "lr": 6.524583105449122e-09, "epoch": 1.93110599078341, "percentage": 96.56, "elapsed_time": "11:49:08", "remaining_time": "0:25:17"} +{"current_steps": 8382, "total_steps": 8680, "loss": 0.7249365448951721, "lr": 
6.481205348661367e-09, "epoch": 1.931336405529954, "percentage": 96.57, "elapsed_time": "11:49:14", "remaining_time": "0:25:12"} +{"current_steps": 8383, "total_steps": 8680, "loss": 0.8216372728347778, "lr": 6.4379717998114256e-09, "epoch": 1.9315668202764977, "percentage": 96.58, "elapsed_time": "11:49:19", "remaining_time": "0:25:07"} +{"current_steps": 8384, "total_steps": 8680, "loss": 0.6750606894493103, "lr": 6.394882465174611e-09, "epoch": 1.9317972350230415, "percentage": 96.59, "elapsed_time": "11:49:25", "remaining_time": "0:25:02"} +{"current_steps": 8385, "total_steps": 8680, "loss": 0.8265045285224915, "lr": 6.351937351005143e-09, "epoch": 1.9320276497695854, "percentage": 96.6, "elapsed_time": "11:49:30", "remaining_time": "0:24:57"} +{"current_steps": 8386, "total_steps": 8680, "loss": 0.5992317795753479, "lr": 6.309136463536591e-09, "epoch": 1.932258064516129, "percentage": 96.61, "elapsed_time": "11:49:37", "remaining_time": "0:24:52"} +{"current_steps": 8387, "total_steps": 8680, "loss": 0.6586567163467407, "lr": 6.266479808981428e-09, "epoch": 1.9324884792626729, "percentage": 96.62, "elapsed_time": "11:49:44", "remaining_time": "0:24:47"} +{"current_steps": 8388, "total_steps": 8680, "loss": 0.7496415376663208, "lr": 6.223967393531259e-09, "epoch": 1.9327188940092166, "percentage": 96.64, "elapsed_time": "11:49:50", "remaining_time": "0:24:42"} +{"current_steps": 8389, "total_steps": 8680, "loss": 0.8637027740478516, "lr": 6.181599223356593e-09, "epoch": 1.9329493087557603, "percentage": 96.65, "elapsed_time": "11:49:56", "remaining_time": "0:24:37"} +{"current_steps": 8390, "total_steps": 8680, "loss": 0.6925984621047974, "lr": 6.139375304607064e-09, "epoch": 1.9331797235023043, "percentage": 96.66, "elapsed_time": "11:50:01", "remaining_time": "0:24:32"} +{"current_steps": 8391, "total_steps": 8680, "loss": 0.8345432877540588, "lr": 6.0972956434115485e-09, "epoch": 1.9334101382488478, "percentage": 96.67, "elapsed_time": "11:50:06", 
"remaining_time": "0:24:27"} +{"current_steps": 8392, "total_steps": 8680, "loss": 0.797752857208252, "lr": 6.055360245877938e-09, "epoch": 1.9336405529953917, "percentage": 96.68, "elapsed_time": "11:50:13", "remaining_time": "0:24:22"} +{"current_steps": 8393, "total_steps": 8680, "loss": 0.7460094690322876, "lr": 6.013569118092809e-09, "epoch": 1.9338709677419355, "percentage": 96.69, "elapsed_time": "11:50:18", "remaining_time": "0:24:17"} +{"current_steps": 8394, "total_steps": 8680, "loss": 0.7695547342300415, "lr": 5.97192226612242e-09, "epoch": 1.9341013824884792, "percentage": 96.71, "elapsed_time": "11:50:26", "remaining_time": "0:24:12"} +{"current_steps": 8395, "total_steps": 8680, "loss": 0.8372104167938232, "lr": 5.9304196960113795e-09, "epoch": 1.9343317972350231, "percentage": 96.72, "elapsed_time": "11:50:31", "remaining_time": "0:24:07"} +{"current_steps": 8396, "total_steps": 8680, "loss": 0.7647950053215027, "lr": 5.889061413784091e-09, "epoch": 1.9345622119815669, "percentage": 96.73, "elapsed_time": "11:50:37", "remaining_time": "0:24:02"} +{"current_steps": 8397, "total_steps": 8680, "loss": 0.6958519220352173, "lr": 5.84784742544353e-09, "epoch": 1.9347926267281106, "percentage": 96.74, "elapsed_time": "11:50:45", "remaining_time": "0:23:57"} +{"current_steps": 8398, "total_steps": 8680, "loss": 0.8488763570785522, "lr": 5.806777736971691e-09, "epoch": 1.9350230414746545, "percentage": 96.75, "elapsed_time": "11:50:52", "remaining_time": "0:23:52"} +{"current_steps": 8399, "total_steps": 8680, "loss": 0.6448318958282471, "lr": 5.765852354330025e-09, "epoch": 1.935253456221198, "percentage": 96.76, "elapsed_time": "11:50:58", "remaining_time": "0:23:47"} +{"current_steps": 8400, "total_steps": 8680, "loss": 0.7449144124984741, "lr": 5.725071283458671e-09, "epoch": 1.935483870967742, "percentage": 96.77, "elapsed_time": "11:51:05", "remaining_time": "0:23:42"} +{"current_steps": 8401, "total_steps": 8680, "loss": 0.8339489102363586, "lr": 
5.684434530277005e-09, "epoch": 1.9357142857142857, "percentage": 96.79, "elapsed_time": "11:51:13", "remaining_time": "0:23:37"} +{"current_steps": 8402, "total_steps": 8680, "loss": 0.7758409380912781, "lr": 5.643942100683308e-09, "epoch": 1.9359447004608294, "percentage": 96.8, "elapsed_time": "11:51:18", "remaining_time": "0:23:32"} +{"current_steps": 8403, "total_steps": 8680, "loss": 0.8604291081428528, "lr": 5.60359400055499e-09, "epoch": 1.9361751152073734, "percentage": 96.81, "elapsed_time": "11:51:23", "remaining_time": "0:23:27"} +{"current_steps": 8404, "total_steps": 8680, "loss": 0.7379741668701172, "lr": 5.5633902357487e-09, "epoch": 1.936405529953917, "percentage": 96.82, "elapsed_time": "11:51:28", "remaining_time": "0:23:21"} +{"current_steps": 8405, "total_steps": 8680, "loss": 0.6943101286888123, "lr": 5.52333081209988e-09, "epoch": 1.9366359447004609, "percentage": 96.83, "elapsed_time": "11:51:33", "remaining_time": "0:23:16"} +{"current_steps": 8406, "total_steps": 8680, "loss": 0.7397646903991699, "lr": 5.483415735422992e-09, "epoch": 1.9368663594470046, "percentage": 96.84, "elapsed_time": "11:51:38", "remaining_time": "0:23:11"} +{"current_steps": 8407, "total_steps": 8680, "loss": 0.7566234469413757, "lr": 5.443645011511844e-09, "epoch": 1.9370967741935483, "percentage": 96.85, "elapsed_time": "11:51:44", "remaining_time": "0:23:06"} +{"current_steps": 8408, "total_steps": 8680, "loss": 0.6345827579498291, "lr": 5.40401864613893e-09, "epoch": 1.9373271889400923, "percentage": 96.87, "elapsed_time": "11:51:49", "remaining_time": "0:23:01"} +{"current_steps": 8409, "total_steps": 8680, "loss": 0.7259831428527832, "lr": 5.3645366450560944e-09, "epoch": 1.937557603686636, "percentage": 96.88, "elapsed_time": "11:51:54", "remaining_time": "0:22:56"} +{"current_steps": 8410, "total_steps": 8680, "loss": 0.7897600531578064, "lr": 5.325199013993975e-09, "epoch": 1.9377880184331797, "percentage": 96.89, "elapsed_time": "11:51:59", 
"remaining_time": "0:22:51"} +{"current_steps": 8411, "total_steps": 8680, "loss": 0.8421739339828491, "lr": 5.286005758662448e-09, "epoch": 1.9380184331797237, "percentage": 96.9, "elapsed_time": "11:52:05", "remaining_time": "0:22:46"} +{"current_steps": 8412, "total_steps": 8680, "loss": 0.7652501463890076, "lr": 5.2469568847504085e-09, "epoch": 1.9382488479262672, "percentage": 96.91, "elapsed_time": "11:52:08", "remaining_time": "0:22:41"} +{"current_steps": 8413, "total_steps": 8680, "loss": 0.6397069096565247, "lr": 5.2080523979256556e-09, "epoch": 1.9384792626728111, "percentage": 96.92, "elapsed_time": "11:52:12", "remaining_time": "0:22:36"} +{"current_steps": 8414, "total_steps": 8680, "loss": 0.840052604675293, "lr": 5.169292303835116e-09, "epoch": 1.9387096774193548, "percentage": 96.94, "elapsed_time": "11:52:17", "remaining_time": "0:22:31"} +{"current_steps": 8415, "total_steps": 8680, "loss": 0.8453920483589172, "lr": 5.130676608104845e-09, "epoch": 1.9389400921658986, "percentage": 96.95, "elapsed_time": "11:52:22", "remaining_time": "0:22:26"} +{"current_steps": 8416, "total_steps": 8680, "loss": 0.8301386833190918, "lr": 5.092205316339915e-09, "epoch": 1.9391705069124425, "percentage": 96.96, "elapsed_time": "11:52:26", "remaining_time": "0:22:20"} +{"current_steps": 8417, "total_steps": 8680, "loss": 0.6682429313659668, "lr": 5.0538784341241924e-09, "epoch": 1.939400921658986, "percentage": 96.97, "elapsed_time": "11:52:32", "remaining_time": "0:22:15"} +{"current_steps": 8418, "total_steps": 8680, "loss": 0.7881286144256592, "lr": 5.0156959670208945e-09, "epoch": 1.93963133640553, "percentage": 96.98, "elapsed_time": "11:52:39", "remaining_time": "0:22:10"} +{"current_steps": 8419, "total_steps": 8680, "loss": 0.7413277626037598, "lr": 4.9776579205721424e-09, "epoch": 1.9398617511520737, "percentage": 96.99, "elapsed_time": "11:52:46", "remaining_time": "0:22:05"} +{"current_steps": 8420, "total_steps": 8680, "loss": 0.6718757152557373, "lr": 
4.939764300299187e-09, "epoch": 1.9400921658986174, "percentage": 97.0, "elapsed_time": "11:52:51", "remaining_time": "0:22:00"} +{"current_steps": 8421, "total_steps": 8680, "loss": 0.8595068454742432, "lr": 4.9020151117019625e-09, "epoch": 1.9403225806451614, "percentage": 97.02, "elapsed_time": "11:52:56", "remaining_time": "0:21:55"} +{"current_steps": 8422, "total_steps": 8680, "loss": 0.6985205411911011, "lr": 4.864410360260085e-09, "epoch": 1.9405529953917051, "percentage": 97.03, "elapsed_time": "11:53:02", "remaining_time": "0:21:50"} +{"current_steps": 8423, "total_steps": 8680, "loss": 0.7148889303207397, "lr": 4.826950051431522e-09, "epoch": 1.9407834101382488, "percentage": 97.04, "elapsed_time": "11:53:07", "remaining_time": "0:21:45"} +{"current_steps": 8424, "total_steps": 8680, "loss": 0.8109019994735718, "lr": 4.789634190653813e-09, "epoch": 1.9410138248847926, "percentage": 97.05, "elapsed_time": "11:53:12", "remaining_time": "0:21:40"} +{"current_steps": 8425, "total_steps": 8680, "loss": 0.8268437385559082, "lr": 4.752462783343292e-09, "epoch": 1.9412442396313363, "percentage": 97.06, "elapsed_time": "11:53:18", "remaining_time": "0:21:35"} +{"current_steps": 8426, "total_steps": 8680, "loss": 0.7300432920455933, "lr": 4.715435834895088e-09, "epoch": 1.9414746543778802, "percentage": 97.07, "elapsed_time": "11:53:25", "remaining_time": "0:21:30"} +{"current_steps": 8427, "total_steps": 8680, "loss": 0.848440408706665, "lr": 4.6785533506839005e-09, "epoch": 1.941705069124424, "percentage": 97.09, "elapsed_time": "11:53:31", "remaining_time": "0:21:25"} +{"current_steps": 8428, "total_steps": 8680, "loss": 0.7526305913925171, "lr": 4.6418153360630044e-09, "epoch": 1.9419354838709677, "percentage": 97.1, "elapsed_time": "11:53:35", "remaining_time": "0:21:20"} +{"current_steps": 8429, "total_steps": 8680, "loss": 0.6987402439117432, "lr": 4.605221796365022e-09, "epoch": 1.9421658986175117, "percentage": 97.11, "elapsed_time": "11:53:40", 
"remaining_time": "0:21:15"} +{"current_steps": 8430, "total_steps": 8680, "loss": 0.7944519519805908, "lr": 4.568772736901261e-09, "epoch": 1.9423963133640552, "percentage": 97.12, "elapsed_time": "11:53:45", "remaining_time": "0:21:10"} +{"current_steps": 8431, "total_steps": 8680, "loss": 0.7206175327301025, "lr": 4.532468162962378e-09, "epoch": 1.942626728110599, "percentage": 97.13, "elapsed_time": "11:53:50", "remaining_time": "0:21:04"} +{"current_steps": 8432, "total_steps": 8680, "loss": 0.6892992854118347, "lr": 4.4963080798179345e-09, "epoch": 1.9428571428571428, "percentage": 97.14, "elapsed_time": "11:53:57", "remaining_time": "0:20:59"} +{"current_steps": 8433, "total_steps": 8680, "loss": 0.696158766746521, "lr": 4.460292492716511e-09, "epoch": 1.9430875576036866, "percentage": 97.15, "elapsed_time": "11:54:04", "remaining_time": "0:20:54"} +{"current_steps": 8434, "total_steps": 8680, "loss": 0.8007163405418396, "lr": 4.424421406885704e-09, "epoch": 1.9433179723502305, "percentage": 97.17, "elapsed_time": "11:54:09", "remaining_time": "0:20:49"} +{"current_steps": 8435, "total_steps": 8680, "loss": 0.7969222068786621, "lr": 4.3886948275320135e-09, "epoch": 1.9435483870967742, "percentage": 97.18, "elapsed_time": "11:54:14", "remaining_time": "0:20:44"} +{"current_steps": 8436, "total_steps": 8680, "loss": 0.7752852439880371, "lr": 4.353112759841404e-09, "epoch": 1.943778801843318, "percentage": 97.19, "elapsed_time": "11:54:18", "remaining_time": "0:20:39"} +{"current_steps": 8437, "total_steps": 8680, "loss": 0.7788258790969849, "lr": 4.317675208978411e-09, "epoch": 1.9440092165898617, "percentage": 97.2, "elapsed_time": "11:54:23", "remaining_time": "0:20:34"} +{"current_steps": 8438, "total_steps": 8680, "loss": 0.838456392288208, "lr": 4.2823821800866964e-09, "epoch": 1.9442396313364054, "percentage": 97.21, "elapsed_time": "11:54:28", "remaining_time": "0:20:29"} +{"current_steps": 8439, "total_steps": 8680, "loss": 0.7503675222396851, "lr": 
4.2472336782890525e-09, "epoch": 1.9444700460829494, "percentage": 97.22, "elapsed_time": "11:54:33", "remaining_time": "0:20:24"} +{"current_steps": 8440, "total_steps": 8680, "loss": 0.810901403427124, "lr": 4.212229708687287e-09, "epoch": 1.944700460829493, "percentage": 97.24, "elapsed_time": "11:54:38", "remaining_time": "0:20:19"} +{"current_steps": 8441, "total_steps": 8680, "loss": 0.7551805973052979, "lr": 4.1773702763621135e-09, "epoch": 1.9449308755760368, "percentage": 97.25, "elapsed_time": "11:54:44", "remaining_time": "0:20:14"} +{"current_steps": 8442, "total_steps": 8680, "loss": 0.9387043714523315, "lr": 4.142655386373373e-09, "epoch": 1.9451612903225808, "percentage": 97.26, "elapsed_time": "11:54:49", "remaining_time": "0:20:09"} +{"current_steps": 8443, "total_steps": 8680, "loss": 0.7508292198181152, "lr": 4.1080850437598124e-09, "epoch": 1.9453917050691243, "percentage": 97.27, "elapsed_time": "11:54:55", "remaining_time": "0:20:04"} +{"current_steps": 8444, "total_steps": 8680, "loss": 0.737107515335083, "lr": 4.073659253539308e-09, "epoch": 1.9456221198156682, "percentage": 97.28, "elapsed_time": "11:55:01", "remaining_time": "0:19:59"} +{"current_steps": 8445, "total_steps": 8680, "loss": 0.86794114112854, "lr": 4.03937802070875e-09, "epoch": 1.945852534562212, "percentage": 97.29, "elapsed_time": "11:55:04", "remaining_time": "0:19:53"} +{"current_steps": 8446, "total_steps": 8680, "loss": 0.7629859447479248, "lr": 4.005241350243937e-09, "epoch": 1.9460829493087557, "percentage": 97.3, "elapsed_time": "11:55:10", "remaining_time": "0:19:48"} +{"current_steps": 8447, "total_steps": 8680, "loss": 0.7455410957336426, "lr": 3.971249247099906e-09, "epoch": 1.9463133640552996, "percentage": 97.32, "elapsed_time": "11:55:16", "remaining_time": "0:19:43"} +{"current_steps": 8448, "total_steps": 8680, "loss": 0.8322222828865051, "lr": 3.937401716210376e-09, "epoch": 1.9465437788018434, "percentage": 97.33, "elapsed_time": "11:55:20", 
"remaining_time": "0:19:38"} +{"current_steps": 8449, "total_steps": 8680, "loss": 0.7961260676383972, "lr": 3.903698762488528e-09, "epoch": 1.946774193548387, "percentage": 97.34, "elapsed_time": "11:55:26", "remaining_time": "0:19:33"} +{"current_steps": 8450, "total_steps": 8680, "loss": 0.8144096732139587, "lr": 3.870140390826005e-09, "epoch": 1.9470046082949308, "percentage": 97.35, "elapsed_time": "11:55:30", "remaining_time": "0:19:28"} +{"current_steps": 8451, "total_steps": 8680, "loss": 0.7973348498344421, "lr": 3.8367266060939095e-09, "epoch": 1.9472350230414746, "percentage": 97.36, "elapsed_time": "11:55:35", "remaining_time": "0:19:23"} +{"current_steps": 8452, "total_steps": 8680, "loss": 0.8311715126037598, "lr": 3.803457413142253e-09, "epoch": 1.9474654377880185, "percentage": 97.37, "elapsed_time": "11:55:39", "remaining_time": "0:19:18"} +{"current_steps": 8453, "total_steps": 8680, "loss": 0.7851812839508057, "lr": 3.770332816799948e-09, "epoch": 1.9476958525345622, "percentage": 97.38, "elapsed_time": "11:55:44", "remaining_time": "0:19:13"} +{"current_steps": 8454, "total_steps": 8680, "loss": 0.8721193075180054, "lr": 3.737352821875039e-09, "epoch": 1.947926267281106, "percentage": 97.4, "elapsed_time": "11:55:49", "remaining_time": "0:19:08"} +{"current_steps": 8455, "total_steps": 8680, "loss": 0.8594118356704712, "lr": 3.704517433154364e-09, "epoch": 1.94815668202765, "percentage": 97.41, "elapsed_time": "11:55:56", "remaining_time": "0:19:03"} +{"current_steps": 8456, "total_steps": 8680, "loss": 0.6526527404785156, "lr": 3.671826655404109e-09, "epoch": 1.9483870967741934, "percentage": 97.42, "elapsed_time": "11:56:01", "remaining_time": "0:18:58"} +{"current_steps": 8457, "total_steps": 8680, "loss": 0.7577145099639893, "lr": 3.639280493369368e-09, "epoch": 1.9486175115207374, "percentage": 97.43, "elapsed_time": "11:56:06", "remaining_time": "0:18:52"} +{"current_steps": 8458, "total_steps": 8680, "loss": 0.9176833629608154, "lr": 
3.6068789517739173e-09, "epoch": 1.948847926267281, "percentage": 97.44, "elapsed_time": "11:56:11", "remaining_time": "0:18:47"} +{"current_steps": 8459, "total_steps": 8680, "loss": 0.8947671055793762, "lr": 3.5746220353209956e-09, "epoch": 1.9490783410138248, "percentage": 97.45, "elapsed_time": "11:56:16", "remaining_time": "0:18:42"} +{"current_steps": 8460, "total_steps": 8680, "loss": 0.8791666030883789, "lr": 3.542509748692524e-09, "epoch": 1.9493087557603688, "percentage": 97.47, "elapsed_time": "11:56:21", "remaining_time": "0:18:37"} +{"current_steps": 8461, "total_steps": 8680, "loss": 0.7431247234344482, "lr": 3.5105420965496626e-09, "epoch": 1.9495391705069123, "percentage": 97.48, "elapsed_time": "11:56:27", "remaining_time": "0:18:32"} +{"current_steps": 8462, "total_steps": 8680, "loss": 0.7998695373535156, "lr": 3.4787190835324775e-09, "epoch": 1.9497695852534562, "percentage": 97.49, "elapsed_time": "11:56:32", "remaining_time": "0:18:27"} +{"current_steps": 8463, "total_steps": 8680, "loss": 0.8120161294937134, "lr": 3.447040714259941e-09, "epoch": 1.95, "percentage": 97.5, "elapsed_time": "11:56:36", "remaining_time": "0:18:22"} +{"current_steps": 8464, "total_steps": 8680, "loss": 0.8062546849250793, "lr": 3.415506993330153e-09, "epoch": 1.9502304147465437, "percentage": 97.51, "elapsed_time": "11:56:41", "remaining_time": "0:18:17"} +{"current_steps": 8465, "total_steps": 8680, "loss": 0.8100919723510742, "lr": 3.384117925320229e-09, "epoch": 1.9504608294930876, "percentage": 97.52, "elapsed_time": "11:56:45", "remaining_time": "0:18:12"} +{"current_steps": 8466, "total_steps": 8680, "loss": 0.7376535534858704, "lr": 3.352873514786303e-09, "epoch": 1.9506912442396314, "percentage": 97.53, "elapsed_time": "11:56:49", "remaining_time": "0:18:07"} +{"current_steps": 8467, "total_steps": 8680, "loss": 0.7534361481666565, "lr": 3.321773766263303e-09, "epoch": 1.950921658986175, "percentage": 97.55, "elapsed_time": "11:56:54", "remaining_time": 
"0:18:02"} +{"current_steps": 8468, "total_steps": 8680, "loss": 0.6914925575256348, "lr": 3.290818684265506e-09, "epoch": 1.951152073732719, "percentage": 97.56, "elapsed_time": "11:56:58", "remaining_time": "0:17:56"} +{"current_steps": 8469, "total_steps": 8680, "loss": 0.837024450302124, "lr": 3.2600082732858746e-09, "epoch": 1.9513824884792625, "percentage": 97.57, "elapsed_time": "11:57:03", "remaining_time": "0:17:51"} +{"current_steps": 8470, "total_steps": 8680, "loss": 0.7960337400436401, "lr": 3.229342537796609e-09, "epoch": 1.9516129032258065, "percentage": 97.58, "elapsed_time": "11:57:09", "remaining_time": "0:17:46"} +{"current_steps": 8471, "total_steps": 8680, "loss": 0.6611788868904114, "lr": 3.1988214822485928e-09, "epoch": 1.9518433179723502, "percentage": 97.59, "elapsed_time": "11:57:14", "remaining_time": "0:17:41"} +{"current_steps": 8472, "total_steps": 8680, "loss": 0.8798158168792725, "lr": 3.16844511107206e-09, "epoch": 1.952073732718894, "percentage": 97.6, "elapsed_time": "11:57:20", "remaining_time": "0:17:36"} +{"current_steps": 8473, "total_steps": 8680, "loss": 0.790015459060669, "lr": 3.1382134286761506e-09, "epoch": 1.952304147465438, "percentage": 97.62, "elapsed_time": "11:57:26", "remaining_time": "0:17:31"} +{"current_steps": 8474, "total_steps": 8680, "loss": 0.7676407098770142, "lr": 3.1081264394489103e-09, "epoch": 1.9525345622119814, "percentage": 97.63, "elapsed_time": "11:57:32", "remaining_time": "0:17:26"} +{"current_steps": 8475, "total_steps": 8680, "loss": 0.8213051557540894, "lr": 3.07818414775729e-09, "epoch": 1.9527649769585254, "percentage": 97.64, "elapsed_time": "11:57:38", "remaining_time": "0:17:21"} +{"current_steps": 8476, "total_steps": 8680, "loss": 0.8909401893615723, "lr": 3.048386557947591e-09, "epoch": 1.952995391705069, "percentage": 97.65, "elapsed_time": "11:57:42", "remaining_time": "0:17:16"} +{"current_steps": 8477, "total_steps": 8680, "loss": 0.838227391242981, "lr": 3.0187336743446867e-09, 
"epoch": 1.9532258064516128, "percentage": 97.66, "elapsed_time": "11:57:47", "remaining_time": "0:17:11"} +{"current_steps": 8478, "total_steps": 8680, "loss": 0.7297696471214294, "lr": 2.9892255012528013e-09, "epoch": 1.9534562211981568, "percentage": 97.67, "elapsed_time": "11:57:54", "remaining_time": "0:17:06"} +{"current_steps": 8479, "total_steps": 8680, "loss": 1.0060585737228394, "lr": 2.9598620429550636e-09, "epoch": 1.9536866359447005, "percentage": 97.68, "elapsed_time": "11:58:00", "remaining_time": "0:17:01"} +{"current_steps": 8480, "total_steps": 8680, "loss": 0.7812967300415039, "lr": 2.9306433037132873e-09, "epoch": 1.9539170506912442, "percentage": 97.7, "elapsed_time": "11:58:04", "remaining_time": "0:16:56"} +{"current_steps": 8481, "total_steps": 8680, "loss": 0.7349315881729126, "lr": 2.901569287768746e-09, "epoch": 1.9541474654377882, "percentage": 97.71, "elapsed_time": "11:58:09", "remaining_time": "0:16:51"} +{"current_steps": 8482, "total_steps": 8680, "loss": 0.7083498239517212, "lr": 2.8726399993415085e-09, "epoch": 1.9543778801843317, "percentage": 97.72, "elapsed_time": "11:58:15", "remaining_time": "0:16:45"} +{"current_steps": 8483, "total_steps": 8680, "loss": 0.7969732880592346, "lr": 2.8438554426304386e-09, "epoch": 1.9546082949308756, "percentage": 97.73, "elapsed_time": "11:58:20", "remaining_time": "0:16:40"} +{"current_steps": 8484, "total_steps": 8680, "loss": 0.7701122164726257, "lr": 2.815215621813749e-09, "epoch": 1.9548387096774194, "percentage": 97.74, "elapsed_time": "11:58:26", "remaining_time": "0:16:35"} +{"current_steps": 8485, "total_steps": 8680, "loss": 0.7323017120361328, "lr": 2.7867205410484485e-09, "epoch": 1.955069124423963, "percentage": 97.75, "elapsed_time": "11:58:30", "remaining_time": "0:16:30"} +{"current_steps": 8486, "total_steps": 8680, "loss": 0.8357248306274414, "lr": 2.7583702044704504e-09, "epoch": 1.955299539170507, "percentage": 97.76, "elapsed_time": "11:58:36", "remaining_time": 
"0:16:25"} +{"current_steps": 8487, "total_steps": 8680, "loss": 0.8164674043655396, "lr": 2.7301646161947966e-09, "epoch": 1.9555299539170505, "percentage": 97.78, "elapsed_time": "11:58:42", "remaining_time": "0:16:20"} +{"current_steps": 8488, "total_steps": 8680, "loss": 0.7972782850265503, "lr": 2.7021037803156566e-09, "epoch": 1.9557603686635945, "percentage": 97.79, "elapsed_time": "11:58:48", "remaining_time": "0:16:15"} +{"current_steps": 8489, "total_steps": 8680, "loss": 0.864342987537384, "lr": 2.6741877009058835e-09, "epoch": 1.9559907834101382, "percentage": 97.8, "elapsed_time": "11:58:53", "remaining_time": "0:16:10"} +{"current_steps": 8490, "total_steps": 8680, "loss": 0.7428402900695801, "lr": 2.646416382017458e-09, "epoch": 1.956221198156682, "percentage": 97.81, "elapsed_time": "11:58:59", "remaining_time": "0:16:05"} +{"current_steps": 8491, "total_steps": 8680, "loss": 0.7164437770843506, "lr": 2.618789827681378e-09, "epoch": 1.956451612903226, "percentage": 97.82, "elapsed_time": "11:59:05", "remaining_time": "0:16:00"} +{"current_steps": 8492, "total_steps": 8680, "loss": 0.6997767686843872, "lr": 2.5913080419075473e-09, "epoch": 1.9566820276497696, "percentage": 97.83, "elapsed_time": "11:59:11", "remaining_time": "0:15:55"} +{"current_steps": 8493, "total_steps": 8680, "loss": 0.6399234533309937, "lr": 2.563971028684886e-09, "epoch": 1.9569124423963133, "percentage": 97.85, "elapsed_time": "11:59:18", "remaining_time": "0:15:50"} +{"current_steps": 8494, "total_steps": 8680, "loss": 0.7642914056777954, "lr": 2.536778791981553e-09, "epoch": 1.9571428571428573, "percentage": 97.86, "elapsed_time": "11:59:23", "remaining_time": "0:15:45"} +{"current_steps": 8495, "total_steps": 8680, "loss": 0.8284746408462524, "lr": 2.5097313357442806e-09, "epoch": 1.9573732718894008, "percentage": 97.87, "elapsed_time": "11:59:28", "remaining_time": "0:15:40"} +{"current_steps": 8496, "total_steps": 8680, "loss": 0.6680238246917725, "lr": 
2.4828286638989282e-09, "epoch": 1.9576036866359448, "percentage": 97.88, "elapsed_time": "11:59:33", "remaining_time": "0:15:35"} +{"current_steps": 8497, "total_steps": 8680, "loss": 0.7621040344238281, "lr": 2.4560707803504834e-09, "epoch": 1.9578341013824885, "percentage": 97.89, "elapsed_time": "11:59:40", "remaining_time": "0:15:29"} +{"current_steps": 8498, "total_steps": 8680, "loss": 0.7326159477233887, "lr": 2.4294576889827278e-09, "epoch": 1.9580645161290322, "percentage": 97.9, "elapsed_time": "11:59:44", "remaining_time": "0:15:24"} +{"current_steps": 8499, "total_steps": 8680, "loss": 0.6496877670288086, "lr": 2.4029893936586833e-09, "epoch": 1.9582949308755762, "percentage": 97.91, "elapsed_time": "11:59:50", "remaining_time": "0:15:19"} +{"current_steps": 8500, "total_steps": 8680, "loss": 0.665170431137085, "lr": 2.376665898220054e-09, "epoch": 1.9585253456221197, "percentage": 97.93, "elapsed_time": "11:59:56", "remaining_time": "0:15:14"} +{"current_steps": 8501, "total_steps": 8680, "loss": 0.7238261699676514, "lr": 2.3504872064876724e-09, "epoch": 1.9587557603686636, "percentage": 97.94, "elapsed_time": "12:00:03", "remaining_time": "0:15:09"} +{"current_steps": 8502, "total_steps": 8680, "loss": 0.6696983575820923, "lr": 2.3244533222613882e-09, "epoch": 1.9589861751152073, "percentage": 97.95, "elapsed_time": "12:00:08", "remaining_time": "0:15:04"} +{"current_steps": 8503, "total_steps": 8680, "loss": 0.8414099216461182, "lr": 2.2985642493199563e-09, "epoch": 1.959216589861751, "percentage": 97.96, "elapsed_time": "12:00:13", "remaining_time": "0:14:59"} +{"current_steps": 8504, "total_steps": 8680, "loss": 0.7390140295028687, "lr": 2.2728199914210377e-09, "epoch": 1.959447004608295, "percentage": 97.97, "elapsed_time": "12:00:18", "remaining_time": "0:14:54"} +{"current_steps": 8505, "total_steps": 8680, "loss": 0.7910370826721191, "lr": 2.247220552301532e-09, "epoch": 1.9596774193548387, "percentage": 97.98, "elapsed_time": "12:00:23", 
"remaining_time": "0:14:49"} +{"current_steps": 8506, "total_steps": 8680, "loss": 0.8111266493797302, "lr": 2.2217659356771334e-09, "epoch": 1.9599078341013825, "percentage": 98.0, "elapsed_time": "12:00:27", "remaining_time": "0:14:44"} +{"current_steps": 8507, "total_steps": 8680, "loss": 0.8748809099197388, "lr": 2.1964561452425535e-09, "epoch": 1.9601382488479264, "percentage": 98.01, "elapsed_time": "12:00:31", "remaining_time": "0:14:39"} +{"current_steps": 8508, "total_steps": 8680, "loss": 0.726898193359375, "lr": 2.1712911846714088e-09, "epoch": 1.96036866359447, "percentage": 98.02, "elapsed_time": "12:00:36", "remaining_time": "0:14:34"} +{"current_steps": 8509, "total_steps": 8680, "loss": 0.5221005082130432, "lr": 2.1462710576163335e-09, "epoch": 1.9605990783410139, "percentage": 98.03, "elapsed_time": "12:00:42", "remaining_time": "0:14:29"} +{"current_steps": 8510, "total_steps": 8680, "loss": 0.7336875200271606, "lr": 2.1213957677090887e-09, "epoch": 1.9608294930875576, "percentage": 98.04, "elapsed_time": "12:00:47", "remaining_time": "0:14:23"} +{"current_steps": 8511, "total_steps": 8680, "loss": 0.9653327465057373, "lr": 2.096665318560231e-09, "epoch": 1.9610599078341013, "percentage": 98.05, "elapsed_time": "12:00:52", "remaining_time": "0:14:18"} +{"current_steps": 8512, "total_steps": 8680, "loss": 0.8309473991394043, "lr": 2.0720797137594448e-09, "epoch": 1.9612903225806453, "percentage": 98.06, "elapsed_time": "12:00:57", "remaining_time": "0:14:13"} +{"current_steps": 8513, "total_steps": 8680, "loss": 0.7829124331474304, "lr": 2.047638956874986e-09, "epoch": 1.9615207373271888, "percentage": 98.08, "elapsed_time": "12:01:02", "remaining_time": "0:14:08"} +{"current_steps": 8514, "total_steps": 8680, "loss": 0.8399544358253479, "lr": 2.0233430514547955e-09, "epoch": 1.9617511520737327, "percentage": 98.09, "elapsed_time": "12:01:06", "remaining_time": "0:14:03"} +{"current_steps": 8515, "total_steps": 8680, "loss": 0.7827579975128174, 
"lr": 1.999192001025163e-09, "epoch": 1.9619815668202765, "percentage": 98.1, "elapsed_time": "12:01:11", "remaining_time": "0:13:58"} +{"current_steps": 8516, "total_steps": 8680, "loss": 0.8617441654205322, "lr": 1.9751858090916174e-09, "epoch": 1.9622119815668202, "percentage": 98.11, "elapsed_time": "12:01:15", "remaining_time": "0:13:53"} +{"current_steps": 8517, "total_steps": 8680, "loss": 0.758098840713501, "lr": 1.951324479138594e-09, "epoch": 1.9624423963133641, "percentage": 98.12, "elapsed_time": "12:01:20", "remaining_time": "0:13:48"} +{"current_steps": 8518, "total_steps": 8680, "loss": 0.738059937953949, "lr": 1.927608014629656e-09, "epoch": 1.9626728110599079, "percentage": 98.13, "elapsed_time": "12:01:26", "remaining_time": "0:13:43"} +{"current_steps": 8519, "total_steps": 8680, "loss": 0.6286636590957642, "lr": 1.9040364190070492e-09, "epoch": 1.9629032258064516, "percentage": 98.15, "elapsed_time": "12:01:31", "remaining_time": "0:13:38"} +{"current_steps": 8520, "total_steps": 8680, "loss": 0.764518141746521, "lr": 1.88060969569237e-09, "epoch": 1.9631336405529956, "percentage": 98.16, "elapsed_time": "12:01:35", "remaining_time": "0:13:33"} +{"current_steps": 8521, "total_steps": 8680, "loss": 0.775516152381897, "lr": 1.8573278480857878e-09, "epoch": 1.963364055299539, "percentage": 98.17, "elapsed_time": "12:01:43", "remaining_time": "0:13:28"} +{"current_steps": 8522, "total_steps": 8680, "loss": 0.8513185977935791, "lr": 1.8341908795665994e-09, "epoch": 1.963594470046083, "percentage": 98.18, "elapsed_time": "12:01:49", "remaining_time": "0:13:22"} +{"current_steps": 8523, "total_steps": 8680, "loss": 0.7300710082054138, "lr": 1.8111987934933404e-09, "epoch": 1.9638248847926267, "percentage": 98.19, "elapsed_time": "12:01:53", "remaining_time": "0:13:17"} +{"current_steps": 8524, "total_steps": 8680, "loss": 0.7346746921539307, "lr": 1.788351593203119e-09, "epoch": 1.9640552995391705, "percentage": 98.2, "elapsed_time": "12:01:58", 
"remaining_time": "0:13:12"} +{"current_steps": 8525, "total_steps": 8680, "loss": 0.8231781721115112, "lr": 1.7656492820121715e-09, "epoch": 1.9642857142857144, "percentage": 98.21, "elapsed_time": "12:02:05", "remaining_time": "0:13:07"} +{"current_steps": 8526, "total_steps": 8680, "loss": 0.6972112655639648, "lr": 1.743091863215751e-09, "epoch": 1.964516129032258, "percentage": 98.23, "elapsed_time": "12:02:09", "remaining_time": "0:13:02"} +{"current_steps": 8527, "total_steps": 8680, "loss": 0.6512203812599182, "lr": 1.720679340088016e-09, "epoch": 1.9647465437788019, "percentage": 98.24, "elapsed_time": "12:02:15", "remaining_time": "0:12:57"} +{"current_steps": 8528, "total_steps": 8680, "loss": 0.6755591630935669, "lr": 1.698411715882253e-09, "epoch": 1.9649769585253456, "percentage": 98.25, "elapsed_time": "12:02:22", "remaining_time": "0:12:52"} +{"current_steps": 8529, "total_steps": 8680, "loss": 0.6858727335929871, "lr": 1.6762889938303215e-09, "epoch": 1.9652073732718893, "percentage": 98.26, "elapsed_time": "12:02:27", "remaining_time": "0:12:47"} +{"current_steps": 8530, "total_steps": 8680, "loss": 0.7820768356323242, "lr": 1.6543111771434303e-09, "epoch": 1.9654377880184333, "percentage": 98.27, "elapsed_time": "12:02:33", "remaining_time": "0:12:42"} +{"current_steps": 8531, "total_steps": 8680, "loss": 0.7841604948043823, "lr": 1.6324782690116944e-09, "epoch": 1.965668202764977, "percentage": 98.28, "elapsed_time": "12:02:37", "remaining_time": "0:12:37"} +{"current_steps": 8532, "total_steps": 8680, "loss": 0.8665674328804016, "lr": 1.6107902726040234e-09, "epoch": 1.9658986175115207, "percentage": 98.29, "elapsed_time": "12:02:41", "remaining_time": "0:12:32"} +{"current_steps": 8533, "total_steps": 8680, "loss": 0.6764376163482666, "lr": 1.5892471910684547e-09, "epoch": 1.9661290322580647, "percentage": 98.31, "elapsed_time": "12:02:46", "remaining_time": "0:12:27"} +{"current_steps": 8534, "total_steps": 8680, "loss": 0.8453094959259033, 
"lr": 1.5678490275319312e-09, "epoch": 1.9663594470046082, "percentage": 98.32, "elapsed_time": "12:02:52", "remaining_time": "0:12:22"} +{"current_steps": 8535, "total_steps": 8680, "loss": 0.7798272371292114, "lr": 1.546595785100413e-09, "epoch": 1.9665898617511521, "percentage": 98.33, "elapsed_time": "12:02:58", "remaining_time": "0:12:16"} +{"current_steps": 8536, "total_steps": 8680, "loss": 0.7426424026489258, "lr": 1.5254874668586548e-09, "epoch": 1.9668202764976959, "percentage": 98.34, "elapsed_time": "12:03:04", "remaining_time": "0:12:11"} +{"current_steps": 8537, "total_steps": 8680, "loss": 0.8443984985351562, "lr": 1.5045240758706501e-09, "epoch": 1.9670506912442396, "percentage": 98.35, "elapsed_time": "12:03:09", "remaining_time": "0:12:06"} +{"current_steps": 8538, "total_steps": 8680, "loss": 0.8439072370529175, "lr": 1.4837056151790762e-09, "epoch": 1.9672811059907835, "percentage": 98.36, "elapsed_time": "12:03:14", "remaining_time": "0:12:01"} +{"current_steps": 8539, "total_steps": 8680, "loss": 0.8307704925537109, "lr": 1.463032087805849e-09, "epoch": 1.967511520737327, "percentage": 98.38, "elapsed_time": "12:03:20", "remaining_time": "0:11:56"} +{"current_steps": 8540, "total_steps": 8680, "loss": 0.678236722946167, "lr": 1.442503496751568e-09, "epoch": 1.967741935483871, "percentage": 98.39, "elapsed_time": "12:03:25", "remaining_time": "0:11:51"} +{"current_steps": 8541, "total_steps": 8680, "loss": 0.7072663307189941, "lr": 1.4221198449960724e-09, "epoch": 1.9679723502304147, "percentage": 98.4, "elapsed_time": "12:03:30", "remaining_time": "0:11:46"} +{"current_steps": 8542, "total_steps": 8680, "loss": 0.7825980186462402, "lr": 1.4018811354977732e-09, "epoch": 1.9682027649769585, "percentage": 98.41, "elapsed_time": "12:03:35", "remaining_time": "0:11:41"} +{"current_steps": 8543, "total_steps": 8680, "loss": 0.786361813545227, "lr": 1.3817873711945426e-09, "epoch": 1.9684331797235024, "percentage": 98.42, "elapsed_time": "12:03:39", 
"remaining_time": "0:11:36"} +{"current_steps": 8544, "total_steps": 8680, "loss": 1.00287926197052, "lr": 1.3618385550029365e-09, "epoch": 1.9686635944700461, "percentage": 98.43, "elapsed_time": "12:03:43", "remaining_time": "0:11:31"} +{"current_steps": 8545, "total_steps": 8680, "loss": 0.7320775389671326, "lr": 1.3420346898183054e-09, "epoch": 1.9688940092165899, "percentage": 98.44, "elapsed_time": "12:03:48", "remaining_time": "0:11:26"} +{"current_steps": 8546, "total_steps": 8680, "loss": 0.7127507925033569, "lr": 1.322375778515461e-09, "epoch": 1.9691244239631336, "percentage": 98.46, "elapsed_time": "12:03:54", "remaining_time": "0:11:21"} +{"current_steps": 8547, "total_steps": 8680, "loss": 0.818395733833313, "lr": 1.3028618239475652e-09, "epoch": 1.9693548387096773, "percentage": 98.47, "elapsed_time": "12:04:00", "remaining_time": "0:11:15"} +{"current_steps": 8548, "total_steps": 8680, "loss": 0.6384972929954529, "lr": 1.2834928289472413e-09, "epoch": 1.9695852534562213, "percentage": 98.48, "elapsed_time": "12:04:05", "remaining_time": "0:11:10"} +{"current_steps": 8549, "total_steps": 8680, "loss": 0.7358517646789551, "lr": 1.2642687963256849e-09, "epoch": 1.969815668202765, "percentage": 98.49, "elapsed_time": "12:04:09", "remaining_time": "0:11:05"} +{"current_steps": 8550, "total_steps": 8680, "loss": 0.7311068773269653, "lr": 1.2451897288734414e-09, "epoch": 1.9700460829493087, "percentage": 98.5, "elapsed_time": "12:04:14", "remaining_time": "0:11:00"} +{"current_steps": 8551, "total_steps": 8680, "loss": 0.8390932083129883, "lr": 1.2262556293597403e-09, "epoch": 1.9702764976958527, "percentage": 98.51, "elapsed_time": "12:04:18", "remaining_time": "0:10:55"} +{"current_steps": 8552, "total_steps": 8680, "loss": 0.8114689588546753, "lr": 1.2074665005328277e-09, "epoch": 1.9705069124423962, "percentage": 98.53, "elapsed_time": "12:04:24", "remaining_time": "0:10:50"} +{"current_steps": 8553, "total_steps": 8680, "loss": 1.0044716596603394, 
"lr": 1.1888223451199665e-09, "epoch": 1.9707373271889401, "percentage": 98.54, "elapsed_time": "12:04:28", "remaining_time": "0:10:45"} +{"current_steps": 8554, "total_steps": 8680, "loss": 0.7566370368003845, "lr": 1.170323165827214e-09, "epoch": 1.9709677419354839, "percentage": 98.55, "elapsed_time": "12:04:34", "remaining_time": "0:10:40"} +{"current_steps": 8555, "total_steps": 8680, "loss": 0.7543225288391113, "lr": 1.1519689653397557e-09, "epoch": 1.9711981566820276, "percentage": 98.56, "elapsed_time": "12:04:41", "remaining_time": "0:10:35"} +{"current_steps": 8556, "total_steps": 8680, "loss": 0.8291902542114258, "lr": 1.1337597463217941e-09, "epoch": 1.9714285714285715, "percentage": 98.57, "elapsed_time": "12:04:46", "remaining_time": "0:10:30"} +{"current_steps": 8557, "total_steps": 8680, "loss": 0.7363135814666748, "lr": 1.1156955114162147e-09, "epoch": 1.9716589861751153, "percentage": 98.58, "elapsed_time": "12:04:51", "remaining_time": "0:10:25"} +{"current_steps": 8558, "total_steps": 8680, "loss": 0.7180813550949097, "lr": 1.0977762632451427e-09, "epoch": 1.971889400921659, "percentage": 98.59, "elapsed_time": "12:04:57", "remaining_time": "0:10:20"} +{"current_steps": 8559, "total_steps": 8680, "loss": 0.7220569849014282, "lr": 1.0800020044093861e-09, "epoch": 1.9721198156682027, "percentage": 98.61, "elapsed_time": "12:05:03", "remaining_time": "0:10:15"} +{"current_steps": 8560, "total_steps": 8680, "loss": 0.8839110136032104, "lr": 1.0623727374889925e-09, "epoch": 1.9723502304147464, "percentage": 98.62, "elapsed_time": "12:05:08", "remaining_time": "0:10:09"} +{"current_steps": 8561, "total_steps": 8680, "loss": 0.7210807800292969, "lr": 1.0448884650426926e-09, "epoch": 1.9725806451612904, "percentage": 98.63, "elapsed_time": "12:05:12", "remaining_time": "0:10:04"} +{"current_steps": 8562, "total_steps": 8680, "loss": 0.6993537545204163, "lr": 1.0275491896084565e-09, "epoch": 1.9728110599078341, "percentage": 98.64, "elapsed_time": 
"12:05:19", "remaining_time": "0:09:59"} +{"current_steps": 8563, "total_steps": 8680, "loss": 0.6951562166213989, "lr": 1.0103549137030486e-09, "epoch": 1.9730414746543778, "percentage": 98.65, "elapsed_time": "12:05:26", "remaining_time": "0:09:54"} +{"current_steps": 8564, "total_steps": 8680, "loss": 0.855778694152832, "lr": 9.933056398220285e-10, "epoch": 1.9732718894009218, "percentage": 98.66, "elapsed_time": "12:05:33", "remaining_time": "0:09:49"} +{"current_steps": 8565, "total_steps": 8680, "loss": 0.8461301326751709, "lr": 9.76401370440194e-10, "epoch": 1.9735023041474653, "percentage": 98.68, "elapsed_time": "12:05:38", "remaining_time": "0:09:44"} +{"current_steps": 8566, "total_steps": 8680, "loss": 0.6144053936004639, "lr": 9.596421080112493e-10, "epoch": 1.9737327188940093, "percentage": 98.69, "elapsed_time": "12:05:44", "remaining_time": "0:09:39"} +{"current_steps": 8567, "total_steps": 8680, "loss": 0.6623581647872925, "lr": 9.430278549675818e-10, "epoch": 1.973963133640553, "percentage": 98.7, "elapsed_time": "12:05:49", "remaining_time": "0:09:34"} +{"current_steps": 8568, "total_steps": 8680, "loss": 0.9540686011314392, "lr": 9.265586137209292e-10, "epoch": 1.9741935483870967, "percentage": 98.71, "elapsed_time": "12:05:52", "remaining_time": "0:09:29"} +{"current_steps": 8569, "total_steps": 8680, "loss": 0.7231987714767456, "lr": 9.102343866616014e-10, "epoch": 1.9744239631336407, "percentage": 98.72, "elapsed_time": "12:05:57", "remaining_time": "0:09:24"} +{"current_steps": 8570, "total_steps": 8680, "loss": 0.7759320735931396, "lr": 8.940551761592585e-10, "epoch": 1.9746543778801844, "percentage": 98.73, "elapsed_time": "12:06:04", "remaining_time": "0:09:19"} +{"current_steps": 8571, "total_steps": 8680, "loss": 0.8277846574783325, "lr": 8.780209845621334e-10, "epoch": 1.9748847926267281, "percentage": 98.74, "elapsed_time": "12:06:09", "remaining_time": "0:09:14"} +{"current_steps": 8572, "total_steps": 8680, "loss": 
0.7913431525230408, "lr": 8.621318141974754e-10, "epoch": 1.9751152073732718, "percentage": 98.76, "elapsed_time": "12:06:14", "remaining_time": "0:09:08"} +{"current_steps": 8573, "total_steps": 8680, "loss": 0.7011829614639282, "lr": 8.46387667371773e-10, "epoch": 1.9753456221198156, "percentage": 98.77, "elapsed_time": "12:06:19", "remaining_time": "0:09:03"} +{"current_steps": 8574, "total_steps": 8680, "loss": 0.8762087821960449, "lr": 8.30788546370198e-10, "epoch": 1.9755760368663595, "percentage": 98.78, "elapsed_time": "12:06:25", "remaining_time": "0:08:58"} +{"current_steps": 8575, "total_steps": 8680, "loss": 0.7944581508636475, "lr": 8.153344534569396e-10, "epoch": 1.9758064516129032, "percentage": 98.79, "elapsed_time": "12:06:31", "remaining_time": "0:08:53"} +{"current_steps": 8576, "total_steps": 8680, "loss": 0.7086907625198364, "lr": 8.00025390875203e-10, "epoch": 1.976036866359447, "percentage": 98.8, "elapsed_time": "12:06:37", "remaining_time": "0:08:48"} +{"current_steps": 8577, "total_steps": 8680, "loss": 0.7263821959495544, "lr": 7.848613608468779e-10, "epoch": 1.976267281105991, "percentage": 98.81, "elapsed_time": "12:06:41", "remaining_time": "0:08:43"} +{"current_steps": 8578, "total_steps": 8680, "loss": 0.714054524898529, "lr": 7.698423655732034e-10, "epoch": 1.9764976958525344, "percentage": 98.82, "elapsed_time": "12:06:48", "remaining_time": "0:08:38"} +{"current_steps": 8579, "total_steps": 8680, "loss": 0.817487359046936, "lr": 7.549684072341023e-10, "epoch": 1.9767281105990784, "percentage": 98.84, "elapsed_time": "12:06:54", "remaining_time": "0:08:33"} +{"current_steps": 8580, "total_steps": 8680, "loss": 0.7933021783828735, "lr": 7.402394879885143e-10, "epoch": 1.976958525345622, "percentage": 98.85, "elapsed_time": "12:06:59", "remaining_time": "0:08:28"} +{"current_steps": 8581, "total_steps": 8680, "loss": 0.8699008822441101, "lr": 7.25655609974396e-10, "epoch": 1.9771889400921658, "percentage": 98.86, "elapsed_time": 
"12:07:03", "remaining_time": "0:08:23"} +{"current_steps": 8582, "total_steps": 8680, "loss": 0.804245114326477, "lr": 7.112167753083876e-10, "epoch": 1.9774193548387098, "percentage": 98.87, "elapsed_time": "12:07:09", "remaining_time": "0:08:18"} +{"current_steps": 8583, "total_steps": 8680, "loss": 0.8334434628486633, "lr": 6.969229860863679e-10, "epoch": 1.9776497695852533, "percentage": 98.88, "elapsed_time": "12:07:14", "remaining_time": "0:08:13"} +{"current_steps": 8584, "total_steps": 8680, "loss": 0.7549147605895996, "lr": 6.827742443831219e-10, "epoch": 1.9778801843317972, "percentage": 98.89, "elapsed_time": "12:07:19", "remaining_time": "0:08:08"} +{"current_steps": 8585, "total_steps": 8680, "loss": 0.69701087474823, "lr": 6.687705522522291e-10, "epoch": 1.978110599078341, "percentage": 98.91, "elapsed_time": "12:07:26", "remaining_time": "0:08:02"} +{"current_steps": 8586, "total_steps": 8680, "loss": 0.727588415145874, "lr": 6.549119117263969e-10, "epoch": 1.9783410138248847, "percentage": 98.92, "elapsed_time": "12:07:33", "remaining_time": "0:07:57"} +{"current_steps": 8587, "total_steps": 8680, "loss": 0.7309392094612122, "lr": 6.411983248171271e-10, "epoch": 1.9785714285714286, "percentage": 98.93, "elapsed_time": "12:07:40", "remaining_time": "0:07:52"} +{"current_steps": 8588, "total_steps": 8680, "loss": 0.8299658298492432, "lr": 6.276297935149388e-10, "epoch": 1.9788018433179724, "percentage": 98.94, "elapsed_time": "12:07:45", "remaining_time": "0:07:47"} +{"current_steps": 8589, "total_steps": 8680, "loss": 0.9731055498123169, "lr": 6.142063197892566e-10, "epoch": 1.979032258064516, "percentage": 98.95, "elapsed_time": "12:07:51", "remaining_time": "0:07:42"} +{"current_steps": 8590, "total_steps": 8680, "loss": 0.6292351484298706, "lr": 6.009279055885219e-10, "epoch": 1.97926267281106, "percentage": 98.96, "elapsed_time": "12:07:58", "remaining_time": "0:07:37"} +{"current_steps": 8591, "total_steps": 8680, "loss": 0.7881810665130615, 
"lr": 5.877945528400818e-10, "epoch": 1.9794930875576036, "percentage": 98.97, "elapsed_time": "12:08:04", "remaining_time": "0:07:32"} +{"current_steps": 8592, "total_steps": 8680, "loss": 0.7910494804382324, "lr": 5.748062634501894e-10, "epoch": 1.9797235023041475, "percentage": 98.99, "elapsed_time": "12:08:10", "remaining_time": "0:07:27"} +{"current_steps": 8593, "total_steps": 8680, "loss": 0.8255902528762817, "lr": 5.619630393042252e-10, "epoch": 1.9799539170506912, "percentage": 99.0, "elapsed_time": "12:08:17", "remaining_time": "0:07:22"} +{"current_steps": 8594, "total_steps": 8680, "loss": 0.788017749786377, "lr": 5.492648822660318e-10, "epoch": 1.980184331797235, "percentage": 99.01, "elapsed_time": "12:08:25", "remaining_time": "0:07:17"} +{"current_steps": 8595, "total_steps": 8680, "loss": 0.8717716932296753, "lr": 5.367117941791343e-10, "epoch": 1.980414746543779, "percentage": 99.02, "elapsed_time": "12:08:31", "remaining_time": "0:07:12"} +{"current_steps": 8596, "total_steps": 8680, "loss": 0.7220178246498108, "lr": 5.243037768652981e-10, "epoch": 1.9806451612903224, "percentage": 99.03, "elapsed_time": "12:08:37", "remaining_time": "0:07:07"} +{"current_steps": 8597, "total_steps": 8680, "loss": 0.7536830902099609, "lr": 5.120408321256376e-10, "epoch": 1.9808755760368664, "percentage": 99.04, "elapsed_time": "12:08:44", "remaining_time": "0:07:02"} +{"current_steps": 8598, "total_steps": 8680, "loss": 0.7480939626693726, "lr": 4.999229617401735e-10, "epoch": 1.98110599078341, "percentage": 99.06, "elapsed_time": "12:08:51", "remaining_time": "0:06:57"} +{"current_steps": 8599, "total_steps": 8680, "loss": 0.7168867588043213, "lr": 4.879501674676101e-10, "epoch": 1.9813364055299538, "percentage": 99.07, "elapsed_time": "12:08:58", "remaining_time": "0:06:52"} +{"current_steps": 8600, "total_steps": 8680, "loss": 0.8352792263031006, "lr": 4.761224510460016e-10, "epoch": 1.9815668202764978, "percentage": 99.08, "elapsed_time": "12:09:04", 
"remaining_time": "0:06:46"} +{"current_steps": 8601, "total_steps": 8680, "loss": 0.6987372636795044, "lr": 4.644398141919748e-10, "epoch": 1.9817972350230415, "percentage": 99.09, "elapsed_time": "12:09:15", "remaining_time": "0:06:41"} +{"current_steps": 8602, "total_steps": 8680, "loss": 0.6844612956047058, "lr": 4.5290225860128426e-10, "epoch": 1.9820276497695852, "percentage": 99.1, "elapsed_time": "12:09:22", "remaining_time": "0:06:36"} +{"current_steps": 8603, "total_steps": 8680, "loss": 0.7659348249435425, "lr": 4.4150978594859055e-10, "epoch": 1.9822580645161292, "percentage": 99.11, "elapsed_time": "12:09:27", "remaining_time": "0:06:31"} +{"current_steps": 8604, "total_steps": 8680, "loss": 0.8163154125213623, "lr": 4.3026239788757077e-10, "epoch": 1.9824884792626727, "percentage": 99.12, "elapsed_time": "12:09:33", "remaining_time": "0:06:26"} +{"current_steps": 8605, "total_steps": 8680, "loss": 0.8688125610351562, "lr": 4.191600960505859e-10, "epoch": 1.9827188940092166, "percentage": 99.14, "elapsed_time": "12:09:40", "remaining_time": "0:06:21"} +{"current_steps": 8606, "total_steps": 8680, "loss": 0.8250670433044434, "lr": 4.082028820493466e-10, "epoch": 1.9829493087557604, "percentage": 99.15, "elapsed_time": "12:09:46", "remaining_time": "0:06:16"} +{"current_steps": 8607, "total_steps": 8680, "loss": 0.9378982782363892, "lr": 3.973907574741364e-10, "epoch": 1.983179723502304, "percentage": 99.16, "elapsed_time": "12:09:52", "remaining_time": "0:06:11"} +{"current_steps": 8608, "total_steps": 8680, "loss": 0.8764913082122803, "lr": 3.867237238943666e-10, "epoch": 1.983410138248848, "percentage": 99.17, "elapsed_time": "12:09:58", "remaining_time": "0:06:06"} +{"current_steps": 8609, "total_steps": 8680, "loss": 0.7690116763114929, "lr": 3.762017828583541e-10, "epoch": 1.9836405529953915, "percentage": 99.18, "elapsed_time": "12:10:04", "remaining_time": "0:06:01"} +{"current_steps": 8610, "total_steps": 8680, "loss": 0.6977133750915527, "lr": 
3.6582493589332187e-10, "epoch": 1.9838709677419355, "percentage": 99.19, "elapsed_time": "12:10:09", "remaining_time": "0:05:56"} +{"current_steps": 8611, "total_steps": 8680, "loss": 0.7362618446350098, "lr": 3.5559318450539835e-10, "epoch": 1.9841013824884792, "percentage": 99.21, "elapsed_time": "12:10:16", "remaining_time": "0:05:51"} +{"current_steps": 8612, "total_steps": 8680, "loss": 0.7065306305885315, "lr": 3.455065301798399e-10, "epoch": 1.984331797235023, "percentage": 99.22, "elapsed_time": "12:10:22", "remaining_time": "0:05:46"} +{"current_steps": 8613, "total_steps": 8680, "loss": 0.812393307685852, "lr": 3.355649743805866e-10, "epoch": 1.984562211981567, "percentage": 99.23, "elapsed_time": "12:10:27", "remaining_time": "0:05:40"} +{"current_steps": 8614, "total_steps": 8680, "loss": 0.6947695016860962, "lr": 3.2576851855070644e-10, "epoch": 1.9847926267281106, "percentage": 99.24, "elapsed_time": "12:10:35", "remaining_time": "0:05:35"} +{"current_steps": 8615, "total_steps": 8680, "loss": 0.6745340824127197, "lr": 3.161171641121729e-10, "epoch": 1.9850230414746544, "percentage": 99.25, "elapsed_time": "12:10:42", "remaining_time": "0:05:30"} +{"current_steps": 8616, "total_steps": 8680, "loss": 0.7426450848579407, "lr": 3.0661091246575454e-10, "epoch": 1.9852534562211983, "percentage": 99.26, "elapsed_time": "12:10:47", "remaining_time": "0:05:25"} +{"current_steps": 8617, "total_steps": 8680, "loss": 0.7769409418106079, "lr": 2.9724976499134745e-10, "epoch": 1.9854838709677418, "percentage": 99.27, "elapsed_time": "12:10:53", "remaining_time": "0:05:20"} +{"current_steps": 8618, "total_steps": 8680, "loss": 0.9591978192329407, "lr": 2.8803372304775365e-10, "epoch": 1.9857142857142858, "percentage": 99.29, "elapsed_time": "12:10:58", "remaining_time": "0:05:15"} +{"current_steps": 8619, "total_steps": 8680, "loss": 0.7504953742027283, "lr": 2.789627879725698e-10, "epoch": 1.9859447004608295, "percentage": 99.3, "elapsed_time": "12:11:04", 
"remaining_time": "0:05:10"} +{"current_steps": 8620, "total_steps": 8680, "loss": 0.8990021347999573, "lr": 2.700369610825204e-10, "epoch": 1.9861751152073732, "percentage": 99.31, "elapsed_time": "12:11:10", "remaining_time": "0:05:05"} +{"current_steps": 8621, "total_steps": 8680, "loss": 0.786778450012207, "lr": 2.612562436731247e-10, "epoch": 1.9864055299539172, "percentage": 99.32, "elapsed_time": "12:11:15", "remaining_time": "0:05:00"} +{"current_steps": 8622, "total_steps": 8680, "loss": 0.7387717366218567, "lr": 2.526206370189188e-10, "epoch": 1.9866359447004607, "percentage": 99.33, "elapsed_time": "12:11:21", "remaining_time": "0:04:55"} +{"current_steps": 8623, "total_steps": 8680, "loss": 0.7672144174575806, "lr": 2.4413014237323336e-10, "epoch": 1.9868663594470046, "percentage": 99.34, "elapsed_time": "12:11:28", "remaining_time": "0:04:50"} +{"current_steps": 8624, "total_steps": 8680, "loss": 0.6191907525062561, "lr": 2.357847609686381e-10, "epoch": 1.9870967741935484, "percentage": 99.35, "elapsed_time": "12:11:34", "remaining_time": "0:04:45"} +{"current_steps": 8625, "total_steps": 8680, "loss": 0.7257785201072693, "lr": 2.2758449401638624e-10, "epoch": 1.987327188940092, "percentage": 99.37, "elapsed_time": "12:11:40", "remaining_time": "0:04:39"} +{"current_steps": 8626, "total_steps": 8680, "loss": 0.6997271776199341, "lr": 2.195293427066369e-10, "epoch": 1.987557603686636, "percentage": 99.38, "elapsed_time": "12:11:46", "remaining_time": "0:04:34"} +{"current_steps": 8627, "total_steps": 8680, "loss": 0.7813891768455505, "lr": 2.1161930820878804e-10, "epoch": 1.9877880184331798, "percentage": 99.39, "elapsed_time": "12:11:52", "remaining_time": "0:04:29"} +{"current_steps": 8628, "total_steps": 8680, "loss": 0.8003429174423218, "lr": 2.0385439167069917e-10, "epoch": 1.9880184331797235, "percentage": 99.4, "elapsed_time": "12:11:58", "remaining_time": "0:04:24"} +{"current_steps": 8629, "total_steps": 8680, "loss": 0.8020645380020142, "lr": 
1.962345942196908e-10, "epoch": 1.9882488479262674, "percentage": 99.41, "elapsed_time": "12:12:03", "remaining_time": "0:04:19"} +{"current_steps": 8630, "total_steps": 8680, "loss": 0.9189429879188538, "lr": 1.8875991696165604e-10, "epoch": 1.988479262672811, "percentage": 99.42, "elapsed_time": "12:12:08", "remaining_time": "0:04:14"} +{"current_steps": 8631, "total_steps": 8680, "loss": 0.7399884462356567, "lr": 1.8143036098150487e-10, "epoch": 1.988709677419355, "percentage": 99.44, "elapsed_time": "12:12:14", "remaining_time": "0:04:09"} +{"current_steps": 8632, "total_steps": 8680, "loss": 0.7725361585617065, "lr": 1.7424592734316402e-10, "epoch": 1.9889400921658986, "percentage": 99.45, "elapsed_time": "12:12:19", "remaining_time": "0:04:04"} +{"current_steps": 8633, "total_steps": 8680, "loss": 0.7887094020843506, "lr": 1.6720661708946593e-10, "epoch": 1.9891705069124423, "percentage": 99.46, "elapsed_time": "12:12:25", "remaining_time": "0:03:59"} +{"current_steps": 8634, "total_steps": 8680, "loss": 0.8007388114929199, "lr": 1.6031243124203786e-10, "epoch": 1.9894009216589863, "percentage": 99.47, "elapsed_time": "12:12:32", "remaining_time": "0:03:54"} +{"current_steps": 8635, "total_steps": 8680, "loss": 0.6478462219238281, "lr": 1.5356337080174587e-10, "epoch": 1.9896313364055298, "percentage": 99.48, "elapsed_time": "12:12:39", "remaining_time": "0:03:49"} +{"current_steps": 8636, "total_steps": 8680, "loss": 0.8274422287940979, "lr": 1.469594367480287e-10, "epoch": 1.9898617511520738, "percentage": 99.49, "elapsed_time": "12:12:44", "remaining_time": "0:03:43"} +{"current_steps": 8637, "total_steps": 8680, "loss": 0.7919641733169556, "lr": 1.4050063003956391e-10, "epoch": 1.9900921658986175, "percentage": 99.5, "elapsed_time": "12:12:49", "remaining_time": "0:03:38"} +{"current_steps": 8638, "total_steps": 8680, "loss": 0.7973719239234924, "lr": 1.3418695161382388e-10, "epoch": 1.9903225806451612, "percentage": 99.52, "elapsed_time": "12:12:55", 
"remaining_time": "0:03:33"} +{"current_steps": 8639, "total_steps": 8680, "loss": 0.8002075552940369, "lr": 1.280184023870756e-10, "epoch": 1.9905529953917052, "percentage": 99.53, "elapsed_time": "12:13:00", "remaining_time": "0:03:28"} +{"current_steps": 8640, "total_steps": 8680, "loss": 0.748448371887207, "lr": 1.2199498325482506e-10, "epoch": 1.9907834101382489, "percentage": 99.54, "elapsed_time": "12:13:05", "remaining_time": "0:03:23"} +{"current_steps": 8641, "total_steps": 8680, "loss": 0.7333977222442627, "lr": 1.1611669509137278e-10, "epoch": 1.9910138248847926, "percentage": 99.55, "elapsed_time": "12:13:13", "remaining_time": "0:03:18"} +{"current_steps": 8642, "total_steps": 8680, "loss": 0.7760608196258545, "lr": 1.1038353874992524e-10, "epoch": 1.9912442396313366, "percentage": 99.56, "elapsed_time": "12:13:17", "remaining_time": "0:03:13"} +{"current_steps": 8643, "total_steps": 8680, "loss": 0.6129526495933533, "lr": 1.0479551506259455e-10, "epoch": 1.99147465437788, "percentage": 99.57, "elapsed_time": "12:13:24", "remaining_time": "0:03:08"} +{"current_steps": 8644, "total_steps": 8680, "loss": 0.68567955493927, "lr": 9.935262484062068e-11, "epoch": 1.991705069124424, "percentage": 99.59, "elapsed_time": "12:13:30", "remaining_time": "0:03:03"} +{"current_steps": 8645, "total_steps": 8680, "loss": 0.9042092561721802, "lr": 9.405486887381631e-11, "epoch": 1.9919354838709677, "percentage": 99.6, "elapsed_time": "12:13:36", "remaining_time": "0:02:58"} +{"current_steps": 8646, "total_steps": 8680, "loss": 0.7143117189407349, "lr": 8.890224793123291e-11, "epoch": 1.9921658986175115, "percentage": 99.61, "elapsed_time": "12:13:42", "remaining_time": "0:02:53"} +{"current_steps": 8647, "total_steps": 8680, "loss": 0.7486213445663452, "lr": 8.389476276071672e-11, "epoch": 1.9923963133640554, "percentage": 99.62, "elapsed_time": "12:13:48", "remaining_time": "0:02:48"} +{"current_steps": 8648, "total_steps": 8680, "loss": 0.8554232716560364, "lr": 
7.903241408924177e-11, "epoch": 1.992626728110599, "percentage": 99.63, "elapsed_time": "12:13:54", "remaining_time": "0:02:42"} +{"current_steps": 8649, "total_steps": 8680, "loss": 0.6604819297790527, "lr": 7.431520262246582e-11, "epoch": 1.9928571428571429, "percentage": 99.64, "elapsed_time": "12:14:00", "remaining_time": "0:02:37"} +{"current_steps": 8650, "total_steps": 8680, "loss": 0.8032737970352173, "lr": 6.974312904517443e-11, "epoch": 1.9930875576036866, "percentage": 99.65, "elapsed_time": "12:14:05", "remaining_time": "0:02:32"} +{"current_steps": 8651, "total_steps": 8680, "loss": 0.7712494730949402, "lr": 6.531619402083687e-11, "epoch": 1.9933179723502303, "percentage": 99.67, "elapsed_time": "12:14:11", "remaining_time": "0:02:27"} +{"current_steps": 8652, "total_steps": 8680, "loss": 0.7894617915153503, "lr": 6.103439819216127e-11, "epoch": 1.9935483870967743, "percentage": 99.68, "elapsed_time": "12:14:17", "remaining_time": "0:02:22"} +{"current_steps": 8653, "total_steps": 8680, "loss": 0.8386135697364807, "lr": 5.689774218065046e-11, "epoch": 1.993778801843318, "percentage": 99.69, "elapsed_time": "12:14:23", "remaining_time": "0:02:17"} +{"current_steps": 8654, "total_steps": 8680, "loss": 0.744853138923645, "lr": 5.290622658660204e-11, "epoch": 1.9940092165898617, "percentage": 99.7, "elapsed_time": "12:14:29", "remaining_time": "0:02:12"} +{"current_steps": 8655, "total_steps": 8680, "loss": 0.7604823112487793, "lr": 4.90598519894414e-11, "epoch": 1.9942396313364057, "percentage": 99.71, "elapsed_time": "12:14:38", "remaining_time": "0:02:07"} +{"current_steps": 8656, "total_steps": 8680, "loss": 0.7552424669265747, "lr": 4.53586189474997e-11, "epoch": 1.9944700460829492, "percentage": 99.72, "elapsed_time": "12:14:44", "remaining_time": "0:02:02"} +{"current_steps": 8657, "total_steps": 8680, "loss": 0.9652698636054993, "lr": 4.180252799801387e-11, "epoch": 1.9947004608294931, "percentage": 99.74, "elapsed_time": "12:14:50", 
"remaining_time": "0:01:57"} +{"current_steps": 8658, "total_steps": 8680, "loss": 0.9589856266975403, "lr": 3.839157965712658e-11, "epoch": 1.9949308755760369, "percentage": 99.75, "elapsed_time": "12:14:56", "remaining_time": "0:01:52"} +{"current_steps": 8659, "total_steps": 8680, "loss": 0.6802269220352173, "lr": 3.512577441988629e-11, "epoch": 1.9951612903225806, "percentage": 99.76, "elapsed_time": "12:15:02", "remaining_time": "0:01:46"} +{"current_steps": 8660, "total_steps": 8680, "loss": 0.8262367248535156, "lr": 3.200511276035822e-11, "epoch": 1.9953917050691246, "percentage": 99.77, "elapsed_time": "12:15:09", "remaining_time": "0:01:41"} +{"current_steps": 8661, "total_steps": 8680, "loss": 0.8353632688522339, "lr": 2.9029595131513372e-11, "epoch": 1.995622119815668, "percentage": 99.78, "elapsed_time": "12:15:15", "remaining_time": "0:01:36"} +{"current_steps": 8662, "total_steps": 8680, "loss": 0.8807231187820435, "lr": 2.61992219652285e-11, "epoch": 1.995852534562212, "percentage": 99.79, "elapsed_time": "12:15:23", "remaining_time": "0:01:31"} +{"current_steps": 8663, "total_steps": 8680, "loss": 0.8394359350204468, "lr": 2.3513993672397148e-11, "epoch": 1.9960829493087557, "percentage": 99.8, "elapsed_time": "12:15:29", "remaining_time": "0:01:26"} +{"current_steps": 8664, "total_steps": 8680, "loss": 0.8343399167060852, "lr": 2.0973910642707592e-11, "epoch": 1.9963133640552995, "percentage": 99.82, "elapsed_time": "12:15:35", "remaining_time": "0:01:21"} +{"current_steps": 8665, "total_steps": 8680, "loss": 0.7168834209442139, "lr": 1.857897324475388e-11, "epoch": 1.9965437788018434, "percentage": 99.83, "elapsed_time": "12:15:41", "remaining_time": "0:01:16"} +{"current_steps": 8666, "total_steps": 8680, "loss": 0.7825703620910645, "lr": 1.6329181826257866e-11, "epoch": 1.9967741935483871, "percentage": 99.84, "elapsed_time": "12:15:46", "remaining_time": "0:01:11"} +{"current_steps": 8667, "total_steps": 8680, "loss": 0.6497002840042114, "lr": 
1.4224536713847157e-11, "epoch": 1.9970046082949309, "percentage": 99.85, "elapsed_time": "12:15:50", "remaining_time": "0:01:06"} +{"current_steps": 8668, "total_steps": 8680, "loss": 0.8188776969909668, "lr": 1.2265038212944112e-11, "epoch": 1.9972350230414746, "percentage": 99.86, "elapsed_time": "12:15:55", "remaining_time": "0:01:01"} +{"current_steps": 8669, "total_steps": 8680, "loss": 0.898658812046051, "lr": 1.0450686607987869e-11, "epoch": 1.9974654377880183, "percentage": 99.87, "elapsed_time": "12:16:00", "remaining_time": "0:00:56"} +{"current_steps": 8670, "total_steps": 8680, "loss": 0.8580871820449829, "lr": 8.781482162212306e-12, "epoch": 1.9976958525345623, "percentage": 99.88, "elapsed_time": "12:16:06", "remaining_time": "0:00:50"} +{"current_steps": 8671, "total_steps": 8680, "loss": 0.7657710313796997, "lr": 7.25742511797911e-12, "epoch": 1.997926267281106, "percentage": 99.9, "elapsed_time": "12:16:12", "remaining_time": "0:00:45"} +{"current_steps": 8672, "total_steps": 8680, "loss": 0.7881382703781128, "lr": 5.87851569655573e-12, "epoch": 1.9981566820276497, "percentage": 99.91, "elapsed_time": "12:16:16", "remaining_time": "0:00:40"} +{"current_steps": 8673, "total_steps": 8680, "loss": 0.8711144924163818, "lr": 4.644754098004356e-12, "epoch": 1.9983870967741937, "percentage": 99.92, "elapsed_time": "12:16:22", "remaining_time": "0:00:35"} +{"current_steps": 8674, "total_steps": 8680, "loss": 0.6993192434310913, "lr": 3.5561405015149814e-12, "epoch": 1.9986175115207372, "percentage": 99.93, "elapsed_time": "12:16:28", "remaining_time": "0:00:30"} +{"current_steps": 8675, "total_steps": 8680, "loss": 0.7348669767379761, "lr": 2.6126750650723452e-12, "epoch": 1.9988479262672811, "percentage": 99.94, "elapsed_time": "12:16:36", "remaining_time": "0:00:25"} +{"current_steps": 8676, "total_steps": 8680, "loss": 0.7356513142585754, "lr": 1.8143579254559227e-12, "epoch": 1.9990783410138249, "percentage": 99.95, "elapsed_time": "12:16:41", 
"remaining_time": "0:00:20"} +{"current_steps": 8677, "total_steps": 8680, "loss": 0.7969627380371094, "lr": 1.1611891986840206e-12, "epoch": 1.9993087557603686, "percentage": 99.97, "elapsed_time": "12:16:46", "remaining_time": "0:00:15"} +{"current_steps": 8678, "total_steps": 8680, "loss": 0.7247132062911987, "lr": 6.531689795696848e-13, "epoch": 1.9995391705069125, "percentage": 99.98, "elapsed_time": "12:16:51", "remaining_time": "0:00:10"} +{"current_steps": 8679, "total_steps": 8680, "loss": 0.6177656650543213, "lr": 2.902973418317245e-13, "epoch": 1.9997695852534563, "percentage": 99.99, "elapsed_time": "12:16:56", "remaining_time": "0:00:05"} +{"current_steps": 8680, "total_steps": 8680, "loss": 0.8378380537033081, "lr": 7.25743380947108e-14, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "12:17:01", "remaining_time": "0:00:00"} +{"current_steps": 8680, "total_steps": 8680, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "12:17:04", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e40f721eda766278a9158a690a3004faa251c61f --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,60803 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 8680, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002304147465437788, + "grad_norm": 0.3584135221139379, + "learning_rate": 0.0, + "loss": 1.1575632095336914, + "step": 1 + }, + { + "epoch": 0.0004608294930875576, + "grad_norm": 0.3035367055626511, + "learning_rate": 4.6082949308755755e-09, + "loss": 0.9973502159118652, + "step": 2 + }, + { + "epoch": 0.0006912442396313364, + "grad_norm": 0.39685233086299543, + "learning_rate": 9.216589861751151e-09, + "loss": 1.0778999328613281, + "step": 3 + }, + { + "epoch": 0.0009216589861751152, + 
"grad_norm": 0.4029042979509503, + "learning_rate": 1.3824884792626728e-08, + "loss": 1.1912263631820679, + "step": 4 + }, + { + "epoch": 0.001152073732718894, + "grad_norm": 0.3943812894307851, + "learning_rate": 1.8433179723502302e-08, + "loss": 1.136031150817871, + "step": 5 + }, + { + "epoch": 0.0013824884792626728, + "grad_norm": 0.472718552613566, + "learning_rate": 2.304147465437788e-08, + "loss": 1.1647956371307373, + "step": 6 + }, + { + "epoch": 0.0016129032258064516, + "grad_norm": 0.4378363913681294, + "learning_rate": 2.7649769585253456e-08, + "loss": 1.144924283027649, + "step": 7 + }, + { + "epoch": 0.0018433179723502304, + "grad_norm": 0.412264706125121, + "learning_rate": 3.225806451612903e-08, + "loss": 1.1821019649505615, + "step": 8 + }, + { + "epoch": 0.0020737327188940094, + "grad_norm": 0.35864626774735575, + "learning_rate": 3.6866359447004604e-08, + "loss": 1.0586045980453491, + "step": 9 + }, + { + "epoch": 0.002304147465437788, + "grad_norm": 0.497058147699291, + "learning_rate": 4.1474654377880186e-08, + "loss": 1.2029818296432495, + "step": 10 + }, + { + "epoch": 0.002534562211981567, + "grad_norm": 0.465265464928516, + "learning_rate": 4.608294930875576e-08, + "loss": 1.1411634683609009, + "step": 11 + }, + { + "epoch": 0.0027649769585253456, + "grad_norm": 0.4356529753705429, + "learning_rate": 5.069124423963134e-08, + "loss": 1.2719087600708008, + "step": 12 + }, + { + "epoch": 0.0029953917050691246, + "grad_norm": 0.4469831586732583, + "learning_rate": 5.529953917050691e-08, + "loss": 1.1132495403289795, + "step": 13 + }, + { + "epoch": 0.0032258064516129032, + "grad_norm": 0.3918942421249174, + "learning_rate": 5.990783410138249e-08, + "loss": 1.1900808811187744, + "step": 14 + }, + { + "epoch": 0.0034562211981566822, + "grad_norm": 0.33446734054876004, + "learning_rate": 6.451612903225806e-08, + "loss": 1.2273608446121216, + "step": 15 + }, + { + "epoch": 0.003686635944700461, + "grad_norm": 0.4610551419026991, + "learning_rate": 
6.912442396313364e-08, + "loss": 1.2130601406097412, + "step": 16 + }, + { + "epoch": 0.00391705069124424, + "grad_norm": 0.4765520188128542, + "learning_rate": 7.373271889400921e-08, + "loss": 1.0534124374389648, + "step": 17 + }, + { + "epoch": 0.004147465437788019, + "grad_norm": 0.4247458361448018, + "learning_rate": 7.834101382488478e-08, + "loss": 1.1796221733093262, + "step": 18 + }, + { + "epoch": 0.004377880184331797, + "grad_norm": 0.42651087679972033, + "learning_rate": 8.294930875576037e-08, + "loss": 1.118175745010376, + "step": 19 + }, + { + "epoch": 0.004608294930875576, + "grad_norm": 0.37538111415149067, + "learning_rate": 8.755760368663594e-08, + "loss": 1.140963077545166, + "step": 20 + }, + { + "epoch": 0.004838709677419355, + "grad_norm": 0.39373769301837386, + "learning_rate": 9.216589861751152e-08, + "loss": 1.107339859008789, + "step": 21 + }, + { + "epoch": 0.005069124423963134, + "grad_norm": 0.5053900590341595, + "learning_rate": 9.677419354838709e-08, + "loss": 1.171803593635559, + "step": 22 + }, + { + "epoch": 0.005299539170506912, + "grad_norm": 0.32897537004851696, + "learning_rate": 1.0138248847926267e-07, + "loss": 0.9935251474380493, + "step": 23 + }, + { + "epoch": 0.005529953917050691, + "grad_norm": 0.4355535799950001, + "learning_rate": 1.0599078341013824e-07, + "loss": 1.0870952606201172, + "step": 24 + }, + { + "epoch": 0.00576036866359447, + "grad_norm": 0.5215895570336967, + "learning_rate": 1.1059907834101383e-07, + "loss": 1.1520278453826904, + "step": 25 + }, + { + "epoch": 0.005990783410138249, + "grad_norm": 0.4878994851998504, + "learning_rate": 1.152073732718894e-07, + "loss": 1.3603750467300415, + "step": 26 + }, + { + "epoch": 0.006221198156682027, + "grad_norm": 0.3985371704289713, + "learning_rate": 1.1981566820276498e-07, + "loss": 1.230550765991211, + "step": 27 + }, + { + "epoch": 0.0064516129032258064, + "grad_norm": 0.4105556408349015, + "learning_rate": 1.2442396313364054e-07, + "loss": 1.267604112625122, 
+ "step": 28 + }, + { + "epoch": 0.0066820276497695855, + "grad_norm": 0.3604672745500653, + "learning_rate": 1.2903225806451611e-07, + "loss": 1.344348669052124, + "step": 29 + }, + { + "epoch": 0.0069124423963133645, + "grad_norm": 0.42234881975895605, + "learning_rate": 1.336405529953917e-07, + "loss": 1.2794291973114014, + "step": 30 + }, + { + "epoch": 0.007142857142857143, + "grad_norm": 0.39749887698930225, + "learning_rate": 1.3824884792626728e-07, + "loss": 1.2841103076934814, + "step": 31 + }, + { + "epoch": 0.007373271889400922, + "grad_norm": 0.34204310388035036, + "learning_rate": 1.4285714285714285e-07, + "loss": 1.1505224704742432, + "step": 32 + }, + { + "epoch": 0.007603686635944701, + "grad_norm": 0.36676388907062357, + "learning_rate": 1.4746543778801842e-07, + "loss": 0.9800833463668823, + "step": 33 + }, + { + "epoch": 0.00783410138248848, + "grad_norm": 0.4216809539302965, + "learning_rate": 1.52073732718894e-07, + "loss": 1.3712589740753174, + "step": 34 + }, + { + "epoch": 0.008064516129032258, + "grad_norm": 0.46644559931224167, + "learning_rate": 1.5668202764976955e-07, + "loss": 1.2274689674377441, + "step": 35 + }, + { + "epoch": 0.008294930875576038, + "grad_norm": 0.41359150478695417, + "learning_rate": 1.6129032258064515e-07, + "loss": 1.0673755407333374, + "step": 36 + }, + { + "epoch": 0.008525345622119816, + "grad_norm": 0.534062363030203, + "learning_rate": 1.6589861751152074e-07, + "loss": 1.242164134979248, + "step": 37 + }, + { + "epoch": 0.008755760368663594, + "grad_norm": 0.48756247774131056, + "learning_rate": 1.705069124423963e-07, + "loss": 1.190554141998291, + "step": 38 + }, + { + "epoch": 0.008986175115207374, + "grad_norm": 0.35848282094721656, + "learning_rate": 1.7511520737327188e-07, + "loss": 1.3119773864746094, + "step": 39 + }, + { + "epoch": 0.009216589861751152, + "grad_norm": 0.4466769921356875, + "learning_rate": 1.7972350230414745e-07, + "loss": 1.2532517910003662, + "step": 40 + }, + { + "epoch": 
0.00944700460829493, + "grad_norm": 0.4271763580587928, + "learning_rate": 1.8433179723502305e-07, + "loss": 1.307154655456543, + "step": 41 + }, + { + "epoch": 0.00967741935483871, + "grad_norm": 0.432221455567464, + "learning_rate": 1.889400921658986e-07, + "loss": 1.1899281740188599, + "step": 42 + }, + { + "epoch": 0.009907834101382488, + "grad_norm": 0.48501644393966153, + "learning_rate": 1.9354838709677418e-07, + "loss": 1.1928249597549438, + "step": 43 + }, + { + "epoch": 0.010138248847926268, + "grad_norm": 0.35170632131851265, + "learning_rate": 1.9815668202764975e-07, + "loss": 1.1663157939910889, + "step": 44 + }, + { + "epoch": 0.010368663594470046, + "grad_norm": 0.43449129429745276, + "learning_rate": 2.0276497695852535e-07, + "loss": 1.1806118488311768, + "step": 45 + }, + { + "epoch": 0.010599078341013824, + "grad_norm": 0.39933118678172597, + "learning_rate": 2.073732718894009e-07, + "loss": 1.1704952716827393, + "step": 46 + }, + { + "epoch": 0.010829493087557604, + "grad_norm": 0.46071358975984034, + "learning_rate": 2.1198156682027649e-07, + "loss": 1.2124149799346924, + "step": 47 + }, + { + "epoch": 0.011059907834101382, + "grad_norm": 0.325920139351066, + "learning_rate": 2.1658986175115208e-07, + "loss": 1.041813850402832, + "step": 48 + }, + { + "epoch": 0.01129032258064516, + "grad_norm": 0.4189805583015969, + "learning_rate": 2.2119815668202765e-07, + "loss": 1.255402684211731, + "step": 49 + }, + { + "epoch": 0.01152073732718894, + "grad_norm": 0.369986826532368, + "learning_rate": 2.2580645161290322e-07, + "loss": 1.1115221977233887, + "step": 50 + }, + { + "epoch": 0.011751152073732719, + "grad_norm": 0.501835295036206, + "learning_rate": 2.304147465437788e-07, + "loss": 1.4048426151275635, + "step": 51 + }, + { + "epoch": 0.011981566820276499, + "grad_norm": 0.38759638044019523, + "learning_rate": 2.3502304147465438e-07, + "loss": 1.1690936088562012, + "step": 52 + }, + { + "epoch": 0.012211981566820277, + "grad_norm": 
0.43771993971927803, + "learning_rate": 2.3963133640552995e-07, + "loss": 1.164888620376587, + "step": 53 + }, + { + "epoch": 0.012442396313364055, + "grad_norm": 0.5047093250847474, + "learning_rate": 2.442396313364055e-07, + "loss": 1.004424810409546, + "step": 54 + }, + { + "epoch": 0.012672811059907835, + "grad_norm": 0.371768250028493, + "learning_rate": 2.488479262672811e-07, + "loss": 0.8810856342315674, + "step": 55 + }, + { + "epoch": 0.012903225806451613, + "grad_norm": 0.41437582347111235, + "learning_rate": 2.534562211981567e-07, + "loss": 1.300262451171875, + "step": 56 + }, + { + "epoch": 0.013133640552995391, + "grad_norm": 0.44923919860912964, + "learning_rate": 2.5806451612903223e-07, + "loss": 1.3624285459518433, + "step": 57 + }, + { + "epoch": 0.013364055299539171, + "grad_norm": 0.37916325568511644, + "learning_rate": 2.6267281105990777e-07, + "loss": 1.2133375406265259, + "step": 58 + }, + { + "epoch": 0.013594470046082949, + "grad_norm": 0.3665676434937369, + "learning_rate": 2.672811059907834e-07, + "loss": 1.2203283309936523, + "step": 59 + }, + { + "epoch": 0.013824884792626729, + "grad_norm": 0.4314731168039537, + "learning_rate": 2.7188940092165896e-07, + "loss": 1.291412353515625, + "step": 60 + }, + { + "epoch": 0.014055299539170507, + "grad_norm": 0.46787898249820037, + "learning_rate": 2.7649769585253456e-07, + "loss": 1.1596577167510986, + "step": 61 + }, + { + "epoch": 0.014285714285714285, + "grad_norm": 0.34850075759056304, + "learning_rate": 2.8110599078341015e-07, + "loss": 0.9789823889732361, + "step": 62 + }, + { + "epoch": 0.014516129032258065, + "grad_norm": 0.46810420323672, + "learning_rate": 2.857142857142857e-07, + "loss": 1.220383882522583, + "step": 63 + }, + { + "epoch": 0.014746543778801843, + "grad_norm": 0.36577992953429955, + "learning_rate": 2.903225806451613e-07, + "loss": 1.0961871147155762, + "step": 64 + }, + { + "epoch": 0.014976958525345621, + "grad_norm": 0.4155727286496237, + "learning_rate": 
2.9493087557603683e-07, + "loss": 1.2281936407089233, + "step": 65 + }, + { + "epoch": 0.015207373271889401, + "grad_norm": 0.48770399467414544, + "learning_rate": 2.9953917050691243e-07, + "loss": 1.279728889465332, + "step": 66 + }, + { + "epoch": 0.01543778801843318, + "grad_norm": 0.3697109399388579, + "learning_rate": 3.04147465437788e-07, + "loss": 1.0932798385620117, + "step": 67 + }, + { + "epoch": 0.01566820276497696, + "grad_norm": 0.4768828309013543, + "learning_rate": 3.0875576036866356e-07, + "loss": 1.1612955331802368, + "step": 68 + }, + { + "epoch": 0.015898617511520736, + "grad_norm": 0.335260500319883, + "learning_rate": 3.133640552995391e-07, + "loss": 1.193152666091919, + "step": 69 + }, + { + "epoch": 0.016129032258064516, + "grad_norm": 0.3754577001974335, + "learning_rate": 3.1797235023041476e-07, + "loss": 1.3303695917129517, + "step": 70 + }, + { + "epoch": 0.016359447004608296, + "grad_norm": 0.5384978005623245, + "learning_rate": 3.225806451612903e-07, + "loss": 1.3735731840133667, + "step": 71 + }, + { + "epoch": 0.016589861751152075, + "grad_norm": 0.44147085813841874, + "learning_rate": 3.271889400921659e-07, + "loss": 1.162925124168396, + "step": 72 + }, + { + "epoch": 0.016820276497695852, + "grad_norm": 0.46260262466297236, + "learning_rate": 3.317972350230415e-07, + "loss": 1.3879203796386719, + "step": 73 + }, + { + "epoch": 0.017050691244239632, + "grad_norm": 0.33864035083037825, + "learning_rate": 3.3640552995391703e-07, + "loss": 1.2721638679504395, + "step": 74 + }, + { + "epoch": 0.01728110599078341, + "grad_norm": 0.5797449954735189, + "learning_rate": 3.410138248847926e-07, + "loss": 1.3997783660888672, + "step": 75 + }, + { + "epoch": 0.017511520737327188, + "grad_norm": 0.3824734589731608, + "learning_rate": 3.4562211981566817e-07, + "loss": 1.1099059581756592, + "step": 76 + }, + { + "epoch": 0.017741935483870968, + "grad_norm": 0.6286343528066216, + "learning_rate": 3.5023041474654376e-07, + "loss": 1.341759204864502, 
+ "step": 77 + }, + { + "epoch": 0.017972350230414748, + "grad_norm": 0.41058458963409694, + "learning_rate": 3.5483870967741936e-07, + "loss": 1.343479871749878, + "step": 78 + }, + { + "epoch": 0.018202764976958524, + "grad_norm": 0.41653629518149576, + "learning_rate": 3.594470046082949e-07, + "loss": 1.2225772142410278, + "step": 79 + }, + { + "epoch": 0.018433179723502304, + "grad_norm": 0.37871730557010347, + "learning_rate": 3.6405529953917044e-07, + "loss": 1.1934573650360107, + "step": 80 + }, + { + "epoch": 0.018663594470046084, + "grad_norm": 0.36930989407616927, + "learning_rate": 3.686635944700461e-07, + "loss": 1.099440336227417, + "step": 81 + }, + { + "epoch": 0.01889400921658986, + "grad_norm": 0.4445938548359885, + "learning_rate": 3.7327188940092163e-07, + "loss": 1.0864269733428955, + "step": 82 + }, + { + "epoch": 0.01912442396313364, + "grad_norm": 0.4183127094774659, + "learning_rate": 3.778801843317972e-07, + "loss": 1.0706703662872314, + "step": 83 + }, + { + "epoch": 0.01935483870967742, + "grad_norm": 0.3377183372891763, + "learning_rate": 3.824884792626728e-07, + "loss": 1.1675662994384766, + "step": 84 + }, + { + "epoch": 0.019585253456221197, + "grad_norm": 0.4219766455348787, + "learning_rate": 3.8709677419354837e-07, + "loss": 1.3294553756713867, + "step": 85 + }, + { + "epoch": 0.019815668202764977, + "grad_norm": 0.39357768126078463, + "learning_rate": 3.9170506912442396e-07, + "loss": 1.050878882408142, + "step": 86 + }, + { + "epoch": 0.020046082949308756, + "grad_norm": 0.5263429396452582, + "learning_rate": 3.963133640552995e-07, + "loss": 1.3243739604949951, + "step": 87 + }, + { + "epoch": 0.020276497695852536, + "grad_norm": 0.4373425676890139, + "learning_rate": 4.009216589861751e-07, + "loss": 1.1350429058074951, + "step": 88 + }, + { + "epoch": 0.020506912442396313, + "grad_norm": 0.39555461421299365, + "learning_rate": 4.055299539170507e-07, + "loss": 1.24526047706604, + "step": 89 + }, + { + "epoch": 
0.020737327188940093, + "grad_norm": 0.5372699223271491, + "learning_rate": 4.1013824884792624e-07, + "loss": 1.3459908962249756, + "step": 90 + }, + { + "epoch": 0.020967741935483872, + "grad_norm": 0.45711998906450413, + "learning_rate": 4.147465437788018e-07, + "loss": 1.2129223346710205, + "step": 91 + }, + { + "epoch": 0.02119815668202765, + "grad_norm": 0.396171288478396, + "learning_rate": 4.1935483870967743e-07, + "loss": 1.0522969961166382, + "step": 92 + }, + { + "epoch": 0.02142857142857143, + "grad_norm": 0.4102245507283394, + "learning_rate": 4.2396313364055297e-07, + "loss": 1.3128937482833862, + "step": 93 + }, + { + "epoch": 0.02165898617511521, + "grad_norm": 0.4498995421630644, + "learning_rate": 4.285714285714285e-07, + "loss": 1.3582855463027954, + "step": 94 + }, + { + "epoch": 0.021889400921658985, + "grad_norm": 0.401280081593378, + "learning_rate": 4.3317972350230416e-07, + "loss": 1.3959028720855713, + "step": 95 + }, + { + "epoch": 0.022119815668202765, + "grad_norm": 0.34811166324547105, + "learning_rate": 4.377880184331797e-07, + "loss": 1.149501085281372, + "step": 96 + }, + { + "epoch": 0.022350230414746545, + "grad_norm": 0.48133121679013907, + "learning_rate": 4.423963133640553e-07, + "loss": 1.024135708808899, + "step": 97 + }, + { + "epoch": 0.02258064516129032, + "grad_norm": 0.42298775317954185, + "learning_rate": 4.4700460829493084e-07, + "loss": 0.9255483150482178, + "step": 98 + }, + { + "epoch": 0.0228110599078341, + "grad_norm": 0.4429779269301727, + "learning_rate": 4.5161290322580644e-07, + "loss": 1.1694722175598145, + "step": 99 + }, + { + "epoch": 0.02304147465437788, + "grad_norm": 0.5257102588195529, + "learning_rate": 4.5622119815668203e-07, + "loss": 1.1588457822799683, + "step": 100 + }, + { + "epoch": 0.023271889400921657, + "grad_norm": 0.37478821324150746, + "learning_rate": 4.608294930875576e-07, + "loss": 1.172672986984253, + "step": 101 + }, + { + "epoch": 0.023502304147465437, + "grad_norm": 
0.5416446977134604, + "learning_rate": 4.654377880184331e-07, + "loss": 1.092405915260315, + "step": 102 + }, + { + "epoch": 0.023732718894009217, + "grad_norm": 0.40304171727239163, + "learning_rate": 4.7004608294930877e-07, + "loss": 1.11540687084198, + "step": 103 + }, + { + "epoch": 0.023963133640552997, + "grad_norm": 0.46185115643683655, + "learning_rate": 4.746543778801843e-07, + "loss": 1.1380189657211304, + "step": 104 + }, + { + "epoch": 0.024193548387096774, + "grad_norm": 0.4705857339336588, + "learning_rate": 4.792626728110599e-07, + "loss": 1.1031086444854736, + "step": 105 + }, + { + "epoch": 0.024423963133640553, + "grad_norm": 0.38094574356569405, + "learning_rate": 4.838709677419355e-07, + "loss": 1.1988024711608887, + "step": 106 + }, + { + "epoch": 0.024654377880184333, + "grad_norm": 0.48794686062473364, + "learning_rate": 4.88479262672811e-07, + "loss": 1.0814614295959473, + "step": 107 + }, + { + "epoch": 0.02488479262672811, + "grad_norm": 0.41304010922593737, + "learning_rate": 4.930875576036866e-07, + "loss": 1.0541695356369019, + "step": 108 + }, + { + "epoch": 0.02511520737327189, + "grad_norm": 0.4262047073398665, + "learning_rate": 4.976958525345622e-07, + "loss": 1.2281692028045654, + "step": 109 + }, + { + "epoch": 0.02534562211981567, + "grad_norm": 0.4617413170072456, + "learning_rate": 5.023041474654378e-07, + "loss": 1.2542369365692139, + "step": 110 + }, + { + "epoch": 0.025576036866359446, + "grad_norm": 0.46571699511286535, + "learning_rate": 5.069124423963134e-07, + "loss": 1.36039137840271, + "step": 111 + }, + { + "epoch": 0.025806451612903226, + "grad_norm": 0.3893860976585314, + "learning_rate": 5.11520737327189e-07, + "loss": 1.1092976331710815, + "step": 112 + }, + { + "epoch": 0.026036866359447006, + "grad_norm": 0.4636216593448083, + "learning_rate": 5.161290322580645e-07, + "loss": 1.0634076595306396, + "step": 113 + }, + { + "epoch": 0.026267281105990782, + "grad_norm": 0.3440530135190564, + "learning_rate": 
5.2073732718894e-07, + "loss": 1.0024809837341309, + "step": 114 + }, + { + "epoch": 0.026497695852534562, + "grad_norm": 0.4346835070660911, + "learning_rate": 5.253456221198155e-07, + "loss": 1.1691724061965942, + "step": 115 + }, + { + "epoch": 0.026728110599078342, + "grad_norm": 0.46992230717269323, + "learning_rate": 5.299539170506912e-07, + "loss": 1.2053219079971313, + "step": 116 + }, + { + "epoch": 0.02695852534562212, + "grad_norm": 0.3668719861525143, + "learning_rate": 5.345622119815668e-07, + "loss": 1.119420051574707, + "step": 117 + }, + { + "epoch": 0.027188940092165898, + "grad_norm": 0.44063509410116297, + "learning_rate": 5.391705069124423e-07, + "loss": 1.1640167236328125, + "step": 118 + }, + { + "epoch": 0.027419354838709678, + "grad_norm": 0.41158620514350025, + "learning_rate": 5.437788018433179e-07, + "loss": 1.180116057395935, + "step": 119 + }, + { + "epoch": 0.027649769585253458, + "grad_norm": 0.4684655855415561, + "learning_rate": 5.483870967741935e-07, + "loss": 1.0726159811019897, + "step": 120 + }, + { + "epoch": 0.027880184331797234, + "grad_norm": 0.44443528947779826, + "learning_rate": 5.529953917050691e-07, + "loss": 1.03219473361969, + "step": 121 + }, + { + "epoch": 0.028110599078341014, + "grad_norm": 0.4615930748718386, + "learning_rate": 5.576036866359447e-07, + "loss": 1.1545735597610474, + "step": 122 + }, + { + "epoch": 0.028341013824884794, + "grad_norm": 0.4154044637047318, + "learning_rate": 5.622119815668203e-07, + "loss": 1.2409746646881104, + "step": 123 + }, + { + "epoch": 0.02857142857142857, + "grad_norm": 0.48642203067509454, + "learning_rate": 5.668202764976958e-07, + "loss": 1.2717409133911133, + "step": 124 + }, + { + "epoch": 0.02880184331797235, + "grad_norm": 0.5633308049530943, + "learning_rate": 5.714285714285714e-07, + "loss": 1.523846983909607, + "step": 125 + }, + { + "epoch": 0.02903225806451613, + "grad_norm": 0.47068700261388136, + "learning_rate": 5.760368663594469e-07, + "loss": 
1.3386890888214111, + "step": 126 + }, + { + "epoch": 0.029262672811059907, + "grad_norm": 0.5199142981609907, + "learning_rate": 5.806451612903226e-07, + "loss": 1.3080404996871948, + "step": 127 + }, + { + "epoch": 0.029493087557603687, + "grad_norm": 0.530224330517059, + "learning_rate": 5.852534562211982e-07, + "loss": 1.3194537162780762, + "step": 128 + }, + { + "epoch": 0.029723502304147466, + "grad_norm": 0.49119251759787413, + "learning_rate": 5.898617511520737e-07, + "loss": 1.0546228885650635, + "step": 129 + }, + { + "epoch": 0.029953917050691243, + "grad_norm": 0.44238233872112126, + "learning_rate": 5.944700460829493e-07, + "loss": 1.3160395622253418, + "step": 130 + }, + { + "epoch": 0.030184331797235023, + "grad_norm": 0.5551864793339897, + "learning_rate": 5.990783410138249e-07, + "loss": 1.3497555255889893, + "step": 131 + }, + { + "epoch": 0.030414746543778803, + "grad_norm": 0.41383181378393813, + "learning_rate": 6.036866359447004e-07, + "loss": 1.0863350629806519, + "step": 132 + }, + { + "epoch": 0.03064516129032258, + "grad_norm": 0.4913368059485873, + "learning_rate": 6.08294930875576e-07, + "loss": 1.1640913486480713, + "step": 133 + }, + { + "epoch": 0.03087557603686636, + "grad_norm": 0.4309615007654084, + "learning_rate": 6.129032258064516e-07, + "loss": 1.398510217666626, + "step": 134 + }, + { + "epoch": 0.03110599078341014, + "grad_norm": 0.46249423735581563, + "learning_rate": 6.175115207373271e-07, + "loss": 1.3015594482421875, + "step": 135 + }, + { + "epoch": 0.03133640552995392, + "grad_norm": 0.5511951371835903, + "learning_rate": 6.221198156682027e-07, + "loss": 1.2786016464233398, + "step": 136 + }, + { + "epoch": 0.031566820276497695, + "grad_norm": 0.35056112177409643, + "learning_rate": 6.267281105990782e-07, + "loss": 1.0863161087036133, + "step": 137 + }, + { + "epoch": 0.03179723502304147, + "grad_norm": 0.49469780540978775, + "learning_rate": 6.313364055299539e-07, + "loss": 1.1590030193328857, + "step": 138 + }, + { + 
"epoch": 0.032027649769585255, + "grad_norm": 0.4498097850802204, + "learning_rate": 6.359447004608295e-07, + "loss": 1.2473185062408447, + "step": 139 + }, + { + "epoch": 0.03225806451612903, + "grad_norm": 0.46996183926649465, + "learning_rate": 6.40552995391705e-07, + "loss": 1.1982496976852417, + "step": 140 + }, + { + "epoch": 0.03248847926267281, + "grad_norm": 0.39627654459475076, + "learning_rate": 6.451612903225806e-07, + "loss": 1.078690528869629, + "step": 141 + }, + { + "epoch": 0.03271889400921659, + "grad_norm": 0.4831308537053794, + "learning_rate": 6.497695852534562e-07, + "loss": 1.1540311574935913, + "step": 142 + }, + { + "epoch": 0.03294930875576037, + "grad_norm": 0.4510531995801552, + "learning_rate": 6.543778801843318e-07, + "loss": 1.319035530090332, + "step": 143 + }, + { + "epoch": 0.03317972350230415, + "grad_norm": 0.46683155201608206, + "learning_rate": 6.589861751152074e-07, + "loss": 1.199448585510254, + "step": 144 + }, + { + "epoch": 0.03341013824884793, + "grad_norm": 0.526397133846452, + "learning_rate": 6.63594470046083e-07, + "loss": 1.212646484375, + "step": 145 + }, + { + "epoch": 0.033640552995391704, + "grad_norm": 0.6339080221663279, + "learning_rate": 6.682027649769585e-07, + "loss": 1.2833064794540405, + "step": 146 + }, + { + "epoch": 0.03387096774193549, + "grad_norm": 0.6111094782416204, + "learning_rate": 6.728110599078341e-07, + "loss": 1.2852118015289307, + "step": 147 + }, + { + "epoch": 0.034101382488479264, + "grad_norm": 0.36790627555446376, + "learning_rate": 6.774193548387096e-07, + "loss": 1.0287699699401855, + "step": 148 + }, + { + "epoch": 0.03433179723502304, + "grad_norm": 0.4705970251054534, + "learning_rate": 6.820276497695853e-07, + "loss": 1.2580914497375488, + "step": 149 + }, + { + "epoch": 0.03456221198156682, + "grad_norm": 0.4446865658925291, + "learning_rate": 6.866359447004608e-07, + "loss": 1.0557801723480225, + "step": 150 + }, + { + "epoch": 0.0347926267281106, + "grad_norm": 
0.4962737867323335, + "learning_rate": 6.912442396313363e-07, + "loss": 1.1820557117462158, + "step": 151 + }, + { + "epoch": 0.035023041474654376, + "grad_norm": 0.4496579463689646, + "learning_rate": 6.958525345622119e-07, + "loss": 1.2777981758117676, + "step": 152 + }, + { + "epoch": 0.03525345622119816, + "grad_norm": 0.4664315599937052, + "learning_rate": 7.004608294930875e-07, + "loss": 1.1465356349945068, + "step": 153 + }, + { + "epoch": 0.035483870967741936, + "grad_norm": 0.5245233624695497, + "learning_rate": 7.05069124423963e-07, + "loss": 1.3553744554519653, + "step": 154 + }, + { + "epoch": 0.03571428571428571, + "grad_norm": 0.5474513239817841, + "learning_rate": 7.096774193548387e-07, + "loss": 1.176223874092102, + "step": 155 + }, + { + "epoch": 0.035944700460829496, + "grad_norm": 0.4022708922904972, + "learning_rate": 7.142857142857143e-07, + "loss": 1.1771761178970337, + "step": 156 + }, + { + "epoch": 0.03617511520737327, + "grad_norm": 0.5000685120319052, + "learning_rate": 7.188940092165898e-07, + "loss": 1.1598860025405884, + "step": 157 + }, + { + "epoch": 0.03640552995391705, + "grad_norm": 0.4955460688514832, + "learning_rate": 7.235023041474654e-07, + "loss": 1.0689195394515991, + "step": 158 + }, + { + "epoch": 0.03663594470046083, + "grad_norm": 0.5324202700222229, + "learning_rate": 7.281105990783409e-07, + "loss": 1.1444990634918213, + "step": 159 + }, + { + "epoch": 0.03686635944700461, + "grad_norm": 0.441885052912425, + "learning_rate": 7.327188940092166e-07, + "loss": 1.2261321544647217, + "step": 160 + }, + { + "epoch": 0.037096774193548385, + "grad_norm": 0.47946473640002796, + "learning_rate": 7.373271889400922e-07, + "loss": 0.9325876235961914, + "step": 161 + }, + { + "epoch": 0.03732718894009217, + "grad_norm": 0.46688477365444836, + "learning_rate": 7.419354838709677e-07, + "loss": 1.071167230606079, + "step": 162 + }, + { + "epoch": 0.037557603686635944, + "grad_norm": 0.5188018198616766, + "learning_rate": 
7.465437788018433e-07, + "loss": 1.1856298446655273, + "step": 163 + }, + { + "epoch": 0.03778801843317972, + "grad_norm": 0.5279511073474723, + "learning_rate": 7.511520737327189e-07, + "loss": 1.13883376121521, + "step": 164 + }, + { + "epoch": 0.038018433179723504, + "grad_norm": 0.4671725091927055, + "learning_rate": 7.557603686635944e-07, + "loss": 1.2896685600280762, + "step": 165 + }, + { + "epoch": 0.03824884792626728, + "grad_norm": 0.6286776240106037, + "learning_rate": 7.603686635944701e-07, + "loss": 1.3122754096984863, + "step": 166 + }, + { + "epoch": 0.03847926267281106, + "grad_norm": 0.5120060171404104, + "learning_rate": 7.649769585253457e-07, + "loss": 1.165675163269043, + "step": 167 + }, + { + "epoch": 0.03870967741935484, + "grad_norm": 0.5132036652169082, + "learning_rate": 7.695852534562211e-07, + "loss": 1.1348214149475098, + "step": 168 + }, + { + "epoch": 0.03894009216589862, + "grad_norm": 0.5816469452243797, + "learning_rate": 7.741935483870967e-07, + "loss": 1.287818431854248, + "step": 169 + }, + { + "epoch": 0.03917050691244239, + "grad_norm": 0.4886112893618036, + "learning_rate": 7.788018433179722e-07, + "loss": 1.0723031759262085, + "step": 170 + }, + { + "epoch": 0.03940092165898618, + "grad_norm": 0.5572220637370465, + "learning_rate": 7.834101382488479e-07, + "loss": 1.29054594039917, + "step": 171 + }, + { + "epoch": 0.03963133640552995, + "grad_norm": 0.4996602061858042, + "learning_rate": 7.880184331797235e-07, + "loss": 1.201147198677063, + "step": 172 + }, + { + "epoch": 0.03986175115207373, + "grad_norm": 0.47488604971715725, + "learning_rate": 7.92626728110599e-07, + "loss": 1.2529574632644653, + "step": 173 + }, + { + "epoch": 0.04009216589861751, + "grad_norm": 0.5420947446150967, + "learning_rate": 7.972350230414746e-07, + "loss": 1.3255105018615723, + "step": 174 + }, + { + "epoch": 0.04032258064516129, + "grad_norm": 0.5367164884336, + "learning_rate": 8.018433179723502e-07, + "loss": 1.3167433738708496, + "step": 
175 + }, + { + "epoch": 0.04055299539170507, + "grad_norm": 0.5124027812324866, + "learning_rate": 8.064516129032257e-07, + "loss": 1.4780502319335938, + "step": 176 + }, + { + "epoch": 0.04078341013824885, + "grad_norm": 0.49049200777499574, + "learning_rate": 8.110599078341014e-07, + "loss": 1.3096996545791626, + "step": 177 + }, + { + "epoch": 0.041013824884792625, + "grad_norm": 0.5684690759624818, + "learning_rate": 8.15668202764977e-07, + "loss": 1.3124895095825195, + "step": 178 + }, + { + "epoch": 0.04124423963133641, + "grad_norm": 0.5746940747619091, + "learning_rate": 8.202764976958525e-07, + "loss": 1.2589681148529053, + "step": 179 + }, + { + "epoch": 0.041474654377880185, + "grad_norm": 0.5351550863930432, + "learning_rate": 8.248847926267281e-07, + "loss": 1.0576659440994263, + "step": 180 + }, + { + "epoch": 0.04170506912442396, + "grad_norm": 0.5804930108989373, + "learning_rate": 8.294930875576036e-07, + "loss": 1.2647404670715332, + "step": 181 + }, + { + "epoch": 0.041935483870967745, + "grad_norm": 0.5527713530674592, + "learning_rate": 8.341013824884793e-07, + "loss": 1.072542428970337, + "step": 182 + }, + { + "epoch": 0.04216589861751152, + "grad_norm": 0.636913740412271, + "learning_rate": 8.387096774193549e-07, + "loss": 1.2417643070220947, + "step": 183 + }, + { + "epoch": 0.0423963133640553, + "grad_norm": 0.4636179655744076, + "learning_rate": 8.433179723502303e-07, + "loss": 1.2490241527557373, + "step": 184 + }, + { + "epoch": 0.04262672811059908, + "grad_norm": 0.5714553493227277, + "learning_rate": 8.479262672811059e-07, + "loss": 1.1169328689575195, + "step": 185 + }, + { + "epoch": 0.04285714285714286, + "grad_norm": 0.5893436962226742, + "learning_rate": 8.525345622119815e-07, + "loss": 1.1799774169921875, + "step": 186 + }, + { + "epoch": 0.043087557603686634, + "grad_norm": 0.4840759402042485, + "learning_rate": 8.57142857142857e-07, + "loss": 0.9655753374099731, + "step": 187 + }, + { + "epoch": 0.04331797235023042, + 
"grad_norm": 0.5473512318665162, + "learning_rate": 8.617511520737327e-07, + "loss": 1.2863562107086182, + "step": 188 + }, + { + "epoch": 0.043548387096774194, + "grad_norm": 0.5971573505450626, + "learning_rate": 8.663594470046083e-07, + "loss": 1.056877613067627, + "step": 189 + }, + { + "epoch": 0.04377880184331797, + "grad_norm": 0.5903656134268881, + "learning_rate": 8.709677419354838e-07, + "loss": 1.2128019332885742, + "step": 190 + }, + { + "epoch": 0.044009216589861753, + "grad_norm": 0.5042165136835149, + "learning_rate": 8.755760368663594e-07, + "loss": 1.1397441625595093, + "step": 191 + }, + { + "epoch": 0.04423963133640553, + "grad_norm": 0.5007324461761941, + "learning_rate": 8.801843317972349e-07, + "loss": 1.062232255935669, + "step": 192 + }, + { + "epoch": 0.044470046082949306, + "grad_norm": 0.5077694656116347, + "learning_rate": 8.847926267281106e-07, + "loss": 1.0102736949920654, + "step": 193 + }, + { + "epoch": 0.04470046082949309, + "grad_norm": 0.5039275409209952, + "learning_rate": 8.894009216589862e-07, + "loss": 1.155517339706421, + "step": 194 + }, + { + "epoch": 0.044930875576036866, + "grad_norm": 0.4568536555143312, + "learning_rate": 8.940092165898617e-07, + "loss": 1.042372703552246, + "step": 195 + }, + { + "epoch": 0.04516129032258064, + "grad_norm": 0.6118356615587064, + "learning_rate": 8.986175115207373e-07, + "loss": 1.1158320903778076, + "step": 196 + }, + { + "epoch": 0.045391705069124426, + "grad_norm": 0.6547758969058546, + "learning_rate": 9.032258064516129e-07, + "loss": 1.4693050384521484, + "step": 197 + }, + { + "epoch": 0.0456221198156682, + "grad_norm": 0.5189200191294998, + "learning_rate": 9.078341013824884e-07, + "loss": 1.0990574359893799, + "step": 198 + }, + { + "epoch": 0.04585253456221198, + "grad_norm": 0.5123720508165549, + "learning_rate": 9.124423963133641e-07, + "loss": 1.0259861946105957, + "step": 199 + }, + { + "epoch": 0.04608294930875576, + "grad_norm": 0.4638504791285932, + "learning_rate": 
9.170506912442397e-07, + "loss": 1.2708477973937988, + "step": 200 + }, + { + "epoch": 0.04631336405529954, + "grad_norm": 0.426472351706666, + "learning_rate": 9.216589861751152e-07, + "loss": 1.052978754043579, + "step": 201 + }, + { + "epoch": 0.046543778801843315, + "grad_norm": 0.5548008737632977, + "learning_rate": 9.262672811059907e-07, + "loss": 1.3405938148498535, + "step": 202 + }, + { + "epoch": 0.0467741935483871, + "grad_norm": 0.4311530218247671, + "learning_rate": 9.308755760368662e-07, + "loss": 0.9464558362960815, + "step": 203 + }, + { + "epoch": 0.047004608294930875, + "grad_norm": 0.6377195135282403, + "learning_rate": 9.354838709677418e-07, + "loss": 1.3019077777862549, + "step": 204 + }, + { + "epoch": 0.04723502304147465, + "grad_norm": 0.6029329005096047, + "learning_rate": 9.400921658986175e-07, + "loss": 1.146841049194336, + "step": 205 + }, + { + "epoch": 0.047465437788018434, + "grad_norm": 0.6136536598800337, + "learning_rate": 9.44700460829493e-07, + "loss": 1.106084942817688, + "step": 206 + }, + { + "epoch": 0.04769585253456221, + "grad_norm": 0.6661299934206126, + "learning_rate": 9.493087557603686e-07, + "loss": 1.2930629253387451, + "step": 207 + }, + { + "epoch": 0.047926267281105994, + "grad_norm": 0.5555271013101563, + "learning_rate": 9.539170506912442e-07, + "loss": 1.1637842655181885, + "step": 208 + }, + { + "epoch": 0.04815668202764977, + "grad_norm": 0.444081897230925, + "learning_rate": 9.585253456221198e-07, + "loss": 1.1753308773040771, + "step": 209 + }, + { + "epoch": 0.04838709677419355, + "grad_norm": 0.5362299776231612, + "learning_rate": 9.631336405529954e-07, + "loss": 1.2304046154022217, + "step": 210 + }, + { + "epoch": 0.04861751152073733, + "grad_norm": 0.6898819231347578, + "learning_rate": 9.67741935483871e-07, + "loss": 1.4326789379119873, + "step": 211 + }, + { + "epoch": 0.04884792626728111, + "grad_norm": 0.614044501232848, + "learning_rate": 9.723502304147466e-07, + "loss": 1.0759861469268799, + 
"step": 212 + }, + { + "epoch": 0.04907834101382488, + "grad_norm": 0.5971609176488232, + "learning_rate": 9.76958525345622e-07, + "loss": 1.1514811515808105, + "step": 213 + }, + { + "epoch": 0.04930875576036867, + "grad_norm": 0.49252816443356506, + "learning_rate": 9.815668202764976e-07, + "loss": 1.1618578433990479, + "step": 214 + }, + { + "epoch": 0.04953917050691244, + "grad_norm": 0.5677669382006955, + "learning_rate": 9.861751152073732e-07, + "loss": 1.0321345329284668, + "step": 215 + }, + { + "epoch": 0.04976958525345622, + "grad_norm": 0.4551655972629908, + "learning_rate": 9.907834101382488e-07, + "loss": 1.0391438007354736, + "step": 216 + }, + { + "epoch": 0.05, + "grad_norm": 0.6188957189455181, + "learning_rate": 9.953917050691244e-07, + "loss": 1.080418586730957, + "step": 217 + }, + { + "epoch": 0.05023041474654378, + "grad_norm": 0.6531841586974683, + "learning_rate": 1e-06, + "loss": 1.2095223665237427, + "step": 218 + }, + { + "epoch": 0.050460829493087556, + "grad_norm": 0.5036313537560552, + "learning_rate": 1.0046082949308756e-06, + "loss": 1.1144485473632812, + "step": 219 + }, + { + "epoch": 0.05069124423963134, + "grad_norm": 0.6466646674884302, + "learning_rate": 1.0092165898617511e-06, + "loss": 1.2560818195343018, + "step": 220 + }, + { + "epoch": 0.050921658986175115, + "grad_norm": 0.586777516357483, + "learning_rate": 1.0138248847926267e-06, + "loss": 1.1043426990509033, + "step": 221 + }, + { + "epoch": 0.05115207373271889, + "grad_norm": 0.41448570454396455, + "learning_rate": 1.0184331797235021e-06, + "loss": 1.0725831985473633, + "step": 222 + }, + { + "epoch": 0.051382488479262675, + "grad_norm": 0.5713867853647446, + "learning_rate": 1.023041474654378e-06, + "loss": 0.9764004349708557, + "step": 223 + }, + { + "epoch": 0.05161290322580645, + "grad_norm": 0.6662412690615445, + "learning_rate": 1.0276497695852535e-06, + "loss": 1.2172776460647583, + "step": 224 + }, + { + "epoch": 0.05184331797235023, + "grad_norm": 
0.610800258000843, + "learning_rate": 1.032258064516129e-06, + "loss": 1.1065070629119873, + "step": 225 + }, + { + "epoch": 0.05207373271889401, + "grad_norm": 0.5057724484519791, + "learning_rate": 1.0368663594470047e-06, + "loss": 1.0840628147125244, + "step": 226 + }, + { + "epoch": 0.05230414746543779, + "grad_norm": 0.5250793281243177, + "learning_rate": 1.04147465437788e-06, + "loss": 1.109276294708252, + "step": 227 + }, + { + "epoch": 0.052534562211981564, + "grad_norm": 0.7348582040933043, + "learning_rate": 1.0460829493087557e-06, + "loss": 1.186352252960205, + "step": 228 + }, + { + "epoch": 0.05276497695852535, + "grad_norm": 0.48569306871313883, + "learning_rate": 1.050691244239631e-06, + "loss": 1.1605256795883179, + "step": 229 + }, + { + "epoch": 0.052995391705069124, + "grad_norm": 0.6312799860168967, + "learning_rate": 1.0552995391705069e-06, + "loss": 1.0269646644592285, + "step": 230 + }, + { + "epoch": 0.0532258064516129, + "grad_norm": 0.6446173917231129, + "learning_rate": 1.0599078341013825e-06, + "loss": 0.9595874547958374, + "step": 231 + }, + { + "epoch": 0.053456221198156684, + "grad_norm": 0.6010998567907583, + "learning_rate": 1.0645161290322579e-06, + "loss": 1.1606154441833496, + "step": 232 + }, + { + "epoch": 0.05368663594470046, + "grad_norm": 0.6379425251609956, + "learning_rate": 1.0691244239631337e-06, + "loss": 0.9920428991317749, + "step": 233 + }, + { + "epoch": 0.05391705069124424, + "grad_norm": 0.6346840342097714, + "learning_rate": 1.073732718894009e-06, + "loss": 1.2124650478363037, + "step": 234 + }, + { + "epoch": 0.05414746543778802, + "grad_norm": 0.5761223431136224, + "learning_rate": 1.0783410138248847e-06, + "loss": 1.2237420082092285, + "step": 235 + }, + { + "epoch": 0.054377880184331796, + "grad_norm": 0.5178799666370111, + "learning_rate": 1.0829493087557605e-06, + "loss": 1.1484715938568115, + "step": 236 + }, + { + "epoch": 0.05460829493087557, + "grad_norm": 0.5910590598999479, + "learning_rate": 
1.0875576036866358e-06, + "loss": 1.2143291234970093, + "step": 237 + }, + { + "epoch": 0.054838709677419356, + "grad_norm": 0.568116947952991, + "learning_rate": 1.0921658986175114e-06, + "loss": 1.1995420455932617, + "step": 238 + }, + { + "epoch": 0.05506912442396313, + "grad_norm": 0.6128333972066793, + "learning_rate": 1.096774193548387e-06, + "loss": 1.2577292919158936, + "step": 239 + }, + { + "epoch": 0.055299539170506916, + "grad_norm": 0.6177738975799152, + "learning_rate": 1.1013824884792626e-06, + "loss": 1.2170629501342773, + "step": 240 + }, + { + "epoch": 0.05552995391705069, + "grad_norm": 0.3580107479174479, + "learning_rate": 1.1059907834101382e-06, + "loss": 0.8318669199943542, + "step": 241 + }, + { + "epoch": 0.05576036866359447, + "grad_norm": 0.4976235536822315, + "learning_rate": 1.1105990783410138e-06, + "loss": 1.0760166645050049, + "step": 242 + }, + { + "epoch": 0.05599078341013825, + "grad_norm": 0.7197455436310494, + "learning_rate": 1.1152073732718894e-06, + "loss": 1.2437031269073486, + "step": 243 + }, + { + "epoch": 0.05622119815668203, + "grad_norm": 0.5957655407019126, + "learning_rate": 1.1198156682027648e-06, + "loss": 1.1680852174758911, + "step": 244 + }, + { + "epoch": 0.056451612903225805, + "grad_norm": 0.6708075502500678, + "learning_rate": 1.1244239631336406e-06, + "loss": 1.051478385925293, + "step": 245 + }, + { + "epoch": 0.05668202764976959, + "grad_norm": 0.547285271256248, + "learning_rate": 1.1290322580645162e-06, + "loss": 1.1433100700378418, + "step": 246 + }, + { + "epoch": 0.056912442396313365, + "grad_norm": 0.6428413238154085, + "learning_rate": 1.1336405529953916e-06, + "loss": 0.9521546363830566, + "step": 247 + }, + { + "epoch": 0.05714285714285714, + "grad_norm": 0.6790518899839243, + "learning_rate": 1.1382488479262674e-06, + "loss": 1.226189136505127, + "step": 248 + }, + { + "epoch": 0.057373271889400924, + "grad_norm": 0.7178538920010674, + "learning_rate": 1.1428571428571428e-06, + "loss": 
1.108027696609497, + "step": 249 + }, + { + "epoch": 0.0576036866359447, + "grad_norm": 0.4608432366288286, + "learning_rate": 1.1474654377880184e-06, + "loss": 1.042288064956665, + "step": 250 + }, + { + "epoch": 0.05783410138248848, + "grad_norm": 0.8171244559521852, + "learning_rate": 1.1520737327188938e-06, + "loss": 1.193603754043579, + "step": 251 + }, + { + "epoch": 0.05806451612903226, + "grad_norm": 0.6766522772283506, + "learning_rate": 1.1566820276497696e-06, + "loss": 1.193584680557251, + "step": 252 + }, + { + "epoch": 0.05829493087557604, + "grad_norm": 0.5714710938556213, + "learning_rate": 1.1612903225806452e-06, + "loss": 1.2318934202194214, + "step": 253 + }, + { + "epoch": 0.05852534562211981, + "grad_norm": 0.6443899979691422, + "learning_rate": 1.1658986175115205e-06, + "loss": 1.1626521348953247, + "step": 254 + }, + { + "epoch": 0.0587557603686636, + "grad_norm": 0.6336855527034527, + "learning_rate": 1.1705069124423963e-06, + "loss": 1.2402286529541016, + "step": 255 + }, + { + "epoch": 0.05898617511520737, + "grad_norm": 0.599628545600123, + "learning_rate": 1.1751152073732717e-06, + "loss": 1.190323829650879, + "step": 256 + }, + { + "epoch": 0.05921658986175115, + "grad_norm": 0.655955321737197, + "learning_rate": 1.1797235023041473e-06, + "loss": 1.121636986732483, + "step": 257 + }, + { + "epoch": 0.05944700460829493, + "grad_norm": 0.5349922437861245, + "learning_rate": 1.1843317972350231e-06, + "loss": 1.099304437637329, + "step": 258 + }, + { + "epoch": 0.05967741935483871, + "grad_norm": 0.5611568770807159, + "learning_rate": 1.1889400921658985e-06, + "loss": 1.1730690002441406, + "step": 259 + }, + { + "epoch": 0.059907834101382486, + "grad_norm": 0.5874751551203973, + "learning_rate": 1.1935483870967741e-06, + "loss": 1.1450574398040771, + "step": 260 + }, + { + "epoch": 0.06013824884792627, + "grad_norm": 0.6634311667010621, + "learning_rate": 1.1981566820276497e-06, + "loss": 1.1435421705245972, + "step": 261 + }, + { + "epoch": 
0.060368663594470046, + "grad_norm": 0.6113712565981082, + "learning_rate": 1.2027649769585253e-06, + "loss": 1.2153000831604004, + "step": 262 + }, + { + "epoch": 0.06059907834101382, + "grad_norm": 0.4715675476477507, + "learning_rate": 1.207373271889401e-06, + "loss": 1.0380406379699707, + "step": 263 + }, + { + "epoch": 0.060829493087557605, + "grad_norm": 0.5396758253019809, + "learning_rate": 1.2119815668202765e-06, + "loss": 1.1639207601547241, + "step": 264 + }, + { + "epoch": 0.06105990783410138, + "grad_norm": 0.7193765184254299, + "learning_rate": 1.216589861751152e-06, + "loss": 1.1862819194793701, + "step": 265 + }, + { + "epoch": 0.06129032258064516, + "grad_norm": 0.5621136552568688, + "learning_rate": 1.2211981566820275e-06, + "loss": 1.2122020721435547, + "step": 266 + }, + { + "epoch": 0.06152073732718894, + "grad_norm": 0.506518590231947, + "learning_rate": 1.2258064516129033e-06, + "loss": 1.1201646327972412, + "step": 267 + }, + { + "epoch": 0.06175115207373272, + "grad_norm": 0.6015371724768855, + "learning_rate": 1.2304147465437787e-06, + "loss": 0.9520926475524902, + "step": 268 + }, + { + "epoch": 0.061981566820276494, + "grad_norm": 0.6815507447701216, + "learning_rate": 1.2350230414746543e-06, + "loss": 1.0426976680755615, + "step": 269 + }, + { + "epoch": 0.06221198156682028, + "grad_norm": 0.5129880337213574, + "learning_rate": 1.23963133640553e-06, + "loss": 0.934493899345398, + "step": 270 + }, + { + "epoch": 0.062442396313364054, + "grad_norm": 0.5416312735509534, + "learning_rate": 1.2442396313364054e-06, + "loss": 1.23980712890625, + "step": 271 + }, + { + "epoch": 0.06267281105990784, + "grad_norm": 0.5947336924258313, + "learning_rate": 1.248847926267281e-06, + "loss": 1.094742774963379, + "step": 272 + }, + { + "epoch": 0.06290322580645161, + "grad_norm": 0.5496219212827214, + "learning_rate": 1.2534562211981564e-06, + "loss": 1.0271551609039307, + "step": 273 + }, + { + "epoch": 0.06313364055299539, + "grad_norm": 
0.43924704821878574, + "learning_rate": 1.2580645161290322e-06, + "loss": 1.159210205078125, + "step": 274 + }, + { + "epoch": 0.06336405529953917, + "grad_norm": 0.6336734571964621, + "learning_rate": 1.2626728110599078e-06, + "loss": 1.127510666847229, + "step": 275 + }, + { + "epoch": 0.06359447004608294, + "grad_norm": 0.564136508309977, + "learning_rate": 1.2672811059907832e-06, + "loss": 1.1371517181396484, + "step": 276 + }, + { + "epoch": 0.06382488479262673, + "grad_norm": 0.5092569849346139, + "learning_rate": 1.271889400921659e-06, + "loss": 1.0296730995178223, + "step": 277 + }, + { + "epoch": 0.06405529953917051, + "grad_norm": 0.47819096787751125, + "learning_rate": 1.2764976958525344e-06, + "loss": 1.036975383758545, + "step": 278 + }, + { + "epoch": 0.06428571428571428, + "grad_norm": 0.5933788958917384, + "learning_rate": 1.28110599078341e-06, + "loss": 1.2120393514633179, + "step": 279 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 0.5094532117085869, + "learning_rate": 1.2857142857142858e-06, + "loss": 1.0084068775177002, + "step": 280 + }, + { + "epoch": 0.06474654377880185, + "grad_norm": 0.5556672645421422, + "learning_rate": 1.2903225806451612e-06, + "loss": 1.2005786895751953, + "step": 281 + }, + { + "epoch": 0.06497695852534562, + "grad_norm": 0.5273275990471241, + "learning_rate": 1.2949308755760368e-06, + "loss": 1.1506783962249756, + "step": 282 + }, + { + "epoch": 0.0652073732718894, + "grad_norm": 0.6565311834699108, + "learning_rate": 1.2995391705069124e-06, + "loss": 1.1219947338104248, + "step": 283 + }, + { + "epoch": 0.06543778801843318, + "grad_norm": 0.5392805741788703, + "learning_rate": 1.304147465437788e-06, + "loss": 1.2041170597076416, + "step": 284 + }, + { + "epoch": 0.06566820276497695, + "grad_norm": 0.4958618059812673, + "learning_rate": 1.3087557603686636e-06, + "loss": 1.0903037786483765, + "step": 285 + }, + { + "epoch": 0.06589861751152074, + "grad_norm": 0.5739593792710319, + "learning_rate": 
1.3133640552995392e-06, + "loss": 1.2140064239501953, + "step": 286 + }, + { + "epoch": 0.06612903225806452, + "grad_norm": 0.6611408054194472, + "learning_rate": 1.3179723502304148e-06, + "loss": 1.3026092052459717, + "step": 287 + }, + { + "epoch": 0.0663594470046083, + "grad_norm": 0.5994162091601994, + "learning_rate": 1.3225806451612901e-06, + "loss": 1.0937910079956055, + "step": 288 + }, + { + "epoch": 0.06658986175115207, + "grad_norm": 0.5087892316212932, + "learning_rate": 1.327188940092166e-06, + "loss": 1.1768109798431396, + "step": 289 + }, + { + "epoch": 0.06682027649769585, + "grad_norm": 0.6601843016778813, + "learning_rate": 1.3317972350230413e-06, + "loss": 1.0796440839767456, + "step": 290 + }, + { + "epoch": 0.06705069124423964, + "grad_norm": 0.5059222364831474, + "learning_rate": 1.336405529953917e-06, + "loss": 0.9972932934761047, + "step": 291 + }, + { + "epoch": 0.06728110599078341, + "grad_norm": 0.5571474335328804, + "learning_rate": 1.3410138248847927e-06, + "loss": 0.9860717058181763, + "step": 292 + }, + { + "epoch": 0.06751152073732719, + "grad_norm": 0.5418320654969337, + "learning_rate": 1.3456221198156681e-06, + "loss": 1.045119047164917, + "step": 293 + }, + { + "epoch": 0.06774193548387097, + "grad_norm": 0.5469511174229076, + "learning_rate": 1.3502304147465437e-06, + "loss": 1.2740920782089233, + "step": 294 + }, + { + "epoch": 0.06797235023041474, + "grad_norm": 0.5280888059979016, + "learning_rate": 1.354838709677419e-06, + "loss": 1.0860114097595215, + "step": 295 + }, + { + "epoch": 0.06820276497695853, + "grad_norm": 0.6361673375880608, + "learning_rate": 1.359447004608295e-06, + "loss": 1.111539602279663, + "step": 296 + }, + { + "epoch": 0.06843317972350231, + "grad_norm": 0.6640553054344481, + "learning_rate": 1.3640552995391705e-06, + "loss": 1.1628870964050293, + "step": 297 + }, + { + "epoch": 0.06866359447004608, + "grad_norm": 0.5665129055040568, + "learning_rate": 1.3686635944700459e-06, + "loss": 
1.042768955230713, + "step": 298 + }, + { + "epoch": 0.06889400921658986, + "grad_norm": 0.43340931133190164, + "learning_rate": 1.3732718894009217e-06, + "loss": 0.9970331192016602, + "step": 299 + }, + { + "epoch": 0.06912442396313365, + "grad_norm": 0.5645710736996077, + "learning_rate": 1.377880184331797e-06, + "loss": 1.1270179748535156, + "step": 300 + }, + { + "epoch": 0.06935483870967742, + "grad_norm": 0.5065704773498506, + "learning_rate": 1.3824884792626727e-06, + "loss": 0.9505646824836731, + "step": 301 + }, + { + "epoch": 0.0695852534562212, + "grad_norm": 0.5178052985950043, + "learning_rate": 1.3870967741935485e-06, + "loss": 1.0997588634490967, + "step": 302 + }, + { + "epoch": 0.06981566820276498, + "grad_norm": 0.46976885146719827, + "learning_rate": 1.3917050691244239e-06, + "loss": 1.1512106657028198, + "step": 303 + }, + { + "epoch": 0.07004608294930875, + "grad_norm": 0.5368431131511487, + "learning_rate": 1.3963133640552995e-06, + "loss": 1.1340759992599487, + "step": 304 + }, + { + "epoch": 0.07027649769585254, + "grad_norm": 0.6153911846871725, + "learning_rate": 1.400921658986175e-06, + "loss": 1.187511682510376, + "step": 305 + }, + { + "epoch": 0.07050691244239632, + "grad_norm": 0.511555535336468, + "learning_rate": 1.4055299539170507e-06, + "loss": 1.0711122751235962, + "step": 306 + }, + { + "epoch": 0.07073732718894009, + "grad_norm": 0.48287298633713555, + "learning_rate": 1.410138248847926e-06, + "loss": 0.9636896848678589, + "step": 307 + }, + { + "epoch": 0.07096774193548387, + "grad_norm": 0.5910127759130634, + "learning_rate": 1.4147465437788018e-06, + "loss": 1.0506833791732788, + "step": 308 + }, + { + "epoch": 0.07119815668202766, + "grad_norm": 0.46621570534633416, + "learning_rate": 1.4193548387096774e-06, + "loss": 1.1076349020004272, + "step": 309 + }, + { + "epoch": 0.07142857142857142, + "grad_norm": 0.5023143786431462, + "learning_rate": 1.4239631336405528e-06, + "loss": 1.0878944396972656, + "step": 310 + }, + { + 
"epoch": 0.07165898617511521, + "grad_norm": 0.5894127846415432, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.0808600187301636, + "step": 311 + }, + { + "epoch": 0.07188940092165899, + "grad_norm": 0.6608655757057322, + "learning_rate": 1.433179723502304e-06, + "loss": 1.2117588520050049, + "step": 312 + }, + { + "epoch": 0.07211981566820276, + "grad_norm": 0.49227698344069387, + "learning_rate": 1.4377880184331796e-06, + "loss": 1.0899101495742798, + "step": 313 + }, + { + "epoch": 0.07235023041474654, + "grad_norm": 0.4429228185732454, + "learning_rate": 1.4423963133640554e-06, + "loss": 0.9540426135063171, + "step": 314 + }, + { + "epoch": 0.07258064516129033, + "grad_norm": 0.6262415135725191, + "learning_rate": 1.4470046082949308e-06, + "loss": 1.1170068979263306, + "step": 315 + }, + { + "epoch": 0.0728110599078341, + "grad_norm": 0.5398534028349639, + "learning_rate": 1.4516129032258064e-06, + "loss": 1.2030160427093506, + "step": 316 + }, + { + "epoch": 0.07304147465437788, + "grad_norm": 0.5750696017486863, + "learning_rate": 1.4562211981566818e-06, + "loss": 1.1599903106689453, + "step": 317 + }, + { + "epoch": 0.07327188940092166, + "grad_norm": 0.4940370201046436, + "learning_rate": 1.4608294930875576e-06, + "loss": 1.0777950286865234, + "step": 318 + }, + { + "epoch": 0.07350230414746543, + "grad_norm": 0.5527232601625117, + "learning_rate": 1.4654377880184332e-06, + "loss": 1.1250553131103516, + "step": 319 + }, + { + "epoch": 0.07373271889400922, + "grad_norm": 0.4905671836592669, + "learning_rate": 1.4700460829493086e-06, + "loss": 1.10176420211792, + "step": 320 + }, + { + "epoch": 0.073963133640553, + "grad_norm": 0.5457078879226115, + "learning_rate": 1.4746543778801844e-06, + "loss": 1.111799716949463, + "step": 321 + }, + { + "epoch": 0.07419354838709677, + "grad_norm": 0.4195800331952007, + "learning_rate": 1.4792626728110598e-06, + "loss": 1.1555054187774658, + "step": 322 + }, + { + "epoch": 0.07442396313364055, + "grad_norm": 
0.46236670595596, + "learning_rate": 1.4838709677419353e-06, + "loss": 1.0977535247802734, + "step": 323 + }, + { + "epoch": 0.07465437788018434, + "grad_norm": 0.5097860724223924, + "learning_rate": 1.4884792626728112e-06, + "loss": 0.9058012962341309, + "step": 324 + }, + { + "epoch": 0.0748847926267281, + "grad_norm": 0.5077577953430894, + "learning_rate": 1.4930875576036865e-06, + "loss": 1.1147960424423218, + "step": 325 + }, + { + "epoch": 0.07511520737327189, + "grad_norm": 0.44169448790763116, + "learning_rate": 1.4976958525345621e-06, + "loss": 1.1315648555755615, + "step": 326 + }, + { + "epoch": 0.07534562211981567, + "grad_norm": 0.5088086763700569, + "learning_rate": 1.5023041474654377e-06, + "loss": 0.9134868383407593, + "step": 327 + }, + { + "epoch": 0.07557603686635944, + "grad_norm": 0.44118138965972864, + "learning_rate": 1.5069124423963133e-06, + "loss": 1.017493724822998, + "step": 328 + }, + { + "epoch": 0.07580645161290323, + "grad_norm": 0.5038134502792564, + "learning_rate": 1.5115207373271887e-06, + "loss": 1.220658540725708, + "step": 329 + }, + { + "epoch": 0.07603686635944701, + "grad_norm": 0.49196264739665124, + "learning_rate": 1.5161290322580645e-06, + "loss": 1.2254307270050049, + "step": 330 + }, + { + "epoch": 0.07626728110599078, + "grad_norm": 0.6442066774537618, + "learning_rate": 1.5207373271889401e-06, + "loss": 1.2653989791870117, + "step": 331 + }, + { + "epoch": 0.07649769585253456, + "grad_norm": 0.5214989033274106, + "learning_rate": 1.5253456221198155e-06, + "loss": 1.199981451034546, + "step": 332 + }, + { + "epoch": 0.07672811059907834, + "grad_norm": 0.5987857165424706, + "learning_rate": 1.5299539170506913e-06, + "loss": 1.1141018867492676, + "step": 333 + }, + { + "epoch": 0.07695852534562211, + "grad_norm": 0.5942646354683767, + "learning_rate": 1.5345622119815667e-06, + "loss": 1.2139991521835327, + "step": 334 + }, + { + "epoch": 0.0771889400921659, + "grad_norm": 0.46506211352562865, + "learning_rate": 
1.5391705069124423e-06, + "loss": 1.0647475719451904, + "step": 335 + }, + { + "epoch": 0.07741935483870968, + "grad_norm": 0.41334086285294086, + "learning_rate": 1.543778801843318e-06, + "loss": 0.9740357398986816, + "step": 336 + }, + { + "epoch": 0.07764976958525345, + "grad_norm": 0.3730662683323707, + "learning_rate": 1.5483870967741935e-06, + "loss": 0.877153754234314, + "step": 337 + }, + { + "epoch": 0.07788018433179723, + "grad_norm": 0.5608914234538745, + "learning_rate": 1.552995391705069e-06, + "loss": 1.2472789287567139, + "step": 338 + }, + { + "epoch": 0.07811059907834102, + "grad_norm": 0.49369711494641005, + "learning_rate": 1.5576036866359445e-06, + "loss": 1.1873078346252441, + "step": 339 + }, + { + "epoch": 0.07834101382488479, + "grad_norm": 0.47054639516827856, + "learning_rate": 1.5622119815668203e-06, + "loss": 1.0728449821472168, + "step": 340 + }, + { + "epoch": 0.07857142857142857, + "grad_norm": 0.5084311239727589, + "learning_rate": 1.5668202764976959e-06, + "loss": 0.9974904656410217, + "step": 341 + }, + { + "epoch": 0.07880184331797235, + "grad_norm": 0.5100945633220172, + "learning_rate": 1.5714285714285712e-06, + "loss": 1.0591039657592773, + "step": 342 + }, + { + "epoch": 0.07903225806451612, + "grad_norm": 0.5927330256525677, + "learning_rate": 1.576036866359447e-06, + "loss": 1.04117751121521, + "step": 343 + }, + { + "epoch": 0.0792626728110599, + "grad_norm": 0.40984725482311923, + "learning_rate": 1.5806451612903224e-06, + "loss": 0.934100866317749, + "step": 344 + }, + { + "epoch": 0.07949308755760369, + "grad_norm": 0.4545690285130126, + "learning_rate": 1.585253456221198e-06, + "loss": 1.0333890914916992, + "step": 345 + }, + { + "epoch": 0.07972350230414746, + "grad_norm": 0.4848318013907446, + "learning_rate": 1.5898617511520738e-06, + "loss": 1.1762741804122925, + "step": 346 + }, + { + "epoch": 0.07995391705069124, + "grad_norm": 0.4082821303075448, + "learning_rate": 1.5944700460829492e-06, + "loss": 
1.081842303276062, + "step": 347 + }, + { + "epoch": 0.08018433179723503, + "grad_norm": 0.48343962912190763, + "learning_rate": 1.5990783410138248e-06, + "loss": 1.140712022781372, + "step": 348 + }, + { + "epoch": 0.0804147465437788, + "grad_norm": 0.3519464028715392, + "learning_rate": 1.6036866359447004e-06, + "loss": 1.0155198574066162, + "step": 349 + }, + { + "epoch": 0.08064516129032258, + "grad_norm": 0.4719922940268299, + "learning_rate": 1.608294930875576e-06, + "loss": 1.0673280954360962, + "step": 350 + }, + { + "epoch": 0.08087557603686636, + "grad_norm": 0.44336917730159625, + "learning_rate": 1.6129032258064514e-06, + "loss": 1.1061692237854004, + "step": 351 + }, + { + "epoch": 0.08110599078341015, + "grad_norm": 0.6227306591455409, + "learning_rate": 1.6175115207373272e-06, + "loss": 1.0120354890823364, + "step": 352 + }, + { + "epoch": 0.08133640552995391, + "grad_norm": 0.5343939607764295, + "learning_rate": 1.6221198156682028e-06, + "loss": 1.1260986328125, + "step": 353 + }, + { + "epoch": 0.0815668202764977, + "grad_norm": 0.514597043189326, + "learning_rate": 1.6267281105990782e-06, + "loss": 1.0376214981079102, + "step": 354 + }, + { + "epoch": 0.08179723502304148, + "grad_norm": 0.41314458702115897, + "learning_rate": 1.631336405529954e-06, + "loss": 1.0802130699157715, + "step": 355 + }, + { + "epoch": 0.08202764976958525, + "grad_norm": 0.5113844239661658, + "learning_rate": 1.6359447004608294e-06, + "loss": 1.217378854751587, + "step": 356 + }, + { + "epoch": 0.08225806451612903, + "grad_norm": 0.3681604891354872, + "learning_rate": 1.640552995391705e-06, + "loss": 0.9107617139816284, + "step": 357 + }, + { + "epoch": 0.08248847926267282, + "grad_norm": 0.4567828094638987, + "learning_rate": 1.6451612903225808e-06, + "loss": 1.089385986328125, + "step": 358 + }, + { + "epoch": 0.08271889400921659, + "grad_norm": 0.42382031863730735, + "learning_rate": 1.6497695852534561e-06, + "loss": 1.1420392990112305, + "step": 359 + }, + { + 
"epoch": 0.08294930875576037, + "grad_norm": 0.4385300551654332, + "learning_rate": 1.6543778801843317e-06, + "loss": 0.9308648705482483, + "step": 360 + }, + { + "epoch": 0.08317972350230415, + "grad_norm": 0.4691608891369802, + "learning_rate": 1.6589861751152071e-06, + "loss": 0.9463413953781128, + "step": 361 + }, + { + "epoch": 0.08341013824884792, + "grad_norm": 0.4312953553305326, + "learning_rate": 1.663594470046083e-06, + "loss": 1.0364834070205688, + "step": 362 + }, + { + "epoch": 0.0836405529953917, + "grad_norm": 0.4601141894995307, + "learning_rate": 1.6682027649769585e-06, + "loss": 0.9992797374725342, + "step": 363 + }, + { + "epoch": 0.08387096774193549, + "grad_norm": 0.4110829230093509, + "learning_rate": 1.672811059907834e-06, + "loss": 0.9862687587738037, + "step": 364 + }, + { + "epoch": 0.08410138248847926, + "grad_norm": 0.538237007116734, + "learning_rate": 1.6774193548387097e-06, + "loss": 1.0882744789123535, + "step": 365 + }, + { + "epoch": 0.08433179723502304, + "grad_norm": 0.38129891099780466, + "learning_rate": 1.682027649769585e-06, + "loss": 0.9217149615287781, + "step": 366 + }, + { + "epoch": 0.08456221198156683, + "grad_norm": 0.47566005804946043, + "learning_rate": 1.6866359447004607e-06, + "loss": 1.0384632349014282, + "step": 367 + }, + { + "epoch": 0.0847926267281106, + "grad_norm": 0.41334228678230484, + "learning_rate": 1.6912442396313363e-06, + "loss": 0.8760565519332886, + "step": 368 + }, + { + "epoch": 0.08502304147465438, + "grad_norm": 0.38194490761666694, + "learning_rate": 1.6958525345622119e-06, + "loss": 0.9868614077568054, + "step": 369 + }, + { + "epoch": 0.08525345622119816, + "grad_norm": 0.41853356164893474, + "learning_rate": 1.7004608294930875e-06, + "loss": 1.0386936664581299, + "step": 370 + }, + { + "epoch": 0.08548387096774193, + "grad_norm": 0.4969082634490474, + "learning_rate": 1.705069124423963e-06, + "loss": 1.2201364040374756, + "step": 371 + }, + { + "epoch": 0.08571428571428572, + "grad_norm": 
0.45684500070085005, + "learning_rate": 1.7096774193548387e-06, + "loss": 0.9892920255661011, + "step": 372 + }, + { + "epoch": 0.0859447004608295, + "grad_norm": 0.3411435628885881, + "learning_rate": 1.714285714285714e-06, + "loss": 0.9379667639732361, + "step": 373 + }, + { + "epoch": 0.08617511520737327, + "grad_norm": 0.4493279942699278, + "learning_rate": 1.7188940092165899e-06, + "loss": 1.0150624513626099, + "step": 374 + }, + { + "epoch": 0.08640552995391705, + "grad_norm": 0.3873317793310882, + "learning_rate": 1.7235023041474655e-06, + "loss": 0.8724589943885803, + "step": 375 + }, + { + "epoch": 0.08663594470046083, + "grad_norm": 0.451020649692039, + "learning_rate": 1.7281105990783408e-06, + "loss": 1.005715012550354, + "step": 376 + }, + { + "epoch": 0.0868663594470046, + "grad_norm": 0.40515583321904614, + "learning_rate": 1.7327188940092167e-06, + "loss": 1.0238345861434937, + "step": 377 + }, + { + "epoch": 0.08709677419354839, + "grad_norm": 0.5713304603163627, + "learning_rate": 1.737327188940092e-06, + "loss": 1.061020851135254, + "step": 378 + }, + { + "epoch": 0.08732718894009217, + "grad_norm": 0.31543151666242697, + "learning_rate": 1.7419354838709676e-06, + "loss": 0.8607133626937866, + "step": 379 + }, + { + "epoch": 0.08755760368663594, + "grad_norm": 0.396586403800106, + "learning_rate": 1.7465437788018434e-06, + "loss": 0.9070740938186646, + "step": 380 + }, + { + "epoch": 0.08778801843317972, + "grad_norm": 0.4114853593210784, + "learning_rate": 1.7511520737327188e-06, + "loss": 0.993092954158783, + "step": 381 + }, + { + "epoch": 0.08801843317972351, + "grad_norm": 0.5030976624666732, + "learning_rate": 1.7557603686635944e-06, + "loss": 1.1119567155838013, + "step": 382 + }, + { + "epoch": 0.08824884792626728, + "grad_norm": 0.3947649464982104, + "learning_rate": 1.7603686635944698e-06, + "loss": 1.030786395072937, + "step": 383 + }, + { + "epoch": 0.08847926267281106, + "grad_norm": 0.413233744996873, + "learning_rate": 
1.7649769585253456e-06, + "loss": 1.0578559637069702, + "step": 384 + }, + { + "epoch": 0.08870967741935484, + "grad_norm": 0.5116874225270758, + "learning_rate": 1.7695852534562212e-06, + "loss": 1.1282391548156738, + "step": 385 + }, + { + "epoch": 0.08894009216589861, + "grad_norm": 0.36883704269137796, + "learning_rate": 1.7741935483870966e-06, + "loss": 0.7838784456253052, + "step": 386 + }, + { + "epoch": 0.0891705069124424, + "grad_norm": 0.4028833159886203, + "learning_rate": 1.7788018433179724e-06, + "loss": 0.9244300127029419, + "step": 387 + }, + { + "epoch": 0.08940092165898618, + "grad_norm": 0.37786066556159736, + "learning_rate": 1.7834101382488478e-06, + "loss": 0.916866660118103, + "step": 388 + }, + { + "epoch": 0.08963133640552995, + "grad_norm": 0.3466207390337416, + "learning_rate": 1.7880184331797234e-06, + "loss": 0.9918155670166016, + "step": 389 + }, + { + "epoch": 0.08986175115207373, + "grad_norm": 0.49146787648511026, + "learning_rate": 1.792626728110599e-06, + "loss": 0.9879001379013062, + "step": 390 + }, + { + "epoch": 0.09009216589861752, + "grad_norm": 0.4467876721911936, + "learning_rate": 1.7972350230414746e-06, + "loss": 1.0252082347869873, + "step": 391 + }, + { + "epoch": 0.09032258064516129, + "grad_norm": 0.4519090202816701, + "learning_rate": 1.8018433179723502e-06, + "loss": 1.0376901626586914, + "step": 392 + }, + { + "epoch": 0.09055299539170507, + "grad_norm": 0.4158305964101772, + "learning_rate": 1.8064516129032258e-06, + "loss": 1.0237072706222534, + "step": 393 + }, + { + "epoch": 0.09078341013824885, + "grad_norm": 0.3903660894092682, + "learning_rate": 1.8110599078341013e-06, + "loss": 1.004181146621704, + "step": 394 + }, + { + "epoch": 0.09101382488479262, + "grad_norm": 0.4844697096481946, + "learning_rate": 1.8156682027649767e-06, + "loss": 1.1162958145141602, + "step": 395 + }, + { + "epoch": 0.0912442396313364, + "grad_norm": 0.43484007077470926, + "learning_rate": 1.8202764976958525e-06, + "loss": 
0.9634548425674438, + "step": 396 + }, + { + "epoch": 0.09147465437788019, + "grad_norm": 0.34256483920586434, + "learning_rate": 1.8248847926267281e-06, + "loss": 0.9306463599205017, + "step": 397 + }, + { + "epoch": 0.09170506912442396, + "grad_norm": 0.4291772843094685, + "learning_rate": 1.8294930875576035e-06, + "loss": 1.0243630409240723, + "step": 398 + }, + { + "epoch": 0.09193548387096774, + "grad_norm": 0.37150575195192614, + "learning_rate": 1.8341013824884793e-06, + "loss": 0.9261370897293091, + "step": 399 + }, + { + "epoch": 0.09216589861751152, + "grad_norm": 0.41574639263883495, + "learning_rate": 1.8387096774193547e-06, + "loss": 0.9929264783859253, + "step": 400 + }, + { + "epoch": 0.0923963133640553, + "grad_norm": 0.4086620199652483, + "learning_rate": 1.8433179723502303e-06, + "loss": 1.0245590209960938, + "step": 401 + }, + { + "epoch": 0.09262672811059908, + "grad_norm": 0.4485366734014856, + "learning_rate": 1.8479262672811061e-06, + "loss": 0.9801148176193237, + "step": 402 + }, + { + "epoch": 0.09285714285714286, + "grad_norm": 0.48045286204627596, + "learning_rate": 1.8525345622119815e-06, + "loss": 1.181383728981018, + "step": 403 + }, + { + "epoch": 0.09308755760368663, + "grad_norm": 0.41845043157279344, + "learning_rate": 1.857142857142857e-06, + "loss": 0.9493411779403687, + "step": 404 + }, + { + "epoch": 0.09331797235023041, + "grad_norm": 0.4897744794150158, + "learning_rate": 1.8617511520737325e-06, + "loss": 1.1096491813659668, + "step": 405 + }, + { + "epoch": 0.0935483870967742, + "grad_norm": 0.4480175053230346, + "learning_rate": 1.8663594470046083e-06, + "loss": 1.1019275188446045, + "step": 406 + }, + { + "epoch": 0.09377880184331797, + "grad_norm": 0.3732577959232657, + "learning_rate": 1.8709677419354837e-06, + "loss": 0.973988950252533, + "step": 407 + }, + { + "epoch": 0.09400921658986175, + "grad_norm": 0.4400203989690802, + "learning_rate": 1.8755760368663593e-06, + "loss": 1.1670622825622559, + "step": 408 + }, + { 
+ "epoch": 0.09423963133640553, + "grad_norm": 0.3329146322312322, + "learning_rate": 1.880184331797235e-06, + "loss": 0.8550488948822021, + "step": 409 + }, + { + "epoch": 0.0944700460829493, + "grad_norm": 0.4080056832475701, + "learning_rate": 1.8847926267281104e-06, + "loss": 1.0501651763916016, + "step": 410 + }, + { + "epoch": 0.09470046082949309, + "grad_norm": 0.4667020783139675, + "learning_rate": 1.889400921658986e-06, + "loss": 1.1323202848434448, + "step": 411 + }, + { + "epoch": 0.09493087557603687, + "grad_norm": 0.4438011539128225, + "learning_rate": 1.8940092165898616e-06, + "loss": 1.168154001235962, + "step": 412 + }, + { + "epoch": 0.09516129032258064, + "grad_norm": 0.5043395094497101, + "learning_rate": 1.8986175115207372e-06, + "loss": 1.0667431354522705, + "step": 413 + }, + { + "epoch": 0.09539170506912442, + "grad_norm": 0.42921175733784445, + "learning_rate": 1.9032258064516128e-06, + "loss": 1.1447162628173828, + "step": 414 + }, + { + "epoch": 0.0956221198156682, + "grad_norm": 0.42501454608228506, + "learning_rate": 1.9078341013824884e-06, + "loss": 0.9403433799743652, + "step": 415 + }, + { + "epoch": 0.09585253456221199, + "grad_norm": 0.4016688989337606, + "learning_rate": 1.912442396313364e-06, + "loss": 0.9837527275085449, + "step": 416 + }, + { + "epoch": 0.09608294930875576, + "grad_norm": 0.422068085350648, + "learning_rate": 1.9170506912442396e-06, + "loss": 1.071333408355713, + "step": 417 + }, + { + "epoch": 0.09631336405529954, + "grad_norm": 0.5124388054628781, + "learning_rate": 1.921658986175115e-06, + "loss": 1.0156168937683105, + "step": 418 + }, + { + "epoch": 0.09654377880184332, + "grad_norm": 0.4338501331744671, + "learning_rate": 1.926267281105991e-06, + "loss": 0.9705266952514648, + "step": 419 + }, + { + "epoch": 0.0967741935483871, + "grad_norm": 0.407144156286867, + "learning_rate": 1.930875576036866e-06, + "loss": 1.0570204257965088, + "step": 420 + }, + { + "epoch": 0.09700460829493088, + "grad_norm": 
0.43729360857600713, + "learning_rate": 1.935483870967742e-06, + "loss": 1.141861915588379, + "step": 421 + }, + { + "epoch": 0.09723502304147466, + "grad_norm": 0.4507835554387818, + "learning_rate": 1.9400921658986174e-06, + "loss": 0.9849745631217957, + "step": 422 + }, + { + "epoch": 0.09746543778801843, + "grad_norm": 0.4932195036683519, + "learning_rate": 1.944700460829493e-06, + "loss": 1.0279912948608398, + "step": 423 + }, + { + "epoch": 0.09769585253456221, + "grad_norm": 0.4014365475110759, + "learning_rate": 1.9493087557603686e-06, + "loss": 1.0707788467407227, + "step": 424 + }, + { + "epoch": 0.097926267281106, + "grad_norm": 0.37856248369077095, + "learning_rate": 1.953917050691244e-06, + "loss": 0.9391129016876221, + "step": 425 + }, + { + "epoch": 0.09815668202764977, + "grad_norm": 0.3604046417791118, + "learning_rate": 1.9585253456221198e-06, + "loss": 0.9792884588241577, + "step": 426 + }, + { + "epoch": 0.09838709677419355, + "grad_norm": 0.42091691400517506, + "learning_rate": 1.963133640552995e-06, + "loss": 1.0111792087554932, + "step": 427 + }, + { + "epoch": 0.09861751152073733, + "grad_norm": 0.2951881364083913, + "learning_rate": 1.967741935483871e-06, + "loss": 1.0020272731781006, + "step": 428 + }, + { + "epoch": 0.0988479262672811, + "grad_norm": 0.42473763380817414, + "learning_rate": 1.9723502304147463e-06, + "loss": 1.1002991199493408, + "step": 429 + }, + { + "epoch": 0.09907834101382489, + "grad_norm": 0.3977328364337887, + "learning_rate": 1.976958525345622e-06, + "loss": 0.9656131267547607, + "step": 430 + }, + { + "epoch": 0.09930875576036867, + "grad_norm": 0.4163794190517341, + "learning_rate": 1.9815668202764975e-06, + "loss": 1.1845166683197021, + "step": 431 + }, + { + "epoch": 0.09953917050691244, + "grad_norm": 0.4102761511182145, + "learning_rate": 1.9861751152073733e-06, + "loss": 0.8743879795074463, + "step": 432 + }, + { + "epoch": 0.09976958525345622, + "grad_norm": 0.48299006340600875, + "learning_rate": 
1.9907834101382487e-06, + "loss": 1.0800082683563232, + "step": 433 + }, + { + "epoch": 0.1, + "grad_norm": 0.39412754669182365, + "learning_rate": 1.995391705069124e-06, + "loss": 1.0410808324813843, + "step": 434 + }, + { + "epoch": 0.10023041474654378, + "grad_norm": 0.4817128357084655, + "learning_rate": 2e-06, + "loss": 1.0214624404907227, + "step": 435 + }, + { + "epoch": 0.10046082949308756, + "grad_norm": 0.4738161753055533, + "learning_rate": 1.9999999274256618e-06, + "loss": 1.0304028987884521, + "step": 436 + }, + { + "epoch": 0.10069124423963134, + "grad_norm": 0.3946923205513698, + "learning_rate": 1.9999997097026583e-06, + "loss": 1.0457626581192017, + "step": 437 + }, + { + "epoch": 0.10092165898617511, + "grad_norm": 0.43567215904100204, + "learning_rate": 1.9999993468310205e-06, + "loss": 0.9837691187858582, + "step": 438 + }, + { + "epoch": 0.1011520737327189, + "grad_norm": 0.5216317957588074, + "learning_rate": 1.9999988388108013e-06, + "loss": 1.0819612741470337, + "step": 439 + }, + { + "epoch": 0.10138248847926268, + "grad_norm": 0.31182314858852395, + "learning_rate": 1.9999981856420743e-06, + "loss": 1.0417449474334717, + "step": 440 + }, + { + "epoch": 0.10161290322580645, + "grad_norm": 0.5477105048499294, + "learning_rate": 1.999997387324935e-06, + "loss": 1.0501068830490112, + "step": 441 + }, + { + "epoch": 0.10184331797235023, + "grad_norm": 0.4106183150059033, + "learning_rate": 1.999996443859498e-06, + "loss": 1.0635120868682861, + "step": 442 + }, + { + "epoch": 0.10207373271889401, + "grad_norm": 0.4873224989082174, + "learning_rate": 1.999995355245902e-06, + "loss": 0.9732234477996826, + "step": 443 + }, + { + "epoch": 0.10230414746543778, + "grad_norm": 0.3718846857755592, + "learning_rate": 1.9999941214843034e-06, + "loss": 0.9493811130523682, + "step": 444 + }, + { + "epoch": 0.10253456221198157, + "grad_norm": 0.5595191439491263, + "learning_rate": 1.9999927425748817e-06, + "loss": 1.1455141305923462, + "step": 445 + }, + { + 
"epoch": 0.10276497695852535, + "grad_norm": 0.4237177518607636, + "learning_rate": 1.9999912185178374e-06, + "loss": 0.9341592788696289, + "step": 446 + }, + { + "epoch": 0.10299539170506912, + "grad_norm": 0.3913224265375377, + "learning_rate": 1.9999895493133916e-06, + "loss": 0.9535608291625977, + "step": 447 + }, + { + "epoch": 0.1032258064516129, + "grad_norm": 0.4687207319213409, + "learning_rate": 1.999987734961787e-06, + "loss": 1.1977221965789795, + "step": 448 + }, + { + "epoch": 0.10345622119815669, + "grad_norm": 0.45995634872516833, + "learning_rate": 1.999985775463286e-06, + "loss": 1.1658375263214111, + "step": 449 + }, + { + "epoch": 0.10368663594470046, + "grad_norm": 0.47830181543951694, + "learning_rate": 1.9999836708181734e-06, + "loss": 1.1171612739562988, + "step": 450 + }, + { + "epoch": 0.10391705069124424, + "grad_norm": 0.3823354001067843, + "learning_rate": 1.999981421026755e-06, + "loss": 1.0864373445510864, + "step": 451 + }, + { + "epoch": 0.10414746543778802, + "grad_norm": 0.43518989690984766, + "learning_rate": 1.999979026089357e-06, + "loss": 1.1211299896240234, + "step": 452 + }, + { + "epoch": 0.10437788018433179, + "grad_norm": 0.45163820634554874, + "learning_rate": 1.9999764860063277e-06, + "loss": 1.071751594543457, + "step": 453 + }, + { + "epoch": 0.10460829493087558, + "grad_norm": 0.3749468590501543, + "learning_rate": 1.9999738007780347e-06, + "loss": 1.0377576351165771, + "step": 454 + }, + { + "epoch": 0.10483870967741936, + "grad_norm": 0.42625340690366553, + "learning_rate": 1.9999709704048685e-06, + "loss": 0.9658410549163818, + "step": 455 + }, + { + "epoch": 0.10506912442396313, + "grad_norm": 0.4022888050751363, + "learning_rate": 1.9999679948872395e-06, + "loss": 0.9070194959640503, + "step": 456 + }, + { + "epoch": 0.10529953917050691, + "grad_norm": 0.5570523464378584, + "learning_rate": 1.9999648742255803e-06, + "loss": 1.2197664976119995, + "step": 457 + }, + { + "epoch": 0.1055299539170507, + "grad_norm": 
0.3961372853294897, + "learning_rate": 1.9999616084203426e-06, + "loss": 0.9032889604568481, + "step": 458 + }, + { + "epoch": 0.10576036866359446, + "grad_norm": 0.39060467678942784, + "learning_rate": 1.9999581974720017e-06, + "loss": 0.9458762407302856, + "step": 459 + }, + { + "epoch": 0.10599078341013825, + "grad_norm": 0.5068153216782157, + "learning_rate": 1.9999546413810526e-06, + "loss": 1.0024757385253906, + "step": 460 + }, + { + "epoch": 0.10622119815668203, + "grad_norm": 0.38148764403186025, + "learning_rate": 1.9999509401480108e-06, + "loss": 0.9499050378799438, + "step": 461 + }, + { + "epoch": 0.1064516129032258, + "grad_norm": 0.4354491299812492, + "learning_rate": 1.9999470937734132e-06, + "loss": 1.0764188766479492, + "step": 462 + }, + { + "epoch": 0.10668202764976958, + "grad_norm": 0.42800401210878014, + "learning_rate": 1.9999431022578194e-06, + "loss": 0.9858300089836121, + "step": 463 + }, + { + "epoch": 0.10691244239631337, + "grad_norm": 0.41132718920336847, + "learning_rate": 1.999938965601808e-06, + "loss": 0.8965580463409424, + "step": 464 + }, + { + "epoch": 0.10714285714285714, + "grad_norm": 0.39699129711694964, + "learning_rate": 1.9999346838059788e-06, + "loss": 0.8860410451889038, + "step": 465 + }, + { + "epoch": 0.10737327188940092, + "grad_norm": 0.48300723462768347, + "learning_rate": 1.9999302568709546e-06, + "loss": 1.0621274709701538, + "step": 466 + }, + { + "epoch": 0.1076036866359447, + "grad_norm": 0.45149909069714367, + "learning_rate": 1.9999256847973774e-06, + "loss": 0.8894643783569336, + "step": 467 + }, + { + "epoch": 0.10783410138248847, + "grad_norm": 0.3529913357119793, + "learning_rate": 1.999920967585911e-06, + "loss": 0.98856520652771, + "step": 468 + }, + { + "epoch": 0.10806451612903226, + "grad_norm": 0.3260735960256147, + "learning_rate": 1.999916105237239e-06, + "loss": 0.7885239124298096, + "step": 469 + }, + { + "epoch": 0.10829493087557604, + "grad_norm": 0.4477697599226733, + "learning_rate": 
1.9999110977520687e-06, + "loss": 1.0274477005004883, + "step": 470 + }, + { + "epoch": 0.10852534562211981, + "grad_norm": 0.3938409891368368, + "learning_rate": 1.999905945131126e-06, + "loss": 0.8672109842300415, + "step": 471 + }, + { + "epoch": 0.10875576036866359, + "grad_norm": 0.37173415889586336, + "learning_rate": 1.9999006473751594e-06, + "loss": 0.852576732635498, + "step": 472 + }, + { + "epoch": 0.10898617511520738, + "grad_norm": 0.3670138423827908, + "learning_rate": 1.9998952044849375e-06, + "loss": 0.9553557634353638, + "step": 473 + }, + { + "epoch": 0.10921658986175115, + "grad_norm": 0.4402707979796638, + "learning_rate": 1.99988961646125e-06, + "loss": 1.1375620365142822, + "step": 474 + }, + { + "epoch": 0.10944700460829493, + "grad_norm": 0.4045716386517098, + "learning_rate": 1.9998838833049083e-06, + "loss": 0.9653681516647339, + "step": 475 + }, + { + "epoch": 0.10967741935483871, + "grad_norm": 0.3653559897200667, + "learning_rate": 1.999878005016745e-06, + "loss": 1.1139185428619385, + "step": 476 + }, + { + "epoch": 0.10990783410138248, + "grad_norm": 0.37459420946595523, + "learning_rate": 1.9998719815976127e-06, + "loss": 0.8375418186187744, + "step": 477 + }, + { + "epoch": 0.11013824884792627, + "grad_norm": 0.33053822521695836, + "learning_rate": 1.999865813048386e-06, + "loss": 1.0005979537963867, + "step": 478 + }, + { + "epoch": 0.11036866359447005, + "grad_norm": 0.39083306344420843, + "learning_rate": 1.99985949936996e-06, + "loss": 0.8499772548675537, + "step": 479 + }, + { + "epoch": 0.11059907834101383, + "grad_norm": 0.3575835338316839, + "learning_rate": 1.999853040563252e-06, + "loss": 0.9805284738540649, + "step": 480 + }, + { + "epoch": 0.1108294930875576, + "grad_norm": 0.43340835059987204, + "learning_rate": 1.9998464366291983e-06, + "loss": 0.9462177753448486, + "step": 481 + }, + { + "epoch": 0.11105990783410138, + "grad_norm": 0.44706726559657484, + "learning_rate": 1.999839687568758e-06, + "loss": 
1.1023187637329102, + "step": 482 + }, + { + "epoch": 0.11129032258064517, + "grad_norm": 0.3754824087757579, + "learning_rate": 1.9998327933829103e-06, + "loss": 0.9361279010772705, + "step": 483 + }, + { + "epoch": 0.11152073732718894, + "grad_norm": 0.38419186899738067, + "learning_rate": 1.9998257540726567e-06, + "loss": 0.9811379909515381, + "step": 484 + }, + { + "epoch": 0.11175115207373272, + "grad_norm": 0.4030421476721474, + "learning_rate": 1.9998185696390184e-06, + "loss": 1.0246069431304932, + "step": 485 + }, + { + "epoch": 0.1119815668202765, + "grad_norm": 0.4555360249805513, + "learning_rate": 1.9998112400830385e-06, + "loss": 1.0614899396896362, + "step": 486 + }, + { + "epoch": 0.11221198156682027, + "grad_norm": 0.4347652169333907, + "learning_rate": 1.9998037654057803e-06, + "loss": 1.02305269241333, + "step": 487 + }, + { + "epoch": 0.11244239631336406, + "grad_norm": 0.43672158413630835, + "learning_rate": 1.999796145608329e-06, + "loss": 1.044907808303833, + "step": 488 + }, + { + "epoch": 0.11267281105990784, + "grad_norm": 0.4917956866782855, + "learning_rate": 1.999788380691791e-06, + "loss": 0.9669852256774902, + "step": 489 + }, + { + "epoch": 0.11290322580645161, + "grad_norm": 0.3857920087478492, + "learning_rate": 1.9997804706572933e-06, + "loss": 1.0235236883163452, + "step": 490 + }, + { + "epoch": 0.1131336405529954, + "grad_norm": 0.4541175977583441, + "learning_rate": 1.9997724155059835e-06, + "loss": 0.8982692360877991, + "step": 491 + }, + { + "epoch": 0.11336405529953918, + "grad_norm": 0.481910238333043, + "learning_rate": 1.9997642152390312e-06, + "loss": 0.8390282988548279, + "step": 492 + }, + { + "epoch": 0.11359447004608295, + "grad_norm": 0.39882686276748835, + "learning_rate": 1.9997558698576266e-06, + "loss": 0.8938695192337036, + "step": 493 + }, + { + "epoch": 0.11382488479262673, + "grad_norm": 0.5064684870077569, + "learning_rate": 1.9997473793629813e-06, + "loss": 0.9747422933578491, + "step": 494 + }, + { + 
"epoch": 0.11405529953917051, + "grad_norm": 0.443509358045386, + "learning_rate": 1.999738743756327e-06, + "loss": 1.050918698310852, + "step": 495 + }, + { + "epoch": 0.11428571428571428, + "grad_norm": 0.5368423996158629, + "learning_rate": 1.9997299630389174e-06, + "loss": 0.9169312715530396, + "step": 496 + }, + { + "epoch": 0.11451612903225807, + "grad_norm": 0.452695866401899, + "learning_rate": 1.9997210372120272e-06, + "loss": 1.0258065462112427, + "step": 497 + }, + { + "epoch": 0.11474654377880185, + "grad_norm": 0.3831239007423439, + "learning_rate": 1.9997119662769523e-06, + "loss": 1.066356897354126, + "step": 498 + }, + { + "epoch": 0.11497695852534562, + "grad_norm": 0.4319474855040805, + "learning_rate": 1.9997027502350086e-06, + "loss": 1.0336101055145264, + "step": 499 + }, + { + "epoch": 0.1152073732718894, + "grad_norm": 0.36856882435983085, + "learning_rate": 1.9996933890875342e-06, + "loss": 1.0434989929199219, + "step": 500 + }, + { + "epoch": 0.11543778801843319, + "grad_norm": 0.4366750071509639, + "learning_rate": 1.9996838828358876e-06, + "loss": 1.0081424713134766, + "step": 501 + }, + { + "epoch": 0.11566820276497695, + "grad_norm": 0.4424253641379215, + "learning_rate": 1.999674231481449e-06, + "loss": 1.0998575687408447, + "step": 502 + }, + { + "epoch": 0.11589861751152074, + "grad_norm": 0.43915567985422416, + "learning_rate": 1.9996644350256193e-06, + "loss": 1.0325868129730225, + "step": 503 + }, + { + "epoch": 0.11612903225806452, + "grad_norm": 0.39758687932867864, + "learning_rate": 1.99965449346982e-06, + "loss": 1.0520741939544678, + "step": 504 + }, + { + "epoch": 0.11635944700460829, + "grad_norm": 0.4373332869451062, + "learning_rate": 1.9996444068154943e-06, + "loss": 0.9355484247207642, + "step": 505 + }, + { + "epoch": 0.11658986175115207, + "grad_norm": 0.478944942365821, + "learning_rate": 1.9996341750641067e-06, + "loss": 1.2088062763214111, + "step": 506 + }, + { + "epoch": 0.11682027649769586, + "grad_norm": 
0.45703939880277317, + "learning_rate": 1.9996237982171416e-06, + "loss": 1.007477045059204, + "step": 507 + }, + { + "epoch": 0.11705069124423963, + "grad_norm": 0.516029780444843, + "learning_rate": 1.9996132762761054e-06, + "loss": 0.9528911113739014, + "step": 508 + }, + { + "epoch": 0.11728110599078341, + "grad_norm": 0.44144049831872473, + "learning_rate": 1.9996026092425258e-06, + "loss": 1.0906065702438354, + "step": 509 + }, + { + "epoch": 0.1175115207373272, + "grad_norm": 0.45635386377861326, + "learning_rate": 1.9995917971179507e-06, + "loss": 1.1328812837600708, + "step": 510 + }, + { + "epoch": 0.11774193548387096, + "grad_norm": 0.5010986511700435, + "learning_rate": 1.9995808399039493e-06, + "loss": 1.1367099285125732, + "step": 511 + }, + { + "epoch": 0.11797235023041475, + "grad_norm": 0.5738525299064665, + "learning_rate": 1.999569737602112e-06, + "loss": 1.22605562210083, + "step": 512 + }, + { + "epoch": 0.11820276497695853, + "grad_norm": 0.40700112362856533, + "learning_rate": 1.9995584902140514e-06, + "loss": 0.8814148306846619, + "step": 513 + }, + { + "epoch": 0.1184331797235023, + "grad_norm": 0.4018062947026822, + "learning_rate": 1.9995470977413988e-06, + "loss": 0.916766881942749, + "step": 514 + }, + { + "epoch": 0.11866359447004608, + "grad_norm": 0.3907370494982875, + "learning_rate": 1.999535560185808e-06, + "loss": 0.8088599443435669, + "step": 515 + }, + { + "epoch": 0.11889400921658987, + "grad_norm": 0.5585215819507526, + "learning_rate": 1.9995238775489538e-06, + "loss": 1.0029397010803223, + "step": 516 + }, + { + "epoch": 0.11912442396313364, + "grad_norm": 0.47103060321263474, + "learning_rate": 1.9995120498325322e-06, + "loss": 1.157515287399292, + "step": 517 + }, + { + "epoch": 0.11935483870967742, + "grad_norm": 0.43934234876750516, + "learning_rate": 1.99950007703826e-06, + "loss": 0.989453911781311, + "step": 518 + }, + { + "epoch": 0.1195852534562212, + "grad_norm": 0.501533126043576, + "learning_rate": 
1.999487959167874e-06, + "loss": 0.9791898727416992, + "step": 519 + }, + { + "epoch": 0.11981566820276497, + "grad_norm": 0.3947583681206324, + "learning_rate": 1.9994756962231343e-06, + "loss": 0.9994203448295593, + "step": 520 + }, + { + "epoch": 0.12004608294930876, + "grad_norm": 0.4064680989752179, + "learning_rate": 1.999463288205821e-06, + "loss": 0.9096299409866333, + "step": 521 + }, + { + "epoch": 0.12027649769585254, + "grad_norm": 0.5675118509929592, + "learning_rate": 1.999450735117734e-06, + "loss": 0.9956046342849731, + "step": 522 + }, + { + "epoch": 0.12050691244239631, + "grad_norm": 0.40854646192247485, + "learning_rate": 1.9994380369606956e-06, + "loss": 1.0336079597473145, + "step": 523 + }, + { + "epoch": 0.12073732718894009, + "grad_norm": 0.4028964743045085, + "learning_rate": 1.99942519373655e-06, + "loss": 0.8828116655349731, + "step": 524 + }, + { + "epoch": 0.12096774193548387, + "grad_norm": 0.4113573248244064, + "learning_rate": 1.9994122054471597e-06, + "loss": 0.8733093738555908, + "step": 525 + }, + { + "epoch": 0.12119815668202764, + "grad_norm": 0.4633889976755098, + "learning_rate": 1.9993990720944114e-06, + "loss": 1.0312494039535522, + "step": 526 + }, + { + "epoch": 0.12142857142857143, + "grad_norm": 0.39342421435973574, + "learning_rate": 1.9993857936802105e-06, + "loss": 0.9229701161384583, + "step": 527 + }, + { + "epoch": 0.12165898617511521, + "grad_norm": 0.4629141668744642, + "learning_rate": 1.9993723702064853e-06, + "loss": 0.8980100154876709, + "step": 528 + }, + { + "epoch": 0.12188940092165898, + "grad_norm": 0.42208035145091816, + "learning_rate": 1.999358801675183e-06, + "loss": 0.939933180809021, + "step": 529 + }, + { + "epoch": 0.12211981566820276, + "grad_norm": 0.3966309171286601, + "learning_rate": 1.9993450880882733e-06, + "loss": 1.0014444589614868, + "step": 530 + }, + { + "epoch": 0.12235023041474655, + "grad_norm": 0.4166874579150977, + "learning_rate": 1.9993312294477477e-06, + "loss": 
0.9995889663696289, + "step": 531 + }, + { + "epoch": 0.12258064516129032, + "grad_norm": 0.37598019229960666, + "learning_rate": 1.9993172257556167e-06, + "loss": 1.0010197162628174, + "step": 532 + }, + { + "epoch": 0.1228110599078341, + "grad_norm": 0.3629842057209114, + "learning_rate": 1.9993030770139135e-06, + "loss": 0.972966194152832, + "step": 533 + }, + { + "epoch": 0.12304147465437788, + "grad_norm": 0.4160633061352588, + "learning_rate": 1.9992887832246917e-06, + "loss": 0.8033444881439209, + "step": 534 + }, + { + "epoch": 0.12327188940092165, + "grad_norm": 0.3895553967201257, + "learning_rate": 1.9992743443900254e-06, + "loss": 0.7532742619514465, + "step": 535 + }, + { + "epoch": 0.12350230414746544, + "grad_norm": 0.46964696388446997, + "learning_rate": 1.9992597605120113e-06, + "loss": 1.058760643005371, + "step": 536 + }, + { + "epoch": 0.12373271889400922, + "grad_norm": 0.37591416731208094, + "learning_rate": 1.9992450315927658e-06, + "loss": 0.8559634685516357, + "step": 537 + }, + { + "epoch": 0.12396313364055299, + "grad_norm": 0.4216079229956694, + "learning_rate": 1.9992301576344267e-06, + "loss": 1.053638219833374, + "step": 538 + }, + { + "epoch": 0.12419354838709677, + "grad_norm": 0.5423293655738015, + "learning_rate": 1.9992151386391528e-06, + "loss": 0.8841970562934875, + "step": 539 + }, + { + "epoch": 0.12442396313364056, + "grad_norm": 0.5667972752402203, + "learning_rate": 1.9991999746091247e-06, + "loss": 0.9355173110961914, + "step": 540 + }, + { + "epoch": 0.12465437788018432, + "grad_norm": 0.43323548094659586, + "learning_rate": 1.999184665546543e-06, + "loss": 0.9978284239768982, + "step": 541 + }, + { + "epoch": 0.12488479262672811, + "grad_norm": 0.4166718713190779, + "learning_rate": 1.99916921145363e-06, + "loss": 0.8855264782905579, + "step": 542 + }, + { + "epoch": 0.1251152073732719, + "grad_norm": 0.5314416958418489, + "learning_rate": 1.9991536123326283e-06, + "loss": 0.885519802570343, + "step": 543 + }, + { + 
"epoch": 0.12534562211981568, + "grad_norm": 0.4381118612604031, + "learning_rate": 1.9991378681858024e-06, + "loss": 0.9772528409957886, + "step": 544 + }, + { + "epoch": 0.12557603686635946, + "grad_norm": 0.46876887659201405, + "learning_rate": 1.999121979015438e-06, + "loss": 0.8817745447158813, + "step": 545 + }, + { + "epoch": 0.12580645161290321, + "grad_norm": 0.36530562318650095, + "learning_rate": 1.9991059448238404e-06, + "loss": 0.9374080896377563, + "step": 546 + }, + { + "epoch": 0.126036866359447, + "grad_norm": 0.3669313811039727, + "learning_rate": 1.9990897656133383e-06, + "loss": 0.9174116253852844, + "step": 547 + }, + { + "epoch": 0.12626728110599078, + "grad_norm": 0.401361126928626, + "learning_rate": 1.999073441386279e-06, + "loss": 0.9514039158821106, + "step": 548 + }, + { + "epoch": 0.12649769585253456, + "grad_norm": 0.4665811721686224, + "learning_rate": 1.999056972145032e-06, + "loss": 1.10535728931427, + "step": 549 + }, + { + "epoch": 0.12672811059907835, + "grad_norm": 0.4609610092344924, + "learning_rate": 1.999040357891989e-06, + "loss": 1.0641597509384155, + "step": 550 + }, + { + "epoch": 0.12695852534562213, + "grad_norm": 0.39409304359090785, + "learning_rate": 1.99902359862956e-06, + "loss": 0.9596017599105835, + "step": 551 + }, + { + "epoch": 0.1271889400921659, + "grad_norm": 0.4899166130843387, + "learning_rate": 1.9990066943601777e-06, + "loss": 1.083927869796753, + "step": 552 + }, + { + "epoch": 0.12741935483870967, + "grad_norm": 0.42007806110658624, + "learning_rate": 1.998989645086297e-06, + "loss": 0.9146738052368164, + "step": 553 + }, + { + "epoch": 0.12764976958525345, + "grad_norm": 0.41224202627344914, + "learning_rate": 1.998972450810391e-06, + "loss": 0.9038050770759583, + "step": 554 + }, + { + "epoch": 0.12788018433179724, + "grad_norm": 0.45759233489952406, + "learning_rate": 1.9989551115349574e-06, + "loss": 0.973220705986023, + "step": 555 + }, + { + "epoch": 0.12811059907834102, + "grad_norm": 
0.424280511041039, + "learning_rate": 1.998937627262511e-06, + "loss": 0.8804281949996948, + "step": 556 + }, + { + "epoch": 0.1283410138248848, + "grad_norm": 0.47603807991909786, + "learning_rate": 1.9989199979955903e-06, + "loss": 1.100919485092163, + "step": 557 + }, + { + "epoch": 0.12857142857142856, + "grad_norm": 0.5871199693144976, + "learning_rate": 1.998902223736755e-06, + "loss": 1.1152353286743164, + "step": 558 + }, + { + "epoch": 0.12880184331797234, + "grad_norm": 0.4236469989661471, + "learning_rate": 1.9988843044885837e-06, + "loss": 1.0721793174743652, + "step": 559 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 0.4234271408586371, + "learning_rate": 1.9988662402536783e-06, + "loss": 0.9035133123397827, + "step": 560 + }, + { + "epoch": 0.1292626728110599, + "grad_norm": 0.4210053632602843, + "learning_rate": 1.9988480310346603e-06, + "loss": 1.0053937435150146, + "step": 561 + }, + { + "epoch": 0.1294930875576037, + "grad_norm": 0.5230478085674195, + "learning_rate": 1.9988296768341728e-06, + "loss": 0.8536228537559509, + "step": 562 + }, + { + "epoch": 0.12972350230414748, + "grad_norm": 0.41493118398063783, + "learning_rate": 1.9988111776548797e-06, + "loss": 0.9673396348953247, + "step": 563 + }, + { + "epoch": 0.12995391705069123, + "grad_norm": 0.36295429679681995, + "learning_rate": 1.998792533499467e-06, + "loss": 0.9402456879615784, + "step": 564 + }, + { + "epoch": 0.13018433179723501, + "grad_norm": 0.3983153697524455, + "learning_rate": 1.99877374437064e-06, + "loss": 0.8900678157806396, + "step": 565 + }, + { + "epoch": 0.1304147465437788, + "grad_norm": 0.47587146443270817, + "learning_rate": 1.9987548102711264e-06, + "loss": 0.9112892150878906, + "step": 566 + }, + { + "epoch": 0.13064516129032258, + "grad_norm": 0.3969666466780631, + "learning_rate": 1.9987357312036743e-06, + "loss": 0.763452410697937, + "step": 567 + }, + { + "epoch": 0.13087557603686636, + "grad_norm": 0.44573355289133143, + "learning_rate": 
1.9987165071710527e-06, + "loss": 1.0410873889923096, + "step": 568 + }, + { + "epoch": 0.13110599078341015, + "grad_norm": 0.389372329442145, + "learning_rate": 1.9986971381760524e-06, + "loss": 1.029583215713501, + "step": 569 + }, + { + "epoch": 0.1313364055299539, + "grad_norm": 0.46351745512727555, + "learning_rate": 1.9986776242214845e-06, + "loss": 0.994928777217865, + "step": 570 + }, + { + "epoch": 0.1315668202764977, + "grad_norm": 0.49139035828687805, + "learning_rate": 1.9986579653101817e-06, + "loss": 1.001985788345337, + "step": 571 + }, + { + "epoch": 0.13179723502304147, + "grad_norm": 0.45831221525956994, + "learning_rate": 1.998638161444997e-06, + "loss": 0.9813050031661987, + "step": 572 + }, + { + "epoch": 0.13202764976958525, + "grad_norm": 0.45157644768988, + "learning_rate": 1.9986182126288053e-06, + "loss": 0.8678451180458069, + "step": 573 + }, + { + "epoch": 0.13225806451612904, + "grad_norm": 0.42446769343835317, + "learning_rate": 1.998598118864502e-06, + "loss": 1.0393729209899902, + "step": 574 + }, + { + "epoch": 0.13248847926267282, + "grad_norm": 0.39102315770569207, + "learning_rate": 1.998577880155004e-06, + "loss": 0.9631935358047485, + "step": 575 + }, + { + "epoch": 0.1327188940092166, + "grad_norm": 0.37655183711017187, + "learning_rate": 1.9985574965032483e-06, + "loss": 0.8480437994003296, + "step": 576 + }, + { + "epoch": 0.13294930875576036, + "grad_norm": 0.432103661547375, + "learning_rate": 1.998536967912194e-06, + "loss": 1.0450071096420288, + "step": 577 + }, + { + "epoch": 0.13317972350230414, + "grad_norm": 0.5144084645376303, + "learning_rate": 1.9985162943848207e-06, + "loss": 0.9374763369560242, + "step": 578 + }, + { + "epoch": 0.13341013824884793, + "grad_norm": 0.45444537143479036, + "learning_rate": 1.9984954759241293e-06, + "loss": 0.9405182600021362, + "step": 579 + }, + { + "epoch": 0.1336405529953917, + "grad_norm": 0.42824704739155545, + "learning_rate": 1.998474512533141e-06, + "loss": 
0.8406375646591187, + "step": 580 + }, + { + "epoch": 0.1338709677419355, + "grad_norm": 0.48847321743709643, + "learning_rate": 1.9984534042148994e-06, + "loss": 0.9323312044143677, + "step": 581 + }, + { + "epoch": 0.13410138248847928, + "grad_norm": 0.43641992007971325, + "learning_rate": 1.998432150972468e-06, + "loss": 1.0704214572906494, + "step": 582 + }, + { + "epoch": 0.13433179723502303, + "grad_norm": 0.38681502715760335, + "learning_rate": 1.9984107528089315e-06, + "loss": 0.8636025190353394, + "step": 583 + }, + { + "epoch": 0.13456221198156681, + "grad_norm": 0.4361205380771583, + "learning_rate": 1.998389209727396e-06, + "loss": 0.9616303443908691, + "step": 584 + }, + { + "epoch": 0.1347926267281106, + "grad_norm": 0.4406937724242653, + "learning_rate": 1.998367521730988e-06, + "loss": 1.0457193851470947, + "step": 585 + }, + { + "epoch": 0.13502304147465438, + "grad_norm": 0.4060450620979949, + "learning_rate": 1.9983456888228566e-06, + "loss": 1.0460572242736816, + "step": 586 + }, + { + "epoch": 0.13525345622119817, + "grad_norm": 0.3771944294411782, + "learning_rate": 1.9983237110061695e-06, + "loss": 0.9693883657455444, + "step": 587 + }, + { + "epoch": 0.13548387096774195, + "grad_norm": 0.4355709352067949, + "learning_rate": 1.9983015882841175e-06, + "loss": 0.8823472857475281, + "step": 588 + }, + { + "epoch": 0.1357142857142857, + "grad_norm": 0.5606637533068962, + "learning_rate": 1.998279320659912e-06, + "loss": 1.1602983474731445, + "step": 589 + }, + { + "epoch": 0.1359447004608295, + "grad_norm": 0.40130101265364443, + "learning_rate": 1.9982569081367843e-06, + "loss": 0.8191353678703308, + "step": 590 + }, + { + "epoch": 0.13617511520737327, + "grad_norm": 0.40863931644700857, + "learning_rate": 1.9982343507179876e-06, + "loss": 1.141557216644287, + "step": 591 + }, + { + "epoch": 0.13640552995391705, + "grad_norm": 0.4712969186607289, + "learning_rate": 1.998211648406797e-06, + "loss": 0.9688570499420166, + "step": 592 + }, + { + 
"epoch": 0.13663594470046084, + "grad_norm": 0.4543844570436241, + "learning_rate": 1.9981888012065068e-06, + "loss": 1.0218561887741089, + "step": 593 + }, + { + "epoch": 0.13686635944700462, + "grad_norm": 0.5219271265911207, + "learning_rate": 1.9981658091204334e-06, + "loss": 0.9531952142715454, + "step": 594 + }, + { + "epoch": 0.13709677419354838, + "grad_norm": 0.5314208269690397, + "learning_rate": 1.9981426721519143e-06, + "loss": 1.1421492099761963, + "step": 595 + }, + { + "epoch": 0.13732718894009216, + "grad_norm": 0.3970137466851754, + "learning_rate": 1.9981193903043074e-06, + "loss": 0.8173041343688965, + "step": 596 + }, + { + "epoch": 0.13755760368663594, + "grad_norm": 0.43200053855682263, + "learning_rate": 1.998095963580993e-06, + "loss": 0.8842465877532959, + "step": 597 + }, + { + "epoch": 0.13778801843317973, + "grad_norm": 0.6492506358781442, + "learning_rate": 1.9980723919853703e-06, + "loss": 0.8547788858413696, + "step": 598 + }, + { + "epoch": 0.1380184331797235, + "grad_norm": 0.5287255050220753, + "learning_rate": 1.998048675520861e-06, + "loss": 1.0085712671279907, + "step": 599 + }, + { + "epoch": 0.1382488479262673, + "grad_norm": 0.5226769291219134, + "learning_rate": 1.9980248141909083e-06, + "loss": 0.9276378750801086, + "step": 600 + }, + { + "epoch": 0.13847926267281105, + "grad_norm": 0.44292446989501455, + "learning_rate": 1.998000807998975e-06, + "loss": 0.9236693382263184, + "step": 601 + }, + { + "epoch": 0.13870967741935483, + "grad_norm": 0.43514287150953085, + "learning_rate": 1.9979766569485454e-06, + "loss": 1.0353924036026, + "step": 602 + }, + { + "epoch": 0.13894009216589862, + "grad_norm": 0.3831256791535214, + "learning_rate": 1.9979523610431246e-06, + "loss": 0.8456567525863647, + "step": 603 + }, + { + "epoch": 0.1391705069124424, + "grad_norm": 0.46736174894260846, + "learning_rate": 1.997927920286241e-06, + "loss": 0.997468888759613, + "step": 604 + }, + { + "epoch": 0.13940092165898618, + "grad_norm": 
0.38558572890255066, + "learning_rate": 1.9979033346814397e-06, + "loss": 0.8962260484695435, + "step": 605 + }, + { + "epoch": 0.13963133640552997, + "grad_norm": 0.4829548009529998, + "learning_rate": 1.997878604232291e-06, + "loss": 0.8586266040802002, + "step": 606 + }, + { + "epoch": 0.13986175115207372, + "grad_norm": 0.4776734917637134, + "learning_rate": 1.9978537289423837e-06, + "loss": 0.9639670848846436, + "step": 607 + }, + { + "epoch": 0.1400921658986175, + "grad_norm": 0.4115822234384495, + "learning_rate": 1.9978287088153286e-06, + "loss": 1.005727767944336, + "step": 608 + }, + { + "epoch": 0.1403225806451613, + "grad_norm": 0.44858527541471366, + "learning_rate": 1.9978035438547575e-06, + "loss": 1.148871898651123, + "step": 609 + }, + { + "epoch": 0.14055299539170507, + "grad_norm": 0.4357664217922314, + "learning_rate": 1.9977782340643226e-06, + "loss": 1.0459539890289307, + "step": 610 + }, + { + "epoch": 0.14078341013824885, + "grad_norm": 0.43229915305128663, + "learning_rate": 1.9977527794476985e-06, + "loss": 0.92689448595047, + "step": 611 + }, + { + "epoch": 0.14101382488479264, + "grad_norm": 0.5514025110097415, + "learning_rate": 1.997727180008579e-06, + "loss": 0.9735790491104126, + "step": 612 + }, + { + "epoch": 0.1412442396313364, + "grad_norm": 0.5114055458545007, + "learning_rate": 1.99770143575068e-06, + "loss": 0.8882870674133301, + "step": 613 + }, + { + "epoch": 0.14147465437788018, + "grad_norm": 0.47604166837238787, + "learning_rate": 1.9976755466777386e-06, + "loss": 0.9229795932769775, + "step": 614 + }, + { + "epoch": 0.14170506912442396, + "grad_norm": 0.39391923738635765, + "learning_rate": 1.997649512793512e-06, + "loss": 0.9097769260406494, + "step": 615 + }, + { + "epoch": 0.14193548387096774, + "grad_norm": 0.429877903042447, + "learning_rate": 1.9976233341017798e-06, + "loss": 0.7751711010932922, + "step": 616 + }, + { + "epoch": 0.14216589861751153, + "grad_norm": 0.4585028421290768, + "learning_rate": 
1.9975970106063414e-06, + "loss": 0.9071080684661865, + "step": 617 + }, + { + "epoch": 0.1423963133640553, + "grad_norm": 0.372835081071011, + "learning_rate": 1.997570542311017e-06, + "loss": 0.8444115519523621, + "step": 618 + }, + { + "epoch": 0.14262672811059907, + "grad_norm": 0.46125503087530084, + "learning_rate": 1.9975439292196496e-06, + "loss": 0.9159516096115112, + "step": 619 + }, + { + "epoch": 0.14285714285714285, + "grad_norm": 0.45879242474243875, + "learning_rate": 1.997517171336101e-06, + "loss": 0.9697242975234985, + "step": 620 + }, + { + "epoch": 0.14308755760368663, + "grad_norm": 0.4605305033840643, + "learning_rate": 1.9974902686642557e-06, + "loss": 0.9894170761108398, + "step": 621 + }, + { + "epoch": 0.14331797235023042, + "grad_norm": 0.48517122287493847, + "learning_rate": 1.9974632212080184e-06, + "loss": 1.0364127159118652, + "step": 622 + }, + { + "epoch": 0.1435483870967742, + "grad_norm": 0.39169164845291754, + "learning_rate": 1.997436028971315e-06, + "loss": 0.8980219960212708, + "step": 623 + }, + { + "epoch": 0.14377880184331798, + "grad_norm": 0.4857070397144096, + "learning_rate": 1.9974086919580925e-06, + "loss": 1.0293703079223633, + "step": 624 + }, + { + "epoch": 0.14400921658986174, + "grad_norm": 0.46693238253454916, + "learning_rate": 1.9973812101723186e-06, + "loss": 1.006148099899292, + "step": 625 + }, + { + "epoch": 0.14423963133640552, + "grad_norm": 0.5525790188158035, + "learning_rate": 1.9973535836179825e-06, + "loss": 0.9489799737930298, + "step": 626 + }, + { + "epoch": 0.1444700460829493, + "grad_norm": 0.3704152285915921, + "learning_rate": 1.997325812299094e-06, + "loss": 0.7601498961448669, + "step": 627 + }, + { + "epoch": 0.1447004608294931, + "grad_norm": 0.6225002321802279, + "learning_rate": 1.9972978962196843e-06, + "loss": 0.8345643281936646, + "step": 628 + }, + { + "epoch": 0.14493087557603687, + "grad_norm": 0.48694459235316484, + "learning_rate": 1.9972698353838053e-06, + "loss": 
0.8705894947052002, + "step": 629 + }, + { + "epoch": 0.14516129032258066, + "grad_norm": 0.42033173985472694, + "learning_rate": 1.9972416297955294e-06, + "loss": 0.9515185356140137, + "step": 630 + }, + { + "epoch": 0.1453917050691244, + "grad_norm": 0.38157222553103914, + "learning_rate": 1.9972132794589514e-06, + "loss": 0.7616517543792725, + "step": 631 + }, + { + "epoch": 0.1456221198156682, + "grad_norm": 0.47593248323635307, + "learning_rate": 1.9971847843781862e-06, + "loss": 0.8870444297790527, + "step": 632 + }, + { + "epoch": 0.14585253456221198, + "grad_norm": 0.45987330163099194, + "learning_rate": 1.9971561445573696e-06, + "loss": 0.8709393739700317, + "step": 633 + }, + { + "epoch": 0.14608294930875576, + "grad_norm": 0.35616612587319196, + "learning_rate": 1.997127360000658e-06, + "loss": 0.865444540977478, + "step": 634 + }, + { + "epoch": 0.14631336405529954, + "grad_norm": 0.4431578416665891, + "learning_rate": 1.997098430712231e-06, + "loss": 0.9560728073120117, + "step": 635 + }, + { + "epoch": 0.14654377880184333, + "grad_norm": 0.4107966614124612, + "learning_rate": 1.9970693566962866e-06, + "loss": 0.7579058408737183, + "step": 636 + }, + { + "epoch": 0.14677419354838708, + "grad_norm": 0.4609569973718347, + "learning_rate": 1.997040137957045e-06, + "loss": 1.0709021091461182, + "step": 637 + }, + { + "epoch": 0.14700460829493087, + "grad_norm": 0.5029329480654331, + "learning_rate": 1.9970107744987474e-06, + "loss": 0.9911563396453857, + "step": 638 + }, + { + "epoch": 0.14723502304147465, + "grad_norm": 0.45338591583748106, + "learning_rate": 1.996981266325655e-06, + "loss": 0.9673472046852112, + "step": 639 + }, + { + "epoch": 0.14746543778801843, + "grad_norm": 0.3918341582647882, + "learning_rate": 1.9969516134420523e-06, + "loss": 0.7728441953659058, + "step": 640 + }, + { + "epoch": 0.14769585253456222, + "grad_norm": 0.532382418423259, + "learning_rate": 1.9969218158522426e-06, + "loss": 1.0198101997375488, + "step": 641 + }, + { + 
"epoch": 0.147926267281106, + "grad_norm": 0.45259693038053805, + "learning_rate": 1.996891873560551e-06, + "loss": 0.9710760116577148, + "step": 642 + }, + { + "epoch": 0.14815668202764978, + "grad_norm": 0.41281494255735757, + "learning_rate": 1.9968617865713237e-06, + "loss": 0.9956847429275513, + "step": 643 + }, + { + "epoch": 0.14838709677419354, + "grad_norm": 0.40081796016292187, + "learning_rate": 1.996831554888928e-06, + "loss": 1.0974771976470947, + "step": 644 + }, + { + "epoch": 0.14861751152073732, + "grad_norm": 0.5353172634899142, + "learning_rate": 1.9968011785177513e-06, + "loss": 0.914455771446228, + "step": 645 + }, + { + "epoch": 0.1488479262672811, + "grad_norm": 0.5511418094652546, + "learning_rate": 1.9967706574622033e-06, + "loss": 1.1308314800262451, + "step": 646 + }, + { + "epoch": 0.1490783410138249, + "grad_norm": 0.5114786055194052, + "learning_rate": 1.9967399917267142e-06, + "loss": 0.981814444065094, + "step": 647 + }, + { + "epoch": 0.14930875576036867, + "grad_norm": 0.431645238473459, + "learning_rate": 1.9967091813157345e-06, + "loss": 0.874076247215271, + "step": 648 + }, + { + "epoch": 0.14953917050691246, + "grad_norm": 0.39621973386547166, + "learning_rate": 1.9966782262337365e-06, + "loss": 0.8496171832084656, + "step": 649 + }, + { + "epoch": 0.1497695852534562, + "grad_norm": 0.49468581823361646, + "learning_rate": 1.9966471264852136e-06, + "loss": 0.9395674467086792, + "step": 650 + }, + { + "epoch": 0.15, + "grad_norm": 0.4120224768195847, + "learning_rate": 1.99661588207468e-06, + "loss": 0.8363018035888672, + "step": 651 + }, + { + "epoch": 0.15023041474654378, + "grad_norm": 0.4552124844336583, + "learning_rate": 1.9965844930066696e-06, + "loss": 1.0035831928253174, + "step": 652 + }, + { + "epoch": 0.15046082949308756, + "grad_norm": 0.3910663219458386, + "learning_rate": 1.99655295928574e-06, + "loss": 1.0316795110702515, + "step": 653 + }, + { + "epoch": 0.15069124423963134, + "grad_norm": 0.4287067909796643, + 
"learning_rate": 1.9965212809164676e-06, + "loss": 0.9545150995254517, + "step": 654 + }, + { + "epoch": 0.15092165898617513, + "grad_norm": 0.4577708396372056, + "learning_rate": 1.99648945790345e-06, + "loss": 0.993801474571228, + "step": 655 + }, + { + "epoch": 0.15115207373271888, + "grad_norm": 0.4032410507172632, + "learning_rate": 1.9964574902513075e-06, + "loss": 0.8666588664054871, + "step": 656 + }, + { + "epoch": 0.15138248847926267, + "grad_norm": 0.48179220104835324, + "learning_rate": 1.9964253779646787e-06, + "loss": 0.9507651925086975, + "step": 657 + }, + { + "epoch": 0.15161290322580645, + "grad_norm": 0.4899406622119438, + "learning_rate": 1.996393121048226e-06, + "loss": 0.8700851202011108, + "step": 658 + }, + { + "epoch": 0.15184331797235023, + "grad_norm": 0.40256613113119405, + "learning_rate": 1.9963607195066307e-06, + "loss": 0.9966975450515747, + "step": 659 + }, + { + "epoch": 0.15207373271889402, + "grad_norm": 0.44964674216674483, + "learning_rate": 1.9963281733445957e-06, + "loss": 0.9552028179168701, + "step": 660 + }, + { + "epoch": 0.1523041474654378, + "grad_norm": 0.47921018457871023, + "learning_rate": 1.9962954825668456e-06, + "loss": 1.0182740688323975, + "step": 661 + }, + { + "epoch": 0.15253456221198156, + "grad_norm": 0.5096203598929419, + "learning_rate": 1.996262647178125e-06, + "loss": 1.0001778602600098, + "step": 662 + }, + { + "epoch": 0.15276497695852534, + "grad_norm": 0.44730944505165277, + "learning_rate": 1.9962296671832e-06, + "loss": 0.9902865886688232, + "step": 663 + }, + { + "epoch": 0.15299539170506912, + "grad_norm": 0.44977913840647327, + "learning_rate": 1.9961965425868575e-06, + "loss": 0.9272845983505249, + "step": 664 + }, + { + "epoch": 0.1532258064516129, + "grad_norm": 0.5011405916103636, + "learning_rate": 1.996163273393906e-06, + "loss": 0.9705777168273926, + "step": 665 + }, + { + "epoch": 0.1534562211981567, + "grad_norm": 0.5035568947424544, + "learning_rate": 1.9961298596091736e-06, + 
"loss": 0.9472209215164185, + "step": 666 + }, + { + "epoch": 0.15368663594470047, + "grad_norm": 0.3982786140702462, + "learning_rate": 1.9960963012375113e-06, + "loss": 0.9734043478965759, + "step": 667 + }, + { + "epoch": 0.15391705069124423, + "grad_norm": 0.516464356110248, + "learning_rate": 1.9960625982837894e-06, + "loss": 0.8765468001365662, + "step": 668 + }, + { + "epoch": 0.154147465437788, + "grad_norm": 0.6158206412964224, + "learning_rate": 1.9960287507529e-06, + "loss": 1.0224063396453857, + "step": 669 + }, + { + "epoch": 0.1543778801843318, + "grad_norm": 0.4417623345727513, + "learning_rate": 1.995994758649756e-06, + "loss": 0.983299970626831, + "step": 670 + }, + { + "epoch": 0.15460829493087558, + "grad_norm": 0.4680475004359934, + "learning_rate": 1.9959606219792914e-06, + "loss": 1.0524147748947144, + "step": 671 + }, + { + "epoch": 0.15483870967741936, + "grad_norm": 0.45758073401288785, + "learning_rate": 1.9959263407464606e-06, + "loss": 1.1012977361679077, + "step": 672 + }, + { + "epoch": 0.15506912442396314, + "grad_norm": 0.6298296463565526, + "learning_rate": 1.99589191495624e-06, + "loss": 0.8494049310684204, + "step": 673 + }, + { + "epoch": 0.1552995391705069, + "grad_norm": 0.4795860182347848, + "learning_rate": 1.9958573446136263e-06, + "loss": 0.8677045106887817, + "step": 674 + }, + { + "epoch": 0.15552995391705068, + "grad_norm": 0.5514260857685808, + "learning_rate": 1.995822629723638e-06, + "loss": 1.1034941673278809, + "step": 675 + }, + { + "epoch": 0.15576036866359447, + "grad_norm": 0.3959041890885462, + "learning_rate": 1.9957877702913128e-06, + "loss": 0.8428820371627808, + "step": 676 + }, + { + "epoch": 0.15599078341013825, + "grad_norm": 0.5435721054179383, + "learning_rate": 1.9957527663217107e-06, + "loss": 0.8584408760070801, + "step": 677 + }, + { + "epoch": 0.15622119815668203, + "grad_norm": 0.47044010204436115, + "learning_rate": 1.995717617819913e-06, + "loss": 0.8089514970779419, + "step": 678 + }, + { + 
"epoch": 0.15645161290322582, + "grad_norm": 0.48360065475233177, + "learning_rate": 1.9956823247910217e-06, + "loss": 0.8459775447845459, + "step": 679 + }, + { + "epoch": 0.15668202764976957, + "grad_norm": 0.441023419118176, + "learning_rate": 1.9956468872401583e-06, + "loss": 1.0583066940307617, + "step": 680 + }, + { + "epoch": 0.15691244239631336, + "grad_norm": 0.4427871322496545, + "learning_rate": 1.995611305172468e-06, + "loss": 0.9396135807037354, + "step": 681 + }, + { + "epoch": 0.15714285714285714, + "grad_norm": 0.4888169944824013, + "learning_rate": 1.995575578593114e-06, + "loss": 1.0143593549728394, + "step": 682 + }, + { + "epoch": 0.15737327188940092, + "grad_norm": 0.44801312951365924, + "learning_rate": 1.9955397075072833e-06, + "loss": 0.8822500109672546, + "step": 683 + }, + { + "epoch": 0.1576036866359447, + "grad_norm": 0.4936771776275296, + "learning_rate": 1.995503691920182e-06, + "loss": 0.8841962218284607, + "step": 684 + }, + { + "epoch": 0.1578341013824885, + "grad_norm": 0.4240877666200064, + "learning_rate": 1.9954675318370374e-06, + "loss": 0.8537080883979797, + "step": 685 + }, + { + "epoch": 0.15806451612903225, + "grad_norm": 0.5056113314098377, + "learning_rate": 1.9954312272630985e-06, + "loss": 1.0292394161224365, + "step": 686 + }, + { + "epoch": 0.15829493087557603, + "grad_norm": 0.5106923922410934, + "learning_rate": 1.995394778203635e-06, + "loss": 0.8741706013679504, + "step": 687 + }, + { + "epoch": 0.1585253456221198, + "grad_norm": 0.47911475912836377, + "learning_rate": 1.995358184663937e-06, + "loss": 0.9429572820663452, + "step": 688 + }, + { + "epoch": 0.1587557603686636, + "grad_norm": 0.5562334593954328, + "learning_rate": 1.995321446649316e-06, + "loss": 0.9522494077682495, + "step": 689 + }, + { + "epoch": 0.15898617511520738, + "grad_norm": 0.5394048021515351, + "learning_rate": 1.9952845641651046e-06, + "loss": 0.9743782877922058, + "step": 690 + }, + { + "epoch": 0.15921658986175116, + "grad_norm": 
0.4663620909245047, + "learning_rate": 1.995247537216657e-06, + "loss": 0.926364541053772, + "step": 691 + }, + { + "epoch": 0.15944700460829492, + "grad_norm": 0.4595450639525932, + "learning_rate": 1.995210365809346e-06, + "loss": 0.8355565071105957, + "step": 692 + }, + { + "epoch": 0.1596774193548387, + "grad_norm": 0.44548737988500176, + "learning_rate": 1.9951730499485684e-06, + "loss": 0.9200692772865295, + "step": 693 + }, + { + "epoch": 0.15990783410138248, + "grad_norm": 0.36513232613054547, + "learning_rate": 1.99513558963974e-06, + "loss": 0.7571361064910889, + "step": 694 + }, + { + "epoch": 0.16013824884792627, + "grad_norm": 0.48187866859107054, + "learning_rate": 1.995097984888298e-06, + "loss": 0.935307502746582, + "step": 695 + }, + { + "epoch": 0.16036866359447005, + "grad_norm": 0.5833897193983939, + "learning_rate": 1.995060235699701e-06, + "loss": 1.1118557453155518, + "step": 696 + }, + { + "epoch": 0.16059907834101383, + "grad_norm": 0.3866866326578979, + "learning_rate": 1.995022342079428e-06, + "loss": 0.8024749755859375, + "step": 697 + }, + { + "epoch": 0.1608294930875576, + "grad_norm": 0.44217187311148026, + "learning_rate": 1.994984304032979e-06, + "loss": 0.9018943309783936, + "step": 698 + }, + { + "epoch": 0.16105990783410137, + "grad_norm": 0.4729402911259197, + "learning_rate": 1.9949461215658757e-06, + "loss": 0.8571128249168396, + "step": 699 + }, + { + "epoch": 0.16129032258064516, + "grad_norm": 0.4822593475964477, + "learning_rate": 1.99490779468366e-06, + "loss": 0.9707971215248108, + "step": 700 + }, + { + "epoch": 0.16152073732718894, + "grad_norm": 0.4341551988253619, + "learning_rate": 1.994869323391895e-06, + "loss": 0.8157618045806885, + "step": 701 + }, + { + "epoch": 0.16175115207373272, + "grad_norm": 0.4620050649733586, + "learning_rate": 1.994830707696165e-06, + "loss": 0.9009906053543091, + "step": 702 + }, + { + "epoch": 0.1619815668202765, + "grad_norm": 0.5270647594020066, + "learning_rate": 
1.9947919476020745e-06, + "loss": 1.0093860626220703, + "step": 703 + }, + { + "epoch": 0.1622119815668203, + "grad_norm": 0.4233068308539462, + "learning_rate": 1.9947530431152494e-06, + "loss": 1.018160343170166, + "step": 704 + }, + { + "epoch": 0.16244239631336405, + "grad_norm": 0.5753809013533212, + "learning_rate": 1.9947139942413378e-06, + "loss": 0.9755370616912842, + "step": 705 + }, + { + "epoch": 0.16267281105990783, + "grad_norm": 0.490686071812002, + "learning_rate": 1.994674800986006e-06, + "loss": 0.9406822919845581, + "step": 706 + }, + { + "epoch": 0.1629032258064516, + "grad_norm": 0.4856505350445516, + "learning_rate": 1.994635463354944e-06, + "loss": 0.9128296971321106, + "step": 707 + }, + { + "epoch": 0.1631336405529954, + "grad_norm": 0.42889971607025285, + "learning_rate": 1.994595981353861e-06, + "loss": 0.929735541343689, + "step": 708 + }, + { + "epoch": 0.16336405529953918, + "grad_norm": 0.5176054911036664, + "learning_rate": 1.994556354988488e-06, + "loss": 0.9021023511886597, + "step": 709 + }, + { + "epoch": 0.16359447004608296, + "grad_norm": 0.46567553841056064, + "learning_rate": 1.994516584264577e-06, + "loss": 0.9187623262405396, + "step": 710 + }, + { + "epoch": 0.16382488479262672, + "grad_norm": 0.4564071002670219, + "learning_rate": 1.9944766691879e-06, + "loss": 0.8283985257148743, + "step": 711 + }, + { + "epoch": 0.1640552995391705, + "grad_norm": 0.5448909609220928, + "learning_rate": 1.994436609764251e-06, + "loss": 1.0592901706695557, + "step": 712 + }, + { + "epoch": 0.16428571428571428, + "grad_norm": 0.5512946720093808, + "learning_rate": 1.9943964059994446e-06, + "loss": 0.98726487159729, + "step": 713 + }, + { + "epoch": 0.16451612903225807, + "grad_norm": 0.5060774432164115, + "learning_rate": 1.9943560578993165e-06, + "loss": 0.8761749267578125, + "step": 714 + }, + { + "epoch": 0.16474654377880185, + "grad_norm": 0.4759569802502017, + "learning_rate": 1.9943155654697227e-06, + "loss": 0.878170371055603, + 
"step": 715 + }, + { + "epoch": 0.16497695852534563, + "grad_norm": 0.5212205127966931, + "learning_rate": 1.9942749287165414e-06, + "loss": 0.9444767236709595, + "step": 716 + }, + { + "epoch": 0.1652073732718894, + "grad_norm": 0.436107073640643, + "learning_rate": 1.9942341476456697e-06, + "loss": 0.8270057439804077, + "step": 717 + }, + { + "epoch": 0.16543778801843317, + "grad_norm": 0.36828111446023454, + "learning_rate": 1.9941932222630284e-06, + "loss": 0.825955867767334, + "step": 718 + }, + { + "epoch": 0.16566820276497696, + "grad_norm": 0.4748059596727922, + "learning_rate": 1.9941521525745564e-06, + "loss": 0.9384286403656006, + "step": 719 + }, + { + "epoch": 0.16589861751152074, + "grad_norm": 0.5968010950850139, + "learning_rate": 1.994110938586216e-06, + "loss": 0.9627010226249695, + "step": 720 + }, + { + "epoch": 0.16612903225806452, + "grad_norm": 0.40665371786149496, + "learning_rate": 1.9940695803039886e-06, + "loss": 0.8436836004257202, + "step": 721 + }, + { + "epoch": 0.1663594470046083, + "grad_norm": 0.48219849106464674, + "learning_rate": 1.994028077733878e-06, + "loss": 1.0689928531646729, + "step": 722 + }, + { + "epoch": 0.16658986175115206, + "grad_norm": 0.4600242469407339, + "learning_rate": 1.993986430881907e-06, + "loss": 0.911309003829956, + "step": 723 + }, + { + "epoch": 0.16682027649769585, + "grad_norm": 0.5404195969690949, + "learning_rate": 1.993944639754122e-06, + "loss": 0.9897152185440063, + "step": 724 + }, + { + "epoch": 0.16705069124423963, + "grad_norm": 0.48212503869308937, + "learning_rate": 1.9939027043565883e-06, + "loss": 1.0230367183685303, + "step": 725 + }, + { + "epoch": 0.1672811059907834, + "grad_norm": 0.4398728967426152, + "learning_rate": 1.993860624695393e-06, + "loss": 0.8067069053649902, + "step": 726 + }, + { + "epoch": 0.1675115207373272, + "grad_norm": 0.5835576425821721, + "learning_rate": 1.9938184007766434e-06, + "loss": 0.9784343242645264, + "step": 727 + }, + { + "epoch": 
0.16774193548387098, + "grad_norm": 0.5139557651921927, + "learning_rate": 1.9937760326064686e-06, + "loss": 0.8617877960205078, + "step": 728 + }, + { + "epoch": 0.16797235023041474, + "grad_norm": 0.5276605551773887, + "learning_rate": 1.9937335201910183e-06, + "loss": 1.0390141010284424, + "step": 729 + }, + { + "epoch": 0.16820276497695852, + "grad_norm": 0.5007165894606777, + "learning_rate": 1.9936908635364633e-06, + "loss": 1.0478965044021606, + "step": 730 + }, + { + "epoch": 0.1684331797235023, + "grad_norm": 0.46789644745982956, + "learning_rate": 1.9936480626489944e-06, + "loss": 0.8396252393722534, + "step": 731 + }, + { + "epoch": 0.16866359447004609, + "grad_norm": 0.4366381763655398, + "learning_rate": 1.9936051175348256e-06, + "loss": 0.8690099120140076, + "step": 732 + }, + { + "epoch": 0.16889400921658987, + "grad_norm": 0.44373038767323764, + "learning_rate": 1.993562028200189e-06, + "loss": 0.944722056388855, + "step": 733 + }, + { + "epoch": 0.16912442396313365, + "grad_norm": 0.4480067961897654, + "learning_rate": 1.9935187946513385e-06, + "loss": 0.7134733200073242, + "step": 734 + }, + { + "epoch": 0.1693548387096774, + "grad_norm": 0.44081731431481436, + "learning_rate": 1.993475416894551e-06, + "loss": 0.8102486729621887, + "step": 735 + }, + { + "epoch": 0.1695852534562212, + "grad_norm": 0.5621249368486638, + "learning_rate": 1.9934318949361215e-06, + "loss": 0.924787163734436, + "step": 736 + }, + { + "epoch": 0.16981566820276497, + "grad_norm": 0.4621168425652111, + "learning_rate": 1.993388228782368e-06, + "loss": 0.9595087766647339, + "step": 737 + }, + { + "epoch": 0.17004608294930876, + "grad_norm": 0.4164356485660062, + "learning_rate": 1.993344418439628e-06, + "loss": 0.9949792623519897, + "step": 738 + }, + { + "epoch": 0.17027649769585254, + "grad_norm": 0.6359964400004778, + "learning_rate": 1.9933004639142604e-06, + "loss": 1.0905860662460327, + "step": 739 + }, + { + "epoch": 0.17050691244239632, + "grad_norm": 
0.39800173884382345, + "learning_rate": 1.9932563652126455e-06, + "loss": 0.9638324975967407, + "step": 740 + }, + { + "epoch": 0.17073732718894008, + "grad_norm": 0.4909114039853375, + "learning_rate": 1.9932121223411844e-06, + "loss": 0.9434946179389954, + "step": 741 + }, + { + "epoch": 0.17096774193548386, + "grad_norm": 0.49072837958490606, + "learning_rate": 1.9931677353062983e-06, + "loss": 0.9050095081329346, + "step": 742 + }, + { + "epoch": 0.17119815668202765, + "grad_norm": 0.509303736181324, + "learning_rate": 1.9931232041144303e-06, + "loss": 1.0698316097259521, + "step": 743 + }, + { + "epoch": 0.17142857142857143, + "grad_norm": 0.393391743712663, + "learning_rate": 1.993078528772044e-06, + "loss": 0.7938296794891357, + "step": 744 + }, + { + "epoch": 0.1716589861751152, + "grad_norm": 0.46597408496400117, + "learning_rate": 1.993033709285624e-06, + "loss": 0.8485043048858643, + "step": 745 + }, + { + "epoch": 0.171889400921659, + "grad_norm": 0.4736797887475262, + "learning_rate": 1.9929887456616754e-06, + "loss": 0.8605694770812988, + "step": 746 + }, + { + "epoch": 0.17211981566820275, + "grad_norm": 0.40523028160004354, + "learning_rate": 1.9929436379067253e-06, + "loss": 0.7101563215255737, + "step": 747 + }, + { + "epoch": 0.17235023041474654, + "grad_norm": 0.4519555914654837, + "learning_rate": 1.9928983860273205e-06, + "loss": 1.093912959098816, + "step": 748 + }, + { + "epoch": 0.17258064516129032, + "grad_norm": 0.4930830686705908, + "learning_rate": 1.9928529900300294e-06, + "loss": 0.8099753856658936, + "step": 749 + }, + { + "epoch": 0.1728110599078341, + "grad_norm": 0.3752662958180716, + "learning_rate": 1.992807449921441e-06, + "loss": 0.7816359400749207, + "step": 750 + }, + { + "epoch": 0.17304147465437789, + "grad_norm": 0.5180432792159949, + "learning_rate": 1.9927617657081656e-06, + "loss": 0.8887455463409424, + "step": 751 + }, + { + "epoch": 0.17327188940092167, + "grad_norm": 0.6260862232080928, + "learning_rate": 
1.992715937396834e-06, + "loss": 1.0926017761230469, + "step": 752 + }, + { + "epoch": 0.17350230414746542, + "grad_norm": 0.5546410088380269, + "learning_rate": 1.9926699649940985e-06, + "loss": 0.7657707929611206, + "step": 753 + }, + { + "epoch": 0.1737327188940092, + "grad_norm": 0.5766197712214459, + "learning_rate": 1.992623848506632e-06, + "loss": 0.9350340366363525, + "step": 754 + }, + { + "epoch": 0.173963133640553, + "grad_norm": 0.5011774306610247, + "learning_rate": 1.9925775879411276e-06, + "loss": 0.883575439453125, + "step": 755 + }, + { + "epoch": 0.17419354838709677, + "grad_norm": 0.3678933943457833, + "learning_rate": 1.9925311833043e-06, + "loss": 0.814304769039154, + "step": 756 + }, + { + "epoch": 0.17442396313364056, + "grad_norm": 0.5857143887476359, + "learning_rate": 1.992484634602886e-06, + "loss": 0.9263690710067749, + "step": 757 + }, + { + "epoch": 0.17465437788018434, + "grad_norm": 0.49862680540203774, + "learning_rate": 1.9924379418436402e-06, + "loss": 1.0321627855300903, + "step": 758 + }, + { + "epoch": 0.1748847926267281, + "grad_norm": 0.5062063825952041, + "learning_rate": 1.9923911050333413e-06, + "loss": 0.969459056854248, + "step": 759 + }, + { + "epoch": 0.17511520737327188, + "grad_norm": 0.4554436665394103, + "learning_rate": 1.9923441241787874e-06, + "loss": 0.9926396012306213, + "step": 760 + }, + { + "epoch": 0.17534562211981566, + "grad_norm": 0.43315077691547155, + "learning_rate": 1.9922969992867975e-06, + "loss": 0.776180624961853, + "step": 761 + }, + { + "epoch": 0.17557603686635945, + "grad_norm": 0.5350913373105377, + "learning_rate": 1.992249730364212e-06, + "loss": 0.9413800239562988, + "step": 762 + }, + { + "epoch": 0.17580645161290323, + "grad_norm": 0.48045178893419493, + "learning_rate": 1.9922023174178913e-06, + "loss": 0.8365576267242432, + "step": 763 + }, + { + "epoch": 0.17603686635944701, + "grad_norm": 0.47752363664412967, + "learning_rate": 1.992154760454718e-06, + "loss": 1.023102879524231, + 
"step": 764 + }, + { + "epoch": 0.17626728110599077, + "grad_norm": 0.6035875388891613, + "learning_rate": 1.9921070594815944e-06, + "loss": 1.079930067062378, + "step": 765 + }, + { + "epoch": 0.17649769585253455, + "grad_norm": 0.44885698296531085, + "learning_rate": 1.9920592145054445e-06, + "loss": 0.8974392414093018, + "step": 766 + }, + { + "epoch": 0.17672811059907834, + "grad_norm": 0.5363940338283703, + "learning_rate": 1.9920112255332133e-06, + "loss": 0.9509298205375671, + "step": 767 + }, + { + "epoch": 0.17695852534562212, + "grad_norm": 0.3960858930926947, + "learning_rate": 1.991963092571866e-06, + "loss": 0.938835620880127, + "step": 768 + }, + { + "epoch": 0.1771889400921659, + "grad_norm": 0.3409332869225393, + "learning_rate": 1.9919148156283888e-06, + "loss": 0.7918044328689575, + "step": 769 + }, + { + "epoch": 0.1774193548387097, + "grad_norm": 0.46985590284048473, + "learning_rate": 1.9918663947097893e-06, + "loss": 0.8235958814620972, + "step": 770 + }, + { + "epoch": 0.17764976958525347, + "grad_norm": 0.4734643903674827, + "learning_rate": 1.9918178298230953e-06, + "loss": 0.9079158902168274, + "step": 771 + }, + { + "epoch": 0.17788018433179723, + "grad_norm": 0.5764167010482935, + "learning_rate": 1.9917691209753563e-06, + "loss": 0.8548607230186462, + "step": 772 + }, + { + "epoch": 0.178110599078341, + "grad_norm": 0.47446352682333093, + "learning_rate": 1.9917202681736428e-06, + "loss": 0.8327757120132446, + "step": 773 + }, + { + "epoch": 0.1783410138248848, + "grad_norm": 0.5415533792438672, + "learning_rate": 1.991671271425045e-06, + "loss": 1.0511503219604492, + "step": 774 + }, + { + "epoch": 0.17857142857142858, + "grad_norm": 0.4310425860855909, + "learning_rate": 1.991622130736675e-06, + "loss": 0.9168857932090759, + "step": 775 + }, + { + "epoch": 0.17880184331797236, + "grad_norm": 0.44391822434593214, + "learning_rate": 1.9915728461156654e-06, + "loss": 0.8740782737731934, + "step": 776 + }, + { + "epoch": 
0.17903225806451614, + "grad_norm": 0.5841506637592749, + "learning_rate": 1.99152341756917e-06, + "loss": 0.9706588983535767, + "step": 777 + }, + { + "epoch": 0.1792626728110599, + "grad_norm": 0.5492923015057676, + "learning_rate": 1.9914738451043627e-06, + "loss": 1.144281268119812, + "step": 778 + }, + { + "epoch": 0.17949308755760368, + "grad_norm": 0.4170516305027483, + "learning_rate": 1.9914241287284403e-06, + "loss": 0.973777174949646, + "step": 779 + }, + { + "epoch": 0.17972350230414746, + "grad_norm": 0.4502683719091688, + "learning_rate": 1.991374268448617e-06, + "loss": 0.9002145528793335, + "step": 780 + }, + { + "epoch": 0.17995391705069125, + "grad_norm": 0.5526460425242373, + "learning_rate": 1.9913242642721316e-06, + "loss": 0.9234670400619507, + "step": 781 + }, + { + "epoch": 0.18018433179723503, + "grad_norm": 0.4959743401985291, + "learning_rate": 1.9912741162062415e-06, + "loss": 0.9552402496337891, + "step": 782 + }, + { + "epoch": 0.18041474654377881, + "grad_norm": 0.5510111451188886, + "learning_rate": 1.9912238242582257e-06, + "loss": 1.0485708713531494, + "step": 783 + }, + { + "epoch": 0.18064516129032257, + "grad_norm": 0.5447745918227888, + "learning_rate": 1.991173388435384e-06, + "loss": 0.9852809906005859, + "step": 784 + }, + { + "epoch": 0.18087557603686635, + "grad_norm": 0.4726322734582533, + "learning_rate": 1.991122808745037e-06, + "loss": 0.7824808359146118, + "step": 785 + }, + { + "epoch": 0.18110599078341014, + "grad_norm": 0.6534462420793078, + "learning_rate": 1.9910720851945268e-06, + "loss": 1.0380492210388184, + "step": 786 + }, + { + "epoch": 0.18133640552995392, + "grad_norm": 0.48532232647089923, + "learning_rate": 1.991021217791215e-06, + "loss": 0.9808282256126404, + "step": 787 + }, + { + "epoch": 0.1815668202764977, + "grad_norm": 0.4791928008108061, + "learning_rate": 1.9909702065424854e-06, + "loss": 0.8636116981506348, + "step": 788 + }, + { + "epoch": 0.1817972350230415, + "grad_norm": 
0.45783287516468024, + "learning_rate": 1.9909190514557427e-06, + "loss": 0.8179407715797424, + "step": 789 + }, + { + "epoch": 0.18202764976958524, + "grad_norm": 0.4760021295113364, + "learning_rate": 1.990867752538411e-06, + "loss": 0.9424594044685364, + "step": 790 + }, + { + "epoch": 0.18225806451612903, + "grad_norm": 0.5558557995369799, + "learning_rate": 1.9908163097979366e-06, + "loss": 0.9429298043251038, + "step": 791 + }, + { + "epoch": 0.1824884792626728, + "grad_norm": 0.5944732273868478, + "learning_rate": 1.990764723241787e-06, + "loss": 0.9671716690063477, + "step": 792 + }, + { + "epoch": 0.1827188940092166, + "grad_norm": 0.6041148299127167, + "learning_rate": 1.9907129928774494e-06, + "loss": 1.0063345432281494, + "step": 793 + }, + { + "epoch": 0.18294930875576038, + "grad_norm": 0.4817475331580677, + "learning_rate": 1.990661118712432e-06, + "loss": 0.9932061433792114, + "step": 794 + }, + { + "epoch": 0.18317972350230416, + "grad_norm": 0.4648544131499562, + "learning_rate": 1.990609100754265e-06, + "loss": 0.859153151512146, + "step": 795 + }, + { + "epoch": 0.18341013824884791, + "grad_norm": 0.4738825500961963, + "learning_rate": 1.9905569390104984e-06, + "loss": 0.9328111410140991, + "step": 796 + }, + { + "epoch": 0.1836405529953917, + "grad_norm": 0.542624486663781, + "learning_rate": 1.9905046334887033e-06, + "loss": 0.9970628619194031, + "step": 797 + }, + { + "epoch": 0.18387096774193548, + "grad_norm": 0.41971271798029636, + "learning_rate": 1.990452184196472e-06, + "loss": 1.0347282886505127, + "step": 798 + }, + { + "epoch": 0.18410138248847926, + "grad_norm": 0.4270967132251902, + "learning_rate": 1.990399591141417e-06, + "loss": 0.9167106747627258, + "step": 799 + }, + { + "epoch": 0.18433179723502305, + "grad_norm": 0.5046236893106074, + "learning_rate": 1.990346854331173e-06, + "loss": 0.8895610570907593, + "step": 800 + }, + { + "epoch": 0.18456221198156683, + "grad_norm": 0.5237845429219861, + "learning_rate": 
1.990293973773394e-06, + "loss": 0.8525041341781616, + "step": 801 + }, + { + "epoch": 0.1847926267281106, + "grad_norm": 0.4894836264572075, + "learning_rate": 1.9902409494757553e-06, + "loss": 0.8184069395065308, + "step": 802 + }, + { + "epoch": 0.18502304147465437, + "grad_norm": 0.430895578738413, + "learning_rate": 1.9901877814459544e-06, + "loss": 0.8342509269714355, + "step": 803 + }, + { + "epoch": 0.18525345622119815, + "grad_norm": 0.49779999067704434, + "learning_rate": 1.9901344696917072e-06, + "loss": 0.9254395365715027, + "step": 804 + }, + { + "epoch": 0.18548387096774194, + "grad_norm": 0.5124892914660328, + "learning_rate": 1.990081014220753e-06, + "loss": 0.9537396430969238, + "step": 805 + }, + { + "epoch": 0.18571428571428572, + "grad_norm": 0.47100696643896606, + "learning_rate": 1.99002741504085e-06, + "loss": 0.871498167514801, + "step": 806 + }, + { + "epoch": 0.1859447004608295, + "grad_norm": 0.43363760401100476, + "learning_rate": 1.9899736721597786e-06, + "loss": 0.879954993724823, + "step": 807 + }, + { + "epoch": 0.18617511520737326, + "grad_norm": 0.5651525829110051, + "learning_rate": 1.9899197855853386e-06, + "loss": 0.9238240718841553, + "step": 808 + }, + { + "epoch": 0.18640552995391704, + "grad_norm": 0.43185548411741037, + "learning_rate": 1.9898657553253527e-06, + "loss": 0.7939119935035706, + "step": 809 + }, + { + "epoch": 0.18663594470046083, + "grad_norm": 0.42423118388289394, + "learning_rate": 1.989811581387663e-06, + "loss": 0.8536086082458496, + "step": 810 + }, + { + "epoch": 0.1868663594470046, + "grad_norm": 0.7488569193689159, + "learning_rate": 1.9897572637801322e-06, + "loss": 0.8272225856781006, + "step": 811 + }, + { + "epoch": 0.1870967741935484, + "grad_norm": 0.5639808995976617, + "learning_rate": 1.989702802510645e-06, + "loss": 0.9187904596328735, + "step": 812 + }, + { + "epoch": 0.18732718894009218, + "grad_norm": 0.5096509814307604, + "learning_rate": 1.989648197587106e-06, + "loss": 0.905516505241394, 
+ "step": 813 + }, + { + "epoch": 0.18755760368663593, + "grad_norm": 0.46349746061643887, + "learning_rate": 1.9895934490174415e-06, + "loss": 0.7548567056655884, + "step": 814 + }, + { + "epoch": 0.18778801843317972, + "grad_norm": 0.5916446556749395, + "learning_rate": 1.9895385568095978e-06, + "loss": 0.8242576122283936, + "step": 815 + }, + { + "epoch": 0.1880184331797235, + "grad_norm": 0.47871736963615374, + "learning_rate": 1.9894835209715427e-06, + "loss": 0.9861007928848267, + "step": 816 + }, + { + "epoch": 0.18824884792626728, + "grad_norm": 0.5325996448618295, + "learning_rate": 1.989428341511264e-06, + "loss": 0.9705426096916199, + "step": 817 + }, + { + "epoch": 0.18847926267281107, + "grad_norm": 0.5222036147665577, + "learning_rate": 1.9893730184367722e-06, + "loss": 0.9773565530776978, + "step": 818 + }, + { + "epoch": 0.18870967741935485, + "grad_norm": 0.42837248272258044, + "learning_rate": 1.989317551756096e-06, + "loss": 0.7929856777191162, + "step": 819 + }, + { + "epoch": 0.1889400921658986, + "grad_norm": 0.48925051722314383, + "learning_rate": 1.9892619414772866e-06, + "loss": 0.9749126434326172, + "step": 820 + }, + { + "epoch": 0.1891705069124424, + "grad_norm": 0.49968815355517815, + "learning_rate": 1.9892061876084166e-06, + "loss": 0.9945374727249146, + "step": 821 + }, + { + "epoch": 0.18940092165898617, + "grad_norm": 0.3942389156154952, + "learning_rate": 1.9891502901575776e-06, + "loss": 0.8016892075538635, + "step": 822 + }, + { + "epoch": 0.18963133640552995, + "grad_norm": 0.5604199160430772, + "learning_rate": 1.9890942491328837e-06, + "loss": 0.9389557838439941, + "step": 823 + }, + { + "epoch": 0.18986175115207374, + "grad_norm": 0.38179956879765936, + "learning_rate": 1.9890380645424686e-06, + "loss": 0.724082887172699, + "step": 824 + }, + { + "epoch": 0.19009216589861752, + "grad_norm": 0.5409880819899738, + "learning_rate": 1.988981736394488e-06, + "loss": 0.8877915143966675, + "step": 825 + }, + { + "epoch": 
0.19032258064516128, + "grad_norm": 0.6992705135248997, + "learning_rate": 1.9889252646971177e-06, + "loss": 1.207446813583374, + "step": 826 + }, + { + "epoch": 0.19055299539170506, + "grad_norm": 0.5040994233955279, + "learning_rate": 1.9888686494585542e-06, + "loss": 0.9155057668685913, + "step": 827 + }, + { + "epoch": 0.19078341013824884, + "grad_norm": 0.5532998867192596, + "learning_rate": 1.9888118906870154e-06, + "loss": 1.005772352218628, + "step": 828 + }, + { + "epoch": 0.19101382488479263, + "grad_norm": 0.42790166152469256, + "learning_rate": 1.9887549883907394e-06, + "loss": 0.9060605764389038, + "step": 829 + }, + { + "epoch": 0.1912442396313364, + "grad_norm": 0.5177028577691919, + "learning_rate": 1.988697942577986e-06, + "loss": 0.7652161717414856, + "step": 830 + }, + { + "epoch": 0.1914746543778802, + "grad_norm": 0.5981838434161031, + "learning_rate": 1.9886407532570354e-06, + "loss": 1.0191380977630615, + "step": 831 + }, + { + "epoch": 0.19170506912442398, + "grad_norm": 0.4987711114148914, + "learning_rate": 1.9885834204361876e-06, + "loss": 0.9497933387756348, + "step": 832 + }, + { + "epoch": 0.19193548387096773, + "grad_norm": 0.462035144334916, + "learning_rate": 1.9885259441237657e-06, + "loss": 0.7728058099746704, + "step": 833 + }, + { + "epoch": 0.19216589861751152, + "grad_norm": 0.517810203206895, + "learning_rate": 1.9884683243281113e-06, + "loss": 0.8961999416351318, + "step": 834 + }, + { + "epoch": 0.1923963133640553, + "grad_norm": 0.49386963761649333, + "learning_rate": 1.9884105610575885e-06, + "loss": 0.9218904972076416, + "step": 835 + }, + { + "epoch": 0.19262672811059908, + "grad_norm": 0.49785428541631027, + "learning_rate": 1.9883526543205807e-06, + "loss": 0.8411329984664917, + "step": 836 + }, + { + "epoch": 0.19285714285714287, + "grad_norm": 0.42947794662366, + "learning_rate": 1.988294604125494e-06, + "loss": 0.9536285400390625, + "step": 837 + }, + { + "epoch": 0.19308755760368665, + "grad_norm": 
0.589338261376726, + "learning_rate": 1.9882364104807535e-06, + "loss": 0.9404321908950806, + "step": 838 + }, + { + "epoch": 0.1933179723502304, + "grad_norm": 0.6889982860652113, + "learning_rate": 1.9881780733948066e-06, + "loss": 1.2520880699157715, + "step": 839 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 0.5071547317768794, + "learning_rate": 1.9881195928761205e-06, + "loss": 0.8961449861526489, + "step": 840 + }, + { + "epoch": 0.19377880184331797, + "grad_norm": 0.5612915327251169, + "learning_rate": 1.9880609689331833e-06, + "loss": 0.8844394683837891, + "step": 841 + }, + { + "epoch": 0.19400921658986175, + "grad_norm": 0.6383643268501873, + "learning_rate": 1.9880022015745044e-06, + "loss": 1.1305835247039795, + "step": 842 + }, + { + "epoch": 0.19423963133640554, + "grad_norm": 0.5396685716999928, + "learning_rate": 1.9879432908086143e-06, + "loss": 0.9980956315994263, + "step": 843 + }, + { + "epoch": 0.19447004608294932, + "grad_norm": 0.46511386172638836, + "learning_rate": 1.987884236644063e-06, + "loss": 0.7613730430603027, + "step": 844 + }, + { + "epoch": 0.19470046082949308, + "grad_norm": 0.6010725617242704, + "learning_rate": 1.987825039089423e-06, + "loss": 0.9742579460144043, + "step": 845 + }, + { + "epoch": 0.19493087557603686, + "grad_norm": 0.4022001131058661, + "learning_rate": 1.9877656981532864e-06, + "loss": 0.7118766903877258, + "step": 846 + }, + { + "epoch": 0.19516129032258064, + "grad_norm": 0.48902949112989696, + "learning_rate": 1.9877062138442657e-06, + "loss": 0.8657095432281494, + "step": 847 + }, + { + "epoch": 0.19539170506912443, + "grad_norm": 0.42720754806325495, + "learning_rate": 1.987646586170996e-06, + "loss": 0.8543902039527893, + "step": 848 + }, + { + "epoch": 0.1956221198156682, + "grad_norm": 0.4842820004763047, + "learning_rate": 1.9875868151421317e-06, + "loss": 0.8896970748901367, + "step": 849 + }, + { + "epoch": 0.195852534562212, + "grad_norm": 0.5225855938017534, + "learning_rate": 
1.9875269007663486e-06, + "loss": 0.8662775754928589, + "step": 850 + }, + { + "epoch": 0.19608294930875575, + "grad_norm": 0.48460338230512107, + "learning_rate": 1.9874668430523434e-06, + "loss": 0.8241516351699829, + "step": 851 + }, + { + "epoch": 0.19631336405529953, + "grad_norm": 0.5278134062893883, + "learning_rate": 1.987406642008833e-06, + "loss": 0.973886251449585, + "step": 852 + }, + { + "epoch": 0.19654377880184332, + "grad_norm": 0.48464213201098744, + "learning_rate": 1.9873462976445554e-06, + "loss": 0.8133533000946045, + "step": 853 + }, + { + "epoch": 0.1967741935483871, + "grad_norm": 0.6657370368562822, + "learning_rate": 1.9872858099682697e-06, + "loss": 1.120869755744934, + "step": 854 + }, + { + "epoch": 0.19700460829493088, + "grad_norm": 0.47886128108046017, + "learning_rate": 1.9872251789887562e-06, + "loss": 0.9376444816589355, + "step": 855 + }, + { + "epoch": 0.19723502304147467, + "grad_norm": 0.4627008078705538, + "learning_rate": 1.9871644047148148e-06, + "loss": 0.8763699531555176, + "step": 856 + }, + { + "epoch": 0.19746543778801842, + "grad_norm": 0.5436736732062664, + "learning_rate": 1.9871034871552667e-06, + "loss": 0.7993260622024536, + "step": 857 + }, + { + "epoch": 0.1976958525345622, + "grad_norm": 0.5225344117964711, + "learning_rate": 1.9870424263189542e-06, + "loss": 1.0312654972076416, + "step": 858 + }, + { + "epoch": 0.197926267281106, + "grad_norm": 0.6040828842975151, + "learning_rate": 1.98698122221474e-06, + "loss": 1.0784629583358765, + "step": 859 + }, + { + "epoch": 0.19815668202764977, + "grad_norm": 0.5681257026488339, + "learning_rate": 1.9869198748515085e-06, + "loss": 1.136039137840271, + "step": 860 + }, + { + "epoch": 0.19838709677419356, + "grad_norm": 0.5123381612546825, + "learning_rate": 1.986858384238163e-06, + "loss": 0.834873378276825, + "step": 861 + }, + { + "epoch": 0.19861751152073734, + "grad_norm": 0.5505167057841309, + "learning_rate": 1.98679675038363e-06, + "loss": 0.9705442190170288, 
+ "step": 862 + }, + { + "epoch": 0.1988479262672811, + "grad_norm": 0.6567761197272963, + "learning_rate": 1.9867349732968547e-06, + "loss": 0.9343886375427246, + "step": 863 + }, + { + "epoch": 0.19907834101382488, + "grad_norm": 0.49387008808397015, + "learning_rate": 1.986673052986805e-06, + "loss": 0.9140456914901733, + "step": 864 + }, + { + "epoch": 0.19930875576036866, + "grad_norm": 0.5850607327811402, + "learning_rate": 1.986610989462467e-06, + "loss": 0.9121139049530029, + "step": 865 + }, + { + "epoch": 0.19953917050691244, + "grad_norm": 0.4775789448856378, + "learning_rate": 1.9865487827328505e-06, + "loss": 0.7333672642707825, + "step": 866 + }, + { + "epoch": 0.19976958525345623, + "grad_norm": 0.5039450613377916, + "learning_rate": 1.986486432806984e-06, + "loss": 0.8405989408493042, + "step": 867 + }, + { + "epoch": 0.2, + "grad_norm": 0.47371690470710304, + "learning_rate": 1.9864239396939176e-06, + "loss": 0.8693375587463379, + "step": 868 + }, + { + "epoch": 0.20023041474654377, + "grad_norm": 0.5727654616233698, + "learning_rate": 1.9863613034027223e-06, + "loss": 1.0137104988098145, + "step": 869 + }, + { + "epoch": 0.20046082949308755, + "grad_norm": 0.5382771457657299, + "learning_rate": 1.9862985239424895e-06, + "loss": 1.0283832550048828, + "step": 870 + }, + { + "epoch": 0.20069124423963133, + "grad_norm": 0.6200501422886965, + "learning_rate": 1.9862356013223316e-06, + "loss": 1.117444634437561, + "step": 871 + }, + { + "epoch": 0.20092165898617512, + "grad_norm": 0.6309070895129882, + "learning_rate": 1.986172535551382e-06, + "loss": 0.8861427307128906, + "step": 872 + }, + { + "epoch": 0.2011520737327189, + "grad_norm": 0.5017852774763055, + "learning_rate": 1.9861093266387946e-06, + "loss": 1.0273747444152832, + "step": 873 + }, + { + "epoch": 0.20138248847926268, + "grad_norm": 0.5141875246573869, + "learning_rate": 1.9860459745937437e-06, + "loss": 0.918023943901062, + "step": 874 + }, + { + "epoch": 0.20161290322580644, + 
"grad_norm": 0.5278755996885149, + "learning_rate": 1.9859824794254246e-06, + "loss": 0.8983356952667236, + "step": 875 + }, + { + "epoch": 0.20184331797235022, + "grad_norm": 0.5803540160351622, + "learning_rate": 1.985918841143054e-06, + "loss": 1.0180974006652832, + "step": 876 + }, + { + "epoch": 0.202073732718894, + "grad_norm": 0.48253787858386377, + "learning_rate": 1.985855059755869e-06, + "loss": 0.9656573534011841, + "step": 877 + }, + { + "epoch": 0.2023041474654378, + "grad_norm": 0.5015537059540116, + "learning_rate": 1.9857911352731273e-06, + "loss": 0.8522181510925293, + "step": 878 + }, + { + "epoch": 0.20253456221198157, + "grad_norm": 0.4883752495192941, + "learning_rate": 1.985727067704107e-06, + "loss": 0.9180892705917358, + "step": 879 + }, + { + "epoch": 0.20276497695852536, + "grad_norm": 0.5817140345419661, + "learning_rate": 1.985662857058108e-06, + "loss": 0.9979432821273804, + "step": 880 + }, + { + "epoch": 0.2029953917050691, + "grad_norm": 0.5608420179715049, + "learning_rate": 1.98559850334445e-06, + "loss": 0.8916480541229248, + "step": 881 + }, + { + "epoch": 0.2032258064516129, + "grad_norm": 0.41973060059994494, + "learning_rate": 1.9855340065724738e-06, + "loss": 0.8755770921707153, + "step": 882 + }, + { + "epoch": 0.20345622119815668, + "grad_norm": 0.5596516763963291, + "learning_rate": 1.9854693667515418e-06, + "loss": 1.0200350284576416, + "step": 883 + }, + { + "epoch": 0.20368663594470046, + "grad_norm": 0.5199867730002389, + "learning_rate": 1.9854045838910353e-06, + "loss": 0.928024172782898, + "step": 884 + }, + { + "epoch": 0.20391705069124424, + "grad_norm": 0.5756725941645391, + "learning_rate": 1.9853396580003582e-06, + "loss": 0.8617212176322937, + "step": 885 + }, + { + "epoch": 0.20414746543778803, + "grad_norm": 0.5415263717139983, + "learning_rate": 1.985274589088934e-06, + "loss": 0.9383209943771362, + "step": 886 + }, + { + "epoch": 0.20437788018433178, + "grad_norm": 0.48094986017269503, + "learning_rate": 
1.985209377166208e-06, + "loss": 0.7217687368392944, + "step": 887 + }, + { + "epoch": 0.20460829493087557, + "grad_norm": 0.612593081169746, + "learning_rate": 1.9851440222416446e-06, + "loss": 1.0717028379440308, + "step": 888 + }, + { + "epoch": 0.20483870967741935, + "grad_norm": 0.6063882651782059, + "learning_rate": 1.9850785243247303e-06, + "loss": 1.0137064456939697, + "step": 889 + }, + { + "epoch": 0.20506912442396313, + "grad_norm": 0.5244411173844509, + "learning_rate": 1.985012883424973e-06, + "loss": 0.8569058179855347, + "step": 890 + }, + { + "epoch": 0.20529953917050692, + "grad_norm": 0.6524290996376207, + "learning_rate": 1.9849470995518993e-06, + "loss": 0.9398901462554932, + "step": 891 + }, + { + "epoch": 0.2055299539170507, + "grad_norm": 0.3752296846015947, + "learning_rate": 1.9848811727150577e-06, + "loss": 0.731800377368927, + "step": 892 + }, + { + "epoch": 0.20576036866359446, + "grad_norm": 0.5142990565199794, + "learning_rate": 1.984815102924018e-06, + "loss": 0.8543055653572083, + "step": 893 + }, + { + "epoch": 0.20599078341013824, + "grad_norm": 0.5278314343821748, + "learning_rate": 1.98474889018837e-06, + "loss": 0.9112114906311035, + "step": 894 + }, + { + "epoch": 0.20622119815668202, + "grad_norm": 0.50708997202126, + "learning_rate": 1.984682534517724e-06, + "loss": 0.8272690773010254, + "step": 895 + }, + { + "epoch": 0.2064516129032258, + "grad_norm": 0.5912295968473946, + "learning_rate": 1.984616035921712e-06, + "loss": 0.9680918455123901, + "step": 896 + }, + { + "epoch": 0.2066820276497696, + "grad_norm": 0.6089139321115737, + "learning_rate": 1.984549394409985e-06, + "loss": 0.815123438835144, + "step": 897 + }, + { + "epoch": 0.20691244239631337, + "grad_norm": 0.4952276433479721, + "learning_rate": 1.984482609992218e-06, + "loss": 0.8035521507263184, + "step": 898 + }, + { + "epoch": 0.20714285714285716, + "grad_norm": 0.548354244530079, + "learning_rate": 1.9844156826781027e-06, + "loss": 0.9000132083892822, + 
"step": 899 + }, + { + "epoch": 0.2073732718894009, + "grad_norm": 0.6652515011666116, + "learning_rate": 1.9843486124773543e-06, + "loss": 1.06328547000885, + "step": 900 + }, + { + "epoch": 0.2076036866359447, + "grad_norm": 0.4596762245312169, + "learning_rate": 1.9842813993997083e-06, + "loss": 0.9028425216674805, + "step": 901 + }, + { + "epoch": 0.20783410138248848, + "grad_norm": 0.5779573613376965, + "learning_rate": 1.9842140434549196e-06, + "loss": 0.7786350250244141, + "step": 902 + }, + { + "epoch": 0.20806451612903226, + "grad_norm": 0.5102795361356062, + "learning_rate": 1.9841465446527656e-06, + "loss": 0.8041539788246155, + "step": 903 + }, + { + "epoch": 0.20829493087557605, + "grad_norm": 0.4348300351835264, + "learning_rate": 1.9840789030030434e-06, + "loss": 0.8380184173583984, + "step": 904 + }, + { + "epoch": 0.20852534562211983, + "grad_norm": 0.7151525379978475, + "learning_rate": 1.984011118515572e-06, + "loss": 0.8191432952880859, + "step": 905 + }, + { + "epoch": 0.20875576036866358, + "grad_norm": 0.5006646807997585, + "learning_rate": 1.9839431912001885e-06, + "loss": 0.8236384391784668, + "step": 906 + }, + { + "epoch": 0.20898617511520737, + "grad_norm": 0.4959155947407375, + "learning_rate": 1.9838751210667534e-06, + "loss": 0.8218076825141907, + "step": 907 + }, + { + "epoch": 0.20921658986175115, + "grad_norm": 0.5127899266702147, + "learning_rate": 1.983806908125147e-06, + "loss": 0.9140353202819824, + "step": 908 + }, + { + "epoch": 0.20944700460829493, + "grad_norm": 0.5063732794644019, + "learning_rate": 1.9837385523852706e-06, + "loss": 0.9179826974868774, + "step": 909 + }, + { + "epoch": 0.20967741935483872, + "grad_norm": 0.5385574519868781, + "learning_rate": 1.9836700538570456e-06, + "loss": 0.8888909816741943, + "step": 910 + }, + { + "epoch": 0.2099078341013825, + "grad_norm": 0.5208969379705799, + "learning_rate": 1.9836014125504143e-06, + "loss": 0.8951253890991211, + "step": 911 + }, + { + "epoch": 
0.21013824884792626, + "grad_norm": 0.6093988535410455, + "learning_rate": 1.98353262847534e-06, + "loss": 1.084958553314209, + "step": 912 + }, + { + "epoch": 0.21036866359447004, + "grad_norm": 0.5061127496745415, + "learning_rate": 1.983463701641807e-06, + "loss": 0.8590713739395142, + "step": 913 + }, + { + "epoch": 0.21059907834101382, + "grad_norm": 0.6396228440899432, + "learning_rate": 1.9833946320598195e-06, + "loss": 1.0393706560134888, + "step": 914 + }, + { + "epoch": 0.2108294930875576, + "grad_norm": 0.49567487165870866, + "learning_rate": 1.983325419739403e-06, + "loss": 0.9403085708618164, + "step": 915 + }, + { + "epoch": 0.2110599078341014, + "grad_norm": 0.49912224081019996, + "learning_rate": 1.9832560646906038e-06, + "loss": 0.8431342244148254, + "step": 916 + }, + { + "epoch": 0.21129032258064517, + "grad_norm": 0.5558843704958377, + "learning_rate": 1.9831865669234884e-06, + "loss": 0.9024044871330261, + "step": 917 + }, + { + "epoch": 0.21152073732718893, + "grad_norm": 0.44775113902692637, + "learning_rate": 1.9831169264481443e-06, + "loss": 0.747347354888916, + "step": 918 + }, + { + "epoch": 0.2117511520737327, + "grad_norm": 0.46715914917156914, + "learning_rate": 1.9830471432746796e-06, + "loss": 0.8266197443008423, + "step": 919 + }, + { + "epoch": 0.2119815668202765, + "grad_norm": 0.5566270603086758, + "learning_rate": 1.9829772174132235e-06, + "loss": 0.8633416295051575, + "step": 920 + }, + { + "epoch": 0.21221198156682028, + "grad_norm": 0.5228096908540074, + "learning_rate": 1.9829071488739256e-06, + "loss": 1.0290095806121826, + "step": 921 + }, + { + "epoch": 0.21244239631336406, + "grad_norm": 0.667274912811163, + "learning_rate": 1.9828369376669566e-06, + "loss": 0.8193448781967163, + "step": 922 + }, + { + "epoch": 0.21267281105990785, + "grad_norm": 0.5677549533509479, + "learning_rate": 1.982766583802507e-06, + "loss": 0.8828415870666504, + "step": 923 + }, + { + "epoch": 0.2129032258064516, + "grad_norm": 
0.597806988660978, + "learning_rate": 1.9826960872907885e-06, + "loss": 0.8806191682815552, + "step": 924 + }, + { + "epoch": 0.21313364055299538, + "grad_norm": 0.40902701240404726, + "learning_rate": 1.982625448142034e-06, + "loss": 0.8441533446311951, + "step": 925 + }, + { + "epoch": 0.21336405529953917, + "grad_norm": 0.5142754504345473, + "learning_rate": 1.9825546663664963e-06, + "loss": 0.9084080457687378, + "step": 926 + }, + { + "epoch": 0.21359447004608295, + "grad_norm": 0.7318607240255686, + "learning_rate": 1.98248374197445e-06, + "loss": 0.9005601406097412, + "step": 927 + }, + { + "epoch": 0.21382488479262673, + "grad_norm": 0.48930991442842664, + "learning_rate": 1.9824126749761893e-06, + "loss": 1.0415414571762085, + "step": 928 + }, + { + "epoch": 0.21405529953917052, + "grad_norm": 0.4380456409582823, + "learning_rate": 1.982341465382029e-06, + "loss": 0.8130594491958618, + "step": 929 + }, + { + "epoch": 0.21428571428571427, + "grad_norm": 0.4623167832467728, + "learning_rate": 1.9822701132023053e-06, + "loss": 0.9178205728530884, + "step": 930 + }, + { + "epoch": 0.21451612903225806, + "grad_norm": 0.5894382821211327, + "learning_rate": 1.9821986184473754e-06, + "loss": 0.9927947521209717, + "step": 931 + }, + { + "epoch": 0.21474654377880184, + "grad_norm": 0.5621440238225328, + "learning_rate": 1.982126981127616e-06, + "loss": 0.9172670841217041, + "step": 932 + }, + { + "epoch": 0.21497695852534562, + "grad_norm": 0.5805773191302366, + "learning_rate": 1.9820552012534255e-06, + "loss": 0.9513058066368103, + "step": 933 + }, + { + "epoch": 0.2152073732718894, + "grad_norm": 0.6596090379041671, + "learning_rate": 1.9819832788352227e-06, + "loss": 1.014827013015747, + "step": 934 + }, + { + "epoch": 0.2154377880184332, + "grad_norm": 0.5483468550441934, + "learning_rate": 1.9819112138834473e-06, + "loss": 1.0225746631622314, + "step": 935 + }, + { + "epoch": 0.21566820276497695, + "grad_norm": 0.46659867801168237, + "learning_rate": 
1.9818390064085584e-06, + "loss": 0.8804227113723755, + "step": 936 + }, + { + "epoch": 0.21589861751152073, + "grad_norm": 0.42738644934381204, + "learning_rate": 1.9817666564210376e-06, + "loss": 0.7215760350227356, + "step": 937 + }, + { + "epoch": 0.2161290322580645, + "grad_norm": 0.6620668522422565, + "learning_rate": 1.981694163931387e-06, + "loss": 0.9978986978530884, + "step": 938 + }, + { + "epoch": 0.2163594470046083, + "grad_norm": 0.5846107454293807, + "learning_rate": 1.981621528950128e-06, + "loss": 0.8646233081817627, + "step": 939 + }, + { + "epoch": 0.21658986175115208, + "grad_norm": 0.44150430663795637, + "learning_rate": 1.981548751487803e-06, + "loss": 0.9619132876396179, + "step": 940 + }, + { + "epoch": 0.21682027649769586, + "grad_norm": 0.543839377462045, + "learning_rate": 1.981475831554976e-06, + "loss": 0.9209504127502441, + "step": 941 + }, + { + "epoch": 0.21705069124423962, + "grad_norm": 0.563351483363654, + "learning_rate": 1.9814027691622318e-06, + "loss": 0.7629299163818359, + "step": 942 + }, + { + "epoch": 0.2172811059907834, + "grad_norm": 0.4885334834965844, + "learning_rate": 1.9813295643201747e-06, + "loss": 0.8702583312988281, + "step": 943 + }, + { + "epoch": 0.21751152073732719, + "grad_norm": 0.5579102568918498, + "learning_rate": 1.9812562170394305e-06, + "loss": 0.9571657180786133, + "step": 944 + }, + { + "epoch": 0.21774193548387097, + "grad_norm": 0.43227127189367615, + "learning_rate": 1.9811827273306456e-06, + "loss": 0.7271617650985718, + "step": 945 + }, + { + "epoch": 0.21797235023041475, + "grad_norm": 0.46137899963900864, + "learning_rate": 1.9811090952044865e-06, + "loss": 0.8189597725868225, + "step": 946 + }, + { + "epoch": 0.21820276497695854, + "grad_norm": 0.49142212284435566, + "learning_rate": 1.981035320671641e-06, + "loss": 0.7933987379074097, + "step": 947 + }, + { + "epoch": 0.2184331797235023, + "grad_norm": 0.48207328184354004, + "learning_rate": 1.9809614037428174e-06, + "loss": 
0.9687645435333252, + "step": 948 + }, + { + "epoch": 0.21866359447004607, + "grad_norm": 0.5647695490676888, + "learning_rate": 1.980887344428745e-06, + "loss": 0.8293745517730713, + "step": 949 + }, + { + "epoch": 0.21889400921658986, + "grad_norm": 0.6489579503887147, + "learning_rate": 1.9808131427401727e-06, + "loss": 1.0447471141815186, + "step": 950 + }, + { + "epoch": 0.21912442396313364, + "grad_norm": 0.48010625791746325, + "learning_rate": 1.9807387986878715e-06, + "loss": 0.8916672468185425, + "step": 951 + }, + { + "epoch": 0.21935483870967742, + "grad_norm": 0.5436399520986829, + "learning_rate": 1.980664312282632e-06, + "loss": 0.8380981683731079, + "step": 952 + }, + { + "epoch": 0.2195852534562212, + "grad_norm": 0.4634469099281989, + "learning_rate": 1.9805896835352656e-06, + "loss": 0.887790322303772, + "step": 953 + }, + { + "epoch": 0.21981566820276496, + "grad_norm": 0.5184548533508342, + "learning_rate": 1.9805149124566048e-06, + "loss": 0.8353140950202942, + "step": 954 + }, + { + "epoch": 0.22004608294930875, + "grad_norm": 0.7177333773715296, + "learning_rate": 1.9804399990575026e-06, + "loss": 1.0337531566619873, + "step": 955 + }, + { + "epoch": 0.22027649769585253, + "grad_norm": 0.4262367777660272, + "learning_rate": 1.9803649433488324e-06, + "loss": 0.8845529556274414, + "step": 956 + }, + { + "epoch": 0.2205069124423963, + "grad_norm": 0.4271901286679727, + "learning_rate": 1.9802897453414884e-06, + "loss": 0.7408445477485657, + "step": 957 + }, + { + "epoch": 0.2207373271889401, + "grad_norm": 0.5478873632644168, + "learning_rate": 1.980214405046386e-06, + "loss": 0.873178243637085, + "step": 958 + }, + { + "epoch": 0.22096774193548388, + "grad_norm": 0.556535747180833, + "learning_rate": 1.98013892247446e-06, + "loss": 1.0207639932632446, + "step": 959 + }, + { + "epoch": 0.22119815668202766, + "grad_norm": 0.5890989419509002, + "learning_rate": 1.980063297636667e-06, + "loss": 0.8626997470855713, + "step": 960 + }, + { + "epoch": 
0.22142857142857142, + "grad_norm": 0.5912616927968722, + "learning_rate": 1.9799875305439836e-06, + "loss": 0.8961347341537476, + "step": 961 + }, + { + "epoch": 0.2216589861751152, + "grad_norm": 0.495639914718092, + "learning_rate": 1.9799116212074075e-06, + "loss": 0.8115944862365723, + "step": 962 + }, + { + "epoch": 0.22188940092165899, + "grad_norm": 0.5281413221179645, + "learning_rate": 1.979835569637957e-06, + "loss": 0.8274029493331909, + "step": 963 + }, + { + "epoch": 0.22211981566820277, + "grad_norm": 0.5782364794204825, + "learning_rate": 1.9797593758466706e-06, + "loss": 1.020345687866211, + "step": 964 + }, + { + "epoch": 0.22235023041474655, + "grad_norm": 0.586333023609623, + "learning_rate": 1.979683039844608e-06, + "loss": 0.8164723515510559, + "step": 965 + }, + { + "epoch": 0.22258064516129034, + "grad_norm": 0.48956655235723145, + "learning_rate": 1.979606561642849e-06, + "loss": 0.832849383354187, + "step": 966 + }, + { + "epoch": 0.2228110599078341, + "grad_norm": 0.5810232623043905, + "learning_rate": 1.9795299412524945e-06, + "loss": 0.9765876531600952, + "step": 967 + }, + { + "epoch": 0.22304147465437787, + "grad_norm": 0.5610292572060406, + "learning_rate": 1.9794531786846657e-06, + "loss": 0.9280411005020142, + "step": 968 + }, + { + "epoch": 0.22327188940092166, + "grad_norm": 0.6528516733941818, + "learning_rate": 1.9793762739505042e-06, + "loss": 1.122058629989624, + "step": 969 + }, + { + "epoch": 0.22350230414746544, + "grad_norm": 0.4582570301724996, + "learning_rate": 1.9792992270611737e-06, + "loss": 0.824627161026001, + "step": 970 + }, + { + "epoch": 0.22373271889400922, + "grad_norm": 0.750391550156154, + "learning_rate": 1.9792220380278565e-06, + "loss": 1.0583840608596802, + "step": 971 + }, + { + "epoch": 0.223963133640553, + "grad_norm": 0.5277817422831291, + "learning_rate": 1.979144706861757e-06, + "loss": 1.053803563117981, + "step": 972 + }, + { + "epoch": 0.22419354838709676, + "grad_norm": 0.5197675200798639, + 
"learning_rate": 1.9790672335740993e-06, + "loss": 0.8572183847427368, + "step": 973 + }, + { + "epoch": 0.22442396313364055, + "grad_norm": 0.5956201422774761, + "learning_rate": 1.978989618176129e-06, + "loss": 0.7955416440963745, + "step": 974 + }, + { + "epoch": 0.22465437788018433, + "grad_norm": 0.6931203377433601, + "learning_rate": 1.9789118606791113e-06, + "loss": 0.9455063343048096, + "step": 975 + }, + { + "epoch": 0.2248847926267281, + "grad_norm": 0.5553738972507489, + "learning_rate": 1.978833961094333e-06, + "loss": 0.788895845413208, + "step": 976 + }, + { + "epoch": 0.2251152073732719, + "grad_norm": 0.4854852275390097, + "learning_rate": 1.9787559194331014e-06, + "loss": 0.8344719409942627, + "step": 977 + }, + { + "epoch": 0.22534562211981568, + "grad_norm": 0.5098723288351352, + "learning_rate": 1.9786777357067436e-06, + "loss": 0.85140061378479, + "step": 978 + }, + { + "epoch": 0.22557603686635944, + "grad_norm": 0.43945689098482754, + "learning_rate": 1.978599409926608e-06, + "loss": 0.8511399030685425, + "step": 979 + }, + { + "epoch": 0.22580645161290322, + "grad_norm": 0.4893125980217, + "learning_rate": 1.9785209421040636e-06, + "loss": 0.9243351221084595, + "step": 980 + }, + { + "epoch": 0.226036866359447, + "grad_norm": 0.5349074342918002, + "learning_rate": 1.9784423322504996e-06, + "loss": 0.9043580293655396, + "step": 981 + }, + { + "epoch": 0.2262672811059908, + "grad_norm": 0.654146848198394, + "learning_rate": 1.978363580377327e-06, + "loss": 0.854049563407898, + "step": 982 + }, + { + "epoch": 0.22649769585253457, + "grad_norm": 0.43507484708504635, + "learning_rate": 1.9782846864959754e-06, + "loss": 0.7785296440124512, + "step": 983 + }, + { + "epoch": 0.22672811059907835, + "grad_norm": 0.5830354059161934, + "learning_rate": 1.9782056506178965e-06, + "loss": 0.8464720845222473, + "step": 984 + }, + { + "epoch": 0.2269585253456221, + "grad_norm": 0.5249975809892665, + "learning_rate": 1.9781264727545624e-06, + "loss": 
0.8519179821014404, + "step": 985 + }, + { + "epoch": 0.2271889400921659, + "grad_norm": 0.6176158235785483, + "learning_rate": 1.978047152917466e-06, + "loss": 0.956415057182312, + "step": 986 + }, + { + "epoch": 0.22741935483870968, + "grad_norm": 0.5046722242039021, + "learning_rate": 1.97796769111812e-06, + "loss": 1.028620719909668, + "step": 987 + }, + { + "epoch": 0.22764976958525346, + "grad_norm": 0.4889451789926323, + "learning_rate": 1.9778880873680585e-06, + "loss": 0.8707184195518494, + "step": 988 + }, + { + "epoch": 0.22788018433179724, + "grad_norm": 0.5212071576326044, + "learning_rate": 1.9778083416788355e-06, + "loss": 0.9842795729637146, + "step": 989 + }, + { + "epoch": 0.22811059907834103, + "grad_norm": 0.5963522406410062, + "learning_rate": 1.977728454062026e-06, + "loss": 0.8827522993087769, + "step": 990 + }, + { + "epoch": 0.22834101382488478, + "grad_norm": 0.5285989804764033, + "learning_rate": 1.9776484245292256e-06, + "loss": 0.8608568906784058, + "step": 991 + }, + { + "epoch": 0.22857142857142856, + "grad_norm": 0.7428648265675979, + "learning_rate": 1.977568253092051e-06, + "loss": 0.8512595891952515, + "step": 992 + }, + { + "epoch": 0.22880184331797235, + "grad_norm": 0.520235896024025, + "learning_rate": 1.9774879397621383e-06, + "loss": 0.7335344552993774, + "step": 993 + }, + { + "epoch": 0.22903225806451613, + "grad_norm": 0.6711607827981731, + "learning_rate": 1.9774074845511457e-06, + "loss": 1.0301114320755005, + "step": 994 + }, + { + "epoch": 0.22926267281105991, + "grad_norm": 0.515409965463074, + "learning_rate": 1.97732688747075e-06, + "loss": 0.9011565446853638, + "step": 995 + }, + { + "epoch": 0.2294930875576037, + "grad_norm": 0.5657170632178228, + "learning_rate": 1.9772461485326507e-06, + "loss": 0.8644282221794128, + "step": 996 + }, + { + "epoch": 0.22972350230414745, + "grad_norm": 0.49795498598042737, + "learning_rate": 1.9771652677485664e-06, + "loss": 0.8107467889785767, + "step": 997 + }, + { + "epoch": 
0.22995391705069124, + "grad_norm": 0.5832229133316258, + "learning_rate": 1.9770842451302373e-06, + "loss": 1.0090508460998535, + "step": 998 + }, + { + "epoch": 0.23018433179723502, + "grad_norm": 0.4910768822506593, + "learning_rate": 1.977003080689424e-06, + "loss": 0.8153292536735535, + "step": 999 + }, + { + "epoch": 0.2304147465437788, + "grad_norm": 0.6502643477323704, + "learning_rate": 1.976921774437906e-06, + "loss": 0.8446916341781616, + "step": 1000 + }, + { + "epoch": 0.2306451612903226, + "grad_norm": 0.5179047651030808, + "learning_rate": 1.9768403263874865e-06, + "loss": 0.759350597858429, + "step": 1001 + }, + { + "epoch": 0.23087557603686637, + "grad_norm": 0.5414654559095757, + "learning_rate": 1.9767587365499862e-06, + "loss": 0.9181695580482483, + "step": 1002 + }, + { + "epoch": 0.23110599078341013, + "grad_norm": 0.4755050115257823, + "learning_rate": 1.976677004937249e-06, + "loss": 0.8450978994369507, + "step": 1003 + }, + { + "epoch": 0.2313364055299539, + "grad_norm": 0.5616575268963485, + "learning_rate": 1.9765951315611365e-06, + "loss": 0.775252640247345, + "step": 1004 + }, + { + "epoch": 0.2315668202764977, + "grad_norm": 0.5248180263396327, + "learning_rate": 1.976513116433534e-06, + "loss": 0.8682440519332886, + "step": 1005 + }, + { + "epoch": 0.23179723502304148, + "grad_norm": 0.6093284414229693, + "learning_rate": 1.9764309595663457e-06, + "loss": 1.0701451301574707, + "step": 1006 + }, + { + "epoch": 0.23202764976958526, + "grad_norm": 0.5747684398408948, + "learning_rate": 1.976348660971496e-06, + "loss": 0.9381946921348572, + "step": 1007 + }, + { + "epoch": 0.23225806451612904, + "grad_norm": 0.5225356801303237, + "learning_rate": 1.976266220660931e-06, + "loss": 0.7836539149284363, + "step": 1008 + }, + { + "epoch": 0.2324884792626728, + "grad_norm": 0.5379097818020191, + "learning_rate": 1.9761836386466156e-06, + "loss": 0.9271948337554932, + "step": 1009 + }, + { + "epoch": 0.23271889400921658, + "grad_norm": 
0.514797473753123, + "learning_rate": 1.976100914940538e-06, + "loss": 0.8268035650253296, + "step": 1010 + }, + { + "epoch": 0.23294930875576036, + "grad_norm": 0.5105764513310544, + "learning_rate": 1.976018049554705e-06, + "loss": 0.8266786336898804, + "step": 1011 + }, + { + "epoch": 0.23317972350230415, + "grad_norm": 0.6250953922330988, + "learning_rate": 1.9759350425011435e-06, + "loss": 0.9437457323074341, + "step": 1012 + }, + { + "epoch": 0.23341013824884793, + "grad_norm": 0.5629533372281755, + "learning_rate": 1.9758518937919033e-06, + "loss": 0.9078803062438965, + "step": 1013 + }, + { + "epoch": 0.23364055299539171, + "grad_norm": 0.5994095472581402, + "learning_rate": 1.975768603439052e-06, + "loss": 0.9873687624931335, + "step": 1014 + }, + { + "epoch": 0.23387096774193547, + "grad_norm": 0.5010269853722422, + "learning_rate": 1.97568517145468e-06, + "loss": 0.9450196027755737, + "step": 1015 + }, + { + "epoch": 0.23410138248847925, + "grad_norm": 0.5173338079683222, + "learning_rate": 1.975601597850897e-06, + "loss": 0.8804495334625244, + "step": 1016 + }, + { + "epoch": 0.23433179723502304, + "grad_norm": 0.5286639294307074, + "learning_rate": 1.9755178826398333e-06, + "loss": 0.9646104574203491, + "step": 1017 + }, + { + "epoch": 0.23456221198156682, + "grad_norm": 0.5917923655178416, + "learning_rate": 1.9754340258336403e-06, + "loss": 0.9829385280609131, + "step": 1018 + }, + { + "epoch": 0.2347926267281106, + "grad_norm": 0.5022802882731887, + "learning_rate": 1.97535002744449e-06, + "loss": 0.8433707356452942, + "step": 1019 + }, + { + "epoch": 0.2350230414746544, + "grad_norm": 0.5984717862988072, + "learning_rate": 1.9752658874845744e-06, + "loss": 0.9892767071723938, + "step": 1020 + }, + { + "epoch": 0.23525345622119814, + "grad_norm": 0.5038568694461213, + "learning_rate": 1.9751816059661065e-06, + "loss": 0.8367536664009094, + "step": 1021 + }, + { + "epoch": 0.23548387096774193, + "grad_norm": 0.6009503951092086, + "learning_rate": 
1.9750971829013194e-06, + "loss": 0.8947298526763916, + "step": 1022 + }, + { + "epoch": 0.2357142857142857, + "grad_norm": 0.4955473883987944, + "learning_rate": 1.975012618302467e-06, + "loss": 0.9218910336494446, + "step": 1023 + }, + { + "epoch": 0.2359447004608295, + "grad_norm": 0.46527028147066757, + "learning_rate": 1.9749279121818236e-06, + "loss": 0.8744943141937256, + "step": 1024 + }, + { + "epoch": 0.23617511520737328, + "grad_norm": 0.5457797851350515, + "learning_rate": 1.9748430645516845e-06, + "loss": 0.9023007154464722, + "step": 1025 + }, + { + "epoch": 0.23640552995391706, + "grad_norm": 0.5361296427556177, + "learning_rate": 1.974758075424365e-06, + "loss": 0.8475106954574585, + "step": 1026 + }, + { + "epoch": 0.23663594470046084, + "grad_norm": 0.5535275060374267, + "learning_rate": 1.9746729448122013e-06, + "loss": 0.8594635725021362, + "step": 1027 + }, + { + "epoch": 0.2368663594470046, + "grad_norm": 0.6574105474773485, + "learning_rate": 1.97458767272755e-06, + "loss": 0.9601756930351257, + "step": 1028 + }, + { + "epoch": 0.23709677419354838, + "grad_norm": 0.5454698959338334, + "learning_rate": 1.9745022591827886e-06, + "loss": 0.9281105399131775, + "step": 1029 + }, + { + "epoch": 0.23732718894009217, + "grad_norm": 0.4631930883062957, + "learning_rate": 1.9744167041903136e-06, + "loss": 0.8240020275115967, + "step": 1030 + }, + { + "epoch": 0.23755760368663595, + "grad_norm": 0.5116113956014486, + "learning_rate": 1.9743310077625446e-06, + "loss": 0.807030200958252, + "step": 1031 + }, + { + "epoch": 0.23778801843317973, + "grad_norm": 0.5399356518827937, + "learning_rate": 1.9742451699119194e-06, + "loss": 0.8044267892837524, + "step": 1032 + }, + { + "epoch": 0.23801843317972352, + "grad_norm": 0.5022311335968053, + "learning_rate": 1.9741591906508975e-06, + "loss": 0.9198760390281677, + "step": 1033 + }, + { + "epoch": 0.23824884792626727, + "grad_norm": 0.6382005412114766, + "learning_rate": 1.974073069991959e-06, + "loss": 
0.7951973676681519, + "step": 1034 + }, + { + "epoch": 0.23847926267281105, + "grad_norm": 0.5488288386867366, + "learning_rate": 1.9739868079476035e-06, + "loss": 0.8366928100585938, + "step": 1035 + }, + { + "epoch": 0.23870967741935484, + "grad_norm": 0.5327938531465227, + "learning_rate": 1.9739004045303524e-06, + "loss": 0.9644484519958496, + "step": 1036 + }, + { + "epoch": 0.23894009216589862, + "grad_norm": 0.47502000880743445, + "learning_rate": 1.9738138597527464e-06, + "loss": 0.8332105875015259, + "step": 1037 + }, + { + "epoch": 0.2391705069124424, + "grad_norm": 0.4812648524584188, + "learning_rate": 1.9737271736273482e-06, + "loss": 0.8923197388648987, + "step": 1038 + }, + { + "epoch": 0.2394009216589862, + "grad_norm": 0.48693803999160823, + "learning_rate": 1.97364034616674e-06, + "loss": 0.861129879951477, + "step": 1039 + }, + { + "epoch": 0.23963133640552994, + "grad_norm": 0.49858003070315154, + "learning_rate": 1.973553377383524e-06, + "loss": 0.8042281270027161, + "step": 1040 + }, + { + "epoch": 0.23986175115207373, + "grad_norm": 0.603264823916037, + "learning_rate": 1.9734662672903247e-06, + "loss": 1.0315792560577393, + "step": 1041 + }, + { + "epoch": 0.2400921658986175, + "grad_norm": 0.524902457294173, + "learning_rate": 1.973379015899785e-06, + "loss": 0.8165839910507202, + "step": 1042 + }, + { + "epoch": 0.2403225806451613, + "grad_norm": 0.5868579839473654, + "learning_rate": 1.97329162322457e-06, + "loss": 1.0002663135528564, + "step": 1043 + }, + { + "epoch": 0.24055299539170508, + "grad_norm": 0.579630177733921, + "learning_rate": 1.9732040892773642e-06, + "loss": 0.9340938925743103, + "step": 1044 + }, + { + "epoch": 0.24078341013824886, + "grad_norm": 0.40394518210500746, + "learning_rate": 1.973116414070873e-06, + "loss": 0.7457709312438965, + "step": 1045 + }, + { + "epoch": 0.24101382488479262, + "grad_norm": 0.5468265646556031, + "learning_rate": 1.9730285976178227e-06, + "loss": 0.846583366394043, + "step": 1046 + }, + { 
+ "epoch": 0.2412442396313364, + "grad_norm": 0.597351972991794, + "learning_rate": 1.9729406399309594e-06, + "loss": 0.9701514840126038, + "step": 1047 + }, + { + "epoch": 0.24147465437788018, + "grad_norm": 0.430042606733588, + "learning_rate": 1.9728525410230506e-06, + "loss": 0.7943054437637329, + "step": 1048 + }, + { + "epoch": 0.24170506912442397, + "grad_norm": 0.690774172762037, + "learning_rate": 1.972764300906883e-06, + "loss": 0.8885551691055298, + "step": 1049 + }, + { + "epoch": 0.24193548387096775, + "grad_norm": 0.522936671850185, + "learning_rate": 1.9726759195952653e-06, + "loss": 0.8258899450302124, + "step": 1050 + }, + { + "epoch": 0.24216589861751153, + "grad_norm": 0.586622666679495, + "learning_rate": 1.9725873971010255e-06, + "loss": 1.0085303783416748, + "step": 1051 + }, + { + "epoch": 0.2423963133640553, + "grad_norm": 0.49596210148454095, + "learning_rate": 1.9724987334370124e-06, + "loss": 0.814777135848999, + "step": 1052 + }, + { + "epoch": 0.24262672811059907, + "grad_norm": 0.5592433145931486, + "learning_rate": 1.9724099286160953e-06, + "loss": 0.8328995704650879, + "step": 1053 + }, + { + "epoch": 0.24285714285714285, + "grad_norm": 0.5857793622474846, + "learning_rate": 1.9723209826511645e-06, + "loss": 0.8699138164520264, + "step": 1054 + }, + { + "epoch": 0.24308755760368664, + "grad_norm": 0.5678867062742812, + "learning_rate": 1.9722318955551303e-06, + "loss": 0.8298562169075012, + "step": 1055 + }, + { + "epoch": 0.24331797235023042, + "grad_norm": 0.5976489688453608, + "learning_rate": 1.9721426673409236e-06, + "loss": 0.9470195770263672, + "step": 1056 + }, + { + "epoch": 0.2435483870967742, + "grad_norm": 0.48875505327809854, + "learning_rate": 1.9720532980214955e-06, + "loss": 0.7733730673789978, + "step": 1057 + }, + { + "epoch": 0.24377880184331796, + "grad_norm": 0.46823524678841166, + "learning_rate": 1.9719637876098184e-06, + "loss": 0.7761770486831665, + "step": 1058 + }, + { + "epoch": 0.24400921658986174, + 
"grad_norm": 0.445725356281168, + "learning_rate": 1.971874136118884e-06, + "loss": 0.9270585775375366, + "step": 1059 + }, + { + "epoch": 0.24423963133640553, + "grad_norm": 0.42406381632115403, + "learning_rate": 1.971784343561705e-06, + "loss": 0.906977653503418, + "step": 1060 + }, + { + "epoch": 0.2444700460829493, + "grad_norm": 0.6412884076264423, + "learning_rate": 1.971694409951316e-06, + "loss": 0.9668625593185425, + "step": 1061 + }, + { + "epoch": 0.2447004608294931, + "grad_norm": 0.49415949875048953, + "learning_rate": 1.971604335300769e-06, + "loss": 0.8215349316596985, + "step": 1062 + }, + { + "epoch": 0.24493087557603688, + "grad_norm": 0.5322070043492434, + "learning_rate": 1.971514119623139e-06, + "loss": 0.8351551294326782, + "step": 1063 + }, + { + "epoch": 0.24516129032258063, + "grad_norm": 0.47999809865085763, + "learning_rate": 1.9714237629315206e-06, + "loss": 0.8778517246246338, + "step": 1064 + }, + { + "epoch": 0.24539170506912442, + "grad_norm": 0.5396014898113735, + "learning_rate": 1.9713332652390293e-06, + "loss": 0.9415761232376099, + "step": 1065 + }, + { + "epoch": 0.2456221198156682, + "grad_norm": 0.5420605598116663, + "learning_rate": 1.9712426265588e-06, + "loss": 0.9040292501449585, + "step": 1066 + }, + { + "epoch": 0.24585253456221198, + "grad_norm": 0.6005715295467339, + "learning_rate": 1.9711518469039894e-06, + "loss": 0.8886675834655762, + "step": 1067 + }, + { + "epoch": 0.24608294930875577, + "grad_norm": 0.6273079636247865, + "learning_rate": 1.971060926287774e-06, + "loss": 0.8439750671386719, + "step": 1068 + }, + { + "epoch": 0.24631336405529955, + "grad_norm": 0.5872743245126388, + "learning_rate": 1.9709698647233507e-06, + "loss": 0.8698763251304626, + "step": 1069 + }, + { + "epoch": 0.2465437788018433, + "grad_norm": 0.5858508124188764, + "learning_rate": 1.970878662223937e-06, + "loss": 0.7866508364677429, + "step": 1070 + }, + { + "epoch": 0.2467741935483871, + "grad_norm": 0.46529709331014274, + 
"learning_rate": 1.97078731880277e-06, + "loss": 0.8652541637420654, + "step": 1071 + }, + { + "epoch": 0.24700460829493087, + "grad_norm": 0.4617144249036463, + "learning_rate": 1.97069583447311e-06, + "loss": 0.8614386320114136, + "step": 1072 + }, + { + "epoch": 0.24723502304147466, + "grad_norm": 0.5647954006429063, + "learning_rate": 1.970604209248234e-06, + "loss": 0.9367830753326416, + "step": 1073 + }, + { + "epoch": 0.24746543778801844, + "grad_norm": 0.5744177103855904, + "learning_rate": 1.9705124431414417e-06, + "loss": 0.8851934671401978, + "step": 1074 + }, + { + "epoch": 0.24769585253456222, + "grad_norm": 0.49563724633359013, + "learning_rate": 1.9704205361660534e-06, + "loss": 0.9619653224945068, + "step": 1075 + }, + { + "epoch": 0.24792626728110598, + "grad_norm": 0.5649060756387019, + "learning_rate": 1.9703284883354094e-06, + "loss": 0.8826392889022827, + "step": 1076 + }, + { + "epoch": 0.24815668202764976, + "grad_norm": 0.6563751938003036, + "learning_rate": 1.970236299662869e-06, + "loss": 0.9075444340705872, + "step": 1077 + }, + { + "epoch": 0.24838709677419354, + "grad_norm": 0.5796370649143662, + "learning_rate": 1.9701439701618147e-06, + "loss": 1.048058032989502, + "step": 1078 + }, + { + "epoch": 0.24861751152073733, + "grad_norm": 0.5313768074192232, + "learning_rate": 1.970051499845647e-06, + "loss": 0.8460798263549805, + "step": 1079 + }, + { + "epoch": 0.2488479262672811, + "grad_norm": 0.7193266180122563, + "learning_rate": 1.9699588887277886e-06, + "loss": 0.9410982131958008, + "step": 1080 + }, + { + "epoch": 0.2490783410138249, + "grad_norm": 0.5102129399153178, + "learning_rate": 1.9698661368216816e-06, + "loss": 0.8247401714324951, + "step": 1081 + }, + { + "epoch": 0.24930875576036865, + "grad_norm": 0.5269386839997043, + "learning_rate": 1.969773244140789e-06, + "loss": 0.8543484210968018, + "step": 1082 + }, + { + "epoch": 0.24953917050691243, + "grad_norm": 0.6681776129080308, + "learning_rate": 1.9696802106985933e-06, 
+ "loss": 0.9339861273765564, + "step": 1083 + }, + { + "epoch": 0.24976958525345622, + "grad_norm": 0.6394378735221973, + "learning_rate": 1.969587036508599e-06, + "loss": 0.8268687725067139, + "step": 1084 + }, + { + "epoch": 0.25, + "grad_norm": 0.5565533707237263, + "learning_rate": 1.96949372158433e-06, + "loss": 0.9990735054016113, + "step": 1085 + }, + { + "epoch": 0.2502304147465438, + "grad_norm": 0.5875792221187977, + "learning_rate": 1.9694002659393305e-06, + "loss": 0.871169924736023, + "step": 1086 + }, + { + "epoch": 0.25046082949308757, + "grad_norm": 0.5066699305192991, + "learning_rate": 1.9693066695871657e-06, + "loss": 0.9275476932525635, + "step": 1087 + }, + { + "epoch": 0.25069124423963135, + "grad_norm": 0.5987932412868929, + "learning_rate": 1.969212932541421e-06, + "loss": 0.802006721496582, + "step": 1088 + }, + { + "epoch": 0.25092165898617513, + "grad_norm": 0.6594060142183631, + "learning_rate": 1.9691190548157023e-06, + "loss": 1.158774495124817, + "step": 1089 + }, + { + "epoch": 0.2511520737327189, + "grad_norm": 0.5926971423347241, + "learning_rate": 1.969025036423636e-06, + "loss": 0.8979278802871704, + "step": 1090 + }, + { + "epoch": 0.2513824884792627, + "grad_norm": 0.48149308442816224, + "learning_rate": 1.968930877378868e-06, + "loss": 0.9486579895019531, + "step": 1091 + }, + { + "epoch": 0.25161290322580643, + "grad_norm": 0.5203236583717573, + "learning_rate": 1.968836577695066e-06, + "loss": 0.8661590814590454, + "step": 1092 + }, + { + "epoch": 0.2518433179723502, + "grad_norm": 0.5636787742284843, + "learning_rate": 1.9687421373859173e-06, + "loss": 0.9224900007247925, + "step": 1093 + }, + { + "epoch": 0.252073732718894, + "grad_norm": 0.6117977186323622, + "learning_rate": 1.96864755646513e-06, + "loss": 0.9563734531402588, + "step": 1094 + }, + { + "epoch": 0.2523041474654378, + "grad_norm": 0.535175631127211, + "learning_rate": 1.968552834946432e-06, + "loss": 0.7457284927368164, + "step": 1095 + }, + { + "epoch": 
0.25253456221198156, + "grad_norm": 0.5387959310508903, + "learning_rate": 1.9684579728435727e-06, + "loss": 0.8763077259063721, + "step": 1096 + }, + { + "epoch": 0.25276497695852534, + "grad_norm": 0.5765732282352442, + "learning_rate": 1.9683629701703203e-06, + "loss": 0.8476013541221619, + "step": 1097 + }, + { + "epoch": 0.25299539170506913, + "grad_norm": 0.6265041816963897, + "learning_rate": 1.9682678269404647e-06, + "loss": 0.9706464409828186, + "step": 1098 + }, + { + "epoch": 0.2532258064516129, + "grad_norm": 0.5592313042434921, + "learning_rate": 1.968172543167816e-06, + "loss": 0.9898370504379272, + "step": 1099 + }, + { + "epoch": 0.2534562211981567, + "grad_norm": 0.5273265970472166, + "learning_rate": 1.9680771188662043e-06, + "loss": 0.9073352813720703, + "step": 1100 + }, + { + "epoch": 0.2536866359447005, + "grad_norm": 0.5101975110861352, + "learning_rate": 1.9679815540494805e-06, + "loss": 0.698054850101471, + "step": 1101 + }, + { + "epoch": 0.25391705069124426, + "grad_norm": 0.5334723333803978, + "learning_rate": 1.967885848731515e-06, + "loss": 0.8755865097045898, + "step": 1102 + }, + { + "epoch": 0.25414746543778804, + "grad_norm": 0.7353231676630018, + "learning_rate": 1.9677900029262004e-06, + "loss": 0.8884447813034058, + "step": 1103 + }, + { + "epoch": 0.2543778801843318, + "grad_norm": 0.48855032311862734, + "learning_rate": 1.967694016647448e-06, + "loss": 0.738738477230072, + "step": 1104 + }, + { + "epoch": 0.25460829493087556, + "grad_norm": 0.5363150933196312, + "learning_rate": 1.96759788990919e-06, + "loss": 0.8024383783340454, + "step": 1105 + }, + { + "epoch": 0.25483870967741934, + "grad_norm": 0.703802110686274, + "learning_rate": 1.967501622725379e-06, + "loss": 0.8780910968780518, + "step": 1106 + }, + { + "epoch": 0.2550691244239631, + "grad_norm": 0.47799328608287317, + "learning_rate": 1.967405215109989e-06, + "loss": 0.8709204196929932, + "step": 1107 + }, + { + "epoch": 0.2552995391705069, + "grad_norm": 
0.5771096865101828, + "learning_rate": 1.9673086670770122e-06, + "loss": 0.8838910460472107, + "step": 1108 + }, + { + "epoch": 0.2555299539170507, + "grad_norm": 0.6122299943883392, + "learning_rate": 1.967211978640463e-06, + "loss": 0.9310617446899414, + "step": 1109 + }, + { + "epoch": 0.2557603686635945, + "grad_norm": 0.5172180782022067, + "learning_rate": 1.9671151498143756e-06, + "loss": 0.8453254699707031, + "step": 1110 + }, + { + "epoch": 0.25599078341013826, + "grad_norm": 0.6724028308795985, + "learning_rate": 1.967018180612804e-06, + "loss": 1.0201973915100098, + "step": 1111 + }, + { + "epoch": 0.25622119815668204, + "grad_norm": 0.5304279166188671, + "learning_rate": 1.9669210710498242e-06, + "loss": 0.84140944480896, + "step": 1112 + }, + { + "epoch": 0.2564516129032258, + "grad_norm": 0.5850181467371437, + "learning_rate": 1.9668238211395308e-06, + "loss": 0.9012273550033569, + "step": 1113 + }, + { + "epoch": 0.2566820276497696, + "grad_norm": 0.5516270166899023, + "learning_rate": 1.9667264308960394e-06, + "loss": 0.820103645324707, + "step": 1114 + }, + { + "epoch": 0.2569124423963134, + "grad_norm": 0.7253674338479518, + "learning_rate": 1.9666289003334868e-06, + "loss": 1.0709048509597778, + "step": 1115 + }, + { + "epoch": 0.2571428571428571, + "grad_norm": 0.6606805333344365, + "learning_rate": 1.966531229466029e-06, + "loss": 0.9408602714538574, + "step": 1116 + }, + { + "epoch": 0.2573732718894009, + "grad_norm": 0.7074764796406602, + "learning_rate": 1.9664334183078425e-06, + "loss": 0.967316210269928, + "step": 1117 + }, + { + "epoch": 0.2576036866359447, + "grad_norm": 0.7069704403267734, + "learning_rate": 1.9663354668731248e-06, + "loss": 0.9483754634857178, + "step": 1118 + }, + { + "epoch": 0.25783410138248847, + "grad_norm": 0.7072881911304519, + "learning_rate": 1.966237375176093e-06, + "loss": 0.7978509664535522, + "step": 1119 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.5719987288484106, + "learning_rate": 
1.9661391432309862e-06, + "loss": 0.8720531463623047, + "step": 1120 + }, + { + "epoch": 0.25829493087557603, + "grad_norm": 0.6673697559796071, + "learning_rate": 1.966040771052061e-06, + "loss": 0.7984024286270142, + "step": 1121 + }, + { + "epoch": 0.2585253456221198, + "grad_norm": 0.5693036626081565, + "learning_rate": 1.965942258653597e-06, + "loss": 0.9255385398864746, + "step": 1122 + }, + { + "epoch": 0.2587557603686636, + "grad_norm": 0.5886763980683305, + "learning_rate": 1.9658436060498927e-06, + "loss": 0.9028007984161377, + "step": 1123 + }, + { + "epoch": 0.2589861751152074, + "grad_norm": 0.5256574840125579, + "learning_rate": 1.9657448132552677e-06, + "loss": 0.8773014545440674, + "step": 1124 + }, + { + "epoch": 0.25921658986175117, + "grad_norm": 0.5356122505196939, + "learning_rate": 1.9656458802840617e-06, + "loss": 0.9280908107757568, + "step": 1125 + }, + { + "epoch": 0.25944700460829495, + "grad_norm": 0.6473213250874083, + "learning_rate": 1.9655468071506344e-06, + "loss": 0.820783793926239, + "step": 1126 + }, + { + "epoch": 0.25967741935483873, + "grad_norm": 0.490374992394704, + "learning_rate": 1.9654475938693663e-06, + "loss": 0.7832465171813965, + "step": 1127 + }, + { + "epoch": 0.25990783410138246, + "grad_norm": 0.6097626342555662, + "learning_rate": 1.965348240454658e-06, + "loss": 0.8824669122695923, + "step": 1128 + }, + { + "epoch": 0.26013824884792625, + "grad_norm": 0.5472888524636408, + "learning_rate": 1.9652487469209305e-06, + "loss": 0.8782131671905518, + "step": 1129 + }, + { + "epoch": 0.26036866359447003, + "grad_norm": 0.6689126051687625, + "learning_rate": 1.9651491132826255e-06, + "loss": 0.938920259475708, + "step": 1130 + }, + { + "epoch": 0.2605990783410138, + "grad_norm": 0.5811243675216263, + "learning_rate": 1.965049339554204e-06, + "loss": 0.8733320236206055, + "step": 1131 + }, + { + "epoch": 0.2608294930875576, + "grad_norm": 0.5773916722243296, + "learning_rate": 1.9649494257501485e-06, + "loss": 
0.8688358664512634, + "step": 1132 + }, + { + "epoch": 0.2610599078341014, + "grad_norm": 0.5867794198483245, + "learning_rate": 1.9648493718849617e-06, + "loss": 0.9250427484512329, + "step": 1133 + }, + { + "epoch": 0.26129032258064516, + "grad_norm": 0.5093685293336041, + "learning_rate": 1.9647491779731655e-06, + "loss": 0.7890609502792358, + "step": 1134 + }, + { + "epoch": 0.26152073732718895, + "grad_norm": 0.5526465355704269, + "learning_rate": 1.964648844029303e-06, + "loss": 0.83612060546875, + "step": 1135 + }, + { + "epoch": 0.26175115207373273, + "grad_norm": 0.597714005790405, + "learning_rate": 1.9645483700679387e-06, + "loss": 0.7951240539550781, + "step": 1136 + }, + { + "epoch": 0.2619815668202765, + "grad_norm": 0.5785889079746135, + "learning_rate": 1.9644477561036546e-06, + "loss": 0.9746277332305908, + "step": 1137 + }, + { + "epoch": 0.2622119815668203, + "grad_norm": 0.6092572079482067, + "learning_rate": 1.9643470021510556e-06, + "loss": 0.856966495513916, + "step": 1138 + }, + { + "epoch": 0.2624423963133641, + "grad_norm": 0.5158468607686231, + "learning_rate": 1.9642461082247663e-06, + "loss": 0.7419042587280273, + "step": 1139 + }, + { + "epoch": 0.2626728110599078, + "grad_norm": 0.6141847224483623, + "learning_rate": 1.9641450743394304e-06, + "loss": 0.8868693709373474, + "step": 1140 + }, + { + "epoch": 0.2629032258064516, + "grad_norm": 0.6400145867633011, + "learning_rate": 1.9640439005097133e-06, + "loss": 1.0111520290374756, + "step": 1141 + }, + { + "epoch": 0.2631336405529954, + "grad_norm": 0.5946199662941717, + "learning_rate": 1.9639425867503006e-06, + "loss": 0.9379187226295471, + "step": 1142 + }, + { + "epoch": 0.26336405529953916, + "grad_norm": 0.6188285038344139, + "learning_rate": 1.9638411330758973e-06, + "loss": 0.8451071977615356, + "step": 1143 + }, + { + "epoch": 0.26359447004608294, + "grad_norm": 0.6988429276503174, + "learning_rate": 1.9637395395012295e-06, + "loss": 1.0407288074493408, + "step": 1144 + }, + { 
+ "epoch": 0.2638248847926267, + "grad_norm": 0.7122851693009883, + "learning_rate": 1.9636378060410433e-06, + "loss": 0.9594388008117676, + "step": 1145 + }, + { + "epoch": 0.2640552995391705, + "grad_norm": 0.4400072369022715, + "learning_rate": 1.9635359327101057e-06, + "loss": 0.7940789461135864, + "step": 1146 + }, + { + "epoch": 0.2642857142857143, + "grad_norm": 0.6347840140846547, + "learning_rate": 1.9634339195232025e-06, + "loss": 0.9707269668579102, + "step": 1147 + }, + { + "epoch": 0.2645161290322581, + "grad_norm": 0.6349984514987448, + "learning_rate": 1.9633317664951417e-06, + "loss": 0.9554522037506104, + "step": 1148 + }, + { + "epoch": 0.26474654377880186, + "grad_norm": 0.7144693638673882, + "learning_rate": 1.9632294736407497e-06, + "loss": 1.009516716003418, + "step": 1149 + }, + { + "epoch": 0.26497695852534564, + "grad_norm": 0.5429306162333095, + "learning_rate": 1.9631270409748754e-06, + "loss": 0.8337735533714294, + "step": 1150 + }, + { + "epoch": 0.2652073732718894, + "grad_norm": 0.5901765838606909, + "learning_rate": 1.963024468512386e-06, + "loss": 0.9103367328643799, + "step": 1151 + }, + { + "epoch": 0.2654377880184332, + "grad_norm": 0.3703807183273661, + "learning_rate": 1.9629217562681694e-06, + "loss": 0.7258249521255493, + "step": 1152 + }, + { + "epoch": 0.26566820276497694, + "grad_norm": 0.6322578847379198, + "learning_rate": 1.962818904257135e-06, + "loss": 0.7696776390075684, + "step": 1153 + }, + { + "epoch": 0.2658986175115207, + "grad_norm": 0.5842074670437798, + "learning_rate": 1.962715912494211e-06, + "loss": 0.9027894139289856, + "step": 1154 + }, + { + "epoch": 0.2661290322580645, + "grad_norm": 0.6016444551454023, + "learning_rate": 1.962612780994347e-06, + "loss": 1.0412788391113281, + "step": 1155 + }, + { + "epoch": 0.2663594470046083, + "grad_norm": 0.5483158655152818, + "learning_rate": 1.962509509772512e-06, + "loss": 0.8656542897224426, + "step": 1156 + }, + { + "epoch": 0.26658986175115207, + "grad_norm": 
0.56350579921959, + "learning_rate": 1.9624060988436964e-06, + "loss": 0.9541186094284058, + "step": 1157 + }, + { + "epoch": 0.26682027649769585, + "grad_norm": 0.6019903664727945, + "learning_rate": 1.962302548222909e-06, + "loss": 0.7684942483901978, + "step": 1158 + }, + { + "epoch": 0.26705069124423964, + "grad_norm": 0.5978642328134118, + "learning_rate": 1.962198857925181e-06, + "loss": 0.8934941291809082, + "step": 1159 + }, + { + "epoch": 0.2672811059907834, + "grad_norm": 0.8041491872239377, + "learning_rate": 1.962095027965562e-06, + "loss": 0.8674842715263367, + "step": 1160 + }, + { + "epoch": 0.2675115207373272, + "grad_norm": 0.5520577783269698, + "learning_rate": 1.9619910583591237e-06, + "loss": 0.8850778937339783, + "step": 1161 + }, + { + "epoch": 0.267741935483871, + "grad_norm": 0.5547632066870658, + "learning_rate": 1.961886949120957e-06, + "loss": 0.9140915870666504, + "step": 1162 + }, + { + "epoch": 0.26797235023041477, + "grad_norm": 0.5171975434439527, + "learning_rate": 1.9617827002661733e-06, + "loss": 0.7557287812232971, + "step": 1163 + }, + { + "epoch": 0.26820276497695855, + "grad_norm": 0.6409514019909783, + "learning_rate": 1.9616783118099032e-06, + "loss": 0.8780542612075806, + "step": 1164 + }, + { + "epoch": 0.2684331797235023, + "grad_norm": 0.5407478984703894, + "learning_rate": 1.9615737837672995e-06, + "loss": 0.8352043628692627, + "step": 1165 + }, + { + "epoch": 0.26866359447004606, + "grad_norm": 0.5628947650252879, + "learning_rate": 1.961469116153534e-06, + "loss": 0.8119357228279114, + "step": 1166 + }, + { + "epoch": 0.26889400921658985, + "grad_norm": 0.5744461460266088, + "learning_rate": 1.9613643089837992e-06, + "loss": 0.8953120708465576, + "step": 1167 + }, + { + "epoch": 0.26912442396313363, + "grad_norm": 0.5867925171054906, + "learning_rate": 1.9612593622733074e-06, + "loss": 0.9078162908554077, + "step": 1168 + }, + { + "epoch": 0.2693548387096774, + "grad_norm": 0.5358654275940312, + "learning_rate": 
1.961154276037292e-06, + "loss": 0.9118859767913818, + "step": 1169 + }, + { + "epoch": 0.2695852534562212, + "grad_norm": 0.5501238198976731, + "learning_rate": 1.9610490502910056e-06, + "loss": 0.8456159234046936, + "step": 1170 + }, + { + "epoch": 0.269815668202765, + "grad_norm": 0.6291583788438779, + "learning_rate": 1.9609436850497222e-06, + "loss": 0.7860552072525024, + "step": 1171 + }, + { + "epoch": 0.27004608294930876, + "grad_norm": 0.5078912747038423, + "learning_rate": 1.9608381803287343e-06, + "loss": 0.8121567368507385, + "step": 1172 + }, + { + "epoch": 0.27027649769585255, + "grad_norm": 0.6271384929565738, + "learning_rate": 1.9607325361433574e-06, + "loss": 0.9212384819984436, + "step": 1173 + }, + { + "epoch": 0.27050691244239633, + "grad_norm": 0.5704107274797215, + "learning_rate": 1.960626752508924e-06, + "loss": 0.9528858661651611, + "step": 1174 + }, + { + "epoch": 0.2707373271889401, + "grad_norm": 0.5901390376692353, + "learning_rate": 1.9605208294407894e-06, + "loss": 0.8561227321624756, + "step": 1175 + }, + { + "epoch": 0.2709677419354839, + "grad_norm": 0.5308748660328867, + "learning_rate": 1.960414766954328e-06, + "loss": 0.9333669543266296, + "step": 1176 + }, + { + "epoch": 0.2711981566820276, + "grad_norm": 0.5146250417484006, + "learning_rate": 1.9603085650649345e-06, + "loss": 0.8879388570785522, + "step": 1177 + }, + { + "epoch": 0.2714285714285714, + "grad_norm": 0.6699060572110628, + "learning_rate": 1.9602022237880244e-06, + "loss": 1.0099214315414429, + "step": 1178 + }, + { + "epoch": 0.2716589861751152, + "grad_norm": 0.5456103597772948, + "learning_rate": 1.9600957431390324e-06, + "loss": 0.9341822862625122, + "step": 1179 + }, + { + "epoch": 0.271889400921659, + "grad_norm": 0.48145703185786454, + "learning_rate": 1.9599891231334144e-06, + "loss": 0.7616428136825562, + "step": 1180 + }, + { + "epoch": 0.27211981566820276, + "grad_norm": 0.4889684884403523, + "learning_rate": 1.959882363786646e-06, + "loss": 
0.8270235061645508, + "step": 1181 + }, + { + "epoch": 0.27235023041474654, + "grad_norm": 0.5354748169041671, + "learning_rate": 1.9597754651142233e-06, + "loss": 0.8715114593505859, + "step": 1182 + }, + { + "epoch": 0.2725806451612903, + "grad_norm": 0.5251650427533354, + "learning_rate": 1.959668427131662e-06, + "loss": 0.6910781860351562, + "step": 1183 + }, + { + "epoch": 0.2728110599078341, + "grad_norm": 0.5425639259870759, + "learning_rate": 1.9595612498544997e-06, + "loss": 0.9158545136451721, + "step": 1184 + }, + { + "epoch": 0.2730414746543779, + "grad_norm": 0.4274378587816055, + "learning_rate": 1.9594539332982917e-06, + "loss": 0.7129944562911987, + "step": 1185 + }, + { + "epoch": 0.2732718894009217, + "grad_norm": 0.5549453334752472, + "learning_rate": 1.9593464774786155e-06, + "loss": 0.9487595558166504, + "step": 1186 + }, + { + "epoch": 0.27350230414746546, + "grad_norm": 0.490496609840347, + "learning_rate": 1.959238882411068e-06, + "loss": 0.9455368518829346, + "step": 1187 + }, + { + "epoch": 0.27373271889400924, + "grad_norm": 0.5638225468967204, + "learning_rate": 1.959131148111267e-06, + "loss": 0.9005390405654907, + "step": 1188 + }, + { + "epoch": 0.27396313364055297, + "grad_norm": 0.6239187759866925, + "learning_rate": 1.9590232745948494e-06, + "loss": 0.91117262840271, + "step": 1189 + }, + { + "epoch": 0.27419354838709675, + "grad_norm": 0.46530917608588857, + "learning_rate": 1.958915261877473e-06, + "loss": 0.7940579652786255, + "step": 1190 + }, + { + "epoch": 0.27442396313364054, + "grad_norm": 0.5621028227805456, + "learning_rate": 1.9588071099748155e-06, + "loss": 1.0705196857452393, + "step": 1191 + }, + { + "epoch": 0.2746543778801843, + "grad_norm": 0.7402334674842445, + "learning_rate": 1.9586988189025756e-06, + "loss": 0.9311869740486145, + "step": 1192 + }, + { + "epoch": 0.2748847926267281, + "grad_norm": 0.5809380189675816, + "learning_rate": 1.9585903886764715e-06, + "loss": 0.9400506019592285, + "step": 1193 + }, + { 
+ "epoch": 0.2751152073732719, + "grad_norm": 0.5097271764516258, + "learning_rate": 1.958481819312241e-06, + "loss": 0.8282920122146606, + "step": 1194 + }, + { + "epoch": 0.27534562211981567, + "grad_norm": 0.6446418001070287, + "learning_rate": 1.9583731108256435e-06, + "loss": 0.9111119508743286, + "step": 1195 + }, + { + "epoch": 0.27557603686635945, + "grad_norm": 0.6208204199981331, + "learning_rate": 1.9582642632324576e-06, + "loss": 0.9486548900604248, + "step": 1196 + }, + { + "epoch": 0.27580645161290324, + "grad_norm": 0.634036768829364, + "learning_rate": 1.9581552765484828e-06, + "loss": 0.8452764749526978, + "step": 1197 + }, + { + "epoch": 0.276036866359447, + "grad_norm": 0.6457489846855801, + "learning_rate": 1.958046150789538e-06, + "loss": 0.8636663556098938, + "step": 1198 + }, + { + "epoch": 0.2762672811059908, + "grad_norm": 0.6308230498005049, + "learning_rate": 1.9579368859714623e-06, + "loss": 0.9819158315658569, + "step": 1199 + }, + { + "epoch": 0.2764976958525346, + "grad_norm": 0.6100305190055095, + "learning_rate": 1.957827482110116e-06, + "loss": 0.8010607957839966, + "step": 1200 + }, + { + "epoch": 0.2767281105990783, + "grad_norm": 0.44236661935550003, + "learning_rate": 1.957717939221379e-06, + "loss": 0.7686241865158081, + "step": 1201 + }, + { + "epoch": 0.2769585253456221, + "grad_norm": 0.5324278038856628, + "learning_rate": 1.9576082573211507e-06, + "loss": 0.8548723459243774, + "step": 1202 + }, + { + "epoch": 0.2771889400921659, + "grad_norm": 0.5873649231612361, + "learning_rate": 1.957498436425351e-06, + "loss": 0.7866852283477783, + "step": 1203 + }, + { + "epoch": 0.27741935483870966, + "grad_norm": 0.5578610745935356, + "learning_rate": 1.9573884765499215e-06, + "loss": 0.8086235523223877, + "step": 1204 + }, + { + "epoch": 0.27764976958525345, + "grad_norm": 0.6489442522213279, + "learning_rate": 1.9572783777108217e-06, + "loss": 1.0310871601104736, + "step": 1205 + }, + { + "epoch": 0.27788018433179723, + 
"grad_norm": 0.6639195648959771, + "learning_rate": 1.957168139924033e-06, + "loss": 0.9482970237731934, + "step": 1206 + }, + { + "epoch": 0.278110599078341, + "grad_norm": 0.5595205782283428, + "learning_rate": 1.957057763205556e-06, + "loss": 0.809493899345398, + "step": 1207 + }, + { + "epoch": 0.2783410138248848, + "grad_norm": 0.5835729385419335, + "learning_rate": 1.956947247571411e-06, + "loss": 0.8679298162460327, + "step": 1208 + }, + { + "epoch": 0.2785714285714286, + "grad_norm": 0.5339273489408208, + "learning_rate": 1.95683659303764e-06, + "loss": 0.8870571255683899, + "step": 1209 + }, + { + "epoch": 0.27880184331797236, + "grad_norm": 0.6400258685482293, + "learning_rate": 1.9567257996203046e-06, + "loss": 0.8452431559562683, + "step": 1210 + }, + { + "epoch": 0.27903225806451615, + "grad_norm": 0.585371400581961, + "learning_rate": 1.9566148673354855e-06, + "loss": 0.8376550674438477, + "step": 1211 + }, + { + "epoch": 0.27926267281105993, + "grad_norm": 0.468171015360779, + "learning_rate": 1.9565037961992853e-06, + "loss": 0.7686463594436646, + "step": 1212 + }, + { + "epoch": 0.2794930875576037, + "grad_norm": 0.6305180956441923, + "learning_rate": 1.956392586227825e-06, + "loss": 1.0064536333084106, + "step": 1213 + }, + { + "epoch": 0.27972350230414744, + "grad_norm": 0.5204866621768998, + "learning_rate": 1.956281237437247e-06, + "loss": 0.9087784290313721, + "step": 1214 + }, + { + "epoch": 0.2799539170506912, + "grad_norm": 0.5800831908467822, + "learning_rate": 1.9561697498437133e-06, + "loss": 0.8528383374214172, + "step": 1215 + }, + { + "epoch": 0.280184331797235, + "grad_norm": 0.492586251170718, + "learning_rate": 1.9560581234634062e-06, + "loss": 0.8229737281799316, + "step": 1216 + }, + { + "epoch": 0.2804147465437788, + "grad_norm": 0.6543530371868361, + "learning_rate": 1.9559463583125285e-06, + "loss": 0.8957454562187195, + "step": 1217 + }, + { + "epoch": 0.2806451612903226, + "grad_norm": 0.6116476174626837, + "learning_rate": 
1.955834454407302e-06, + "loss": 0.8373404741287231, + "step": 1218 + }, + { + "epoch": 0.28087557603686636, + "grad_norm": 0.6339166918490768, + "learning_rate": 1.9557224117639698e-06, + "loss": 0.9117659330368042, + "step": 1219 + }, + { + "epoch": 0.28110599078341014, + "grad_norm": 0.7009847380548185, + "learning_rate": 1.9556102303987946e-06, + "loss": 0.9079498052597046, + "step": 1220 + }, + { + "epoch": 0.2813364055299539, + "grad_norm": 0.6797187898490639, + "learning_rate": 1.9554979103280597e-06, + "loss": 0.8127235174179077, + "step": 1221 + }, + { + "epoch": 0.2815668202764977, + "grad_norm": 0.4430544694455362, + "learning_rate": 1.9553854515680684e-06, + "loss": 0.6790676712989807, + "step": 1222 + }, + { + "epoch": 0.2817972350230415, + "grad_norm": 0.547920786044559, + "learning_rate": 1.955272854135143e-06, + "loss": 0.93434739112854, + "step": 1223 + }, + { + "epoch": 0.2820276497695853, + "grad_norm": 0.5831429716678932, + "learning_rate": 1.9551601180456274e-06, + "loss": 0.8624403476715088, + "step": 1224 + }, + { + "epoch": 0.28225806451612906, + "grad_norm": 0.5942670172250124, + "learning_rate": 1.9550472433158856e-06, + "loss": 0.8871273994445801, + "step": 1225 + }, + { + "epoch": 0.2824884792626728, + "grad_norm": 0.6403907324028919, + "learning_rate": 1.9549342299623007e-06, + "loss": 1.0226445198059082, + "step": 1226 + }, + { + "epoch": 0.28271889400921657, + "grad_norm": 0.5570530371692032, + "learning_rate": 1.9548210780012764e-06, + "loss": 0.9232503771781921, + "step": 1227 + }, + { + "epoch": 0.28294930875576035, + "grad_norm": 0.5562171255847491, + "learning_rate": 1.9547077874492367e-06, + "loss": 0.944965124130249, + "step": 1228 + }, + { + "epoch": 0.28317972350230414, + "grad_norm": 0.7815951055502713, + "learning_rate": 1.9545943583226255e-06, + "loss": 0.9491870403289795, + "step": 1229 + }, + { + "epoch": 0.2834101382488479, + "grad_norm": 0.5531880644641158, + "learning_rate": 1.9544807906379065e-06, + "loss": 
0.8477638363838196, + "step": 1230 + }, + { + "epoch": 0.2836405529953917, + "grad_norm": 0.6334904267465776, + "learning_rate": 1.9543670844115647e-06, + "loss": 0.9733752012252808, + "step": 1231 + }, + { + "epoch": 0.2838709677419355, + "grad_norm": 0.5077250781055755, + "learning_rate": 1.954253239660104e-06, + "loss": 0.8158911466598511, + "step": 1232 + }, + { + "epoch": 0.28410138248847927, + "grad_norm": 0.47003121688563365, + "learning_rate": 1.9541392564000487e-06, + "loss": 0.8814271092414856, + "step": 1233 + }, + { + "epoch": 0.28433179723502305, + "grad_norm": 0.5974631149552703, + "learning_rate": 1.9540251346479435e-06, + "loss": 0.8366897106170654, + "step": 1234 + }, + { + "epoch": 0.28456221198156684, + "grad_norm": 0.5122641090735244, + "learning_rate": 1.953910874420353e-06, + "loss": 0.8043497800827026, + "step": 1235 + }, + { + "epoch": 0.2847926267281106, + "grad_norm": 0.6923450749153209, + "learning_rate": 1.953796475733862e-06, + "loss": 0.904765248298645, + "step": 1236 + }, + { + "epoch": 0.2850230414746544, + "grad_norm": 0.6316427864189956, + "learning_rate": 1.953681938605075e-06, + "loss": 0.9092245101928711, + "step": 1237 + }, + { + "epoch": 0.28525345622119813, + "grad_norm": 0.44433825637231683, + "learning_rate": 1.953567263050617e-06, + "loss": 0.9119021892547607, + "step": 1238 + }, + { + "epoch": 0.2854838709677419, + "grad_norm": 0.5258256580858013, + "learning_rate": 1.9534524490871336e-06, + "loss": 0.8380709886550903, + "step": 1239 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 0.6731382971935342, + "learning_rate": 1.9533374967312894e-06, + "loss": 0.9410983324050903, + "step": 1240 + }, + { + "epoch": 0.2859447004608295, + "grad_norm": 0.5901005556596554, + "learning_rate": 1.953222405999769e-06, + "loss": 0.882665753364563, + "step": 1241 + }, + { + "epoch": 0.28617511520737327, + "grad_norm": 0.600142706864601, + "learning_rate": 1.953107176909279e-06, + "loss": 0.9334039688110352, + "step": 1242 + }, + { + 
"epoch": 0.28640552995391705, + "grad_norm": 0.649506044390801, + "learning_rate": 1.9529918094765433e-06, + "loss": 0.8743090033531189, + "step": 1243 + }, + { + "epoch": 0.28663594470046083, + "grad_norm": 0.5149777367828677, + "learning_rate": 1.9528763037183086e-06, + "loss": 0.9017846584320068, + "step": 1244 + }, + { + "epoch": 0.2868663594470046, + "grad_norm": 0.6718877038666831, + "learning_rate": 1.95276065965134e-06, + "loss": 0.9412289261817932, + "step": 1245 + }, + { + "epoch": 0.2870967741935484, + "grad_norm": 0.5829455891585096, + "learning_rate": 1.9526448772924222e-06, + "loss": 0.9008835554122925, + "step": 1246 + }, + { + "epoch": 0.2873271889400922, + "grad_norm": 0.5850809594667484, + "learning_rate": 1.9525289566583622e-06, + "loss": 0.803752064704895, + "step": 1247 + }, + { + "epoch": 0.28755760368663597, + "grad_norm": 0.642250740432813, + "learning_rate": 1.952412897765985e-06, + "loss": 0.8354049921035767, + "step": 1248 + }, + { + "epoch": 0.28778801843317975, + "grad_norm": 0.711123311118831, + "learning_rate": 1.9522967006321363e-06, + "loss": 1.047461748123169, + "step": 1249 + }, + { + "epoch": 0.2880184331797235, + "grad_norm": 0.5664585984555107, + "learning_rate": 1.9521803652736826e-06, + "loss": 0.9036056399345398, + "step": 1250 + }, + { + "epoch": 0.28824884792626726, + "grad_norm": 0.6380477461120507, + "learning_rate": 1.952063891707509e-06, + "loss": 0.9534894227981567, + "step": 1251 + }, + { + "epoch": 0.28847926267281104, + "grad_norm": 0.6213868500155985, + "learning_rate": 1.9519472799505217e-06, + "loss": 0.9200841188430786, + "step": 1252 + }, + { + "epoch": 0.2887096774193548, + "grad_norm": 0.6071864938745559, + "learning_rate": 1.9518305300196475e-06, + "loss": 0.8917449712753296, + "step": 1253 + }, + { + "epoch": 0.2889400921658986, + "grad_norm": 0.43859246681042113, + "learning_rate": 1.9517136419318317e-06, + "loss": 0.92131507396698, + "step": 1254 + }, + { + "epoch": 0.2891705069124424, + "grad_norm": 
0.5459214675052779, + "learning_rate": 1.951596615704041e-06, + "loss": 0.8862432241439819, + "step": 1255 + }, + { + "epoch": 0.2894009216589862, + "grad_norm": 0.5238034407201325, + "learning_rate": 1.951479451353261e-06, + "loss": 0.7789605855941772, + "step": 1256 + }, + { + "epoch": 0.28963133640552996, + "grad_norm": 0.6480376013887345, + "learning_rate": 1.951362148896498e-06, + "loss": 0.8187062740325928, + "step": 1257 + }, + { + "epoch": 0.28986175115207374, + "grad_norm": 0.651824990199355, + "learning_rate": 1.9512447083507784e-06, + "loss": 1.0575072765350342, + "step": 1258 + }, + { + "epoch": 0.2900921658986175, + "grad_norm": 0.5300946141437952, + "learning_rate": 1.9511271297331493e-06, + "loss": 0.8027279376983643, + "step": 1259 + }, + { + "epoch": 0.2903225806451613, + "grad_norm": 0.549023479491683, + "learning_rate": 1.951009413060676e-06, + "loss": 0.6641743183135986, + "step": 1260 + }, + { + "epoch": 0.2905529953917051, + "grad_norm": 0.4919566770154341, + "learning_rate": 1.950891558350446e-06, + "loss": 0.7937613725662231, + "step": 1261 + }, + { + "epoch": 0.2907834101382488, + "grad_norm": 0.6213972326398296, + "learning_rate": 1.950773565619564e-06, + "loss": 0.9600511193275452, + "step": 1262 + }, + { + "epoch": 0.2910138248847926, + "grad_norm": 0.6514763319649333, + "learning_rate": 1.9506554348851585e-06, + "loss": 0.8275980353355408, + "step": 1263 + }, + { + "epoch": 0.2912442396313364, + "grad_norm": 0.598467260157347, + "learning_rate": 1.950537166164375e-06, + "loss": 0.9008789658546448, + "step": 1264 + }, + { + "epoch": 0.29147465437788017, + "grad_norm": 0.5520168646542984, + "learning_rate": 1.95041875947438e-06, + "loss": 0.8701465129852295, + "step": 1265 + }, + { + "epoch": 0.29170506912442395, + "grad_norm": 0.5793489097336151, + "learning_rate": 1.95030021483236e-06, + "loss": 0.9313883781433105, + "step": 1266 + }, + { + "epoch": 0.29193548387096774, + "grad_norm": 0.5738973536331494, + "learning_rate": 
1.9501815322555222e-06, + "loss": 0.883125901222229, + "step": 1267 + }, + { + "epoch": 0.2921658986175115, + "grad_norm": 0.5430628147775056, + "learning_rate": 1.9500627117610927e-06, + "loss": 0.8856269121170044, + "step": 1268 + }, + { + "epoch": 0.2923963133640553, + "grad_norm": 0.4857560088008075, + "learning_rate": 1.9499437533663184e-06, + "loss": 0.8817840218544006, + "step": 1269 + }, + { + "epoch": 0.2926267281105991, + "grad_norm": 0.7079159031386842, + "learning_rate": 1.949824657088466e-06, + "loss": 0.9911330342292786, + "step": 1270 + }, + { + "epoch": 0.29285714285714287, + "grad_norm": 0.6283382634413396, + "learning_rate": 1.949705422944822e-06, + "loss": 0.8902890682220459, + "step": 1271 + }, + { + "epoch": 0.29308755760368665, + "grad_norm": 0.5381213123876506, + "learning_rate": 1.949586050952693e-06, + "loss": 0.6846401691436768, + "step": 1272 + }, + { + "epoch": 0.29331797235023044, + "grad_norm": 0.6164805880844991, + "learning_rate": 1.9494665411294057e-06, + "loss": 0.9186165928840637, + "step": 1273 + }, + { + "epoch": 0.29354838709677417, + "grad_norm": 0.4648178531483389, + "learning_rate": 1.949346893492307e-06, + "loss": 0.8614095449447632, + "step": 1274 + }, + { + "epoch": 0.29377880184331795, + "grad_norm": 0.6146731068970395, + "learning_rate": 1.9492271080587637e-06, + "loss": 0.7824405431747437, + "step": 1275 + }, + { + "epoch": 0.29400921658986173, + "grad_norm": 0.5415059908334089, + "learning_rate": 1.949107184846162e-06, + "loss": 0.8694697618484497, + "step": 1276 + }, + { + "epoch": 0.2942396313364055, + "grad_norm": 0.6070495052767576, + "learning_rate": 1.948987123871909e-06, + "loss": 0.8839597105979919, + "step": 1277 + }, + { + "epoch": 0.2944700460829493, + "grad_norm": 0.5155544169686388, + "learning_rate": 1.948866925153431e-06, + "loss": 0.832268238067627, + "step": 1278 + }, + { + "epoch": 0.2947004608294931, + "grad_norm": 0.48264272480740306, + "learning_rate": 1.948746588708175e-06, + "loss": 
0.8243123888969421, + "step": 1279 + }, + { + "epoch": 0.29493087557603687, + "grad_norm": 0.7516695382591614, + "learning_rate": 1.948626114553608e-06, + "loss": 0.99314284324646, + "step": 1280 + }, + { + "epoch": 0.29516129032258065, + "grad_norm": 0.6001488755214682, + "learning_rate": 1.948505502707216e-06, + "loss": 0.8853542804718018, + "step": 1281 + }, + { + "epoch": 0.29539170506912443, + "grad_norm": 0.7940640499991963, + "learning_rate": 1.948384753186506e-06, + "loss": 0.9623305797576904, + "step": 1282 + }, + { + "epoch": 0.2956221198156682, + "grad_norm": 0.64774993620639, + "learning_rate": 1.948263866009005e-06, + "loss": 0.8321142792701721, + "step": 1283 + }, + { + "epoch": 0.295852534562212, + "grad_norm": 0.6059595321597901, + "learning_rate": 1.948142841192258e-06, + "loss": 0.8911606669425964, + "step": 1284 + }, + { + "epoch": 0.2960829493087558, + "grad_norm": 0.6228210357050852, + "learning_rate": 1.948021678753834e-06, + "loss": 0.9501996040344238, + "step": 1285 + }, + { + "epoch": 0.29631336405529957, + "grad_norm": 0.5846881548888203, + "learning_rate": 1.947900378711318e-06, + "loss": 0.8555784225463867, + "step": 1286 + }, + { + "epoch": 0.2965437788018433, + "grad_norm": 0.5726752466099971, + "learning_rate": 1.9477789410823163e-06, + "loss": 0.7703878283500671, + "step": 1287 + }, + { + "epoch": 0.2967741935483871, + "grad_norm": 0.5629458043150717, + "learning_rate": 1.947657365884457e-06, + "loss": 1.0072009563446045, + "step": 1288 + }, + { + "epoch": 0.29700460829493086, + "grad_norm": 0.5698014348408978, + "learning_rate": 1.9475356531353847e-06, + "loss": 0.7633493542671204, + "step": 1289 + }, + { + "epoch": 0.29723502304147464, + "grad_norm": 0.5241558601711666, + "learning_rate": 1.9474138028527674e-06, + "loss": 0.88579261302948, + "step": 1290 + }, + { + "epoch": 0.2974654377880184, + "grad_norm": 0.6037880677787516, + "learning_rate": 1.94729181505429e-06, + "loss": 0.8356794118881226, + "step": 1291 + }, + { + "epoch": 
0.2976958525345622, + "grad_norm": 0.6197051238228268, + "learning_rate": 1.94716968975766e-06, + "loss": 0.8330395817756653, + "step": 1292 + }, + { + "epoch": 0.297926267281106, + "grad_norm": 0.6667932213948545, + "learning_rate": 1.947047426980604e-06, + "loss": 0.9219698905944824, + "step": 1293 + }, + { + "epoch": 0.2981566820276498, + "grad_norm": 0.5409653154450632, + "learning_rate": 1.9469250267408674e-06, + "loss": 0.880803644657135, + "step": 1294 + }, + { + "epoch": 0.29838709677419356, + "grad_norm": 0.5789679620224094, + "learning_rate": 1.9468024890562165e-06, + "loss": 0.8212012052536011, + "step": 1295 + }, + { + "epoch": 0.29861751152073734, + "grad_norm": 0.6209106243517916, + "learning_rate": 1.946679813944438e-06, + "loss": 1.0118587017059326, + "step": 1296 + }, + { + "epoch": 0.2988479262672811, + "grad_norm": 0.6374046746708436, + "learning_rate": 1.9465570014233377e-06, + "loss": 0.8708915710449219, + "step": 1297 + }, + { + "epoch": 0.2990783410138249, + "grad_norm": 0.6373146041782783, + "learning_rate": 1.9464340515107415e-06, + "loss": 0.9386067986488342, + "step": 1298 + }, + { + "epoch": 0.29930875576036864, + "grad_norm": 0.5346925830356088, + "learning_rate": 1.9463109642244958e-06, + "loss": 0.8672319650650024, + "step": 1299 + }, + { + "epoch": 0.2995391705069124, + "grad_norm": 0.7198371333215221, + "learning_rate": 1.9461877395824662e-06, + "loss": 0.9002958536148071, + "step": 1300 + }, + { + "epoch": 0.2997695852534562, + "grad_norm": 0.6247724220238058, + "learning_rate": 1.946064377602539e-06, + "loss": 0.9206029772758484, + "step": 1301 + }, + { + "epoch": 0.3, + "grad_norm": 0.8295443472719992, + "learning_rate": 1.94594087830262e-06, + "loss": 1.0063598155975342, + "step": 1302 + }, + { + "epoch": 0.3002304147465438, + "grad_norm": 0.5149695005553171, + "learning_rate": 1.9458172417006346e-06, + "loss": 0.7616912126541138, + "step": 1303 + }, + { + "epoch": 0.30046082949308756, + "grad_norm": 0.5462398029065331, + 
"learning_rate": 1.945693467814529e-06, + "loss": 0.8385730385780334, + "step": 1304 + }, + { + "epoch": 0.30069124423963134, + "grad_norm": 0.4854220181479302, + "learning_rate": 1.9455695566622677e-06, + "loss": 0.7032216787338257, + "step": 1305 + }, + { + "epoch": 0.3009216589861751, + "grad_norm": 0.5554776786626977, + "learning_rate": 1.9454455082618373e-06, + "loss": 0.7647181749343872, + "step": 1306 + }, + { + "epoch": 0.3011520737327189, + "grad_norm": 0.7119385935860951, + "learning_rate": 1.945321322631243e-06, + "loss": 0.9918918013572693, + "step": 1307 + }, + { + "epoch": 0.3013824884792627, + "grad_norm": 0.5689741757687454, + "learning_rate": 1.945196999788511e-06, + "loss": 0.838451623916626, + "step": 1308 + }, + { + "epoch": 0.3016129032258065, + "grad_norm": 0.7156229049064139, + "learning_rate": 1.945072539751685e-06, + "loss": 0.9739303588867188, + "step": 1309 + }, + { + "epoch": 0.30184331797235026, + "grad_norm": 0.4850858592361209, + "learning_rate": 1.9449479425388305e-06, + "loss": 0.8233742713928223, + "step": 1310 + }, + { + "epoch": 0.302073732718894, + "grad_norm": 0.666231819455408, + "learning_rate": 1.944823208168034e-06, + "loss": 0.9765088558197021, + "step": 1311 + }, + { + "epoch": 0.30230414746543777, + "grad_norm": 0.5940530240559707, + "learning_rate": 1.944698336657399e-06, + "loss": 0.7614048719406128, + "step": 1312 + }, + { + "epoch": 0.30253456221198155, + "grad_norm": 0.5807403996402337, + "learning_rate": 1.9445733280250512e-06, + "loss": 0.760692834854126, + "step": 1313 + }, + { + "epoch": 0.30276497695852533, + "grad_norm": 0.710580819926471, + "learning_rate": 1.944448182289135e-06, + "loss": 0.8484706878662109, + "step": 1314 + }, + { + "epoch": 0.3029953917050691, + "grad_norm": 0.6131916776262658, + "learning_rate": 1.944322899467816e-06, + "loss": 0.8857289552688599, + "step": 1315 + }, + { + "epoch": 0.3032258064516129, + "grad_norm": 0.7120330171482998, + "learning_rate": 1.944197479579278e-06, + "loss": 
0.8375179171562195, + "step": 1316 + }, + { + "epoch": 0.3034562211981567, + "grad_norm": 0.5402001956337824, + "learning_rate": 1.9440719226417263e-06, + "loss": 0.8141925930976868, + "step": 1317 + }, + { + "epoch": 0.30368663594470047, + "grad_norm": 0.7607357810019435, + "learning_rate": 1.943946228673384e-06, + "loss": 0.9970111846923828, + "step": 1318 + }, + { + "epoch": 0.30391705069124425, + "grad_norm": 0.5721230302327327, + "learning_rate": 1.9438203976924966e-06, + "loss": 0.9542866349220276, + "step": 1319 + }, + { + "epoch": 0.30414746543778803, + "grad_norm": 0.5904074306009988, + "learning_rate": 1.943694429717328e-06, + "loss": 0.8808399438858032, + "step": 1320 + }, + { + "epoch": 0.3043778801843318, + "grad_norm": 0.5734964183027593, + "learning_rate": 1.9435683247661623e-06, + "loss": 0.8541150093078613, + "step": 1321 + }, + { + "epoch": 0.3046082949308756, + "grad_norm": 0.7749551173384804, + "learning_rate": 1.943442082857303e-06, + "loss": 0.8887044191360474, + "step": 1322 + }, + { + "epoch": 0.30483870967741933, + "grad_norm": 0.6530281616907251, + "learning_rate": 1.9433157040090746e-06, + "loss": 0.8699131011962891, + "step": 1323 + }, + { + "epoch": 0.3050691244239631, + "grad_norm": 0.6811202971751444, + "learning_rate": 1.9431891882398205e-06, + "loss": 0.7096077799797058, + "step": 1324 + }, + { + "epoch": 0.3052995391705069, + "grad_norm": 0.5279135582200482, + "learning_rate": 1.9430625355679045e-06, + "loss": 0.8040453195571899, + "step": 1325 + }, + { + "epoch": 0.3055299539170507, + "grad_norm": 0.5904456084555657, + "learning_rate": 1.9429357460117093e-06, + "loss": 0.8275970220565796, + "step": 1326 + }, + { + "epoch": 0.30576036866359446, + "grad_norm": 0.5947614996956965, + "learning_rate": 1.9428088195896393e-06, + "loss": 0.9724141359329224, + "step": 1327 + }, + { + "epoch": 0.30599078341013825, + "grad_norm": 0.6696756628924122, + "learning_rate": 1.9426817563201174e-06, + "loss": 0.9293274879455566, + "step": 1328 + }, 
+ { + "epoch": 0.30622119815668203, + "grad_norm": 0.5976334939970911, + "learning_rate": 1.9425545562215865e-06, + "loss": 0.9454036951065063, + "step": 1329 + }, + { + "epoch": 0.3064516129032258, + "grad_norm": 0.48928245529374687, + "learning_rate": 1.9424272193125094e-06, + "loss": 0.7751365900039673, + "step": 1330 + }, + { + "epoch": 0.3066820276497696, + "grad_norm": 0.5211050083614731, + "learning_rate": 1.942299745611369e-06, + "loss": 0.8444688320159912, + "step": 1331 + }, + { + "epoch": 0.3069124423963134, + "grad_norm": 0.6370602856216532, + "learning_rate": 1.9421721351366684e-06, + "loss": 0.7751414775848389, + "step": 1332 + }, + { + "epoch": 0.30714285714285716, + "grad_norm": 0.6732034032956694, + "learning_rate": 1.9420443879069287e-06, + "loss": 0.912209153175354, + "step": 1333 + }, + { + "epoch": 0.30737327188940095, + "grad_norm": 0.4990267188564962, + "learning_rate": 1.941916503940694e-06, + "loss": 0.8897542357444763, + "step": 1334 + }, + { + "epoch": 0.3076036866359447, + "grad_norm": 0.6319943447022882, + "learning_rate": 1.9417884832565257e-06, + "loss": 0.8562046885490417, + "step": 1335 + }, + { + "epoch": 0.30783410138248846, + "grad_norm": 0.6422557067377674, + "learning_rate": 1.9416603258730055e-06, + "loss": 0.8886401653289795, + "step": 1336 + }, + { + "epoch": 0.30806451612903224, + "grad_norm": 0.5477788033894715, + "learning_rate": 1.9415320318087354e-06, + "loss": 0.7401903867721558, + "step": 1337 + }, + { + "epoch": 0.308294930875576, + "grad_norm": 0.5300494853019255, + "learning_rate": 1.941403601082338e-06, + "loss": 0.761360764503479, + "step": 1338 + }, + { + "epoch": 0.3085253456221198, + "grad_norm": 0.5079078108497779, + "learning_rate": 1.9412750337124537e-06, + "loss": 0.9223028421401978, + "step": 1339 + }, + { + "epoch": 0.3087557603686636, + "grad_norm": 0.6370349934611669, + "learning_rate": 1.9411463297177446e-06, + "loss": 0.9287113547325134, + "step": 1340 + }, + { + "epoch": 0.3089861751152074, + 
"grad_norm": 0.6186566628026451, + "learning_rate": 1.941017489116891e-06, + "loss": 0.8548502922058105, + "step": 1341 + }, + { + "epoch": 0.30921658986175116, + "grad_norm": 0.6058300330585435, + "learning_rate": 1.9408885119285953e-06, + "loss": 0.8885709643363953, + "step": 1342 + }, + { + "epoch": 0.30944700460829494, + "grad_norm": 0.6807261513363189, + "learning_rate": 1.940759398171577e-06, + "loss": 0.8970856666564941, + "step": 1343 + }, + { + "epoch": 0.3096774193548387, + "grad_norm": 0.5664251996297385, + "learning_rate": 1.9406301478645783e-06, + "loss": 0.847138524055481, + "step": 1344 + }, + { + "epoch": 0.3099078341013825, + "grad_norm": 0.5354847877369665, + "learning_rate": 1.9405007610263584e-06, + "loss": 0.7892216444015503, + "step": 1345 + }, + { + "epoch": 0.3101382488479263, + "grad_norm": 0.6610649492142503, + "learning_rate": 1.940371237675698e-06, + "loss": 0.8869141340255737, + "step": 1346 + }, + { + "epoch": 0.3103686635944701, + "grad_norm": 0.6628677961578048, + "learning_rate": 1.9402415778313976e-06, + "loss": 0.8405635356903076, + "step": 1347 + }, + { + "epoch": 0.3105990783410138, + "grad_norm": 0.6646875425992601, + "learning_rate": 1.9401117815122768e-06, + "loss": 0.914352536201477, + "step": 1348 + }, + { + "epoch": 0.3108294930875576, + "grad_norm": 0.5494930695935469, + "learning_rate": 1.9399818487371756e-06, + "loss": 0.9059416055679321, + "step": 1349 + }, + { + "epoch": 0.31105990783410137, + "grad_norm": 0.7196818748440269, + "learning_rate": 1.939851779524953e-06, + "loss": 1.0513644218444824, + "step": 1350 + }, + { + "epoch": 0.31129032258064515, + "grad_norm": 0.6538820317800585, + "learning_rate": 1.9397215738944893e-06, + "loss": 0.8673127889633179, + "step": 1351 + }, + { + "epoch": 0.31152073732718893, + "grad_norm": 0.5818727011440767, + "learning_rate": 1.9395912318646827e-06, + "loss": 0.7907043695449829, + "step": 1352 + }, + { + "epoch": 0.3117511520737327, + "grad_norm": 0.6065295506543811, + 
"learning_rate": 1.9394607534544527e-06, + "loss": 0.8127990961074829, + "step": 1353 + }, + { + "epoch": 0.3119815668202765, + "grad_norm": 0.598693369273182, + "learning_rate": 1.9393301386827387e-06, + "loss": 0.877837061882019, + "step": 1354 + }, + { + "epoch": 0.3122119815668203, + "grad_norm": 0.734976387219602, + "learning_rate": 1.939199387568498e-06, + "loss": 0.8518592715263367, + "step": 1355 + }, + { + "epoch": 0.31244239631336407, + "grad_norm": 0.5473192670176156, + "learning_rate": 1.9390685001307093e-06, + "loss": 0.7151869535446167, + "step": 1356 + }, + { + "epoch": 0.31267281105990785, + "grad_norm": 0.5581062201544951, + "learning_rate": 1.9389374763883716e-06, + "loss": 0.8325271606445312, + "step": 1357 + }, + { + "epoch": 0.31290322580645163, + "grad_norm": 0.6052904551524719, + "learning_rate": 1.938806316360502e-06, + "loss": 0.6875327825546265, + "step": 1358 + }, + { + "epoch": 0.3131336405529954, + "grad_norm": 0.5481097616797531, + "learning_rate": 1.9386750200661382e-06, + "loss": 0.8944000005722046, + "step": 1359 + }, + { + "epoch": 0.31336405529953915, + "grad_norm": 0.4954617799257055, + "learning_rate": 1.938543587524338e-06, + "loss": 0.8544747829437256, + "step": 1360 + }, + { + "epoch": 0.31359447004608293, + "grad_norm": 0.6938391730058001, + "learning_rate": 1.9384120187541788e-06, + "loss": 0.9216448068618774, + "step": 1361 + }, + { + "epoch": 0.3138248847926267, + "grad_norm": 0.6834174981389874, + "learning_rate": 1.938280313774757e-06, + "loss": 0.9239494800567627, + "step": 1362 + }, + { + "epoch": 0.3140552995391705, + "grad_norm": 0.6816430700209432, + "learning_rate": 1.9381484726051896e-06, + "loss": 0.8903297185897827, + "step": 1363 + }, + { + "epoch": 0.3142857142857143, + "grad_norm": 0.46405748253250195, + "learning_rate": 1.9380164952646137e-06, + "loss": 0.7019625306129456, + "step": 1364 + }, + { + "epoch": 0.31451612903225806, + "grad_norm": 0.6844663370999042, + "learning_rate": 1.9378843817721854e-06, + 
"loss": 0.8668909072875977, + "step": 1365 + }, + { + "epoch": 0.31474654377880185, + "grad_norm": 0.6454844689755169, + "learning_rate": 1.9377521321470804e-06, + "loss": 0.8124282360076904, + "step": 1366 + }, + { + "epoch": 0.31497695852534563, + "grad_norm": 0.5251795296125459, + "learning_rate": 1.937619746408495e-06, + "loss": 0.7543507814407349, + "step": 1367 + }, + { + "epoch": 0.3152073732718894, + "grad_norm": 0.6140420297919054, + "learning_rate": 1.9374872245756448e-06, + "loss": 0.8355506062507629, + "step": 1368 + }, + { + "epoch": 0.3154377880184332, + "grad_norm": 0.5898778959170111, + "learning_rate": 1.937354566667765e-06, + "loss": 0.9642060399055481, + "step": 1369 + }, + { + "epoch": 0.315668202764977, + "grad_norm": 0.5556038505388771, + "learning_rate": 1.93722177270411e-06, + "loss": 0.9044197201728821, + "step": 1370 + }, + { + "epoch": 0.31589861751152076, + "grad_norm": 0.4271939145273097, + "learning_rate": 1.937088842703956e-06, + "loss": 0.8077869415283203, + "step": 1371 + }, + { + "epoch": 0.3161290322580645, + "grad_norm": 0.6032982707731396, + "learning_rate": 1.9369557766865968e-06, + "loss": 0.8319793939590454, + "step": 1372 + }, + { + "epoch": 0.3163594470046083, + "grad_norm": 0.6304953638761566, + "learning_rate": 1.9368225746713475e-06, + "loss": 0.8233131170272827, + "step": 1373 + }, + { + "epoch": 0.31658986175115206, + "grad_norm": 0.6631214954178034, + "learning_rate": 1.936689236677541e-06, + "loss": 0.7898514270782471, + "step": 1374 + }, + { + "epoch": 0.31682027649769584, + "grad_norm": 0.6121849479571054, + "learning_rate": 1.9365557627245326e-06, + "loss": 0.9243249893188477, + "step": 1375 + }, + { + "epoch": 0.3170506912442396, + "grad_norm": 0.5673475924264754, + "learning_rate": 1.9364221528316946e-06, + "loss": 0.8153131008148193, + "step": 1376 + }, + { + "epoch": 0.3172811059907834, + "grad_norm": 0.6767166003638188, + "learning_rate": 1.936288407018421e-06, + "loss": 0.9203826189041138, + "step": 1377 + 
}, + { + "epoch": 0.3175115207373272, + "grad_norm": 0.6187562743125278, + "learning_rate": 1.936154525304124e-06, + "loss": 0.902605414390564, + "step": 1378 + }, + { + "epoch": 0.317741935483871, + "grad_norm": 0.6256929156852202, + "learning_rate": 1.936020507708238e-06, + "loss": 0.9504558444023132, + "step": 1379 + }, + { + "epoch": 0.31797235023041476, + "grad_norm": 0.6737932441495208, + "learning_rate": 1.9358863542502133e-06, + "loss": 0.8068373203277588, + "step": 1380 + }, + { + "epoch": 0.31820276497695854, + "grad_norm": 0.6309381884158767, + "learning_rate": 1.935752064949524e-06, + "loss": 1.00711989402771, + "step": 1381 + }, + { + "epoch": 0.3184331797235023, + "grad_norm": 0.6297604875594859, + "learning_rate": 1.935617639825661e-06, + "loss": 0.8271746039390564, + "step": 1382 + }, + { + "epoch": 0.3186635944700461, + "grad_norm": 0.658739150286029, + "learning_rate": 1.9354830788981363e-06, + "loss": 0.8478754758834839, + "step": 1383 + }, + { + "epoch": 0.31889400921658984, + "grad_norm": 0.6165108812612344, + "learning_rate": 1.935348382186481e-06, + "loss": 0.9240723252296448, + "step": 1384 + }, + { + "epoch": 0.3191244239631336, + "grad_norm": 0.6446571506984649, + "learning_rate": 1.935213549710246e-06, + "loss": 0.9275645613670349, + "step": 1385 + }, + { + "epoch": 0.3193548387096774, + "grad_norm": 0.6060948743586713, + "learning_rate": 1.9350785814890027e-06, + "loss": 0.9838275909423828, + "step": 1386 + }, + { + "epoch": 0.3195852534562212, + "grad_norm": 0.5765714017880346, + "learning_rate": 1.934943477542341e-06, + "loss": 0.9259177446365356, + "step": 1387 + }, + { + "epoch": 0.31981566820276497, + "grad_norm": 0.6051365106169855, + "learning_rate": 1.9348082378898714e-06, + "loss": 0.9252835512161255, + "step": 1388 + }, + { + "epoch": 0.32004608294930875, + "grad_norm": 0.5670107070091258, + "learning_rate": 1.9346728625512235e-06, + "loss": 0.8929460048675537, + "step": 1389 + }, + { + "epoch": 0.32027649769585254, + 
"grad_norm": 0.5325931239107909, + "learning_rate": 1.934537351546047e-06, + "loss": 0.8909564018249512, + "step": 1390 + }, + { + "epoch": 0.3205069124423963, + "grad_norm": 0.6295332947946368, + "learning_rate": 1.934401704894011e-06, + "loss": 0.8745983839035034, + "step": 1391 + }, + { + "epoch": 0.3207373271889401, + "grad_norm": 0.5987888846505133, + "learning_rate": 1.934265922614805e-06, + "loss": 0.8622266054153442, + "step": 1392 + }, + { + "epoch": 0.3209677419354839, + "grad_norm": 0.5587707056179402, + "learning_rate": 1.9341300047281365e-06, + "loss": 0.6796590089797974, + "step": 1393 + }, + { + "epoch": 0.32119815668202767, + "grad_norm": 0.6156409956015295, + "learning_rate": 1.9339939512537344e-06, + "loss": 0.9012733697891235, + "step": 1394 + }, + { + "epoch": 0.32142857142857145, + "grad_norm": 0.5898128750933246, + "learning_rate": 1.933857762211347e-06, + "loss": 0.9196282625198364, + "step": 1395 + }, + { + "epoch": 0.3216589861751152, + "grad_norm": 0.716981638669288, + "learning_rate": 1.9337214376207417e-06, + "loss": 0.7717788219451904, + "step": 1396 + }, + { + "epoch": 0.32188940092165896, + "grad_norm": 0.6574432706431985, + "learning_rate": 1.9335849775017057e-06, + "loss": 0.8516619801521301, + "step": 1397 + }, + { + "epoch": 0.32211981566820275, + "grad_norm": 0.6319036543472709, + "learning_rate": 1.933448381874046e-06, + "loss": 0.8089120388031006, + "step": 1398 + }, + { + "epoch": 0.32235023041474653, + "grad_norm": 0.7117992019263996, + "learning_rate": 1.9333116507575895e-06, + "loss": 0.8940925598144531, + "step": 1399 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 1.1103495530975782, + "learning_rate": 1.9331747841721827e-06, + "loss": 1.0240859985351562, + "step": 1400 + }, + { + "epoch": 0.3228110599078341, + "grad_norm": 0.6110124319562482, + "learning_rate": 1.9330377821376916e-06, + "loss": 0.742689847946167, + "step": 1401 + }, + { + "epoch": 0.3230414746543779, + "grad_norm": 0.6830153635526487, + 
"learning_rate": 1.932900644674001e-06, + "loss": 0.9843875169754028, + "step": 1402 + }, + { + "epoch": 0.32327188940092166, + "grad_norm": 0.6043326796009376, + "learning_rate": 1.932763371801017e-06, + "loss": 0.7289329767227173, + "step": 1403 + }, + { + "epoch": 0.32350230414746545, + "grad_norm": 0.676828647698979, + "learning_rate": 1.9326259635386644e-06, + "loss": 0.7706295251846313, + "step": 1404 + }, + { + "epoch": 0.32373271889400923, + "grad_norm": 0.526047650367784, + "learning_rate": 1.932488419906888e-06, + "loss": 0.87788325548172, + "step": 1405 + }, + { + "epoch": 0.323963133640553, + "grad_norm": 0.5971998478662486, + "learning_rate": 1.9323507409256515e-06, + "loss": 0.863690972328186, + "step": 1406 + }, + { + "epoch": 0.3241935483870968, + "grad_norm": 0.700825296208237, + "learning_rate": 1.9322129266149396e-06, + "loss": 0.9333875179290771, + "step": 1407 + }, + { + "epoch": 0.3244239631336406, + "grad_norm": 0.6642455421211582, + "learning_rate": 1.9320749769947555e-06, + "loss": 0.9170523881912231, + "step": 1408 + }, + { + "epoch": 0.3246543778801843, + "grad_norm": 0.7524235771818621, + "learning_rate": 1.931936892085122e-06, + "loss": 0.9337698221206665, + "step": 1409 + }, + { + "epoch": 0.3248847926267281, + "grad_norm": 0.5832115844679703, + "learning_rate": 1.9317986719060824e-06, + "loss": 0.8436682224273682, + "step": 1410 + }, + { + "epoch": 0.3251152073732719, + "grad_norm": 0.5569674571153642, + "learning_rate": 1.9316603164776996e-06, + "loss": 0.6652755737304688, + "step": 1411 + }, + { + "epoch": 0.32534562211981566, + "grad_norm": 0.5895248621851672, + "learning_rate": 1.931521825820055e-06, + "loss": 0.7966932654380798, + "step": 1412 + }, + { + "epoch": 0.32557603686635944, + "grad_norm": 0.7207375493085693, + "learning_rate": 1.93138319995325e-06, + "loss": 0.9791682958602905, + "step": 1413 + }, + { + "epoch": 0.3258064516129032, + "grad_norm": 0.6505701538481653, + "learning_rate": 1.931244438897407e-06, + "loss": 
0.7403467297554016, + "step": 1414 + }, + { + "epoch": 0.326036866359447, + "grad_norm": 0.5881243698924259, + "learning_rate": 1.931105542672667e-06, + "loss": 0.7758523225784302, + "step": 1415 + }, + { + "epoch": 0.3262672811059908, + "grad_norm": 0.6866613437755184, + "learning_rate": 1.9309665112991894e-06, + "loss": 0.8444551229476929, + "step": 1416 + }, + { + "epoch": 0.3264976958525346, + "grad_norm": 0.6987387290897759, + "learning_rate": 1.9308273447971553e-06, + "loss": 0.8796061277389526, + "step": 1417 + }, + { + "epoch": 0.32672811059907836, + "grad_norm": 0.6235742967720523, + "learning_rate": 1.9306880431867643e-06, + "loss": 0.8386640548706055, + "step": 1418 + }, + { + "epoch": 0.32695852534562214, + "grad_norm": 0.669578268248941, + "learning_rate": 1.930548606488236e-06, + "loss": 0.9229142665863037, + "step": 1419 + }, + { + "epoch": 0.3271889400921659, + "grad_norm": 0.6307605261613933, + "learning_rate": 1.9304090347218094e-06, + "loss": 0.9938615560531616, + "step": 1420 + }, + { + "epoch": 0.32741935483870965, + "grad_norm": 0.6526253572614591, + "learning_rate": 1.930269327907743e-06, + "loss": 0.7946186661720276, + "step": 1421 + }, + { + "epoch": 0.32764976958525344, + "grad_norm": 0.6717401804422498, + "learning_rate": 1.930129486066315e-06, + "loss": 0.9456713199615479, + "step": 1422 + }, + { + "epoch": 0.3278801843317972, + "grad_norm": 0.5156577436912951, + "learning_rate": 1.929989509217824e-06, + "loss": 0.844656765460968, + "step": 1423 + }, + { + "epoch": 0.328110599078341, + "grad_norm": 0.5219846430026822, + "learning_rate": 1.9298493973825862e-06, + "loss": 0.7534950971603394, + "step": 1424 + }, + { + "epoch": 0.3283410138248848, + "grad_norm": 0.7328149629860281, + "learning_rate": 1.92970915058094e-06, + "loss": 0.934429407119751, + "step": 1425 + }, + { + "epoch": 0.32857142857142857, + "grad_norm": 0.6913075282966522, + "learning_rate": 1.929568768833241e-06, + "loss": 0.9491959810256958, + "step": 1426 + }, + { + 
"epoch": 0.32880184331797235, + "grad_norm": 0.6938433783461605, + "learning_rate": 1.9294282521598657e-06, + "loss": 0.9739001989364624, + "step": 1427 + }, + { + "epoch": 0.32903225806451614, + "grad_norm": 0.7260904191446513, + "learning_rate": 1.92928760058121e-06, + "loss": 0.8159639835357666, + "step": 1428 + }, + { + "epoch": 0.3292626728110599, + "grad_norm": 0.6287238530590293, + "learning_rate": 1.9291468141176894e-06, + "loss": 0.8752772808074951, + "step": 1429 + }, + { + "epoch": 0.3294930875576037, + "grad_norm": 0.6480201898337635, + "learning_rate": 1.929005892789739e-06, + "loss": 0.8543882369995117, + "step": 1430 + }, + { + "epoch": 0.3297235023041475, + "grad_norm": 0.7294679881265868, + "learning_rate": 1.928864836617813e-06, + "loss": 0.8837493658065796, + "step": 1431 + }, + { + "epoch": 0.32995391705069127, + "grad_norm": 0.7638461032292205, + "learning_rate": 1.9287236456223854e-06, + "loss": 0.9320387840270996, + "step": 1432 + }, + { + "epoch": 0.330184331797235, + "grad_norm": 0.5042343025936808, + "learning_rate": 1.92858231982395e-06, + "loss": 0.8272919654846191, + "step": 1433 + }, + { + "epoch": 0.3304147465437788, + "grad_norm": 0.6965906133224807, + "learning_rate": 1.9284408592430207e-06, + "loss": 0.9415527582168579, + "step": 1434 + }, + { + "epoch": 0.33064516129032256, + "grad_norm": 0.7215035047368656, + "learning_rate": 1.928299263900129e-06, + "loss": 0.91558837890625, + "step": 1435 + }, + { + "epoch": 0.33087557603686635, + "grad_norm": 0.5956823050741555, + "learning_rate": 1.9281575338158287e-06, + "loss": 0.9333036541938782, + "step": 1436 + }, + { + "epoch": 0.33110599078341013, + "grad_norm": 0.6051938214219355, + "learning_rate": 1.928015669010691e-06, + "loss": 0.7823847532272339, + "step": 1437 + }, + { + "epoch": 0.3313364055299539, + "grad_norm": 0.7462826372754077, + "learning_rate": 1.9278736695053075e-06, + "loss": 0.8436610102653503, + "step": 1438 + }, + { + "epoch": 0.3315668202764977, + "grad_norm": 
0.7254037554281902, + "learning_rate": 1.927731535320289e-06, + "loss": 0.8658925890922546, + "step": 1439 + }, + { + "epoch": 0.3317972350230415, + "grad_norm": 0.6229809292573231, + "learning_rate": 1.9275892664762665e-06, + "loss": 0.8510075807571411, + "step": 1440 + }, + { + "epoch": 0.33202764976958526, + "grad_norm": 0.6349856559462502, + "learning_rate": 1.9274468629938897e-06, + "loss": 0.8002004623413086, + "step": 1441 + }, + { + "epoch": 0.33225806451612905, + "grad_norm": 0.6766111098462606, + "learning_rate": 1.9273043248938287e-06, + "loss": 1.0030219554901123, + "step": 1442 + }, + { + "epoch": 0.33248847926267283, + "grad_norm": 0.6313930076569801, + "learning_rate": 1.9271616521967723e-06, + "loss": 0.8415981531143188, + "step": 1443 + }, + { + "epoch": 0.3327188940092166, + "grad_norm": 0.5599899399531522, + "learning_rate": 1.9270188449234295e-06, + "loss": 0.7704254388809204, + "step": 1444 + }, + { + "epoch": 0.33294930875576034, + "grad_norm": 0.5742869826690059, + "learning_rate": 1.9268759030945294e-06, + "loss": 0.8350723385810852, + "step": 1445 + }, + { + "epoch": 0.3331797235023041, + "grad_norm": 0.7177949171518314, + "learning_rate": 1.926732826730818e-06, + "loss": 0.8729690313339233, + "step": 1446 + }, + { + "epoch": 0.3334101382488479, + "grad_norm": 0.64691268148931, + "learning_rate": 1.926589615853064e-06, + "loss": 0.7758746147155762, + "step": 1447 + }, + { + "epoch": 0.3336405529953917, + "grad_norm": 0.6330035443782508, + "learning_rate": 1.926446270482054e-06, + "loss": 0.7895134687423706, + "step": 1448 + }, + { + "epoch": 0.3338709677419355, + "grad_norm": 0.5710370240153678, + "learning_rate": 1.9263027906385936e-06, + "loss": 1.0239053964614868, + "step": 1449 + }, + { + "epoch": 0.33410138248847926, + "grad_norm": 0.6423159813237256, + "learning_rate": 1.9261591763435104e-06, + "loss": 0.9294595122337341, + "step": 1450 + }, + { + "epoch": 0.33433179723502304, + "grad_norm": 0.690830605411519, + "learning_rate": 
1.9260154276176484e-06, + "loss": 0.9786148071289062, + "step": 1451 + }, + { + "epoch": 0.3345622119815668, + "grad_norm": 0.5115027993477321, + "learning_rate": 1.925871544481873e-06, + "loss": 0.8513587117195129, + "step": 1452 + }, + { + "epoch": 0.3347926267281106, + "grad_norm": 0.4974492616751121, + "learning_rate": 1.9257275269570686e-06, + "loss": 0.7737371921539307, + "step": 1453 + }, + { + "epoch": 0.3350230414746544, + "grad_norm": 0.6186615203368176, + "learning_rate": 1.9255833750641392e-06, + "loss": 0.8567382097244263, + "step": 1454 + }, + { + "epoch": 0.3352534562211982, + "grad_norm": 0.5498745898568592, + "learning_rate": 1.9254390888240078e-06, + "loss": 0.893741250038147, + "step": 1455 + }, + { + "epoch": 0.33548387096774196, + "grad_norm": 0.5996544133152318, + "learning_rate": 1.9252946682576184e-06, + "loss": 0.9558119773864746, + "step": 1456 + }, + { + "epoch": 0.3357142857142857, + "grad_norm": 0.6629164295929078, + "learning_rate": 1.9251501133859323e-06, + "loss": 0.7055593729019165, + "step": 1457 + }, + { + "epoch": 0.33594470046082947, + "grad_norm": 0.652213418545905, + "learning_rate": 1.9250054242299326e-06, + "loss": 0.8409907817840576, + "step": 1458 + }, + { + "epoch": 0.33617511520737325, + "grad_norm": 0.5648924790833157, + "learning_rate": 1.9248606008106196e-06, + "loss": 0.9459772109985352, + "step": 1459 + }, + { + "epoch": 0.33640552995391704, + "grad_norm": 0.6285611694534835, + "learning_rate": 1.924715643149015e-06, + "loss": 0.7848879098892212, + "step": 1460 + }, + { + "epoch": 0.3366359447004608, + "grad_norm": 0.8030718131506138, + "learning_rate": 1.924570551266159e-06, + "loss": 1.0365980863571167, + "step": 1461 + }, + { + "epoch": 0.3368663594470046, + "grad_norm": 0.6014174038703485, + "learning_rate": 1.924425325183111e-06, + "loss": 0.7331318855285645, + "step": 1462 + }, + { + "epoch": 0.3370967741935484, + "grad_norm": 0.6427865459032713, + "learning_rate": 1.9242799649209515e-06, + "loss": 
0.8536237478256226, + "step": 1463 + }, + { + "epoch": 0.33732718894009217, + "grad_norm": 0.6525839289073214, + "learning_rate": 1.9241344705007784e-06, + "loss": 0.9296326637268066, + "step": 1464 + }, + { + "epoch": 0.33755760368663595, + "grad_norm": 0.887947392639257, + "learning_rate": 1.92398884194371e-06, + "loss": 0.9084932804107666, + "step": 1465 + }, + { + "epoch": 0.33778801843317974, + "grad_norm": 0.5270165853452017, + "learning_rate": 1.9238430792708847e-06, + "loss": 0.7426833510398865, + "step": 1466 + }, + { + "epoch": 0.3380184331797235, + "grad_norm": 0.5410658114261949, + "learning_rate": 1.9236971825034595e-06, + "loss": 0.7655431032180786, + "step": 1467 + }, + { + "epoch": 0.3382488479262673, + "grad_norm": 0.8331011387344854, + "learning_rate": 1.923551151662611e-06, + "loss": 0.9463646411895752, + "step": 1468 + }, + { + "epoch": 0.3384792626728111, + "grad_norm": 0.5486811314665706, + "learning_rate": 1.9234049867695355e-06, + "loss": 0.75661301612854, + "step": 1469 + }, + { + "epoch": 0.3387096774193548, + "grad_norm": 0.6386489226368193, + "learning_rate": 1.9232586878454486e-06, + "loss": 0.7411723136901855, + "step": 1470 + }, + { + "epoch": 0.3389400921658986, + "grad_norm": 0.6921074075590697, + "learning_rate": 1.9231122549115854e-06, + "loss": 0.9537360072135925, + "step": 1471 + }, + { + "epoch": 0.3391705069124424, + "grad_norm": 0.6895160542670777, + "learning_rate": 1.9229656879892004e-06, + "loss": 0.9527197480201721, + "step": 1472 + }, + { + "epoch": 0.33940092165898617, + "grad_norm": 0.7025720730409266, + "learning_rate": 1.9228189870995674e-06, + "loss": 0.9083822965621948, + "step": 1473 + }, + { + "epoch": 0.33963133640552995, + "grad_norm": 0.5301970222083436, + "learning_rate": 1.9226721522639804e-06, + "loss": 0.8546823263168335, + "step": 1474 + }, + { + "epoch": 0.33986175115207373, + "grad_norm": 0.6709689097402769, + "learning_rate": 1.922525183503752e-06, + "loss": 0.7429832816123962, + "step": 1475 + }, + { 
+ "epoch": 0.3400921658986175, + "grad_norm": 0.62032231336291, + "learning_rate": 1.922378080840214e-06, + "loss": 0.8805499076843262, + "step": 1476 + }, + { + "epoch": 0.3403225806451613, + "grad_norm": 0.681736765273056, + "learning_rate": 1.9222308442947193e-06, + "loss": 1.0177074670791626, + "step": 1477 + }, + { + "epoch": 0.3405529953917051, + "grad_norm": 0.5202393927717802, + "learning_rate": 1.922083473888638e-06, + "loss": 0.778317391872406, + "step": 1478 + }, + { + "epoch": 0.34078341013824887, + "grad_norm": 0.5628134051805, + "learning_rate": 1.921935969643361e-06, + "loss": 0.8461896181106567, + "step": 1479 + }, + { + "epoch": 0.34101382488479265, + "grad_norm": 0.5553667327802273, + "learning_rate": 1.921788331580299e-06, + "loss": 0.8028895258903503, + "step": 1480 + }, + { + "epoch": 0.34124423963133643, + "grad_norm": 0.5368047903298083, + "learning_rate": 1.9216405597208803e-06, + "loss": 0.9071121215820312, + "step": 1481 + }, + { + "epoch": 0.34147465437788016, + "grad_norm": 0.6427007304701287, + "learning_rate": 1.921492654086555e-06, + "loss": 0.7715062499046326, + "step": 1482 + }, + { + "epoch": 0.34170506912442394, + "grad_norm": 0.5552851307839923, + "learning_rate": 1.9213446146987907e-06, + "loss": 0.8446664810180664, + "step": 1483 + }, + { + "epoch": 0.3419354838709677, + "grad_norm": 0.712846002939772, + "learning_rate": 1.9211964415790754e-06, + "loss": 0.9835283756256104, + "step": 1484 + }, + { + "epoch": 0.3421658986175115, + "grad_norm": 0.8210412746012221, + "learning_rate": 1.921048134748916e-06, + "loss": 1.0630817413330078, + "step": 1485 + }, + { + "epoch": 0.3423963133640553, + "grad_norm": 0.6748930312757173, + "learning_rate": 1.920899694229839e-06, + "loss": 0.8514837622642517, + "step": 1486 + }, + { + "epoch": 0.3426267281105991, + "grad_norm": 0.6222560657794074, + "learning_rate": 1.920751120043391e-06, + "loss": 0.7302432060241699, + "step": 1487 + }, + { + "epoch": 0.34285714285714286, + "grad_norm": 
0.7079869651359869, + "learning_rate": 1.920602412211136e-06, + "loss": 0.778337836265564, + "step": 1488 + }, + { + "epoch": 0.34308755760368664, + "grad_norm": 0.6890026561089317, + "learning_rate": 1.92045357075466e-06, + "loss": 0.815348207950592, + "step": 1489 + }, + { + "epoch": 0.3433179723502304, + "grad_norm": 0.5476065495891982, + "learning_rate": 1.920304595695567e-06, + "loss": 0.7844003438949585, + "step": 1490 + }, + { + "epoch": 0.3435483870967742, + "grad_norm": 0.6758218109549144, + "learning_rate": 1.92015548705548e-06, + "loss": 0.9513435363769531, + "step": 1491 + }, + { + "epoch": 0.343778801843318, + "grad_norm": 0.6450445262879821, + "learning_rate": 1.9200062448560424e-06, + "loss": 0.7506752610206604, + "step": 1492 + }, + { + "epoch": 0.3440092165898618, + "grad_norm": 0.6233205865485715, + "learning_rate": 1.919856869118916e-06, + "loss": 0.739554762840271, + "step": 1493 + }, + { + "epoch": 0.3442396313364055, + "grad_norm": 0.7436551378630792, + "learning_rate": 1.9197073598657826e-06, + "loss": 0.8167033791542053, + "step": 1494 + }, + { + "epoch": 0.3444700460829493, + "grad_norm": 0.6904439986569212, + "learning_rate": 1.919557717118344e-06, + "loss": 0.9308677911758423, + "step": 1495 + }, + { + "epoch": 0.34470046082949307, + "grad_norm": 0.6340340245140523, + "learning_rate": 1.9194079408983197e-06, + "loss": 0.8601467609405518, + "step": 1496 + }, + { + "epoch": 0.34493087557603686, + "grad_norm": 0.5645119744435318, + "learning_rate": 1.91925803122745e-06, + "loss": 0.8062653541564941, + "step": 1497 + }, + { + "epoch": 0.34516129032258064, + "grad_norm": 0.6267130901098985, + "learning_rate": 1.9191079881274943e-06, + "loss": 0.8910555839538574, + "step": 1498 + }, + { + "epoch": 0.3453917050691244, + "grad_norm": 0.6398235864437706, + "learning_rate": 1.9189578116202307e-06, + "loss": 0.8604668378829956, + "step": 1499 + }, + { + "epoch": 0.3456221198156682, + "grad_norm": 0.660935387898433, + "learning_rate": 
1.918807501727457e-06, + "loss": 0.7255126237869263, + "step": 1500 + }, + { + "epoch": 0.345852534562212, + "grad_norm": 0.6873891579533423, + "learning_rate": 1.9186570584709912e-06, + "loss": 0.998108983039856, + "step": 1501 + }, + { + "epoch": 0.34608294930875577, + "grad_norm": 0.6220147185177797, + "learning_rate": 1.918506481872669e-06, + "loss": 0.7660422325134277, + "step": 1502 + }, + { + "epoch": 0.34631336405529956, + "grad_norm": 0.6579892645247903, + "learning_rate": 1.9183557719543472e-06, + "loss": 0.868739902973175, + "step": 1503 + }, + { + "epoch": 0.34654377880184334, + "grad_norm": 0.5789973673480234, + "learning_rate": 1.918204928737901e-06, + "loss": 0.6630350351333618, + "step": 1504 + }, + { + "epoch": 0.3467741935483871, + "grad_norm": 0.5444610824332694, + "learning_rate": 1.9180539522452247e-06, + "loss": 0.8651586771011353, + "step": 1505 + }, + { + "epoch": 0.34700460829493085, + "grad_norm": 0.5927111235913876, + "learning_rate": 1.9179028424982326e-06, + "loss": 0.8584417700767517, + "step": 1506 + }, + { + "epoch": 0.34723502304147463, + "grad_norm": 0.5575547611441275, + "learning_rate": 1.917751599518858e-06, + "loss": 0.7793893814086914, + "step": 1507 + }, + { + "epoch": 0.3474654377880184, + "grad_norm": 0.768634414143097, + "learning_rate": 1.9176002233290542e-06, + "loss": 0.8499815464019775, + "step": 1508 + }, + { + "epoch": 0.3476958525345622, + "grad_norm": 0.7795460044280101, + "learning_rate": 1.917448713950792e-06, + "loss": 0.7914199829101562, + "step": 1509 + }, + { + "epoch": 0.347926267281106, + "grad_norm": 0.8510793838671106, + "learning_rate": 1.9172970714060637e-06, + "loss": 0.942331850528717, + "step": 1510 + }, + { + "epoch": 0.34815668202764977, + "grad_norm": 0.621963787262809, + "learning_rate": 1.9171452957168803e-06, + "loss": 0.7780032157897949, + "step": 1511 + }, + { + "epoch": 0.34838709677419355, + "grad_norm": 0.6399045325995384, + "learning_rate": 1.916993386905271e-06, + "loss": 
0.8544708490371704, + "step": 1512 + }, + { + "epoch": 0.34861751152073733, + "grad_norm": 0.6890752127070114, + "learning_rate": 1.9168413449932855e-06, + "loss": 0.798173725605011, + "step": 1513 + }, + { + "epoch": 0.3488479262672811, + "grad_norm": 0.7396810139453504, + "learning_rate": 1.9166891700029922e-06, + "loss": 0.9426852464675903, + "step": 1514 + }, + { + "epoch": 0.3490783410138249, + "grad_norm": 0.7455227520654529, + "learning_rate": 1.91653686195648e-06, + "loss": 0.922240138053894, + "step": 1515 + }, + { + "epoch": 0.3493087557603687, + "grad_norm": 0.630161091555718, + "learning_rate": 1.9163844208758556e-06, + "loss": 0.7997978925704956, + "step": 1516 + }, + { + "epoch": 0.34953917050691247, + "grad_norm": 0.7560374253096135, + "learning_rate": 1.9162318467832455e-06, + "loss": 1.0597525835037231, + "step": 1517 + }, + { + "epoch": 0.3497695852534562, + "grad_norm": 0.6669142658812499, + "learning_rate": 1.9160791397007957e-06, + "loss": 0.8211681842803955, + "step": 1518 + }, + { + "epoch": 0.35, + "grad_norm": 0.6134468456903489, + "learning_rate": 1.9159262996506716e-06, + "loss": 0.8078022003173828, + "step": 1519 + }, + { + "epoch": 0.35023041474654376, + "grad_norm": 0.8800559709758627, + "learning_rate": 1.915773326655057e-06, + "loss": 0.9449256658554077, + "step": 1520 + }, + { + "epoch": 0.35046082949308754, + "grad_norm": 0.6806561068219223, + "learning_rate": 1.915620220736157e-06, + "loss": 0.8744012117385864, + "step": 1521 + }, + { + "epoch": 0.35069124423963133, + "grad_norm": 0.501693303726274, + "learning_rate": 1.9154669819161946e-06, + "loss": 0.9503095746040344, + "step": 1522 + }, + { + "epoch": 0.3509216589861751, + "grad_norm": 0.7422922368497302, + "learning_rate": 1.9153136102174106e-06, + "loss": 1.055432915687561, + "step": 1523 + }, + { + "epoch": 0.3511520737327189, + "grad_norm": 0.7420134076461076, + "learning_rate": 1.9151601056620684e-06, + "loss": 0.8540226221084595, + "step": 1524 + }, + { + "epoch": 
0.3513824884792627, + "grad_norm": 0.6432500784024293, + "learning_rate": 1.915006468272448e-06, + "loss": 0.8846266865730286, + "step": 1525 + }, + { + "epoch": 0.35161290322580646, + "grad_norm": 0.6065038491164693, + "learning_rate": 1.9148526980708507e-06, + "loss": 0.8941656947135925, + "step": 1526 + }, + { + "epoch": 0.35184331797235024, + "grad_norm": 0.657637251938276, + "learning_rate": 1.914698795079595e-06, + "loss": 0.868419885635376, + "step": 1527 + }, + { + "epoch": 0.35207373271889403, + "grad_norm": 0.6471997072963731, + "learning_rate": 1.91454475932102e-06, + "loss": 0.7375580072402954, + "step": 1528 + }, + { + "epoch": 0.3523041474654378, + "grad_norm": 0.5813494020686044, + "learning_rate": 1.9143905908174844e-06, + "loss": 0.9415492415428162, + "step": 1529 + }, + { + "epoch": 0.35253456221198154, + "grad_norm": 0.5845641741459107, + "learning_rate": 1.9142362895913646e-06, + "loss": 0.8395911455154419, + "step": 1530 + }, + { + "epoch": 0.3527649769585253, + "grad_norm": 0.6214793611789142, + "learning_rate": 1.914081855665057e-06, + "loss": 0.831234335899353, + "step": 1531 + }, + { + "epoch": 0.3529953917050691, + "grad_norm": 0.6337865377576076, + "learning_rate": 1.9139272890609794e-06, + "loss": 0.8975566029548645, + "step": 1532 + }, + { + "epoch": 0.3532258064516129, + "grad_norm": 0.629586080319263, + "learning_rate": 1.913772589801565e-06, + "loss": 0.8134264945983887, + "step": 1533 + }, + { + "epoch": 0.3534562211981567, + "grad_norm": 0.6728325426784268, + "learning_rate": 1.913617757909269e-06, + "loss": 0.9507275819778442, + "step": 1534 + }, + { + "epoch": 0.35368663594470046, + "grad_norm": 0.6431752162471284, + "learning_rate": 1.913462793406565e-06, + "loss": 0.8839038610458374, + "step": 1535 + }, + { + "epoch": 0.35391705069124424, + "grad_norm": 0.5543997844984022, + "learning_rate": 1.9133076963159453e-06, + "loss": 0.8708392381668091, + "step": 1536 + }, + { + "epoch": 0.354147465437788, + "grad_norm": 
0.6062385114401656, + "learning_rate": 1.913152466659923e-06, + "loss": 0.7609391212463379, + "step": 1537 + }, + { + "epoch": 0.3543778801843318, + "grad_norm": 0.7180303128257083, + "learning_rate": 1.912997104461029e-06, + "loss": 0.9231283664703369, + "step": 1538 + }, + { + "epoch": 0.3546082949308756, + "grad_norm": 0.6890910539107805, + "learning_rate": 1.912841609741814e-06, + "loss": 1.0297726392745972, + "step": 1539 + }, + { + "epoch": 0.3548387096774194, + "grad_norm": 0.75971130189085, + "learning_rate": 1.9126859825248475e-06, + "loss": 0.8798987865447998, + "step": 1540 + }, + { + "epoch": 0.35506912442396316, + "grad_norm": 0.7030378763019209, + "learning_rate": 1.912530222832719e-06, + "loss": 0.9104069471359253, + "step": 1541 + }, + { + "epoch": 0.35529953917050694, + "grad_norm": 0.6534729730017157, + "learning_rate": 1.9123743306880368e-06, + "loss": 0.7618073225021362, + "step": 1542 + }, + { + "epoch": 0.35552995391705067, + "grad_norm": 0.7461748863693719, + "learning_rate": 1.912218306113428e-06, + "loss": 0.8397510051727295, + "step": 1543 + }, + { + "epoch": 0.35576036866359445, + "grad_norm": 0.7060377086024656, + "learning_rate": 1.91206214913154e-06, + "loss": 0.9884299039840698, + "step": 1544 + }, + { + "epoch": 0.35599078341013823, + "grad_norm": 0.9576081524625122, + "learning_rate": 1.9119058597650385e-06, + "loss": 0.9878349304199219, + "step": 1545 + }, + { + "epoch": 0.356221198156682, + "grad_norm": 0.6493274093007226, + "learning_rate": 1.9117494380366086e-06, + "loss": 0.8790488243103027, + "step": 1546 + }, + { + "epoch": 0.3564516129032258, + "grad_norm": 0.5310131973918355, + "learning_rate": 1.9115928839689546e-06, + "loss": 0.7390745878219604, + "step": 1547 + }, + { + "epoch": 0.3566820276497696, + "grad_norm": 0.6882029258971281, + "learning_rate": 1.9114361975848004e-06, + "loss": 0.7354288101196289, + "step": 1548 + }, + { + "epoch": 0.35691244239631337, + "grad_norm": 0.7667535594605746, + "learning_rate": 
1.911279378906889e-06, + "loss": 0.9234673976898193, + "step": 1549 + }, + { + "epoch": 0.35714285714285715, + "grad_norm": 0.6115013610277281, + "learning_rate": 1.911122427957982e-06, + "loss": 0.8913710117340088, + "step": 1550 + }, + { + "epoch": 0.35737327188940093, + "grad_norm": 0.7050561523779678, + "learning_rate": 1.9109653447608605e-06, + "loss": 0.754358172416687, + "step": 1551 + }, + { + "epoch": 0.3576036866359447, + "grad_norm": 0.784312775933048, + "learning_rate": 1.910808129338325e-06, + "loss": 0.7361906170845032, + "step": 1552 + }, + { + "epoch": 0.3578341013824885, + "grad_norm": 0.7799572736490341, + "learning_rate": 1.9106507817131957e-06, + "loss": 0.8167279362678528, + "step": 1553 + }, + { + "epoch": 0.3580645161290323, + "grad_norm": 0.5335250967831215, + "learning_rate": 1.910493301908311e-06, + "loss": 0.7504739761352539, + "step": 1554 + }, + { + "epoch": 0.358294930875576, + "grad_norm": 0.7032319483863736, + "learning_rate": 1.9103356899465287e-06, + "loss": 0.8452355861663818, + "step": 1555 + }, + { + "epoch": 0.3585253456221198, + "grad_norm": 0.6126249946093243, + "learning_rate": 1.9101779458507263e-06, + "loss": 0.891547679901123, + "step": 1556 + }, + { + "epoch": 0.3587557603686636, + "grad_norm": 0.6935978783962933, + "learning_rate": 1.9100200696438e-06, + "loss": 0.8132680654525757, + "step": 1557 + }, + { + "epoch": 0.35898617511520736, + "grad_norm": 0.6519674133121284, + "learning_rate": 1.9098620613486646e-06, + "loss": 0.799482524394989, + "step": 1558 + }, + { + "epoch": 0.35921658986175115, + "grad_norm": 0.5904521460015955, + "learning_rate": 1.909703920988256e-06, + "loss": 0.8490267992019653, + "step": 1559 + }, + { + "epoch": 0.35944700460829493, + "grad_norm": 0.6819976276562522, + "learning_rate": 1.9095456485855277e-06, + "loss": 0.8608428239822388, + "step": 1560 + }, + { + "epoch": 0.3596774193548387, + "grad_norm": 0.710056379748393, + "learning_rate": 1.9093872441634526e-06, + "loss": 
0.8460499048233032, + "step": 1561 + }, + { + "epoch": 0.3599078341013825, + "grad_norm": 0.7727130217690178, + "learning_rate": 1.9092287077450226e-06, + "loss": 0.9268433451652527, + "step": 1562 + }, + { + "epoch": 0.3601382488479263, + "grad_norm": 0.612809776724531, + "learning_rate": 1.90907003935325e-06, + "loss": 0.7354154586791992, + "step": 1563 + }, + { + "epoch": 0.36036866359447006, + "grad_norm": 0.6941943523357101, + "learning_rate": 1.9089112390111637e-06, + "loss": 0.87982177734375, + "step": 1564 + }, + { + "epoch": 0.36059907834101385, + "grad_norm": 0.7092001355075633, + "learning_rate": 1.9087523067418148e-06, + "loss": 0.994953453540802, + "step": 1565 + }, + { + "epoch": 0.36082949308755763, + "grad_norm": 0.7240785511234525, + "learning_rate": 1.9085932425682715e-06, + "loss": 0.8623256087303162, + "step": 1566 + }, + { + "epoch": 0.36105990783410136, + "grad_norm": 0.7577571727617612, + "learning_rate": 1.908434046513622e-06, + "loss": 0.8752846717834473, + "step": 1567 + }, + { + "epoch": 0.36129032258064514, + "grad_norm": 0.7538020694732109, + "learning_rate": 1.908274718600973e-06, + "loss": 0.9002033472061157, + "step": 1568 + }, + { + "epoch": 0.3615207373271889, + "grad_norm": 0.6751938160957709, + "learning_rate": 1.908115258853451e-06, + "loss": 0.7290444374084473, + "step": 1569 + }, + { + "epoch": 0.3617511520737327, + "grad_norm": 0.5739449847646289, + "learning_rate": 1.9079556672942016e-06, + "loss": 0.6833889484405518, + "step": 1570 + }, + { + "epoch": 0.3619815668202765, + "grad_norm": 0.7271514059808825, + "learning_rate": 1.907795943946389e-06, + "loss": 1.0033842325210571, + "step": 1571 + }, + { + "epoch": 0.3622119815668203, + "grad_norm": 0.7261786878454322, + "learning_rate": 1.907636088833197e-06, + "loss": 0.9590950012207031, + "step": 1572 + }, + { + "epoch": 0.36244239631336406, + "grad_norm": 0.6796147019608265, + "learning_rate": 1.907476101977828e-06, + "loss": 0.8812122344970703, + "step": 1573 + }, + { + 
"epoch": 0.36267281105990784, + "grad_norm": 0.5509770826635522, + "learning_rate": 1.9073159834035045e-06, + "loss": 0.7549433708190918, + "step": 1574 + }, + { + "epoch": 0.3629032258064516, + "grad_norm": 0.8344983468044503, + "learning_rate": 1.9071557331334667e-06, + "loss": 0.9235562086105347, + "step": 1575 + }, + { + "epoch": 0.3631336405529954, + "grad_norm": 0.6317903590715543, + "learning_rate": 1.9069953511909755e-06, + "loss": 0.8468542098999023, + "step": 1576 + }, + { + "epoch": 0.3633640552995392, + "grad_norm": 0.5574642699953357, + "learning_rate": 1.9068348375993096e-06, + "loss": 0.8804000616073608, + "step": 1577 + }, + { + "epoch": 0.363594470046083, + "grad_norm": 0.5912501411899118, + "learning_rate": 1.9066741923817676e-06, + "loss": 0.762598991394043, + "step": 1578 + }, + { + "epoch": 0.3638248847926267, + "grad_norm": 0.7706966706442087, + "learning_rate": 1.9065134155616666e-06, + "loss": 0.8791940212249756, + "step": 1579 + }, + { + "epoch": 0.3640552995391705, + "grad_norm": 0.7168527524200441, + "learning_rate": 1.9063525071623439e-06, + "loss": 0.7041842937469482, + "step": 1580 + }, + { + "epoch": 0.36428571428571427, + "grad_norm": 0.6160916310238944, + "learning_rate": 1.9061914672071543e-06, + "loss": 0.9526468515396118, + "step": 1581 + }, + { + "epoch": 0.36451612903225805, + "grad_norm": 0.7118890640067297, + "learning_rate": 1.906030295719473e-06, + "loss": 0.9388316869735718, + "step": 1582 + }, + { + "epoch": 0.36474654377880183, + "grad_norm": 0.6899284739234433, + "learning_rate": 1.9058689927226936e-06, + "loss": 0.7295777797698975, + "step": 1583 + }, + { + "epoch": 0.3649769585253456, + "grad_norm": 0.773766722090894, + "learning_rate": 1.905707558240229e-06, + "loss": 0.7540932297706604, + "step": 1584 + }, + { + "epoch": 0.3652073732718894, + "grad_norm": 0.7012558071518832, + "learning_rate": 1.9055459922955118e-06, + "loss": 0.9457792639732361, + "step": 1585 + }, + { + "epoch": 0.3654377880184332, + "grad_norm": 
0.8248538436303866, + "learning_rate": 1.9053842949119923e-06, + "loss": 0.9121883511543274, + "step": 1586 + }, + { + "epoch": 0.36566820276497697, + "grad_norm": 0.7283384308967912, + "learning_rate": 1.905222466113141e-06, + "loss": 0.8140746355056763, + "step": 1587 + }, + { + "epoch": 0.36589861751152075, + "grad_norm": 0.6419705545105435, + "learning_rate": 1.905060505922447e-06, + "loss": 0.7403484582901001, + "step": 1588 + }, + { + "epoch": 0.36612903225806454, + "grad_norm": 0.581047347336086, + "learning_rate": 1.9048984143634188e-06, + "loss": 0.9040734171867371, + "step": 1589 + }, + { + "epoch": 0.3663594470046083, + "grad_norm": 0.8763582049227886, + "learning_rate": 1.9047361914595834e-06, + "loss": 0.9060958623886108, + "step": 1590 + }, + { + "epoch": 0.36658986175115205, + "grad_norm": 0.563240407907546, + "learning_rate": 1.904573837234488e-06, + "loss": 0.6925936937332153, + "step": 1591 + }, + { + "epoch": 0.36682027649769583, + "grad_norm": 0.6465995527416484, + "learning_rate": 1.9044113517116973e-06, + "loss": 0.8120197057723999, + "step": 1592 + }, + { + "epoch": 0.3670506912442396, + "grad_norm": 0.6544256373051048, + "learning_rate": 1.9042487349147965e-06, + "loss": 0.796414852142334, + "step": 1593 + }, + { + "epoch": 0.3672811059907834, + "grad_norm": 0.5916998574283423, + "learning_rate": 1.9040859868673885e-06, + "loss": 0.8390822410583496, + "step": 1594 + }, + { + "epoch": 0.3675115207373272, + "grad_norm": 0.6567403008386238, + "learning_rate": 1.9039231075930967e-06, + "loss": 0.990093469619751, + "step": 1595 + }, + { + "epoch": 0.36774193548387096, + "grad_norm": 0.733917290012865, + "learning_rate": 1.9037600971155623e-06, + "loss": 0.8548597097396851, + "step": 1596 + }, + { + "epoch": 0.36797235023041475, + "grad_norm": 0.5429475903618856, + "learning_rate": 1.9035969554584464e-06, + "loss": 0.687299370765686, + "step": 1597 + }, + { + "epoch": 0.36820276497695853, + "grad_norm": 0.9276548262086025, + "learning_rate": 
1.9034336826454282e-06, + "loss": 0.7857942581176758, + "step": 1598 + }, + { + "epoch": 0.3684331797235023, + "grad_norm": 0.7345227244712206, + "learning_rate": 1.9032702787002072e-06, + "loss": 0.8836538195610046, + "step": 1599 + }, + { + "epoch": 0.3686635944700461, + "grad_norm": 0.723858907192251, + "learning_rate": 1.9031067436465011e-06, + "loss": 0.8132715225219727, + "step": 1600 + }, + { + "epoch": 0.3688940092165899, + "grad_norm": 0.6649285274594987, + "learning_rate": 1.9029430775080467e-06, + "loss": 0.7632347345352173, + "step": 1601 + }, + { + "epoch": 0.36912442396313366, + "grad_norm": 0.6319858893374919, + "learning_rate": 1.9027792803086e-06, + "loss": 0.8616297841072083, + "step": 1602 + }, + { + "epoch": 0.36935483870967745, + "grad_norm": 0.6067565637769744, + "learning_rate": 1.9026153520719358e-06, + "loss": 0.8418172597885132, + "step": 1603 + }, + { + "epoch": 0.3695852534562212, + "grad_norm": 0.7094320350542224, + "learning_rate": 1.902451292821848e-06, + "loss": 0.7253717184066772, + "step": 1604 + }, + { + "epoch": 0.36981566820276496, + "grad_norm": 0.8059000016280097, + "learning_rate": 1.90228710258215e-06, + "loss": 0.9746035933494568, + "step": 1605 + }, + { + "epoch": 0.37004608294930874, + "grad_norm": 0.5259402340057983, + "learning_rate": 1.9021227813766733e-06, + "loss": 0.7722853422164917, + "step": 1606 + }, + { + "epoch": 0.3702764976958525, + "grad_norm": 0.6925264238716391, + "learning_rate": 1.9019583292292693e-06, + "loss": 0.8278614282608032, + "step": 1607 + }, + { + "epoch": 0.3705069124423963, + "grad_norm": 0.6439238935194896, + "learning_rate": 1.9017937461638078e-06, + "loss": 0.7433085441589355, + "step": 1608 + }, + { + "epoch": 0.3707373271889401, + "grad_norm": 0.5505689424398915, + "learning_rate": 1.901629032204178e-06, + "loss": 0.9194153547286987, + "step": 1609 + }, + { + "epoch": 0.3709677419354839, + "grad_norm": 0.5866951472740422, + "learning_rate": 1.9014641873742877e-06, + "loss": 
0.8502616882324219, + "step": 1610 + }, + { + "epoch": 0.37119815668202766, + "grad_norm": 0.6242266615517361, + "learning_rate": 1.9012992116980637e-06, + "loss": 0.8494570255279541, + "step": 1611 + }, + { + "epoch": 0.37142857142857144, + "grad_norm": 0.7369836132356214, + "learning_rate": 1.9011341051994526e-06, + "loss": 0.8567800521850586, + "step": 1612 + }, + { + "epoch": 0.3716589861751152, + "grad_norm": 0.6246604791910833, + "learning_rate": 1.9009688679024189e-06, + "loss": 0.7739682197570801, + "step": 1613 + }, + { + "epoch": 0.371889400921659, + "grad_norm": 0.754158311495332, + "learning_rate": 1.900803499830947e-06, + "loss": 0.8548814058303833, + "step": 1614 + }, + { + "epoch": 0.3721198156682028, + "grad_norm": 0.5813822362984273, + "learning_rate": 1.9006380010090395e-06, + "loss": 0.7444359064102173, + "step": 1615 + }, + { + "epoch": 0.3723502304147465, + "grad_norm": 1.02732235167255, + "learning_rate": 1.9004723714607183e-06, + "loss": 1.0483827590942383, + "step": 1616 + }, + { + "epoch": 0.3725806451612903, + "grad_norm": 0.7020606936102383, + "learning_rate": 1.9003066112100248e-06, + "loss": 0.7734435200691223, + "step": 1617 + }, + { + "epoch": 0.3728110599078341, + "grad_norm": 0.7388837596699729, + "learning_rate": 1.9001407202810181e-06, + "loss": 0.856806755065918, + "step": 1618 + }, + { + "epoch": 0.37304147465437787, + "grad_norm": 0.6630252498689021, + "learning_rate": 1.8999746986977776e-06, + "loss": 0.8708832263946533, + "step": 1619 + }, + { + "epoch": 0.37327188940092165, + "grad_norm": 0.7833548721469644, + "learning_rate": 1.899808546484401e-06, + "loss": 0.9295653104782104, + "step": 1620 + }, + { + "epoch": 0.37350230414746544, + "grad_norm": 0.8120612065986471, + "learning_rate": 1.8996422636650054e-06, + "loss": 0.8799598217010498, + "step": 1621 + }, + { + "epoch": 0.3737327188940092, + "grad_norm": 0.6113644757026901, + "learning_rate": 1.8994758502637259e-06, + "loss": 0.8014140725135803, + "step": 1622 + }, + { + 
"epoch": 0.373963133640553, + "grad_norm": 0.7305462035644114, + "learning_rate": 1.8993093063047174e-06, + "loss": 0.8252615928649902, + "step": 1623 + }, + { + "epoch": 0.3741935483870968, + "grad_norm": 0.5571708900709818, + "learning_rate": 1.899142631812154e-06, + "loss": 0.8617361783981323, + "step": 1624 + }, + { + "epoch": 0.37442396313364057, + "grad_norm": 0.7088005059034134, + "learning_rate": 1.8989758268102274e-06, + "loss": 0.9316745400428772, + "step": 1625 + }, + { + "epoch": 0.37465437788018435, + "grad_norm": 0.5449801119846465, + "learning_rate": 1.89880889132315e-06, + "loss": 0.8195457458496094, + "step": 1626 + }, + { + "epoch": 0.37488479262672814, + "grad_norm": 0.7143201633211917, + "learning_rate": 1.8986418253751516e-06, + "loss": 0.7828787565231323, + "step": 1627 + }, + { + "epoch": 0.37511520737327186, + "grad_norm": 0.6506165386805676, + "learning_rate": 1.898474628990482e-06, + "loss": 0.8130955696105957, + "step": 1628 + }, + { + "epoch": 0.37534562211981565, + "grad_norm": 0.7388682274593752, + "learning_rate": 1.8983073021934097e-06, + "loss": 0.9925695657730103, + "step": 1629 + }, + { + "epoch": 0.37557603686635943, + "grad_norm": 0.7851734301973293, + "learning_rate": 1.8981398450082216e-06, + "loss": 0.8547999858856201, + "step": 1630 + }, + { + "epoch": 0.3758064516129032, + "grad_norm": 0.7016894400602667, + "learning_rate": 1.897972257459224e-06, + "loss": 0.8922954797744751, + "step": 1631 + }, + { + "epoch": 0.376036866359447, + "grad_norm": 0.641235710173759, + "learning_rate": 1.8978045395707415e-06, + "loss": 0.8553646802902222, + "step": 1632 + }, + { + "epoch": 0.3762672811059908, + "grad_norm": 0.6780369843564141, + "learning_rate": 1.897636691367119e-06, + "loss": 0.7854139804840088, + "step": 1633 + }, + { + "epoch": 0.37649769585253456, + "grad_norm": 0.8291834208164379, + "learning_rate": 1.897468712872719e-06, + "loss": 0.8968626260757446, + "step": 1634 + }, + { + "epoch": 0.37672811059907835, + "grad_norm": 
0.8135056284613995, + "learning_rate": 1.8973006041119234e-06, + "loss": 0.8898152112960815, + "step": 1635 + }, + { + "epoch": 0.37695852534562213, + "grad_norm": 0.7215595529410248, + "learning_rate": 1.8971323651091332e-06, + "loss": 0.8499374389648438, + "step": 1636 + }, + { + "epoch": 0.3771889400921659, + "grad_norm": 0.5955881573233954, + "learning_rate": 1.8969639958887677e-06, + "loss": 0.7803430557250977, + "step": 1637 + }, + { + "epoch": 0.3774193548387097, + "grad_norm": 0.672225539346555, + "learning_rate": 1.8967954964752657e-06, + "loss": 0.7669799327850342, + "step": 1638 + }, + { + "epoch": 0.3776497695852535, + "grad_norm": 0.7164416850564317, + "learning_rate": 1.8966268668930845e-06, + "loss": 0.9085204601287842, + "step": 1639 + }, + { + "epoch": 0.3778801843317972, + "grad_norm": 0.8492247946008473, + "learning_rate": 1.8964581071667005e-06, + "loss": 0.7793002724647522, + "step": 1640 + }, + { + "epoch": 0.378110599078341, + "grad_norm": 0.6359200183287212, + "learning_rate": 1.896289217320609e-06, + "loss": 0.8649430274963379, + "step": 1641 + }, + { + "epoch": 0.3783410138248848, + "grad_norm": 0.6424804906800053, + "learning_rate": 1.8961201973793243e-06, + "loss": 0.856898844242096, + "step": 1642 + }, + { + "epoch": 0.37857142857142856, + "grad_norm": 0.7702312360726356, + "learning_rate": 1.895951047367379e-06, + "loss": 0.8221957087516785, + "step": 1643 + }, + { + "epoch": 0.37880184331797234, + "grad_norm": 0.7163935487823062, + "learning_rate": 1.8957817673093256e-06, + "loss": 0.8158079385757446, + "step": 1644 + }, + { + "epoch": 0.3790322580645161, + "grad_norm": 0.8008902981825888, + "learning_rate": 1.8956123572297343e-06, + "loss": 0.7803312540054321, + "step": 1645 + }, + { + "epoch": 0.3792626728110599, + "grad_norm": 0.7902834195938876, + "learning_rate": 1.8954428171531949e-06, + "loss": 1.035685420036316, + "step": 1646 + }, + { + "epoch": 0.3794930875576037, + "grad_norm": 0.6044824314396153, + "learning_rate": 
1.8952731471043161e-06, + "loss": 0.6871123313903809, + "step": 1647 + }, + { + "epoch": 0.3797235023041475, + "grad_norm": 0.6400629937897654, + "learning_rate": 1.8951033471077253e-06, + "loss": 0.9651780128479004, + "step": 1648 + }, + { + "epoch": 0.37995391705069126, + "grad_norm": 0.7485926311468839, + "learning_rate": 1.8949334171880687e-06, + "loss": 1.018349528312683, + "step": 1649 + }, + { + "epoch": 0.38018433179723504, + "grad_norm": 0.6571349103626993, + "learning_rate": 1.894763357370011e-06, + "loss": 0.6839278936386108, + "step": 1650 + }, + { + "epoch": 0.3804147465437788, + "grad_norm": 0.6757724586058976, + "learning_rate": 1.894593167678237e-06, + "loss": 0.8442174196243286, + "step": 1651 + }, + { + "epoch": 0.38064516129032255, + "grad_norm": 0.6368918088972565, + "learning_rate": 1.8944228481374484e-06, + "loss": 0.8224585056304932, + "step": 1652 + }, + { + "epoch": 0.38087557603686634, + "grad_norm": 0.6970802562618803, + "learning_rate": 1.8942523987723678e-06, + "loss": 0.8570500612258911, + "step": 1653 + }, + { + "epoch": 0.3811059907834101, + "grad_norm": 0.731718201815575, + "learning_rate": 1.8940818196077354e-06, + "loss": 0.7696554660797119, + "step": 1654 + }, + { + "epoch": 0.3813364055299539, + "grad_norm": 0.7456139352122005, + "learning_rate": 1.8939111106683103e-06, + "loss": 0.822563886642456, + "step": 1655 + }, + { + "epoch": 0.3815668202764977, + "grad_norm": 0.46565320695076334, + "learning_rate": 1.8937402719788711e-06, + "loss": 0.6537219882011414, + "step": 1656 + }, + { + "epoch": 0.38179723502304147, + "grad_norm": 0.8414098679023442, + "learning_rate": 1.8935693035642145e-06, + "loss": 0.9081932306289673, + "step": 1657 + }, + { + "epoch": 0.38202764976958525, + "grad_norm": 0.5018818977531995, + "learning_rate": 1.8933982054491563e-06, + "loss": 0.6839661598205566, + "step": 1658 + }, + { + "epoch": 0.38225806451612904, + "grad_norm": 0.6964355972832653, + "learning_rate": 1.8932269776585313e-06, + "loss": 
0.9187283515930176, + "step": 1659 + }, + { + "epoch": 0.3824884792626728, + "grad_norm": 0.8100260748701062, + "learning_rate": 1.893055620217193e-06, + "loss": 0.9567047357559204, + "step": 1660 + }, + { + "epoch": 0.3827188940092166, + "grad_norm": 0.7345697660292878, + "learning_rate": 1.8928841331500136e-06, + "loss": 0.785561203956604, + "step": 1661 + }, + { + "epoch": 0.3829493087557604, + "grad_norm": 0.882033286363023, + "learning_rate": 1.8927125164818842e-06, + "loss": 0.8986088037490845, + "step": 1662 + }, + { + "epoch": 0.38317972350230417, + "grad_norm": 0.7191553093714457, + "learning_rate": 1.892540770237715e-06, + "loss": 1.0027087926864624, + "step": 1663 + }, + { + "epoch": 0.38341013824884795, + "grad_norm": 0.6970721775230337, + "learning_rate": 1.8923688944424346e-06, + "loss": 0.8502041697502136, + "step": 1664 + }, + { + "epoch": 0.3836405529953917, + "grad_norm": 0.6684142159321271, + "learning_rate": 1.8921968891209907e-06, + "loss": 0.8526991605758667, + "step": 1665 + }, + { + "epoch": 0.38387096774193546, + "grad_norm": 0.7082372977886758, + "learning_rate": 1.8920247542983492e-06, + "loss": 0.8084676265716553, + "step": 1666 + }, + { + "epoch": 0.38410138248847925, + "grad_norm": 0.6206558140284871, + "learning_rate": 1.8918524899994957e-06, + "loss": 0.8922938704490662, + "step": 1667 + }, + { + "epoch": 0.38433179723502303, + "grad_norm": 0.768771022868596, + "learning_rate": 1.8916800962494337e-06, + "loss": 0.7965600490570068, + "step": 1668 + }, + { + "epoch": 0.3845622119815668, + "grad_norm": 0.6752105100256773, + "learning_rate": 1.8915075730731865e-06, + "loss": 0.9505549073219299, + "step": 1669 + }, + { + "epoch": 0.3847926267281106, + "grad_norm": 0.6897214722687708, + "learning_rate": 1.8913349204957947e-06, + "loss": 0.9459924697875977, + "step": 1670 + }, + { + "epoch": 0.3850230414746544, + "grad_norm": 0.6215985429421047, + "learning_rate": 1.8911621385423195e-06, + "loss": 0.8433674573898315, + "step": 1671 + }, + { 
+ "epoch": 0.38525345622119817, + "grad_norm": 0.7790027974124772, + "learning_rate": 1.8909892272378398e-06, + "loss": 0.8945955038070679, + "step": 1672 + }, + { + "epoch": 0.38548387096774195, + "grad_norm": 0.6828005324330048, + "learning_rate": 1.890816186607453e-06, + "loss": 0.8580358624458313, + "step": 1673 + }, + { + "epoch": 0.38571428571428573, + "grad_norm": 0.6249387555876122, + "learning_rate": 1.8906430166762761e-06, + "loss": 0.7708698511123657, + "step": 1674 + }, + { + "epoch": 0.3859447004608295, + "grad_norm": 0.7418139824839276, + "learning_rate": 1.8904697174694446e-06, + "loss": 0.8647153377532959, + "step": 1675 + }, + { + "epoch": 0.3861751152073733, + "grad_norm": 0.7428074816121766, + "learning_rate": 1.890296289012112e-06, + "loss": 0.9380506277084351, + "step": 1676 + }, + { + "epoch": 0.386405529953917, + "grad_norm": 0.6218965089791644, + "learning_rate": 1.8901227313294519e-06, + "loss": 0.8814103603363037, + "step": 1677 + }, + { + "epoch": 0.3866359447004608, + "grad_norm": 0.7768206335574417, + "learning_rate": 1.8899490444466556e-06, + "loss": 0.9348419904708862, + "step": 1678 + }, + { + "epoch": 0.3868663594470046, + "grad_norm": 0.5956095891599564, + "learning_rate": 1.8897752283889338e-06, + "loss": 0.7502046823501587, + "step": 1679 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 0.567040551050712, + "learning_rate": 1.8896012831815155e-06, + "loss": 0.8499769568443298, + "step": 1680 + }, + { + "epoch": 0.38732718894009216, + "grad_norm": 0.6506272613615357, + "learning_rate": 1.8894272088496487e-06, + "loss": 0.8253993391990662, + "step": 1681 + }, + { + "epoch": 0.38755760368663594, + "grad_norm": 0.7707626449058277, + "learning_rate": 1.8892530054185998e-06, + "loss": 0.8494073152542114, + "step": 1682 + }, + { + "epoch": 0.3877880184331797, + "grad_norm": 0.7608738547672518, + "learning_rate": 1.8890786729136546e-06, + "loss": 0.8836106061935425, + "step": 1683 + }, + { + "epoch": 0.3880184331797235, + 
"grad_norm": 0.636256009552465, + "learning_rate": 1.8889042113601166e-06, + "loss": 0.8949145078659058, + "step": 1684 + }, + { + "epoch": 0.3882488479262673, + "grad_norm": 0.5966436023392323, + "learning_rate": 1.8887296207833095e-06, + "loss": 0.6210965514183044, + "step": 1685 + }, + { + "epoch": 0.3884792626728111, + "grad_norm": 0.8527942588919344, + "learning_rate": 1.8885549012085744e-06, + "loss": 0.9216527938842773, + "step": 1686 + }, + { + "epoch": 0.38870967741935486, + "grad_norm": 0.6878600463475216, + "learning_rate": 1.8883800526612715e-06, + "loss": 0.9266358613967896, + "step": 1687 + }, + { + "epoch": 0.38894009216589864, + "grad_norm": 0.7261249184769291, + "learning_rate": 1.88820507516678e-06, + "loss": 0.8550606966018677, + "step": 1688 + }, + { + "epoch": 0.38917050691244237, + "grad_norm": 0.702582367534852, + "learning_rate": 1.888029968750498e-06, + "loss": 0.8632181882858276, + "step": 1689 + }, + { + "epoch": 0.38940092165898615, + "grad_norm": 0.8055419508573982, + "learning_rate": 1.8878547334378415e-06, + "loss": 0.8795493841171265, + "step": 1690 + }, + { + "epoch": 0.38963133640552994, + "grad_norm": 0.8491490559655837, + "learning_rate": 1.8876793692542456e-06, + "loss": 0.9750456809997559, + "step": 1691 + }, + { + "epoch": 0.3898617511520737, + "grad_norm": 0.7818793926101317, + "learning_rate": 1.8875038762251645e-06, + "loss": 0.9270161390304565, + "step": 1692 + }, + { + "epoch": 0.3900921658986175, + "grad_norm": 0.7260894881906815, + "learning_rate": 1.8873282543760705e-06, + "loss": 0.8154089450836182, + "step": 1693 + }, + { + "epoch": 0.3903225806451613, + "grad_norm": 0.692223503364103, + "learning_rate": 1.887152503732455e-06, + "loss": 0.9245043992996216, + "step": 1694 + }, + { + "epoch": 0.39055299539170507, + "grad_norm": 0.7622355519095229, + "learning_rate": 1.8869766243198284e-06, + "loss": 0.9218056201934814, + "step": 1695 + }, + { + "epoch": 0.39078341013824885, + "grad_norm": 0.5749624768358436, + 
"learning_rate": 1.8868006161637192e-06, + "loss": 0.7753894329071045, + "step": 1696 + }, + { + "epoch": 0.39101382488479264, + "grad_norm": 0.7181901167791495, + "learning_rate": 1.8866244792896739e-06, + "loss": 0.8455277681350708, + "step": 1697 + }, + { + "epoch": 0.3912442396313364, + "grad_norm": 0.7361657621974459, + "learning_rate": 1.8864482137232596e-06, + "loss": 0.8301571607589722, + "step": 1698 + }, + { + "epoch": 0.3914746543778802, + "grad_norm": 0.5504243602930398, + "learning_rate": 1.8862718194900602e-06, + "loss": 0.9768285155296326, + "step": 1699 + }, + { + "epoch": 0.391705069124424, + "grad_norm": 0.7416616964447972, + "learning_rate": 1.8860952966156798e-06, + "loss": 0.9659395217895508, + "step": 1700 + }, + { + "epoch": 0.3919354838709677, + "grad_norm": 0.731283063502841, + "learning_rate": 1.8859186451257401e-06, + "loss": 0.9975444078445435, + "step": 1701 + }, + { + "epoch": 0.3921658986175115, + "grad_norm": 0.712824030540976, + "learning_rate": 1.8857418650458816e-06, + "loss": 0.9248796701431274, + "step": 1702 + }, + { + "epoch": 0.3923963133640553, + "grad_norm": 0.6864309886370629, + "learning_rate": 1.8855649564017642e-06, + "loss": 0.8792428970336914, + "step": 1703 + }, + { + "epoch": 0.39262672811059907, + "grad_norm": 0.7264626081176593, + "learning_rate": 1.8853879192190657e-06, + "loss": 0.8387417197227478, + "step": 1704 + }, + { + "epoch": 0.39285714285714285, + "grad_norm": 0.707677593822268, + "learning_rate": 1.8852107535234828e-06, + "loss": 0.7020218372344971, + "step": 1705 + }, + { + "epoch": 0.39308755760368663, + "grad_norm": 0.673092322659609, + "learning_rate": 1.885033459340731e-06, + "loss": 0.7388321161270142, + "step": 1706 + }, + { + "epoch": 0.3933179723502304, + "grad_norm": 0.7503922468030345, + "learning_rate": 1.8848560366965441e-06, + "loss": 0.7536240220069885, + "step": 1707 + }, + { + "epoch": 0.3935483870967742, + "grad_norm": 0.7237343332600692, + "learning_rate": 1.8846784856166746e-06, + 
"loss": 0.747667670249939, + "step": 1708 + }, + { + "epoch": 0.393778801843318, + "grad_norm": 0.7263541821971573, + "learning_rate": 1.8845008061268945e-06, + "loss": 0.8068975210189819, + "step": 1709 + }, + { + "epoch": 0.39400921658986177, + "grad_norm": 0.7581453840562968, + "learning_rate": 1.8843229982529932e-06, + "loss": 0.7613410949707031, + "step": 1710 + }, + { + "epoch": 0.39423963133640555, + "grad_norm": 0.6546080156681554, + "learning_rate": 1.8841450620207793e-06, + "loss": 0.8579158782958984, + "step": 1711 + }, + { + "epoch": 0.39447004608294933, + "grad_norm": 0.6400652758844664, + "learning_rate": 1.88396699745608e-06, + "loss": 0.8754673004150391, + "step": 1712 + }, + { + "epoch": 0.39470046082949306, + "grad_norm": 0.7227539443635326, + "learning_rate": 1.8837888045847415e-06, + "loss": 0.7988177537918091, + "step": 1713 + }, + { + "epoch": 0.39493087557603684, + "grad_norm": 0.7533730909693769, + "learning_rate": 1.8836104834326279e-06, + "loss": 0.8658367395401001, + "step": 1714 + }, + { + "epoch": 0.3951612903225806, + "grad_norm": 0.7819630929666835, + "learning_rate": 1.8834320340256223e-06, + "loss": 0.8777489066123962, + "step": 1715 + }, + { + "epoch": 0.3953917050691244, + "grad_norm": 0.6763778401068745, + "learning_rate": 1.8832534563896264e-06, + "loss": 0.9785901308059692, + "step": 1716 + }, + { + "epoch": 0.3956221198156682, + "grad_norm": 0.7796554840537433, + "learning_rate": 1.883074750550561e-06, + "loss": 0.847503125667572, + "step": 1717 + }, + { + "epoch": 0.395852534562212, + "grad_norm": 0.7786503806499795, + "learning_rate": 1.8828959165343643e-06, + "loss": 1.0159538984298706, + "step": 1718 + }, + { + "epoch": 0.39608294930875576, + "grad_norm": 0.8472423063084373, + "learning_rate": 1.882716954366994e-06, + "loss": 0.9064888954162598, + "step": 1719 + }, + { + "epoch": 0.39631336405529954, + "grad_norm": 0.7664117713246195, + "learning_rate": 1.8825378640744264e-06, + "loss": 0.956849217414856, + "step": 1720 + 
}, + { + "epoch": 0.3965437788018433, + "grad_norm": 0.758389558529891, + "learning_rate": 1.882358645682656e-06, + "loss": 0.8983441591262817, + "step": 1721 + }, + { + "epoch": 0.3967741935483871, + "grad_norm": 0.5702990900386659, + "learning_rate": 1.8821792992176967e-06, + "loss": 0.7698956727981567, + "step": 1722 + }, + { + "epoch": 0.3970046082949309, + "grad_norm": 0.8118873070872795, + "learning_rate": 1.8819998247055797e-06, + "loss": 0.9376351833343506, + "step": 1723 + }, + { + "epoch": 0.3972350230414747, + "grad_norm": 0.8486728692509508, + "learning_rate": 1.881820222172356e-06, + "loss": 0.8776079416275024, + "step": 1724 + }, + { + "epoch": 0.39746543778801846, + "grad_norm": 0.9552617438975642, + "learning_rate": 1.8816404916440942e-06, + "loss": 0.9776726961135864, + "step": 1725 + }, + { + "epoch": 0.3976958525345622, + "grad_norm": 0.5841959382882552, + "learning_rate": 1.8814606331468822e-06, + "loss": 0.7699686288833618, + "step": 1726 + }, + { + "epoch": 0.39792626728110597, + "grad_norm": 0.7581748259398383, + "learning_rate": 1.8812806467068265e-06, + "loss": 0.8256866931915283, + "step": 1727 + }, + { + "epoch": 0.39815668202764976, + "grad_norm": 0.6320724280659841, + "learning_rate": 1.881100532350051e-06, + "loss": 0.8493847846984863, + "step": 1728 + }, + { + "epoch": 0.39838709677419354, + "grad_norm": 0.6592895509903398, + "learning_rate": 1.8809202901027002e-06, + "loss": 0.8138688802719116, + "step": 1729 + }, + { + "epoch": 0.3986175115207373, + "grad_norm": 0.7569638843586648, + "learning_rate": 1.880739919990935e-06, + "loss": 0.8637882471084595, + "step": 1730 + }, + { + "epoch": 0.3988479262672811, + "grad_norm": 0.5847233582227849, + "learning_rate": 1.880559422040937e-06, + "loss": 0.8988152742385864, + "step": 1731 + }, + { + "epoch": 0.3990783410138249, + "grad_norm": 0.4724369020135308, + "learning_rate": 1.880378796278904e-06, + "loss": 0.8247279524803162, + "step": 1732 + }, + { + "epoch": 0.39930875576036867, + 
"grad_norm": 0.8071560192562027, + "learning_rate": 1.8801980427310546e-06, + "loss": 0.9699070453643799, + "step": 1733 + }, + { + "epoch": 0.39953917050691246, + "grad_norm": 0.8108307817175047, + "learning_rate": 1.8800171614236241e-06, + "loss": 0.9516465663909912, + "step": 1734 + }, + { + "epoch": 0.39976958525345624, + "grad_norm": 0.655632769560408, + "learning_rate": 1.879836152382868e-06, + "loss": 0.9553602933883667, + "step": 1735 + }, + { + "epoch": 0.4, + "grad_norm": 0.666214042250043, + "learning_rate": 1.879655015635059e-06, + "loss": 0.7805094718933105, + "step": 1736 + }, + { + "epoch": 0.4002304147465438, + "grad_norm": 0.730264537734651, + "learning_rate": 1.8794737512064888e-06, + "loss": 0.9509962797164917, + "step": 1737 + }, + { + "epoch": 0.40046082949308753, + "grad_norm": 0.6755335543884481, + "learning_rate": 1.8792923591234683e-06, + "loss": 0.8663454055786133, + "step": 1738 + }, + { + "epoch": 0.4006912442396313, + "grad_norm": 0.7325230471707477, + "learning_rate": 1.8791108394123257e-06, + "loss": 0.8773336410522461, + "step": 1739 + }, + { + "epoch": 0.4009216589861751, + "grad_norm": 0.6493515009165077, + "learning_rate": 1.8789291920994086e-06, + "loss": 0.7201284766197205, + "step": 1740 + }, + { + "epoch": 0.4011520737327189, + "grad_norm": 0.6665806307840867, + "learning_rate": 1.8787474172110826e-06, + "loss": 0.799161434173584, + "step": 1741 + }, + { + "epoch": 0.40138248847926267, + "grad_norm": 0.8651407328311, + "learning_rate": 1.8785655147737326e-06, + "loss": 0.8987375497817993, + "step": 1742 + }, + { + "epoch": 0.40161290322580645, + "grad_norm": 0.8706739093465035, + "learning_rate": 1.878383484813761e-06, + "loss": 0.8553296327590942, + "step": 1743 + }, + { + "epoch": 0.40184331797235023, + "grad_norm": 0.6706596266673751, + "learning_rate": 1.8782013273575895e-06, + "loss": 0.8376551270484924, + "step": 1744 + }, + { + "epoch": 0.402073732718894, + "grad_norm": 0.7963067027250083, + "learning_rate": 
1.8780190424316578e-06, + "loss": 0.8220775723457336, + "step": 1745 + }, + { + "epoch": 0.4023041474654378, + "grad_norm": 0.7339356821882034, + "learning_rate": 1.8778366300624244e-06, + "loss": 0.8614820241928101, + "step": 1746 + }, + { + "epoch": 0.4025345622119816, + "grad_norm": 0.8065421465945496, + "learning_rate": 1.8776540902763665e-06, + "loss": 0.9434851408004761, + "step": 1747 + }, + { + "epoch": 0.40276497695852537, + "grad_norm": 0.8102544073977809, + "learning_rate": 1.877471423099979e-06, + "loss": 0.8150373101234436, + "step": 1748 + }, + { + "epoch": 0.40299539170506915, + "grad_norm": 0.5910178895755134, + "learning_rate": 1.8772886285597762e-06, + "loss": 0.7660368084907532, + "step": 1749 + }, + { + "epoch": 0.4032258064516129, + "grad_norm": 0.7262631962712356, + "learning_rate": 1.8771057066822903e-06, + "loss": 0.7647032141685486, + "step": 1750 + }, + { + "epoch": 0.40345622119815666, + "grad_norm": 0.6238918567790319, + "learning_rate": 1.8769226574940723e-06, + "loss": 0.6034061908721924, + "step": 1751 + }, + { + "epoch": 0.40368663594470044, + "grad_norm": 0.7344154412243011, + "learning_rate": 1.8767394810216914e-06, + "loss": 1.0062675476074219, + "step": 1752 + }, + { + "epoch": 0.40391705069124423, + "grad_norm": 0.6966552417777933, + "learning_rate": 1.8765561772917354e-06, + "loss": 0.9791489839553833, + "step": 1753 + }, + { + "epoch": 0.404147465437788, + "grad_norm": 0.5825611392130148, + "learning_rate": 1.8763727463308108e-06, + "loss": 0.9054251909255981, + "step": 1754 + }, + { + "epoch": 0.4043778801843318, + "grad_norm": 0.7455727854900284, + "learning_rate": 1.8761891881655423e-06, + "loss": 0.9156093597412109, + "step": 1755 + }, + { + "epoch": 0.4046082949308756, + "grad_norm": 0.6983601123297067, + "learning_rate": 1.876005502822573e-06, + "loss": 0.7525647878646851, + "step": 1756 + }, + { + "epoch": 0.40483870967741936, + "grad_norm": 0.6156689393045622, + "learning_rate": 1.8758216903285643e-06, + "loss": 
0.8321493864059448, + "step": 1757 + }, + { + "epoch": 0.40506912442396314, + "grad_norm": 0.888147060404811, + "learning_rate": 1.8756377507101973e-06, + "loss": 0.9937042593955994, + "step": 1758 + }, + { + "epoch": 0.40529953917050693, + "grad_norm": 0.553604524827559, + "learning_rate": 1.8754536839941694e-06, + "loss": 0.7001460790634155, + "step": 1759 + }, + { + "epoch": 0.4055299539170507, + "grad_norm": 0.7747422377442987, + "learning_rate": 1.8752694902071986e-06, + "loss": 1.0062569379806519, + "step": 1760 + }, + { + "epoch": 0.4057603686635945, + "grad_norm": 0.7145787925683823, + "learning_rate": 1.8750851693760199e-06, + "loss": 0.7414188385009766, + "step": 1761 + }, + { + "epoch": 0.4059907834101382, + "grad_norm": 0.6306403135362045, + "learning_rate": 1.8749007215273873e-06, + "loss": 0.7181771397590637, + "step": 1762 + }, + { + "epoch": 0.406221198156682, + "grad_norm": 0.7763317855361268, + "learning_rate": 1.8747161466880732e-06, + "loss": 0.8797845244407654, + "step": 1763 + }, + { + "epoch": 0.4064516129032258, + "grad_norm": 0.6123636271862207, + "learning_rate": 1.8745314448848684e-06, + "loss": 0.7774960398674011, + "step": 1764 + }, + { + "epoch": 0.4066820276497696, + "grad_norm": 0.9110978120854332, + "learning_rate": 1.874346616144582e-06, + "loss": 0.8499422073364258, + "step": 1765 + }, + { + "epoch": 0.40691244239631336, + "grad_norm": 0.6306854745937814, + "learning_rate": 1.874161660494042e-06, + "loss": 0.7070250511169434, + "step": 1766 + }, + { + "epoch": 0.40714285714285714, + "grad_norm": 0.6762437905211294, + "learning_rate": 1.8739765779600939e-06, + "loss": 0.8009281158447266, + "step": 1767 + }, + { + "epoch": 0.4073732718894009, + "grad_norm": 0.6084135312041689, + "learning_rate": 1.8737913685696027e-06, + "loss": 0.6866155862808228, + "step": 1768 + }, + { + "epoch": 0.4076036866359447, + "grad_norm": 0.7813040754942882, + "learning_rate": 1.873606032349451e-06, + "loss": 0.8200059533119202, + "step": 1769 + }, + { + 
"epoch": 0.4078341013824885, + "grad_norm": 0.629385301974861, + "learning_rate": 1.8734205693265404e-06, + "loss": 0.8413814902305603, + "step": 1770 + }, + { + "epoch": 0.4080645161290323, + "grad_norm": 0.776612651465312, + "learning_rate": 1.8732349795277903e-06, + "loss": 0.9935271143913269, + "step": 1771 + }, + { + "epoch": 0.40829493087557606, + "grad_norm": 0.6589503544607032, + "learning_rate": 1.873049262980139e-06, + "loss": 0.8718058466911316, + "step": 1772 + }, + { + "epoch": 0.40852534562211984, + "grad_norm": 0.8620050398467397, + "learning_rate": 1.8728634197105428e-06, + "loss": 0.9009358882904053, + "step": 1773 + }, + { + "epoch": 0.40875576036866357, + "grad_norm": 0.7755306532739165, + "learning_rate": 1.8726774497459768e-06, + "loss": 0.9128156900405884, + "step": 1774 + }, + { + "epoch": 0.40898617511520735, + "grad_norm": 0.6450271750629438, + "learning_rate": 1.8724913531134342e-06, + "loss": 0.8524078130722046, + "step": 1775 + }, + { + "epoch": 0.40921658986175113, + "grad_norm": 0.7569328214438452, + "learning_rate": 1.872305129839927e-06, + "loss": 0.9431420564651489, + "step": 1776 + }, + { + "epoch": 0.4094470046082949, + "grad_norm": 0.6746261931292995, + "learning_rate": 1.8721187799524846e-06, + "loss": 0.7666694521903992, + "step": 1777 + }, + { + "epoch": 0.4096774193548387, + "grad_norm": 0.6448149830483173, + "learning_rate": 1.871932303478156e-06, + "loss": 0.872551679611206, + "step": 1778 + }, + { + "epoch": 0.4099078341013825, + "grad_norm": 0.6320914450645303, + "learning_rate": 1.8717457004440079e-06, + "loss": 0.7596250176429749, + "step": 1779 + }, + { + "epoch": 0.41013824884792627, + "grad_norm": 0.9751786230729174, + "learning_rate": 1.8715589708771253e-06, + "loss": 1.0098414421081543, + "step": 1780 + }, + { + "epoch": 0.41036866359447005, + "grad_norm": 0.9695096083628231, + "learning_rate": 1.871372114804612e-06, + "loss": 0.9961523413658142, + "step": 1781 + }, + { + "epoch": 0.41059907834101383, + 
"grad_norm": 0.8458697864526913, + "learning_rate": 1.8711851322535896e-06, + "loss": 0.9065390825271606, + "step": 1782 + }, + { + "epoch": 0.4108294930875576, + "grad_norm": 0.5445685826440523, + "learning_rate": 1.8709980232511987e-06, + "loss": 0.7906428575515747, + "step": 1783 + }, + { + "epoch": 0.4110599078341014, + "grad_norm": 0.5783797348856774, + "learning_rate": 1.8708107878245976e-06, + "loss": 0.798285722732544, + "step": 1784 + }, + { + "epoch": 0.4112903225806452, + "grad_norm": 0.7492534516122694, + "learning_rate": 1.870623426000964e-06, + "loss": 0.7809790372848511, + "step": 1785 + }, + { + "epoch": 0.4115207373271889, + "grad_norm": 0.8776810150838931, + "learning_rate": 1.8704359378074921e-06, + "loss": 0.8931630849838257, + "step": 1786 + }, + { + "epoch": 0.4117511520737327, + "grad_norm": 0.6321595970525742, + "learning_rate": 1.870248323271396e-06, + "loss": 0.8219889402389526, + "step": 1787 + }, + { + "epoch": 0.4119815668202765, + "grad_norm": 0.9973808347817518, + "learning_rate": 1.8700605824199084e-06, + "loss": 0.8371819257736206, + "step": 1788 + }, + { + "epoch": 0.41221198156682026, + "grad_norm": 0.7869196176383942, + "learning_rate": 1.8698727152802789e-06, + "loss": 0.951171875, + "step": 1789 + }, + { + "epoch": 0.41244239631336405, + "grad_norm": 0.6763081680317143, + "learning_rate": 1.8696847218797763e-06, + "loss": 0.7678385972976685, + "step": 1790 + }, + { + "epoch": 0.41267281105990783, + "grad_norm": 0.567634539573834, + "learning_rate": 1.8694966022456872e-06, + "loss": 0.9296993017196655, + "step": 1791 + }, + { + "epoch": 0.4129032258064516, + "grad_norm": 0.5450828031444163, + "learning_rate": 1.8693083564053178e-06, + "loss": 0.8991763591766357, + "step": 1792 + }, + { + "epoch": 0.4131336405529954, + "grad_norm": 0.5967294444907658, + "learning_rate": 1.8691199843859913e-06, + "loss": 0.8332901000976562, + "step": 1793 + }, + { + "epoch": 0.4133640552995392, + "grad_norm": 0.7571962190593917, + "learning_rate": 
1.8689314862150497e-06, + "loss": 0.7723548412322998, + "step": 1794 + }, + { + "epoch": 0.41359447004608296, + "grad_norm": 0.6588409150246594, + "learning_rate": 1.868742861919853e-06, + "loss": 0.7768993377685547, + "step": 1795 + }, + { + "epoch": 0.41382488479262675, + "grad_norm": 0.43193778142300604, + "learning_rate": 1.86855411152778e-06, + "loss": 0.6058932542800903, + "step": 1796 + }, + { + "epoch": 0.41405529953917053, + "grad_norm": 0.8667574432138021, + "learning_rate": 1.8683652350662274e-06, + "loss": 0.8711605072021484, + "step": 1797 + }, + { + "epoch": 0.4142857142857143, + "grad_norm": 0.8780154463369872, + "learning_rate": 1.8681762325626104e-06, + "loss": 0.9023469090461731, + "step": 1798 + }, + { + "epoch": 0.41451612903225804, + "grad_norm": 0.6070102500189553, + "learning_rate": 1.867987104044363e-06, + "loss": 0.7735910415649414, + "step": 1799 + }, + { + "epoch": 0.4147465437788018, + "grad_norm": 0.6293725885471063, + "learning_rate": 1.8677978495389364e-06, + "loss": 0.6609020829200745, + "step": 1800 + }, + { + "epoch": 0.4149769585253456, + "grad_norm": 0.6485782104038655, + "learning_rate": 1.8676084690738005e-06, + "loss": 0.7823291420936584, + "step": 1801 + }, + { + "epoch": 0.4152073732718894, + "grad_norm": 0.8472581681306268, + "learning_rate": 1.867418962676444e-06, + "loss": 0.9076563715934753, + "step": 1802 + }, + { + "epoch": 0.4154377880184332, + "grad_norm": 0.561807586977654, + "learning_rate": 1.8672293303743735e-06, + "loss": 0.8645772933959961, + "step": 1803 + }, + { + "epoch": 0.41566820276497696, + "grad_norm": 0.6821058596015542, + "learning_rate": 1.8670395721951135e-06, + "loss": 0.8071421384811401, + "step": 1804 + }, + { + "epoch": 0.41589861751152074, + "grad_norm": 0.7396557376618352, + "learning_rate": 1.8668496881662077e-06, + "loss": 0.8459846377372742, + "step": 1805 + }, + { + "epoch": 0.4161290322580645, + "grad_norm": 0.7167052224732033, + "learning_rate": 1.866659678315217e-06, + "loss": 
0.8467865586280823, + "step": 1806 + }, + { + "epoch": 0.4163594470046083, + "grad_norm": 0.8262164291061972, + "learning_rate": 1.8664695426697215e-06, + "loss": 0.8963291645050049, + "step": 1807 + }, + { + "epoch": 0.4165898617511521, + "grad_norm": 0.528766323006704, + "learning_rate": 1.8662792812573188e-06, + "loss": 0.7901826500892639, + "step": 1808 + }, + { + "epoch": 0.4168202764976959, + "grad_norm": 0.8974116604603759, + "learning_rate": 1.8660888941056252e-06, + "loss": 0.807115912437439, + "step": 1809 + }, + { + "epoch": 0.41705069124423966, + "grad_norm": 0.6271237317374816, + "learning_rate": 1.8658983812422753e-06, + "loss": 0.8439537286758423, + "step": 1810 + }, + { + "epoch": 0.4172811059907834, + "grad_norm": 0.8360600380108553, + "learning_rate": 1.8657077426949214e-06, + "loss": 0.6920834183692932, + "step": 1811 + }, + { + "epoch": 0.41751152073732717, + "grad_norm": 0.7603232216568709, + "learning_rate": 1.865516978491235e-06, + "loss": 0.8712124824523926, + "step": 1812 + }, + { + "epoch": 0.41774193548387095, + "grad_norm": 0.718498571919399, + "learning_rate": 1.865326088658905e-06, + "loss": 0.7720927596092224, + "step": 1813 + }, + { + "epoch": 0.41797235023041474, + "grad_norm": 0.6953832780918029, + "learning_rate": 1.8651350732256386e-06, + "loss": 0.8003814220428467, + "step": 1814 + }, + { + "epoch": 0.4182027649769585, + "grad_norm": 0.838076886250554, + "learning_rate": 1.8649439322191616e-06, + "loss": 0.8999850749969482, + "step": 1815 + }, + { + "epoch": 0.4184331797235023, + "grad_norm": 0.584714014216153, + "learning_rate": 1.8647526656672179e-06, + "loss": 0.6752324104309082, + "step": 1816 + }, + { + "epoch": 0.4186635944700461, + "grad_norm": 0.7365325720475113, + "learning_rate": 1.8645612735975696e-06, + "loss": 0.8521262407302856, + "step": 1817 + }, + { + "epoch": 0.41889400921658987, + "grad_norm": 0.7194058023938104, + "learning_rate": 1.864369756037997e-06, + "loss": 0.8813315629959106, + "step": 1818 + }, + { + 
"epoch": 0.41912442396313365, + "grad_norm": 0.742428235010686, + "learning_rate": 1.8641781130162986e-06, + "loss": 0.8358273506164551, + "step": 1819 + }, + { + "epoch": 0.41935483870967744, + "grad_norm": 0.591500867449821, + "learning_rate": 1.863986344560291e-06, + "loss": 0.8051023483276367, + "step": 1820 + }, + { + "epoch": 0.4195852534562212, + "grad_norm": 0.7791039105049288, + "learning_rate": 1.863794450697809e-06, + "loss": 0.768791675567627, + "step": 1821 + }, + { + "epoch": 0.419815668202765, + "grad_norm": 0.9369354252226071, + "learning_rate": 1.8636024314567065e-06, + "loss": 0.8420040607452393, + "step": 1822 + }, + { + "epoch": 0.42004608294930873, + "grad_norm": 0.673055652482875, + "learning_rate": 1.8634102868648542e-06, + "loss": 0.7670450806617737, + "step": 1823 + }, + { + "epoch": 0.4202764976958525, + "grad_norm": 0.6699812957272996, + "learning_rate": 1.863218016950142e-06, + "loss": 0.8292283415794373, + "step": 1824 + }, + { + "epoch": 0.4205069124423963, + "grad_norm": 0.6058254395333167, + "learning_rate": 1.8630256217404767e-06, + "loss": 0.8005781769752502, + "step": 1825 + }, + { + "epoch": 0.4207373271889401, + "grad_norm": 0.923190166351158, + "learning_rate": 1.8628331012637854e-06, + "loss": 0.8214897513389587, + "step": 1826 + }, + { + "epoch": 0.42096774193548386, + "grad_norm": 0.6734314204378448, + "learning_rate": 1.8626404555480118e-06, + "loss": 0.7938524484634399, + "step": 1827 + }, + { + "epoch": 0.42119815668202765, + "grad_norm": 0.7824933974022145, + "learning_rate": 1.862447684621118e-06, + "loss": 1.0047048330307007, + "step": 1828 + }, + { + "epoch": 0.42142857142857143, + "grad_norm": 0.7060449091561402, + "learning_rate": 1.862254788511084e-06, + "loss": 0.7660601139068604, + "step": 1829 + }, + { + "epoch": 0.4216589861751152, + "grad_norm": 0.7940468118829026, + "learning_rate": 1.8620617672459096e-06, + "loss": 0.8227912783622742, + "step": 1830 + }, + { + "epoch": 0.421889400921659, + "grad_norm": 
0.8322274877206185, + "learning_rate": 1.8618686208536106e-06, + "loss": 0.8570956587791443, + "step": 1831 + }, + { + "epoch": 0.4221198156682028, + "grad_norm": 0.6215191834076389, + "learning_rate": 1.8616753493622221e-06, + "loss": 0.7472532987594604, + "step": 1832 + }, + { + "epoch": 0.42235023041474656, + "grad_norm": 0.702673502332975, + "learning_rate": 1.8614819527997976e-06, + "loss": 0.812872052192688, + "step": 1833 + }, + { + "epoch": 0.42258064516129035, + "grad_norm": 0.7168526420375322, + "learning_rate": 1.861288431194408e-06, + "loss": 0.7801386117935181, + "step": 1834 + }, + { + "epoch": 0.4228110599078341, + "grad_norm": 0.8740851917776313, + "learning_rate": 1.8610947845741426e-06, + "loss": 0.7834687829017639, + "step": 1835 + }, + { + "epoch": 0.42304147465437786, + "grad_norm": 0.8009990500080056, + "learning_rate": 1.8609010129671097e-06, + "loss": 0.786865234375, + "step": 1836 + }, + { + "epoch": 0.42327188940092164, + "grad_norm": 0.6559457181196078, + "learning_rate": 1.860707116401434e-06, + "loss": 0.7728738784790039, + "step": 1837 + }, + { + "epoch": 0.4235023041474654, + "grad_norm": 0.6384024302830484, + "learning_rate": 1.8605130949052598e-06, + "loss": 0.6508793830871582, + "step": 1838 + }, + { + "epoch": 0.4237327188940092, + "grad_norm": 0.6544986461362278, + "learning_rate": 1.8603189485067492e-06, + "loss": 0.7949484586715698, + "step": 1839 + }, + { + "epoch": 0.423963133640553, + "grad_norm": 0.7679729608195138, + "learning_rate": 1.8601246772340822e-06, + "loss": 0.7151408195495605, + "step": 1840 + }, + { + "epoch": 0.4241935483870968, + "grad_norm": 0.6910188883895837, + "learning_rate": 1.859930281115457e-06, + "loss": 0.7678598165512085, + "step": 1841 + }, + { + "epoch": 0.42442396313364056, + "grad_norm": 0.6547923584739629, + "learning_rate": 1.8597357601790895e-06, + "loss": 0.8042058944702148, + "step": 1842 + }, + { + "epoch": 0.42465437788018434, + "grad_norm": 0.6889925049755639, + "learning_rate": 
1.859541114453215e-06, + "loss": 0.7328081130981445, + "step": 1843 + }, + { + "epoch": 0.4248847926267281, + "grad_norm": 0.7385850960276812, + "learning_rate": 1.8593463439660853e-06, + "loss": 0.7646626234054565, + "step": 1844 + }, + { + "epoch": 0.4251152073732719, + "grad_norm": 0.7455331415840897, + "learning_rate": 1.8591514487459717e-06, + "loss": 0.8965721726417542, + "step": 1845 + }, + { + "epoch": 0.4253456221198157, + "grad_norm": 0.6783955368622289, + "learning_rate": 1.8589564288211623e-06, + "loss": 0.8892468810081482, + "step": 1846 + }, + { + "epoch": 0.4255760368663594, + "grad_norm": 0.669354336924349, + "learning_rate": 1.8587612842199648e-06, + "loss": 0.8314409255981445, + "step": 1847 + }, + { + "epoch": 0.4258064516129032, + "grad_norm": 0.7299222952808436, + "learning_rate": 1.8585660149707034e-06, + "loss": 0.7713892459869385, + "step": 1848 + }, + { + "epoch": 0.426036866359447, + "grad_norm": 0.7583328231707663, + "learning_rate": 1.8583706211017216e-06, + "loss": 0.9349459409713745, + "step": 1849 + }, + { + "epoch": 0.42626728110599077, + "grad_norm": 0.7309436500165829, + "learning_rate": 1.8581751026413805e-06, + "loss": 0.8438700437545776, + "step": 1850 + }, + { + "epoch": 0.42649769585253455, + "grad_norm": 1.0171962155435006, + "learning_rate": 1.8579794596180594e-06, + "loss": 0.9559776782989502, + "step": 1851 + }, + { + "epoch": 0.42672811059907834, + "grad_norm": 0.6701533748146308, + "learning_rate": 1.8577836920601556e-06, + "loss": 0.7124872803688049, + "step": 1852 + }, + { + "epoch": 0.4269585253456221, + "grad_norm": 0.8613289026694887, + "learning_rate": 1.8575877999960842e-06, + "loss": 0.7935503125190735, + "step": 1853 + }, + { + "epoch": 0.4271889400921659, + "grad_norm": 0.7107096707504692, + "learning_rate": 1.8573917834542792e-06, + "loss": 0.9145890474319458, + "step": 1854 + }, + { + "epoch": 0.4274193548387097, + "grad_norm": 0.7290504646059204, + "learning_rate": 1.8571956424631918e-06, + "loss": 
0.8239228129386902, + "step": 1855 + }, + { + "epoch": 0.42764976958525347, + "grad_norm": 0.6018983094431002, + "learning_rate": 1.8569993770512916e-06, + "loss": 0.8767688274383545, + "step": 1856 + }, + { + "epoch": 0.42788018433179725, + "grad_norm": 0.6742014961339767, + "learning_rate": 1.8568029872470663e-06, + "loss": 0.7860859632492065, + "step": 1857 + }, + { + "epoch": 0.42811059907834104, + "grad_norm": 0.6990668023927343, + "learning_rate": 1.8566064730790218e-06, + "loss": 0.8855729103088379, + "step": 1858 + }, + { + "epoch": 0.4283410138248848, + "grad_norm": 0.8518974155898882, + "learning_rate": 1.8564098345756815e-06, + "loss": 1.023299217224121, + "step": 1859 + }, + { + "epoch": 0.42857142857142855, + "grad_norm": 0.7174059285774532, + "learning_rate": 1.8562130717655878e-06, + "loss": 0.7665202617645264, + "step": 1860 + }, + { + "epoch": 0.42880184331797233, + "grad_norm": 0.7036772811538429, + "learning_rate": 1.8560161846773e-06, + "loss": 0.8456651568412781, + "step": 1861 + }, + { + "epoch": 0.4290322580645161, + "grad_norm": 0.7229483822116546, + "learning_rate": 1.8558191733393964e-06, + "loss": 0.8920061588287354, + "step": 1862 + }, + { + "epoch": 0.4292626728110599, + "grad_norm": 0.8104170426239989, + "learning_rate": 1.8556220377804723e-06, + "loss": 0.8686853051185608, + "step": 1863 + }, + { + "epoch": 0.4294930875576037, + "grad_norm": 0.5832986779631602, + "learning_rate": 1.8554247780291425e-06, + "loss": 0.6976242065429688, + "step": 1864 + }, + { + "epoch": 0.42972350230414746, + "grad_norm": 0.7347161353185314, + "learning_rate": 1.8552273941140387e-06, + "loss": 0.9612032771110535, + "step": 1865 + }, + { + "epoch": 0.42995391705069125, + "grad_norm": 0.6243829709767468, + "learning_rate": 1.8550298860638108e-06, + "loss": 0.9288003444671631, + "step": 1866 + }, + { + "epoch": 0.43018433179723503, + "grad_norm": 0.6743712494799082, + "learning_rate": 1.8548322539071263e-06, + "loss": 0.8397525548934937, + "step": 1867 + }, 
+ { + "epoch": 0.4304147465437788, + "grad_norm": 0.5881426126037044, + "learning_rate": 1.8546344976726722e-06, + "loss": 0.6311365365982056, + "step": 1868 + }, + { + "epoch": 0.4306451612903226, + "grad_norm": 0.7497017851812813, + "learning_rate": 1.8544366173891523e-06, + "loss": 0.7868270874023438, + "step": 1869 + }, + { + "epoch": 0.4308755760368664, + "grad_norm": 0.6265515804052451, + "learning_rate": 1.8542386130852883e-06, + "loss": 0.9197052717208862, + "step": 1870 + }, + { + "epoch": 0.43110599078341016, + "grad_norm": 0.7018278829983491, + "learning_rate": 1.8540404847898206e-06, + "loss": 0.7875635027885437, + "step": 1871 + }, + { + "epoch": 0.4313364055299539, + "grad_norm": 0.7789284724063816, + "learning_rate": 1.853842232531507e-06, + "loss": 0.9805077910423279, + "step": 1872 + }, + { + "epoch": 0.4315668202764977, + "grad_norm": 0.838470325159009, + "learning_rate": 1.8536438563391236e-06, + "loss": 0.8906866312026978, + "step": 1873 + }, + { + "epoch": 0.43179723502304146, + "grad_norm": 0.73247587866706, + "learning_rate": 1.8534453562414649e-06, + "loss": 0.7506693601608276, + "step": 1874 + }, + { + "epoch": 0.43202764976958524, + "grad_norm": 0.6576915367586517, + "learning_rate": 1.8532467322673422e-06, + "loss": 0.6173181533813477, + "step": 1875 + }, + { + "epoch": 0.432258064516129, + "grad_norm": 0.6907344817423696, + "learning_rate": 1.853047984445586e-06, + "loss": 0.9217972755432129, + "step": 1876 + }, + { + "epoch": 0.4324884792626728, + "grad_norm": 0.8808471726659616, + "learning_rate": 1.8528491128050442e-06, + "loss": 0.8300588130950928, + "step": 1877 + }, + { + "epoch": 0.4327188940092166, + "grad_norm": 0.7869544847637374, + "learning_rate": 1.8526501173745826e-06, + "loss": 0.8109279870986938, + "step": 1878 + }, + { + "epoch": 0.4329493087557604, + "grad_norm": 0.8253705845492948, + "learning_rate": 1.852450998183085e-06, + "loss": 0.9243700504302979, + "step": 1879 + }, + { + "epoch": 0.43317972350230416, + 
"grad_norm": 0.7291726511705204, + "learning_rate": 1.8522517552594539e-06, + "loss": 0.7983531951904297, + "step": 1880 + }, + { + "epoch": 0.43341013824884794, + "grad_norm": 0.837506072245515, + "learning_rate": 1.8520523886326088e-06, + "loss": 0.9931240081787109, + "step": 1881 + }, + { + "epoch": 0.4336405529953917, + "grad_norm": 0.7782064692415819, + "learning_rate": 1.8518528983314874e-06, + "loss": 0.923255443572998, + "step": 1882 + }, + { + "epoch": 0.4338709677419355, + "grad_norm": 0.5003052765919304, + "learning_rate": 1.8516532843850454e-06, + "loss": 0.8470325469970703, + "step": 1883 + }, + { + "epoch": 0.43410138248847924, + "grad_norm": 0.7497886449083292, + "learning_rate": 1.8514535468222566e-06, + "loss": 0.9175074696540833, + "step": 1884 + }, + { + "epoch": 0.434331797235023, + "grad_norm": 0.7474680310474195, + "learning_rate": 1.8512536856721126e-06, + "loss": 0.8617827892303467, + "step": 1885 + }, + { + "epoch": 0.4345622119815668, + "grad_norm": 0.6779026169933022, + "learning_rate": 1.8510537009636231e-06, + "loss": 0.6787248849868774, + "step": 1886 + }, + { + "epoch": 0.4347926267281106, + "grad_norm": 0.6948062534132075, + "learning_rate": 1.8508535927258157e-06, + "loss": 0.8031569719314575, + "step": 1887 + }, + { + "epoch": 0.43502304147465437, + "grad_norm": 0.8219581995376891, + "learning_rate": 1.8506533609877354e-06, + "loss": 1.0252577066421509, + "step": 1888 + }, + { + "epoch": 0.43525345622119815, + "grad_norm": 0.6297691459816858, + "learning_rate": 1.850453005778446e-06, + "loss": 0.7947444915771484, + "step": 1889 + }, + { + "epoch": 0.43548387096774194, + "grad_norm": 0.7974729793994046, + "learning_rate": 1.8502525271270288e-06, + "loss": 0.817523717880249, + "step": 1890 + }, + { + "epoch": 0.4357142857142857, + "grad_norm": 0.905445482286677, + "learning_rate": 1.850051925062583e-06, + "loss": 0.8029658794403076, + "step": 1891 + }, + { + "epoch": 0.4359447004608295, + "grad_norm": 0.7902601112013473, + 
"learning_rate": 1.8498511996142253e-06, + "loss": 0.871408224105835, + "step": 1892 + }, + { + "epoch": 0.4361751152073733, + "grad_norm": 0.7279346643764769, + "learning_rate": 1.849650350811091e-06, + "loss": 1.0133098363876343, + "step": 1893 + }, + { + "epoch": 0.43640552995391707, + "grad_norm": 0.5859043876213773, + "learning_rate": 1.8494493786823333e-06, + "loss": 0.8320624828338623, + "step": 1894 + }, + { + "epoch": 0.43663594470046085, + "grad_norm": 0.7240549495084485, + "learning_rate": 1.8492482832571225e-06, + "loss": 0.7757631540298462, + "step": 1895 + }, + { + "epoch": 0.4368663594470046, + "grad_norm": 0.7606146142454437, + "learning_rate": 1.8490470645646479e-06, + "loss": 0.8503100872039795, + "step": 1896 + }, + { + "epoch": 0.43709677419354837, + "grad_norm": 0.7560932530175453, + "learning_rate": 1.8488457226341158e-06, + "loss": 0.8145939707756042, + "step": 1897 + }, + { + "epoch": 0.43732718894009215, + "grad_norm": 0.8041258430075643, + "learning_rate": 1.848644257494751e-06, + "loss": 0.831500232219696, + "step": 1898 + }, + { + "epoch": 0.43755760368663593, + "grad_norm": 0.6473340838552745, + "learning_rate": 1.8484426691757956e-06, + "loss": 0.9340692758560181, + "step": 1899 + }, + { + "epoch": 0.4377880184331797, + "grad_norm": 0.7851684163129825, + "learning_rate": 1.8482409577065097e-06, + "loss": 1.011988639831543, + "step": 1900 + }, + { + "epoch": 0.4380184331797235, + "grad_norm": 0.6819650200659566, + "learning_rate": 1.848039123116172e-06, + "loss": 0.8110378980636597, + "step": 1901 + }, + { + "epoch": 0.4382488479262673, + "grad_norm": 0.6310651453357742, + "learning_rate": 1.8478371654340779e-06, + "loss": 0.8230330944061279, + "step": 1902 + }, + { + "epoch": 0.43847926267281107, + "grad_norm": 0.8335502206603579, + "learning_rate": 1.8476350846895419e-06, + "loss": 0.875052809715271, + "step": 1903 + }, + { + "epoch": 0.43870967741935485, + "grad_norm": 0.7394371211482306, + "learning_rate": 1.8474328809118953e-06, + 
"loss": 0.9373071193695068, + "step": 1904 + }, + { + "epoch": 0.43894009216589863, + "grad_norm": 0.7538115820848524, + "learning_rate": 1.847230554130488e-06, + "loss": 0.8341633677482605, + "step": 1905 + }, + { + "epoch": 0.4391705069124424, + "grad_norm": 0.6579829053639499, + "learning_rate": 1.8470281043746873e-06, + "loss": 0.8147767782211304, + "step": 1906 + }, + { + "epoch": 0.4394009216589862, + "grad_norm": 0.6022228592985512, + "learning_rate": 1.8468255316738785e-06, + "loss": 0.740512490272522, + "step": 1907 + }, + { + "epoch": 0.4396313364055299, + "grad_norm": 0.7743265443588842, + "learning_rate": 1.846622836057465e-06, + "loss": 0.7754743099212646, + "step": 1908 + }, + { + "epoch": 0.4398617511520737, + "grad_norm": 0.7535493986684056, + "learning_rate": 1.8464200175548677e-06, + "loss": 0.9131484031677246, + "step": 1909 + }, + { + "epoch": 0.4400921658986175, + "grad_norm": 0.7099012564704421, + "learning_rate": 1.8462170761955252e-06, + "loss": 0.7084713578224182, + "step": 1910 + }, + { + "epoch": 0.4403225806451613, + "grad_norm": 0.7949281739735957, + "learning_rate": 1.8460140120088945e-06, + "loss": 0.8535224199295044, + "step": 1911 + }, + { + "epoch": 0.44055299539170506, + "grad_norm": 0.8579322326008002, + "learning_rate": 1.8458108250244498e-06, + "loss": 0.7661323547363281, + "step": 1912 + }, + { + "epoch": 0.44078341013824884, + "grad_norm": 0.7355189670899542, + "learning_rate": 1.8456075152716837e-06, + "loss": 0.8064024448394775, + "step": 1913 + }, + { + "epoch": 0.4410138248847926, + "grad_norm": 0.7422340222781728, + "learning_rate": 1.8454040827801058e-06, + "loss": 0.7858735918998718, + "step": 1914 + }, + { + "epoch": 0.4412442396313364, + "grad_norm": 0.6589873136371734, + "learning_rate": 1.8452005275792448e-06, + "loss": 0.9251735210418701, + "step": 1915 + }, + { + "epoch": 0.4414746543778802, + "grad_norm": 0.718018605876598, + "learning_rate": 1.8449968496986461e-06, + "loss": 0.7237124443054199, + "step": 1916 + 
}, + { + "epoch": 0.441705069124424, + "grad_norm": 0.7573893032737062, + "learning_rate": 1.8447930491678732e-06, + "loss": 0.8939133882522583, + "step": 1917 + }, + { + "epoch": 0.44193548387096776, + "grad_norm": 0.8373489922925343, + "learning_rate": 1.8445891260165076e-06, + "loss": 0.8815577626228333, + "step": 1918 + }, + { + "epoch": 0.44216589861751154, + "grad_norm": 0.8703539982402225, + "learning_rate": 1.8443850802741485e-06, + "loss": 0.943426787853241, + "step": 1919 + }, + { + "epoch": 0.4423963133640553, + "grad_norm": 0.6998600920537428, + "learning_rate": 1.8441809119704126e-06, + "loss": 0.8001632690429688, + "step": 1920 + }, + { + "epoch": 0.44262672811059905, + "grad_norm": 0.8531362441371287, + "learning_rate": 1.8439766211349352e-06, + "loss": 0.8656308650970459, + "step": 1921 + }, + { + "epoch": 0.44285714285714284, + "grad_norm": 0.7261410922718881, + "learning_rate": 1.8437722077973686e-06, + "loss": 0.9774024486541748, + "step": 1922 + }, + { + "epoch": 0.4430875576036866, + "grad_norm": 0.728823767818971, + "learning_rate": 1.8435676719873827e-06, + "loss": 0.7655738592147827, + "step": 1923 + }, + { + "epoch": 0.4433179723502304, + "grad_norm": 0.6595509202419896, + "learning_rate": 1.8433630137346657e-06, + "loss": 0.6455004811286926, + "step": 1924 + }, + { + "epoch": 0.4435483870967742, + "grad_norm": 0.7214853647491487, + "learning_rate": 1.8431582330689243e-06, + "loss": 0.8221153020858765, + "step": 1925 + }, + { + "epoch": 0.44377880184331797, + "grad_norm": 0.7718374957528886, + "learning_rate": 1.8429533300198816e-06, + "loss": 0.7878339886665344, + "step": 1926 + }, + { + "epoch": 0.44400921658986175, + "grad_norm": 0.7666174978175726, + "learning_rate": 1.8427483046172787e-06, + "loss": 0.8292763829231262, + "step": 1927 + }, + { + "epoch": 0.44423963133640554, + "grad_norm": 0.7395800766154846, + "learning_rate": 1.842543156890875e-06, + "loss": 0.7774572372436523, + "step": 1928 + }, + { + "epoch": 0.4444700460829493, + 
"grad_norm": 0.7419338266362171, + "learning_rate": 1.8423378868704476e-06, + "loss": 0.7327601909637451, + "step": 1929 + }, + { + "epoch": 0.4447004608294931, + "grad_norm": 0.7176112305038147, + "learning_rate": 1.8421324945857909e-06, + "loss": 0.8067511320114136, + "step": 1930 + }, + { + "epoch": 0.4449308755760369, + "grad_norm": 0.780684647138278, + "learning_rate": 1.8419269800667173e-06, + "loss": 0.851010799407959, + "step": 1931 + }, + { + "epoch": 0.44516129032258067, + "grad_norm": 0.7848772154457995, + "learning_rate": 1.8417213433430576e-06, + "loss": 0.8402234315872192, + "step": 1932 + }, + { + "epoch": 0.4453917050691244, + "grad_norm": 0.7848428302916386, + "learning_rate": 1.8415155844446591e-06, + "loss": 0.8857355117797852, + "step": 1933 + }, + { + "epoch": 0.4456221198156682, + "grad_norm": 0.6465222204250215, + "learning_rate": 1.841309703401387e-06, + "loss": 0.7517881393432617, + "step": 1934 + }, + { + "epoch": 0.44585253456221197, + "grad_norm": 0.8220839741097039, + "learning_rate": 1.8411037002431257e-06, + "loss": 0.8583779335021973, + "step": 1935 + }, + { + "epoch": 0.44608294930875575, + "grad_norm": 0.7149579567670102, + "learning_rate": 1.8408975749997758e-06, + "loss": 0.7691524028778076, + "step": 1936 + }, + { + "epoch": 0.44631336405529953, + "grad_norm": 0.6891731440130011, + "learning_rate": 1.8406913277012558e-06, + "loss": 0.9164496660232544, + "step": 1937 + }, + { + "epoch": 0.4465437788018433, + "grad_norm": 0.6382978906826758, + "learning_rate": 1.8404849583775025e-06, + "loss": 0.843226432800293, + "step": 1938 + }, + { + "epoch": 0.4467741935483871, + "grad_norm": 0.843769912689158, + "learning_rate": 1.8402784670584706e-06, + "loss": 0.8492633104324341, + "step": 1939 + }, + { + "epoch": 0.4470046082949309, + "grad_norm": 0.7117202181402426, + "learning_rate": 1.8400718537741314e-06, + "loss": 0.8088324069976807, + "step": 1940 + }, + { + "epoch": 0.44723502304147467, + "grad_norm": 0.8584564611753391, + 
"learning_rate": 1.8398651185544746e-06, + "loss": 0.8879667520523071, + "step": 1941 + }, + { + "epoch": 0.44746543778801845, + "grad_norm": 0.6515549607308898, + "learning_rate": 1.8396582614295078e-06, + "loss": 0.8926588892936707, + "step": 1942 + }, + { + "epoch": 0.44769585253456223, + "grad_norm": 0.6885634929225364, + "learning_rate": 1.8394512824292558e-06, + "loss": 0.8007583618164062, + "step": 1943 + }, + { + "epoch": 0.447926267281106, + "grad_norm": 0.6940540666117992, + "learning_rate": 1.8392441815837613e-06, + "loss": 0.7420827746391296, + "step": 1944 + }, + { + "epoch": 0.44815668202764974, + "grad_norm": 0.6846873323136197, + "learning_rate": 1.839036958923085e-06, + "loss": 0.7653264999389648, + "step": 1945 + }, + { + "epoch": 0.4483870967741935, + "grad_norm": 0.6684685460178057, + "learning_rate": 1.838829614477305e-06, + "loss": 0.886576771736145, + "step": 1946 + }, + { + "epoch": 0.4486175115207373, + "grad_norm": 0.7769567865097903, + "learning_rate": 1.8386221482765168e-06, + "loss": 0.904376745223999, + "step": 1947 + }, + { + "epoch": 0.4488479262672811, + "grad_norm": 0.6833196213451335, + "learning_rate": 1.838414560350834e-06, + "loss": 0.6791579723358154, + "step": 1948 + }, + { + "epoch": 0.4490783410138249, + "grad_norm": 0.8296885335278092, + "learning_rate": 1.838206850730388e-06, + "loss": 0.9402183294296265, + "step": 1949 + }, + { + "epoch": 0.44930875576036866, + "grad_norm": 0.9215175287627321, + "learning_rate": 1.8379990194453265e-06, + "loss": 0.9756022691726685, + "step": 1950 + }, + { + "epoch": 0.44953917050691244, + "grad_norm": 0.9502651388093868, + "learning_rate": 1.8377910665258173e-06, + "loss": 0.7311051487922668, + "step": 1951 + }, + { + "epoch": 0.4497695852534562, + "grad_norm": 0.5687721596613555, + "learning_rate": 1.8375829920020438e-06, + "loss": 0.6966956853866577, + "step": 1952 + }, + { + "epoch": 0.45, + "grad_norm": 0.7191813033419734, + "learning_rate": 1.8373747959042076e-06, + "loss": 
0.7327426671981812, + "step": 1953 + }, + { + "epoch": 0.4502304147465438, + "grad_norm": 0.8067848664348717, + "learning_rate": 1.8371664782625285e-06, + "loss": 0.8650925755500793, + "step": 1954 + }, + { + "epoch": 0.4504608294930876, + "grad_norm": 0.8028206677205298, + "learning_rate": 1.8369580391072431e-06, + "loss": 0.876739501953125, + "step": 1955 + }, + { + "epoch": 0.45069124423963136, + "grad_norm": 0.7092651204784524, + "learning_rate": 1.8367494784686066e-06, + "loss": 0.7787455320358276, + "step": 1956 + }, + { + "epoch": 0.4509216589861751, + "grad_norm": 0.7762123563340246, + "learning_rate": 1.836540796376891e-06, + "loss": 0.8874029517173767, + "step": 1957 + }, + { + "epoch": 0.4511520737327189, + "grad_norm": 0.7670080315961673, + "learning_rate": 1.8363319928623862e-06, + "loss": 0.8944835662841797, + "step": 1958 + }, + { + "epoch": 0.45138248847926266, + "grad_norm": 0.570293089893543, + "learning_rate": 1.8361230679553996e-06, + "loss": 0.7106739282608032, + "step": 1959 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.7068996407627426, + "learning_rate": 1.835914021686257e-06, + "loss": 0.8668634295463562, + "step": 1960 + }, + { + "epoch": 0.4518433179723502, + "grad_norm": 0.7818076957354034, + "learning_rate": 1.8357048540853003e-06, + "loss": 0.8123712539672852, + "step": 1961 + }, + { + "epoch": 0.452073732718894, + "grad_norm": 0.7369058807274856, + "learning_rate": 1.8354955651828907e-06, + "loss": 0.865728497505188, + "step": 1962 + }, + { + "epoch": 0.4523041474654378, + "grad_norm": 0.7502978391788373, + "learning_rate": 1.8352861550094056e-06, + "loss": 0.8066651225090027, + "step": 1963 + }, + { + "epoch": 0.4525345622119816, + "grad_norm": 1.2076261262226256, + "learning_rate": 1.835076623595241e-06, + "loss": 1.020591139793396, + "step": 1964 + }, + { + "epoch": 0.45276497695852536, + "grad_norm": 0.7642119123557376, + "learning_rate": 1.83486697097081e-06, + "loss": 0.839346706867218, + "step": 1965 + }, + { + 
"epoch": 0.45299539170506914, + "grad_norm": 0.663652311830839, + "learning_rate": 1.8346571971665434e-06, + "loss": 0.7707340121269226, + "step": 1966 + }, + { + "epoch": 0.4532258064516129, + "grad_norm": 0.6603686601649886, + "learning_rate": 1.8344473022128897e-06, + "loss": 0.7969534397125244, + "step": 1967 + }, + { + "epoch": 0.4534562211981567, + "grad_norm": 0.8431782882642489, + "learning_rate": 1.8342372861403143e-06, + "loss": 0.9371283650398254, + "step": 1968 + }, + { + "epoch": 0.45368663594470043, + "grad_norm": 0.7102966402282939, + "learning_rate": 1.8340271489793015e-06, + "loss": 0.7915256023406982, + "step": 1969 + }, + { + "epoch": 0.4539170506912442, + "grad_norm": 0.6028172078632871, + "learning_rate": 1.8338168907603522e-06, + "loss": 0.8394884467124939, + "step": 1970 + }, + { + "epoch": 0.454147465437788, + "grad_norm": 0.8133055611447335, + "learning_rate": 1.833606511513985e-06, + "loss": 0.7786067128181458, + "step": 1971 + }, + { + "epoch": 0.4543778801843318, + "grad_norm": 0.905741517676821, + "learning_rate": 1.833396011270736e-06, + "loss": 0.9237443208694458, + "step": 1972 + }, + { + "epoch": 0.45460829493087557, + "grad_norm": 0.9055049100464759, + "learning_rate": 1.8331853900611596e-06, + "loss": 0.7530162334442139, + "step": 1973 + }, + { + "epoch": 0.45483870967741935, + "grad_norm": 0.7172947421019107, + "learning_rate": 1.8329746479158263e-06, + "loss": 0.8349624872207642, + "step": 1974 + }, + { + "epoch": 0.45506912442396313, + "grad_norm": 0.9222448487169791, + "learning_rate": 1.8327637848653259e-06, + "loss": 0.8748637437820435, + "step": 1975 + }, + { + "epoch": 0.4552995391705069, + "grad_norm": 0.7416851295200875, + "learning_rate": 1.832552800940265e-06, + "loss": 0.9111478924751282, + "step": 1976 + }, + { + "epoch": 0.4555299539170507, + "grad_norm": 0.6251856024732342, + "learning_rate": 1.8323416961712665e-06, + "loss": 0.8108797073364258, + "step": 1977 + }, + { + "epoch": 0.4557603686635945, + "grad_norm": 
0.9459625715160394, + "learning_rate": 1.832130470588973e-06, + "loss": 0.9266520738601685, + "step": 1978 + }, + { + "epoch": 0.45599078341013827, + "grad_norm": 0.7773850051724754, + "learning_rate": 1.831919124224043e-06, + "loss": 0.9092522859573364, + "step": 1979 + }, + { + "epoch": 0.45622119815668205, + "grad_norm": 0.664954530341155, + "learning_rate": 1.8317076571071536e-06, + "loss": 0.8249068260192871, + "step": 1980 + }, + { + "epoch": 0.45645161290322583, + "grad_norm": 0.770896895795481, + "learning_rate": 1.8314960692689992e-06, + "loss": 0.7497084140777588, + "step": 1981 + }, + { + "epoch": 0.45668202764976956, + "grad_norm": 0.7450904317902424, + "learning_rate": 1.8312843607402907e-06, + "loss": 0.7360142469406128, + "step": 1982 + }, + { + "epoch": 0.45691244239631335, + "grad_norm": 0.7224490513690306, + "learning_rate": 1.8310725315517578e-06, + "loss": 0.8443512320518494, + "step": 1983 + }, + { + "epoch": 0.45714285714285713, + "grad_norm": 0.6770718154001021, + "learning_rate": 1.830860581734147e-06, + "loss": 0.7995656728744507, + "step": 1984 + }, + { + "epoch": 0.4573732718894009, + "grad_norm": 0.8305927985197211, + "learning_rate": 1.8306485113182229e-06, + "loss": 0.7396436929702759, + "step": 1985 + }, + { + "epoch": 0.4576036866359447, + "grad_norm": 0.7351757860546534, + "learning_rate": 1.8304363203347668e-06, + "loss": 0.7415385246276855, + "step": 1986 + }, + { + "epoch": 0.4578341013824885, + "grad_norm": 0.8416697439034252, + "learning_rate": 1.8302240088145784e-06, + "loss": 0.9316694736480713, + "step": 1987 + }, + { + "epoch": 0.45806451612903226, + "grad_norm": 0.6482250359686991, + "learning_rate": 1.830011576788474e-06, + "loss": 0.7692697048187256, + "step": 1988 + }, + { + "epoch": 0.45829493087557605, + "grad_norm": 0.7546540101557039, + "learning_rate": 1.829799024287288e-06, + "loss": 0.8377524614334106, + "step": 1989 + }, + { + "epoch": 0.45852534562211983, + "grad_norm": 0.800432018333432, + "learning_rate": 
1.8295863513418724e-06, + "loss": 0.8005630970001221, + "step": 1990 + }, + { + "epoch": 0.4587557603686636, + "grad_norm": 0.6132717130341248, + "learning_rate": 1.829373557983096e-06, + "loss": 0.8609297275543213, + "step": 1991 + }, + { + "epoch": 0.4589861751152074, + "grad_norm": 0.7611348757483902, + "learning_rate": 1.8291606442418454e-06, + "loss": 0.9111521244049072, + "step": 1992 + }, + { + "epoch": 0.4592165898617512, + "grad_norm": 0.6486046074488622, + "learning_rate": 1.8289476101490254e-06, + "loss": 0.7540388107299805, + "step": 1993 + }, + { + "epoch": 0.4594470046082949, + "grad_norm": 0.7891604292973137, + "learning_rate": 1.8287344557355565e-06, + "loss": 0.9018936157226562, + "step": 1994 + }, + { + "epoch": 0.4596774193548387, + "grad_norm": 0.8558307889574596, + "learning_rate": 1.8285211810323791e-06, + "loss": 0.918912947177887, + "step": 1995 + }, + { + "epoch": 0.4599078341013825, + "grad_norm": 0.6889746928021416, + "learning_rate": 1.8283077860704488e-06, + "loss": 0.7777351140975952, + "step": 1996 + }, + { + "epoch": 0.46013824884792626, + "grad_norm": 0.8546199279018112, + "learning_rate": 1.82809427088074e-06, + "loss": 0.9283437132835388, + "step": 1997 + }, + { + "epoch": 0.46036866359447004, + "grad_norm": 0.7206983576837674, + "learning_rate": 1.8278806354942442e-06, + "loss": 0.7032894492149353, + "step": 1998 + }, + { + "epoch": 0.4605990783410138, + "grad_norm": 0.7084552833839082, + "learning_rate": 1.8276668799419696e-06, + "loss": 0.8392905592918396, + "step": 1999 + }, + { + "epoch": 0.4608294930875576, + "grad_norm": 0.8216520324249929, + "learning_rate": 1.8274530042549434e-06, + "loss": 0.8059369325637817, + "step": 2000 + }, + { + "epoch": 0.4610599078341014, + "grad_norm": 0.7022225516164876, + "learning_rate": 1.827239008464209e-06, + "loss": 0.7738519906997681, + "step": 2001 + }, + { + "epoch": 0.4612903225806452, + "grad_norm": 0.894321981759021, + "learning_rate": 1.8270248926008275e-06, + "loss": 
0.9189014434814453, + "step": 2002 + }, + { + "epoch": 0.46152073732718896, + "grad_norm": 0.9750927332357222, + "learning_rate": 1.8268106566958782e-06, + "loss": 0.8878552913665771, + "step": 2003 + }, + { + "epoch": 0.46175115207373274, + "grad_norm": 0.7601663032895281, + "learning_rate": 1.826596300780456e-06, + "loss": 0.9786058664321899, + "step": 2004 + }, + { + "epoch": 0.4619815668202765, + "grad_norm": 0.7513085122069586, + "learning_rate": 1.8263818248856754e-06, + "loss": 0.7887653112411499, + "step": 2005 + }, + { + "epoch": 0.46221198156682025, + "grad_norm": 0.7571825247765968, + "learning_rate": 1.8261672290426668e-06, + "loss": 0.8773549795150757, + "step": 2006 + }, + { + "epoch": 0.46244239631336403, + "grad_norm": 0.6543768471355319, + "learning_rate": 1.8259525132825786e-06, + "loss": 0.6929831504821777, + "step": 2007 + }, + { + "epoch": 0.4626728110599078, + "grad_norm": 0.8544099497368944, + "learning_rate": 1.8257376776365765e-06, + "loss": 0.9438232183456421, + "step": 2008 + }, + { + "epoch": 0.4629032258064516, + "grad_norm": 0.6803330432545487, + "learning_rate": 1.8255227221358435e-06, + "loss": 0.7559594511985779, + "step": 2009 + }, + { + "epoch": 0.4631336405529954, + "grad_norm": 0.7347158890455135, + "learning_rate": 1.8253076468115805e-06, + "loss": 0.8990212678909302, + "step": 2010 + }, + { + "epoch": 0.46336405529953917, + "grad_norm": 0.7325838411869188, + "learning_rate": 1.825092451695005e-06, + "loss": 0.8638331890106201, + "step": 2011 + }, + { + "epoch": 0.46359447004608295, + "grad_norm": 0.7537964319175384, + "learning_rate": 1.8248771368173522e-06, + "loss": 0.9262570142745972, + "step": 2012 + }, + { + "epoch": 0.46382488479262673, + "grad_norm": 0.770620841657562, + "learning_rate": 1.8246617022098754e-06, + "loss": 0.7412514090538025, + "step": 2013 + }, + { + "epoch": 0.4640552995391705, + "grad_norm": 0.8304378021605247, + "learning_rate": 1.8244461479038437e-06, + "loss": 0.8680287599563599, + "step": 2014 + }, 
+ { + "epoch": 0.4642857142857143, + "grad_norm": 0.7004084931574237, + "learning_rate": 1.8242304739305457e-06, + "loss": 0.7774302959442139, + "step": 2015 + }, + { + "epoch": 0.4645161290322581, + "grad_norm": 0.8275882534036313, + "learning_rate": 1.824014680321285e-06, + "loss": 0.9278442859649658, + "step": 2016 + }, + { + "epoch": 0.46474654377880187, + "grad_norm": 0.6808747325759799, + "learning_rate": 1.8237987671073846e-06, + "loss": 0.9617106914520264, + "step": 2017 + }, + { + "epoch": 0.4649769585253456, + "grad_norm": 0.682915952128137, + "learning_rate": 1.8235827343201838e-06, + "loss": 0.7983255386352539, + "step": 2018 + }, + { + "epoch": 0.4652073732718894, + "grad_norm": 0.7878897167758285, + "learning_rate": 1.8233665819910393e-06, + "loss": 0.7966747283935547, + "step": 2019 + }, + { + "epoch": 0.46543778801843316, + "grad_norm": 0.893729443286113, + "learning_rate": 1.8231503101513253e-06, + "loss": 0.8977803587913513, + "step": 2020 + }, + { + "epoch": 0.46566820276497695, + "grad_norm": 0.6522874054217892, + "learning_rate": 1.8229339188324334e-06, + "loss": 0.7098231911659241, + "step": 2021 + }, + { + "epoch": 0.46589861751152073, + "grad_norm": 0.6971785978535421, + "learning_rate": 1.822717408065773e-06, + "loss": 0.6402776837348938, + "step": 2022 + }, + { + "epoch": 0.4661290322580645, + "grad_norm": 0.7272467550896602, + "learning_rate": 1.8225007778827698e-06, + "loss": 0.797479510307312, + "step": 2023 + }, + { + "epoch": 0.4663594470046083, + "grad_norm": 0.7464543289112394, + "learning_rate": 1.8222840283148675e-06, + "loss": 0.8205317258834839, + "step": 2024 + }, + { + "epoch": 0.4665898617511521, + "grad_norm": 0.755319646803663, + "learning_rate": 1.822067159393527e-06, + "loss": 0.8123108148574829, + "step": 2025 + }, + { + "epoch": 0.46682027649769586, + "grad_norm": 0.7470494916721893, + "learning_rate": 1.8218501711502262e-06, + "loss": 0.9103116989135742, + "step": 2026 + }, + { + "epoch": 0.46705069124423965, + 
"grad_norm": 0.8399971318490079, + "learning_rate": 1.8216330636164617e-06, + "loss": 0.725040078163147, + "step": 2027 + }, + { + "epoch": 0.46728110599078343, + "grad_norm": 0.8693243601175246, + "learning_rate": 1.8214158368237456e-06, + "loss": 0.8598217964172363, + "step": 2028 + }, + { + "epoch": 0.4675115207373272, + "grad_norm": 0.9587381766929439, + "learning_rate": 1.821198490803608e-06, + "loss": 0.9139465093612671, + "step": 2029 + }, + { + "epoch": 0.46774193548387094, + "grad_norm": 0.7850806397253399, + "learning_rate": 1.8209810255875966e-06, + "loss": 0.8331620097160339, + "step": 2030 + }, + { + "epoch": 0.4679723502304147, + "grad_norm": 0.8908286579751021, + "learning_rate": 1.8207634412072764e-06, + "loss": 0.7901387810707092, + "step": 2031 + }, + { + "epoch": 0.4682027649769585, + "grad_norm": 0.6861413854458724, + "learning_rate": 1.8205457376942288e-06, + "loss": 0.7651060819625854, + "step": 2032 + }, + { + "epoch": 0.4684331797235023, + "grad_norm": 0.7738923235394239, + "learning_rate": 1.820327915080054e-06, + "loss": 0.7382134199142456, + "step": 2033 + }, + { + "epoch": 0.4686635944700461, + "grad_norm": 0.6962774548883505, + "learning_rate": 1.8201099733963682e-06, + "loss": 0.7851507067680359, + "step": 2034 + }, + { + "epoch": 0.46889400921658986, + "grad_norm": 0.8995005169228616, + "learning_rate": 1.8198919126748056e-06, + "loss": 0.9357708692550659, + "step": 2035 + }, + { + "epoch": 0.46912442396313364, + "grad_norm": 0.8238296907521364, + "learning_rate": 1.819673732947017e-06, + "loss": 0.8188502788543701, + "step": 2036 + }, + { + "epoch": 0.4693548387096774, + "grad_norm": 1.0258349340262545, + "learning_rate": 1.8194554342446712e-06, + "loss": 0.81590735912323, + "step": 2037 + }, + { + "epoch": 0.4695852534562212, + "grad_norm": 0.811644542087897, + "learning_rate": 1.8192370165994544e-06, + "loss": 0.6879743933677673, + "step": 2038 + }, + { + "epoch": 0.469815668202765, + "grad_norm": 0.8669848845646889, + 
"learning_rate": 1.8190184800430686e-06, + "loss": 0.9287742376327515, + "step": 2039 + }, + { + "epoch": 0.4700460829493088, + "grad_norm": 0.9807524438459786, + "learning_rate": 1.818799824607235e-06, + "loss": 0.9625484943389893, + "step": 2040 + }, + { + "epoch": 0.47027649769585256, + "grad_norm": 0.8259194997097902, + "learning_rate": 1.8185810503236904e-06, + "loss": 0.8267782926559448, + "step": 2041 + }, + { + "epoch": 0.4705069124423963, + "grad_norm": 0.8404148332122154, + "learning_rate": 1.8183621572241904e-06, + "loss": 0.8827054500579834, + "step": 2042 + }, + { + "epoch": 0.47073732718894007, + "grad_norm": 0.7550183773883651, + "learning_rate": 1.8181431453405067e-06, + "loss": 0.7755721807479858, + "step": 2043 + }, + { + "epoch": 0.47096774193548385, + "grad_norm": 0.9234865066349518, + "learning_rate": 1.8179240147044285e-06, + "loss": 0.8320283889770508, + "step": 2044 + }, + { + "epoch": 0.47119815668202764, + "grad_norm": 0.7077773446032107, + "learning_rate": 1.8177047653477619e-06, + "loss": 0.8737574815750122, + "step": 2045 + }, + { + "epoch": 0.4714285714285714, + "grad_norm": 0.8821209974643925, + "learning_rate": 1.8174853973023317e-06, + "loss": 0.7007719278335571, + "step": 2046 + }, + { + "epoch": 0.4716589861751152, + "grad_norm": 0.822666216900424, + "learning_rate": 1.817265910599978e-06, + "loss": 0.8062577247619629, + "step": 2047 + }, + { + "epoch": 0.471889400921659, + "grad_norm": 0.6775605665320994, + "learning_rate": 1.8170463052725594e-06, + "loss": 0.7059667110443115, + "step": 2048 + }, + { + "epoch": 0.47211981566820277, + "grad_norm": 0.7830423922028903, + "learning_rate": 1.816826581351951e-06, + "loss": 0.9025841951370239, + "step": 2049 + }, + { + "epoch": 0.47235023041474655, + "grad_norm": 0.8388278274768075, + "learning_rate": 1.8166067388700458e-06, + "loss": 0.7534186840057373, + "step": 2050 + }, + { + "epoch": 0.47258064516129034, + "grad_norm": 0.7623620329649421, + "learning_rate": 1.8163867778587534e-06, 
+ "loss": 0.9447616338729858, + "step": 2051 + }, + { + "epoch": 0.4728110599078341, + "grad_norm": 0.6423913345578718, + "learning_rate": 1.8161666983500012e-06, + "loss": 0.7092128992080688, + "step": 2052 + }, + { + "epoch": 0.4730414746543779, + "grad_norm": 0.8648864734786782, + "learning_rate": 1.815946500375733e-06, + "loss": 0.8689497113227844, + "step": 2053 + }, + { + "epoch": 0.4732718894009217, + "grad_norm": 0.8941588190294093, + "learning_rate": 1.8157261839679105e-06, + "loss": 0.9298638105392456, + "step": 2054 + }, + { + "epoch": 0.4735023041474654, + "grad_norm": 0.6527064378770876, + "learning_rate": 1.8155057491585125e-06, + "loss": 0.7138030529022217, + "step": 2055 + }, + { + "epoch": 0.4737327188940092, + "grad_norm": 0.6699370139228978, + "learning_rate": 1.815285195979534e-06, + "loss": 0.825221836566925, + "step": 2056 + }, + { + "epoch": 0.473963133640553, + "grad_norm": 0.8559190132682327, + "learning_rate": 1.8150645244629891e-06, + "loss": 0.8643208742141724, + "step": 2057 + }, + { + "epoch": 0.47419354838709676, + "grad_norm": 0.8338353738235549, + "learning_rate": 1.8148437346409073e-06, + "loss": 0.9611828327178955, + "step": 2058 + }, + { + "epoch": 0.47442396313364055, + "grad_norm": 0.8119567978397472, + "learning_rate": 1.8146228265453363e-06, + "loss": 0.8609912991523743, + "step": 2059 + }, + { + "epoch": 0.47465437788018433, + "grad_norm": 0.7540582566966652, + "learning_rate": 1.8144018002083404e-06, + "loss": 0.8277603387832642, + "step": 2060 + }, + { + "epoch": 0.4748847926267281, + "grad_norm": 0.8438703930452028, + "learning_rate": 1.814180655662001e-06, + "loss": 0.8601360321044922, + "step": 2061 + }, + { + "epoch": 0.4751152073732719, + "grad_norm": 0.7023202538855939, + "learning_rate": 1.8139593929384178e-06, + "loss": 0.8454653024673462, + "step": 2062 + }, + { + "epoch": 0.4753456221198157, + "grad_norm": 0.8270167900724995, + "learning_rate": 1.8137380120697059e-06, + "loss": 0.870082437992096, + "step": 2063 + 
}, + { + "epoch": 0.47557603686635946, + "grad_norm": 0.8497953303327396, + "learning_rate": 1.8135165130879988e-06, + "loss": 0.8064073324203491, + "step": 2064 + }, + { + "epoch": 0.47580645161290325, + "grad_norm": 0.5532170457954219, + "learning_rate": 1.813294896025447e-06, + "loss": 0.829608678817749, + "step": 2065 + }, + { + "epoch": 0.47603686635944703, + "grad_norm": 0.7131662100806325, + "learning_rate": 1.8130731609142176e-06, + "loss": 0.8185791969299316, + "step": 2066 + }, + { + "epoch": 0.47626728110599076, + "grad_norm": 0.9405207635689381, + "learning_rate": 1.812851307786495e-06, + "loss": 0.8855293989181519, + "step": 2067 + }, + { + "epoch": 0.47649769585253454, + "grad_norm": 0.6766659884445188, + "learning_rate": 1.8126293366744815e-06, + "loss": 0.7495461106300354, + "step": 2068 + }, + { + "epoch": 0.4767281105990783, + "grad_norm": 0.9706294845402844, + "learning_rate": 1.8124072476103956e-06, + "loss": 0.9435098171234131, + "step": 2069 + }, + { + "epoch": 0.4769585253456221, + "grad_norm": 0.7637936743615437, + "learning_rate": 1.8121850406264727e-06, + "loss": 0.9299448728561401, + "step": 2070 + }, + { + "epoch": 0.4771889400921659, + "grad_norm": 0.9500813357187163, + "learning_rate": 1.8119627157549665e-06, + "loss": 0.9011991024017334, + "step": 2071 + }, + { + "epoch": 0.4774193548387097, + "grad_norm": 0.6847341374863515, + "learning_rate": 1.8117402730281476e-06, + "loss": 0.7326598167419434, + "step": 2072 + }, + { + "epoch": 0.47764976958525346, + "grad_norm": 0.7364560962143368, + "learning_rate": 1.8115177124783024e-06, + "loss": 0.8137445449829102, + "step": 2073 + }, + { + "epoch": 0.47788018433179724, + "grad_norm": 0.9429635333298672, + "learning_rate": 1.811295034137735e-06, + "loss": 0.8653519153594971, + "step": 2074 + }, + { + "epoch": 0.478110599078341, + "grad_norm": 0.8511205154632088, + "learning_rate": 1.811072238038768e-06, + "loss": 0.9140677452087402, + "step": 2075 + }, + { + "epoch": 0.4783410138248848, + 
"grad_norm": 0.8012710450337872, + "learning_rate": 1.810849324213739e-06, + "loss": 0.8878934979438782, + "step": 2076 + }, + { + "epoch": 0.4785714285714286, + "grad_norm": 0.6571390792752639, + "learning_rate": 1.8106262926950045e-06, + "loss": 0.8238190412521362, + "step": 2077 + }, + { + "epoch": 0.4788018433179724, + "grad_norm": 0.8097531572330602, + "learning_rate": 1.8104031435149362e-06, + "loss": 0.7722488641738892, + "step": 2078 + }, + { + "epoch": 0.4790322580645161, + "grad_norm": 0.890992078514086, + "learning_rate": 1.8101798767059248e-06, + "loss": 0.9338192939758301, + "step": 2079 + }, + { + "epoch": 0.4792626728110599, + "grad_norm": 0.8000986035452533, + "learning_rate": 1.8099564923003767e-06, + "loss": 0.7342168688774109, + "step": 2080 + }, + { + "epoch": 0.47949308755760367, + "grad_norm": 0.7644530181466097, + "learning_rate": 1.809732990330716e-06, + "loss": 0.8445772528648376, + "step": 2081 + }, + { + "epoch": 0.47972350230414745, + "grad_norm": 0.7291725333905612, + "learning_rate": 1.8095093708293839e-06, + "loss": 0.825678825378418, + "step": 2082 + }, + { + "epoch": 0.47995391705069124, + "grad_norm": 0.8072481370959372, + "learning_rate": 1.8092856338288381e-06, + "loss": 0.7995405197143555, + "step": 2083 + }, + { + "epoch": 0.480184331797235, + "grad_norm": 0.8193777121106555, + "learning_rate": 1.8090617793615536e-06, + "loss": 0.7811745405197144, + "step": 2084 + }, + { + "epoch": 0.4804147465437788, + "grad_norm": 0.7364459454678961, + "learning_rate": 1.8088378074600231e-06, + "loss": 0.842727780342102, + "step": 2085 + }, + { + "epoch": 0.4806451612903226, + "grad_norm": 0.7640299868769393, + "learning_rate": 1.808613718156756e-06, + "loss": 0.840941309928894, + "step": 2086 + }, + { + "epoch": 0.48087557603686637, + "grad_norm": 0.7783965916533324, + "learning_rate": 1.808389511484278e-06, + "loss": 0.9024466872215271, + "step": 2087 + }, + { + "epoch": 0.48110599078341015, + "grad_norm": 0.8943218774431004, + 
"learning_rate": 1.8081651874751325e-06, + "loss": 0.9112771153450012, + "step": 2088 + }, + { + "epoch": 0.48133640552995394, + "grad_norm": 0.6675207900987881, + "learning_rate": 1.8079407461618797e-06, + "loss": 0.834719181060791, + "step": 2089 + }, + { + "epoch": 0.4815668202764977, + "grad_norm": 0.8421358450475633, + "learning_rate": 1.8077161875770971e-06, + "loss": 0.8472555875778198, + "step": 2090 + }, + { + "epoch": 0.48179723502304145, + "grad_norm": 0.7303169649115268, + "learning_rate": 1.8074915117533796e-06, + "loss": 0.8459140062332153, + "step": 2091 + }, + { + "epoch": 0.48202764976958523, + "grad_norm": 0.6945162401362365, + "learning_rate": 1.807266718723338e-06, + "loss": 0.6570066213607788, + "step": 2092 + }, + { + "epoch": 0.482258064516129, + "grad_norm": 0.7314212575092469, + "learning_rate": 1.8070418085196006e-06, + "loss": 0.8897342681884766, + "step": 2093 + }, + { + "epoch": 0.4824884792626728, + "grad_norm": 0.8312385191950623, + "learning_rate": 1.8068167811748132e-06, + "loss": 0.8339060544967651, + "step": 2094 + }, + { + "epoch": 0.4827188940092166, + "grad_norm": 0.7547678583050421, + "learning_rate": 1.8065916367216383e-06, + "loss": 0.7972484827041626, + "step": 2095 + }, + { + "epoch": 0.48294930875576036, + "grad_norm": 0.7424060773179767, + "learning_rate": 1.806366375192755e-06, + "loss": 0.7894760966300964, + "step": 2096 + }, + { + "epoch": 0.48317972350230415, + "grad_norm": 0.7408232706643347, + "learning_rate": 1.8061409966208597e-06, + "loss": 0.713944673538208, + "step": 2097 + }, + { + "epoch": 0.48341013824884793, + "grad_norm": 0.8423029874540192, + "learning_rate": 1.8059155010386662e-06, + "loss": 0.7832180261611938, + "step": 2098 + }, + { + "epoch": 0.4836405529953917, + "grad_norm": 0.6563887159918735, + "learning_rate": 1.8056898884789043e-06, + "loss": 0.8873809576034546, + "step": 2099 + }, + { + "epoch": 0.4838709677419355, + "grad_norm": 0.8864132111812594, + "learning_rate": 1.8054641589743218e-06, + 
"loss": 0.8174929618835449, + "step": 2100 + }, + { + "epoch": 0.4841013824884793, + "grad_norm": 0.6797946394214075, + "learning_rate": 1.805238312557683e-06, + "loss": 0.876921534538269, + "step": 2101 + }, + { + "epoch": 0.48433179723502306, + "grad_norm": 0.7629892942789464, + "learning_rate": 1.8050123492617693e-06, + "loss": 0.9455937147140503, + "step": 2102 + }, + { + "epoch": 0.4845622119815668, + "grad_norm": 0.6880522665173857, + "learning_rate": 1.8047862691193784e-06, + "loss": 0.8146508932113647, + "step": 2103 + }, + { + "epoch": 0.4847926267281106, + "grad_norm": 0.762873599305404, + "learning_rate": 1.8045600721633262e-06, + "loss": 0.8513495326042175, + "step": 2104 + }, + { + "epoch": 0.48502304147465436, + "grad_norm": 0.8329533644475985, + "learning_rate": 1.8043337584264443e-06, + "loss": 0.8430027961730957, + "step": 2105 + }, + { + "epoch": 0.48525345622119814, + "grad_norm": 0.6323595862794837, + "learning_rate": 1.8041073279415826e-06, + "loss": 0.7683960199356079, + "step": 2106 + }, + { + "epoch": 0.4854838709677419, + "grad_norm": 0.6620613064117244, + "learning_rate": 1.8038807807416067e-06, + "loss": 0.7099664211273193, + "step": 2107 + }, + { + "epoch": 0.4857142857142857, + "grad_norm": 0.725415262213876, + "learning_rate": 1.8036541168593994e-06, + "loss": 0.8046330213546753, + "step": 2108 + }, + { + "epoch": 0.4859447004608295, + "grad_norm": 0.7817858416968994, + "learning_rate": 1.803427336327861e-06, + "loss": 0.8387504816055298, + "step": 2109 + }, + { + "epoch": 0.4861751152073733, + "grad_norm": 0.7135784962709865, + "learning_rate": 1.8032004391799085e-06, + "loss": 0.883955717086792, + "step": 2110 + }, + { + "epoch": 0.48640552995391706, + "grad_norm": 0.7408960119431725, + "learning_rate": 1.8029734254484756e-06, + "loss": 0.7622070908546448, + "step": 2111 + }, + { + "epoch": 0.48663594470046084, + "grad_norm": 0.7726145388563513, + "learning_rate": 1.802746295166513e-06, + "loss": 0.6625584363937378, + "step": 2112 + 
}, + { + "epoch": 0.4868663594470046, + "grad_norm": 0.8189497209718242, + "learning_rate": 1.8025190483669878e-06, + "loss": 0.8232327699661255, + "step": 2113 + }, + { + "epoch": 0.4870967741935484, + "grad_norm": 0.8528139298235252, + "learning_rate": 1.8022916850828857e-06, + "loss": 0.9083148241043091, + "step": 2114 + }, + { + "epoch": 0.4873271889400922, + "grad_norm": 0.7392938308731752, + "learning_rate": 1.8020642053472074e-06, + "loss": 0.8248398303985596, + "step": 2115 + }, + { + "epoch": 0.4875576036866359, + "grad_norm": 0.7121240208517446, + "learning_rate": 1.8018366091929717e-06, + "loss": 0.8055423498153687, + "step": 2116 + }, + { + "epoch": 0.4877880184331797, + "grad_norm": 0.778973471543998, + "learning_rate": 1.8016088966532135e-06, + "loss": 0.8716787695884705, + "step": 2117 + }, + { + "epoch": 0.4880184331797235, + "grad_norm": 0.7561230225795058, + "learning_rate": 1.801381067760985e-06, + "loss": 0.8530780673027039, + "step": 2118 + }, + { + "epoch": 0.48824884792626727, + "grad_norm": 0.6774037273322415, + "learning_rate": 1.8011531225493557e-06, + "loss": 0.7958484888076782, + "step": 2119 + }, + { + "epoch": 0.48847926267281105, + "grad_norm": 0.8596146173926187, + "learning_rate": 1.800925061051411e-06, + "loss": 0.8312872648239136, + "step": 2120 + }, + { + "epoch": 0.48870967741935484, + "grad_norm": 0.8135900564482533, + "learning_rate": 1.8006968833002541e-06, + "loss": 0.8097391128540039, + "step": 2121 + }, + { + "epoch": 0.4889400921658986, + "grad_norm": 0.9139337120301166, + "learning_rate": 1.8004685893290046e-06, + "loss": 0.8636112213134766, + "step": 2122 + }, + { + "epoch": 0.4891705069124424, + "grad_norm": 0.9088930992891967, + "learning_rate": 1.800240179170799e-06, + "loss": 0.9122721552848816, + "step": 2123 + }, + { + "epoch": 0.4894009216589862, + "grad_norm": 0.914017678688966, + "learning_rate": 1.8000116528587907e-06, + "loss": 0.8172330856323242, + "step": 2124 + }, + { + "epoch": 0.48963133640552997, + 
"grad_norm": 0.8007018337125341, + "learning_rate": 1.7997830104261502e-06, + "loss": 0.7377575635910034, + "step": 2125 + }, + { + "epoch": 0.48986175115207375, + "grad_norm": 0.9218847107737449, + "learning_rate": 1.7995542519060644e-06, + "loss": 0.7278136014938354, + "step": 2126 + }, + { + "epoch": 0.49009216589861754, + "grad_norm": 0.8808842591031234, + "learning_rate": 1.7993253773317374e-06, + "loss": 0.8977715969085693, + "step": 2127 + }, + { + "epoch": 0.49032258064516127, + "grad_norm": 0.7019593909183576, + "learning_rate": 1.7990963867363902e-06, + "loss": 0.789979100227356, + "step": 2128 + }, + { + "epoch": 0.49055299539170505, + "grad_norm": 0.7069412826082713, + "learning_rate": 1.7988672801532602e-06, + "loss": 0.8304328322410583, + "step": 2129 + }, + { + "epoch": 0.49078341013824883, + "grad_norm": 0.7922910084647693, + "learning_rate": 1.7986380576156019e-06, + "loss": 0.7597516179084778, + "step": 2130 + }, + { + "epoch": 0.4910138248847926, + "grad_norm": 0.6007262757544611, + "learning_rate": 1.7984087191566873e-06, + "loss": 0.661639928817749, + "step": 2131 + }, + { + "epoch": 0.4912442396313364, + "grad_norm": 0.7484873666922557, + "learning_rate": 1.7981792648098035e-06, + "loss": 0.7871333360671997, + "step": 2132 + }, + { + "epoch": 0.4914746543778802, + "grad_norm": 0.7758289248832314, + "learning_rate": 1.7979496946082565e-06, + "loss": 0.8166402578353882, + "step": 2133 + }, + { + "epoch": 0.49170506912442397, + "grad_norm": 0.6906377275927077, + "learning_rate": 1.7977200085853674e-06, + "loss": 0.7112412452697754, + "step": 2134 + }, + { + "epoch": 0.49193548387096775, + "grad_norm": 0.8103572300867555, + "learning_rate": 1.7974902067744752e-06, + "loss": 0.8358132839202881, + "step": 2135 + }, + { + "epoch": 0.49216589861751153, + "grad_norm": 0.7103875590554449, + "learning_rate": 1.7972602892089353e-06, + "loss": 0.8544377088546753, + "step": 2136 + }, + { + "epoch": 0.4923963133640553, + "grad_norm": 0.9004573017295656, + 
"learning_rate": 1.7970302559221197e-06, + "loss": 1.0105161666870117, + "step": 2137 + }, + { + "epoch": 0.4926267281105991, + "grad_norm": 0.7525179633837843, + "learning_rate": 1.7968001069474176e-06, + "loss": 0.7666197419166565, + "step": 2138 + }, + { + "epoch": 0.4928571428571429, + "grad_norm": 0.9209694432294897, + "learning_rate": 1.7965698423182349e-06, + "loss": 0.9250742197036743, + "step": 2139 + }, + { + "epoch": 0.4930875576036866, + "grad_norm": 0.8066717978287462, + "learning_rate": 1.7963394620679942e-06, + "loss": 0.8269995450973511, + "step": 2140 + }, + { + "epoch": 0.4933179723502304, + "grad_norm": 0.9533305612537857, + "learning_rate": 1.7961089662301346e-06, + "loss": 1.0431339740753174, + "step": 2141 + }, + { + "epoch": 0.4935483870967742, + "grad_norm": 0.7107784117562762, + "learning_rate": 1.7958783548381125e-06, + "loss": 0.7474809288978577, + "step": 2142 + }, + { + "epoch": 0.49377880184331796, + "grad_norm": 0.7729911498332706, + "learning_rate": 1.7956476279254007e-06, + "loss": 0.8850520849227905, + "step": 2143 + }, + { + "epoch": 0.49400921658986174, + "grad_norm": 0.8566824172714074, + "learning_rate": 1.7954167855254893e-06, + "loss": 0.8898880481719971, + "step": 2144 + }, + { + "epoch": 0.4942396313364055, + "grad_norm": 0.886855392770134, + "learning_rate": 1.7951858276718842e-06, + "loss": 0.8718239068984985, + "step": 2145 + }, + { + "epoch": 0.4944700460829493, + "grad_norm": 0.7604278475621951, + "learning_rate": 1.794954754398109e-06, + "loss": 0.8407484292984009, + "step": 2146 + }, + { + "epoch": 0.4947004608294931, + "grad_norm": 0.9582215314216729, + "learning_rate": 1.7947235657377036e-06, + "loss": 0.8453764915466309, + "step": 2147 + }, + { + "epoch": 0.4949308755760369, + "grad_norm": 0.6332693049941237, + "learning_rate": 1.794492261724225e-06, + "loss": 0.5795568227767944, + "step": 2148 + }, + { + "epoch": 0.49516129032258066, + "grad_norm": 0.9864343717736791, + "learning_rate": 1.794260842391246e-06, + 
"loss": 0.8601347208023071, + "step": 2149 + }, + { + "epoch": 0.49539170506912444, + "grad_norm": 0.8909931853274754, + "learning_rate": 1.7940293077723573e-06, + "loss": 0.8328324556350708, + "step": 2150 + }, + { + "epoch": 0.4956221198156682, + "grad_norm": 0.6691517417241877, + "learning_rate": 1.7937976579011655e-06, + "loss": 0.8924463391304016, + "step": 2151 + }, + { + "epoch": 0.49585253456221196, + "grad_norm": 0.7983254161536232, + "learning_rate": 1.7935658928112947e-06, + "loss": 0.9725968837738037, + "step": 2152 + }, + { + "epoch": 0.49608294930875574, + "grad_norm": 0.7649378566504706, + "learning_rate": 1.7933340125363855e-06, + "loss": 0.7814322710037231, + "step": 2153 + }, + { + "epoch": 0.4963133640552995, + "grad_norm": 0.795129549448148, + "learning_rate": 1.793102017110094e-06, + "loss": 0.8022886514663696, + "step": 2154 + }, + { + "epoch": 0.4965437788018433, + "grad_norm": 0.9455352743035539, + "learning_rate": 1.7928699065660951e-06, + "loss": 0.9747333526611328, + "step": 2155 + }, + { + "epoch": 0.4967741935483871, + "grad_norm": 1.0353782305768249, + "learning_rate": 1.7926376809380783e-06, + "loss": 0.9039797782897949, + "step": 2156 + }, + { + "epoch": 0.49700460829493087, + "grad_norm": 1.000992925643121, + "learning_rate": 1.7924053402597518e-06, + "loss": 0.9444677829742432, + "step": 2157 + }, + { + "epoch": 0.49723502304147466, + "grad_norm": 0.7688551400180308, + "learning_rate": 1.7921728845648393e-06, + "loss": 0.8442031741142273, + "step": 2158 + }, + { + "epoch": 0.49746543778801844, + "grad_norm": 0.8590371435800439, + "learning_rate": 1.7919403138870813e-06, + "loss": 0.9410362839698792, + "step": 2159 + }, + { + "epoch": 0.4976958525345622, + "grad_norm": 0.8168398725206235, + "learning_rate": 1.791707628260235e-06, + "loss": 0.8929172158241272, + "step": 2160 + }, + { + "epoch": 0.497926267281106, + "grad_norm": 0.970370102226972, + "learning_rate": 1.7914748277180745e-06, + "loss": 0.9259560108184814, + "step": 2161 
+ }, + { + "epoch": 0.4981566820276498, + "grad_norm": 0.7778204252845836, + "learning_rate": 1.7912419122943904e-06, + "loss": 0.8201638460159302, + "step": 2162 + }, + { + "epoch": 0.49838709677419357, + "grad_norm": 0.7628075269760098, + "learning_rate": 1.7910088820229907e-06, + "loss": 0.7554556131362915, + "step": 2163 + }, + { + "epoch": 0.4986175115207373, + "grad_norm": 0.7698860809397133, + "learning_rate": 1.7907757369376984e-06, + "loss": 0.8206801414489746, + "step": 2164 + }, + { + "epoch": 0.4988479262672811, + "grad_norm": 0.7606971261006891, + "learning_rate": 1.7905424770723551e-06, + "loss": 0.765400767326355, + "step": 2165 + }, + { + "epoch": 0.49907834101382487, + "grad_norm": 0.9629614917036793, + "learning_rate": 1.7903091024608177e-06, + "loss": 0.9191527366638184, + "step": 2166 + }, + { + "epoch": 0.49930875576036865, + "grad_norm": 1.0883591834210613, + "learning_rate": 1.7900756131369601e-06, + "loss": 0.8515042662620544, + "step": 2167 + }, + { + "epoch": 0.49953917050691243, + "grad_norm": 0.7623230395498896, + "learning_rate": 1.7898420091346736e-06, + "loss": 0.8509752750396729, + "step": 2168 + }, + { + "epoch": 0.4997695852534562, + "grad_norm": 0.7417934516303272, + "learning_rate": 1.7896082904878647e-06, + "loss": 0.8007084131240845, + "step": 2169 + }, + { + "epoch": 0.5, + "grad_norm": 0.8597818097533757, + "learning_rate": 1.789374457230458e-06, + "loss": 0.8395413756370544, + "step": 2170 + }, + { + "epoch": 0.5002304147465437, + "grad_norm": 0.7232889708808644, + "learning_rate": 1.7891405093963937e-06, + "loss": 0.8624853491783142, + "step": 2171 + }, + { + "epoch": 0.5004608294930876, + "grad_norm": 0.6629899968556545, + "learning_rate": 1.788906447019629e-06, + "loss": 0.8141548037528992, + "step": 2172 + }, + { + "epoch": 0.5006912442396313, + "grad_norm": 0.6495144260680482, + "learning_rate": 1.7886722701341382e-06, + "loss": 0.6764500141143799, + "step": 2173 + }, + { + "epoch": 0.5009216589861751, + "grad_norm": 
0.6701022764652186, + "learning_rate": 1.7884379787739112e-06, + "loss": 0.710756778717041, + "step": 2174 + }, + { + "epoch": 0.5011520737327189, + "grad_norm": 0.8273999117205362, + "learning_rate": 1.7882035729729555e-06, + "loss": 0.8090574145317078, + "step": 2175 + }, + { + "epoch": 0.5013824884792627, + "grad_norm": 0.6977221855783239, + "learning_rate": 1.7879690527652943e-06, + "loss": 0.7639138102531433, + "step": 2176 + }, + { + "epoch": 0.5016129032258064, + "grad_norm": 0.9185836860641033, + "learning_rate": 1.7877344181849687e-06, + "loss": 0.8093903660774231, + "step": 2177 + }, + { + "epoch": 0.5018433179723503, + "grad_norm": 0.7610855435865236, + "learning_rate": 1.7874996692660348e-06, + "loss": 0.8705824017524719, + "step": 2178 + }, + { + "epoch": 0.502073732718894, + "grad_norm": 0.7815265219501579, + "learning_rate": 1.7872648060425666e-06, + "loss": 0.7365947961807251, + "step": 2179 + }, + { + "epoch": 0.5023041474654378, + "grad_norm": 0.8989287933893153, + "learning_rate": 1.787029828548654e-06, + "loss": 0.9405299425125122, + "step": 2180 + }, + { + "epoch": 0.5025345622119816, + "grad_norm": 0.907417749032586, + "learning_rate": 1.7867947368184036e-06, + "loss": 0.9232017993927002, + "step": 2181 + }, + { + "epoch": 0.5027649769585254, + "grad_norm": 1.0801728154122552, + "learning_rate": 1.7865595308859388e-06, + "loss": 0.9941537380218506, + "step": 2182 + }, + { + "epoch": 0.5029953917050691, + "grad_norm": 0.7341611336832391, + "learning_rate": 1.7863242107853993e-06, + "loss": 0.6981802582740784, + "step": 2183 + }, + { + "epoch": 0.5032258064516129, + "grad_norm": 0.8346521198909456, + "learning_rate": 1.7860887765509417e-06, + "loss": 0.8155109882354736, + "step": 2184 + }, + { + "epoch": 0.5034562211981567, + "grad_norm": 0.8846374910749497, + "learning_rate": 1.7858532282167385e-06, + "loss": 0.7246255874633789, + "step": 2185 + }, + { + "epoch": 0.5036866359447004, + "grad_norm": 0.7027049895049993, + "learning_rate": 
1.7856175658169796e-06, + "loss": 0.7042064666748047, + "step": 2186 + }, + { + "epoch": 0.5039170506912443, + "grad_norm": 0.8633735424450812, + "learning_rate": 1.7853817893858714e-06, + "loss": 0.7522145509719849, + "step": 2187 + }, + { + "epoch": 0.504147465437788, + "grad_norm": 0.8170927084265063, + "learning_rate": 1.7851458989576359e-06, + "loss": 1.0157709121704102, + "step": 2188 + }, + { + "epoch": 0.5043778801843318, + "grad_norm": 0.8537305826863457, + "learning_rate": 1.7849098945665127e-06, + "loss": 0.7096433639526367, + "step": 2189 + }, + { + "epoch": 0.5046082949308756, + "grad_norm": 0.8293401368813538, + "learning_rate": 1.7846737762467572e-06, + "loss": 0.7743037939071655, + "step": 2190 + }, + { + "epoch": 0.5048387096774194, + "grad_norm": 0.802261593558941, + "learning_rate": 1.784437544032642e-06, + "loss": 0.7907241582870483, + "step": 2191 + }, + { + "epoch": 0.5050691244239631, + "grad_norm": 0.9488985791352184, + "learning_rate": 1.7842011979584557e-06, + "loss": 0.8692185878753662, + "step": 2192 + }, + { + "epoch": 0.505299539170507, + "grad_norm": 1.0636987469588612, + "learning_rate": 1.783964738058504e-06, + "loss": 0.9678715467453003, + "step": 2193 + }, + { + "epoch": 0.5055299539170507, + "grad_norm": 0.7713527005281836, + "learning_rate": 1.7837281643671077e-06, + "loss": 0.855170726776123, + "step": 2194 + }, + { + "epoch": 0.5057603686635944, + "grad_norm": 0.7469430705420217, + "learning_rate": 1.7834914769186065e-06, + "loss": 0.8452733755111694, + "step": 2195 + }, + { + "epoch": 0.5059907834101383, + "grad_norm": 0.6866121153572871, + "learning_rate": 1.7832546757473543e-06, + "loss": 0.7517217397689819, + "step": 2196 + }, + { + "epoch": 0.506221198156682, + "grad_norm": 0.7453227048555126, + "learning_rate": 1.783017760887723e-06, + "loss": 0.6971632838249207, + "step": 2197 + }, + { + "epoch": 0.5064516129032258, + "grad_norm": 0.7964964192157018, + "learning_rate": 1.7827807323741002e-06, + "loss": 
0.8638256192207336, + "step": 2198 + }, + { + "epoch": 0.5066820276497696, + "grad_norm": 0.7941877452524988, + "learning_rate": 1.7825435902408903e-06, + "loss": 0.8410143256187439, + "step": 2199 + }, + { + "epoch": 0.5069124423963134, + "grad_norm": 0.7902588767037179, + "learning_rate": 1.7823063345225143e-06, + "loss": 0.8127691745758057, + "step": 2200 + }, + { + "epoch": 0.5071428571428571, + "grad_norm": 0.7618481515663807, + "learning_rate": 1.7820689652534096e-06, + "loss": 0.7351404428482056, + "step": 2201 + }, + { + "epoch": 0.507373271889401, + "grad_norm": 0.6691944306500267, + "learning_rate": 1.7818314824680298e-06, + "loss": 0.7258716821670532, + "step": 2202 + }, + { + "epoch": 0.5076036866359447, + "grad_norm": 1.0029859864492747, + "learning_rate": 1.7815938862008454e-06, + "loss": 0.9509599208831787, + "step": 2203 + }, + { + "epoch": 0.5078341013824885, + "grad_norm": 0.7738532710061052, + "learning_rate": 1.7813561764863429e-06, + "loss": 0.8600929379463196, + "step": 2204 + }, + { + "epoch": 0.5080645161290323, + "grad_norm": 0.9689099485850551, + "learning_rate": 1.7811183533590257e-06, + "loss": 0.8688119649887085, + "step": 2205 + }, + { + "epoch": 0.5082949308755761, + "grad_norm": 0.7599344683888546, + "learning_rate": 1.780880416853414e-06, + "loss": 0.8447986841201782, + "step": 2206 + }, + { + "epoch": 0.5085253456221198, + "grad_norm": 0.6953642388755117, + "learning_rate": 1.7806423670040433e-06, + "loss": 0.8262573480606079, + "step": 2207 + }, + { + "epoch": 0.5087557603686635, + "grad_norm": 0.7640117945069856, + "learning_rate": 1.7804042038454666e-06, + "loss": 0.9534487724304199, + "step": 2208 + }, + { + "epoch": 0.5089861751152074, + "grad_norm": 0.7513792438385134, + "learning_rate": 1.7801659274122527e-06, + "loss": 0.7712565064430237, + "step": 2209 + }, + { + "epoch": 0.5092165898617511, + "grad_norm": 0.8714588056175714, + "learning_rate": 1.7799275377389873e-06, + "loss": 0.8190760016441345, + "step": 2210 + }, + { + 
"epoch": 0.509447004608295, + "grad_norm": 0.9379540710774249, + "learning_rate": 1.7796890348602722e-06, + "loss": 0.8647592067718506, + "step": 2211 + }, + { + "epoch": 0.5096774193548387, + "grad_norm": 0.7912467632232041, + "learning_rate": 1.7794504188107257e-06, + "loss": 0.7788198590278625, + "step": 2212 + }, + { + "epoch": 0.5099078341013825, + "grad_norm": 0.7053754197084299, + "learning_rate": 1.779211689624983e-06, + "loss": 0.8610718250274658, + "step": 2213 + }, + { + "epoch": 0.5101382488479262, + "grad_norm": 0.7783569383566119, + "learning_rate": 1.7789728473376952e-06, + "loss": 0.832200825214386, + "step": 2214 + }, + { + "epoch": 0.5103686635944701, + "grad_norm": 0.7823482622118234, + "learning_rate": 1.7787338919835298e-06, + "loss": 0.7325488328933716, + "step": 2215 + }, + { + "epoch": 0.5105990783410138, + "grad_norm": 0.8903627357495159, + "learning_rate": 1.7784948235971707e-06, + "loss": 0.8038203716278076, + "step": 2216 + }, + { + "epoch": 0.5108294930875577, + "grad_norm": 0.6275186054972087, + "learning_rate": 1.7782556422133185e-06, + "loss": 0.7016317248344421, + "step": 2217 + }, + { + "epoch": 0.5110599078341014, + "grad_norm": 0.8951545762278973, + "learning_rate": 1.7780163478666905e-06, + "loss": 0.7964655160903931, + "step": 2218 + }, + { + "epoch": 0.5112903225806451, + "grad_norm": 0.7709224710894249, + "learning_rate": 1.777776940592019e-06, + "loss": 0.6681785583496094, + "step": 2219 + }, + { + "epoch": 0.511520737327189, + "grad_norm": 0.8934880823893885, + "learning_rate": 1.7775374204240547e-06, + "loss": 0.835777759552002, + "step": 2220 + }, + { + "epoch": 0.5117511520737327, + "grad_norm": 1.0248178001051076, + "learning_rate": 1.777297787397563e-06, + "loss": 0.9442443251609802, + "step": 2221 + }, + { + "epoch": 0.5119815668202765, + "grad_norm": 1.072158922361294, + "learning_rate": 1.7770580415473267e-06, + "loss": 0.9351231455802917, + "step": 2222 + }, + { + "epoch": 0.5122119815668202, + "grad_norm": 
0.878332211622375, + "learning_rate": 1.776818182908144e-06, + "loss": 0.7238374352455139, + "step": 2223 + }, + { + "epoch": 0.5124423963133641, + "grad_norm": 0.7001659306792695, + "learning_rate": 1.7765782115148308e-06, + "loss": 0.8206230998039246, + "step": 2224 + }, + { + "epoch": 0.5126728110599078, + "grad_norm": 0.6546302150578799, + "learning_rate": 1.7763381274022176e-06, + "loss": 0.748784065246582, + "step": 2225 + }, + { + "epoch": 0.5129032258064516, + "grad_norm": 0.7566703422977776, + "learning_rate": 1.7760979306051533e-06, + "loss": 0.7980858087539673, + "step": 2226 + }, + { + "epoch": 0.5131336405529954, + "grad_norm": 0.8877968508757134, + "learning_rate": 1.7758576211585018e-06, + "loss": 0.8631168603897095, + "step": 2227 + }, + { + "epoch": 0.5133640552995392, + "grad_norm": 0.7405217897025548, + "learning_rate": 1.7756171990971441e-06, + "loss": 0.9405999779701233, + "step": 2228 + }, + { + "epoch": 0.5135944700460829, + "grad_norm": 0.8867257371824923, + "learning_rate": 1.7753766644559763e-06, + "loss": 0.9055094718933105, + "step": 2229 + }, + { + "epoch": 0.5138248847926268, + "grad_norm": 0.827493910498757, + "learning_rate": 1.775136017269912e-06, + "loss": 0.7583146691322327, + "step": 2230 + }, + { + "epoch": 0.5140552995391705, + "grad_norm": 0.8689067612775456, + "learning_rate": 1.7748952575738811e-06, + "loss": 0.8728743195533752, + "step": 2231 + }, + { + "epoch": 0.5142857142857142, + "grad_norm": 0.7067707521741841, + "learning_rate": 1.7746543854028295e-06, + "loss": 0.8133460283279419, + "step": 2232 + }, + { + "epoch": 0.5145161290322581, + "grad_norm": 0.7177694794353267, + "learning_rate": 1.7744134007917194e-06, + "loss": 0.8389721512794495, + "step": 2233 + }, + { + "epoch": 0.5147465437788018, + "grad_norm": 0.9617522193850644, + "learning_rate": 1.774172303775529e-06, + "loss": 0.7016798257827759, + "step": 2234 + }, + { + "epoch": 0.5149769585253456, + "grad_norm": 0.7999711451764379, + "learning_rate": 
1.7739310943892538e-06, + "loss": 0.7920540571212769, + "step": 2235 + }, + { + "epoch": 0.5152073732718894, + "grad_norm": 0.6990088891534603, + "learning_rate": 1.7736897726679048e-06, + "loss": 0.900149405002594, + "step": 2236 + }, + { + "epoch": 0.5154377880184332, + "grad_norm": 0.743220745754201, + "learning_rate": 1.7734483386465096e-06, + "loss": 0.8537915349006653, + "step": 2237 + }, + { + "epoch": 0.5156682027649769, + "grad_norm": 0.8134323205434837, + "learning_rate": 1.7732067923601121e-06, + "loss": 0.7418123483657837, + "step": 2238 + }, + { + "epoch": 0.5158986175115208, + "grad_norm": 1.108361921569266, + "learning_rate": 1.7729651338437721e-06, + "loss": 0.8890011310577393, + "step": 2239 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9841321811418366, + "learning_rate": 1.7727233631325663e-06, + "loss": 0.9082813262939453, + "step": 2240 + }, + { + "epoch": 0.5163594470046083, + "grad_norm": 0.9268737545625799, + "learning_rate": 1.7724814802615868e-06, + "loss": 0.8337695598602295, + "step": 2241 + }, + { + "epoch": 0.5165898617511521, + "grad_norm": 1.1037050608526282, + "learning_rate": 1.7722394852659437e-06, + "loss": 0.8990765810012817, + "step": 2242 + }, + { + "epoch": 0.5168202764976959, + "grad_norm": 0.8552834719912825, + "learning_rate": 1.7719973781807614e-06, + "loss": 0.720890998840332, + "step": 2243 + }, + { + "epoch": 0.5170506912442396, + "grad_norm": 0.6406815235154244, + "learning_rate": 1.7717551590411817e-06, + "loss": 0.7966938018798828, + "step": 2244 + }, + { + "epoch": 0.5172811059907834, + "grad_norm": 0.8614270693246835, + "learning_rate": 1.7715128278823622e-06, + "loss": 0.9290107488632202, + "step": 2245 + }, + { + "epoch": 0.5175115207373272, + "grad_norm": 0.8755598994931274, + "learning_rate": 1.771270384739477e-06, + "loss": 0.8388533592224121, + "step": 2246 + }, + { + "epoch": 0.5177419354838709, + "grad_norm": 0.8200932411512113, + "learning_rate": 1.7710278296477169e-06, + "loss": 
0.8845043182373047, + "step": 2247 + }, + { + "epoch": 0.5179723502304148, + "grad_norm": 0.8499976704860752, + "learning_rate": 1.7707851626422875e-06, + "loss": 0.879709780216217, + "step": 2248 + }, + { + "epoch": 0.5182027649769585, + "grad_norm": 0.8407815201465851, + "learning_rate": 1.7705423837584123e-06, + "loss": 0.8215152025222778, + "step": 2249 + }, + { + "epoch": 0.5184331797235023, + "grad_norm": 0.8770027311962882, + "learning_rate": 1.7702994930313305e-06, + "loss": 0.8108627796173096, + "step": 2250 + }, + { + "epoch": 0.5186635944700461, + "grad_norm": 0.9106818329739914, + "learning_rate": 1.7700564904962966e-06, + "loss": 0.8391602039337158, + "step": 2251 + }, + { + "epoch": 0.5188940092165899, + "grad_norm": 0.82724043269172, + "learning_rate": 1.769813376188583e-06, + "loss": 0.8664923906326294, + "step": 2252 + }, + { + "epoch": 0.5191244239631336, + "grad_norm": 0.8478256896643234, + "learning_rate": 1.7695701501434765e-06, + "loss": 0.9670882821083069, + "step": 2253 + }, + { + "epoch": 0.5193548387096775, + "grad_norm": 0.8831524743377538, + "learning_rate": 1.7693268123962816e-06, + "loss": 0.946273684501648, + "step": 2254 + }, + { + "epoch": 0.5195852534562212, + "grad_norm": 0.7643743435262689, + "learning_rate": 1.7690833629823184e-06, + "loss": 0.9691795706748962, + "step": 2255 + }, + { + "epoch": 0.5198156682027649, + "grad_norm": 0.7833370135674333, + "learning_rate": 1.7688398019369232e-06, + "loss": 0.8086103200912476, + "step": 2256 + }, + { + "epoch": 0.5200460829493088, + "grad_norm": 0.8183770044685874, + "learning_rate": 1.7685961292954486e-06, + "loss": 0.8574277758598328, + "step": 2257 + }, + { + "epoch": 0.5202764976958525, + "grad_norm": 0.7089387180946831, + "learning_rate": 1.7683523450932633e-06, + "loss": 0.7841963171958923, + "step": 2258 + }, + { + "epoch": 0.5205069124423963, + "grad_norm": 0.7629735238937895, + "learning_rate": 1.7681084493657523e-06, + "loss": 0.6972980499267578, + "step": 2259 + }, + { + 
"epoch": 0.5207373271889401, + "grad_norm": 0.7917333859989639, + "learning_rate": 1.7678644421483163e-06, + "loss": 0.9193723201751709, + "step": 2260 + }, + { + "epoch": 0.5209677419354839, + "grad_norm": 0.9714597630384237, + "learning_rate": 1.7676203234763736e-06, + "loss": 0.7902654409408569, + "step": 2261 + }, + { + "epoch": 0.5211981566820276, + "grad_norm": 0.7983060164629807, + "learning_rate": 1.767376093385357e-06, + "loss": 0.8804734945297241, + "step": 2262 + }, + { + "epoch": 0.5214285714285715, + "grad_norm": 0.9065709846386143, + "learning_rate": 1.7671317519107163e-06, + "loss": 0.7884976863861084, + "step": 2263 + }, + { + "epoch": 0.5216589861751152, + "grad_norm": 0.9252417906886758, + "learning_rate": 1.7668872990879173e-06, + "loss": 0.8233190774917603, + "step": 2264 + }, + { + "epoch": 0.521889400921659, + "grad_norm": 0.7126124532622758, + "learning_rate": 1.766642734952442e-06, + "loss": 0.7985334396362305, + "step": 2265 + }, + { + "epoch": 0.5221198156682028, + "grad_norm": 0.8073440338214538, + "learning_rate": 1.7663980595397887e-06, + "loss": 0.7805646657943726, + "step": 2266 + }, + { + "epoch": 0.5223502304147466, + "grad_norm": 0.9455838488830395, + "learning_rate": 1.7661532728854718e-06, + "loss": 0.8528248071670532, + "step": 2267 + }, + { + "epoch": 0.5225806451612903, + "grad_norm": 0.882590365173732, + "learning_rate": 1.7659083750250215e-06, + "loss": 0.7714066505432129, + "step": 2268 + }, + { + "epoch": 0.522811059907834, + "grad_norm": 0.7632999883965862, + "learning_rate": 1.7656633659939843e-06, + "loss": 0.8250499963760376, + "step": 2269 + }, + { + "epoch": 0.5230414746543779, + "grad_norm": 0.6787990523098465, + "learning_rate": 1.7654182458279231e-06, + "loss": 0.7878777384757996, + "step": 2270 + }, + { + "epoch": 0.5232718894009216, + "grad_norm": 0.8263772967033729, + "learning_rate": 1.7651730145624174e-06, + "loss": 0.9080224633216858, + "step": 2271 + }, + { + "epoch": 0.5235023041474655, + "grad_norm": 
0.8137376292994275, + "learning_rate": 1.7649276722330607e-06, + "loss": 0.8010937571525574, + "step": 2272 + }, + { + "epoch": 0.5237327188940092, + "grad_norm": 0.8996847055009526, + "learning_rate": 1.7646822188754658e-06, + "loss": 0.903404951095581, + "step": 2273 + }, + { + "epoch": 0.523963133640553, + "grad_norm": 0.928692707021516, + "learning_rate": 1.7644366545252589e-06, + "loss": 0.9009061455726624, + "step": 2274 + }, + { + "epoch": 0.5241935483870968, + "grad_norm": 0.7651260343716183, + "learning_rate": 1.7641909792180834e-06, + "loss": 0.7158697843551636, + "step": 2275 + }, + { + "epoch": 0.5244239631336406, + "grad_norm": 0.8041302440889452, + "learning_rate": 1.763945192989599e-06, + "loss": 0.8101463317871094, + "step": 2276 + }, + { + "epoch": 0.5246543778801843, + "grad_norm": 0.8174455436475604, + "learning_rate": 1.7636992958754812e-06, + "loss": 0.758610725402832, + "step": 2277 + }, + { + "epoch": 0.5248847926267282, + "grad_norm": 0.9651314388158028, + "learning_rate": 1.7634532879114216e-06, + "loss": 0.9469501972198486, + "step": 2278 + }, + { + "epoch": 0.5251152073732719, + "grad_norm": 0.6853415956002341, + "learning_rate": 1.7632071691331281e-06, + "loss": 0.7528036236763, + "step": 2279 + }, + { + "epoch": 0.5253456221198156, + "grad_norm": 0.9124447697867164, + "learning_rate": 1.7629609395763242e-06, + "loss": 0.8519324064254761, + "step": 2280 + }, + { + "epoch": 0.5255760368663595, + "grad_norm": 0.9239480610002251, + "learning_rate": 1.7627145992767498e-06, + "loss": 0.8620004653930664, + "step": 2281 + }, + { + "epoch": 0.5258064516129032, + "grad_norm": 0.7831738680942184, + "learning_rate": 1.762468148270161e-06, + "loss": 0.8066067695617676, + "step": 2282 + }, + { + "epoch": 0.526036866359447, + "grad_norm": 0.8314773622163678, + "learning_rate": 1.7622215865923301e-06, + "loss": 0.865642786026001, + "step": 2283 + }, + { + "epoch": 0.5262672811059907, + "grad_norm": 0.7269170910166286, + "learning_rate": 
1.761974914279045e-06, + "loss": 0.8478001356124878, + "step": 2284 + }, + { + "epoch": 0.5264976958525346, + "grad_norm": 0.8461811606118353, + "learning_rate": 1.7617281313661098e-06, + "loss": 0.7984344363212585, + "step": 2285 + }, + { + "epoch": 0.5267281105990783, + "grad_norm": 0.8489168247147351, + "learning_rate": 1.7614812378893444e-06, + "loss": 0.8480801582336426, + "step": 2286 + }, + { + "epoch": 0.5269585253456222, + "grad_norm": 0.9126795310234661, + "learning_rate": 1.7612342338845859e-06, + "loss": 0.8667479753494263, + "step": 2287 + }, + { + "epoch": 0.5271889400921659, + "grad_norm": 0.9533468835174431, + "learning_rate": 1.7609871193876854e-06, + "loss": 0.8431364297866821, + "step": 2288 + }, + { + "epoch": 0.5274193548387097, + "grad_norm": 0.8628781350943807, + "learning_rate": 1.7607398944345127e-06, + "loss": 0.8544220924377441, + "step": 2289 + }, + { + "epoch": 0.5276497695852534, + "grad_norm": 0.9575259696859837, + "learning_rate": 1.760492559060951e-06, + "loss": 0.9298971891403198, + "step": 2290 + }, + { + "epoch": 0.5278801843317973, + "grad_norm": 0.8854664005974592, + "learning_rate": 1.760245113302901e-06, + "loss": 0.739667534828186, + "step": 2291 + }, + { + "epoch": 0.528110599078341, + "grad_norm": 0.9418693515744256, + "learning_rate": 1.7599975571962796e-06, + "loss": 0.8981268405914307, + "step": 2292 + }, + { + "epoch": 0.5283410138248847, + "grad_norm": 0.8489202000746718, + "learning_rate": 1.7597498907770185e-06, + "loss": 0.8027834892272949, + "step": 2293 + }, + { + "epoch": 0.5285714285714286, + "grad_norm": 0.7244957329263912, + "learning_rate": 1.7595021140810669e-06, + "loss": 0.7018242478370667, + "step": 2294 + }, + { + "epoch": 0.5288018433179723, + "grad_norm": 0.8699196704594798, + "learning_rate": 1.7592542271443887e-06, + "loss": 0.7655147910118103, + "step": 2295 + }, + { + "epoch": 0.5290322580645161, + "grad_norm": 0.8169123509935803, + "learning_rate": 1.7590062300029644e-06, + "loss": 
0.8283153772354126, + "step": 2296 + }, + { + "epoch": 0.5292626728110599, + "grad_norm": 1.0550792201388366, + "learning_rate": 1.7587581226927907e-06, + "loss": 1.0430598258972168, + "step": 2297 + }, + { + "epoch": 0.5294930875576037, + "grad_norm": 0.7609036061197976, + "learning_rate": 1.7585099052498802e-06, + "loss": 0.6683472990989685, + "step": 2298 + }, + { + "epoch": 0.5297235023041474, + "grad_norm": 0.7278178698575015, + "learning_rate": 1.7582615777102609e-06, + "loss": 0.7254939079284668, + "step": 2299 + }, + { + "epoch": 0.5299539170506913, + "grad_norm": 0.7049477325497308, + "learning_rate": 1.7580131401099774e-06, + "loss": 0.7913245558738708, + "step": 2300 + }, + { + "epoch": 0.530184331797235, + "grad_norm": 0.8416230641508338, + "learning_rate": 1.75776459248509e-06, + "loss": 0.7832915782928467, + "step": 2301 + }, + { + "epoch": 0.5304147465437788, + "grad_norm": 0.7722959383546871, + "learning_rate": 1.7575159348716754e-06, + "loss": 0.9754987955093384, + "step": 2302 + }, + { + "epoch": 0.5306451612903226, + "grad_norm": 0.8614799765536667, + "learning_rate": 1.7572671673058254e-06, + "loss": 0.8343901634216309, + "step": 2303 + }, + { + "epoch": 0.5308755760368664, + "grad_norm": 0.862069962418511, + "learning_rate": 1.757018289823649e-06, + "loss": 0.9836198091506958, + "step": 2304 + }, + { + "epoch": 0.5311059907834101, + "grad_norm": 0.7978699236275345, + "learning_rate": 1.7567693024612695e-06, + "loss": 0.8258972764015198, + "step": 2305 + }, + { + "epoch": 0.5313364055299539, + "grad_norm": 0.8169244061103897, + "learning_rate": 1.7565202052548277e-06, + "loss": 0.8822964429855347, + "step": 2306 + }, + { + "epoch": 0.5315668202764977, + "grad_norm": 0.8094894252842574, + "learning_rate": 1.7562709982404797e-06, + "loss": 0.721222996711731, + "step": 2307 + }, + { + "epoch": 0.5317972350230414, + "grad_norm": 0.7759663122688174, + "learning_rate": 1.7560216814543974e-06, + "loss": 0.7273069620132446, + "step": 2308 + }, + { + 
"epoch": 0.5320276497695853, + "grad_norm": 0.749740659090673, + "learning_rate": 1.755772254932769e-06, + "loss": 0.8031520843505859, + "step": 2309 + }, + { + "epoch": 0.532258064516129, + "grad_norm": 0.8746676083569236, + "learning_rate": 1.7555227187117982e-06, + "loss": 0.8767163157463074, + "step": 2310 + }, + { + "epoch": 0.5324884792626728, + "grad_norm": 1.052374988916139, + "learning_rate": 1.755273072827705e-06, + "loss": 0.8018463850021362, + "step": 2311 + }, + { + "epoch": 0.5327188940092166, + "grad_norm": 0.9632384627648846, + "learning_rate": 1.7550233173167252e-06, + "loss": 0.8281232118606567, + "step": 2312 + }, + { + "epoch": 0.5329493087557604, + "grad_norm": 0.9472067369973646, + "learning_rate": 1.7547734522151103e-06, + "loss": 0.8802565336227417, + "step": 2313 + }, + { + "epoch": 0.5331797235023041, + "grad_norm": 0.7195582219345643, + "learning_rate": 1.754523477559128e-06, + "loss": 0.8055544495582581, + "step": 2314 + }, + { + "epoch": 0.533410138248848, + "grad_norm": 0.9358658916449707, + "learning_rate": 1.754273393385062e-06, + "loss": 0.8163481950759888, + "step": 2315 + }, + { + "epoch": 0.5336405529953917, + "grad_norm": 0.9365559775291885, + "learning_rate": 1.7540231997292111e-06, + "loss": 0.8308255076408386, + "step": 2316 + }, + { + "epoch": 0.5338709677419354, + "grad_norm": 0.9031429015213124, + "learning_rate": 1.7537728966278913e-06, + "loss": 0.8387685418128967, + "step": 2317 + }, + { + "epoch": 0.5341013824884793, + "grad_norm": 0.7470153179334161, + "learning_rate": 1.7535224841174333e-06, + "loss": 0.8668780326843262, + "step": 2318 + }, + { + "epoch": 0.534331797235023, + "grad_norm": 0.7449540611731051, + "learning_rate": 1.7532719622341842e-06, + "loss": 0.8394712209701538, + "step": 2319 + }, + { + "epoch": 0.5345622119815668, + "grad_norm": 0.7539905771593468, + "learning_rate": 1.7530213310145073e-06, + "loss": 0.7755688428878784, + "step": 2320 + }, + { + "epoch": 0.5347926267281106, + "grad_norm": 
0.8150738821263226, + "learning_rate": 1.7527705904947805e-06, + "loss": 0.7714632749557495, + "step": 2321 + }, + { + "epoch": 0.5350230414746544, + "grad_norm": 0.807680924946579, + "learning_rate": 1.7525197407113997e-06, + "loss": 0.8810869455337524, + "step": 2322 + }, + { + "epoch": 0.5352534562211981, + "grad_norm": 1.0672299468188131, + "learning_rate": 1.7522687817007742e-06, + "loss": 0.8445242643356323, + "step": 2323 + }, + { + "epoch": 0.535483870967742, + "grad_norm": 1.1338085945775938, + "learning_rate": 1.7520177134993311e-06, + "loss": 0.9602948427200317, + "step": 2324 + }, + { + "epoch": 0.5357142857142857, + "grad_norm": 0.7789379367396811, + "learning_rate": 1.7517665361435126e-06, + "loss": 0.7865237593650818, + "step": 2325 + }, + { + "epoch": 0.5359447004608295, + "grad_norm": 0.8870578602537817, + "learning_rate": 1.7515152496697763e-06, + "loss": 0.8062880039215088, + "step": 2326 + }, + { + "epoch": 0.5361751152073733, + "grad_norm": 0.9742037408160464, + "learning_rate": 1.7512638541145966e-06, + "loss": 0.8386664986610413, + "step": 2327 + }, + { + "epoch": 0.5364055299539171, + "grad_norm": 1.0154937609139327, + "learning_rate": 1.7510123495144629e-06, + "loss": 0.973692774772644, + "step": 2328 + }, + { + "epoch": 0.5366359447004608, + "grad_norm": 0.9023959356834507, + "learning_rate": 1.7507607359058808e-06, + "loss": 0.8250089883804321, + "step": 2329 + }, + { + "epoch": 0.5368663594470046, + "grad_norm": 0.8457870176131529, + "learning_rate": 1.750509013325372e-06, + "loss": 0.8578102588653564, + "step": 2330 + }, + { + "epoch": 0.5370967741935484, + "grad_norm": 0.8804595958614453, + "learning_rate": 1.7502571818094732e-06, + "loss": 0.916475236415863, + "step": 2331 + }, + { + "epoch": 0.5373271889400921, + "grad_norm": 0.9225430635370255, + "learning_rate": 1.7500052413947377e-06, + "loss": 0.8210046291351318, + "step": 2332 + }, + { + "epoch": 0.537557603686636, + "grad_norm": 0.7091387099201478, + "learning_rate": 
1.7497531921177344e-06, + "loss": 0.816267728805542, + "step": 2333 + }, + { + "epoch": 0.5377880184331797, + "grad_norm": 0.9764630645457667, + "learning_rate": 1.7495010340150478e-06, + "loss": 1.0091882944107056, + "step": 2334 + }, + { + "epoch": 0.5380184331797235, + "grad_norm": 0.982812584725329, + "learning_rate": 1.7492487671232783e-06, + "loss": 0.7549277544021606, + "step": 2335 + }, + { + "epoch": 0.5382488479262673, + "grad_norm": 0.8589431412898547, + "learning_rate": 1.7489963914790423e-06, + "loss": 0.9584934711456299, + "step": 2336 + }, + { + "epoch": 0.5384792626728111, + "grad_norm": 0.7167225081500926, + "learning_rate": 1.7487439071189713e-06, + "loss": 0.8189069628715515, + "step": 2337 + }, + { + "epoch": 0.5387096774193548, + "grad_norm": 0.976466384445042, + "learning_rate": 1.7484913140797138e-06, + "loss": 0.7529993057250977, + "step": 2338 + }, + { + "epoch": 0.5389400921658987, + "grad_norm": 0.9894954868399615, + "learning_rate": 1.7482386123979324e-06, + "loss": 0.8611496686935425, + "step": 2339 + }, + { + "epoch": 0.5391705069124424, + "grad_norm": 1.2753256885249857, + "learning_rate": 1.7479858021103074e-06, + "loss": 0.9400241374969482, + "step": 2340 + }, + { + "epoch": 0.5394009216589861, + "grad_norm": 0.7513824016722385, + "learning_rate": 1.7477328832535332e-06, + "loss": 0.6686737537384033, + "step": 2341 + }, + { + "epoch": 0.53963133640553, + "grad_norm": 0.7834119073150019, + "learning_rate": 1.747479855864321e-06, + "loss": 0.864795982837677, + "step": 2342 + }, + { + "epoch": 0.5398617511520737, + "grad_norm": 0.9942068845664563, + "learning_rate": 1.7472267199793971e-06, + "loss": 0.9579563140869141, + "step": 2343 + }, + { + "epoch": 0.5400921658986175, + "grad_norm": 0.9464284115225821, + "learning_rate": 1.746973475635504e-06, + "loss": 0.7492884397506714, + "step": 2344 + }, + { + "epoch": 0.5403225806451613, + "grad_norm": 1.1301826150440575, + "learning_rate": 1.7467201228694e-06, + "loss": 1.020420789718628, + 
"step": 2345 + }, + { + "epoch": 0.5405529953917051, + "grad_norm": 0.8996882097606888, + "learning_rate": 1.7464666617178585e-06, + "loss": 0.8277238011360168, + "step": 2346 + }, + { + "epoch": 0.5407834101382488, + "grad_norm": 0.8343415166384458, + "learning_rate": 1.7462130922176694e-06, + "loss": 0.8160337209701538, + "step": 2347 + }, + { + "epoch": 0.5410138248847927, + "grad_norm": 0.940177897473061, + "learning_rate": 1.7459594144056378e-06, + "loss": 0.8742454648017883, + "step": 2348 + }, + { + "epoch": 0.5412442396313364, + "grad_norm": 0.8263630155636004, + "learning_rate": 1.7457056283185847e-06, + "loss": 0.7987914085388184, + "step": 2349 + }, + { + "epoch": 0.5414746543778802, + "grad_norm": 0.8096196719588583, + "learning_rate": 1.7454517339933467e-06, + "loss": 0.6917734146118164, + "step": 2350 + }, + { + "epoch": 0.541705069124424, + "grad_norm": 0.9860357050478065, + "learning_rate": 1.7451977314667763e-06, + "loss": 0.8338258266448975, + "step": 2351 + }, + { + "epoch": 0.5419354838709678, + "grad_norm": 0.6906626367704619, + "learning_rate": 1.7449436207757418e-06, + "loss": 0.8308743238449097, + "step": 2352 + }, + { + "epoch": 0.5421658986175115, + "grad_norm": 0.7126371911422212, + "learning_rate": 1.744689401957127e-06, + "loss": 0.7843145728111267, + "step": 2353 + }, + { + "epoch": 0.5423963133640552, + "grad_norm": 0.6637904176126797, + "learning_rate": 1.7444350750478314e-06, + "loss": 0.9088687896728516, + "step": 2354 + }, + { + "epoch": 0.5426267281105991, + "grad_norm": 1.1601519737508017, + "learning_rate": 1.74418064008477e-06, + "loss": 0.876841127872467, + "step": 2355 + }, + { + "epoch": 0.5428571428571428, + "grad_norm": 0.804702758707697, + "learning_rate": 1.743926097104874e-06, + "loss": 0.7169051170349121, + "step": 2356 + }, + { + "epoch": 0.5430875576036867, + "grad_norm": 0.8414445338031196, + "learning_rate": 1.7436714461450897e-06, + "loss": 0.7979093194007874, + "step": 2357 + }, + { + "epoch": 
0.5433179723502304, + "grad_norm": 0.796767744969521, + "learning_rate": 1.7434166872423795e-06, + "loss": 0.9152545928955078, + "step": 2358 + }, + { + "epoch": 0.5435483870967742, + "grad_norm": 0.8612716514728646, + "learning_rate": 1.7431618204337212e-06, + "loss": 0.8968983888626099, + "step": 2359 + }, + { + "epoch": 0.543778801843318, + "grad_norm": 0.7451796864953032, + "learning_rate": 1.7429068457561086e-06, + "loss": 0.7591085433959961, + "step": 2360 + }, + { + "epoch": 0.5440092165898618, + "grad_norm": 0.8434007797764556, + "learning_rate": 1.7426517632465508e-06, + "loss": 0.6931861639022827, + "step": 2361 + }, + { + "epoch": 0.5442396313364055, + "grad_norm": 0.816030716232177, + "learning_rate": 1.7423965729420729e-06, + "loss": 0.7715095281600952, + "step": 2362 + }, + { + "epoch": 0.5444700460829494, + "grad_norm": 0.7333839549943538, + "learning_rate": 1.742141274879715e-06, + "loss": 0.8282119035720825, + "step": 2363 + }, + { + "epoch": 0.5447004608294931, + "grad_norm": 0.8282161479585932, + "learning_rate": 1.7418858690965337e-06, + "loss": 0.7595704197883606, + "step": 2364 + }, + { + "epoch": 0.5449308755760369, + "grad_norm": 0.8861519618227073, + "learning_rate": 1.7416303556296005e-06, + "loss": 0.8738422393798828, + "step": 2365 + }, + { + "epoch": 0.5451612903225806, + "grad_norm": 0.819062403403448, + "learning_rate": 1.741374734516003e-06, + "loss": 0.8399837017059326, + "step": 2366 + }, + { + "epoch": 0.5453917050691244, + "grad_norm": 0.9147252373002325, + "learning_rate": 1.7411190057928442e-06, + "loss": 0.8213151693344116, + "step": 2367 + }, + { + "epoch": 0.5456221198156682, + "grad_norm": 0.862161359681962, + "learning_rate": 1.740863169497243e-06, + "loss": 0.748835563659668, + "step": 2368 + }, + { + "epoch": 0.5458525345622119, + "grad_norm": 0.6925915187477067, + "learning_rate": 1.7406072256663333e-06, + "loss": 0.9222339391708374, + "step": 2369 + }, + { + "epoch": 0.5460829493087558, + "grad_norm": 
0.6352006169320189, + "learning_rate": 1.7403511743372655e-06, + "loss": 0.6543160676956177, + "step": 2370 + }, + { + "epoch": 0.5463133640552995, + "grad_norm": 0.9993386394035012, + "learning_rate": 1.7400950155472046e-06, + "loss": 0.9828567504882812, + "step": 2371 + }, + { + "epoch": 0.5465437788018433, + "grad_norm": 0.9620494284169527, + "learning_rate": 1.739838749333332e-06, + "loss": 0.95346599817276, + "step": 2372 + }, + { + "epoch": 0.5467741935483871, + "grad_norm": 0.4533946729074916, + "learning_rate": 1.7395823757328442e-06, + "loss": 0.626889705657959, + "step": 2373 + }, + { + "epoch": 0.5470046082949309, + "grad_norm": 0.6641652944774505, + "learning_rate": 1.739325894782954e-06, + "loss": 0.8152071833610535, + "step": 2374 + }, + { + "epoch": 0.5472350230414746, + "grad_norm": 0.7149653321076401, + "learning_rate": 1.7390693065208889e-06, + "loss": 0.8244980573654175, + "step": 2375 + }, + { + "epoch": 0.5474654377880185, + "grad_norm": 0.8801604517186058, + "learning_rate": 1.738812610983892e-06, + "loss": 0.8234372138977051, + "step": 2376 + }, + { + "epoch": 0.5476958525345622, + "grad_norm": 0.8626749383303203, + "learning_rate": 1.7385558082092228e-06, + "loss": 0.9334712624549866, + "step": 2377 + }, + { + "epoch": 0.5479262672811059, + "grad_norm": 0.8866496689156442, + "learning_rate": 1.7382988982341557e-06, + "loss": 0.7873882055282593, + "step": 2378 + }, + { + "epoch": 0.5481566820276498, + "grad_norm": 0.7814140858155267, + "learning_rate": 1.7380418810959814e-06, + "loss": 0.7971000671386719, + "step": 2379 + }, + { + "epoch": 0.5483870967741935, + "grad_norm": 0.7452714019733373, + "learning_rate": 1.7377847568320046e-06, + "loss": 0.8617004156112671, + "step": 2380 + }, + { + "epoch": 0.5486175115207373, + "grad_norm": 0.7316280745753603, + "learning_rate": 1.7375275254795472e-06, + "loss": 0.6798374056816101, + "step": 2381 + }, + { + "epoch": 0.5488479262672811, + "grad_norm": 0.8600424341995414, + "learning_rate": 
1.7372701870759459e-06, + "loss": 0.8621633052825928, + "step": 2382 + }, + { + "epoch": 0.5490783410138249, + "grad_norm": 0.78685909041996, + "learning_rate": 1.7370127416585527e-06, + "loss": 0.6533470153808594, + "step": 2383 + }, + { + "epoch": 0.5493087557603686, + "grad_norm": 0.9199843580999427, + "learning_rate": 1.736755189264736e-06, + "loss": 0.8854461908340454, + "step": 2384 + }, + { + "epoch": 0.5495391705069125, + "grad_norm": 1.0020485772603467, + "learning_rate": 1.7364975299318786e-06, + "loss": 0.9461240768432617, + "step": 2385 + }, + { + "epoch": 0.5497695852534562, + "grad_norm": 1.0179837516521926, + "learning_rate": 1.73623976369738e-06, + "loss": 0.8936882019042969, + "step": 2386 + }, + { + "epoch": 0.55, + "grad_norm": 0.7527230779520249, + "learning_rate": 1.7359818905986544e-06, + "loss": 0.8177640438079834, + "step": 2387 + }, + { + "epoch": 0.5502304147465438, + "grad_norm": 0.7539178622826256, + "learning_rate": 1.7357239106731317e-06, + "loss": 0.793328046798706, + "step": 2388 + }, + { + "epoch": 0.5504608294930876, + "grad_norm": 0.8548599569350254, + "learning_rate": 1.7354658239582572e-06, + "loss": 0.8837069272994995, + "step": 2389 + }, + { + "epoch": 0.5506912442396313, + "grad_norm": 0.8764277126116193, + "learning_rate": 1.7352076304914918e-06, + "loss": 0.8801138401031494, + "step": 2390 + }, + { + "epoch": 0.5509216589861751, + "grad_norm": 0.7981260720892804, + "learning_rate": 1.7349493303103123e-06, + "loss": 0.865073025226593, + "step": 2391 + }, + { + "epoch": 0.5511520737327189, + "grad_norm": 0.5938962289027067, + "learning_rate": 1.7346909234522107e-06, + "loss": 0.8712339401245117, + "step": 2392 + }, + { + "epoch": 0.5513824884792626, + "grad_norm": 0.6857068624612402, + "learning_rate": 1.7344324099546938e-06, + "loss": 0.7689294815063477, + "step": 2393 + }, + { + "epoch": 0.5516129032258065, + "grad_norm": 0.6784843872797971, + "learning_rate": 1.7341737898552851e-06, + "loss": 0.9228999614715576, + "step": 
2394 + }, + { + "epoch": 0.5518433179723502, + "grad_norm": 1.025443261317525, + "learning_rate": 1.7339150631915228e-06, + "loss": 0.9473327398300171, + "step": 2395 + }, + { + "epoch": 0.552073732718894, + "grad_norm": 0.9317831571882359, + "learning_rate": 1.7336562300009604e-06, + "loss": 0.7724621295928955, + "step": 2396 + }, + { + "epoch": 0.5523041474654378, + "grad_norm": 0.7823556125482615, + "learning_rate": 1.7333972903211675e-06, + "loss": 0.8646600246429443, + "step": 2397 + }, + { + "epoch": 0.5525345622119816, + "grad_norm": 0.6673069571562762, + "learning_rate": 1.7331382441897286e-06, + "loss": 0.7143402099609375, + "step": 2398 + }, + { + "epoch": 0.5527649769585253, + "grad_norm": 0.9600129950475998, + "learning_rate": 1.7328790916442446e-06, + "loss": 0.8229624032974243, + "step": 2399 + }, + { + "epoch": 0.5529953917050692, + "grad_norm": 0.8815652742153803, + "learning_rate": 1.7326198327223303e-06, + "loss": 0.7244875431060791, + "step": 2400 + }, + { + "epoch": 0.5532258064516129, + "grad_norm": 0.8586401947703556, + "learning_rate": 1.7323604674616173e-06, + "loss": 0.7797688245773315, + "step": 2401 + }, + { + "epoch": 0.5534562211981566, + "grad_norm": 0.7923271764392044, + "learning_rate": 1.7321009958997519e-06, + "loss": 0.752421498298645, + "step": 2402 + }, + { + "epoch": 0.5536866359447005, + "grad_norm": 0.880725843060538, + "learning_rate": 1.7318414180743962e-06, + "loss": 0.8285892009735107, + "step": 2403 + }, + { + "epoch": 0.5539170506912442, + "grad_norm": 0.7844500606150882, + "learning_rate": 1.7315817340232272e-06, + "loss": 0.8247888088226318, + "step": 2404 + }, + { + "epoch": 0.554147465437788, + "grad_norm": 0.7041289847587934, + "learning_rate": 1.7313219437839384e-06, + "loss": 0.7713418006896973, + "step": 2405 + }, + { + "epoch": 0.5543778801843318, + "grad_norm": 0.8575067968238488, + "learning_rate": 1.7310620473942374e-06, + "loss": 0.8748825788497925, + "step": 2406 + }, + { + "epoch": 0.5546082949308756, + 
"grad_norm": 0.899949436927101, + "learning_rate": 1.730802044891848e-06, + "loss": 0.9255902767181396, + "step": 2407 + }, + { + "epoch": 0.5548387096774193, + "grad_norm": 0.7968868837370462, + "learning_rate": 1.7305419363145093e-06, + "loss": 0.7226976156234741, + "step": 2408 + }, + { + "epoch": 0.5550691244239632, + "grad_norm": 0.8868777191693532, + "learning_rate": 1.7302817216999754e-06, + "loss": 0.9024704694747925, + "step": 2409 + }, + { + "epoch": 0.5552995391705069, + "grad_norm": 0.8331382998314191, + "learning_rate": 1.7300214010860168e-06, + "loss": 0.7857767343521118, + "step": 2410 + }, + { + "epoch": 0.5555299539170507, + "grad_norm": 0.7111146090264087, + "learning_rate": 1.7297609745104183e-06, + "loss": 0.7280064821243286, + "step": 2411 + }, + { + "epoch": 0.5557603686635945, + "grad_norm": 0.8916895272866717, + "learning_rate": 1.72950044201098e-06, + "loss": 0.8909369111061096, + "step": 2412 + }, + { + "epoch": 0.5559907834101383, + "grad_norm": 0.8724458169518867, + "learning_rate": 1.7292398036255183e-06, + "loss": 0.8543871641159058, + "step": 2413 + }, + { + "epoch": 0.556221198156682, + "grad_norm": 0.7364121573266219, + "learning_rate": 1.7289790593918648e-06, + "loss": 0.6934928894042969, + "step": 2414 + }, + { + "epoch": 0.5564516129032258, + "grad_norm": 0.7288921937743348, + "learning_rate": 1.7287182093478658e-06, + "loss": 0.6323058605194092, + "step": 2415 + }, + { + "epoch": 0.5566820276497696, + "grad_norm": 0.9203399963548066, + "learning_rate": 1.7284572535313833e-06, + "loss": 0.8607437014579773, + "step": 2416 + }, + { + "epoch": 0.5569124423963133, + "grad_norm": 0.8312318653257402, + "learning_rate": 1.7281961919802948e-06, + "loss": 0.932594358921051, + "step": 2417 + }, + { + "epoch": 0.5571428571428572, + "grad_norm": 0.8132622554262421, + "learning_rate": 1.727935024732493e-06, + "loss": 0.7239062786102295, + "step": 2418 + }, + { + "epoch": 0.5573732718894009, + "grad_norm": 0.770772581447816, + "learning_rate": 
1.727673751825886e-06, + "loss": 0.7600498199462891, + "step": 2419 + }, + { + "epoch": 0.5576036866359447, + "grad_norm": 0.9553759629640377, + "learning_rate": 1.7274123732983977e-06, + "loss": 0.6888710260391235, + "step": 2420 + }, + { + "epoch": 0.5578341013824885, + "grad_norm": 0.9472816188704319, + "learning_rate": 1.7271508891879657e-06, + "loss": 0.9768370389938354, + "step": 2421 + }, + { + "epoch": 0.5580645161290323, + "grad_norm": 0.7612474564207412, + "learning_rate": 1.7268892995325453e-06, + "loss": 0.7302272319793701, + "step": 2422 + }, + { + "epoch": 0.558294930875576, + "grad_norm": 0.952809818405442, + "learning_rate": 1.7266276043701052e-06, + "loss": 0.7664496898651123, + "step": 2423 + }, + { + "epoch": 0.5585253456221199, + "grad_norm": 0.7105308716985692, + "learning_rate": 1.72636580373863e-06, + "loss": 0.7672723531723022, + "step": 2424 + }, + { + "epoch": 0.5587557603686636, + "grad_norm": 0.9094827818764729, + "learning_rate": 1.7261038976761203e-06, + "loss": 0.7467625141143799, + "step": 2425 + }, + { + "epoch": 0.5589861751152074, + "grad_norm": 1.0609555724090778, + "learning_rate": 1.7258418862205908e-06, + "loss": 0.899692177772522, + "step": 2426 + }, + { + "epoch": 0.5592165898617512, + "grad_norm": 0.8726314105037919, + "learning_rate": 1.7255797694100724e-06, + "loss": 0.9654138088226318, + "step": 2427 + }, + { + "epoch": 0.5594470046082949, + "grad_norm": 1.0261431779245342, + "learning_rate": 1.725317547282611e-06, + "loss": 0.8487396836280823, + "step": 2428 + }, + { + "epoch": 0.5596774193548387, + "grad_norm": 0.7692614118612008, + "learning_rate": 1.7250552198762682e-06, + "loss": 0.7785199284553528, + "step": 2429 + }, + { + "epoch": 0.5599078341013825, + "grad_norm": 0.7931069179642137, + "learning_rate": 1.7247927872291198e-06, + "loss": 0.9243934750556946, + "step": 2430 + }, + { + "epoch": 0.5601382488479263, + "grad_norm": 0.6935679959823647, + "learning_rate": 1.724530249379258e-06, + "loss": 
0.8674443960189819, + "step": 2431 + }, + { + "epoch": 0.56036866359447, + "grad_norm": 0.7564063858493598, + "learning_rate": 1.7242676063647895e-06, + "loss": 0.8022270202636719, + "step": 2432 + }, + { + "epoch": 0.5605990783410139, + "grad_norm": 0.8222900385869091, + "learning_rate": 1.7240048582238367e-06, + "loss": 0.8696796894073486, + "step": 2433 + }, + { + "epoch": 0.5608294930875576, + "grad_norm": 0.8560234672396506, + "learning_rate": 1.7237420049945374e-06, + "loss": 0.7752439975738525, + "step": 2434 + }, + { + "epoch": 0.5610599078341014, + "grad_norm": 0.9286340475505503, + "learning_rate": 1.723479046715044e-06, + "loss": 0.7660201787948608, + "step": 2435 + }, + { + "epoch": 0.5612903225806452, + "grad_norm": 0.7639410477119124, + "learning_rate": 1.7232159834235249e-06, + "loss": 0.9319918155670166, + "step": 2436 + }, + { + "epoch": 0.561520737327189, + "grad_norm": 0.8121463742755932, + "learning_rate": 1.722952815158163e-06, + "loss": 0.8175421357154846, + "step": 2437 + }, + { + "epoch": 0.5617511520737327, + "grad_norm": 0.5646145066796834, + "learning_rate": 1.7226895419571573e-06, + "loss": 0.6959598064422607, + "step": 2438 + }, + { + "epoch": 0.5619815668202764, + "grad_norm": 0.9804875774075569, + "learning_rate": 1.722426163858721e-06, + "loss": 0.8629111051559448, + "step": 2439 + }, + { + "epoch": 0.5622119815668203, + "grad_norm": 1.1148628556143985, + "learning_rate": 1.7221626809010833e-06, + "loss": 0.8222612142562866, + "step": 2440 + }, + { + "epoch": 0.562442396313364, + "grad_norm": 0.7126052614291007, + "learning_rate": 1.721899093122489e-06, + "loss": 0.8329352140426636, + "step": 2441 + }, + { + "epoch": 0.5626728110599079, + "grad_norm": 0.7803804718208336, + "learning_rate": 1.7216354005611966e-06, + "loss": 0.8777236938476562, + "step": 2442 + }, + { + "epoch": 0.5629032258064516, + "grad_norm": 0.8601336969746237, + "learning_rate": 1.7213716032554814e-06, + "loss": 0.8487246036529541, + "step": 2443 + }, + { + 
"epoch": 0.5631336405529954, + "grad_norm": 0.9035051311861264, + "learning_rate": 1.7211077012436327e-06, + "loss": 0.8429645299911499, + "step": 2444 + }, + { + "epoch": 0.5633640552995391, + "grad_norm": 0.9883668092610399, + "learning_rate": 1.720843694563956e-06, + "loss": 0.7683241367340088, + "step": 2445 + }, + { + "epoch": 0.563594470046083, + "grad_norm": 0.839045001132387, + "learning_rate": 1.7205795832547715e-06, + "loss": 0.8468153476715088, + "step": 2446 + }, + { + "epoch": 0.5638248847926267, + "grad_norm": 0.7865527461309724, + "learning_rate": 1.7203153673544136e-06, + "loss": 0.7957276105880737, + "step": 2447 + }, + { + "epoch": 0.5640552995391706, + "grad_norm": 0.7301149604369097, + "learning_rate": 1.7200510469012343e-06, + "loss": 0.703586757183075, + "step": 2448 + }, + { + "epoch": 0.5642857142857143, + "grad_norm": 0.9237896103754119, + "learning_rate": 1.7197866219335988e-06, + "loss": 0.8399583101272583, + "step": 2449 + }, + { + "epoch": 0.5645161290322581, + "grad_norm": 0.9147331037465749, + "learning_rate": 1.7195220924898882e-06, + "loss": 0.8198127746582031, + "step": 2450 + }, + { + "epoch": 0.5647465437788018, + "grad_norm": 0.8751939719560463, + "learning_rate": 1.7192574586084977e-06, + "loss": 0.8345620632171631, + "step": 2451 + }, + { + "epoch": 0.5649769585253456, + "grad_norm": 0.5798955427424709, + "learning_rate": 1.71899272032784e-06, + "loss": 0.7717207670211792, + "step": 2452 + }, + { + "epoch": 0.5652073732718894, + "grad_norm": 1.0279650439820616, + "learning_rate": 1.7187278776863402e-06, + "loss": 0.9178022146224976, + "step": 2453 + }, + { + "epoch": 0.5654377880184331, + "grad_norm": 0.8586126622693072, + "learning_rate": 1.7184629307224405e-06, + "loss": 0.802221417427063, + "step": 2454 + }, + { + "epoch": 0.565668202764977, + "grad_norm": 0.9691589621671786, + "learning_rate": 1.718197879474598e-06, + "loss": 0.8785420656204224, + "step": 2455 + }, + { + "epoch": 0.5658986175115207, + "grad_norm": 
0.8087978885886937, + "learning_rate": 1.7179327239812835e-06, + "loss": 0.866797924041748, + "step": 2456 + }, + { + "epoch": 0.5661290322580645, + "grad_norm": 0.7850858892434726, + "learning_rate": 1.7176674642809848e-06, + "loss": 0.8483223915100098, + "step": 2457 + }, + { + "epoch": 0.5663594470046083, + "grad_norm": 0.7634922973789945, + "learning_rate": 1.7174021004122038e-06, + "loss": 0.815066933631897, + "step": 2458 + }, + { + "epoch": 0.5665898617511521, + "grad_norm": 0.7286124953848899, + "learning_rate": 1.7171366324134575e-06, + "loss": 0.8584767580032349, + "step": 2459 + }, + { + "epoch": 0.5668202764976958, + "grad_norm": 0.8250445352678845, + "learning_rate": 1.7168710603232783e-06, + "loss": 0.8710953593254089, + "step": 2460 + }, + { + "epoch": 0.5670506912442397, + "grad_norm": 0.9434416859632441, + "learning_rate": 1.7166053841802137e-06, + "loss": 0.8174586892127991, + "step": 2461 + }, + { + "epoch": 0.5672811059907834, + "grad_norm": 0.8270311207697365, + "learning_rate": 1.7163396040228263e-06, + "loss": 0.7240795493125916, + "step": 2462 + }, + { + "epoch": 0.5675115207373271, + "grad_norm": 0.9011815170935621, + "learning_rate": 1.7160737198896938e-06, + "loss": 0.8026313781738281, + "step": 2463 + }, + { + "epoch": 0.567741935483871, + "grad_norm": 0.906377679717593, + "learning_rate": 1.7158077318194088e-06, + "loss": 0.8170863389968872, + "step": 2464 + }, + { + "epoch": 0.5679723502304147, + "grad_norm": 0.7708394273236241, + "learning_rate": 1.7155416398505794e-06, + "loss": 0.7524861097335815, + "step": 2465 + }, + { + "epoch": 0.5682027649769585, + "grad_norm": 1.053627484653556, + "learning_rate": 1.7152754440218278e-06, + "loss": 0.9895739555358887, + "step": 2466 + }, + { + "epoch": 0.5684331797235023, + "grad_norm": 0.8044893250734789, + "learning_rate": 1.7150091443717924e-06, + "loss": 0.840786874294281, + "step": 2467 + }, + { + "epoch": 0.5686635944700461, + "grad_norm": 0.7235386782272144, + "learning_rate": 
1.7147427409391265e-06, + "loss": 0.8896929025650024, + "step": 2468 + }, + { + "epoch": 0.5688940092165898, + "grad_norm": 0.930785639448215, + "learning_rate": 1.714476233762498e-06, + "loss": 0.9940589666366577, + "step": 2469 + }, + { + "epoch": 0.5691244239631337, + "grad_norm": 0.8541894175832414, + "learning_rate": 1.7142096228805896e-06, + "loss": 0.8827046155929565, + "step": 2470 + }, + { + "epoch": 0.5693548387096774, + "grad_norm": 0.8477738552913107, + "learning_rate": 1.7139429083321003e-06, + "loss": 0.8402417302131653, + "step": 2471 + }, + { + "epoch": 0.5695852534562212, + "grad_norm": 1.0681644319875638, + "learning_rate": 1.7136760901557428e-06, + "loss": 0.9298208951950073, + "step": 2472 + }, + { + "epoch": 0.569815668202765, + "grad_norm": 0.799198798955049, + "learning_rate": 1.7134091683902456e-06, + "loss": 0.7272841930389404, + "step": 2473 + }, + { + "epoch": 0.5700460829493088, + "grad_norm": 0.9504491625382946, + "learning_rate": 1.7131421430743522e-06, + "loss": 0.7767274379730225, + "step": 2474 + }, + { + "epoch": 0.5702764976958525, + "grad_norm": 0.8321899881110706, + "learning_rate": 1.7128750142468205e-06, + "loss": 0.8381883502006531, + "step": 2475 + }, + { + "epoch": 0.5705069124423963, + "grad_norm": 0.722993858034587, + "learning_rate": 1.7126077819464247e-06, + "loss": 0.6917109489440918, + "step": 2476 + }, + { + "epoch": 0.5707373271889401, + "grad_norm": 0.8529687693157456, + "learning_rate": 1.712340446211952e-06, + "loss": 0.848122239112854, + "step": 2477 + }, + { + "epoch": 0.5709677419354838, + "grad_norm": 0.8115142651418973, + "learning_rate": 1.7120730070822074e-06, + "loss": 0.7880194187164307, + "step": 2478 + }, + { + "epoch": 0.5711981566820277, + "grad_norm": 0.7900923038142705, + "learning_rate": 1.7118054645960077e-06, + "loss": 0.8782297372817993, + "step": 2479 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 0.8386744568018749, + "learning_rate": 1.7115378187921876e-06, + "loss": 
0.9030005931854248, + "step": 2480 + }, + { + "epoch": 0.5716589861751152, + "grad_norm": 1.0512780177061767, + "learning_rate": 1.7112700697095953e-06, + "loss": 0.9950683116912842, + "step": 2481 + }, + { + "epoch": 0.571889400921659, + "grad_norm": 0.7851257012482162, + "learning_rate": 1.7110022173870933e-06, + "loss": 0.8825187683105469, + "step": 2482 + }, + { + "epoch": 0.5721198156682028, + "grad_norm": 0.7742449968104124, + "learning_rate": 1.710734261863561e-06, + "loss": 0.7918775081634521, + "step": 2483 + }, + { + "epoch": 0.5723502304147465, + "grad_norm": 0.8385191739759446, + "learning_rate": 1.7104662031778916e-06, + "loss": 1.0219467878341675, + "step": 2484 + }, + { + "epoch": 0.5725806451612904, + "grad_norm": 0.7273611559924746, + "learning_rate": 1.7101980413689931e-06, + "loss": 0.7633316516876221, + "step": 2485 + }, + { + "epoch": 0.5728110599078341, + "grad_norm": 0.9207367628977638, + "learning_rate": 1.7099297764757891e-06, + "loss": 0.8972171545028687, + "step": 2486 + }, + { + "epoch": 0.5730414746543778, + "grad_norm": 0.9268590747994748, + "learning_rate": 1.7096614085372183e-06, + "loss": 0.9467268586158752, + "step": 2487 + }, + { + "epoch": 0.5732718894009217, + "grad_norm": 0.6697903314360253, + "learning_rate": 1.709392937592233e-06, + "loss": 0.7688668370246887, + "step": 2488 + }, + { + "epoch": 0.5735023041474654, + "grad_norm": 0.9069250629096394, + "learning_rate": 1.7091243636798022e-06, + "loss": 0.8521163463592529, + "step": 2489 + }, + { + "epoch": 0.5737327188940092, + "grad_norm": 1.1876566208797892, + "learning_rate": 1.7088556868389087e-06, + "loss": 0.937403678894043, + "step": 2490 + }, + { + "epoch": 0.573963133640553, + "grad_norm": 0.7484200220587712, + "learning_rate": 1.7085869071085507e-06, + "loss": 0.929175853729248, + "step": 2491 + }, + { + "epoch": 0.5741935483870968, + "grad_norm": 0.75868423962596, + "learning_rate": 1.708318024527741e-06, + "loss": 0.8213154673576355, + "step": 2492 + }, + { + 
"epoch": 0.5744239631336405, + "grad_norm": 0.8570973138589657, + "learning_rate": 1.708049039135508e-06, + "loss": 0.7666962146759033, + "step": 2493 + }, + { + "epoch": 0.5746543778801844, + "grad_norm": 0.944726193523685, + "learning_rate": 1.707779950970894e-06, + "loss": 0.9787846803665161, + "step": 2494 + }, + { + "epoch": 0.5748847926267281, + "grad_norm": 0.9499725243145639, + "learning_rate": 1.7075107600729575e-06, + "loss": 0.9688804149627686, + "step": 2495 + }, + { + "epoch": 0.5751152073732719, + "grad_norm": 0.7169812071362754, + "learning_rate": 1.7072414664807706e-06, + "loss": 0.7186019420623779, + "step": 2496 + }, + { + "epoch": 0.5753456221198157, + "grad_norm": 0.8737696103531859, + "learning_rate": 1.706972070233421e-06, + "loss": 0.814068615436554, + "step": 2497 + }, + { + "epoch": 0.5755760368663595, + "grad_norm": 0.8930538892783126, + "learning_rate": 1.7067025713700111e-06, + "loss": 0.8439940214157104, + "step": 2498 + }, + { + "epoch": 0.5758064516129032, + "grad_norm": 1.0358274070142592, + "learning_rate": 1.706432969929659e-06, + "loss": 1.0199556350708008, + "step": 2499 + }, + { + "epoch": 0.576036866359447, + "grad_norm": 0.8418547467759998, + "learning_rate": 1.7061632659514964e-06, + "loss": 0.9422338008880615, + "step": 2500 + }, + { + "epoch": 0.5762672811059908, + "grad_norm": 0.8692517624840741, + "learning_rate": 1.7058934594746704e-06, + "loss": 0.9307081699371338, + "step": 2501 + }, + { + "epoch": 0.5764976958525345, + "grad_norm": 0.8121605874769848, + "learning_rate": 1.7056235505383433e-06, + "loss": 0.7202768325805664, + "step": 2502 + }, + { + "epoch": 0.5767281105990784, + "grad_norm": 0.915285295701684, + "learning_rate": 1.7053535391816923e-06, + "loss": 1.0184223651885986, + "step": 2503 + }, + { + "epoch": 0.5769585253456221, + "grad_norm": 0.8238573361353964, + "learning_rate": 1.7050834254439085e-06, + "loss": 0.7957574129104614, + "step": 2504 + }, + { + "epoch": 0.5771889400921659, + "grad_norm": 
0.9632097611385487, + "learning_rate": 1.7048132093641989e-06, + "loss": 0.9694541096687317, + "step": 2505 + }, + { + "epoch": 0.5774193548387097, + "grad_norm": 0.7406781740567284, + "learning_rate": 1.704542890981785e-06, + "loss": 0.8427075147628784, + "step": 2506 + }, + { + "epoch": 0.5776497695852535, + "grad_norm": 0.7137957479223747, + "learning_rate": 1.7042724703359032e-06, + "loss": 0.7745763063430786, + "step": 2507 + }, + { + "epoch": 0.5778801843317972, + "grad_norm": 0.8935647722203462, + "learning_rate": 1.7040019474658047e-06, + "loss": 0.8179641962051392, + "step": 2508 + }, + { + "epoch": 0.5781105990783411, + "grad_norm": 0.9010033541227577, + "learning_rate": 1.7037313224107557e-06, + "loss": 0.8118200302124023, + "step": 2509 + }, + { + "epoch": 0.5783410138248848, + "grad_norm": 0.7297456575398072, + "learning_rate": 1.7034605952100364e-06, + "loss": 0.7892665863037109, + "step": 2510 + }, + { + "epoch": 0.5785714285714286, + "grad_norm": 0.736874372872981, + "learning_rate": 1.7031897659029434e-06, + "loss": 0.7442026734352112, + "step": 2511 + }, + { + "epoch": 0.5788018433179724, + "grad_norm": 0.9375581770522491, + "learning_rate": 1.7029188345287865e-06, + "loss": 0.8179585933685303, + "step": 2512 + }, + { + "epoch": 0.5790322580645161, + "grad_norm": 0.8710660194733852, + "learning_rate": 1.7026478011268918e-06, + "loss": 0.7569797039031982, + "step": 2513 + }, + { + "epoch": 0.5792626728110599, + "grad_norm": 0.8952615874674131, + "learning_rate": 1.7023766657365984e-06, + "loss": 0.8464581966400146, + "step": 2514 + }, + { + "epoch": 0.5794930875576036, + "grad_norm": 0.9645554070219402, + "learning_rate": 1.702105428397262e-06, + "loss": 0.7326645255088806, + "step": 2515 + }, + { + "epoch": 0.5797235023041475, + "grad_norm": 0.8243138835822689, + "learning_rate": 1.7018340891482522e-06, + "loss": 0.7993732690811157, + "step": 2516 + }, + { + "epoch": 0.5799539170506912, + "grad_norm": 0.7406582307230963, + "learning_rate": 
1.7015626480289532e-06, + "loss": 0.8124513626098633, + "step": 2517 + }, + { + "epoch": 0.580184331797235, + "grad_norm": 0.7758431888553803, + "learning_rate": 1.701291105078765e-06, + "loss": 0.9075840711593628, + "step": 2518 + }, + { + "epoch": 0.5804147465437788, + "grad_norm": 0.8900052121004013, + "learning_rate": 1.7010194603371009e-06, + "loss": 0.8212069272994995, + "step": 2519 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.8737089153257858, + "learning_rate": 1.7007477138433903e-06, + "loss": 0.7582074999809265, + "step": 2520 + }, + { + "epoch": 0.5808755760368663, + "grad_norm": 0.7402264811343096, + "learning_rate": 1.7004758656370769e-06, + "loss": 0.8917636871337891, + "step": 2521 + }, + { + "epoch": 0.5811059907834102, + "grad_norm": 0.9496944008191128, + "learning_rate": 1.7002039157576186e-06, + "loss": 0.8919704556465149, + "step": 2522 + }, + { + "epoch": 0.5813364055299539, + "grad_norm": 0.8803733592170607, + "learning_rate": 1.699931864244489e-06, + "loss": 0.7474988698959351, + "step": 2523 + }, + { + "epoch": 0.5815668202764976, + "grad_norm": 0.9179665061824968, + "learning_rate": 1.6996597111371758e-06, + "loss": 0.8596241474151611, + "step": 2524 + }, + { + "epoch": 0.5817972350230415, + "grad_norm": 0.8260474861422493, + "learning_rate": 1.699387456475182e-06, + "loss": 0.9316335916519165, + "step": 2525 + }, + { + "epoch": 0.5820276497695852, + "grad_norm": 0.7937616616577486, + "learning_rate": 1.6991151002980248e-06, + "loss": 0.7364813089370728, + "step": 2526 + }, + { + "epoch": 0.582258064516129, + "grad_norm": 0.9072210580359311, + "learning_rate": 1.698842642645236e-06, + "loss": 0.789472758769989, + "step": 2527 + }, + { + "epoch": 0.5824884792626728, + "grad_norm": 0.9988239379820413, + "learning_rate": 1.6985700835563627e-06, + "loss": 1.024861216545105, + "step": 2528 + }, + { + "epoch": 0.5827188940092166, + "grad_norm": 0.9746619752287254, + "learning_rate": 1.6982974230709667e-06, + "loss": 
0.8465025424957275, + "step": 2529 + }, + { + "epoch": 0.5829493087557603, + "grad_norm": 1.0146741583341603, + "learning_rate": 1.6980246612286244e-06, + "loss": 0.7502799034118652, + "step": 2530 + }, + { + "epoch": 0.5831797235023042, + "grad_norm": 0.866831185770848, + "learning_rate": 1.6977517980689264e-06, + "loss": 0.8019870519638062, + "step": 2531 + }, + { + "epoch": 0.5834101382488479, + "grad_norm": 0.783761351839215, + "learning_rate": 1.6974788336314788e-06, + "loss": 0.9048774242401123, + "step": 2532 + }, + { + "epoch": 0.5836405529953917, + "grad_norm": 0.8577409607010705, + "learning_rate": 1.6972057679559018e-06, + "loss": 0.8411067724227905, + "step": 2533 + }, + { + "epoch": 0.5838709677419355, + "grad_norm": 0.7158353942796929, + "learning_rate": 1.6969326010818304e-06, + "loss": 0.7399133443832397, + "step": 2534 + }, + { + "epoch": 0.5841013824884793, + "grad_norm": 0.7309631229110555, + "learning_rate": 1.6966593330489144e-06, + "loss": 0.7553995847702026, + "step": 2535 + }, + { + "epoch": 0.584331797235023, + "grad_norm": 0.7563702103772202, + "learning_rate": 1.6963859638968188e-06, + "loss": 0.8405054807662964, + "step": 2536 + }, + { + "epoch": 0.5845622119815668, + "grad_norm": 0.739785555800379, + "learning_rate": 1.6961124936652223e-06, + "loss": 0.7619640231132507, + "step": 2537 + }, + { + "epoch": 0.5847926267281106, + "grad_norm": 0.6189871014888121, + "learning_rate": 1.6958389223938187e-06, + "loss": 0.7785576581954956, + "step": 2538 + }, + { + "epoch": 0.5850230414746543, + "grad_norm": 1.0593569746028593, + "learning_rate": 1.695565250122317e-06, + "loss": 0.9230754375457764, + "step": 2539 + }, + { + "epoch": 0.5852534562211982, + "grad_norm": 0.9087046574881754, + "learning_rate": 1.69529147689044e-06, + "loss": 0.798599362373352, + "step": 2540 + }, + { + "epoch": 0.5854838709677419, + "grad_norm": 0.7546263570181881, + "learning_rate": 1.6950176027379253e-06, + "loss": 0.8491491079330444, + "step": 2541 + }, + { + 
"epoch": 0.5857142857142857, + "grad_norm": 0.9063392015432612, + "learning_rate": 1.694743627704526e-06, + "loss": 0.7906054854393005, + "step": 2542 + }, + { + "epoch": 0.5859447004608295, + "grad_norm": 0.8834118839199732, + "learning_rate": 1.6944695518300084e-06, + "loss": 0.8178746700286865, + "step": 2543 + }, + { + "epoch": 0.5861751152073733, + "grad_norm": 0.9444844508582247, + "learning_rate": 1.6941953751541552e-06, + "loss": 0.867972731590271, + "step": 2544 + }, + { + "epoch": 0.586405529953917, + "grad_norm": 0.8815618278989616, + "learning_rate": 1.6939210977167622e-06, + "loss": 0.8000613451004028, + "step": 2545 + }, + { + "epoch": 0.5866359447004609, + "grad_norm": 0.938056940810552, + "learning_rate": 1.6936467195576403e-06, + "loss": 0.8473562002182007, + "step": 2546 + }, + { + "epoch": 0.5868663594470046, + "grad_norm": 0.960324746454341, + "learning_rate": 1.6933722407166156e-06, + "loss": 0.971686065196991, + "step": 2547 + }, + { + "epoch": 0.5870967741935483, + "grad_norm": 0.718798566737211, + "learning_rate": 1.6930976612335276e-06, + "loss": 0.6679604053497314, + "step": 2548 + }, + { + "epoch": 0.5873271889400922, + "grad_norm": 0.8662288511956259, + "learning_rate": 1.692822981148232e-06, + "loss": 0.81952303647995, + "step": 2549 + }, + { + "epoch": 0.5875576036866359, + "grad_norm": 0.7171085968938, + "learning_rate": 1.6925482005005978e-06, + "loss": 0.8711779713630676, + "step": 2550 + }, + { + "epoch": 0.5877880184331797, + "grad_norm": 0.8419799604008648, + "learning_rate": 1.6922733193305093e-06, + "loss": 0.930451512336731, + "step": 2551 + }, + { + "epoch": 0.5880184331797235, + "grad_norm": 0.8349862719015169, + "learning_rate": 1.6919983376778647e-06, + "loss": 0.8435598611831665, + "step": 2552 + }, + { + "epoch": 0.5882488479262673, + "grad_norm": 0.8491940209701643, + "learning_rate": 1.6917232555825774e-06, + "loss": 0.8868621587753296, + "step": 2553 + }, + { + "epoch": 0.588479262672811, + "grad_norm": 
0.7537041162487105, + "learning_rate": 1.6914480730845752e-06, + "loss": 0.6821786165237427, + "step": 2554 + }, + { + "epoch": 0.5887096774193549, + "grad_norm": 0.8487688242201222, + "learning_rate": 1.691172790223801e-06, + "loss": 0.7241402864456177, + "step": 2555 + }, + { + "epoch": 0.5889400921658986, + "grad_norm": 0.7422220828348832, + "learning_rate": 1.690897407040211e-06, + "loss": 0.7477490305900574, + "step": 2556 + }, + { + "epoch": 0.5891705069124424, + "grad_norm": 0.7636915444427955, + "learning_rate": 1.690621923573777e-06, + "loss": 0.7881484031677246, + "step": 2557 + }, + { + "epoch": 0.5894009216589862, + "grad_norm": 0.959692830610789, + "learning_rate": 1.6903463398644848e-06, + "loss": 0.8292979001998901, + "step": 2558 + }, + { + "epoch": 0.58963133640553, + "grad_norm": 0.711937804642515, + "learning_rate": 1.690070655952336e-06, + "loss": 0.7068917751312256, + "step": 2559 + }, + { + "epoch": 0.5898617511520737, + "grad_norm": 1.1143023950252693, + "learning_rate": 1.6897948718773443e-06, + "loss": 0.8907356262207031, + "step": 2560 + }, + { + "epoch": 0.5900921658986175, + "grad_norm": 0.7930222105996996, + "learning_rate": 1.6895189876795405e-06, + "loss": 0.7762824892997742, + "step": 2561 + }, + { + "epoch": 0.5903225806451613, + "grad_norm": 1.0922797891559575, + "learning_rate": 1.6892430033989685e-06, + "loss": 0.9682759046554565, + "step": 2562 + }, + { + "epoch": 0.590552995391705, + "grad_norm": 0.8231082510824629, + "learning_rate": 1.6889669190756866e-06, + "loss": 0.7594735622406006, + "step": 2563 + }, + { + "epoch": 0.5907834101382489, + "grad_norm": 0.8117866090414669, + "learning_rate": 1.6886907347497687e-06, + "loss": 0.8161605000495911, + "step": 2564 + }, + { + "epoch": 0.5910138248847926, + "grad_norm": 0.8557086150703954, + "learning_rate": 1.6884144504613023e-06, + "loss": 0.9390331506729126, + "step": 2565 + }, + { + "epoch": 0.5912442396313364, + "grad_norm": 0.9387748138594502, + "learning_rate": 
1.68813806625039e-06, + "loss": 0.8895832300186157, + "step": 2566 + }, + { + "epoch": 0.5914746543778802, + "grad_norm": 0.8802161511936953, + "learning_rate": 1.687861582157148e-06, + "loss": 0.7779919505119324, + "step": 2567 + }, + { + "epoch": 0.591705069124424, + "grad_norm": 1.139110447936057, + "learning_rate": 1.687584998221708e-06, + "loss": 0.8974252343177795, + "step": 2568 + }, + { + "epoch": 0.5919354838709677, + "grad_norm": 0.8073269492940187, + "learning_rate": 1.687308314484216e-06, + "loss": 0.8487393856048584, + "step": 2569 + }, + { + "epoch": 0.5921658986175116, + "grad_norm": 0.8310515688854938, + "learning_rate": 1.6870315309848318e-06, + "loss": 0.8356295824050903, + "step": 2570 + }, + { + "epoch": 0.5923963133640553, + "grad_norm": 0.9033360313158958, + "learning_rate": 1.6867546477637307e-06, + "loss": 0.8180248737335205, + "step": 2571 + }, + { + "epoch": 0.5926267281105991, + "grad_norm": 0.6950974205275126, + "learning_rate": 1.6864776648611013e-06, + "loss": 0.8456830978393555, + "step": 2572 + }, + { + "epoch": 0.5928571428571429, + "grad_norm": 0.9039181033590447, + "learning_rate": 1.6862005823171476e-06, + "loss": 0.8378905057907104, + "step": 2573 + }, + { + "epoch": 0.5930875576036866, + "grad_norm": 0.835432630485808, + "learning_rate": 1.685923400172088e-06, + "loss": 0.8060408234596252, + "step": 2574 + }, + { + "epoch": 0.5933179723502304, + "grad_norm": 0.8354491785263655, + "learning_rate": 1.685646118466155e-06, + "loss": 0.7550709247589111, + "step": 2575 + }, + { + "epoch": 0.5935483870967742, + "grad_norm": 0.805260271869055, + "learning_rate": 1.6853687372395955e-06, + "loss": 0.8475208282470703, + "step": 2576 + }, + { + "epoch": 0.593778801843318, + "grad_norm": 1.0626255995304192, + "learning_rate": 1.6850912565326709e-06, + "loss": 0.8681533336639404, + "step": 2577 + }, + { + "epoch": 0.5940092165898617, + "grad_norm": 0.9000714044087056, + "learning_rate": 1.6848136763856573e-06, + "loss": 0.7756578922271729, + 
"step": 2578 + }, + { + "epoch": 0.5942396313364056, + "grad_norm": 1.1163759985623336, + "learning_rate": 1.6845359968388456e-06, + "loss": 0.8910564184188843, + "step": 2579 + }, + { + "epoch": 0.5944700460829493, + "grad_norm": 0.7484768523036672, + "learning_rate": 1.6842582179325397e-06, + "loss": 0.7293382883071899, + "step": 2580 + }, + { + "epoch": 0.5947004608294931, + "grad_norm": 0.8208214849988605, + "learning_rate": 1.6839803397070597e-06, + "loss": 0.8497427105903625, + "step": 2581 + }, + { + "epoch": 0.5949308755760369, + "grad_norm": 0.9124854441462121, + "learning_rate": 1.6837023622027386e-06, + "loss": 0.800891637802124, + "step": 2582 + }, + { + "epoch": 0.5951612903225807, + "grad_norm": 0.8887114325795745, + "learning_rate": 1.683424285459925e-06, + "loss": 0.889703631401062, + "step": 2583 + }, + { + "epoch": 0.5953917050691244, + "grad_norm": 0.83139201735135, + "learning_rate": 1.6831461095189808e-06, + "loss": 0.7500913143157959, + "step": 2584 + }, + { + "epoch": 0.5956221198156681, + "grad_norm": 0.8260167845821169, + "learning_rate": 1.6828678344202834e-06, + "loss": 0.8575263023376465, + "step": 2585 + }, + { + "epoch": 0.595852534562212, + "grad_norm": 0.8796083393133354, + "learning_rate": 1.6825894602042238e-06, + "loss": 0.7754372358322144, + "step": 2586 + }, + { + "epoch": 0.5960829493087557, + "grad_norm": 1.0529816523070568, + "learning_rate": 1.6823109869112074e-06, + "loss": 0.8861502408981323, + "step": 2587 + }, + { + "epoch": 0.5963133640552996, + "grad_norm": 0.7738036894554111, + "learning_rate": 1.6820324145816548e-06, + "loss": 0.725920557975769, + "step": 2588 + }, + { + "epoch": 0.5965437788018433, + "grad_norm": 0.7887605961214393, + "learning_rate": 1.6817537432559998e-06, + "loss": 0.6195499897003174, + "step": 2589 + }, + { + "epoch": 0.5967741935483871, + "grad_norm": 0.8405918169035362, + "learning_rate": 1.6814749729746918e-06, + "loss": 0.8757472038269043, + "step": 2590 + }, + { + "epoch": 
0.5970046082949308, + "grad_norm": 0.8710168774832879, + "learning_rate": 1.6811961037781934e-06, + "loss": 0.8024059534072876, + "step": 2591 + }, + { + "epoch": 0.5972350230414747, + "grad_norm": 1.1763814328442668, + "learning_rate": 1.6809171357069825e-06, + "loss": 0.8397082090377808, + "step": 2592 + }, + { + "epoch": 0.5974654377880184, + "grad_norm": 0.8163820389720032, + "learning_rate": 1.6806380688015507e-06, + "loss": 0.7693872451782227, + "step": 2593 + }, + { + "epoch": 0.5976958525345623, + "grad_norm": 0.7668441612993817, + "learning_rate": 1.6803589031024043e-06, + "loss": 0.7918043732643127, + "step": 2594 + }, + { + "epoch": 0.597926267281106, + "grad_norm": 0.7951277033960863, + "learning_rate": 1.680079638650064e-06, + "loss": 0.8046969175338745, + "step": 2595 + }, + { + "epoch": 0.5981566820276498, + "grad_norm": 0.9724191958452253, + "learning_rate": 1.6798002754850643e-06, + "loss": 0.7889789938926697, + "step": 2596 + }, + { + "epoch": 0.5983870967741935, + "grad_norm": 0.8356070849986357, + "learning_rate": 1.6795208136479543e-06, + "loss": 0.874780535697937, + "step": 2597 + }, + { + "epoch": 0.5986175115207373, + "grad_norm": 0.8380940855873632, + "learning_rate": 1.679241253179298e-06, + "loss": 0.8728631734848022, + "step": 2598 + }, + { + "epoch": 0.5988479262672811, + "grad_norm": 0.7909132896338992, + "learning_rate": 1.678961594119673e-06, + "loss": 0.5940345525741577, + "step": 2599 + }, + { + "epoch": 0.5990783410138248, + "grad_norm": 0.7873638428289793, + "learning_rate": 1.6786818365096712e-06, + "loss": 0.8524528741836548, + "step": 2600 + }, + { + "epoch": 0.5993087557603687, + "grad_norm": 1.2099119623298256, + "learning_rate": 1.6784019803899e-06, + "loss": 1.0738554000854492, + "step": 2601 + }, + { + "epoch": 0.5995391705069124, + "grad_norm": 0.9987206599474828, + "learning_rate": 1.6781220258009787e-06, + "loss": 0.9146362543106079, + "step": 2602 + }, + { + "epoch": 0.5997695852534562, + "grad_norm": 
0.9546196333490053, + "learning_rate": 1.6778419727835434e-06, + "loss": 0.8846019506454468, + "step": 2603 + }, + { + "epoch": 0.6, + "grad_norm": 1.0356705992849526, + "learning_rate": 1.6775618213782427e-06, + "loss": 0.9564694166183472, + "step": 2604 + }, + { + "epoch": 0.6002304147465438, + "grad_norm": 0.8649265876220377, + "learning_rate": 1.6772815716257411e-06, + "loss": 0.7311475276947021, + "step": 2605 + }, + { + "epoch": 0.6004608294930875, + "grad_norm": 0.9996641063184493, + "learning_rate": 1.6770012235667157e-06, + "loss": 0.8198719024658203, + "step": 2606 + }, + { + "epoch": 0.6006912442396314, + "grad_norm": 0.8625199282325245, + "learning_rate": 1.676720777241859e-06, + "loss": 0.7667897939682007, + "step": 2607 + }, + { + "epoch": 0.6009216589861751, + "grad_norm": 0.8068998344787891, + "learning_rate": 1.6764402326918775e-06, + "loss": 0.8438166379928589, + "step": 2608 + }, + { + "epoch": 0.6011520737327188, + "grad_norm": 0.8540979807575545, + "learning_rate": 1.6761595899574913e-06, + "loss": 0.801039457321167, + "step": 2609 + }, + { + "epoch": 0.6013824884792627, + "grad_norm": 0.8234203241271092, + "learning_rate": 1.6758788490794362e-06, + "loss": 0.8063384294509888, + "step": 2610 + }, + { + "epoch": 0.6016129032258064, + "grad_norm": 0.6526013686548677, + "learning_rate": 1.6755980100984609e-06, + "loss": 0.7574378848075867, + "step": 2611 + }, + { + "epoch": 0.6018433179723502, + "grad_norm": 0.9515660687698646, + "learning_rate": 1.6753170730553285e-06, + "loss": 0.7640282511711121, + "step": 2612 + }, + { + "epoch": 0.602073732718894, + "grad_norm": 0.8028588885811085, + "learning_rate": 1.675036037990817e-06, + "loss": 0.8366582989692688, + "step": 2613 + }, + { + "epoch": 0.6023041474654378, + "grad_norm": 0.9790278189412774, + "learning_rate": 1.6747549049457184e-06, + "loss": 0.851488471031189, + "step": 2614 + }, + { + "epoch": 0.6025345622119815, + "grad_norm": 0.8888933014827352, + "learning_rate": 1.6744736739608385e-06, 
+ "loss": 0.6821870803833008, + "step": 2615 + }, + { + "epoch": 0.6027649769585254, + "grad_norm": 0.9884428615602953, + "learning_rate": 1.6741923450769977e-06, + "loss": 0.9263452887535095, + "step": 2616 + }, + { + "epoch": 0.6029953917050691, + "grad_norm": 0.7660541738576696, + "learning_rate": 1.6739109183350303e-06, + "loss": 0.7471155524253845, + "step": 2617 + }, + { + "epoch": 0.603225806451613, + "grad_norm": 0.8463548916487829, + "learning_rate": 1.6736293937757858e-06, + "loss": 0.8859940767288208, + "step": 2618 + }, + { + "epoch": 0.6034562211981567, + "grad_norm": 0.7725702923302962, + "learning_rate": 1.673347771440126e-06, + "loss": 0.8078656792640686, + "step": 2619 + }, + { + "epoch": 0.6036866359447005, + "grad_norm": 0.8796637852565455, + "learning_rate": 1.673066051368929e-06, + "loss": 0.7663185596466064, + "step": 2620 + }, + { + "epoch": 0.6039170506912442, + "grad_norm": 0.7762146466532337, + "learning_rate": 1.6727842336030855e-06, + "loss": 0.7924770712852478, + "step": 2621 + }, + { + "epoch": 0.604147465437788, + "grad_norm": 0.6362525346897695, + "learning_rate": 1.672502318183501e-06, + "loss": 0.7781439423561096, + "step": 2622 + }, + { + "epoch": 0.6043778801843318, + "grad_norm": 0.7824821748809755, + "learning_rate": 1.6722203051510953e-06, + "loss": 0.9342260360717773, + "step": 2623 + }, + { + "epoch": 0.6046082949308755, + "grad_norm": 0.9113412146225311, + "learning_rate": 1.6719381945468024e-06, + "loss": 0.8589230179786682, + "step": 2624 + }, + { + "epoch": 0.6048387096774194, + "grad_norm": 0.9092021688294594, + "learning_rate": 1.67165598641157e-06, + "loss": 0.8692198991775513, + "step": 2625 + }, + { + "epoch": 0.6050691244239631, + "grad_norm": 0.9811252814075038, + "learning_rate": 1.6713736807863606e-06, + "loss": 0.9220771789550781, + "step": 2626 + }, + { + "epoch": 0.6052995391705069, + "grad_norm": 0.7869789442575379, + "learning_rate": 1.6710912777121497e-06, + "loss": 0.670639157295227, + "step": 2627 + }, + 
{ + "epoch": 0.6055299539170507, + "grad_norm": 0.8458627233906328, + "learning_rate": 1.6708087772299287e-06, + "loss": 0.780914306640625, + "step": 2628 + }, + { + "epoch": 0.6057603686635945, + "grad_norm": 0.7718782555310939, + "learning_rate": 1.6705261793807014e-06, + "loss": 0.836430549621582, + "step": 2629 + }, + { + "epoch": 0.6059907834101382, + "grad_norm": 0.8965474432723056, + "learning_rate": 1.670243484205487e-06, + "loss": 0.84266197681427, + "step": 2630 + }, + { + "epoch": 0.6062211981566821, + "grad_norm": 0.8992013517980091, + "learning_rate": 1.6699606917453184e-06, + "loss": 0.9276752471923828, + "step": 2631 + }, + { + "epoch": 0.6064516129032258, + "grad_norm": 0.8740634897243095, + "learning_rate": 1.6696778020412418e-06, + "loss": 0.8319100141525269, + "step": 2632 + }, + { + "epoch": 0.6066820276497696, + "grad_norm": 0.9778851785690291, + "learning_rate": 1.669394815134319e-06, + "loss": 0.7511987686157227, + "step": 2633 + }, + { + "epoch": 0.6069124423963134, + "grad_norm": 0.9559089829828732, + "learning_rate": 1.6691117310656249e-06, + "loss": 0.7847566604614258, + "step": 2634 + }, + { + "epoch": 0.6071428571428571, + "grad_norm": 0.7352732117136743, + "learning_rate": 1.668828549876249e-06, + "loss": 0.8598428964614868, + "step": 2635 + }, + { + "epoch": 0.6073732718894009, + "grad_norm": 0.9632462301651329, + "learning_rate": 1.6685452716072942e-06, + "loss": 0.8676267266273499, + "step": 2636 + }, + { + "epoch": 0.6076036866359447, + "grad_norm": 0.9796050613045469, + "learning_rate": 1.6682618962998787e-06, + "loss": 0.8139858841896057, + "step": 2637 + }, + { + "epoch": 0.6078341013824885, + "grad_norm": 0.9214980939594923, + "learning_rate": 1.6679784239951334e-06, + "loss": 0.878848671913147, + "step": 2638 + }, + { + "epoch": 0.6080645161290322, + "grad_norm": 0.8942413316087445, + "learning_rate": 1.6676948547342038e-06, + "loss": 0.7094229459762573, + "step": 2639 + }, + { + "epoch": 0.6082949308755761, + "grad_norm": 
0.7183954232108332, + "learning_rate": 1.6674111885582502e-06, + "loss": 0.7908186912536621, + "step": 2640 + }, + { + "epoch": 0.6085253456221198, + "grad_norm": 0.705517985038791, + "learning_rate": 1.6671274255084465e-06, + "loss": 0.7205992341041565, + "step": 2641 + }, + { + "epoch": 0.6087557603686636, + "grad_norm": 0.937951031991606, + "learning_rate": 1.6668435656259796e-06, + "loss": 0.8098955750465393, + "step": 2642 + }, + { + "epoch": 0.6089861751152074, + "grad_norm": 0.8047793122116887, + "learning_rate": 1.6665596089520522e-06, + "loss": 0.9344205856323242, + "step": 2643 + }, + { + "epoch": 0.6092165898617512, + "grad_norm": 0.73132257965357, + "learning_rate": 1.6662755555278798e-06, + "loss": 0.6149121522903442, + "step": 2644 + }, + { + "epoch": 0.6094470046082949, + "grad_norm": 1.1550816011183633, + "learning_rate": 1.6659914053946929e-06, + "loss": 0.790631115436554, + "step": 2645 + }, + { + "epoch": 0.6096774193548387, + "grad_norm": 0.9832349740984434, + "learning_rate": 1.6657071585937349e-06, + "loss": 0.7789372801780701, + "step": 2646 + }, + { + "epoch": 0.6099078341013825, + "grad_norm": 0.7425679816784971, + "learning_rate": 1.6654228151662641e-06, + "loss": 0.9119753837585449, + "step": 2647 + }, + { + "epoch": 0.6101382488479262, + "grad_norm": 1.0635804319271085, + "learning_rate": 1.6651383751535526e-06, + "loss": 0.827568769454956, + "step": 2648 + }, + { + "epoch": 0.6103686635944701, + "grad_norm": 0.9620609244203838, + "learning_rate": 1.6648538385968865e-06, + "loss": 0.8862377405166626, + "step": 2649 + }, + { + "epoch": 0.6105990783410138, + "grad_norm": 0.7954209003880245, + "learning_rate": 1.6645692055375658e-06, + "loss": 0.7765665054321289, + "step": 2650 + }, + { + "epoch": 0.6108294930875576, + "grad_norm": 0.7698374340240739, + "learning_rate": 1.6642844760169048e-06, + "loss": 0.7673745155334473, + "step": 2651 + }, + { + "epoch": 0.6110599078341014, + "grad_norm": 1.051257553540871, + "learning_rate": 
1.6639996500762313e-06, + "loss": 0.8539090752601624, + "step": 2652 + }, + { + "epoch": 0.6112903225806452, + "grad_norm": 0.8676017636407886, + "learning_rate": 1.663714727756888e-06, + "loss": 0.9146299362182617, + "step": 2653 + }, + { + "epoch": 0.6115207373271889, + "grad_norm": 0.9802646170879412, + "learning_rate": 1.6634297091002304e-06, + "loss": 0.6720675230026245, + "step": 2654 + }, + { + "epoch": 0.6117511520737328, + "grad_norm": 0.9963804792413621, + "learning_rate": 1.6631445941476287e-06, + "loss": 0.876419186592102, + "step": 2655 + }, + { + "epoch": 0.6119815668202765, + "grad_norm": 0.8251901500966289, + "learning_rate": 1.6628593829404673e-06, + "loss": 0.781826376914978, + "step": 2656 + }, + { + "epoch": 0.6122119815668203, + "grad_norm": 1.0156308960299383, + "learning_rate": 1.662574075520144e-06, + "loss": 0.8700725436210632, + "step": 2657 + }, + { + "epoch": 0.6124423963133641, + "grad_norm": 0.8730333366815507, + "learning_rate": 1.6622886719280703e-06, + "loss": 0.7927212715148926, + "step": 2658 + }, + { + "epoch": 0.6126728110599078, + "grad_norm": 0.9472958125063492, + "learning_rate": 1.6620031722056732e-06, + "loss": 0.8402982354164124, + "step": 2659 + }, + { + "epoch": 0.6129032258064516, + "grad_norm": 0.9246784332742947, + "learning_rate": 1.6617175763943916e-06, + "loss": 0.844031572341919, + "step": 2660 + }, + { + "epoch": 0.6131336405529954, + "grad_norm": 1.1749754124811849, + "learning_rate": 1.66143188453568e-06, + "loss": 0.7927590608596802, + "step": 2661 + }, + { + "epoch": 0.6133640552995392, + "grad_norm": 0.7562363270320578, + "learning_rate": 1.6611460966710057e-06, + "loss": 0.6881238222122192, + "step": 2662 + }, + { + "epoch": 0.6135944700460829, + "grad_norm": 0.7503304726479316, + "learning_rate": 1.6608602128418512e-06, + "loss": 0.8782250881195068, + "step": 2663 + }, + { + "epoch": 0.6138248847926268, + "grad_norm": 0.764429872232153, + "learning_rate": 1.6605742330897112e-06, + "loss": 
0.810072124004364, + "step": 2664 + }, + { + "epoch": 0.6140552995391705, + "grad_norm": 0.7959070796498304, + "learning_rate": 1.660288157456096e-06, + "loss": 0.9278649091720581, + "step": 2665 + }, + { + "epoch": 0.6142857142857143, + "grad_norm": 0.8518702716538695, + "learning_rate": 1.6600019859825287e-06, + "loss": 0.7821990251541138, + "step": 2666 + }, + { + "epoch": 0.614516129032258, + "grad_norm": 0.8000150810917545, + "learning_rate": 1.6597157187105474e-06, + "loss": 0.7945138216018677, + "step": 2667 + }, + { + "epoch": 0.6147465437788019, + "grad_norm": 0.9158855636867193, + "learning_rate": 1.659429355681702e-06, + "loss": 0.7796168327331543, + "step": 2668 + }, + { + "epoch": 0.6149769585253456, + "grad_norm": 0.8778480996767207, + "learning_rate": 1.659142896937559e-06, + "loss": 0.8412867784500122, + "step": 2669 + }, + { + "epoch": 0.6152073732718893, + "grad_norm": 0.8776586025383009, + "learning_rate": 1.6588563425196976e-06, + "loss": 0.8507891893386841, + "step": 2670 + }, + { + "epoch": 0.6154377880184332, + "grad_norm": 0.7470530836348557, + "learning_rate": 1.6585696924697097e-06, + "loss": 0.7538737654685974, + "step": 2671 + }, + { + "epoch": 0.6156682027649769, + "grad_norm": 0.7938343055651664, + "learning_rate": 1.6582829468292027e-06, + "loss": 0.7241994142532349, + "step": 2672 + }, + { + "epoch": 0.6158986175115208, + "grad_norm": 0.7740707689038899, + "learning_rate": 1.6579961056397979e-06, + "loss": 0.8282276391983032, + "step": 2673 + }, + { + "epoch": 0.6161290322580645, + "grad_norm": 0.9834275785675608, + "learning_rate": 1.657709168943129e-06, + "loss": 0.7823094725608826, + "step": 2674 + }, + { + "epoch": 0.6163594470046083, + "grad_norm": 0.7814560466718257, + "learning_rate": 1.6574221367808452e-06, + "loss": 0.7682117819786072, + "step": 2675 + }, + { + "epoch": 0.616589861751152, + "grad_norm": 0.791790817396352, + "learning_rate": 1.6571350091946084e-06, + "loss": 0.7483188509941101, + "step": 2676 + }, + { + 
"epoch": 0.6168202764976959, + "grad_norm": 0.7904062559480196, + "learning_rate": 1.656847786226095e-06, + "loss": 0.8244579434394836, + "step": 2677 + }, + { + "epoch": 0.6170506912442396, + "grad_norm": 0.935192090002093, + "learning_rate": 1.6565604679169951e-06, + "loss": 0.9741685390472412, + "step": 2678 + }, + { + "epoch": 0.6172811059907835, + "grad_norm": 1.2715516239943523, + "learning_rate": 1.6562730543090122e-06, + "loss": 1.0004706382751465, + "step": 2679 + }, + { + "epoch": 0.6175115207373272, + "grad_norm": 0.7382412100690486, + "learning_rate": 1.6559855454438644e-06, + "loss": 0.6897011399269104, + "step": 2680 + }, + { + "epoch": 0.617741935483871, + "grad_norm": 0.6330897297720288, + "learning_rate": 1.6556979413632833e-06, + "loss": 0.7250478267669678, + "step": 2681 + }, + { + "epoch": 0.6179723502304147, + "grad_norm": 0.9717515360338855, + "learning_rate": 1.6554102421090137e-06, + "loss": 0.850714385509491, + "step": 2682 + }, + { + "epoch": 0.6182027649769585, + "grad_norm": 0.917367886199939, + "learning_rate": 1.6551224477228152e-06, + "loss": 0.8389794230461121, + "step": 2683 + }, + { + "epoch": 0.6184331797235023, + "grad_norm": 0.8244704754842406, + "learning_rate": 1.6548345582464608e-06, + "loss": 0.8004277944564819, + "step": 2684 + }, + { + "epoch": 0.618663594470046, + "grad_norm": 0.9438052955461359, + "learning_rate": 1.654546573721737e-06, + "loss": 0.8439298868179321, + "step": 2685 + }, + { + "epoch": 0.6188940092165899, + "grad_norm": 0.9506767899718855, + "learning_rate": 1.6542584941904448e-06, + "loss": 0.7715939283370972, + "step": 2686 + }, + { + "epoch": 0.6191244239631336, + "grad_norm": 0.7277066195828455, + "learning_rate": 1.6539703196943982e-06, + "loss": 0.8521275520324707, + "step": 2687 + }, + { + "epoch": 0.6193548387096774, + "grad_norm": 0.9502964788805838, + "learning_rate": 1.6536820502754249e-06, + "loss": 0.8773370981216431, + "step": 2688 + }, + { + "epoch": 0.6195852534562212, + "grad_norm": 
0.8896877670997408, + "learning_rate": 1.653393685975368e-06, + "loss": 0.7613356113433838, + "step": 2689 + }, + { + "epoch": 0.619815668202765, + "grad_norm": 0.7872525626089157, + "learning_rate": 1.6531052268360823e-06, + "loss": 0.7534692287445068, + "step": 2690 + }, + { + "epoch": 0.6200460829493087, + "grad_norm": 0.8888603991720845, + "learning_rate": 1.652816672899438e-06, + "loss": 0.861242413520813, + "step": 2691 + }, + { + "epoch": 0.6202764976958526, + "grad_norm": 1.0955455640383855, + "learning_rate": 1.652528024207317e-06, + "loss": 0.9778954982757568, + "step": 2692 + }, + { + "epoch": 0.6205069124423963, + "grad_norm": 0.8389124431813023, + "learning_rate": 1.6522392808016176e-06, + "loss": 0.7874879240989685, + "step": 2693 + }, + { + "epoch": 0.6207373271889401, + "grad_norm": 1.038077147354541, + "learning_rate": 1.6519504427242503e-06, + "loss": 0.8306739330291748, + "step": 2694 + }, + { + "epoch": 0.6209677419354839, + "grad_norm": 0.890554970207788, + "learning_rate": 1.651661510017139e-06, + "loss": 0.7617331743240356, + "step": 2695 + }, + { + "epoch": 0.6211981566820276, + "grad_norm": 0.8325839299854928, + "learning_rate": 1.6513724827222223e-06, + "loss": 0.8912776708602905, + "step": 2696 + }, + { + "epoch": 0.6214285714285714, + "grad_norm": 0.9626202232237234, + "learning_rate": 1.6510833608814519e-06, + "loss": 0.832025945186615, + "step": 2697 + }, + { + "epoch": 0.6216589861751152, + "grad_norm": 0.8573045739455887, + "learning_rate": 1.6507941445367934e-06, + "loss": 0.7391358613967896, + "step": 2698 + }, + { + "epoch": 0.621889400921659, + "grad_norm": 0.8417803604945624, + "learning_rate": 1.6505048337302267e-06, + "loss": 0.7968891263008118, + "step": 2699 + }, + { + "epoch": 0.6221198156682027, + "grad_norm": 0.7943584636642551, + "learning_rate": 1.6502154285037446e-06, + "loss": 0.8268226981163025, + "step": 2700 + }, + { + "epoch": 0.6223502304147466, + "grad_norm": 0.8943748659016423, + "learning_rate": 
1.6499259288993536e-06, + "loss": 0.8727509379386902, + "step": 2701 + }, + { + "epoch": 0.6225806451612903, + "grad_norm": 0.9781149876582625, + "learning_rate": 1.6496363349590746e-06, + "loss": 0.8419584035873413, + "step": 2702 + }, + { + "epoch": 0.6228110599078341, + "grad_norm": 0.9222004845701074, + "learning_rate": 1.6493466467249415e-06, + "loss": 0.7753620743751526, + "step": 2703 + }, + { + "epoch": 0.6230414746543779, + "grad_norm": 0.8188505837862442, + "learning_rate": 1.6490568642390022e-06, + "loss": 0.7735302448272705, + "step": 2704 + }, + { + "epoch": 0.6232718894009217, + "grad_norm": 0.892742684163995, + "learning_rate": 1.6487669875433183e-06, + "loss": 0.8730747699737549, + "step": 2705 + }, + { + "epoch": 0.6235023041474654, + "grad_norm": 1.081206789540213, + "learning_rate": 1.648477016679965e-06, + "loss": 1.026259183883667, + "step": 2706 + }, + { + "epoch": 0.6237327188940092, + "grad_norm": 1.1700615414540931, + "learning_rate": 1.6481869516910314e-06, + "loss": 1.0710067749023438, + "step": 2707 + }, + { + "epoch": 0.623963133640553, + "grad_norm": 0.8750649396873535, + "learning_rate": 1.6478967926186196e-06, + "loss": 0.8451842069625854, + "step": 2708 + }, + { + "epoch": 0.6241935483870967, + "grad_norm": 1.0025312740636694, + "learning_rate": 1.6476065395048463e-06, + "loss": 0.8114550113677979, + "step": 2709 + }, + { + "epoch": 0.6244239631336406, + "grad_norm": 0.9543936745980088, + "learning_rate": 1.6473161923918408e-06, + "loss": 0.9158897399902344, + "step": 2710 + }, + { + "epoch": 0.6246543778801843, + "grad_norm": 0.9073320322912862, + "learning_rate": 1.6470257513217471e-06, + "loss": 0.8455985188484192, + "step": 2711 + }, + { + "epoch": 0.6248847926267281, + "grad_norm": 0.9409835862192949, + "learning_rate": 1.6467352163367224e-06, + "loss": 0.7869806885719299, + "step": 2712 + }, + { + "epoch": 0.6251152073732719, + "grad_norm": 0.9720046165998673, + "learning_rate": 1.6464445874789369e-06, + "loss": 
0.7813467979431152, + "step": 2713 + }, + { + "epoch": 0.6253456221198157, + "grad_norm": 0.9253768349404401, + "learning_rate": 1.646153864790575e-06, + "loss": 0.7607834339141846, + "step": 2714 + }, + { + "epoch": 0.6255760368663594, + "grad_norm": 0.7655542834849622, + "learning_rate": 1.6458630483138354e-06, + "loss": 0.6316394209861755, + "step": 2715 + }, + { + "epoch": 0.6258064516129033, + "grad_norm": 1.0037920503955002, + "learning_rate": 1.6455721380909293e-06, + "loss": 0.8613089323043823, + "step": 2716 + }, + { + "epoch": 0.626036866359447, + "grad_norm": 0.900314234710346, + "learning_rate": 1.6452811341640823e-06, + "loss": 0.8521597385406494, + "step": 2717 + }, + { + "epoch": 0.6262672811059908, + "grad_norm": 0.863334614503053, + "learning_rate": 1.6449900365755322e-06, + "loss": 0.7649816870689392, + "step": 2718 + }, + { + "epoch": 0.6264976958525346, + "grad_norm": 0.7921235061169694, + "learning_rate": 1.6446988453675327e-06, + "loss": 0.669215738773346, + "step": 2719 + }, + { + "epoch": 0.6267281105990783, + "grad_norm": 1.0085146323707468, + "learning_rate": 1.6444075605823491e-06, + "loss": 0.7795897722244263, + "step": 2720 + }, + { + "epoch": 0.6269585253456221, + "grad_norm": 1.0985096718321175, + "learning_rate": 1.6441161822622612e-06, + "loss": 0.9773029088973999, + "step": 2721 + }, + { + "epoch": 0.6271889400921659, + "grad_norm": 0.88062279724108, + "learning_rate": 1.6438247104495622e-06, + "loss": 0.8313496112823486, + "step": 2722 + }, + { + "epoch": 0.6274193548387097, + "grad_norm": 0.8741823244787398, + "learning_rate": 1.6435331451865589e-06, + "loss": 0.822803258895874, + "step": 2723 + }, + { + "epoch": 0.6276497695852534, + "grad_norm": 1.1191623839144935, + "learning_rate": 1.643241486515571e-06, + "loss": 0.8933405876159668, + "step": 2724 + }, + { + "epoch": 0.6278801843317973, + "grad_norm": 0.8721873626078817, + "learning_rate": 1.6429497344789334e-06, + "loss": 0.865382194519043, + "step": 2725 + }, + { + 
"epoch": 0.628110599078341, + "grad_norm": 0.6623424743433429, + "learning_rate": 1.6426578891189929e-06, + "loss": 0.5955609679222107, + "step": 2726 + }, + { + "epoch": 0.6283410138248848, + "grad_norm": 0.9379654908769754, + "learning_rate": 1.6423659504781102e-06, + "loss": 0.7832648754119873, + "step": 2727 + }, + { + "epoch": 0.6285714285714286, + "grad_norm": 0.9904172136436726, + "learning_rate": 1.6420739185986606e-06, + "loss": 0.8939651250839233, + "step": 2728 + }, + { + "epoch": 0.6288018433179724, + "grad_norm": 0.8754504203733118, + "learning_rate": 1.6417817935230316e-06, + "loss": 0.7950553894042969, + "step": 2729 + }, + { + "epoch": 0.6290322580645161, + "grad_norm": 0.7473547756110924, + "learning_rate": 1.6414895752936247e-06, + "loss": 0.7011410593986511, + "step": 2730 + }, + { + "epoch": 0.6292626728110599, + "grad_norm": 0.8298073820867625, + "learning_rate": 1.6411972639528553e-06, + "loss": 0.8745814561843872, + "step": 2731 + }, + { + "epoch": 0.6294930875576037, + "grad_norm": 0.9643129286331958, + "learning_rate": 1.640904859543152e-06, + "loss": 0.9487906694412231, + "step": 2732 + }, + { + "epoch": 0.6297235023041474, + "grad_norm": 1.0003996457820634, + "learning_rate": 1.6406123621069565e-06, + "loss": 0.8493598103523254, + "step": 2733 + }, + { + "epoch": 0.6299539170506913, + "grad_norm": 0.7043952970778223, + "learning_rate": 1.640319771686725e-06, + "loss": 0.8176105618476868, + "step": 2734 + }, + { + "epoch": 0.630184331797235, + "grad_norm": 1.1365398207749948, + "learning_rate": 1.640027088324926e-06, + "loss": 0.8331952691078186, + "step": 2735 + }, + { + "epoch": 0.6304147465437788, + "grad_norm": 0.9152153352251905, + "learning_rate": 1.6397343120640428e-06, + "loss": 0.7507727146148682, + "step": 2736 + }, + { + "epoch": 0.6306451612903226, + "grad_norm": 0.8498087936716523, + "learning_rate": 1.6394414429465707e-06, + "loss": 0.7681083679199219, + "step": 2737 + }, + { + "epoch": 0.6308755760368664, + "grad_norm": 
1.0207970870125542, + "learning_rate": 1.6391484810150197e-06, + "loss": 0.86592036485672, + "step": 2738 + }, + { + "epoch": 0.6311059907834101, + "grad_norm": 0.7893726077346048, + "learning_rate": 1.6388554263119133e-06, + "loss": 0.6561422348022461, + "step": 2739 + }, + { + "epoch": 0.631336405529954, + "grad_norm": 0.8691518888981297, + "learning_rate": 1.6385622788797871e-06, + "loss": 1.0149214267730713, + "step": 2740 + }, + { + "epoch": 0.6315668202764977, + "grad_norm": 3.1459869291369578, + "learning_rate": 1.6382690387611912e-06, + "loss": 0.8542313575744629, + "step": 2741 + }, + { + "epoch": 0.6317972350230415, + "grad_norm": 0.8459688860048273, + "learning_rate": 1.6379757059986898e-06, + "loss": 0.8561190366744995, + "step": 2742 + }, + { + "epoch": 0.6320276497695853, + "grad_norm": 0.8945733601522768, + "learning_rate": 1.6376822806348591e-06, + "loss": 0.7487457990646362, + "step": 2743 + }, + { + "epoch": 0.632258064516129, + "grad_norm": 0.7710656021686645, + "learning_rate": 1.6373887627122894e-06, + "loss": 0.6169087886810303, + "step": 2744 + }, + { + "epoch": 0.6324884792626728, + "grad_norm": 0.9363459151732765, + "learning_rate": 1.6370951522735848e-06, + "loss": 0.8384301662445068, + "step": 2745 + }, + { + "epoch": 0.6327188940092165, + "grad_norm": 0.8816116065345285, + "learning_rate": 1.636801449361362e-06, + "loss": 0.8009958267211914, + "step": 2746 + }, + { + "epoch": 0.6329493087557604, + "grad_norm": 0.7782605199549586, + "learning_rate": 1.6365076540182518e-06, + "loss": 0.7277840375900269, + "step": 2747 + }, + { + "epoch": 0.6331797235023041, + "grad_norm": 0.8629211607674182, + "learning_rate": 1.6362137662868988e-06, + "loss": 0.7994974255561829, + "step": 2748 + }, + { + "epoch": 0.633410138248848, + "grad_norm": 0.9972871876044257, + "learning_rate": 1.6359197862099592e-06, + "loss": 0.9940546751022339, + "step": 2749 + }, + { + "epoch": 0.6336405529953917, + "grad_norm": 0.7083636808435892, + "learning_rate": 
1.6356257138301048e-06, + "loss": 0.776983916759491, + "step": 2750 + }, + { + "epoch": 0.6338709677419355, + "grad_norm": 1.0813287689618403, + "learning_rate": 1.6353315491900194e-06, + "loss": 0.8218704462051392, + "step": 2751 + }, + { + "epoch": 0.6341013824884792, + "grad_norm": 0.9285197745822434, + "learning_rate": 1.635037292332401e-06, + "loss": 0.8437784910202026, + "step": 2752 + }, + { + "epoch": 0.6343317972350231, + "grad_norm": 0.7951039096878332, + "learning_rate": 1.63474294329996e-06, + "loss": 0.7774004340171814, + "step": 2753 + }, + { + "epoch": 0.6345622119815668, + "grad_norm": 0.7998446978982631, + "learning_rate": 1.634448502135421e-06, + "loss": 0.8480523824691772, + "step": 2754 + }, + { + "epoch": 0.6347926267281107, + "grad_norm": 0.8710356721404071, + "learning_rate": 1.634153968881522e-06, + "loss": 0.838944673538208, + "step": 2755 + }, + { + "epoch": 0.6350230414746544, + "grad_norm": 0.9609360504840417, + "learning_rate": 1.633859343581014e-06, + "loss": 0.7989159822463989, + "step": 2756 + }, + { + "epoch": 0.6352534562211981, + "grad_norm": 0.8906618388597183, + "learning_rate": 1.6335646262766612e-06, + "loss": 0.8122522234916687, + "step": 2757 + }, + { + "epoch": 0.635483870967742, + "grad_norm": 1.0306905026592958, + "learning_rate": 1.6332698170112418e-06, + "loss": 0.7472352981567383, + "step": 2758 + }, + { + "epoch": 0.6357142857142857, + "grad_norm": 0.7470082329854858, + "learning_rate": 1.6329749158275466e-06, + "loss": 0.7160866260528564, + "step": 2759 + }, + { + "epoch": 0.6359447004608295, + "grad_norm": 0.9276359862380839, + "learning_rate": 1.6326799227683803e-06, + "loss": 0.850339412689209, + "step": 2760 + }, + { + "epoch": 0.6361751152073732, + "grad_norm": 0.8334408182150722, + "learning_rate": 1.632384837876561e-06, + "loss": 0.7683566808700562, + "step": 2761 + }, + { + "epoch": 0.6364055299539171, + "grad_norm": 1.0070287688728312, + "learning_rate": 1.6320896611949197e-06, + "loss": 0.820326030254364, + 
"step": 2762 + }, + { + "epoch": 0.6366359447004608, + "grad_norm": 0.9088399606663712, + "learning_rate": 1.6317943927663005e-06, + "loss": 0.9319206476211548, + "step": 2763 + }, + { + "epoch": 0.6368663594470046, + "grad_norm": 0.854101738795234, + "learning_rate": 1.6314990326335619e-06, + "loss": 0.8473616242408752, + "step": 2764 + }, + { + "epoch": 0.6370967741935484, + "grad_norm": 0.9083270544798837, + "learning_rate": 1.6312035808395746e-06, + "loss": 0.7515239715576172, + "step": 2765 + }, + { + "epoch": 0.6373271889400922, + "grad_norm": 0.9691327918436982, + "learning_rate": 1.630908037427223e-06, + "loss": 0.8780150413513184, + "step": 2766 + }, + { + "epoch": 0.6375576036866359, + "grad_norm": 0.8183908015853972, + "learning_rate": 1.6306124024394051e-06, + "loss": 0.7502909898757935, + "step": 2767 + }, + { + "epoch": 0.6377880184331797, + "grad_norm": 1.0244030314506845, + "learning_rate": 1.630316675919032e-06, + "loss": 0.8440920114517212, + "step": 2768 + }, + { + "epoch": 0.6380184331797235, + "grad_norm": 0.9479398820781787, + "learning_rate": 1.6300208579090275e-06, + "loss": 0.7769831418991089, + "step": 2769 + }, + { + "epoch": 0.6382488479262672, + "grad_norm": 0.7616107153752498, + "learning_rate": 1.6297249484523297e-06, + "loss": 0.6217764616012573, + "step": 2770 + }, + { + "epoch": 0.6384792626728111, + "grad_norm": 0.7961962297717475, + "learning_rate": 1.6294289475918891e-06, + "loss": 0.8726013898849487, + "step": 2771 + }, + { + "epoch": 0.6387096774193548, + "grad_norm": 0.9993347618775529, + "learning_rate": 1.6291328553706702e-06, + "loss": 0.9624546766281128, + "step": 2772 + }, + { + "epoch": 0.6389400921658986, + "grad_norm": 0.9073330627878557, + "learning_rate": 1.62883667183165e-06, + "loss": 0.733322024345398, + "step": 2773 + }, + { + "epoch": 0.6391705069124424, + "grad_norm": 0.828990327728417, + "learning_rate": 1.6285403970178197e-06, + "loss": 0.7944040298461914, + "step": 2774 + }, + { + "epoch": 
0.6394009216589862, + "grad_norm": 0.945508092850191, + "learning_rate": 1.6282440309721825e-06, + "loss": 0.8006964921951294, + "step": 2775 + }, + { + "epoch": 0.6396313364055299, + "grad_norm": 0.8235251563991838, + "learning_rate": 1.6279475737377562e-06, + "loss": 0.8226393461227417, + "step": 2776 + }, + { + "epoch": 0.6398617511520738, + "grad_norm": 0.9205648176506509, + "learning_rate": 1.6276510253575707e-06, + "loss": 0.8216049671173096, + "step": 2777 + }, + { + "epoch": 0.6400921658986175, + "grad_norm": 1.2879339929003093, + "learning_rate": 1.6273543858746698e-06, + "loss": 0.9556760191917419, + "step": 2778 + }, + { + "epoch": 0.6403225806451613, + "grad_norm": 1.226309717633737, + "learning_rate": 1.6270576553321103e-06, + "loss": 0.9736160039901733, + "step": 2779 + }, + { + "epoch": 0.6405529953917051, + "grad_norm": 0.7107959971647043, + "learning_rate": 1.6267608337729622e-06, + "loss": 0.6930527687072754, + "step": 2780 + }, + { + "epoch": 0.6407834101382488, + "grad_norm": 0.8158686811134676, + "learning_rate": 1.6264639212403089e-06, + "loss": 0.8047456741333008, + "step": 2781 + }, + { + "epoch": 0.6410138248847926, + "grad_norm": 0.8454524938044947, + "learning_rate": 1.6261669177772465e-06, + "loss": 0.7278450727462769, + "step": 2782 + }, + { + "epoch": 0.6412442396313364, + "grad_norm": 0.8520417006771478, + "learning_rate": 1.6258698234268852e-06, + "loss": 0.7768574357032776, + "step": 2783 + }, + { + "epoch": 0.6414746543778802, + "grad_norm": 1.0890287289964238, + "learning_rate": 1.6255726382323475e-06, + "loss": 0.7621645331382751, + "step": 2784 + }, + { + "epoch": 0.6417050691244239, + "grad_norm": 0.7437513689171984, + "learning_rate": 1.6252753622367695e-06, + "loss": 0.7566754221916199, + "step": 2785 + }, + { + "epoch": 0.6419354838709678, + "grad_norm": 0.8832427803322862, + "learning_rate": 1.6249779954833005e-06, + "loss": 0.7609840631484985, + "step": 2786 + }, + { + "epoch": 0.6421658986175115, + "grad_norm": 
0.7482883809435998, + "learning_rate": 1.6246805380151028e-06, + "loss": 0.7360000610351562, + "step": 2787 + }, + { + "epoch": 0.6423963133640553, + "grad_norm": 1.1130271498528226, + "learning_rate": 1.624382989875352e-06, + "loss": 0.7951081395149231, + "step": 2788 + }, + { + "epoch": 0.6426267281105991, + "grad_norm": 0.7939855049580037, + "learning_rate": 1.6240853511072367e-06, + "loss": 0.7273311614990234, + "step": 2789 + }, + { + "epoch": 0.6428571428571429, + "grad_norm": 1.0416971384804878, + "learning_rate": 1.6237876217539588e-06, + "loss": 0.9270737171173096, + "step": 2790 + }, + { + "epoch": 0.6430875576036866, + "grad_norm": 0.97801359210753, + "learning_rate": 1.6234898018587336e-06, + "loss": 0.7624385356903076, + "step": 2791 + }, + { + "epoch": 0.6433179723502304, + "grad_norm": 0.8529799225121792, + "learning_rate": 1.6231918914647889e-06, + "loss": 0.8266719579696655, + "step": 2792 + }, + { + "epoch": 0.6435483870967742, + "grad_norm": 0.6435153338840431, + "learning_rate": 1.6228938906153663e-06, + "loss": 0.7606902122497559, + "step": 2793 + }, + { + "epoch": 0.6437788018433179, + "grad_norm": 1.022572162531227, + "learning_rate": 1.6225957993537197e-06, + "loss": 0.8239191174507141, + "step": 2794 + }, + { + "epoch": 0.6440092165898618, + "grad_norm": 0.8871272102711673, + "learning_rate": 1.6222976177231174e-06, + "loss": 0.8313608169555664, + "step": 2795 + }, + { + "epoch": 0.6442396313364055, + "grad_norm": 0.7541910127898682, + "learning_rate": 1.6219993457668396e-06, + "loss": 0.7725037932395935, + "step": 2796 + }, + { + "epoch": 0.6444700460829493, + "grad_norm": 0.8887584465014293, + "learning_rate": 1.6217009835281802e-06, + "loss": 0.8791182041168213, + "step": 2797 + }, + { + "epoch": 0.6447004608294931, + "grad_norm": 0.9285171614449231, + "learning_rate": 1.621402531050446e-06, + "loss": 0.7157453298568726, + "step": 2798 + }, + { + "epoch": 0.6449308755760369, + "grad_norm": 0.9675001114911925, + "learning_rate": 
1.621103988376957e-06, + "loss": 0.8248307704925537, + "step": 2799 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8114025469253138, + "learning_rate": 1.6208053555510467e-06, + "loss": 0.7094661593437195, + "step": 2800 + }, + { + "epoch": 0.6453917050691245, + "grad_norm": 0.997320269594231, + "learning_rate": 1.6205066326160605e-06, + "loss": 0.9130781888961792, + "step": 2801 + }, + { + "epoch": 0.6456221198156682, + "grad_norm": 0.8555561883924394, + "learning_rate": 1.620207819615358e-06, + "loss": 0.7140541076660156, + "step": 2802 + }, + { + "epoch": 0.645852534562212, + "grad_norm": 0.8223075667705522, + "learning_rate": 1.6199089165923116e-06, + "loss": 0.8638602495193481, + "step": 2803 + }, + { + "epoch": 0.6460829493087558, + "grad_norm": 0.8487880176317714, + "learning_rate": 1.6196099235903068e-06, + "loss": 0.9055536389350891, + "step": 2804 + }, + { + "epoch": 0.6463133640552995, + "grad_norm": 0.9356547902583738, + "learning_rate": 1.6193108406527416e-06, + "loss": 0.7694590091705322, + "step": 2805 + }, + { + "epoch": 0.6465437788018433, + "grad_norm": 0.9047595380936525, + "learning_rate": 1.619011667823028e-06, + "loss": 0.7512019872665405, + "step": 2806 + }, + { + "epoch": 0.646774193548387, + "grad_norm": 0.8406537006369587, + "learning_rate": 1.6187124051445903e-06, + "loss": 0.6362565159797668, + "step": 2807 + }, + { + "epoch": 0.6470046082949309, + "grad_norm": 1.328031327807814, + "learning_rate": 1.6184130526608656e-06, + "loss": 0.885259747505188, + "step": 2808 + }, + { + "epoch": 0.6472350230414746, + "grad_norm": 0.9445009081248091, + "learning_rate": 1.6181136104153054e-06, + "loss": 0.7868754863739014, + "step": 2809 + }, + { + "epoch": 0.6474654377880185, + "grad_norm": 0.901923102146858, + "learning_rate": 1.6178140784513729e-06, + "loss": 0.889660120010376, + "step": 2810 + }, + { + "epoch": 0.6476958525345622, + "grad_norm": 0.7380215273328754, + "learning_rate": 1.6175144568125444e-06, + "loss": 0.8460343480110168, 
+ "step": 2811 + }, + { + "epoch": 0.647926267281106, + "grad_norm": 0.9963582050847237, + "learning_rate": 1.6172147455423105e-06, + "loss": 0.8729731440544128, + "step": 2812 + }, + { + "epoch": 0.6481566820276498, + "grad_norm": 0.9500689129739934, + "learning_rate": 1.616914944684173e-06, + "loss": 0.7937173843383789, + "step": 2813 + }, + { + "epoch": 0.6483870967741936, + "grad_norm": 1.068299419221943, + "learning_rate": 1.6166150542816483e-06, + "loss": 0.8764641284942627, + "step": 2814 + }, + { + "epoch": 0.6486175115207373, + "grad_norm": 0.8942547003902331, + "learning_rate": 1.6163150743782645e-06, + "loss": 0.8078420758247375, + "step": 2815 + }, + { + "epoch": 0.6488479262672812, + "grad_norm": 0.9410598977678883, + "learning_rate": 1.6160150050175636e-06, + "loss": 0.9124993085861206, + "step": 2816 + }, + { + "epoch": 0.6490783410138249, + "grad_norm": 0.8852573714623596, + "learning_rate": 1.6157148462431003e-06, + "loss": 0.9584136009216309, + "step": 2817 + }, + { + "epoch": 0.6493087557603686, + "grad_norm": 1.0833527157774228, + "learning_rate": 1.6154145980984422e-06, + "loss": 0.8404672145843506, + "step": 2818 + }, + { + "epoch": 0.6495391705069125, + "grad_norm": 0.9498348014278839, + "learning_rate": 1.6151142606271695e-06, + "loss": 0.7928001880645752, + "step": 2819 + }, + { + "epoch": 0.6497695852534562, + "grad_norm": 0.8444903444994009, + "learning_rate": 1.6148138338728766e-06, + "loss": 0.7877479791641235, + "step": 2820 + }, + { + "epoch": 0.65, + "grad_norm": 0.814898961059689, + "learning_rate": 1.6145133178791695e-06, + "loss": 0.9502429366111755, + "step": 2821 + }, + { + "epoch": 0.6502304147465438, + "grad_norm": 0.791549779828082, + "learning_rate": 1.6142127126896679e-06, + "loss": 0.7866412401199341, + "step": 2822 + }, + { + "epoch": 0.6504608294930876, + "grad_norm": 0.7841896313928699, + "learning_rate": 1.613912018348004e-06, + "loss": 0.8315345644950867, + "step": 2823 + }, + { + "epoch": 0.6506912442396313, + 
"grad_norm": 0.6841019539216254, + "learning_rate": 1.6136112348978236e-06, + "loss": 0.9718044400215149, + "step": 2824 + }, + { + "epoch": 0.6509216589861752, + "grad_norm": 0.6502753552916141, + "learning_rate": 1.6133103623827843e-06, + "loss": 0.5874941349029541, + "step": 2825 + }, + { + "epoch": 0.6511520737327189, + "grad_norm": 0.8954999916723304, + "learning_rate": 1.613009400846558e-06, + "loss": 0.9498391151428223, + "step": 2826 + }, + { + "epoch": 0.6513824884792627, + "grad_norm": 0.9527387242959447, + "learning_rate": 1.612708350332829e-06, + "loss": 0.858715295791626, + "step": 2827 + }, + { + "epoch": 0.6516129032258065, + "grad_norm": 0.7771583744459308, + "learning_rate": 1.6124072108852938e-06, + "loss": 0.8618113994598389, + "step": 2828 + }, + { + "epoch": 0.6518433179723502, + "grad_norm": 0.7504136233680345, + "learning_rate": 1.6121059825476628e-06, + "loss": 0.8024446964263916, + "step": 2829 + }, + { + "epoch": 0.652073732718894, + "grad_norm": 0.8461077162414828, + "learning_rate": 1.6118046653636586e-06, + "loss": 0.8021122813224792, + "step": 2830 + }, + { + "epoch": 0.6523041474654377, + "grad_norm": 0.8330044091738112, + "learning_rate": 1.6115032593770176e-06, + "loss": 0.8092107772827148, + "step": 2831 + }, + { + "epoch": 0.6525345622119816, + "grad_norm": 0.8480183578387018, + "learning_rate": 1.6112017646314872e-06, + "loss": 0.9842641353607178, + "step": 2832 + }, + { + "epoch": 0.6527649769585253, + "grad_norm": 0.8051494817524167, + "learning_rate": 1.6109001811708305e-06, + "loss": 0.744353175163269, + "step": 2833 + }, + { + "epoch": 0.6529953917050692, + "grad_norm": 1.0610555371871784, + "learning_rate": 1.6105985090388209e-06, + "loss": 0.7089616060256958, + "step": 2834 + }, + { + "epoch": 0.6532258064516129, + "grad_norm": 0.9119028582239228, + "learning_rate": 1.610296748279246e-06, + "loss": 0.9043736457824707, + "step": 2835 + }, + { + "epoch": 0.6534562211981567, + "grad_norm": 1.0078987757698072, + 
"learning_rate": 1.6099948989359061e-06, + "loss": 0.9170948266983032, + "step": 2836 + }, + { + "epoch": 0.6536866359447004, + "grad_norm": 0.9289963097672949, + "learning_rate": 1.6096929610526145e-06, + "loss": 0.8275802135467529, + "step": 2837 + }, + { + "epoch": 0.6539170506912443, + "grad_norm": 0.9146670757237039, + "learning_rate": 1.6093909346731965e-06, + "loss": 0.9180251955986023, + "step": 2838 + }, + { + "epoch": 0.654147465437788, + "grad_norm": 0.708269208459363, + "learning_rate": 1.6090888198414908e-06, + "loss": 0.8041235208511353, + "step": 2839 + }, + { + "epoch": 0.6543778801843319, + "grad_norm": 0.9431191202102605, + "learning_rate": 1.6087866166013492e-06, + "loss": 0.7833176851272583, + "step": 2840 + }, + { + "epoch": 0.6546082949308756, + "grad_norm": 0.8680924352570318, + "learning_rate": 1.6084843249966364e-06, + "loss": 0.838886022567749, + "step": 2841 + }, + { + "epoch": 0.6548387096774193, + "grad_norm": 0.8317233103954151, + "learning_rate": 1.6081819450712293e-06, + "loss": 0.837687611579895, + "step": 2842 + }, + { + "epoch": 0.6550691244239631, + "grad_norm": 0.8737630969117387, + "learning_rate": 1.607879476869018e-06, + "loss": 0.6572843790054321, + "step": 2843 + }, + { + "epoch": 0.6552995391705069, + "grad_norm": 0.8513917948170456, + "learning_rate": 1.6075769204339053e-06, + "loss": 0.7698653936386108, + "step": 2844 + }, + { + "epoch": 0.6555299539170507, + "grad_norm": 0.9469558820500475, + "learning_rate": 1.607274275809807e-06, + "loss": 0.8639169335365295, + "step": 2845 + }, + { + "epoch": 0.6557603686635944, + "grad_norm": 0.8250799867539951, + "learning_rate": 1.6069715430406517e-06, + "loss": 0.837492823600769, + "step": 2846 + }, + { + "epoch": 0.6559907834101383, + "grad_norm": 0.9277000604833184, + "learning_rate": 1.6066687221703803e-06, + "loss": 0.8824087381362915, + "step": 2847 + }, + { + "epoch": 0.656221198156682, + "grad_norm": 0.9304701724719217, + "learning_rate": 1.6063658132429468e-06, + "loss": 
0.8161731958389282, + "step": 2848 + }, + { + "epoch": 0.6564516129032258, + "grad_norm": 0.7988044282931124, + "learning_rate": 1.6060628163023183e-06, + "loss": 0.8365877270698547, + "step": 2849 + }, + { + "epoch": 0.6566820276497696, + "grad_norm": 0.8477393490951164, + "learning_rate": 1.6057597313924745e-06, + "loss": 0.877829909324646, + "step": 2850 + }, + { + "epoch": 0.6569124423963134, + "grad_norm": 0.857078285622655, + "learning_rate": 1.6054565585574075e-06, + "loss": 0.756903886795044, + "step": 2851 + }, + { + "epoch": 0.6571428571428571, + "grad_norm": 1.0124401818225557, + "learning_rate": 1.6051532978411223e-06, + "loss": 0.7777276039123535, + "step": 2852 + }, + { + "epoch": 0.6573732718894009, + "grad_norm": 0.9464152715401636, + "learning_rate": 1.6048499492876375e-06, + "loss": 0.9191532135009766, + "step": 2853 + }, + { + "epoch": 0.6576036866359447, + "grad_norm": 0.7885787618366824, + "learning_rate": 1.6045465129409829e-06, + "loss": 0.7693309783935547, + "step": 2854 + }, + { + "epoch": 0.6578341013824884, + "grad_norm": 0.8787314035574895, + "learning_rate": 1.6042429888452024e-06, + "loss": 0.7865023612976074, + "step": 2855 + }, + { + "epoch": 0.6580645161290323, + "grad_norm": 0.8588996745183644, + "learning_rate": 1.6039393770443521e-06, + "loss": 0.844336748123169, + "step": 2856 + }, + { + "epoch": 0.658294930875576, + "grad_norm": 0.9455502994869639, + "learning_rate": 1.6036356775825009e-06, + "loss": 0.9590705633163452, + "step": 2857 + }, + { + "epoch": 0.6585253456221198, + "grad_norm": 0.904582718768817, + "learning_rate": 1.6033318905037297e-06, + "loss": 0.8687748312950134, + "step": 2858 + }, + { + "epoch": 0.6587557603686636, + "grad_norm": 0.8848681311153475, + "learning_rate": 1.6030280158521336e-06, + "loss": 0.8669745922088623, + "step": 2859 + }, + { + "epoch": 0.6589861751152074, + "grad_norm": 0.8829211466390271, + "learning_rate": 1.6027240536718191e-06, + "loss": 0.6929436922073364, + "step": 2860 + }, + { + 
"epoch": 0.6592165898617511, + "grad_norm": 0.9047325967091919, + "learning_rate": 1.6024200040069065e-06, + "loss": 0.6965433359146118, + "step": 2861 + }, + { + "epoch": 0.659447004608295, + "grad_norm": 0.9743729570848424, + "learning_rate": 1.6021158669015273e-06, + "loss": 0.780353307723999, + "step": 2862 + }, + { + "epoch": 0.6596774193548387, + "grad_norm": 0.7726382879850381, + "learning_rate": 1.6018116423998277e-06, + "loss": 0.685762882232666, + "step": 2863 + }, + { + "epoch": 0.6599078341013825, + "grad_norm": 0.8607619933867399, + "learning_rate": 1.6015073305459646e-06, + "loss": 0.8249918222427368, + "step": 2864 + }, + { + "epoch": 0.6601382488479263, + "grad_norm": 0.7388237148259402, + "learning_rate": 1.6012029313841086e-06, + "loss": 0.7327184677124023, + "step": 2865 + }, + { + "epoch": 0.66036866359447, + "grad_norm": 0.9554378042614118, + "learning_rate": 1.6008984449584433e-06, + "loss": 0.7785891890525818, + "step": 2866 + }, + { + "epoch": 0.6605990783410138, + "grad_norm": 0.7196967379779726, + "learning_rate": 1.600593871313164e-06, + "loss": 0.7307751178741455, + "step": 2867 + }, + { + "epoch": 0.6608294930875576, + "grad_norm": 1.2601680054093507, + "learning_rate": 1.6002892104924796e-06, + "loss": 0.8802257180213928, + "step": 2868 + }, + { + "epoch": 0.6610599078341014, + "grad_norm": 1.0302753711943056, + "learning_rate": 1.5999844625406106e-06, + "loss": 0.8699140548706055, + "step": 2869 + }, + { + "epoch": 0.6612903225806451, + "grad_norm": 0.8146336951608913, + "learning_rate": 1.5996796275017914e-06, + "loss": 0.6453604102134705, + "step": 2870 + }, + { + "epoch": 0.661520737327189, + "grad_norm": 0.807532897551279, + "learning_rate": 1.5993747054202682e-06, + "loss": 0.7319324016571045, + "step": 2871 + }, + { + "epoch": 0.6617511520737327, + "grad_norm": 0.9337023535064233, + "learning_rate": 1.5990696963402998e-06, + "loss": 0.8357574343681335, + "step": 2872 + }, + { + "epoch": 0.6619815668202765, + "grad_norm": 
0.854915024221744, + "learning_rate": 1.5987646003061581e-06, + "loss": 0.7647984027862549, + "step": 2873 + }, + { + "epoch": 0.6622119815668203, + "grad_norm": 1.0099884737934117, + "learning_rate": 1.5984594173621274e-06, + "loss": 0.8542075753211975, + "step": 2874 + }, + { + "epoch": 0.6624423963133641, + "grad_norm": 0.9685596460194386, + "learning_rate": 1.5981541475525044e-06, + "loss": 0.7689328193664551, + "step": 2875 + }, + { + "epoch": 0.6626728110599078, + "grad_norm": 0.8183777315007433, + "learning_rate": 1.5978487909215987e-06, + "loss": 0.7459174990653992, + "step": 2876 + }, + { + "epoch": 0.6629032258064517, + "grad_norm": 0.8697380019030229, + "learning_rate": 1.5975433475137329e-06, + "loss": 0.8268495202064514, + "step": 2877 + }, + { + "epoch": 0.6631336405529954, + "grad_norm": 0.9013422410425754, + "learning_rate": 1.5972378173732406e-06, + "loss": 0.8254266977310181, + "step": 2878 + }, + { + "epoch": 0.6633640552995391, + "grad_norm": 1.0427681980244552, + "learning_rate": 1.59693220054447e-06, + "loss": 0.8552727103233337, + "step": 2879 + }, + { + "epoch": 0.663594470046083, + "grad_norm": 0.7469699255899254, + "learning_rate": 1.596626497071781e-06, + "loss": 0.7196269035339355, + "step": 2880 + }, + { + "epoch": 0.6638248847926267, + "grad_norm": 0.9146202447996906, + "learning_rate": 1.5963207069995455e-06, + "loss": 0.815540075302124, + "step": 2881 + }, + { + "epoch": 0.6640552995391705, + "grad_norm": 0.8585411055523222, + "learning_rate": 1.596014830372149e-06, + "loss": 0.8040128350257874, + "step": 2882 + }, + { + "epoch": 0.6642857142857143, + "grad_norm": 0.8592608746136836, + "learning_rate": 1.5957088672339887e-06, + "loss": 0.7990812659263611, + "step": 2883 + }, + { + "epoch": 0.6645161290322581, + "grad_norm": 0.9139395957334936, + "learning_rate": 1.5954028176294746e-06, + "loss": 0.956179141998291, + "step": 2884 + }, + { + "epoch": 0.6647465437788018, + "grad_norm": 0.9544806325504157, + "learning_rate": 
1.5950966816030304e-06, + "loss": 0.7730144262313843, + "step": 2885 + }, + { + "epoch": 0.6649769585253457, + "grad_norm": 1.0230957824823068, + "learning_rate": 1.5947904591990904e-06, + "loss": 0.902834415435791, + "step": 2886 + }, + { + "epoch": 0.6652073732718894, + "grad_norm": 0.8987169052425068, + "learning_rate": 1.5944841504621027e-06, + "loss": 0.7234599590301514, + "step": 2887 + }, + { + "epoch": 0.6654377880184332, + "grad_norm": 0.9849005395145788, + "learning_rate": 1.5941777554365271e-06, + "loss": 1.0267843008041382, + "step": 2888 + }, + { + "epoch": 0.665668202764977, + "grad_norm": 1.1615941669691254, + "learning_rate": 1.5938712741668376e-06, + "loss": 0.7431002855300903, + "step": 2889 + }, + { + "epoch": 0.6658986175115207, + "grad_norm": 0.8013605201375282, + "learning_rate": 1.5935647066975185e-06, + "loss": 0.7843111753463745, + "step": 2890 + }, + { + "epoch": 0.6661290322580645, + "grad_norm": 0.9498522711625995, + "learning_rate": 1.593258053073068e-06, + "loss": 0.8775256872177124, + "step": 2891 + }, + { + "epoch": 0.6663594470046083, + "grad_norm": 0.8363878343517416, + "learning_rate": 1.5929513133379966e-06, + "loss": 0.7861695289611816, + "step": 2892 + }, + { + "epoch": 0.6665898617511521, + "grad_norm": 1.1446598361432248, + "learning_rate": 1.5926444875368267e-06, + "loss": 0.8721977472305298, + "step": 2893 + }, + { + "epoch": 0.6668202764976958, + "grad_norm": 0.7591669830135314, + "learning_rate": 1.5923375757140941e-06, + "loss": 0.648263692855835, + "step": 2894 + }, + { + "epoch": 0.6670506912442397, + "grad_norm": 0.8984763952333247, + "learning_rate": 1.592030577914347e-06, + "loss": 0.8334729075431824, + "step": 2895 + }, + { + "epoch": 0.6672811059907834, + "grad_norm": 0.7757586607492352, + "learning_rate": 1.591723494182145e-06, + "loss": 0.6105949878692627, + "step": 2896 + }, + { + "epoch": 0.6675115207373272, + "grad_norm": 0.8562379620561761, + "learning_rate": 1.5914163245620608e-06, + "loss": 
0.7895448207855225, + "step": 2897 + }, + { + "epoch": 0.667741935483871, + "grad_norm": 0.9487051467126763, + "learning_rate": 1.5911090690986805e-06, + "loss": 0.8728576302528381, + "step": 2898 + }, + { + "epoch": 0.6679723502304148, + "grad_norm": 0.7480056751597441, + "learning_rate": 1.590801727836601e-06, + "loss": 0.7637856006622314, + "step": 2899 + }, + { + "epoch": 0.6682027649769585, + "grad_norm": 1.0125939986027075, + "learning_rate": 1.590494300820433e-06, + "loss": 0.8988397717475891, + "step": 2900 + }, + { + "epoch": 0.6684331797235024, + "grad_norm": 0.9324485554010499, + "learning_rate": 1.590186788094799e-06, + "loss": 0.7486827373504639, + "step": 2901 + }, + { + "epoch": 0.6686635944700461, + "grad_norm": 0.7629631437151, + "learning_rate": 1.589879189704334e-06, + "loss": 0.8212865591049194, + "step": 2902 + }, + { + "epoch": 0.6688940092165898, + "grad_norm": 0.7640149838894683, + "learning_rate": 1.5895715056936853e-06, + "loss": 0.7421284914016724, + "step": 2903 + }, + { + "epoch": 0.6691244239631337, + "grad_norm": 0.8407199034997399, + "learning_rate": 1.5892637361075132e-06, + "loss": 0.8721676468849182, + "step": 2904 + }, + { + "epoch": 0.6693548387096774, + "grad_norm": 0.9214400782360851, + "learning_rate": 1.58895588099049e-06, + "loss": 0.7265836000442505, + "step": 2905 + }, + { + "epoch": 0.6695852534562212, + "grad_norm": 0.959235173078028, + "learning_rate": 1.5886479403873e-06, + "loss": 0.863615870475769, + "step": 2906 + }, + { + "epoch": 0.669815668202765, + "grad_norm": 0.788219849900096, + "learning_rate": 1.588339914342641e-06, + "loss": 0.8362177610397339, + "step": 2907 + }, + { + "epoch": 0.6700460829493088, + "grad_norm": 1.0142262876785297, + "learning_rate": 1.5880318029012223e-06, + "loss": 0.9076892137527466, + "step": 2908 + }, + { + "epoch": 0.6702764976958525, + "grad_norm": 0.957653217332238, + "learning_rate": 1.5877236061077658e-06, + "loss": 0.9149065017700195, + "step": 2909 + }, + { + "epoch": 
0.6705069124423964, + "grad_norm": 0.8820705070600866, + "learning_rate": 1.5874153240070062e-06, + "loss": 0.7761013507843018, + "step": 2910 + }, + { + "epoch": 0.6707373271889401, + "grad_norm": 1.049261864076062, + "learning_rate": 1.5871069566436894e-06, + "loss": 0.8671830892562866, + "step": 2911 + }, + { + "epoch": 0.6709677419354839, + "grad_norm": 0.9461120142941367, + "learning_rate": 1.5867985040625755e-06, + "loss": 0.9433870315551758, + "step": 2912 + }, + { + "epoch": 0.6711981566820276, + "grad_norm": 0.934114103387592, + "learning_rate": 1.5864899663084352e-06, + "loss": 0.8009352684020996, + "step": 2913 + }, + { + "epoch": 0.6714285714285714, + "grad_norm": 0.9285902098427739, + "learning_rate": 1.5861813434260528e-06, + "loss": 0.6813808083534241, + "step": 2914 + }, + { + "epoch": 0.6716589861751152, + "grad_norm": 0.7891360814530397, + "learning_rate": 1.5858726354602248e-06, + "loss": 0.712783932685852, + "step": 2915 + }, + { + "epoch": 0.6718894009216589, + "grad_norm": 0.9971879600214522, + "learning_rate": 1.5855638424557588e-06, + "loss": 0.7871056795120239, + "step": 2916 + }, + { + "epoch": 0.6721198156682028, + "grad_norm": 0.9551471269364743, + "learning_rate": 1.5852549644574766e-06, + "loss": 0.8590981960296631, + "step": 2917 + }, + { + "epoch": 0.6723502304147465, + "grad_norm": 0.9338373296128487, + "learning_rate": 1.584946001510211e-06, + "loss": 0.7952913641929626, + "step": 2918 + }, + { + "epoch": 0.6725806451612903, + "grad_norm": 1.0716689971646949, + "learning_rate": 1.5846369536588078e-06, + "loss": 0.8567384481430054, + "step": 2919 + }, + { + "epoch": 0.6728110599078341, + "grad_norm": 1.0797852963412387, + "learning_rate": 1.5843278209481246e-06, + "loss": 0.859541654586792, + "step": 2920 + }, + { + "epoch": 0.6730414746543779, + "grad_norm": 1.1734504357127358, + "learning_rate": 1.5840186034230318e-06, + "loss": 0.7843801975250244, + "step": 2921 + }, + { + "epoch": 0.6732718894009216, + "grad_norm": 
0.7736885985619673, + "learning_rate": 1.5837093011284118e-06, + "loss": 0.7448940277099609, + "step": 2922 + }, + { + "epoch": 0.6735023041474655, + "grad_norm": 1.0803788544256392, + "learning_rate": 1.5833999141091593e-06, + "loss": 0.9325242042541504, + "step": 2923 + }, + { + "epoch": 0.6737327188940092, + "grad_norm": 1.2302390941080075, + "learning_rate": 1.5830904424101816e-06, + "loss": 0.8005647659301758, + "step": 2924 + }, + { + "epoch": 0.673963133640553, + "grad_norm": 0.9271295903754758, + "learning_rate": 1.5827808860763984e-06, + "loss": 0.8897464275360107, + "step": 2925 + }, + { + "epoch": 0.6741935483870968, + "grad_norm": 1.0218758099034497, + "learning_rate": 1.5824712451527409e-06, + "loss": 0.8319039344787598, + "step": 2926 + }, + { + "epoch": 0.6744239631336405, + "grad_norm": 1.0734614103347653, + "learning_rate": 1.5821615196841533e-06, + "loss": 0.7638111114501953, + "step": 2927 + }, + { + "epoch": 0.6746543778801843, + "grad_norm": 0.8552316991076688, + "learning_rate": 1.581851709715592e-06, + "loss": 0.7617092132568359, + "step": 2928 + }, + { + "epoch": 0.6748847926267281, + "grad_norm": 1.0119419737078916, + "learning_rate": 1.581541815292025e-06, + "loss": 0.813319742679596, + "step": 2929 + }, + { + "epoch": 0.6751152073732719, + "grad_norm": 0.8324815306646182, + "learning_rate": 1.5812318364584334e-06, + "loss": 0.7495343089103699, + "step": 2930 + }, + { + "epoch": 0.6753456221198156, + "grad_norm": 1.0070331562925772, + "learning_rate": 1.5809217732598103e-06, + "loss": 0.9064745306968689, + "step": 2931 + }, + { + "epoch": 0.6755760368663595, + "grad_norm": 0.77529378116571, + "learning_rate": 1.580611625741161e-06, + "loss": 0.699098527431488, + "step": 2932 + }, + { + "epoch": 0.6758064516129032, + "grad_norm": 0.9525126023464006, + "learning_rate": 1.5803013939475025e-06, + "loss": 0.9168096780776978, + "step": 2933 + }, + { + "epoch": 0.676036866359447, + "grad_norm": 0.8145178437764095, + "learning_rate": 
1.5799910779238652e-06, + "loss": 0.8848644495010376, + "step": 2934 + }, + { + "epoch": 0.6762672811059908, + "grad_norm": 0.8852934324704809, + "learning_rate": 1.5796806777152903e-06, + "loss": 0.7795228958129883, + "step": 2935 + }, + { + "epoch": 0.6764976958525346, + "grad_norm": 0.9901973226971541, + "learning_rate": 1.5793701933668327e-06, + "loss": 0.9287698268890381, + "step": 2936 + }, + { + "epoch": 0.6767281105990783, + "grad_norm": 0.9605403793187631, + "learning_rate": 1.5790596249235587e-06, + "loss": 0.8661396503448486, + "step": 2937 + }, + { + "epoch": 0.6769585253456222, + "grad_norm": 1.0073544692346657, + "learning_rate": 1.5787489724305464e-06, + "loss": 0.7544706463813782, + "step": 2938 + }, + { + "epoch": 0.6771889400921659, + "grad_norm": 1.350397583464208, + "learning_rate": 1.5784382359328872e-06, + "loss": 0.8613651990890503, + "step": 2939 + }, + { + "epoch": 0.6774193548387096, + "grad_norm": 1.0225856960398716, + "learning_rate": 1.5781274154756833e-06, + "loss": 0.8695065975189209, + "step": 2940 + }, + { + "epoch": 0.6776497695852535, + "grad_norm": 1.1450515007973723, + "learning_rate": 1.577816511104051e-06, + "loss": 0.9453287720680237, + "step": 2941 + }, + { + "epoch": 0.6778801843317972, + "grad_norm": 0.7720442193305806, + "learning_rate": 1.577505522863117e-06, + "loss": 0.8599261045455933, + "step": 2942 + }, + { + "epoch": 0.678110599078341, + "grad_norm": 0.8831442525084486, + "learning_rate": 1.5771944507980205e-06, + "loss": 0.8143391609191895, + "step": 2943 + }, + { + "epoch": 0.6783410138248848, + "grad_norm": 0.9328639928073722, + "learning_rate": 1.576883294953914e-06, + "loss": 0.9558438062667847, + "step": 2944 + }, + { + "epoch": 0.6785714285714286, + "grad_norm": 0.6484366074680237, + "learning_rate": 1.5765720553759605e-06, + "loss": 0.7348268628120422, + "step": 2945 + }, + { + "epoch": 0.6788018433179723, + "grad_norm": 1.0387482604326927, + "learning_rate": 1.5762607321093366e-06, + "loss": 
0.9361155033111572, + "step": 2946 + }, + { + "epoch": 0.6790322580645162, + "grad_norm": 0.9855095789147831, + "learning_rate": 1.5759493251992303e-06, + "loss": 0.8094985485076904, + "step": 2947 + }, + { + "epoch": 0.6792626728110599, + "grad_norm": 1.631714554631539, + "learning_rate": 1.575637834690842e-06, + "loss": 0.8746658563613892, + "step": 2948 + }, + { + "epoch": 0.6794930875576037, + "grad_norm": 0.9249217331606766, + "learning_rate": 1.575326260629384e-06, + "loss": 0.7433050870895386, + "step": 2949 + }, + { + "epoch": 0.6797235023041475, + "grad_norm": 0.9856239464338491, + "learning_rate": 1.5750146030600808e-06, + "loss": 0.8621053695678711, + "step": 2950 + }, + { + "epoch": 0.6799539170506912, + "grad_norm": 0.9119478915395727, + "learning_rate": 1.5747028620281695e-06, + "loss": 0.7541971206665039, + "step": 2951 + }, + { + "epoch": 0.680184331797235, + "grad_norm": 1.0099311239329205, + "learning_rate": 1.5743910375788982e-06, + "loss": 0.9817987680435181, + "step": 2952 + }, + { + "epoch": 0.6804147465437788, + "grad_norm": 1.046074262522893, + "learning_rate": 1.5740791297575283e-06, + "loss": 0.7763534188270569, + "step": 2953 + }, + { + "epoch": 0.6806451612903226, + "grad_norm": 1.0303747349913415, + "learning_rate": 1.573767138609333e-06, + "loss": 0.7482337355613708, + "step": 2954 + }, + { + "epoch": 0.6808755760368663, + "grad_norm": 1.0308347032013807, + "learning_rate": 1.5734550641795967e-06, + "loss": 0.7352473735809326, + "step": 2955 + }, + { + "epoch": 0.6811059907834102, + "grad_norm": 0.9086715245515472, + "learning_rate": 1.573142906513617e-06, + "loss": 0.8657293319702148, + "step": 2956 + }, + { + "epoch": 0.6813364055299539, + "grad_norm": 0.9597438975913184, + "learning_rate": 1.5728306656567033e-06, + "loss": 0.8035376667976379, + "step": 2957 + }, + { + "epoch": 0.6815668202764977, + "grad_norm": 0.9481340627224691, + "learning_rate": 1.572518341654177e-06, + "loss": 0.8030140399932861, + "step": 2958 + }, + { + 
"epoch": 0.6817972350230415, + "grad_norm": 0.956950799259568, + "learning_rate": 1.5722059345513711e-06, + "loss": 0.797377347946167, + "step": 2959 + }, + { + "epoch": 0.6820276497695853, + "grad_norm": 0.7086079395333297, + "learning_rate": 1.5718934443936311e-06, + "loss": 0.7041053175926208, + "step": 2960 + }, + { + "epoch": 0.682258064516129, + "grad_norm": 1.0251660128790803, + "learning_rate": 1.571580871226315e-06, + "loss": 0.7911885976791382, + "step": 2961 + }, + { + "epoch": 0.6824884792626729, + "grad_norm": 0.8834527581303466, + "learning_rate": 1.5712682150947922e-06, + "loss": 0.7908599376678467, + "step": 2962 + }, + { + "epoch": 0.6827188940092166, + "grad_norm": 0.8159267525070817, + "learning_rate": 1.5709554760444442e-06, + "loss": 0.860281229019165, + "step": 2963 + }, + { + "epoch": 0.6829493087557603, + "grad_norm": 0.8226887233242035, + "learning_rate": 1.5706426541206645e-06, + "loss": 0.6987707018852234, + "step": 2964 + }, + { + "epoch": 0.6831797235023042, + "grad_norm": 0.8719992040747229, + "learning_rate": 1.5703297493688592e-06, + "loss": 0.7198495864868164, + "step": 2965 + }, + { + "epoch": 0.6834101382488479, + "grad_norm": 1.1775957395401402, + "learning_rate": 1.5700167618344455e-06, + "loss": 0.8232598304748535, + "step": 2966 + }, + { + "epoch": 0.6836405529953917, + "grad_norm": 0.8962037845514019, + "learning_rate": 1.569703691562854e-06, + "loss": 0.8425456285476685, + "step": 2967 + }, + { + "epoch": 0.6838709677419355, + "grad_norm": 0.8746880672166448, + "learning_rate": 1.5693905385995252e-06, + "loss": 0.7758797407150269, + "step": 2968 + }, + { + "epoch": 0.6841013824884793, + "grad_norm": 0.9739325658587258, + "learning_rate": 1.569077302989914e-06, + "loss": 0.7478910684585571, + "step": 2969 + }, + { + "epoch": 0.684331797235023, + "grad_norm": 0.88099670074057, + "learning_rate": 1.5687639847794854e-06, + "loss": 0.8274309635162354, + "step": 2970 + }, + { + "epoch": 0.6845622119815669, + "grad_norm": 
0.9125307567181903, + "learning_rate": 1.5684505840137173e-06, + "loss": 0.6800183653831482, + "step": 2971 + }, + { + "epoch": 0.6847926267281106, + "grad_norm": 1.1416810893109246, + "learning_rate": 1.5681371007380996e-06, + "loss": 0.7768006324768066, + "step": 2972 + }, + { + "epoch": 0.6850230414746544, + "grad_norm": 0.8308804334079786, + "learning_rate": 1.5678235349981338e-06, + "loss": 0.7462732195854187, + "step": 2973 + }, + { + "epoch": 0.6852534562211982, + "grad_norm": 0.935725297382271, + "learning_rate": 1.5675098868393335e-06, + "loss": 0.8461781144142151, + "step": 2974 + }, + { + "epoch": 0.6854838709677419, + "grad_norm": 0.9717984846524689, + "learning_rate": 1.5671961563072244e-06, + "loss": 0.7968491911888123, + "step": 2975 + }, + { + "epoch": 0.6857142857142857, + "grad_norm": 0.9710985084042064, + "learning_rate": 1.5668823434473443e-06, + "loss": 0.805394172668457, + "step": 2976 + }, + { + "epoch": 0.6859447004608294, + "grad_norm": 0.9297793560483373, + "learning_rate": 1.5665684483052424e-06, + "loss": 0.7241736650466919, + "step": 2977 + }, + { + "epoch": 0.6861751152073733, + "grad_norm": 0.9673260038513803, + "learning_rate": 1.5662544709264801e-06, + "loss": 0.7345866560935974, + "step": 2978 + }, + { + "epoch": 0.686405529953917, + "grad_norm": 0.8604134561659843, + "learning_rate": 1.5659404113566312e-06, + "loss": 0.7605085372924805, + "step": 2979 + }, + { + "epoch": 0.6866359447004609, + "grad_norm": 0.9618303204830516, + "learning_rate": 1.5656262696412808e-06, + "loss": 0.8555188179016113, + "step": 2980 + }, + { + "epoch": 0.6868663594470046, + "grad_norm": 0.8604009092225049, + "learning_rate": 1.5653120458260261e-06, + "loss": 0.7139542698860168, + "step": 2981 + }, + { + "epoch": 0.6870967741935484, + "grad_norm": 0.9290410772154322, + "learning_rate": 1.564997739956476e-06, + "loss": 0.8676587343215942, + "step": 2982 + }, + { + "epoch": 0.6873271889400921, + "grad_norm": 0.9524807718966832, + "learning_rate": 
1.5646833520782523e-06, + "loss": 0.8121025562286377, + "step": 2983 + }, + { + "epoch": 0.687557603686636, + "grad_norm": 0.7889521702672326, + "learning_rate": 1.5643688822369873e-06, + "loss": 0.7757136821746826, + "step": 2984 + }, + { + "epoch": 0.6877880184331797, + "grad_norm": 0.8884194014759353, + "learning_rate": 1.5640543304783264e-06, + "loss": 0.8357381820678711, + "step": 2985 + }, + { + "epoch": 0.6880184331797236, + "grad_norm": 0.9725078170053829, + "learning_rate": 1.563739696847926e-06, + "loss": 0.8635811805725098, + "step": 2986 + }, + { + "epoch": 0.6882488479262673, + "grad_norm": 0.9539959391598165, + "learning_rate": 1.563424981391455e-06, + "loss": 0.90900057554245, + "step": 2987 + }, + { + "epoch": 0.688479262672811, + "grad_norm": 1.056070683011334, + "learning_rate": 1.563110184154594e-06, + "loss": 0.9001314043998718, + "step": 2988 + }, + { + "epoch": 0.6887096774193548, + "grad_norm": 0.7893194308475292, + "learning_rate": 1.5627953051830353e-06, + "loss": 0.7482000589370728, + "step": 2989 + }, + { + "epoch": 0.6889400921658986, + "grad_norm": 1.0183435769639337, + "learning_rate": 1.5624803445224829e-06, + "loss": 0.8504235744476318, + "step": 2990 + }, + { + "epoch": 0.6891705069124424, + "grad_norm": 0.9687684393899343, + "learning_rate": 1.5621653022186526e-06, + "loss": 0.7887089252471924, + "step": 2991 + }, + { + "epoch": 0.6894009216589861, + "grad_norm": 0.9412995775666883, + "learning_rate": 1.5618501783172735e-06, + "loss": 0.8745719790458679, + "step": 2992 + }, + { + "epoch": 0.68963133640553, + "grad_norm": 0.8960957701589951, + "learning_rate": 1.5615349728640848e-06, + "loss": 0.8269633054733276, + "step": 2993 + }, + { + "epoch": 0.6898617511520737, + "grad_norm": 0.802430248071724, + "learning_rate": 1.5612196859048382e-06, + "loss": 0.7355072498321533, + "step": 2994 + }, + { + "epoch": 0.6900921658986175, + "grad_norm": 0.9768940563158048, + "learning_rate": 1.5609043174852966e-06, + "loss": 0.857653021812439, + 
"step": 2995 + }, + { + "epoch": 0.6903225806451613, + "grad_norm": 1.0766498115550724, + "learning_rate": 1.5605888676512365e-06, + "loss": 0.8575785160064697, + "step": 2996 + }, + { + "epoch": 0.6905529953917051, + "grad_norm": 0.8803208034747956, + "learning_rate": 1.560273336448444e-06, + "loss": 0.8631561994552612, + "step": 2997 + }, + { + "epoch": 0.6907834101382488, + "grad_norm": 1.0014936433552548, + "learning_rate": 1.5599577239227185e-06, + "loss": 0.7993800044059753, + "step": 2998 + }, + { + "epoch": 0.6910138248847926, + "grad_norm": 0.8990076202156756, + "learning_rate": 1.5596420301198707e-06, + "loss": 0.7961007356643677, + "step": 2999 + }, + { + "epoch": 0.6912442396313364, + "grad_norm": 1.0216355950582598, + "learning_rate": 1.5593262550857232e-06, + "loss": 0.7536421418190002, + "step": 3000 + }, + { + "epoch": 0.6914746543778801, + "grad_norm": 0.8348839196110558, + "learning_rate": 1.55901039886611e-06, + "loss": 0.70341956615448, + "step": 3001 + }, + { + "epoch": 0.691705069124424, + "grad_norm": 1.0093771985733984, + "learning_rate": 1.5586944615068776e-06, + "loss": 0.8152127265930176, + "step": 3002 + }, + { + "epoch": 0.6919354838709677, + "grad_norm": 0.9332692294841357, + "learning_rate": 1.5583784430538838e-06, + "loss": 0.6728770732879639, + "step": 3003 + }, + { + "epoch": 0.6921658986175115, + "grad_norm": 1.0871891474224546, + "learning_rate": 1.558062343552998e-06, + "loss": 0.8406884670257568, + "step": 3004 + }, + { + "epoch": 0.6923963133640553, + "grad_norm": 0.8920706269230131, + "learning_rate": 1.5577461630501018e-06, + "loss": 0.766754686832428, + "step": 3005 + }, + { + "epoch": 0.6926267281105991, + "grad_norm": 0.714004026253109, + "learning_rate": 1.5574299015910889e-06, + "loss": 0.7456642389297485, + "step": 3006 + }, + { + "epoch": 0.6928571428571428, + "grad_norm": 0.8290815943958627, + "learning_rate": 1.557113559221863e-06, + "loss": 0.7834097743034363, + "step": 3007 + }, + { + "epoch": 0.6930875576036867, 
+ "grad_norm": 0.91346801287595, + "learning_rate": 1.556797135988342e-06, + "loss": 0.7425946593284607, + "step": 3008 + }, + { + "epoch": 0.6933179723502304, + "grad_norm": 1.0483330104966306, + "learning_rate": 1.5564806319364534e-06, + "loss": 0.7914093732833862, + "step": 3009 + }, + { + "epoch": 0.6935483870967742, + "grad_norm": 0.9665010461345012, + "learning_rate": 1.556164047112138e-06, + "loss": 0.819783091545105, + "step": 3010 + }, + { + "epoch": 0.693778801843318, + "grad_norm": 0.985903986481312, + "learning_rate": 1.5558473815613474e-06, + "loss": 0.7147302627563477, + "step": 3011 + }, + { + "epoch": 0.6940092165898617, + "grad_norm": 1.1240220664371217, + "learning_rate": 1.5555306353300452e-06, + "loss": 0.7247470617294312, + "step": 3012 + }, + { + "epoch": 0.6942396313364055, + "grad_norm": 1.2403633886338306, + "learning_rate": 1.5552138084642067e-06, + "loss": 0.8277294635772705, + "step": 3013 + }, + { + "epoch": 0.6944700460829493, + "grad_norm": 0.9054626931882043, + "learning_rate": 1.554896901009819e-06, + "loss": 0.8014394640922546, + "step": 3014 + }, + { + "epoch": 0.6947004608294931, + "grad_norm": 0.9274937399954835, + "learning_rate": 1.5545799130128808e-06, + "loss": 0.7468869686126709, + "step": 3015 + }, + { + "epoch": 0.6949308755760368, + "grad_norm": 0.8904964499744723, + "learning_rate": 1.554262844519402e-06, + "loss": 0.7854933142662048, + "step": 3016 + }, + { + "epoch": 0.6951612903225807, + "grad_norm": 0.9536718451900233, + "learning_rate": 1.5539456955754053e-06, + "loss": 0.8359543681144714, + "step": 3017 + }, + { + "epoch": 0.6953917050691244, + "grad_norm": 0.8313774511874621, + "learning_rate": 1.5536284662269243e-06, + "loss": 0.7767773866653442, + "step": 3018 + }, + { + "epoch": 0.6956221198156682, + "grad_norm": 0.7370790678700915, + "learning_rate": 1.5533111565200044e-06, + "loss": 0.8388162851333618, + "step": 3019 + }, + { + "epoch": 0.695852534562212, + "grad_norm": 0.9159856551917743, + "learning_rate": 
1.5529937665007024e-06, + "loss": 0.7791208028793335, + "step": 3020 + }, + { + "epoch": 0.6960829493087558, + "grad_norm": 0.9740300384215894, + "learning_rate": 1.5526762962150875e-06, + "loss": 0.8662698864936829, + "step": 3021 + }, + { + "epoch": 0.6963133640552995, + "grad_norm": 0.7004253764922403, + "learning_rate": 1.5523587457092394e-06, + "loss": 0.737492024898529, + "step": 3022 + }, + { + "epoch": 0.6965437788018434, + "grad_norm": 1.0408775765092733, + "learning_rate": 1.552041115029251e-06, + "loss": 0.83610999584198, + "step": 3023 + }, + { + "epoch": 0.6967741935483871, + "grad_norm": 1.1134023704947162, + "learning_rate": 1.5517234042212254e-06, + "loss": 0.930977463722229, + "step": 3024 + }, + { + "epoch": 0.6970046082949308, + "grad_norm": 0.8756044667716456, + "learning_rate": 1.551405613331278e-06, + "loss": 0.7587058544158936, + "step": 3025 + }, + { + "epoch": 0.6972350230414747, + "grad_norm": 0.7720525053545241, + "learning_rate": 1.551087742405536e-06, + "loss": 0.7549247741699219, + "step": 3026 + }, + { + "epoch": 0.6974654377880184, + "grad_norm": 0.8108175030001162, + "learning_rate": 1.5507697914901376e-06, + "loss": 0.6906812787055969, + "step": 3027 + }, + { + "epoch": 0.6976958525345622, + "grad_norm": 0.7358502568670926, + "learning_rate": 1.5504517606312332e-06, + "loss": 0.7806124687194824, + "step": 3028 + }, + { + "epoch": 0.697926267281106, + "grad_norm": 0.8191496367359047, + "learning_rate": 1.5501336498749846e-06, + "loss": 0.8091036081314087, + "step": 3029 + }, + { + "epoch": 0.6981566820276498, + "grad_norm": 0.923718506351422, + "learning_rate": 1.5498154592675646e-06, + "loss": 0.721937894821167, + "step": 3030 + }, + { + "epoch": 0.6983870967741935, + "grad_norm": 0.729194360630959, + "learning_rate": 1.5494971888551587e-06, + "loss": 0.712378740310669, + "step": 3031 + }, + { + "epoch": 0.6986175115207374, + "grad_norm": 0.9809936276606201, + "learning_rate": 1.5491788386839635e-06, + "loss": 0.8106495141983032, + 
"step": 3032 + }, + { + "epoch": 0.6988479262672811, + "grad_norm": 1.0550994014291641, + "learning_rate": 1.5488604088001866e-06, + "loss": 0.7886521816253662, + "step": 3033 + }, + { + "epoch": 0.6990783410138249, + "grad_norm": 0.9413909460240358, + "learning_rate": 1.5485418992500479e-06, + "loss": 0.7483402490615845, + "step": 3034 + }, + { + "epoch": 0.6993087557603687, + "grad_norm": 0.9735513924670123, + "learning_rate": 1.5482233100797788e-06, + "loss": 0.6236725449562073, + "step": 3035 + }, + { + "epoch": 0.6995391705069124, + "grad_norm": 1.023064942988146, + "learning_rate": 1.5479046413356222e-06, + "loss": 0.9477910995483398, + "step": 3036 + }, + { + "epoch": 0.6997695852534562, + "grad_norm": 1.0993186685690193, + "learning_rate": 1.5475858930638322e-06, + "loss": 0.8921213746070862, + "step": 3037 + }, + { + "epoch": 0.7, + "grad_norm": 0.7179145673247356, + "learning_rate": 1.5472670653106744e-06, + "loss": 0.7460963726043701, + "step": 3038 + }, + { + "epoch": 0.7002304147465438, + "grad_norm": 0.8319225077693166, + "learning_rate": 1.5469481581224271e-06, + "loss": 0.6135849356651306, + "step": 3039 + }, + { + "epoch": 0.7004608294930875, + "grad_norm": 0.8739744675210649, + "learning_rate": 1.546629171545378e-06, + "loss": 0.8039313554763794, + "step": 3040 + }, + { + "epoch": 0.7006912442396314, + "grad_norm": 1.2210857419731846, + "learning_rate": 1.5463101056258289e-06, + "loss": 0.8751651048660278, + "step": 3041 + }, + { + "epoch": 0.7009216589861751, + "grad_norm": 0.9070575590392688, + "learning_rate": 1.545990960410091e-06, + "loss": 0.7600879669189453, + "step": 3042 + }, + { + "epoch": 0.7011520737327189, + "grad_norm": 0.9983949583794295, + "learning_rate": 1.545671735944488e-06, + "loss": 0.8118841648101807, + "step": 3043 + }, + { + "epoch": 0.7013824884792627, + "grad_norm": 0.7470799565000998, + "learning_rate": 1.5453524322753546e-06, + "loss": 0.7144184112548828, + "step": 3044 + }, + { + "epoch": 0.7016129032258065, + 
"grad_norm": 1.149288210915265, + "learning_rate": 1.545033049449038e-06, + "loss": 0.9730075001716614, + "step": 3045 + }, + { + "epoch": 0.7018433179723502, + "grad_norm": 0.9334735321523672, + "learning_rate": 1.5447135875118957e-06, + "loss": 0.6930910348892212, + "step": 3046 + }, + { + "epoch": 0.7020737327188941, + "grad_norm": 1.0190518922073715, + "learning_rate": 1.5443940465102973e-06, + "loss": 0.8517031669616699, + "step": 3047 + }, + { + "epoch": 0.7023041474654378, + "grad_norm": 0.9199109424213672, + "learning_rate": 1.5440744264906237e-06, + "loss": 0.7939779758453369, + "step": 3048 + }, + { + "epoch": 0.7025345622119815, + "grad_norm": 1.0310125567194028, + "learning_rate": 1.5437547274992672e-06, + "loss": 0.8946782350540161, + "step": 3049 + }, + { + "epoch": 0.7027649769585254, + "grad_norm": 1.1682685309372194, + "learning_rate": 1.543434949582632e-06, + "loss": 0.9273954033851624, + "step": 3050 + }, + { + "epoch": 0.7029953917050691, + "grad_norm": 0.8496559046178408, + "learning_rate": 1.5431150927871333e-06, + "loss": 0.7731457352638245, + "step": 3051 + }, + { + "epoch": 0.7032258064516129, + "grad_norm": 0.9900519408386056, + "learning_rate": 1.542795157159198e-06, + "loss": 0.7982608079910278, + "step": 3052 + }, + { + "epoch": 0.7034562211981567, + "grad_norm": 1.0252185126476046, + "learning_rate": 1.542475142745264e-06, + "loss": 0.8422989845275879, + "step": 3053 + }, + { + "epoch": 0.7036866359447005, + "grad_norm": 1.1364598749635721, + "learning_rate": 1.542155049591781e-06, + "loss": 0.8344876766204834, + "step": 3054 + }, + { + "epoch": 0.7039170506912442, + "grad_norm": 1.3240029855230715, + "learning_rate": 1.541834877745211e-06, + "loss": 0.8830629587173462, + "step": 3055 + }, + { + "epoch": 0.7041474654377881, + "grad_norm": 0.8841605120149971, + "learning_rate": 1.5415146272520247e-06, + "loss": 0.823864221572876, + "step": 3056 + }, + { + "epoch": 0.7043778801843318, + "grad_norm": 1.226256029650695, + "learning_rate": 
1.5411942981587077e-06, + "loss": 0.8577016592025757, + "step": 3057 + }, + { + "epoch": 0.7046082949308756, + "grad_norm": 0.9938154526101401, + "learning_rate": 1.540873890511755e-06, + "loss": 0.7431750297546387, + "step": 3058 + }, + { + "epoch": 0.7048387096774194, + "grad_norm": 1.3100911793106818, + "learning_rate": 1.5405534043576729e-06, + "loss": 0.8219394683837891, + "step": 3059 + }, + { + "epoch": 0.7050691244239631, + "grad_norm": 0.8179546123014678, + "learning_rate": 1.5402328397429795e-06, + "loss": 0.706437349319458, + "step": 3060 + }, + { + "epoch": 0.7052995391705069, + "grad_norm": 0.9400567182130463, + "learning_rate": 1.5399121967142051e-06, + "loss": 0.8669443130493164, + "step": 3061 + }, + { + "epoch": 0.7055299539170506, + "grad_norm": 0.9808762608140087, + "learning_rate": 1.5395914753178897e-06, + "loss": 0.7995564937591553, + "step": 3062 + }, + { + "epoch": 0.7057603686635945, + "grad_norm": 1.0691077372052262, + "learning_rate": 1.5392706756005862e-06, + "loss": 0.7840889692306519, + "step": 3063 + }, + { + "epoch": 0.7059907834101382, + "grad_norm": 0.9593102373354429, + "learning_rate": 1.5389497976088582e-06, + "loss": 0.8231604695320129, + "step": 3064 + }, + { + "epoch": 0.706221198156682, + "grad_norm": 1.0423471516482703, + "learning_rate": 1.5386288413892801e-06, + "loss": 0.7821571826934814, + "step": 3065 + }, + { + "epoch": 0.7064516129032258, + "grad_norm": 0.9221304357539406, + "learning_rate": 1.538307806988439e-06, + "loss": 0.736830472946167, + "step": 3066 + }, + { + "epoch": 0.7066820276497696, + "grad_norm": 0.8124713959576904, + "learning_rate": 1.537986694452932e-06, + "loss": 0.7783113718032837, + "step": 3067 + }, + { + "epoch": 0.7069124423963133, + "grad_norm": 0.8679700879266566, + "learning_rate": 1.5376655038293692e-06, + "loss": 0.8000421524047852, + "step": 3068 + }, + { + "epoch": 0.7071428571428572, + "grad_norm": 0.8513728527683974, + "learning_rate": 1.5373442351643696e-06, + "loss": 
0.7446980476379395, + "step": 3069 + }, + { + "epoch": 0.7073732718894009, + "grad_norm": 0.8188336762916474, + "learning_rate": 1.537022888504566e-06, + "loss": 0.7018321752548218, + "step": 3070 + }, + { + "epoch": 0.7076036866359448, + "grad_norm": 0.8259052522128728, + "learning_rate": 1.5367014638966008e-06, + "loss": 0.6903716325759888, + "step": 3071 + }, + { + "epoch": 0.7078341013824885, + "grad_norm": 1.0909385113291765, + "learning_rate": 1.5363799613871289e-06, + "loss": 0.9635254144668579, + "step": 3072 + }, + { + "epoch": 0.7080645161290322, + "grad_norm": 0.7335179559352851, + "learning_rate": 1.5360583810228156e-06, + "loss": 0.8612154722213745, + "step": 3073 + }, + { + "epoch": 0.708294930875576, + "grad_norm": 0.9395034612023028, + "learning_rate": 1.5357367228503376e-06, + "loss": 0.8632407784461975, + "step": 3074 + }, + { + "epoch": 0.7085253456221198, + "grad_norm": 0.9383639731759232, + "learning_rate": 1.5354149869163839e-06, + "loss": 0.8117856979370117, + "step": 3075 + }, + { + "epoch": 0.7087557603686636, + "grad_norm": 0.9770895875008837, + "learning_rate": 1.5350931732676538e-06, + "loss": 0.8062559366226196, + "step": 3076 + }, + { + "epoch": 0.7089861751152073, + "grad_norm": 0.9191794034062433, + "learning_rate": 1.5347712819508576e-06, + "loss": 0.7918965816497803, + "step": 3077 + }, + { + "epoch": 0.7092165898617512, + "grad_norm": 0.7897301018455927, + "learning_rate": 1.534449313012718e-06, + "loss": 0.7564986944198608, + "step": 3078 + }, + { + "epoch": 0.7094470046082949, + "grad_norm": 0.774017262501344, + "learning_rate": 1.534127266499968e-06, + "loss": 0.8261928558349609, + "step": 3079 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.9288792217475005, + "learning_rate": 1.5338051424593524e-06, + "loss": 0.705269455909729, + "step": 3080 + }, + { + "epoch": 0.7099078341013825, + "grad_norm": 0.8500383243043894, + "learning_rate": 1.5334829409376271e-06, + "loss": 0.823144793510437, + "step": 3081 + }, + { + 
"epoch": 0.7101382488479263, + "grad_norm": 0.7512588375717618, + "learning_rate": 1.5331606619815588e-06, + "loss": 0.7772066593170166, + "step": 3082 + }, + { + "epoch": 0.71036866359447, + "grad_norm": 1.0827682012637947, + "learning_rate": 1.5328383056379265e-06, + "loss": 0.8901097178459167, + "step": 3083 + }, + { + "epoch": 0.7105990783410139, + "grad_norm": 0.9540489638748495, + "learning_rate": 1.5325158719535196e-06, + "loss": 0.8454819917678833, + "step": 3084 + }, + { + "epoch": 0.7108294930875576, + "grad_norm": 0.8879734338037916, + "learning_rate": 1.5321933609751388e-06, + "loss": 0.8444693684577942, + "step": 3085 + }, + { + "epoch": 0.7110599078341013, + "grad_norm": 1.0157021807199436, + "learning_rate": 1.5318707727495964e-06, + "loss": 0.7893826961517334, + "step": 3086 + }, + { + "epoch": 0.7112903225806452, + "grad_norm": 0.9711563338551928, + "learning_rate": 1.531548107323715e-06, + "loss": 0.7536686658859253, + "step": 3087 + }, + { + "epoch": 0.7115207373271889, + "grad_norm": 1.1272305964721914, + "learning_rate": 1.53122536474433e-06, + "loss": 0.8105358481407166, + "step": 3088 + }, + { + "epoch": 0.7117511520737327, + "grad_norm": 0.8430783893005721, + "learning_rate": 1.530902545058286e-06, + "loss": 0.8104212284088135, + "step": 3089 + }, + { + "epoch": 0.7119815668202765, + "grad_norm": 1.1740010494566606, + "learning_rate": 1.5305796483124405e-06, + "loss": 0.7738373279571533, + "step": 3090 + }, + { + "epoch": 0.7122119815668203, + "grad_norm": 0.8346644560955941, + "learning_rate": 1.5302566745536618e-06, + "loss": 0.7583746910095215, + "step": 3091 + }, + { + "epoch": 0.712442396313364, + "grad_norm": 1.0290772907257426, + "learning_rate": 1.5299336238288286e-06, + "loss": 0.8370871543884277, + "step": 3092 + }, + { + "epoch": 0.7126728110599079, + "grad_norm": 0.8908237623549358, + "learning_rate": 1.5296104961848314e-06, + "loss": 0.7833988666534424, + "step": 3093 + }, + { + "epoch": 0.7129032258064516, + "grad_norm": 
1.135734716262211, + "learning_rate": 1.5292872916685717e-06, + "loss": 0.8024515509605408, + "step": 3094 + }, + { + "epoch": 0.7131336405529954, + "grad_norm": 0.8156588034123838, + "learning_rate": 1.5289640103269623e-06, + "loss": 0.8044738173484802, + "step": 3095 + }, + { + "epoch": 0.7133640552995392, + "grad_norm": 0.846268334708117, + "learning_rate": 1.5286406522069273e-06, + "loss": 0.7783721685409546, + "step": 3096 + }, + { + "epoch": 0.7135944700460829, + "grad_norm": 0.8004616169511741, + "learning_rate": 1.5283172173554014e-06, + "loss": 0.693443238735199, + "step": 3097 + }, + { + "epoch": 0.7138248847926267, + "grad_norm": 0.9862921565687749, + "learning_rate": 1.527993705819331e-06, + "loss": 0.8142237663269043, + "step": 3098 + }, + { + "epoch": 0.7140552995391705, + "grad_norm": 0.9077662799949481, + "learning_rate": 1.5276701176456726e-06, + "loss": 0.790626049041748, + "step": 3099 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 1.0485200242859731, + "learning_rate": 1.5273464528813953e-06, + "loss": 0.9460805654525757, + "step": 3100 + }, + { + "epoch": 0.714516129032258, + "grad_norm": 0.902776913050398, + "learning_rate": 1.5270227115734789e-06, + "loss": 0.6906337738037109, + "step": 3101 + }, + { + "epoch": 0.7147465437788019, + "grad_norm": 0.8514512995363496, + "learning_rate": 1.526698893768913e-06, + "loss": 0.8828556537628174, + "step": 3102 + }, + { + "epoch": 0.7149769585253456, + "grad_norm": 1.0568586756231748, + "learning_rate": 1.5263749995147004e-06, + "loss": 0.8395771980285645, + "step": 3103 + }, + { + "epoch": 0.7152073732718894, + "grad_norm": 0.814014727084384, + "learning_rate": 1.5260510288578535e-06, + "loss": 0.7103895545005798, + "step": 3104 + }, + { + "epoch": 0.7154377880184332, + "grad_norm": 1.0670304040497072, + "learning_rate": 1.5257269818453956e-06, + "loss": 0.9780298471450806, + "step": 3105 + }, + { + "epoch": 0.715668202764977, + "grad_norm": 0.777700102492748, + "learning_rate": 
1.525402858524363e-06, + "loss": 0.8176128268241882, + "step": 3106 + }, + { + "epoch": 0.7158986175115207, + "grad_norm": 0.8127092170976247, + "learning_rate": 1.5250786589418008e-06, + "loss": 0.6766567230224609, + "step": 3107 + }, + { + "epoch": 0.7161290322580646, + "grad_norm": 0.8076252538068988, + "learning_rate": 1.5247543831447662e-06, + "loss": 0.7910950183868408, + "step": 3108 + }, + { + "epoch": 0.7163594470046083, + "grad_norm": 0.76882132080824, + "learning_rate": 1.5244300311803275e-06, + "loss": 0.8444501161575317, + "step": 3109 + }, + { + "epoch": 0.716589861751152, + "grad_norm": 0.9073390489490682, + "learning_rate": 1.5241056030955642e-06, + "loss": 0.7180038690567017, + "step": 3110 + }, + { + "epoch": 0.7168202764976959, + "grad_norm": 0.8535510406326756, + "learning_rate": 1.5237810989375663e-06, + "loss": 0.8563181757926941, + "step": 3111 + }, + { + "epoch": 0.7170506912442396, + "grad_norm": 0.7281554723991874, + "learning_rate": 1.5234565187534353e-06, + "loss": 0.7792840003967285, + "step": 3112 + }, + { + "epoch": 0.7172811059907834, + "grad_norm": 1.2546504724448617, + "learning_rate": 1.5231318625902835e-06, + "loss": 0.8414837121963501, + "step": 3113 + }, + { + "epoch": 0.7175115207373272, + "grad_norm": 0.9151299107605344, + "learning_rate": 1.5228071304952348e-06, + "loss": 0.8549888134002686, + "step": 3114 + }, + { + "epoch": 0.717741935483871, + "grad_norm": 0.8858229770055023, + "learning_rate": 1.5224823225154228e-06, + "loss": 0.7973321676254272, + "step": 3115 + }, + { + "epoch": 0.7179723502304147, + "grad_norm": 0.8923496131316503, + "learning_rate": 1.5221574386979937e-06, + "loss": 0.7328228950500488, + "step": 3116 + }, + { + "epoch": 0.7182027649769586, + "grad_norm": 0.8315355877258431, + "learning_rate": 1.5218324790901033e-06, + "loss": 0.8953883051872253, + "step": 3117 + }, + { + "epoch": 0.7184331797235023, + "grad_norm": 0.8252416441396693, + "learning_rate": 1.5215074437389195e-06, + "loss": 
0.7804527282714844, + "step": 3118 + }, + { + "epoch": 0.7186635944700461, + "grad_norm": 1.0592650685202745, + "learning_rate": 1.5211823326916204e-06, + "loss": 0.7581363320350647, + "step": 3119 + }, + { + "epoch": 0.7188940092165899, + "grad_norm": 0.9812896234713268, + "learning_rate": 1.520857145995396e-06, + "loss": 0.7720214128494263, + "step": 3120 + }, + { + "epoch": 0.7191244239631336, + "grad_norm": 0.8448153689850479, + "learning_rate": 1.5205318836974463e-06, + "loss": 0.7142826914787292, + "step": 3121 + }, + { + "epoch": 0.7193548387096774, + "grad_norm": 1.0627992363231917, + "learning_rate": 1.520206545844983e-06, + "loss": 0.715612530708313, + "step": 3122 + }, + { + "epoch": 0.7195852534562212, + "grad_norm": 1.1048993433011334, + "learning_rate": 1.5198811324852277e-06, + "loss": 0.8851219415664673, + "step": 3123 + }, + { + "epoch": 0.719815668202765, + "grad_norm": 0.9292687584217408, + "learning_rate": 1.5195556436654146e-06, + "loss": 0.981631875038147, + "step": 3124 + }, + { + "epoch": 0.7200460829493087, + "grad_norm": 1.043088312445038, + "learning_rate": 1.5192300794327876e-06, + "loss": 0.8586313724517822, + "step": 3125 + }, + { + "epoch": 0.7202764976958526, + "grad_norm": 1.082548105463139, + "learning_rate": 1.518904439834602e-06, + "loss": 0.8863250017166138, + "step": 3126 + }, + { + "epoch": 0.7205069124423963, + "grad_norm": 0.8136107336174612, + "learning_rate": 1.5185787249181239e-06, + "loss": 0.864910900592804, + "step": 3127 + }, + { + "epoch": 0.7207373271889401, + "grad_norm": 0.9898417106954193, + "learning_rate": 1.5182529347306302e-06, + "loss": 0.8120951652526855, + "step": 3128 + }, + { + "epoch": 0.7209677419354839, + "grad_norm": 1.008844559262399, + "learning_rate": 1.517927069319409e-06, + "loss": 0.7866026163101196, + "step": 3129 + }, + { + "epoch": 0.7211981566820277, + "grad_norm": 0.9577789377394936, + "learning_rate": 1.5176011287317598e-06, + "loss": 0.8610655069351196, + "step": 3130 + }, + { + "epoch": 
0.7214285714285714, + "grad_norm": 0.8861108738387133, + "learning_rate": 1.5172751130149915e-06, + "loss": 0.7463846206665039, + "step": 3131 + }, + { + "epoch": 0.7216589861751153, + "grad_norm": 0.7361410685782023, + "learning_rate": 1.5169490222164254e-06, + "loss": 0.6578936576843262, + "step": 3132 + }, + { + "epoch": 0.721889400921659, + "grad_norm": 0.9361369886672088, + "learning_rate": 1.516622856383393e-06, + "loss": 0.6849668025970459, + "step": 3133 + }, + { + "epoch": 0.7221198156682027, + "grad_norm": 1.0686822202217916, + "learning_rate": 1.5162966155632372e-06, + "loss": 0.9549611806869507, + "step": 3134 + }, + { + "epoch": 0.7223502304147466, + "grad_norm": 0.9063080856885865, + "learning_rate": 1.5159702998033113e-06, + "loss": 0.8005616664886475, + "step": 3135 + }, + { + "epoch": 0.7225806451612903, + "grad_norm": 1.089721709643384, + "learning_rate": 1.5156439091509793e-06, + "loss": 0.8980830311775208, + "step": 3136 + }, + { + "epoch": 0.7228110599078341, + "grad_norm": 1.012161312959267, + "learning_rate": 1.5153174436536166e-06, + "loss": 0.8247464895248413, + "step": 3137 + }, + { + "epoch": 0.7230414746543778, + "grad_norm": 0.9582357561913161, + "learning_rate": 1.5149909033586088e-06, + "loss": 0.818629264831543, + "step": 3138 + }, + { + "epoch": 0.7232718894009217, + "grad_norm": 0.7730251673290138, + "learning_rate": 1.5146642883133532e-06, + "loss": 0.8928704261779785, + "step": 3139 + }, + { + "epoch": 0.7235023041474654, + "grad_norm": 1.199560365249708, + "learning_rate": 1.5143375985652576e-06, + "loss": 0.9330282807350159, + "step": 3140 + }, + { + "epoch": 0.7237327188940093, + "grad_norm": 0.9749101527395967, + "learning_rate": 1.5140108341617405e-06, + "loss": 0.7961822748184204, + "step": 3141 + }, + { + "epoch": 0.723963133640553, + "grad_norm": 0.9244859383947029, + "learning_rate": 1.513683995150231e-06, + "loss": 0.8073769807815552, + "step": 3142 + }, + { + "epoch": 0.7241935483870968, + "grad_norm": 
1.0469784848396728, + "learning_rate": 1.51335708157817e-06, + "loss": 0.946292519569397, + "step": 3143 + }, + { + "epoch": 0.7244239631336405, + "grad_norm": 0.8214787899217685, + "learning_rate": 1.513030093493008e-06, + "loss": 0.806084156036377, + "step": 3144 + }, + { + "epoch": 0.7246543778801844, + "grad_norm": 0.9086362129225068, + "learning_rate": 1.5127030309422072e-06, + "loss": 0.8804534673690796, + "step": 3145 + }, + { + "epoch": 0.7248847926267281, + "grad_norm": 0.973773267534968, + "learning_rate": 1.51237589397324e-06, + "loss": 0.7489848136901855, + "step": 3146 + }, + { + "epoch": 0.7251152073732718, + "grad_norm": 1.047973105384132, + "learning_rate": 1.5120486826335905e-06, + "loss": 0.875586986541748, + "step": 3147 + }, + { + "epoch": 0.7253456221198157, + "grad_norm": 0.8473382638758681, + "learning_rate": 1.5117213969707522e-06, + "loss": 0.8334758281707764, + "step": 3148 + }, + { + "epoch": 0.7255760368663594, + "grad_norm": 0.8693445792084491, + "learning_rate": 1.5113940370322306e-06, + "loss": 0.8010859489440918, + "step": 3149 + }, + { + "epoch": 0.7258064516129032, + "grad_norm": 0.8638975130346471, + "learning_rate": 1.5110666028655417e-06, + "loss": 0.7907547950744629, + "step": 3150 + }, + { + "epoch": 0.726036866359447, + "grad_norm": 0.9542895726151109, + "learning_rate": 1.5107390945182117e-06, + "loss": 0.8922848105430603, + "step": 3151 + }, + { + "epoch": 0.7262672811059908, + "grad_norm": 0.7865624103758176, + "learning_rate": 1.5104115120377783e-06, + "loss": 0.7418628931045532, + "step": 3152 + }, + { + "epoch": 0.7264976958525345, + "grad_norm": 1.0285540479216404, + "learning_rate": 1.51008385547179e-06, + "loss": 0.9063338041305542, + "step": 3153 + }, + { + "epoch": 0.7267281105990784, + "grad_norm": 1.0080575916686718, + "learning_rate": 1.5097561248678047e-06, + "loss": 0.8718822002410889, + "step": 3154 + }, + { + "epoch": 0.7269585253456221, + "grad_norm": 1.0055226715830414, + "learning_rate": 
1.5094283202733934e-06, + "loss": 0.950742244720459, + "step": 3155 + }, + { + "epoch": 0.727188940092166, + "grad_norm": 1.126636802719941, + "learning_rate": 1.5091004417361353e-06, + "loss": 0.7963443994522095, + "step": 3156 + }, + { + "epoch": 0.7274193548387097, + "grad_norm": 1.0644638923319971, + "learning_rate": 1.5087724893036225e-06, + "loss": 0.8428621888160706, + "step": 3157 + }, + { + "epoch": 0.7276497695852534, + "grad_norm": 1.0421355661787988, + "learning_rate": 1.508444463023456e-06, + "loss": 0.8271539211273193, + "step": 3158 + }, + { + "epoch": 0.7278801843317972, + "grad_norm": 0.7345991655152693, + "learning_rate": 1.508116362943249e-06, + "loss": 0.7899917364120483, + "step": 3159 + }, + { + "epoch": 0.728110599078341, + "grad_norm": 1.1916065857121023, + "learning_rate": 1.5077881891106246e-06, + "loss": 0.8734809160232544, + "step": 3160 + }, + { + "epoch": 0.7283410138248848, + "grad_norm": 1.0138536766133128, + "learning_rate": 1.5074599415732164e-06, + "loss": 0.7740491628646851, + "step": 3161 + }, + { + "epoch": 0.7285714285714285, + "grad_norm": 0.8952462084516831, + "learning_rate": 1.5071316203786698e-06, + "loss": 0.7219515442848206, + "step": 3162 + }, + { + "epoch": 0.7288018433179724, + "grad_norm": 0.7779518912065628, + "learning_rate": 1.50680322557464e-06, + "loss": 0.8122725486755371, + "step": 3163 + }, + { + "epoch": 0.7290322580645161, + "grad_norm": 0.9965727720770509, + "learning_rate": 1.5064747572087923e-06, + "loss": 0.8280072212219238, + "step": 3164 + }, + { + "epoch": 0.7292626728110599, + "grad_norm": 0.9097690003119847, + "learning_rate": 1.5061462153288047e-06, + "loss": 0.7287842035293579, + "step": 3165 + }, + { + "epoch": 0.7294930875576037, + "grad_norm": 1.0497146109580189, + "learning_rate": 1.5058175999823639e-06, + "loss": 0.8404949903488159, + "step": 3166 + }, + { + "epoch": 0.7297235023041475, + "grad_norm": 0.9887517999095412, + "learning_rate": 1.505488911217168e-06, + "loss": 
0.6572415828704834, + "step": 3167 + }, + { + "epoch": 0.7299539170506912, + "grad_norm": 1.0946078663351873, + "learning_rate": 1.5051601490809257e-06, + "loss": 0.8924484848976135, + "step": 3168 + }, + { + "epoch": 0.7301843317972351, + "grad_norm": 1.1648951213224894, + "learning_rate": 1.5048313136213566e-06, + "loss": 0.8701428174972534, + "step": 3169 + }, + { + "epoch": 0.7304147465437788, + "grad_norm": 1.1475520143482136, + "learning_rate": 1.5045024048861906e-06, + "loss": 0.8327716588973999, + "step": 3170 + }, + { + "epoch": 0.7306451612903225, + "grad_norm": 0.9261768702303601, + "learning_rate": 1.5041734229231686e-06, + "loss": 0.8379253149032593, + "step": 3171 + }, + { + "epoch": 0.7308755760368664, + "grad_norm": 0.944084791074753, + "learning_rate": 1.5038443677800413e-06, + "loss": 0.7475664019584656, + "step": 3172 + }, + { + "epoch": 0.7311059907834101, + "grad_norm": 1.2226580752686416, + "learning_rate": 1.5035152395045714e-06, + "loss": 0.9002243280410767, + "step": 3173 + }, + { + "epoch": 0.7313364055299539, + "grad_norm": 0.8355701729873874, + "learning_rate": 1.503186038144531e-06, + "loss": 0.6718685626983643, + "step": 3174 + }, + { + "epoch": 0.7315668202764977, + "grad_norm": 0.8961232238271665, + "learning_rate": 1.5028567637477033e-06, + "loss": 0.6836501359939575, + "step": 3175 + }, + { + "epoch": 0.7317972350230415, + "grad_norm": 0.8859536342600928, + "learning_rate": 1.502527416361882e-06, + "loss": 0.7548954486846924, + "step": 3176 + }, + { + "epoch": 0.7320276497695852, + "grad_norm": 0.9826706955950207, + "learning_rate": 1.5021979960348714e-06, + "loss": 0.8385212421417236, + "step": 3177 + }, + { + "epoch": 0.7322580645161291, + "grad_norm": 0.8341383572022868, + "learning_rate": 1.5018685028144864e-06, + "loss": 0.8605425357818604, + "step": 3178 + }, + { + "epoch": 0.7324884792626728, + "grad_norm": 0.9464588739740442, + "learning_rate": 1.501538936748553e-06, + "loss": 0.8831393718719482, + "step": 3179 + }, + { + 
"epoch": 0.7327188940092166, + "grad_norm": 0.8991947067614845, + "learning_rate": 1.5012092978849062e-06, + "loss": 0.6965172290802002, + "step": 3180 + }, + { + "epoch": 0.7329493087557604, + "grad_norm": 1.0090692893685214, + "learning_rate": 1.500879586271394e-06, + "loss": 0.8062859773635864, + "step": 3181 + }, + { + "epoch": 0.7331797235023041, + "grad_norm": 0.7952177607289516, + "learning_rate": 1.5005498019558724e-06, + "loss": 0.8285790681838989, + "step": 3182 + }, + { + "epoch": 0.7334101382488479, + "grad_norm": 0.9848452236152132, + "learning_rate": 1.50021994498621e-06, + "loss": 0.612429141998291, + "step": 3183 + }, + { + "epoch": 0.7336405529953917, + "grad_norm": 0.9156545700522013, + "learning_rate": 1.4998900154102847e-06, + "loss": 0.8271423578262329, + "step": 3184 + }, + { + "epoch": 0.7338709677419355, + "grad_norm": 1.033787601007848, + "learning_rate": 1.499560013275986e-06, + "loss": 0.838964581489563, + "step": 3185 + }, + { + "epoch": 0.7341013824884792, + "grad_norm": 0.973220548768116, + "learning_rate": 1.4992299386312119e-06, + "loss": 0.7902333736419678, + "step": 3186 + }, + { + "epoch": 0.7343317972350231, + "grad_norm": 1.0086369878855088, + "learning_rate": 1.4988997915238735e-06, + "loss": 0.8520635366439819, + "step": 3187 + }, + { + "epoch": 0.7345622119815668, + "grad_norm": 0.9892742658321851, + "learning_rate": 1.4985695720018905e-06, + "loss": 0.8666567206382751, + "step": 3188 + }, + { + "epoch": 0.7347926267281106, + "grad_norm": 0.9672613309802366, + "learning_rate": 1.4982392801131944e-06, + "loss": 0.6930691003799438, + "step": 3189 + }, + { + "epoch": 0.7350230414746544, + "grad_norm": 0.7049869743164157, + "learning_rate": 1.4979089159057263e-06, + "loss": 0.7957722544670105, + "step": 3190 + }, + { + "epoch": 0.7352534562211982, + "grad_norm": 1.0247601673009343, + "learning_rate": 1.4975784794274383e-06, + "loss": 0.8966697454452515, + "step": 3191 + }, + { + "epoch": 0.7354838709677419, + "grad_norm": 
0.9082832739975722, + "learning_rate": 1.4972479707262926e-06, + "loss": 0.7478537559509277, + "step": 3192 + }, + { + "epoch": 0.7357142857142858, + "grad_norm": 0.9541041339746362, + "learning_rate": 1.4969173898502624e-06, + "loss": 0.8862416744232178, + "step": 3193 + }, + { + "epoch": 0.7359447004608295, + "grad_norm": 0.8171852448254098, + "learning_rate": 1.4965867368473306e-06, + "loss": 0.7910712957382202, + "step": 3194 + }, + { + "epoch": 0.7361751152073732, + "grad_norm": 1.1219879646982642, + "learning_rate": 1.4962560117654916e-06, + "loss": 0.7371944785118103, + "step": 3195 + }, + { + "epoch": 0.7364055299539171, + "grad_norm": 1.097733223938739, + "learning_rate": 1.4959252146527496e-06, + "loss": 0.7966737151145935, + "step": 3196 + }, + { + "epoch": 0.7366359447004608, + "grad_norm": 1.0499505243286467, + "learning_rate": 1.4955943455571188e-06, + "loss": 0.8474653363227844, + "step": 3197 + }, + { + "epoch": 0.7368663594470046, + "grad_norm": 1.1042914253537062, + "learning_rate": 1.4952634045266249e-06, + "loss": 1.0197458267211914, + "step": 3198 + }, + { + "epoch": 0.7370967741935484, + "grad_norm": 1.054872102822339, + "learning_rate": 1.4949323916093036e-06, + "loss": 0.8813979625701904, + "step": 3199 + }, + { + "epoch": 0.7373271889400922, + "grad_norm": 0.9264193586497762, + "learning_rate": 1.4946013068532008e-06, + "loss": 0.9323042631149292, + "step": 3200 + }, + { + "epoch": 0.7375576036866359, + "grad_norm": 1.1184797510334814, + "learning_rate": 1.494270150306373e-06, + "loss": 0.8637902736663818, + "step": 3201 + }, + { + "epoch": 0.7377880184331798, + "grad_norm": 1.1006860616870338, + "learning_rate": 1.4939389220168875e-06, + "loss": 0.8046854734420776, + "step": 3202 + }, + { + "epoch": 0.7380184331797235, + "grad_norm": 0.9882241685181946, + "learning_rate": 1.4936076220328211e-06, + "loss": 0.7616177201271057, + "step": 3203 + }, + { + "epoch": 0.7382488479262673, + "grad_norm": 1.0795779512267711, + "learning_rate": 
1.4932762504022619e-06, + "loss": 0.8548959493637085, + "step": 3204 + }, + { + "epoch": 0.738479262672811, + "grad_norm": 0.7907178615166577, + "learning_rate": 1.492944807173308e-06, + "loss": 0.8062562942504883, + "step": 3205 + }, + { + "epoch": 0.7387096774193549, + "grad_norm": 1.3004819436990922, + "learning_rate": 1.492613292394068e-06, + "loss": 0.8776403069496155, + "step": 3206 + }, + { + "epoch": 0.7389400921658986, + "grad_norm": 1.0654471822316505, + "learning_rate": 1.4922817061126605e-06, + "loss": 0.7528336048126221, + "step": 3207 + }, + { + "epoch": 0.7391705069124423, + "grad_norm": 0.9288011243231857, + "learning_rate": 1.4919500483772152e-06, + "loss": 0.7441881895065308, + "step": 3208 + }, + { + "epoch": 0.7394009216589862, + "grad_norm": 0.9496581250230889, + "learning_rate": 1.4916183192358715e-06, + "loss": 0.8925758004188538, + "step": 3209 + }, + { + "epoch": 0.7396313364055299, + "grad_norm": 0.999519243113449, + "learning_rate": 1.4912865187367798e-06, + "loss": 0.7527008652687073, + "step": 3210 + }, + { + "epoch": 0.7398617511520738, + "grad_norm": 0.8631940848050832, + "learning_rate": 1.4909546469281e-06, + "loss": 0.753572404384613, + "step": 3211 + }, + { + "epoch": 0.7400921658986175, + "grad_norm": 0.938203260102219, + "learning_rate": 1.4906227038580036e-06, + "loss": 0.8884274959564209, + "step": 3212 + }, + { + "epoch": 0.7403225806451613, + "grad_norm": 0.7835821294972823, + "learning_rate": 1.4902906895746707e-06, + "loss": 0.7702244520187378, + "step": 3213 + }, + { + "epoch": 0.740552995391705, + "grad_norm": 1.0140732775513552, + "learning_rate": 1.4899586041262936e-06, + "loss": 0.8662835359573364, + "step": 3214 + }, + { + "epoch": 0.7407834101382489, + "grad_norm": 1.0357827096613574, + "learning_rate": 1.4896264475610736e-06, + "loss": 0.9819997549057007, + "step": 3215 + }, + { + "epoch": 0.7410138248847926, + "grad_norm": 1.0094197188590162, + "learning_rate": 1.4892942199272232e-06, + "loss": 0.9137614965438843, 
+ "step": 3216 + }, + { + "epoch": 0.7412442396313365, + "grad_norm": 0.8442315992670393, + "learning_rate": 1.488961921272964e-06, + "loss": 0.7554785013198853, + "step": 3217 + }, + { + "epoch": 0.7414746543778802, + "grad_norm": 1.1172745597106868, + "learning_rate": 1.4886295516465296e-06, + "loss": 0.8528940677642822, + "step": 3218 + }, + { + "epoch": 0.7417050691244239, + "grad_norm": 0.9056918439443091, + "learning_rate": 1.4882971110961626e-06, + "loss": 0.7212377786636353, + "step": 3219 + }, + { + "epoch": 0.7419354838709677, + "grad_norm": 0.9349124518247459, + "learning_rate": 1.4879645996701161e-06, + "loss": 0.7767617702484131, + "step": 3220 + }, + { + "epoch": 0.7421658986175115, + "grad_norm": 0.8749389005214587, + "learning_rate": 1.4876320174166542e-06, + "loss": 0.8083292245864868, + "step": 3221 + }, + { + "epoch": 0.7423963133640553, + "grad_norm": 1.14484646357819, + "learning_rate": 1.4872993643840506e-06, + "loss": 0.8652364015579224, + "step": 3222 + }, + { + "epoch": 0.742626728110599, + "grad_norm": 0.9176030431238368, + "learning_rate": 1.486966640620589e-06, + "loss": 0.7455019950866699, + "step": 3223 + }, + { + "epoch": 0.7428571428571429, + "grad_norm": 1.0637469159007076, + "learning_rate": 1.4866338461745644e-06, + "loss": 0.7881917953491211, + "step": 3224 + }, + { + "epoch": 0.7430875576036866, + "grad_norm": 1.0955814961304737, + "learning_rate": 1.4863009810942813e-06, + "loss": 0.8148372173309326, + "step": 3225 + }, + { + "epoch": 0.7433179723502304, + "grad_norm": 0.7991384008669099, + "learning_rate": 1.4859680454280547e-06, + "loss": 0.6574658751487732, + "step": 3226 + }, + { + "epoch": 0.7435483870967742, + "grad_norm": 0.9231484623709659, + "learning_rate": 1.4856350392242094e-06, + "loss": 0.7831655740737915, + "step": 3227 + }, + { + "epoch": 0.743778801843318, + "grad_norm": 0.8080817272772121, + "learning_rate": 1.485301962531081e-06, + "loss": 0.7406231164932251, + "step": 3228 + }, + { + "epoch": 
0.7440092165898617, + "grad_norm": 0.9500561612529754, + "learning_rate": 1.4849688153970154e-06, + "loss": 0.8092324733734131, + "step": 3229 + }, + { + "epoch": 0.7442396313364056, + "grad_norm": 0.969093760928221, + "learning_rate": 1.4846355978703679e-06, + "loss": 0.6662560701370239, + "step": 3230 + }, + { + "epoch": 0.7444700460829493, + "grad_norm": 0.8941354868939383, + "learning_rate": 1.4843023099995052e-06, + "loss": 0.8064731359481812, + "step": 3231 + }, + { + "epoch": 0.744700460829493, + "grad_norm": 1.0463529761361023, + "learning_rate": 1.4839689518328037e-06, + "loss": 0.7424519658088684, + "step": 3232 + }, + { + "epoch": 0.7449308755760369, + "grad_norm": 0.9618875213680247, + "learning_rate": 1.4836355234186489e-06, + "loss": 0.7851438522338867, + "step": 3233 + }, + { + "epoch": 0.7451612903225806, + "grad_norm": 1.2534680382280676, + "learning_rate": 1.4833020248054381e-06, + "loss": 0.896986722946167, + "step": 3234 + }, + { + "epoch": 0.7453917050691244, + "grad_norm": 1.3688846458082455, + "learning_rate": 1.4829684560415787e-06, + "loss": 0.9469928741455078, + "step": 3235 + }, + { + "epoch": 0.7456221198156682, + "grad_norm": 0.8653442286827894, + "learning_rate": 1.4826348171754872e-06, + "loss": 0.7527188062667847, + "step": 3236 + }, + { + "epoch": 0.745852534562212, + "grad_norm": 0.9575212903893582, + "learning_rate": 1.4823011082555907e-06, + "loss": 0.7758080959320068, + "step": 3237 + }, + { + "epoch": 0.7460829493087557, + "grad_norm": 0.9454436343118328, + "learning_rate": 1.481967329330327e-06, + "loss": 0.8359881043434143, + "step": 3238 + }, + { + "epoch": 0.7463133640552996, + "grad_norm": 0.7567559878181612, + "learning_rate": 1.4816334804481434e-06, + "loss": 0.6576982736587524, + "step": 3239 + }, + { + "epoch": 0.7465437788018433, + "grad_norm": 1.0012365138594377, + "learning_rate": 1.4812995616574978e-06, + "loss": 0.7919917106628418, + "step": 3240 + }, + { + "epoch": 0.7467741935483871, + "grad_norm": 
0.7865137499791297, + "learning_rate": 1.480965573006858e-06, + "loss": 0.7682263851165771, + "step": 3241 + }, + { + "epoch": 0.7470046082949309, + "grad_norm": 1.0123241682054298, + "learning_rate": 1.4806315145447017e-06, + "loss": 0.8573193550109863, + "step": 3242 + }, + { + "epoch": 0.7472350230414746, + "grad_norm": 0.8191884786597581, + "learning_rate": 1.4802973863195174e-06, + "loss": 0.8473606109619141, + "step": 3243 + }, + { + "epoch": 0.7474654377880184, + "grad_norm": 0.8754073951862541, + "learning_rate": 1.4799631883798033e-06, + "loss": 0.8110678195953369, + "step": 3244 + }, + { + "epoch": 0.7476958525345622, + "grad_norm": 1.2161581760732987, + "learning_rate": 1.4796289207740681e-06, + "loss": 0.6624661087989807, + "step": 3245 + }, + { + "epoch": 0.747926267281106, + "grad_norm": 0.7356293873938221, + "learning_rate": 1.47929458355083e-06, + "loss": 0.8145536184310913, + "step": 3246 + }, + { + "epoch": 0.7481566820276497, + "grad_norm": 0.921128997158793, + "learning_rate": 1.4789601767586172e-06, + "loss": 0.7819876074790955, + "step": 3247 + }, + { + "epoch": 0.7483870967741936, + "grad_norm": 0.973465003660405, + "learning_rate": 1.4786257004459692e-06, + "loss": 0.7573810815811157, + "step": 3248 + }, + { + "epoch": 0.7486175115207373, + "grad_norm": 1.061603620628762, + "learning_rate": 1.4782911546614343e-06, + "loss": 0.8149522542953491, + "step": 3249 + }, + { + "epoch": 0.7488479262672811, + "grad_norm": 1.023358335101362, + "learning_rate": 1.4779565394535714e-06, + "loss": 0.9935284852981567, + "step": 3250 + }, + { + "epoch": 0.7490783410138249, + "grad_norm": 0.8488935416479958, + "learning_rate": 1.4776218548709497e-06, + "loss": 0.8673371076583862, + "step": 3251 + }, + { + "epoch": 0.7493087557603687, + "grad_norm": 1.0304468521950305, + "learning_rate": 1.4772871009621477e-06, + "loss": 0.8569149374961853, + "step": 3252 + }, + { + "epoch": 0.7495391705069124, + "grad_norm": 0.8613722173703313, + "learning_rate": 
1.4769522777757551e-06, + "loss": 0.7177854776382446, + "step": 3253 + }, + { + "epoch": 0.7497695852534563, + "grad_norm": 1.0681726446759283, + "learning_rate": 1.4766173853603706e-06, + "loss": 0.8115622997283936, + "step": 3254 + }, + { + "epoch": 0.75, + "grad_norm": 0.782977490159237, + "learning_rate": 1.4762824237646038e-06, + "loss": 0.7209019660949707, + "step": 3255 + }, + { + "epoch": 0.7502304147465437, + "grad_norm": 0.9264325214188774, + "learning_rate": 1.4759473930370736e-06, + "loss": 0.8433470726013184, + "step": 3256 + }, + { + "epoch": 0.7504608294930876, + "grad_norm": 1.0399152705693322, + "learning_rate": 1.4756122932264093e-06, + "loss": 0.853674054145813, + "step": 3257 + }, + { + "epoch": 0.7506912442396313, + "grad_norm": 0.9978956076189626, + "learning_rate": 1.4752771243812503e-06, + "loss": 0.8645769357681274, + "step": 3258 + }, + { + "epoch": 0.7509216589861751, + "grad_norm": 1.4046905803968728, + "learning_rate": 1.474941886550246e-06, + "loss": 0.927452564239502, + "step": 3259 + }, + { + "epoch": 0.7511520737327189, + "grad_norm": 0.8642581213790671, + "learning_rate": 1.4746065797820552e-06, + "loss": 0.7461255788803101, + "step": 3260 + }, + { + "epoch": 0.7513824884792627, + "grad_norm": 0.9230380534710827, + "learning_rate": 1.4742712041253481e-06, + "loss": 0.8737163543701172, + "step": 3261 + }, + { + "epoch": 0.7516129032258064, + "grad_norm": 0.8624828182814519, + "learning_rate": 1.4739357596288036e-06, + "loss": 0.7148758172988892, + "step": 3262 + }, + { + "epoch": 0.7518433179723503, + "grad_norm": 0.8930446588032352, + "learning_rate": 1.4736002463411108e-06, + "loss": 0.738334596157074, + "step": 3263 + }, + { + "epoch": 0.752073732718894, + "grad_norm": 0.9237791770446419, + "learning_rate": 1.4732646643109692e-06, + "loss": 0.7733340263366699, + "step": 3264 + }, + { + "epoch": 0.7523041474654378, + "grad_norm": 0.8815526032135323, + "learning_rate": 1.4729290135870883e-06, + "loss": 0.7882881164550781, + "step": 
3265 + }, + { + "epoch": 0.7525345622119816, + "grad_norm": 1.029688172185613, + "learning_rate": 1.472593294218187e-06, + "loss": 0.7908357381820679, + "step": 3266 + }, + { + "epoch": 0.7527649769585254, + "grad_norm": 1.0791156682188368, + "learning_rate": 1.4722575062529946e-06, + "loss": 0.8818062543869019, + "step": 3267 + }, + { + "epoch": 0.7529953917050691, + "grad_norm": 0.9552677127935061, + "learning_rate": 1.4719216497402504e-06, + "loss": 0.7152599692344666, + "step": 3268 + }, + { + "epoch": 0.7532258064516129, + "grad_norm": 0.8322037056106782, + "learning_rate": 1.4715857247287036e-06, + "loss": 0.8503165245056152, + "step": 3269 + }, + { + "epoch": 0.7534562211981567, + "grad_norm": 0.9223729567181368, + "learning_rate": 1.4712497312671128e-06, + "loss": 0.8382623195648193, + "step": 3270 + }, + { + "epoch": 0.7536866359447004, + "grad_norm": 1.0456882119229616, + "learning_rate": 1.4709136694042479e-06, + "loss": 0.8358533382415771, + "step": 3271 + }, + { + "epoch": 0.7539170506912443, + "grad_norm": 0.850717529465525, + "learning_rate": 1.4705775391888868e-06, + "loss": 0.6735624670982361, + "step": 3272 + }, + { + "epoch": 0.754147465437788, + "grad_norm": 0.8890452669379437, + "learning_rate": 1.470241340669819e-06, + "loss": 0.8343949317932129, + "step": 3273 + }, + { + "epoch": 0.7543778801843318, + "grad_norm": 0.9508610560109901, + "learning_rate": 1.4699050738958434e-06, + "loss": 0.8204318284988403, + "step": 3274 + }, + { + "epoch": 0.7546082949308756, + "grad_norm": 0.9484772286558124, + "learning_rate": 1.4695687389157684e-06, + "loss": 0.7541854977607727, + "step": 3275 + }, + { + "epoch": 0.7548387096774194, + "grad_norm": 0.8425504123859369, + "learning_rate": 1.4692323357784122e-06, + "loss": 0.8144943714141846, + "step": 3276 + }, + { + "epoch": 0.7550691244239631, + "grad_norm": 0.8699783126306536, + "learning_rate": 1.468895864532604e-06, + "loss": 0.9045677781105042, + "step": 3277 + }, + { + "epoch": 0.755299539170507, + 
"grad_norm": 1.1586104318366583, + "learning_rate": 1.4685593252271816e-06, + "loss": 0.8818730115890503, + "step": 3278 + }, + { + "epoch": 0.7555299539170507, + "grad_norm": 1.013621065000431, + "learning_rate": 1.4682227179109932e-06, + "loss": 0.8582229614257812, + "step": 3279 + }, + { + "epoch": 0.7557603686635944, + "grad_norm": 1.016541372354986, + "learning_rate": 1.4678860426328977e-06, + "loss": 0.8769974708557129, + "step": 3280 + }, + { + "epoch": 0.7559907834101383, + "grad_norm": 0.8474484944100091, + "learning_rate": 1.467549299441762e-06, + "loss": 0.8034937381744385, + "step": 3281 + }, + { + "epoch": 0.756221198156682, + "grad_norm": 0.9998169463505984, + "learning_rate": 1.4672124883864646e-06, + "loss": 0.9057378768920898, + "step": 3282 + }, + { + "epoch": 0.7564516129032258, + "grad_norm": 0.9160359407680143, + "learning_rate": 1.4668756095158929e-06, + "loss": 0.8039969205856323, + "step": 3283 + }, + { + "epoch": 0.7566820276497696, + "grad_norm": 0.7311572278532684, + "learning_rate": 1.4665386628789448e-06, + "loss": 0.887493908405304, + "step": 3284 + }, + { + "epoch": 0.7569124423963134, + "grad_norm": 0.9749833066021305, + "learning_rate": 1.4662016485245271e-06, + "loss": 0.783561646938324, + "step": 3285 + }, + { + "epoch": 0.7571428571428571, + "grad_norm": 1.1972955361865625, + "learning_rate": 1.4658645665015579e-06, + "loss": 0.7526337504386902, + "step": 3286 + }, + { + "epoch": 0.757373271889401, + "grad_norm": 1.0074911468135093, + "learning_rate": 1.4655274168589633e-06, + "loss": 0.8583099842071533, + "step": 3287 + }, + { + "epoch": 0.7576036866359447, + "grad_norm": 0.9193819222275846, + "learning_rate": 1.4651901996456802e-06, + "loss": 0.743253767490387, + "step": 3288 + }, + { + "epoch": 0.7578341013824885, + "grad_norm": 0.9481332173734432, + "learning_rate": 1.4648529149106555e-06, + "loss": 0.8763987421989441, + "step": 3289 + }, + { + "epoch": 0.7580645161290323, + "grad_norm": 0.9531439206540595, + "learning_rate": 
1.4645155627028455e-06, + "loss": 0.8388645648956299, + "step": 3290 + }, + { + "epoch": 0.7582949308755761, + "grad_norm": 0.9430549047432926, + "learning_rate": 1.4641781430712167e-06, + "loss": 0.8943589925765991, + "step": 3291 + }, + { + "epoch": 0.7585253456221198, + "grad_norm": 0.897306276129885, + "learning_rate": 1.463840656064745e-06, + "loss": 0.9224259257316589, + "step": 3292 + }, + { + "epoch": 0.7587557603686635, + "grad_norm": 0.7118962108569266, + "learning_rate": 1.463503101732416e-06, + "loss": 0.5836232900619507, + "step": 3293 + }, + { + "epoch": 0.7589861751152074, + "grad_norm": 1.2610309452085111, + "learning_rate": 1.4631654801232255e-06, + "loss": 0.6700382828712463, + "step": 3294 + }, + { + "epoch": 0.7592165898617511, + "grad_norm": 0.9159006934526643, + "learning_rate": 1.4628277912861785e-06, + "loss": 0.7876112461090088, + "step": 3295 + }, + { + "epoch": 0.759447004608295, + "grad_norm": 0.9073380438964382, + "learning_rate": 1.4624900352702905e-06, + "loss": 0.8410799503326416, + "step": 3296 + }, + { + "epoch": 0.7596774193548387, + "grad_norm": 0.931630117662002, + "learning_rate": 1.4621522121245859e-06, + "loss": 0.9615974426269531, + "step": 3297 + }, + { + "epoch": 0.7599078341013825, + "grad_norm": 1.1213393394374043, + "learning_rate": 1.4618143218980996e-06, + "loss": 0.7973389625549316, + "step": 3298 + }, + { + "epoch": 0.7601382488479262, + "grad_norm": 0.7835636014361216, + "learning_rate": 1.461476364639876e-06, + "loss": 0.7734094858169556, + "step": 3299 + }, + { + "epoch": 0.7603686635944701, + "grad_norm": 0.9681758067915807, + "learning_rate": 1.461138340398969e-06, + "loss": 0.7365939617156982, + "step": 3300 + }, + { + "epoch": 0.7605990783410138, + "grad_norm": 0.9251627601521192, + "learning_rate": 1.4608002492244421e-06, + "loss": 0.822052001953125, + "step": 3301 + }, + { + "epoch": 0.7608294930875577, + "grad_norm": 0.83536047590978, + "learning_rate": 1.460462091165369e-06, + "loss": 0.7220577001571655, 
+ "step": 3302 + }, + { + "epoch": 0.7610599078341014, + "grad_norm": 0.9806834080573716, + "learning_rate": 1.4601238662708332e-06, + "loss": 0.9795923233032227, + "step": 3303 + }, + { + "epoch": 0.7612903225806451, + "grad_norm": 1.0452301496717684, + "learning_rate": 1.4597855745899273e-06, + "loss": 0.804523229598999, + "step": 3304 + }, + { + "epoch": 0.761520737327189, + "grad_norm": 0.936039712838613, + "learning_rate": 1.4594472161717536e-06, + "loss": 0.7630297541618347, + "step": 3305 + }, + { + "epoch": 0.7617511520737327, + "grad_norm": 1.008258749087615, + "learning_rate": 1.4591087910654254e-06, + "loss": 0.7088560461997986, + "step": 3306 + }, + { + "epoch": 0.7619815668202765, + "grad_norm": 0.8612515545716848, + "learning_rate": 1.4587702993200637e-06, + "loss": 0.6627416014671326, + "step": 3307 + }, + { + "epoch": 0.7622119815668202, + "grad_norm": 1.0700034611745908, + "learning_rate": 1.4584317409848001e-06, + "loss": 0.7931111454963684, + "step": 3308 + }, + { + "epoch": 0.7624423963133641, + "grad_norm": 0.918004873184285, + "learning_rate": 1.4580931161087763e-06, + "loss": 0.8107850551605225, + "step": 3309 + }, + { + "epoch": 0.7626728110599078, + "grad_norm": 1.1251596055699022, + "learning_rate": 1.4577544247411431e-06, + "loss": 0.8211404085159302, + "step": 3310 + }, + { + "epoch": 0.7629032258064516, + "grad_norm": 1.1825093837600291, + "learning_rate": 1.457415666931061e-06, + "loss": 0.9861341714859009, + "step": 3311 + }, + { + "epoch": 0.7631336405529954, + "grad_norm": 1.0573079532917569, + "learning_rate": 1.4570768427277007e-06, + "loss": 0.8963409662246704, + "step": 3312 + }, + { + "epoch": 0.7633640552995392, + "grad_norm": 1.1183054914337, + "learning_rate": 1.4567379521802416e-06, + "loss": 0.7510147094726562, + "step": 3313 + }, + { + "epoch": 0.7635944700460829, + "grad_norm": 1.0312269750408198, + "learning_rate": 1.4563989953378734e-06, + "loss": 0.7761805057525635, + "step": 3314 + }, + { + "epoch": 
0.7638248847926268, + "grad_norm": 0.782434581691777, + "learning_rate": 1.4560599722497953e-06, + "loss": 0.6202781200408936, + "step": 3315 + }, + { + "epoch": 0.7640552995391705, + "grad_norm": 0.9114320197488165, + "learning_rate": 1.4557208829652159e-06, + "loss": 0.711891770362854, + "step": 3316 + }, + { + "epoch": 0.7642857142857142, + "grad_norm": 1.0888571874972786, + "learning_rate": 1.4553817275333537e-06, + "loss": 0.8689517974853516, + "step": 3317 + }, + { + "epoch": 0.7645161290322581, + "grad_norm": 0.847547372029402, + "learning_rate": 1.4550425060034365e-06, + "loss": 0.7323688268661499, + "step": 3318 + }, + { + "epoch": 0.7647465437788018, + "grad_norm": 0.954006429800706, + "learning_rate": 1.4547032184247022e-06, + "loss": 0.8934407234191895, + "step": 3319 + }, + { + "epoch": 0.7649769585253456, + "grad_norm": 0.9830574702749578, + "learning_rate": 1.4543638648463975e-06, + "loss": 0.7729885578155518, + "step": 3320 + }, + { + "epoch": 0.7652073732718894, + "grad_norm": 0.9967355019103026, + "learning_rate": 1.454024445317779e-06, + "loss": 0.8962388038635254, + "step": 3321 + }, + { + "epoch": 0.7654377880184332, + "grad_norm": 0.8821073382766633, + "learning_rate": 1.4536849598881137e-06, + "loss": 0.8655213117599487, + "step": 3322 + }, + { + "epoch": 0.7656682027649769, + "grad_norm": 0.8780656658271131, + "learning_rate": 1.453345408606677e-06, + "loss": 0.6471779346466064, + "step": 3323 + }, + { + "epoch": 0.7658986175115208, + "grad_norm": 0.7335596828312507, + "learning_rate": 1.4530057915227545e-06, + "loss": 0.8665071129798889, + "step": 3324 + }, + { + "epoch": 0.7661290322580645, + "grad_norm": 1.054528188345679, + "learning_rate": 1.4526661086856407e-06, + "loss": 0.9504371285438538, + "step": 3325 + }, + { + "epoch": 0.7663594470046083, + "grad_norm": 1.017396914206461, + "learning_rate": 1.452326360144641e-06, + "loss": 0.8122013807296753, + "step": 3326 + }, + { + "epoch": 0.7665898617511521, + "grad_norm": 
1.0019111601549837, + "learning_rate": 1.4519865459490687e-06, + "loss": 0.817001223564148, + "step": 3327 + }, + { + "epoch": 0.7668202764976959, + "grad_norm": 0.9387626004792055, + "learning_rate": 1.4516466661482474e-06, + "loss": 0.732322096824646, + "step": 3328 + }, + { + "epoch": 0.7670506912442396, + "grad_norm": 0.8844021324185192, + "learning_rate": 1.4513067207915106e-06, + "loss": 0.7961580157279968, + "step": 3329 + }, + { + "epoch": 0.7672811059907834, + "grad_norm": 0.9579783239612414, + "learning_rate": 1.4509667099282007e-06, + "loss": 0.7660717368125916, + "step": 3330 + }, + { + "epoch": 0.7675115207373272, + "grad_norm": 0.8487336367256668, + "learning_rate": 1.4506266336076698e-06, + "loss": 0.8279193639755249, + "step": 3331 + }, + { + "epoch": 0.7677419354838709, + "grad_norm": 0.8431407438554851, + "learning_rate": 1.4502864918792796e-06, + "loss": 0.7050153017044067, + "step": 3332 + }, + { + "epoch": 0.7679723502304148, + "grad_norm": 0.9386347952909049, + "learning_rate": 1.4499462847924013e-06, + "loss": 0.8146064877510071, + "step": 3333 + }, + { + "epoch": 0.7682027649769585, + "grad_norm": 0.8248232070769104, + "learning_rate": 1.4496060123964153e-06, + "loss": 0.8300814628601074, + "step": 3334 + }, + { + "epoch": 0.7684331797235023, + "grad_norm": 0.848400587593364, + "learning_rate": 1.4492656747407117e-06, + "loss": 0.8240403532981873, + "step": 3335 + }, + { + "epoch": 0.7686635944700461, + "grad_norm": 1.1661360506901004, + "learning_rate": 1.4489252718746908e-06, + "loss": 0.901625394821167, + "step": 3336 + }, + { + "epoch": 0.7688940092165899, + "grad_norm": 0.8620744709914054, + "learning_rate": 1.4485848038477604e-06, + "loss": 0.827139675617218, + "step": 3337 + }, + { + "epoch": 0.7691244239631336, + "grad_norm": 1.111541176491108, + "learning_rate": 1.4482442707093397e-06, + "loss": 0.7032946348190308, + "step": 3338 + }, + { + "epoch": 0.7693548387096775, + "grad_norm": 0.8506038004087974, + "learning_rate": 
1.4479036725088564e-06, + "loss": 0.6805816888809204, + "step": 3339 + }, + { + "epoch": 0.7695852534562212, + "grad_norm": 0.8063208135295213, + "learning_rate": 1.447563009295748e-06, + "loss": 0.673591136932373, + "step": 3340 + }, + { + "epoch": 0.7698156682027649, + "grad_norm": 0.8116035277545482, + "learning_rate": 1.4472222811194614e-06, + "loss": 0.6513386964797974, + "step": 3341 + }, + { + "epoch": 0.7700460829493088, + "grad_norm": 0.7654089652768199, + "learning_rate": 1.4468814880294529e-06, + "loss": 0.7367297410964966, + "step": 3342 + }, + { + "epoch": 0.7702764976958525, + "grad_norm": 1.0405555538712603, + "learning_rate": 1.4465406300751878e-06, + "loss": 0.7393670082092285, + "step": 3343 + }, + { + "epoch": 0.7705069124423963, + "grad_norm": 0.7135144631405288, + "learning_rate": 1.4461997073061411e-06, + "loss": 0.7525930404663086, + "step": 3344 + }, + { + "epoch": 0.7707373271889401, + "grad_norm": 0.7583677101512988, + "learning_rate": 1.445858719771798e-06, + "loss": 0.6679942011833191, + "step": 3345 + }, + { + "epoch": 0.7709677419354839, + "grad_norm": 1.0903018310329022, + "learning_rate": 1.4455176675216518e-06, + "loss": 0.8440653085708618, + "step": 3346 + }, + { + "epoch": 0.7711981566820276, + "grad_norm": 0.9929368208299709, + "learning_rate": 1.4451765506052063e-06, + "loss": 0.8765773177146912, + "step": 3347 + }, + { + "epoch": 0.7714285714285715, + "grad_norm": 0.9183070258317377, + "learning_rate": 1.4448353690719732e-06, + "loss": 0.7309157848358154, + "step": 3348 + }, + { + "epoch": 0.7716589861751152, + "grad_norm": 0.8130162073408548, + "learning_rate": 1.4444941229714758e-06, + "loss": 0.8043340444564819, + "step": 3349 + }, + { + "epoch": 0.771889400921659, + "grad_norm": 0.8488386913998837, + "learning_rate": 1.4441528123532443e-06, + "loss": 0.6528831124305725, + "step": 3350 + }, + { + "epoch": 0.7721198156682028, + "grad_norm": 0.7632405080168834, + "learning_rate": 1.4438114372668202e-06, + "loss": 
0.7973155975341797, + "step": 3351 + }, + { + "epoch": 0.7723502304147466, + "grad_norm": 0.8366450624031991, + "learning_rate": 1.443469997761754e-06, + "loss": 0.940142810344696, + "step": 3352 + }, + { + "epoch": 0.7725806451612903, + "grad_norm": 1.0048812991349738, + "learning_rate": 1.443128493887604e-06, + "loss": 0.7936829328536987, + "step": 3353 + }, + { + "epoch": 0.772811059907834, + "grad_norm": 0.8583665989338275, + "learning_rate": 1.44278692569394e-06, + "loss": 0.8369218111038208, + "step": 3354 + }, + { + "epoch": 0.7730414746543779, + "grad_norm": 1.313808566044562, + "learning_rate": 1.4424452932303398e-06, + "loss": 0.9305802583694458, + "step": 3355 + }, + { + "epoch": 0.7732718894009216, + "grad_norm": 0.8862565116465879, + "learning_rate": 1.4421035965463916e-06, + "loss": 0.913454532623291, + "step": 3356 + }, + { + "epoch": 0.7735023041474655, + "grad_norm": 1.0772806984700294, + "learning_rate": 1.4417618356916912e-06, + "loss": 0.8552114963531494, + "step": 3357 + }, + { + "epoch": 0.7737327188940092, + "grad_norm": 1.080720564237515, + "learning_rate": 1.4414200107158452e-06, + "loss": 0.8674488067626953, + "step": 3358 + }, + { + "epoch": 0.773963133640553, + "grad_norm": 1.0999604158561203, + "learning_rate": 1.441078121668469e-06, + "loss": 0.9142898321151733, + "step": 3359 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 1.0964749277789683, + "learning_rate": 1.4407361685991872e-06, + "loss": 0.8258639574050903, + "step": 3360 + }, + { + "epoch": 0.7744239631336406, + "grad_norm": 1.062716295700188, + "learning_rate": 1.4403941515576343e-06, + "loss": 0.773646354675293, + "step": 3361 + }, + { + "epoch": 0.7746543778801843, + "grad_norm": 1.1397221950146432, + "learning_rate": 1.440052070593453e-06, + "loss": 0.9481985569000244, + "step": 3362 + }, + { + "epoch": 0.7748847926267282, + "grad_norm": 1.0332478363266029, + "learning_rate": 1.4397099257562965e-06, + "loss": 0.7915977239608765, + "step": 3363 + }, + { + "epoch": 
0.7751152073732719, + "grad_norm": 1.057946693927254, + "learning_rate": 1.4393677170958261e-06, + "loss": 0.887650191783905, + "step": 3364 + }, + { + "epoch": 0.7753456221198156, + "grad_norm": 0.8250912024788589, + "learning_rate": 1.4390254446617137e-06, + "loss": 0.8516546487808228, + "step": 3365 + }, + { + "epoch": 0.7755760368663595, + "grad_norm": 0.9895329351481195, + "learning_rate": 1.4386831085036386e-06, + "loss": 0.8076090812683105, + "step": 3366 + }, + { + "epoch": 0.7758064516129032, + "grad_norm": 0.9203902257484836, + "learning_rate": 1.4383407086712913e-06, + "loss": 0.7480059862136841, + "step": 3367 + }, + { + "epoch": 0.776036866359447, + "grad_norm": 1.1101542314671893, + "learning_rate": 1.4379982452143704e-06, + "loss": 0.8586190938949585, + "step": 3368 + }, + { + "epoch": 0.7762672811059907, + "grad_norm": 0.9197679868181698, + "learning_rate": 1.4376557181825842e-06, + "loss": 0.7581472396850586, + "step": 3369 + }, + { + "epoch": 0.7764976958525346, + "grad_norm": 1.2064630913320733, + "learning_rate": 1.4373131276256495e-06, + "loss": 0.7482568621635437, + "step": 3370 + }, + { + "epoch": 0.7767281105990783, + "grad_norm": 1.2204489088505164, + "learning_rate": 1.4369704735932935e-06, + "loss": 0.8822590112686157, + "step": 3371 + }, + { + "epoch": 0.7769585253456222, + "grad_norm": 0.9171528830764245, + "learning_rate": 1.4366277561352517e-06, + "loss": 0.7762279510498047, + "step": 3372 + }, + { + "epoch": 0.7771889400921659, + "grad_norm": 0.9649262790570658, + "learning_rate": 1.4362849753012692e-06, + "loss": 0.8059147596359253, + "step": 3373 + }, + { + "epoch": 0.7774193548387097, + "grad_norm": 1.0529652703364816, + "learning_rate": 1.4359421311411e-06, + "loss": 0.778538703918457, + "step": 3374 + }, + { + "epoch": 0.7776497695852534, + "grad_norm": 1.1587212424703164, + "learning_rate": 1.4355992237045077e-06, + "loss": 0.9422975778579712, + "step": 3375 + }, + { + "epoch": 0.7778801843317973, + "grad_norm": 
1.0109308621512796, + "learning_rate": 1.4352562530412645e-06, + "loss": 0.7437118291854858, + "step": 3376 + }, + { + "epoch": 0.778110599078341, + "grad_norm": 0.8961203034935337, + "learning_rate": 1.4349132192011525e-06, + "loss": 0.6935930252075195, + "step": 3377 + }, + { + "epoch": 0.7783410138248847, + "grad_norm": 1.1629979064489353, + "learning_rate": 1.4345701222339628e-06, + "loss": 0.7797117829322815, + "step": 3378 + }, + { + "epoch": 0.7785714285714286, + "grad_norm": 1.0591342199366531, + "learning_rate": 1.434226962189495e-06, + "loss": 0.8795931339263916, + "step": 3379 + }, + { + "epoch": 0.7788018433179723, + "grad_norm": 1.071603440273884, + "learning_rate": 1.433883739117558e-06, + "loss": 0.8936992287635803, + "step": 3380 + }, + { + "epoch": 0.7790322580645161, + "grad_norm": 1.0412928095771106, + "learning_rate": 1.4335404530679708e-06, + "loss": 0.9142701625823975, + "step": 3381 + }, + { + "epoch": 0.7792626728110599, + "grad_norm": 1.0966643259622728, + "learning_rate": 1.4331971040905613e-06, + "loss": 0.8996907472610474, + "step": 3382 + }, + { + "epoch": 0.7794930875576037, + "grad_norm": 1.020250921022328, + "learning_rate": 1.4328536922351654e-06, + "loss": 0.9645330905914307, + "step": 3383 + }, + { + "epoch": 0.7797235023041474, + "grad_norm": 0.7173807290755059, + "learning_rate": 1.4325102175516289e-06, + "loss": 0.5122036933898926, + "step": 3384 + }, + { + "epoch": 0.7799539170506913, + "grad_norm": 0.8487864939918429, + "learning_rate": 1.432166680089807e-06, + "loss": 0.6556990742683411, + "step": 3385 + }, + { + "epoch": 0.780184331797235, + "grad_norm": 0.7980125905366343, + "learning_rate": 1.4318230798995634e-06, + "loss": 0.6642920970916748, + "step": 3386 + }, + { + "epoch": 0.7804147465437788, + "grad_norm": 1.1205844690065134, + "learning_rate": 1.4314794170307718e-06, + "loss": 0.9373915195465088, + "step": 3387 + }, + { + "epoch": 0.7806451612903226, + "grad_norm": 1.1583496011366634, + "learning_rate": 
1.4311356915333139e-06, + "loss": 0.8295063972473145, + "step": 3388 + }, + { + "epoch": 0.7808755760368664, + "grad_norm": 1.0075666840710995, + "learning_rate": 1.4307919034570809e-06, + "loss": 0.8167035579681396, + "step": 3389 + }, + { + "epoch": 0.7811059907834101, + "grad_norm": 1.045465756545736, + "learning_rate": 1.4304480528519736e-06, + "loss": 0.8444087505340576, + "step": 3390 + }, + { + "epoch": 0.7813364055299539, + "grad_norm": 0.9731986846355507, + "learning_rate": 1.4301041397679012e-06, + "loss": 0.7753941416740417, + "step": 3391 + }, + { + "epoch": 0.7815668202764977, + "grad_norm": 1.0117493931274548, + "learning_rate": 1.4297601642547824e-06, + "loss": 0.7885915040969849, + "step": 3392 + }, + { + "epoch": 0.7817972350230414, + "grad_norm": 0.9902641403084854, + "learning_rate": 1.4294161263625444e-06, + "loss": 0.730733335018158, + "step": 3393 + }, + { + "epoch": 0.7820276497695853, + "grad_norm": 0.8781208509199174, + "learning_rate": 1.4290720261411241e-06, + "loss": 0.8505427837371826, + "step": 3394 + }, + { + "epoch": 0.782258064516129, + "grad_norm": 0.9435888376510791, + "learning_rate": 1.4287278636404676e-06, + "loss": 0.7370787858963013, + "step": 3395 + }, + { + "epoch": 0.7824884792626728, + "grad_norm": 0.8683550268652552, + "learning_rate": 1.428383638910529e-06, + "loss": 0.6776250600814819, + "step": 3396 + }, + { + "epoch": 0.7827188940092166, + "grad_norm": 1.158711583120319, + "learning_rate": 1.4280393520012726e-06, + "loss": 0.8878101706504822, + "step": 3397 + }, + { + "epoch": 0.7829493087557604, + "grad_norm": 1.0028929146104306, + "learning_rate": 1.427695002962671e-06, + "loss": 0.789238691329956, + "step": 3398 + }, + { + "epoch": 0.7831797235023041, + "grad_norm": 1.0382561381902518, + "learning_rate": 1.4273505918447052e-06, + "loss": 0.772524356842041, + "step": 3399 + }, + { + "epoch": 0.783410138248848, + "grad_norm": 0.8483839499127978, + "learning_rate": 1.4270061186973673e-06, + "loss": 0.682374119758606, 
+ "step": 3400 + }, + { + "epoch": 0.7836405529953917, + "grad_norm": 0.9396222987314208, + "learning_rate": 1.4266615835706566e-06, + "loss": 0.874775767326355, + "step": 3401 + }, + { + "epoch": 0.7838709677419354, + "grad_norm": 1.3780294752863322, + "learning_rate": 1.4263169865145816e-06, + "loss": 0.9141736626625061, + "step": 3402 + }, + { + "epoch": 0.7841013824884793, + "grad_norm": 1.0849695477918648, + "learning_rate": 1.4259723275791603e-06, + "loss": 0.8533145189285278, + "step": 3403 + }, + { + "epoch": 0.784331797235023, + "grad_norm": 0.9340136683520418, + "learning_rate": 1.4256276068144198e-06, + "loss": 0.7920266389846802, + "step": 3404 + }, + { + "epoch": 0.7845622119815668, + "grad_norm": 0.9462841256440514, + "learning_rate": 1.4252828242703957e-06, + "loss": 0.7822731733322144, + "step": 3405 + }, + { + "epoch": 0.7847926267281106, + "grad_norm": 0.9890597976168253, + "learning_rate": 1.4249379799971324e-06, + "loss": 0.7103791832923889, + "step": 3406 + }, + { + "epoch": 0.7850230414746544, + "grad_norm": 1.0298833059227221, + "learning_rate": 1.4245930740446841e-06, + "loss": 0.7857639789581299, + "step": 3407 + }, + { + "epoch": 0.7852534562211981, + "grad_norm": 1.1065594183312877, + "learning_rate": 1.4242481064631134e-06, + "loss": 0.8069730997085571, + "step": 3408 + }, + { + "epoch": 0.785483870967742, + "grad_norm": 1.0472042802008708, + "learning_rate": 1.4239030773024912e-06, + "loss": 0.8758031129837036, + "step": 3409 + }, + { + "epoch": 0.7857142857142857, + "grad_norm": 1.015785019886056, + "learning_rate": 1.4235579866128983e-06, + "loss": 0.895712673664093, + "step": 3410 + }, + { + "epoch": 0.7859447004608295, + "grad_norm": 0.9442660407745113, + "learning_rate": 1.423212834444425e-06, + "loss": 0.7904561758041382, + "step": 3411 + }, + { + "epoch": 0.7861751152073733, + "grad_norm": 1.0957623852355893, + "learning_rate": 1.4228676208471685e-06, + "loss": 0.9322203993797302, + "step": 3412 + }, + { + "epoch": 
0.7864055299539171, + "grad_norm": 0.7668753687506044, + "learning_rate": 1.422522345871237e-06, + "loss": 0.9693628549575806, + "step": 3413 + }, + { + "epoch": 0.7866359447004608, + "grad_norm": 0.8417164970136307, + "learning_rate": 1.4221770095667462e-06, + "loss": 0.6737014651298523, + "step": 3414 + }, + { + "epoch": 0.7868663594470046, + "grad_norm": 1.1466654292657967, + "learning_rate": 1.4218316119838215e-06, + "loss": 0.8682050108909607, + "step": 3415 + }, + { + "epoch": 0.7870967741935484, + "grad_norm": 1.058324160083765, + "learning_rate": 1.4214861531725966e-06, + "loss": 0.7920347452163696, + "step": 3416 + }, + { + "epoch": 0.7873271889400921, + "grad_norm": 1.0147867893383273, + "learning_rate": 1.4211406331832144e-06, + "loss": 0.8330510854721069, + "step": 3417 + }, + { + "epoch": 0.787557603686636, + "grad_norm": 0.8802491842183522, + "learning_rate": 1.4207950520658272e-06, + "loss": 0.8314074873924255, + "step": 3418 + }, + { + "epoch": 0.7877880184331797, + "grad_norm": 1.069355954495663, + "learning_rate": 1.420449409870595e-06, + "loss": 0.7045331001281738, + "step": 3419 + }, + { + "epoch": 0.7880184331797235, + "grad_norm": 0.9484390721895568, + "learning_rate": 1.4201037066476876e-06, + "loss": 0.7825411558151245, + "step": 3420 + }, + { + "epoch": 0.7882488479262673, + "grad_norm": 0.86611108370867, + "learning_rate": 1.4197579424472834e-06, + "loss": 0.6960075497627258, + "step": 3421 + }, + { + "epoch": 0.7884792626728111, + "grad_norm": 1.038692849963906, + "learning_rate": 1.4194121173195694e-06, + "loss": 0.8366748094558716, + "step": 3422 + }, + { + "epoch": 0.7887096774193548, + "grad_norm": 0.8605441828045868, + "learning_rate": 1.4190662313147419e-06, + "loss": 0.8859039545059204, + "step": 3423 + }, + { + "epoch": 0.7889400921658987, + "grad_norm": 1.0572382908005622, + "learning_rate": 1.4187202844830057e-06, + "loss": 0.7098245620727539, + "step": 3424 + }, + { + "epoch": 0.7891705069124424, + "grad_norm": 
0.9126448008384304, + "learning_rate": 1.4183742768745743e-06, + "loss": 0.7410455942153931, + "step": 3425 + }, + { + "epoch": 0.7894009216589861, + "grad_norm": 0.8007200450015498, + "learning_rate": 1.4180282085396706e-06, + "loss": 0.7414010763168335, + "step": 3426 + }, + { + "epoch": 0.78963133640553, + "grad_norm": 1.090062212374054, + "learning_rate": 1.417682079528526e-06, + "loss": 0.9043526649475098, + "step": 3427 + }, + { + "epoch": 0.7898617511520737, + "grad_norm": 0.8510201071166715, + "learning_rate": 1.4173358898913804e-06, + "loss": 0.7709499597549438, + "step": 3428 + }, + { + "epoch": 0.7900921658986175, + "grad_norm": 1.0829385459770577, + "learning_rate": 1.416989639678483e-06, + "loss": 0.7499940395355225, + "step": 3429 + }, + { + "epoch": 0.7903225806451613, + "grad_norm": 0.766744185733082, + "learning_rate": 1.4166433289400911e-06, + "loss": 0.7401680946350098, + "step": 3430 + }, + { + "epoch": 0.7905529953917051, + "grad_norm": 0.8802012939982503, + "learning_rate": 1.4162969577264718e-06, + "loss": 1.0132567882537842, + "step": 3431 + }, + { + "epoch": 0.7907834101382488, + "grad_norm": 0.9758763490715631, + "learning_rate": 1.4159505260879004e-06, + "loss": 0.8438389301300049, + "step": 3432 + }, + { + "epoch": 0.7910138248847927, + "grad_norm": 1.2075583274029744, + "learning_rate": 1.4156040340746603e-06, + "loss": 0.9149703979492188, + "step": 3433 + }, + { + "epoch": 0.7912442396313364, + "grad_norm": 1.4960555955584764, + "learning_rate": 1.4152574817370451e-06, + "loss": 0.9141047596931458, + "step": 3434 + }, + { + "epoch": 0.7914746543778802, + "grad_norm": 0.924125511762228, + "learning_rate": 1.414910869125356e-06, + "loss": 0.6896570324897766, + "step": 3435 + }, + { + "epoch": 0.791705069124424, + "grad_norm": 0.9277571830040596, + "learning_rate": 1.4145641962899035e-06, + "loss": 0.742916464805603, + "step": 3436 + }, + { + "epoch": 0.7919354838709678, + "grad_norm": 1.0041274553911197, + "learning_rate": 
1.414217463281007e-06, + "loss": 0.9315029382705688, + "step": 3437 + }, + { + "epoch": 0.7921658986175115, + "grad_norm": 0.9532695013501692, + "learning_rate": 1.4138706701489942e-06, + "loss": 0.7645175457000732, + "step": 3438 + }, + { + "epoch": 0.7923963133640552, + "grad_norm": 1.0166687927137474, + "learning_rate": 1.413523816944201e-06, + "loss": 0.8253934383392334, + "step": 3439 + }, + { + "epoch": 0.7926267281105991, + "grad_norm": 1.055807296618818, + "learning_rate": 1.4131769037169736e-06, + "loss": 0.8650136590003967, + "step": 3440 + }, + { + "epoch": 0.7928571428571428, + "grad_norm": 1.0239985264965783, + "learning_rate": 1.4128299305176654e-06, + "loss": 0.7453975677490234, + "step": 3441 + }, + { + "epoch": 0.7930875576036867, + "grad_norm": 1.1689392671270256, + "learning_rate": 1.4124828973966392e-06, + "loss": 0.9121813774108887, + "step": 3442 + }, + { + "epoch": 0.7933179723502304, + "grad_norm": 1.16007005259146, + "learning_rate": 1.4121358044042667e-06, + "loss": 0.9097952842712402, + "step": 3443 + }, + { + "epoch": 0.7935483870967742, + "grad_norm": 0.9263687778783555, + "learning_rate": 1.4117886515909277e-06, + "loss": 0.7185770273208618, + "step": 3444 + }, + { + "epoch": 0.793778801843318, + "grad_norm": 0.9816189958888628, + "learning_rate": 1.4114414390070111e-06, + "loss": 0.8192715644836426, + "step": 3445 + }, + { + "epoch": 0.7940092165898618, + "grad_norm": 0.8830372557771754, + "learning_rate": 1.4110941667029143e-06, + "loss": 0.7864251136779785, + "step": 3446 + }, + { + "epoch": 0.7942396313364055, + "grad_norm": 0.9262266668392852, + "learning_rate": 1.4107468347290431e-06, + "loss": 0.7433357834815979, + "step": 3447 + }, + { + "epoch": 0.7944700460829494, + "grad_norm": 0.8826486406616629, + "learning_rate": 1.4103994431358133e-06, + "loss": 0.8196350336074829, + "step": 3448 + }, + { + "epoch": 0.7947004608294931, + "grad_norm": 1.0379031741076927, + "learning_rate": 1.410051991973647e-06, + "loss": 
0.7698987126350403, + "step": 3449 + }, + { + "epoch": 0.7949308755760369, + "grad_norm": 1.228700210939763, + "learning_rate": 1.4097044812929776e-06, + "loss": 0.9404128789901733, + "step": 3450 + }, + { + "epoch": 0.7951612903225806, + "grad_norm": 0.9114628140508482, + "learning_rate": 1.4093569111442443e-06, + "loss": 0.827290952205658, + "step": 3451 + }, + { + "epoch": 0.7953917050691244, + "grad_norm": 1.0612294009838623, + "learning_rate": 1.4090092815778976e-06, + "loss": 0.8126389384269714, + "step": 3452 + }, + { + "epoch": 0.7956221198156682, + "grad_norm": 0.9598694992596972, + "learning_rate": 1.4086615926443953e-06, + "loss": 0.7439650297164917, + "step": 3453 + }, + { + "epoch": 0.7958525345622119, + "grad_norm": 0.9952168701899716, + "learning_rate": 1.4083138443942036e-06, + "loss": 0.7505590915679932, + "step": 3454 + }, + { + "epoch": 0.7960829493087558, + "grad_norm": 0.8299073365871691, + "learning_rate": 1.407966036877798e-06, + "loss": 0.7070168256759644, + "step": 3455 + }, + { + "epoch": 0.7963133640552995, + "grad_norm": 0.9422601313607071, + "learning_rate": 1.4076181701456623e-06, + "loss": 0.8271987438201904, + "step": 3456 + }, + { + "epoch": 0.7965437788018433, + "grad_norm": 0.8558890366072001, + "learning_rate": 1.4072702442482886e-06, + "loss": 0.72886061668396, + "step": 3457 + }, + { + "epoch": 0.7967741935483871, + "grad_norm": 1.1355616522222822, + "learning_rate": 1.4069222592361784e-06, + "loss": 0.838603138923645, + "step": 3458 + }, + { + "epoch": 0.7970046082949309, + "grad_norm": 1.1314183210174298, + "learning_rate": 1.4065742151598408e-06, + "loss": 0.9829634428024292, + "step": 3459 + }, + { + "epoch": 0.7972350230414746, + "grad_norm": 1.0528251173572156, + "learning_rate": 1.406226112069794e-06, + "loss": 0.8269632458686829, + "step": 3460 + }, + { + "epoch": 0.7974654377880185, + "grad_norm": 1.0290510208624037, + "learning_rate": 1.405877950016565e-06, + "loss": 0.7234654426574707, + "step": 3461 + }, + { + 
"epoch": 0.7976958525345622, + "grad_norm": 0.89079385428478, + "learning_rate": 1.4055297290506887e-06, + "loss": 0.7843908071517944, + "step": 3462 + }, + { + "epoch": 0.7979262672811059, + "grad_norm": 0.8247890912721374, + "learning_rate": 1.4051814492227094e-06, + "loss": 0.7294371128082275, + "step": 3463 + }, + { + "epoch": 0.7981566820276498, + "grad_norm": 1.1727486785997119, + "learning_rate": 1.4048331105831787e-06, + "loss": 0.8805780410766602, + "step": 3464 + }, + { + "epoch": 0.7983870967741935, + "grad_norm": 0.9922079942807702, + "learning_rate": 1.404484713182658e-06, + "loss": 0.6933708190917969, + "step": 3465 + }, + { + "epoch": 0.7986175115207373, + "grad_norm": 1.0638183747733119, + "learning_rate": 1.404136257071717e-06, + "loss": 0.8720458745956421, + "step": 3466 + }, + { + "epoch": 0.7988479262672811, + "grad_norm": 1.1404138575251217, + "learning_rate": 1.403787742300933e-06, + "loss": 0.7675988674163818, + "step": 3467 + }, + { + "epoch": 0.7990783410138249, + "grad_norm": 1.0188982193786602, + "learning_rate": 1.403439168920893e-06, + "loss": 0.7630051374435425, + "step": 3468 + }, + { + "epoch": 0.7993087557603686, + "grad_norm": 0.9607713149142998, + "learning_rate": 1.4030905369821914e-06, + "loss": 0.9195173978805542, + "step": 3469 + }, + { + "epoch": 0.7995391705069125, + "grad_norm": 0.966603725031027, + "learning_rate": 1.402741846535432e-06, + "loss": 0.9347431659698486, + "step": 3470 + }, + { + "epoch": 0.7997695852534562, + "grad_norm": 1.0423944793385256, + "learning_rate": 1.4023930976312271e-06, + "loss": 0.7812551259994507, + "step": 3471 + }, + { + "epoch": 0.8, + "grad_norm": 1.0230073164776583, + "learning_rate": 1.4020442903201963e-06, + "loss": 0.7655330896377563, + "step": 3472 + }, + { + "epoch": 0.8002304147465438, + "grad_norm": 1.2791975931288466, + "learning_rate": 1.4016954246529694e-06, + "loss": 0.7543904185295105, + "step": 3473 + }, + { + "epoch": 0.8004608294930876, + "grad_norm": 0.8246426244987128, + 
"learning_rate": 1.4013465006801833e-06, + "loss": 0.9343980550765991, + "step": 3474 + }, + { + "epoch": 0.8006912442396313, + "grad_norm": 1.1458439395589735, + "learning_rate": 1.4009975184524838e-06, + "loss": 0.7366182208061218, + "step": 3475 + }, + { + "epoch": 0.8009216589861751, + "grad_norm": 1.0109168818205314, + "learning_rate": 1.4006484780205254e-06, + "loss": 0.7028899192810059, + "step": 3476 + }, + { + "epoch": 0.8011520737327189, + "grad_norm": 1.1092959183189253, + "learning_rate": 1.4002993794349708e-06, + "loss": 0.9259153604507446, + "step": 3477 + }, + { + "epoch": 0.8013824884792626, + "grad_norm": 1.091442085001374, + "learning_rate": 1.3999502227464914e-06, + "loss": 0.7263842225074768, + "step": 3478 + }, + { + "epoch": 0.8016129032258065, + "grad_norm": 0.9964781390280828, + "learning_rate": 1.3996010080057664e-06, + "loss": 0.8177748918533325, + "step": 3479 + }, + { + "epoch": 0.8018433179723502, + "grad_norm": 1.080145531043834, + "learning_rate": 1.3992517352634842e-06, + "loss": 0.8526895046234131, + "step": 3480 + }, + { + "epoch": 0.802073732718894, + "grad_norm": 1.031018616296166, + "learning_rate": 1.398902404570341e-06, + "loss": 0.7914575338363647, + "step": 3481 + }, + { + "epoch": 0.8023041474654378, + "grad_norm": 0.816157508913072, + "learning_rate": 1.398553015977042e-06, + "loss": 0.7546013593673706, + "step": 3482 + }, + { + "epoch": 0.8025345622119816, + "grad_norm": 1.0408293581677805, + "learning_rate": 1.3982035695343005e-06, + "loss": 0.7250038385391235, + "step": 3483 + }, + { + "epoch": 0.8027649769585253, + "grad_norm": 1.023275477136697, + "learning_rate": 1.3978540652928376e-06, + "loss": 0.8650141954421997, + "step": 3484 + }, + { + "epoch": 0.8029953917050692, + "grad_norm": 0.9633891302798026, + "learning_rate": 1.3975045033033838e-06, + "loss": 0.8020066022872925, + "step": 3485 + }, + { + "epoch": 0.8032258064516129, + "grad_norm": 0.9146174916063312, + "learning_rate": 1.3971548836166782e-06, + "loss": 
0.7376772165298462, + "step": 3486 + }, + { + "epoch": 0.8034562211981566, + "grad_norm": 0.9278800283054291, + "learning_rate": 1.3968052062834665e-06, + "loss": 0.8440769910812378, + "step": 3487 + }, + { + "epoch": 0.8036866359447005, + "grad_norm": 0.8964312010034259, + "learning_rate": 1.3964554713545047e-06, + "loss": 0.7886836528778076, + "step": 3488 + }, + { + "epoch": 0.8039170506912442, + "grad_norm": 0.9177920963823754, + "learning_rate": 1.396105678880556e-06, + "loss": 0.9167575836181641, + "step": 3489 + }, + { + "epoch": 0.804147465437788, + "grad_norm": 0.8367032180339474, + "learning_rate": 1.3957558289123922e-06, + "loss": 0.6761677861213684, + "step": 3490 + }, + { + "epoch": 0.8043778801843318, + "grad_norm": 0.9716984065235628, + "learning_rate": 1.3954059215007938e-06, + "loss": 0.7775592803955078, + "step": 3491 + }, + { + "epoch": 0.8046082949308756, + "grad_norm": 1.00005526663364, + "learning_rate": 1.3950559566965494e-06, + "loss": 0.8127217292785645, + "step": 3492 + }, + { + "epoch": 0.8048387096774193, + "grad_norm": 1.007116682040637, + "learning_rate": 1.394705934550456e-06, + "loss": 0.8134229779243469, + "step": 3493 + }, + { + "epoch": 0.8050691244239632, + "grad_norm": 1.3224030787110577, + "learning_rate": 1.3943558551133186e-06, + "loss": 0.8853167295455933, + "step": 3494 + }, + { + "epoch": 0.8052995391705069, + "grad_norm": 1.0544152264027669, + "learning_rate": 1.3940057184359506e-06, + "loss": 0.8024332523345947, + "step": 3495 + }, + { + "epoch": 0.8055299539170507, + "grad_norm": 0.6779010833647611, + "learning_rate": 1.3936555245691745e-06, + "loss": 0.7581099271774292, + "step": 3496 + }, + { + "epoch": 0.8057603686635945, + "grad_norm": 1.0509729333579008, + "learning_rate": 1.3933052735638203e-06, + "loss": 0.979412317276001, + "step": 3497 + }, + { + "epoch": 0.8059907834101383, + "grad_norm": 0.9816833973848147, + "learning_rate": 1.392954965470726e-06, + "loss": 0.7917830944061279, + "step": 3498 + }, + { + 
"epoch": 0.806221198156682, + "grad_norm": 0.9622725908619084, + "learning_rate": 1.392604600340739e-06, + "loss": 0.8565326929092407, + "step": 3499 + }, + { + "epoch": 0.8064516129032258, + "grad_norm": 1.0170451339424116, + "learning_rate": 1.3922541782247136e-06, + "loss": 0.7276358604431152, + "step": 3500 + }, + { + "epoch": 0.8066820276497696, + "grad_norm": 0.8351645839157906, + "learning_rate": 1.3919036991735138e-06, + "loss": 0.734528660774231, + "step": 3501 + }, + { + "epoch": 0.8069124423963133, + "grad_norm": 1.1746648423168138, + "learning_rate": 1.391553163238011e-06, + "loss": 0.8786039352416992, + "step": 3502 + }, + { + "epoch": 0.8071428571428572, + "grad_norm": 1.1050955424788658, + "learning_rate": 1.3912025704690844e-06, + "loss": 0.9509482383728027, + "step": 3503 + }, + { + "epoch": 0.8073732718894009, + "grad_norm": 0.8741751886687131, + "learning_rate": 1.3908519209176225e-06, + "loss": 0.7188615202903748, + "step": 3504 + }, + { + "epoch": 0.8076036866359447, + "grad_norm": 1.0307846021250762, + "learning_rate": 1.3905012146345221e-06, + "loss": 0.7681115865707397, + "step": 3505 + }, + { + "epoch": 0.8078341013824885, + "grad_norm": 1.0988034793572021, + "learning_rate": 1.3901504516706874e-06, + "loss": 0.8835415840148926, + "step": 3506 + }, + { + "epoch": 0.8080645161290323, + "grad_norm": 1.0724177836810997, + "learning_rate": 1.389799632077031e-06, + "loss": 0.8179003000259399, + "step": 3507 + }, + { + "epoch": 0.808294930875576, + "grad_norm": 1.1244187286361234, + "learning_rate": 1.3894487559044742e-06, + "loss": 0.9690247774124146, + "step": 3508 + }, + { + "epoch": 0.8085253456221199, + "grad_norm": 0.9601740737567672, + "learning_rate": 1.389097823203946e-06, + "loss": 0.9759812951087952, + "step": 3509 + }, + { + "epoch": 0.8087557603686636, + "grad_norm": 0.8953376224758026, + "learning_rate": 1.3887468340263838e-06, + "loss": 0.6649112105369568, + "step": 3510 + }, + { + "epoch": 0.8089861751152074, + "grad_norm": 
0.8803647716437188, + "learning_rate": 1.388395788422733e-06, + "loss": 0.7824583053588867, + "step": 3511 + }, + { + "epoch": 0.8092165898617512, + "grad_norm": 1.0776551292843717, + "learning_rate": 1.3880446864439482e-06, + "loss": 0.8226176500320435, + "step": 3512 + }, + { + "epoch": 0.8094470046082949, + "grad_norm": 1.0775758718001336, + "learning_rate": 1.3876935281409904e-06, + "loss": 0.7708876729011536, + "step": 3513 + }, + { + "epoch": 0.8096774193548387, + "grad_norm": 1.1275141981575327, + "learning_rate": 1.3873423135648303e-06, + "loss": 0.7162825465202332, + "step": 3514 + }, + { + "epoch": 0.8099078341013825, + "grad_norm": 1.1973823780619761, + "learning_rate": 1.3869910427664464e-06, + "loss": 0.815816342830658, + "step": 3515 + }, + { + "epoch": 0.8101382488479263, + "grad_norm": 1.0491570029475803, + "learning_rate": 1.3866397157968248e-06, + "loss": 0.9166251420974731, + "step": 3516 + }, + { + "epoch": 0.81036866359447, + "grad_norm": 1.185963303947227, + "learning_rate": 1.3862883327069606e-06, + "loss": 0.9193897843360901, + "step": 3517 + }, + { + "epoch": 0.8105990783410139, + "grad_norm": 1.1492579516601074, + "learning_rate": 1.3859368935478557e-06, + "loss": 0.9019489288330078, + "step": 3518 + }, + { + "epoch": 0.8108294930875576, + "grad_norm": 1.0706438739080621, + "learning_rate": 1.3855853983705222e-06, + "loss": 0.8616153597831726, + "step": 3519 + }, + { + "epoch": 0.8110599078341014, + "grad_norm": 0.9368530229676858, + "learning_rate": 1.3852338472259782e-06, + "loss": 0.8898462057113647, + "step": 3520 + }, + { + "epoch": 0.8112903225806452, + "grad_norm": 0.9891797921278073, + "learning_rate": 1.3848822401652513e-06, + "loss": 0.770263135433197, + "step": 3521 + }, + { + "epoch": 0.811520737327189, + "grad_norm": 0.950594228231774, + "learning_rate": 1.384530577239377e-06, + "loss": 0.7524563074111938, + "step": 3522 + }, + { + "epoch": 0.8117511520737327, + "grad_norm": 0.8975349550091929, + "learning_rate": 
1.3841788584993981e-06, + "loss": 0.776715874671936, + "step": 3523 + }, + { + "epoch": 0.8119815668202764, + "grad_norm": 0.6412822466784485, + "learning_rate": 1.3838270839963666e-06, + "loss": 0.7165439128875732, + "step": 3524 + }, + { + "epoch": 0.8122119815668203, + "grad_norm": 1.0082147827954213, + "learning_rate": 1.383475253781342e-06, + "loss": 0.7641004323959351, + "step": 3525 + }, + { + "epoch": 0.812442396313364, + "grad_norm": 0.9278762834298543, + "learning_rate": 1.3831233679053921e-06, + "loss": 0.7493933439254761, + "step": 3526 + }, + { + "epoch": 0.8126728110599079, + "grad_norm": 1.1064599998463516, + "learning_rate": 1.3827714264195924e-06, + "loss": 0.7981607913970947, + "step": 3527 + }, + { + "epoch": 0.8129032258064516, + "grad_norm": 1.2555949352929368, + "learning_rate": 1.3824194293750272e-06, + "loss": 0.9130103588104248, + "step": 3528 + }, + { + "epoch": 0.8131336405529954, + "grad_norm": 1.0192840808161379, + "learning_rate": 1.3820673768227878e-06, + "loss": 0.7208644151687622, + "step": 3529 + }, + { + "epoch": 0.8133640552995391, + "grad_norm": 0.9880323858602741, + "learning_rate": 1.3817152688139745e-06, + "loss": 0.9134006500244141, + "step": 3530 + }, + { + "epoch": 0.813594470046083, + "grad_norm": 0.836575472485664, + "learning_rate": 1.381363105399695e-06, + "loss": 0.7383376359939575, + "step": 3531 + }, + { + "epoch": 0.8138248847926267, + "grad_norm": 1.4743208995655537, + "learning_rate": 1.381010886631066e-06, + "loss": 0.9143035411834717, + "step": 3532 + }, + { + "epoch": 0.8140552995391706, + "grad_norm": 0.8030889519622723, + "learning_rate": 1.3806586125592107e-06, + "loss": 0.7972506284713745, + "step": 3533 + }, + { + "epoch": 0.8142857142857143, + "grad_norm": 0.9706054308316248, + "learning_rate": 1.380306283235262e-06, + "loss": 0.8999859094619751, + "step": 3534 + }, + { + "epoch": 0.8145161290322581, + "grad_norm": 1.4136312048518, + "learning_rate": 1.37995389871036e-06, + "loss": 0.7759672999382019, + 
"step": 3535 + }, + { + "epoch": 0.8147465437788018, + "grad_norm": 0.8852561621502252, + "learning_rate": 1.3796014590356522e-06, + "loss": 0.7915023565292358, + "step": 3536 + }, + { + "epoch": 0.8149769585253456, + "grad_norm": 1.0626460640648143, + "learning_rate": 1.3792489642622956e-06, + "loss": 0.8259623050689697, + "step": 3537 + }, + { + "epoch": 0.8152073732718894, + "grad_norm": 0.9193643373115533, + "learning_rate": 1.3788964144414534e-06, + "loss": 0.7786526679992676, + "step": 3538 + }, + { + "epoch": 0.8154377880184331, + "grad_norm": 0.8743120056652736, + "learning_rate": 1.3785438096242987e-06, + "loss": 0.8655314445495605, + "step": 3539 + }, + { + "epoch": 0.815668202764977, + "grad_norm": 1.073925215345039, + "learning_rate": 1.3781911498620108e-06, + "loss": 0.8116016387939453, + "step": 3540 + }, + { + "epoch": 0.8158986175115207, + "grad_norm": 1.07781870851745, + "learning_rate": 1.3778384352057781e-06, + "loss": 0.712907075881958, + "step": 3541 + }, + { + "epoch": 0.8161290322580645, + "grad_norm": 0.9419481549244654, + "learning_rate": 1.377485665706797e-06, + "loss": 0.8271318674087524, + "step": 3542 + }, + { + "epoch": 0.8163594470046083, + "grad_norm": 1.231349694992367, + "learning_rate": 1.3771328414162713e-06, + "loss": 0.9161353707313538, + "step": 3543 + }, + { + "epoch": 0.8165898617511521, + "grad_norm": 1.1900246832578463, + "learning_rate": 1.3767799623854125e-06, + "loss": 0.9555908441543579, + "step": 3544 + }, + { + "epoch": 0.8168202764976958, + "grad_norm": 0.9121338000164769, + "learning_rate": 1.3764270286654414e-06, + "loss": 0.7863249778747559, + "step": 3545 + }, + { + "epoch": 0.8170506912442397, + "grad_norm": 1.0362996056258458, + "learning_rate": 1.3760740403075853e-06, + "loss": 0.9086883068084717, + "step": 3546 + }, + { + "epoch": 0.8172811059907834, + "grad_norm": 0.9211768991499883, + "learning_rate": 1.37572099736308e-06, + "loss": 0.6231412887573242, + "step": 3547 + }, + { + "epoch": 0.8175115207373271, 
+ "grad_norm": 0.94903309328564, + "learning_rate": 1.3753678998831692e-06, + "loss": 0.8221716284751892, + "step": 3548 + }, + { + "epoch": 0.817741935483871, + "grad_norm": 1.0641797094094223, + "learning_rate": 1.375014747919105e-06, + "loss": 0.8077783584594727, + "step": 3549 + }, + { + "epoch": 0.8179723502304147, + "grad_norm": 1.0675643850007648, + "learning_rate": 1.3746615415221463e-06, + "loss": 0.6882060766220093, + "step": 3550 + }, + { + "epoch": 0.8182027649769585, + "grad_norm": 0.8393670588117293, + "learning_rate": 1.3743082807435614e-06, + "loss": 0.700161337852478, + "step": 3551 + }, + { + "epoch": 0.8184331797235023, + "grad_norm": 0.8856084645963668, + "learning_rate": 1.3739549656346243e-06, + "loss": 0.737981915473938, + "step": 3552 + }, + { + "epoch": 0.8186635944700461, + "grad_norm": 0.8562104816360829, + "learning_rate": 1.3736015962466193e-06, + "loss": 0.8025717735290527, + "step": 3553 + }, + { + "epoch": 0.8188940092165898, + "grad_norm": 1.1233745076434911, + "learning_rate": 1.3732481726308372e-06, + "loss": 0.8855722546577454, + "step": 3554 + }, + { + "epoch": 0.8191244239631337, + "grad_norm": 1.2861487220187957, + "learning_rate": 1.3728946948385768e-06, + "loss": 0.819130539894104, + "step": 3555 + }, + { + "epoch": 0.8193548387096774, + "grad_norm": 1.086213399760416, + "learning_rate": 1.3725411629211454e-06, + "loss": 0.8419625759124756, + "step": 3556 + }, + { + "epoch": 0.8195852534562212, + "grad_norm": 0.8659477904111433, + "learning_rate": 1.3721875769298575e-06, + "loss": 0.8478890657424927, + "step": 3557 + }, + { + "epoch": 0.819815668202765, + "grad_norm": 0.9446742102947047, + "learning_rate": 1.371833936916035e-06, + "loss": 0.8654077053070068, + "step": 3558 + }, + { + "epoch": 0.8200460829493088, + "grad_norm": 1.132873117876266, + "learning_rate": 1.371480242931009e-06, + "loss": 0.8898686170578003, + "step": 3559 + }, + { + "epoch": 0.8202764976958525, + "grad_norm": 1.0419861877874252, + "learning_rate": 
1.3711264950261176e-06, + "loss": 0.873773455619812, + "step": 3560 + }, + { + "epoch": 0.8205069124423963, + "grad_norm": 0.8068261635969198, + "learning_rate": 1.3707726932527068e-06, + "loss": 0.6323572397232056, + "step": 3561 + }, + { + "epoch": 0.8207373271889401, + "grad_norm": 1.1038849604905803, + "learning_rate": 1.3704188376621304e-06, + "loss": 0.7018281817436218, + "step": 3562 + }, + { + "epoch": 0.8209677419354838, + "grad_norm": 1.084497532058705, + "learning_rate": 1.37006492830575e-06, + "loss": 0.8052775859832764, + "step": 3563 + }, + { + "epoch": 0.8211981566820277, + "grad_norm": 1.0795040103988192, + "learning_rate": 1.3697109652349352e-06, + "loss": 0.8057233095169067, + "step": 3564 + }, + { + "epoch": 0.8214285714285714, + "grad_norm": 1.1240440402053398, + "learning_rate": 1.3693569485010633e-06, + "loss": 0.8647899627685547, + "step": 3565 + }, + { + "epoch": 0.8216589861751152, + "grad_norm": 0.9167509343069911, + "learning_rate": 1.369002878155519e-06, + "loss": 0.8022265434265137, + "step": 3566 + }, + { + "epoch": 0.821889400921659, + "grad_norm": 1.0569217144551386, + "learning_rate": 1.368648754249696e-06, + "loss": 0.8534140586853027, + "step": 3567 + }, + { + "epoch": 0.8221198156682028, + "grad_norm": 1.1336199597215886, + "learning_rate": 1.3682945768349935e-06, + "loss": 0.905183732509613, + "step": 3568 + }, + { + "epoch": 0.8223502304147465, + "grad_norm": 1.0114816874699049, + "learning_rate": 1.3679403459628215e-06, + "loss": 0.6096831560134888, + "step": 3569 + }, + { + "epoch": 0.8225806451612904, + "grad_norm": 1.0433167842442863, + "learning_rate": 1.367586061684595e-06, + "loss": 0.7220188975334167, + "step": 3570 + }, + { + "epoch": 0.8228110599078341, + "grad_norm": 1.2434665139770538, + "learning_rate": 1.3672317240517386e-06, + "loss": 0.8028903007507324, + "step": 3571 + }, + { + "epoch": 0.8230414746543778, + "grad_norm": 0.8999816334081224, + "learning_rate": 1.3668773331156831e-06, + "loss": 
0.8121141791343689, + "step": 3572 + }, + { + "epoch": 0.8232718894009217, + "grad_norm": 0.9985064007808814, + "learning_rate": 1.3665228889278687e-06, + "loss": 0.8259282112121582, + "step": 3573 + }, + { + "epoch": 0.8235023041474654, + "grad_norm": 1.0492496227314838, + "learning_rate": 1.3661683915397423e-06, + "loss": 0.9356029033660889, + "step": 3574 + }, + { + "epoch": 0.8237327188940092, + "grad_norm": 0.9103215470779688, + "learning_rate": 1.3658138410027582e-06, + "loss": 0.738788366317749, + "step": 3575 + }, + { + "epoch": 0.823963133640553, + "grad_norm": 0.9813034370683628, + "learning_rate": 1.3654592373683794e-06, + "loss": 0.7775605320930481, + "step": 3576 + }, + { + "epoch": 0.8241935483870968, + "grad_norm": 1.0650813981062164, + "learning_rate": 1.3651045806880766e-06, + "loss": 0.7645376324653625, + "step": 3577 + }, + { + "epoch": 0.8244239631336405, + "grad_norm": 0.9731809944135928, + "learning_rate": 1.3647498710133272e-06, + "loss": 0.7713958024978638, + "step": 3578 + }, + { + "epoch": 0.8246543778801844, + "grad_norm": 1.148498187573576, + "learning_rate": 1.3643951083956165e-06, + "loss": 0.6920947432518005, + "step": 3579 + }, + { + "epoch": 0.8248847926267281, + "grad_norm": 0.8263814798727009, + "learning_rate": 1.3640402928864382e-06, + "loss": 0.7108405828475952, + "step": 3580 + }, + { + "epoch": 0.8251152073732719, + "grad_norm": 1.0141959867722847, + "learning_rate": 1.3636854245372936e-06, + "loss": 0.7879295945167542, + "step": 3581 + }, + { + "epoch": 0.8253456221198157, + "grad_norm": 0.8796188222287911, + "learning_rate": 1.3633305033996909e-06, + "loss": 0.8173119425773621, + "step": 3582 + }, + { + "epoch": 0.8255760368663595, + "grad_norm": 1.230625652029921, + "learning_rate": 1.3629755295251466e-06, + "loss": 0.8530454635620117, + "step": 3583 + }, + { + "epoch": 0.8258064516129032, + "grad_norm": 0.7851178128331011, + "learning_rate": 1.3626205029651846e-06, + "loss": 0.7749553918838501, + "step": 3584 + }, + { + 
"epoch": 0.826036866359447, + "grad_norm": 0.9879629515788971, + "learning_rate": 1.362265423771337e-06, + "loss": 0.8313847780227661, + "step": 3585 + }, + { + "epoch": 0.8262672811059908, + "grad_norm": 0.9997153587851354, + "learning_rate": 1.3619102919951424e-06, + "loss": 0.7285455465316772, + "step": 3586 + }, + { + "epoch": 0.8264976958525345, + "grad_norm": 1.053529475482116, + "learning_rate": 1.361555107688148e-06, + "loss": 0.8084003925323486, + "step": 3587 + }, + { + "epoch": 0.8267281105990784, + "grad_norm": 1.1979034262658517, + "learning_rate": 1.3611998709019088e-06, + "loss": 0.8506543040275574, + "step": 3588 + }, + { + "epoch": 0.8269585253456221, + "grad_norm": 1.150137696376644, + "learning_rate": 1.3608445816879864e-06, + "loss": 0.8320293426513672, + "step": 3589 + }, + { + "epoch": 0.8271889400921659, + "grad_norm": 1.0954200087136678, + "learning_rate": 1.3604892400979501e-06, + "loss": 0.8116205930709839, + "step": 3590 + }, + { + "epoch": 0.8274193548387097, + "grad_norm": 0.988607654244707, + "learning_rate": 1.3601338461833785e-06, + "loss": 0.8317450284957886, + "step": 3591 + }, + { + "epoch": 0.8276497695852535, + "grad_norm": 1.0502248139840338, + "learning_rate": 1.3597783999958553e-06, + "loss": 0.7348642349243164, + "step": 3592 + }, + { + "epoch": 0.8278801843317972, + "grad_norm": 0.8829971344500126, + "learning_rate": 1.359422901586974e-06, + "loss": 0.8087270259857178, + "step": 3593 + }, + { + "epoch": 0.8281105990783411, + "grad_norm": 1.1012699484003496, + "learning_rate": 1.3590673510083345e-06, + "loss": 0.7964637875556946, + "step": 3594 + }, + { + "epoch": 0.8283410138248848, + "grad_norm": 0.8597833865541051, + "learning_rate": 1.358711748311544e-06, + "loss": 0.6192176342010498, + "step": 3595 + }, + { + "epoch": 0.8285714285714286, + "grad_norm": 1.458647590594062, + "learning_rate": 1.3583560935482182e-06, + "loss": 0.7735739946365356, + "step": 3596 + }, + { + "epoch": 0.8288018433179724, + "grad_norm": 
1.209934555151429, + "learning_rate": 1.35800038676998e-06, + "loss": 0.7965315580368042, + "step": 3597 + }, + { + "epoch": 0.8290322580645161, + "grad_norm": 1.0086229436787473, + "learning_rate": 1.3576446280284595e-06, + "loss": 0.6489244699478149, + "step": 3598 + }, + { + "epoch": 0.8292626728110599, + "grad_norm": 1.041271189758682, + "learning_rate": 1.3572888173752946e-06, + "loss": 0.8073695302009583, + "step": 3599 + }, + { + "epoch": 0.8294930875576036, + "grad_norm": 0.7544591630478071, + "learning_rate": 1.3569329548621309e-06, + "loss": 0.7925900816917419, + "step": 3600 + }, + { + "epoch": 0.8297235023041475, + "grad_norm": 1.1274353505725723, + "learning_rate": 1.356577040540621e-06, + "loss": 0.83954918384552, + "step": 3601 + }, + { + "epoch": 0.8299539170506912, + "grad_norm": 0.69092010707332, + "learning_rate": 1.356221074462426e-06, + "loss": 0.6384706497192383, + "step": 3602 + }, + { + "epoch": 0.830184331797235, + "grad_norm": 0.8604009933780791, + "learning_rate": 1.3558650566792136e-06, + "loss": 0.8308184146881104, + "step": 3603 + }, + { + "epoch": 0.8304147465437788, + "grad_norm": 0.9893567222365065, + "learning_rate": 1.3555089872426596e-06, + "loss": 0.7972864508628845, + "step": 3604 + }, + { + "epoch": 0.8306451612903226, + "grad_norm": 1.0575497381629144, + "learning_rate": 1.3551528662044463e-06, + "loss": 0.8038849830627441, + "step": 3605 + }, + { + "epoch": 0.8308755760368663, + "grad_norm": 1.0146034272672162, + "learning_rate": 1.3547966936162646e-06, + "loss": 0.7735980749130249, + "step": 3606 + }, + { + "epoch": 0.8311059907834102, + "grad_norm": 1.169701687059532, + "learning_rate": 1.354440469529813e-06, + "loss": 0.7717504501342773, + "step": 3607 + }, + { + "epoch": 0.8313364055299539, + "grad_norm": 0.8981514617249363, + "learning_rate": 1.3540841939967962e-06, + "loss": 0.9405615329742432, + "step": 3608 + }, + { + "epoch": 0.8315668202764976, + "grad_norm": 0.9913743440349779, + "learning_rate": 
1.3537278670689273e-06, + "loss": 0.7730603814125061, + "step": 3609 + }, + { + "epoch": 0.8317972350230415, + "grad_norm": 1.1958069213876743, + "learning_rate": 1.353371488797927e-06, + "loss": 0.8677463531494141, + "step": 3610 + }, + { + "epoch": 0.8320276497695852, + "grad_norm": 1.0362704574624084, + "learning_rate": 1.3530150592355227e-06, + "loss": 0.8261700868606567, + "step": 3611 + }, + { + "epoch": 0.832258064516129, + "grad_norm": 0.9430749395940993, + "learning_rate": 1.35265857843345e-06, + "loss": 0.6799050569534302, + "step": 3612 + }, + { + "epoch": 0.8324884792626728, + "grad_norm": 1.0479319081515341, + "learning_rate": 1.3523020464434514e-06, + "loss": 0.9117664098739624, + "step": 3613 + }, + { + "epoch": 0.8327188940092166, + "grad_norm": 1.0691436327470698, + "learning_rate": 1.3519454633172771e-06, + "loss": 0.8637168407440186, + "step": 3614 + }, + { + "epoch": 0.8329493087557603, + "grad_norm": 0.8579929983536723, + "learning_rate": 1.3515888291066848e-06, + "loss": 0.8169793486595154, + "step": 3615 + }, + { + "epoch": 0.8331797235023042, + "grad_norm": 0.920659117563804, + "learning_rate": 1.3512321438634392e-06, + "loss": 0.6901019811630249, + "step": 3616 + }, + { + "epoch": 0.8334101382488479, + "grad_norm": 1.350300242304736, + "learning_rate": 1.3508754076393133e-06, + "loss": 0.868461012840271, + "step": 3617 + }, + { + "epoch": 0.8336405529953917, + "grad_norm": 0.9765625383196332, + "learning_rate": 1.3505186204860864e-06, + "loss": 0.7916195392608643, + "step": 3618 + }, + { + "epoch": 0.8338709677419355, + "grad_norm": 0.9685384546753151, + "learning_rate": 1.3501617824555456e-06, + "loss": 0.7078498601913452, + "step": 3619 + }, + { + "epoch": 0.8341013824884793, + "grad_norm": 1.2242730037688179, + "learning_rate": 1.3498048935994857e-06, + "loss": 0.890669584274292, + "step": 3620 + }, + { + "epoch": 0.834331797235023, + "grad_norm": 0.8358453705503323, + "learning_rate": 1.3494479539697087e-06, + "loss": 
0.8162761926651001, + "step": 3621 + }, + { + "epoch": 0.8345622119815668, + "grad_norm": 1.013077112717635, + "learning_rate": 1.3490909636180233e-06, + "loss": 0.7743235230445862, + "step": 3622 + }, + { + "epoch": 0.8347926267281106, + "grad_norm": 1.0099386147746707, + "learning_rate": 1.3487339225962472e-06, + "loss": 0.8297950029373169, + "step": 3623 + }, + { + "epoch": 0.8350230414746543, + "grad_norm": 1.1865830325248257, + "learning_rate": 1.3483768309562035e-06, + "loss": 0.9550352692604065, + "step": 3624 + }, + { + "epoch": 0.8352534562211982, + "grad_norm": 0.9576603479694407, + "learning_rate": 1.3480196887497242e-06, + "loss": 0.7343823909759521, + "step": 3625 + }, + { + "epoch": 0.8354838709677419, + "grad_norm": 1.0312198523972542, + "learning_rate": 1.3476624960286479e-06, + "loss": 0.8942683935165405, + "step": 3626 + }, + { + "epoch": 0.8357142857142857, + "grad_norm": 1.0216203737583824, + "learning_rate": 1.34730525284482e-06, + "loss": 0.778289794921875, + "step": 3627 + }, + { + "epoch": 0.8359447004608295, + "grad_norm": 0.8374039418656565, + "learning_rate": 1.3469479592500951e-06, + "loss": 0.5924088954925537, + "step": 3628 + }, + { + "epoch": 0.8361751152073733, + "grad_norm": 1.6640914693337763, + "learning_rate": 1.3465906152963329e-06, + "loss": 1.0363706350326538, + "step": 3629 + }, + { + "epoch": 0.836405529953917, + "grad_norm": 1.1094517477504633, + "learning_rate": 1.346233221035402e-06, + "loss": 0.7927669286727905, + "step": 3630 + }, + { + "epoch": 0.8366359447004609, + "grad_norm": 1.017803676905956, + "learning_rate": 1.345875776519177e-06, + "loss": 0.8428707718849182, + "step": 3631 + }, + { + "epoch": 0.8368663594470046, + "grad_norm": 1.0894705086513103, + "learning_rate": 1.345518281799541e-06, + "loss": 0.7975403070449829, + "step": 3632 + }, + { + "epoch": 0.8370967741935483, + "grad_norm": 1.0032068733109394, + "learning_rate": 1.3451607369283842e-06, + "loss": 0.8383880853652954, + "step": 3633 + }, + { + 
"epoch": 0.8373271889400922, + "grad_norm": 1.007543360201824, + "learning_rate": 1.3448031419576028e-06, + "loss": 0.9033386707305908, + "step": 3634 + }, + { + "epoch": 0.8375576036866359, + "grad_norm": 1.1312406567077748, + "learning_rate": 1.3444454969391021e-06, + "loss": 0.8913514018058777, + "step": 3635 + }, + { + "epoch": 0.8377880184331797, + "grad_norm": 1.4041014769308477, + "learning_rate": 1.3440878019247936e-06, + "loss": 0.9051915407180786, + "step": 3636 + }, + { + "epoch": 0.8380184331797235, + "grad_norm": 0.9777048211867199, + "learning_rate": 1.343730056966596e-06, + "loss": 0.8240993618965149, + "step": 3637 + }, + { + "epoch": 0.8382488479262673, + "grad_norm": 1.1788464491037272, + "learning_rate": 1.3433722621164358e-06, + "loss": 0.8276345133781433, + "step": 3638 + }, + { + "epoch": 0.838479262672811, + "grad_norm": 1.1512835626079758, + "learning_rate": 1.343014417426246e-06, + "loss": 0.8250508904457092, + "step": 3639 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 1.0066201319773938, + "learning_rate": 1.342656522947968e-06, + "loss": 0.7872868180274963, + "step": 3640 + }, + { + "epoch": 0.8389400921658986, + "grad_norm": 0.8473767849665474, + "learning_rate": 1.3422985787335491e-06, + "loss": 0.7634146809577942, + "step": 3641 + }, + { + "epoch": 0.8391705069124424, + "grad_norm": 0.9991956505737468, + "learning_rate": 1.3419405848349448e-06, + "loss": 0.63923180103302, + "step": 3642 + }, + { + "epoch": 0.8394009216589862, + "grad_norm": 0.8936657519523178, + "learning_rate": 1.3415825413041173e-06, + "loss": 0.900942325592041, + "step": 3643 + }, + { + "epoch": 0.83963133640553, + "grad_norm": 0.8086145892134451, + "learning_rate": 1.341224448193036e-06, + "loss": 0.6415199041366577, + "step": 3644 + }, + { + "epoch": 0.8398617511520737, + "grad_norm": 0.7541710851332, + "learning_rate": 1.3408663055536775e-06, + "loss": 0.7750275135040283, + "step": 3645 + }, + { + "epoch": 0.8400921658986175, + "grad_norm": 
1.0677810215945565, + "learning_rate": 1.3405081134380264e-06, + "loss": 0.8159983158111572, + "step": 3646 + }, + { + "epoch": 0.8403225806451613, + "grad_norm": 1.0361250834896671, + "learning_rate": 1.3401498718980733e-06, + "loss": 0.6870952844619751, + "step": 3647 + }, + { + "epoch": 0.840552995391705, + "grad_norm": 1.0057736881312165, + "learning_rate": 1.3397915809858168e-06, + "loss": 0.8588749170303345, + "step": 3648 + }, + { + "epoch": 0.8407834101382489, + "grad_norm": 0.8944864050117411, + "learning_rate": 1.3394332407532619e-06, + "loss": 0.6926778554916382, + "step": 3649 + }, + { + "epoch": 0.8410138248847926, + "grad_norm": 0.9996715673645244, + "learning_rate": 1.3390748512524213e-06, + "loss": 0.7165309190750122, + "step": 3650 + }, + { + "epoch": 0.8412442396313364, + "grad_norm": 0.8676606625906299, + "learning_rate": 1.3387164125353149e-06, + "loss": 0.7782741189002991, + "step": 3651 + }, + { + "epoch": 0.8414746543778802, + "grad_norm": 1.2076812224962883, + "learning_rate": 1.3383579246539698e-06, + "loss": 0.9153795838356018, + "step": 3652 + }, + { + "epoch": 0.841705069124424, + "grad_norm": 0.9194313077193984, + "learning_rate": 1.33799938766042e-06, + "loss": 0.8419643044471741, + "step": 3653 + }, + { + "epoch": 0.8419354838709677, + "grad_norm": 0.9325821466469247, + "learning_rate": 1.3376408016067064e-06, + "loss": 0.6927728652954102, + "step": 3654 + }, + { + "epoch": 0.8421658986175116, + "grad_norm": 0.8795285549516815, + "learning_rate": 1.3372821665448774e-06, + "loss": 0.7721414566040039, + "step": 3655 + }, + { + "epoch": 0.8423963133640553, + "grad_norm": 0.8650877944504008, + "learning_rate": 1.3369234825269887e-06, + "loss": 0.7277967929840088, + "step": 3656 + }, + { + "epoch": 0.8426267281105991, + "grad_norm": 0.8893990009557013, + "learning_rate": 1.336564749605102e-06, + "loss": 0.7764936089515686, + "step": 3657 + }, + { + "epoch": 0.8428571428571429, + "grad_norm": 1.0366422012708214, + "learning_rate": 
1.336205967831288e-06, + "loss": 0.7445545196533203, + "step": 3658 + }, + { + "epoch": 0.8430875576036866, + "grad_norm": 0.9883734306246509, + "learning_rate": 1.3358471372576227e-06, + "loss": 0.8359465599060059, + "step": 3659 + }, + { + "epoch": 0.8433179723502304, + "grad_norm": 1.1992732184975974, + "learning_rate": 1.33548825793619e-06, + "loss": 0.8634141683578491, + "step": 3660 + }, + { + "epoch": 0.8435483870967742, + "grad_norm": 0.9932267949840192, + "learning_rate": 1.3351293299190804e-06, + "loss": 0.7365708351135254, + "step": 3661 + }, + { + "epoch": 0.843778801843318, + "grad_norm": 1.0553779905834517, + "learning_rate": 1.3347703532583927e-06, + "loss": 0.7135465145111084, + "step": 3662 + }, + { + "epoch": 0.8440092165898617, + "grad_norm": 0.9366872036776951, + "learning_rate": 1.3344113280062313e-06, + "loss": 0.7411447763442993, + "step": 3663 + }, + { + "epoch": 0.8442396313364056, + "grad_norm": 1.1654296408446096, + "learning_rate": 1.3340522542147081e-06, + "loss": 0.7765100002288818, + "step": 3664 + }, + { + "epoch": 0.8444700460829493, + "grad_norm": 0.9657216098787882, + "learning_rate": 1.3336931319359426e-06, + "loss": 0.7638096809387207, + "step": 3665 + }, + { + "epoch": 0.8447004608294931, + "grad_norm": 0.8148482611092309, + "learning_rate": 1.3333339612220606e-06, + "loss": 0.7114577889442444, + "step": 3666 + }, + { + "epoch": 0.8449308755760369, + "grad_norm": 1.075345107734405, + "learning_rate": 1.3329747421251955e-06, + "loss": 0.8702960014343262, + "step": 3667 + }, + { + "epoch": 0.8451612903225807, + "grad_norm": 0.8702936794654799, + "learning_rate": 1.3326154746974878e-06, + "loss": 0.7248300313949585, + "step": 3668 + }, + { + "epoch": 0.8453917050691244, + "grad_norm": 1.0810218150457531, + "learning_rate": 1.332256158991084e-06, + "loss": 0.7648389339447021, + "step": 3669 + }, + { + "epoch": 0.8456221198156681, + "grad_norm": 1.1179174327015893, + "learning_rate": 1.3318967950581383e-06, + "loss": 
0.7075401544570923, + "step": 3670 + }, + { + "epoch": 0.845852534562212, + "grad_norm": 0.9497106076514022, + "learning_rate": 1.3315373829508122e-06, + "loss": 0.6923220157623291, + "step": 3671 + }, + { + "epoch": 0.8460829493087557, + "grad_norm": 1.100773813694407, + "learning_rate": 1.3311779227212742e-06, + "loss": 0.7522361874580383, + "step": 3672 + }, + { + "epoch": 0.8463133640552996, + "grad_norm": 1.026931960572947, + "learning_rate": 1.3308184144216989e-06, + "loss": 0.7087293863296509, + "step": 3673 + }, + { + "epoch": 0.8465437788018433, + "grad_norm": 0.793322008156401, + "learning_rate": 1.3304588581042688e-06, + "loss": 0.782098650932312, + "step": 3674 + }, + { + "epoch": 0.8467741935483871, + "grad_norm": 1.029621860148689, + "learning_rate": 1.330099253821173e-06, + "loss": 0.7671197652816772, + "step": 3675 + }, + { + "epoch": 0.8470046082949308, + "grad_norm": 0.8604911309489864, + "learning_rate": 1.3297396016246073e-06, + "loss": 0.8098698258399963, + "step": 3676 + }, + { + "epoch": 0.8472350230414747, + "grad_norm": 0.9021265860196932, + "learning_rate": 1.3293799015667751e-06, + "loss": 0.7671023011207581, + "step": 3677 + }, + { + "epoch": 0.8474654377880184, + "grad_norm": 0.9115553667327773, + "learning_rate": 1.3290201536998862e-06, + "loss": 0.7448668479919434, + "step": 3678 + }, + { + "epoch": 0.8476958525345623, + "grad_norm": 1.4463207292378697, + "learning_rate": 1.3286603580761576e-06, + "loss": 0.946117639541626, + "step": 3679 + }, + { + "epoch": 0.847926267281106, + "grad_norm": 0.932975472082494, + "learning_rate": 1.328300514747813e-06, + "loss": 0.8134163618087769, + "step": 3680 + }, + { + "epoch": 0.8481566820276498, + "grad_norm": 1.0433920810873991, + "learning_rate": 1.327940623767083e-06, + "loss": 0.725477933883667, + "step": 3681 + }, + { + "epoch": 0.8483870967741935, + "grad_norm": 0.9434209059724857, + "learning_rate": 1.3275806851862061e-06, + "loss": 0.8278200626373291, + "step": 3682 + }, + { + "epoch": 
0.8486175115207373, + "grad_norm": 1.2837572025692205, + "learning_rate": 1.327220699057426e-06, + "loss": 0.8437181711196899, + "step": 3683 + }, + { + "epoch": 0.8488479262672811, + "grad_norm": 1.0932618965520366, + "learning_rate": 1.326860665432995e-06, + "loss": 0.8921856880187988, + "step": 3684 + }, + { + "epoch": 0.8490783410138248, + "grad_norm": 0.9850919430921788, + "learning_rate": 1.326500584365171e-06, + "loss": 0.7285119295120239, + "step": 3685 + }, + { + "epoch": 0.8493087557603687, + "grad_norm": 1.0119244636074918, + "learning_rate": 1.3261404559062196e-06, + "loss": 0.8968918323516846, + "step": 3686 + }, + { + "epoch": 0.8495391705069124, + "grad_norm": 0.9862869524570133, + "learning_rate": 1.3257802801084123e-06, + "loss": 0.6794285774230957, + "step": 3687 + }, + { + "epoch": 0.8497695852534562, + "grad_norm": 1.1495746754769118, + "learning_rate": 1.3254200570240291e-06, + "loss": 0.869774341583252, + "step": 3688 + }, + { + "epoch": 0.85, + "grad_norm": 1.1620464557259493, + "learning_rate": 1.3250597867053553e-06, + "loss": 0.7862332463264465, + "step": 3689 + }, + { + "epoch": 0.8502304147465438, + "grad_norm": 1.1253065949092746, + "learning_rate": 1.3246994692046835e-06, + "loss": 0.8424299955368042, + "step": 3690 + }, + { + "epoch": 0.8504608294930875, + "grad_norm": 0.7041532260107465, + "learning_rate": 1.3243391045743137e-06, + "loss": 0.6232138276100159, + "step": 3691 + }, + { + "epoch": 0.8506912442396314, + "grad_norm": 0.9563538572085633, + "learning_rate": 1.3239786928665523e-06, + "loss": 0.7108159065246582, + "step": 3692 + }, + { + "epoch": 0.8509216589861751, + "grad_norm": 1.0262733388108027, + "learning_rate": 1.3236182341337126e-06, + "loss": 0.7282330393791199, + "step": 3693 + }, + { + "epoch": 0.8511520737327188, + "grad_norm": 1.2079736335999256, + "learning_rate": 1.3232577284281147e-06, + "loss": 0.7864304780960083, + "step": 3694 + }, + { + "epoch": 0.8513824884792627, + "grad_norm": 0.9682428596442779, + 
"learning_rate": 1.3228971758020852e-06, + "loss": 0.7826365232467651, + "step": 3695 + }, + { + "epoch": 0.8516129032258064, + "grad_norm": 1.0308498953586989, + "learning_rate": 1.322536576307958e-06, + "loss": 0.8429988026618958, + "step": 3696 + }, + { + "epoch": 0.8518433179723502, + "grad_norm": 1.106791902142165, + "learning_rate": 1.322175929998074e-06, + "loss": 0.771148145198822, + "step": 3697 + }, + { + "epoch": 0.852073732718894, + "grad_norm": 1.2323556662321768, + "learning_rate": 1.3218152369247804e-06, + "loss": 0.9610496759414673, + "step": 3698 + }, + { + "epoch": 0.8523041474654378, + "grad_norm": 1.0124488299649408, + "learning_rate": 1.321454497140431e-06, + "loss": 0.7286547422409058, + "step": 3699 + }, + { + "epoch": 0.8525345622119815, + "grad_norm": 0.8362780560832063, + "learning_rate": 1.321093710697387e-06, + "loss": 0.7446750402450562, + "step": 3700 + }, + { + "epoch": 0.8527649769585254, + "grad_norm": 0.8774754337310029, + "learning_rate": 1.3207328776480156e-06, + "loss": 0.7211639881134033, + "step": 3701 + }, + { + "epoch": 0.8529953917050691, + "grad_norm": 0.9667628641735269, + "learning_rate": 1.320371998044692e-06, + "loss": 0.765962541103363, + "step": 3702 + }, + { + "epoch": 0.853225806451613, + "grad_norm": 1.0775083181101466, + "learning_rate": 1.3200110719397967e-06, + "loss": 0.9090084433555603, + "step": 3703 + }, + { + "epoch": 0.8534562211981567, + "grad_norm": 0.9604272002153474, + "learning_rate": 1.319650099385718e-06, + "loss": 0.8222901225090027, + "step": 3704 + }, + { + "epoch": 0.8536866359447005, + "grad_norm": 1.0297311955715076, + "learning_rate": 1.3192890804348508e-06, + "loss": 0.7929965853691101, + "step": 3705 + }, + { + "epoch": 0.8539170506912442, + "grad_norm": 0.9788103737354025, + "learning_rate": 1.318928015139596e-06, + "loss": 0.89229816198349, + "step": 3706 + }, + { + "epoch": 0.854147465437788, + "grad_norm": 1.1185541946390394, + "learning_rate": 1.3185669035523621e-06, + "loss": 
0.8348276615142822, + "step": 3707 + }, + { + "epoch": 0.8543778801843318, + "grad_norm": 1.0960703003892842, + "learning_rate": 1.3182057457255639e-06, + "loss": 0.9006820917129517, + "step": 3708 + }, + { + "epoch": 0.8546082949308755, + "grad_norm": 0.8300224623954644, + "learning_rate": 1.3178445417116233e-06, + "loss": 0.665691614151001, + "step": 3709 + }, + { + "epoch": 0.8548387096774194, + "grad_norm": 0.6677558949928035, + "learning_rate": 1.3174832915629677e-06, + "loss": 0.7073110342025757, + "step": 3710 + }, + { + "epoch": 0.8550691244239631, + "grad_norm": 1.0807205184602706, + "learning_rate": 1.317121995332033e-06, + "loss": 0.7125800848007202, + "step": 3711 + }, + { + "epoch": 0.8552995391705069, + "grad_norm": 1.1504081133401938, + "learning_rate": 1.31676065307126e-06, + "loss": 0.847205638885498, + "step": 3712 + }, + { + "epoch": 0.8555299539170507, + "grad_norm": 1.1272186923536152, + "learning_rate": 1.3163992648330979e-06, + "loss": 0.860866904258728, + "step": 3713 + }, + { + "epoch": 0.8557603686635945, + "grad_norm": 0.9974272492162177, + "learning_rate": 1.3160378306700014e-06, + "loss": 0.811161994934082, + "step": 3714 + }, + { + "epoch": 0.8559907834101382, + "grad_norm": 1.059693566679631, + "learning_rate": 1.3156763506344318e-06, + "loss": 1.0276790857315063, + "step": 3715 + }, + { + "epoch": 0.8562211981566821, + "grad_norm": 0.8617440282777447, + "learning_rate": 1.3153148247788584e-06, + "loss": 0.7462253570556641, + "step": 3716 + }, + { + "epoch": 0.8564516129032258, + "grad_norm": 1.281384523734545, + "learning_rate": 1.314953253155755e-06, + "loss": 0.9181896448135376, + "step": 3717 + }, + { + "epoch": 0.8566820276497696, + "grad_norm": 0.7940667691684741, + "learning_rate": 1.3145916358176044e-06, + "loss": 0.5943678021430969, + "step": 3718 + }, + { + "epoch": 0.8569124423963134, + "grad_norm": 0.9268739898787507, + "learning_rate": 1.3142299728168942e-06, + "loss": 0.7908656597137451, + "step": 3719 + }, + { + 
"epoch": 0.8571428571428571, + "grad_norm": 1.2242140267734891, + "learning_rate": 1.3138682642061192e-06, + "loss": 0.8716393709182739, + "step": 3720 + }, + { + "epoch": 0.8573732718894009, + "grad_norm": 0.9921811812486295, + "learning_rate": 1.3135065100377814e-06, + "loss": 0.76909339427948, + "step": 3721 + }, + { + "epoch": 0.8576036866359447, + "grad_norm": 1.0272733292998222, + "learning_rate": 1.3131447103643884e-06, + "loss": 0.7896728515625, + "step": 3722 + }, + { + "epoch": 0.8578341013824885, + "grad_norm": 1.0326134494637835, + "learning_rate": 1.3127828652384554e-06, + "loss": 0.8458575010299683, + "step": 3723 + }, + { + "epoch": 0.8580645161290322, + "grad_norm": 0.9849414066001893, + "learning_rate": 1.3124209747125036e-06, + "loss": 0.7419729232788086, + "step": 3724 + }, + { + "epoch": 0.8582949308755761, + "grad_norm": 0.9131603734827297, + "learning_rate": 1.3120590388390608e-06, + "loss": 0.8801093697547913, + "step": 3725 + }, + { + "epoch": 0.8585253456221198, + "grad_norm": 0.7986933302941567, + "learning_rate": 1.3116970576706617e-06, + "loss": 0.6337816715240479, + "step": 3726 + }, + { + "epoch": 0.8587557603686636, + "grad_norm": 1.1352865331161706, + "learning_rate": 1.3113350312598472e-06, + "loss": 0.8099665641784668, + "step": 3727 + }, + { + "epoch": 0.8589861751152074, + "grad_norm": 1.0467011868433627, + "learning_rate": 1.3109729596591651e-06, + "loss": 0.7430413961410522, + "step": 3728 + }, + { + "epoch": 0.8592165898617512, + "grad_norm": 1.0569982664185076, + "learning_rate": 1.3106108429211699e-06, + "loss": 0.7374905347824097, + "step": 3729 + }, + { + "epoch": 0.8594470046082949, + "grad_norm": 0.7857724004075162, + "learning_rate": 1.3102486810984217e-06, + "loss": 0.71753990650177, + "step": 3730 + }, + { + "epoch": 0.8596774193548387, + "grad_norm": 1.0554970253272185, + "learning_rate": 1.3098864742434885e-06, + "loss": 0.9126461744308472, + "step": 3731 + }, + { + "epoch": 0.8599078341013825, + "grad_norm": 
1.1141466235187625, + "learning_rate": 1.3095242224089434e-06, + "loss": 0.846487283706665, + "step": 3732 + }, + { + "epoch": 0.8601382488479262, + "grad_norm": 0.9640305278845377, + "learning_rate": 1.3091619256473671e-06, + "loss": 0.7026070952415466, + "step": 3733 + }, + { + "epoch": 0.8603686635944701, + "grad_norm": 1.2209599470129553, + "learning_rate": 1.3087995840113471e-06, + "loss": 1.0044158697128296, + "step": 3734 + }, + { + "epoch": 0.8605990783410138, + "grad_norm": 1.2732308696122019, + "learning_rate": 1.3084371975534759e-06, + "loss": 0.8061608076095581, + "step": 3735 + }, + { + "epoch": 0.8608294930875576, + "grad_norm": 1.2155874878372677, + "learning_rate": 1.308074766326354e-06, + "loss": 0.9189345836639404, + "step": 3736 + }, + { + "epoch": 0.8610599078341014, + "grad_norm": 3.0839554304770314, + "learning_rate": 1.3077122903825875e-06, + "loss": 0.8183290958404541, + "step": 3737 + }, + { + "epoch": 0.8612903225806452, + "grad_norm": 0.9202037098580877, + "learning_rate": 1.3073497697747893e-06, + "loss": 0.860893726348877, + "step": 3738 + }, + { + "epoch": 0.8615207373271889, + "grad_norm": 0.7717429741205805, + "learning_rate": 1.306987204555579e-06, + "loss": 0.6732957363128662, + "step": 3739 + }, + { + "epoch": 0.8617511520737328, + "grad_norm": 0.9444170667577415, + "learning_rate": 1.3066245947775821e-06, + "loss": 0.7910758256912231, + "step": 3740 + }, + { + "epoch": 0.8619815668202765, + "grad_norm": 1.316217805471382, + "learning_rate": 1.3062619404934317e-06, + "loss": 0.9422181844711304, + "step": 3741 + }, + { + "epoch": 0.8622119815668203, + "grad_norm": 0.9698503213179374, + "learning_rate": 1.3058992417557657e-06, + "loss": 0.7731142044067383, + "step": 3742 + }, + { + "epoch": 0.8624423963133641, + "grad_norm": 0.9561313394387324, + "learning_rate": 1.3055364986172296e-06, + "loss": 0.8419089317321777, + "step": 3743 + }, + { + "epoch": 0.8626728110599078, + "grad_norm": 0.8852750785802604, + "learning_rate": 
1.3051737111304757e-06, + "loss": 0.7535419464111328, + "step": 3744 + }, + { + "epoch": 0.8629032258064516, + "grad_norm": 0.8636514927767351, + "learning_rate": 1.3048108793481614e-06, + "loss": 0.7744847536087036, + "step": 3745 + }, + { + "epoch": 0.8631336405529954, + "grad_norm": 1.04058809416254, + "learning_rate": 1.3044480033229513e-06, + "loss": 0.7578398585319519, + "step": 3746 + }, + { + "epoch": 0.8633640552995392, + "grad_norm": 1.2334871836764278, + "learning_rate": 1.3040850831075168e-06, + "loss": 0.8767418265342712, + "step": 3747 + }, + { + "epoch": 0.8635944700460829, + "grad_norm": 1.1256734507930313, + "learning_rate": 1.303722118754535e-06, + "loss": 0.7484671473503113, + "step": 3748 + }, + { + "epoch": 0.8638248847926268, + "grad_norm": 0.9064086460386975, + "learning_rate": 1.3033591103166897e-06, + "loss": 0.7231101989746094, + "step": 3749 + }, + { + "epoch": 0.8640552995391705, + "grad_norm": 0.896473034432068, + "learning_rate": 1.3029960578466709e-06, + "loss": 0.7626307606697083, + "step": 3750 + }, + { + "epoch": 0.8642857142857143, + "grad_norm": 1.0608055188685264, + "learning_rate": 1.302632961397176e-06, + "loss": 0.7244704961776733, + "step": 3751 + }, + { + "epoch": 0.864516129032258, + "grad_norm": 1.0368271143877468, + "learning_rate": 1.3022698210209066e-06, + "loss": 0.8575884103775024, + "step": 3752 + }, + { + "epoch": 0.8647465437788019, + "grad_norm": 1.050928094888414, + "learning_rate": 1.3019066367705733e-06, + "loss": 0.7617322206497192, + "step": 3753 + }, + { + "epoch": 0.8649769585253456, + "grad_norm": 1.0524737157850867, + "learning_rate": 1.3015434086988914e-06, + "loss": 0.7899904251098633, + "step": 3754 + }, + { + "epoch": 0.8652073732718893, + "grad_norm": 0.7826254299372721, + "learning_rate": 1.3011801368585825e-06, + "loss": 0.6405949592590332, + "step": 3755 + }, + { + "epoch": 0.8654377880184332, + "grad_norm": 1.004484214855527, + "learning_rate": 1.300816821302376e-06, + "loss": 
0.8473223447799683, + "step": 3756 + }, + { + "epoch": 0.8656682027649769, + "grad_norm": 1.0318183916575985, + "learning_rate": 1.3004534620830059e-06, + "loss": 0.7843037843704224, + "step": 3757 + }, + { + "epoch": 0.8658986175115208, + "grad_norm": 0.8527211236886993, + "learning_rate": 1.3000900592532134e-06, + "loss": 0.7418329119682312, + "step": 3758 + }, + { + "epoch": 0.8661290322580645, + "grad_norm": 1.1686967012789897, + "learning_rate": 1.2997266128657462e-06, + "loss": 0.9007542133331299, + "step": 3759 + }, + { + "epoch": 0.8663594470046083, + "grad_norm": 1.0002999248018631, + "learning_rate": 1.2993631229733582e-06, + "loss": 0.7214536666870117, + "step": 3760 + }, + { + "epoch": 0.866589861751152, + "grad_norm": 1.060698383579802, + "learning_rate": 1.2989995896288085e-06, + "loss": 0.6538300514221191, + "step": 3761 + }, + { + "epoch": 0.8668202764976959, + "grad_norm": 0.8939424364373206, + "learning_rate": 1.2986360128848647e-06, + "loss": 0.8132497668266296, + "step": 3762 + }, + { + "epoch": 0.8670506912442396, + "grad_norm": 1.2692579875098073, + "learning_rate": 1.2982723927942987e-06, + "loss": 0.8940386176109314, + "step": 3763 + }, + { + "epoch": 0.8672811059907835, + "grad_norm": 0.9095968882110219, + "learning_rate": 1.2979087294098904e-06, + "loss": 0.7426153421401978, + "step": 3764 + }, + { + "epoch": 0.8675115207373272, + "grad_norm": 1.2314721218727755, + "learning_rate": 1.2975450227844236e-06, + "loss": 0.8140754103660583, + "step": 3765 + }, + { + "epoch": 0.867741935483871, + "grad_norm": 1.165847048536148, + "learning_rate": 1.2971812729706907e-06, + "loss": 0.9078278541564941, + "step": 3766 + }, + { + "epoch": 0.8679723502304147, + "grad_norm": 0.8581444329277982, + "learning_rate": 1.29681748002149e-06, + "loss": 0.6632627248764038, + "step": 3767 + }, + { + "epoch": 0.8682027649769585, + "grad_norm": 1.0737542944031577, + "learning_rate": 1.2964536439896245e-06, + "loss": 0.913419246673584, + "step": 3768 + }, + { + 
"epoch": 0.8684331797235023, + "grad_norm": 0.9232699220030103, + "learning_rate": 1.2960897649279054e-06, + "loss": 0.776391863822937, + "step": 3769 + }, + { + "epoch": 0.868663594470046, + "grad_norm": 0.7836255693570048, + "learning_rate": 1.2957258428891488e-06, + "loss": 0.7171014547348022, + "step": 3770 + }, + { + "epoch": 0.8688940092165899, + "grad_norm": 1.072840063629104, + "learning_rate": 1.2953618779261776e-06, + "loss": 0.8848521709442139, + "step": 3771 + }, + { + "epoch": 0.8691244239631336, + "grad_norm": 0.9374655640180731, + "learning_rate": 1.2949978700918207e-06, + "loss": 0.6794570684432983, + "step": 3772 + }, + { + "epoch": 0.8693548387096774, + "grad_norm": 1.1765914680464367, + "learning_rate": 1.2946338194389137e-06, + "loss": 0.7128770351409912, + "step": 3773 + }, + { + "epoch": 0.8695852534562212, + "grad_norm": 1.0061805151394425, + "learning_rate": 1.2942697260202976e-06, + "loss": 0.7794370651245117, + "step": 3774 + }, + { + "epoch": 0.869815668202765, + "grad_norm": 0.8201503807835805, + "learning_rate": 1.2939055898888203e-06, + "loss": 0.7946528196334839, + "step": 3775 + }, + { + "epoch": 0.8700460829493087, + "grad_norm": 0.8253544658473864, + "learning_rate": 1.2935414110973357e-06, + "loss": 0.7052137851715088, + "step": 3776 + }, + { + "epoch": 0.8702764976958526, + "grad_norm": 1.1148062721900278, + "learning_rate": 1.293177189698704e-06, + "loss": 0.785929799079895, + "step": 3777 + }, + { + "epoch": 0.8705069124423963, + "grad_norm": 1.0434715730493578, + "learning_rate": 1.2928129257457915e-06, + "loss": 0.7907861471176147, + "step": 3778 + }, + { + "epoch": 0.8707373271889401, + "grad_norm": 1.0141295879138945, + "learning_rate": 1.2924486192914704e-06, + "loss": 0.9145845770835876, + "step": 3779 + }, + { + "epoch": 0.8709677419354839, + "grad_norm": 1.2821040685334846, + "learning_rate": 1.2920842703886191e-06, + "loss": 0.8332167863845825, + "step": 3780 + }, + { + "epoch": 0.8711981566820276, + "grad_norm": 
1.1443987508087015, + "learning_rate": 1.2917198790901229e-06, + "loss": 0.9593367576599121, + "step": 3781 + }, + { + "epoch": 0.8714285714285714, + "grad_norm": 1.1001262078147525, + "learning_rate": 1.2913554454488723e-06, + "loss": 0.9269144535064697, + "step": 3782 + }, + { + "epoch": 0.8716589861751152, + "grad_norm": 0.8577227656018163, + "learning_rate": 1.2909909695177645e-06, + "loss": 0.8474053144454956, + "step": 3783 + }, + { + "epoch": 0.871889400921659, + "grad_norm": 1.0482742591675172, + "learning_rate": 1.2906264513497027e-06, + "loss": 0.8098207116127014, + "step": 3784 + }, + { + "epoch": 0.8721198156682027, + "grad_norm": 0.9400670599728106, + "learning_rate": 1.2902618909975962e-06, + "loss": 0.7394517064094543, + "step": 3785 + }, + { + "epoch": 0.8723502304147466, + "grad_norm": 1.199479550356467, + "learning_rate": 1.2898972885143606e-06, + "loss": 0.8667110204696655, + "step": 3786 + }, + { + "epoch": 0.8725806451612903, + "grad_norm": 1.2600204383371998, + "learning_rate": 1.289532643952917e-06, + "loss": 0.826819121837616, + "step": 3787 + }, + { + "epoch": 0.8728110599078341, + "grad_norm": 0.9212030006613351, + "learning_rate": 1.2891679573661937e-06, + "loss": 0.7765695452690125, + "step": 3788 + }, + { + "epoch": 0.8730414746543779, + "grad_norm": 0.8409152224560986, + "learning_rate": 1.2888032288071245e-06, + "loss": 0.7180448770523071, + "step": 3789 + }, + { + "epoch": 0.8732718894009217, + "grad_norm": 0.9734045628890519, + "learning_rate": 1.2884384583286486e-06, + "loss": 0.7619662880897522, + "step": 3790 + }, + { + "epoch": 0.8735023041474654, + "grad_norm": 1.0439158459354512, + "learning_rate": 1.2880736459837123e-06, + "loss": 0.8332309126853943, + "step": 3791 + }, + { + "epoch": 0.8737327188940092, + "grad_norm": 1.019583919621154, + "learning_rate": 1.2877087918252676e-06, + "loss": 0.9314864277839661, + "step": 3792 + }, + { + "epoch": 0.873963133640553, + "grad_norm": 1.0252621742811456, + "learning_rate": 
1.287343895906273e-06, + "loss": 0.8505650758743286, + "step": 3793 + }, + { + "epoch": 0.8741935483870967, + "grad_norm": 1.1808911521686665, + "learning_rate": 1.286978958279692e-06, + "loss": 0.8086442351341248, + "step": 3794 + }, + { + "epoch": 0.8744239631336406, + "grad_norm": 0.9931096763073582, + "learning_rate": 1.2866139789984951e-06, + "loss": 0.9369934797286987, + "step": 3795 + }, + { + "epoch": 0.8746543778801843, + "grad_norm": 1.0923174237783717, + "learning_rate": 1.2862489581156585e-06, + "loss": 0.6776204705238342, + "step": 3796 + }, + { + "epoch": 0.8748847926267281, + "grad_norm": 1.1437930163109349, + "learning_rate": 1.2858838956841646e-06, + "loss": 0.8742507100105286, + "step": 3797 + }, + { + "epoch": 0.8751152073732719, + "grad_norm": 0.8088256156858264, + "learning_rate": 1.285518791757002e-06, + "loss": 0.6592123508453369, + "step": 3798 + }, + { + "epoch": 0.8753456221198157, + "grad_norm": 1.064419209573929, + "learning_rate": 1.2851536463871646e-06, + "loss": 0.727974534034729, + "step": 3799 + }, + { + "epoch": 0.8755760368663594, + "grad_norm": 1.1114963626056278, + "learning_rate": 1.284788459627653e-06, + "loss": 0.734921395778656, + "step": 3800 + }, + { + "epoch": 0.8758064516129033, + "grad_norm": 1.1341924912712853, + "learning_rate": 1.2844232315314734e-06, + "loss": 0.8848391771316528, + "step": 3801 + }, + { + "epoch": 0.876036866359447, + "grad_norm": 0.9036415522550547, + "learning_rate": 1.284057962151638e-06, + "loss": 0.7014757394790649, + "step": 3802 + }, + { + "epoch": 0.8762672811059908, + "grad_norm": 1.1253352689452834, + "learning_rate": 1.2836926515411662e-06, + "loss": 0.9037606716156006, + "step": 3803 + }, + { + "epoch": 0.8764976958525346, + "grad_norm": 1.0304179621449525, + "learning_rate": 1.2833272997530808e-06, + "loss": 0.7842103242874146, + "step": 3804 + }, + { + "epoch": 0.8767281105990783, + "grad_norm": 0.8881021582469312, + "learning_rate": 1.282961906840413e-06, + "loss": 0.7233899831771851, 
+ "step": 3805 + }, + { + "epoch": 0.8769585253456221, + "grad_norm": 1.0965629604169354, + "learning_rate": 1.2825964728561995e-06, + "loss": 0.8439977169036865, + "step": 3806 + }, + { + "epoch": 0.8771889400921659, + "grad_norm": 0.9011702646392625, + "learning_rate": 1.2822309978534817e-06, + "loss": 0.6734062433242798, + "step": 3807 + }, + { + "epoch": 0.8774193548387097, + "grad_norm": 0.8611901516189409, + "learning_rate": 1.2818654818853082e-06, + "loss": 0.8132908344268799, + "step": 3808 + }, + { + "epoch": 0.8776497695852534, + "grad_norm": 1.0055540352806662, + "learning_rate": 1.2814999250047334e-06, + "loss": 0.7867386341094971, + "step": 3809 + }, + { + "epoch": 0.8778801843317973, + "grad_norm": 0.9631857828899055, + "learning_rate": 1.2811343272648172e-06, + "loss": 0.7367507219314575, + "step": 3810 + }, + { + "epoch": 0.878110599078341, + "grad_norm": 0.9475758390620135, + "learning_rate": 1.280768688718625e-06, + "loss": 0.8154586553573608, + "step": 3811 + }, + { + "epoch": 0.8783410138248848, + "grad_norm": 1.2471162716233217, + "learning_rate": 1.2804030094192297e-06, + "loss": 0.9962621331214905, + "step": 3812 + }, + { + "epoch": 0.8785714285714286, + "grad_norm": 0.9442759022004834, + "learning_rate": 1.280037289419709e-06, + "loss": 0.8720508813858032, + "step": 3813 + }, + { + "epoch": 0.8788018433179724, + "grad_norm": 0.9970556206238078, + "learning_rate": 1.2796715287731461e-06, + "loss": 0.7211558818817139, + "step": 3814 + }, + { + "epoch": 0.8790322580645161, + "grad_norm": 1.0985560987492957, + "learning_rate": 1.279305727532631e-06, + "loss": 0.8354029059410095, + "step": 3815 + }, + { + "epoch": 0.8792626728110599, + "grad_norm": 1.2983425606164107, + "learning_rate": 1.2789398857512597e-06, + "loss": 0.9136772155761719, + "step": 3816 + }, + { + "epoch": 0.8794930875576037, + "grad_norm": 1.099731879502331, + "learning_rate": 1.2785740034821328e-06, + "loss": 0.7603391408920288, + "step": 3817 + }, + { + "epoch": 
0.8797235023041474, + "grad_norm": 1.0043618459346715, + "learning_rate": 1.2782080807783582e-06, + "loss": 0.8938640356063843, + "step": 3818 + }, + { + "epoch": 0.8799539170506913, + "grad_norm": 0.9668042432935031, + "learning_rate": 1.2778421176930492e-06, + "loss": 0.8041675090789795, + "step": 3819 + }, + { + "epoch": 0.880184331797235, + "grad_norm": 0.858269124078789, + "learning_rate": 1.2774761142793246e-06, + "loss": 0.7128704786300659, + "step": 3820 + }, + { + "epoch": 0.8804147465437788, + "grad_norm": 1.01263470571454, + "learning_rate": 1.277110070590309e-06, + "loss": 0.7927603721618652, + "step": 3821 + }, + { + "epoch": 0.8806451612903226, + "grad_norm": 0.8447601312860044, + "learning_rate": 1.2767439866791342e-06, + "loss": 0.8294891119003296, + "step": 3822 + }, + { + "epoch": 0.8808755760368664, + "grad_norm": 1.0620381421224903, + "learning_rate": 1.2763778625989354e-06, + "loss": 0.8058860301971436, + "step": 3823 + }, + { + "epoch": 0.8811059907834101, + "grad_norm": 1.1264235058600618, + "learning_rate": 1.2760116984028559e-06, + "loss": 0.9073271751403809, + "step": 3824 + }, + { + "epoch": 0.881336405529954, + "grad_norm": 0.9871957246708625, + "learning_rate": 1.2756454941440439e-06, + "loss": 0.755131721496582, + "step": 3825 + }, + { + "epoch": 0.8815668202764977, + "grad_norm": 0.9177831986454672, + "learning_rate": 1.2752792498756532e-06, + "loss": 0.7571133375167847, + "step": 3826 + }, + { + "epoch": 0.8817972350230415, + "grad_norm": 1.0303718222421674, + "learning_rate": 1.2749129656508438e-06, + "loss": 0.8021755218505859, + "step": 3827 + }, + { + "epoch": 0.8820276497695853, + "grad_norm": 0.9628359079626025, + "learning_rate": 1.2745466415227812e-06, + "loss": 0.7817519903182983, + "step": 3828 + }, + { + "epoch": 0.882258064516129, + "grad_norm": 0.9923984386602839, + "learning_rate": 1.2741802775446375e-06, + "loss": 0.7144416570663452, + "step": 3829 + }, + { + "epoch": 0.8824884792626728, + "grad_norm": 
1.1770010674703593, + "learning_rate": 1.2738138737695894e-06, + "loss": 0.8154206275939941, + "step": 3830 + }, + { + "epoch": 0.8827188940092165, + "grad_norm": 1.0860031408073831, + "learning_rate": 1.2734474302508199e-06, + "loss": 0.7478733062744141, + "step": 3831 + }, + { + "epoch": 0.8829493087557604, + "grad_norm": 0.9998255564669785, + "learning_rate": 1.2730809470415177e-06, + "loss": 0.7792314291000366, + "step": 3832 + }, + { + "epoch": 0.8831797235023041, + "grad_norm": 1.1952265957395494, + "learning_rate": 1.2727144241948776e-06, + "loss": 0.8550708293914795, + "step": 3833 + }, + { + "epoch": 0.883410138248848, + "grad_norm": 1.14972903127367, + "learning_rate": 1.2723478617641e-06, + "loss": 0.9415113925933838, + "step": 3834 + }, + { + "epoch": 0.8836405529953917, + "grad_norm": 1.1062517985394071, + "learning_rate": 1.2719812598023909e-06, + "loss": 0.8359560370445251, + "step": 3835 + }, + { + "epoch": 0.8838709677419355, + "grad_norm": 1.2039080793867758, + "learning_rate": 1.2716146183629618e-06, + "loss": 0.9515634775161743, + "step": 3836 + }, + { + "epoch": 0.8841013824884792, + "grad_norm": 1.1195735084656264, + "learning_rate": 1.2712479374990302e-06, + "loss": 0.9433277249336243, + "step": 3837 + }, + { + "epoch": 0.8843317972350231, + "grad_norm": 1.022594144324791, + "learning_rate": 1.27088121726382e-06, + "loss": 0.809203028678894, + "step": 3838 + }, + { + "epoch": 0.8845622119815668, + "grad_norm": 1.0243153152488458, + "learning_rate": 1.2705144577105596e-06, + "loss": 0.8003803491592407, + "step": 3839 + }, + { + "epoch": 0.8847926267281107, + "grad_norm": 1.0509871208480976, + "learning_rate": 1.2701476588924837e-06, + "loss": 0.8258087038993835, + "step": 3840 + }, + { + "epoch": 0.8850230414746544, + "grad_norm": 0.8336199164135607, + "learning_rate": 1.2697808208628326e-06, + "loss": 0.7337249517440796, + "step": 3841 + }, + { + "epoch": 0.8852534562211981, + "grad_norm": 1.1988508685394492, + "learning_rate": 
1.269413943674853e-06, + "loss": 0.6963306665420532, + "step": 3842 + }, + { + "epoch": 0.885483870967742, + "grad_norm": 1.1494175494849699, + "learning_rate": 1.2690470273817955e-06, + "loss": 0.8849321603775024, + "step": 3843 + }, + { + "epoch": 0.8857142857142857, + "grad_norm": 0.9311581320318796, + "learning_rate": 1.2686800720369183e-06, + "loss": 0.804117739200592, + "step": 3844 + }, + { + "epoch": 0.8859447004608295, + "grad_norm": 0.9139368239237865, + "learning_rate": 1.2683130776934848e-06, + "loss": 0.7873985767364502, + "step": 3845 + }, + { + "epoch": 0.8861751152073732, + "grad_norm": 1.0475484077031534, + "learning_rate": 1.2679460444047627e-06, + "loss": 0.7401156425476074, + "step": 3846 + }, + { + "epoch": 0.8864055299539171, + "grad_norm": 1.1867976153376456, + "learning_rate": 1.2675789722240274e-06, + "loss": 0.8216343522071838, + "step": 3847 + }, + { + "epoch": 0.8866359447004608, + "grad_norm": 1.1126927795380483, + "learning_rate": 1.2672118612045583e-06, + "loss": 0.9367883205413818, + "step": 3848 + }, + { + "epoch": 0.8868663594470046, + "grad_norm": 1.333436966015092, + "learning_rate": 1.2668447113996411e-06, + "loss": 0.959208607673645, + "step": 3849 + }, + { + "epoch": 0.8870967741935484, + "grad_norm": 1.019926575329533, + "learning_rate": 1.2664775228625678e-06, + "loss": 0.754011869430542, + "step": 3850 + }, + { + "epoch": 0.8873271889400922, + "grad_norm": 1.0679613059424808, + "learning_rate": 1.2661102956466343e-06, + "loss": 0.7200918793678284, + "step": 3851 + }, + { + "epoch": 0.8875576036866359, + "grad_norm": 1.1470470713937198, + "learning_rate": 1.2657430298051441e-06, + "loss": 0.7819997072219849, + "step": 3852 + }, + { + "epoch": 0.8877880184331797, + "grad_norm": 0.7442261609023784, + "learning_rate": 1.2653757253914045e-06, + "loss": 0.6145305037498474, + "step": 3853 + }, + { + "epoch": 0.8880184331797235, + "grad_norm": 1.0307629205268725, + "learning_rate": 1.2650083824587298e-06, + "loss": 
0.8730908036231995, + "step": 3854 + }, + { + "epoch": 0.8882488479262672, + "grad_norm": 0.8412211397931054, + "learning_rate": 1.2646410010604395e-06, + "loss": 0.7595944404602051, + "step": 3855 + }, + { + "epoch": 0.8884792626728111, + "grad_norm": 1.1742884385001073, + "learning_rate": 1.264273581249858e-06, + "loss": 0.8533104658126831, + "step": 3856 + }, + { + "epoch": 0.8887096774193548, + "grad_norm": 0.9075889816265436, + "learning_rate": 1.263906123080316e-06, + "loss": 0.7239818572998047, + "step": 3857 + }, + { + "epoch": 0.8889400921658986, + "grad_norm": 1.1211735744208717, + "learning_rate": 1.2635386266051498e-06, + "loss": 0.7675650119781494, + "step": 3858 + }, + { + "epoch": 0.8891705069124424, + "grad_norm": 1.03231156560467, + "learning_rate": 1.2631710918777007e-06, + "loss": 0.8886630535125732, + "step": 3859 + }, + { + "epoch": 0.8894009216589862, + "grad_norm": 1.078590523668252, + "learning_rate": 1.2628035189513159e-06, + "loss": 0.798930287361145, + "step": 3860 + }, + { + "epoch": 0.8896313364055299, + "grad_norm": 0.9635414297502106, + "learning_rate": 1.2624359078793484e-06, + "loss": 0.7189278602600098, + "step": 3861 + }, + { + "epoch": 0.8898617511520738, + "grad_norm": 1.0909939790359444, + "learning_rate": 1.2620682587151565e-06, + "loss": 0.8187342882156372, + "step": 3862 + }, + { + "epoch": 0.8900921658986175, + "grad_norm": 1.1174191800105742, + "learning_rate": 1.2617005715121034e-06, + "loss": 0.880839467048645, + "step": 3863 + }, + { + "epoch": 0.8903225806451613, + "grad_norm": 0.9160208180175933, + "learning_rate": 1.2613328463235586e-06, + "loss": 0.84575355052948, + "step": 3864 + }, + { + "epoch": 0.8905529953917051, + "grad_norm": 0.8361425077510937, + "learning_rate": 1.2609650832028978e-06, + "loss": 0.6823658347129822, + "step": 3865 + }, + { + "epoch": 0.8907834101382488, + "grad_norm": 1.0695425966983703, + "learning_rate": 1.2605972822035e-06, + "loss": 0.8295711278915405, + "step": 3866 + }, + { + "epoch": 
0.8910138248847926, + "grad_norm": 1.1932993089448705, + "learning_rate": 1.2602294433787518e-06, + "loss": 0.8684213161468506, + "step": 3867 + }, + { + "epoch": 0.8912442396313364, + "grad_norm": 0.8493371065418897, + "learning_rate": 1.2598615667820447e-06, + "loss": 0.6560889482498169, + "step": 3868 + }, + { + "epoch": 0.8914746543778802, + "grad_norm": 1.0552959260029386, + "learning_rate": 1.259493652466775e-06, + "loss": 0.740487277507782, + "step": 3869 + }, + { + "epoch": 0.8917050691244239, + "grad_norm": 0.9680726179927289, + "learning_rate": 1.2591257004863453e-06, + "loss": 0.8167253732681274, + "step": 3870 + }, + { + "epoch": 0.8919354838709678, + "grad_norm": 0.8741208745575088, + "learning_rate": 1.2587577108941634e-06, + "loss": 0.8521690368652344, + "step": 3871 + }, + { + "epoch": 0.8921658986175115, + "grad_norm": 1.263426910808872, + "learning_rate": 1.2583896837436418e-06, + "loss": 0.8830848932266235, + "step": 3872 + }, + { + "epoch": 0.8923963133640553, + "grad_norm": 0.9234650272103238, + "learning_rate": 1.2580216190881999e-06, + "loss": 0.7080649137496948, + "step": 3873 + }, + { + "epoch": 0.8926267281105991, + "grad_norm": 0.9098984938292525, + "learning_rate": 1.2576535169812614e-06, + "loss": 0.8013911247253418, + "step": 3874 + }, + { + "epoch": 0.8928571428571429, + "grad_norm": 0.9781454154869316, + "learning_rate": 1.2572853774762564e-06, + "loss": 0.8307033777236938, + "step": 3875 + }, + { + "epoch": 0.8930875576036866, + "grad_norm": 1.003074779947638, + "learning_rate": 1.256917200626619e-06, + "loss": 0.7514123916625977, + "step": 3876 + }, + { + "epoch": 0.8933179723502304, + "grad_norm": 1.3024082731165083, + "learning_rate": 1.2565489864857903e-06, + "loss": 0.7608132362365723, + "step": 3877 + }, + { + "epoch": 0.8935483870967742, + "grad_norm": 0.9570998315665514, + "learning_rate": 1.256180735107216e-06, + "loss": 0.8011139631271362, + "step": 3878 + }, + { + "epoch": 0.8937788018433179, + "grad_norm": 
1.134653936381734, + "learning_rate": 1.2558124465443467e-06, + "loss": 0.9760414958000183, + "step": 3879 + }, + { + "epoch": 0.8940092165898618, + "grad_norm": 1.0547420638261442, + "learning_rate": 1.2554441208506399e-06, + "loss": 0.7292976379394531, + "step": 3880 + }, + { + "epoch": 0.8942396313364055, + "grad_norm": 1.0683215421992245, + "learning_rate": 1.255075758079557e-06, + "loss": 0.819061279296875, + "step": 3881 + }, + { + "epoch": 0.8944700460829493, + "grad_norm": 1.006803716245281, + "learning_rate": 1.2547073582845652e-06, + "loss": 0.8407306671142578, + "step": 3882 + }, + { + "epoch": 0.8947004608294931, + "grad_norm": 0.8233707920449198, + "learning_rate": 1.2543389215191379e-06, + "loss": 0.7452164888381958, + "step": 3883 + }, + { + "epoch": 0.8949308755760369, + "grad_norm": 1.049978361878961, + "learning_rate": 1.2539704478367525e-06, + "loss": 0.9001756310462952, + "step": 3884 + }, + { + "epoch": 0.8951612903225806, + "grad_norm": 0.8057583780945189, + "learning_rate": 1.253601937290893e-06, + "loss": 0.7006322741508484, + "step": 3885 + }, + { + "epoch": 0.8953917050691245, + "grad_norm": 0.9116907763776896, + "learning_rate": 1.253233389935048e-06, + "loss": 0.8464070558547974, + "step": 3886 + }, + { + "epoch": 0.8956221198156682, + "grad_norm": 0.9768693849406578, + "learning_rate": 1.2528648058227117e-06, + "loss": 0.8153925538063049, + "step": 3887 + }, + { + "epoch": 0.895852534562212, + "grad_norm": 0.9311867207234187, + "learning_rate": 1.2524961850073835e-06, + "loss": 0.7093103528022766, + "step": 3888 + }, + { + "epoch": 0.8960829493087558, + "grad_norm": 0.8533841155936702, + "learning_rate": 1.2521275275425685e-06, + "loss": 0.676047682762146, + "step": 3889 + }, + { + "epoch": 0.8963133640552995, + "grad_norm": 0.87097687176947, + "learning_rate": 1.2517588334817765e-06, + "loss": 0.6980170011520386, + "step": 3890 + }, + { + "epoch": 0.8965437788018433, + "grad_norm": 0.9291831127411667, + "learning_rate": 
1.2513901028785232e-06, + "loss": 0.7343952655792236, + "step": 3891 + }, + { + "epoch": 0.896774193548387, + "grad_norm": 1.0285752510532034, + "learning_rate": 1.251021335786329e-06, + "loss": 0.6836012005805969, + "step": 3892 + }, + { + "epoch": 0.8970046082949309, + "grad_norm": 0.9328635468922583, + "learning_rate": 1.2506525322587204e-06, + "loss": 0.7405731678009033, + "step": 3893 + }, + { + "epoch": 0.8972350230414746, + "grad_norm": 0.9162563014074782, + "learning_rate": 1.2502836923492288e-06, + "loss": 0.7626791596412659, + "step": 3894 + }, + { + "epoch": 0.8974654377880185, + "grad_norm": 0.8530894630449782, + "learning_rate": 1.2499148161113904e-06, + "loss": 0.951126754283905, + "step": 3895 + }, + { + "epoch": 0.8976958525345622, + "grad_norm": 1.0356266230162976, + "learning_rate": 1.249545903598747e-06, + "loss": 0.8248430490493774, + "step": 3896 + }, + { + "epoch": 0.897926267281106, + "grad_norm": 1.0696916510331513, + "learning_rate": 1.2491769548648466e-06, + "loss": 0.9306991100311279, + "step": 3897 + }, + { + "epoch": 0.8981566820276498, + "grad_norm": 1.2546361240375576, + "learning_rate": 1.2488079699632406e-06, + "loss": 0.8529196977615356, + "step": 3898 + }, + { + "epoch": 0.8983870967741936, + "grad_norm": 1.1432122269665714, + "learning_rate": 1.2484389489474873e-06, + "loss": 0.8614317178726196, + "step": 3899 + }, + { + "epoch": 0.8986175115207373, + "grad_norm": 0.8777341649032664, + "learning_rate": 1.2480698918711494e-06, + "loss": 0.723548173904419, + "step": 3900 + }, + { + "epoch": 0.8988479262672812, + "grad_norm": 0.8559428728446495, + "learning_rate": 1.2477007987877953e-06, + "loss": 0.9424235820770264, + "step": 3901 + }, + { + "epoch": 0.8990783410138249, + "grad_norm": 1.1966583189697881, + "learning_rate": 1.2473316697509982e-06, + "loss": 0.8307658433914185, + "step": 3902 + }, + { + "epoch": 0.8993087557603686, + "grad_norm": 0.9430977683906336, + "learning_rate": 1.2469625048143364e-06, + "loss": 
0.7164772748947144, + "step": 3903 + }, + { + "epoch": 0.8995391705069125, + "grad_norm": 1.0578567003352413, + "learning_rate": 1.2465933040313941e-06, + "loss": 0.824491024017334, + "step": 3904 + }, + { + "epoch": 0.8997695852534562, + "grad_norm": 0.9955753469888821, + "learning_rate": 1.24622406745576e-06, + "loss": 0.7468826770782471, + "step": 3905 + }, + { + "epoch": 0.9, + "grad_norm": 1.0419833775918754, + "learning_rate": 1.2458547951410285e-06, + "loss": 0.8049126863479614, + "step": 3906 + }, + { + "epoch": 0.9002304147465438, + "grad_norm": 1.0794114769462158, + "learning_rate": 1.245485487140799e-06, + "loss": 0.658754825592041, + "step": 3907 + }, + { + "epoch": 0.9004608294930876, + "grad_norm": 0.9848364091798514, + "learning_rate": 1.245116143508676e-06, + "loss": 0.6772202849388123, + "step": 3908 + }, + { + "epoch": 0.9006912442396313, + "grad_norm": 0.9291487276824166, + "learning_rate": 1.2447467642982697e-06, + "loss": 0.8160394430160522, + "step": 3909 + }, + { + "epoch": 0.9009216589861752, + "grad_norm": 1.3459000002689838, + "learning_rate": 1.244377349563194e-06, + "loss": 0.8289823532104492, + "step": 3910 + }, + { + "epoch": 0.9011520737327189, + "grad_norm": 1.0130598759262572, + "learning_rate": 1.24400789935707e-06, + "loss": 0.7574084997177124, + "step": 3911 + }, + { + "epoch": 0.9013824884792627, + "grad_norm": 0.9665886404424858, + "learning_rate": 1.2436384137335218e-06, + "loss": 0.8116365671157837, + "step": 3912 + }, + { + "epoch": 0.9016129032258065, + "grad_norm": 1.0860329839978788, + "learning_rate": 1.2432688927461808e-06, + "loss": 0.814805805683136, + "step": 3913 + }, + { + "epoch": 0.9018433179723502, + "grad_norm": 0.9783977746996081, + "learning_rate": 1.2428993364486822e-06, + "loss": 0.7947453260421753, + "step": 3914 + }, + { + "epoch": 0.902073732718894, + "grad_norm": 1.1432103627131167, + "learning_rate": 1.2425297448946661e-06, + "loss": 0.939562976360321, + "step": 3915 + }, + { + "epoch": 
0.9023041474654377, + "grad_norm": 0.9342812306918719, + "learning_rate": 1.2421601181377787e-06, + "loss": 0.9460225105285645, + "step": 3916 + }, + { + "epoch": 0.9025345622119816, + "grad_norm": 1.1417876456910938, + "learning_rate": 1.241790456231671e-06, + "loss": 0.9183799028396606, + "step": 3917 + }, + { + "epoch": 0.9027649769585253, + "grad_norm": 1.1195959115117728, + "learning_rate": 1.2414207592299984e-06, + "loss": 0.6793398857116699, + "step": 3918 + }, + { + "epoch": 0.9029953917050692, + "grad_norm": 0.9758451113738527, + "learning_rate": 1.2410510271864222e-06, + "loss": 0.7796125411987305, + "step": 3919 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 1.127885346985943, + "learning_rate": 1.2406812601546085e-06, + "loss": 0.8164567351341248, + "step": 3920 + }, + { + "epoch": 0.9034562211981567, + "grad_norm": 1.327729370966401, + "learning_rate": 1.2403114581882288e-06, + "loss": 0.7267879247665405, + "step": 3921 + }, + { + "epoch": 0.9036866359447004, + "grad_norm": 0.9644037075475709, + "learning_rate": 1.2399416213409586e-06, + "loss": 0.7277103066444397, + "step": 3922 + }, + { + "epoch": 0.9039170506912443, + "grad_norm": 1.1653209742127064, + "learning_rate": 1.23957174966648e-06, + "loss": 0.8507979512214661, + "step": 3923 + }, + { + "epoch": 0.904147465437788, + "grad_norm": 1.2024221808183382, + "learning_rate": 1.2392018432184792e-06, + "loss": 0.9431333541870117, + "step": 3924 + }, + { + "epoch": 0.9043778801843319, + "grad_norm": 0.9610849982223711, + "learning_rate": 1.2388319020506473e-06, + "loss": 0.669041633605957, + "step": 3925 + }, + { + "epoch": 0.9046082949308756, + "grad_norm": 1.0428863031922808, + "learning_rate": 1.2384619262166808e-06, + "loss": 0.7639964818954468, + "step": 3926 + }, + { + "epoch": 0.9048387096774193, + "grad_norm": 0.9055700075744166, + "learning_rate": 1.2380919157702819e-06, + "loss": 0.7390594482421875, + "step": 3927 + }, + { + "epoch": 0.9050691244239631, + "grad_norm": 
1.0183193149474203, + "learning_rate": 1.2377218707651562e-06, + "loss": 0.8320105075836182, + "step": 3928 + }, + { + "epoch": 0.9052995391705069, + "grad_norm": 0.9604555269461571, + "learning_rate": 1.237351791255016e-06, + "loss": 0.6820249557495117, + "step": 3929 + }, + { + "epoch": 0.9055299539170507, + "grad_norm": 1.0758012435150028, + "learning_rate": 1.2369816772935773e-06, + "loss": 0.8548537492752075, + "step": 3930 + }, + { + "epoch": 0.9057603686635944, + "grad_norm": 1.0169473440313737, + "learning_rate": 1.236611528934562e-06, + "loss": 0.7226318120956421, + "step": 3931 + }, + { + "epoch": 0.9059907834101383, + "grad_norm": 1.2196278844047388, + "learning_rate": 1.2362413462316963e-06, + "loss": 0.879987359046936, + "step": 3932 + }, + { + "epoch": 0.906221198156682, + "grad_norm": 0.8628507992206548, + "learning_rate": 1.2358711292387122e-06, + "loss": 0.7919881343841553, + "step": 3933 + }, + { + "epoch": 0.9064516129032258, + "grad_norm": 1.0779297510278616, + "learning_rate": 1.2355008780093456e-06, + "loss": 0.8232694268226624, + "step": 3934 + }, + { + "epoch": 0.9066820276497696, + "grad_norm": 1.249487252121194, + "learning_rate": 1.2351305925973385e-06, + "loss": 0.80347740650177, + "step": 3935 + }, + { + "epoch": 0.9069124423963134, + "grad_norm": 1.2510529509996382, + "learning_rate": 1.234760273056437e-06, + "loss": 0.7818408012390137, + "step": 3936 + }, + { + "epoch": 0.9071428571428571, + "grad_norm": 1.1620371895322128, + "learning_rate": 1.2343899194403931e-06, + "loss": 0.8391210436820984, + "step": 3937 + }, + { + "epoch": 0.9073732718894009, + "grad_norm": 1.1380529418025975, + "learning_rate": 1.2340195318029622e-06, + "loss": 0.7937500476837158, + "step": 3938 + }, + { + "epoch": 0.9076036866359447, + "grad_norm": 0.973433345758839, + "learning_rate": 1.2336491101979065e-06, + "loss": 0.7158668041229248, + "step": 3939 + }, + { + "epoch": 0.9078341013824884, + "grad_norm": 0.9549803277521113, + "learning_rate": 
1.2332786546789915e-06, + "loss": 0.6956034898757935, + "step": 3940 + }, + { + "epoch": 0.9080645161290323, + "grad_norm": 1.035574155623001, + "learning_rate": 1.2329081652999887e-06, + "loss": 0.7252948880195618, + "step": 3941 + }, + { + "epoch": 0.908294930875576, + "grad_norm": 1.2086784459715743, + "learning_rate": 1.2325376421146739e-06, + "loss": 0.7131162881851196, + "step": 3942 + }, + { + "epoch": 0.9085253456221198, + "grad_norm": 0.8781165558243194, + "learning_rate": 1.2321670851768285e-06, + "loss": 0.7383663654327393, + "step": 3943 + }, + { + "epoch": 0.9087557603686636, + "grad_norm": 0.9355062944038273, + "learning_rate": 1.2317964945402374e-06, + "loss": 0.8296892642974854, + "step": 3944 + }, + { + "epoch": 0.9089861751152074, + "grad_norm": 1.1131069336270092, + "learning_rate": 1.2314258702586923e-06, + "loss": 0.8314273357391357, + "step": 3945 + }, + { + "epoch": 0.9092165898617511, + "grad_norm": 0.9647703306046335, + "learning_rate": 1.2310552123859888e-06, + "loss": 0.7264384031295776, + "step": 3946 + }, + { + "epoch": 0.909447004608295, + "grad_norm": 0.7580621867286127, + "learning_rate": 1.230684520975927e-06, + "loss": 0.6757937073707581, + "step": 3947 + }, + { + "epoch": 0.9096774193548387, + "grad_norm": 0.8884108342506404, + "learning_rate": 1.230313796082312e-06, + "loss": 0.8318504691123962, + "step": 3948 + }, + { + "epoch": 0.9099078341013825, + "grad_norm": 0.7767337233620181, + "learning_rate": 1.2299430377589547e-06, + "loss": 0.7043207883834839, + "step": 3949 + }, + { + "epoch": 0.9101382488479263, + "grad_norm": 1.0668368590995472, + "learning_rate": 1.2295722460596696e-06, + "loss": 0.8499487638473511, + "step": 3950 + }, + { + "epoch": 0.91036866359447, + "grad_norm": 1.1145902688644103, + "learning_rate": 1.2292014210382772e-06, + "loss": 0.8219600319862366, + "step": 3951 + }, + { + "epoch": 0.9105990783410138, + "grad_norm": 1.2329010539695853, + "learning_rate": 1.2288305627486017e-06, + "loss": 
0.8136317133903503, + "step": 3952 + }, + { + "epoch": 0.9108294930875576, + "grad_norm": 1.1220482069317936, + "learning_rate": 1.2284596712444735e-06, + "loss": 0.7858958840370178, + "step": 3953 + }, + { + "epoch": 0.9110599078341014, + "grad_norm": 1.182019995516566, + "learning_rate": 1.2280887465797259e-06, + "loss": 0.8108563423156738, + "step": 3954 + }, + { + "epoch": 0.9112903225806451, + "grad_norm": 1.17197106565382, + "learning_rate": 1.2277177888081987e-06, + "loss": 0.8061145544052124, + "step": 3955 + }, + { + "epoch": 0.911520737327189, + "grad_norm": 1.1140830632516712, + "learning_rate": 1.2273467979837361e-06, + "loss": 0.7769665718078613, + "step": 3956 + }, + { + "epoch": 0.9117511520737327, + "grad_norm": 1.5134088570090107, + "learning_rate": 1.2269757741601867e-06, + "loss": 1.0548570156097412, + "step": 3957 + }, + { + "epoch": 0.9119815668202765, + "grad_norm": 0.9732476833800602, + "learning_rate": 1.226604717391404e-06, + "loss": 0.7095952033996582, + "step": 3958 + }, + { + "epoch": 0.9122119815668203, + "grad_norm": 0.8435340807921997, + "learning_rate": 1.226233627731247e-06, + "loss": 0.7330363392829895, + "step": 3959 + }, + { + "epoch": 0.9124423963133641, + "grad_norm": 0.9706068481575616, + "learning_rate": 1.225862505233578e-06, + "loss": 0.7328442931175232, + "step": 3960 + }, + { + "epoch": 0.9126728110599078, + "grad_norm": 1.059740258312267, + "learning_rate": 1.2254913499522656e-06, + "loss": 0.7572993040084839, + "step": 3961 + }, + { + "epoch": 0.9129032258064517, + "grad_norm": 1.0542941153492202, + "learning_rate": 1.2251201619411823e-06, + "loss": 0.7706469297409058, + "step": 3962 + }, + { + "epoch": 0.9131336405529954, + "grad_norm": 1.1436826868313579, + "learning_rate": 1.2247489412542053e-06, + "loss": 0.7830193042755127, + "step": 3963 + }, + { + "epoch": 0.9133640552995391, + "grad_norm": 1.0827904871592715, + "learning_rate": 1.224377687945217e-06, + "loss": 0.8415955901145935, + "step": 3964 + }, + { + 
"epoch": 0.913594470046083, + "grad_norm": 1.1895924425921953, + "learning_rate": 1.2240064020681044e-06, + "loss": 0.7383062839508057, + "step": 3965 + }, + { + "epoch": 0.9138248847926267, + "grad_norm": 1.1432920832791855, + "learning_rate": 1.2236350836767593e-06, + "loss": 0.7372882962226868, + "step": 3966 + }, + { + "epoch": 0.9140552995391705, + "grad_norm": 1.0941013432151616, + "learning_rate": 1.2232637328250776e-06, + "loss": 0.7914254665374756, + "step": 3967 + }, + { + "epoch": 0.9142857142857143, + "grad_norm": 0.9886213418734634, + "learning_rate": 1.2228923495669605e-06, + "loss": 0.8510675430297852, + "step": 3968 + }, + { + "epoch": 0.9145161290322581, + "grad_norm": 1.045281864627849, + "learning_rate": 1.2225209339563143e-06, + "loss": 0.7391757369041443, + "step": 3969 + }, + { + "epoch": 0.9147465437788018, + "grad_norm": 0.8746728562097662, + "learning_rate": 1.2221494860470491e-06, + "loss": 0.69194495677948, + "step": 3970 + }, + { + "epoch": 0.9149769585253457, + "grad_norm": 1.0907421288179358, + "learning_rate": 1.22177800589308e-06, + "loss": 0.7593865394592285, + "step": 3971 + }, + { + "epoch": 0.9152073732718894, + "grad_norm": 1.037234739347401, + "learning_rate": 1.2214064935483268e-06, + "loss": 0.7831966876983643, + "step": 3972 + }, + { + "epoch": 0.9154377880184332, + "grad_norm": 1.1150279108134162, + "learning_rate": 1.2210349490667145e-06, + "loss": 0.8858723640441895, + "step": 3973 + }, + { + "epoch": 0.915668202764977, + "grad_norm": 1.1381126617682915, + "learning_rate": 1.2206633725021715e-06, + "loss": 0.8645567893981934, + "step": 3974 + }, + { + "epoch": 0.9158986175115207, + "grad_norm": 0.9188905804582469, + "learning_rate": 1.2202917639086322e-06, + "loss": 0.7619047164916992, + "step": 3975 + }, + { + "epoch": 0.9161290322580645, + "grad_norm": 1.0126992141273314, + "learning_rate": 1.2199201233400355e-06, + "loss": 0.8652681112289429, + "step": 3976 + }, + { + "epoch": 0.9163594470046083, + "grad_norm": 
0.9961259698766619, + "learning_rate": 1.2195484508503234e-06, + "loss": 0.6860940456390381, + "step": 3977 + }, + { + "epoch": 0.9165898617511521, + "grad_norm": 0.8860870600955693, + "learning_rate": 1.2191767464934444e-06, + "loss": 0.7372464537620544, + "step": 3978 + }, + { + "epoch": 0.9168202764976958, + "grad_norm": 1.3495413684840594, + "learning_rate": 1.218805010323351e-06, + "loss": 0.8719853162765503, + "step": 3979 + }, + { + "epoch": 0.9170506912442397, + "grad_norm": 0.9968927276513252, + "learning_rate": 1.2184332423940003e-06, + "loss": 0.8203779458999634, + "step": 3980 + }, + { + "epoch": 0.9172811059907834, + "grad_norm": 1.197176686739939, + "learning_rate": 1.218061442759353e-06, + "loss": 0.8648861646652222, + "step": 3981 + }, + { + "epoch": 0.9175115207373272, + "grad_norm": 1.0630748229990676, + "learning_rate": 1.2176896114733766e-06, + "loss": 0.7651659250259399, + "step": 3982 + }, + { + "epoch": 0.917741935483871, + "grad_norm": 1.20459191964974, + "learning_rate": 1.2173177485900408e-06, + "loss": 0.8495512008666992, + "step": 3983 + }, + { + "epoch": 0.9179723502304148, + "grad_norm": 1.3559959351470627, + "learning_rate": 1.2169458541633216e-06, + "loss": 0.7997228503227234, + "step": 3984 + }, + { + "epoch": 0.9182027649769585, + "grad_norm": 0.9870494686008755, + "learning_rate": 1.2165739282471987e-06, + "loss": 0.8353173136711121, + "step": 3985 + }, + { + "epoch": 0.9184331797235024, + "grad_norm": 1.2277323881843956, + "learning_rate": 1.216201970895657e-06, + "loss": 0.9039655327796936, + "step": 3986 + }, + { + "epoch": 0.9186635944700461, + "grad_norm": 0.9209288499077958, + "learning_rate": 1.2158299821626854e-06, + "loss": 0.8158592581748962, + "step": 3987 + }, + { + "epoch": 0.9188940092165898, + "grad_norm": 1.2007654555954255, + "learning_rate": 1.2154579621022776e-06, + "loss": 0.8443971872329712, + "step": 3988 + }, + { + "epoch": 0.9191244239631337, + "grad_norm": 0.916322848733307, + "learning_rate": 
1.2150859107684318e-06, + "loss": 0.7934167385101318, + "step": 3989 + }, + { + "epoch": 0.9193548387096774, + "grad_norm": 1.1576910593833736, + "learning_rate": 1.2147138282151512e-06, + "loss": 0.750052809715271, + "step": 3990 + }, + { + "epoch": 0.9195852534562212, + "grad_norm": 1.0948767691124337, + "learning_rate": 1.2143417144964423e-06, + "loss": 0.813056468963623, + "step": 3991 + }, + { + "epoch": 0.919815668202765, + "grad_norm": 1.1487977592190233, + "learning_rate": 1.2139695696663174e-06, + "loss": 0.9478945732116699, + "step": 3992 + }, + { + "epoch": 0.9200460829493088, + "grad_norm": 0.9711264468634061, + "learning_rate": 1.2135973937787927e-06, + "loss": 0.687637448310852, + "step": 3993 + }, + { + "epoch": 0.9202764976958525, + "grad_norm": 1.071392128639805, + "learning_rate": 1.213225186887889e-06, + "loss": 0.8073818683624268, + "step": 3994 + }, + { + "epoch": 0.9205069124423964, + "grad_norm": 1.1074324196567935, + "learning_rate": 1.2128529490476318e-06, + "loss": 0.6684166789054871, + "step": 3995 + }, + { + "epoch": 0.9207373271889401, + "grad_norm": 1.1910033963986806, + "learning_rate": 1.2124806803120506e-06, + "loss": 0.7897466421127319, + "step": 3996 + }, + { + "epoch": 0.9209677419354839, + "grad_norm": 1.0375797321803883, + "learning_rate": 1.21210838073518e-06, + "loss": 0.832312822341919, + "step": 3997 + }, + { + "epoch": 0.9211981566820276, + "grad_norm": 1.036059468253791, + "learning_rate": 1.2117360503710588e-06, + "loss": 0.9536067247390747, + "step": 3998 + }, + { + "epoch": 0.9214285714285714, + "grad_norm": 1.123926651312402, + "learning_rate": 1.2113636892737302e-06, + "loss": 0.8959759473800659, + "step": 3999 + }, + { + "epoch": 0.9216589861751152, + "grad_norm": 0.9405530325495998, + "learning_rate": 1.2109912974972422e-06, + "loss": 0.6789166927337646, + "step": 4000 + }, + { + "epoch": 0.9218894009216589, + "grad_norm": 0.9327551909921717, + "learning_rate": 1.2106188750956464e-06, + "loss": 0.7336491346359253, 
+ "step": 4001 + }, + { + "epoch": 0.9221198156682028, + "grad_norm": 0.8000293761487048, + "learning_rate": 1.2102464221229997e-06, + "loss": 0.7838259935379028, + "step": 4002 + }, + { + "epoch": 0.9223502304147465, + "grad_norm": 1.2907858896278495, + "learning_rate": 1.2098739386333631e-06, + "loss": 0.9147623777389526, + "step": 4003 + }, + { + "epoch": 0.9225806451612903, + "grad_norm": 1.3691019040487797, + "learning_rate": 1.2095014246808022e-06, + "loss": 0.7296491265296936, + "step": 4004 + }, + { + "epoch": 0.9228110599078341, + "grad_norm": 1.1028104717001235, + "learning_rate": 1.2091288803193868e-06, + "loss": 0.7898432016372681, + "step": 4005 + }, + { + "epoch": 0.9230414746543779, + "grad_norm": 1.1562470474736035, + "learning_rate": 1.2087563056031914e-06, + "loss": 0.8190659284591675, + "step": 4006 + }, + { + "epoch": 0.9232718894009216, + "grad_norm": 1.4146112766933352, + "learning_rate": 1.2083837005862945e-06, + "loss": 0.8383443355560303, + "step": 4007 + }, + { + "epoch": 0.9235023041474655, + "grad_norm": 0.7251077105825574, + "learning_rate": 1.2080110653227796e-06, + "loss": 0.5987120866775513, + "step": 4008 + }, + { + "epoch": 0.9237327188940092, + "grad_norm": 1.056645940510342, + "learning_rate": 1.2076383998667334e-06, + "loss": 0.8811358213424683, + "step": 4009 + }, + { + "epoch": 0.923963133640553, + "grad_norm": 0.8867108269493398, + "learning_rate": 1.2072657042722486e-06, + "loss": 0.7958807349205017, + "step": 4010 + }, + { + "epoch": 0.9241935483870968, + "grad_norm": 1.1776412427000924, + "learning_rate": 1.2068929785934215e-06, + "loss": 0.7192457914352417, + "step": 4011 + }, + { + "epoch": 0.9244239631336405, + "grad_norm": 1.0545419352254402, + "learning_rate": 1.2065202228843523e-06, + "loss": 0.6854838132858276, + "step": 4012 + }, + { + "epoch": 0.9246543778801843, + "grad_norm": 1.0759672957343283, + "learning_rate": 1.2061474371991457e-06, + "loss": 0.7334680557250977, + "step": 4013 + }, + { + "epoch": 
0.9248847926267281, + "grad_norm": 0.9536076812745731, + "learning_rate": 1.205774621591912e-06, + "loss": 0.7614402770996094, + "step": 4014 + }, + { + "epoch": 0.9251152073732719, + "grad_norm": 1.3871826739545572, + "learning_rate": 1.2054017761167644e-06, + "loss": 0.7502505779266357, + "step": 4015 + }, + { + "epoch": 0.9253456221198156, + "grad_norm": 1.044146949688276, + "learning_rate": 1.2050289008278205e-06, + "loss": 0.7922523021697998, + "step": 4016 + }, + { + "epoch": 0.9255760368663595, + "grad_norm": 1.2025329853302307, + "learning_rate": 1.2046559957792032e-06, + "loss": 0.7534265518188477, + "step": 4017 + }, + { + "epoch": 0.9258064516129032, + "grad_norm": 0.9478426591249515, + "learning_rate": 1.2042830610250395e-06, + "loss": 0.6997093558311462, + "step": 4018 + }, + { + "epoch": 0.926036866359447, + "grad_norm": 1.050086676036124, + "learning_rate": 1.2039100966194594e-06, + "loss": 0.7009599208831787, + "step": 4019 + }, + { + "epoch": 0.9262672811059908, + "grad_norm": 1.108108705874163, + "learning_rate": 1.203537102616599e-06, + "loss": 0.795873761177063, + "step": 4020 + }, + { + "epoch": 0.9264976958525346, + "grad_norm": 1.1836803264586404, + "learning_rate": 1.2031640790705972e-06, + "loss": 0.7860225439071655, + "step": 4021 + }, + { + "epoch": 0.9267281105990783, + "grad_norm": 0.9036535621632875, + "learning_rate": 1.2027910260355989e-06, + "loss": 0.7657063007354736, + "step": 4022 + }, + { + "epoch": 0.9269585253456222, + "grad_norm": 1.0407468417409953, + "learning_rate": 1.2024179435657512e-06, + "loss": 0.782909631729126, + "step": 4023 + }, + { + "epoch": 0.9271889400921659, + "grad_norm": 0.8628791908243046, + "learning_rate": 1.202044831715207e-06, + "loss": 0.713431715965271, + "step": 4024 + }, + { + "epoch": 0.9274193548387096, + "grad_norm": 0.9826922843740741, + "learning_rate": 1.201671690538123e-06, + "loss": 0.9126790165901184, + "step": 4025 + }, + { + "epoch": 0.9276497695852535, + "grad_norm": 0.9552497173996132, 
+ "learning_rate": 1.20129852008866e-06, + "loss": 0.8640999794006348, + "step": 4026 + }, + { + "epoch": 0.9278801843317972, + "grad_norm": 1.0290580406520045, + "learning_rate": 1.2009253204209832e-06, + "loss": 0.723473072052002, + "step": 4027 + }, + { + "epoch": 0.928110599078341, + "grad_norm": 0.9995947167655078, + "learning_rate": 1.2005520915892626e-06, + "loss": 0.6764041185379028, + "step": 4028 + }, + { + "epoch": 0.9283410138248848, + "grad_norm": 1.1315388960653066, + "learning_rate": 1.200178833647671e-06, + "loss": 0.8525882959365845, + "step": 4029 + }, + { + "epoch": 0.9285714285714286, + "grad_norm": 1.1279047416289067, + "learning_rate": 1.1998055466503872e-06, + "loss": 0.714957058429718, + "step": 4030 + }, + { + "epoch": 0.9288018433179723, + "grad_norm": 0.9055007840106456, + "learning_rate": 1.1994322306515925e-06, + "loss": 0.8015910387039185, + "step": 4031 + }, + { + "epoch": 0.9290322580645162, + "grad_norm": 1.1314666315910753, + "learning_rate": 1.1990588857054733e-06, + "loss": 1.0306739807128906, + "step": 4032 + }, + { + "epoch": 0.9292626728110599, + "grad_norm": 1.0078215910327748, + "learning_rate": 1.1986855118662205e-06, + "loss": 0.8307464122772217, + "step": 4033 + }, + { + "epoch": 0.9294930875576037, + "grad_norm": 0.9974753472669955, + "learning_rate": 1.1983121091880286e-06, + "loss": 0.8720347881317139, + "step": 4034 + }, + { + "epoch": 0.9297235023041475, + "grad_norm": 1.0249437684832297, + "learning_rate": 1.1979386777250968e-06, + "loss": 0.7716174721717834, + "step": 4035 + }, + { + "epoch": 0.9299539170506912, + "grad_norm": 0.9533075514678258, + "learning_rate": 1.1975652175316279e-06, + "loss": 0.8968960046768188, + "step": 4036 + }, + { + "epoch": 0.930184331797235, + "grad_norm": 1.0235472692311864, + "learning_rate": 1.197191728661829e-06, + "loss": 0.7472472786903381, + "step": 4037 + }, + { + "epoch": 0.9304147465437788, + "grad_norm": 1.209577738801564, + "learning_rate": 1.196818211169912e-06, + "loss": 
0.7969691753387451, + "step": 4038 + }, + { + "epoch": 0.9306451612903226, + "grad_norm": 0.8592343628435503, + "learning_rate": 1.196444665110092e-06, + "loss": 0.6187525987625122, + "step": 4039 + }, + { + "epoch": 0.9308755760368663, + "grad_norm": 1.0503056259771648, + "learning_rate": 1.1960710905365893e-06, + "loss": 0.8715502619743347, + "step": 4040 + }, + { + "epoch": 0.9311059907834102, + "grad_norm": 0.9918268480034713, + "learning_rate": 1.1956974875036273e-06, + "loss": 0.7174774408340454, + "step": 4041 + }, + { + "epoch": 0.9313364055299539, + "grad_norm": 0.8743867275561935, + "learning_rate": 1.1953238560654337e-06, + "loss": 0.6546192169189453, + "step": 4042 + }, + { + "epoch": 0.9315668202764977, + "grad_norm": 1.1024794232135675, + "learning_rate": 1.194950196276241e-06, + "loss": 0.8688700199127197, + "step": 4043 + }, + { + "epoch": 0.9317972350230415, + "grad_norm": 1.0449187982587707, + "learning_rate": 1.1945765081902856e-06, + "loss": 0.7679718732833862, + "step": 4044 + }, + { + "epoch": 0.9320276497695853, + "grad_norm": 0.9426197124643214, + "learning_rate": 1.1942027918618073e-06, + "loss": 0.6335175037384033, + "step": 4045 + }, + { + "epoch": 0.932258064516129, + "grad_norm": 1.0452657366695544, + "learning_rate": 1.1938290473450513e-06, + "loss": 0.785153865814209, + "step": 4046 + }, + { + "epoch": 0.9324884792626729, + "grad_norm": 0.9145063707903602, + "learning_rate": 1.1934552746942653e-06, + "loss": 0.6873019337654114, + "step": 4047 + }, + { + "epoch": 0.9327188940092166, + "grad_norm": 0.9707470479007109, + "learning_rate": 1.1930814739637025e-06, + "loss": 0.7416094541549683, + "step": 4048 + }, + { + "epoch": 0.9329493087557603, + "grad_norm": 1.2103943548089806, + "learning_rate": 1.1927076452076193e-06, + "loss": 0.7206372618675232, + "step": 4049 + }, + { + "epoch": 0.9331797235023042, + "grad_norm": 1.1043264858931607, + "learning_rate": 1.1923337884802767e-06, + "loss": 0.8352477550506592, + "step": 4050 + }, + { + 
"epoch": 0.9334101382488479, + "grad_norm": 1.116832001192149, + "learning_rate": 1.191959903835939e-06, + "loss": 0.8243483304977417, + "step": 4051 + }, + { + "epoch": 0.9336405529953917, + "grad_norm": 1.4110893804735163, + "learning_rate": 1.1915859913288756e-06, + "loss": 0.827987790107727, + "step": 4052 + }, + { + "epoch": 0.9338709677419355, + "grad_norm": 1.1514055762505417, + "learning_rate": 1.1912120510133589e-06, + "loss": 0.8624123334884644, + "step": 4053 + }, + { + "epoch": 0.9341013824884793, + "grad_norm": 1.2091942284642192, + "learning_rate": 1.1908380829436667e-06, + "loss": 0.8615037202835083, + "step": 4054 + }, + { + "epoch": 0.934331797235023, + "grad_norm": 1.2500115524653743, + "learning_rate": 1.190464087174079e-06, + "loss": 0.9367121458053589, + "step": 4055 + }, + { + "epoch": 0.9345622119815669, + "grad_norm": 1.4503623207353766, + "learning_rate": 1.190090063758881e-06, + "loss": 0.927996039390564, + "step": 4056 + }, + { + "epoch": 0.9347926267281106, + "grad_norm": 1.0709061746508743, + "learning_rate": 1.1897160127523623e-06, + "loss": 0.841314435005188, + "step": 4057 + }, + { + "epoch": 0.9350230414746544, + "grad_norm": 1.1021939339887863, + "learning_rate": 1.189341934208815e-06, + "loss": 0.864904522895813, + "step": 4058 + }, + { + "epoch": 0.9352534562211982, + "grad_norm": 1.148301781904619, + "learning_rate": 1.188967828182537e-06, + "loss": 0.9505404829978943, + "step": 4059 + }, + { + "epoch": 0.9354838709677419, + "grad_norm": 1.0791372441668663, + "learning_rate": 1.188593694727829e-06, + "loss": 0.7347132563591003, + "step": 4060 + }, + { + "epoch": 0.9357142857142857, + "grad_norm": 1.1367351426324537, + "learning_rate": 1.1882195338989958e-06, + "loss": 0.6267231106758118, + "step": 4061 + }, + { + "epoch": 0.9359447004608294, + "grad_norm": 1.0946102482081315, + "learning_rate": 1.1878453457503464e-06, + "loss": 0.8052406907081604, + "step": 4062 + }, + { + "epoch": 0.9361751152073733, + "grad_norm": 
1.1032845960202522, + "learning_rate": 1.1874711303361933e-06, + "loss": 0.7928211688995361, + "step": 4063 + }, + { + "epoch": 0.936405529953917, + "grad_norm": 1.1265414942472118, + "learning_rate": 1.1870968877108545e-06, + "loss": 0.8863959312438965, + "step": 4064 + }, + { + "epoch": 0.9366359447004609, + "grad_norm": 1.0592501761240638, + "learning_rate": 1.1867226179286496e-06, + "loss": 0.8749874830245972, + "step": 4065 + }, + { + "epoch": 0.9368663594470046, + "grad_norm": 0.9223254168257967, + "learning_rate": 1.186348321043904e-06, + "loss": 0.7516318559646606, + "step": 4066 + }, + { + "epoch": 0.9370967741935484, + "grad_norm": 1.0863969007807137, + "learning_rate": 1.1859739971109467e-06, + "loss": 0.8435031771659851, + "step": 4067 + }, + { + "epoch": 0.9373271889400921, + "grad_norm": 1.08570563607149, + "learning_rate": 1.1855996461841093e-06, + "loss": 0.8766932487487793, + "step": 4068 + }, + { + "epoch": 0.937557603686636, + "grad_norm": 1.2630999347152494, + "learning_rate": 1.1852252683177293e-06, + "loss": 0.8748513460159302, + "step": 4069 + }, + { + "epoch": 0.9377880184331797, + "grad_norm": 1.2689555695038703, + "learning_rate": 1.184850863566147e-06, + "loss": 0.8917855024337769, + "step": 4070 + }, + { + "epoch": 0.9380184331797236, + "grad_norm": 1.0628114663297852, + "learning_rate": 1.1844764319837064e-06, + "loss": 0.7631640434265137, + "step": 4071 + }, + { + "epoch": 0.9382488479262673, + "grad_norm": 1.0140155614547266, + "learning_rate": 1.1841019736247557e-06, + "loss": 0.8354158401489258, + "step": 4072 + }, + { + "epoch": 0.938479262672811, + "grad_norm": 0.8561335978546013, + "learning_rate": 1.1837274885436473e-06, + "loss": 0.8122761845588684, + "step": 4073 + }, + { + "epoch": 0.9387096774193548, + "grad_norm": 1.5776279194471237, + "learning_rate": 1.1833529767947374e-06, + "loss": 0.8281430006027222, + "step": 4074 + }, + { + "epoch": 0.9389400921658986, + "grad_norm": 1.3828203317822199, + "learning_rate": 
1.1829784384323856e-06, + "loss": 0.8291982412338257, + "step": 4075 + }, + { + "epoch": 0.9391705069124424, + "grad_norm": 1.3096607265096822, + "learning_rate": 1.1826038735109553e-06, + "loss": 0.8951852321624756, + "step": 4076 + }, + { + "epoch": 0.9394009216589861, + "grad_norm": 1.2165058417213606, + "learning_rate": 1.182229282084815e-06, + "loss": 0.7006446123123169, + "step": 4077 + }, + { + "epoch": 0.93963133640553, + "grad_norm": 1.1269330295000342, + "learning_rate": 1.1818546642083353e-06, + "loss": 0.8944047689437866, + "step": 4078 + }, + { + "epoch": 0.9398617511520737, + "grad_norm": 0.9351299115123082, + "learning_rate": 1.1814800199358919e-06, + "loss": 0.8252646923065186, + "step": 4079 + }, + { + "epoch": 0.9400921658986175, + "grad_norm": 1.2255680666736817, + "learning_rate": 1.181105349321864e-06, + "loss": 0.7852828502655029, + "step": 4080 + }, + { + "epoch": 0.9403225806451613, + "grad_norm": 1.0734973037527151, + "learning_rate": 1.1807306524206347e-06, + "loss": 0.7758563160896301, + "step": 4081 + }, + { + "epoch": 0.9405529953917051, + "grad_norm": 1.0672387708424669, + "learning_rate": 1.1803559292865899e-06, + "loss": 0.7297114133834839, + "step": 4082 + }, + { + "epoch": 0.9407834101382488, + "grad_norm": 1.1802096748579922, + "learning_rate": 1.1799811799741209e-06, + "loss": 0.7974321842193604, + "step": 4083 + }, + { + "epoch": 0.9410138248847926, + "grad_norm": 1.2930194654348013, + "learning_rate": 1.179606404537622e-06, + "loss": 0.6406733989715576, + "step": 4084 + }, + { + "epoch": 0.9412442396313364, + "grad_norm": 0.9862268230007224, + "learning_rate": 1.179231603031491e-06, + "loss": 0.6925486326217651, + "step": 4085 + }, + { + "epoch": 0.9414746543778801, + "grad_norm": 0.9201295652583962, + "learning_rate": 1.17885677551013e-06, + "loss": 0.792647123336792, + "step": 4086 + }, + { + "epoch": 0.941705069124424, + "grad_norm": 1.0460531669846371, + "learning_rate": 1.1784819220279454e-06, + "loss": 0.7499191761016846, 
+ "step": 4087 + }, + { + "epoch": 0.9419354838709677, + "grad_norm": 1.120763335726602, + "learning_rate": 1.1781070426393455e-06, + "loss": 0.8307451009750366, + "step": 4088 + }, + { + "epoch": 0.9421658986175115, + "grad_norm": 1.1015455973526673, + "learning_rate": 1.1777321373987445e-06, + "loss": 0.7859289646148682, + "step": 4089 + }, + { + "epoch": 0.9423963133640553, + "grad_norm": 1.0291702780651948, + "learning_rate": 1.177357206360559e-06, + "loss": 0.761134922504425, + "step": 4090 + }, + { + "epoch": 0.9426267281105991, + "grad_norm": 1.240188832472171, + "learning_rate": 1.1769822495792098e-06, + "loss": 0.8697078227996826, + "step": 4091 + }, + { + "epoch": 0.9428571428571428, + "grad_norm": 1.0395615260234665, + "learning_rate": 1.1766072671091212e-06, + "loss": 0.731541633605957, + "step": 4092 + }, + { + "epoch": 0.9430875576036867, + "grad_norm": 1.1056530512213054, + "learning_rate": 1.1762322590047219e-06, + "loss": 0.7501940727233887, + "step": 4093 + }, + { + "epoch": 0.9433179723502304, + "grad_norm": 1.1531150840189341, + "learning_rate": 1.1758572253204431e-06, + "loss": 0.9448602199554443, + "step": 4094 + }, + { + "epoch": 0.9435483870967742, + "grad_norm": 0.8884441593083074, + "learning_rate": 1.175482166110721e-06, + "loss": 0.7704026699066162, + "step": 4095 + }, + { + "epoch": 0.943778801843318, + "grad_norm": 0.8973060402184874, + "learning_rate": 1.1751070814299947e-06, + "loss": 0.7905057668685913, + "step": 4096 + }, + { + "epoch": 0.9440092165898617, + "grad_norm": 1.238350046583652, + "learning_rate": 1.1747319713327078e-06, + "loss": 0.8957202434539795, + "step": 4097 + }, + { + "epoch": 0.9442396313364055, + "grad_norm": 0.9896078596502195, + "learning_rate": 1.174356835873306e-06, + "loss": 0.7922521233558655, + "step": 4098 + }, + { + "epoch": 0.9444700460829493, + "grad_norm": 0.9974151293119675, + "learning_rate": 1.1739816751062404e-06, + "loss": 0.6501933336257935, + "step": 4099 + }, + { + "epoch": 
0.9447004608294931, + "grad_norm": 0.9673699554437744, + "learning_rate": 1.1736064890859654e-06, + "loss": 0.6743361353874207, + "step": 4100 + }, + { + "epoch": 0.9449308755760368, + "grad_norm": 1.0381670362595088, + "learning_rate": 1.173231277866938e-06, + "loss": 0.920632004737854, + "step": 4101 + }, + { + "epoch": 0.9451612903225807, + "grad_norm": 0.872889135902432, + "learning_rate": 1.1728560415036199e-06, + "loss": 0.7498964071273804, + "step": 4102 + }, + { + "epoch": 0.9453917050691244, + "grad_norm": 0.8444235514312883, + "learning_rate": 1.1724807800504765e-06, + "loss": 0.7665064334869385, + "step": 4103 + }, + { + "epoch": 0.9456221198156682, + "grad_norm": 0.8729439782855682, + "learning_rate": 1.172105493561976e-06, + "loss": 0.75946044921875, + "step": 4104 + }, + { + "epoch": 0.945852534562212, + "grad_norm": 1.016811663523364, + "learning_rate": 1.1717301820925908e-06, + "loss": 0.7701961398124695, + "step": 4105 + }, + { + "epoch": 0.9460829493087558, + "grad_norm": 0.9708618505769702, + "learning_rate": 1.1713548456967974e-06, + "loss": 0.7775348424911499, + "step": 4106 + }, + { + "epoch": 0.9463133640552995, + "grad_norm": 0.8519325609053343, + "learning_rate": 1.1709794844290745e-06, + "loss": 0.8149436712265015, + "step": 4107 + }, + { + "epoch": 0.9465437788018434, + "grad_norm": 0.8519085263981432, + "learning_rate": 1.170604098343906e-06, + "loss": 0.7136009335517883, + "step": 4108 + }, + { + "epoch": 0.9467741935483871, + "grad_norm": 1.2048256186284507, + "learning_rate": 1.1702286874957786e-06, + "loss": 0.7678873538970947, + "step": 4109 + }, + { + "epoch": 0.9470046082949308, + "grad_norm": 0.9842223659547223, + "learning_rate": 1.1698532519391827e-06, + "loss": 0.7506710290908813, + "step": 4110 + }, + { + "epoch": 0.9472350230414747, + "grad_norm": 0.900893049038478, + "learning_rate": 1.1694777917286118e-06, + "loss": 0.6646897792816162, + "step": 4111 + }, + { + "epoch": 0.9474654377880184, + "grad_norm": 
1.3857066059132386, + "learning_rate": 1.1691023069185639e-06, + "loss": 0.820647120475769, + "step": 4112 + }, + { + "epoch": 0.9476958525345622, + "grad_norm": 0.9795728799566645, + "learning_rate": 1.1687267975635402e-06, + "loss": 0.872378408908844, + "step": 4113 + }, + { + "epoch": 0.947926267281106, + "grad_norm": 1.0760361173899362, + "learning_rate": 1.168351263718045e-06, + "loss": 0.7920655608177185, + "step": 4114 + }, + { + "epoch": 0.9481566820276498, + "grad_norm": 1.1709025489256302, + "learning_rate": 1.1679757054365866e-06, + "loss": 0.6593836545944214, + "step": 4115 + }, + { + "epoch": 0.9483870967741935, + "grad_norm": 1.0965626572699905, + "learning_rate": 1.1676001227736772e-06, + "loss": 0.7473627328872681, + "step": 4116 + }, + { + "epoch": 0.9486175115207374, + "grad_norm": 1.2027339281506744, + "learning_rate": 1.1672245157838317e-06, + "loss": 0.8001665472984314, + "step": 4117 + }, + { + "epoch": 0.9488479262672811, + "grad_norm": 0.9543944768909415, + "learning_rate": 1.1668488845215689e-06, + "loss": 0.7342571020126343, + "step": 4118 + }, + { + "epoch": 0.9490783410138249, + "grad_norm": 1.2428163281726954, + "learning_rate": 1.1664732290414118e-06, + "loss": 0.7616822719573975, + "step": 4119 + }, + { + "epoch": 0.9493087557603687, + "grad_norm": 1.2486031522636918, + "learning_rate": 1.1660975493978857e-06, + "loss": 0.8885634541511536, + "step": 4120 + }, + { + "epoch": 0.9495391705069124, + "grad_norm": 1.1323168185847523, + "learning_rate": 1.1657218456455205e-06, + "loss": 0.7816281318664551, + "step": 4121 + }, + { + "epoch": 0.9497695852534562, + "grad_norm": 0.9570364600334796, + "learning_rate": 1.1653461178388485e-06, + "loss": 0.7412079572677612, + "step": 4122 + }, + { + "epoch": 0.95, + "grad_norm": 0.957883425985998, + "learning_rate": 1.1649703660324064e-06, + "loss": 0.8096172213554382, + "step": 4123 + }, + { + "epoch": 0.9502304147465438, + "grad_norm": 1.0359903594582591, + "learning_rate": 1.164594590280734e-06, 
+ "loss": 0.6690856218338013, + "step": 4124 + }, + { + "epoch": 0.9504608294930875, + "grad_norm": 0.9697541149080181, + "learning_rate": 1.1642187906383746e-06, + "loss": 0.7509289979934692, + "step": 4125 + }, + { + "epoch": 0.9506912442396314, + "grad_norm": 0.8506285939807987, + "learning_rate": 1.1638429671598754e-06, + "loss": 0.6643730401992798, + "step": 4126 + }, + { + "epoch": 0.9509216589861751, + "grad_norm": 0.994475544194171, + "learning_rate": 1.1634671198997864e-06, + "loss": 0.8100850582122803, + "step": 4127 + }, + { + "epoch": 0.9511520737327189, + "grad_norm": 1.392121351288023, + "learning_rate": 1.1630912489126612e-06, + "loss": 0.919742226600647, + "step": 4128 + }, + { + "epoch": 0.9513824884792627, + "grad_norm": 1.144319413666889, + "learning_rate": 1.1627153542530571e-06, + "loss": 0.8953771591186523, + "step": 4129 + }, + { + "epoch": 0.9516129032258065, + "grad_norm": 0.9663802093818391, + "learning_rate": 1.162339435975535e-06, + "loss": 0.7401770949363708, + "step": 4130 + }, + { + "epoch": 0.9518433179723502, + "grad_norm": 1.0071840947097435, + "learning_rate": 1.1619634941346585e-06, + "loss": 0.7618032097816467, + "step": 4131 + }, + { + "epoch": 0.9520737327188941, + "grad_norm": 1.3156218418351784, + "learning_rate": 1.1615875287849955e-06, + "loss": 0.9134000539779663, + "step": 4132 + }, + { + "epoch": 0.9523041474654378, + "grad_norm": 0.9617492928251477, + "learning_rate": 1.1612115399811162e-06, + "loss": 0.7555145025253296, + "step": 4133 + }, + { + "epoch": 0.9525345622119815, + "grad_norm": 0.9434517704683025, + "learning_rate": 1.1608355277775955e-06, + "loss": 0.9125050902366638, + "step": 4134 + }, + { + "epoch": 0.9527649769585254, + "grad_norm": 0.9082549396493419, + "learning_rate": 1.1604594922290106e-06, + "loss": 0.6575542688369751, + "step": 4135 + }, + { + "epoch": 0.9529953917050691, + "grad_norm": 1.0750997369204898, + "learning_rate": 1.1600834333899431e-06, + "loss": 0.7530527114868164, + "step": 4136 + 
}, + { + "epoch": 0.9532258064516129, + "grad_norm": 0.9603596342147773, + "learning_rate": 1.159707351314977e-06, + "loss": 0.8818701505661011, + "step": 4137 + }, + { + "epoch": 0.9534562211981567, + "grad_norm": 0.9491169409805379, + "learning_rate": 1.1593312460587003e-06, + "loss": 0.7172919511795044, + "step": 4138 + }, + { + "epoch": 0.9536866359447005, + "grad_norm": 1.1122266085503043, + "learning_rate": 1.1589551176757044e-06, + "loss": 0.8701400756835938, + "step": 4139 + }, + { + "epoch": 0.9539170506912442, + "grad_norm": 1.3285866575691943, + "learning_rate": 1.1585789662205834e-06, + "loss": 0.867475152015686, + "step": 4140 + }, + { + "epoch": 0.9541474654377881, + "grad_norm": 1.1851362026267, + "learning_rate": 1.1582027917479356e-06, + "loss": 0.7809052467346191, + "step": 4141 + }, + { + "epoch": 0.9543778801843318, + "grad_norm": 1.1986202884801196, + "learning_rate": 1.1578265943123619e-06, + "loss": 0.8589099645614624, + "step": 4142 + }, + { + "epoch": 0.9546082949308756, + "grad_norm": 0.893566517908755, + "learning_rate": 1.157450373968467e-06, + "loss": 0.7826642394065857, + "step": 4143 + }, + { + "epoch": 0.9548387096774194, + "grad_norm": 1.3652425128856092, + "learning_rate": 1.1570741307708585e-06, + "loss": 0.9550029635429382, + "step": 4144 + }, + { + "epoch": 0.9550691244239631, + "grad_norm": 1.0826442844044148, + "learning_rate": 1.1566978647741478e-06, + "loss": 0.8607431650161743, + "step": 4145 + }, + { + "epoch": 0.9552995391705069, + "grad_norm": 0.8247649155112424, + "learning_rate": 1.15632157603295e-06, + "loss": 0.7350449562072754, + "step": 4146 + }, + { + "epoch": 0.9555299539170506, + "grad_norm": 1.033301557916291, + "learning_rate": 1.1559452646018818e-06, + "loss": 0.853142261505127, + "step": 4147 + }, + { + "epoch": 0.9557603686635945, + "grad_norm": 1.0495554531445934, + "learning_rate": 1.1555689305355651e-06, + "loss": 0.7137192487716675, + "step": 4148 + }, + { + "epoch": 0.9559907834101382, + "grad_norm": 
1.158813208265862, + "learning_rate": 1.1551925738886244e-06, + "loss": 0.9007513523101807, + "step": 4149 + }, + { + "epoch": 0.956221198156682, + "grad_norm": 1.1071306366128357, + "learning_rate": 1.1548161947156867e-06, + "loss": 0.8499083518981934, + "step": 4150 + }, + { + "epoch": 0.9564516129032258, + "grad_norm": 0.874419574252059, + "learning_rate": 1.1544397930713836e-06, + "loss": 0.8068628311157227, + "step": 4151 + }, + { + "epoch": 0.9566820276497696, + "grad_norm": 1.1729788609256337, + "learning_rate": 1.1540633690103487e-06, + "loss": 0.8357307314872742, + "step": 4152 + }, + { + "epoch": 0.9569124423963133, + "grad_norm": 1.262397502444813, + "learning_rate": 1.1536869225872198e-06, + "loss": 0.7650378942489624, + "step": 4153 + }, + { + "epoch": 0.9571428571428572, + "grad_norm": 0.9933463317010283, + "learning_rate": 1.1533104538566376e-06, + "loss": 0.8717354536056519, + "step": 4154 + }, + { + "epoch": 0.9573732718894009, + "grad_norm": 0.9807638290234347, + "learning_rate": 1.152933962873246e-06, + "loss": 0.6314762830734253, + "step": 4155 + }, + { + "epoch": 0.9576036866359448, + "grad_norm": 1.1279705073097503, + "learning_rate": 1.152557449691692e-06, + "loss": 0.8949059844017029, + "step": 4156 + }, + { + "epoch": 0.9578341013824885, + "grad_norm": 1.137203803563717, + "learning_rate": 1.1521809143666261e-06, + "loss": 0.7862699031829834, + "step": 4157 + }, + { + "epoch": 0.9580645161290322, + "grad_norm": 0.8970512868442762, + "learning_rate": 1.151804356952702e-06, + "loss": 0.7954641580581665, + "step": 4158 + }, + { + "epoch": 0.958294930875576, + "grad_norm": 1.0478069911824797, + "learning_rate": 1.1514277775045766e-06, + "loss": 0.7654163241386414, + "step": 4159 + }, + { + "epoch": 0.9585253456221198, + "grad_norm": 1.0321973050954667, + "learning_rate": 1.1510511760769097e-06, + "loss": 0.7050681114196777, + "step": 4160 + }, + { + "epoch": 0.9587557603686636, + "grad_norm": 1.0667493196933242, + "learning_rate": 
1.1506745527243646e-06, + "loss": 0.8646515607833862, + "step": 4161 + }, + { + "epoch": 0.9589861751152073, + "grad_norm": 0.9392654190881413, + "learning_rate": 1.1502979075016078e-06, + "loss": 0.7427883148193359, + "step": 4162 + }, + { + "epoch": 0.9592165898617512, + "grad_norm": 1.2506151155745373, + "learning_rate": 1.1499212404633083e-06, + "loss": 0.7800190448760986, + "step": 4163 + }, + { + "epoch": 0.9594470046082949, + "grad_norm": 1.0487739651932841, + "learning_rate": 1.1495445516641394e-06, + "loss": 0.789481520652771, + "step": 4164 + }, + { + "epoch": 0.9596774193548387, + "grad_norm": 0.8332785453272284, + "learning_rate": 1.1491678411587768e-06, + "loss": 0.7975008487701416, + "step": 4165 + }, + { + "epoch": 0.9599078341013825, + "grad_norm": 0.9306560917040928, + "learning_rate": 1.1487911090018994e-06, + "loss": 0.7964596748352051, + "step": 4166 + }, + { + "epoch": 0.9601382488479263, + "grad_norm": 0.8915843631095149, + "learning_rate": 1.1484143552481895e-06, + "loss": 0.7008803486824036, + "step": 4167 + }, + { + "epoch": 0.96036866359447, + "grad_norm": 0.888889684402262, + "learning_rate": 1.1480375799523328e-06, + "loss": 0.708189070224762, + "step": 4168 + }, + { + "epoch": 0.9605990783410139, + "grad_norm": 1.1069917813185677, + "learning_rate": 1.1476607831690167e-06, + "loss": 0.8207682371139526, + "step": 4169 + }, + { + "epoch": 0.9608294930875576, + "grad_norm": 1.200280235865814, + "learning_rate": 1.1472839649529337e-06, + "loss": 0.7682942152023315, + "step": 4170 + }, + { + "epoch": 0.9610599078341013, + "grad_norm": 1.0122999990692296, + "learning_rate": 1.1469071253587785e-06, + "loss": 0.8435598611831665, + "step": 4171 + }, + { + "epoch": 0.9612903225806452, + "grad_norm": 0.79536207500534, + "learning_rate": 1.1465302644412483e-06, + "loss": 0.7516113519668579, + "step": 4172 + }, + { + "epoch": 0.9615207373271889, + "grad_norm": 0.881539477347835, + "learning_rate": 1.1461533822550442e-06, + "loss": 
0.7125411629676819, + "step": 4173 + }, + { + "epoch": 0.9617511520737327, + "grad_norm": 0.9108745928942158, + "learning_rate": 1.14577647885487e-06, + "loss": 0.7560747861862183, + "step": 4174 + }, + { + "epoch": 0.9619815668202765, + "grad_norm": 0.9027443230900505, + "learning_rate": 1.1453995542954332e-06, + "loss": 0.6702673435211182, + "step": 4175 + }, + { + "epoch": 0.9622119815668203, + "grad_norm": 1.1520258504461998, + "learning_rate": 1.1450226086314433e-06, + "loss": 0.8083088397979736, + "step": 4176 + }, + { + "epoch": 0.962442396313364, + "grad_norm": 0.9906259449003554, + "learning_rate": 1.1446456419176135e-06, + "loss": 0.7579925060272217, + "step": 4177 + }, + { + "epoch": 0.9626728110599079, + "grad_norm": 0.9460352601625827, + "learning_rate": 1.1442686542086609e-06, + "loss": 0.713416576385498, + "step": 4178 + }, + { + "epoch": 0.9629032258064516, + "grad_norm": 1.1770844867552515, + "learning_rate": 1.1438916455593035e-06, + "loss": 0.7767639756202698, + "step": 4179 + }, + { + "epoch": 0.9631336405529954, + "grad_norm": 1.0244180953454374, + "learning_rate": 1.1435146160242645e-06, + "loss": 0.7493964433670044, + "step": 4180 + }, + { + "epoch": 0.9633640552995392, + "grad_norm": 1.1249907720020325, + "learning_rate": 1.1431375656582692e-06, + "loss": 0.8789365291595459, + "step": 4181 + }, + { + "epoch": 0.9635944700460829, + "grad_norm": 1.177047767616621, + "learning_rate": 1.1427604945160457e-06, + "loss": 0.7750524878501892, + "step": 4182 + }, + { + "epoch": 0.9638248847926267, + "grad_norm": 1.1195166665130392, + "learning_rate": 1.142383402652325e-06, + "loss": 0.9330715537071228, + "step": 4183 + }, + { + "epoch": 0.9640552995391705, + "grad_norm": 0.933339002257347, + "learning_rate": 1.142006290121842e-06, + "loss": 0.6845035552978516, + "step": 4184 + }, + { + "epoch": 0.9642857142857143, + "grad_norm": 0.9794843601160967, + "learning_rate": 1.1416291569793343e-06, + "loss": 0.7295390963554382, + "step": 4185 + }, + { + 
"epoch": 0.964516129032258, + "grad_norm": 1.0666753158619988, + "learning_rate": 1.1412520032795419e-06, + "loss": 0.6869080066680908, + "step": 4186 + }, + { + "epoch": 0.9647465437788019, + "grad_norm": 1.506743316898968, + "learning_rate": 1.140874829077208e-06, + "loss": 1.0916842222213745, + "step": 4187 + }, + { + "epoch": 0.9649769585253456, + "grad_norm": 1.0539994363877199, + "learning_rate": 1.1404976344270793e-06, + "loss": 0.7487984299659729, + "step": 4188 + }, + { + "epoch": 0.9652073732718894, + "grad_norm": 1.024674697115665, + "learning_rate": 1.140120419383905e-06, + "loss": 0.8852604627609253, + "step": 4189 + }, + { + "epoch": 0.9654377880184332, + "grad_norm": 1.065174441144157, + "learning_rate": 1.139743184002437e-06, + "loss": 0.7384698987007141, + "step": 4190 + }, + { + "epoch": 0.965668202764977, + "grad_norm": 1.2009691028192717, + "learning_rate": 1.1393659283374312e-06, + "loss": 0.8033223152160645, + "step": 4191 + }, + { + "epoch": 0.9658986175115207, + "grad_norm": 1.2698866658546557, + "learning_rate": 1.1389886524436453e-06, + "loss": 0.8870355486869812, + "step": 4192 + }, + { + "epoch": 0.9661290322580646, + "grad_norm": 1.1198376045036553, + "learning_rate": 1.1386113563758405e-06, + "loss": 0.869537353515625, + "step": 4193 + }, + { + "epoch": 0.9663594470046083, + "grad_norm": 1.027781409519754, + "learning_rate": 1.1382340401887808e-06, + "loss": 0.8564068675041199, + "step": 4194 + }, + { + "epoch": 0.966589861751152, + "grad_norm": 0.9894593103049535, + "learning_rate": 1.1378567039372332e-06, + "loss": 0.7988623380661011, + "step": 4195 + }, + { + "epoch": 0.9668202764976959, + "grad_norm": 1.0843651981255995, + "learning_rate": 1.1374793476759673e-06, + "loss": 0.9405556917190552, + "step": 4196 + }, + { + "epoch": 0.9670506912442396, + "grad_norm": 0.8756334921680484, + "learning_rate": 1.137101971459756e-06, + "loss": 0.6757407188415527, + "step": 4197 + }, + { + "epoch": 0.9672811059907834, + "grad_norm": 
1.1855730012050456, + "learning_rate": 1.1367245753433757e-06, + "loss": 0.7521541118621826, + "step": 4198 + }, + { + "epoch": 0.9675115207373272, + "grad_norm": 1.0137943151941313, + "learning_rate": 1.1363471593816037e-06, + "loss": 0.7306162714958191, + "step": 4199 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8912209844157076, + "learning_rate": 1.135969723629222e-06, + "loss": 0.6884766817092896, + "step": 4200 + }, + { + "epoch": 0.9679723502304147, + "grad_norm": 1.2084507323846643, + "learning_rate": 1.1355922681410152e-06, + "loss": 0.8420373201370239, + "step": 4201 + }, + { + "epoch": 0.9682027649769586, + "grad_norm": 0.7638761509020496, + "learning_rate": 1.1352147929717704e-06, + "loss": 0.7252322435379028, + "step": 4202 + }, + { + "epoch": 0.9684331797235023, + "grad_norm": 0.9448982669089191, + "learning_rate": 1.134837298176277e-06, + "loss": 0.6375538110733032, + "step": 4203 + }, + { + "epoch": 0.9686635944700461, + "grad_norm": 1.0629192948024473, + "learning_rate": 1.1344597838093283e-06, + "loss": 0.713671863079071, + "step": 4204 + }, + { + "epoch": 0.9688940092165899, + "grad_norm": 1.0319385361068514, + "learning_rate": 1.1340822499257201e-06, + "loss": 0.8591479063034058, + "step": 4205 + }, + { + "epoch": 0.9691244239631336, + "grad_norm": 1.0671754327237228, + "learning_rate": 1.1337046965802505e-06, + "loss": 0.7638808488845825, + "step": 4206 + }, + { + "epoch": 0.9693548387096774, + "grad_norm": 1.1032489557963816, + "learning_rate": 1.1333271238277215e-06, + "loss": 0.8133253455162048, + "step": 4207 + }, + { + "epoch": 0.9695852534562212, + "grad_norm": 0.9621754998556686, + "learning_rate": 1.132949531722937e-06, + "loss": 0.6938756704330444, + "step": 4208 + }, + { + "epoch": 0.969815668202765, + "grad_norm": 1.171557608199449, + "learning_rate": 1.132571920320704e-06, + "loss": 0.793639063835144, + "step": 4209 + }, + { + "epoch": 0.9700460829493087, + "grad_norm": 1.066219056403929, + "learning_rate": 
1.132194289675832e-06, + "loss": 0.7188536524772644, + "step": 4210 + }, + { + "epoch": 0.9702764976958526, + "grad_norm": 1.2873690827507545, + "learning_rate": 1.1318166398431343e-06, + "loss": 0.8076587319374084, + "step": 4211 + }, + { + "epoch": 0.9705069124423963, + "grad_norm": 1.2434961707112964, + "learning_rate": 1.1314389708774258e-06, + "loss": 0.8390023708343506, + "step": 4212 + }, + { + "epoch": 0.9707373271889401, + "grad_norm": 1.2800250293744322, + "learning_rate": 1.1310612828335243e-06, + "loss": 0.8395706415176392, + "step": 4213 + }, + { + "epoch": 0.9709677419354839, + "grad_norm": 1.1156221851257155, + "learning_rate": 1.1306835757662515e-06, + "loss": 0.9672995805740356, + "step": 4214 + }, + { + "epoch": 0.9711981566820277, + "grad_norm": 1.1859433022618981, + "learning_rate": 1.1303058497304303e-06, + "loss": 0.7716202735900879, + "step": 4215 + }, + { + "epoch": 0.9714285714285714, + "grad_norm": 0.9257750691433206, + "learning_rate": 1.1299281047808876e-06, + "loss": 0.6318329572677612, + "step": 4216 + }, + { + "epoch": 0.9716589861751153, + "grad_norm": 1.1802189065520408, + "learning_rate": 1.1295503409724525e-06, + "loss": 0.8287553787231445, + "step": 4217 + }, + { + "epoch": 0.971889400921659, + "grad_norm": 0.835147088990129, + "learning_rate": 1.129172558359957e-06, + "loss": 0.6903107762336731, + "step": 4218 + }, + { + "epoch": 0.9721198156682027, + "grad_norm": 0.9693907793654548, + "learning_rate": 1.1287947569982355e-06, + "loss": 0.684443473815918, + "step": 4219 + }, + { + "epoch": 0.9723502304147466, + "grad_norm": 1.2152908203730401, + "learning_rate": 1.1284169369421254e-06, + "loss": 0.8566167950630188, + "step": 4220 + }, + { + "epoch": 0.9725806451612903, + "grad_norm": 1.0787740661687364, + "learning_rate": 1.1280390982464673e-06, + "loss": 0.8103536367416382, + "step": 4221 + }, + { + "epoch": 0.9728110599078341, + "grad_norm": 1.115333195517037, + "learning_rate": 1.1276612409661036e-06, + "loss": 
0.8027071356773376, + "step": 4222 + }, + { + "epoch": 0.9730414746543778, + "grad_norm": 1.1442493875477038, + "learning_rate": 1.1272833651558796e-06, + "loss": 0.8251115679740906, + "step": 4223 + }, + { + "epoch": 0.9732718894009217, + "grad_norm": 1.1151561398542829, + "learning_rate": 1.1269054708706437e-06, + "loss": 0.6468047499656677, + "step": 4224 + }, + { + "epoch": 0.9735023041474654, + "grad_norm": 1.129830296326307, + "learning_rate": 1.1265275581652465e-06, + "loss": 0.8085706233978271, + "step": 4225 + }, + { + "epoch": 0.9737327188940093, + "grad_norm": 1.139574441171448, + "learning_rate": 1.1261496270945418e-06, + "loss": 0.8396503925323486, + "step": 4226 + }, + { + "epoch": 0.973963133640553, + "grad_norm": 0.9978900351940978, + "learning_rate": 1.1257716777133861e-06, + "loss": 0.7860006093978882, + "step": 4227 + }, + { + "epoch": 0.9741935483870968, + "grad_norm": 1.1484873689809545, + "learning_rate": 1.1253937100766373e-06, + "loss": 0.8630701303482056, + "step": 4228 + }, + { + "epoch": 0.9744239631336405, + "grad_norm": 0.9488769562872501, + "learning_rate": 1.1250157242391577e-06, + "loss": 0.8363114595413208, + "step": 4229 + }, + { + "epoch": 0.9746543778801844, + "grad_norm": 1.1415512207130691, + "learning_rate": 1.1246377202558114e-06, + "loss": 0.7837141156196594, + "step": 4230 + }, + { + "epoch": 0.9748847926267281, + "grad_norm": 1.3474534084840375, + "learning_rate": 1.1242596981814648e-06, + "loss": 0.8283151984214783, + "step": 4231 + }, + { + "epoch": 0.9751152073732718, + "grad_norm": 1.2728043293758005, + "learning_rate": 1.1238816580709878e-06, + "loss": 0.9232061505317688, + "step": 4232 + }, + { + "epoch": 0.9753456221198157, + "grad_norm": 1.125514954365521, + "learning_rate": 1.123503599979252e-06, + "loss": 0.8721164464950562, + "step": 4233 + }, + { + "epoch": 0.9755760368663594, + "grad_norm": 1.0382014546922784, + "learning_rate": 1.1231255239611321e-06, + "loss": 0.9398131370544434, + "step": 4234 + }, + { + 
"epoch": 0.9758064516129032, + "grad_norm": 1.0916134182788353, + "learning_rate": 1.1227474300715054e-06, + "loss": 0.8124324083328247, + "step": 4235 + }, + { + "epoch": 0.976036866359447, + "grad_norm": 0.8607187401974831, + "learning_rate": 1.1223693183652515e-06, + "loss": 0.8532534837722778, + "step": 4236 + }, + { + "epoch": 0.9762672811059908, + "grad_norm": 1.10871517745179, + "learning_rate": 1.1219911888972536e-06, + "loss": 0.7547662258148193, + "step": 4237 + }, + { + "epoch": 0.9764976958525345, + "grad_norm": 1.036940513326952, + "learning_rate": 1.1216130417223956e-06, + "loss": 0.7407231330871582, + "step": 4238 + }, + { + "epoch": 0.9767281105990784, + "grad_norm": 1.0573090435680337, + "learning_rate": 1.1212348768955657e-06, + "loss": 0.8190197944641113, + "step": 4239 + }, + { + "epoch": 0.9769585253456221, + "grad_norm": 1.111465926757279, + "learning_rate": 1.1208566944716542e-06, + "loss": 0.6641337871551514, + "step": 4240 + }, + { + "epoch": 0.977188940092166, + "grad_norm": 1.224342353107687, + "learning_rate": 1.120478494505553e-06, + "loss": 0.8953202962875366, + "step": 4241 + }, + { + "epoch": 0.9774193548387097, + "grad_norm": 0.9676272600083323, + "learning_rate": 1.1201002770521583e-06, + "loss": 0.7803191542625427, + "step": 4242 + }, + { + "epoch": 0.9776497695852534, + "grad_norm": 1.1107043139306134, + "learning_rate": 1.1197220421663674e-06, + "loss": 0.6827100515365601, + "step": 4243 + }, + { + "epoch": 0.9778801843317972, + "grad_norm": 1.2085442462659117, + "learning_rate": 1.1193437899030802e-06, + "loss": 0.8513565063476562, + "step": 4244 + }, + { + "epoch": 0.978110599078341, + "grad_norm": 0.9785496460004156, + "learning_rate": 1.1189655203172e-06, + "loss": 0.7196829915046692, + "step": 4245 + }, + { + "epoch": 0.9783410138248848, + "grad_norm": 1.0764048064511267, + "learning_rate": 1.1185872334636319e-06, + "loss": 0.7823485136032104, + "step": 4246 + }, + { + "epoch": 0.9785714285714285, + "grad_norm": 
1.0963006166840967, + "learning_rate": 1.1182089293972841e-06, + "loss": 0.7178136110305786, + "step": 4247 + }, + { + "epoch": 0.9788018433179724, + "grad_norm": 1.0782886091125194, + "learning_rate": 1.1178306081730664e-06, + "loss": 0.7746715545654297, + "step": 4248 + }, + { + "epoch": 0.9790322580645161, + "grad_norm": 0.9177757629071243, + "learning_rate": 1.117452269845892e-06, + "loss": 0.8829167485237122, + "step": 4249 + }, + { + "epoch": 0.9792626728110599, + "grad_norm": 0.9096983569344097, + "learning_rate": 1.1170739144706764e-06, + "loss": 0.7592206001281738, + "step": 4250 + }, + { + "epoch": 0.9794930875576037, + "grad_norm": 0.8361017174057647, + "learning_rate": 1.1166955421023368e-06, + "loss": 0.8107382655143738, + "step": 4251 + }, + { + "epoch": 0.9797235023041475, + "grad_norm": 0.9837092835211146, + "learning_rate": 1.116317152795794e-06, + "loss": 0.6807001829147339, + "step": 4252 + }, + { + "epoch": 0.9799539170506912, + "grad_norm": 1.1872199804636603, + "learning_rate": 1.1159387466059705e-06, + "loss": 0.7752517461776733, + "step": 4253 + }, + { + "epoch": 0.9801843317972351, + "grad_norm": 0.8560133871531077, + "learning_rate": 1.115560323587791e-06, + "loss": 0.7484745383262634, + "step": 4254 + }, + { + "epoch": 0.9804147465437788, + "grad_norm": 1.153488759551228, + "learning_rate": 1.1151818837961838e-06, + "loss": 0.877413809299469, + "step": 4255 + }, + { + "epoch": 0.9806451612903225, + "grad_norm": 1.0087457568089837, + "learning_rate": 1.1148034272860785e-06, + "loss": 0.7806656360626221, + "step": 4256 + }, + { + "epoch": 0.9808755760368664, + "grad_norm": 0.849135201735791, + "learning_rate": 1.1144249541124078e-06, + "loss": 0.6938076019287109, + "step": 4257 + }, + { + "epoch": 0.9811059907834101, + "grad_norm": 1.0559339187336096, + "learning_rate": 1.1140464643301064e-06, + "loss": 0.8832957148551941, + "step": 4258 + }, + { + "epoch": 0.9813364055299539, + "grad_norm": 1.1632523287766907, + "learning_rate": 
1.1136679579941117e-06, + "loss": 0.7794016003608704, + "step": 4259 + }, + { + "epoch": 0.9815668202764977, + "grad_norm": 0.9689102084269609, + "learning_rate": 1.1132894351593636e-06, + "loss": 0.6877585053443909, + "step": 4260 + }, + { + "epoch": 0.9817972350230415, + "grad_norm": 1.0902109747190951, + "learning_rate": 1.1129108958808037e-06, + "loss": 0.8268473148345947, + "step": 4261 + }, + { + "epoch": 0.9820276497695852, + "grad_norm": 1.0260596307079526, + "learning_rate": 1.112532340213377e-06, + "loss": 0.6717547178268433, + "step": 4262 + }, + { + "epoch": 0.9822580645161291, + "grad_norm": 1.0646130416760407, + "learning_rate": 1.11215376821203e-06, + "loss": 0.849999725818634, + "step": 4263 + }, + { + "epoch": 0.9824884792626728, + "grad_norm": 1.005034332417578, + "learning_rate": 1.1117751799317118e-06, + "loss": 0.6562552452087402, + "step": 4264 + }, + { + "epoch": 0.9827188940092166, + "grad_norm": 1.0885536317886024, + "learning_rate": 1.1113965754273743e-06, + "loss": 0.7734784483909607, + "step": 4265 + }, + { + "epoch": 0.9829493087557604, + "grad_norm": 1.0527283904271951, + "learning_rate": 1.1110179547539717e-06, + "loss": 0.7580564022064209, + "step": 4266 + }, + { + "epoch": 0.9831797235023041, + "grad_norm": 1.121984331535499, + "learning_rate": 1.1106393179664595e-06, + "loss": 0.9207481145858765, + "step": 4267 + }, + { + "epoch": 0.9834101382488479, + "grad_norm": 1.1182241685665208, + "learning_rate": 1.1102606651197968e-06, + "loss": 0.8987482786178589, + "step": 4268 + }, + { + "epoch": 0.9836405529953917, + "grad_norm": 0.8558732255272679, + "learning_rate": 1.1098819962689445e-06, + "loss": 0.7486778497695923, + "step": 4269 + }, + { + "epoch": 0.9838709677419355, + "grad_norm": 0.9905311956335509, + "learning_rate": 1.1095033114688662e-06, + "loss": 0.7387109994888306, + "step": 4270 + }, + { + "epoch": 0.9841013824884792, + "grad_norm": 0.913366940312768, + "learning_rate": 1.109124610774527e-06, + "loss": 
0.7337637543678284, + "step": 4271 + }, + { + "epoch": 0.9843317972350231, + "grad_norm": 1.1127819698251733, + "learning_rate": 1.1087458942408952e-06, + "loss": 0.7419463396072388, + "step": 4272 + }, + { + "epoch": 0.9845622119815668, + "grad_norm": 1.0024132905496845, + "learning_rate": 1.1083671619229407e-06, + "loss": 0.7525068521499634, + "step": 4273 + }, + { + "epoch": 0.9847926267281106, + "grad_norm": 1.2794306882440036, + "learning_rate": 1.107988413875636e-06, + "loss": 0.8593931198120117, + "step": 4274 + }, + { + "epoch": 0.9850230414746544, + "grad_norm": 1.1058497522784536, + "learning_rate": 1.107609650153956e-06, + "loss": 0.9123519659042358, + "step": 4275 + }, + { + "epoch": 0.9852534562211982, + "grad_norm": 1.0134863035075283, + "learning_rate": 1.107230870812878e-06, + "loss": 0.7099615335464478, + "step": 4276 + }, + { + "epoch": 0.9854838709677419, + "grad_norm": 1.0305482113277953, + "learning_rate": 1.1068520759073807e-06, + "loss": 0.9525141716003418, + "step": 4277 + }, + { + "epoch": 0.9857142857142858, + "grad_norm": 1.078520213597711, + "learning_rate": 1.106473265492446e-06, + "loss": 0.8360154628753662, + "step": 4278 + }, + { + "epoch": 0.9859447004608295, + "grad_norm": 0.835665323629814, + "learning_rate": 1.106094439623058e-06, + "loss": 0.7788960933685303, + "step": 4279 + }, + { + "epoch": 0.9861751152073732, + "grad_norm": 1.4332707697001132, + "learning_rate": 1.1057155983542024e-06, + "loss": 0.76897132396698, + "step": 4280 + }, + { + "epoch": 0.9864055299539171, + "grad_norm": 1.2788839563876278, + "learning_rate": 1.1053367417408678e-06, + "loss": 0.8062764406204224, + "step": 4281 + }, + { + "epoch": 0.9866359447004608, + "grad_norm": 1.0759322336892816, + "learning_rate": 1.1049578698380446e-06, + "loss": 0.6796555519104004, + "step": 4282 + }, + { + "epoch": 0.9868663594470046, + "grad_norm": 1.2156156083740777, + "learning_rate": 1.1045789827007256e-06, + "loss": 0.8495693206787109, + "step": 4283 + }, + { + 
"epoch": 0.9870967741935484, + "grad_norm": 1.1065961656311563, + "learning_rate": 1.1042000803839054e-06, + "loss": 0.9202588200569153, + "step": 4284 + }, + { + "epoch": 0.9873271889400922, + "grad_norm": 1.0492103887070696, + "learning_rate": 1.1038211629425815e-06, + "loss": 0.8204039335250854, + "step": 4285 + }, + { + "epoch": 0.9875576036866359, + "grad_norm": 1.3424135227199923, + "learning_rate": 1.1034422304317534e-06, + "loss": 0.921082615852356, + "step": 4286 + }, + { + "epoch": 0.9877880184331798, + "grad_norm": 1.1158968493314756, + "learning_rate": 1.1030632829064225e-06, + "loss": 0.8114739656448364, + "step": 4287 + }, + { + "epoch": 0.9880184331797235, + "grad_norm": 1.160400130956272, + "learning_rate": 1.1026843204215924e-06, + "loss": 0.7394933700561523, + "step": 4288 + }, + { + "epoch": 0.9882488479262673, + "grad_norm": 1.102093260654992, + "learning_rate": 1.1023053430322692e-06, + "loss": 0.9515210390090942, + "step": 4289 + }, + { + "epoch": 0.988479262672811, + "grad_norm": 1.0914130901392678, + "learning_rate": 1.1019263507934611e-06, + "loss": 0.6729186773300171, + "step": 4290 + }, + { + "epoch": 0.9887096774193549, + "grad_norm": 0.9547635126100301, + "learning_rate": 1.1015473437601776e-06, + "loss": 0.6455283164978027, + "step": 4291 + }, + { + "epoch": 0.9889400921658986, + "grad_norm": 1.1259220869244864, + "learning_rate": 1.1011683219874322e-06, + "loss": 0.8071424961090088, + "step": 4292 + }, + { + "epoch": 0.9891705069124423, + "grad_norm": 0.8980294635582122, + "learning_rate": 1.1007892855302385e-06, + "loss": 0.7287160754203796, + "step": 4293 + }, + { + "epoch": 0.9894009216589862, + "grad_norm": 0.956104694967055, + "learning_rate": 1.1004102344436135e-06, + "loss": 0.7916513681411743, + "step": 4294 + }, + { + "epoch": 0.9896313364055299, + "grad_norm": 0.948939194234829, + "learning_rate": 1.1000311687825757e-06, + "loss": 0.8075610399246216, + "step": 4295 + }, + { + "epoch": 0.9898617511520738, + "grad_norm": 
0.8467724433306772, + "learning_rate": 1.0996520886021465e-06, + "loss": 0.6144437193870544, + "step": 4296 + }, + { + "epoch": 0.9900921658986175, + "grad_norm": 1.1816936561057356, + "learning_rate": 1.0992729939573482e-06, + "loss": 0.830337643623352, + "step": 4297 + }, + { + "epoch": 0.9903225806451613, + "grad_norm": 1.1631921516982922, + "learning_rate": 1.0988938849032063e-06, + "loss": 0.7104393243789673, + "step": 4298 + }, + { + "epoch": 0.990552995391705, + "grad_norm": 1.0166827801425276, + "learning_rate": 1.0985147614947484e-06, + "loss": 0.746238112449646, + "step": 4299 + }, + { + "epoch": 0.9907834101382489, + "grad_norm": 0.8744941548736713, + "learning_rate": 1.0981356237870027e-06, + "loss": 0.7309597730636597, + "step": 4300 + }, + { + "epoch": 0.9910138248847926, + "grad_norm": 1.1787483382236952, + "learning_rate": 1.0977564718350013e-06, + "loss": 0.799136757850647, + "step": 4301 + }, + { + "epoch": 0.9912442396313365, + "grad_norm": 1.146252036070138, + "learning_rate": 1.0973773056937776e-06, + "loss": 0.7477747201919556, + "step": 4302 + }, + { + "epoch": 0.9914746543778802, + "grad_norm": 1.1466743668258872, + "learning_rate": 1.0969981254183668e-06, + "loss": 0.8051053285598755, + "step": 4303 + }, + { + "epoch": 0.9917050691244239, + "grad_norm": 0.9910519080633017, + "learning_rate": 1.0966189310638063e-06, + "loss": 0.8023163080215454, + "step": 4304 + }, + { + "epoch": 0.9919354838709677, + "grad_norm": 0.9483313078672773, + "learning_rate": 1.096239722685136e-06, + "loss": 0.6804348230361938, + "step": 4305 + }, + { + "epoch": 0.9921658986175115, + "grad_norm": 1.119857177527024, + "learning_rate": 1.0958605003373976e-06, + "loss": 0.8276509046554565, + "step": 4306 + }, + { + "epoch": 0.9923963133640553, + "grad_norm": 1.2511674827094457, + "learning_rate": 1.095481264075634e-06, + "loss": 0.9733830690383911, + "step": 4307 + }, + { + "epoch": 0.992626728110599, + "grad_norm": 1.070745120202566, + "learning_rate": 
1.0951020139548917e-06, + "loss": 0.824803352355957, + "step": 4308 + }, + { + "epoch": 0.9928571428571429, + "grad_norm": 1.100108017822232, + "learning_rate": 1.094722750030218e-06, + "loss": 0.8144090175628662, + "step": 4309 + }, + { + "epoch": 0.9930875576036866, + "grad_norm": 1.1329325704330306, + "learning_rate": 1.0943434723566623e-06, + "loss": 0.8394016027450562, + "step": 4310 + }, + { + "epoch": 0.9933179723502304, + "grad_norm": 1.0464489724076296, + "learning_rate": 1.0939641809892766e-06, + "loss": 0.7688177824020386, + "step": 4311 + }, + { + "epoch": 0.9935483870967742, + "grad_norm": 1.0599291427198123, + "learning_rate": 1.0935848759831144e-06, + "loss": 0.8157391548156738, + "step": 4312 + }, + { + "epoch": 0.993778801843318, + "grad_norm": 1.0072726544693649, + "learning_rate": 1.0932055573932316e-06, + "loss": 0.7618423700332642, + "step": 4313 + }, + { + "epoch": 0.9940092165898617, + "grad_norm": 0.8996295977906229, + "learning_rate": 1.0928262252746848e-06, + "loss": 0.7404567003250122, + "step": 4314 + }, + { + "epoch": 0.9942396313364056, + "grad_norm": 0.8729845318677907, + "learning_rate": 1.092446879682535e-06, + "loss": 0.6825613975524902, + "step": 4315 + }, + { + "epoch": 0.9944700460829493, + "grad_norm": 0.886318283085954, + "learning_rate": 1.0920675206718428e-06, + "loss": 0.6607732772827148, + "step": 4316 + }, + { + "epoch": 0.994700460829493, + "grad_norm": 1.1703494407740602, + "learning_rate": 1.0916881482976716e-06, + "loss": 0.715195894241333, + "step": 4317 + }, + { + "epoch": 0.9949308755760369, + "grad_norm": 1.0266525014281969, + "learning_rate": 1.0913087626150872e-06, + "loss": 0.7593914270401001, + "step": 4318 + }, + { + "epoch": 0.9951612903225806, + "grad_norm": 0.9546142286310197, + "learning_rate": 1.090929363679157e-06, + "loss": 0.8368399143218994, + "step": 4319 + }, + { + "epoch": 0.9953917050691244, + "grad_norm": 1.0080836713071024, + "learning_rate": 1.0905499515449499e-06, + "loss": 
0.7799170613288879, + "step": 4320 + }, + { + "epoch": 0.9956221198156682, + "grad_norm": 1.0450181436512773, + "learning_rate": 1.0901705262675372e-06, + "loss": 0.8194636702537537, + "step": 4321 + }, + { + "epoch": 0.995852534562212, + "grad_norm": 0.7482572391575254, + "learning_rate": 1.0897910879019917e-06, + "loss": 0.7150344848632812, + "step": 4322 + }, + { + "epoch": 0.9960829493087557, + "grad_norm": 1.0624528328831144, + "learning_rate": 1.089411636503389e-06, + "loss": 0.737568736076355, + "step": 4323 + }, + { + "epoch": 0.9963133640552996, + "grad_norm": 0.9578129661977193, + "learning_rate": 1.0890321721268056e-06, + "loss": 0.7037359476089478, + "step": 4324 + }, + { + "epoch": 0.9965437788018433, + "grad_norm": 1.1660806477651886, + "learning_rate": 1.0886526948273206e-06, + "loss": 0.7664542198181152, + "step": 4325 + }, + { + "epoch": 0.9967741935483871, + "grad_norm": 1.1927624722703807, + "learning_rate": 1.0882732046600138e-06, + "loss": 0.7700943946838379, + "step": 4326 + }, + { + "epoch": 0.9970046082949309, + "grad_norm": 0.9828460552540413, + "learning_rate": 1.0878937016799683e-06, + "loss": 0.7634885311126709, + "step": 4327 + }, + { + "epoch": 0.9972350230414746, + "grad_norm": 0.9138031795649807, + "learning_rate": 1.0875141859422685e-06, + "loss": 0.6784960031509399, + "step": 4328 + }, + { + "epoch": 0.9974654377880184, + "grad_norm": 0.9227707667287056, + "learning_rate": 1.0871346575020002e-06, + "loss": 0.7224948406219482, + "step": 4329 + }, + { + "epoch": 0.9976958525345622, + "grad_norm": 1.140456315375248, + "learning_rate": 1.086755116414252e-06, + "loss": 0.7886664867401123, + "step": 4330 + }, + { + "epoch": 0.997926267281106, + "grad_norm": 0.8735584486255558, + "learning_rate": 1.0863755627341133e-06, + "loss": 0.7871295809745789, + "step": 4331 + }, + { + "epoch": 0.9981566820276497, + "grad_norm": 0.9703663985745814, + "learning_rate": 1.085995996516676e-06, + "loss": 0.700717568397522, + "step": 4332 + }, + { + 
"epoch": 0.9983870967741936, + "grad_norm": 1.0137806073331785, + "learning_rate": 1.085616417817034e-06, + "loss": 0.9090461730957031, + "step": 4333 + }, + { + "epoch": 0.9986175115207373, + "grad_norm": 0.8161279565195018, + "learning_rate": 1.0852368266902818e-06, + "loss": 0.7697109580039978, + "step": 4334 + }, + { + "epoch": 0.9988479262672811, + "grad_norm": 1.1335275167371797, + "learning_rate": 1.0848572231915177e-06, + "loss": 0.8135972023010254, + "step": 4335 + }, + { + "epoch": 0.9990783410138249, + "grad_norm": 0.9620227504979613, + "learning_rate": 1.0844776073758392e-06, + "loss": 0.803811252117157, + "step": 4336 + }, + { + "epoch": 0.9993087557603687, + "grad_norm": 1.1159399325844028, + "learning_rate": 1.0840979792983482e-06, + "loss": 0.874006986618042, + "step": 4337 + }, + { + "epoch": 0.9995391705069124, + "grad_norm": 1.0695664725891423, + "learning_rate": 1.0837183390141472e-06, + "loss": 0.7424730062484741, + "step": 4338 + }, + { + "epoch": 0.9997695852534563, + "grad_norm": 1.0413618177070603, + "learning_rate": 1.0833386865783393e-06, + "loss": 0.8219665884971619, + "step": 4339 + }, + { + "epoch": 1.0, + "grad_norm": 1.2200287736254531, + "learning_rate": 1.0829590220460319e-06, + "loss": 0.7065195441246033, + "step": 4340 + }, + { + "epoch": 1.0002304147465437, + "grad_norm": 1.4255251627812264, + "learning_rate": 1.0825793454723324e-06, + "loss": 0.7988346219062805, + "step": 4341 + }, + { + "epoch": 1.0004608294930875, + "grad_norm": 0.9544404961531333, + "learning_rate": 1.08219965691235e-06, + "loss": 0.6731617450714111, + "step": 4342 + }, + { + "epoch": 1.0006912442396314, + "grad_norm": 1.0713203032897287, + "learning_rate": 1.0818199564211964e-06, + "loss": 0.8058687448501587, + "step": 4343 + }, + { + "epoch": 1.0009216589861751, + "grad_norm": 1.2330384736552804, + "learning_rate": 1.081440244053984e-06, + "loss": 0.8351448178291321, + "step": 4344 + }, + { + "epoch": 1.0011520737327189, + "grad_norm": 0.9578484310628987, 
+ "learning_rate": 1.0810605198658286e-06, + "loss": 0.8619185090065002, + "step": 4345 + }, + { + "epoch": 1.0013824884792626, + "grad_norm": 1.030004028036847, + "learning_rate": 1.0806807839118455e-06, + "loss": 0.7600966691970825, + "step": 4346 + }, + { + "epoch": 1.0016129032258065, + "grad_norm": 1.103182000242006, + "learning_rate": 1.0803010362471536e-06, + "loss": 0.8123422265052795, + "step": 4347 + }, + { + "epoch": 1.0018433179723503, + "grad_norm": 1.0359331933938025, + "learning_rate": 1.0799212769268727e-06, + "loss": 0.8277603983879089, + "step": 4348 + }, + { + "epoch": 1.002073732718894, + "grad_norm": 0.7466130076646643, + "learning_rate": 1.079541506006124e-06, + "loss": 0.6666774153709412, + "step": 4349 + }, + { + "epoch": 1.0023041474654377, + "grad_norm": 1.0582236596847403, + "learning_rate": 1.0791617235400313e-06, + "loss": 0.8483254909515381, + "step": 4350 + }, + { + "epoch": 1.0025345622119817, + "grad_norm": 0.9094409000603249, + "learning_rate": 1.0787819295837193e-06, + "loss": 0.6585661172866821, + "step": 4351 + }, + { + "epoch": 1.0027649769585254, + "grad_norm": 1.0274936512349702, + "learning_rate": 1.0784021241923142e-06, + "loss": 0.7591124773025513, + "step": 4352 + }, + { + "epoch": 1.0029953917050691, + "grad_norm": 1.0201165998262116, + "learning_rate": 1.078022307420945e-06, + "loss": 0.7305805683135986, + "step": 4353 + }, + { + "epoch": 1.0032258064516129, + "grad_norm": 0.8894858318623733, + "learning_rate": 1.0776424793247407e-06, + "loss": 0.6558996438980103, + "step": 4354 + }, + { + "epoch": 1.0034562211981566, + "grad_norm": 1.313034349644303, + "learning_rate": 1.0772626399588336e-06, + "loss": 0.6837360262870789, + "step": 4355 + }, + { + "epoch": 1.0036866359447005, + "grad_norm": 0.9187212026563307, + "learning_rate": 1.0768827893783562e-06, + "loss": 0.778124988079071, + "step": 4356 + }, + { + "epoch": 1.0039170506912443, + "grad_norm": 1.0828207561971888, + "learning_rate": 1.0765029276384438e-06, + 
"loss": 0.7676408886909485, + "step": 4357 + }, + { + "epoch": 1.004147465437788, + "grad_norm": 1.1604376015370672, + "learning_rate": 1.0761230547942333e-06, + "loss": 0.854246973991394, + "step": 4358 + }, + { + "epoch": 1.0043778801843317, + "grad_norm": 0.9177073619188721, + "learning_rate": 1.0757431709008615e-06, + "loss": 0.716766893863678, + "step": 4359 + }, + { + "epoch": 1.0046082949308757, + "grad_norm": 0.9439720321299626, + "learning_rate": 1.075363276013469e-06, + "loss": 0.6827799081802368, + "step": 4360 + }, + { + "epoch": 1.0048387096774194, + "grad_norm": 0.9539231430903122, + "learning_rate": 1.074983370187197e-06, + "loss": 0.7977348566055298, + "step": 4361 + }, + { + "epoch": 1.0050691244239631, + "grad_norm": 1.1227456227969494, + "learning_rate": 1.0746034534771878e-06, + "loss": 0.6958035826683044, + "step": 4362 + }, + { + "epoch": 1.0052995391705069, + "grad_norm": 0.9288361874867539, + "learning_rate": 1.0742235259385861e-06, + "loss": 0.8407979607582092, + "step": 4363 + }, + { + "epoch": 1.0055299539170508, + "grad_norm": 0.8466973629768922, + "learning_rate": 1.073843587626538e-06, + "loss": 0.8180495500564575, + "step": 4364 + }, + { + "epoch": 1.0057603686635945, + "grad_norm": 0.9973113541484702, + "learning_rate": 1.0734636385961907e-06, + "loss": 0.7551306486129761, + "step": 4365 + }, + { + "epoch": 1.0059907834101383, + "grad_norm": 1.1054013447474482, + "learning_rate": 1.0730836789026936e-06, + "loss": 0.6598455309867859, + "step": 4366 + }, + { + "epoch": 1.006221198156682, + "grad_norm": 0.9578758202335947, + "learning_rate": 1.0727037086011971e-06, + "loss": 0.9186126589775085, + "step": 4367 + }, + { + "epoch": 1.0064516129032257, + "grad_norm": 1.0208878451508383, + "learning_rate": 1.0723237277468538e-06, + "loss": 0.8491259813308716, + "step": 4368 + }, + { + "epoch": 1.0066820276497697, + "grad_norm": 1.0678483382751343, + "learning_rate": 1.071943736394817e-06, + "loss": 0.6938691139221191, + "step": 4369 + }, + { 
+ "epoch": 1.0069124423963134, + "grad_norm": 1.1084737690479445, + "learning_rate": 1.0715637346002423e-06, + "loss": 0.801313579082489, + "step": 4370 + }, + { + "epoch": 1.0071428571428571, + "grad_norm": 0.983698557868892, + "learning_rate": 1.071183722418286e-06, + "loss": 0.7663706541061401, + "step": 4371 + }, + { + "epoch": 1.0073732718894008, + "grad_norm": 0.8508185045615759, + "learning_rate": 1.070803699904107e-06, + "loss": 0.7434467077255249, + "step": 4372 + }, + { + "epoch": 1.0076036866359448, + "grad_norm": 1.331303605136832, + "learning_rate": 1.0704236671128643e-06, + "loss": 0.8366774320602417, + "step": 4373 + }, + { + "epoch": 1.0078341013824885, + "grad_norm": 1.276875198714222, + "learning_rate": 1.07004362409972e-06, + "loss": 0.7027710676193237, + "step": 4374 + }, + { + "epoch": 1.0080645161290323, + "grad_norm": 1.1122995966371962, + "learning_rate": 1.0696635709198357e-06, + "loss": 0.7965548038482666, + "step": 4375 + }, + { + "epoch": 1.008294930875576, + "grad_norm": 1.0387807228424288, + "learning_rate": 1.0692835076283768e-06, + "loss": 0.8058432340621948, + "step": 4376 + }, + { + "epoch": 1.0085253456221197, + "grad_norm": 1.1870264013217662, + "learning_rate": 1.0689034342805085e-06, + "loss": 0.9056248068809509, + "step": 4377 + }, + { + "epoch": 1.0087557603686637, + "grad_norm": 1.0069765876574615, + "learning_rate": 1.0685233509313979e-06, + "loss": 0.8407673835754395, + "step": 4378 + }, + { + "epoch": 1.0089861751152074, + "grad_norm": 1.3133023777292065, + "learning_rate": 1.0681432576362133e-06, + "loss": 0.9138794541358948, + "step": 4379 + }, + { + "epoch": 1.0092165898617511, + "grad_norm": 1.3361237624577444, + "learning_rate": 1.067763154450125e-06, + "loss": 0.6640630960464478, + "step": 4380 + }, + { + "epoch": 1.0094470046082948, + "grad_norm": 1.4646712113013267, + "learning_rate": 1.0673830414283051e-06, + "loss": 0.9387146234512329, + "step": 4381 + }, + { + "epoch": 1.0096774193548388, + "grad_norm": 
1.0228212242769696, + "learning_rate": 1.067002918625926e-06, + "loss": 0.7288271188735962, + "step": 4382 + }, + { + "epoch": 1.0099078341013825, + "grad_norm": 1.1693551967727813, + "learning_rate": 1.0666227860981613e-06, + "loss": 0.7886035442352295, + "step": 4383 + }, + { + "epoch": 1.0101382488479262, + "grad_norm": 1.056596025284508, + "learning_rate": 1.066242643900188e-06, + "loss": 0.6929852962493896, + "step": 4384 + }, + { + "epoch": 1.01036866359447, + "grad_norm": 0.9057033157053335, + "learning_rate": 1.065862492087182e-06, + "loss": 0.7709990739822388, + "step": 4385 + }, + { + "epoch": 1.010599078341014, + "grad_norm": 1.0362803754904506, + "learning_rate": 1.065482330714323e-06, + "loss": 0.811382532119751, + "step": 4386 + }, + { + "epoch": 1.0108294930875577, + "grad_norm": 1.2204693151649666, + "learning_rate": 1.0651021598367905e-06, + "loss": 0.8274353742599487, + "step": 4387 + }, + { + "epoch": 1.0110599078341014, + "grad_norm": 0.9995911348883496, + "learning_rate": 1.0647219795097651e-06, + "loss": 0.7449204921722412, + "step": 4388 + }, + { + "epoch": 1.011290322580645, + "grad_norm": 0.906861932756066, + "learning_rate": 1.0643417897884303e-06, + "loss": 0.675945520401001, + "step": 4389 + }, + { + "epoch": 1.0115207373271888, + "grad_norm": 1.183632210098949, + "learning_rate": 1.06396159072797e-06, + "loss": 0.7329400777816772, + "step": 4390 + }, + { + "epoch": 1.0117511520737328, + "grad_norm": 0.9566645616399831, + "learning_rate": 1.0635813823835692e-06, + "loss": 0.7809139490127563, + "step": 4391 + }, + { + "epoch": 1.0119815668202765, + "grad_norm": 1.0167427862718812, + "learning_rate": 1.0632011648104155e-06, + "loss": 0.799081563949585, + "step": 4392 + }, + { + "epoch": 1.0122119815668202, + "grad_norm": 1.0484890321007356, + "learning_rate": 1.062820938063696e-06, + "loss": 0.7738279104232788, + "step": 4393 + }, + { + "epoch": 1.012442396313364, + "grad_norm": 0.9791695127555486, + "learning_rate": 
1.0624407021986007e-06, + "loss": 0.895797610282898, + "step": 4394 + }, + { + "epoch": 1.012672811059908, + "grad_norm": 0.9476041908693101, + "learning_rate": 1.0620604572703198e-06, + "loss": 0.6887848973274231, + "step": 4395 + }, + { + "epoch": 1.0129032258064516, + "grad_norm": 1.0915270783702586, + "learning_rate": 1.0616802033340457e-06, + "loss": 0.9540888071060181, + "step": 4396 + }, + { + "epoch": 1.0131336405529954, + "grad_norm": 1.3368596619746418, + "learning_rate": 1.0612999404449721e-06, + "loss": 0.9047783017158508, + "step": 4397 + }, + { + "epoch": 1.013364055299539, + "grad_norm": 0.924946076870977, + "learning_rate": 1.0609196686582931e-06, + "loss": 0.7030448913574219, + "step": 4398 + }, + { + "epoch": 1.013594470046083, + "grad_norm": 0.9501232585433265, + "learning_rate": 1.0605393880292046e-06, + "loss": 0.8097348213195801, + "step": 4399 + }, + { + "epoch": 1.0138248847926268, + "grad_norm": 1.0163791343408108, + "learning_rate": 1.0601590986129045e-06, + "loss": 0.7446185350418091, + "step": 4400 + }, + { + "epoch": 1.0140552995391705, + "grad_norm": 1.0548185515811, + "learning_rate": 1.0597788004645908e-06, + "loss": 0.7450964450836182, + "step": 4401 + }, + { + "epoch": 1.0142857142857142, + "grad_norm": 1.1891450532947472, + "learning_rate": 1.0593984936394632e-06, + "loss": 0.8326355218887329, + "step": 4402 + }, + { + "epoch": 1.014516129032258, + "grad_norm": 1.0194370020803867, + "learning_rate": 1.0590181781927227e-06, + "loss": 0.7013953924179077, + "step": 4403 + }, + { + "epoch": 1.014746543778802, + "grad_norm": 1.2634402455639506, + "learning_rate": 1.0586378541795723e-06, + "loss": 0.7806364297866821, + "step": 4404 + }, + { + "epoch": 1.0149769585253456, + "grad_norm": 1.2061797737844093, + "learning_rate": 1.0582575216552146e-06, + "loss": 0.8207389116287231, + "step": 4405 + }, + { + "epoch": 1.0152073732718894, + "grad_norm": 1.123863770924685, + "learning_rate": 1.0578771806748545e-06, + "loss": 0.8042873740196228, 
+ "step": 4406 + }, + { + "epoch": 1.015437788018433, + "grad_norm": 0.9837741196260199, + "learning_rate": 1.057496831293699e-06, + "loss": 0.7225071787834167, + "step": 4407 + }, + { + "epoch": 1.015668202764977, + "grad_norm": 0.8165867352878113, + "learning_rate": 1.0571164735669538e-06, + "loss": 0.7783743143081665, + "step": 4408 + }, + { + "epoch": 1.0158986175115208, + "grad_norm": 1.1050702802288892, + "learning_rate": 1.0567361075498286e-06, + "loss": 0.7455039024353027, + "step": 4409 + }, + { + "epoch": 1.0161290322580645, + "grad_norm": 1.0331220241961572, + "learning_rate": 1.0563557332975322e-06, + "loss": 0.7819615602493286, + "step": 4410 + }, + { + "epoch": 1.0163594470046082, + "grad_norm": 1.052305833495017, + "learning_rate": 1.0559753508652758e-06, + "loss": 0.6466404795646667, + "step": 4411 + }, + { + "epoch": 1.0165898617511522, + "grad_norm": 0.9503687927611121, + "learning_rate": 1.0555949603082715e-06, + "loss": 0.8728539943695068, + "step": 4412 + }, + { + "epoch": 1.016820276497696, + "grad_norm": 0.9080353373358744, + "learning_rate": 1.055214561681732e-06, + "loss": 0.6082659959793091, + "step": 4413 + }, + { + "epoch": 1.0170506912442396, + "grad_norm": 1.1401384988886654, + "learning_rate": 1.054834155040872e-06, + "loss": 0.8429103493690491, + "step": 4414 + }, + { + "epoch": 1.0172811059907834, + "grad_norm": 0.9060045457810262, + "learning_rate": 1.0544537404409073e-06, + "loss": 0.7953135967254639, + "step": 4415 + }, + { + "epoch": 1.017511520737327, + "grad_norm": 0.6713482182574511, + "learning_rate": 1.0540733179370542e-06, + "loss": 0.7243527173995972, + "step": 4416 + }, + { + "epoch": 1.017741935483871, + "grad_norm": 1.4572192259453962, + "learning_rate": 1.0536928875845303e-06, + "loss": 0.6882613897323608, + "step": 4417 + }, + { + "epoch": 1.0179723502304148, + "grad_norm": 0.9719982264568039, + "learning_rate": 1.053312449438555e-06, + "loss": 0.9157286882400513, + "step": 4418 + }, + { + "epoch": 
1.0182027649769585, + "grad_norm": 1.1196456434566004, + "learning_rate": 1.0529320035543482e-06, + "loss": 0.7224643230438232, + "step": 4419 + }, + { + "epoch": 1.0184331797235022, + "grad_norm": 1.4712628070157254, + "learning_rate": 1.0525515499871311e-06, + "loss": 0.874829888343811, + "step": 4420 + }, + { + "epoch": 1.0186635944700462, + "grad_norm": 0.9184049522457163, + "learning_rate": 1.0521710887921262e-06, + "loss": 0.6911267042160034, + "step": 4421 + }, + { + "epoch": 1.01889400921659, + "grad_norm": 1.1423796554253005, + "learning_rate": 1.051790620024557e-06, + "loss": 0.9065574407577515, + "step": 4422 + }, + { + "epoch": 1.0191244239631336, + "grad_norm": 1.225714416603257, + "learning_rate": 1.0514101437396474e-06, + "loss": 0.7671108245849609, + "step": 4423 + }, + { + "epoch": 1.0193548387096774, + "grad_norm": 1.3506661037387142, + "learning_rate": 1.051029659992624e-06, + "loss": 0.8706510066986084, + "step": 4424 + }, + { + "epoch": 1.019585253456221, + "grad_norm": 1.4185673299670827, + "learning_rate": 1.0506491688387128e-06, + "loss": 0.741087794303894, + "step": 4425 + }, + { + "epoch": 1.019815668202765, + "grad_norm": 1.0122076007105019, + "learning_rate": 1.0502686703331419e-06, + "loss": 0.8045330047607422, + "step": 4426 + }, + { + "epoch": 1.0200460829493088, + "grad_norm": 1.1768435258548835, + "learning_rate": 1.0498881645311398e-06, + "loss": 0.8464969992637634, + "step": 4427 + }, + { + "epoch": 1.0202764976958525, + "grad_norm": 1.1260966872974236, + "learning_rate": 1.0495076514879367e-06, + "loss": 0.7660650610923767, + "step": 4428 + }, + { + "epoch": 1.0205069124423962, + "grad_norm": 1.0026539513539563, + "learning_rate": 1.0491271312587636e-06, + "loss": 0.8565669059753418, + "step": 4429 + }, + { + "epoch": 1.0207373271889402, + "grad_norm": 1.306851956145893, + "learning_rate": 1.0487466038988525e-06, + "loss": 0.8884295225143433, + "step": 4430 + }, + { + "epoch": 1.020967741935484, + "grad_norm": 1.0672501887857282, 
+ "learning_rate": 1.0483660694634361e-06, + "loss": 0.7300036549568176, + "step": 4431 + }, + { + "epoch": 1.0211981566820276, + "grad_norm": 1.261937486377886, + "learning_rate": 1.0479855280077493e-06, + "loss": 0.7879898548126221, + "step": 4432 + }, + { + "epoch": 1.0214285714285714, + "grad_norm": 1.5182696761272942, + "learning_rate": 1.0476049795870263e-06, + "loss": 0.9811698198318481, + "step": 4433 + }, + { + "epoch": 1.0216589861751153, + "grad_norm": 1.1962738461411733, + "learning_rate": 1.0472244242565034e-06, + "loss": 0.7706241607666016, + "step": 4434 + }, + { + "epoch": 1.021889400921659, + "grad_norm": 1.289215010975763, + "learning_rate": 1.046843862071418e-06, + "loss": 0.761093020439148, + "step": 4435 + }, + { + "epoch": 1.0221198156682028, + "grad_norm": 1.2142929670752842, + "learning_rate": 1.046463293087008e-06, + "loss": 0.8306092619895935, + "step": 4436 + }, + { + "epoch": 1.0223502304147465, + "grad_norm": 1.0820298518439184, + "learning_rate": 1.0460827173585125e-06, + "loss": 0.9669788479804993, + "step": 4437 + }, + { + "epoch": 1.0225806451612902, + "grad_norm": 1.173748576404213, + "learning_rate": 1.0457021349411715e-06, + "loss": 0.8461639285087585, + "step": 4438 + }, + { + "epoch": 1.0228110599078342, + "grad_norm": 1.0738697424760002, + "learning_rate": 1.0453215458902262e-06, + "loss": 0.7230383157730103, + "step": 4439 + }, + { + "epoch": 1.023041474654378, + "grad_norm": 1.195555915731222, + "learning_rate": 1.0449409502609186e-06, + "loss": 0.7506514191627502, + "step": 4440 + }, + { + "epoch": 1.0232718894009216, + "grad_norm": 1.2468090783946124, + "learning_rate": 1.0445603481084914e-06, + "loss": 0.7530048489570618, + "step": 4441 + }, + { + "epoch": 1.0235023041474653, + "grad_norm": 1.1659142578592716, + "learning_rate": 1.044179739488189e-06, + "loss": 0.8402249813079834, + "step": 4442 + }, + { + "epoch": 1.0237327188940093, + "grad_norm": 0.9379480482149454, + "learning_rate": 1.0437991244552557e-06, + "loss": 
0.7661963701248169, + "step": 4443 + }, + { + "epoch": 1.023963133640553, + "grad_norm": 1.484925993605904, + "learning_rate": 1.043418503064937e-06, + "loss": 0.7982668876647949, + "step": 4444 + }, + { + "epoch": 1.0241935483870968, + "grad_norm": 1.5153078123946815, + "learning_rate": 1.0430378753724807e-06, + "loss": 0.899538516998291, + "step": 4445 + }, + { + "epoch": 1.0244239631336405, + "grad_norm": 1.0283178313705175, + "learning_rate": 1.0426572414331337e-06, + "loss": 0.8027441501617432, + "step": 4446 + }, + { + "epoch": 1.0246543778801844, + "grad_norm": 1.0275551729897887, + "learning_rate": 1.0422766013021442e-06, + "loss": 0.8575221300125122, + "step": 4447 + }, + { + "epoch": 1.0248847926267282, + "grad_norm": 1.0529216327738424, + "learning_rate": 1.0418959550347622e-06, + "loss": 0.7001699209213257, + "step": 4448 + }, + { + "epoch": 1.0251152073732719, + "grad_norm": 1.344629476023339, + "learning_rate": 1.041515302686238e-06, + "loss": 0.9296507835388184, + "step": 4449 + }, + { + "epoch": 1.0253456221198156, + "grad_norm": 1.1736142719382505, + "learning_rate": 1.0411346443118222e-06, + "loss": 0.8214550018310547, + "step": 4450 + }, + { + "epoch": 1.0255760368663593, + "grad_norm": 1.111485424859677, + "learning_rate": 1.0407539799667673e-06, + "loss": 0.7598673701286316, + "step": 4451 + }, + { + "epoch": 1.0258064516129033, + "grad_norm": 1.1453890077051856, + "learning_rate": 1.0403733097063265e-06, + "loss": 0.8222990036010742, + "step": 4452 + }, + { + "epoch": 1.026036866359447, + "grad_norm": 0.8681765527907143, + "learning_rate": 1.039992633585753e-06, + "loss": 0.7860872745513916, + "step": 4453 + }, + { + "epoch": 1.0262672811059907, + "grad_norm": 0.7352315377021262, + "learning_rate": 1.0396119516603018e-06, + "loss": 0.6602796912193298, + "step": 4454 + }, + { + "epoch": 1.0264976958525345, + "grad_norm": 0.7865024675454858, + "learning_rate": 1.0392312639852278e-06, + "loss": 0.554654598236084, + "step": 4455 + }, + { + 
"epoch": 1.0267281105990784, + "grad_norm": 0.997694873166315, + "learning_rate": 1.0388505706157885e-06, + "loss": 0.7977210879325867, + "step": 4456 + }, + { + "epoch": 1.0269585253456222, + "grad_norm": 0.9315155505189272, + "learning_rate": 1.0384698716072398e-06, + "loss": 0.8770938515663147, + "step": 4457 + }, + { + "epoch": 1.0271889400921659, + "grad_norm": 1.1958306146081352, + "learning_rate": 1.0380891670148403e-06, + "loss": 0.710452675819397, + "step": 4458 + }, + { + "epoch": 1.0274193548387096, + "grad_norm": 1.0231453414790668, + "learning_rate": 1.0377084568938485e-06, + "loss": 0.8876768946647644, + "step": 4459 + }, + { + "epoch": 1.0276497695852536, + "grad_norm": 1.1707146109643827, + "learning_rate": 1.0373277412995241e-06, + "loss": 0.7770971059799194, + "step": 4460 + }, + { + "epoch": 1.0278801843317973, + "grad_norm": 1.2438301523835749, + "learning_rate": 1.0369470202871275e-06, + "loss": 0.9199050068855286, + "step": 4461 + }, + { + "epoch": 1.028110599078341, + "grad_norm": 1.225766455591599, + "learning_rate": 1.0365662939119199e-06, + "loss": 0.7931548357009888, + "step": 4462 + }, + { + "epoch": 1.0283410138248847, + "grad_norm": 0.9403888957806107, + "learning_rate": 1.0361855622291636e-06, + "loss": 0.7484941482543945, + "step": 4463 + }, + { + "epoch": 1.0285714285714285, + "grad_norm": 1.1077517121943607, + "learning_rate": 1.03580482529412e-06, + "loss": 0.7639475464820862, + "step": 4464 + }, + { + "epoch": 1.0288018433179724, + "grad_norm": 0.9266455289292281, + "learning_rate": 1.035424083162054e-06, + "loss": 0.7705268859863281, + "step": 4465 + }, + { + "epoch": 1.0290322580645161, + "grad_norm": 1.0602296301972336, + "learning_rate": 1.0350433358882288e-06, + "loss": 0.7714117169380188, + "step": 4466 + }, + { + "epoch": 1.0292626728110599, + "grad_norm": 0.9812855436464868, + "learning_rate": 1.0346625835279102e-06, + "loss": 0.851073145866394, + "step": 4467 + }, + { + "epoch": 1.0294930875576036, + "grad_norm": 
0.9352903997309275, + "learning_rate": 1.0342818261363631e-06, + "loss": 0.8001583218574524, + "step": 4468 + }, + { + "epoch": 1.0297235023041476, + "grad_norm": 1.1158901092617035, + "learning_rate": 1.0339010637688547e-06, + "loss": 0.8352588415145874, + "step": 4469 + }, + { + "epoch": 1.0299539170506913, + "grad_norm": 0.91245372061127, + "learning_rate": 1.0335202964806515e-06, + "loss": 0.8136032223701477, + "step": 4470 + }, + { + "epoch": 1.030184331797235, + "grad_norm": 1.1248571903620148, + "learning_rate": 1.0331395243270215e-06, + "loss": 0.8041108846664429, + "step": 4471 + }, + { + "epoch": 1.0304147465437787, + "grad_norm": 0.9370378251466553, + "learning_rate": 1.032758747363234e-06, + "loss": 0.6961067914962769, + "step": 4472 + }, + { + "epoch": 1.0306451612903227, + "grad_norm": 0.8328897533850071, + "learning_rate": 1.0323779656445572e-06, + "loss": 0.8063983917236328, + "step": 4473 + }, + { + "epoch": 1.0308755760368664, + "grad_norm": 1.01915176563276, + "learning_rate": 1.0319971792262618e-06, + "loss": 0.706061601638794, + "step": 4474 + }, + { + "epoch": 1.0311059907834101, + "grad_norm": 1.1193687254143303, + "learning_rate": 1.0316163881636181e-06, + "loss": 0.8510581254959106, + "step": 4475 + }, + { + "epoch": 1.0313364055299539, + "grad_norm": 0.8459775762451333, + "learning_rate": 1.0312355925118975e-06, + "loss": 0.7169028520584106, + "step": 4476 + }, + { + "epoch": 1.0315668202764976, + "grad_norm": 0.8345675502163972, + "learning_rate": 1.0308547923263718e-06, + "loss": 0.7513360977172852, + "step": 4477 + }, + { + "epoch": 1.0317972350230415, + "grad_norm": 1.1826641384928935, + "learning_rate": 1.030473987662314e-06, + "loss": 0.7408783435821533, + "step": 4478 + }, + { + "epoch": 1.0320276497695853, + "grad_norm": 1.2135549739175484, + "learning_rate": 1.0300931785749974e-06, + "loss": 0.8177747130393982, + "step": 4479 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 1.074036475926982, + "learning_rate": 
1.0297123651196954e-06, + "loss": 0.7530791759490967, + "step": 4480 + }, + { + "epoch": 1.0324884792626727, + "grad_norm": 1.2947307404575235, + "learning_rate": 1.0293315473516832e-06, + "loss": 0.7958859205245972, + "step": 4481 + }, + { + "epoch": 1.0327188940092167, + "grad_norm": 1.2482360288136136, + "learning_rate": 1.0289507253262357e-06, + "loss": 0.8719943761825562, + "step": 4482 + }, + { + "epoch": 1.0329493087557604, + "grad_norm": 1.0347953021678673, + "learning_rate": 1.028569899098629e-06, + "loss": 0.7584139108657837, + "step": 4483 + }, + { + "epoch": 1.0331797235023041, + "grad_norm": 1.1621251755994506, + "learning_rate": 1.0281890687241387e-06, + "loss": 0.852983832359314, + "step": 4484 + }, + { + "epoch": 1.0334101382488479, + "grad_norm": 0.995758429643109, + "learning_rate": 1.027808234258043e-06, + "loss": 0.7455692291259766, + "step": 4485 + }, + { + "epoch": 1.0336405529953918, + "grad_norm": 0.9126434588001895, + "learning_rate": 1.0274273957556185e-06, + "loss": 0.7078343629837036, + "step": 4486 + }, + { + "epoch": 1.0338709677419355, + "grad_norm": 1.056440353383354, + "learning_rate": 1.027046553272144e-06, + "loss": 0.7580842971801758, + "step": 4487 + }, + { + "epoch": 1.0341013824884793, + "grad_norm": 0.9071452550966383, + "learning_rate": 1.026665706862898e-06, + "loss": 0.7271389961242676, + "step": 4488 + }, + { + "epoch": 1.034331797235023, + "grad_norm": 1.3819767756673818, + "learning_rate": 1.0262848565831599e-06, + "loss": 0.8271546363830566, + "step": 4489 + }, + { + "epoch": 1.0345622119815667, + "grad_norm": 1.1533046933911033, + "learning_rate": 1.0259040024882098e-06, + "loss": 0.6799920201301575, + "step": 4490 + }, + { + "epoch": 1.0347926267281107, + "grad_norm": 0.7837273040397605, + "learning_rate": 1.0255231446333277e-06, + "loss": 0.6962645053863525, + "step": 4491 + }, + { + "epoch": 1.0350230414746544, + "grad_norm": 1.2060107344479347, + "learning_rate": 1.0251422830737955e-06, + "loss": 
0.8722797632217407, + "step": 4492 + }, + { + "epoch": 1.0352534562211981, + "grad_norm": 1.0328841633467782, + "learning_rate": 1.024761417864894e-06, + "loss": 0.8054880499839783, + "step": 4493 + }, + { + "epoch": 1.0354838709677419, + "grad_norm": 0.9178345615112383, + "learning_rate": 1.0243805490619053e-06, + "loss": 0.8196548223495483, + "step": 4494 + }, + { + "epoch": 1.0357142857142858, + "grad_norm": 1.5010413914558958, + "learning_rate": 1.0239996767201122e-06, + "loss": 0.8197275400161743, + "step": 4495 + }, + { + "epoch": 1.0359447004608295, + "grad_norm": 1.1223467429515472, + "learning_rate": 1.0236188008947978e-06, + "loss": 0.7704858779907227, + "step": 4496 + }, + { + "epoch": 1.0361751152073733, + "grad_norm": 1.2288506828429187, + "learning_rate": 1.0232379216412459e-06, + "loss": 0.8296232223510742, + "step": 4497 + }, + { + "epoch": 1.036405529953917, + "grad_norm": 1.1910482399414777, + "learning_rate": 1.0228570390147404e-06, + "loss": 0.6546601057052612, + "step": 4498 + }, + { + "epoch": 1.036635944700461, + "grad_norm": 1.0493042801064925, + "learning_rate": 1.0224761530705656e-06, + "loss": 0.808987021446228, + "step": 4499 + }, + { + "epoch": 1.0368663594470047, + "grad_norm": 1.0198435860671902, + "learning_rate": 1.0220952638640073e-06, + "loss": 0.862627387046814, + "step": 4500 + }, + { + "epoch": 1.0370967741935484, + "grad_norm": 0.9314966888515314, + "learning_rate": 1.0217143714503507e-06, + "loss": 0.781114935874939, + "step": 4501 + }, + { + "epoch": 1.0373271889400921, + "grad_norm": 1.1732597442137338, + "learning_rate": 1.0213334758848814e-06, + "loss": 0.7186112403869629, + "step": 4502 + }, + { + "epoch": 1.0375576036866359, + "grad_norm": 0.9870711221115687, + "learning_rate": 1.0209525772228868e-06, + "loss": 0.8112529516220093, + "step": 4503 + }, + { + "epoch": 1.0377880184331798, + "grad_norm": 1.1558866878107408, + "learning_rate": 1.020571675519653e-06, + "loss": 0.7364751100540161, + "step": 4504 + }, + { + 
"epoch": 1.0380184331797235, + "grad_norm": 1.296821231113786, + "learning_rate": 1.0201907708304681e-06, + "loss": 0.7015886902809143, + "step": 4505 + }, + { + "epoch": 1.0382488479262673, + "grad_norm": 0.8755063657778166, + "learning_rate": 1.0198098632106197e-06, + "loss": 0.7018470168113708, + "step": 4506 + }, + { + "epoch": 1.038479262672811, + "grad_norm": 0.9958013421397902, + "learning_rate": 1.0194289527153953e-06, + "loss": 0.820391058921814, + "step": 4507 + }, + { + "epoch": 1.038709677419355, + "grad_norm": 1.2026544914516983, + "learning_rate": 1.0190480394000844e-06, + "loss": 0.8341129422187805, + "step": 4508 + }, + { + "epoch": 1.0389400921658987, + "grad_norm": 0.8606365913019236, + "learning_rate": 1.0186671233199757e-06, + "loss": 0.7345695495605469, + "step": 4509 + }, + { + "epoch": 1.0391705069124424, + "grad_norm": 1.375974242893794, + "learning_rate": 1.0182862045303589e-06, + "loss": 0.8899500370025635, + "step": 4510 + }, + { + "epoch": 1.0394009216589861, + "grad_norm": 1.001562990779633, + "learning_rate": 1.0179052830865238e-06, + "loss": 0.8158663511276245, + "step": 4511 + }, + { + "epoch": 1.0396313364055298, + "grad_norm": 1.1574048409080129, + "learning_rate": 1.0175243590437604e-06, + "loss": 0.734848141670227, + "step": 4512 + }, + { + "epoch": 1.0398617511520738, + "grad_norm": 1.062511127484639, + "learning_rate": 1.0171434324573596e-06, + "loss": 0.7920876741409302, + "step": 4513 + }, + { + "epoch": 1.0400921658986175, + "grad_norm": 1.2131341489328324, + "learning_rate": 1.0167625033826122e-06, + "loss": 0.9224791526794434, + "step": 4514 + }, + { + "epoch": 1.0403225806451613, + "grad_norm": 1.152494191321953, + "learning_rate": 1.0163815718748096e-06, + "loss": 0.7086025476455688, + "step": 4515 + }, + { + "epoch": 1.040552995391705, + "grad_norm": 1.0223491213154539, + "learning_rate": 1.0160006379892434e-06, + "loss": 0.7657936811447144, + "step": 4516 + }, + { + "epoch": 1.040783410138249, + "grad_norm": 
1.11296257844156, + "learning_rate": 1.0156197017812058e-06, + "loss": 0.786298394203186, + "step": 4517 + }, + { + "epoch": 1.0410138248847927, + "grad_norm": 1.1998728834800867, + "learning_rate": 1.0152387633059895e-06, + "loss": 0.8667294979095459, + "step": 4518 + }, + { + "epoch": 1.0412442396313364, + "grad_norm": 1.0233425185279803, + "learning_rate": 1.0148578226188866e-06, + "loss": 0.8479517102241516, + "step": 4519 + }, + { + "epoch": 1.0414746543778801, + "grad_norm": 0.8930216519245627, + "learning_rate": 1.0144768797751904e-06, + "loss": 0.6430692076683044, + "step": 4520 + }, + { + "epoch": 1.041705069124424, + "grad_norm": 1.122852329570553, + "learning_rate": 1.0140959348301946e-06, + "loss": 0.874313473701477, + "step": 4521 + }, + { + "epoch": 1.0419354838709678, + "grad_norm": 1.101097598838231, + "learning_rate": 1.013714987839192e-06, + "loss": 0.8439676761627197, + "step": 4522 + }, + { + "epoch": 1.0421658986175115, + "grad_norm": 1.2477053670484948, + "learning_rate": 1.0133340388574774e-06, + "loss": 0.7480089664459229, + "step": 4523 + }, + { + "epoch": 1.0423963133640552, + "grad_norm": 1.3143250159570112, + "learning_rate": 1.012953087940345e-06, + "loss": 0.8786139488220215, + "step": 4524 + }, + { + "epoch": 1.042626728110599, + "grad_norm": 1.1897211165926171, + "learning_rate": 1.0125721351430885e-06, + "loss": 0.8333299160003662, + "step": 4525 + }, + { + "epoch": 1.042857142857143, + "grad_norm": 1.055645356383861, + "learning_rate": 1.0121911805210032e-06, + "loss": 0.8201998472213745, + "step": 4526 + }, + { + "epoch": 1.0430875576036867, + "grad_norm": 1.160199033506195, + "learning_rate": 1.0118102241293847e-06, + "loss": 0.7793110609054565, + "step": 4527 + }, + { + "epoch": 1.0433179723502304, + "grad_norm": 1.045720270383819, + "learning_rate": 1.0114292660235272e-06, + "loss": 0.7148817777633667, + "step": 4528 + }, + { + "epoch": 1.043548387096774, + "grad_norm": 1.0726942336798908, + "learning_rate": 
1.011048306258727e-06, + "loss": 0.7945176362991333, + "step": 4529 + }, + { + "epoch": 1.043778801843318, + "grad_norm": 1.0532791972453868, + "learning_rate": 1.01066734489028e-06, + "loss": 0.7246826887130737, + "step": 4530 + }, + { + "epoch": 1.0440092165898618, + "grad_norm": 1.230297656368, + "learning_rate": 1.0102863819734822e-06, + "loss": 0.7342358827590942, + "step": 4531 + }, + { + "epoch": 1.0442396313364055, + "grad_norm": 1.1072867148521375, + "learning_rate": 1.0099054175636292e-06, + "loss": 0.6837234497070312, + "step": 4532 + }, + { + "epoch": 1.0444700460829492, + "grad_norm": 0.8847188010063922, + "learning_rate": 1.0095244517160184e-06, + "loss": 0.6941408514976501, + "step": 4533 + }, + { + "epoch": 1.0447004608294932, + "grad_norm": 0.9992175314765978, + "learning_rate": 1.009143484485946e-06, + "loss": 0.7835201025009155, + "step": 4534 + }, + { + "epoch": 1.044930875576037, + "grad_norm": 1.1533173348493126, + "learning_rate": 1.0087625159287086e-06, + "loss": 0.7887566089630127, + "step": 4535 + }, + { + "epoch": 1.0451612903225806, + "grad_norm": 0.9980831932241371, + "learning_rate": 1.0083815460996036e-06, + "loss": 0.7106727361679077, + "step": 4536 + }, + { + "epoch": 1.0453917050691244, + "grad_norm": 1.1003103489016812, + "learning_rate": 1.0080005750539287e-06, + "loss": 0.8316382169723511, + "step": 4537 + }, + { + "epoch": 1.045622119815668, + "grad_norm": 1.278017855977623, + "learning_rate": 1.0076196028469805e-06, + "loss": 0.7535592317581177, + "step": 4538 + }, + { + "epoch": 1.045852534562212, + "grad_norm": 1.2167524484109087, + "learning_rate": 1.0072386295340571e-06, + "loss": 0.9255459308624268, + "step": 4539 + }, + { + "epoch": 1.0460829493087558, + "grad_norm": 0.9884104383515986, + "learning_rate": 1.0068576551704561e-06, + "loss": 0.7415009140968323, + "step": 4540 + }, + { + "epoch": 1.0463133640552995, + "grad_norm": 0.9221193872044946, + "learning_rate": 1.0064766798114758e-06, + "loss": 0.673210620880127, + 
"step": 4541 + }, + { + "epoch": 1.0465437788018432, + "grad_norm": 1.2907861596502346, + "learning_rate": 1.006095703512414e-06, + "loss": 0.7063118815422058, + "step": 4542 + }, + { + "epoch": 1.0467741935483872, + "grad_norm": 1.0344490200256125, + "learning_rate": 1.005714726328569e-06, + "loss": 0.73606276512146, + "step": 4543 + }, + { + "epoch": 1.047004608294931, + "grad_norm": 1.1024687809140408, + "learning_rate": 1.005333748315239e-06, + "loss": 0.6723713874816895, + "step": 4544 + }, + { + "epoch": 1.0472350230414746, + "grad_norm": 1.0566239460690536, + "learning_rate": 1.0049527695277223e-06, + "loss": 0.643845796585083, + "step": 4545 + }, + { + "epoch": 1.0474654377880184, + "grad_norm": 1.1196128686458957, + "learning_rate": 1.0045717900213175e-06, + "loss": 0.8820847272872925, + "step": 4546 + }, + { + "epoch": 1.047695852534562, + "grad_norm": 1.177142500227169, + "learning_rate": 1.0041908098513239e-06, + "loss": 0.6555176973342896, + "step": 4547 + }, + { + "epoch": 1.047926267281106, + "grad_norm": 1.4046987769414077, + "learning_rate": 1.0038098290730394e-06, + "loss": 0.8142974376678467, + "step": 4548 + }, + { + "epoch": 1.0481566820276498, + "grad_norm": 1.3843242800793498, + "learning_rate": 1.0034288477417634e-06, + "loss": 0.8107532262802124, + "step": 4549 + }, + { + "epoch": 1.0483870967741935, + "grad_norm": 1.093115680939654, + "learning_rate": 1.0030478659127947e-06, + "loss": 0.7078464031219482, + "step": 4550 + }, + { + "epoch": 1.0486175115207372, + "grad_norm": 1.3647000829373368, + "learning_rate": 1.0026668836414322e-06, + "loss": 0.9168295860290527, + "step": 4551 + }, + { + "epoch": 1.0488479262672812, + "grad_norm": 0.7154125463388302, + "learning_rate": 1.0022859009829752e-06, + "loss": 0.7384864091873169, + "step": 4552 + }, + { + "epoch": 1.049078341013825, + "grad_norm": 0.9459016715465385, + "learning_rate": 1.0019049179927229e-06, + "loss": 0.6092562675476074, + "step": 4553 + }, + { + "epoch": 1.0493087557603686, + 
"grad_norm": 1.159695075830992, + "learning_rate": 1.001523934725974e-06, + "loss": 0.713464617729187, + "step": 4554 + }, + { + "epoch": 1.0495391705069124, + "grad_norm": 0.9471368467961162, + "learning_rate": 1.001142951238028e-06, + "loss": 0.7514123916625977, + "step": 4555 + }, + { + "epoch": 1.0497695852534563, + "grad_norm": 1.1414214053095963, + "learning_rate": 1.000761967584184e-06, + "loss": 0.8092095851898193, + "step": 4556 + }, + { + "epoch": 1.05, + "grad_norm": 0.830509770117895, + "learning_rate": 1.000380983819742e-06, + "loss": 0.7609254717826843, + "step": 4557 + }, + { + "epoch": 1.0502304147465438, + "grad_norm": 0.8874333429433436, + "learning_rate": 1e-06, + "loss": 0.8363404273986816, + "step": 4558 + }, + { + "epoch": 1.0504608294930875, + "grad_norm": 1.1983399653767088, + "learning_rate": 9.996190161802584e-07, + "loss": 0.8139501810073853, + "step": 4559 + }, + { + "epoch": 1.0506912442396312, + "grad_norm": 0.8984420952696672, + "learning_rate": 9.992380324158157e-07, + "loss": 0.8064978122711182, + "step": 4560 + }, + { + "epoch": 1.0509216589861752, + "grad_norm": 0.9258651657418774, + "learning_rate": 9.988570487619721e-07, + "loss": 0.7162975072860718, + "step": 4561 + }, + { + "epoch": 1.051152073732719, + "grad_norm": 1.2196516767947119, + "learning_rate": 9.984760652740261e-07, + "loss": 0.9298074245452881, + "step": 4562 + }, + { + "epoch": 1.0513824884792626, + "grad_norm": 1.0770268299074148, + "learning_rate": 9.980950820072773e-07, + "loss": 0.6929144859313965, + "step": 4563 + }, + { + "epoch": 1.0516129032258064, + "grad_norm": 0.919564091111097, + "learning_rate": 9.97714099017025e-07, + "loss": 0.6516381502151489, + "step": 4564 + }, + { + "epoch": 1.0518433179723503, + "grad_norm": 1.091105354713726, + "learning_rate": 9.97333116358568e-07, + "loss": 0.864730715751648, + "step": 4565 + }, + { + "epoch": 1.052073732718894, + "grad_norm": 0.9113453911026408, + "learning_rate": 9.969521340872052e-07, + "loss": 
0.7911246418952942, + "step": 4566 + }, + { + "epoch": 1.0523041474654378, + "grad_norm": 1.032556518691269, + "learning_rate": 9.965711522582367e-07, + "loss": 0.7766593098640442, + "step": 4567 + }, + { + "epoch": 1.0525345622119815, + "grad_norm": 1.1309615036566574, + "learning_rate": 9.961901709269607e-07, + "loss": 0.7703378200531006, + "step": 4568 + }, + { + "epoch": 1.0527649769585254, + "grad_norm": 0.9296180823184125, + "learning_rate": 9.958091901486762e-07, + "loss": 0.7068926692008972, + "step": 4569 + }, + { + "epoch": 1.0529953917050692, + "grad_norm": 1.0589255494911889, + "learning_rate": 9.954282099786824e-07, + "loss": 0.740556538105011, + "step": 4570 + }, + { + "epoch": 1.053225806451613, + "grad_norm": 1.1264720214776667, + "learning_rate": 9.950472304722778e-07, + "loss": 0.798403263092041, + "step": 4571 + }, + { + "epoch": 1.0534562211981566, + "grad_norm": 0.9551633921802427, + "learning_rate": 9.94666251684761e-07, + "loss": 0.6945887804031372, + "step": 4572 + }, + { + "epoch": 1.0536866359447004, + "grad_norm": 1.0978186377940822, + "learning_rate": 9.942852736714312e-07, + "loss": 0.8257915377616882, + "step": 4573 + }, + { + "epoch": 1.0539170506912443, + "grad_norm": 1.108870855150134, + "learning_rate": 9.939042964875859e-07, + "loss": 0.751315712928772, + "step": 4574 + }, + { + "epoch": 1.054147465437788, + "grad_norm": 0.8929134755319279, + "learning_rate": 9.935233201885241e-07, + "loss": 0.6607721447944641, + "step": 4575 + }, + { + "epoch": 1.0543778801843318, + "grad_norm": 1.1623094406064765, + "learning_rate": 9.931423448295438e-07, + "loss": 0.9135023355484009, + "step": 4576 + }, + { + "epoch": 1.0546082949308755, + "grad_norm": 1.1079901137426853, + "learning_rate": 9.927613704659428e-07, + "loss": 0.8238483667373657, + "step": 4577 + }, + { + "epoch": 1.0548387096774194, + "grad_norm": 1.0927838633299076, + "learning_rate": 9.923803971530196e-07, + "loss": 0.7657001614570618, + "step": 4578 + }, + { + "epoch": 
1.0550691244239632, + "grad_norm": 1.0858899027259339, + "learning_rate": 9.919994249460717e-07, + "loss": 0.6360250115394592, + "step": 4579 + }, + { + "epoch": 1.055299539170507, + "grad_norm": 3.1983788784304843, + "learning_rate": 9.916184539003963e-07, + "loss": 0.6958763003349304, + "step": 4580 + }, + { + "epoch": 1.0555299539170506, + "grad_norm": 1.0079237517587447, + "learning_rate": 9.912374840712915e-07, + "loss": 0.7093038558959961, + "step": 4581 + }, + { + "epoch": 1.0557603686635946, + "grad_norm": 1.0680215254508902, + "learning_rate": 9.908565155140544e-07, + "loss": 0.7641304731369019, + "step": 4582 + }, + { + "epoch": 1.0559907834101383, + "grad_norm": 0.8923201066182703, + "learning_rate": 9.904755482839817e-07, + "loss": 0.7976446151733398, + "step": 4583 + }, + { + "epoch": 1.056221198156682, + "grad_norm": 1.0963737907088362, + "learning_rate": 9.900945824363707e-07, + "loss": 0.8407114744186401, + "step": 4584 + }, + { + "epoch": 1.0564516129032258, + "grad_norm": 1.0695401976763876, + "learning_rate": 9.897136180265181e-07, + "loss": 0.7988634705543518, + "step": 4585 + }, + { + "epoch": 1.0566820276497695, + "grad_norm": 1.072342293651018, + "learning_rate": 9.893326551097198e-07, + "loss": 0.7847359776496887, + "step": 4586 + }, + { + "epoch": 1.0569124423963134, + "grad_norm": 1.0629893453410204, + "learning_rate": 9.889516937412728e-07, + "loss": 0.8458963632583618, + "step": 4587 + }, + { + "epoch": 1.0571428571428572, + "grad_norm": 1.1301054626559641, + "learning_rate": 9.88570733976473e-07, + "loss": 0.8479788899421692, + "step": 4588 + }, + { + "epoch": 1.057373271889401, + "grad_norm": 1.180492999769349, + "learning_rate": 9.881897758706154e-07, + "loss": 0.7467283010482788, + "step": 4589 + }, + { + "epoch": 1.0576036866359446, + "grad_norm": 1.1676226241505752, + "learning_rate": 9.878088194789967e-07, + "loss": 0.9400098323822021, + "step": 4590 + }, + { + "epoch": 1.0578341013824886, + "grad_norm": 1.2151292863225376, + 
"learning_rate": 9.874278648569118e-07, + "loss": 0.8901257514953613, + "step": 4591 + }, + { + "epoch": 1.0580645161290323, + "grad_norm": 1.2956773767909102, + "learning_rate": 9.870469120596552e-07, + "loss": 0.840053379535675, + "step": 4592 + }, + { + "epoch": 1.058294930875576, + "grad_norm": 0.9938952111506293, + "learning_rate": 9.866659611425225e-07, + "loss": 0.6825235486030579, + "step": 4593 + }, + { + "epoch": 1.0585253456221198, + "grad_norm": 1.2521534530730631, + "learning_rate": 9.86285012160808e-07, + "loss": 0.7783857583999634, + "step": 4594 + }, + { + "epoch": 1.0587557603686637, + "grad_norm": 1.0517032997656734, + "learning_rate": 9.859040651698055e-07, + "loss": 0.7901174426078796, + "step": 4595 + }, + { + "epoch": 1.0589861751152074, + "grad_norm": 1.2211963787816231, + "learning_rate": 9.855231202248097e-07, + "loss": 0.9475124478340149, + "step": 4596 + }, + { + "epoch": 1.0592165898617512, + "grad_norm": 1.1872676544788658, + "learning_rate": 9.851421773811133e-07, + "loss": 0.8582692742347717, + "step": 4597 + }, + { + "epoch": 1.0594470046082949, + "grad_norm": 1.1723948726757356, + "learning_rate": 9.847612366940106e-07, + "loss": 0.7885586023330688, + "step": 4598 + }, + { + "epoch": 1.0596774193548386, + "grad_norm": 1.17635061110199, + "learning_rate": 9.843802982187943e-07, + "loss": 0.7981748580932617, + "step": 4599 + }, + { + "epoch": 1.0599078341013826, + "grad_norm": 0.9066343519689628, + "learning_rate": 9.839993620107563e-07, + "loss": 0.7060403823852539, + "step": 4600 + }, + { + "epoch": 1.0601382488479263, + "grad_norm": 1.2126688495293467, + "learning_rate": 9.836184281251905e-07, + "loss": 0.7902223467826843, + "step": 4601 + }, + { + "epoch": 1.06036866359447, + "grad_norm": 0.9972491115312556, + "learning_rate": 9.83237496617388e-07, + "loss": 0.7074719071388245, + "step": 4602 + }, + { + "epoch": 1.0605990783410137, + "grad_norm": 0.9455936494800175, + "learning_rate": 9.828565675426405e-07, + "loss": 
0.7180163264274597, + "step": 4603 + }, + { + "epoch": 1.0608294930875577, + "grad_norm": 0.8990997781996365, + "learning_rate": 9.824756409562397e-07, + "loss": 0.7040787935256958, + "step": 4604 + }, + { + "epoch": 1.0610599078341014, + "grad_norm": 1.0311368456712493, + "learning_rate": 9.820947169134765e-07, + "loss": 0.8387063145637512, + "step": 4605 + }, + { + "epoch": 1.0612903225806452, + "grad_norm": 1.0692817612993422, + "learning_rate": 9.81713795469641e-07, + "loss": 0.8587188124656677, + "step": 4606 + }, + { + "epoch": 1.0615207373271889, + "grad_norm": 1.0418289468184643, + "learning_rate": 9.813328766800242e-07, + "loss": 0.729094386100769, + "step": 4607 + }, + { + "epoch": 1.0617511520737328, + "grad_norm": 1.1884134090864242, + "learning_rate": 9.809519605999158e-07, + "loss": 1.0576609373092651, + "step": 4608 + }, + { + "epoch": 1.0619815668202766, + "grad_norm": 1.1124938149620707, + "learning_rate": 9.805710472846044e-07, + "loss": 0.7605572938919067, + "step": 4609 + }, + { + "epoch": 1.0622119815668203, + "grad_norm": 0.9566684121068049, + "learning_rate": 9.801901367893807e-07, + "loss": 0.722477912902832, + "step": 4610 + }, + { + "epoch": 1.062442396313364, + "grad_norm": 0.9185071862681494, + "learning_rate": 9.79809229169532e-07, + "loss": 0.7335925698280334, + "step": 4611 + }, + { + "epoch": 1.0626728110599077, + "grad_norm": 1.0494538531790283, + "learning_rate": 9.794283244803466e-07, + "loss": 0.8116357922554016, + "step": 4612 + }, + { + "epoch": 1.0629032258064517, + "grad_norm": 1.0519905027101895, + "learning_rate": 9.79047422777113e-07, + "loss": 0.8004311323165894, + "step": 4613 + }, + { + "epoch": 1.0631336405529954, + "grad_norm": 0.9803128568921189, + "learning_rate": 9.786665241151185e-07, + "loss": 0.8198168277740479, + "step": 4614 + }, + { + "epoch": 1.0633640552995391, + "grad_norm": 0.9841178854805237, + "learning_rate": 9.782856285496494e-07, + "loss": 0.7031205892562866, + "step": 4615 + }, + { + "epoch": 
1.0635944700460829, + "grad_norm": 1.055262322588535, + "learning_rate": 9.779047361359928e-07, + "loss": 0.7303737998008728, + "step": 4616 + }, + { + "epoch": 1.0638248847926268, + "grad_norm": 1.1694198331033647, + "learning_rate": 9.775238469294345e-07, + "loss": 0.8775424957275391, + "step": 4617 + }, + { + "epoch": 1.0640552995391706, + "grad_norm": 0.9013154484602001, + "learning_rate": 9.771429609852597e-07, + "loss": 0.7463759183883667, + "step": 4618 + }, + { + "epoch": 1.0642857142857143, + "grad_norm": 0.8792691967623277, + "learning_rate": 9.767620783587542e-07, + "loss": 0.7200205326080322, + "step": 4619 + }, + { + "epoch": 1.064516129032258, + "grad_norm": 0.9102194522316246, + "learning_rate": 9.763811991052019e-07, + "loss": 0.8255786299705505, + "step": 4620 + }, + { + "epoch": 1.064746543778802, + "grad_norm": 1.2552865619465912, + "learning_rate": 9.760003232798877e-07, + "loss": 0.7975195050239563, + "step": 4621 + }, + { + "epoch": 1.0649769585253457, + "grad_norm": 0.9993977940644363, + "learning_rate": 9.756194509380948e-07, + "loss": 0.6993064880371094, + "step": 4622 + }, + { + "epoch": 1.0652073732718894, + "grad_norm": 1.314757658160511, + "learning_rate": 9.752385821351062e-07, + "loss": 0.818634033203125, + "step": 4623 + }, + { + "epoch": 1.0654377880184331, + "grad_norm": 1.0949894149977886, + "learning_rate": 9.748577169262046e-07, + "loss": 0.707933783531189, + "step": 4624 + }, + { + "epoch": 1.0656682027649769, + "grad_norm": 1.1439419332653986, + "learning_rate": 9.744768553666723e-07, + "loss": 0.8133440017700195, + "step": 4625 + }, + { + "epoch": 1.0658986175115208, + "grad_norm": 1.1394394770433072, + "learning_rate": 9.740959975117901e-07, + "loss": 0.8818857669830322, + "step": 4626 + }, + { + "epoch": 1.0661290322580645, + "grad_norm": 0.9617616601353652, + "learning_rate": 9.737151434168402e-07, + "loss": 0.6057544946670532, + "step": 4627 + }, + { + "epoch": 1.0663594470046083, + "grad_norm": 1.047486055121172, + 
"learning_rate": 9.733342931371023e-07, + "loss": 0.7560185194015503, + "step": 4628 + }, + { + "epoch": 1.066589861751152, + "grad_norm": 1.233360971442642, + "learning_rate": 9.72953446727856e-07, + "loss": 0.8196524381637573, + "step": 4629 + }, + { + "epoch": 1.066820276497696, + "grad_norm": 1.031309795003994, + "learning_rate": 9.725726042443814e-07, + "loss": 0.8695862889289856, + "step": 4630 + }, + { + "epoch": 1.0670506912442397, + "grad_norm": 0.9769847065094724, + "learning_rate": 9.721917657419573e-07, + "loss": 0.7753207683563232, + "step": 4631 + }, + { + "epoch": 1.0672811059907834, + "grad_norm": 1.0908524037443617, + "learning_rate": 9.718109312758612e-07, + "loss": 0.8245481252670288, + "step": 4632 + }, + { + "epoch": 1.0675115207373271, + "grad_norm": 1.201628166799481, + "learning_rate": 9.71430100901371e-07, + "loss": 0.8654806613922119, + "step": 4633 + }, + { + "epoch": 1.067741935483871, + "grad_norm": 1.22982718965067, + "learning_rate": 9.710492746737642e-07, + "loss": 0.8667370080947876, + "step": 4634 + }, + { + "epoch": 1.0679723502304148, + "grad_norm": 1.2635323967888392, + "learning_rate": 9.706684526483167e-07, + "loss": 0.7786421775817871, + "step": 4635 + }, + { + "epoch": 1.0682027649769585, + "grad_norm": 1.037203898616246, + "learning_rate": 9.702876348803045e-07, + "loss": 0.7788090705871582, + "step": 4636 + }, + { + "epoch": 1.0684331797235023, + "grad_norm": 1.1815160856137523, + "learning_rate": 9.69906821425003e-07, + "loss": 0.812332034111023, + "step": 4637 + }, + { + "epoch": 1.068663594470046, + "grad_norm": 1.2578908038434822, + "learning_rate": 9.69526012337686e-07, + "loss": 0.7884202599525452, + "step": 4638 + }, + { + "epoch": 1.06889400921659, + "grad_norm": 1.0539526708204177, + "learning_rate": 9.69145207673628e-07, + "loss": 0.725990891456604, + "step": 4639 + }, + { + "epoch": 1.0691244239631337, + "grad_norm": 1.01343921612526, + "learning_rate": 9.687644074881028e-07, + "loss": 0.7277272343635559, + 
"step": 4640 + }, + { + "epoch": 1.0693548387096774, + "grad_norm": 1.0871506025213427, + "learning_rate": 9.683836118363818e-07, + "loss": 0.8081945180892944, + "step": 4641 + }, + { + "epoch": 1.0695852534562211, + "grad_norm": 1.1050642405984226, + "learning_rate": 9.680028207737383e-07, + "loss": 0.8633503913879395, + "step": 4642 + }, + { + "epoch": 1.069815668202765, + "grad_norm": 0.9415461517108813, + "learning_rate": 9.67622034355443e-07, + "loss": 0.7873313426971436, + "step": 4643 + }, + { + "epoch": 1.0700460829493088, + "grad_norm": 1.269353126640295, + "learning_rate": 9.67241252636766e-07, + "loss": 0.7927644848823547, + "step": 4644 + }, + { + "epoch": 1.0702764976958525, + "grad_norm": 1.395156348091843, + "learning_rate": 9.668604756729784e-07, + "loss": 0.9458138942718506, + "step": 4645 + }, + { + "epoch": 1.0705069124423963, + "grad_norm": 1.2621680271291411, + "learning_rate": 9.664797035193484e-07, + "loss": 0.7471280097961426, + "step": 4646 + }, + { + "epoch": 1.07073732718894, + "grad_norm": 1.0373772164844823, + "learning_rate": 9.660989362311455e-07, + "loss": 0.7666789293289185, + "step": 4647 + }, + { + "epoch": 1.070967741935484, + "grad_norm": 0.8355654249705468, + "learning_rate": 9.65718173863637e-07, + "loss": 0.7846331000328064, + "step": 4648 + }, + { + "epoch": 1.0711981566820277, + "grad_norm": 1.1393955111251446, + "learning_rate": 9.653374164720897e-07, + "loss": 0.7790371179580688, + "step": 4649 + }, + { + "epoch": 1.0714285714285714, + "grad_norm": 1.110758470727215, + "learning_rate": 9.64956664111771e-07, + "loss": 0.9056169986724854, + "step": 4650 + }, + { + "epoch": 1.0716589861751151, + "grad_norm": 0.84240400487228, + "learning_rate": 9.645759168379461e-07, + "loss": 0.6839256286621094, + "step": 4651 + }, + { + "epoch": 1.071889400921659, + "grad_norm": 1.377334701305697, + "learning_rate": 9.641951747058799e-07, + "loss": 0.7071784138679504, + "step": 4652 + }, + { + "epoch": 1.0721198156682028, + "grad_norm": 
1.1683127374870803, + "learning_rate": 9.638144377708366e-07, + "loss": 0.8166929483413696, + "step": 4653 + }, + { + "epoch": 1.0723502304147465, + "grad_norm": 1.239204160701412, + "learning_rate": 9.6343370608808e-07, + "loss": 0.8013010621070862, + "step": 4654 + }, + { + "epoch": 1.0725806451612903, + "grad_norm": 1.0825444957318084, + "learning_rate": 9.630529797128722e-07, + "loss": 0.8157169818878174, + "step": 4655 + }, + { + "epoch": 1.072811059907834, + "grad_norm": 1.0890180382455945, + "learning_rate": 9.626722587004758e-07, + "loss": 0.6467397212982178, + "step": 4656 + }, + { + "epoch": 1.073041474654378, + "grad_norm": 0.840613071204114, + "learning_rate": 9.622915431061519e-07, + "loss": 0.6623806953430176, + "step": 4657 + }, + { + "epoch": 1.0732718894009217, + "grad_norm": 0.9242647901691624, + "learning_rate": 9.619108329851596e-07, + "loss": 0.8333703279495239, + "step": 4658 + }, + { + "epoch": 1.0735023041474654, + "grad_norm": 1.1552752606597634, + "learning_rate": 9.615301283927603e-07, + "loss": 0.8798840045928955, + "step": 4659 + }, + { + "epoch": 1.0737327188940091, + "grad_norm": 1.1547075721097313, + "learning_rate": 9.611494293842119e-07, + "loss": 0.8712242841720581, + "step": 4660 + }, + { + "epoch": 1.073963133640553, + "grad_norm": 1.030127804248938, + "learning_rate": 9.60768736014772e-07, + "loss": 0.720801591873169, + "step": 4661 + }, + { + "epoch": 1.0741935483870968, + "grad_norm": 1.0305643381766019, + "learning_rate": 9.603880483396983e-07, + "loss": 0.7974982857704163, + "step": 4662 + }, + { + "epoch": 1.0744239631336405, + "grad_norm": 1.1569753217458012, + "learning_rate": 9.600073664142471e-07, + "loss": 0.7656542062759399, + "step": 4663 + }, + { + "epoch": 1.0746543778801843, + "grad_norm": 1.2831377014983525, + "learning_rate": 9.596266902936737e-07, + "loss": 0.8274385333061218, + "step": 4664 + }, + { + "epoch": 1.0748847926267282, + "grad_norm": 1.1261587516242995, + "learning_rate": 9.592460200332328e-07, + 
"loss": 0.6508798599243164, + "step": 4665 + }, + { + "epoch": 1.075115207373272, + "grad_norm": 0.8712727383997491, + "learning_rate": 9.588653556881781e-07, + "loss": 0.6393407583236694, + "step": 4666 + }, + { + "epoch": 1.0753456221198157, + "grad_norm": 0.8300127743505744, + "learning_rate": 9.58484697313762e-07, + "loss": 0.7857781052589417, + "step": 4667 + }, + { + "epoch": 1.0755760368663594, + "grad_norm": 1.0591582120645788, + "learning_rate": 9.58104044965238e-07, + "loss": 0.7433615922927856, + "step": 4668 + }, + { + "epoch": 1.0758064516129031, + "grad_norm": 0.9252765779736452, + "learning_rate": 9.57723398697856e-07, + "loss": 0.6694349646568298, + "step": 4669 + }, + { + "epoch": 1.076036866359447, + "grad_norm": 1.06633744555344, + "learning_rate": 9.573427585668664e-07, + "loss": 0.7849506735801697, + "step": 4670 + }, + { + "epoch": 1.0762672811059908, + "grad_norm": 0.948086558097784, + "learning_rate": 9.569621246275194e-07, + "loss": 0.5924462080001831, + "step": 4671 + }, + { + "epoch": 1.0764976958525345, + "grad_norm": 1.0764379613448063, + "learning_rate": 9.565814969350628e-07, + "loss": 0.7679359316825867, + "step": 4672 + }, + { + "epoch": 1.0767281105990782, + "grad_norm": 0.8770076747846444, + "learning_rate": 9.562008755447444e-07, + "loss": 0.803286612033844, + "step": 4673 + }, + { + "epoch": 1.0769585253456222, + "grad_norm": 0.9139287879253918, + "learning_rate": 9.558202605118112e-07, + "loss": 0.6302975416183472, + "step": 4674 + }, + { + "epoch": 1.077188940092166, + "grad_norm": 1.1929014758233443, + "learning_rate": 9.554396518915085e-07, + "loss": 0.7441667914390564, + "step": 4675 + }, + { + "epoch": 1.0774193548387097, + "grad_norm": 1.1469726623234646, + "learning_rate": 9.550590497390815e-07, + "loss": 0.805221438407898, + "step": 4676 + }, + { + "epoch": 1.0776497695852534, + "grad_norm": 1.1540692428304171, + "learning_rate": 9.54678454109774e-07, + "loss": 0.9557743072509766, + "step": 4677 + }, + { + "epoch": 
1.0778801843317973, + "grad_norm": 1.0781366924036009, + "learning_rate": 9.542978650588284e-07, + "loss": 0.7361980080604553, + "step": 4678 + }, + { + "epoch": 1.078110599078341, + "grad_norm": 1.2143012487351885, + "learning_rate": 9.539172826414876e-07, + "loss": 0.7474843263626099, + "step": 4679 + }, + { + "epoch": 1.0783410138248848, + "grad_norm": 1.0143818885553835, + "learning_rate": 9.535367069129923e-07, + "loss": 0.595927357673645, + "step": 4680 + }, + { + "epoch": 1.0785714285714285, + "grad_norm": 1.1128254146821686, + "learning_rate": 9.531561379285818e-07, + "loss": 0.894598126411438, + "step": 4681 + }, + { + "epoch": 1.0788018433179722, + "grad_norm": 1.3233034879697116, + "learning_rate": 9.527755757434966e-07, + "loss": 0.915902853012085, + "step": 4682 + }, + { + "epoch": 1.0790322580645162, + "grad_norm": 1.3436084997047495, + "learning_rate": 9.523950204129739e-07, + "loss": 0.8670432567596436, + "step": 4683 + }, + { + "epoch": 1.07926267281106, + "grad_norm": 1.119487791223308, + "learning_rate": 9.520144719922508e-07, + "loss": 0.7829893231391907, + "step": 4684 + }, + { + "epoch": 1.0794930875576036, + "grad_norm": 1.1633745895382166, + "learning_rate": 9.516339305365638e-07, + "loss": 0.6584970951080322, + "step": 4685 + }, + { + "epoch": 1.0797235023041474, + "grad_norm": 1.0240703451548752, + "learning_rate": 9.512533961011478e-07, + "loss": 0.7853457927703857, + "step": 4686 + }, + { + "epoch": 1.0799539170506913, + "grad_norm": 0.8755927642296618, + "learning_rate": 9.508728687412364e-07, + "loss": 0.7890632152557373, + "step": 4687 + }, + { + "epoch": 1.080184331797235, + "grad_norm": 1.1475809434863895, + "learning_rate": 9.504923485120634e-07, + "loss": 0.8281408548355103, + "step": 4688 + }, + { + "epoch": 1.0804147465437788, + "grad_norm": 0.9222741947208914, + "learning_rate": 9.501118354688605e-07, + "loss": 0.7878601551055908, + "step": 4689 + }, + { + "epoch": 1.0806451612903225, + "grad_norm": 1.3827368592572105, + 
"learning_rate": 9.497313296668582e-07, + "loss": 0.8332592844963074, + "step": 4690 + }, + { + "epoch": 1.0808755760368665, + "grad_norm": 1.0564274993228098, + "learning_rate": 9.493508311612874e-07, + "loss": 0.7680759429931641, + "step": 4691 + }, + { + "epoch": 1.0811059907834102, + "grad_norm": 0.9446139934289677, + "learning_rate": 9.489703400073762e-07, + "loss": 0.6368690729141235, + "step": 4692 + }, + { + "epoch": 1.081336405529954, + "grad_norm": 1.1588361552017052, + "learning_rate": 9.485898562603525e-07, + "loss": 0.7018477916717529, + "step": 4693 + }, + { + "epoch": 1.0815668202764976, + "grad_norm": 1.057066552712669, + "learning_rate": 9.482093799754432e-07, + "loss": 0.8494987487792969, + "step": 4694 + }, + { + "epoch": 1.0817972350230414, + "grad_norm": 1.0119994692546468, + "learning_rate": 9.478289112078736e-07, + "loss": 0.8146306276321411, + "step": 4695 + }, + { + "epoch": 1.0820276497695853, + "grad_norm": 1.054771760893497, + "learning_rate": 9.474484500128689e-07, + "loss": 0.7832612991333008, + "step": 4696 + }, + { + "epoch": 1.082258064516129, + "grad_norm": 1.0487197763357414, + "learning_rate": 9.470679964456519e-07, + "loss": 0.8569360971450806, + "step": 4697 + }, + { + "epoch": 1.0824884792626728, + "grad_norm": 1.1432115985173055, + "learning_rate": 9.466875505614449e-07, + "loss": 0.8145112991333008, + "step": 4698 + }, + { + "epoch": 1.0827188940092165, + "grad_norm": 1.0578814317560323, + "learning_rate": 9.463071124154697e-07, + "loss": 0.6632689237594604, + "step": 4699 + }, + { + "epoch": 1.0829493087557605, + "grad_norm": 1.1233922356996344, + "learning_rate": 9.459266820629461e-07, + "loss": 0.6299769878387451, + "step": 4700 + }, + { + "epoch": 1.0831797235023042, + "grad_norm": 1.0275349813599226, + "learning_rate": 9.455462595590925e-07, + "loss": 0.7722063064575195, + "step": 4701 + }, + { + "epoch": 1.083410138248848, + "grad_norm": 1.2023285008908922, + "learning_rate": 9.451658449591278e-07, + "loss": 
0.8219027519226074, + "step": 4702 + }, + { + "epoch": 1.0836405529953916, + "grad_norm": 1.1618110682341312, + "learning_rate": 9.44785438318268e-07, + "loss": 0.9078400731086731, + "step": 4703 + }, + { + "epoch": 1.0838709677419356, + "grad_norm": 1.087404948952653, + "learning_rate": 9.444050396917286e-07, + "loss": 0.8062041997909546, + "step": 4704 + }, + { + "epoch": 1.0841013824884793, + "grad_norm": 0.9599318157385525, + "learning_rate": 9.440246491347242e-07, + "loss": 0.6379001140594482, + "step": 4705 + }, + { + "epoch": 1.084331797235023, + "grad_norm": 1.179840039843376, + "learning_rate": 9.436442667024679e-07, + "loss": 0.919986367225647, + "step": 4706 + }, + { + "epoch": 1.0845622119815668, + "grad_norm": 1.025427308273649, + "learning_rate": 9.432638924501715e-07, + "loss": 0.6534138917922974, + "step": 4707 + }, + { + "epoch": 1.0847926267281105, + "grad_norm": 1.1537368190719173, + "learning_rate": 9.428835264330462e-07, + "loss": 0.8340045809745789, + "step": 4708 + }, + { + "epoch": 1.0850230414746544, + "grad_norm": 1.2598648406656967, + "learning_rate": 9.425031687063014e-07, + "loss": 0.8347625732421875, + "step": 4709 + }, + { + "epoch": 1.0852534562211982, + "grad_norm": 1.080310831214647, + "learning_rate": 9.421228193251452e-07, + "loss": 0.807063639163971, + "step": 4710 + }, + { + "epoch": 1.085483870967742, + "grad_norm": 0.8480154931503633, + "learning_rate": 9.417424783447855e-07, + "loss": 0.7375985383987427, + "step": 4711 + }, + { + "epoch": 1.0857142857142856, + "grad_norm": 0.9219258926876724, + "learning_rate": 9.413621458204281e-07, + "loss": 0.5723168849945068, + "step": 4712 + }, + { + "epoch": 1.0859447004608296, + "grad_norm": 1.20469026899904, + "learning_rate": 9.409818218072772e-07, + "loss": 0.8272668123245239, + "step": 4713 + }, + { + "epoch": 1.0861751152073733, + "grad_norm": 1.0744380351617728, + "learning_rate": 9.406015063605368e-07, + "loss": 0.6400803327560425, + "step": 4714 + }, + { + "epoch": 
1.086405529953917, + "grad_norm": 0.9959690478635643, + "learning_rate": 9.402211995354095e-07, + "loss": 0.6829795837402344, + "step": 4715 + }, + { + "epoch": 1.0866359447004608, + "grad_norm": 1.0434747079590168, + "learning_rate": 9.398409013870954e-07, + "loss": 0.8509865999221802, + "step": 4716 + }, + { + "epoch": 1.0868663594470047, + "grad_norm": 1.0730582514021882, + "learning_rate": 9.394606119707954e-07, + "loss": 0.895818829536438, + "step": 4717 + }, + { + "epoch": 1.0870967741935484, + "grad_norm": 1.2584943519033869, + "learning_rate": 9.390803313417072e-07, + "loss": 0.8534268140792847, + "step": 4718 + }, + { + "epoch": 1.0873271889400922, + "grad_norm": 1.0910485662903118, + "learning_rate": 9.38700059555028e-07, + "loss": 0.8603401184082031, + "step": 4719 + }, + { + "epoch": 1.087557603686636, + "grad_norm": 1.1060380385520165, + "learning_rate": 9.383197966659542e-07, + "loss": 0.8810417652130127, + "step": 4720 + }, + { + "epoch": 1.0877880184331796, + "grad_norm": 1.078874247367276, + "learning_rate": 9.3793954272968e-07, + "loss": 0.7144299149513245, + "step": 4721 + }, + { + "epoch": 1.0880184331797236, + "grad_norm": 1.3140311568193026, + "learning_rate": 9.375592978013994e-07, + "loss": 0.8780069351196289, + "step": 4722 + }, + { + "epoch": 1.0882488479262673, + "grad_norm": 1.1329108063995987, + "learning_rate": 9.371790619363041e-07, + "loss": 0.7976780533790588, + "step": 4723 + }, + { + "epoch": 1.088479262672811, + "grad_norm": 1.0979402846559465, + "learning_rate": 9.367988351895846e-07, + "loss": 0.9183385372161865, + "step": 4724 + }, + { + "epoch": 1.0887096774193548, + "grad_norm": 1.0551038276717553, + "learning_rate": 9.364186176164306e-07, + "loss": 0.7891188859939575, + "step": 4725 + }, + { + "epoch": 1.0889400921658987, + "grad_norm": 0.9930223107211231, + "learning_rate": 9.360384092720301e-07, + "loss": 0.7586535215377808, + "step": 4726 + }, + { + "epoch": 1.0891705069124424, + "grad_norm": 1.1542507976324667, + 
"learning_rate": 9.356582102115696e-07, + "loss": 0.7915316224098206, + "step": 4727 + }, + { + "epoch": 1.0894009216589862, + "grad_norm": 0.901378484170352, + "learning_rate": 9.352780204902349e-07, + "loss": 0.6608257293701172, + "step": 4728 + }, + { + "epoch": 1.08963133640553, + "grad_norm": 1.1982692712799377, + "learning_rate": 9.3489784016321e-07, + "loss": 0.8375273942947388, + "step": 4729 + }, + { + "epoch": 1.0898617511520738, + "grad_norm": 1.43591815259741, + "learning_rate": 9.345176692856768e-07, + "loss": 0.7629055976867676, + "step": 4730 + }, + { + "epoch": 1.0900921658986176, + "grad_norm": 1.3741081876453818, + "learning_rate": 9.341375079128177e-07, + "loss": 0.8037875890731812, + "step": 4731 + }, + { + "epoch": 1.0903225806451613, + "grad_norm": 1.1252370555828741, + "learning_rate": 9.337573560998123e-07, + "loss": 0.8843437433242798, + "step": 4732 + }, + { + "epoch": 1.090552995391705, + "grad_norm": 1.058447534132799, + "learning_rate": 9.333772139018387e-07, + "loss": 0.7164910435676575, + "step": 4733 + }, + { + "epoch": 1.0907834101382488, + "grad_norm": 1.144703504042011, + "learning_rate": 9.329970813740742e-07, + "loss": 0.8076978921890259, + "step": 4734 + }, + { + "epoch": 1.0910138248847927, + "grad_norm": 1.091507904535434, + "learning_rate": 9.326169585716949e-07, + "loss": 0.7265340089797974, + "step": 4735 + }, + { + "epoch": 1.0912442396313364, + "grad_norm": 0.9010611551057135, + "learning_rate": 9.322368455498747e-07, + "loss": 0.7438681125640869, + "step": 4736 + }, + { + "epoch": 1.0914746543778802, + "grad_norm": 1.455573835192626, + "learning_rate": 9.318567423637868e-07, + "loss": 0.8760604858398438, + "step": 4737 + }, + { + "epoch": 1.0917050691244239, + "grad_norm": 1.064698472707054, + "learning_rate": 9.314766490686026e-07, + "loss": 0.7216911315917969, + "step": 4738 + }, + { + "epoch": 1.0919354838709678, + "grad_norm": 1.207051606070953, + "learning_rate": 9.310965657194916e-07, + "loss": 0.8003707528114319, 
+ "step": 4739 + }, + { + "epoch": 1.0921658986175116, + "grad_norm": 0.9484074376515712, + "learning_rate": 9.307164923716233e-07, + "loss": 0.6496548652648926, + "step": 4740 + }, + { + "epoch": 1.0923963133640553, + "grad_norm": 1.0304975730869472, + "learning_rate": 9.303364290801644e-07, + "loss": 0.7659108638763428, + "step": 4741 + }, + { + "epoch": 1.092626728110599, + "grad_norm": 1.016478094690519, + "learning_rate": 9.299563759002802e-07, + "loss": 0.7799512147903442, + "step": 4742 + }, + { + "epoch": 1.092857142857143, + "grad_norm": 0.9921566283768914, + "learning_rate": 9.295763328871357e-07, + "loss": 0.7675691246986389, + "step": 4743 + }, + { + "epoch": 1.0930875576036867, + "grad_norm": 1.0513054078420998, + "learning_rate": 9.291963000958931e-07, + "loss": 0.677080512046814, + "step": 4744 + }, + { + "epoch": 1.0933179723502304, + "grad_norm": 1.0842277521538888, + "learning_rate": 9.28816277581714e-07, + "loss": 0.7885928153991699, + "step": 4745 + }, + { + "epoch": 1.0935483870967742, + "grad_norm": 1.07543209238493, + "learning_rate": 9.28436265399758e-07, + "loss": 0.6568010449409485, + "step": 4746 + }, + { + "epoch": 1.0937788018433179, + "grad_norm": 1.076830779801181, + "learning_rate": 9.280562636051827e-07, + "loss": 0.9438225030899048, + "step": 4747 + }, + { + "epoch": 1.0940092165898618, + "grad_norm": 1.0420094595322553, + "learning_rate": 9.276762722531461e-07, + "loss": 0.8119498491287231, + "step": 4748 + }, + { + "epoch": 1.0942396313364056, + "grad_norm": 0.8228863679585698, + "learning_rate": 9.272962913988029e-07, + "loss": 0.7570452690124512, + "step": 4749 + }, + { + "epoch": 1.0944700460829493, + "grad_norm": 1.0990726312613297, + "learning_rate": 9.269163210973063e-07, + "loss": 0.7541190385818481, + "step": 4750 + }, + { + "epoch": 1.094700460829493, + "grad_norm": 1.015570437282189, + "learning_rate": 9.265363614038093e-07, + "loss": 0.6481921672821045, + "step": 4751 + }, + { + "epoch": 1.094930875576037, + 
"grad_norm": 1.1173263478947815, + "learning_rate": 9.261564123734623e-07, + "loss": 0.7997267246246338, + "step": 4752 + }, + { + "epoch": 1.0951612903225807, + "grad_norm": 1.4388540160892265, + "learning_rate": 9.25776474061414e-07, + "loss": 0.9093008637428284, + "step": 4753 + }, + { + "epoch": 1.0953917050691244, + "grad_norm": 1.3909093606880625, + "learning_rate": 9.253965465228122e-07, + "loss": 0.7609673142433167, + "step": 4754 + }, + { + "epoch": 1.0956221198156681, + "grad_norm": 1.311027419629587, + "learning_rate": 9.250166298128032e-07, + "loss": 0.8338878154754639, + "step": 4755 + }, + { + "epoch": 1.095852534562212, + "grad_norm": 1.1912490488387477, + "learning_rate": 9.246367239865308e-07, + "loss": 0.7503781318664551, + "step": 4756 + }, + { + "epoch": 1.0960829493087558, + "grad_norm": 1.0417471668794835, + "learning_rate": 9.242568290991384e-07, + "loss": 0.7630816698074341, + "step": 4757 + }, + { + "epoch": 1.0963133640552996, + "grad_norm": 1.4287601409586015, + "learning_rate": 9.238769452057671e-07, + "loss": 0.8026378154754639, + "step": 4758 + }, + { + "epoch": 1.0965437788018433, + "grad_norm": 1.0309152969100308, + "learning_rate": 9.234970723615558e-07, + "loss": 0.8256090879440308, + "step": 4759 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 1.1197681925892131, + "learning_rate": 9.231172106216437e-07, + "loss": 0.7331836223602295, + "step": 4760 + }, + { + "epoch": 1.097004608294931, + "grad_norm": 1.1300301361381715, + "learning_rate": 9.227373600411667e-07, + "loss": 0.886203944683075, + "step": 4761 + }, + { + "epoch": 1.0972350230414747, + "grad_norm": 1.113695044174903, + "learning_rate": 9.223575206752592e-07, + "loss": 0.7802814245223999, + "step": 4762 + }, + { + "epoch": 1.0974654377880184, + "grad_norm": 1.3075634566953063, + "learning_rate": 9.219776925790552e-07, + "loss": 0.9682798385620117, + "step": 4763 + }, + { + "epoch": 1.0976958525345621, + "grad_norm": 1.1689607681364365, + "learning_rate": 
9.215978758076858e-07, + "loss": 0.8733793497085571, + "step": 4764 + }, + { + "epoch": 1.097926267281106, + "grad_norm": 1.0890238577837303, + "learning_rate": 9.212180704162809e-07, + "loss": 0.8403818607330322, + "step": 4765 + }, + { + "epoch": 1.0981566820276498, + "grad_norm": 1.0898706001284595, + "learning_rate": 9.208382764599688e-07, + "loss": 0.7957059144973755, + "step": 4766 + }, + { + "epoch": 1.0983870967741935, + "grad_norm": 1.290224136897281, + "learning_rate": 9.204584939938761e-07, + "loss": 0.8943477272987366, + "step": 4767 + }, + { + "epoch": 1.0986175115207373, + "grad_norm": 1.0710230295284595, + "learning_rate": 9.200787230731273e-07, + "loss": 0.7084406018257141, + "step": 4768 + }, + { + "epoch": 1.098847926267281, + "grad_norm": 1.190836398847277, + "learning_rate": 9.196989637528465e-07, + "loss": 0.8374637365341187, + "step": 4769 + }, + { + "epoch": 1.099078341013825, + "grad_norm": 1.3757022429132086, + "learning_rate": 9.193192160881543e-07, + "loss": 0.6963578462600708, + "step": 4770 + }, + { + "epoch": 1.0993087557603687, + "grad_norm": 0.9887346096468936, + "learning_rate": 9.189394801341716e-07, + "loss": 0.6732540130615234, + "step": 4771 + }, + { + "epoch": 1.0995391705069124, + "grad_norm": 1.092710990198668, + "learning_rate": 9.185597559460159e-07, + "loss": 0.7104849219322205, + "step": 4772 + }, + { + "epoch": 1.0997695852534561, + "grad_norm": 1.3885045688613133, + "learning_rate": 9.181800435788037e-07, + "loss": 0.8461153507232666, + "step": 4773 + }, + { + "epoch": 1.1, + "grad_norm": 1.0447899457724443, + "learning_rate": 9.178003430876502e-07, + "loss": 0.7120847105979919, + "step": 4774 + }, + { + "epoch": 1.1002304147465438, + "grad_norm": 1.0881207229188647, + "learning_rate": 9.174206545276677e-07, + "loss": 0.8108617067337036, + "step": 4775 + }, + { + "epoch": 1.1004608294930875, + "grad_norm": 0.9153115264713604, + "learning_rate": 9.170409779539678e-07, + "loss": 0.7019558548927307, + "step": 4776 + }, + { 
+ "epoch": 1.1006912442396313, + "grad_norm": 0.9272452690627847, + "learning_rate": 9.166613134216605e-07, + "loss": 0.7563629150390625, + "step": 4777 + }, + { + "epoch": 1.100921658986175, + "grad_norm": 0.9795708897837844, + "learning_rate": 9.162816609858533e-07, + "loss": 0.777009129524231, + "step": 4778 + }, + { + "epoch": 1.101152073732719, + "grad_norm": 1.143317572483065, + "learning_rate": 9.159020207016516e-07, + "loss": 0.812334418296814, + "step": 4779 + }, + { + "epoch": 1.1013824884792627, + "grad_norm": 0.8685579046345627, + "learning_rate": 9.155223926241608e-07, + "loss": 0.609114408493042, + "step": 4780 + }, + { + "epoch": 1.1016129032258064, + "grad_norm": 1.1689773804888128, + "learning_rate": 9.151427768084828e-07, + "loss": 0.8277549147605896, + "step": 4781 + }, + { + "epoch": 1.1018433179723501, + "grad_norm": 1.2556834532396843, + "learning_rate": 9.147631733097179e-07, + "loss": 0.8649400472640991, + "step": 4782 + }, + { + "epoch": 1.102073732718894, + "grad_norm": 0.8878271909604711, + "learning_rate": 9.14383582182966e-07, + "loss": 0.7894293665885925, + "step": 4783 + }, + { + "epoch": 1.1023041474654378, + "grad_norm": 1.3844953995401048, + "learning_rate": 9.14004003483324e-07, + "loss": 0.9121778011322021, + "step": 4784 + }, + { + "epoch": 1.1025345622119815, + "grad_norm": 1.0899535734318635, + "learning_rate": 9.136244372658867e-07, + "loss": 0.7162299156188965, + "step": 4785 + }, + { + "epoch": 1.1027649769585253, + "grad_norm": 1.1193596859001855, + "learning_rate": 9.132448835857482e-07, + "loss": 0.7059808969497681, + "step": 4786 + }, + { + "epoch": 1.1029953917050692, + "grad_norm": 1.2034226051758443, + "learning_rate": 9.128653424979999e-07, + "loss": 0.8172405958175659, + "step": 4787 + }, + { + "epoch": 1.103225806451613, + "grad_norm": 0.876114016677297, + "learning_rate": 9.124858140577316e-07, + "loss": 0.7672706842422485, + "step": 4788 + }, + { + "epoch": 1.1034562211981567, + "grad_norm": 1.2578760464526295, 
+ "learning_rate": 9.121062983200318e-07, + "loss": 0.7054900527000427, + "step": 4789 + }, + { + "epoch": 1.1036866359447004, + "grad_norm": 1.0063162295686867, + "learning_rate": 9.117267953399865e-07, + "loss": 0.888538122177124, + "step": 4790 + }, + { + "epoch": 1.1039170506912441, + "grad_norm": 1.1758406583219614, + "learning_rate": 9.113473051726796e-07, + "loss": 0.7918668985366821, + "step": 4791 + }, + { + "epoch": 1.104147465437788, + "grad_norm": 1.220328177578168, + "learning_rate": 9.109678278731942e-07, + "loss": 0.7385697960853577, + "step": 4792 + }, + { + "epoch": 1.1043778801843318, + "grad_norm": 1.0627777124669568, + "learning_rate": 9.105883634966107e-07, + "loss": 0.6394056081771851, + "step": 4793 + }, + { + "epoch": 1.1046082949308755, + "grad_norm": 1.2147960582385422, + "learning_rate": 9.102089120980081e-07, + "loss": 0.8372077941894531, + "step": 4794 + }, + { + "epoch": 1.1048387096774193, + "grad_norm": 1.0764884273918471, + "learning_rate": 9.098294737324628e-07, + "loss": 0.6944066286087036, + "step": 4795 + }, + { + "epoch": 1.1050691244239632, + "grad_norm": 1.3210680270500303, + "learning_rate": 9.0945004845505e-07, + "loss": 0.8480994701385498, + "step": 4796 + }, + { + "epoch": 1.105299539170507, + "grad_norm": 1.3778825395187644, + "learning_rate": 9.090706363208431e-07, + "loss": 0.837437629699707, + "step": 4797 + }, + { + "epoch": 1.1055299539170507, + "grad_norm": 1.2126670676110476, + "learning_rate": 9.086912373849128e-07, + "loss": 0.8610002398490906, + "step": 4798 + }, + { + "epoch": 1.1057603686635944, + "grad_norm": 1.1204211704902753, + "learning_rate": 9.083118517023281e-07, + "loss": 0.7323784828186035, + "step": 4799 + }, + { + "epoch": 1.1059907834101383, + "grad_norm": 1.394483021595883, + "learning_rate": 9.079324793281573e-07, + "loss": 0.7838932871818542, + "step": 4800 + }, + { + "epoch": 1.106221198156682, + "grad_norm": 1.1333807320340106, + "learning_rate": 9.075531203174651e-07, + "loss": 
0.7655705213546753, + "step": 4801 + }, + { + "epoch": 1.1064516129032258, + "grad_norm": 1.199812107745982, + "learning_rate": 9.071737747253148e-07, + "loss": 0.8320151567459106, + "step": 4802 + }, + { + "epoch": 1.1066820276497695, + "grad_norm": 1.0428789095876687, + "learning_rate": 9.067944426067687e-07, + "loss": 0.7434612512588501, + "step": 4803 + }, + { + "epoch": 1.1069124423963133, + "grad_norm": 1.348302596081637, + "learning_rate": 9.064151240168857e-07, + "loss": 0.8351321220397949, + "step": 4804 + }, + { + "epoch": 1.1071428571428572, + "grad_norm": 0.9731377071478325, + "learning_rate": 9.060358190107233e-07, + "loss": 0.6648053526878357, + "step": 4805 + }, + { + "epoch": 1.107373271889401, + "grad_norm": 1.236779616553706, + "learning_rate": 9.056565276433377e-07, + "loss": 0.7507585287094116, + "step": 4806 + }, + { + "epoch": 1.1076036866359447, + "grad_norm": 1.0866303306873377, + "learning_rate": 9.052772499697823e-07, + "loss": 0.7638635635375977, + "step": 4807 + }, + { + "epoch": 1.1078341013824884, + "grad_norm": 1.3204341922490346, + "learning_rate": 9.048979860451081e-07, + "loss": 0.8066626191139221, + "step": 4808 + }, + { + "epoch": 1.1080645161290323, + "grad_norm": 0.9459322006964221, + "learning_rate": 9.045187359243659e-07, + "loss": 0.7090466022491455, + "step": 4809 + }, + { + "epoch": 1.108294930875576, + "grad_norm": 1.1112578831827626, + "learning_rate": 9.041394996626027e-07, + "loss": 0.7071142792701721, + "step": 4810 + }, + { + "epoch": 1.1085253456221198, + "grad_norm": 1.0134445673972028, + "learning_rate": 9.037602773148638e-07, + "loss": 0.7103942036628723, + "step": 4811 + }, + { + "epoch": 1.1087557603686635, + "grad_norm": 1.1348721368793189, + "learning_rate": 9.033810689361936e-07, + "loss": 0.8408492207527161, + "step": 4812 + }, + { + "epoch": 1.1089861751152075, + "grad_norm": 0.9439878571651674, + "learning_rate": 9.030018745816335e-07, + "loss": 0.7621495723724365, + "step": 4813 + }, + { + "epoch": 
1.1092165898617512, + "grad_norm": 1.152461687801826, + "learning_rate": 9.026226943062225e-07, + "loss": 0.7105196714401245, + "step": 4814 + }, + { + "epoch": 1.109447004608295, + "grad_norm": 1.079152769158689, + "learning_rate": 9.022435281649986e-07, + "loss": 0.8733636140823364, + "step": 4815 + }, + { + "epoch": 1.1096774193548387, + "grad_norm": 1.223534472251507, + "learning_rate": 9.018643762129974e-07, + "loss": 0.9097845554351807, + "step": 4816 + }, + { + "epoch": 1.1099078341013824, + "grad_norm": 1.2220607424054495, + "learning_rate": 9.014852385052519e-07, + "loss": 0.8743059635162354, + "step": 4817 + }, + { + "epoch": 1.1101382488479263, + "grad_norm": 1.0404677289419784, + "learning_rate": 9.011061150967937e-07, + "loss": 0.7898736000061035, + "step": 4818 + }, + { + "epoch": 1.11036866359447, + "grad_norm": 1.1698125073586854, + "learning_rate": 9.007270060426516e-07, + "loss": 0.871254563331604, + "step": 4819 + }, + { + "epoch": 1.1105990783410138, + "grad_norm": 1.323286168379092, + "learning_rate": 9.003479113978536e-07, + "loss": 0.6833579540252686, + "step": 4820 + }, + { + "epoch": 1.1108294930875575, + "grad_norm": 1.285642784687423, + "learning_rate": 8.999688312174243e-07, + "loss": 0.8289071321487427, + "step": 4821 + }, + { + "epoch": 1.1110599078341015, + "grad_norm": 1.1884737282905606, + "learning_rate": 8.995897655563864e-07, + "loss": 0.6798583269119263, + "step": 4822 + }, + { + "epoch": 1.1112903225806452, + "grad_norm": 1.1108358813410262, + "learning_rate": 8.992107144697614e-07, + "loss": 0.6518250703811646, + "step": 4823 + }, + { + "epoch": 1.111520737327189, + "grad_norm": 1.3596600109698966, + "learning_rate": 8.988316780125679e-07, + "loss": 0.9316667318344116, + "step": 4824 + }, + { + "epoch": 1.1117511520737327, + "grad_norm": 0.9951654747842746, + "learning_rate": 8.98452656239822e-07, + "loss": 0.755483865737915, + "step": 4825 + }, + { + "epoch": 1.1119815668202766, + "grad_norm": 1.0146600815927005, + 
"learning_rate": 8.980736492065391e-07, + "loss": 0.7892755270004272, + "step": 4826 + }, + { + "epoch": 1.1122119815668203, + "grad_norm": 0.9930161298314518, + "learning_rate": 8.976946569677308e-07, + "loss": 0.703255295753479, + "step": 4827 + }, + { + "epoch": 1.112442396313364, + "grad_norm": 1.1559327578235137, + "learning_rate": 8.973156795784073e-07, + "loss": 0.7885171175003052, + "step": 4828 + }, + { + "epoch": 1.1126728110599078, + "grad_norm": 1.1407519814570228, + "learning_rate": 8.969367170935776e-07, + "loss": 0.8035199642181396, + "step": 4829 + }, + { + "epoch": 1.1129032258064515, + "grad_norm": 1.0245821351407076, + "learning_rate": 8.965577695682467e-07, + "loss": 0.8272112607955933, + "step": 4830 + }, + { + "epoch": 1.1131336405529955, + "grad_norm": 1.1104598721433627, + "learning_rate": 8.961788370574182e-07, + "loss": 0.8734478950500488, + "step": 4831 + }, + { + "epoch": 1.1133640552995392, + "grad_norm": 1.2722110058519596, + "learning_rate": 8.957999196160946e-07, + "loss": 0.7487469911575317, + "step": 4832 + }, + { + "epoch": 1.113594470046083, + "grad_norm": 1.3783344397611896, + "learning_rate": 8.954210172992748e-07, + "loss": 0.9193693399429321, + "step": 4833 + }, + { + "epoch": 1.1138248847926266, + "grad_norm": 1.4522583636726432, + "learning_rate": 8.950421301619555e-07, + "loss": 0.8228428959846497, + "step": 4834 + }, + { + "epoch": 1.1140552995391706, + "grad_norm": 0.9646412535671615, + "learning_rate": 8.946632582591324e-07, + "loss": 0.7419015169143677, + "step": 4835 + }, + { + "epoch": 1.1142857142857143, + "grad_norm": 1.1957500872812925, + "learning_rate": 8.942844016457975e-07, + "loss": 0.827411949634552, + "step": 4836 + }, + { + "epoch": 1.114516129032258, + "grad_norm": 0.9975223373000859, + "learning_rate": 8.93905560376942e-07, + "loss": 0.7066754102706909, + "step": 4837 + }, + { + "epoch": 1.1147465437788018, + "grad_norm": 1.2336329306802043, + "learning_rate": 8.93526734507554e-07, + "loss": 
0.7201621532440186, + "step": 4838 + }, + { + "epoch": 1.1149769585253457, + "grad_norm": 0.8521980282185057, + "learning_rate": 8.931479240926196e-07, + "loss": 0.6363521814346313, + "step": 4839 + }, + { + "epoch": 1.1152073732718895, + "grad_norm": 1.0065898101647581, + "learning_rate": 8.927691291871223e-07, + "loss": 0.8232909440994263, + "step": 4840 + }, + { + "epoch": 1.1154377880184332, + "grad_norm": 1.0354249430711853, + "learning_rate": 8.923903498460441e-07, + "loss": 0.7006033658981323, + "step": 4841 + }, + { + "epoch": 1.115668202764977, + "grad_norm": 1.1957171429651339, + "learning_rate": 8.920115861243638e-07, + "loss": 0.6982721090316772, + "step": 4842 + }, + { + "epoch": 1.1158986175115206, + "grad_norm": 1.039109039901578, + "learning_rate": 8.916328380770593e-07, + "loss": 0.7735922336578369, + "step": 4843 + }, + { + "epoch": 1.1161290322580646, + "grad_norm": 1.189307260310029, + "learning_rate": 8.912541057591049e-07, + "loss": 0.7430423498153687, + "step": 4844 + }, + { + "epoch": 1.1163594470046083, + "grad_norm": 1.0189703427385546, + "learning_rate": 8.908753892254729e-07, + "loss": 0.7783932685852051, + "step": 4845 + }, + { + "epoch": 1.116589861751152, + "grad_norm": 0.895546986970967, + "learning_rate": 8.904966885311339e-07, + "loss": 0.726211428642273, + "step": 4846 + }, + { + "epoch": 1.1168202764976958, + "grad_norm": 1.0042101088511581, + "learning_rate": 8.901180037310555e-07, + "loss": 0.664351761341095, + "step": 4847 + }, + { + "epoch": 1.1170506912442397, + "grad_norm": 1.192545271664204, + "learning_rate": 8.897393348802031e-07, + "loss": 0.8246554136276245, + "step": 4848 + }, + { + "epoch": 1.1172811059907835, + "grad_norm": 1.3113785088290244, + "learning_rate": 8.893606820335405e-07, + "loss": 0.9435447454452515, + "step": 4849 + }, + { + "epoch": 1.1175115207373272, + "grad_norm": 1.1196400925650334, + "learning_rate": 8.889820452460286e-07, + "loss": 0.8471171855926514, + "step": 4850 + }, + { + "epoch": 
1.117741935483871, + "grad_norm": 0.9950597161448561, + "learning_rate": 8.886034245726254e-07, + "loss": 0.6038233041763306, + "step": 4851 + }, + { + "epoch": 1.1179723502304149, + "grad_norm": 1.1171540360532777, + "learning_rate": 8.882248200682881e-07, + "loss": 0.8186997771263123, + "step": 4852 + }, + { + "epoch": 1.1182027649769586, + "grad_norm": 1.2436642718372632, + "learning_rate": 8.878462317879702e-07, + "loss": 0.789948582649231, + "step": 4853 + }, + { + "epoch": 1.1184331797235023, + "grad_norm": 1.0789321556804603, + "learning_rate": 8.87467659786623e-07, + "loss": 0.7543652057647705, + "step": 4854 + }, + { + "epoch": 1.118663594470046, + "grad_norm": 1.0717127208024606, + "learning_rate": 8.870891041191963e-07, + "loss": 0.5985269546508789, + "step": 4855 + }, + { + "epoch": 1.1188940092165898, + "grad_norm": 1.109115113465042, + "learning_rate": 8.867105648406364e-07, + "loss": 0.7676643133163452, + "step": 4856 + }, + { + "epoch": 1.1191244239631337, + "grad_norm": 1.0078052507528568, + "learning_rate": 8.863320420058881e-07, + "loss": 0.7317303419113159, + "step": 4857 + }, + { + "epoch": 1.1193548387096774, + "grad_norm": 1.117240479042085, + "learning_rate": 8.859535356698936e-07, + "loss": 0.8357843160629272, + "step": 4858 + }, + { + "epoch": 1.1195852534562212, + "grad_norm": 1.2827717071860176, + "learning_rate": 8.855750458875923e-07, + "loss": 0.7149945497512817, + "step": 4859 + }, + { + "epoch": 1.119815668202765, + "grad_norm": 1.1258754685876486, + "learning_rate": 8.851965727139214e-07, + "loss": 0.7059169411659241, + "step": 4860 + }, + { + "epoch": 1.1200460829493089, + "grad_norm": 1.0779991100813224, + "learning_rate": 8.848181162038163e-07, + "loss": 0.7530190944671631, + "step": 4861 + }, + { + "epoch": 1.1202764976958526, + "grad_norm": 1.12578616970897, + "learning_rate": 8.844396764122092e-07, + "loss": 0.808814287185669, + "step": 4862 + }, + { + "epoch": 1.1205069124423963, + "grad_norm": 1.174668121226261, + 
"learning_rate": 8.840612533940295e-07, + "loss": 0.7205604910850525, + "step": 4863 + }, + { + "epoch": 1.12073732718894, + "grad_norm": 1.0284636891818573, + "learning_rate": 8.83682847204206e-07, + "loss": 0.7493274211883545, + "step": 4864 + }, + { + "epoch": 1.120967741935484, + "grad_norm": 1.1974475439930412, + "learning_rate": 8.833044578976631e-07, + "loss": 0.8115849494934082, + "step": 4865 + }, + { + "epoch": 1.1211981566820277, + "grad_norm": 1.2224514970634248, + "learning_rate": 8.829260855293237e-07, + "loss": 0.8188419342041016, + "step": 4866 + }, + { + "epoch": 1.1214285714285714, + "grad_norm": 1.372584236180193, + "learning_rate": 8.82547730154108e-07, + "loss": 0.6152349710464478, + "step": 4867 + }, + { + "epoch": 1.1216589861751152, + "grad_norm": 0.9364210771252817, + "learning_rate": 8.821693918269333e-07, + "loss": 0.7629969120025635, + "step": 4868 + }, + { + "epoch": 1.121889400921659, + "grad_norm": 1.0637191210851928, + "learning_rate": 8.81791070602716e-07, + "loss": 0.7063733339309692, + "step": 4869 + }, + { + "epoch": 1.1221198156682028, + "grad_norm": 1.2221996591019166, + "learning_rate": 8.814127665363682e-07, + "loss": 0.729676365852356, + "step": 4870 + }, + { + "epoch": 1.1223502304147466, + "grad_norm": 1.2363948838699006, + "learning_rate": 8.810344796827999e-07, + "loss": 0.8188877105712891, + "step": 4871 + }, + { + "epoch": 1.1225806451612903, + "grad_norm": 1.4364824515163135, + "learning_rate": 8.806562100969199e-07, + "loss": 0.70793217420578, + "step": 4872 + }, + { + "epoch": 1.122811059907834, + "grad_norm": 1.2471671753090219, + "learning_rate": 8.802779578336329e-07, + "loss": 0.8086484670639038, + "step": 4873 + }, + { + "epoch": 1.123041474654378, + "grad_norm": 1.209058465827679, + "learning_rate": 8.798997229478417e-07, + "loss": 0.8954081535339355, + "step": 4874 + }, + { + "epoch": 1.1232718894009217, + "grad_norm": 1.0352094557860352, + "learning_rate": 8.795215054944469e-07, + "loss": 0.6615205407142639, 
+ "step": 4875 + }, + { + "epoch": 1.1235023041474654, + "grad_norm": 1.3182700744777898, + "learning_rate": 8.79143305528346e-07, + "loss": 0.6851116418838501, + "step": 4876 + }, + { + "epoch": 1.1237327188940092, + "grad_norm": 0.9311237252586447, + "learning_rate": 8.787651231044342e-07, + "loss": 0.7594672441482544, + "step": 4877 + }, + { + "epoch": 1.123963133640553, + "grad_norm": 1.2505187148095604, + "learning_rate": 8.783869582776044e-07, + "loss": 0.7170572280883789, + "step": 4878 + }, + { + "epoch": 1.1241935483870968, + "grad_norm": 1.1244851690255748, + "learning_rate": 8.780088111027467e-07, + "loss": 0.9139137864112854, + "step": 4879 + }, + { + "epoch": 1.1244239631336406, + "grad_norm": 1.2468380143920514, + "learning_rate": 8.776306816347482e-07, + "loss": 0.8716791868209839, + "step": 4880 + }, + { + "epoch": 1.1246543778801843, + "grad_norm": 1.5043743610246187, + "learning_rate": 8.772525699284946e-07, + "loss": 0.840330958366394, + "step": 4881 + }, + { + "epoch": 1.124884792626728, + "grad_norm": 1.28802116274467, + "learning_rate": 8.768744760388681e-07, + "loss": 0.7713445425033569, + "step": 4882 + }, + { + "epoch": 1.125115207373272, + "grad_norm": 1.2058132743835892, + "learning_rate": 8.764964000207479e-07, + "loss": 0.8964767456054688, + "step": 4883 + }, + { + "epoch": 1.1253456221198157, + "grad_norm": 1.12361515551762, + "learning_rate": 8.761183419290121e-07, + "loss": 0.8038421869277954, + "step": 4884 + }, + { + "epoch": 1.1255760368663594, + "grad_norm": 0.7722654284456119, + "learning_rate": 8.757403018185351e-07, + "loss": 0.6601011753082275, + "step": 4885 + }, + { + "epoch": 1.1258064516129032, + "grad_norm": 0.8011265369746955, + "learning_rate": 8.753622797441885e-07, + "loss": 0.8226664066314697, + "step": 4886 + }, + { + "epoch": 1.1260368663594469, + "grad_norm": 1.0633366554284305, + "learning_rate": 8.749842757608422e-07, + "loss": 0.7062248587608337, + "step": 4887 + }, + { + "epoch": 1.1262672811059908, + 
"grad_norm": 1.318395948514478, + "learning_rate": 8.746062899233628e-07, + "loss": 0.8642051815986633, + "step": 4888 + }, + { + "epoch": 1.1264976958525346, + "grad_norm": 1.2332349128972684, + "learning_rate": 8.74228322286614e-07, + "loss": 0.8194048404693604, + "step": 4889 + }, + { + "epoch": 1.1267281105990783, + "grad_norm": 1.121678775220638, + "learning_rate": 8.738503729054583e-07, + "loss": 0.6957820653915405, + "step": 4890 + }, + { + "epoch": 1.1269585253456222, + "grad_norm": 0.9775692035561586, + "learning_rate": 8.734724418347537e-07, + "loss": 0.8107770681381226, + "step": 4891 + }, + { + "epoch": 1.127188940092166, + "grad_norm": 1.1508754542191086, + "learning_rate": 8.730945291293563e-07, + "loss": 0.7727551460266113, + "step": 4892 + }, + { + "epoch": 1.1274193548387097, + "grad_norm": 1.1347047929449647, + "learning_rate": 8.727166348441207e-07, + "loss": 0.7389936447143555, + "step": 4893 + }, + { + "epoch": 1.1276497695852534, + "grad_norm": 1.2733389095695957, + "learning_rate": 8.723387590338964e-07, + "loss": 0.7666463851928711, + "step": 4894 + }, + { + "epoch": 1.1278801843317972, + "grad_norm": 1.1990629153183452, + "learning_rate": 8.719609017535328e-07, + "loss": 0.7795453071594238, + "step": 4895 + }, + { + "epoch": 1.128110599078341, + "grad_norm": 1.1062968437903737, + "learning_rate": 8.715830630578746e-07, + "loss": 0.8560752272605896, + "step": 4896 + }, + { + "epoch": 1.1283410138248848, + "grad_norm": 1.2251043883259816, + "learning_rate": 8.712052430017645e-07, + "loss": 0.7574455738067627, + "step": 4897 + }, + { + "epoch": 1.1285714285714286, + "grad_norm": 1.3025894471719623, + "learning_rate": 8.708274416400432e-07, + "loss": 0.8017276525497437, + "step": 4898 + }, + { + "epoch": 1.1288018433179723, + "grad_norm": 0.9942840399227726, + "learning_rate": 8.704496590275477e-07, + "loss": 0.7046157121658325, + "step": 4899 + }, + { + "epoch": 1.129032258064516, + "grad_norm": 1.187705347283351, + "learning_rate": 
8.700718952191124e-07, + "loss": 0.7352035641670227, + "step": 4900 + }, + { + "epoch": 1.12926267281106, + "grad_norm": 0.9471130432852718, + "learning_rate": 8.696941502695698e-07, + "loss": 0.6444690227508545, + "step": 4901 + }, + { + "epoch": 1.1294930875576037, + "grad_norm": 1.0628821586759927, + "learning_rate": 8.69316424233749e-07, + "loss": 0.7909440994262695, + "step": 4902 + }, + { + "epoch": 1.1297235023041474, + "grad_norm": 0.9483928902743061, + "learning_rate": 8.689387171664756e-07, + "loss": 0.646790087223053, + "step": 4903 + }, + { + "epoch": 1.1299539170506911, + "grad_norm": 1.2796319408131067, + "learning_rate": 8.685610291225744e-07, + "loss": 0.786831796169281, + "step": 4904 + }, + { + "epoch": 1.130184331797235, + "grad_norm": 1.143272972798168, + "learning_rate": 8.681833601568657e-07, + "loss": 0.8004348278045654, + "step": 4905 + }, + { + "epoch": 1.1304147465437788, + "grad_norm": 0.996600703731369, + "learning_rate": 8.678057103241677e-07, + "loss": 0.6846532821655273, + "step": 4906 + }, + { + "epoch": 1.1306451612903226, + "grad_norm": 1.299426572962062, + "learning_rate": 8.67428079679296e-07, + "loss": 0.7555707693099976, + "step": 4907 + }, + { + "epoch": 1.1308755760368663, + "grad_norm": 1.3809719247833205, + "learning_rate": 8.67050468277063e-07, + "loss": 0.852725625038147, + "step": 4908 + }, + { + "epoch": 1.1311059907834102, + "grad_norm": 0.9844151846464619, + "learning_rate": 8.666728761722782e-07, + "loss": 0.6990044713020325, + "step": 4909 + }, + { + "epoch": 1.131336405529954, + "grad_norm": 1.223366973696945, + "learning_rate": 8.662953034197493e-07, + "loss": 0.8050999641418457, + "step": 4910 + }, + { + "epoch": 1.1315668202764977, + "grad_norm": 1.3085197840977536, + "learning_rate": 8.659177500742802e-07, + "loss": 0.8169291019439697, + "step": 4911 + }, + { + "epoch": 1.1317972350230414, + "grad_norm": 1.081294035300873, + "learning_rate": 8.655402161906716e-07, + "loss": 0.7814679145812988, + "step": 4912 + 
}, + { + "epoch": 1.1320276497695851, + "grad_norm": 1.237970773045493, + "learning_rate": 8.651627018237231e-07, + "loss": 0.6734834313392639, + "step": 4913 + }, + { + "epoch": 1.132258064516129, + "grad_norm": 1.1143770605215586, + "learning_rate": 8.647852070282299e-07, + "loss": 0.8765416145324707, + "step": 4914 + }, + { + "epoch": 1.1324884792626728, + "grad_norm": 1.3797966848789986, + "learning_rate": 8.644077318589847e-07, + "loss": 1.0023764371871948, + "step": 4915 + }, + { + "epoch": 1.1327188940092165, + "grad_norm": 1.0387287080137257, + "learning_rate": 8.64030276370778e-07, + "loss": 0.7561393976211548, + "step": 4916 + }, + { + "epoch": 1.1329493087557603, + "grad_norm": 1.123376400728965, + "learning_rate": 8.636528406183961e-07, + "loss": 0.8252062797546387, + "step": 4917 + }, + { + "epoch": 1.1331797235023042, + "grad_norm": 1.3939443114820729, + "learning_rate": 8.632754246566246e-07, + "loss": 0.7598097324371338, + "step": 4918 + }, + { + "epoch": 1.133410138248848, + "grad_norm": 0.8823184534346743, + "learning_rate": 8.628980285402438e-07, + "loss": 0.6113640069961548, + "step": 4919 + }, + { + "epoch": 1.1336405529953917, + "grad_norm": 1.096652563873467, + "learning_rate": 8.625206523240325e-07, + "loss": 0.7457853555679321, + "step": 4920 + }, + { + "epoch": 1.1338709677419354, + "grad_norm": 1.0304826450193199, + "learning_rate": 8.62143296062767e-07, + "loss": 0.7334161996841431, + "step": 4921 + }, + { + "epoch": 1.1341013824884794, + "grad_norm": 1.1383631487720753, + "learning_rate": 8.617659598112195e-07, + "loss": 0.7446962594985962, + "step": 4922 + }, + { + "epoch": 1.134331797235023, + "grad_norm": 0.9360514056176105, + "learning_rate": 8.613886436241594e-07, + "loss": 0.7074497938156128, + "step": 4923 + }, + { + "epoch": 1.1345622119815668, + "grad_norm": 0.9945384740922374, + "learning_rate": 8.610113475563547e-07, + "loss": 0.6728851795196533, + "step": 4924 + }, + { + "epoch": 1.1347926267281105, + "grad_norm": 
1.0533766436674836, + "learning_rate": 8.606340716625689e-07, + "loss": 0.7732793092727661, + "step": 4925 + }, + { + "epoch": 1.1350230414746543, + "grad_norm": 1.2301857240081557, + "learning_rate": 8.60256815997563e-07, + "loss": 0.7514671683311462, + "step": 4926 + }, + { + "epoch": 1.1352534562211982, + "grad_norm": 1.2507291163181513, + "learning_rate": 8.598795806160952e-07, + "loss": 0.7824795842170715, + "step": 4927 + }, + { + "epoch": 1.135483870967742, + "grad_norm": 1.1585997268920079, + "learning_rate": 8.59502365572921e-07, + "loss": 0.789236307144165, + "step": 4928 + }, + { + "epoch": 1.1357142857142857, + "grad_norm": 1.1796078109098491, + "learning_rate": 8.591251709227919e-07, + "loss": 0.7005175948143005, + "step": 4929 + }, + { + "epoch": 1.1359447004608294, + "grad_norm": 1.2299124062921447, + "learning_rate": 8.587479967204582e-07, + "loss": 0.7851300239562988, + "step": 4930 + }, + { + "epoch": 1.1361751152073734, + "grad_norm": 1.5129438725714193, + "learning_rate": 8.583708430206658e-07, + "loss": 0.8901405334472656, + "step": 4931 + }, + { + "epoch": 1.136405529953917, + "grad_norm": 1.1049343524856345, + "learning_rate": 8.579937098781576e-07, + "loss": 0.8118528127670288, + "step": 4932 + }, + { + "epoch": 1.1366359447004608, + "grad_norm": 1.0631974751851168, + "learning_rate": 8.57616597347675e-07, + "loss": 0.6500028371810913, + "step": 4933 + }, + { + "epoch": 1.1368663594470045, + "grad_norm": 1.057066415615051, + "learning_rate": 8.572395054839547e-07, + "loss": 0.7752922773361206, + "step": 4934 + }, + { + "epoch": 1.1370967741935485, + "grad_norm": 1.124364781444334, + "learning_rate": 8.568624343417309e-07, + "loss": 0.7346245050430298, + "step": 4935 + }, + { + "epoch": 1.1373271889400922, + "grad_norm": 1.4547001781507483, + "learning_rate": 8.564853839757356e-07, + "loss": 0.9249104261398315, + "step": 4936 + }, + { + "epoch": 1.137557603686636, + "grad_norm": 1.0350864816884677, + "learning_rate": 8.561083544406965e-07, + 
"loss": 0.7407078742980957, + "step": 4937 + }, + { + "epoch": 1.1377880184331797, + "grad_norm": 1.197156559440129, + "learning_rate": 8.557313457913393e-07, + "loss": 0.7615865468978882, + "step": 4938 + }, + { + "epoch": 1.1380184331797234, + "grad_norm": 1.2125718427071739, + "learning_rate": 8.553543580823866e-07, + "loss": 0.757561445236206, + "step": 4939 + }, + { + "epoch": 1.1382488479262673, + "grad_norm": 1.1468001082336654, + "learning_rate": 8.549773913685572e-07, + "loss": 0.7130411863327026, + "step": 4940 + }, + { + "epoch": 1.138479262672811, + "grad_norm": 1.1282357144069963, + "learning_rate": 8.54600445704567e-07, + "loss": 0.7507551312446594, + "step": 4941 + }, + { + "epoch": 1.1387096774193548, + "grad_norm": 1.0556143227749322, + "learning_rate": 8.542235211451301e-07, + "loss": 0.896443247795105, + "step": 4942 + }, + { + "epoch": 1.1389400921658985, + "grad_norm": 1.145222677509159, + "learning_rate": 8.538466177449557e-07, + "loss": 0.7530815601348877, + "step": 4943 + }, + { + "epoch": 1.1391705069124425, + "grad_norm": 1.2481258172783056, + "learning_rate": 8.534697355587517e-07, + "loss": 0.8730431795120239, + "step": 4944 + }, + { + "epoch": 1.1394009216589862, + "grad_norm": 1.3010516024158107, + "learning_rate": 8.530928746412216e-07, + "loss": 0.6452720165252686, + "step": 4945 + }, + { + "epoch": 1.13963133640553, + "grad_norm": 1.1712957128451178, + "learning_rate": 8.527160350470661e-07, + "loss": 0.7679018974304199, + "step": 4946 + }, + { + "epoch": 1.1398617511520737, + "grad_norm": 1.402874429077297, + "learning_rate": 8.523392168309832e-07, + "loss": 0.8186824321746826, + "step": 4947 + }, + { + "epoch": 1.1400921658986176, + "grad_norm": 1.1669467278440648, + "learning_rate": 8.519624200476676e-07, + "loss": 0.666642427444458, + "step": 4948 + }, + { + "epoch": 1.1403225806451613, + "grad_norm": 1.0160881327834055, + "learning_rate": 8.515856447518104e-07, + "loss": 0.7478682994842529, + "step": 4949 + }, + { + "epoch": 
1.140552995391705, + "grad_norm": 1.2340329971083113, + "learning_rate": 8.512088909981007e-07, + "loss": 0.7527793645858765, + "step": 4950 + }, + { + "epoch": 1.1407834101382488, + "grad_norm": 1.136863530366948, + "learning_rate": 8.508321588412235e-07, + "loss": 0.7614094018936157, + "step": 4951 + }, + { + "epoch": 1.1410138248847925, + "grad_norm": 1.2371366016065355, + "learning_rate": 8.504554483358605e-07, + "loss": 0.8294994831085205, + "step": 4952 + }, + { + "epoch": 1.1412442396313365, + "grad_norm": 1.4759487382386114, + "learning_rate": 8.500787595366919e-07, + "loss": 0.8900095224380493, + "step": 4953 + }, + { + "epoch": 1.1414746543778802, + "grad_norm": 1.0721192735972314, + "learning_rate": 8.497020924983926e-07, + "loss": 0.8403744697570801, + "step": 4954 + }, + { + "epoch": 1.141705069124424, + "grad_norm": 1.0449510164412683, + "learning_rate": 8.493254472756355e-07, + "loss": 0.7046208381652832, + "step": 4955 + }, + { + "epoch": 1.1419354838709677, + "grad_norm": 1.3018714779233174, + "learning_rate": 8.489488239230904e-07, + "loss": 0.8226789832115173, + "step": 4956 + }, + { + "epoch": 1.1421658986175116, + "grad_norm": 1.058902427650911, + "learning_rate": 8.485722224954236e-07, + "loss": 0.7248969674110413, + "step": 4957 + }, + { + "epoch": 1.1423963133640553, + "grad_norm": 1.1327549620980084, + "learning_rate": 8.481956430472979e-07, + "loss": 0.8116840124130249, + "step": 4958 + }, + { + "epoch": 1.142626728110599, + "grad_norm": 1.062622286893391, + "learning_rate": 8.478190856333739e-07, + "loss": 0.7534138560295105, + "step": 4959 + }, + { + "epoch": 1.1428571428571428, + "grad_norm": 1.3427980825750856, + "learning_rate": 8.474425503083082e-07, + "loss": 0.8945306539535522, + "step": 4960 + }, + { + "epoch": 1.1430875576036867, + "grad_norm": 1.1592346473165394, + "learning_rate": 8.47066037126754e-07, + "loss": 0.7554503083229065, + "step": 4961 + }, + { + "epoch": 1.1433179723502305, + "grad_norm": 1.4596388821753403, + 
"learning_rate": 8.466895461433625e-07, + "loss": 0.832726776599884, + "step": 4962 + }, + { + "epoch": 1.1435483870967742, + "grad_norm": 1.250046955776058, + "learning_rate": 8.463130774127804e-07, + "loss": 0.8312773704528809, + "step": 4963 + }, + { + "epoch": 1.143778801843318, + "grad_norm": 0.9153601791246997, + "learning_rate": 8.459366309896512e-07, + "loss": 0.6484537124633789, + "step": 4964 + }, + { + "epoch": 1.1440092165898617, + "grad_norm": 1.2863432770713337, + "learning_rate": 8.455602069286165e-07, + "loss": 0.9216604828834534, + "step": 4965 + }, + { + "epoch": 1.1442396313364056, + "grad_norm": 1.134985678431753, + "learning_rate": 8.451838052843131e-07, + "loss": 0.6213096380233765, + "step": 4966 + }, + { + "epoch": 1.1444700460829493, + "grad_norm": 0.9562822723791001, + "learning_rate": 8.448074261113756e-07, + "loss": 0.6873677968978882, + "step": 4967 + }, + { + "epoch": 1.144700460829493, + "grad_norm": 1.215560824144924, + "learning_rate": 8.444310694644348e-07, + "loss": 0.7883448600769043, + "step": 4968 + }, + { + "epoch": 1.1449308755760368, + "grad_norm": 1.1944176371651494, + "learning_rate": 8.440547353981178e-07, + "loss": 0.724172830581665, + "step": 4969 + }, + { + "epoch": 1.1451612903225807, + "grad_norm": 1.0792006702141475, + "learning_rate": 8.4367842396705e-07, + "loss": 0.7115252017974854, + "step": 4970 + }, + { + "epoch": 1.1453917050691245, + "grad_norm": 1.0823773323138404, + "learning_rate": 8.433021352258521e-07, + "loss": 0.7165110111236572, + "step": 4971 + }, + { + "epoch": 1.1456221198156682, + "grad_norm": 1.0874360604645514, + "learning_rate": 8.429258692291413e-07, + "loss": 0.7563315629959106, + "step": 4972 + }, + { + "epoch": 1.145852534562212, + "grad_norm": 1.1334099478279698, + "learning_rate": 8.425496260315331e-07, + "loss": 0.7528449892997742, + "step": 4973 + }, + { + "epoch": 1.1460829493087559, + "grad_norm": 1.1141426795021205, + "learning_rate": 8.421734056876383e-07, + "loss": 
0.7976171970367432, + "step": 4974 + }, + { + "epoch": 1.1463133640552996, + "grad_norm": 1.020985144100356, + "learning_rate": 8.417972082520644e-07, + "loss": 0.7498095035552979, + "step": 4975 + }, + { + "epoch": 1.1465437788018433, + "grad_norm": 1.3446642320448154, + "learning_rate": 8.414210337794165e-07, + "loss": 0.9568856954574585, + "step": 4976 + }, + { + "epoch": 1.146774193548387, + "grad_norm": 0.9499457055768262, + "learning_rate": 8.410448823242957e-07, + "loss": 0.6402908563613892, + "step": 4977 + }, + { + "epoch": 1.1470046082949308, + "grad_norm": 1.1759709167305108, + "learning_rate": 8.406687539412995e-07, + "loss": 0.8224657773971558, + "step": 4978 + }, + { + "epoch": 1.1472350230414747, + "grad_norm": 1.2886598107348421, + "learning_rate": 8.402926486850229e-07, + "loss": 0.7804544568061829, + "step": 4979 + }, + { + "epoch": 1.1474654377880185, + "grad_norm": 1.1861127295236977, + "learning_rate": 8.39916566610057e-07, + "loss": 0.7920527458190918, + "step": 4980 + }, + { + "epoch": 1.1476958525345622, + "grad_norm": 1.1244888328051699, + "learning_rate": 8.395405077709891e-07, + "loss": 0.7672078609466553, + "step": 4981 + }, + { + "epoch": 1.147926267281106, + "grad_norm": 1.2427545332028853, + "learning_rate": 8.391644722224047e-07, + "loss": 0.6997950077056885, + "step": 4982 + }, + { + "epoch": 1.1481566820276499, + "grad_norm": 1.057637628401912, + "learning_rate": 8.38788460018884e-07, + "loss": 0.7754349708557129, + "step": 4983 + }, + { + "epoch": 1.1483870967741936, + "grad_norm": 1.1458978330134115, + "learning_rate": 8.384124712150046e-07, + "loss": 0.706238329410553, + "step": 4984 + }, + { + "epoch": 1.1486175115207373, + "grad_norm": 0.8874927618348325, + "learning_rate": 8.380365058653415e-07, + "loss": 0.7115224599838257, + "step": 4985 + }, + { + "epoch": 1.148847926267281, + "grad_norm": 1.349182229007694, + "learning_rate": 8.376605640244652e-07, + "loss": 0.9026098847389221, + "step": 4986 + }, + { + "epoch": 
1.149078341013825, + "grad_norm": 1.359066441839043, + "learning_rate": 8.372846457469428e-07, + "loss": 0.9123632311820984, + "step": 4987 + }, + { + "epoch": 1.1493087557603687, + "grad_norm": 1.1389830084868187, + "learning_rate": 8.369087510873389e-07, + "loss": 0.8365681171417236, + "step": 4988 + }, + { + "epoch": 1.1495391705069125, + "grad_norm": 1.1572327597453433, + "learning_rate": 8.36532880100214e-07, + "loss": 0.7506389617919922, + "step": 4989 + }, + { + "epoch": 1.1497695852534562, + "grad_norm": 1.1932866122784214, + "learning_rate": 8.361570328401246e-07, + "loss": 0.7736936807632446, + "step": 4990 + }, + { + "epoch": 1.15, + "grad_norm": 1.0939095427412457, + "learning_rate": 8.357812093616254e-07, + "loss": 0.7364238500595093, + "step": 4991 + }, + { + "epoch": 1.1502304147465439, + "grad_norm": 1.154457809524142, + "learning_rate": 8.354054097192659e-07, + "loss": 0.8588067293167114, + "step": 4992 + }, + { + "epoch": 1.1504608294930876, + "grad_norm": 1.0040260335609983, + "learning_rate": 8.350296339675938e-07, + "loss": 0.777319073677063, + "step": 4993 + }, + { + "epoch": 1.1506912442396313, + "grad_norm": 1.2472613338245313, + "learning_rate": 8.346538821611517e-07, + "loss": 0.6695454716682434, + "step": 4994 + }, + { + "epoch": 1.150921658986175, + "grad_norm": 1.1333204343634593, + "learning_rate": 8.342781543544796e-07, + "loss": 0.7785383462905884, + "step": 4995 + }, + { + "epoch": 1.1511520737327188, + "grad_norm": 1.2063502081148214, + "learning_rate": 8.339024506021143e-07, + "loss": 0.7386239767074585, + "step": 4996 + }, + { + "epoch": 1.1513824884792627, + "grad_norm": 1.015973129089863, + "learning_rate": 8.335267709585884e-07, + "loss": 0.8044750690460205, + "step": 4997 + }, + { + "epoch": 1.1516129032258065, + "grad_norm": 0.991689333823338, + "learning_rate": 8.331511154784307e-07, + "loss": 0.6925652623176575, + "step": 4998 + }, + { + "epoch": 1.1518433179723502, + "grad_norm": 1.1362021503644928, + "learning_rate": 
8.327754842161684e-07, + "loss": 0.7906935214996338, + "step": 4999 + }, + { + "epoch": 1.1520737327188941, + "grad_norm": 1.0865966340855062, + "learning_rate": 8.323998772263231e-07, + "loss": 0.7131960988044739, + "step": 5000 + }, + { + "epoch": 1.1523041474654379, + "grad_norm": 1.0459163670419733, + "learning_rate": 8.320242945634132e-07, + "loss": 0.8412370085716248, + "step": 5001 + }, + { + "epoch": 1.1525345622119816, + "grad_norm": 1.219248495471204, + "learning_rate": 8.316487362819551e-07, + "loss": 0.7800952792167664, + "step": 5002 + }, + { + "epoch": 1.1527649769585253, + "grad_norm": 1.2269188284281454, + "learning_rate": 8.312732024364602e-07, + "loss": 0.8620247840881348, + "step": 5003 + }, + { + "epoch": 1.152995391705069, + "grad_norm": 1.1576962368399284, + "learning_rate": 8.30897693081436e-07, + "loss": 0.7551721334457397, + "step": 5004 + }, + { + "epoch": 1.153225806451613, + "grad_norm": 1.1081098689134552, + "learning_rate": 8.305222082713882e-07, + "loss": 0.8510593175888062, + "step": 5005 + }, + { + "epoch": 1.1534562211981567, + "grad_norm": 1.0356186889640762, + "learning_rate": 8.301467480608176e-07, + "loss": 0.6503845453262329, + "step": 5006 + }, + { + "epoch": 1.1536866359447004, + "grad_norm": 1.1593829978588668, + "learning_rate": 8.297713125042212e-07, + "loss": 0.7729237079620361, + "step": 5007 + }, + { + "epoch": 1.1539170506912442, + "grad_norm": 1.0812796919286354, + "learning_rate": 8.293959016560939e-07, + "loss": 0.77802574634552, + "step": 5008 + }, + { + "epoch": 1.154147465437788, + "grad_norm": 0.9915519400035699, + "learning_rate": 8.290205155709256e-07, + "loss": 0.7977825999259949, + "step": 5009 + }, + { + "epoch": 1.1543778801843319, + "grad_norm": 1.1128731733324948, + "learning_rate": 8.286451543032027e-07, + "loss": 0.7479745149612427, + "step": 5010 + }, + { + "epoch": 1.1546082949308756, + "grad_norm": 1.0554376798438097, + "learning_rate": 8.282698179074092e-07, + "loss": 0.7631532549858093, + "step": 
5011 + }, + { + "epoch": 1.1548387096774193, + "grad_norm": 1.1424098237872247, + "learning_rate": 8.278945064380243e-07, + "loss": 0.7437061071395874, + "step": 5012 + }, + { + "epoch": 1.1550691244239633, + "grad_norm": 1.2208599961881346, + "learning_rate": 8.275192199495236e-07, + "loss": 0.9334282875061035, + "step": 5013 + }, + { + "epoch": 1.155299539170507, + "grad_norm": 1.1846438304674103, + "learning_rate": 8.2714395849638e-07, + "loss": 0.7119227647781372, + "step": 5014 + }, + { + "epoch": 1.1555299539170507, + "grad_norm": 1.202224273678675, + "learning_rate": 8.267687221330619e-07, + "loss": 0.8335816860198975, + "step": 5015 + }, + { + "epoch": 1.1557603686635944, + "grad_norm": 1.290989413518125, + "learning_rate": 8.263935109140347e-07, + "loss": 0.6130940914154053, + "step": 5016 + }, + { + "epoch": 1.1559907834101382, + "grad_norm": 1.1118999574659398, + "learning_rate": 8.260183248937595e-07, + "loss": 0.8223903179168701, + "step": 5017 + }, + { + "epoch": 1.1562211981566821, + "grad_norm": 1.1042026567968168, + "learning_rate": 8.256431641266938e-07, + "loss": 0.8024790287017822, + "step": 5018 + }, + { + "epoch": 1.1564516129032258, + "grad_norm": 1.2308316211864536, + "learning_rate": 8.252680286672924e-07, + "loss": 0.7425345182418823, + "step": 5019 + }, + { + "epoch": 1.1566820276497696, + "grad_norm": 0.9907420981370885, + "learning_rate": 8.248929185700053e-07, + "loss": 0.7729727029800415, + "step": 5020 + }, + { + "epoch": 1.1569124423963133, + "grad_norm": 1.096476255015683, + "learning_rate": 8.245178338892788e-07, + "loss": 0.8451874256134033, + "step": 5021 + }, + { + "epoch": 1.157142857142857, + "grad_norm": 1.1584589365926052, + "learning_rate": 8.241427746795569e-07, + "loss": 0.8666542768478394, + "step": 5022 + }, + { + "epoch": 1.157373271889401, + "grad_norm": 1.2897904410488261, + "learning_rate": 8.237677409952784e-07, + "loss": 0.740352988243103, + "step": 5023 + }, + { + "epoch": 1.1576036866359447, + "grad_norm": 
0.9937724952342799, + "learning_rate": 8.233927328908788e-07, + "loss": 0.6325985193252563, + "step": 5024 + }, + { + "epoch": 1.1578341013824884, + "grad_norm": 1.0099472902179978, + "learning_rate": 8.230177504207901e-07, + "loss": 0.8075892925262451, + "step": 5025 + }, + { + "epoch": 1.1580645161290322, + "grad_norm": 1.0459718249244707, + "learning_rate": 8.22642793639441e-07, + "loss": 0.7176432609558105, + "step": 5026 + }, + { + "epoch": 1.1582949308755761, + "grad_norm": 1.1804726429614583, + "learning_rate": 8.222678626012554e-07, + "loss": 0.7734829187393188, + "step": 5027 + }, + { + "epoch": 1.1585253456221198, + "grad_norm": 1.3220222245590558, + "learning_rate": 8.218929573606544e-07, + "loss": 0.8642655611038208, + "step": 5028 + }, + { + "epoch": 1.1587557603686636, + "grad_norm": 1.0337487495481472, + "learning_rate": 8.215180779720548e-07, + "loss": 0.7788450121879578, + "step": 5029 + }, + { + "epoch": 1.1589861751152073, + "grad_norm": 0.9361659768144168, + "learning_rate": 8.211432244898696e-07, + "loss": 0.7470313310623169, + "step": 5030 + }, + { + "epoch": 1.1592165898617512, + "grad_norm": 0.9907043815397547, + "learning_rate": 8.207683969685091e-07, + "loss": 0.7691675424575806, + "step": 5031 + }, + { + "epoch": 1.159447004608295, + "grad_norm": 0.9920310393320094, + "learning_rate": 8.203935954623783e-07, + "loss": 0.7060209512710571, + "step": 5032 + }, + { + "epoch": 1.1596774193548387, + "grad_norm": 1.189958639239752, + "learning_rate": 8.20018820025879e-07, + "loss": 0.7617488503456116, + "step": 5033 + }, + { + "epoch": 1.1599078341013824, + "grad_norm": 1.2174023482004634, + "learning_rate": 8.196440707134102e-07, + "loss": 0.7016350626945496, + "step": 5034 + }, + { + "epoch": 1.1601382488479262, + "grad_norm": 1.3407340114210469, + "learning_rate": 8.192693475793657e-07, + "loss": 0.8375445604324341, + "step": 5035 + }, + { + "epoch": 1.16036866359447, + "grad_norm": 1.2333127293881232, + "learning_rate": 8.188946506781359e-07, 
+ "loss": 0.8903663158416748, + "step": 5036 + }, + { + "epoch": 1.1605990783410138, + "grad_norm": 1.1046448662682735, + "learning_rate": 8.18519980064108e-07, + "loss": 0.7613073587417603, + "step": 5037 + }, + { + "epoch": 1.1608294930875576, + "grad_norm": 1.2358045096315418, + "learning_rate": 8.181453357916649e-07, + "loss": 0.7443521022796631, + "step": 5038 + }, + { + "epoch": 1.1610599078341013, + "grad_norm": 1.0132222940739166, + "learning_rate": 8.17770717915185e-07, + "loss": 0.7986443042755127, + "step": 5039 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 1.1475221794766963, + "learning_rate": 8.173961264890447e-07, + "loss": 0.7128815650939941, + "step": 5040 + }, + { + "epoch": 1.161520737327189, + "grad_norm": 2.1353174029488593, + "learning_rate": 8.170215615676144e-07, + "loss": 0.7189117074012756, + "step": 5041 + }, + { + "epoch": 1.1617511520737327, + "grad_norm": 1.0970239097626442, + "learning_rate": 8.166470232052626e-07, + "loss": 0.8358731269836426, + "step": 5042 + }, + { + "epoch": 1.1619815668202764, + "grad_norm": 1.3103703595946257, + "learning_rate": 8.162725114563527e-07, + "loss": 0.7734829187393188, + "step": 5043 + }, + { + "epoch": 1.1622119815668204, + "grad_norm": 1.0836793655881298, + "learning_rate": 8.158980263752443e-07, + "loss": 0.842268705368042, + "step": 5044 + }, + { + "epoch": 1.162442396313364, + "grad_norm": 1.0953254817646525, + "learning_rate": 8.155235680162937e-07, + "loss": 0.7973036766052246, + "step": 5045 + }, + { + "epoch": 1.1626728110599078, + "grad_norm": 1.1431491680692596, + "learning_rate": 8.151491364338532e-07, + "loss": 0.743615984916687, + "step": 5046 + }, + { + "epoch": 1.1629032258064516, + "grad_norm": 1.2354800674331334, + "learning_rate": 8.147747316822705e-07, + "loss": 0.799458384513855, + "step": 5047 + }, + { + "epoch": 1.1631336405529953, + "grad_norm": 1.4365906916451476, + "learning_rate": 8.144003538158907e-07, + "loss": 0.8368128538131714, + "step": 5048 + }, + { + 
"epoch": 1.1633640552995392, + "grad_norm": 1.0543438991079201, + "learning_rate": 8.140260028890537e-07, + "loss": 0.8543322086334229, + "step": 5049 + }, + { + "epoch": 1.163594470046083, + "grad_norm": 1.4010693577495907, + "learning_rate": 8.136516789560957e-07, + "loss": 0.9586522579193115, + "step": 5050 + }, + { + "epoch": 1.1638248847926267, + "grad_norm": 1.0831898931931903, + "learning_rate": 8.132773820713505e-07, + "loss": 0.7781316041946411, + "step": 5051 + }, + { + "epoch": 1.1640552995391704, + "grad_norm": 1.1820241176000723, + "learning_rate": 8.129031122891459e-07, + "loss": 0.7726340293884277, + "step": 5052 + }, + { + "epoch": 1.1642857142857144, + "grad_norm": 1.2561245635498344, + "learning_rate": 8.125288696638064e-07, + "loss": 0.886093258857727, + "step": 5053 + }, + { + "epoch": 1.164516129032258, + "grad_norm": 1.1568232893052595, + "learning_rate": 8.121546542496538e-07, + "loss": 0.7896960973739624, + "step": 5054 + }, + { + "epoch": 1.1647465437788018, + "grad_norm": 1.066019166680275, + "learning_rate": 8.117804661010045e-07, + "loss": 0.8272452354431152, + "step": 5055 + }, + { + "epoch": 1.1649769585253456, + "grad_norm": 1.216096321256879, + "learning_rate": 8.11406305272171e-07, + "loss": 0.8452264070510864, + "step": 5056 + }, + { + "epoch": 1.1652073732718895, + "grad_norm": 1.1423033593169452, + "learning_rate": 8.11032171817463e-07, + "loss": 0.7973369359970093, + "step": 5057 + }, + { + "epoch": 1.1654377880184332, + "grad_norm": 0.9573952961126706, + "learning_rate": 8.10658065791185e-07, + "loss": 0.8045153617858887, + "step": 5058 + }, + { + "epoch": 1.165668202764977, + "grad_norm": 1.2070626820317865, + "learning_rate": 8.102839872476378e-07, + "loss": 0.8921254873275757, + "step": 5059 + }, + { + "epoch": 1.1658986175115207, + "grad_norm": 1.1196640968944265, + "learning_rate": 8.099099362411191e-07, + "loss": 0.7633669376373291, + "step": 5060 + }, + { + "epoch": 1.1661290322580644, + "grad_norm": 1.4676357149183228, 
+ "learning_rate": 8.095359128259214e-07, + "loss": 0.9303205013275146, + "step": 5061 + }, + { + "epoch": 1.1663594470046084, + "grad_norm": 1.1532839170590041, + "learning_rate": 8.091619170563335e-07, + "loss": 0.867104709148407, + "step": 5062 + }, + { + "epoch": 1.166589861751152, + "grad_norm": 1.2071495700843942, + "learning_rate": 8.087879489866409e-07, + "loss": 0.8136844038963318, + "step": 5063 + }, + { + "epoch": 1.1668202764976958, + "grad_norm": 1.5482117252744063, + "learning_rate": 8.084140086711246e-07, + "loss": 0.9016939997673035, + "step": 5064 + }, + { + "epoch": 1.1670506912442395, + "grad_norm": 1.5795186850129557, + "learning_rate": 8.080400961640608e-07, + "loss": 0.8621236085891724, + "step": 5065 + }, + { + "epoch": 1.1672811059907835, + "grad_norm": 1.336449231038986, + "learning_rate": 8.076662115197234e-07, + "loss": 0.856648862361908, + "step": 5066 + }, + { + "epoch": 1.1675115207373272, + "grad_norm": 1.3107118910408024, + "learning_rate": 8.072923547923805e-07, + "loss": 0.7752784490585327, + "step": 5067 + }, + { + "epoch": 1.167741935483871, + "grad_norm": 1.3093385224686542, + "learning_rate": 8.069185260362974e-07, + "loss": 0.8573904037475586, + "step": 5068 + }, + { + "epoch": 1.1679723502304147, + "grad_norm": 1.1636599679682322, + "learning_rate": 8.065447253057347e-07, + "loss": 0.724372148513794, + "step": 5069 + }, + { + "epoch": 1.1682027649769586, + "grad_norm": 1.146758460237727, + "learning_rate": 8.061709526549486e-07, + "loss": 0.7428436875343323, + "step": 5070 + }, + { + "epoch": 1.1684331797235024, + "grad_norm": 1.273017047999111, + "learning_rate": 8.057972081381925e-07, + "loss": 0.8888595104217529, + "step": 5071 + }, + { + "epoch": 1.168663594470046, + "grad_norm": 0.9497262022662447, + "learning_rate": 8.054234918097146e-07, + "loss": 0.5753290057182312, + "step": 5072 + }, + { + "epoch": 1.1688940092165898, + "grad_norm": 1.037170746248572, + "learning_rate": 8.050498037237589e-07, + "loss": 
0.6724086999893188, + "step": 5073 + }, + { + "epoch": 1.1691244239631335, + "grad_norm": 1.1504888789916348, + "learning_rate": 8.046761439345664e-07, + "loss": 0.7410751581192017, + "step": 5074 + }, + { + "epoch": 1.1693548387096775, + "grad_norm": 1.2658920818717738, + "learning_rate": 8.043025124963731e-07, + "loss": 0.8522979021072388, + "step": 5075 + }, + { + "epoch": 1.1695852534562212, + "grad_norm": 0.9918624551952729, + "learning_rate": 8.039289094634109e-07, + "loss": 0.6243441700935364, + "step": 5076 + }, + { + "epoch": 1.169815668202765, + "grad_norm": 1.113826210544245, + "learning_rate": 8.03555334889908e-07, + "loss": 0.9332150220870972, + "step": 5077 + }, + { + "epoch": 1.1700460829493087, + "grad_norm": 1.17170377289517, + "learning_rate": 8.031817888300883e-07, + "loss": 0.7620645761489868, + "step": 5078 + }, + { + "epoch": 1.1702764976958526, + "grad_norm": 1.2693395517069683, + "learning_rate": 8.028082713381708e-07, + "loss": 0.6983245015144348, + "step": 5079 + }, + { + "epoch": 1.1705069124423964, + "grad_norm": 1.049572082944252, + "learning_rate": 8.024347824683723e-07, + "loss": 0.6220129728317261, + "step": 5080 + }, + { + "epoch": 1.17073732718894, + "grad_norm": 1.0906919021349344, + "learning_rate": 8.020613222749034e-07, + "loss": 0.7363810539245605, + "step": 5081 + }, + { + "epoch": 1.1709677419354838, + "grad_norm": 1.1450127350480972, + "learning_rate": 8.016878908119713e-07, + "loss": 0.6864198446273804, + "step": 5082 + }, + { + "epoch": 1.1711981566820278, + "grad_norm": 1.061738817269073, + "learning_rate": 8.013144881337795e-07, + "loss": 0.758607029914856, + "step": 5083 + }, + { + "epoch": 1.1714285714285715, + "grad_norm": 1.038630253415404, + "learning_rate": 8.009411142945269e-07, + "loss": 0.7519336938858032, + "step": 5084 + }, + { + "epoch": 1.1716589861751152, + "grad_norm": 1.132431622302542, + "learning_rate": 8.005677693484076e-07, + "loss": 0.7681798934936523, + "step": 5085 + }, + { + "epoch": 
1.171889400921659, + "grad_norm": 1.1022208744006678, + "learning_rate": 8.00194453349613e-07, + "loss": 0.6808522939682007, + "step": 5086 + }, + { + "epoch": 1.1721198156682027, + "grad_norm": 1.039877694159321, + "learning_rate": 7.99821166352329e-07, + "loss": 0.7373358607292175, + "step": 5087 + }, + { + "epoch": 1.1723502304147466, + "grad_norm": 1.0199898679930943, + "learning_rate": 7.994479084107374e-07, + "loss": 0.7272510528564453, + "step": 5088 + }, + { + "epoch": 1.1725806451612903, + "grad_norm": 1.2473385255320408, + "learning_rate": 7.990746795790166e-07, + "loss": 0.845584511756897, + "step": 5089 + }, + { + "epoch": 1.172811059907834, + "grad_norm": 1.188342902392479, + "learning_rate": 7.987014799113397e-07, + "loss": 0.7751157283782959, + "step": 5090 + }, + { + "epoch": 1.1730414746543778, + "grad_norm": 1.1193246813934836, + "learning_rate": 7.98328309461877e-07, + "loss": 0.679701566696167, + "step": 5091 + }, + { + "epoch": 1.1732718894009218, + "grad_norm": 1.1116687434739936, + "learning_rate": 7.979551682847932e-07, + "loss": 0.7630679607391357, + "step": 5092 + }, + { + "epoch": 1.1735023041474655, + "grad_norm": 1.0309555153446328, + "learning_rate": 7.975820564342487e-07, + "loss": 0.700912594795227, + "step": 5093 + }, + { + "epoch": 1.1737327188940092, + "grad_norm": 1.097867809116453, + "learning_rate": 7.972089739644012e-07, + "loss": 0.6789706945419312, + "step": 5094 + }, + { + "epoch": 1.173963133640553, + "grad_norm": 1.411041629986285, + "learning_rate": 7.968359209294027e-07, + "loss": 0.6744855642318726, + "step": 5095 + }, + { + "epoch": 1.1741935483870969, + "grad_norm": 1.060959542495881, + "learning_rate": 7.964628973834011e-07, + "loss": 0.7551798820495605, + "step": 5096 + }, + { + "epoch": 1.1744239631336406, + "grad_norm": 0.9743982939550204, + "learning_rate": 7.960899033805407e-07, + "loss": 0.711478054523468, + "step": 5097 + }, + { + "epoch": 1.1746543778801843, + "grad_norm": 1.1281696794434548, + 
"learning_rate": 7.95716938974961e-07, + "loss": 0.7464019060134888, + "step": 5098 + }, + { + "epoch": 1.174884792626728, + "grad_norm": 1.2269121334355921, + "learning_rate": 7.953440042207966e-07, + "loss": 0.7667930126190186, + "step": 5099 + }, + { + "epoch": 1.1751152073732718, + "grad_norm": 0.9314104563097803, + "learning_rate": 7.949710991721796e-07, + "loss": 0.7574796676635742, + "step": 5100 + }, + { + "epoch": 1.1753456221198157, + "grad_norm": 0.9285474016256665, + "learning_rate": 7.945982238832361e-07, + "loss": 0.6627304553985596, + "step": 5101 + }, + { + "epoch": 1.1755760368663595, + "grad_norm": 1.2503590742658475, + "learning_rate": 7.942253784080879e-07, + "loss": 0.6803916692733765, + "step": 5102 + }, + { + "epoch": 1.1758064516129032, + "grad_norm": 1.1622603764445048, + "learning_rate": 7.938525628008541e-07, + "loss": 0.7107337713241577, + "step": 5103 + }, + { + "epoch": 1.176036866359447, + "grad_norm": 1.0411872319848583, + "learning_rate": 7.934797771156481e-07, + "loss": 0.7669517993927002, + "step": 5104 + }, + { + "epoch": 1.1762672811059907, + "grad_norm": 1.185214338142044, + "learning_rate": 7.931070214065787e-07, + "loss": 0.7431854605674744, + "step": 5105 + }, + { + "epoch": 1.1764976958525346, + "grad_norm": 1.121798206744332, + "learning_rate": 7.927342957277512e-07, + "loss": 0.7778047323226929, + "step": 5106 + }, + { + "epoch": 1.1767281105990783, + "grad_norm": 1.1095356364162186, + "learning_rate": 7.923616001332666e-07, + "loss": 0.7759886980056763, + "step": 5107 + }, + { + "epoch": 1.176958525345622, + "grad_norm": 1.236811676128496, + "learning_rate": 7.919889346772206e-07, + "loss": 0.8010379076004028, + "step": 5108 + }, + { + "epoch": 1.177188940092166, + "grad_norm": 1.06629818182004, + "learning_rate": 7.916162994137055e-07, + "loss": 0.6671626567840576, + "step": 5109 + }, + { + "epoch": 1.1774193548387097, + "grad_norm": 1.3043487682811514, + "learning_rate": 7.912436943968088e-07, + "loss": 
0.7521620988845825, + "step": 5110 + }, + { + "epoch": 1.1776497695852535, + "grad_norm": 1.0243889894502596, + "learning_rate": 7.908711196806131e-07, + "loss": 0.7626729011535645, + "step": 5111 + }, + { + "epoch": 1.1778801843317972, + "grad_norm": 1.2636422633100723, + "learning_rate": 7.904985753191979e-07, + "loss": 0.8247047066688538, + "step": 5112 + }, + { + "epoch": 1.178110599078341, + "grad_norm": 0.9958902943746148, + "learning_rate": 7.901260613666372e-07, + "loss": 0.6851831078529358, + "step": 5113 + }, + { + "epoch": 1.1783410138248849, + "grad_norm": 1.114469339271613, + "learning_rate": 7.897535778770003e-07, + "loss": 0.7752102613449097, + "step": 5114 + }, + { + "epoch": 1.1785714285714286, + "grad_norm": 1.0998339013097813, + "learning_rate": 7.893811249043537e-07, + "loss": 0.8885148167610168, + "step": 5115 + }, + { + "epoch": 1.1788018433179723, + "grad_norm": 1.3062040351627935, + "learning_rate": 7.890087025027579e-07, + "loss": 0.7530373334884644, + "step": 5116 + }, + { + "epoch": 1.179032258064516, + "grad_norm": 1.0400370692656624, + "learning_rate": 7.886363107262697e-07, + "loss": 0.7795672416687012, + "step": 5117 + }, + { + "epoch": 1.1792626728110598, + "grad_norm": 1.0719443222612952, + "learning_rate": 7.882639496289413e-07, + "loss": 0.7563966512680054, + "step": 5118 + }, + { + "epoch": 1.1794930875576037, + "grad_norm": 0.9799024359449507, + "learning_rate": 7.878916192648198e-07, + "loss": 0.7218793630599976, + "step": 5119 + }, + { + "epoch": 1.1797235023041475, + "grad_norm": 1.3292879414667447, + "learning_rate": 7.875193196879494e-07, + "loss": 0.8213250637054443, + "step": 5120 + }, + { + "epoch": 1.1799539170506912, + "grad_norm": 1.118163280715499, + "learning_rate": 7.871470509523685e-07, + "loss": 0.8134827613830566, + "step": 5121 + }, + { + "epoch": 1.1801843317972351, + "grad_norm": 0.9613119464109229, + "learning_rate": 7.867748131121109e-07, + "loss": 0.6135407090187073, + "step": 5122 + }, + { + "epoch": 
1.1804147465437789, + "grad_norm": 1.2999694720426915, + "learning_rate": 7.864026062212073e-07, + "loss": 0.8110366463661194, + "step": 5123 + }, + { + "epoch": 1.1806451612903226, + "grad_norm": 0.9962674732824631, + "learning_rate": 7.860304303336827e-07, + "loss": 0.6723964214324951, + "step": 5124 + }, + { + "epoch": 1.1808755760368663, + "grad_norm": 1.2942490465484493, + "learning_rate": 7.856582855035577e-07, + "loss": 0.8308886885643005, + "step": 5125 + }, + { + "epoch": 1.18110599078341, + "grad_norm": 1.023999175845692, + "learning_rate": 7.852861717848488e-07, + "loss": 0.7960010766983032, + "step": 5126 + }, + { + "epoch": 1.181336405529954, + "grad_norm": 1.2456351777125307, + "learning_rate": 7.84914089231568e-07, + "loss": 0.7931640148162842, + "step": 5127 + }, + { + "epoch": 1.1815668202764977, + "grad_norm": 1.2288164842517166, + "learning_rate": 7.845420378977222e-07, + "loss": 0.762995719909668, + "step": 5128 + }, + { + "epoch": 1.1817972350230415, + "grad_norm": 1.373671152705427, + "learning_rate": 7.841700178373146e-07, + "loss": 0.9416301250457764, + "step": 5129 + }, + { + "epoch": 1.1820276497695852, + "grad_norm": 1.0032147289786453, + "learning_rate": 7.837980291043431e-07, + "loss": 0.7666923999786377, + "step": 5130 + }, + { + "epoch": 1.182258064516129, + "grad_norm": 1.1123898953678502, + "learning_rate": 7.834260717528012e-07, + "loss": 0.7668861150741577, + "step": 5131 + }, + { + "epoch": 1.1824884792626729, + "grad_norm": 1.1236616956881595, + "learning_rate": 7.830541458366786e-07, + "loss": 0.7576566934585571, + "step": 5132 + }, + { + "epoch": 1.1827188940092166, + "grad_norm": 1.0432406760791426, + "learning_rate": 7.826822514099595e-07, + "loss": 0.6288204193115234, + "step": 5133 + }, + { + "epoch": 1.1829493087557603, + "grad_norm": 1.2747953745069134, + "learning_rate": 7.823103885266236e-07, + "loss": 0.8332630395889282, + "step": 5134 + }, + { + "epoch": 1.1831797235023043, + "grad_norm": 1.3987532245853456, + 
"learning_rate": 7.819385572406469e-07, + "loss": 0.9294546246528625, + "step": 5135 + }, + { + "epoch": 1.183410138248848, + "grad_norm": 0.9911973140133253, + "learning_rate": 7.81566757606e-07, + "loss": 0.637617826461792, + "step": 5136 + }, + { + "epoch": 1.1836405529953917, + "grad_norm": 1.2295561738436023, + "learning_rate": 7.81194989676649e-07, + "loss": 0.7614878416061401, + "step": 5137 + }, + { + "epoch": 1.1838709677419355, + "grad_norm": 1.2939539056978149, + "learning_rate": 7.808232535065556e-07, + "loss": 0.8612164258956909, + "step": 5138 + }, + { + "epoch": 1.1841013824884792, + "grad_norm": 1.0758125620247463, + "learning_rate": 7.804515491496765e-07, + "loss": 0.7530151605606079, + "step": 5139 + }, + { + "epoch": 1.1843317972350231, + "grad_norm": 0.9883281570065391, + "learning_rate": 7.800798766599648e-07, + "loss": 0.7739782929420471, + "step": 5140 + }, + { + "epoch": 1.1845622119815669, + "grad_norm": 1.0835226521428547, + "learning_rate": 7.797082360913678e-07, + "loss": 0.7992277145385742, + "step": 5141 + }, + { + "epoch": 1.1847926267281106, + "grad_norm": 1.2343955942215838, + "learning_rate": 7.793366274978284e-07, + "loss": 0.8744574785232544, + "step": 5142 + }, + { + "epoch": 1.1850230414746543, + "grad_norm": 0.9992165946111031, + "learning_rate": 7.789650509332857e-07, + "loss": 0.7522493600845337, + "step": 5143 + }, + { + "epoch": 1.185253456221198, + "grad_norm": 1.1095107175779666, + "learning_rate": 7.785935064516733e-07, + "loss": 0.8811007142066956, + "step": 5144 + }, + { + "epoch": 1.185483870967742, + "grad_norm": 0.9512882648642599, + "learning_rate": 7.782219941069201e-07, + "loss": 0.8141417503356934, + "step": 5145 + }, + { + "epoch": 1.1857142857142857, + "grad_norm": 1.3048397777053706, + "learning_rate": 7.778505139529509e-07, + "loss": 0.9473680257797241, + "step": 5146 + }, + { + "epoch": 1.1859447004608294, + "grad_norm": 1.1561666933094623, + "learning_rate": 7.774790660436857e-07, + "loss": 
0.740132212638855, + "step": 5147 + }, + { + "epoch": 1.1861751152073732, + "grad_norm": 1.1265716565789026, + "learning_rate": 7.771076504330392e-07, + "loss": 0.7904594540596008, + "step": 5148 + }, + { + "epoch": 1.1864055299539171, + "grad_norm": 1.1481555737803508, + "learning_rate": 7.767362671749224e-07, + "loss": 0.8085094690322876, + "step": 5149 + }, + { + "epoch": 1.1866359447004609, + "grad_norm": 1.3362082879917547, + "learning_rate": 7.76364916323241e-07, + "loss": 0.6954756379127502, + "step": 5150 + }, + { + "epoch": 1.1868663594470046, + "grad_norm": 1.175085216674836, + "learning_rate": 7.759935979318953e-07, + "loss": 0.8575167059898376, + "step": 5151 + }, + { + "epoch": 1.1870967741935483, + "grad_norm": 0.9330545417113619, + "learning_rate": 7.756223120547829e-07, + "loss": 0.6125110387802124, + "step": 5152 + }, + { + "epoch": 1.1873271889400923, + "grad_norm": 1.1387987197615417, + "learning_rate": 7.752510587457949e-07, + "loss": 0.7737400531768799, + "step": 5153 + }, + { + "epoch": 1.187557603686636, + "grad_norm": 0.9473095115528148, + "learning_rate": 7.748798380588177e-07, + "loss": 0.7300955653190613, + "step": 5154 + }, + { + "epoch": 1.1877880184331797, + "grad_norm": 0.9479432315278626, + "learning_rate": 7.745086500477343e-07, + "loss": 0.7974356412887573, + "step": 5155 + }, + { + "epoch": 1.1880184331797234, + "grad_norm": 1.120213603018525, + "learning_rate": 7.74137494766422e-07, + "loss": 0.8158693313598633, + "step": 5156 + }, + { + "epoch": 1.1882488479262672, + "grad_norm": 0.9086968377624679, + "learning_rate": 7.737663722687531e-07, + "loss": 0.6656177639961243, + "step": 5157 + }, + { + "epoch": 1.1884792626728111, + "grad_norm": 1.284345958176322, + "learning_rate": 7.733952826085958e-07, + "loss": 0.7796640992164612, + "step": 5158 + }, + { + "epoch": 1.1887096774193548, + "grad_norm": 1.1079992534891525, + "learning_rate": 7.730242258398135e-07, + "loss": 0.9224779009819031, + "step": 5159 + }, + { + "epoch": 
1.1889400921658986, + "grad_norm": 1.2013047291849663, + "learning_rate": 7.726532020162639e-07, + "loss": 0.7105277180671692, + "step": 5160 + }, + { + "epoch": 1.1891705069124423, + "grad_norm": 0.9139263319393289, + "learning_rate": 7.722822111918012e-07, + "loss": 0.5793930292129517, + "step": 5161 + }, + { + "epoch": 1.1894009216589863, + "grad_norm": 0.9419478266668957, + "learning_rate": 7.719112534202743e-07, + "loss": 0.7319367527961731, + "step": 5162 + }, + { + "epoch": 1.18963133640553, + "grad_norm": 1.182614737199728, + "learning_rate": 7.715403287555266e-07, + "loss": 0.7517954111099243, + "step": 5163 + }, + { + "epoch": 1.1898617511520737, + "grad_norm": 1.1800441614309307, + "learning_rate": 7.711694372513981e-07, + "loss": 0.8633241057395935, + "step": 5164 + }, + { + "epoch": 1.1900921658986174, + "grad_norm": 1.280920610105802, + "learning_rate": 7.707985789617227e-07, + "loss": 0.6453210115432739, + "step": 5165 + }, + { + "epoch": 1.1903225806451614, + "grad_norm": 1.1209224749220659, + "learning_rate": 7.704277539403303e-07, + "loss": 0.7609909772872925, + "step": 5166 + }, + { + "epoch": 1.1905529953917051, + "grad_norm": 1.1829891287159422, + "learning_rate": 7.700569622410453e-07, + "loss": 0.7419755458831787, + "step": 5167 + }, + { + "epoch": 1.1907834101382488, + "grad_norm": 1.0759571852853795, + "learning_rate": 7.696862039176879e-07, + "loss": 0.849078357219696, + "step": 5168 + }, + { + "epoch": 1.1910138248847926, + "grad_norm": 1.3077976619104341, + "learning_rate": 7.693154790240732e-07, + "loss": 0.8147921562194824, + "step": 5169 + }, + { + "epoch": 1.1912442396313363, + "grad_norm": 1.1349568865686221, + "learning_rate": 7.689447876140114e-07, + "loss": 0.7660118937492371, + "step": 5170 + }, + { + "epoch": 1.1914746543778802, + "grad_norm": 0.9919046297525586, + "learning_rate": 7.685741297413075e-07, + "loss": 0.7775185108184814, + "step": 5171 + }, + { + "epoch": 1.191705069124424, + "grad_norm": 1.0634336005518812, + 
"learning_rate": 7.682035054597624e-07, + "loss": 0.7184321880340576, + "step": 5172 + }, + { + "epoch": 1.1919354838709677, + "grad_norm": 0.9191067866194278, + "learning_rate": 7.678329148231719e-07, + "loss": 0.7108585834503174, + "step": 5173 + }, + { + "epoch": 1.1921658986175114, + "grad_norm": 1.169972531551494, + "learning_rate": 7.674623578853259e-07, + "loss": 0.7252670526504517, + "step": 5174 + }, + { + "epoch": 1.1923963133640554, + "grad_norm": 1.0227424567448893, + "learning_rate": 7.670918347000113e-07, + "loss": 0.818352460861206, + "step": 5175 + }, + { + "epoch": 1.192626728110599, + "grad_norm": 0.8768631462521176, + "learning_rate": 7.667213453210086e-07, + "loss": 0.6538013815879822, + "step": 5176 + }, + { + "epoch": 1.1928571428571428, + "grad_norm": 1.1216359209528128, + "learning_rate": 7.663508898020935e-07, + "loss": 0.7058148384094238, + "step": 5177 + }, + { + "epoch": 1.1930875576036866, + "grad_norm": 1.0528263608484594, + "learning_rate": 7.659804681970377e-07, + "loss": 0.7003160715103149, + "step": 5178 + }, + { + "epoch": 1.1933179723502305, + "grad_norm": 1.2339709506043992, + "learning_rate": 7.656100805596072e-07, + "loss": 0.84567791223526, + "step": 5179 + }, + { + "epoch": 1.1935483870967742, + "grad_norm": 1.239861543806107, + "learning_rate": 7.652397269435626e-07, + "loss": 0.7994743585586548, + "step": 5180 + }, + { + "epoch": 1.193778801843318, + "grad_norm": 1.3106444419652792, + "learning_rate": 7.648694074026615e-07, + "loss": 0.8177791833877563, + "step": 5181 + }, + { + "epoch": 1.1940092165898617, + "grad_norm": 1.362939104353802, + "learning_rate": 7.644991219906545e-07, + "loss": 0.6663975715637207, + "step": 5182 + }, + { + "epoch": 1.1942396313364054, + "grad_norm": 1.1422405746222943, + "learning_rate": 7.641288707612878e-07, + "loss": 0.8275883197784424, + "step": 5183 + }, + { + "epoch": 1.1944700460829494, + "grad_norm": 1.1201157873973466, + "learning_rate": 7.637586537683036e-07, + "loss": 
0.7710767388343811, + "step": 5184 + }, + { + "epoch": 1.194700460829493, + "grad_norm": 1.1629669577400157, + "learning_rate": 7.633884710654382e-07, + "loss": 0.7628582715988159, + "step": 5185 + }, + { + "epoch": 1.1949308755760368, + "grad_norm": 1.3793540006541976, + "learning_rate": 7.630183227064227e-07, + "loss": 0.7002676725387573, + "step": 5186 + }, + { + "epoch": 1.1951612903225806, + "grad_norm": 0.9948455527839576, + "learning_rate": 7.626482087449841e-07, + "loss": 0.8272073268890381, + "step": 5187 + }, + { + "epoch": 1.1953917050691245, + "grad_norm": 1.0711227380559258, + "learning_rate": 7.622781292348435e-07, + "loss": 0.7881417274475098, + "step": 5188 + }, + { + "epoch": 1.1956221198156682, + "grad_norm": 1.0728428578693516, + "learning_rate": 7.61908084229718e-07, + "loss": 0.797294020652771, + "step": 5189 + }, + { + "epoch": 1.195852534562212, + "grad_norm": 1.0264450399364256, + "learning_rate": 7.615380737833191e-07, + "loss": 0.7752290964126587, + "step": 5190 + }, + { + "epoch": 1.1960829493087557, + "grad_norm": 1.0830464595218987, + "learning_rate": 7.611680979493525e-07, + "loss": 0.7299143075942993, + "step": 5191 + }, + { + "epoch": 1.1963133640552996, + "grad_norm": 1.4839567137751186, + "learning_rate": 7.60798156781521e-07, + "loss": 0.6749997138977051, + "step": 5192 + }, + { + "epoch": 1.1965437788018434, + "grad_norm": 1.2717197322235172, + "learning_rate": 7.6042825033352e-07, + "loss": 0.7933796048164368, + "step": 5193 + }, + { + "epoch": 1.196774193548387, + "grad_norm": 1.1254669600910374, + "learning_rate": 7.600583786590411e-07, + "loss": 0.7214919328689575, + "step": 5194 + }, + { + "epoch": 1.1970046082949308, + "grad_norm": 1.0000165841598083, + "learning_rate": 7.596885418117713e-07, + "loss": 0.7804256081581116, + "step": 5195 + }, + { + "epoch": 1.1972350230414746, + "grad_norm": 1.2738023107912249, + "learning_rate": 7.593187398453915e-07, + "loss": 0.7615138292312622, + "step": 5196 + }, + { + "epoch": 
1.1974654377880185, + "grad_norm": 1.0493977127227612, + "learning_rate": 7.589489728135778e-07, + "loss": 0.8473657369613647, + "step": 5197 + }, + { + "epoch": 1.1976958525345622, + "grad_norm": 1.2204301678409606, + "learning_rate": 7.585792407700018e-07, + "loss": 0.7302027940750122, + "step": 5198 + }, + { + "epoch": 1.197926267281106, + "grad_norm": 1.123276567811957, + "learning_rate": 7.582095437683294e-07, + "loss": 0.7631692886352539, + "step": 5199 + }, + { + "epoch": 1.1981566820276497, + "grad_norm": 1.339389807954867, + "learning_rate": 7.578398818622211e-07, + "loss": 0.7982754707336426, + "step": 5200 + }, + { + "epoch": 1.1983870967741936, + "grad_norm": 1.3949436336418501, + "learning_rate": 7.574702551053339e-07, + "loss": 0.8445635437965393, + "step": 5201 + }, + { + "epoch": 1.1986175115207374, + "grad_norm": 1.267881130363425, + "learning_rate": 7.571006635513182e-07, + "loss": 0.8486276268959045, + "step": 5202 + }, + { + "epoch": 1.198847926267281, + "grad_norm": 1.2841422228776138, + "learning_rate": 7.567311072538191e-07, + "loss": 0.8433184623718262, + "step": 5203 + }, + { + "epoch": 1.1990783410138248, + "grad_norm": 1.5895945882971518, + "learning_rate": 7.56361586266478e-07, + "loss": 0.9772260189056396, + "step": 5204 + }, + { + "epoch": 1.1993087557603688, + "grad_norm": 1.1927959868338558, + "learning_rate": 7.559921006429304e-07, + "loss": 0.8349692821502686, + "step": 5205 + }, + { + "epoch": 1.1995391705069125, + "grad_norm": 1.070076083870323, + "learning_rate": 7.556226504368059e-07, + "loss": 0.7454575300216675, + "step": 5206 + }, + { + "epoch": 1.1997695852534562, + "grad_norm": 0.882927792535501, + "learning_rate": 7.552532357017303e-07, + "loss": 0.6680991649627686, + "step": 5207 + }, + { + "epoch": 1.2, + "grad_norm": 1.1844993546767875, + "learning_rate": 7.54883856491324e-07, + "loss": 0.6528318524360657, + "step": 5208 + }, + { + "epoch": 1.2002304147465437, + "grad_norm": 1.0482736751922475, + "learning_rate": 
7.545145128592008e-07, + "loss": 0.7711834907531738, + "step": 5209 + }, + { + "epoch": 1.2004608294930876, + "grad_norm": 1.022603342926927, + "learning_rate": 7.541452048589714e-07, + "loss": 0.6378746628761292, + "step": 5210 + }, + { + "epoch": 1.2006912442396314, + "grad_norm": 0.9309859008896244, + "learning_rate": 7.537759325442402e-07, + "loss": 0.7489340305328369, + "step": 5211 + }, + { + "epoch": 1.200921658986175, + "grad_norm": 1.0825673838806515, + "learning_rate": 7.53406695968606e-07, + "loss": 0.7869534492492676, + "step": 5212 + }, + { + "epoch": 1.2011520737327188, + "grad_norm": 1.1316888770375757, + "learning_rate": 7.530374951856637e-07, + "loss": 0.7252482175827026, + "step": 5213 + }, + { + "epoch": 1.2013824884792628, + "grad_norm": 1.1337087819491523, + "learning_rate": 7.526683302490018e-07, + "loss": 0.763259768486023, + "step": 5214 + }, + { + "epoch": 1.2016129032258065, + "grad_norm": 1.405277715760194, + "learning_rate": 7.522992012122046e-07, + "loss": 0.8135688304901123, + "step": 5215 + }, + { + "epoch": 1.2018433179723502, + "grad_norm": 1.5589534049714566, + "learning_rate": 7.519301081288504e-07, + "loss": 0.9282290935516357, + "step": 5216 + }, + { + "epoch": 1.202073732718894, + "grad_norm": 1.2621340712897178, + "learning_rate": 7.515610510525125e-07, + "loss": 0.7968727946281433, + "step": 5217 + }, + { + "epoch": 1.202304147465438, + "grad_norm": 1.4154309582650375, + "learning_rate": 7.511920300367594e-07, + "loss": 0.9495606422424316, + "step": 5218 + }, + { + "epoch": 1.2025345622119816, + "grad_norm": 1.120709992771365, + "learning_rate": 7.508230451351537e-07, + "loss": 0.6790425181388855, + "step": 5219 + }, + { + "epoch": 1.2027649769585254, + "grad_norm": 1.1216778132469425, + "learning_rate": 7.504540964012527e-07, + "loss": 0.7269036173820496, + "step": 5220 + }, + { + "epoch": 1.202995391705069, + "grad_norm": 1.4394573291388193, + "learning_rate": 7.500851838886097e-07, + "loss": 0.820799708366394, + "step": 
5221 + }, + { + "epoch": 1.2032258064516128, + "grad_norm": 1.1080457725700354, + "learning_rate": 7.497163076507715e-07, + "loss": 0.7693401575088501, + "step": 5222 + }, + { + "epoch": 1.2034562211981568, + "grad_norm": 1.1611837511561531, + "learning_rate": 7.493474677412793e-07, + "loss": 0.7687606811523438, + "step": 5223 + }, + { + "epoch": 1.2036866359447005, + "grad_norm": 0.9784122136232752, + "learning_rate": 7.489786642136709e-07, + "loss": 0.6858488321304321, + "step": 5224 + }, + { + "epoch": 1.2039170506912442, + "grad_norm": 0.8776412008252917, + "learning_rate": 7.486098971214769e-07, + "loss": 0.7575044631958008, + "step": 5225 + }, + { + "epoch": 1.204147465437788, + "grad_norm": 0.8129887936087057, + "learning_rate": 7.482411665182236e-07, + "loss": 0.6799627542495728, + "step": 5226 + }, + { + "epoch": 1.2043778801843317, + "grad_norm": 1.4994332488998736, + "learning_rate": 7.478724724574317e-07, + "loss": 0.8882759809494019, + "step": 5227 + }, + { + "epoch": 1.2046082949308756, + "grad_norm": 1.10750930167245, + "learning_rate": 7.475038149926165e-07, + "loss": 0.7835016250610352, + "step": 5228 + }, + { + "epoch": 1.2048387096774194, + "grad_norm": 1.3325922049902164, + "learning_rate": 7.471351941772883e-07, + "loss": 0.9264512062072754, + "step": 5229 + }, + { + "epoch": 1.205069124423963, + "grad_norm": 1.225862576818596, + "learning_rate": 7.467666100649521e-07, + "loss": 0.8094228506088257, + "step": 5230 + }, + { + "epoch": 1.205299539170507, + "grad_norm": 1.167425367358343, + "learning_rate": 7.463980627091073e-07, + "loss": 0.7782102823257446, + "step": 5231 + }, + { + "epoch": 1.2055299539170508, + "grad_norm": 1.2892161969383955, + "learning_rate": 7.460295521632474e-07, + "loss": 0.7946768999099731, + "step": 5232 + }, + { + "epoch": 1.2057603686635945, + "grad_norm": 1.2538288509415036, + "learning_rate": 7.456610784808624e-07, + "loss": 0.7571625709533691, + "step": 5233 + }, + { + "epoch": 1.2059907834101382, + "grad_norm": 
1.3786667467707436, + "learning_rate": 7.45292641715435e-07, + "loss": 0.9760236144065857, + "step": 5234 + }, + { + "epoch": 1.206221198156682, + "grad_norm": 1.0717694328508904, + "learning_rate": 7.449242419204431e-07, + "loss": 0.6370055675506592, + "step": 5235 + }, + { + "epoch": 1.206451612903226, + "grad_norm": 1.226412390848778, + "learning_rate": 7.445558791493603e-07, + "loss": 0.7991320490837097, + "step": 5236 + }, + { + "epoch": 1.2066820276497696, + "grad_norm": 1.0607083796487833, + "learning_rate": 7.441875534556531e-07, + "loss": 0.8840054273605347, + "step": 5237 + }, + { + "epoch": 1.2069124423963133, + "grad_norm": 1.0615184698087237, + "learning_rate": 7.438192648927841e-07, + "loss": 0.8634533882141113, + "step": 5238 + }, + { + "epoch": 1.207142857142857, + "grad_norm": 0.9816687263450602, + "learning_rate": 7.434510135142098e-07, + "loss": 0.7081723213195801, + "step": 5239 + }, + { + "epoch": 1.2073732718894008, + "grad_norm": 1.1398058732045784, + "learning_rate": 7.430827993733808e-07, + "loss": 0.7160249352455139, + "step": 5240 + }, + { + "epoch": 1.2076036866359448, + "grad_norm": 0.8011837684152103, + "learning_rate": 7.427146225237438e-07, + "loss": 0.5323421955108643, + "step": 5241 + }, + { + "epoch": 1.2078341013824885, + "grad_norm": 1.0448270993907307, + "learning_rate": 7.423464830187386e-07, + "loss": 0.6439197063446045, + "step": 5242 + }, + { + "epoch": 1.2080645161290322, + "grad_norm": 1.2861588666790074, + "learning_rate": 7.419783809117999e-07, + "loss": 0.8268016576766968, + "step": 5243 + }, + { + "epoch": 1.2082949308755762, + "grad_norm": 1.0010661947708184, + "learning_rate": 7.416103162563582e-07, + "loss": 0.8115339279174805, + "step": 5244 + }, + { + "epoch": 1.2085253456221199, + "grad_norm": 1.05524382659239, + "learning_rate": 7.41242289105837e-07, + "loss": 0.8677197694778442, + "step": 5245 + }, + { + "epoch": 1.2087557603686636, + "grad_norm": 1.3337261104998102, + "learning_rate": 7.408742995136547e-07, + 
"loss": 0.7942948937416077, + "step": 5246 + }, + { + "epoch": 1.2089861751152073, + "grad_norm": 1.4261507552200647, + "learning_rate": 7.405063475332249e-07, + "loss": 0.8457766771316528, + "step": 5247 + }, + { + "epoch": 1.209216589861751, + "grad_norm": 1.2992145711475631, + "learning_rate": 7.401384332179552e-07, + "loss": 0.8463923931121826, + "step": 5248 + }, + { + "epoch": 1.209447004608295, + "grad_norm": 1.2576660242210724, + "learning_rate": 7.397705566212479e-07, + "loss": 0.9192875623703003, + "step": 5249 + }, + { + "epoch": 1.2096774193548387, + "grad_norm": 1.257257688865163, + "learning_rate": 7.394027177964999e-07, + "loss": 0.7461347579956055, + "step": 5250 + }, + { + "epoch": 1.2099078341013825, + "grad_norm": 1.150791607540225, + "learning_rate": 7.390349167971025e-07, + "loss": 0.6953321695327759, + "step": 5251 + }, + { + "epoch": 1.2101382488479262, + "grad_norm": 1.0284326235023098, + "learning_rate": 7.38667153676441e-07, + "loss": 0.7226089835166931, + "step": 5252 + }, + { + "epoch": 1.21036866359447, + "grad_norm": 0.8781484717910895, + "learning_rate": 7.382994284878967e-07, + "loss": 0.6746406555175781, + "step": 5253 + }, + { + "epoch": 1.2105990783410139, + "grad_norm": 1.109396083619457, + "learning_rate": 7.379317412848438e-07, + "loss": 0.7600215673446655, + "step": 5254 + }, + { + "epoch": 1.2108294930875576, + "grad_norm": 1.0821310147954002, + "learning_rate": 7.375640921206514e-07, + "loss": 0.7530734539031982, + "step": 5255 + }, + { + "epoch": 1.2110599078341013, + "grad_norm": 1.0572444642243028, + "learning_rate": 7.371964810486839e-07, + "loss": 0.8103033304214478, + "step": 5256 + }, + { + "epoch": 1.2112903225806453, + "grad_norm": 1.5370115848017, + "learning_rate": 7.368289081222994e-07, + "loss": 0.8916831016540527, + "step": 5257 + }, + { + "epoch": 1.211520737327189, + "grad_norm": 0.9972990737801745, + "learning_rate": 7.364613733948501e-07, + "loss": 0.6728129386901855, + "step": 5258 + }, + { + "epoch": 
1.2117511520737327, + "grad_norm": 1.2459715050980873, + "learning_rate": 7.360938769196841e-07, + "loss": 0.8609380722045898, + "step": 5259 + }, + { + "epoch": 1.2119815668202765, + "grad_norm": 1.2704694196315967, + "learning_rate": 7.357264187501422e-07, + "loss": 0.9370373487472534, + "step": 5260 + }, + { + "epoch": 1.2122119815668202, + "grad_norm": 1.1080973982930933, + "learning_rate": 7.353589989395604e-07, + "loss": 0.6812434196472168, + "step": 5261 + }, + { + "epoch": 1.2124423963133641, + "grad_norm": 1.1917998982451765, + "learning_rate": 7.349916175412701e-07, + "loss": 0.7661731243133545, + "step": 5262 + }, + { + "epoch": 1.2126728110599079, + "grad_norm": 1.175052294784061, + "learning_rate": 7.346242746085951e-07, + "loss": 0.7306643128395081, + "step": 5263 + }, + { + "epoch": 1.2129032258064516, + "grad_norm": 1.2065862060559862, + "learning_rate": 7.34256970194856e-07, + "loss": 0.7189076542854309, + "step": 5264 + }, + { + "epoch": 1.2131336405529953, + "grad_norm": 0.8932044441494517, + "learning_rate": 7.338897043533656e-07, + "loss": 0.6935977935791016, + "step": 5265 + }, + { + "epoch": 1.213364055299539, + "grad_norm": 1.1224428177486496, + "learning_rate": 7.335224771374323e-07, + "loss": 0.8451323509216309, + "step": 5266 + }, + { + "epoch": 1.213594470046083, + "grad_norm": 1.1211043364668347, + "learning_rate": 7.331552886003589e-07, + "loss": 0.7936843037605286, + "step": 5267 + }, + { + "epoch": 1.2138248847926267, + "grad_norm": 1.1507587511456696, + "learning_rate": 7.327881387954418e-07, + "loss": 0.7989950776100159, + "step": 5268 + }, + { + "epoch": 1.2140552995391705, + "grad_norm": 1.1166217189865624, + "learning_rate": 7.324210277759726e-07, + "loss": 0.7579236030578613, + "step": 5269 + }, + { + "epoch": 1.2142857142857142, + "grad_norm": 1.1276787851795544, + "learning_rate": 7.320539555952372e-07, + "loss": 0.7101268768310547, + "step": 5270 + }, + { + "epoch": 1.2145161290322581, + "grad_norm": 1.0342829920040018, + 
"learning_rate": 7.316869223065155e-07, + "loss": 0.7920513153076172, + "step": 5271 + }, + { + "epoch": 1.2147465437788019, + "grad_norm": 1.4357028015234437, + "learning_rate": 7.313199279630814e-07, + "loss": 0.9241428375244141, + "step": 5272 + }, + { + "epoch": 1.2149769585253456, + "grad_norm": 1.1653282891915406, + "learning_rate": 7.309529726182044e-07, + "loss": 0.8278338313102722, + "step": 5273 + }, + { + "epoch": 1.2152073732718893, + "grad_norm": 0.9443953324177181, + "learning_rate": 7.305860563251473e-07, + "loss": 0.8230598568916321, + "step": 5274 + }, + { + "epoch": 1.2154377880184333, + "grad_norm": 0.9783962526324749, + "learning_rate": 7.302191791371672e-07, + "loss": 0.7791799902915955, + "step": 5275 + }, + { + "epoch": 1.215668202764977, + "grad_norm": 1.1070826926760935, + "learning_rate": 7.298523411075163e-07, + "loss": 0.705475926399231, + "step": 5276 + }, + { + "epoch": 1.2158986175115207, + "grad_norm": 1.2064718691511076, + "learning_rate": 7.294855422894406e-07, + "loss": 0.8078421354293823, + "step": 5277 + }, + { + "epoch": 1.2161290322580645, + "grad_norm": 1.2182160993977798, + "learning_rate": 7.2911878273618e-07, + "loss": 0.8115853667259216, + "step": 5278 + }, + { + "epoch": 1.2163594470046082, + "grad_norm": 1.0596504935928797, + "learning_rate": 7.287520625009698e-07, + "loss": 0.6917247772216797, + "step": 5279 + }, + { + "epoch": 1.2165898617511521, + "grad_norm": 1.0522660082790807, + "learning_rate": 7.283853816370386e-07, + "loss": 0.7131551504135132, + "step": 5280 + }, + { + "epoch": 1.2168202764976959, + "grad_norm": 0.9495683492221387, + "learning_rate": 7.280187401976093e-07, + "loss": 0.713994562625885, + "step": 5281 + }, + { + "epoch": 1.2170506912442396, + "grad_norm": 1.0845439765546743, + "learning_rate": 7.276521382359001e-07, + "loss": 0.7123454809188843, + "step": 5282 + }, + { + "epoch": 1.2172811059907833, + "grad_norm": 1.395671188469518, + "learning_rate": 7.272855758051226e-07, + "loss": 
0.7805770635604858, + "step": 5283 + }, + { + "epoch": 1.2175115207373273, + "grad_norm": 0.9191020761831104, + "learning_rate": 7.269190529584823e-07, + "loss": 0.756670355796814, + "step": 5284 + }, + { + "epoch": 1.217741935483871, + "grad_norm": 0.9614002237797926, + "learning_rate": 7.265525697491804e-07, + "loss": 0.5992655754089355, + "step": 5285 + }, + { + "epoch": 1.2179723502304147, + "grad_norm": 1.1857893348181308, + "learning_rate": 7.26186126230411e-07, + "loss": 0.7552722692489624, + "step": 5286 + }, + { + "epoch": 1.2182027649769585, + "grad_norm": 1.3153742960319537, + "learning_rate": 7.258197224553627e-07, + "loss": 0.7189064025878906, + "step": 5287 + }, + { + "epoch": 1.2184331797235024, + "grad_norm": 1.115820306372996, + "learning_rate": 7.254533584772188e-07, + "loss": 0.8277319669723511, + "step": 5288 + }, + { + "epoch": 1.2186635944700461, + "grad_norm": 1.0584826489222536, + "learning_rate": 7.250870343491561e-07, + "loss": 0.6655987501144409, + "step": 5289 + }, + { + "epoch": 1.2188940092165899, + "grad_norm": 1.3888484350972408, + "learning_rate": 7.247207501243469e-07, + "loss": 0.8654178380966187, + "step": 5290 + }, + { + "epoch": 1.2191244239631336, + "grad_norm": 1.1781514985004269, + "learning_rate": 7.243545058559564e-07, + "loss": 0.9148486852645874, + "step": 5291 + }, + { + "epoch": 1.2193548387096773, + "grad_norm": 1.0525236851594717, + "learning_rate": 7.239883015971439e-07, + "loss": 0.8003618717193604, + "step": 5292 + }, + { + "epoch": 1.2195852534562213, + "grad_norm": 1.1614945814905475, + "learning_rate": 7.236221374010647e-07, + "loss": 0.7290889024734497, + "step": 5293 + }, + { + "epoch": 1.219815668202765, + "grad_norm": 0.963434252776205, + "learning_rate": 7.232560133208663e-07, + "loss": 0.5989147424697876, + "step": 5294 + }, + { + "epoch": 1.2200460829493087, + "grad_norm": 0.8766403983792901, + "learning_rate": 7.228899294096907e-07, + "loss": 0.8424522876739502, + "step": 5295 + }, + { + "epoch": 
1.2202764976958524, + "grad_norm": 1.1686896205403536, + "learning_rate": 7.225238857206754e-07, + "loss": 0.7753746509552002, + "step": 5296 + }, + { + "epoch": 1.2205069124423964, + "grad_norm": 1.1424848742103464, + "learning_rate": 7.221578823069508e-07, + "loss": 0.693191647529602, + "step": 5297 + }, + { + "epoch": 1.2207373271889401, + "grad_norm": 1.177332636609729, + "learning_rate": 7.217919192216417e-07, + "loss": 0.7561964988708496, + "step": 5298 + }, + { + "epoch": 1.2209677419354839, + "grad_norm": 0.9927977088932712, + "learning_rate": 7.214259965178673e-07, + "loss": 0.7721199989318848, + "step": 5299 + }, + { + "epoch": 1.2211981566820276, + "grad_norm": 1.39798744468456, + "learning_rate": 7.210601142487407e-07, + "loss": 0.8100659251213074, + "step": 5300 + }, + { + "epoch": 1.2214285714285715, + "grad_norm": 1.0570396078634527, + "learning_rate": 7.206942724673688e-07, + "loss": 0.6753256916999817, + "step": 5301 + }, + { + "epoch": 1.2216589861751153, + "grad_norm": 1.1020954128293505, + "learning_rate": 7.20328471226854e-07, + "loss": 0.7534425854682922, + "step": 5302 + }, + { + "epoch": 1.221889400921659, + "grad_norm": 1.5962153366210945, + "learning_rate": 7.199627105802913e-07, + "loss": 0.8275027275085449, + "step": 5303 + }, + { + "epoch": 1.2221198156682027, + "grad_norm": 1.1431238814592317, + "learning_rate": 7.195969905807702e-07, + "loss": 0.728579580783844, + "step": 5304 + }, + { + "epoch": 1.2223502304147464, + "grad_norm": 1.1008777946014818, + "learning_rate": 7.192313112813749e-07, + "loss": 0.8221413493156433, + "step": 5305 + }, + { + "epoch": 1.2225806451612904, + "grad_norm": 1.0255386420970887, + "learning_rate": 7.188656727351832e-07, + "loss": 0.7819123268127441, + "step": 5306 + }, + { + "epoch": 1.2228110599078341, + "grad_norm": 1.1141595278176613, + "learning_rate": 7.185000749952666e-07, + "loss": 0.7474294900894165, + "step": 5307 + }, + { + "epoch": 1.2230414746543778, + "grad_norm": 1.4333018176649106, + 
"learning_rate": 7.181345181146919e-07, + "loss": 0.8072259426116943, + "step": 5308 + }, + { + "epoch": 1.2232718894009216, + "grad_norm": 1.3449246489382425, + "learning_rate": 7.177690021465184e-07, + "loss": 0.8718069791793823, + "step": 5309 + }, + { + "epoch": 1.2235023041474655, + "grad_norm": 1.1090181258933243, + "learning_rate": 7.174035271438006e-07, + "loss": 0.8374875783920288, + "step": 5310 + }, + { + "epoch": 1.2237327188940093, + "grad_norm": 1.2085386756305507, + "learning_rate": 7.170380931595869e-07, + "loss": 0.6669566631317139, + "step": 5311 + }, + { + "epoch": 1.223963133640553, + "grad_norm": 1.1706882886588135, + "learning_rate": 7.16672700246919e-07, + "loss": 0.8735665678977966, + "step": 5312 + }, + { + "epoch": 1.2241935483870967, + "grad_norm": 1.1826163019402958, + "learning_rate": 7.16307348458834e-07, + "loss": 0.8312361240386963, + "step": 5313 + }, + { + "epoch": 1.2244239631336407, + "grad_norm": 1.1102424714986416, + "learning_rate": 7.159420378483619e-07, + "loss": 0.7927724123001099, + "step": 5314 + }, + { + "epoch": 1.2246543778801844, + "grad_norm": 1.0527049283172933, + "learning_rate": 7.155767684685264e-07, + "loss": 0.7641698122024536, + "step": 5315 + }, + { + "epoch": 1.2248847926267281, + "grad_norm": 1.0508850668326304, + "learning_rate": 7.15211540372347e-07, + "loss": 0.7490028142929077, + "step": 5316 + }, + { + "epoch": 1.2251152073732718, + "grad_norm": 1.0604993776512237, + "learning_rate": 7.148463536128354e-07, + "loss": 0.7194815874099731, + "step": 5317 + }, + { + "epoch": 1.2253456221198156, + "grad_norm": 1.2779756064695784, + "learning_rate": 7.144812082429979e-07, + "loss": 0.8328256607055664, + "step": 5318 + }, + { + "epoch": 1.2255760368663595, + "grad_norm": 1.1539197608232337, + "learning_rate": 7.141161043158352e-07, + "loss": 0.9124876260757446, + "step": 5319 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 1.346989410896588, + "learning_rate": 7.137510418843416e-07, + "loss": 
0.8183319568634033, + "step": 5320 + }, + { + "epoch": 1.226036866359447, + "grad_norm": 1.0902088619882297, + "learning_rate": 7.133860210015048e-07, + "loss": 0.8423885107040405, + "step": 5321 + }, + { + "epoch": 1.2262672811059907, + "grad_norm": 1.064962271727849, + "learning_rate": 7.130210417203082e-07, + "loss": 0.8175387382507324, + "step": 5322 + }, + { + "epoch": 1.2264976958525347, + "grad_norm": 1.0111617635250245, + "learning_rate": 7.126561040937274e-07, + "loss": 0.8415048718452454, + "step": 5323 + }, + { + "epoch": 1.2267281105990784, + "grad_norm": 1.4241774929740556, + "learning_rate": 7.122912081747321e-07, + "loss": 0.6891156435012817, + "step": 5324 + }, + { + "epoch": 1.226958525345622, + "grad_norm": 1.1236132104045742, + "learning_rate": 7.119263540162876e-07, + "loss": 0.667617678642273, + "step": 5325 + }, + { + "epoch": 1.2271889400921658, + "grad_norm": 1.21591291521647, + "learning_rate": 7.115615416713517e-07, + "loss": 0.7752082347869873, + "step": 5326 + }, + { + "epoch": 1.2274193548387098, + "grad_norm": 1.0094697644265302, + "learning_rate": 7.111967711928757e-07, + "loss": 0.6582639813423157, + "step": 5327 + }, + { + "epoch": 1.2276497695852535, + "grad_norm": 0.9823209869062589, + "learning_rate": 7.108320426338063e-07, + "loss": 0.6996462345123291, + "step": 5328 + }, + { + "epoch": 1.2278801843317972, + "grad_norm": 1.1364634127826816, + "learning_rate": 7.104673560470828e-07, + "loss": 0.7132028341293335, + "step": 5329 + }, + { + "epoch": 1.228110599078341, + "grad_norm": 1.1959075580849723, + "learning_rate": 7.101027114856395e-07, + "loss": 0.7344096899032593, + "step": 5330 + }, + { + "epoch": 1.2283410138248847, + "grad_norm": 1.2810764573761082, + "learning_rate": 7.097381090024039e-07, + "loss": 0.7805585861206055, + "step": 5331 + }, + { + "epoch": 1.2285714285714286, + "grad_norm": 1.2310137220528714, + "learning_rate": 7.093735486502976e-07, + "loss": 0.6785855889320374, + "step": 5332 + }, + { + "epoch": 
1.2288018433179724, + "grad_norm": 1.3226389203047557, + "learning_rate": 7.090090304822355e-07, + "loss": 0.7465041875839233, + "step": 5333 + }, + { + "epoch": 1.229032258064516, + "grad_norm": 1.0465247410006058, + "learning_rate": 7.086445545511278e-07, + "loss": 0.7400432825088501, + "step": 5334 + }, + { + "epoch": 1.2292626728110598, + "grad_norm": 0.9732969942350592, + "learning_rate": 7.082801209098774e-07, + "loss": 0.8567768335342407, + "step": 5335 + }, + { + "epoch": 1.2294930875576038, + "grad_norm": 1.133102602749406, + "learning_rate": 7.079157296113807e-07, + "loss": 0.7451025247573853, + "step": 5336 + }, + { + "epoch": 1.2297235023041475, + "grad_norm": 1.2953309888801026, + "learning_rate": 7.075513807085299e-07, + "loss": 0.7178194522857666, + "step": 5337 + }, + { + "epoch": 1.2299539170506912, + "grad_norm": 1.114794382407599, + "learning_rate": 7.071870742542086e-07, + "loss": 0.7538058161735535, + "step": 5338 + }, + { + "epoch": 1.230184331797235, + "grad_norm": 1.2706015052011863, + "learning_rate": 7.068228103012959e-07, + "loss": 0.7853896021842957, + "step": 5339 + }, + { + "epoch": 1.230414746543779, + "grad_norm": 1.6145088717882257, + "learning_rate": 7.064585889026644e-07, + "loss": 0.9359887838363647, + "step": 5340 + }, + { + "epoch": 1.2306451612903226, + "grad_norm": 1.2876289498435494, + "learning_rate": 7.060944101111797e-07, + "loss": 0.8590530753135681, + "step": 5341 + }, + { + "epoch": 1.2308755760368664, + "grad_norm": 1.0245387562303532, + "learning_rate": 7.057302739797025e-07, + "loss": 0.7047204971313477, + "step": 5342 + }, + { + "epoch": 1.23110599078341, + "grad_norm": 1.3069544437359595, + "learning_rate": 7.053661805610867e-07, + "loss": 0.8826072216033936, + "step": 5343 + }, + { + "epoch": 1.2313364055299538, + "grad_norm": 1.2593962984780245, + "learning_rate": 7.050021299081792e-07, + "loss": 0.9394192695617676, + "step": 5344 + }, + { + "epoch": 1.2315668202764978, + "grad_norm": 1.1109567819341923, + 
"learning_rate": 7.046381220738224e-07, + "loss": 0.7814885377883911, + "step": 5345 + }, + { + "epoch": 1.2317972350230415, + "grad_norm": 1.1819250736895568, + "learning_rate": 7.042741571108512e-07, + "loss": 0.781699538230896, + "step": 5346 + }, + { + "epoch": 1.2320276497695852, + "grad_norm": 1.1116588757864085, + "learning_rate": 7.039102350720946e-07, + "loss": 0.6554632186889648, + "step": 5347 + }, + { + "epoch": 1.232258064516129, + "grad_norm": 0.9564548780258206, + "learning_rate": 7.035463560103753e-07, + "loss": 0.6449903249740601, + "step": 5348 + }, + { + "epoch": 1.2324884792626727, + "grad_norm": 1.3130676696714008, + "learning_rate": 7.031825199785101e-07, + "loss": 0.8222958445549011, + "step": 5349 + }, + { + "epoch": 1.2327188940092166, + "grad_norm": 1.073654969776922, + "learning_rate": 7.02818727029309e-07, + "loss": 0.8315533399581909, + "step": 5350 + }, + { + "epoch": 1.2329493087557604, + "grad_norm": 0.9980466179862664, + "learning_rate": 7.024549772155764e-07, + "loss": 0.8065732717514038, + "step": 5351 + }, + { + "epoch": 1.233179723502304, + "grad_norm": 1.3823215182318742, + "learning_rate": 7.020912705901101e-07, + "loss": 0.7607216835021973, + "step": 5352 + }, + { + "epoch": 1.233410138248848, + "grad_norm": 1.3000097773568569, + "learning_rate": 7.01727607205701e-07, + "loss": 0.877311110496521, + "step": 5353 + }, + { + "epoch": 1.2336405529953918, + "grad_norm": 1.1855641794195606, + "learning_rate": 7.013639871151354e-07, + "loss": 0.7352526187896729, + "step": 5354 + }, + { + "epoch": 1.2338709677419355, + "grad_norm": 1.1123782494693044, + "learning_rate": 7.010004103711915e-07, + "loss": 0.7676074504852295, + "step": 5355 + }, + { + "epoch": 1.2341013824884792, + "grad_norm": 1.1035546011135826, + "learning_rate": 7.00636877026642e-07, + "loss": 0.7802003622055054, + "step": 5356 + }, + { + "epoch": 1.234331797235023, + "grad_norm": 1.0576568317960378, + "learning_rate": 7.002733871342537e-07, + "loss": 
0.747033953666687, + "step": 5357 + }, + { + "epoch": 1.234562211981567, + "grad_norm": 1.1565555542506367, + "learning_rate": 6.999099407467865e-07, + "loss": 0.8086956739425659, + "step": 5358 + }, + { + "epoch": 1.2347926267281106, + "grad_norm": 1.450692015608809, + "learning_rate": 6.995465379169941e-07, + "loss": 0.9362099170684814, + "step": 5359 + }, + { + "epoch": 1.2350230414746544, + "grad_norm": 1.0699993470783844, + "learning_rate": 6.991831786976241e-07, + "loss": 0.6784812211990356, + "step": 5360 + }, + { + "epoch": 1.235253456221198, + "grad_norm": 1.0206889971672557, + "learning_rate": 6.988198631414171e-07, + "loss": 0.7733708620071411, + "step": 5361 + }, + { + "epoch": 1.2354838709677418, + "grad_norm": 1.1745502344238163, + "learning_rate": 6.984565913011087e-07, + "loss": 0.8747115135192871, + "step": 5362 + }, + { + "epoch": 1.2357142857142858, + "grad_norm": 1.0659966645754941, + "learning_rate": 6.980933632294268e-07, + "loss": 0.6947430372238159, + "step": 5363 + }, + { + "epoch": 1.2359447004608295, + "grad_norm": 1.206089262306805, + "learning_rate": 6.97730178979093e-07, + "loss": 0.7128404378890991, + "step": 5364 + }, + { + "epoch": 1.2361751152073732, + "grad_norm": 1.1120167642627505, + "learning_rate": 6.973670386028242e-07, + "loss": 0.7190830707550049, + "step": 5365 + }, + { + "epoch": 1.2364055299539172, + "grad_norm": 1.1367562157166997, + "learning_rate": 6.970039421533291e-07, + "loss": 0.7625770568847656, + "step": 5366 + }, + { + "epoch": 1.236635944700461, + "grad_norm": 1.109720416461976, + "learning_rate": 6.966408896833104e-07, + "loss": 0.7942707538604736, + "step": 5367 + }, + { + "epoch": 1.2368663594470046, + "grad_norm": 1.2413354296268997, + "learning_rate": 6.962778812454652e-07, + "loss": 0.8329455852508545, + "step": 5368 + }, + { + "epoch": 1.2370967741935484, + "grad_norm": 0.8823115581397621, + "learning_rate": 6.959149168924833e-07, + "loss": 0.6034290790557861, + "step": 5369 + }, + { + "epoch": 
1.237327188940092, + "grad_norm": 1.1119487486974622, + "learning_rate": 6.955519966770486e-07, + "loss": 0.8424680233001709, + "step": 5370 + }, + { + "epoch": 1.237557603686636, + "grad_norm": 1.4443979353165184, + "learning_rate": 6.951891206518388e-07, + "loss": 0.8670322895050049, + "step": 5371 + }, + { + "epoch": 1.2377880184331798, + "grad_norm": 1.2577295715670245, + "learning_rate": 6.948262888695244e-07, + "loss": 0.7283621430397034, + "step": 5372 + }, + { + "epoch": 1.2380184331797235, + "grad_norm": 1.1772858057268798, + "learning_rate": 6.9446350138277e-07, + "loss": 0.7990118265151978, + "step": 5373 + }, + { + "epoch": 1.2382488479262672, + "grad_norm": 1.3359682917878526, + "learning_rate": 6.941007582442342e-07, + "loss": 0.945558488368988, + "step": 5374 + }, + { + "epoch": 1.238479262672811, + "grad_norm": 1.186182272846314, + "learning_rate": 6.937380595065685e-07, + "loss": 0.6905936002731323, + "step": 5375 + }, + { + "epoch": 1.238709677419355, + "grad_norm": 1.1665515184197677, + "learning_rate": 6.933754052224176e-07, + "loss": 0.7757662534713745, + "step": 5376 + }, + { + "epoch": 1.2389400921658986, + "grad_norm": 1.1107589407670702, + "learning_rate": 6.930127954444209e-07, + "loss": 0.63062584400177, + "step": 5377 + }, + { + "epoch": 1.2391705069124423, + "grad_norm": 1.2453155093106256, + "learning_rate": 6.926502302252109e-07, + "loss": 0.7341021299362183, + "step": 5378 + }, + { + "epoch": 1.2394009216589863, + "grad_norm": 0.9019761448377311, + "learning_rate": 6.922877096174127e-07, + "loss": 0.572767972946167, + "step": 5379 + }, + { + "epoch": 1.23963133640553, + "grad_norm": 1.274761976544521, + "learning_rate": 6.919252336736463e-07, + "loss": 0.630276083946228, + "step": 5380 + }, + { + "epoch": 1.2398617511520738, + "grad_norm": 1.0769631455551745, + "learning_rate": 6.915628024465244e-07, + "loss": 0.668334424495697, + "step": 5381 + }, + { + "epoch": 1.2400921658986175, + "grad_norm": 0.9444198657704267, + 
"learning_rate": 6.912004159886529e-07, + "loss": 0.6766513586044312, + "step": 5382 + }, + { + "epoch": 1.2403225806451612, + "grad_norm": 1.3884668691330446, + "learning_rate": 6.908380743526328e-07, + "loss": 0.7016473412513733, + "step": 5383 + }, + { + "epoch": 1.2405529953917052, + "grad_norm": 1.378738366714881, + "learning_rate": 6.904757775910568e-07, + "loss": 0.8837979435920715, + "step": 5384 + }, + { + "epoch": 1.2407834101382489, + "grad_norm": 0.9305030195638431, + "learning_rate": 6.901135257565116e-07, + "loss": 0.7187714576721191, + "step": 5385 + }, + { + "epoch": 1.2410138248847926, + "grad_norm": 1.0935814864632027, + "learning_rate": 6.897513189015782e-07, + "loss": 0.8227157592773438, + "step": 5386 + }, + { + "epoch": 1.2412442396313363, + "grad_norm": 1.278600897043475, + "learning_rate": 6.893891570788301e-07, + "loss": 0.8812209367752075, + "step": 5387 + }, + { + "epoch": 1.24147465437788, + "grad_norm": 1.0426681195674332, + "learning_rate": 6.890270403408348e-07, + "loss": 0.6702297925949097, + "step": 5388 + }, + { + "epoch": 1.241705069124424, + "grad_norm": 1.1718249382850798, + "learning_rate": 6.886649687401529e-07, + "loss": 0.646358847618103, + "step": 5389 + }, + { + "epoch": 1.2419354838709677, + "grad_norm": 1.1131010301922042, + "learning_rate": 6.883029423293383e-07, + "loss": 0.6514080762863159, + "step": 5390 + }, + { + "epoch": 1.2421658986175115, + "grad_norm": 1.0826812738863971, + "learning_rate": 6.879409611609393e-07, + "loss": 0.6938437819480896, + "step": 5391 + }, + { + "epoch": 1.2423963133640552, + "grad_norm": 1.3710627721954263, + "learning_rate": 6.875790252874967e-07, + "loss": 0.8601399064064026, + "step": 5392 + }, + { + "epoch": 1.2426267281105992, + "grad_norm": 1.1590300352526421, + "learning_rate": 6.872171347615445e-07, + "loss": 0.6641080379486084, + "step": 5393 + }, + { + "epoch": 1.2428571428571429, + "grad_norm": 1.0046628491787142, + "learning_rate": 6.868552896356117e-07, + "loss": 
0.7109012603759766, + "step": 5394 + }, + { + "epoch": 1.2430875576036866, + "grad_norm": 1.261042767669179, + "learning_rate": 6.864934899622191e-07, + "loss": 0.8558728694915771, + "step": 5395 + }, + { + "epoch": 1.2433179723502303, + "grad_norm": 1.1243133400823155, + "learning_rate": 6.861317357938807e-07, + "loss": 0.6119382977485657, + "step": 5396 + }, + { + "epoch": 1.2435483870967743, + "grad_norm": 1.2850449121793286, + "learning_rate": 6.857700271831059e-07, + "loss": 0.7527587413787842, + "step": 5397 + }, + { + "epoch": 1.243778801843318, + "grad_norm": 1.3104214277299573, + "learning_rate": 6.854083641823957e-07, + "loss": 0.8082761168479919, + "step": 5398 + }, + { + "epoch": 1.2440092165898617, + "grad_norm": 1.0664271007055484, + "learning_rate": 6.850467468442447e-07, + "loss": 0.7289307117462158, + "step": 5399 + }, + { + "epoch": 1.2442396313364055, + "grad_norm": 1.2684124709337747, + "learning_rate": 6.846851752211418e-07, + "loss": 0.8824148178100586, + "step": 5400 + }, + { + "epoch": 1.2444700460829492, + "grad_norm": 1.2011621536911168, + "learning_rate": 6.843236493655682e-07, + "loss": 0.7046724557876587, + "step": 5401 + }, + { + "epoch": 1.2447004608294931, + "grad_norm": 1.0456601321771188, + "learning_rate": 6.839621693299987e-07, + "loss": 0.8192921876907349, + "step": 5402 + }, + { + "epoch": 1.2449308755760369, + "grad_norm": 1.1031705508374716, + "learning_rate": 6.83600735166902e-07, + "loss": 0.7651070356369019, + "step": 5403 + }, + { + "epoch": 1.2451612903225806, + "grad_norm": 1.10155120943284, + "learning_rate": 6.832393469287401e-07, + "loss": 0.7689340114593506, + "step": 5404 + }, + { + "epoch": 1.2453917050691243, + "grad_norm": 1.438313566898243, + "learning_rate": 6.828780046679671e-07, + "loss": 0.9214832782745361, + "step": 5405 + }, + { + "epoch": 1.2456221198156683, + "grad_norm": 1.1160237214981186, + "learning_rate": 6.825167084370322e-07, + "loss": 0.7210682034492493, + "step": 5406 + }, + { + "epoch": 
1.245852534562212, + "grad_norm": 1.1608936823977416, + "learning_rate": 6.82155458288377e-07, + "loss": 0.871317446231842, + "step": 5407 + }, + { + "epoch": 1.2460829493087557, + "grad_norm": 1.2750147741770517, + "learning_rate": 6.817942542744359e-07, + "loss": 0.7669065594673157, + "step": 5408 + }, + { + "epoch": 1.2463133640552995, + "grad_norm": 1.0693548196930358, + "learning_rate": 6.814330964476379e-07, + "loss": 0.7317448854446411, + "step": 5409 + }, + { + "epoch": 1.2465437788018434, + "grad_norm": 1.2936969678285373, + "learning_rate": 6.810719848604036e-07, + "loss": 0.7873220443725586, + "step": 5410 + }, + { + "epoch": 1.2467741935483871, + "grad_norm": 1.2973675980536, + "learning_rate": 6.807109195651492e-07, + "loss": 0.713294267654419, + "step": 5411 + }, + { + "epoch": 1.2470046082949309, + "grad_norm": 1.2551238151306954, + "learning_rate": 6.803499006142819e-07, + "loss": 0.7592979669570923, + "step": 5412 + }, + { + "epoch": 1.2472350230414746, + "grad_norm": 1.3113983649465133, + "learning_rate": 6.79988928060203e-07, + "loss": 0.7805737257003784, + "step": 5413 + }, + { + "epoch": 1.2474654377880183, + "grad_norm": 0.8180058983934718, + "learning_rate": 6.79628001955308e-07, + "loss": 0.7706440687179565, + "step": 5414 + }, + { + "epoch": 1.2476958525345623, + "grad_norm": 1.3696824329137627, + "learning_rate": 6.792671223519844e-07, + "loss": 0.772534966468811, + "step": 5415 + }, + { + "epoch": 1.247926267281106, + "grad_norm": 1.2283026355612159, + "learning_rate": 6.789062893026129e-07, + "loss": 0.7939096093177795, + "step": 5416 + }, + { + "epoch": 1.2481566820276497, + "grad_norm": 1.263037130888269, + "learning_rate": 6.78545502859569e-07, + "loss": 0.7062902450561523, + "step": 5417 + }, + { + "epoch": 1.2483870967741935, + "grad_norm": 1.042353004558378, + "learning_rate": 6.781847630752197e-07, + "loss": 0.8296496868133545, + "step": 5418 + }, + { + "epoch": 1.2486175115207374, + "grad_norm": 1.4186103660131706, + 
"learning_rate": 6.778240700019258e-07, + "loss": 0.926125168800354, + "step": 5419 + }, + { + "epoch": 1.2488479262672811, + "grad_norm": 1.1816532525816696, + "learning_rate": 6.774634236920419e-07, + "loss": 0.7301739454269409, + "step": 5420 + }, + { + "epoch": 1.2490783410138249, + "grad_norm": 1.366957713339659, + "learning_rate": 6.771028241979151e-07, + "loss": 0.7313426733016968, + "step": 5421 + }, + { + "epoch": 1.2493087557603686, + "grad_norm": 0.9539446793763906, + "learning_rate": 6.767422715718853e-07, + "loss": 0.7193025946617126, + "step": 5422 + }, + { + "epoch": 1.2495391705069125, + "grad_norm": 1.1735826178809459, + "learning_rate": 6.763817658662874e-07, + "loss": 0.6544638872146606, + "step": 5423 + }, + { + "epoch": 1.2497695852534563, + "grad_norm": 1.1828661707349362, + "learning_rate": 6.760213071334478e-07, + "loss": 0.8402822613716125, + "step": 5424 + }, + { + "epoch": 1.25, + "grad_norm": 1.1854670368859663, + "learning_rate": 6.756608954256861e-07, + "loss": 0.6840100288391113, + "step": 5425 + }, + { + "epoch": 1.2502304147465437, + "grad_norm": 1.1842873946027908, + "learning_rate": 6.753005307953165e-07, + "loss": 0.7315107583999634, + "step": 5426 + }, + { + "epoch": 1.2504608294930875, + "grad_norm": 0.9743094512393712, + "learning_rate": 6.74940213294645e-07, + "loss": 0.6369785070419312, + "step": 5427 + }, + { + "epoch": 1.2506912442396314, + "grad_norm": 1.0769824502789231, + "learning_rate": 6.745799429759711e-07, + "loss": 0.7700424790382385, + "step": 5428 + }, + { + "epoch": 1.2509216589861751, + "grad_norm": 1.2719323162039158, + "learning_rate": 6.742197198915877e-07, + "loss": 0.7436221241950989, + "step": 5429 + }, + { + "epoch": 1.2511520737327189, + "grad_norm": 1.235326047289827, + "learning_rate": 6.738595440937809e-07, + "loss": 0.8028342723846436, + "step": 5430 + }, + { + "epoch": 1.2513824884792628, + "grad_norm": 1.1651221420823998, + "learning_rate": 6.734994156348288e-07, + "loss": 0.7705515623092651, + 
"step": 5431 + }, + { + "epoch": 1.2516129032258063, + "grad_norm": 1.509633589240068, + "learning_rate": 6.73139334567005e-07, + "loss": 0.7110899686813354, + "step": 5432 + }, + { + "epoch": 1.2518433179723503, + "grad_norm": 1.0701201128505256, + "learning_rate": 6.727793009425739e-07, + "loss": 0.7495337128639221, + "step": 5433 + }, + { + "epoch": 1.252073732718894, + "grad_norm": 1.1393040143384143, + "learning_rate": 6.724193148137938e-07, + "loss": 0.7735337018966675, + "step": 5434 + }, + { + "epoch": 1.2523041474654377, + "grad_norm": 1.5709409365174263, + "learning_rate": 6.720593762329167e-07, + "loss": 0.8655617237091064, + "step": 5435 + }, + { + "epoch": 1.2525345622119817, + "grad_norm": 1.0969772466203969, + "learning_rate": 6.716994852521871e-07, + "loss": 0.7989616394042969, + "step": 5436 + }, + { + "epoch": 1.2527649769585254, + "grad_norm": 1.2186152186967236, + "learning_rate": 6.713396419238424e-07, + "loss": 0.8090296983718872, + "step": 5437 + }, + { + "epoch": 1.2529953917050691, + "grad_norm": 1.175751705980128, + "learning_rate": 6.709798463001138e-07, + "loss": 0.7150726318359375, + "step": 5438 + }, + { + "epoch": 1.2532258064516129, + "grad_norm": 1.1350361891486582, + "learning_rate": 6.706200984332249e-07, + "loss": 0.7136287689208984, + "step": 5439 + }, + { + "epoch": 1.2534562211981566, + "grad_norm": 1.2991395376590593, + "learning_rate": 6.702603983753927e-07, + "loss": 0.8538687229156494, + "step": 5440 + }, + { + "epoch": 1.2536866359447005, + "grad_norm": 1.5253402941485412, + "learning_rate": 6.699007461788272e-07, + "loss": 0.7960666418075562, + "step": 5441 + }, + { + "epoch": 1.2539170506912443, + "grad_norm": 0.9539757778238315, + "learning_rate": 6.695411418957309e-07, + "loss": 0.7462595701217651, + "step": 5442 + }, + { + "epoch": 1.254147465437788, + "grad_norm": 1.482445221768143, + "learning_rate": 6.691815855783009e-07, + "loss": 0.795913577079773, + "step": 5443 + }, + { + "epoch": 1.2543778801843317, + 
"grad_norm": 1.071717267875031, + "learning_rate": 6.688220772787258e-07, + "loss": 0.7589330077171326, + "step": 5444 + }, + { + "epoch": 1.2546082949308754, + "grad_norm": 1.4795497320121442, + "learning_rate": 6.684626170491874e-07, + "loss": 0.7719615697860718, + "step": 5445 + }, + { + "epoch": 1.2548387096774194, + "grad_norm": 1.06581311441289, + "learning_rate": 6.681032049418616e-07, + "loss": 0.8516664505004883, + "step": 5446 + }, + { + "epoch": 1.2550691244239631, + "grad_norm": 1.466555451116343, + "learning_rate": 6.677438410089163e-07, + "loss": 0.8597210049629211, + "step": 5447 + }, + { + "epoch": 1.2552995391705069, + "grad_norm": 1.2172979010742704, + "learning_rate": 6.673845253025124e-07, + "loss": 0.7101171016693115, + "step": 5448 + }, + { + "epoch": 1.2555299539170508, + "grad_norm": 1.105900547055049, + "learning_rate": 6.670252578748044e-07, + "loss": 0.6946178078651428, + "step": 5449 + }, + { + "epoch": 1.2557603686635945, + "grad_norm": 1.687580161954866, + "learning_rate": 6.666660387779395e-07, + "loss": 0.9912126660346985, + "step": 5450 + }, + { + "epoch": 1.2559907834101383, + "grad_norm": 1.087382323913162, + "learning_rate": 6.663068680640573e-07, + "loss": 0.6495379209518433, + "step": 5451 + }, + { + "epoch": 1.256221198156682, + "grad_norm": 1.0213661473677353, + "learning_rate": 6.65947745785292e-07, + "loss": 0.6276426315307617, + "step": 5452 + }, + { + "epoch": 1.2564516129032257, + "grad_norm": 1.082562870265783, + "learning_rate": 6.655886719937691e-07, + "loss": 0.7273461818695068, + "step": 5453 + }, + { + "epoch": 1.2566820276497697, + "grad_norm": 1.258671733492057, + "learning_rate": 6.652296467416073e-07, + "loss": 0.8248249292373657, + "step": 5454 + }, + { + "epoch": 1.2569124423963134, + "grad_norm": 1.2124691152915896, + "learning_rate": 6.648706700809196e-07, + "loss": 0.8709753751754761, + "step": 5455 + }, + { + "epoch": 1.2571428571428571, + "grad_norm": 1.4025604957471465, + "learning_rate": 
6.645117420638105e-07, + "loss": 0.8207283020019531, + "step": 5456 + }, + { + "epoch": 1.2573732718894008, + "grad_norm": 1.0867491150840567, + "learning_rate": 6.641528627423774e-07, + "loss": 0.8222801685333252, + "step": 5457 + }, + { + "epoch": 1.2576036866359446, + "grad_norm": 1.0891862457945214, + "learning_rate": 6.637940321687121e-07, + "loss": 0.7684904336929321, + "step": 5458 + }, + { + "epoch": 1.2578341013824885, + "grad_norm": 1.106565522930133, + "learning_rate": 6.634352503948979e-07, + "loss": 0.7930517196655273, + "step": 5459 + }, + { + "epoch": 1.2580645161290323, + "grad_norm": 1.255727738748605, + "learning_rate": 6.630765174730116e-07, + "loss": 0.7414563298225403, + "step": 5460 + }, + { + "epoch": 1.258294930875576, + "grad_norm": 1.0415923536335177, + "learning_rate": 6.627178334551227e-07, + "loss": 0.7959232926368713, + "step": 5461 + }, + { + "epoch": 1.25852534562212, + "grad_norm": 1.2823788828450395, + "learning_rate": 6.623591983932935e-07, + "loss": 0.6722866296768188, + "step": 5462 + }, + { + "epoch": 1.2587557603686637, + "grad_norm": 1.0428819037253236, + "learning_rate": 6.620006123395799e-07, + "loss": 0.7688727378845215, + "step": 5463 + }, + { + "epoch": 1.2589861751152074, + "grad_norm": 1.1454091886933473, + "learning_rate": 6.616420753460301e-07, + "loss": 0.7543724179267883, + "step": 5464 + }, + { + "epoch": 1.2592165898617511, + "grad_norm": 1.3156243556780545, + "learning_rate": 6.612835874646847e-07, + "loss": 0.7097430229187012, + "step": 5465 + }, + { + "epoch": 1.2594470046082948, + "grad_norm": 1.1699591097632744, + "learning_rate": 6.609251487475786e-07, + "loss": 0.8640443682670593, + "step": 5466 + }, + { + "epoch": 1.2596774193548388, + "grad_norm": 1.4552439697890553, + "learning_rate": 6.605667592467384e-07, + "loss": 0.7872523069381714, + "step": 5467 + }, + { + "epoch": 1.2599078341013825, + "grad_norm": 1.3601390048962447, + "learning_rate": 6.602084190141835e-07, + "loss": 0.8647557497024536, + 
"step": 5468 + }, + { + "epoch": 1.2601382488479262, + "grad_norm": 0.9953963267515464, + "learning_rate": 6.598501281019268e-07, + "loss": 0.7323553562164307, + "step": 5469 + }, + { + "epoch": 1.26036866359447, + "grad_norm": 1.2478057023441294, + "learning_rate": 6.594918865619739e-07, + "loss": 0.8214852809906006, + "step": 5470 + }, + { + "epoch": 1.2605990783410137, + "grad_norm": 1.1743890995374524, + "learning_rate": 6.591336944463223e-07, + "loss": 0.8011265397071838, + "step": 5471 + }, + { + "epoch": 1.2608294930875577, + "grad_norm": 0.9651307194588488, + "learning_rate": 6.587755518069642e-07, + "loss": 0.798862636089325, + "step": 5472 + }, + { + "epoch": 1.2610599078341014, + "grad_norm": 1.1888872240865054, + "learning_rate": 6.58417458695883e-07, + "loss": 0.7231202721595764, + "step": 5473 + }, + { + "epoch": 1.261290322580645, + "grad_norm": 1.25713690411949, + "learning_rate": 6.580594151650551e-07, + "loss": 0.8816685676574707, + "step": 5474 + }, + { + "epoch": 1.261520737327189, + "grad_norm": 1.0218552259688816, + "learning_rate": 6.577014212664509e-07, + "loss": 0.6343427300453186, + "step": 5475 + }, + { + "epoch": 1.2617511520737328, + "grad_norm": 1.2062270864209526, + "learning_rate": 6.573434770520321e-07, + "loss": 0.7785895466804504, + "step": 5476 + }, + { + "epoch": 1.2619815668202765, + "grad_norm": 1.2086458816060426, + "learning_rate": 6.569855825737536e-07, + "loss": 0.7408698797225952, + "step": 5477 + }, + { + "epoch": 1.2622119815668202, + "grad_norm": 1.2755490666336102, + "learning_rate": 6.566277378835643e-07, + "loss": 0.8481286764144897, + "step": 5478 + }, + { + "epoch": 1.262442396313364, + "grad_norm": 1.0772225233745287, + "learning_rate": 6.56269943033404e-07, + "loss": 0.8221831917762756, + "step": 5479 + }, + { + "epoch": 1.262672811059908, + "grad_norm": 1.1202704150930312, + "learning_rate": 6.559121980752065e-07, + "loss": 0.805405855178833, + "step": 5480 + }, + { + "epoch": 1.2629032258064516, + "grad_norm": 
1.4925713527432443, + "learning_rate": 6.55554503060898e-07, + "loss": 0.8643565773963928, + "step": 5481 + }, + { + "epoch": 1.2631336405529954, + "grad_norm": 1.038997236699539, + "learning_rate": 6.551968580423973e-07, + "loss": 0.7087225914001465, + "step": 5482 + }, + { + "epoch": 1.263364055299539, + "grad_norm": 1.3080505612178328, + "learning_rate": 6.54839263071616e-07, + "loss": 0.8401756882667542, + "step": 5483 + }, + { + "epoch": 1.2635944700460828, + "grad_norm": 0.974231759030553, + "learning_rate": 6.544817182004589e-07, + "loss": 0.76345294713974, + "step": 5484 + }, + { + "epoch": 1.2638248847926268, + "grad_norm": 0.9975788463971886, + "learning_rate": 6.541242234808228e-07, + "loss": 0.7177271842956543, + "step": 5485 + }, + { + "epoch": 1.2640552995391705, + "grad_norm": 1.0524467641617976, + "learning_rate": 6.537667789645981e-07, + "loss": 0.7436186075210571, + "step": 5486 + }, + { + "epoch": 1.2642857142857142, + "grad_norm": 1.025347292021162, + "learning_rate": 6.53409384703667e-07, + "loss": 0.6526673436164856, + "step": 5487 + }, + { + "epoch": 1.2645161290322582, + "grad_norm": 1.4422505610217646, + "learning_rate": 6.530520407499049e-07, + "loss": 0.879219651222229, + "step": 5488 + }, + { + "epoch": 1.264746543778802, + "grad_norm": 1.1643268817299548, + "learning_rate": 6.526947471551798e-07, + "loss": 0.7005003690719604, + "step": 5489 + }, + { + "epoch": 1.2649769585253456, + "grad_norm": 1.276974659887974, + "learning_rate": 6.523375039713525e-07, + "loss": 0.716349720954895, + "step": 5490 + }, + { + "epoch": 1.2652073732718894, + "grad_norm": 1.307490301718017, + "learning_rate": 6.519803112502758e-07, + "loss": 0.8524413704872131, + "step": 5491 + }, + { + "epoch": 1.265437788018433, + "grad_norm": 1.3886244481055607, + "learning_rate": 6.516231690437966e-07, + "loss": 0.8032857179641724, + "step": 5492 + }, + { + "epoch": 1.265668202764977, + "grad_norm": 1.3026581508138244, + "learning_rate": 6.512660774037531e-07, + "loss": 
0.8912144899368286, + "step": 5493 + }, + { + "epoch": 1.2658986175115208, + "grad_norm": 1.1001846572449894, + "learning_rate": 6.509090363819764e-07, + "loss": 0.6526974439620972, + "step": 5494 + }, + { + "epoch": 1.2661290322580645, + "grad_norm": 1.1539964772442708, + "learning_rate": 6.505520460302916e-07, + "loss": 0.7436610460281372, + "step": 5495 + }, + { + "epoch": 1.2663594470046082, + "grad_norm": 1.0590907210895066, + "learning_rate": 6.501951064005145e-07, + "loss": 0.7112951874732971, + "step": 5496 + }, + { + "epoch": 1.266589861751152, + "grad_norm": 1.136772271419419, + "learning_rate": 6.498382175444545e-07, + "loss": 0.6908622980117798, + "step": 5497 + }, + { + "epoch": 1.266820276497696, + "grad_norm": 1.2936126009346398, + "learning_rate": 6.494813795139137e-07, + "loss": 0.8169400691986084, + "step": 5498 + }, + { + "epoch": 1.2670506912442396, + "grad_norm": 1.1611805763062155, + "learning_rate": 6.491245923606868e-07, + "loss": 0.7577871084213257, + "step": 5499 + }, + { + "epoch": 1.2672811059907834, + "grad_norm": 1.2166617406598321, + "learning_rate": 6.487678561365606e-07, + "loss": 0.7470887303352356, + "step": 5500 + }, + { + "epoch": 1.2675115207373273, + "grad_norm": 1.2499100792685887, + "learning_rate": 6.484111708933153e-07, + "loss": 0.7862193584442139, + "step": 5501 + }, + { + "epoch": 1.267741935483871, + "grad_norm": 1.0856856438170979, + "learning_rate": 6.48054536682723e-07, + "loss": 0.6809444427490234, + "step": 5502 + }, + { + "epoch": 1.2679723502304148, + "grad_norm": 1.1883483456973896, + "learning_rate": 6.476979535565486e-07, + "loss": 0.7560738921165466, + "step": 5503 + }, + { + "epoch": 1.2682027649769585, + "grad_norm": 1.060654462751894, + "learning_rate": 6.473414215665501e-07, + "loss": 0.6961003541946411, + "step": 5504 + }, + { + "epoch": 1.2684331797235022, + "grad_norm": 1.1318601167609275, + "learning_rate": 6.469849407644775e-07, + "loss": 0.762688159942627, + "step": 5505 + }, + { + "epoch": 
1.2686635944700462, + "grad_norm": 1.3318780914664468, + "learning_rate": 6.46628511202073e-07, + "loss": 0.8735007047653198, + "step": 5506 + }, + { + "epoch": 1.26889400921659, + "grad_norm": 1.2498993266864264, + "learning_rate": 6.462721329310727e-07, + "loss": 0.7127432823181152, + "step": 5507 + }, + { + "epoch": 1.2691244239631336, + "grad_norm": 1.1810894491038926, + "learning_rate": 6.45915806003204e-07, + "loss": 0.7720422744750977, + "step": 5508 + }, + { + "epoch": 1.2693548387096774, + "grad_norm": 1.3742393921911886, + "learning_rate": 6.455595304701871e-07, + "loss": 0.8046890497207642, + "step": 5509 + }, + { + "epoch": 1.269585253456221, + "grad_norm": 1.433035812490825, + "learning_rate": 6.452033063837354e-07, + "loss": 0.8218742609024048, + "step": 5510 + }, + { + "epoch": 1.269815668202765, + "grad_norm": 1.3642640568886157, + "learning_rate": 6.448471337955536e-07, + "loss": 0.912622332572937, + "step": 5511 + }, + { + "epoch": 1.2700460829493088, + "grad_norm": 1.3101181049427244, + "learning_rate": 6.444910127573407e-07, + "loss": 0.7940733432769775, + "step": 5512 + }, + { + "epoch": 1.2702764976958525, + "grad_norm": 1.0982469100789136, + "learning_rate": 6.441349433207864e-07, + "loss": 0.7085565328598022, + "step": 5513 + }, + { + "epoch": 1.2705069124423964, + "grad_norm": 1.241687978637031, + "learning_rate": 6.437789255375739e-07, + "loss": 0.9316935539245605, + "step": 5514 + }, + { + "epoch": 1.2707373271889402, + "grad_norm": 0.9697190322352798, + "learning_rate": 6.43422959459379e-07, + "loss": 0.7412574291229248, + "step": 5515 + }, + { + "epoch": 1.270967741935484, + "grad_norm": 0.9713506680995111, + "learning_rate": 6.430670451378695e-07, + "loss": 0.7476450204849243, + "step": 5516 + }, + { + "epoch": 1.2711981566820276, + "grad_norm": 1.1272976564667934, + "learning_rate": 6.427111826247056e-07, + "loss": 0.8530189990997314, + "step": 5517 + }, + { + "epoch": 1.2714285714285714, + "grad_norm": 1.3163108639601895, + 
"learning_rate": 6.423553719715406e-07, + "loss": 0.8193017840385437, + "step": 5518 + }, + { + "epoch": 1.2716589861751153, + "grad_norm": 1.002275086425174, + "learning_rate": 6.419996132300203e-07, + "loss": 0.7444974780082703, + "step": 5519 + }, + { + "epoch": 1.271889400921659, + "grad_norm": 1.0214749663440856, + "learning_rate": 6.416439064517818e-07, + "loss": 0.7422837018966675, + "step": 5520 + }, + { + "epoch": 1.2721198156682028, + "grad_norm": 1.2499390785362547, + "learning_rate": 6.412882516884562e-07, + "loss": 1.0155640840530396, + "step": 5521 + }, + { + "epoch": 1.2723502304147465, + "grad_norm": 1.489615968336023, + "learning_rate": 6.409326489916658e-07, + "loss": 0.8097087144851685, + "step": 5522 + }, + { + "epoch": 1.2725806451612902, + "grad_norm": 1.293861875643454, + "learning_rate": 6.405770984130257e-07, + "loss": 0.8545565009117126, + "step": 5523 + }, + { + "epoch": 1.2728110599078342, + "grad_norm": 0.9914622760341439, + "learning_rate": 6.402216000041445e-07, + "loss": 0.6765652298927307, + "step": 5524 + }, + { + "epoch": 1.273041474654378, + "grad_norm": 1.103390848542702, + "learning_rate": 6.398661538166217e-07, + "loss": 0.7964426875114441, + "step": 5525 + }, + { + "epoch": 1.2732718894009216, + "grad_norm": 1.2196724846653912, + "learning_rate": 6.395107599020495e-07, + "loss": 0.7449651956558228, + "step": 5526 + }, + { + "epoch": 1.2735023041474656, + "grad_norm": 1.5614043870867116, + "learning_rate": 6.391554183120138e-07, + "loss": 0.8639888167381287, + "step": 5527 + }, + { + "epoch": 1.2737327188940093, + "grad_norm": 1.046130673497984, + "learning_rate": 6.388001290980914e-07, + "loss": 0.7668901681900024, + "step": 5528 + }, + { + "epoch": 1.273963133640553, + "grad_norm": 1.082923428749424, + "learning_rate": 6.384448923118517e-07, + "loss": 0.6461849212646484, + "step": 5529 + }, + { + "epoch": 1.2741935483870968, + "grad_norm": 1.1539877219125736, + "learning_rate": 6.380897080048576e-07, + "loss": 
0.7045707702636719, + "step": 5530 + }, + { + "epoch": 1.2744239631336405, + "grad_norm": 1.1893221959186644, + "learning_rate": 6.377345762286632e-07, + "loss": 0.8303793668746948, + "step": 5531 + }, + { + "epoch": 1.2746543778801844, + "grad_norm": 1.112799220738114, + "learning_rate": 6.373794970348152e-07, + "loss": 0.808259129524231, + "step": 5532 + }, + { + "epoch": 1.2748847926267282, + "grad_norm": 1.527249581557179, + "learning_rate": 6.370244704748535e-07, + "loss": 0.8224689960479736, + "step": 5533 + }, + { + "epoch": 1.2751152073732719, + "grad_norm": 1.4408900318423565, + "learning_rate": 6.366694966003089e-07, + "loss": 0.8559266328811646, + "step": 5534 + }, + { + "epoch": 1.2753456221198156, + "grad_norm": 1.3225808297843282, + "learning_rate": 6.363145754627063e-07, + "loss": 0.7972407341003418, + "step": 5535 + }, + { + "epoch": 1.2755760368663593, + "grad_norm": 0.9700139233174567, + "learning_rate": 6.359597071135618e-07, + "loss": 0.7750328779220581, + "step": 5536 + }, + { + "epoch": 1.2758064516129033, + "grad_norm": 1.3472908531853058, + "learning_rate": 6.356048916043836e-07, + "loss": 0.807072639465332, + "step": 5537 + }, + { + "epoch": 1.276036866359447, + "grad_norm": 1.2153299361350896, + "learning_rate": 6.35250128986673e-07, + "loss": 0.8459323048591614, + "step": 5538 + }, + { + "epoch": 1.2762672811059907, + "grad_norm": 1.1921452547723677, + "learning_rate": 6.348954193119233e-07, + "loss": 0.7874447107315063, + "step": 5539 + }, + { + "epoch": 1.2764976958525347, + "grad_norm": 1.243785118643696, + "learning_rate": 6.345407626316202e-07, + "loss": 0.8817394971847534, + "step": 5540 + }, + { + "epoch": 1.2767281105990782, + "grad_norm": 1.0210963009280363, + "learning_rate": 6.341861589972417e-07, + "loss": 0.7936382293701172, + "step": 5541 + }, + { + "epoch": 1.2769585253456222, + "grad_norm": 1.1288567171733945, + "learning_rate": 6.33831608460258e-07, + "loss": 0.7301348447799683, + "step": 5542 + }, + { + "epoch": 
1.2771889400921659, + "grad_norm": 0.9930019172389213, + "learning_rate": 6.334771110721311e-07, + "loss": 0.6546784043312073, + "step": 5543 + }, + { + "epoch": 1.2774193548387096, + "grad_norm": 1.1320345708885517, + "learning_rate": 6.331226668843168e-07, + "loss": 0.798918604850769, + "step": 5544 + }, + { + "epoch": 1.2776497695852536, + "grad_norm": 1.0677491026042323, + "learning_rate": 6.327682759482618e-07, + "loss": 0.6275264620780945, + "step": 5545 + }, + { + "epoch": 1.2778801843317973, + "grad_norm": 1.1056891749814017, + "learning_rate": 6.324139383154048e-07, + "loss": 0.6870732307434082, + "step": 5546 + }, + { + "epoch": 1.278110599078341, + "grad_norm": 1.113302907194177, + "learning_rate": 6.320596540371785e-07, + "loss": 0.8280556201934814, + "step": 5547 + }, + { + "epoch": 1.2783410138248847, + "grad_norm": 1.0958194382001605, + "learning_rate": 6.317054231650063e-07, + "loss": 0.8053648471832275, + "step": 5548 + }, + { + "epoch": 1.2785714285714285, + "grad_norm": 1.1500355966221105, + "learning_rate": 6.313512457503043e-07, + "loss": 0.7628893852233887, + "step": 5549 + }, + { + "epoch": 1.2788018433179724, + "grad_norm": 1.1770420137500979, + "learning_rate": 6.30997121844481e-07, + "loss": 0.8075753450393677, + "step": 5550 + }, + { + "epoch": 1.2790322580645161, + "grad_norm": 1.1420933628102303, + "learning_rate": 6.306430514989371e-07, + "loss": 0.7883275747299194, + "step": 5551 + }, + { + "epoch": 1.2792626728110599, + "grad_norm": 1.238710939895555, + "learning_rate": 6.302890347650648e-07, + "loss": 0.7438768744468689, + "step": 5552 + }, + { + "epoch": 1.2794930875576038, + "grad_norm": 1.261177122589368, + "learning_rate": 6.299350716942501e-07, + "loss": 0.7756023406982422, + "step": 5553 + }, + { + "epoch": 1.2797235023041473, + "grad_norm": 1.0915753285175969, + "learning_rate": 6.295811623378698e-07, + "loss": 0.7128444910049438, + "step": 5554 + }, + { + "epoch": 1.2799539170506913, + "grad_norm": 0.9707581386208312, + 
"learning_rate": 6.292273067472931e-07, + "loss": 0.7611228823661804, + "step": 5555 + }, + { + "epoch": 1.280184331797235, + "grad_norm": 1.0553125250063393, + "learning_rate": 6.288735049738822e-07, + "loss": 0.7803670167922974, + "step": 5556 + }, + { + "epoch": 1.2804147465437787, + "grad_norm": 1.0703973986821036, + "learning_rate": 6.28519757068991e-07, + "loss": 0.958204448223114, + "step": 5557 + }, + { + "epoch": 1.2806451612903227, + "grad_norm": 1.1879640741186497, + "learning_rate": 6.28166063083965e-07, + "loss": 0.7220249772071838, + "step": 5558 + }, + { + "epoch": 1.2808755760368664, + "grad_norm": 1.4250311227945265, + "learning_rate": 6.278124230701427e-07, + "loss": 0.7396695613861084, + "step": 5559 + }, + { + "epoch": 1.2811059907834101, + "grad_norm": 1.1549531480718158, + "learning_rate": 6.274588370788545e-07, + "loss": 0.819474458694458, + "step": 5560 + }, + { + "epoch": 1.2813364055299539, + "grad_norm": 1.0583859146786307, + "learning_rate": 6.271053051614231e-07, + "loss": 0.6997617483139038, + "step": 5561 + }, + { + "epoch": 1.2815668202764976, + "grad_norm": 1.1462805534929357, + "learning_rate": 6.26751827369163e-07, + "loss": 0.7526183128356934, + "step": 5562 + }, + { + "epoch": 1.2817972350230415, + "grad_norm": 1.3576714493720627, + "learning_rate": 6.263984037533805e-07, + "loss": 0.7185813188552856, + "step": 5563 + }, + { + "epoch": 1.2820276497695853, + "grad_norm": 0.9722151716418193, + "learning_rate": 6.260450343653757e-07, + "loss": 0.7739845514297485, + "step": 5564 + }, + { + "epoch": 1.282258064516129, + "grad_norm": 1.0387058407540612, + "learning_rate": 6.25691719256439e-07, + "loss": 0.698557436466217, + "step": 5565 + }, + { + "epoch": 1.2824884792626727, + "grad_norm": 1.1402265972621366, + "learning_rate": 6.253384584778534e-07, + "loss": 0.6946271657943726, + "step": 5566 + }, + { + "epoch": 1.2827188940092165, + "grad_norm": 1.2349626326096388, + "learning_rate": 6.24985252080895e-07, + "loss": 
0.7746025323867798, + "step": 5567 + }, + { + "epoch": 1.2829493087557604, + "grad_norm": 1.050385772264468, + "learning_rate": 6.246321001168306e-07, + "loss": 0.8759660720825195, + "step": 5568 + }, + { + "epoch": 1.2831797235023041, + "grad_norm": 1.1535965526965875, + "learning_rate": 6.2427900263692e-07, + "loss": 0.741111159324646, + "step": 5569 + }, + { + "epoch": 1.2834101382488479, + "grad_norm": 1.2619269860039752, + "learning_rate": 6.239259596924149e-07, + "loss": 0.8580630421638489, + "step": 5570 + }, + { + "epoch": 1.2836405529953918, + "grad_norm": 1.0890841483076914, + "learning_rate": 6.235729713345588e-07, + "loss": 0.7139618992805481, + "step": 5571 + }, + { + "epoch": 1.2838709677419355, + "grad_norm": 1.1260979019373678, + "learning_rate": 6.232200376145873e-07, + "loss": 0.8300976753234863, + "step": 5572 + }, + { + "epoch": 1.2841013824884793, + "grad_norm": 1.091655687939806, + "learning_rate": 6.228671585837288e-07, + "loss": 0.7193114757537842, + "step": 5573 + }, + { + "epoch": 1.284331797235023, + "grad_norm": 1.289214780103651, + "learning_rate": 6.225143342932031e-07, + "loss": 0.8802851438522339, + "step": 5574 + }, + { + "epoch": 1.2845622119815667, + "grad_norm": 1.069264068692084, + "learning_rate": 6.221615647942217e-07, + "loss": 0.749543309211731, + "step": 5575 + }, + { + "epoch": 1.2847926267281107, + "grad_norm": 1.1044047193035296, + "learning_rate": 6.218088501379892e-07, + "loss": 0.703508734703064, + "step": 5576 + }, + { + "epoch": 1.2850230414746544, + "grad_norm": 1.4722305319077136, + "learning_rate": 6.214561903757017e-07, + "loss": 0.7519023418426514, + "step": 5577 + }, + { + "epoch": 1.2852534562211981, + "grad_norm": 1.4130549197431626, + "learning_rate": 6.211035855585466e-07, + "loss": 0.9525241851806641, + "step": 5578 + }, + { + "epoch": 1.2854838709677419, + "grad_norm": 1.3149636986285136, + "learning_rate": 6.207510357377046e-07, + "loss": 0.8288872241973877, + "step": 5579 + }, + { + "epoch": 
1.2857142857142856, + "grad_norm": 1.3691241647074333, + "learning_rate": 6.203985409643478e-07, + "loss": 0.8531112670898438, + "step": 5580 + }, + { + "epoch": 1.2859447004608295, + "grad_norm": 1.121519108666965, + "learning_rate": 6.200461012896401e-07, + "loss": 0.7106495499610901, + "step": 5581 + }, + { + "epoch": 1.2861751152073733, + "grad_norm": 1.426451214846877, + "learning_rate": 6.19693716764738e-07, + "loss": 0.714931845664978, + "step": 5582 + }, + { + "epoch": 1.286405529953917, + "grad_norm": 1.3296169647206766, + "learning_rate": 6.19341387440789e-07, + "loss": 0.8281360268592834, + "step": 5583 + }, + { + "epoch": 1.286635944700461, + "grad_norm": 1.4833656768811476, + "learning_rate": 6.189891133689342e-07, + "loss": 0.9155910611152649, + "step": 5584 + }, + { + "epoch": 1.2868663594470047, + "grad_norm": 1.3432683189972507, + "learning_rate": 6.186368946003051e-07, + "loss": 0.7573060989379883, + "step": 5585 + }, + { + "epoch": 1.2870967741935484, + "grad_norm": 1.2055594370265132, + "learning_rate": 6.182847311860255e-07, + "loss": 0.6994235515594482, + "step": 5586 + }, + { + "epoch": 1.2873271889400921, + "grad_norm": 1.0775806715124838, + "learning_rate": 6.179326231772123e-07, + "loss": 0.771092414855957, + "step": 5587 + }, + { + "epoch": 1.2875576036866359, + "grad_norm": 1.269208775599209, + "learning_rate": 6.17580570624973e-07, + "loss": 0.7470684051513672, + "step": 5588 + }, + { + "epoch": 1.2877880184331798, + "grad_norm": 1.5425254092924614, + "learning_rate": 6.172285735804075e-07, + "loss": 0.918886125087738, + "step": 5589 + }, + { + "epoch": 1.2880184331797235, + "grad_norm": 1.0377944178544696, + "learning_rate": 6.16876632094608e-07, + "loss": 0.7232617139816284, + "step": 5590 + }, + { + "epoch": 1.2882488479262673, + "grad_norm": 1.1703799662994099, + "learning_rate": 6.16524746218658e-07, + "loss": 0.7367006540298462, + "step": 5591 + }, + { + "epoch": 1.288479262672811, + "grad_norm": 1.1904508940632728, + 
"learning_rate": 6.161729160036333e-07, + "loss": 0.8783999681472778, + "step": 5592 + }, + { + "epoch": 1.2887096774193547, + "grad_norm": 1.1869935665885074, + "learning_rate": 6.158211415006019e-07, + "loss": 0.8266523480415344, + "step": 5593 + }, + { + "epoch": 1.2889400921658987, + "grad_norm": 1.1675308279856504, + "learning_rate": 6.154694227606234e-07, + "loss": 0.8528730869293213, + "step": 5594 + }, + { + "epoch": 1.2891705069124424, + "grad_norm": 1.3182250244296418, + "learning_rate": 6.151177598347485e-07, + "loss": 0.7586283683776855, + "step": 5595 + }, + { + "epoch": 1.2894009216589861, + "grad_norm": 1.4182043487427547, + "learning_rate": 6.147661527740217e-07, + "loss": 0.8671954870223999, + "step": 5596 + }, + { + "epoch": 1.28963133640553, + "grad_norm": 1.081063839615246, + "learning_rate": 6.14414601629478e-07, + "loss": 0.7354376316070557, + "step": 5597 + }, + { + "epoch": 1.2898617511520738, + "grad_norm": 1.051384434692424, + "learning_rate": 6.140631064521443e-07, + "loss": 0.8515663146972656, + "step": 5598 + }, + { + "epoch": 1.2900921658986175, + "grad_norm": 1.3608023513745535, + "learning_rate": 6.137116672930395e-07, + "loss": 0.9068351984024048, + "step": 5599 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 1.4956373283031226, + "learning_rate": 6.133602842031752e-07, + "loss": 0.7260826230049133, + "step": 5600 + }, + { + "epoch": 1.290552995391705, + "grad_norm": 1.1400144341772105, + "learning_rate": 6.130089572335535e-07, + "loss": 0.7162504196166992, + "step": 5601 + }, + { + "epoch": 1.290783410138249, + "grad_norm": 1.2203621133034757, + "learning_rate": 6.126576864351695e-07, + "loss": 0.7625414133071899, + "step": 5602 + }, + { + "epoch": 1.2910138248847927, + "grad_norm": 1.0985405517526388, + "learning_rate": 6.123064718590099e-07, + "loss": 0.787274956703186, + "step": 5603 + }, + { + "epoch": 1.2912442396313364, + "grad_norm": 1.0173148522997915, + "learning_rate": 6.119553135560519e-07, + "loss": 
0.6539326310157776, + "step": 5604 + }, + { + "epoch": 1.2914746543778801, + "grad_norm": 1.0405810111847797, + "learning_rate": 6.11604211577267e-07, + "loss": 0.8481189012527466, + "step": 5605 + }, + { + "epoch": 1.2917050691244238, + "grad_norm": 1.1908108884253377, + "learning_rate": 6.112531659736164e-07, + "loss": 0.794892430305481, + "step": 5606 + }, + { + "epoch": 1.2919354838709678, + "grad_norm": 1.0728869697567227, + "learning_rate": 6.10902176796054e-07, + "loss": 0.6738630533218384, + "step": 5607 + }, + { + "epoch": 1.2921658986175115, + "grad_norm": 1.2190379429225964, + "learning_rate": 6.105512440955258e-07, + "loss": 0.7220937609672546, + "step": 5608 + }, + { + "epoch": 1.2923963133640552, + "grad_norm": 0.9117229942004119, + "learning_rate": 6.102003679229688e-07, + "loss": 0.6831785440444946, + "step": 5609 + }, + { + "epoch": 1.2926267281105992, + "grad_norm": 1.0925904509799125, + "learning_rate": 6.098495483293125e-07, + "loss": 0.7033277750015259, + "step": 5610 + }, + { + "epoch": 1.292857142857143, + "grad_norm": 0.9024231402190447, + "learning_rate": 6.094987853654779e-07, + "loss": 0.7063429355621338, + "step": 5611 + }, + { + "epoch": 1.2930875576036867, + "grad_norm": 1.1531814321684226, + "learning_rate": 6.091480790823771e-07, + "loss": 0.7791472673416138, + "step": 5612 + }, + { + "epoch": 1.2933179723502304, + "grad_norm": 1.3904591821034944, + "learning_rate": 6.087974295309157e-07, + "loss": 0.8674220442771912, + "step": 5613 + }, + { + "epoch": 1.293548387096774, + "grad_norm": 1.0513898416349883, + "learning_rate": 6.084468367619895e-07, + "loss": 0.7878479957580566, + "step": 5614 + }, + { + "epoch": 1.293778801843318, + "grad_norm": 0.9253694996288483, + "learning_rate": 6.080963008264861e-07, + "loss": 0.7019612789154053, + "step": 5615 + }, + { + "epoch": 1.2940092165898618, + "grad_norm": 1.1163623788947772, + "learning_rate": 6.077458217752863e-07, + "loss": 0.68759685754776, + "step": 5616 + }, + { + "epoch": 
1.2942396313364055, + "grad_norm": 1.1326420080908837, + "learning_rate": 6.073953996592612e-07, + "loss": 0.851733922958374, + "step": 5617 + }, + { + "epoch": 1.2944700460829492, + "grad_norm": 1.1539848484030915, + "learning_rate": 6.070450345292739e-07, + "loss": 0.699798047542572, + "step": 5618 + }, + { + "epoch": 1.294700460829493, + "grad_norm": 1.3439745934739915, + "learning_rate": 6.066947264361798e-07, + "loss": 0.8625125885009766, + "step": 5619 + }, + { + "epoch": 1.294930875576037, + "grad_norm": 1.2395704270447963, + "learning_rate": 6.063444754308253e-07, + "loss": 0.759062647819519, + "step": 5620 + }, + { + "epoch": 1.2951612903225806, + "grad_norm": 1.1349706072725887, + "learning_rate": 6.059942815640491e-07, + "loss": 0.7549973726272583, + "step": 5621 + }, + { + "epoch": 1.2953917050691244, + "grad_norm": 1.2217826699562653, + "learning_rate": 6.056441448866816e-07, + "loss": 0.8142743110656738, + "step": 5622 + }, + { + "epoch": 1.2956221198156683, + "grad_norm": 1.0818175637274867, + "learning_rate": 6.052940654495442e-07, + "loss": 0.7881144881248474, + "step": 5623 + }, + { + "epoch": 1.295852534562212, + "grad_norm": 1.2201407031885296, + "learning_rate": 6.049440433034505e-07, + "loss": 0.7922053933143616, + "step": 5624 + }, + { + "epoch": 1.2960829493087558, + "grad_norm": 1.1955381878542082, + "learning_rate": 6.045940784992061e-07, + "loss": 0.6808311939239502, + "step": 5625 + }, + { + "epoch": 1.2963133640552995, + "grad_norm": 1.203534246478074, + "learning_rate": 6.04244171087608e-07, + "loss": 0.933373749256134, + "step": 5626 + }, + { + "epoch": 1.2965437788018432, + "grad_norm": 1.3722573775025653, + "learning_rate": 6.038943211194439e-07, + "loss": 0.8077404499053955, + "step": 5627 + }, + { + "epoch": 1.2967741935483872, + "grad_norm": 1.2263754202708472, + "learning_rate": 6.035445286454953e-07, + "loss": 0.7920867204666138, + "step": 5628 + }, + { + "epoch": 1.297004608294931, + "grad_norm": 1.1574994086499075, + 
"learning_rate": 6.031947937165335e-07, + "loss": 0.5872117280960083, + "step": 5629 + }, + { + "epoch": 1.2972350230414746, + "grad_norm": 1.2959093642025599, + "learning_rate": 6.02845116383322e-07, + "loss": 0.8593505620956421, + "step": 5630 + }, + { + "epoch": 1.2974654377880184, + "grad_norm": 1.4149025135483138, + "learning_rate": 6.02495496696616e-07, + "loss": 0.8352359533309937, + "step": 5631 + }, + { + "epoch": 1.297695852534562, + "grad_norm": 1.1724909355958724, + "learning_rate": 6.021459347071623e-07, + "loss": 0.7316182255744934, + "step": 5632 + }, + { + "epoch": 1.297926267281106, + "grad_norm": 1.1972298924235394, + "learning_rate": 6.017964304656997e-07, + "loss": 0.7294400334358215, + "step": 5633 + }, + { + "epoch": 1.2981566820276498, + "grad_norm": 1.0769002788322786, + "learning_rate": 6.014469840229581e-07, + "loss": 0.6595947742462158, + "step": 5634 + }, + { + "epoch": 1.2983870967741935, + "grad_norm": 1.308087510592029, + "learning_rate": 6.010975954296587e-07, + "loss": 0.7849195003509521, + "step": 5635 + }, + { + "epoch": 1.2986175115207375, + "grad_norm": 1.0709465804551583, + "learning_rate": 6.007482647365159e-07, + "loss": 0.6915944218635559, + "step": 5636 + }, + { + "epoch": 1.2988479262672812, + "grad_norm": 1.1595852934519908, + "learning_rate": 6.003989919942338e-07, + "loss": 0.6821994781494141, + "step": 5637 + }, + { + "epoch": 1.299078341013825, + "grad_norm": 1.0472078656298618, + "learning_rate": 6.000497772535087e-07, + "loss": 0.7333718538284302, + "step": 5638 + }, + { + "epoch": 1.2993087557603686, + "grad_norm": 1.0656731272596272, + "learning_rate": 5.997006205650292e-07, + "loss": 0.8069280385971069, + "step": 5639 + }, + { + "epoch": 1.2995391705069124, + "grad_norm": 1.0655856429852437, + "learning_rate": 5.993515219794745e-07, + "loss": 0.6989297866821289, + "step": 5640 + }, + { + "epoch": 1.2997695852534563, + "grad_norm": 1.187477589278957, + "learning_rate": 5.990024815475161e-07, + "loss": 
0.7784403562545776, + "step": 5641 + }, + { + "epoch": 1.3, + "grad_norm": 1.2512602653388225, + "learning_rate": 5.986534993198168e-07, + "loss": 0.6554181575775146, + "step": 5642 + }, + { + "epoch": 1.3002304147465438, + "grad_norm": 1.298436931300319, + "learning_rate": 5.983045753470307e-07, + "loss": 0.7647836208343506, + "step": 5643 + }, + { + "epoch": 1.3004608294930875, + "grad_norm": 0.9269247679622435, + "learning_rate": 5.979557096798033e-07, + "loss": 0.7787084579467773, + "step": 5644 + }, + { + "epoch": 1.3006912442396312, + "grad_norm": 1.0646184845326898, + "learning_rate": 5.97606902368773e-07, + "loss": 0.6367940902709961, + "step": 5645 + }, + { + "epoch": 1.3009216589861752, + "grad_norm": 1.0481428990706296, + "learning_rate": 5.972581534645679e-07, + "loss": 0.7650243043899536, + "step": 5646 + }, + { + "epoch": 1.301152073732719, + "grad_norm": 0.9452672150266047, + "learning_rate": 5.969094630178084e-07, + "loss": 0.6506018042564392, + "step": 5647 + }, + { + "epoch": 1.3013824884792626, + "grad_norm": 1.4764262273840163, + "learning_rate": 5.965608310791071e-07, + "loss": 0.7351242303848267, + "step": 5648 + }, + { + "epoch": 1.3016129032258066, + "grad_norm": 1.2210251097969258, + "learning_rate": 5.96212257699067e-07, + "loss": 0.7327077984809875, + "step": 5649 + }, + { + "epoch": 1.3018433179723503, + "grad_norm": 1.0681197005600311, + "learning_rate": 5.958637429282831e-07, + "loss": 0.6448171138763428, + "step": 5650 + }, + { + "epoch": 1.302073732718894, + "grad_norm": 1.18574113940407, + "learning_rate": 5.955152868173418e-07, + "loss": 0.8347861766815186, + "step": 5651 + }, + { + "epoch": 1.3023041474654378, + "grad_norm": 1.2733315501094051, + "learning_rate": 5.951668894168215e-07, + "loss": 0.736280620098114, + "step": 5652 + }, + { + "epoch": 1.3025345622119815, + "grad_norm": 1.2627292373923777, + "learning_rate": 5.948185507772908e-07, + "loss": 0.8677594661712646, + "step": 5653 + }, + { + "epoch": 1.3027649769585254, + 
"grad_norm": 1.1729788728933164, + "learning_rate": 5.944702709493113e-07, + "loss": 0.6598676443099976, + "step": 5654 + }, + { + "epoch": 1.3029953917050692, + "grad_norm": 1.1072155159392119, + "learning_rate": 5.941220499834352e-07, + "loss": 0.7795349359512329, + "step": 5655 + }, + { + "epoch": 1.303225806451613, + "grad_norm": 1.1312979891837796, + "learning_rate": 5.937738879302058e-07, + "loss": 0.6929318904876709, + "step": 5656 + }, + { + "epoch": 1.3034562211981566, + "grad_norm": 1.19931324162024, + "learning_rate": 5.934257848401593e-07, + "loss": 0.859328031539917, + "step": 5657 + }, + { + "epoch": 1.3036866359447004, + "grad_norm": 1.435339518052459, + "learning_rate": 5.930777407638216e-07, + "loss": 1.0015549659729004, + "step": 5658 + }, + { + "epoch": 1.3039170506912443, + "grad_norm": 1.0471647927751007, + "learning_rate": 5.927297557517115e-07, + "loss": 0.6775785088539124, + "step": 5659 + }, + { + "epoch": 1.304147465437788, + "grad_norm": 1.0488503999959857, + "learning_rate": 5.923818298543378e-07, + "loss": 0.7228262424468994, + "step": 5660 + }, + { + "epoch": 1.3043778801843318, + "grad_norm": 0.9177755631443217, + "learning_rate": 5.92033963122202e-07, + "loss": 0.6139897108078003, + "step": 5661 + }, + { + "epoch": 1.3046082949308757, + "grad_norm": 1.062819188029367, + "learning_rate": 5.916861556057965e-07, + "loss": 0.7336323261260986, + "step": 5662 + }, + { + "epoch": 1.3048387096774192, + "grad_norm": 1.1985877666304134, + "learning_rate": 5.913384073556049e-07, + "loss": 0.9223559498786926, + "step": 5663 + }, + { + "epoch": 1.3050691244239632, + "grad_norm": 1.1960311086176088, + "learning_rate": 5.909907184221023e-07, + "loss": 0.7230484485626221, + "step": 5664 + }, + { + "epoch": 1.305299539170507, + "grad_norm": 1.1557586988240278, + "learning_rate": 5.906430888557556e-07, + "loss": 0.753510594367981, + "step": 5665 + }, + { + "epoch": 1.3055299539170506, + "grad_norm": 1.2167084005991546, + "learning_rate": 
5.902955187070229e-07, + "loss": 0.8960593938827515, + "step": 5666 + }, + { + "epoch": 1.3057603686635946, + "grad_norm": 0.9226031223011045, + "learning_rate": 5.899480080263527e-07, + "loss": 0.6865993738174438, + "step": 5667 + }, + { + "epoch": 1.3059907834101383, + "grad_norm": 1.2350884878154553, + "learning_rate": 5.896005568641868e-07, + "loss": 0.7748720645904541, + "step": 5668 + }, + { + "epoch": 1.306221198156682, + "grad_norm": 1.437104451012044, + "learning_rate": 5.892531652709567e-07, + "loss": 0.834233283996582, + "step": 5669 + }, + { + "epoch": 1.3064516129032258, + "grad_norm": 1.2209490689427414, + "learning_rate": 5.889058332970858e-07, + "loss": 0.8398417234420776, + "step": 5670 + }, + { + "epoch": 1.3066820276497695, + "grad_norm": 0.8546573405192346, + "learning_rate": 5.885585609929891e-07, + "loss": 0.6889529228210449, + "step": 5671 + }, + { + "epoch": 1.3069124423963134, + "grad_norm": 1.1935289122089947, + "learning_rate": 5.882113484090725e-07, + "loss": 0.6625782251358032, + "step": 5672 + }, + { + "epoch": 1.3071428571428572, + "grad_norm": 1.2286244905882078, + "learning_rate": 5.878641955957334e-07, + "loss": 0.7774407267570496, + "step": 5673 + }, + { + "epoch": 1.307373271889401, + "grad_norm": 1.066003573867245, + "learning_rate": 5.875171026033608e-07, + "loss": 0.7799595594406128, + "step": 5674 + }, + { + "epoch": 1.3076036866359446, + "grad_norm": 1.2859461118878832, + "learning_rate": 5.87170069482335e-07, + "loss": 0.800041913986206, + "step": 5675 + }, + { + "epoch": 1.3078341013824883, + "grad_norm": 1.2986825545894243, + "learning_rate": 5.868230962830265e-07, + "loss": 0.7478667497634888, + "step": 5676 + }, + { + "epoch": 1.3080645161290323, + "grad_norm": 0.9705514903251621, + "learning_rate": 5.86476183055799e-07, + "loss": 0.7538981437683105, + "step": 5677 + }, + { + "epoch": 1.308294930875576, + "grad_norm": 1.4195819337110585, + "learning_rate": 5.861293298510061e-07, + "loss": 0.7556810975074768, + "step": 
5678 + }, + { + "epoch": 1.3085253456221198, + "grad_norm": 0.9225289666667563, + "learning_rate": 5.85782536718993e-07, + "loss": 0.670037031173706, + "step": 5679 + }, + { + "epoch": 1.3087557603686637, + "grad_norm": 1.1667524105558311, + "learning_rate": 5.854358037100964e-07, + "loss": 0.6238662600517273, + "step": 5680 + }, + { + "epoch": 1.3089861751152074, + "grad_norm": 1.1817165911107195, + "learning_rate": 5.85089130874644e-07, + "loss": 0.7972823977470398, + "step": 5681 + }, + { + "epoch": 1.3092165898617512, + "grad_norm": 1.0746427307389195, + "learning_rate": 5.847425182629549e-07, + "loss": 0.7332338094711304, + "step": 5682 + }, + { + "epoch": 1.3094470046082949, + "grad_norm": 1.2496997052714673, + "learning_rate": 5.843959659253398e-07, + "loss": 0.8186966180801392, + "step": 5683 + }, + { + "epoch": 1.3096774193548386, + "grad_norm": 1.2708999919485935, + "learning_rate": 5.840494739120996e-07, + "loss": 0.8207032680511475, + "step": 5684 + }, + { + "epoch": 1.3099078341013826, + "grad_norm": 1.4960688490449285, + "learning_rate": 5.83703042273528e-07, + "loss": 0.848265528678894, + "step": 5685 + }, + { + "epoch": 1.3101382488479263, + "grad_norm": 1.0212687278019523, + "learning_rate": 5.833566710599088e-07, + "loss": 0.7766404151916504, + "step": 5686 + }, + { + "epoch": 1.31036866359447, + "grad_norm": 1.2185059104564926, + "learning_rate": 5.830103603215168e-07, + "loss": 0.7570784687995911, + "step": 5687 + }, + { + "epoch": 1.3105990783410137, + "grad_norm": 1.1006353524996257, + "learning_rate": 5.826641101086194e-07, + "loss": 0.7551493644714355, + "step": 5688 + }, + { + "epoch": 1.3108294930875575, + "grad_norm": 1.3664942507199704, + "learning_rate": 5.823179204714739e-07, + "loss": 0.8589804172515869, + "step": 5689 + }, + { + "epoch": 1.3110599078341014, + "grad_norm": 1.2869604696659869, + "learning_rate": 5.819717914603288e-07, + "loss": 0.8252761960029602, + "step": 5690 + }, + { + "epoch": 1.3112903225806452, + "grad_norm": 
1.0886628872971145, + "learning_rate": 5.816257231254254e-07, + "loss": 0.7784370183944702, + "step": 5691 + }, + { + "epoch": 1.3115207373271889, + "grad_norm": 1.1343775846575583, + "learning_rate": 5.812797155169942e-07, + "loss": 0.8040215969085693, + "step": 5692 + }, + { + "epoch": 1.3117511520737328, + "grad_norm": 1.013609351306971, + "learning_rate": 5.809337686852582e-07, + "loss": 0.8355100154876709, + "step": 5693 + }, + { + "epoch": 1.3119815668202766, + "grad_norm": 1.466649672488184, + "learning_rate": 5.805878826804303e-07, + "loss": 0.8233312368392944, + "step": 5694 + }, + { + "epoch": 1.3122119815668203, + "grad_norm": 1.1563119764352225, + "learning_rate": 5.802420575527165e-07, + "loss": 0.7756507992744446, + "step": 5695 + }, + { + "epoch": 1.312442396313364, + "grad_norm": 1.1867005828091945, + "learning_rate": 5.798962933523124e-07, + "loss": 0.7503829002380371, + "step": 5696 + }, + { + "epoch": 1.3126728110599077, + "grad_norm": 1.506327103479739, + "learning_rate": 5.795505901294051e-07, + "loss": 0.749663770198822, + "step": 5697 + }, + { + "epoch": 1.3129032258064517, + "grad_norm": 1.440884605575443, + "learning_rate": 5.792049479341732e-07, + "loss": 0.9003115296363831, + "step": 5698 + }, + { + "epoch": 1.3131336405529954, + "grad_norm": 1.059615932759845, + "learning_rate": 5.788593668167854e-07, + "loss": 0.655732274055481, + "step": 5699 + }, + { + "epoch": 1.3133640552995391, + "grad_norm": 0.9900775273356892, + "learning_rate": 5.785138468274036e-07, + "loss": 0.7318822145462036, + "step": 5700 + }, + { + "epoch": 1.3135944700460829, + "grad_norm": 0.9099775921199348, + "learning_rate": 5.781683880161788e-07, + "loss": 0.6512752771377563, + "step": 5701 + }, + { + "epoch": 1.3138248847926266, + "grad_norm": 1.1289875219473309, + "learning_rate": 5.778229904332537e-07, + "loss": 0.7232785820960999, + "step": 5702 + }, + { + "epoch": 1.3140552995391706, + "grad_norm": 1.2645196269426846, + "learning_rate": 5.77477654128763e-07, + 
"loss": 0.837032675743103, + "step": 5703 + }, + { + "epoch": 1.3142857142857143, + "grad_norm": 1.4984544841183642, + "learning_rate": 5.771323791528315e-07, + "loss": 0.926714301109314, + "step": 5704 + }, + { + "epoch": 1.314516129032258, + "grad_norm": 1.1221666474084682, + "learning_rate": 5.76787165555575e-07, + "loss": 0.7228986620903015, + "step": 5705 + }, + { + "epoch": 1.314746543778802, + "grad_norm": 1.3618848390091767, + "learning_rate": 5.764420133871015e-07, + "loss": 0.8330450057983398, + "step": 5706 + }, + { + "epoch": 1.3149769585253457, + "grad_norm": 1.2680150111326054, + "learning_rate": 5.760969226975088e-07, + "loss": 0.793700098991394, + "step": 5707 + }, + { + "epoch": 1.3152073732718894, + "grad_norm": 1.2897950240071954, + "learning_rate": 5.757518935368868e-07, + "loss": 0.8797321319580078, + "step": 5708 + }, + { + "epoch": 1.3154377880184331, + "grad_norm": 1.1147531221594877, + "learning_rate": 5.754069259553159e-07, + "loss": 0.8772039413452148, + "step": 5709 + }, + { + "epoch": 1.3156682027649769, + "grad_norm": 0.820739065285044, + "learning_rate": 5.750620200028672e-07, + "loss": 0.5998358726501465, + "step": 5710 + }, + { + "epoch": 1.3158986175115208, + "grad_norm": 1.7932534766511148, + "learning_rate": 5.747171757296041e-07, + "loss": 0.7694767713546753, + "step": 5711 + }, + { + "epoch": 1.3161290322580645, + "grad_norm": 1.2782062967169578, + "learning_rate": 5.7437239318558e-07, + "loss": 0.8526760339736938, + "step": 5712 + }, + { + "epoch": 1.3163594470046083, + "grad_norm": 1.199230266468518, + "learning_rate": 5.740276724208396e-07, + "loss": 0.8407987356185913, + "step": 5713 + }, + { + "epoch": 1.316589861751152, + "grad_norm": 1.289466266523787, + "learning_rate": 5.736830134854183e-07, + "loss": 0.9731476306915283, + "step": 5714 + }, + { + "epoch": 1.3168202764976957, + "grad_norm": 1.134122607422213, + "learning_rate": 5.733384164293434e-07, + "loss": 0.7230468988418579, + "step": 5715 + }, + { + "epoch": 
1.3170506912442397, + "grad_norm": 1.2031868742095575, + "learning_rate": 5.729938813026327e-07, + "loss": 0.8260238766670227, + "step": 5716 + }, + { + "epoch": 1.3172811059907834, + "grad_norm": 1.0909604007760305, + "learning_rate": 5.726494081552948e-07, + "loss": 0.7616437673568726, + "step": 5717 + }, + { + "epoch": 1.3175115207373271, + "grad_norm": 1.1614064666034054, + "learning_rate": 5.723049970373295e-07, + "loss": 0.7628509998321533, + "step": 5718 + }, + { + "epoch": 1.317741935483871, + "grad_norm": 1.2522299219195512, + "learning_rate": 5.719606479987273e-07, + "loss": 0.744842529296875, + "step": 5719 + }, + { + "epoch": 1.3179723502304148, + "grad_norm": 0.9975745357037148, + "learning_rate": 5.716163610894708e-07, + "loss": 0.7228065133094788, + "step": 5720 + }, + { + "epoch": 1.3182027649769585, + "grad_norm": 1.5461378865588107, + "learning_rate": 5.712721363595325e-07, + "loss": 0.8764907121658325, + "step": 5721 + }, + { + "epoch": 1.3184331797235023, + "grad_norm": 1.0737882176659082, + "learning_rate": 5.709279738588757e-07, + "loss": 0.7966248393058777, + "step": 5722 + }, + { + "epoch": 1.318663594470046, + "grad_norm": 1.4239755183906653, + "learning_rate": 5.705838736374558e-07, + "loss": 0.8983157873153687, + "step": 5723 + }, + { + "epoch": 1.31889400921659, + "grad_norm": 1.1693207378088453, + "learning_rate": 5.70239835745218e-07, + "loss": 0.7349347472190857, + "step": 5724 + }, + { + "epoch": 1.3191244239631337, + "grad_norm": 1.4511397115268243, + "learning_rate": 5.698958602320988e-07, + "loss": 0.9297066926956177, + "step": 5725 + }, + { + "epoch": 1.3193548387096774, + "grad_norm": 1.0721204261694746, + "learning_rate": 5.695519471480266e-07, + "loss": 0.7106038331985474, + "step": 5726 + }, + { + "epoch": 1.3195852534562211, + "grad_norm": 1.3074916303787611, + "learning_rate": 5.692080965429193e-07, + "loss": 0.8759022951126099, + "step": 5727 + }, + { + "epoch": 1.3198156682027649, + "grad_norm": 1.2039841953988952, + 
"learning_rate": 5.688643084666862e-07, + "loss": 0.8337300419807434, + "step": 5728 + }, + { + "epoch": 1.3200460829493088, + "grad_norm": 1.2975435530580146, + "learning_rate": 5.685205829692283e-07, + "loss": 0.8543391227722168, + "step": 5729 + }, + { + "epoch": 1.3202764976958525, + "grad_norm": 0.9960252179140261, + "learning_rate": 5.681769201004366e-07, + "loss": 0.7497329711914062, + "step": 5730 + }, + { + "epoch": 1.3205069124423963, + "grad_norm": 1.0615580947761494, + "learning_rate": 5.678333199101929e-07, + "loss": 0.8190964460372925, + "step": 5731 + }, + { + "epoch": 1.3207373271889402, + "grad_norm": 1.1486652227224357, + "learning_rate": 5.674897824483711e-07, + "loss": 0.8233011960983276, + "step": 5732 + }, + { + "epoch": 1.320967741935484, + "grad_norm": 1.2086113696285639, + "learning_rate": 5.671463077648348e-07, + "loss": 0.75257408618927, + "step": 5733 + }, + { + "epoch": 1.3211981566820277, + "grad_norm": 1.0357997575051858, + "learning_rate": 5.668028959094386e-07, + "loss": 0.6468796133995056, + "step": 5734 + }, + { + "epoch": 1.3214285714285714, + "grad_norm": 0.869693175338726, + "learning_rate": 5.664595469320288e-07, + "loss": 0.6756174564361572, + "step": 5735 + }, + { + "epoch": 1.3216589861751151, + "grad_norm": 1.2928038093451135, + "learning_rate": 5.661162608824419e-07, + "loss": 0.9040344953536987, + "step": 5736 + }, + { + "epoch": 1.321889400921659, + "grad_norm": 1.013287726627938, + "learning_rate": 5.657730378105055e-07, + "loss": 0.8082150816917419, + "step": 5737 + }, + { + "epoch": 1.3221198156682028, + "grad_norm": 1.2602760490074278, + "learning_rate": 5.654298777660375e-07, + "loss": 0.8760210275650024, + "step": 5738 + }, + { + "epoch": 1.3223502304147465, + "grad_norm": 1.4464070872810626, + "learning_rate": 5.650867807988473e-07, + "loss": 0.6980990171432495, + "step": 5739 + }, + { + "epoch": 1.3225806451612903, + "grad_norm": 0.927469939331727, + "learning_rate": 5.647437469587355e-07, + "loss": 
0.6552839279174805, + "step": 5740 + }, + { + "epoch": 1.322811059907834, + "grad_norm": 0.9934566913252004, + "learning_rate": 5.644007762954925e-07, + "loss": 0.8304816484451294, + "step": 5741 + }, + { + "epoch": 1.323041474654378, + "grad_norm": 1.1691146043820817, + "learning_rate": 5.640578688589e-07, + "loss": 0.7977567315101624, + "step": 5742 + }, + { + "epoch": 1.3232718894009217, + "grad_norm": 1.4376891352576404, + "learning_rate": 5.637150246987308e-07, + "loss": 0.7656992673873901, + "step": 5743 + }, + { + "epoch": 1.3235023041474654, + "grad_norm": 1.1120822444951537, + "learning_rate": 5.633722438647483e-07, + "loss": 0.921256422996521, + "step": 5744 + }, + { + "epoch": 1.3237327188940093, + "grad_norm": 1.2718785752085355, + "learning_rate": 5.630295264067063e-07, + "loss": 0.8012785315513611, + "step": 5745 + }, + { + "epoch": 1.323963133640553, + "grad_norm": 1.2403067439539972, + "learning_rate": 5.626868723743504e-07, + "loss": 0.613241970539093, + "step": 5746 + }, + { + "epoch": 1.3241935483870968, + "grad_norm": 1.34086331204533, + "learning_rate": 5.623442818174161e-07, + "loss": 0.7134846448898315, + "step": 5747 + }, + { + "epoch": 1.3244239631336405, + "grad_norm": 1.3127547947642921, + "learning_rate": 5.620017547856295e-07, + "loss": 0.8963242173194885, + "step": 5748 + }, + { + "epoch": 1.3246543778801843, + "grad_norm": 1.3476788930677732, + "learning_rate": 5.616592913287087e-07, + "loss": 0.8401378393173218, + "step": 5749 + }, + { + "epoch": 1.3248847926267282, + "grad_norm": 1.0346861015576712, + "learning_rate": 5.613168914963615e-07, + "loss": 0.6455308198928833, + "step": 5750 + }, + { + "epoch": 1.325115207373272, + "grad_norm": 1.105933895384034, + "learning_rate": 5.609745553382863e-07, + "loss": 0.6920031905174255, + "step": 5751 + }, + { + "epoch": 1.3253456221198157, + "grad_norm": 1.1001754091297298, + "learning_rate": 5.606322829041737e-07, + "loss": 0.9099706411361694, + "step": 5752 + }, + { + "epoch": 
1.3255760368663594, + "grad_norm": 1.3286482905641974, + "learning_rate": 5.602900742437036e-07, + "loss": 0.8034265637397766, + "step": 5753 + }, + { + "epoch": 1.3258064516129031, + "grad_norm": 0.9956708814709011, + "learning_rate": 5.599479294065471e-07, + "loss": 0.7216918468475342, + "step": 5754 + }, + { + "epoch": 1.326036866359447, + "grad_norm": 1.1406371859334326, + "learning_rate": 5.596058484423655e-07, + "loss": 0.7428277730941772, + "step": 5755 + }, + { + "epoch": 1.3262672811059908, + "grad_norm": 1.3052741120899958, + "learning_rate": 5.592638314008127e-07, + "loss": 0.7636011838912964, + "step": 5756 + }, + { + "epoch": 1.3264976958525345, + "grad_norm": 1.3474656843000283, + "learning_rate": 5.589218783315311e-07, + "loss": 0.7765215635299683, + "step": 5757 + }, + { + "epoch": 1.3267281105990785, + "grad_norm": 1.0612768168901736, + "learning_rate": 5.585799892841551e-07, + "loss": 0.6524033546447754, + "step": 5758 + }, + { + "epoch": 1.3269585253456222, + "grad_norm": 1.134076107561282, + "learning_rate": 5.582381643083087e-07, + "loss": 0.8105186223983765, + "step": 5759 + }, + { + "epoch": 1.327188940092166, + "grad_norm": 1.2647095323418043, + "learning_rate": 5.578964034536084e-07, + "loss": 0.7654449939727783, + "step": 5760 + }, + { + "epoch": 1.3274193548387097, + "grad_norm": 1.0086580295069412, + "learning_rate": 5.5755470676966e-07, + "loss": 0.6545592546463013, + "step": 5761 + }, + { + "epoch": 1.3276497695852534, + "grad_norm": 1.1744400728961766, + "learning_rate": 5.572130743060597e-07, + "loss": 0.7116275429725647, + "step": 5762 + }, + { + "epoch": 1.3278801843317973, + "grad_norm": 1.246651079531453, + "learning_rate": 5.568715061123959e-07, + "loss": 0.8396822214126587, + "step": 5763 + }, + { + "epoch": 1.328110599078341, + "grad_norm": 1.2492632037634621, + "learning_rate": 5.565300022382464e-07, + "loss": 0.6729685664176941, + "step": 5764 + }, + { + "epoch": 1.3283410138248848, + "grad_norm": 1.1356361065972511, + 
"learning_rate": 5.561885627331795e-07, + "loss": 0.6891340017318726, + "step": 5765 + }, + { + "epoch": 1.3285714285714285, + "grad_norm": 1.1361488307123824, + "learning_rate": 5.558471876467556e-07, + "loss": 0.7232956886291504, + "step": 5766 + }, + { + "epoch": 1.3288018433179722, + "grad_norm": 1.3213229777584583, + "learning_rate": 5.555058770285246e-07, + "loss": 0.7800660133361816, + "step": 5767 + }, + { + "epoch": 1.3290322580645162, + "grad_norm": 1.106817082140827, + "learning_rate": 5.551646309280266e-07, + "loss": 0.6794005036354065, + "step": 5768 + }, + { + "epoch": 1.32926267281106, + "grad_norm": 1.223898149625345, + "learning_rate": 5.548234493947939e-07, + "loss": 0.7739551067352295, + "step": 5769 + }, + { + "epoch": 1.3294930875576036, + "grad_norm": 1.0605861943491384, + "learning_rate": 5.544823324783482e-07, + "loss": 0.759978711605072, + "step": 5770 + }, + { + "epoch": 1.3297235023041476, + "grad_norm": 0.9593168779270222, + "learning_rate": 5.541412802282017e-07, + "loss": 0.7563333511352539, + "step": 5771 + }, + { + "epoch": 1.3299539170506913, + "grad_norm": 1.2126531853296405, + "learning_rate": 5.538002926938587e-07, + "loss": 0.6705852746963501, + "step": 5772 + }, + { + "epoch": 1.330184331797235, + "grad_norm": 1.4207541622240418, + "learning_rate": 5.534593699248124e-07, + "loss": 0.8343281745910645, + "step": 5773 + }, + { + "epoch": 1.3304147465437788, + "grad_norm": 1.4106880574063376, + "learning_rate": 5.531185119705474e-07, + "loss": 0.7158486843109131, + "step": 5774 + }, + { + "epoch": 1.3306451612903225, + "grad_norm": 1.5132468140839932, + "learning_rate": 5.527777188805385e-07, + "loss": 0.8888766765594482, + "step": 5775 + }, + { + "epoch": 1.3308755760368665, + "grad_norm": 1.0939731159249404, + "learning_rate": 5.524369907042519e-07, + "loss": 0.873813271522522, + "step": 5776 + }, + { + "epoch": 1.3311059907834102, + "grad_norm": 1.1685988919933143, + "learning_rate": 5.520963274911437e-07, + "loss": 
0.7654919624328613, + "step": 5777 + }, + { + "epoch": 1.331336405529954, + "grad_norm": 0.8790821291361985, + "learning_rate": 5.517557292906606e-07, + "loss": 0.6976190805435181, + "step": 5778 + }, + { + "epoch": 1.3315668202764976, + "grad_norm": 1.0920428500423505, + "learning_rate": 5.5141519615224e-07, + "loss": 0.8356388807296753, + "step": 5779 + }, + { + "epoch": 1.3317972350230414, + "grad_norm": 1.1881219631842852, + "learning_rate": 5.510747281253094e-07, + "loss": 0.719998836517334, + "step": 5780 + }, + { + "epoch": 1.3320276497695853, + "grad_norm": 1.4093262324111957, + "learning_rate": 5.507343252592882e-07, + "loss": 0.8432124853134155, + "step": 5781 + }, + { + "epoch": 1.332258064516129, + "grad_norm": 1.2484869478133402, + "learning_rate": 5.503939876035845e-07, + "loss": 0.8426402807235718, + "step": 5782 + }, + { + "epoch": 1.3324884792626728, + "grad_norm": 1.1843136616988323, + "learning_rate": 5.500537152075986e-07, + "loss": 0.8133292198181152, + "step": 5783 + }, + { + "epoch": 1.3327188940092167, + "grad_norm": 1.2203561788081314, + "learning_rate": 5.497135081207205e-07, + "loss": 0.8097467422485352, + "step": 5784 + }, + { + "epoch": 1.3329493087557602, + "grad_norm": 0.9964838781032684, + "learning_rate": 5.493733663923299e-07, + "loss": 0.6943382024765015, + "step": 5785 + }, + { + "epoch": 1.3331797235023042, + "grad_norm": 0.8958647048569846, + "learning_rate": 5.490332900717993e-07, + "loss": 0.5896245837211609, + "step": 5786 + }, + { + "epoch": 1.333410138248848, + "grad_norm": 1.2066217319918868, + "learning_rate": 5.486932792084895e-07, + "loss": 0.6837725639343262, + "step": 5787 + }, + { + "epoch": 1.3336405529953916, + "grad_norm": 1.3459237431541746, + "learning_rate": 5.483533338517523e-07, + "loss": 0.8371915221214294, + "step": 5788 + }, + { + "epoch": 1.3338709677419356, + "grad_norm": 1.1649657355507903, + "learning_rate": 5.480134540509313e-07, + "loss": 0.8001077175140381, + "step": 5789 + }, + { + "epoch": 
1.3341013824884793, + "grad_norm": 1.4458040399946648, + "learning_rate": 5.476736398553591e-07, + "loss": 0.9070717096328735, + "step": 5790 + }, + { + "epoch": 1.334331797235023, + "grad_norm": 1.256932465914866, + "learning_rate": 5.473338913143589e-07, + "loss": 0.9061849117279053, + "step": 5791 + }, + { + "epoch": 1.3345622119815668, + "grad_norm": 1.2993559451282939, + "learning_rate": 5.469942084772454e-07, + "loss": 0.8465786576271057, + "step": 5792 + }, + { + "epoch": 1.3347926267281105, + "grad_norm": 1.2333173266054418, + "learning_rate": 5.466545913933229e-07, + "loss": 0.8221259117126465, + "step": 5793 + }, + { + "epoch": 1.3350230414746544, + "grad_norm": 1.1214356414954587, + "learning_rate": 5.463150401118864e-07, + "loss": 0.594088077545166, + "step": 5794 + }, + { + "epoch": 1.3352534562211982, + "grad_norm": 1.0900215939620008, + "learning_rate": 5.459755546822207e-07, + "loss": 0.6983529925346375, + "step": 5795 + }, + { + "epoch": 1.335483870967742, + "grad_norm": 1.3561793320800521, + "learning_rate": 5.456361351536027e-07, + "loss": 0.7720709443092346, + "step": 5796 + }, + { + "epoch": 1.3357142857142856, + "grad_norm": 1.1798730390657586, + "learning_rate": 5.45296781575298e-07, + "loss": 0.8087977766990662, + "step": 5797 + }, + { + "epoch": 1.3359447004608294, + "grad_norm": 1.123982224882924, + "learning_rate": 5.449574939965636e-07, + "loss": 0.6808000802993774, + "step": 5798 + }, + { + "epoch": 1.3361751152073733, + "grad_norm": 1.0634688756756818, + "learning_rate": 5.446182724666466e-07, + "loss": 0.7222881317138672, + "step": 5799 + }, + { + "epoch": 1.336405529953917, + "grad_norm": 1.1919279054292256, + "learning_rate": 5.44279117034784e-07, + "loss": 0.872687578201294, + "step": 5800 + }, + { + "epoch": 1.3366359447004608, + "grad_norm": 1.3648460603559815, + "learning_rate": 5.439400277502048e-07, + "loss": 0.7728114128112793, + "step": 5801 + }, + { + "epoch": 1.3368663594470047, + "grad_norm": 1.0445795610107291, + 
"learning_rate": 5.436010046621267e-07, + "loss": 0.807528018951416, + "step": 5802 + }, + { + "epoch": 1.3370967741935484, + "grad_norm": 1.151575262421327, + "learning_rate": 5.432620478197583e-07, + "loss": 0.6997063159942627, + "step": 5803 + }, + { + "epoch": 1.3373271889400922, + "grad_norm": 1.309125931504039, + "learning_rate": 5.429231572722995e-07, + "loss": 0.797568678855896, + "step": 5804 + }, + { + "epoch": 1.337557603686636, + "grad_norm": 1.0057472643211554, + "learning_rate": 5.425843330689386e-07, + "loss": 0.6412359476089478, + "step": 5805 + }, + { + "epoch": 1.3377880184331796, + "grad_norm": 1.1290308654729904, + "learning_rate": 5.422455752588569e-07, + "loss": 0.8605507612228394, + "step": 5806 + }, + { + "epoch": 1.3380184331797236, + "grad_norm": 1.0459165137008808, + "learning_rate": 5.419068838912238e-07, + "loss": 0.856192946434021, + "step": 5807 + }, + { + "epoch": 1.3382488479262673, + "grad_norm": 1.1354202830657318, + "learning_rate": 5.415682590151998e-07, + "loss": 0.8614650368690491, + "step": 5808 + }, + { + "epoch": 1.338479262672811, + "grad_norm": 1.6619444336877072, + "learning_rate": 5.412297006799365e-07, + "loss": 0.9675840139389038, + "step": 5809 + }, + { + "epoch": 1.3387096774193548, + "grad_norm": 1.0659647985310448, + "learning_rate": 5.408912089345747e-07, + "loss": 0.7333405017852783, + "step": 5810 + }, + { + "epoch": 1.3389400921658985, + "grad_norm": 1.3540380425785927, + "learning_rate": 5.405527838282457e-07, + "loss": 0.8271909952163696, + "step": 5811 + }, + { + "epoch": 1.3391705069124424, + "grad_norm": 1.3562249096622705, + "learning_rate": 5.402144254100724e-07, + "loss": 0.8036069869995117, + "step": 5812 + }, + { + "epoch": 1.3394009216589862, + "grad_norm": 1.3975514954701582, + "learning_rate": 5.398761337291667e-07, + "loss": 0.855912446975708, + "step": 5813 + }, + { + "epoch": 1.33963133640553, + "grad_norm": 1.2830545749549949, + "learning_rate": 5.395379088346309e-07, + "loss": 
0.8198536038398743, + "step": 5814 + }, + { + "epoch": 1.3398617511520738, + "grad_norm": 1.2130249913700057, + "learning_rate": 5.391997507755581e-07, + "loss": 0.8931646347045898, + "step": 5815 + }, + { + "epoch": 1.3400921658986176, + "grad_norm": 0.9981529734228639, + "learning_rate": 5.388616596010312e-07, + "loss": 0.7073954343795776, + "step": 5816 + }, + { + "epoch": 1.3403225806451613, + "grad_norm": 1.2450035085912274, + "learning_rate": 5.385236353601241e-07, + "loss": 0.7758424282073975, + "step": 5817 + }, + { + "epoch": 1.340552995391705, + "grad_norm": 1.1707291956273551, + "learning_rate": 5.381856781019005e-07, + "loss": 0.6805497407913208, + "step": 5818 + }, + { + "epoch": 1.3407834101382488, + "grad_norm": 1.251540768140409, + "learning_rate": 5.378477878754144e-07, + "loss": 0.8956538438796997, + "step": 5819 + }, + { + "epoch": 1.3410138248847927, + "grad_norm": 1.0594639846580987, + "learning_rate": 5.375099647297096e-07, + "loss": 0.7819657921791077, + "step": 5820 + }, + { + "epoch": 1.3412442396313364, + "grad_norm": 1.0523114055014655, + "learning_rate": 5.371722087138217e-07, + "loss": 0.5764007568359375, + "step": 5821 + }, + { + "epoch": 1.3414746543778802, + "grad_norm": 1.1661401559135987, + "learning_rate": 5.368345198767749e-07, + "loss": 0.697022557258606, + "step": 5822 + }, + { + "epoch": 1.3417050691244239, + "grad_norm": 1.3065346909259405, + "learning_rate": 5.364968982675839e-07, + "loss": 0.7773014307022095, + "step": 5823 + }, + { + "epoch": 1.3419354838709676, + "grad_norm": 1.3340944249973796, + "learning_rate": 5.361593439352551e-07, + "loss": 0.7395004034042358, + "step": 5824 + }, + { + "epoch": 1.3421658986175116, + "grad_norm": 1.0762295080363014, + "learning_rate": 5.358218569287834e-07, + "loss": 0.7989716529846191, + "step": 5825 + }, + { + "epoch": 1.3423963133640553, + "grad_norm": 1.280549478612159, + "learning_rate": 5.354844372971543e-07, + "loss": 0.8894884586334229, + "step": 5826 + }, + { + "epoch": 
1.342626728110599, + "grad_norm": 1.5586577225053506, + "learning_rate": 5.351470850893446e-07, + "loss": 0.8415021300315857, + "step": 5827 + }, + { + "epoch": 1.342857142857143, + "grad_norm": 1.4272313895508615, + "learning_rate": 5.3480980035432e-07, + "loss": 0.9963078498840332, + "step": 5828 + }, + { + "epoch": 1.3430875576036867, + "grad_norm": 1.1680739887228044, + "learning_rate": 5.344725831410368e-07, + "loss": 0.8489943742752075, + "step": 5829 + }, + { + "epoch": 1.3433179723502304, + "grad_norm": 0.9897462108554296, + "learning_rate": 5.341354334984422e-07, + "loss": 0.6949954032897949, + "step": 5830 + }, + { + "epoch": 1.3435483870967742, + "grad_norm": 1.1225897948987795, + "learning_rate": 5.337983514754722e-07, + "loss": 0.878408670425415, + "step": 5831 + }, + { + "epoch": 1.3437788018433179, + "grad_norm": 1.2067617593706235, + "learning_rate": 5.334613371210549e-07, + "loss": 0.722877025604248, + "step": 5832 + }, + { + "epoch": 1.3440092165898618, + "grad_norm": 1.04123853110292, + "learning_rate": 5.331243904841068e-07, + "loss": 0.670013427734375, + "step": 5833 + }, + { + "epoch": 1.3442396313364056, + "grad_norm": 1.0789084686611892, + "learning_rate": 5.327875116135354e-07, + "loss": 0.8336968421936035, + "step": 5834 + }, + { + "epoch": 1.3444700460829493, + "grad_norm": 1.2348261826059375, + "learning_rate": 5.324507005582381e-07, + "loss": 0.7917020916938782, + "step": 5835 + }, + { + "epoch": 1.344700460829493, + "grad_norm": 1.288528901659057, + "learning_rate": 5.321139573671024e-07, + "loss": 0.7479217052459717, + "step": 5836 + }, + { + "epoch": 1.3449308755760367, + "grad_norm": 1.206901718846971, + "learning_rate": 5.317772820890068e-07, + "loss": 0.8059084415435791, + "step": 5837 + }, + { + "epoch": 1.3451612903225807, + "grad_norm": 1.0687058344207596, + "learning_rate": 5.314406747728186e-07, + "loss": 0.6853187680244446, + "step": 5838 + }, + { + "epoch": 1.3453917050691244, + "grad_norm": 1.2007310227541288, + 
"learning_rate": 5.311041354673964e-07, + "loss": 0.7769491672515869, + "step": 5839 + }, + { + "epoch": 1.3456221198156681, + "grad_norm": 1.007121872066712, + "learning_rate": 5.307676642215877e-07, + "loss": 0.6669384241104126, + "step": 5840 + }, + { + "epoch": 1.345852534562212, + "grad_norm": 1.091111253411437, + "learning_rate": 5.304312610842319e-07, + "loss": 0.7884945869445801, + "step": 5841 + }, + { + "epoch": 1.3460829493087558, + "grad_norm": 1.2799296704263758, + "learning_rate": 5.300949261041567e-07, + "loss": 0.8030047416687012, + "step": 5842 + }, + { + "epoch": 1.3463133640552996, + "grad_norm": 1.293856241707333, + "learning_rate": 5.297586593301806e-07, + "loss": 0.7792675495147705, + "step": 5843 + }, + { + "epoch": 1.3465437788018433, + "grad_norm": 1.450964712660266, + "learning_rate": 5.29422460811113e-07, + "loss": 0.8699119091033936, + "step": 5844 + }, + { + "epoch": 1.346774193548387, + "grad_norm": 1.1164478098944863, + "learning_rate": 5.290863305957523e-07, + "loss": 0.8075394630432129, + "step": 5845 + }, + { + "epoch": 1.347004608294931, + "grad_norm": 1.2025668698948455, + "learning_rate": 5.287502687328868e-07, + "loss": 0.7875077128410339, + "step": 5846 + }, + { + "epoch": 1.3472350230414747, + "grad_norm": 1.2743475952279586, + "learning_rate": 5.284142752712965e-07, + "loss": 0.6799413561820984, + "step": 5847 + }, + { + "epoch": 1.3474654377880184, + "grad_norm": 1.3570475044053845, + "learning_rate": 5.280783502597496e-07, + "loss": 0.914801299571991, + "step": 5848 + }, + { + "epoch": 1.3476958525345621, + "grad_norm": 1.4096481978785727, + "learning_rate": 5.277424937470052e-07, + "loss": 0.8591992855072021, + "step": 5849 + }, + { + "epoch": 1.3479262672811059, + "grad_norm": 1.1971358109064123, + "learning_rate": 5.27406705781813e-07, + "loss": 0.7830478549003601, + "step": 5850 + }, + { + "epoch": 1.3481566820276498, + "grad_norm": 1.397466179292115, + "learning_rate": 5.270709864129119e-07, + "loss": 
0.8365499973297119, + "step": 5851 + }, + { + "epoch": 1.3483870967741935, + "grad_norm": 1.5417932199175834, + "learning_rate": 5.267353356890305e-07, + "loss": 0.8342669010162354, + "step": 5852 + }, + { + "epoch": 1.3486175115207373, + "grad_norm": 1.0532947941417055, + "learning_rate": 5.263997536588891e-07, + "loss": 0.7802393436431885, + "step": 5853 + }, + { + "epoch": 1.3488479262672812, + "grad_norm": 1.2005511445865484, + "learning_rate": 5.260642403711964e-07, + "loss": 0.8245328068733215, + "step": 5854 + }, + { + "epoch": 1.349078341013825, + "grad_norm": 1.043405656704728, + "learning_rate": 5.257287958746519e-07, + "loss": 0.7209265232086182, + "step": 5855 + }, + { + "epoch": 1.3493087557603687, + "grad_norm": 1.254105643009189, + "learning_rate": 5.253934202179444e-07, + "loss": 0.9258058071136475, + "step": 5856 + }, + { + "epoch": 1.3495391705069124, + "grad_norm": 1.3493584028342165, + "learning_rate": 5.25058113449754e-07, + "loss": 0.6889467835426331, + "step": 5857 + }, + { + "epoch": 1.3497695852534561, + "grad_norm": 1.113027412487739, + "learning_rate": 5.247228756187498e-07, + "loss": 0.8810057640075684, + "step": 5858 + }, + { + "epoch": 1.35, + "grad_norm": 1.140989478824924, + "learning_rate": 5.243877067735909e-07, + "loss": 0.7236393690109253, + "step": 5859 + }, + { + "epoch": 1.3502304147465438, + "grad_norm": 1.1712872152312954, + "learning_rate": 5.240526069629264e-07, + "loss": 0.8287979364395142, + "step": 5860 + }, + { + "epoch": 1.3504608294930875, + "grad_norm": 0.9764543402246563, + "learning_rate": 5.237175762353964e-07, + "loss": 0.8268846869468689, + "step": 5861 + }, + { + "epoch": 1.3506912442396313, + "grad_norm": 1.08770217121451, + "learning_rate": 5.233826146396296e-07, + "loss": 0.7995575666427612, + "step": 5862 + }, + { + "epoch": 1.350921658986175, + "grad_norm": 1.185939350431103, + "learning_rate": 5.230477222242449e-07, + "loss": 0.7379493713378906, + "step": 5863 + }, + { + "epoch": 1.351152073732719, + 
"grad_norm": 1.1532350043824988, + "learning_rate": 5.227128990378524e-07, + "loss": 0.729906439781189, + "step": 5864 + }, + { + "epoch": 1.3513824884792627, + "grad_norm": 1.3775772205538213, + "learning_rate": 5.223781451290506e-07, + "loss": 0.8356789350509644, + "step": 5865 + }, + { + "epoch": 1.3516129032258064, + "grad_norm": 1.4707388081384496, + "learning_rate": 5.220434605464285e-07, + "loss": 0.8130582571029663, + "step": 5866 + }, + { + "epoch": 1.3518433179723504, + "grad_norm": 1.3840431554185126, + "learning_rate": 5.217088453385658e-07, + "loss": 0.7686447501182556, + "step": 5867 + }, + { + "epoch": 1.352073732718894, + "grad_norm": 1.4824685151456765, + "learning_rate": 5.213742995540309e-07, + "loss": 0.7945844531059265, + "step": 5868 + }, + { + "epoch": 1.3523041474654378, + "grad_norm": 0.9715413572597766, + "learning_rate": 5.210398232413824e-07, + "loss": 0.8082837462425232, + "step": 5869 + }, + { + "epoch": 1.3525345622119815, + "grad_norm": 1.2398246007417328, + "learning_rate": 5.2070541644917e-07, + "loss": 0.7826153039932251, + "step": 5870 + }, + { + "epoch": 1.3527649769585253, + "grad_norm": 1.2471684178108737, + "learning_rate": 5.203710792259318e-07, + "loss": 0.6853276491165161, + "step": 5871 + }, + { + "epoch": 1.3529953917050692, + "grad_norm": 1.2891891865978977, + "learning_rate": 5.200368116201962e-07, + "loss": 0.8354780673980713, + "step": 5872 + }, + { + "epoch": 1.353225806451613, + "grad_norm": 1.1178862343459024, + "learning_rate": 5.197026136804823e-07, + "loss": 0.7857648134231567, + "step": 5873 + }, + { + "epoch": 1.3534562211981567, + "grad_norm": 0.9168225851850988, + "learning_rate": 5.193684854552982e-07, + "loss": 0.663504958152771, + "step": 5874 + }, + { + "epoch": 1.3536866359447004, + "grad_norm": 1.329771615602396, + "learning_rate": 5.190344269931423e-07, + "loss": 0.8192203044891357, + "step": 5875 + }, + { + "epoch": 1.3539170506912441, + "grad_norm": 1.4861685476717017, + "learning_rate": 
5.187004383425024e-07, + "loss": 0.801753044128418, + "step": 5876 + }, + { + "epoch": 1.354147465437788, + "grad_norm": 1.3551621393598028, + "learning_rate": 5.183665195518566e-07, + "loss": 0.9427206516265869, + "step": 5877 + }, + { + "epoch": 1.3543778801843318, + "grad_norm": 1.1121835630605517, + "learning_rate": 5.18032670669673e-07, + "loss": 0.7801729440689087, + "step": 5878 + }, + { + "epoch": 1.3546082949308755, + "grad_norm": 1.3936797390586833, + "learning_rate": 5.176988917444094e-07, + "loss": 0.8224533796310425, + "step": 5879 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.9505008459531469, + "learning_rate": 5.173651828245127e-07, + "loss": 0.7800098657608032, + "step": 5880 + }, + { + "epoch": 1.3550691244239632, + "grad_norm": 0.9654380749861797, + "learning_rate": 5.170315439584212e-07, + "loss": 0.7612746953964233, + "step": 5881 + }, + { + "epoch": 1.355299539170507, + "grad_norm": 1.191616140078335, + "learning_rate": 5.166979751945617e-07, + "loss": 0.8027492761611938, + "step": 5882 + }, + { + "epoch": 1.3555299539170507, + "grad_norm": 1.167147993456773, + "learning_rate": 5.163644765813508e-07, + "loss": 0.7509280443191528, + "step": 5883 + }, + { + "epoch": 1.3557603686635944, + "grad_norm": 1.2102231125675782, + "learning_rate": 5.160310481671966e-07, + "loss": 0.7663145661354065, + "step": 5884 + }, + { + "epoch": 1.3559907834101383, + "grad_norm": 1.246862901799125, + "learning_rate": 5.156976900004948e-07, + "loss": 0.7598870396614075, + "step": 5885 + }, + { + "epoch": 1.356221198156682, + "grad_norm": 1.127184650819857, + "learning_rate": 5.153644021296317e-07, + "loss": 0.7923038005828857, + "step": 5886 + }, + { + "epoch": 1.3564516129032258, + "grad_norm": 1.2664053097126295, + "learning_rate": 5.150311846029846e-07, + "loss": 0.8711799383163452, + "step": 5887 + }, + { + "epoch": 1.3566820276497695, + "grad_norm": 1.294570667250746, + "learning_rate": 5.146980374689191e-07, + "loss": 0.7852096557617188, + "step": 
5888 + }, + { + "epoch": 1.3569124423963133, + "grad_norm": 1.1426360408928755, + "learning_rate": 5.143649607757905e-07, + "loss": 0.7259876132011414, + "step": 5889 + }, + { + "epoch": 1.3571428571428572, + "grad_norm": 0.9810253925795782, + "learning_rate": 5.140319545719454e-07, + "loss": 0.7612321376800537, + "step": 5890 + }, + { + "epoch": 1.357373271889401, + "grad_norm": 1.2919477789807814, + "learning_rate": 5.136990189057187e-07, + "loss": 0.7881298661231995, + "step": 5891 + }, + { + "epoch": 1.3576036866359447, + "grad_norm": 1.0310706760740191, + "learning_rate": 5.133661538254353e-07, + "loss": 0.6956340074539185, + "step": 5892 + }, + { + "epoch": 1.3578341013824886, + "grad_norm": 1.0277045355993415, + "learning_rate": 5.130333593794107e-07, + "loss": 0.7800698280334473, + "step": 5893 + }, + { + "epoch": 1.3580645161290323, + "grad_norm": 1.0373100274796343, + "learning_rate": 5.127006356159496e-07, + "loss": 0.6920318603515625, + "step": 5894 + }, + { + "epoch": 1.358294930875576, + "grad_norm": 0.9870224446835288, + "learning_rate": 5.123679825833458e-07, + "loss": 0.6972872018814087, + "step": 5895 + }, + { + "epoch": 1.3585253456221198, + "grad_norm": 1.1473583592012562, + "learning_rate": 5.12035400329884e-07, + "loss": 0.8820276260375977, + "step": 5896 + }, + { + "epoch": 1.3587557603686635, + "grad_norm": 1.1566218274104645, + "learning_rate": 5.117028889038375e-07, + "loss": 0.8834109306335449, + "step": 5897 + }, + { + "epoch": 1.3589861751152075, + "grad_norm": 1.1393544418506285, + "learning_rate": 5.113704483534704e-07, + "loss": 0.6981096267700195, + "step": 5898 + }, + { + "epoch": 1.3592165898617512, + "grad_norm": 1.329102048560067, + "learning_rate": 5.11038078727036e-07, + "loss": 0.7617249488830566, + "step": 5899 + }, + { + "epoch": 1.359447004608295, + "grad_norm": 1.7116569149164136, + "learning_rate": 5.107057800727773e-07, + "loss": 0.8373798131942749, + "step": 5900 + }, + { + "epoch": 1.3596774193548387, + "grad_norm": 
1.3064563550321244, + "learning_rate": 5.103735524389264e-07, + "loss": 0.7176666855812073, + "step": 5901 + }, + { + "epoch": 1.3599078341013824, + "grad_norm": 0.9003342699900779, + "learning_rate": 5.100413958737067e-07, + "loss": 0.7872966527938843, + "step": 5902 + }, + { + "epoch": 1.3601382488479263, + "grad_norm": 1.1723157653802474, + "learning_rate": 5.097093104253295e-07, + "loss": 0.6668897271156311, + "step": 5903 + }, + { + "epoch": 1.36036866359447, + "grad_norm": 1.2119302484042467, + "learning_rate": 5.093772961419967e-07, + "loss": 0.8413408994674683, + "step": 5904 + }, + { + "epoch": 1.3605990783410138, + "grad_norm": 0.9857990015136971, + "learning_rate": 5.090453530719e-07, + "loss": 0.632825493812561, + "step": 5905 + }, + { + "epoch": 1.3608294930875577, + "grad_norm": 1.2378128322555926, + "learning_rate": 5.087134812632201e-07, + "loss": 0.737346887588501, + "step": 5906 + }, + { + "epoch": 1.3610599078341012, + "grad_norm": 1.2614434601380542, + "learning_rate": 5.083816807641283e-07, + "loss": 1.00008225440979, + "step": 5907 + }, + { + "epoch": 1.3612903225806452, + "grad_norm": 1.2168755174090398, + "learning_rate": 5.08049951622785e-07, + "loss": 0.7844079732894897, + "step": 5908 + }, + { + "epoch": 1.361520737327189, + "grad_norm": 1.0532681425474226, + "learning_rate": 5.077182938873393e-07, + "loss": 0.8615080118179321, + "step": 5909 + }, + { + "epoch": 1.3617511520737327, + "grad_norm": 1.279562028421048, + "learning_rate": 5.073867076059321e-07, + "loss": 0.6930621862411499, + "step": 5910 + }, + { + "epoch": 1.3619815668202766, + "grad_norm": 1.4077453728560791, + "learning_rate": 5.07055192826692e-07, + "loss": 0.7020307183265686, + "step": 5911 + }, + { + "epoch": 1.3622119815668203, + "grad_norm": 1.4403791813866107, + "learning_rate": 5.067237495977379e-07, + "loss": 0.7281042337417603, + "step": 5912 + }, + { + "epoch": 1.362442396313364, + "grad_norm": 1.014203177200115, + "learning_rate": 5.063923779671789e-07, + 
"loss": 0.8092719316482544, + "step": 5913 + }, + { + "epoch": 1.3626728110599078, + "grad_norm": 1.2597384594296865, + "learning_rate": 5.060610779831125e-07, + "loss": 0.7323317527770996, + "step": 5914 + }, + { + "epoch": 1.3629032258064515, + "grad_norm": 0.9804861205409557, + "learning_rate": 5.05729849693627e-07, + "loss": 0.7370069622993469, + "step": 5915 + }, + { + "epoch": 1.3631336405529955, + "grad_norm": 1.1355071333670705, + "learning_rate": 5.053986931467994e-07, + "loss": 0.7175320386886597, + "step": 5916 + }, + { + "epoch": 1.3633640552995392, + "grad_norm": 1.2834592450306632, + "learning_rate": 5.050676083906964e-07, + "loss": 0.8643501996994019, + "step": 5917 + }, + { + "epoch": 1.363594470046083, + "grad_norm": 1.2479698704612106, + "learning_rate": 5.047365954733752e-07, + "loss": 0.9110950827598572, + "step": 5918 + }, + { + "epoch": 1.3638248847926266, + "grad_norm": 1.6104451195946936, + "learning_rate": 5.044056544428814e-07, + "loss": 0.9242197275161743, + "step": 5919 + }, + { + "epoch": 1.3640552995391704, + "grad_norm": 1.2769108446030992, + "learning_rate": 5.040747853472509e-07, + "loss": 0.9218860864639282, + "step": 5920 + }, + { + "epoch": 1.3642857142857143, + "grad_norm": 1.3302527755174611, + "learning_rate": 5.037439882345084e-07, + "loss": 0.970054030418396, + "step": 5921 + }, + { + "epoch": 1.364516129032258, + "grad_norm": 1.0075132364725619, + "learning_rate": 5.034132631526695e-07, + "loss": 0.7707182168960571, + "step": 5922 + }, + { + "epoch": 1.3647465437788018, + "grad_norm": 1.1036594577594991, + "learning_rate": 5.03082610149738e-07, + "loss": 0.7673811912536621, + "step": 5923 + }, + { + "epoch": 1.3649769585253457, + "grad_norm": 1.2758650519526258, + "learning_rate": 5.027520292737073e-07, + "loss": 0.7387198209762573, + "step": 5924 + }, + { + "epoch": 1.3652073732718895, + "grad_norm": 1.139448521744241, + "learning_rate": 5.024215205725619e-07, + "loss": 0.7803019881248474, + "step": 5925 + }, + { + 
"epoch": 1.3654377880184332, + "grad_norm": 1.3985269621197394, + "learning_rate": 5.020910840942738e-07, + "loss": 0.8753018379211426, + "step": 5926 + }, + { + "epoch": 1.365668202764977, + "grad_norm": 1.0358625157915384, + "learning_rate": 5.017607198868055e-07, + "loss": 0.7917389869689941, + "step": 5927 + }, + { + "epoch": 1.3658986175115206, + "grad_norm": 1.2995608187995562, + "learning_rate": 5.014304279981095e-07, + "loss": 0.8393691182136536, + "step": 5928 + }, + { + "epoch": 1.3661290322580646, + "grad_norm": 1.2671721961788391, + "learning_rate": 5.011002084761264e-07, + "loss": 0.6635205745697021, + "step": 5929 + }, + { + "epoch": 1.3663594470046083, + "grad_norm": 1.2038857805513816, + "learning_rate": 5.007700613687879e-07, + "loss": 0.7058769464492798, + "step": 5930 + }, + { + "epoch": 1.366589861751152, + "grad_norm": 1.1784688857731938, + "learning_rate": 5.004399867240143e-07, + "loss": 0.841168224811554, + "step": 5931 + }, + { + "epoch": 1.3668202764976958, + "grad_norm": 1.3760327619217738, + "learning_rate": 5.001099845897148e-07, + "loss": 0.7385121583938599, + "step": 5932 + }, + { + "epoch": 1.3670506912442395, + "grad_norm": 1.1633525983686732, + "learning_rate": 4.997800550137897e-07, + "loss": 0.6525158882141113, + "step": 5933 + }, + { + "epoch": 1.3672811059907835, + "grad_norm": 1.2331358286597804, + "learning_rate": 4.994501980441274e-07, + "loss": 0.7838844060897827, + "step": 5934 + }, + { + "epoch": 1.3675115207373272, + "grad_norm": 1.4450953979822279, + "learning_rate": 4.991204137286061e-07, + "loss": 0.8831999897956848, + "step": 5935 + }, + { + "epoch": 1.367741935483871, + "grad_norm": 1.0408031352355525, + "learning_rate": 4.987907021150938e-07, + "loss": 0.8053784966468811, + "step": 5936 + }, + { + "epoch": 1.3679723502304149, + "grad_norm": 1.1356206370071746, + "learning_rate": 4.984610632514475e-07, + "loss": 0.8093301057815552, + "step": 5937 + }, + { + "epoch": 1.3682027649769586, + "grad_norm": 
1.0230530705292329, + "learning_rate": 4.981314971855136e-07, + "loss": 0.7609653472900391, + "step": 5938 + }, + { + "epoch": 1.3684331797235023, + "grad_norm": 1.4109994154981755, + "learning_rate": 4.978020039651288e-07, + "loss": 0.7131600379943848, + "step": 5939 + }, + { + "epoch": 1.368663594470046, + "grad_norm": 1.3192550042799691, + "learning_rate": 4.974725836381184e-07, + "loss": 0.6555063724517822, + "step": 5940 + }, + { + "epoch": 1.3688940092165898, + "grad_norm": 1.1278604970222592, + "learning_rate": 4.971432362522968e-07, + "loss": 0.8349519968032837, + "step": 5941 + }, + { + "epoch": 1.3691244239631337, + "grad_norm": 1.2138732932202303, + "learning_rate": 4.968139618554691e-07, + "loss": 0.7335611581802368, + "step": 5942 + }, + { + "epoch": 1.3693548387096774, + "grad_norm": 1.050807913168598, + "learning_rate": 4.964847604954287e-07, + "loss": 0.8349814414978027, + "step": 5943 + }, + { + "epoch": 1.3695852534562212, + "grad_norm": 1.07716704849378, + "learning_rate": 4.961556322199585e-07, + "loss": 0.6816729307174683, + "step": 5944 + }, + { + "epoch": 1.369815668202765, + "grad_norm": 1.5220059571304148, + "learning_rate": 4.958265770768315e-07, + "loss": 0.847672164440155, + "step": 5945 + }, + { + "epoch": 1.3700460829493086, + "grad_norm": 1.267067930725286, + "learning_rate": 4.954975951138095e-07, + "loss": 0.6674519777297974, + "step": 5946 + }, + { + "epoch": 1.3702764976958526, + "grad_norm": 1.0820409905680344, + "learning_rate": 4.951686863786432e-07, + "loss": 0.7836427092552185, + "step": 5947 + }, + { + "epoch": 1.3705069124423963, + "grad_norm": 1.0577780792239002, + "learning_rate": 4.948398509190742e-07, + "loss": 0.640183687210083, + "step": 5948 + }, + { + "epoch": 1.37073732718894, + "grad_norm": 1.223963669470004, + "learning_rate": 4.945110887828322e-07, + "loss": 0.8438451290130615, + "step": 5949 + }, + { + "epoch": 1.370967741935484, + "grad_norm": 1.5483267377377474, + "learning_rate": 4.94182400017636e-07, + 
"loss": 0.9311714172363281, + "step": 5950 + }, + { + "epoch": 1.3711981566820277, + "grad_norm": 1.2352509732193302, + "learning_rate": 4.938537846711952e-07, + "loss": 0.7332801818847656, + "step": 5951 + }, + { + "epoch": 1.3714285714285714, + "grad_norm": 1.127354832681604, + "learning_rate": 4.935252427912075e-07, + "loss": 0.7189289331436157, + "step": 5952 + }, + { + "epoch": 1.3716589861751152, + "grad_norm": 1.451594181977691, + "learning_rate": 4.9319677442536e-07, + "loss": 0.827372670173645, + "step": 5953 + }, + { + "epoch": 1.371889400921659, + "grad_norm": 1.2273788913776413, + "learning_rate": 4.9286837962133e-07, + "loss": 0.7607625722885132, + "step": 5954 + }, + { + "epoch": 1.3721198156682028, + "grad_norm": 1.1935199245873378, + "learning_rate": 4.925400584267836e-07, + "loss": 0.9420886635780334, + "step": 5955 + }, + { + "epoch": 1.3723502304147466, + "grad_norm": 1.1557325656206936, + "learning_rate": 4.922118108893757e-07, + "loss": 0.7605317831039429, + "step": 5956 + }, + { + "epoch": 1.3725806451612903, + "grad_norm": 1.059494459687004, + "learning_rate": 4.918836370567513e-07, + "loss": 0.8353599309921265, + "step": 5957 + }, + { + "epoch": 1.372811059907834, + "grad_norm": 1.2571100340874592, + "learning_rate": 4.915555369765439e-07, + "loss": 0.8540027141571045, + "step": 5958 + }, + { + "epoch": 1.3730414746543778, + "grad_norm": 1.027809306304352, + "learning_rate": 4.912275106963778e-07, + "loss": 0.6965712308883667, + "step": 5959 + }, + { + "epoch": 1.3732718894009217, + "grad_norm": 1.0356479101830274, + "learning_rate": 4.908995582638648e-07, + "loss": 0.7460787296295166, + "step": 5960 + }, + { + "epoch": 1.3735023041474654, + "grad_norm": 1.1845566109999182, + "learning_rate": 4.905716797266067e-07, + "loss": 0.8652873039245605, + "step": 5961 + }, + { + "epoch": 1.3737327188940092, + "grad_norm": 1.1300176885770365, + "learning_rate": 4.902438751321952e-07, + "loss": 0.7757953405380249, + "step": 5962 + }, + { + "epoch": 
1.3739631336405531, + "grad_norm": 1.2945741727860514, + "learning_rate": 4.899161445282102e-07, + "loss": 0.8842452168464661, + "step": 5963 + }, + { + "epoch": 1.3741935483870968, + "grad_norm": 1.1415902309445607, + "learning_rate": 4.895884879622215e-07, + "loss": 0.7259113788604736, + "step": 5964 + }, + { + "epoch": 1.3744239631336406, + "grad_norm": 1.3855842779268248, + "learning_rate": 4.892609054817883e-07, + "loss": 0.8871402144432068, + "step": 5965 + }, + { + "epoch": 1.3746543778801843, + "grad_norm": 1.3262407740428463, + "learning_rate": 4.889333971344586e-07, + "loss": 0.7564518451690674, + "step": 5966 + }, + { + "epoch": 1.374884792626728, + "grad_norm": 1.2010368462649357, + "learning_rate": 4.886059629677692e-07, + "loss": 0.7886015176773071, + "step": 5967 + }, + { + "epoch": 1.375115207373272, + "grad_norm": 1.199947155848343, + "learning_rate": 4.882786030292479e-07, + "loss": 0.8256035447120667, + "step": 5968 + }, + { + "epoch": 1.3753456221198157, + "grad_norm": 1.3084738837241086, + "learning_rate": 4.879513173664099e-07, + "loss": 0.9351227283477783, + "step": 5969 + }, + { + "epoch": 1.3755760368663594, + "grad_norm": 1.1794682657820328, + "learning_rate": 4.876241060267598e-07, + "loss": 0.7221553921699524, + "step": 5970 + }, + { + "epoch": 1.3758064516129032, + "grad_norm": 1.3959950512058854, + "learning_rate": 4.872969690577928e-07, + "loss": 0.7451514005661011, + "step": 5971 + }, + { + "epoch": 1.3760368663594469, + "grad_norm": 2.704793745814284, + "learning_rate": 4.86969906506992e-07, + "loss": 0.810903787612915, + "step": 5972 + }, + { + "epoch": 1.3762672811059908, + "grad_norm": 1.0363767093510534, + "learning_rate": 4.866429184218298e-07, + "loss": 0.6279938817024231, + "step": 5973 + }, + { + "epoch": 1.3764976958525346, + "grad_norm": 1.4075128359986724, + "learning_rate": 4.863160048497688e-07, + "loss": 0.7742956876754761, + "step": 5974 + }, + { + "epoch": 1.3767281105990783, + "grad_norm": 1.0416061346586747, + 
"learning_rate": 4.859891658382597e-07, + "loss": 0.7423844933509827, + "step": 5975 + }, + { + "epoch": 1.3769585253456222, + "grad_norm": 1.0348526250721313, + "learning_rate": 4.856624014347426e-07, + "loss": 0.8387676477432251, + "step": 5976 + }, + { + "epoch": 1.377188940092166, + "grad_norm": 1.3906652341525882, + "learning_rate": 4.853357116866471e-07, + "loss": 0.7959855794906616, + "step": 5977 + }, + { + "epoch": 1.3774193548387097, + "grad_norm": 1.2781418274310543, + "learning_rate": 4.850090966413913e-07, + "loss": 0.7086259722709656, + "step": 5978 + }, + { + "epoch": 1.3776497695852534, + "grad_norm": 1.113262974989995, + "learning_rate": 4.846825563463838e-07, + "loss": 0.7219396829605103, + "step": 5979 + }, + { + "epoch": 1.3778801843317972, + "grad_norm": 1.2693838975886846, + "learning_rate": 4.84356090849021e-07, + "loss": 0.8383582830429077, + "step": 5980 + }, + { + "epoch": 1.378110599078341, + "grad_norm": 1.2004259850017622, + "learning_rate": 4.840297001966887e-07, + "loss": 0.7624244689941406, + "step": 5981 + }, + { + "epoch": 1.3783410138248848, + "grad_norm": 1.3275243269089372, + "learning_rate": 4.837033844367626e-07, + "loss": 0.7901623249053955, + "step": 5982 + }, + { + "epoch": 1.3785714285714286, + "grad_norm": 1.0665581903589285, + "learning_rate": 4.833771436166068e-07, + "loss": 0.7732094526290894, + "step": 5983 + }, + { + "epoch": 1.3788018433179723, + "grad_norm": 1.221680510593368, + "learning_rate": 4.830509777835744e-07, + "loss": 0.7882228493690491, + "step": 5984 + }, + { + "epoch": 1.379032258064516, + "grad_norm": 1.3954212415484932, + "learning_rate": 4.827248869850086e-07, + "loss": 0.8601159453392029, + "step": 5985 + }, + { + "epoch": 1.37926267281106, + "grad_norm": 1.442537797357167, + "learning_rate": 4.823988712682406e-07, + "loss": 0.8828538656234741, + "step": 5986 + }, + { + "epoch": 1.3794930875576037, + "grad_norm": 1.2814445672112398, + "learning_rate": 4.820729306805907e-07, + "loss": 
0.8586058020591736, + "step": 5987 + }, + { + "epoch": 1.3797235023041474, + "grad_norm": 1.3476469386797916, + "learning_rate": 4.8174706526937e-07, + "loss": 0.8276243209838867, + "step": 5988 + }, + { + "epoch": 1.3799539170506914, + "grad_norm": 1.1504215702512235, + "learning_rate": 4.814212750818764e-07, + "loss": 0.837665855884552, + "step": 5989 + }, + { + "epoch": 1.380184331797235, + "grad_norm": 1.0830851541320008, + "learning_rate": 4.810955601653978e-07, + "loss": 0.7493194341659546, + "step": 5990 + }, + { + "epoch": 1.3804147465437788, + "grad_norm": 0.9470923738615639, + "learning_rate": 4.807699205672123e-07, + "loss": 0.8382525444030762, + "step": 5991 + }, + { + "epoch": 1.3806451612903226, + "grad_norm": 1.302996846441217, + "learning_rate": 4.804443563345854e-07, + "loss": 0.8152645826339722, + "step": 5992 + }, + { + "epoch": 1.3808755760368663, + "grad_norm": 1.1087518210488847, + "learning_rate": 4.801188675147719e-07, + "loss": 0.7168164849281311, + "step": 5993 + }, + { + "epoch": 1.3811059907834102, + "grad_norm": 1.3971974855003246, + "learning_rate": 4.79793454155017e-07, + "loss": 0.883512556552887, + "step": 5994 + }, + { + "epoch": 1.381336405529954, + "grad_norm": 1.1775999496250547, + "learning_rate": 4.794681163025536e-07, + "loss": 0.7258438467979431, + "step": 5995 + }, + { + "epoch": 1.3815668202764977, + "grad_norm": 1.102316858629444, + "learning_rate": 4.79142854004604e-07, + "loss": 0.8408991098403931, + "step": 5996 + }, + { + "epoch": 1.3817972350230414, + "grad_norm": 1.2549882230845555, + "learning_rate": 4.788176673083796e-07, + "loss": 0.6506227254867554, + "step": 5997 + }, + { + "epoch": 1.3820276497695851, + "grad_norm": 1.145761304273299, + "learning_rate": 4.784925562610809e-07, + "loss": 0.6971127986907959, + "step": 5998 + }, + { + "epoch": 1.382258064516129, + "grad_norm": 1.3037562977083754, + "learning_rate": 4.781675209098967e-07, + "loss": 0.8399784564971924, + "step": 5999 + }, + { + "epoch": 
1.3824884792626728, + "grad_norm": 1.1085204750545832, + "learning_rate": 4.778425613020067e-07, + "loss": 0.6451772451400757, + "step": 6000 + }, + { + "epoch": 1.3827188940092165, + "grad_norm": 1.2906420363235995, + "learning_rate": 4.775176774845774e-07, + "loss": 0.7794390916824341, + "step": 6001 + }, + { + "epoch": 1.3829493087557605, + "grad_norm": 1.2681207047961411, + "learning_rate": 4.771928695047652e-07, + "loss": 0.7743663191795349, + "step": 6002 + }, + { + "epoch": 1.3831797235023042, + "grad_norm": 1.3900227492937691, + "learning_rate": 4.768681374097165e-07, + "loss": 0.7654878497123718, + "step": 6003 + }, + { + "epoch": 1.383410138248848, + "grad_norm": 0.9597367840932265, + "learning_rate": 4.765434812465645e-07, + "loss": 0.634769082069397, + "step": 6004 + }, + { + "epoch": 1.3836405529953917, + "grad_norm": 1.506039076037628, + "learning_rate": 4.762189010624337e-07, + "loss": 0.7941944599151611, + "step": 6005 + }, + { + "epoch": 1.3838709677419354, + "grad_norm": 1.015987334283248, + "learning_rate": 4.75894396904436e-07, + "loss": 0.7437179088592529, + "step": 6006 + }, + { + "epoch": 1.3841013824884794, + "grad_norm": 1.4064808788220893, + "learning_rate": 4.7556996881967236e-07, + "loss": 0.7854535579681396, + "step": 6007 + }, + { + "epoch": 1.384331797235023, + "grad_norm": 1.1454067558015728, + "learning_rate": 4.752456168552339e-07, + "loss": 0.7506910562515259, + "step": 6008 + }, + { + "epoch": 1.3845622119815668, + "grad_norm": 1.3378490743548084, + "learning_rate": 4.749213410581995e-07, + "loss": 0.8967334032058716, + "step": 6009 + }, + { + "epoch": 1.3847926267281105, + "grad_norm": 0.9073367214802157, + "learning_rate": 4.7459714147563677e-07, + "loss": 0.7053096294403076, + "step": 6010 + }, + { + "epoch": 1.3850230414746543, + "grad_norm": 1.4011875457574152, + "learning_rate": 4.7427301815460396e-07, + "loss": 0.8759415149688721, + "step": 6011 + }, + { + "epoch": 1.3852534562211982, + "grad_norm": 1.2083846258038176, + 
"learning_rate": 4.739489711421466e-07, + "loss": 0.8827483654022217, + "step": 6012 + }, + { + "epoch": 1.385483870967742, + "grad_norm": 0.9892327750407551, + "learning_rate": 4.736250004852993e-07, + "loss": 0.7268258929252625, + "step": 6013 + }, + { + "epoch": 1.3857142857142857, + "grad_norm": 1.3354283922456354, + "learning_rate": 4.7330110623108665e-07, + "loss": 0.7142586708068848, + "step": 6014 + }, + { + "epoch": 1.3859447004608296, + "grad_norm": 0.9791582073391492, + "learning_rate": 4.7297728842652116e-07, + "loss": 0.7123303413391113, + "step": 6015 + }, + { + "epoch": 1.3861751152073734, + "grad_norm": 1.1089770586845422, + "learning_rate": 4.726535471186047e-07, + "loss": 0.7548067569732666, + "step": 6016 + }, + { + "epoch": 1.386405529953917, + "grad_norm": 1.205868893691031, + "learning_rate": 4.723298823543277e-07, + "loss": 0.7792191505432129, + "step": 6017 + }, + { + "epoch": 1.3866359447004608, + "grad_norm": 1.313401532453458, + "learning_rate": 4.7200629418066975e-07, + "loss": 0.8658785820007324, + "step": 6018 + }, + { + "epoch": 1.3868663594470045, + "grad_norm": 1.20345203638671, + "learning_rate": 4.716827826445987e-07, + "loss": 0.7173904776573181, + "step": 6019 + }, + { + "epoch": 1.3870967741935485, + "grad_norm": 1.0016118220950732, + "learning_rate": 4.7135934779307284e-07, + "loss": 0.6675543785095215, + "step": 6020 + }, + { + "epoch": 1.3873271889400922, + "grad_norm": 1.2559637316001069, + "learning_rate": 4.710359896730378e-07, + "loss": 0.8164724111557007, + "step": 6021 + }, + { + "epoch": 1.387557603686636, + "grad_norm": 1.474439832240672, + "learning_rate": 4.707127083314283e-07, + "loss": 0.8354332447052002, + "step": 6022 + }, + { + "epoch": 1.3877880184331797, + "grad_norm": 1.1544900465349175, + "learning_rate": 4.7038950381516885e-07, + "loss": 0.8414663672447205, + "step": 6023 + }, + { + "epoch": 1.3880184331797234, + "grad_norm": 1.2150035811173532, + "learning_rate": 4.700663761711717e-07, + "loss": 
0.7693418264389038, + "step": 6024 + }, + { + "epoch": 1.3882488479262673, + "grad_norm": 1.0071958767588902, + "learning_rate": 4.697433254463382e-07, + "loss": 0.7809267044067383, + "step": 6025 + }, + { + "epoch": 1.388479262672811, + "grad_norm": 1.203482571104156, + "learning_rate": 4.6942035168755944e-07, + "loss": 0.7455927133560181, + "step": 6026 + }, + { + "epoch": 1.3887096774193548, + "grad_norm": 1.3018105004563159, + "learning_rate": 4.6909745494171383e-07, + "loss": 0.8217881917953491, + "step": 6027 + }, + { + "epoch": 1.3889400921658988, + "grad_norm": 1.3723027057230852, + "learning_rate": 4.687746352556703e-07, + "loss": 0.8138882517814636, + "step": 6028 + }, + { + "epoch": 1.3891705069124423, + "grad_norm": 1.241759909967513, + "learning_rate": 4.6845189267628505e-07, + "loss": 0.8926469087600708, + "step": 6029 + }, + { + "epoch": 1.3894009216589862, + "grad_norm": 1.3027918343739477, + "learning_rate": 4.681292272504036e-07, + "loss": 0.797023355960846, + "step": 6030 + }, + { + "epoch": 1.38963133640553, + "grad_norm": 0.8383796462842409, + "learning_rate": 4.6780663902486104e-07, + "loss": 0.6767498254776001, + "step": 6031 + }, + { + "epoch": 1.3898617511520737, + "grad_norm": 1.2727364252127855, + "learning_rate": 4.674841280464804e-07, + "loss": 0.7514280080795288, + "step": 6032 + }, + { + "epoch": 1.3900921658986176, + "grad_norm": 1.3853363805552346, + "learning_rate": 4.671616943620731e-07, + "loss": 0.8879726529121399, + "step": 6033 + }, + { + "epoch": 1.3903225806451613, + "grad_norm": 0.8270134553121277, + "learning_rate": 4.66839338018441e-07, + "loss": 0.6674140095710754, + "step": 6034 + }, + { + "epoch": 1.390552995391705, + "grad_norm": 1.078021820178179, + "learning_rate": 4.6651705906237307e-07, + "loss": 0.9094855785369873, + "step": 6035 + }, + { + "epoch": 1.3907834101382488, + "grad_norm": 1.2561393182724931, + "learning_rate": 4.661948575406478e-07, + "loss": 0.8334506750106812, + "step": 6036 + }, + { + "epoch": 
1.3910138248847925, + "grad_norm": 1.040119500616202, + "learning_rate": 4.658727335000323e-07, + "loss": 0.6545997858047485, + "step": 6037 + }, + { + "epoch": 1.3912442396313365, + "grad_norm": 1.1967093206075838, + "learning_rate": 4.6555068698728237e-07, + "loss": 0.7810590267181396, + "step": 6038 + }, + { + "epoch": 1.3914746543778802, + "grad_norm": 1.0756703494881659, + "learning_rate": 4.652287180491424e-07, + "loss": 0.7581864595413208, + "step": 6039 + }, + { + "epoch": 1.391705069124424, + "grad_norm": 1.2754594039466507, + "learning_rate": 4.649068267323465e-07, + "loss": 0.7134817242622375, + "step": 6040 + }, + { + "epoch": 1.3919354838709677, + "grad_norm": 0.9730020123763279, + "learning_rate": 4.645850130836162e-07, + "loss": 0.7050445079803467, + "step": 6041 + }, + { + "epoch": 1.3921658986175114, + "grad_norm": 1.146073776977597, + "learning_rate": 4.642632771496622e-07, + "loss": 0.8510535955429077, + "step": 6042 + }, + { + "epoch": 1.3923963133640553, + "grad_norm": 1.3940656685053847, + "learning_rate": 4.6394161897718454e-07, + "loss": 0.8627035617828369, + "step": 6043 + }, + { + "epoch": 1.392626728110599, + "grad_norm": 1.2671457951329919, + "learning_rate": 4.6362003861287127e-07, + "loss": 0.89891517162323, + "step": 6044 + }, + { + "epoch": 1.3928571428571428, + "grad_norm": 1.3215265337916509, + "learning_rate": 4.6329853610339896e-07, + "loss": 0.7267141342163086, + "step": 6045 + }, + { + "epoch": 1.3930875576036867, + "grad_norm": 1.4814794045534565, + "learning_rate": 4.6297711149543405e-07, + "loss": 0.8021189570426941, + "step": 6046 + }, + { + "epoch": 1.3933179723502305, + "grad_norm": 1.0954918085269951, + "learning_rate": 4.6265576483563054e-07, + "loss": 0.7836861610412598, + "step": 6047 + }, + { + "epoch": 1.3935483870967742, + "grad_norm": 1.1158269152355589, + "learning_rate": 4.623344961706309e-07, + "loss": 0.816940188407898, + "step": 6048 + }, + { + "epoch": 1.393778801843318, + "grad_norm": 1.4383712223724088, + 
"learning_rate": 4.6201330554706773e-07, + "loss": 0.77923583984375, + "step": 6049 + }, + { + "epoch": 1.3940092165898617, + "grad_norm": 1.3116759273395542, + "learning_rate": 4.6169219301156117e-07, + "loss": 0.8017981052398682, + "step": 6050 + }, + { + "epoch": 1.3942396313364056, + "grad_norm": 0.9886522563222937, + "learning_rate": 4.6137115861071973e-07, + "loss": 0.6786847114562988, + "step": 6051 + }, + { + "epoch": 1.3944700460829493, + "grad_norm": 1.1651814302030006, + "learning_rate": 4.61050202391142e-07, + "loss": 0.7802412509918213, + "step": 6052 + }, + { + "epoch": 1.394700460829493, + "grad_norm": 1.1955845105043188, + "learning_rate": 4.6072932439941347e-07, + "loss": 0.7434886693954468, + "step": 6053 + }, + { + "epoch": 1.3949308755760368, + "grad_norm": 1.2231160523968054, + "learning_rate": 4.6040852468211e-07, + "loss": 0.7590811252593994, + "step": 6054 + }, + { + "epoch": 1.3951612903225805, + "grad_norm": 1.5534904257800726, + "learning_rate": 4.600878032857949e-07, + "loss": 0.8952670097351074, + "step": 6055 + }, + { + "epoch": 1.3953917050691245, + "grad_norm": 1.1221688640413483, + "learning_rate": 4.5976716025702036e-07, + "loss": 0.8055328130722046, + "step": 6056 + }, + { + "epoch": 1.3956221198156682, + "grad_norm": 1.2064570897657243, + "learning_rate": 4.5944659564232725e-07, + "loss": 0.8919316530227661, + "step": 6057 + }, + { + "epoch": 1.395852534562212, + "grad_norm": 1.1074605434156857, + "learning_rate": 4.591261094882453e-07, + "loss": 0.701945960521698, + "step": 6058 + }, + { + "epoch": 1.3960829493087559, + "grad_norm": 1.1766452414586335, + "learning_rate": 4.5880570184129206e-07, + "loss": 0.7457436323165894, + "step": 6059 + }, + { + "epoch": 1.3963133640552996, + "grad_norm": 1.193782401804385, + "learning_rate": 4.5848537274797527e-07, + "loss": 0.8093513250350952, + "step": 6060 + }, + { + "epoch": 1.3965437788018433, + "grad_norm": 1.5454221039375025, + "learning_rate": 4.5816512225478965e-07, + "loss": 
0.7098822593688965, + "step": 6061 + }, + { + "epoch": 1.396774193548387, + "grad_norm": 1.2339994165792372, + "learning_rate": 4.578449504082189e-07, + "loss": 0.7423167824745178, + "step": 6062 + }, + { + "epoch": 1.3970046082949308, + "grad_norm": 1.1302042774482615, + "learning_rate": 4.5752485725473624e-07, + "loss": 0.8730076551437378, + "step": 6063 + }, + { + "epoch": 1.3972350230414747, + "grad_norm": 1.124374396794659, + "learning_rate": 4.572048428408024e-07, + "loss": 0.6914420127868652, + "step": 6064 + }, + { + "epoch": 1.3974654377880185, + "grad_norm": 1.3148006815381303, + "learning_rate": 4.5688490721286664e-07, + "loss": 0.8051402568817139, + "step": 6065 + }, + { + "epoch": 1.3976958525345622, + "grad_norm": 1.548390651351193, + "learning_rate": 4.5656505041736803e-07, + "loss": 0.9185452461242676, + "step": 6066 + }, + { + "epoch": 1.397926267281106, + "grad_norm": 1.1772485518113056, + "learning_rate": 4.5624527250073287e-07, + "loss": 0.766645073890686, + "step": 6067 + }, + { + "epoch": 1.3981566820276496, + "grad_norm": 1.3246112666718692, + "learning_rate": 4.559255735093763e-07, + "loss": 0.8005224466323853, + "step": 6068 + }, + { + "epoch": 1.3983870967741936, + "grad_norm": 1.2624209909197728, + "learning_rate": 4.5560595348970275e-07, + "loss": 0.8072810173034668, + "step": 6069 + }, + { + "epoch": 1.3986175115207373, + "grad_norm": 1.2197415999956105, + "learning_rate": 4.552864124881045e-07, + "loss": 0.7537474632263184, + "step": 6070 + }, + { + "epoch": 1.398847926267281, + "grad_norm": 1.3524984308216321, + "learning_rate": 4.549669505509619e-07, + "loss": 0.8396750092506409, + "step": 6071 + }, + { + "epoch": 1.399078341013825, + "grad_norm": 1.3095033527266953, + "learning_rate": 4.546475677246453e-07, + "loss": 0.8456804752349854, + "step": 6072 + }, + { + "epoch": 1.3993087557603687, + "grad_norm": 1.212970447769736, + "learning_rate": 4.543282640555123e-07, + "loss": 0.6150076389312744, + "step": 6073 + }, + { + "epoch": 
1.3995391705069125, + "grad_norm": 1.1345047277741707, + "learning_rate": 4.540090395899089e-07, + "loss": 0.667172908782959, + "step": 6074 + }, + { + "epoch": 1.3997695852534562, + "grad_norm": 1.1269214154073468, + "learning_rate": 4.5368989437417116e-07, + "loss": 0.7918317914009094, + "step": 6075 + }, + { + "epoch": 1.4, + "grad_norm": 1.070411671989194, + "learning_rate": 4.5337082845462193e-07, + "loss": 0.6800580024719238, + "step": 6076 + }, + { + "epoch": 1.4002304147465439, + "grad_norm": 1.3908779413221009, + "learning_rate": 4.530518418775733e-07, + "loss": 0.9205034971237183, + "step": 6077 + }, + { + "epoch": 1.4004608294930876, + "grad_norm": 0.9376373503434607, + "learning_rate": 4.5273293468932585e-07, + "loss": 0.7228822708129883, + "step": 6078 + }, + { + "epoch": 1.4006912442396313, + "grad_norm": 1.0019153673681407, + "learning_rate": 4.524141069361679e-07, + "loss": 0.6827987432479858, + "step": 6079 + }, + { + "epoch": 1.400921658986175, + "grad_norm": 1.086076018779761, + "learning_rate": 4.520953586643779e-07, + "loss": 0.6272581815719604, + "step": 6080 + }, + { + "epoch": 1.4011520737327188, + "grad_norm": 1.1153873233388363, + "learning_rate": 4.5177668992022125e-07, + "loss": 0.8041881322860718, + "step": 6081 + }, + { + "epoch": 1.4013824884792627, + "grad_norm": 0.986104576594979, + "learning_rate": 4.5145810074995194e-07, + "loss": 0.7284958362579346, + "step": 6082 + }, + { + "epoch": 1.4016129032258065, + "grad_norm": 1.227152604501521, + "learning_rate": 4.511395911998135e-07, + "loss": 0.7653781175613403, + "step": 6083 + }, + { + "epoch": 1.4018433179723502, + "grad_norm": 1.0466936448387898, + "learning_rate": 4.5082116131603677e-07, + "loss": 0.8037170171737671, + "step": 6084 + }, + { + "epoch": 1.4020737327188941, + "grad_norm": 1.1911735797842866, + "learning_rate": 4.505028111448411e-07, + "loss": 0.783043384552002, + "step": 6085 + }, + { + "epoch": 1.4023041474654379, + "grad_norm": 1.0547410930732963, + 
"learning_rate": 4.501845407324354e-07, + "loss": 0.6712161302566528, + "step": 6086 + }, + { + "epoch": 1.4025345622119816, + "grad_norm": 1.6406574524985842, + "learning_rate": 4.4986635012501575e-07, + "loss": 0.9537261724472046, + "step": 6087 + }, + { + "epoch": 1.4027649769585253, + "grad_norm": 1.4091085059994304, + "learning_rate": 4.495482393687666e-07, + "loss": 0.8984304666519165, + "step": 6088 + }, + { + "epoch": 1.402995391705069, + "grad_norm": 1.0430973660752654, + "learning_rate": 4.4923020850986224e-07, + "loss": 0.6894555687904358, + "step": 6089 + }, + { + "epoch": 1.403225806451613, + "grad_norm": 1.1542541609725157, + "learning_rate": 4.489122575944639e-07, + "loss": 0.685502290725708, + "step": 6090 + }, + { + "epoch": 1.4034562211981567, + "grad_norm": 1.1082950627991512, + "learning_rate": 4.485943866687216e-07, + "loss": 0.6794239282608032, + "step": 6091 + }, + { + "epoch": 1.4036866359447004, + "grad_norm": 1.0717636346133315, + "learning_rate": 4.482765957787744e-07, + "loss": 0.7647888660430908, + "step": 6092 + }, + { + "epoch": 1.4039170506912442, + "grad_norm": 1.3476206179513355, + "learning_rate": 4.4795888497074896e-07, + "loss": 0.798794150352478, + "step": 6093 + }, + { + "epoch": 1.404147465437788, + "grad_norm": 1.0358789181259667, + "learning_rate": 4.4764125429076026e-07, + "loss": 0.79430091381073, + "step": 6094 + }, + { + "epoch": 1.4043778801843319, + "grad_norm": 1.4040182367122596, + "learning_rate": 4.4732370378491255e-07, + "loss": 0.9089795351028442, + "step": 6095 + }, + { + "epoch": 1.4046082949308756, + "grad_norm": 0.9307801992196251, + "learning_rate": 4.4700623349929757e-07, + "loss": 0.8270718455314636, + "step": 6096 + }, + { + "epoch": 1.4048387096774193, + "grad_norm": 1.082228260794844, + "learning_rate": 4.466888434799958e-07, + "loss": 0.7550361156463623, + "step": 6097 + }, + { + "epoch": 1.4050691244239633, + "grad_norm": 1.15557625190535, + "learning_rate": 4.463715337730759e-07, + "loss": 
0.7406442165374756, + "step": 6098 + }, + { + "epoch": 1.405299539170507, + "grad_norm": 1.4065045960279658, + "learning_rate": 4.460543044245949e-07, + "loss": 0.830552875995636, + "step": 6099 + }, + { + "epoch": 1.4055299539170507, + "grad_norm": 1.4160409051991987, + "learning_rate": 4.45737155480598e-07, + "loss": 0.8961822390556335, + "step": 6100 + }, + { + "epoch": 1.4057603686635944, + "grad_norm": 1.2630678724710616, + "learning_rate": 4.454200869871195e-07, + "loss": 0.6307489275932312, + "step": 6101 + }, + { + "epoch": 1.4059907834101382, + "grad_norm": 1.437795392364305, + "learning_rate": 4.451030989901808e-07, + "loss": 0.8682084083557129, + "step": 6102 + }, + { + "epoch": 1.4062211981566821, + "grad_norm": 1.1897592960029226, + "learning_rate": 4.4478619153579323e-07, + "loss": 0.7157681584358215, + "step": 6103 + }, + { + "epoch": 1.4064516129032258, + "grad_norm": 1.196767224907471, + "learning_rate": 4.4446936466995486e-07, + "loss": 0.7267071008682251, + "step": 6104 + }, + { + "epoch": 1.4066820276497696, + "grad_norm": 1.1191501401801882, + "learning_rate": 4.4415261843865246e-07, + "loss": 0.8435063362121582, + "step": 6105 + }, + { + "epoch": 1.4069124423963133, + "grad_norm": 1.2220260712556485, + "learning_rate": 4.43835952887862e-07, + "loss": 0.8895175457000732, + "step": 6106 + }, + { + "epoch": 1.407142857142857, + "grad_norm": 1.0150052474935476, + "learning_rate": 4.435193680635467e-07, + "loss": 0.7470073699951172, + "step": 6107 + }, + { + "epoch": 1.407373271889401, + "grad_norm": 1.376675993117338, + "learning_rate": 4.432028640116581e-07, + "loss": 0.7993630170822144, + "step": 6108 + }, + { + "epoch": 1.4076036866359447, + "grad_norm": 1.2675455750766673, + "learning_rate": 4.4288644077813695e-07, + "loss": 0.823069155216217, + "step": 6109 + }, + { + "epoch": 1.4078341013824884, + "grad_norm": 1.374585518914166, + "learning_rate": 4.4257009840891146e-07, + "loss": 0.7665367126464844, + "step": 6110 + }, + { + "epoch": 
1.4080645161290324, + "grad_norm": 1.1174810423449963, + "learning_rate": 4.422538369498979e-07, + "loss": 0.7173991799354553, + "step": 6111 + }, + { + "epoch": 1.4082949308755761, + "grad_norm": 0.9476955630635919, + "learning_rate": 4.4193765644700186e-07, + "loss": 0.8288347125053406, + "step": 6112 + }, + { + "epoch": 1.4085253456221198, + "grad_norm": 1.206088367901853, + "learning_rate": 4.4162155694611636e-07, + "loss": 0.8589911460876465, + "step": 6113 + }, + { + "epoch": 1.4087557603686636, + "grad_norm": 1.2884473987369411, + "learning_rate": 4.4130553849312213e-07, + "loss": 0.8783868551254272, + "step": 6114 + }, + { + "epoch": 1.4089861751152073, + "grad_norm": 1.0994332560949611, + "learning_rate": 4.409896011338898e-07, + "loss": 0.7625287771224976, + "step": 6115 + }, + { + "epoch": 1.4092165898617512, + "grad_norm": 1.1571434855502665, + "learning_rate": 4.406737449142769e-07, + "loss": 0.7412571907043457, + "step": 6116 + }, + { + "epoch": 1.409447004608295, + "grad_norm": 0.9525276096114424, + "learning_rate": 4.4035796988012943e-07, + "loss": 0.6248455047607422, + "step": 6117 + }, + { + "epoch": 1.4096774193548387, + "grad_norm": 1.1843810443395109, + "learning_rate": 4.400422760772817e-07, + "loss": 0.7970919609069824, + "step": 6118 + }, + { + "epoch": 1.4099078341013824, + "grad_norm": 1.0403384039115238, + "learning_rate": 4.397266635515563e-07, + "loss": 0.6184223294258118, + "step": 6119 + }, + { + "epoch": 1.4101382488479262, + "grad_norm": 1.07818776364935, + "learning_rate": 4.394111323487637e-07, + "loss": 0.9014843702316284, + "step": 6120 + }, + { + "epoch": 1.41036866359447, + "grad_norm": 1.1660248005288976, + "learning_rate": 4.390956825147034e-07, + "loss": 0.8468939661979675, + "step": 6121 + }, + { + "epoch": 1.4105990783410138, + "grad_norm": 1.0810631729189881, + "learning_rate": 4.3878031409516234e-07, + "loss": 0.7832604646682739, + "step": 6122 + }, + { + "epoch": 1.4108294930875576, + "grad_norm": 1.0700225295832282, + 
"learning_rate": 4.3846502713591527e-07, + "loss": 0.7202898263931274, + "step": 6123 + }, + { + "epoch": 1.4110599078341015, + "grad_norm": 1.1788285042234896, + "learning_rate": 4.3814982168272664e-07, + "loss": 0.6785540580749512, + "step": 6124 + }, + { + "epoch": 1.4112903225806452, + "grad_norm": 1.3040233352486812, + "learning_rate": 4.378346977813474e-07, + "loss": 0.795532763004303, + "step": 6125 + }, + { + "epoch": 1.411520737327189, + "grad_norm": 0.8875056644654742, + "learning_rate": 4.3751965547751735e-07, + "loss": 0.7715259790420532, + "step": 6126 + }, + { + "epoch": 1.4117511520737327, + "grad_norm": 1.4252318364105403, + "learning_rate": 4.37204694816965e-07, + "loss": 0.8657132983207703, + "step": 6127 + }, + { + "epoch": 1.4119815668202764, + "grad_norm": 1.0207817658354317, + "learning_rate": 4.3688981584540586e-07, + "loss": 0.7253363132476807, + "step": 6128 + }, + { + "epoch": 1.4122119815668204, + "grad_norm": 1.2055823367063212, + "learning_rate": 4.365750186085447e-07, + "loss": 0.8511998653411865, + "step": 6129 + }, + { + "epoch": 1.412442396313364, + "grad_norm": 1.3256931814656627, + "learning_rate": 4.3626030315207386e-07, + "loss": 0.7936528921127319, + "step": 6130 + }, + { + "epoch": 1.4126728110599078, + "grad_norm": 1.1878967804503957, + "learning_rate": 4.3594566952167324e-07, + "loss": 0.758521556854248, + "step": 6131 + }, + { + "epoch": 1.4129032258064516, + "grad_norm": 1.242405288398936, + "learning_rate": 4.3563111776301243e-07, + "loss": 0.8202048540115356, + "step": 6132 + }, + { + "epoch": 1.4131336405529953, + "grad_norm": 1.075213759854547, + "learning_rate": 4.3531664792174773e-07, + "loss": 0.7864067554473877, + "step": 6133 + }, + { + "epoch": 1.4133640552995392, + "grad_norm": 1.472991105564755, + "learning_rate": 4.350022600435236e-07, + "loss": 0.8051233291625977, + "step": 6134 + }, + { + "epoch": 1.413594470046083, + "grad_norm": 1.0811225554895896, + "learning_rate": 4.34687954173974e-07, + "loss": 
0.7617348432540894, + "step": 6135 + }, + { + "epoch": 1.4138248847926267, + "grad_norm": 1.299621377240526, + "learning_rate": 4.3437373035871927e-07, + "loss": 0.7899652719497681, + "step": 6136 + }, + { + "epoch": 1.4140552995391706, + "grad_norm": 1.1704157180732915, + "learning_rate": 4.340595886433689e-07, + "loss": 0.8467222452163696, + "step": 6137 + }, + { + "epoch": 1.4142857142857144, + "grad_norm": 1.294364382858993, + "learning_rate": 4.3374552907352003e-07, + "loss": 0.8451426029205322, + "step": 6138 + }, + { + "epoch": 1.414516129032258, + "grad_norm": 1.1053072195052795, + "learning_rate": 4.3343155169475797e-07, + "loss": 0.7140414714813232, + "step": 6139 + }, + { + "epoch": 1.4147465437788018, + "grad_norm": 1.365344165744123, + "learning_rate": 4.331176565526558e-07, + "loss": 0.7680803537368774, + "step": 6140 + }, + { + "epoch": 1.4149769585253456, + "grad_norm": 1.0970331390876962, + "learning_rate": 4.328038436927757e-07, + "loss": 0.7262120246887207, + "step": 6141 + }, + { + "epoch": 1.4152073732718895, + "grad_norm": 1.2176292189863585, + "learning_rate": 4.3249011316066676e-07, + "loss": 0.7788687944412231, + "step": 6142 + }, + { + "epoch": 1.4154377880184332, + "grad_norm": 1.4880584379115793, + "learning_rate": 4.321764650018662e-07, + "loss": 0.7613503336906433, + "step": 6143 + }, + { + "epoch": 1.415668202764977, + "grad_norm": 0.9554644370778598, + "learning_rate": 4.3186289926190056e-07, + "loss": 0.6778309345245361, + "step": 6144 + }, + { + "epoch": 1.4158986175115207, + "grad_norm": 1.5159867718873894, + "learning_rate": 4.315494159862829e-07, + "loss": 0.8626673221588135, + "step": 6145 + }, + { + "epoch": 1.4161290322580644, + "grad_norm": 1.194727935560369, + "learning_rate": 4.312360152205147e-07, + "loss": 0.8321051597595215, + "step": 6146 + }, + { + "epoch": 1.4163594470046084, + "grad_norm": 1.146293428483721, + "learning_rate": 4.309226970100861e-07, + "loss": 0.9317119717597961, + "step": 6147 + }, + { + "epoch": 
1.416589861751152, + "grad_norm": 1.4669878139895565, + "learning_rate": 4.306094614004748e-07, + "loss": 0.9479870200157166, + "step": 6148 + }, + { + "epoch": 1.4168202764976958, + "grad_norm": 1.0166991353273056, + "learning_rate": 4.3029630843714606e-07, + "loss": 0.8222699165344238, + "step": 6149 + }, + { + "epoch": 1.4170506912442398, + "grad_norm": 1.427356205375722, + "learning_rate": 4.2998323816555427e-07, + "loss": 0.8232519030570984, + "step": 6150 + }, + { + "epoch": 1.4172811059907833, + "grad_norm": 1.156719588287236, + "learning_rate": 4.2967025063114057e-07, + "loss": 0.7423735857009888, + "step": 6151 + }, + { + "epoch": 1.4175115207373272, + "grad_norm": 1.1009896479281802, + "learning_rate": 4.2935734587933527e-07, + "loss": 0.6947557926177979, + "step": 6152 + }, + { + "epoch": 1.417741935483871, + "grad_norm": 1.2980025668504918, + "learning_rate": 4.290445239555558e-07, + "loss": 0.789128303527832, + "step": 6153 + }, + { + "epoch": 1.4179723502304147, + "grad_norm": 1.344185599290992, + "learning_rate": 4.2873178490520745e-07, + "loss": 0.8025885820388794, + "step": 6154 + }, + { + "epoch": 1.4182027649769586, + "grad_norm": 1.3491619317054568, + "learning_rate": 4.284191287736847e-07, + "loss": 0.8139045238494873, + "step": 6155 + }, + { + "epoch": 1.4184331797235024, + "grad_norm": 1.1246209635446252, + "learning_rate": 4.2810655560636864e-07, + "loss": 0.8154167532920837, + "step": 6156 + }, + { + "epoch": 1.418663594470046, + "grad_norm": 1.0954033524128675, + "learning_rate": 4.2779406544862896e-07, + "loss": 0.6383910775184631, + "step": 6157 + }, + { + "epoch": 1.4188940092165898, + "grad_norm": 1.217902628448707, + "learning_rate": 4.2748165834582316e-07, + "loss": 0.7008179426193237, + "step": 6158 + }, + { + "epoch": 1.4191244239631335, + "grad_norm": 1.2584275851601723, + "learning_rate": 4.2716933434329684e-07, + "loss": 0.9458012580871582, + "step": 6159 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 1.1170402428175406, 
+ "learning_rate": 4.268570934863829e-07, + "loss": 0.7354133725166321, + "step": 6160 + }, + { + "epoch": 1.4195852534562212, + "grad_norm": 1.050503834766047, + "learning_rate": 4.265449358204034e-07, + "loss": 0.7146268486976624, + "step": 6161 + }, + { + "epoch": 1.419815668202765, + "grad_norm": 1.3602740783757037, + "learning_rate": 4.262328613906674e-07, + "loss": 0.7357315421104431, + "step": 6162 + }, + { + "epoch": 1.4200460829493087, + "grad_norm": 1.5139772991772644, + "learning_rate": 4.2592087024247157e-07, + "loss": 0.8006314039230347, + "step": 6163 + }, + { + "epoch": 1.4202764976958524, + "grad_norm": 1.2194249079603743, + "learning_rate": 4.256089624211018e-07, + "loss": 0.8299369812011719, + "step": 6164 + }, + { + "epoch": 1.4205069124423964, + "grad_norm": 1.3878054713959478, + "learning_rate": 4.252971379718308e-07, + "loss": 0.7018890380859375, + "step": 6165 + }, + { + "epoch": 1.42073732718894, + "grad_norm": 1.0332854509364862, + "learning_rate": 4.24985396939919e-07, + "loss": 0.6501315236091614, + "step": 6166 + }, + { + "epoch": 1.4209677419354838, + "grad_norm": 1.6385767983913562, + "learning_rate": 4.24673739370616e-07, + "loss": 0.8379749059677124, + "step": 6167 + }, + { + "epoch": 1.4211981566820278, + "grad_norm": 1.3590615179836698, + "learning_rate": 4.24362165309158e-07, + "loss": 0.7996747493743896, + "step": 6168 + }, + { + "epoch": 1.4214285714285715, + "grad_norm": 1.2270246479776195, + "learning_rate": 4.240506748007695e-07, + "loss": 0.7258181571960449, + "step": 6169 + }, + { + "epoch": 1.4216589861751152, + "grad_norm": 0.9997463365032918, + "learning_rate": 4.237392678906633e-07, + "loss": 0.6035803556442261, + "step": 6170 + }, + { + "epoch": 1.421889400921659, + "grad_norm": 1.1041316785012205, + "learning_rate": 4.2342794462403954e-07, + "loss": 0.7668799757957458, + "step": 6171 + }, + { + "epoch": 1.4221198156682027, + "grad_norm": 0.9385556238542058, + "learning_rate": 4.23116705046086e-07, + "loss": 
0.7816733121871948, + "step": 6172 + }, + { + "epoch": 1.4223502304147466, + "grad_norm": 1.2003519134278278, + "learning_rate": 4.228055492019793e-07, + "loss": 0.8753983974456787, + "step": 6173 + }, + { + "epoch": 1.4225806451612903, + "grad_norm": 1.1591394093837553, + "learning_rate": 4.224944771368831e-07, + "loss": 0.8319464921951294, + "step": 6174 + }, + { + "epoch": 1.422811059907834, + "grad_norm": 1.1444278460686073, + "learning_rate": 4.2218348889594866e-07, + "loss": 0.6670328378677368, + "step": 6175 + }, + { + "epoch": 1.4230414746543778, + "grad_norm": 0.9949133230999909, + "learning_rate": 4.218725845243163e-07, + "loss": 0.7879645824432373, + "step": 6176 + }, + { + "epoch": 1.4232718894009215, + "grad_norm": 1.1897456513351008, + "learning_rate": 4.2156176406711287e-07, + "loss": 0.709680438041687, + "step": 6177 + }, + { + "epoch": 1.4235023041474655, + "grad_norm": 1.2454467445687987, + "learning_rate": 4.2125102756945364e-07, + "loss": 0.7990894317626953, + "step": 6178 + }, + { + "epoch": 1.4237327188940092, + "grad_norm": 0.899401568311558, + "learning_rate": 4.2094037507644165e-07, + "loss": 0.7283308506011963, + "step": 6179 + }, + { + "epoch": 1.423963133640553, + "grad_norm": 1.1017464258775596, + "learning_rate": 4.2062980663316715e-07, + "loss": 0.8763309717178345, + "step": 6180 + }, + { + "epoch": 1.4241935483870969, + "grad_norm": 1.5313476968397717, + "learning_rate": 4.2031932228470966e-07, + "loss": 0.9370014667510986, + "step": 6181 + }, + { + "epoch": 1.4244239631336406, + "grad_norm": 1.2317913481286529, + "learning_rate": 4.2000892207613526e-07, + "loss": 0.7883036136627197, + "step": 6182 + }, + { + "epoch": 1.4246543778801843, + "grad_norm": 1.0986212570485994, + "learning_rate": 4.196986060524975e-07, + "loss": 0.7021682262420654, + "step": 6183 + }, + { + "epoch": 1.424884792626728, + "grad_norm": 1.6809928588875014, + "learning_rate": 4.193883742588393e-07, + "loss": 0.842636227607727, + "step": 6184 + }, + { + "epoch": 
1.4251152073732718, + "grad_norm": 1.3804520546599122, + "learning_rate": 4.190782267401899e-07, + "loss": 0.8003957867622375, + "step": 6185 + }, + { + "epoch": 1.4253456221198157, + "grad_norm": 1.4234115388616575, + "learning_rate": 4.1876816354156655e-07, + "loss": 0.9799495935440063, + "step": 6186 + }, + { + "epoch": 1.4255760368663595, + "grad_norm": 1.4430834747300494, + "learning_rate": 4.184581847079751e-07, + "loss": 0.8726102113723755, + "step": 6187 + }, + { + "epoch": 1.4258064516129032, + "grad_norm": 1.4779961873749974, + "learning_rate": 4.181482902844082e-07, + "loss": 0.8771729469299316, + "step": 6188 + }, + { + "epoch": 1.426036866359447, + "grad_norm": 0.932904262005563, + "learning_rate": 4.1783848031584644e-07, + "loss": 0.5891281962394714, + "step": 6189 + }, + { + "epoch": 1.4262672811059907, + "grad_norm": 1.0356433358815755, + "learning_rate": 4.1752875484725904e-07, + "loss": 0.8133054971694946, + "step": 6190 + }, + { + "epoch": 1.4264976958525346, + "grad_norm": 1.2051464792634443, + "learning_rate": 4.1721911392360164e-07, + "loss": 0.7175684571266174, + "step": 6191 + }, + { + "epoch": 1.4267281105990783, + "grad_norm": 1.2483759508518841, + "learning_rate": 4.16909557589818e-07, + "loss": 0.7112927436828613, + "step": 6192 + }, + { + "epoch": 1.426958525345622, + "grad_norm": 1.3756845434805187, + "learning_rate": 4.166000858908406e-07, + "loss": 0.8564406037330627, + "step": 6193 + }, + { + "epoch": 1.427188940092166, + "grad_norm": 1.2070686503198162, + "learning_rate": 4.162906988715883e-07, + "loss": 0.7630729675292969, + "step": 6194 + }, + { + "epoch": 1.4274193548387097, + "grad_norm": 0.971140934311516, + "learning_rate": 4.1598139657696806e-07, + "loss": 0.6810768246650696, + "step": 6195 + }, + { + "epoch": 1.4276497695852535, + "grad_norm": 0.9185719080310675, + "learning_rate": 4.1567217905187535e-07, + "loss": 0.8482312560081482, + "step": 6196 + }, + { + "epoch": 1.4278801843317972, + "grad_norm": 1.4356078879259653, 
+ "learning_rate": 4.1536304634119225e-07, + "loss": 0.845355749130249, + "step": 6197 + }, + { + "epoch": 1.428110599078341, + "grad_norm": 1.3990653285356356, + "learning_rate": 4.1505399848978896e-07, + "loss": 0.8082824349403381, + "step": 6198 + }, + { + "epoch": 1.4283410138248849, + "grad_norm": 1.5497395393382225, + "learning_rate": 4.147450355425235e-07, + "loss": 0.8141404390335083, + "step": 6199 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 1.0209015709753073, + "learning_rate": 4.14436157544241e-07, + "loss": 0.8144549131393433, + "step": 6200 + }, + { + "epoch": 1.4288018433179723, + "grad_norm": 1.2316152605954584, + "learning_rate": 4.141273645397754e-07, + "loss": 0.6554359793663025, + "step": 6201 + }, + { + "epoch": 1.429032258064516, + "grad_norm": 1.2095729612520494, + "learning_rate": 4.138186565739472e-07, + "loss": 0.8035449981689453, + "step": 6202 + }, + { + "epoch": 1.4292626728110598, + "grad_norm": 1.348688453980758, + "learning_rate": 4.1351003369156467e-07, + "loss": 0.7848105430603027, + "step": 6203 + }, + { + "epoch": 1.4294930875576037, + "grad_norm": 1.167048125389705, + "learning_rate": 4.132014959374246e-07, + "loss": 0.7064214944839478, + "step": 6204 + }, + { + "epoch": 1.4297235023041475, + "grad_norm": 1.236002479887974, + "learning_rate": 4.128930433563107e-07, + "loss": 0.7636318802833557, + "step": 6205 + }, + { + "epoch": 1.4299539170506912, + "grad_norm": 1.2440935326289273, + "learning_rate": 4.1258467599299395e-07, + "loss": 0.6839499473571777, + "step": 6206 + }, + { + "epoch": 1.4301843317972351, + "grad_norm": 1.1802386777878584, + "learning_rate": 4.122763938922341e-07, + "loss": 0.8355294466018677, + "step": 6207 + }, + { + "epoch": 1.4304147465437789, + "grad_norm": 1.1238131581281627, + "learning_rate": 4.1196819709877773e-07, + "loss": 0.7563334107398987, + "step": 6208 + }, + { + "epoch": 1.4306451612903226, + "grad_norm": 1.1336601077663977, + "learning_rate": 4.116600856573588e-07, + "loss": 
0.6991991996765137, + "step": 6209 + }, + { + "epoch": 1.4308755760368663, + "grad_norm": 1.2669311049959366, + "learning_rate": 4.113520596126998e-07, + "loss": 0.7249872088432312, + "step": 6210 + }, + { + "epoch": 1.43110599078341, + "grad_norm": 0.9386622429459606, + "learning_rate": 4.110441190095101e-07, + "loss": 0.6570736169815063, + "step": 6211 + }, + { + "epoch": 1.431336405529954, + "grad_norm": 1.0652944602016763, + "learning_rate": 4.107362638924865e-07, + "loss": 0.7137724161148071, + "step": 6212 + }, + { + "epoch": 1.4315668202764977, + "grad_norm": 1.1571956532799377, + "learning_rate": 4.1042849430631453e-07, + "loss": 0.7620561122894287, + "step": 6213 + }, + { + "epoch": 1.4317972350230415, + "grad_norm": 1.118516282963539, + "learning_rate": 4.1012081029566616e-07, + "loss": 0.8186367750167847, + "step": 6214 + }, + { + "epoch": 1.4320276497695852, + "grad_norm": 1.2414517851095686, + "learning_rate": 4.098132119052008e-07, + "loss": 0.8068171739578247, + "step": 6215 + }, + { + "epoch": 1.432258064516129, + "grad_norm": 1.3160335320341774, + "learning_rate": 4.095056991795668e-07, + "loss": 0.8640002012252808, + "step": 6216 + }, + { + "epoch": 1.4324884792626729, + "grad_norm": 1.4376158954775202, + "learning_rate": 4.0919827216339887e-07, + "loss": 0.8886386156082153, + "step": 6217 + }, + { + "epoch": 1.4327188940092166, + "grad_norm": 1.072787779438559, + "learning_rate": 4.0889093090131965e-07, + "loss": 0.6853137016296387, + "step": 6218 + }, + { + "epoch": 1.4329493087557603, + "grad_norm": 1.0751813749856631, + "learning_rate": 4.0858367543793923e-07, + "loss": 0.7423670291900635, + "step": 6219 + }, + { + "epoch": 1.4331797235023043, + "grad_norm": 1.2596005033506457, + "learning_rate": 4.0827650581785544e-07, + "loss": 0.7969200611114502, + "step": 6220 + }, + { + "epoch": 1.433410138248848, + "grad_norm": 1.1441853902577663, + "learning_rate": 4.079694220856531e-07, + "loss": 0.8506221771240234, + "step": 6221 + }, + { + "epoch": 
1.4336405529953917, + "grad_norm": 1.107985966829949, + "learning_rate": 4.076624242859058e-07, + "loss": 0.6755083799362183, + "step": 6222 + }, + { + "epoch": 1.4338709677419355, + "grad_norm": 1.0751582832116895, + "learning_rate": 4.0735551246317333e-07, + "loss": 0.7734944820404053, + "step": 6223 + }, + { + "epoch": 1.4341013824884792, + "grad_norm": 1.1828392807290495, + "learning_rate": 4.0704868666200345e-07, + "loss": 0.8564216494560242, + "step": 6224 + }, + { + "epoch": 1.4343317972350231, + "grad_norm": 0.8521811929477493, + "learning_rate": 4.067419469269321e-07, + "loss": 0.6858065128326416, + "step": 6225 + }, + { + "epoch": 1.4345622119815669, + "grad_norm": 1.4454169020848073, + "learning_rate": 4.064352933024813e-07, + "loss": 0.684749960899353, + "step": 6226 + }, + { + "epoch": 1.4347926267281106, + "grad_norm": 1.0124943930771644, + "learning_rate": 4.061287258331624e-07, + "loss": 0.7648766040802002, + "step": 6227 + }, + { + "epoch": 1.4350230414746543, + "grad_norm": 1.2226521022766697, + "learning_rate": 4.058222445634727e-07, + "loss": 0.924850583076477, + "step": 6228 + }, + { + "epoch": 1.435253456221198, + "grad_norm": 1.2841804739911125, + "learning_rate": 4.055158495378972e-07, + "loss": 0.906406581401825, + "step": 6229 + }, + { + "epoch": 1.435483870967742, + "grad_norm": 1.1497462597145154, + "learning_rate": 4.052095408009095e-07, + "loss": 0.9169156551361084, + "step": 6230 + }, + { + "epoch": 1.4357142857142857, + "grad_norm": 0.9291011874506654, + "learning_rate": 4.0490331839696967e-07, + "loss": 0.7367587685585022, + "step": 6231 + }, + { + "epoch": 1.4359447004608294, + "grad_norm": 0.9837392218179005, + "learning_rate": 4.045971823705249e-07, + "loss": 0.7608749270439148, + "step": 6232 + }, + { + "epoch": 1.4361751152073734, + "grad_norm": 1.006459600101246, + "learning_rate": 4.0429113276601134e-07, + "loss": 0.7008038759231567, + "step": 6233 + }, + { + "epoch": 1.4364055299539171, + "grad_norm": 1.3644950830796674, + 
"learning_rate": 4.039851696278511e-07, + "loss": 0.8581372499465942, + "step": 6234 + }, + { + "epoch": 1.4366359447004609, + "grad_norm": 1.1117269621825037, + "learning_rate": 4.036792930004542e-07, + "loss": 0.6602354049682617, + "step": 6235 + }, + { + "epoch": 1.4368663594470046, + "grad_norm": 1.1136625894629528, + "learning_rate": 4.0337350292821893e-07, + "loss": 0.8560018539428711, + "step": 6236 + }, + { + "epoch": 1.4370967741935483, + "grad_norm": 1.5699670277885023, + "learning_rate": 4.030677994555298e-07, + "loss": 0.8837640285491943, + "step": 6237 + }, + { + "epoch": 1.4373271889400923, + "grad_norm": 1.1788518631283098, + "learning_rate": 4.027621826267593e-07, + "loss": 0.8214797973632812, + "step": 6238 + }, + { + "epoch": 1.437557603686636, + "grad_norm": 1.091488147712342, + "learning_rate": 4.024566524862675e-07, + "loss": 0.7590944766998291, + "step": 6239 + }, + { + "epoch": 1.4377880184331797, + "grad_norm": 1.5224250495012106, + "learning_rate": 4.021512090784014e-07, + "loss": 0.8792011141777039, + "step": 6240 + }, + { + "epoch": 1.4380184331797234, + "grad_norm": 0.9801567843215049, + "learning_rate": 4.0184585244749556e-07, + "loss": 0.8309401273727417, + "step": 6241 + }, + { + "epoch": 1.4382488479262672, + "grad_norm": 1.2518924977337436, + "learning_rate": 4.015405826378727e-07, + "loss": 0.7474797964096069, + "step": 6242 + }, + { + "epoch": 1.4384792626728111, + "grad_norm": 1.0203221096159534, + "learning_rate": 4.012353996938421e-07, + "loss": 0.7376091480255127, + "step": 6243 + }, + { + "epoch": 1.4387096774193548, + "grad_norm": 1.4049798692682764, + "learning_rate": 4.0093030365970014e-07, + "loss": 0.7809054851531982, + "step": 6244 + }, + { + "epoch": 1.4389400921658986, + "grad_norm": 1.206100995388555, + "learning_rate": 4.0062529457973194e-07, + "loss": 0.8551669120788574, + "step": 6245 + }, + { + "epoch": 1.4391705069124425, + "grad_norm": 1.3285364918408127, + "learning_rate": 4.0032037249820874e-07, + "loss": 
0.7874705791473389, + "step": 6246 + }, + { + "epoch": 1.4394009216589863, + "grad_norm": 1.220500481419073, + "learning_rate": 4.0001553745938923e-07, + "loss": 0.8032190799713135, + "step": 6247 + }, + { + "epoch": 1.43963133640553, + "grad_norm": 1.1833761956090303, + "learning_rate": 3.9971078950752057e-07, + "loss": 0.7600107192993164, + "step": 6248 + }, + { + "epoch": 1.4398617511520737, + "grad_norm": 1.0770488794400255, + "learning_rate": 3.994061286868361e-07, + "loss": 0.7738933563232422, + "step": 6249 + }, + { + "epoch": 1.4400921658986174, + "grad_norm": 1.2036013798832181, + "learning_rate": 3.9910155504155665e-07, + "loss": 0.701007604598999, + "step": 6250 + }, + { + "epoch": 1.4403225806451614, + "grad_norm": 1.2067244620095277, + "learning_rate": 3.9879706861589126e-07, + "loss": 0.8962818384170532, + "step": 6251 + }, + { + "epoch": 1.4405529953917051, + "grad_norm": 1.4532648423769148, + "learning_rate": 3.9849266945403513e-07, + "loss": 0.7636146545410156, + "step": 6252 + }, + { + "epoch": 1.4407834101382488, + "grad_norm": 1.4158432417231142, + "learning_rate": 3.981883576001722e-07, + "loss": 0.8816943168640137, + "step": 6253 + }, + { + "epoch": 1.4410138248847926, + "grad_norm": 1.2321816109724755, + "learning_rate": 3.978841330984725e-07, + "loss": 0.7252858877182007, + "step": 6254 + }, + { + "epoch": 1.4412442396313363, + "grad_norm": 1.1568327683598156, + "learning_rate": 3.975799959930932e-07, + "loss": 0.6720175743103027, + "step": 6255 + }, + { + "epoch": 1.4414746543778802, + "grad_norm": 0.981779637597959, + "learning_rate": 3.972759463281805e-07, + "loss": 0.8000779151916504, + "step": 6256 + }, + { + "epoch": 1.441705069124424, + "grad_norm": 1.2561538909400267, + "learning_rate": 3.9697198414786626e-07, + "loss": 0.7356371283531189, + "step": 6257 + }, + { + "epoch": 1.4419354838709677, + "grad_norm": 1.3228468777834088, + "learning_rate": 3.966681094962703e-07, + "loss": 0.708438515663147, + "step": 6258 + }, + { + "epoch": 
1.4421658986175117, + "grad_norm": 1.1635121950639566, + "learning_rate": 3.963643224174994e-07, + "loss": 0.709287166595459, + "step": 6259 + }, + { + "epoch": 1.4423963133640554, + "grad_norm": 1.2638923885979756, + "learning_rate": 3.9606062295564813e-07, + "loss": 0.743755578994751, + "step": 6260 + }, + { + "epoch": 1.442626728110599, + "grad_norm": 1.119467668131696, + "learning_rate": 3.9575701115479744e-07, + "loss": 0.9727948904037476, + "step": 6261 + }, + { + "epoch": 1.4428571428571428, + "grad_norm": 1.165539680123963, + "learning_rate": 3.9545348705901703e-07, + "loss": 0.9070688486099243, + "step": 6262 + }, + { + "epoch": 1.4430875576036866, + "grad_norm": 1.3995169117674358, + "learning_rate": 3.951500507123627e-07, + "loss": 0.8167496919631958, + "step": 6263 + }, + { + "epoch": 1.4433179723502305, + "grad_norm": 1.1204443462300027, + "learning_rate": 3.948467021588775e-07, + "loss": 0.7691773772239685, + "step": 6264 + }, + { + "epoch": 1.4435483870967742, + "grad_norm": 1.2915211655205685, + "learning_rate": 3.945434414425927e-07, + "loss": 0.7638411521911621, + "step": 6265 + }, + { + "epoch": 1.443778801843318, + "grad_norm": 1.0311097608426527, + "learning_rate": 3.942402686075258e-07, + "loss": 0.8138284683227539, + "step": 6266 + }, + { + "epoch": 1.4440092165898617, + "grad_norm": 1.430800234304149, + "learning_rate": 3.939371836976816e-07, + "loss": 0.8404628038406372, + "step": 6267 + }, + { + "epoch": 1.4442396313364054, + "grad_norm": 1.0744818989251388, + "learning_rate": 3.936341867570533e-07, + "loss": 0.7354726791381836, + "step": 6268 + }, + { + "epoch": 1.4444700460829494, + "grad_norm": 1.2516347720495873, + "learning_rate": 3.9333127782962003e-07, + "loss": 0.8607511520385742, + "step": 6269 + }, + { + "epoch": 1.444700460829493, + "grad_norm": 1.03787633948696, + "learning_rate": 3.930284569593483e-07, + "loss": 0.7372239232063293, + "step": 6270 + }, + { + "epoch": 1.4449308755760368, + "grad_norm": 1.205690175362699, + 
"learning_rate": 3.927257241901929e-07, + "loss": 0.8902593851089478, + "step": 6271 + }, + { + "epoch": 1.4451612903225808, + "grad_norm": 1.0978426997676995, + "learning_rate": 3.924230795660947e-07, + "loss": 0.7481765747070312, + "step": 6272 + }, + { + "epoch": 1.4453917050691243, + "grad_norm": 1.1624854693895736, + "learning_rate": 3.9212052313098177e-07, + "loss": 0.6868888139724731, + "step": 6273 + }, + { + "epoch": 1.4456221198156682, + "grad_norm": 1.219538424407328, + "learning_rate": 3.918180549287705e-07, + "loss": 0.6867324709892273, + "step": 6274 + }, + { + "epoch": 1.445852534562212, + "grad_norm": 1.4192898010151693, + "learning_rate": 3.9151567500336323e-07, + "loss": 0.8473105430603027, + "step": 6275 + }, + { + "epoch": 1.4460829493087557, + "grad_norm": 1.2236253801186994, + "learning_rate": 3.912133833986504e-07, + "loss": 0.7629631757736206, + "step": 6276 + }, + { + "epoch": 1.4463133640552996, + "grad_norm": 1.0502703605539807, + "learning_rate": 3.909111801585091e-07, + "loss": 0.9501597881317139, + "step": 6277 + }, + { + "epoch": 1.4465437788018434, + "grad_norm": 1.0568805239624584, + "learning_rate": 3.906090653268037e-07, + "loss": 0.7330536842346191, + "step": 6278 + }, + { + "epoch": 1.446774193548387, + "grad_norm": 1.199243558298224, + "learning_rate": 3.903070389473857e-07, + "loss": 0.907101571559906, + "step": 6279 + }, + { + "epoch": 1.4470046082949308, + "grad_norm": 1.1269939172893009, + "learning_rate": 3.900051010640939e-07, + "loss": 0.8177503347396851, + "step": 6280 + }, + { + "epoch": 1.4472350230414746, + "grad_norm": 1.373102048695832, + "learning_rate": 3.897032517207538e-07, + "loss": 0.7851059436798096, + "step": 6281 + }, + { + "epoch": 1.4474654377880185, + "grad_norm": 0.8801777971944739, + "learning_rate": 3.8940149096117914e-07, + "loss": 0.7056214809417725, + "step": 6282 + }, + { + "epoch": 1.4476958525345622, + "grad_norm": 1.0831833275731695, + "learning_rate": 3.8909981882916975e-07, + "loss": 
0.784143328666687, + "step": 6283 + }, + { + "epoch": 1.447926267281106, + "grad_norm": 1.2368924313085696, + "learning_rate": 3.8879823536851253e-07, + "loss": 0.8157210350036621, + "step": 6284 + }, + { + "epoch": 1.4481566820276497, + "grad_norm": 1.276176943713772, + "learning_rate": 3.884967406229828e-07, + "loss": 0.7329680323600769, + "step": 6285 + }, + { + "epoch": 1.4483870967741934, + "grad_norm": 1.4518343581804805, + "learning_rate": 3.8819533463634145e-07, + "loss": 0.9214208722114563, + "step": 6286 + }, + { + "epoch": 1.4486175115207374, + "grad_norm": 1.835142969551997, + "learning_rate": 3.8789401745233706e-07, + "loss": 0.8118722438812256, + "step": 6287 + }, + { + "epoch": 1.448847926267281, + "grad_norm": 1.0485981202236783, + "learning_rate": 3.8759278911470615e-07, + "loss": 0.7517364025115967, + "step": 6288 + }, + { + "epoch": 1.4490783410138248, + "grad_norm": 1.0879409814064, + "learning_rate": 3.872916496671711e-07, + "loss": 0.8979834318161011, + "step": 6289 + }, + { + "epoch": 1.4493087557603688, + "grad_norm": 1.6674549792368192, + "learning_rate": 3.8699059915344166e-07, + "loss": 0.9159818887710571, + "step": 6290 + }, + { + "epoch": 1.4495391705069125, + "grad_norm": 1.2582380909324238, + "learning_rate": 3.8668963761721563e-07, + "loss": 0.8176029324531555, + "step": 6291 + }, + { + "epoch": 1.4497695852534562, + "grad_norm": 1.3257834277786367, + "learning_rate": 3.8638876510217666e-07, + "loss": 0.7077589631080627, + "step": 6292 + }, + { + "epoch": 1.45, + "grad_norm": 1.0304546829516872, + "learning_rate": 3.8608798165199585e-07, + "loss": 0.8107718825340271, + "step": 6293 + }, + { + "epoch": 1.4502304147465437, + "grad_norm": 1.278146889045901, + "learning_rate": 3.8578728731033214e-07, + "loss": 0.9021201133728027, + "step": 6294 + }, + { + "epoch": 1.4504608294930876, + "grad_norm": 1.5907360314325336, + "learning_rate": 3.854866821208306e-07, + "loss": 0.9134507179260254, + "step": 6295 + }, + { + "epoch": 
1.4506912442396314, + "grad_norm": 1.2431886164023473, + "learning_rate": 3.8518616612712317e-07, + "loss": 0.9081463813781738, + "step": 6296 + }, + { + "epoch": 1.450921658986175, + "grad_norm": 1.394869861453301, + "learning_rate": 3.848857393728303e-07, + "loss": 0.7892032861709595, + "step": 6297 + }, + { + "epoch": 1.4511520737327188, + "grad_norm": 1.1702087372951315, + "learning_rate": 3.8458540190155796e-07, + "loss": 0.753928542137146, + "step": 6298 + }, + { + "epoch": 1.4513824884792625, + "grad_norm": 1.1800339185606825, + "learning_rate": 3.8428515375689996e-07, + "loss": 0.6316792964935303, + "step": 6299 + }, + { + "epoch": 1.4516129032258065, + "grad_norm": 1.0510746352372813, + "learning_rate": 3.8398499498243665e-07, + "loss": 0.6569210290908813, + "step": 6300 + }, + { + "epoch": 1.4518433179723502, + "grad_norm": 1.2827982624069105, + "learning_rate": 3.836849256217355e-07, + "loss": 0.9082256555557251, + "step": 6301 + }, + { + "epoch": 1.452073732718894, + "grad_norm": 1.2539326790404104, + "learning_rate": 3.833849457183519e-07, + "loss": 0.6533655524253845, + "step": 6302 + }, + { + "epoch": 1.452304147465438, + "grad_norm": 1.1962706885387824, + "learning_rate": 3.830850553158271e-07, + "loss": 0.8181168437004089, + "step": 6303 + }, + { + "epoch": 1.4525345622119816, + "grad_norm": 1.191632474290621, + "learning_rate": 3.827852544576895e-07, + "loss": 0.8258780241012573, + "step": 6304 + }, + { + "epoch": 1.4527649769585254, + "grad_norm": 1.2200843626761786, + "learning_rate": 3.824855431874555e-07, + "loss": 0.7917114496231079, + "step": 6305 + }, + { + "epoch": 1.452995391705069, + "grad_norm": 1.1119249100754447, + "learning_rate": 3.821859215486274e-07, + "loss": 0.7523643970489502, + "step": 6306 + }, + { + "epoch": 1.4532258064516128, + "grad_norm": 1.173507656799684, + "learning_rate": 3.818863895846945e-07, + "loss": 0.7248106002807617, + "step": 6307 + }, + { + "epoch": 1.4534562211981568, + "grad_norm": 1.0384099625968284, + 
"learning_rate": 3.815869473391343e-07, + "loss": 0.6663920879364014, + "step": 6308 + }, + { + "epoch": 1.4536866359447005, + "grad_norm": 1.2904533830018654, + "learning_rate": 3.8128759485540995e-07, + "loss": 0.887082576751709, + "step": 6309 + }, + { + "epoch": 1.4539170506912442, + "grad_norm": 1.176731626067417, + "learning_rate": 3.8098833217697193e-07, + "loss": 0.8491328954696655, + "step": 6310 + }, + { + "epoch": 1.454147465437788, + "grad_norm": 0.995531509886264, + "learning_rate": 3.806891593472582e-07, + "loss": 0.6749746799468994, + "step": 6311 + }, + { + "epoch": 1.4543778801843317, + "grad_norm": 1.2359927269681388, + "learning_rate": 3.803900764096932e-07, + "loss": 0.7607502937316895, + "step": 6312 + }, + { + "epoch": 1.4546082949308756, + "grad_norm": 0.9855772687954082, + "learning_rate": 3.8009108340768804e-07, + "loss": 0.6713626980781555, + "step": 6313 + }, + { + "epoch": 1.4548387096774194, + "grad_norm": 1.0335982949651026, + "learning_rate": 3.797921803846419e-07, + "loss": 0.7031810879707336, + "step": 6314 + }, + { + "epoch": 1.455069124423963, + "grad_norm": 1.2499044478276522, + "learning_rate": 3.7949336738393955e-07, + "loss": 0.7233775854110718, + "step": 6315 + }, + { + "epoch": 1.455299539170507, + "grad_norm": 1.1902627494977487, + "learning_rate": 3.791946444489532e-07, + "loss": 0.7446990013122559, + "step": 6316 + }, + { + "epoch": 1.4555299539170508, + "grad_norm": 1.0356528338667375, + "learning_rate": 3.7889601162304273e-07, + "loss": 0.731992244720459, + "step": 6317 + }, + { + "epoch": 1.4557603686635945, + "grad_norm": 0.9012124257356037, + "learning_rate": 3.785974689495539e-07, + "loss": 0.7167335152626038, + "step": 6318 + }, + { + "epoch": 1.4559907834101382, + "grad_norm": 1.0367746360279544, + "learning_rate": 3.7829901647181993e-07, + "loss": 0.7634297609329224, + "step": 6319 + }, + { + "epoch": 1.456221198156682, + "grad_norm": 1.323601627974345, + "learning_rate": 3.7800065423316066e-07, + "loss": 
0.7584050893783569, + "step": 6320 + }, + { + "epoch": 1.456451612903226, + "grad_norm": 1.3168506305563585, + "learning_rate": 3.777023822768829e-07, + "loss": 0.7150899171829224, + "step": 6321 + }, + { + "epoch": 1.4566820276497696, + "grad_norm": 1.3142694869577929, + "learning_rate": 3.7740420064628034e-07, + "loss": 0.7821052670478821, + "step": 6322 + }, + { + "epoch": 1.4569124423963133, + "grad_norm": 1.1890463822517086, + "learning_rate": 3.7710610938463405e-07, + "loss": 0.8678094148635864, + "step": 6323 + }, + { + "epoch": 1.457142857142857, + "grad_norm": 1.0929926711457507, + "learning_rate": 3.7680810853521107e-07, + "loss": 0.6953635215759277, + "step": 6324 + }, + { + "epoch": 1.4573732718894008, + "grad_norm": 1.392687245093679, + "learning_rate": 3.765101981412665e-07, + "loss": 0.765946626663208, + "step": 6325 + }, + { + "epoch": 1.4576036866359448, + "grad_norm": 1.2287803375758581, + "learning_rate": 3.7621237824604137e-07, + "loss": 0.8828680515289307, + "step": 6326 + }, + { + "epoch": 1.4578341013824885, + "grad_norm": 1.4191080683791804, + "learning_rate": 3.7591464889276326e-07, + "loss": 0.8916178345680237, + "step": 6327 + }, + { + "epoch": 1.4580645161290322, + "grad_norm": 1.4414543071479498, + "learning_rate": 3.756170101246481e-07, + "loss": 0.7563039064407349, + "step": 6328 + }, + { + "epoch": 1.4582949308755762, + "grad_norm": 1.1488058177567217, + "learning_rate": 3.7531946198489725e-07, + "loss": 0.8548855781555176, + "step": 6329 + }, + { + "epoch": 1.4585253456221199, + "grad_norm": 1.2471941201918813, + "learning_rate": 3.750220045166993e-07, + "loss": 0.8337546586990356, + "step": 6330 + }, + { + "epoch": 1.4587557603686636, + "grad_norm": 1.2665043024049272, + "learning_rate": 3.7472463776323036e-07, + "loss": 0.8909939527511597, + "step": 6331 + }, + { + "epoch": 1.4589861751152073, + "grad_norm": 0.9459101838544814, + "learning_rate": 3.744273617676524e-07, + "loss": 0.629026472568512, + "step": 6332 + }, + { + 
"epoch": 1.459216589861751, + "grad_norm": 1.245577103796106, + "learning_rate": 3.7413017657311454e-07, + "loss": 0.7264849543571472, + "step": 6333 + }, + { + "epoch": 1.459447004608295, + "grad_norm": 1.0987416494814488, + "learning_rate": 3.738330822227532e-07, + "loss": 0.808081865310669, + "step": 6334 + }, + { + "epoch": 1.4596774193548387, + "grad_norm": 1.145687515640666, + "learning_rate": 3.7353607875969115e-07, + "loss": 0.6092932820320129, + "step": 6335 + }, + { + "epoch": 1.4599078341013825, + "grad_norm": 1.2636271324745916, + "learning_rate": 3.7323916622703756e-07, + "loss": 0.8700584173202515, + "step": 6336 + }, + { + "epoch": 1.4601382488479262, + "grad_norm": 1.2867446987977476, + "learning_rate": 3.7294234466788954e-07, + "loss": 0.8424433469772339, + "step": 6337 + }, + { + "epoch": 1.46036866359447, + "grad_norm": 1.1929868573019329, + "learning_rate": 3.7264561412533013e-07, + "loss": 0.8587443828582764, + "step": 6338 + }, + { + "epoch": 1.4605990783410139, + "grad_norm": 1.1369944171843958, + "learning_rate": 3.7234897464242934e-07, + "loss": 0.7708064913749695, + "step": 6339 + }, + { + "epoch": 1.4608294930875576, + "grad_norm": 0.9599493655503268, + "learning_rate": 3.7205242626224395e-07, + "loss": 0.8226567506790161, + "step": 6340 + }, + { + "epoch": 1.4610599078341013, + "grad_norm": 1.6926769297162396, + "learning_rate": 3.717559690278176e-07, + "loss": 0.8414342403411865, + "step": 6341 + }, + { + "epoch": 1.4612903225806453, + "grad_norm": 1.136325082903018, + "learning_rate": 3.714596029821804e-07, + "loss": 0.765863299369812, + "step": 6342 + }, + { + "epoch": 1.461520737327189, + "grad_norm": 1.2033696575950952, + "learning_rate": 3.7116332816834997e-07, + "loss": 0.7253202199935913, + "step": 6343 + }, + { + "epoch": 1.4617511520737327, + "grad_norm": 1.2614732245354896, + "learning_rate": 3.7086714462933e-07, + "loss": 0.786415696144104, + "step": 6344 + }, + { + "epoch": 1.4619815668202765, + "grad_norm": 
1.3398597613096093, + "learning_rate": 3.705710524081108e-07, + "loss": 0.8382824659347534, + "step": 6345 + }, + { + "epoch": 1.4622119815668202, + "grad_norm": 1.1421503229190921, + "learning_rate": 3.702750515476705e-07, + "loss": 0.7953319549560547, + "step": 6346 + }, + { + "epoch": 1.4624423963133641, + "grad_norm": 1.1953524657169348, + "learning_rate": 3.699791420909727e-07, + "loss": 0.7897430658340454, + "step": 6347 + }, + { + "epoch": 1.4626728110599079, + "grad_norm": 1.0462269201726477, + "learning_rate": 3.6968332408096804e-07, + "loss": 0.7276254892349243, + "step": 6348 + }, + { + "epoch": 1.4629032258064516, + "grad_norm": 1.2576670635193097, + "learning_rate": 3.693875975605949e-07, + "loss": 0.7318450212478638, + "step": 6349 + }, + { + "epoch": 1.4631336405529953, + "grad_norm": 1.3298595608160129, + "learning_rate": 3.6909196257277676e-07, + "loss": 0.8438090085983276, + "step": 6350 + }, + { + "epoch": 1.463364055299539, + "grad_norm": 1.1958819221255177, + "learning_rate": 3.6879641916042534e-07, + "loss": 0.7977915406227112, + "step": 6351 + }, + { + "epoch": 1.463594470046083, + "grad_norm": 1.5876789525233332, + "learning_rate": 3.685009673664382e-07, + "loss": 0.8845348358154297, + "step": 6352 + }, + { + "epoch": 1.4638248847926267, + "grad_norm": 1.1089282393569035, + "learning_rate": 3.682056072336992e-07, + "loss": 0.8971320986747742, + "step": 6353 + }, + { + "epoch": 1.4640552995391705, + "grad_norm": 1.1499585685789093, + "learning_rate": 3.679103388050803e-07, + "loss": 0.7015302181243896, + "step": 6354 + }, + { + "epoch": 1.4642857142857144, + "grad_norm": 1.058413373940715, + "learning_rate": 3.676151621234389e-07, + "loss": 0.5953146815299988, + "step": 6355 + }, + { + "epoch": 1.4645161290322581, + "grad_norm": 0.940762320723037, + "learning_rate": 3.673200772316193e-07, + "loss": 0.5794636011123657, + "step": 6356 + }, + { + "epoch": 1.4647465437788019, + "grad_norm": 1.4093031765021824, + "learning_rate": 
3.6702508417245324e-07, + "loss": 0.8272292017936707, + "step": 6357 + }, + { + "epoch": 1.4649769585253456, + "grad_norm": 1.2004626750502272, + "learning_rate": 3.6673018298875826e-07, + "loss": 0.7239755392074585, + "step": 6358 + }, + { + "epoch": 1.4652073732718893, + "grad_norm": 1.0592207409293348, + "learning_rate": 3.6643537372333886e-07, + "loss": 0.8597465753555298, + "step": 6359 + }, + { + "epoch": 1.4654377880184333, + "grad_norm": 1.3768417389873642, + "learning_rate": 3.661406564189862e-07, + "loss": 0.7540475130081177, + "step": 6360 + }, + { + "epoch": 1.465668202764977, + "grad_norm": 1.2300552177842492, + "learning_rate": 3.658460311184782e-07, + "loss": 0.793259859085083, + "step": 6361 + }, + { + "epoch": 1.4658986175115207, + "grad_norm": 1.1933122341650848, + "learning_rate": 3.6555149786457883e-07, + "loss": 0.797966718673706, + "step": 6362 + }, + { + "epoch": 1.4661290322580645, + "grad_norm": 1.082541374270611, + "learning_rate": 3.6525705670004016e-07, + "loss": 0.7466796636581421, + "step": 6363 + }, + { + "epoch": 1.4663594470046082, + "grad_norm": 0.9612262339874744, + "learning_rate": 3.6496270766759927e-07, + "loss": 0.7694044709205627, + "step": 6364 + }, + { + "epoch": 1.4665898617511521, + "grad_norm": 1.753828188679532, + "learning_rate": 3.6466845080998043e-07, + "loss": 0.7701553106307983, + "step": 6365 + }, + { + "epoch": 1.4668202764976959, + "grad_norm": 1.0670832455899337, + "learning_rate": 3.643742861698952e-07, + "loss": 0.6718326807022095, + "step": 6366 + }, + { + "epoch": 1.4670506912442396, + "grad_norm": 1.1220075290963027, + "learning_rate": 3.6408021379004086e-07, + "loss": 0.7099052667617798, + "step": 6367 + }, + { + "epoch": 1.4672811059907835, + "grad_norm": 1.0614563823752192, + "learning_rate": 3.6378623371310126e-07, + "loss": 0.8650654554367065, + "step": 6368 + }, + { + "epoch": 1.4675115207373273, + "grad_norm": 1.18691798498221, + "learning_rate": 3.6349234598174794e-07, + "loss": 0.7920950055122375, 
+ "step": 6369 + }, + { + "epoch": 1.467741935483871, + "grad_norm": 1.3672164620265899, + "learning_rate": 3.63198550638638e-07, + "loss": 0.7927969098091125, + "step": 6370 + }, + { + "epoch": 1.4679723502304147, + "grad_norm": 1.6817643007938734, + "learning_rate": 3.6290484772641514e-07, + "loss": 0.9403868913650513, + "step": 6371 + }, + { + "epoch": 1.4682027649769585, + "grad_norm": 1.188245842937741, + "learning_rate": 3.626112372877106e-07, + "loss": 0.9157334566116333, + "step": 6372 + }, + { + "epoch": 1.4684331797235024, + "grad_norm": 1.0918511661649737, + "learning_rate": 3.6231771936514067e-07, + "loss": 0.7742066979408264, + "step": 6373 + }, + { + "epoch": 1.4686635944700461, + "grad_norm": 1.0472722321327697, + "learning_rate": 3.6202429400131006e-07, + "loss": 0.69399094581604, + "step": 6374 + }, + { + "epoch": 1.4688940092165899, + "grad_norm": 1.243240675298042, + "learning_rate": 3.6173096123880854e-07, + "loss": 0.874832272529602, + "step": 6375 + }, + { + "epoch": 1.4691244239631336, + "grad_norm": 0.9504044447465768, + "learning_rate": 3.6143772112021275e-07, + "loss": 0.6685272455215454, + "step": 6376 + }, + { + "epoch": 1.4693548387096773, + "grad_norm": 1.2588614059189167, + "learning_rate": 3.611445736880867e-07, + "loss": 0.7422738671302795, + "step": 6377 + }, + { + "epoch": 1.4695852534562213, + "grad_norm": 1.1563672807518934, + "learning_rate": 3.6085151898498e-07, + "loss": 0.8208622932434082, + "step": 6378 + }, + { + "epoch": 1.469815668202765, + "grad_norm": 1.278791922768039, + "learning_rate": 3.605585570534293e-07, + "loss": 0.8001033663749695, + "step": 6379 + }, + { + "epoch": 1.4700460829493087, + "grad_norm": 1.4073194030234843, + "learning_rate": 3.6026568793595744e-07, + "loss": 0.789332926273346, + "step": 6380 + }, + { + "epoch": 1.4702764976958527, + "grad_norm": 1.1542499539799642, + "learning_rate": 3.599729116750742e-07, + "loss": 0.8071820139884949, + "step": 6381 + }, + { + "epoch": 1.4705069124423962, + 
"grad_norm": 1.3369229588575535, + "learning_rate": 3.5968022831327506e-07, + "loss": 0.8028534054756165, + "step": 6382 + }, + { + "epoch": 1.4707373271889401, + "grad_norm": 1.0119395143433376, + "learning_rate": 3.593876378930435e-07, + "loss": 0.6888329982757568, + "step": 6383 + }, + { + "epoch": 1.4709677419354839, + "grad_norm": 1.285773441215651, + "learning_rate": 3.590951404568483e-07, + "loss": 0.8176132440567017, + "step": 6384 + }, + { + "epoch": 1.4711981566820276, + "grad_norm": 0.9429108192029542, + "learning_rate": 3.588027360471446e-07, + "loss": 0.6715027689933777, + "step": 6385 + }, + { + "epoch": 1.4714285714285715, + "grad_norm": 1.2177133807456715, + "learning_rate": 3.585104247063753e-07, + "loss": 0.8622937798500061, + "step": 6386 + }, + { + "epoch": 1.4716589861751153, + "grad_norm": 1.252482813795077, + "learning_rate": 3.5821820647696864e-07, + "loss": 0.7244299650192261, + "step": 6387 + }, + { + "epoch": 1.471889400921659, + "grad_norm": 1.2422776234152886, + "learning_rate": 3.579260814013393e-07, + "loss": 0.8130464553833008, + "step": 6388 + }, + { + "epoch": 1.4721198156682027, + "grad_norm": 1.739841773852821, + "learning_rate": 3.576340495218897e-07, + "loss": 0.8563692569732666, + "step": 6389 + }, + { + "epoch": 1.4723502304147464, + "grad_norm": 1.1474783445098509, + "learning_rate": 3.573421108810073e-07, + "loss": 0.8315908908843994, + "step": 6390 + }, + { + "epoch": 1.4725806451612904, + "grad_norm": 1.0916407928923948, + "learning_rate": 3.5705026552106645e-07, + "loss": 0.653038740158081, + "step": 6391 + }, + { + "epoch": 1.4728110599078341, + "grad_norm": 1.250110377436999, + "learning_rate": 3.5675851348442876e-07, + "loss": 0.7511966228485107, + "step": 6392 + }, + { + "epoch": 1.4730414746543778, + "grad_norm": 1.226967151246929, + "learning_rate": 3.564668548134413e-07, + "loss": 0.8675990104675293, + "step": 6393 + }, + { + "epoch": 1.4732718894009218, + "grad_norm": 1.2481066388566375, + "learning_rate": 
3.5617528955043765e-07, + "loss": 0.7574094533920288, + "step": 6394 + }, + { + "epoch": 1.4735023041474653, + "grad_norm": 1.3612516426224104, + "learning_rate": 3.5588381773773866e-07, + "loss": 0.7004787921905518, + "step": 6395 + }, + { + "epoch": 1.4737327188940093, + "grad_norm": 1.193988835000252, + "learning_rate": 3.555924394176508e-07, + "loss": 0.680101215839386, + "step": 6396 + }, + { + "epoch": 1.473963133640553, + "grad_norm": 1.2956197944669767, + "learning_rate": 3.55301154632467e-07, + "loss": 0.8340710401535034, + "step": 6397 + }, + { + "epoch": 1.4741935483870967, + "grad_norm": 1.2156451361937963, + "learning_rate": 3.5500996342446756e-07, + "loss": 0.8307079076766968, + "step": 6398 + }, + { + "epoch": 1.4744239631336407, + "grad_norm": 1.3824459968937755, + "learning_rate": 3.547188658359179e-07, + "loss": 0.9614958167076111, + "step": 6399 + }, + { + "epoch": 1.4746543778801844, + "grad_norm": 1.2140973914551956, + "learning_rate": 3.544278619090707e-07, + "loss": 0.782494068145752, + "step": 6400 + }, + { + "epoch": 1.4748847926267281, + "grad_norm": 1.372883571978596, + "learning_rate": 3.5413695168616474e-07, + "loss": 0.7474460601806641, + "step": 6401 + }, + { + "epoch": 1.4751152073732718, + "grad_norm": 1.0929029713656226, + "learning_rate": 3.5384613520942484e-07, + "loss": 0.7182635068893433, + "step": 6402 + }, + { + "epoch": 1.4753456221198156, + "grad_norm": 1.1562679128127753, + "learning_rate": 3.5355541252106336e-07, + "loss": 0.8116436004638672, + "step": 6403 + }, + { + "epoch": 1.4755760368663595, + "grad_norm": 1.1320096436261353, + "learning_rate": 3.5326478366327806e-07, + "loss": 0.8007283210754395, + "step": 6404 + }, + { + "epoch": 1.4758064516129032, + "grad_norm": 1.060451283065696, + "learning_rate": 3.5297424867825276e-07, + "loss": 0.7707732915878296, + "step": 6405 + }, + { + "epoch": 1.476036866359447, + "grad_norm": 1.319974893721661, + "learning_rate": 3.5268380760815917e-07, + "loss": 0.8031977415084839, + 
"step": 6406 + }, + { + "epoch": 1.4762672811059907, + "grad_norm": 1.0847497024921582, + "learning_rate": 3.5239346049515397e-07, + "loss": 0.7113008499145508, + "step": 6407 + }, + { + "epoch": 1.4764976958525344, + "grad_norm": 1.490354792200027, + "learning_rate": 3.521032073813802e-07, + "loss": 0.8069616556167603, + "step": 6408 + }, + { + "epoch": 1.4767281105990784, + "grad_norm": 1.6536617293382079, + "learning_rate": 3.518130483089686e-07, + "loss": 0.9780417680740356, + "step": 6409 + }, + { + "epoch": 1.476958525345622, + "grad_norm": 1.0393285063529043, + "learning_rate": 3.515229833200351e-07, + "loss": 0.765299379825592, + "step": 6410 + }, + { + "epoch": 1.4771889400921658, + "grad_norm": 0.9792702634570369, + "learning_rate": 3.512330124566816e-07, + "loss": 0.7279179096221924, + "step": 6411 + }, + { + "epoch": 1.4774193548387098, + "grad_norm": 1.3765526641198769, + "learning_rate": 3.509431357609978e-07, + "loss": 0.8429825901985168, + "step": 6412 + }, + { + "epoch": 1.4776497695852535, + "grad_norm": 1.2876523066268597, + "learning_rate": 3.506533532750586e-07, + "loss": 0.741936206817627, + "step": 6413 + }, + { + "epoch": 1.4778801843317972, + "grad_norm": 1.0841845353527741, + "learning_rate": 3.5036366504092527e-07, + "loss": 0.6841387748718262, + "step": 6414 + }, + { + "epoch": 1.478110599078341, + "grad_norm": 1.1361546476433346, + "learning_rate": 3.5007407110064626e-07, + "loss": 0.7136961221694946, + "step": 6415 + }, + { + "epoch": 1.4783410138248847, + "grad_norm": 1.1942730912918724, + "learning_rate": 3.497845714962554e-07, + "loss": 0.8483344912528992, + "step": 6416 + }, + { + "epoch": 1.4785714285714286, + "grad_norm": 1.1525838724707749, + "learning_rate": 3.4949516626977294e-07, + "loss": 0.7060235738754272, + "step": 6417 + }, + { + "epoch": 1.4788018433179724, + "grad_norm": 1.2546190088001288, + "learning_rate": 3.4920585546320625e-07, + "loss": 0.7351587414741516, + "step": 6418 + }, + { + "epoch": 1.479032258064516, + 
"grad_norm": 1.4082190266306274, + "learning_rate": 3.489166391185482e-07, + "loss": 0.7445269823074341, + "step": 6419 + }, + { + "epoch": 1.4792626728110598, + "grad_norm": 1.2308828080413103, + "learning_rate": 3.4862751727777796e-07, + "loss": 0.795128583908081, + "step": 6420 + }, + { + "epoch": 1.4794930875576036, + "grad_norm": 1.3455737723646244, + "learning_rate": 3.4833848998286133e-07, + "loss": 0.7916193008422852, + "step": 6421 + }, + { + "epoch": 1.4797235023041475, + "grad_norm": 1.2062461099240058, + "learning_rate": 3.480495572757497e-07, + "loss": 0.8279474973678589, + "step": 6422 + }, + { + "epoch": 1.4799539170506912, + "grad_norm": 1.3615355231577309, + "learning_rate": 3.477607191983822e-07, + "loss": 0.9339898824691772, + "step": 6423 + }, + { + "epoch": 1.480184331797235, + "grad_norm": 1.2958649175302657, + "learning_rate": 3.4747197579268296e-07, + "loss": 0.8579660654067993, + "step": 6424 + }, + { + "epoch": 1.480414746543779, + "grad_norm": 1.1935735021965341, + "learning_rate": 3.471833271005622e-07, + "loss": 0.7637878060340881, + "step": 6425 + }, + { + "epoch": 1.4806451612903226, + "grad_norm": 1.2997741786350927, + "learning_rate": 3.4689477316391756e-07, + "loss": 0.8600465059280396, + "step": 6426 + }, + { + "epoch": 1.4808755760368664, + "grad_norm": 0.9725758019670567, + "learning_rate": 3.46606314024632e-07, + "loss": 0.6576759815216064, + "step": 6427 + }, + { + "epoch": 1.48110599078341, + "grad_norm": 1.1289750059608772, + "learning_rate": 3.463179497245747e-07, + "loss": 0.7556706666946411, + "step": 6428 + }, + { + "epoch": 1.4813364055299538, + "grad_norm": 1.3449392913610907, + "learning_rate": 3.4602968030560196e-07, + "loss": 0.8826701641082764, + "step": 6429 + }, + { + "epoch": 1.4815668202764978, + "grad_norm": 1.1499087478485694, + "learning_rate": 3.457415058095554e-07, + "loss": 0.7352213263511658, + "step": 6430 + }, + { + "epoch": 1.4817972350230415, + "grad_norm": 1.4434298728988502, + "learning_rate": 
3.454534262782628e-07, + "loss": 0.8108851909637451, + "step": 6431 + }, + { + "epoch": 1.4820276497695852, + "grad_norm": 1.3070168078927469, + "learning_rate": 3.4516544175353914e-07, + "loss": 0.8595583438873291, + "step": 6432 + }, + { + "epoch": 1.482258064516129, + "grad_norm": 1.1496814595283131, + "learning_rate": 3.448775522771847e-07, + "loss": 0.7194280028343201, + "step": 6433 + }, + { + "epoch": 1.4824884792626727, + "grad_norm": 1.2788780172510947, + "learning_rate": 3.445897578909861e-07, + "loss": 0.8966056108474731, + "step": 6434 + }, + { + "epoch": 1.4827188940092166, + "grad_norm": 1.4168806857520198, + "learning_rate": 3.443020586367167e-07, + "loss": 0.8089771270751953, + "step": 6435 + }, + { + "epoch": 1.4829493087557604, + "grad_norm": 1.3086078413537297, + "learning_rate": 3.4401445455613555e-07, + "loss": 0.7835644483566284, + "step": 6436 + }, + { + "epoch": 1.483179723502304, + "grad_norm": 1.242850049469479, + "learning_rate": 3.4372694569098746e-07, + "loss": 0.7285257577896118, + "step": 6437 + }, + { + "epoch": 1.483410138248848, + "grad_norm": 1.4884020116718253, + "learning_rate": 3.434395320830048e-07, + "loss": 0.9108592867851257, + "step": 6438 + }, + { + "epoch": 1.4836405529953918, + "grad_norm": 1.265305751937672, + "learning_rate": 3.431522137739049e-07, + "loss": 0.7154395580291748, + "step": 6439 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 1.0883673646660943, + "learning_rate": 3.428649908053917e-07, + "loss": 0.6483602523803711, + "step": 6440 + }, + { + "epoch": 1.4841013824884792, + "grad_norm": 1.457129029114168, + "learning_rate": 3.425778632191551e-07, + "loss": 0.8090662956237793, + "step": 6441 + }, + { + "epoch": 1.484331797235023, + "grad_norm": 1.428702771444548, + "learning_rate": 3.422908310568712e-07, + "loss": 0.7884642481803894, + "step": 6442 + }, + { + "epoch": 1.484562211981567, + "grad_norm": 1.2738553778883674, + "learning_rate": 3.4200389436020225e-07, + "loss": 0.8628194332122803, + 
"step": 6443 + }, + { + "epoch": 1.4847926267281106, + "grad_norm": 1.1838310809928603, + "learning_rate": 3.4171705317079723e-07, + "loss": 0.8192269802093506, + "step": 6444 + }, + { + "epoch": 1.4850230414746544, + "grad_norm": 1.316668872684636, + "learning_rate": 3.4143030753029054e-07, + "loss": 0.7768012285232544, + "step": 6445 + }, + { + "epoch": 1.485253456221198, + "grad_norm": 1.2324282268735118, + "learning_rate": 3.411436574803026e-07, + "loss": 0.7420791387557983, + "step": 6446 + }, + { + "epoch": 1.4854838709677418, + "grad_norm": 1.3102449774544425, + "learning_rate": 3.4085710306244086e-07, + "loss": 0.823938250541687, + "step": 6447 + }, + { + "epoch": 1.4857142857142858, + "grad_norm": 1.1672900255965821, + "learning_rate": 3.405706443182976e-07, + "loss": 0.7215089201927185, + "step": 6448 + }, + { + "epoch": 1.4859447004608295, + "grad_norm": 1.138949819615918, + "learning_rate": 3.4028428128945286e-07, + "loss": 0.8301436901092529, + "step": 6449 + }, + { + "epoch": 1.4861751152073732, + "grad_norm": 1.1171858572091258, + "learning_rate": 3.399980140174712e-07, + "loss": 0.6727990508079529, + "step": 6450 + }, + { + "epoch": 1.4864055299539172, + "grad_norm": 1.0969379356045603, + "learning_rate": 3.397118425439038e-07, + "loss": 0.8364754319190979, + "step": 6451 + }, + { + "epoch": 1.486635944700461, + "grad_norm": 1.2714499604529865, + "learning_rate": 3.394257669102887e-07, + "loss": 0.7241604328155518, + "step": 6452 + }, + { + "epoch": 1.4868663594470046, + "grad_norm": 1.429435383993002, + "learning_rate": 3.3913978715814897e-07, + "loss": 0.7762489914894104, + "step": 6453 + }, + { + "epoch": 1.4870967741935484, + "grad_norm": 1.3862601382620485, + "learning_rate": 3.38853903328994e-07, + "loss": 0.9278200268745422, + "step": 6454 + }, + { + "epoch": 1.487327188940092, + "grad_norm": 0.9454491284474441, + "learning_rate": 3.3856811546431994e-07, + "loss": 0.693070113658905, + "step": 6455 + }, + { + "epoch": 1.487557603686636, + 
"grad_norm": 1.4631261008304832, + "learning_rate": 3.382824236056084e-07, + "loss": 0.8541949987411499, + "step": 6456 + }, + { + "epoch": 1.4877880184331798, + "grad_norm": 1.1080747331787868, + "learning_rate": 3.379968277943267e-07, + "loss": 0.7638850212097168, + "step": 6457 + }, + { + "epoch": 1.4880184331797235, + "grad_norm": 1.5396868765343736, + "learning_rate": 3.377113280719295e-07, + "loss": 0.8240739107131958, + "step": 6458 + }, + { + "epoch": 1.4882488479262672, + "grad_norm": 1.037738997106509, + "learning_rate": 3.374259244798562e-07, + "loss": 0.7360633015632629, + "step": 6459 + }, + { + "epoch": 1.488479262672811, + "grad_norm": 1.1287418173516828, + "learning_rate": 3.371406170595328e-07, + "loss": 0.8626362085342407, + "step": 6460 + }, + { + "epoch": 1.488709677419355, + "grad_norm": 1.553133844655672, + "learning_rate": 3.368554058523713e-07, + "loss": 0.8499895334243774, + "step": 6461 + }, + { + "epoch": 1.4889400921658986, + "grad_norm": 1.1568237777707882, + "learning_rate": 3.3657029089976985e-07, + "loss": 0.8335039615631104, + "step": 6462 + }, + { + "epoch": 1.4891705069124423, + "grad_norm": 1.1957026633378731, + "learning_rate": 3.3628527224311196e-07, + "loss": 0.8154790997505188, + "step": 6463 + }, + { + "epoch": 1.4894009216589863, + "grad_norm": 1.2851436413791164, + "learning_rate": 3.3600034992376856e-07, + "loss": 0.7952951192855835, + "step": 6464 + }, + { + "epoch": 1.48963133640553, + "grad_norm": 1.5993164682006433, + "learning_rate": 3.3571552398309535e-07, + "loss": 0.7227598428726196, + "step": 6465 + }, + { + "epoch": 1.4898617511520738, + "grad_norm": 1.1773028491207966, + "learning_rate": 3.3543079446243404e-07, + "loss": 0.6703250408172607, + "step": 6466 + }, + { + "epoch": 1.4900921658986175, + "grad_norm": 1.152932493736184, + "learning_rate": 3.351461614031136e-07, + "loss": 0.7468122243881226, + "step": 6467 + }, + { + "epoch": 1.4903225806451612, + "grad_norm": 1.2933114629854674, + "learning_rate": 
3.348616248464475e-07, + "loss": 0.8649178743362427, + "step": 6468 + }, + { + "epoch": 1.4905529953917052, + "grad_norm": 1.013990280281903, + "learning_rate": 3.345771848337359e-07, + "loss": 0.8229554295539856, + "step": 6469 + }, + { + "epoch": 1.4907834101382489, + "grad_norm": 1.3471402030282535, + "learning_rate": 3.342928414062652e-07, + "loss": 0.7275597453117371, + "step": 6470 + }, + { + "epoch": 1.4910138248847926, + "grad_norm": 1.095192106330462, + "learning_rate": 3.3400859460530737e-07, + "loss": 0.657899796962738, + "step": 6471 + }, + { + "epoch": 1.4912442396313363, + "grad_norm": 1.0853913135805695, + "learning_rate": 3.3372444447212e-07, + "loss": 0.7579425573348999, + "step": 6472 + }, + { + "epoch": 1.49147465437788, + "grad_norm": 1.1304988993649205, + "learning_rate": 3.334403910479479e-07, + "loss": 0.8707751631736755, + "step": 6473 + }, + { + "epoch": 1.491705069124424, + "grad_norm": 1.3454806591137698, + "learning_rate": 3.331564343740201e-07, + "loss": 0.7923752665519714, + "step": 6474 + }, + { + "epoch": 1.4919354838709677, + "grad_norm": 1.2646674876263875, + "learning_rate": 3.328725744915536e-07, + "loss": 0.8308948278427124, + "step": 6475 + }, + { + "epoch": 1.4921658986175115, + "grad_norm": 1.4029553470676885, + "learning_rate": 3.3258881144174967e-07, + "loss": 0.8984559774398804, + "step": 6476 + }, + { + "epoch": 1.4923963133640554, + "grad_norm": 1.2358798089346714, + "learning_rate": 3.3230514526579614e-07, + "loss": 0.9279792308807373, + "step": 6477 + }, + { + "epoch": 1.4926267281105992, + "grad_norm": 1.4094728162225774, + "learning_rate": 3.3202157600486655e-07, + "loss": 0.7934520244598389, + "step": 6478 + }, + { + "epoch": 1.4928571428571429, + "grad_norm": 1.658388461731414, + "learning_rate": 3.3173810370012136e-07, + "loss": 0.8463613390922546, + "step": 6479 + }, + { + "epoch": 1.4930875576036866, + "grad_norm": 1.339159678666659, + "learning_rate": 3.314547283927057e-07, + "loss": 0.8087350130081177, + 
"step": 6480 + }, + { + "epoch": 1.4933179723502303, + "grad_norm": 1.2350842201271304, + "learning_rate": 3.3117145012375113e-07, + "loss": 0.7711254358291626, + "step": 6481 + }, + { + "epoch": 1.4935483870967743, + "grad_norm": 1.2753839749074636, + "learning_rate": 3.3088826893437526e-07, + "loss": 0.7140679359436035, + "step": 6482 + }, + { + "epoch": 1.493778801843318, + "grad_norm": 1.1506161777222865, + "learning_rate": 3.3060518486568103e-07, + "loss": 0.7074463367462158, + "step": 6483 + }, + { + "epoch": 1.4940092165898617, + "grad_norm": 0.8291232249474376, + "learning_rate": 3.3032219795875827e-07, + "loss": 0.7560559511184692, + "step": 6484 + }, + { + "epoch": 1.4942396313364055, + "grad_norm": 1.4344445687170468, + "learning_rate": 3.3003930825468194e-07, + "loss": 0.7699435353279114, + "step": 6485 + }, + { + "epoch": 1.4944700460829492, + "grad_norm": 1.277197987117764, + "learning_rate": 3.297565157945129e-07, + "loss": 0.817488431930542, + "step": 6486 + }, + { + "epoch": 1.4947004608294931, + "grad_norm": 1.1511534488778172, + "learning_rate": 3.294738206192985e-07, + "loss": 0.7534141540527344, + "step": 6487 + }, + { + "epoch": 1.4949308755760369, + "grad_norm": 1.1924480850963226, + "learning_rate": 3.291912227700715e-07, + "loss": 0.7423536777496338, + "step": 6488 + }, + { + "epoch": 1.4951612903225806, + "grad_norm": 0.952322784205302, + "learning_rate": 3.2890872228785003e-07, + "loss": 0.7181985378265381, + "step": 6489 + }, + { + "epoch": 1.4953917050691246, + "grad_norm": 1.270224090305602, + "learning_rate": 3.286263192136396e-07, + "loss": 0.7143938541412354, + "step": 6490 + }, + { + "epoch": 1.4956221198156683, + "grad_norm": 1.3995714023195414, + "learning_rate": 3.2834401358843e-07, + "loss": 0.8247631788253784, + "step": 6491 + }, + { + "epoch": 1.495852534562212, + "grad_norm": 1.1449759372564834, + "learning_rate": 3.280618054531974e-07, + "loss": 0.8627001047134399, + "step": 6492 + }, + { + "epoch": 1.4960829493087557, + 
"grad_norm": 1.3482725665599868, + "learning_rate": 3.2777969484890456e-07, + "loss": 0.813239574432373, + "step": 6493 + }, + { + "epoch": 1.4963133640552995, + "grad_norm": 0.9200346218481302, + "learning_rate": 3.2749768181649904e-07, + "loss": 0.6633884310722351, + "step": 6494 + }, + { + "epoch": 1.4965437788018434, + "grad_norm": 1.4278232440541767, + "learning_rate": 3.272157663969144e-07, + "loss": 0.7760038375854492, + "step": 6495 + }, + { + "epoch": 1.4967741935483871, + "grad_norm": 1.3200918095184475, + "learning_rate": 3.2693394863107105e-07, + "loss": 0.9352993369102478, + "step": 6496 + }, + { + "epoch": 1.4970046082949309, + "grad_norm": 1.2344539392280847, + "learning_rate": 3.2665222855987397e-07, + "loss": 0.7011485695838928, + "step": 6497 + }, + { + "epoch": 1.4972350230414746, + "grad_norm": 1.2183950494067446, + "learning_rate": 3.263706062242142e-07, + "loss": 0.9008398056030273, + "step": 6498 + }, + { + "epoch": 1.4974654377880183, + "grad_norm": 1.194608222128912, + "learning_rate": 3.260890816649694e-07, + "loss": 0.768037736415863, + "step": 6499 + }, + { + "epoch": 1.4976958525345623, + "grad_norm": 0.9220148240054391, + "learning_rate": 3.258076549230024e-07, + "loss": 0.7603639364242554, + "step": 6500 + }, + { + "epoch": 1.497926267281106, + "grad_norm": 1.3821459764557307, + "learning_rate": 3.2552632603916177e-07, + "loss": 0.7984024286270142, + "step": 6501 + }, + { + "epoch": 1.4981566820276497, + "grad_norm": 1.415424035035242, + "learning_rate": 3.2524509505428187e-07, + "loss": 0.8466978073120117, + "step": 6502 + }, + { + "epoch": 1.4983870967741937, + "grad_norm": 1.3670825801142161, + "learning_rate": 3.24963962009183e-07, + "loss": 0.7964911460876465, + "step": 6503 + }, + { + "epoch": 1.4986175115207372, + "grad_norm": 1.3123478568754847, + "learning_rate": 3.246829269446716e-07, + "loss": 0.7551665306091309, + "step": 6504 + }, + { + "epoch": 1.4988479262672811, + "grad_norm": 1.3193018902055227, + "learning_rate": 
3.2440198990153945e-07, + "loss": 0.6468057632446289, + "step": 6505 + }, + { + "epoch": 1.4990783410138249, + "grad_norm": 1.2139801652485203, + "learning_rate": 3.241211509205638e-07, + "loss": 0.7739330530166626, + "step": 6506 + }, + { + "epoch": 1.4993087557603686, + "grad_norm": 1.3659144717848737, + "learning_rate": 3.238404100425085e-07, + "loss": 0.8205568790435791, + "step": 6507 + }, + { + "epoch": 1.4995391705069125, + "grad_norm": 0.958982052367848, + "learning_rate": 3.235597673081227e-07, + "loss": 0.667822003364563, + "step": 6508 + }, + { + "epoch": 1.4997695852534563, + "grad_norm": 1.2374356667574686, + "learning_rate": 3.232792227581409e-07, + "loss": 0.7829990386962891, + "step": 6509 + }, + { + "epoch": 1.5, + "grad_norm": 1.1404525757399535, + "learning_rate": 3.229987764332843e-07, + "loss": 0.768509566783905, + "step": 6510 + }, + { + "epoch": 1.5002304147465437, + "grad_norm": 1.3651547247057954, + "learning_rate": 3.227184283742591e-07, + "loss": 0.8448585867881775, + "step": 6511 + }, + { + "epoch": 1.5004608294930875, + "grad_norm": 1.2722097281432705, + "learning_rate": 3.2243817862175705e-07, + "loss": 0.6929391622543335, + "step": 6512 + }, + { + "epoch": 1.5006912442396314, + "grad_norm": 0.8983294061831201, + "learning_rate": 3.221580272164567e-07, + "loss": 0.6453005075454712, + "step": 6513 + }, + { + "epoch": 1.5009216589861751, + "grad_norm": 1.135934251126359, + "learning_rate": 3.2187797419902143e-07, + "loss": 0.7870811820030212, + "step": 6514 + }, + { + "epoch": 1.5011520737327189, + "grad_norm": 1.264885386654941, + "learning_rate": 3.2159801961010013e-07, + "loss": 0.7032002210617065, + "step": 6515 + }, + { + "epoch": 1.5013824884792628, + "grad_norm": 1.5122369312915371, + "learning_rate": 3.213181634903285e-07, + "loss": 0.8018448352813721, + "step": 6516 + }, + { + "epoch": 1.5016129032258063, + "grad_norm": 1.0930874016239036, + "learning_rate": 3.2103840588032707e-07, + "loss": 0.7066134810447693, + "step": 6517 + 
}, + { + "epoch": 1.5018433179723503, + "grad_norm": 1.049874936950677, + "learning_rate": 3.207587468207018e-07, + "loss": 0.6835265159606934, + "step": 6518 + }, + { + "epoch": 1.502073732718894, + "grad_norm": 1.1994114231897615, + "learning_rate": 3.204791863520455e-07, + "loss": 0.6679749488830566, + "step": 6519 + }, + { + "epoch": 1.5023041474654377, + "grad_norm": 1.1780261658003046, + "learning_rate": 3.201997245149358e-07, + "loss": 0.781232476234436, + "step": 6520 + }, + { + "epoch": 1.5025345622119817, + "grad_norm": 1.156188659495686, + "learning_rate": 3.1992036134993616e-07, + "loss": 0.7853572368621826, + "step": 6521 + }, + { + "epoch": 1.5027649769585254, + "grad_norm": 1.3156565650023675, + "learning_rate": 3.1964109689759576e-07, + "loss": 0.8220832943916321, + "step": 6522 + }, + { + "epoch": 1.5029953917050691, + "grad_norm": 1.0874952614272322, + "learning_rate": 3.193619311984491e-07, + "loss": 0.8046013116836548, + "step": 6523 + }, + { + "epoch": 1.5032258064516129, + "grad_norm": 1.1481673715256613, + "learning_rate": 3.190828642930174e-07, + "loss": 0.7123414874076843, + "step": 6524 + }, + { + "epoch": 1.5034562211981566, + "grad_norm": 1.2507360463805697, + "learning_rate": 3.188038962218066e-07, + "loss": 0.7913625240325928, + "step": 6525 + }, + { + "epoch": 1.5036866359447005, + "grad_norm": 1.2264479129016654, + "learning_rate": 3.185250270253081e-07, + "loss": 0.7837327718734741, + "step": 6526 + }, + { + "epoch": 1.5039170506912443, + "grad_norm": 1.3223188543102071, + "learning_rate": 3.182462567440002e-07, + "loss": 0.7799992561340332, + "step": 6527 + }, + { + "epoch": 1.504147465437788, + "grad_norm": 1.2906027927929307, + "learning_rate": 3.1796758541834545e-07, + "loss": 0.8591268062591553, + "step": 6528 + }, + { + "epoch": 1.504377880184332, + "grad_norm": 1.1175058933428492, + "learning_rate": 3.176890130887926e-07, + "loss": 0.6886378526687622, + "step": 6529 + }, + { + "epoch": 1.5046082949308754, + "grad_norm": 
1.4969255628781877, + "learning_rate": 3.1741053979577647e-07, + "loss": 0.8641641139984131, + "step": 6530 + }, + { + "epoch": 1.5048387096774194, + "grad_norm": 1.3022265823882768, + "learning_rate": 3.1713216557971687e-07, + "loss": 0.8215552568435669, + "step": 6531 + }, + { + "epoch": 1.5050691244239631, + "grad_norm": 1.332125606212464, + "learning_rate": 3.1685389048101906e-07, + "loss": 0.8506371974945068, + "step": 6532 + }, + { + "epoch": 1.5052995391705069, + "grad_norm": 1.371517957091787, + "learning_rate": 3.1657571454007515e-07, + "loss": 0.740912675857544, + "step": 6533 + }, + { + "epoch": 1.5055299539170508, + "grad_norm": 1.0380741302125553, + "learning_rate": 3.162976377972614e-07, + "loss": 0.6458308696746826, + "step": 6534 + }, + { + "epoch": 1.5057603686635943, + "grad_norm": 1.0737980819278299, + "learning_rate": 3.1601966029294013e-07, + "loss": 0.7368316650390625, + "step": 6535 + }, + { + "epoch": 1.5059907834101383, + "grad_norm": 1.1008143995933475, + "learning_rate": 3.1574178206746003e-07, + "loss": 0.6648637056350708, + "step": 6536 + }, + { + "epoch": 1.506221198156682, + "grad_norm": 1.2751679142768328, + "learning_rate": 3.154640031611544e-07, + "loss": 0.706688404083252, + "step": 6537 + }, + { + "epoch": 1.5064516129032257, + "grad_norm": 1.0597131508477158, + "learning_rate": 3.1518632361434263e-07, + "loss": 0.722059965133667, + "step": 6538 + }, + { + "epoch": 1.5066820276497697, + "grad_norm": 1.1420297201861054, + "learning_rate": 3.14908743467329e-07, + "loss": 0.7098807096481323, + "step": 6539 + }, + { + "epoch": 1.5069124423963134, + "grad_norm": 1.1123804283277692, + "learning_rate": 3.1463126276040454e-07, + "loss": 0.7131781578063965, + "step": 6540 + }, + { + "epoch": 1.5071428571428571, + "grad_norm": 0.757735402153, + "learning_rate": 3.143538815338451e-07, + "loss": 0.7292109727859497, + "step": 6541 + }, + { + "epoch": 1.507373271889401, + "grad_norm": 1.1145586582073062, + "learning_rate": 
3.1407659982791204e-07, + "loss": 0.7305347919464111, + "step": 6542 + }, + { + "epoch": 1.5076036866359446, + "grad_norm": 1.3246030999705258, + "learning_rate": 3.1379941768285247e-07, + "loss": 0.8072094321250916, + "step": 6543 + }, + { + "epoch": 1.5078341013824885, + "grad_norm": 1.2831968996332677, + "learning_rate": 3.135223351388987e-07, + "loss": 0.8772450685501099, + "step": 6544 + }, + { + "epoch": 1.5080645161290323, + "grad_norm": 1.1816139196453221, + "learning_rate": 3.1324535223626957e-07, + "loss": 0.8463687896728516, + "step": 6545 + }, + { + "epoch": 1.508294930875576, + "grad_norm": 1.1937564350019036, + "learning_rate": 3.1296846901516806e-07, + "loss": 0.6764696836471558, + "step": 6546 + }, + { + "epoch": 1.50852534562212, + "grad_norm": 1.198918569491841, + "learning_rate": 3.126916855157841e-07, + "loss": 0.8395411968231201, + "step": 6547 + }, + { + "epoch": 1.5087557603686634, + "grad_norm": 1.0607235882989698, + "learning_rate": 3.1241500177829195e-07, + "loss": 0.8227219581604004, + "step": 6548 + }, + { + "epoch": 1.5089861751152074, + "grad_norm": 1.1677688606359355, + "learning_rate": 3.121384178428519e-07, + "loss": 0.7079675197601318, + "step": 6549 + }, + { + "epoch": 1.5092165898617511, + "grad_norm": 1.2218836381096956, + "learning_rate": 3.1186193374961014e-07, + "loss": 0.7792578935623169, + "step": 6550 + }, + { + "epoch": 1.5094470046082948, + "grad_norm": 1.403777710630671, + "learning_rate": 3.1158554953869776e-07, + "loss": 0.7821195125579834, + "step": 6551 + }, + { + "epoch": 1.5096774193548388, + "grad_norm": 1.0979873084769438, + "learning_rate": 3.1130926525023114e-07, + "loss": 0.6640183329582214, + "step": 6552 + }, + { + "epoch": 1.5099078341013825, + "grad_norm": 1.4504991573195685, + "learning_rate": 3.110330809243134e-07, + "loss": 0.8087342977523804, + "step": 6553 + }, + { + "epoch": 1.5101382488479262, + "grad_norm": 1.283455986462282, + "learning_rate": 3.1075699660103184e-07, + "loss": 0.7716038227081299, 
+ "step": 6554 + }, + { + "epoch": 1.5103686635944702, + "grad_norm": 1.3208739442859028, + "learning_rate": 3.1048101232045943e-07, + "loss": 0.8408910632133484, + "step": 6555 + }, + { + "epoch": 1.5105990783410137, + "grad_norm": 1.3122071557376964, + "learning_rate": 3.1020512812265564e-07, + "loss": 0.8799750804901123, + "step": 6556 + }, + { + "epoch": 1.5108294930875577, + "grad_norm": 1.0380590960697713, + "learning_rate": 3.0992934404766426e-07, + "loss": 0.6534945368766785, + "step": 6557 + }, + { + "epoch": 1.5110599078341014, + "grad_norm": 1.2848519038217197, + "learning_rate": 3.0965366013551507e-07, + "loss": 0.8293032646179199, + "step": 6558 + }, + { + "epoch": 1.511290322580645, + "grad_norm": 1.2211391336842954, + "learning_rate": 3.0937807642622295e-07, + "loss": 0.7486997842788696, + "step": 6559 + }, + { + "epoch": 1.511520737327189, + "grad_norm": 0.9417803138002018, + "learning_rate": 3.0910259295978914e-07, + "loss": 0.7431119680404663, + "step": 6560 + }, + { + "epoch": 1.5117511520737326, + "grad_norm": 1.255174714268565, + "learning_rate": 3.0882720977619927e-07, + "loss": 0.945859432220459, + "step": 6561 + }, + { + "epoch": 1.5119815668202765, + "grad_norm": 1.3772354918121292, + "learning_rate": 3.0855192691542487e-07, + "loss": 0.7894721031188965, + "step": 6562 + }, + { + "epoch": 1.5122119815668202, + "grad_norm": 1.59783124946433, + "learning_rate": 3.082767444174229e-07, + "loss": 0.8593465089797974, + "step": 6563 + }, + { + "epoch": 1.512442396313364, + "grad_norm": 1.1763179818399963, + "learning_rate": 3.080016623221355e-07, + "loss": 0.8297405242919922, + "step": 6564 + }, + { + "epoch": 1.512672811059908, + "grad_norm": 1.2146736952008175, + "learning_rate": 3.07726680669491e-07, + "loss": 0.7372928857803345, + "step": 6565 + }, + { + "epoch": 1.5129032258064516, + "grad_norm": 0.987456734373793, + "learning_rate": 3.0745179949940235e-07, + "loss": 0.7031347751617432, + "step": 6566 + }, + { + "epoch": 1.5131336405529954, + 
"grad_norm": 0.9788253167457012, + "learning_rate": 3.071770188517679e-07, + "loss": 0.7086467742919922, + "step": 6567 + }, + { + "epoch": 1.5133640552995393, + "grad_norm": 1.2854493361240282, + "learning_rate": 3.069023387664723e-07, + "loss": 0.9091345071792603, + "step": 6568 + }, + { + "epoch": 1.5135944700460828, + "grad_norm": 1.3979394006170445, + "learning_rate": 3.066277592833847e-07, + "loss": 0.7470624446868896, + "step": 6569 + }, + { + "epoch": 1.5138248847926268, + "grad_norm": 1.2458050386964743, + "learning_rate": 3.0635328044235965e-07, + "loss": 0.75694739818573, + "step": 6570 + }, + { + "epoch": 1.5140552995391705, + "grad_norm": 1.1257752667184633, + "learning_rate": 3.0607890228323796e-07, + "loss": 0.7832024693489075, + "step": 6571 + }, + { + "epoch": 1.5142857142857142, + "grad_norm": 1.4206979397737705, + "learning_rate": 3.0580462484584455e-07, + "loss": 0.6777220368385315, + "step": 6572 + }, + { + "epoch": 1.5145161290322582, + "grad_norm": 1.1010797667803915, + "learning_rate": 3.055304481699913e-07, + "loss": 0.7748236060142517, + "step": 6573 + }, + { + "epoch": 1.5147465437788017, + "grad_norm": 1.1639246159957346, + "learning_rate": 3.052563722954741e-07, + "loss": 0.7495633363723755, + "step": 6574 + }, + { + "epoch": 1.5149769585253456, + "grad_norm": 1.1319897669216112, + "learning_rate": 3.049823972620744e-07, + "loss": 0.8011484742164612, + "step": 6575 + }, + { + "epoch": 1.5152073732718894, + "grad_norm": 1.3878273723563577, + "learning_rate": 3.0470852310956e-07, + "loss": 0.7480140924453735, + "step": 6576 + }, + { + "epoch": 1.515437788018433, + "grad_norm": 1.1963673851290149, + "learning_rate": 3.0443474987768305e-07, + "loss": 0.6561319828033447, + "step": 6577 + }, + { + "epoch": 1.515668202764977, + "grad_norm": 1.1887729560806304, + "learning_rate": 3.041610776061813e-07, + "loss": 0.7437188029289246, + "step": 6578 + }, + { + "epoch": 1.5158986175115208, + "grad_norm": 1.2420532978964127, + "learning_rate": 
3.0388750633477766e-07, + "loss": 0.7429096698760986, + "step": 6579 + }, + { + "epoch": 1.5161290322580645, + "grad_norm": 1.3505114972693866, + "learning_rate": 3.0361403610318125e-07, + "loss": 0.859411358833313, + "step": 6580 + }, + { + "epoch": 1.5163594470046085, + "grad_norm": 0.9758931256825946, + "learning_rate": 3.0334066695108565e-07, + "loss": 0.7636305093765259, + "step": 6581 + }, + { + "epoch": 1.516589861751152, + "grad_norm": 1.1796162666849943, + "learning_rate": 3.030673989181699e-07, + "loss": 0.8331989049911499, + "step": 6582 + }, + { + "epoch": 1.516820276497696, + "grad_norm": 1.0763217337155384, + "learning_rate": 3.0279423204409857e-07, + "loss": 0.770574688911438, + "step": 6583 + }, + { + "epoch": 1.5170506912442396, + "grad_norm": 1.3524367915089308, + "learning_rate": 3.025211663685213e-07, + "loss": 0.7470898628234863, + "step": 6584 + }, + { + "epoch": 1.5172811059907834, + "grad_norm": 1.2515745730030696, + "learning_rate": 3.022482019310736e-07, + "loss": 0.7907510995864868, + "step": 6585 + }, + { + "epoch": 1.5175115207373273, + "grad_norm": 1.1087989572536945, + "learning_rate": 3.019753387713757e-07, + "loss": 0.751417338848114, + "step": 6586 + }, + { + "epoch": 1.5177419354838708, + "grad_norm": 1.3862652872284045, + "learning_rate": 3.01702576929033e-07, + "loss": 0.8987867832183838, + "step": 6587 + }, + { + "epoch": 1.5179723502304148, + "grad_norm": 1.2098170472034613, + "learning_rate": 3.0142991644363714e-07, + "loss": 0.7618268728256226, + "step": 6588 + }, + { + "epoch": 1.5182027649769585, + "grad_norm": 1.4029958928912587, + "learning_rate": 3.011573573547641e-07, + "loss": 0.9358207583427429, + "step": 6589 + }, + { + "epoch": 1.5184331797235022, + "grad_norm": 1.4434031985489326, + "learning_rate": 3.008848997019753e-07, + "loss": 0.6549144387245178, + "step": 6590 + }, + { + "epoch": 1.5186635944700462, + "grad_norm": 1.293720092884626, + "learning_rate": 3.00612543524818e-07, + "loss": 0.8642100095748901, + 
"step": 6591 + }, + { + "epoch": 1.51889400921659, + "grad_norm": 1.2852982676947153, + "learning_rate": 3.003402888628241e-07, + "loss": 0.7348824143409729, + "step": 6592 + }, + { + "epoch": 1.5191244239631336, + "grad_norm": 1.0897732641421132, + "learning_rate": 3.000681357555108e-07, + "loss": 0.8737039566040039, + "step": 6593 + }, + { + "epoch": 1.5193548387096776, + "grad_norm": 1.3095413820866733, + "learning_rate": 2.9979608424238134e-07, + "loss": 0.749860405921936, + "step": 6594 + }, + { + "epoch": 1.519585253456221, + "grad_norm": 1.4291988493830527, + "learning_rate": 2.99524134362923e-07, + "loss": 0.7583779096603394, + "step": 6595 + }, + { + "epoch": 1.519815668202765, + "grad_norm": 1.1886499728868618, + "learning_rate": 2.992522861566095e-07, + "loss": 0.7096224427223206, + "step": 6596 + }, + { + "epoch": 1.5200460829493088, + "grad_norm": 1.3265073494412316, + "learning_rate": 2.9898053966289904e-07, + "loss": 0.7813585996627808, + "step": 6597 + }, + { + "epoch": 1.5202764976958525, + "grad_norm": 1.3753919073529044, + "learning_rate": 2.9870889492123517e-07, + "loss": 0.7744605541229248, + "step": 6598 + }, + { + "epoch": 1.5205069124423964, + "grad_norm": 1.4661404938087315, + "learning_rate": 2.984373519710469e-07, + "loss": 0.8398552536964417, + "step": 6599 + }, + { + "epoch": 1.52073732718894, + "grad_norm": 1.1837780856173943, + "learning_rate": 2.981659108517478e-07, + "loss": 0.6853294372558594, + "step": 6600 + }, + { + "epoch": 1.520967741935484, + "grad_norm": 0.9892560165373243, + "learning_rate": 2.97894571602738e-07, + "loss": 0.7673987150192261, + "step": 6601 + }, + { + "epoch": 1.5211981566820276, + "grad_norm": 1.0638042713840496, + "learning_rate": 2.976233342634017e-07, + "loss": 0.7000377774238586, + "step": 6602 + }, + { + "epoch": 1.5214285714285714, + "grad_norm": 1.2089273111808856, + "learning_rate": 2.9735219887310857e-07, + "loss": 0.8429346680641174, + "step": 6603 + }, + { + "epoch": 1.5216589861751153, + 
"grad_norm": 1.4255685153178952, + "learning_rate": 2.970811654712133e-07, + "loss": 0.9118648767471313, + "step": 6604 + }, + { + "epoch": 1.521889400921659, + "grad_norm": 1.0974145188834663, + "learning_rate": 2.9681023409705666e-07, + "loss": 0.7745784521102905, + "step": 6605 + }, + { + "epoch": 1.5221198156682028, + "grad_norm": 1.234720575381531, + "learning_rate": 2.9653940478996367e-07, + "loss": 0.8481245040893555, + "step": 6606 + }, + { + "epoch": 1.5223502304147467, + "grad_norm": 1.1446582960275502, + "learning_rate": 2.9626867758924436e-07, + "loss": 0.8643463850021362, + "step": 6607 + }, + { + "epoch": 1.5225806451612902, + "grad_norm": 1.6406368897457513, + "learning_rate": 2.959980525341953e-07, + "loss": 0.9524952173233032, + "step": 6608 + }, + { + "epoch": 1.5228110599078342, + "grad_norm": 1.067119300713527, + "learning_rate": 2.9572752966409686e-07, + "loss": 0.7153829336166382, + "step": 6609 + }, + { + "epoch": 1.523041474654378, + "grad_norm": 1.1739681134356785, + "learning_rate": 2.954571090182149e-07, + "loss": 0.8332774639129639, + "step": 6610 + }, + { + "epoch": 1.5232718894009216, + "grad_norm": 1.3773090684366749, + "learning_rate": 2.9518679063580123e-07, + "loss": 0.7511743307113647, + "step": 6611 + }, + { + "epoch": 1.5235023041474656, + "grad_norm": 1.2327774867248482, + "learning_rate": 2.9491657455609175e-07, + "loss": 0.715233325958252, + "step": 6612 + }, + { + "epoch": 1.523732718894009, + "grad_norm": 1.139323635074032, + "learning_rate": 2.946464608183078e-07, + "loss": 0.7386246919631958, + "step": 6613 + }, + { + "epoch": 1.523963133640553, + "grad_norm": 1.1904592003911236, + "learning_rate": 2.943764494616565e-07, + "loss": 0.8337790369987488, + "step": 6614 + }, + { + "epoch": 1.5241935483870968, + "grad_norm": 1.394927398157402, + "learning_rate": 2.941065405253296e-07, + "loss": 0.8447855710983276, + "step": 6615 + }, + { + "epoch": 1.5244239631336405, + "grad_norm": 1.1307960049130217, + "learning_rate": 
2.938367340485035e-07, + "loss": 0.7430610060691833, + "step": 6616 + }, + { + "epoch": 1.5246543778801844, + "grad_norm": 1.134552871583557, + "learning_rate": 2.9356703007034087e-07, + "loss": 0.7740806937217712, + "step": 6617 + }, + { + "epoch": 1.5248847926267282, + "grad_norm": 1.2516085920875086, + "learning_rate": 2.9329742862998875e-07, + "loss": 0.7824152708053589, + "step": 6618 + }, + { + "epoch": 1.5251152073732719, + "grad_norm": 1.0852675062610386, + "learning_rate": 2.930279297665792e-07, + "loss": 0.9222463965415955, + "step": 6619 + }, + { + "epoch": 1.5253456221198156, + "grad_norm": 1.8096931577931101, + "learning_rate": 2.927585335192294e-07, + "loss": 0.9548497200012207, + "step": 6620 + }, + { + "epoch": 1.5255760368663593, + "grad_norm": 1.497275795232007, + "learning_rate": 2.9248923992704255e-07, + "loss": 0.9007906913757324, + "step": 6621 + }, + { + "epoch": 1.5258064516129033, + "grad_norm": 1.0647051889661132, + "learning_rate": 2.9222004902910593e-07, + "loss": 0.6932169198989868, + "step": 6622 + }, + { + "epoch": 1.526036866359447, + "grad_norm": 0.9763599663388729, + "learning_rate": 2.919509608644922e-07, + "loss": 0.7327853441238403, + "step": 6623 + }, + { + "epoch": 1.5262672811059907, + "grad_norm": 1.423305414970627, + "learning_rate": 2.916819754722588e-07, + "loss": 0.617963433265686, + "step": 6624 + }, + { + "epoch": 1.5264976958525347, + "grad_norm": 1.3790687935494703, + "learning_rate": 2.914130928914493e-07, + "loss": 1.0567349195480347, + "step": 6625 + }, + { + "epoch": 1.5267281105990782, + "grad_norm": 1.243824261339929, + "learning_rate": 2.9114431316109145e-07, + "loss": 0.7362378835678101, + "step": 6626 + }, + { + "epoch": 1.5269585253456222, + "grad_norm": 1.1636178458595106, + "learning_rate": 2.9087563632019774e-07, + "loss": 0.6879991888999939, + "step": 6627 + }, + { + "epoch": 1.5271889400921659, + "grad_norm": 1.2540530060828472, + "learning_rate": 2.9060706240776686e-07, + "loss": 0.7804177403450012, + 
"step": 6628 + }, + { + "epoch": 1.5274193548387096, + "grad_norm": 1.2450061818881997, + "learning_rate": 2.9033859146278197e-07, + "loss": 0.7459548711776733, + "step": 6629 + }, + { + "epoch": 1.5276497695852536, + "grad_norm": 1.1214229491247267, + "learning_rate": 2.900702235242106e-07, + "loss": 0.7392233610153198, + "step": 6630 + }, + { + "epoch": 1.5278801843317973, + "grad_norm": 1.0862664338119448, + "learning_rate": 2.8980195863100675e-07, + "loss": 0.6956135034561157, + "step": 6631 + }, + { + "epoch": 1.528110599078341, + "grad_norm": 1.1232709572579735, + "learning_rate": 2.8953379682210856e-07, + "loss": 0.7042561769485474, + "step": 6632 + }, + { + "epoch": 1.5283410138248847, + "grad_norm": 1.070241779197473, + "learning_rate": 2.8926573813643884e-07, + "loss": 0.7114298343658447, + "step": 6633 + }, + { + "epoch": 1.5285714285714285, + "grad_norm": 1.0297537166419386, + "learning_rate": 2.8899778261290664e-07, + "loss": 0.862826943397522, + "step": 6634 + }, + { + "epoch": 1.5288018433179724, + "grad_norm": 1.3240716498057261, + "learning_rate": 2.8872993029040506e-07, + "loss": 0.8229889869689941, + "step": 6635 + }, + { + "epoch": 1.5290322580645161, + "grad_norm": 1.2292174291080764, + "learning_rate": 2.884621812078122e-07, + "loss": 0.8058778047561646, + "step": 6636 + }, + { + "epoch": 1.5292626728110599, + "grad_norm": 1.2782782809475366, + "learning_rate": 2.881945354039921e-07, + "loss": 0.8150385618209839, + "step": 6637 + }, + { + "epoch": 1.5294930875576038, + "grad_norm": 1.1137449533588037, + "learning_rate": 2.8792699291779276e-07, + "loss": 0.7067136168479919, + "step": 6638 + }, + { + "epoch": 1.5297235023041473, + "grad_norm": 1.2793329729310776, + "learning_rate": 2.8765955378804784e-07, + "loss": 0.7725155353546143, + "step": 6639 + }, + { + "epoch": 1.5299539170506913, + "grad_norm": 1.0584861581127705, + "learning_rate": 2.873922180535754e-07, + "loss": 0.5956720113754272, + "step": 6640 + }, + { + "epoch": 
1.530184331797235, + "grad_norm": 1.1955034677005214, + "learning_rate": 2.8712498575317934e-07, + "loss": 0.6506170630455017, + "step": 6641 + }, + { + "epoch": 1.5304147465437787, + "grad_norm": 1.0781697188392338, + "learning_rate": 2.86857856925648e-07, + "loss": 0.7860926985740662, + "step": 6642 + }, + { + "epoch": 1.5306451612903227, + "grad_norm": 1.1840723689685375, + "learning_rate": 2.8659083160975464e-07, + "loss": 0.7003993391990662, + "step": 6643 + }, + { + "epoch": 1.5308755760368664, + "grad_norm": 1.1562706768971642, + "learning_rate": 2.8632390984425746e-07, + "loss": 0.6887079477310181, + "step": 6644 + }, + { + "epoch": 1.5311059907834101, + "grad_norm": 1.243117329825752, + "learning_rate": 2.860570916678998e-07, + "loss": 0.788282036781311, + "step": 6645 + }, + { + "epoch": 1.5313364055299539, + "grad_norm": 1.273283187040626, + "learning_rate": 2.8579037711941043e-07, + "loss": 0.771350085735321, + "step": 6646 + }, + { + "epoch": 1.5315668202764976, + "grad_norm": 1.1000030346921834, + "learning_rate": 2.855237662375021e-07, + "loss": 0.6418509483337402, + "step": 6647 + }, + { + "epoch": 1.5317972350230415, + "grad_norm": 1.022873677691871, + "learning_rate": 2.852572590608735e-07, + "loss": 0.6606692671775818, + "step": 6648 + }, + { + "epoch": 1.5320276497695853, + "grad_norm": 1.4727879897773712, + "learning_rate": 2.849908556282076e-07, + "loss": 0.8623934984207153, + "step": 6649 + }, + { + "epoch": 1.532258064516129, + "grad_norm": 1.1678986803146219, + "learning_rate": 2.8472455597817215e-07, + "loss": 0.848737359046936, + "step": 6650 + }, + { + "epoch": 1.532488479262673, + "grad_norm": 1.2265451299303025, + "learning_rate": 2.844583601494207e-07, + "loss": 0.7156505584716797, + "step": 6651 + }, + { + "epoch": 1.5327188940092165, + "grad_norm": 1.157360063816448, + "learning_rate": 2.8419226818059116e-07, + "loss": 0.598319411277771, + "step": 6652 + }, + { + "epoch": 1.5329493087557604, + "grad_norm": 1.0128877845083564, + 
"learning_rate": 2.8392628011030585e-07, + "loss": 0.6320680379867554, + "step": 6653 + }, + { + "epoch": 1.5331797235023041, + "grad_norm": 1.2437383042471344, + "learning_rate": 2.836603959771734e-07, + "loss": 0.8770536184310913, + "step": 6654 + }, + { + "epoch": 1.5334101382488479, + "grad_norm": 1.3327586940769975, + "learning_rate": 2.833946158197862e-07, + "loss": 0.896265983581543, + "step": 6655 + }, + { + "epoch": 1.5336405529953918, + "grad_norm": 1.1058301341236145, + "learning_rate": 2.8312893967672145e-07, + "loss": 0.7194868326187134, + "step": 6656 + }, + { + "epoch": 1.5338709677419353, + "grad_norm": 1.1479450761132848, + "learning_rate": 2.828633675865425e-07, + "loss": 0.7993383407592773, + "step": 6657 + }, + { + "epoch": 1.5341013824884793, + "grad_norm": 1.3252275312162691, + "learning_rate": 2.8259789958779635e-07, + "loss": 0.6808127760887146, + "step": 6658 + }, + { + "epoch": 1.534331797235023, + "grad_norm": 1.3083456260381565, + "learning_rate": 2.823325357190153e-07, + "loss": 0.7348822355270386, + "step": 6659 + }, + { + "epoch": 1.5345622119815667, + "grad_norm": 1.4520629186425333, + "learning_rate": 2.820672760187166e-07, + "loss": 0.7729920744895935, + "step": 6660 + }, + { + "epoch": 1.5347926267281107, + "grad_norm": 1.1927593175103235, + "learning_rate": 2.818021205254021e-07, + "loss": 0.803922176361084, + "step": 6661 + }, + { + "epoch": 1.5350230414746544, + "grad_norm": 1.1316086785563555, + "learning_rate": 2.815370692775594e-07, + "loss": 0.7931007146835327, + "step": 6662 + }, + { + "epoch": 1.5352534562211981, + "grad_norm": 0.9381855495475373, + "learning_rate": 2.8127212231365995e-07, + "loss": 0.7990511655807495, + "step": 6663 + }, + { + "epoch": 1.535483870967742, + "grad_norm": 1.1449374360466444, + "learning_rate": 2.8100727967216043e-07, + "loss": 0.8163471817970276, + "step": 6664 + }, + { + "epoch": 1.5357142857142856, + "grad_norm": 1.126530672311672, + "learning_rate": 2.8074254139150225e-07, + "loss": 
0.7628358602523804, + "step": 6665 + }, + { + "epoch": 1.5359447004608295, + "grad_norm": 1.216707261403855, + "learning_rate": 2.8047790751011216e-07, + "loss": 0.8008173704147339, + "step": 6666 + }, + { + "epoch": 1.5361751152073733, + "grad_norm": 1.4385072008960633, + "learning_rate": 2.802133780664013e-07, + "loss": 0.9139487743377686, + "step": 6667 + }, + { + "epoch": 1.536405529953917, + "grad_norm": 1.479452922561271, + "learning_rate": 2.7994895309876555e-07, + "loss": 0.9436901211738586, + "step": 6668 + }, + { + "epoch": 1.536635944700461, + "grad_norm": 1.1137684825301204, + "learning_rate": 2.7968463264558617e-07, + "loss": 0.8072221875190735, + "step": 6669 + }, + { + "epoch": 1.5368663594470044, + "grad_norm": 1.4031563621096825, + "learning_rate": 2.7942041674522866e-07, + "loss": 0.7434822916984558, + "step": 6670 + }, + { + "epoch": 1.5370967741935484, + "grad_norm": 1.1245525381043615, + "learning_rate": 2.7915630543604394e-07, + "loss": 0.6729850769042969, + "step": 6671 + }, + { + "epoch": 1.5373271889400921, + "grad_norm": 1.2279789151687839, + "learning_rate": 2.7889229875636723e-07, + "loss": 0.8752315044403076, + "step": 6672 + }, + { + "epoch": 1.5375576036866359, + "grad_norm": 1.2125823370266373, + "learning_rate": 2.786283967445184e-07, + "loss": 0.8519413471221924, + "step": 6673 + }, + { + "epoch": 1.5377880184331798, + "grad_norm": 1.2674824603159123, + "learning_rate": 2.783645994388032e-07, + "loss": 0.8868448734283447, + "step": 6674 + }, + { + "epoch": 1.5380184331797235, + "grad_norm": 1.2984993367707722, + "learning_rate": 2.78100906877511e-07, + "loss": 0.9223456978797913, + "step": 6675 + }, + { + "epoch": 1.5382488479262673, + "grad_norm": 1.0080180068423799, + "learning_rate": 2.7783731909891616e-07, + "loss": 0.799191951751709, + "step": 6676 + }, + { + "epoch": 1.5384792626728112, + "grad_norm": 1.1987572506109172, + "learning_rate": 2.775738361412788e-07, + "loss": 0.7092995643615723, + "step": 6677 + }, + { + "epoch": 
1.5387096774193547, + "grad_norm": 1.2206610409098804, + "learning_rate": 2.7731045804284283e-07, + "loss": 0.674687385559082, + "step": 6678 + }, + { + "epoch": 1.5389400921658987, + "grad_norm": 1.4910052625734944, + "learning_rate": 2.77047184841837e-07, + "loss": 0.7366930246353149, + "step": 6679 + }, + { + "epoch": 1.5391705069124424, + "grad_norm": 1.169385374165895, + "learning_rate": 2.767840165764753e-07, + "loss": 0.838137149810791, + "step": 6680 + }, + { + "epoch": 1.5394009216589861, + "grad_norm": 1.2120746756764942, + "learning_rate": 2.765209532849558e-07, + "loss": 0.7507175803184509, + "step": 6681 + }, + { + "epoch": 1.53963133640553, + "grad_norm": 1.2981666739842812, + "learning_rate": 2.7625799500546267e-07, + "loss": 0.8157602548599243, + "step": 6682 + }, + { + "epoch": 1.5398617511520736, + "grad_norm": 1.2345607869860449, + "learning_rate": 2.7599514177616333e-07, + "loss": 0.7779219150543213, + "step": 6683 + }, + { + "epoch": 1.5400921658986175, + "grad_norm": 1.186692939443946, + "learning_rate": 2.757323936352106e-07, + "loss": 0.8261638879776001, + "step": 6684 + }, + { + "epoch": 1.5403225806451613, + "grad_norm": 0.8917527422638705, + "learning_rate": 2.7546975062074197e-07, + "loss": 0.6139177680015564, + "step": 6685 + }, + { + "epoch": 1.540552995391705, + "grad_norm": 1.0945474995666544, + "learning_rate": 2.752072127708802e-07, + "loss": 0.744202733039856, + "step": 6686 + }, + { + "epoch": 1.540783410138249, + "grad_norm": 1.279582503351568, + "learning_rate": 2.749447801237319e-07, + "loss": 0.7685158848762512, + "step": 6687 + }, + { + "epoch": 1.5410138248847927, + "grad_norm": 1.4134776465364736, + "learning_rate": 2.7468245271738865e-07, + "loss": 0.7483633756637573, + "step": 6688 + }, + { + "epoch": 1.5412442396313364, + "grad_norm": 1.4452963556936742, + "learning_rate": 2.7442023058992746e-07, + "loss": 0.8967286348342896, + "step": 6689 + }, + { + "epoch": 1.5414746543778803, + "grad_norm": 3.4447797406152922, + 
"learning_rate": 2.7415811377940933e-07, + "loss": 0.8035085201263428, + "step": 6690 + }, + { + "epoch": 1.5417050691244238, + "grad_norm": 1.2535208224880003, + "learning_rate": 2.738961023238798e-07, + "loss": 0.8504149913787842, + "step": 6691 + }, + { + "epoch": 1.5419354838709678, + "grad_norm": 1.408249398601243, + "learning_rate": 2.736341962613701e-07, + "loss": 0.7612431049346924, + "step": 6692 + }, + { + "epoch": 1.5421658986175115, + "grad_norm": 1.3117649202054886, + "learning_rate": 2.733723956298951e-07, + "loss": 0.6974390745162964, + "step": 6693 + }, + { + "epoch": 1.5423963133640552, + "grad_norm": 1.10015572050179, + "learning_rate": 2.7311070046745476e-07, + "loss": 0.7946817874908447, + "step": 6694 + }, + { + "epoch": 1.5426267281105992, + "grad_norm": 1.3598767034128523, + "learning_rate": 2.728491108120342e-07, + "loss": 0.7801793813705444, + "step": 6695 + }, + { + "epoch": 1.5428571428571427, + "grad_norm": 1.0989233619042245, + "learning_rate": 2.725876267016023e-07, + "loss": 0.720335066318512, + "step": 6696 + }, + { + "epoch": 1.5430875576036867, + "grad_norm": 0.9331707903973574, + "learning_rate": 2.7232624817411376e-07, + "loss": 0.6820393800735474, + "step": 6697 + }, + { + "epoch": 1.5433179723502304, + "grad_norm": 1.2636082158419006, + "learning_rate": 2.7206497526750694e-07, + "loss": 0.8217613697052002, + "step": 6698 + }, + { + "epoch": 1.543548387096774, + "grad_norm": 1.2388683954169015, + "learning_rate": 2.7180380801970525e-07, + "loss": 0.7600520849227905, + "step": 6699 + }, + { + "epoch": 1.543778801843318, + "grad_norm": 1.2564669684453122, + "learning_rate": 2.7154274646861687e-07, + "loss": 0.9402344226837158, + "step": 6700 + }, + { + "epoch": 1.5440092165898618, + "grad_norm": 1.0720415723340906, + "learning_rate": 2.7128179065213417e-07, + "loss": 0.7470760345458984, + "step": 6701 + }, + { + "epoch": 1.5442396313364055, + "grad_norm": 1.0091593723711232, + "learning_rate": 2.710209406081353e-07, + "loss": 
0.6915948390960693, + "step": 6702 + }, + { + "epoch": 1.5444700460829495, + "grad_norm": 1.1829806437851378, + "learning_rate": 2.707601963744817e-07, + "loss": 0.7554904222488403, + "step": 6703 + }, + { + "epoch": 1.544700460829493, + "grad_norm": 0.9892324198221251, + "learning_rate": 2.7049955798902026e-07, + "loss": 0.8197575807571411, + "step": 6704 + }, + { + "epoch": 1.544930875576037, + "grad_norm": 1.3144339350992138, + "learning_rate": 2.702390254895819e-07, + "loss": 0.7106794118881226, + "step": 6705 + }, + { + "epoch": 1.5451612903225806, + "grad_norm": 1.1715761852419602, + "learning_rate": 2.699785989139832e-07, + "loss": 0.6320512294769287, + "step": 6706 + }, + { + "epoch": 1.5453917050691244, + "grad_norm": 1.2156391686389374, + "learning_rate": 2.697182783000246e-07, + "loss": 0.8327566385269165, + "step": 6707 + }, + { + "epoch": 1.5456221198156683, + "grad_norm": 1.2605126330062313, + "learning_rate": 2.6945806368549063e-07, + "loss": 0.8732178211212158, + "step": 6708 + }, + { + "epoch": 1.5458525345622118, + "grad_norm": 1.3881676599881438, + "learning_rate": 2.69197955108152e-07, + "loss": 0.8709380626678467, + "step": 6709 + }, + { + "epoch": 1.5460829493087558, + "grad_norm": 1.2029107229444744, + "learning_rate": 2.689379526057628e-07, + "loss": 0.7821739912033081, + "step": 6710 + }, + { + "epoch": 1.5463133640552995, + "grad_norm": 1.2268892680878298, + "learning_rate": 2.686780562160615e-07, + "loss": 0.8658162355422974, + "step": 6711 + }, + { + "epoch": 1.5465437788018432, + "grad_norm": 0.9914521746084854, + "learning_rate": 2.6841826597677274e-07, + "loss": 0.6354731321334839, + "step": 6712 + }, + { + "epoch": 1.5467741935483872, + "grad_norm": 1.132983970089502, + "learning_rate": 2.68158581925604e-07, + "loss": 0.8000082969665527, + "step": 6713 + }, + { + "epoch": 1.547004608294931, + "grad_norm": 1.0140012222754493, + "learning_rate": 2.6789900410024804e-07, + "loss": 0.7998030185699463, + "step": 6714 + }, + { + "epoch": 
1.5472350230414746, + "grad_norm": 1.2207312006862205, + "learning_rate": 2.676395325383827e-07, + "loss": 0.861609935760498, + "step": 6715 + }, + { + "epoch": 1.5474654377880186, + "grad_norm": 1.2739007648131329, + "learning_rate": 2.6738016727766976e-07, + "loss": 0.8119577765464783, + "step": 6716 + }, + { + "epoch": 1.547695852534562, + "grad_norm": 1.1272023201701244, + "learning_rate": 2.671209083557553e-07, + "loss": 0.7704594135284424, + "step": 6717 + }, + { + "epoch": 1.547926267281106, + "grad_norm": 1.1924986504981143, + "learning_rate": 2.6686175581027114e-07, + "loss": 0.7577236890792847, + "step": 6718 + }, + { + "epoch": 1.5481566820276498, + "grad_norm": 1.438095427566863, + "learning_rate": 2.666027096788326e-07, + "loss": 0.8362265825271606, + "step": 6719 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 1.3282450269784174, + "learning_rate": 2.6634376999903984e-07, + "loss": 0.7604315280914307, + "step": 6720 + }, + { + "epoch": 1.5486175115207375, + "grad_norm": 1.0996855935996066, + "learning_rate": 2.6608493680847757e-07, + "loss": 0.7181323766708374, + "step": 6721 + }, + { + "epoch": 1.548847926267281, + "grad_norm": 1.408245929611007, + "learning_rate": 2.6582621014471495e-07, + "loss": 0.8613896369934082, + "step": 6722 + }, + { + "epoch": 1.549078341013825, + "grad_norm": 1.1355853758662044, + "learning_rate": 2.6556759004530616e-07, + "loss": 0.6254151463508606, + "step": 6723 + }, + { + "epoch": 1.5493087557603686, + "grad_norm": 1.1737642272227355, + "learning_rate": 2.6530907654778957e-07, + "loss": 0.7960973381996155, + "step": 6724 + }, + { + "epoch": 1.5495391705069124, + "grad_norm": 1.1419390810119388, + "learning_rate": 2.6505066968968747e-07, + "loss": 0.7899094820022583, + "step": 6725 + }, + { + "epoch": 1.5497695852534563, + "grad_norm": 0.9820941780775652, + "learning_rate": 2.647923695085081e-07, + "loss": 0.6578950881958008, + "step": 6726 + }, + { + "epoch": 1.55, + "grad_norm": 1.3013325638388529, + 
"learning_rate": 2.64534176041743e-07, + "loss": 0.737798810005188, + "step": 6727 + }, + { + "epoch": 1.5502304147465438, + "grad_norm": 0.9487414790323747, + "learning_rate": 2.642760893268684e-07, + "loss": 0.7809627056121826, + "step": 6728 + }, + { + "epoch": 1.5504608294930877, + "grad_norm": 0.9991258167716155, + "learning_rate": 2.640181094013456e-07, + "loss": 0.6693655252456665, + "step": 6729 + }, + { + "epoch": 1.5506912442396312, + "grad_norm": 0.8705752911958233, + "learning_rate": 2.6376023630262003e-07, + "loss": 0.7264609932899475, + "step": 6730 + }, + { + "epoch": 1.5509216589861752, + "grad_norm": 1.0975251127061347, + "learning_rate": 2.635024700681211e-07, + "loss": 0.7585712671279907, + "step": 6731 + }, + { + "epoch": 1.551152073732719, + "grad_norm": 1.520332751892112, + "learning_rate": 2.6324481073526404e-07, + "loss": 0.7335324287414551, + "step": 6732 + }, + { + "epoch": 1.5513824884792626, + "grad_norm": 1.1271215778218124, + "learning_rate": 2.629872583414473e-07, + "loss": 0.835372805595398, + "step": 6733 + }, + { + "epoch": 1.5516129032258066, + "grad_norm": 1.231737661164668, + "learning_rate": 2.6272981292405405e-07, + "loss": 0.8069926500320435, + "step": 6734 + }, + { + "epoch": 1.55184331797235, + "grad_norm": 1.2110282300687614, + "learning_rate": 2.6247247452045285e-07, + "loss": 0.7548434138298035, + "step": 6735 + }, + { + "epoch": 1.552073732718894, + "grad_norm": 1.281837931597139, + "learning_rate": 2.6221524316799546e-07, + "loss": 0.6907505989074707, + "step": 6736 + }, + { + "epoch": 1.5523041474654378, + "grad_norm": 1.2384070012918627, + "learning_rate": 2.619581189040185e-07, + "loss": 0.8544988632202148, + "step": 6737 + }, + { + "epoch": 1.5525345622119815, + "grad_norm": 1.024260684065218, + "learning_rate": 2.6170110176584404e-07, + "loss": 0.7176710367202759, + "step": 6738 + }, + { + "epoch": 1.5527649769585254, + "grad_norm": 1.1771656195687117, + "learning_rate": 2.6144419179077715e-07, + "loss": 
0.7160323858261108, + "step": 6739 + }, + { + "epoch": 1.5529953917050692, + "grad_norm": 1.2619778254885654, + "learning_rate": 2.6118738901610806e-07, + "loss": 0.7749248743057251, + "step": 6740 + }, + { + "epoch": 1.553225806451613, + "grad_norm": 1.3014936029444653, + "learning_rate": 2.6093069347911145e-07, + "loss": 0.7701436281204224, + "step": 6741 + }, + { + "epoch": 1.5534562211981566, + "grad_norm": 1.2206842608778186, + "learning_rate": 2.606741052170459e-07, + "loss": 0.6725181341171265, + "step": 6742 + }, + { + "epoch": 1.5536866359447004, + "grad_norm": 1.0193653205430255, + "learning_rate": 2.6041762426715563e-07, + "loss": 0.7730624675750732, + "step": 6743 + }, + { + "epoch": 1.5539170506912443, + "grad_norm": 0.9417911057706564, + "learning_rate": 2.601612506666682e-07, + "loss": 0.7083867788314819, + "step": 6744 + }, + { + "epoch": 1.554147465437788, + "grad_norm": 1.1436343405561136, + "learning_rate": 2.599049844527953e-07, + "loss": 0.7680408954620361, + "step": 6745 + }, + { + "epoch": 1.5543778801843318, + "grad_norm": 0.9401611092461176, + "learning_rate": 2.596488256627346e-07, + "loss": 0.7145194411277771, + "step": 6746 + }, + { + "epoch": 1.5546082949308757, + "grad_norm": 1.6305632532659482, + "learning_rate": 2.593927743336667e-07, + "loss": 0.8626812696456909, + "step": 6747 + }, + { + "epoch": 1.5548387096774192, + "grad_norm": 1.1326626029703477, + "learning_rate": 2.591368305027569e-07, + "loss": 0.775201678276062, + "step": 6748 + }, + { + "epoch": 1.5550691244239632, + "grad_norm": 1.1775115850016065, + "learning_rate": 2.588809942071557e-07, + "loss": 0.9363858699798584, + "step": 6749 + }, + { + "epoch": 1.555299539170507, + "grad_norm": 1.0406152793499837, + "learning_rate": 2.5862526548399697e-07, + "loss": 0.8079385757446289, + "step": 6750 + }, + { + "epoch": 1.5555299539170506, + "grad_norm": 1.2405408742249928, + "learning_rate": 2.5836964437039934e-07, + "loss": 0.8635082840919495, + "step": 6751 + }, + { + "epoch": 
1.5557603686635946, + "grad_norm": 1.072904507718934, + "learning_rate": 2.581141309034662e-07, + "loss": 0.7840827703475952, + "step": 6752 + }, + { + "epoch": 1.5559907834101383, + "grad_norm": 1.202200191511419, + "learning_rate": 2.5785872512028497e-07, + "loss": 0.7833336591720581, + "step": 6753 + }, + { + "epoch": 1.556221198156682, + "grad_norm": 1.2301348726534915, + "learning_rate": 2.576034270579269e-07, + "loss": 0.7340226173400879, + "step": 6754 + }, + { + "epoch": 1.5564516129032258, + "grad_norm": 0.9782804135142905, + "learning_rate": 2.5734823675344895e-07, + "loss": 0.6423541307449341, + "step": 6755 + }, + { + "epoch": 1.5566820276497695, + "grad_norm": 1.1992594758940591, + "learning_rate": 2.570931542438913e-07, + "loss": 0.7772454619407654, + "step": 6756 + }, + { + "epoch": 1.5569124423963134, + "grad_norm": 1.192101331643462, + "learning_rate": 2.568381795662785e-07, + "loss": 0.8113390803337097, + "step": 6757 + }, + { + "epoch": 1.5571428571428572, + "grad_norm": 1.1257023205339645, + "learning_rate": 2.5658331275762045e-07, + "loss": 0.6688467264175415, + "step": 6758 + }, + { + "epoch": 1.557373271889401, + "grad_norm": 1.0966214019602503, + "learning_rate": 2.5632855385491037e-07, + "loss": 0.8140766620635986, + "step": 6759 + }, + { + "epoch": 1.5576036866359448, + "grad_norm": 1.0260387911312179, + "learning_rate": 2.560739028951262e-07, + "loss": 0.7661154270172119, + "step": 6760 + }, + { + "epoch": 1.5578341013824883, + "grad_norm": 1.2298722431512563, + "learning_rate": 2.558193599152302e-07, + "loss": 0.6781749725341797, + "step": 6761 + }, + { + "epoch": 1.5580645161290323, + "grad_norm": 1.62266115954538, + "learning_rate": 2.5556492495216865e-07, + "loss": 0.8885331749916077, + "step": 6762 + }, + { + "epoch": 1.558294930875576, + "grad_norm": 1.3197551931331304, + "learning_rate": 2.55310598042873e-07, + "loss": 0.799277663230896, + "step": 6763 + }, + { + "epoch": 1.5585253456221198, + "grad_norm": 1.205426943239231, + 
"learning_rate": 2.550563792242583e-07, + "loss": 0.8288404941558838, + "step": 6764 + }, + { + "epoch": 1.5587557603686637, + "grad_norm": 1.1206026594489704, + "learning_rate": 2.5480226853322397e-07, + "loss": 0.9452340602874756, + "step": 6765 + }, + { + "epoch": 1.5589861751152074, + "grad_norm": 1.068059951967386, + "learning_rate": 2.5454826600665347e-07, + "loss": 0.6716231107711792, + "step": 6766 + }, + { + "epoch": 1.5592165898617512, + "grad_norm": 0.9885922984637816, + "learning_rate": 2.542943716814157e-07, + "loss": 0.90239417552948, + "step": 6767 + }, + { + "epoch": 1.5594470046082949, + "grad_norm": 1.306788685526263, + "learning_rate": 2.5404058559436225e-07, + "loss": 0.7895521521568298, + "step": 6768 + }, + { + "epoch": 1.5596774193548386, + "grad_norm": 1.1707304874415911, + "learning_rate": 2.537869077823307e-07, + "loss": 0.8097352385520935, + "step": 6769 + }, + { + "epoch": 1.5599078341013826, + "grad_norm": 1.2075274904697726, + "learning_rate": 2.535333382821415e-07, + "loss": 0.7599455118179321, + "step": 6770 + }, + { + "epoch": 1.5601382488479263, + "grad_norm": 1.3869678105449568, + "learning_rate": 2.5327987713059986e-07, + "loss": 0.8735921382904053, + "step": 6771 + }, + { + "epoch": 1.56036866359447, + "grad_norm": 1.3359870563601237, + "learning_rate": 2.530265243644958e-07, + "loss": 0.7263825535774231, + "step": 6772 + }, + { + "epoch": 1.560599078341014, + "grad_norm": 1.2240386443766704, + "learning_rate": 2.5277328002060296e-07, + "loss": 0.8642966747283936, + "step": 6773 + }, + { + "epoch": 1.5608294930875575, + "grad_norm": 0.9860249164323385, + "learning_rate": 2.525201441356789e-07, + "loss": 0.6928948163986206, + "step": 6774 + }, + { + "epoch": 1.5610599078341014, + "grad_norm": 1.0977742625281808, + "learning_rate": 2.522671167464667e-07, + "loss": 0.7841427326202393, + "step": 6775 + }, + { + "epoch": 1.5612903225806452, + "grad_norm": 1.102415991736206, + "learning_rate": 2.5201419788969267e-07, + "loss": 
0.6539766192436218, + "step": 6776 + }, + { + "epoch": 1.5615207373271889, + "grad_norm": 1.3087388284847004, + "learning_rate": 2.5176138760206734e-07, + "loss": 0.7817956805229187, + "step": 6777 + }, + { + "epoch": 1.5617511520737328, + "grad_norm": 1.2176256508295467, + "learning_rate": 2.5150868592028626e-07, + "loss": 0.7847198843955994, + "step": 6778 + }, + { + "epoch": 1.5619815668202763, + "grad_norm": 1.1618742898915668, + "learning_rate": 2.5125609288102856e-07, + "loss": 0.8248952627182007, + "step": 6779 + }, + { + "epoch": 1.5622119815668203, + "grad_norm": 1.315546210832164, + "learning_rate": 2.510036085209578e-07, + "loss": 0.8099820613861084, + "step": 6780 + }, + { + "epoch": 1.562442396313364, + "grad_norm": 1.0605499181430498, + "learning_rate": 2.5075123287672173e-07, + "loss": 0.7764754295349121, + "step": 6781 + }, + { + "epoch": 1.5626728110599077, + "grad_norm": 1.376636441531992, + "learning_rate": 2.5049896598495234e-07, + "loss": 0.8055214285850525, + "step": 6782 + }, + { + "epoch": 1.5629032258064517, + "grad_norm": 1.2086842805836235, + "learning_rate": 2.502468078822656e-07, + "loss": 0.7536123991012573, + "step": 6783 + }, + { + "epoch": 1.5631336405529954, + "grad_norm": 1.3330366284043236, + "learning_rate": 2.499947586052623e-07, + "loss": 0.8212461471557617, + "step": 6784 + }, + { + "epoch": 1.5633640552995391, + "grad_norm": 1.087165735027238, + "learning_rate": 2.49742818190527e-07, + "loss": 0.7297977209091187, + "step": 6785 + }, + { + "epoch": 1.563594470046083, + "grad_norm": 1.3633339944793545, + "learning_rate": 2.494909866746282e-07, + "loss": 0.752082109451294, + "step": 6786 + }, + { + "epoch": 1.5638248847926266, + "grad_norm": 1.231077416550479, + "learning_rate": 2.4923926409411934e-07, + "loss": 0.9181928634643555, + "step": 6787 + }, + { + "epoch": 1.5640552995391706, + "grad_norm": 1.263799738870316, + "learning_rate": 2.489876504855374e-07, + "loss": 0.8607058525085449, + "step": 6788 + }, + { + "epoch": 
1.5642857142857143, + "grad_norm": 1.4864085600196295, + "learning_rate": 2.4873614588540347e-07, + "loss": 0.9659625887870789, + "step": 6789 + }, + { + "epoch": 1.564516129032258, + "grad_norm": 1.7132447669994355, + "learning_rate": 2.4848475033022377e-07, + "loss": 0.8357822299003601, + "step": 6790 + }, + { + "epoch": 1.564746543778802, + "grad_norm": 1.4493565138453182, + "learning_rate": 2.482334638564877e-07, + "loss": 0.7871281504631042, + "step": 6791 + }, + { + "epoch": 1.5649769585253455, + "grad_norm": 0.9644716518923556, + "learning_rate": 2.4798228650066874e-07, + "loss": 0.7221591472625732, + "step": 6792 + }, + { + "epoch": 1.5652073732718894, + "grad_norm": 1.217051022182652, + "learning_rate": 2.4773121829922586e-07, + "loss": 0.7399123907089233, + "step": 6793 + }, + { + "epoch": 1.5654377880184331, + "grad_norm": 1.6036073035934815, + "learning_rate": 2.474802592886003e-07, + "loss": 0.8159279227256775, + "step": 6794 + }, + { + "epoch": 1.5656682027649769, + "grad_norm": 1.208678395846015, + "learning_rate": 2.472294095052192e-07, + "loss": 0.8222753405570984, + "step": 6795 + }, + { + "epoch": 1.5658986175115208, + "grad_norm": 1.0411919729384558, + "learning_rate": 2.469786689854928e-07, + "loss": 0.6586673259735107, + "step": 6796 + }, + { + "epoch": 1.5661290322580645, + "grad_norm": 1.0728597460775429, + "learning_rate": 2.467280377658154e-07, + "loss": 0.8361790180206299, + "step": 6797 + }, + { + "epoch": 1.5663594470046083, + "grad_norm": 1.2928413385952742, + "learning_rate": 2.464775158825665e-07, + "loss": 0.7669099569320679, + "step": 6798 + }, + { + "epoch": 1.5665898617511522, + "grad_norm": 1.331214255352709, + "learning_rate": 2.462271033721086e-07, + "loss": 0.7876452207565308, + "step": 6799 + }, + { + "epoch": 1.5668202764976957, + "grad_norm": 1.2617656160077577, + "learning_rate": 2.459768002707887e-07, + "loss": 0.7932916879653931, + "step": 6800 + }, + { + "epoch": 1.5670506912442397, + "grad_norm": 1.1101874723309544, + 
"learning_rate": 2.457266066149382e-07, + "loss": 0.734020471572876, + "step": 6801 + }, + { + "epoch": 1.5672811059907834, + "grad_norm": 1.2001011742733312, + "learning_rate": 2.4547652244087216e-07, + "loss": 0.6975284814834595, + "step": 6802 + }, + { + "epoch": 1.5675115207373271, + "grad_norm": 1.213830843525294, + "learning_rate": 2.452265477848896e-07, + "loss": 0.7214465737342834, + "step": 6803 + }, + { + "epoch": 1.567741935483871, + "grad_norm": 1.1586033079782525, + "learning_rate": 2.4497668268327485e-07, + "loss": 0.8645110130310059, + "step": 6804 + }, + { + "epoch": 1.5679723502304146, + "grad_norm": 1.0991857687698348, + "learning_rate": 2.4472692717229504e-07, + "loss": 0.7389887571334839, + "step": 6805 + }, + { + "epoch": 1.5682027649769585, + "grad_norm": 1.206958266137894, + "learning_rate": 2.4447728128820165e-07, + "loss": 0.8462876081466675, + "step": 6806 + }, + { + "epoch": 1.5684331797235023, + "grad_norm": 1.2507487710365972, + "learning_rate": 2.44227745067231e-07, + "loss": 0.824936032295227, + "step": 6807 + }, + { + "epoch": 1.568663594470046, + "grad_norm": 1.2566804457387248, + "learning_rate": 2.439783185456027e-07, + "loss": 0.8516823053359985, + "step": 6808 + }, + { + "epoch": 1.56889400921659, + "grad_norm": 1.065798809017728, + "learning_rate": 2.4372900175952015e-07, + "loss": 0.6154674291610718, + "step": 6809 + }, + { + "epoch": 1.5691244239631337, + "grad_norm": 1.2816681742105784, + "learning_rate": 2.434797947451722e-07, + "loss": 0.7769260406494141, + "step": 6810 + }, + { + "epoch": 1.5693548387096774, + "grad_norm": 1.2232245245328917, + "learning_rate": 2.432306975387306e-07, + "loss": 0.9525332450866699, + "step": 6811 + }, + { + "epoch": 1.5695852534562214, + "grad_norm": 1.3409057347397177, + "learning_rate": 2.429817101763511e-07, + "loss": 0.7537581920623779, + "step": 6812 + }, + { + "epoch": 1.5698156682027649, + "grad_norm": 1.3548012775304474, + "learning_rate": 2.427328326941744e-07, + "loss": 
0.814711332321167, + "step": 6813 + }, + { + "epoch": 1.5700460829493088, + "grad_norm": 1.3820372699413255, + "learning_rate": 2.4248406512832466e-07, + "loss": 0.708736777305603, + "step": 6814 + }, + { + "epoch": 1.5702764976958525, + "grad_norm": 1.1061554332755352, + "learning_rate": 2.422354075149098e-07, + "loss": 0.6757712960243225, + "step": 6815 + }, + { + "epoch": 1.5705069124423963, + "grad_norm": 1.0865188505414496, + "learning_rate": 2.4198685989002257e-07, + "loss": 0.736266553401947, + "step": 6816 + }, + { + "epoch": 1.5707373271889402, + "grad_norm": 1.1180343138508952, + "learning_rate": 2.417384222897392e-07, + "loss": 0.7423173189163208, + "step": 6817 + }, + { + "epoch": 1.5709677419354837, + "grad_norm": 1.2076049425001651, + "learning_rate": 2.414900947501197e-07, + "loss": 0.7260550260543823, + "step": 6818 + }, + { + "epoch": 1.5711981566820277, + "grad_norm": 1.241277027009942, + "learning_rate": 2.4124187730720915e-07, + "loss": 0.7125939130783081, + "step": 6819 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 1.1330555560067848, + "learning_rate": 2.409937699970356e-07, + "loss": 0.7429558634757996, + "step": 6820 + }, + { + "epoch": 1.5716589861751151, + "grad_norm": 1.1709438494600335, + "learning_rate": 2.407457728556115e-07, + "loss": 0.7166736721992493, + "step": 6821 + }, + { + "epoch": 1.571889400921659, + "grad_norm": 1.1783418664080478, + "learning_rate": 2.4049788591893336e-07, + "loss": 0.7438491582870483, + "step": 6822 + }, + { + "epoch": 1.5721198156682028, + "grad_norm": 1.3579191422740273, + "learning_rate": 2.402501092229814e-07, + "loss": 0.8031798601150513, + "step": 6823 + }, + { + "epoch": 1.5723502304147465, + "grad_norm": 1.3256875261480106, + "learning_rate": 2.400024428037206e-07, + "loss": 0.7067087888717651, + "step": 6824 + }, + { + "epoch": 1.5725806451612905, + "grad_norm": 1.1524386121511956, + "learning_rate": 2.3975488669709906e-07, + "loss": 0.7147783041000366, + "step": 6825 + }, + { + "epoch": 
1.572811059907834, + "grad_norm": 1.2529979656124484, + "learning_rate": 2.395074409390491e-07, + "loss": 0.8534795641899109, + "step": 6826 + }, + { + "epoch": 1.573041474654378, + "grad_norm": 1.0527069171574706, + "learning_rate": 2.392601055654875e-07, + "loss": 0.7630984783172607, + "step": 6827 + }, + { + "epoch": 1.5732718894009217, + "grad_norm": 1.3268090351372508, + "learning_rate": 2.390128806123145e-07, + "loss": 0.9395428895950317, + "step": 6828 + }, + { + "epoch": 1.5735023041474654, + "grad_norm": 1.1905263432335205, + "learning_rate": 2.3876576611541423e-07, + "loss": 0.7086023092269897, + "step": 6829 + }, + { + "epoch": 1.5737327188940093, + "grad_norm": 1.0320188306367468, + "learning_rate": 2.385187621106555e-07, + "loss": 0.6937201619148254, + "step": 6830 + }, + { + "epoch": 1.5739631336405528, + "grad_norm": 1.1238131407833931, + "learning_rate": 2.3827186863389037e-07, + "loss": 0.7339247465133667, + "step": 6831 + }, + { + "epoch": 1.5741935483870968, + "grad_norm": 0.9948868064813976, + "learning_rate": 2.3802508572095493e-07, + "loss": 0.8453131318092346, + "step": 6832 + }, + { + "epoch": 1.5744239631336405, + "grad_norm": 1.2870129222879585, + "learning_rate": 2.377784134076698e-07, + "loss": 0.7303619384765625, + "step": 6833 + }, + { + "epoch": 1.5746543778801843, + "grad_norm": 1.1663952236638828, + "learning_rate": 2.3753185172983893e-07, + "loss": 0.9635858535766602, + "step": 6834 + }, + { + "epoch": 1.5748847926267282, + "grad_norm": 0.9711435467160289, + "learning_rate": 2.3728540072324998e-07, + "loss": 0.7174761295318604, + "step": 6835 + }, + { + "epoch": 1.575115207373272, + "grad_norm": 1.0168865512931398, + "learning_rate": 2.3703906042367584e-07, + "loss": 0.7375633716583252, + "step": 6836 + }, + { + "epoch": 1.5753456221198157, + "grad_norm": 1.0569071581049987, + "learning_rate": 2.3679283086687206e-07, + "loss": 0.8202652931213379, + "step": 6837 + }, + { + "epoch": 1.5755760368663596, + "grad_norm": 
1.4428887155533328, + "learning_rate": 2.3654671208857823e-07, + "loss": 0.8448499441146851, + "step": 6838 + }, + { + "epoch": 1.5758064516129031, + "grad_norm": 1.3297185542360797, + "learning_rate": 2.3630070412451864e-07, + "loss": 0.7840893268585205, + "step": 6839 + }, + { + "epoch": 1.576036866359447, + "grad_norm": 1.1930310177318706, + "learning_rate": 2.3605480701040092e-07, + "loss": 0.8036940693855286, + "step": 6840 + }, + { + "epoch": 1.5762672811059908, + "grad_norm": 1.2730513650169084, + "learning_rate": 2.3580902078191666e-07, + "loss": 0.8333625793457031, + "step": 6841 + }, + { + "epoch": 1.5764976958525345, + "grad_norm": 1.3288439351572012, + "learning_rate": 2.3556334547474133e-07, + "loss": 0.804919958114624, + "step": 6842 + }, + { + "epoch": 1.5767281105990785, + "grad_norm": 1.2605928054638793, + "learning_rate": 2.3531778112453416e-07, + "loss": 0.752541720867157, + "step": 6843 + }, + { + "epoch": 1.576958525345622, + "grad_norm": 1.0378289852617786, + "learning_rate": 2.3507232776693896e-07, + "loss": 0.647051215171814, + "step": 6844 + }, + { + "epoch": 1.577188940092166, + "grad_norm": 1.1139826400416593, + "learning_rate": 2.3482698543758285e-07, + "loss": 0.7546517848968506, + "step": 6845 + }, + { + "epoch": 1.5774193548387097, + "grad_norm": 1.0118514872509952, + "learning_rate": 2.345817541720766e-07, + "loss": 0.8773425817489624, + "step": 6846 + }, + { + "epoch": 1.5776497695852534, + "grad_norm": 1.1485612061840695, + "learning_rate": 2.3433663400601567e-07, + "loss": 0.9538160562515259, + "step": 6847 + }, + { + "epoch": 1.5778801843317973, + "grad_norm": 1.0298677066929223, + "learning_rate": 2.340916249749787e-07, + "loss": 0.6275157332420349, + "step": 6848 + }, + { + "epoch": 1.578110599078341, + "grad_norm": 1.1889533964841936, + "learning_rate": 2.3384672711452812e-07, + "loss": 0.7729284167289734, + "step": 6849 + }, + { + "epoch": 1.5783410138248848, + "grad_norm": 1.4210079123943715, + "learning_rate": 
2.3360194046021108e-07, + "loss": 0.8361644148826599, + "step": 6850 + }, + { + "epoch": 1.5785714285714287, + "grad_norm": 1.2305172757518368, + "learning_rate": 2.3335726504755793e-07, + "loss": 0.6782940626144409, + "step": 6851 + }, + { + "epoch": 1.5788018433179722, + "grad_norm": 1.3612688278959233, + "learning_rate": 2.3311270091208256e-07, + "loss": 0.8036615252494812, + "step": 6852 + }, + { + "epoch": 1.5790322580645162, + "grad_norm": 1.1729176601878941, + "learning_rate": 2.3286824808928362e-07, + "loss": 0.8450125455856323, + "step": 6853 + }, + { + "epoch": 1.57926267281106, + "grad_norm": 1.2162582175159786, + "learning_rate": 2.3262390661464303e-07, + "loss": 0.6546198725700378, + "step": 6854 + }, + { + "epoch": 1.5794930875576036, + "grad_norm": 1.4056383803669428, + "learning_rate": 2.3237967652362612e-07, + "loss": 0.8201385140419006, + "step": 6855 + }, + { + "epoch": 1.5797235023041476, + "grad_norm": 1.3504561324932176, + "learning_rate": 2.3213555785168336e-07, + "loss": 0.8753508925437927, + "step": 6856 + }, + { + "epoch": 1.579953917050691, + "grad_norm": 1.2672866740553073, + "learning_rate": 2.3189155063424782e-07, + "loss": 0.5884093642234802, + "step": 6857 + }, + { + "epoch": 1.580184331797235, + "grad_norm": 1.0135145180947078, + "learning_rate": 2.3164765490673654e-07, + "loss": 0.6494029760360718, + "step": 6858 + }, + { + "epoch": 1.5804147465437788, + "grad_norm": 1.1478304397345402, + "learning_rate": 2.3140387070455126e-07, + "loss": 0.7407097220420837, + "step": 6859 + }, + { + "epoch": 1.5806451612903225, + "grad_norm": 1.3351942864944542, + "learning_rate": 2.3116019806307673e-07, + "loss": 0.8934177160263062, + "step": 6860 + }, + { + "epoch": 1.5808755760368665, + "grad_norm": 1.0696222163552975, + "learning_rate": 2.309166370176816e-07, + "loss": 0.7487956881523132, + "step": 6861 + }, + { + "epoch": 1.5811059907834102, + "grad_norm": 1.316829236490256, + "learning_rate": 2.3067318760371845e-07, + "loss": 
0.7744357585906982, + "step": 6862 + }, + { + "epoch": 1.581336405529954, + "grad_norm": 1.3202738468289819, + "learning_rate": 2.304298498565237e-07, + "loss": 0.8871743679046631, + "step": 6863 + }, + { + "epoch": 1.5815668202764976, + "grad_norm": 1.3064493000042272, + "learning_rate": 2.3018662381141717e-07, + "loss": 0.7865666151046753, + "step": 6864 + }, + { + "epoch": 1.5817972350230414, + "grad_norm": 1.4125222114326161, + "learning_rate": 2.2994350950370334e-07, + "loss": 0.8416531682014465, + "step": 6865 + }, + { + "epoch": 1.5820276497695853, + "grad_norm": 1.5275008378701445, + "learning_rate": 2.2970050696866972e-07, + "loss": 0.8443950414657593, + "step": 6866 + }, + { + "epoch": 1.582258064516129, + "grad_norm": 1.4005476364990852, + "learning_rate": 2.2945761624158756e-07, + "loss": 0.7770054340362549, + "step": 6867 + }, + { + "epoch": 1.5824884792626728, + "grad_norm": 1.2159355438440163, + "learning_rate": 2.2921483735771252e-07, + "loss": 0.7263047695159912, + "step": 6868 + }, + { + "epoch": 1.5827188940092167, + "grad_norm": 1.3958985609002883, + "learning_rate": 2.2897217035228312e-07, + "loss": 0.8288376927375793, + "step": 6869 + }, + { + "epoch": 1.5829493087557602, + "grad_norm": 1.2630380344196672, + "learning_rate": 2.2872961526052292e-07, + "loss": 0.8325462937355042, + "step": 6870 + }, + { + "epoch": 1.5831797235023042, + "grad_norm": 1.317005879944655, + "learning_rate": 2.284871721176379e-07, + "loss": 0.7412815093994141, + "step": 6871 + }, + { + "epoch": 1.583410138248848, + "grad_norm": 1.2813974132427688, + "learning_rate": 2.2824484095881823e-07, + "loss": 0.8958117961883545, + "step": 6872 + }, + { + "epoch": 1.5836405529953916, + "grad_norm": 1.228628782021168, + "learning_rate": 2.2800262181923858e-07, + "loss": 0.8374444246292114, + "step": 6873 + }, + { + "epoch": 1.5838709677419356, + "grad_norm": 1.2394995315660131, + "learning_rate": 2.2776051473405634e-07, + "loss": 0.7900353670120239, + "step": 6874 + }, + { + 
"epoch": 1.5841013824884793, + "grad_norm": 1.129671125708823, + "learning_rate": 2.2751851973841285e-07, + "loss": 0.7420408725738525, + "step": 6875 + }, + { + "epoch": 1.584331797235023, + "grad_norm": 1.3245275433928243, + "learning_rate": 2.2727663686743382e-07, + "loss": 0.8902314305305481, + "step": 6876 + }, + { + "epoch": 1.5845622119815668, + "grad_norm": 1.2122656586799572, + "learning_rate": 2.27034866156228e-07, + "loss": 0.739869236946106, + "step": 6877 + }, + { + "epoch": 1.5847926267281105, + "grad_norm": 1.169654737499052, + "learning_rate": 2.2679320763988775e-07, + "loss": 0.8340646624565125, + "step": 6878 + }, + { + "epoch": 1.5850230414746544, + "grad_norm": 1.3076425110312813, + "learning_rate": 2.2655166135349013e-07, + "loss": 0.7501030564308167, + "step": 6879 + }, + { + "epoch": 1.5852534562211982, + "grad_norm": 1.0619799072208593, + "learning_rate": 2.2631022733209504e-07, + "loss": 0.722623348236084, + "step": 6880 + }, + { + "epoch": 1.585483870967742, + "grad_norm": 1.4046404033814042, + "learning_rate": 2.260689056107461e-07, + "loss": 0.8319696187973022, + "step": 6881 + }, + { + "epoch": 1.5857142857142859, + "grad_norm": 1.2824383261655956, + "learning_rate": 2.2582769622447107e-07, + "loss": 0.85502028465271, + "step": 6882 + }, + { + "epoch": 1.5859447004608294, + "grad_norm": 1.444500113904039, + "learning_rate": 2.2558659920828095e-07, + "loss": 0.7942626476287842, + "step": 6883 + }, + { + "epoch": 1.5861751152073733, + "grad_norm": 0.9346347634599198, + "learning_rate": 2.253456145971705e-07, + "loss": 0.6731030941009521, + "step": 6884 + }, + { + "epoch": 1.586405529953917, + "grad_norm": 1.2567565363582325, + "learning_rate": 2.2510474242611887e-07, + "loss": 0.8479423522949219, + "step": 6885 + }, + { + "epoch": 1.5866359447004608, + "grad_norm": 1.0824322707106273, + "learning_rate": 2.2486398273008812e-07, + "loss": 0.7398810386657715, + "step": 6886 + }, + { + "epoch": 1.5868663594470047, + "grad_norm": 
1.4531636253389437, + "learning_rate": 2.246233355440238e-07, + "loss": 0.8422881364822388, + "step": 6887 + }, + { + "epoch": 1.5870967741935482, + "grad_norm": 1.4298247398214885, + "learning_rate": 2.2438280090285612e-07, + "loss": 0.8307279944419861, + "step": 6888 + }, + { + "epoch": 1.5873271889400922, + "grad_norm": 1.3280924437525041, + "learning_rate": 2.2414237884149821e-07, + "loss": 0.8329004049301147, + "step": 6889 + }, + { + "epoch": 1.587557603686636, + "grad_norm": 1.196093026387475, + "learning_rate": 2.2390206939484645e-07, + "loss": 0.801641583442688, + "step": 6890 + }, + { + "epoch": 1.5877880184331796, + "grad_norm": 1.359543687074451, + "learning_rate": 2.2366187259778235e-07, + "loss": 0.9850986003875732, + "step": 6891 + }, + { + "epoch": 1.5880184331797236, + "grad_norm": 1.2770195506897435, + "learning_rate": 2.2342178848516935e-07, + "loss": 0.7169715166091919, + "step": 6892 + }, + { + "epoch": 1.5882488479262673, + "grad_norm": 1.1258655345605515, + "learning_rate": 2.2318181709185603e-07, + "loss": 0.7509033679962158, + "step": 6893 + }, + { + "epoch": 1.588479262672811, + "grad_norm": 1.2429319924869415, + "learning_rate": 2.2294195845267348e-07, + "loss": 0.6974655985832214, + "step": 6894 + }, + { + "epoch": 1.588709677419355, + "grad_norm": 1.1949954122245936, + "learning_rate": 2.227022126024367e-07, + "loss": 0.7388278245925903, + "step": 6895 + }, + { + "epoch": 1.5889400921658985, + "grad_norm": 1.1219112420315915, + "learning_rate": 2.2246257957594506e-07, + "loss": 0.6479122638702393, + "step": 6896 + }, + { + "epoch": 1.5891705069124424, + "grad_norm": 1.2556673774557678, + "learning_rate": 2.222230594079807e-07, + "loss": 0.759338915348053, + "step": 6897 + }, + { + "epoch": 1.5894009216589862, + "grad_norm": 1.1747779352742982, + "learning_rate": 2.2198365213330937e-07, + "loss": 0.7299938201904297, + "step": 6898 + }, + { + "epoch": 1.58963133640553, + "grad_norm": 1.2072520940330866, + "learning_rate": 
2.2174435778668122e-07, + "loss": 0.707555890083313, + "step": 6899 + }, + { + "epoch": 1.5898617511520738, + "grad_norm": 1.3083069601374675, + "learning_rate": 2.2150517640282918e-07, + "loss": 0.8311065435409546, + "step": 6900 + }, + { + "epoch": 1.5900921658986173, + "grad_norm": 1.1585381591481734, + "learning_rate": 2.2126610801647028e-07, + "loss": 0.6494649648666382, + "step": 6901 + }, + { + "epoch": 1.5903225806451613, + "grad_norm": 1.006735116508423, + "learning_rate": 2.2102715266230486e-07, + "loss": 0.6563294529914856, + "step": 6902 + }, + { + "epoch": 1.590552995391705, + "grad_norm": 1.0365958828861261, + "learning_rate": 2.207883103750171e-07, + "loss": 0.7426891326904297, + "step": 6903 + }, + { + "epoch": 1.5907834101382488, + "grad_norm": 0.9164747480191582, + "learning_rate": 2.2054958118927413e-07, + "loss": 0.7074661254882812, + "step": 6904 + }, + { + "epoch": 1.5910138248847927, + "grad_norm": 1.4657092079572216, + "learning_rate": 2.203109651397279e-07, + "loss": 0.8407880663871765, + "step": 6905 + }, + { + "epoch": 1.5912442396313364, + "grad_norm": 1.014884431152031, + "learning_rate": 2.2007246226101296e-07, + "loss": 0.7228440642356873, + "step": 6906 + }, + { + "epoch": 1.5914746543778802, + "grad_norm": 1.1100543617790197, + "learning_rate": 2.1983407258774733e-07, + "loss": 0.6988812685012817, + "step": 6907 + }, + { + "epoch": 1.591705069124424, + "grad_norm": 1.3237351414434337, + "learning_rate": 2.195957961545335e-07, + "loss": 0.793757438659668, + "step": 6908 + }, + { + "epoch": 1.5919354838709676, + "grad_norm": 1.2485526093365642, + "learning_rate": 2.1935763299595678e-07, + "loss": 0.8621397018432617, + "step": 6909 + }, + { + "epoch": 1.5921658986175116, + "grad_norm": 1.2314950700356975, + "learning_rate": 2.1911958314658598e-07, + "loss": 0.7661364078521729, + "step": 6910 + }, + { + "epoch": 1.5923963133640553, + "grad_norm": 1.1937782252155265, + "learning_rate": 2.1888164664097408e-07, + "loss": 
0.9322741031646729, + "step": 6911 + }, + { + "epoch": 1.592626728110599, + "grad_norm": 1.1787479005369867, + "learning_rate": 2.1864382351365717e-07, + "loss": 0.8411989212036133, + "step": 6912 + }, + { + "epoch": 1.592857142857143, + "grad_norm": 1.515351393614885, + "learning_rate": 2.1840611379915464e-07, + "loss": 0.8212479948997498, + "step": 6913 + }, + { + "epoch": 1.5930875576036865, + "grad_norm": 1.5032885798825617, + "learning_rate": 2.181685175319702e-07, + "loss": 0.7875508069992065, + "step": 6914 + }, + { + "epoch": 1.5933179723502304, + "grad_norm": 1.178669163135756, + "learning_rate": 2.1793103474659047e-07, + "loss": 0.8389852046966553, + "step": 6915 + }, + { + "epoch": 1.5935483870967742, + "grad_norm": 1.388906101423199, + "learning_rate": 2.1769366547748546e-07, + "loss": 0.8223046660423279, + "step": 6916 + }, + { + "epoch": 1.5937788018433179, + "grad_norm": 1.0682255683615596, + "learning_rate": 2.1745640975910962e-07, + "loss": 0.8427159786224365, + "step": 6917 + }, + { + "epoch": 1.5940092165898618, + "grad_norm": 1.2770752550871127, + "learning_rate": 2.172192676258996e-07, + "loss": 0.7448060512542725, + "step": 6918 + }, + { + "epoch": 1.5942396313364056, + "grad_norm": 1.1028377529507616, + "learning_rate": 2.1698223911227686e-07, + "loss": 0.7122288346290588, + "step": 6919 + }, + { + "epoch": 1.5944700460829493, + "grad_norm": 1.3801420957349657, + "learning_rate": 2.1674532425264548e-07, + "loss": 0.7712994813919067, + "step": 6920 + }, + { + "epoch": 1.5947004608294932, + "grad_norm": 1.4967308024498271, + "learning_rate": 2.1650852308139355e-07, + "loss": 0.9656664729118347, + "step": 6921 + }, + { + "epoch": 1.5949308755760367, + "grad_norm": 1.3725078407101703, + "learning_rate": 2.162718356328922e-07, + "loss": 0.748894214630127, + "step": 6922 + }, + { + "epoch": 1.5951612903225807, + "grad_norm": 1.0191925895935576, + "learning_rate": 2.1603526194149635e-07, + "loss": 0.6875454187393188, + "step": 6923 + }, + { + 
"epoch": 1.5953917050691244, + "grad_norm": 1.099493651981713, + "learning_rate": 2.1579880204154412e-07, + "loss": 0.8258690237998962, + "step": 6924 + }, + { + "epoch": 1.5956221198156681, + "grad_norm": 1.2228052738114181, + "learning_rate": 2.15562455967358e-07, + "loss": 0.7647902965545654, + "step": 6925 + }, + { + "epoch": 1.595852534562212, + "grad_norm": 1.0716326843288577, + "learning_rate": 2.1532622375324284e-07, + "loss": 0.7004281282424927, + "step": 6926 + }, + { + "epoch": 1.5960829493087556, + "grad_norm": 1.1015601686618846, + "learning_rate": 2.1509010543348726e-07, + "loss": 0.7500345706939697, + "step": 6927 + }, + { + "epoch": 1.5963133640552996, + "grad_norm": 1.1261005927566234, + "learning_rate": 2.148541010423641e-07, + "loss": 0.7300195693969727, + "step": 6928 + }, + { + "epoch": 1.5965437788018433, + "grad_norm": 1.0927883255058508, + "learning_rate": 2.1461821061412876e-07, + "loss": 0.7592284679412842, + "step": 6929 + }, + { + "epoch": 1.596774193548387, + "grad_norm": 1.265065855875648, + "learning_rate": 2.1438243418302016e-07, + "loss": 0.7179796099662781, + "step": 6930 + }, + { + "epoch": 1.597004608294931, + "grad_norm": 1.167267121775029, + "learning_rate": 2.1414677178326157e-07, + "loss": 0.8829631805419922, + "step": 6931 + }, + { + "epoch": 1.5972350230414747, + "grad_norm": 1.4030936435750112, + "learning_rate": 2.1391122344905865e-07, + "loss": 0.8661972880363464, + "step": 6932 + }, + { + "epoch": 1.5974654377880184, + "grad_norm": 1.4609293147197595, + "learning_rate": 2.136757892146007e-07, + "loss": 0.7774989604949951, + "step": 6933 + }, + { + "epoch": 1.5976958525345624, + "grad_norm": 1.2556066222087972, + "learning_rate": 2.1344046911406132e-07, + "loss": 0.7343888878822327, + "step": 6934 + }, + { + "epoch": 1.5979262672811059, + "grad_norm": 1.1442684268001395, + "learning_rate": 2.132052631815966e-07, + "loss": 0.7810107469558716, + "step": 6935 + }, + { + "epoch": 1.5981566820276498, + "grad_norm": 
1.343676205067389, + "learning_rate": 2.12970171451346e-07, + "loss": 0.7585299611091614, + "step": 6936 + }, + { + "epoch": 1.5983870967741935, + "grad_norm": 1.2827689520736418, + "learning_rate": 2.1273519395743344e-07, + "loss": 0.886371910572052, + "step": 6937 + }, + { + "epoch": 1.5986175115207373, + "grad_norm": 0.9767702062550015, + "learning_rate": 2.1250033073396523e-07, + "loss": 0.6986823081970215, + "step": 6938 + }, + { + "epoch": 1.5988479262672812, + "grad_norm": 1.2062052682782651, + "learning_rate": 2.122655818150312e-07, + "loss": 0.7524189352989197, + "step": 6939 + }, + { + "epoch": 1.5990783410138247, + "grad_norm": 1.1473232496595593, + "learning_rate": 2.120309472347055e-07, + "loss": 0.7699365615844727, + "step": 6940 + }, + { + "epoch": 1.5993087557603687, + "grad_norm": 1.188421090787615, + "learning_rate": 2.1179642702704458e-07, + "loss": 0.8112696409225464, + "step": 6941 + }, + { + "epoch": 1.5995391705069124, + "grad_norm": 1.377266755106213, + "learning_rate": 2.115620212260889e-07, + "loss": 0.7067416906356812, + "step": 6942 + }, + { + "epoch": 1.5997695852534561, + "grad_norm": 0.80841875970131, + "learning_rate": 2.1132772986586211e-07, + "loss": 0.787110447883606, + "step": 6943 + }, + { + "epoch": 1.6, + "grad_norm": 1.282613261539406, + "learning_rate": 2.11093552980371e-07, + "loss": 0.7356789112091064, + "step": 6944 + }, + { + "epoch": 1.6002304147465438, + "grad_norm": 1.2747758780049527, + "learning_rate": 2.1085949060360653e-07, + "loss": 0.8057125806808472, + "step": 6945 + }, + { + "epoch": 1.6004608294930875, + "grad_norm": 1.1828340962550294, + "learning_rate": 2.1062554276954225e-07, + "loss": 0.7169399261474609, + "step": 6946 + }, + { + "epoch": 1.6006912442396315, + "grad_norm": 1.2018304676070681, + "learning_rate": 2.1039170951213526e-07, + "loss": 0.7219180464744568, + "step": 6947 + }, + { + "epoch": 1.600921658986175, + "grad_norm": 1.2736335133966967, + "learning_rate": 2.101579908653266e-07, + "loss": 
0.7530789375305176, + "step": 6948 + }, + { + "epoch": 1.601152073732719, + "grad_norm": 1.2374620271602483, + "learning_rate": 2.0992438686303993e-07, + "loss": 0.8192377090454102, + "step": 6949 + }, + { + "epoch": 1.6013824884792627, + "grad_norm": 1.0987195977670645, + "learning_rate": 2.0969089753918223e-07, + "loss": 0.6904648542404175, + "step": 6950 + }, + { + "epoch": 1.6016129032258064, + "grad_norm": 1.2558183684284059, + "learning_rate": 2.0945752292764495e-07, + "loss": 0.7289770245552063, + "step": 6951 + }, + { + "epoch": 1.6018433179723504, + "grad_norm": 1.260283902447682, + "learning_rate": 2.0922426306230157e-07, + "loss": 0.8467620611190796, + "step": 6952 + }, + { + "epoch": 1.6020737327188939, + "grad_norm": 1.3650999598924758, + "learning_rate": 2.089911179770093e-07, + "loss": 0.7835153937339783, + "step": 6953 + }, + { + "epoch": 1.6023041474654378, + "grad_norm": 0.847985634813149, + "learning_rate": 2.0875808770560933e-07, + "loss": 0.6696668267250061, + "step": 6954 + }, + { + "epoch": 1.6025345622119815, + "grad_norm": 1.441689312728025, + "learning_rate": 2.0852517228192556e-07, + "loss": 0.8451364636421204, + "step": 6955 + }, + { + "epoch": 1.6027649769585253, + "grad_norm": 1.2628900414882365, + "learning_rate": 2.0829237173976487e-07, + "loss": 0.7917240858078003, + "step": 6956 + }, + { + "epoch": 1.6029953917050692, + "grad_norm": 1.2514606025933794, + "learning_rate": 2.0805968611291867e-07, + "loss": 0.791597843170166, + "step": 6957 + }, + { + "epoch": 1.603225806451613, + "grad_norm": 1.2854657657217543, + "learning_rate": 2.0782711543516063e-07, + "loss": 0.7571247816085815, + "step": 6958 + }, + { + "epoch": 1.6034562211981567, + "grad_norm": 1.1996866839711877, + "learning_rate": 2.075946597402478e-07, + "loss": 0.9196302890777588, + "step": 6959 + }, + { + "epoch": 1.6036866359447006, + "grad_norm": 0.9955085341059975, + "learning_rate": 2.0736231906192136e-07, + "loss": 0.7106618881225586, + "step": 6960 + }, + { + 
"epoch": 1.6039170506912441, + "grad_norm": 0.9090693582601959, + "learning_rate": 2.071300934339051e-07, + "loss": 0.8923465013504028, + "step": 6961 + }, + { + "epoch": 1.604147465437788, + "grad_norm": 1.1524730844586952, + "learning_rate": 2.0689798288990601e-07, + "loss": 0.6929241418838501, + "step": 6962 + }, + { + "epoch": 1.6043778801843318, + "grad_norm": 1.4736872345919192, + "learning_rate": 2.0666598746361487e-07, + "loss": 0.935944676399231, + "step": 6963 + }, + { + "epoch": 1.6046082949308755, + "grad_norm": 1.3002916307222088, + "learning_rate": 2.0643410718870536e-07, + "loss": 0.7442188262939453, + "step": 6964 + }, + { + "epoch": 1.6048387096774195, + "grad_norm": 1.13007905720726, + "learning_rate": 2.0620234209883446e-07, + "loss": 0.7340278625488281, + "step": 6965 + }, + { + "epoch": 1.605069124423963, + "grad_norm": 1.1841454047560163, + "learning_rate": 2.0597069222764297e-07, + "loss": 0.7436190247535706, + "step": 6966 + }, + { + "epoch": 1.605299539170507, + "grad_norm": 1.1998918795301519, + "learning_rate": 2.0573915760875406e-07, + "loss": 0.9109283685684204, + "step": 6967 + }, + { + "epoch": 1.6055299539170507, + "grad_norm": 1.362187790875206, + "learning_rate": 2.0550773827577518e-07, + "loss": 0.86224365234375, + "step": 6968 + }, + { + "epoch": 1.6057603686635944, + "grad_norm": 1.0973288140018649, + "learning_rate": 2.0527643426229636e-07, + "loss": 0.6873685121536255, + "step": 6969 + }, + { + "epoch": 1.6059907834101383, + "grad_norm": 1.2862613183491987, + "learning_rate": 2.0504524560189074e-07, + "loss": 0.7634609937667847, + "step": 6970 + }, + { + "epoch": 1.606221198156682, + "grad_norm": 1.642442078921259, + "learning_rate": 2.0481417232811572e-07, + "loss": 0.7940595149993896, + "step": 6971 + }, + { + "epoch": 1.6064516129032258, + "grad_norm": 1.0579671129687211, + "learning_rate": 2.0458321447451078e-07, + "loss": 0.7109687924385071, + "step": 6972 + }, + { + "epoch": 1.6066820276497698, + "grad_norm": 
1.3780414286693414, + "learning_rate": 2.04352372074599e-07, + "loss": 0.9476398825645447, + "step": 6973 + }, + { + "epoch": 1.6069124423963133, + "grad_norm": 1.3106188238946987, + "learning_rate": 2.0412164516188747e-07, + "loss": 0.7563579678535461, + "step": 6974 + }, + { + "epoch": 1.6071428571428572, + "grad_norm": 1.1912217950342037, + "learning_rate": 2.0389103376986538e-07, + "loss": 0.7928751707077026, + "step": 6975 + }, + { + "epoch": 1.607373271889401, + "grad_norm": 1.1927610489358789, + "learning_rate": 2.0366053793200565e-07, + "loss": 0.776961624622345, + "step": 6976 + }, + { + "epoch": 1.6076036866359447, + "grad_norm": 1.1830668942381175, + "learning_rate": 2.0343015768176496e-07, + "loss": 0.6511167883872986, + "step": 6977 + }, + { + "epoch": 1.6078341013824886, + "grad_norm": 1.3541662729221868, + "learning_rate": 2.0319989305258235e-07, + "loss": 0.6487337350845337, + "step": 6978 + }, + { + "epoch": 1.6080645161290321, + "grad_norm": 1.5271951763204938, + "learning_rate": 2.0296974407788004e-07, + "loss": 0.921454131603241, + "step": 6979 + }, + { + "epoch": 1.608294930875576, + "grad_norm": 1.0476613319531645, + "learning_rate": 2.0273971079106467e-07, + "loss": 0.8145809769630432, + "step": 6980 + }, + { + "epoch": 1.6085253456221198, + "grad_norm": 0.9495439447317249, + "learning_rate": 2.0250979322552474e-07, + "loss": 0.6655904054641724, + "step": 6981 + }, + { + "epoch": 1.6087557603686635, + "grad_norm": 1.1486957458539049, + "learning_rate": 2.0227999141463258e-07, + "loss": 0.777961254119873, + "step": 6982 + }, + { + "epoch": 1.6089861751152075, + "grad_norm": 1.3274428663782127, + "learning_rate": 2.0205030539174361e-07, + "loss": 0.6543164253234863, + "step": 6983 + }, + { + "epoch": 1.6092165898617512, + "grad_norm": 1.233780092778412, + "learning_rate": 2.018207351901966e-07, + "loss": 0.7842000722885132, + "step": 6984 + }, + { + "epoch": 1.609447004608295, + "grad_norm": 0.999384175284256, + "learning_rate": 
2.0159128084331278e-07, + "loss": 0.7264418005943298, + "step": 6985 + }, + { + "epoch": 1.6096774193548387, + "grad_norm": 1.313414021265448, + "learning_rate": 2.0136194238439795e-07, + "loss": 0.8722596168518066, + "step": 6986 + }, + { + "epoch": 1.6099078341013824, + "grad_norm": 1.3518278161266697, + "learning_rate": 2.0113271984673997e-07, + "loss": 0.8162735104560852, + "step": 6987 + }, + { + "epoch": 1.6101382488479263, + "grad_norm": 1.212757185466248, + "learning_rate": 2.0090361326360982e-07, + "loss": 0.6962481737136841, + "step": 6988 + }, + { + "epoch": 1.61036866359447, + "grad_norm": 1.133716172506403, + "learning_rate": 2.0067462266826264e-07, + "loss": 0.8186852931976318, + "step": 6989 + }, + { + "epoch": 1.6105990783410138, + "grad_norm": 1.505728867210405, + "learning_rate": 2.0044574809393543e-07, + "loss": 0.8935987949371338, + "step": 6990 + }, + { + "epoch": 1.6108294930875577, + "grad_norm": 1.2824355796337807, + "learning_rate": 2.002169895738498e-07, + "loss": 0.9152865409851074, + "step": 6991 + }, + { + "epoch": 1.6110599078341012, + "grad_norm": 1.521529078332145, + "learning_rate": 1.9998834714120928e-07, + "loss": 0.8042874336242676, + "step": 6992 + }, + { + "epoch": 1.6112903225806452, + "grad_norm": 1.3198117612600044, + "learning_rate": 1.9975982082920083e-07, + "loss": 0.9621129035949707, + "step": 6993 + }, + { + "epoch": 1.611520737327189, + "grad_norm": 1.1154614331355635, + "learning_rate": 1.9953141067099533e-07, + "loss": 0.8296995162963867, + "step": 6994 + }, + { + "epoch": 1.6117511520737327, + "grad_norm": 1.0827522335122797, + "learning_rate": 1.9930311669974587e-07, + "loss": 0.8129373788833618, + "step": 6995 + }, + { + "epoch": 1.6119815668202766, + "grad_norm": 1.359695561767368, + "learning_rate": 1.9907493894858874e-07, + "loss": 0.7450911998748779, + "step": 6996 + }, + { + "epoch": 1.6122119815668203, + "grad_norm": 1.2367503665171555, + "learning_rate": 1.9884687745064422e-07, + "loss": 0.798037052154541, 
+ "step": 6997 + }, + { + "epoch": 1.612442396313364, + "grad_norm": 1.218969884225304, + "learning_rate": 1.9861893223901494e-07, + "loss": 0.8118857145309448, + "step": 6998 + }, + { + "epoch": 1.6126728110599078, + "grad_norm": 1.2176008366956401, + "learning_rate": 1.9839110334678632e-07, + "loss": 0.7954392433166504, + "step": 6999 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 1.2233633618619175, + "learning_rate": 1.9816339080702825e-07, + "loss": 0.8055616617202759, + "step": 7000 + }, + { + "epoch": 1.6131336405529955, + "grad_norm": 1.503254744382692, + "learning_rate": 1.979357946527924e-07, + "loss": 0.8949761986732483, + "step": 7001 + }, + { + "epoch": 1.6133640552995392, + "grad_norm": 1.376056206509758, + "learning_rate": 1.9770831491711427e-07, + "loss": 0.8327617645263672, + "step": 7002 + }, + { + "epoch": 1.613594470046083, + "grad_norm": 1.2867855951178133, + "learning_rate": 1.9748095163301215e-07, + "loss": 0.7593148946762085, + "step": 7003 + }, + { + "epoch": 1.6138248847926269, + "grad_norm": 1.2449007241812073, + "learning_rate": 1.9725370483348737e-07, + "loss": 0.7639665603637695, + "step": 7004 + }, + { + "epoch": 1.6140552995391704, + "grad_norm": 1.2839981076373308, + "learning_rate": 1.9702657455152448e-07, + "loss": 0.8561587929725647, + "step": 7005 + }, + { + "epoch": 1.6142857142857143, + "grad_norm": 1.4345782240891563, + "learning_rate": 1.9679956082009154e-07, + "loss": 0.835313081741333, + "step": 7006 + }, + { + "epoch": 1.614516129032258, + "grad_norm": 1.680229749258956, + "learning_rate": 1.9657266367213898e-07, + "loss": 0.831456184387207, + "step": 7007 + }, + { + "epoch": 1.6147465437788018, + "grad_norm": 1.1797102347566437, + "learning_rate": 1.963458831406005e-07, + "loss": 0.699436604976654, + "step": 7008 + }, + { + "epoch": 1.6149769585253457, + "grad_norm": 1.2382287230628872, + "learning_rate": 1.9611921925839337e-07, + "loss": 0.7821902632713318, + "step": 7009 + }, + { + "epoch": 1.6152073732718892, 
+ "grad_norm": 1.035873020643515, + "learning_rate": 1.9589267205841742e-07, + "loss": 0.7491241097450256, + "step": 7010 + }, + { + "epoch": 1.6154377880184332, + "grad_norm": 1.3212550422299536, + "learning_rate": 1.956662415735554e-07, + "loss": 0.7299652099609375, + "step": 7011 + }, + { + "epoch": 1.615668202764977, + "grad_norm": 1.2121144450441814, + "learning_rate": 1.9543992783667385e-07, + "loss": 0.692190408706665, + "step": 7012 + }, + { + "epoch": 1.6158986175115206, + "grad_norm": 1.5397188528974992, + "learning_rate": 1.9521373088062166e-07, + "loss": 0.8727273941040039, + "step": 7013 + }, + { + "epoch": 1.6161290322580646, + "grad_norm": 0.9576172656761047, + "learning_rate": 1.9498765073823077e-07, + "loss": 0.6441171169281006, + "step": 7014 + }, + { + "epoch": 1.6163594470046083, + "grad_norm": 1.202013067822893, + "learning_rate": 1.947616874423169e-07, + "loss": 0.6960387229919434, + "step": 7015 + }, + { + "epoch": 1.616589861751152, + "grad_norm": 1.3238157552069112, + "learning_rate": 1.9453584102567788e-07, + "loss": 0.9231700301170349, + "step": 7016 + }, + { + "epoch": 1.616820276497696, + "grad_norm": 1.5395552640428811, + "learning_rate": 1.9431011152109555e-07, + "loss": 0.6957401037216187, + "step": 7017 + }, + { + "epoch": 1.6170506912442395, + "grad_norm": 1.221595091148929, + "learning_rate": 1.9408449896133384e-07, + "loss": 0.6608580350875854, + "step": 7018 + }, + { + "epoch": 1.6172811059907835, + "grad_norm": 1.386134285673899, + "learning_rate": 1.9385900337913997e-07, + "loss": 0.7322397232055664, + "step": 7019 + }, + { + "epoch": 1.6175115207373272, + "grad_norm": 1.1188269604657235, + "learning_rate": 1.9363362480724488e-07, + "loss": 0.6996288299560547, + "step": 7020 + }, + { + "epoch": 1.617741935483871, + "grad_norm": 1.022000935531768, + "learning_rate": 1.9340836327836163e-07, + "loss": 0.7928623557090759, + "step": 7021 + }, + { + "epoch": 1.6179723502304149, + "grad_norm": 0.9992379944358776, + "learning_rate": 
1.9318321882518674e-07, + "loss": 0.6275026202201843, + "step": 7022 + }, + { + "epoch": 1.6182027649769584, + "grad_norm": 1.26569218150676, + "learning_rate": 1.9295819148039948e-07, + "loss": 0.6660110950469971, + "step": 7023 + }, + { + "epoch": 1.6184331797235023, + "grad_norm": 1.0401535425644861, + "learning_rate": 1.9273328127666232e-07, + "loss": 0.8129480481147766, + "step": 7024 + }, + { + "epoch": 1.618663594470046, + "grad_norm": 1.146646002030878, + "learning_rate": 1.9250848824662046e-07, + "loss": 0.8070700168609619, + "step": 7025 + }, + { + "epoch": 1.6188940092165898, + "grad_norm": 1.4109951707076815, + "learning_rate": 1.922838124229028e-07, + "loss": 0.8123769760131836, + "step": 7026 + }, + { + "epoch": 1.6191244239631337, + "grad_norm": 0.9906397496222884, + "learning_rate": 1.920592538381205e-07, + "loss": 0.6552244424819946, + "step": 7027 + }, + { + "epoch": 1.6193548387096774, + "grad_norm": 1.0749749429025204, + "learning_rate": 1.9183481252486767e-07, + "loss": 0.8764367699623108, + "step": 7028 + }, + { + "epoch": 1.6195852534562212, + "grad_norm": 1.8347518044142406, + "learning_rate": 1.9161048851572215e-07, + "loss": 0.9075809717178345, + "step": 7029 + }, + { + "epoch": 1.6198156682027651, + "grad_norm": 1.1695152473088226, + "learning_rate": 1.9138628184324412e-07, + "loss": 0.7308327555656433, + "step": 7030 + }, + { + "epoch": 1.6200460829493086, + "grad_norm": 1.4269673355519676, + "learning_rate": 1.9116219253997655e-07, + "loss": 0.838142991065979, + "step": 7031 + }, + { + "epoch": 1.6202764976958526, + "grad_norm": 1.5286648636126694, + "learning_rate": 1.9093822063844623e-07, + "loss": 0.7681041359901428, + "step": 7032 + }, + { + "epoch": 1.6205069124423963, + "grad_norm": 1.1858134701081806, + "learning_rate": 1.907143661711621e-07, + "loss": 0.7179980278015137, + "step": 7033 + }, + { + "epoch": 1.62073732718894, + "grad_norm": 1.2400863874788628, + "learning_rate": 1.9049062917061609e-07, + "loss": 0.8688361644744873, 
+ "step": 7034 + }, + { + "epoch": 1.620967741935484, + "grad_norm": 1.0795907835047491, + "learning_rate": 1.9026700966928388e-07, + "loss": 0.6540178656578064, + "step": 7035 + }, + { + "epoch": 1.6211981566820275, + "grad_norm": 0.9042431894176799, + "learning_rate": 1.900435076996233e-07, + "loss": 0.7834869623184204, + "step": 7036 + }, + { + "epoch": 1.6214285714285714, + "grad_norm": 1.4376571546925008, + "learning_rate": 1.8982012329407505e-07, + "loss": 0.8895971775054932, + "step": 7037 + }, + { + "epoch": 1.6216589861751152, + "grad_norm": 1.1211547009425467, + "learning_rate": 1.8959685648506362e-07, + "loss": 0.6625858545303345, + "step": 7038 + }, + { + "epoch": 1.621889400921659, + "grad_norm": 1.4181930826937483, + "learning_rate": 1.893737073049957e-07, + "loss": 0.651193380355835, + "step": 7039 + }, + { + "epoch": 1.6221198156682028, + "grad_norm": 1.49480203283565, + "learning_rate": 1.8915067578626065e-07, + "loss": 0.8716636896133423, + "step": 7040 + }, + { + "epoch": 1.6223502304147466, + "grad_norm": 1.2037531898880258, + "learning_rate": 1.8892776196123196e-07, + "loss": 0.812637984752655, + "step": 7041 + }, + { + "epoch": 1.6225806451612903, + "grad_norm": 1.4952425500537936, + "learning_rate": 1.887049658622648e-07, + "loss": 0.7803184986114502, + "step": 7042 + }, + { + "epoch": 1.6228110599078343, + "grad_norm": 1.4542796613479354, + "learning_rate": 1.8848228752169793e-07, + "loss": 0.7884814739227295, + "step": 7043 + }, + { + "epoch": 1.6230414746543778, + "grad_norm": 1.3474838088832628, + "learning_rate": 1.8825972697185265e-07, + "loss": 0.7250671982765198, + "step": 7044 + }, + { + "epoch": 1.6232718894009217, + "grad_norm": 1.2055929150487366, + "learning_rate": 1.880372842450332e-07, + "loss": 0.8078780174255371, + "step": 7045 + }, + { + "epoch": 1.6235023041474654, + "grad_norm": 1.2023825853188168, + "learning_rate": 1.878149593735272e-07, + "loss": 0.8523818254470825, + "step": 7046 + }, + { + "epoch": 1.6237327188940092, 
+ "grad_norm": 1.2683431455334386, + "learning_rate": 1.875927523896047e-07, + "loss": 0.8772249221801758, + "step": 7047 + }, + { + "epoch": 1.6239631336405531, + "grad_norm": 1.0815338842817483, + "learning_rate": 1.8737066332551843e-07, + "loss": 0.7906323671340942, + "step": 7048 + }, + { + "epoch": 1.6241935483870966, + "grad_norm": 1.3048529080567755, + "learning_rate": 1.8714869221350492e-07, + "loss": 0.8010337352752686, + "step": 7049 + }, + { + "epoch": 1.6244239631336406, + "grad_norm": 1.365899691735964, + "learning_rate": 1.8692683908578267e-07, + "loss": 0.8978049755096436, + "step": 7050 + }, + { + "epoch": 1.6246543778801843, + "grad_norm": 1.159165616843268, + "learning_rate": 1.8670510397455297e-07, + "loss": 0.6622864007949829, + "step": 7051 + }, + { + "epoch": 1.624884792626728, + "grad_norm": 1.048079119212609, + "learning_rate": 1.8648348691200112e-07, + "loss": 0.7795406579971313, + "step": 7052 + }, + { + "epoch": 1.625115207373272, + "grad_norm": 1.2605630326093136, + "learning_rate": 1.8626198793029423e-07, + "loss": 0.9152054786682129, + "step": 7053 + }, + { + "epoch": 1.6253456221198157, + "grad_norm": 1.1757865506402991, + "learning_rate": 1.860406070615822e-07, + "loss": 0.719946563243866, + "step": 7054 + }, + { + "epoch": 1.6255760368663594, + "grad_norm": 1.2991129477224903, + "learning_rate": 1.8581934433799884e-07, + "loss": 0.782962441444397, + "step": 7055 + }, + { + "epoch": 1.6258064516129034, + "grad_norm": 1.118392005824248, + "learning_rate": 1.855981997916597e-07, + "loss": 0.8119732737541199, + "step": 7056 + }, + { + "epoch": 1.6260368663594469, + "grad_norm": 1.2362407544063627, + "learning_rate": 1.8537717345466351e-07, + "loss": 0.7585981488227844, + "step": 7057 + }, + { + "epoch": 1.6262672811059908, + "grad_norm": 1.158465388331893, + "learning_rate": 1.8515626535909258e-07, + "loss": 0.6846082210540771, + "step": 7058 + }, + { + "epoch": 1.6264976958525346, + "grad_norm": 1.230933966400155, + "learning_rate": 
1.8493547553701083e-07, + "loss": 0.7355546951293945, + "step": 7059 + }, + { + "epoch": 1.6267281105990783, + "grad_norm": 1.15836260056471, + "learning_rate": 1.847148040204657e-07, + "loss": 0.6828340291976929, + "step": 7060 + }, + { + "epoch": 1.6269585253456222, + "grad_norm": 1.0499975056987365, + "learning_rate": 1.8449425084148763e-07, + "loss": 0.8513988256454468, + "step": 7061 + }, + { + "epoch": 1.6271889400921657, + "grad_norm": 1.0253802645646743, + "learning_rate": 1.8427381603208947e-07, + "loss": 0.6817762851715088, + "step": 7062 + }, + { + "epoch": 1.6274193548387097, + "grad_norm": 0.9793159138955572, + "learning_rate": 1.8405349962426699e-07, + "loss": 0.7314180731773376, + "step": 7063 + }, + { + "epoch": 1.6276497695852534, + "grad_norm": 1.326821994662743, + "learning_rate": 1.8383330164999898e-07, + "loss": 0.8193466663360596, + "step": 7064 + }, + { + "epoch": 1.6278801843317972, + "grad_norm": 1.2511428182189692, + "learning_rate": 1.8361322214124643e-07, + "loss": 0.7469823360443115, + "step": 7065 + }, + { + "epoch": 1.628110599078341, + "grad_norm": 1.4366505105110272, + "learning_rate": 1.8339326112995423e-07, + "loss": 0.8578816652297974, + "step": 7066 + }, + { + "epoch": 1.6283410138248848, + "grad_norm": 1.4615192025781363, + "learning_rate": 1.8317341864804903e-07, + "loss": 0.8384239077568054, + "step": 7067 + }, + { + "epoch": 1.6285714285714286, + "grad_norm": 1.122194991625306, + "learning_rate": 1.829536947274406e-07, + "loss": 0.8707646131515503, + "step": 7068 + }, + { + "epoch": 1.6288018433179725, + "grad_norm": 1.2319397578647793, + "learning_rate": 1.82734089400022e-07, + "loss": 0.6869943141937256, + "step": 7069 + }, + { + "epoch": 1.629032258064516, + "grad_norm": 1.3893487386527597, + "learning_rate": 1.8251460269766848e-07, + "loss": 0.7776129245758057, + "step": 7070 + }, + { + "epoch": 1.62926267281106, + "grad_norm": 1.104887091227765, + "learning_rate": 1.8229523465223785e-07, + "loss": 0.8126854300498962, + 
"step": 7071 + }, + { + "epoch": 1.6294930875576037, + "grad_norm": 1.0317016664034484, + "learning_rate": 1.8207598529557166e-07, + "loss": 0.6570720672607422, + "step": 7072 + }, + { + "epoch": 1.6297235023041474, + "grad_norm": 0.8859395443506812, + "learning_rate": 1.818568546594934e-07, + "loss": 0.6485599875450134, + "step": 7073 + }, + { + "epoch": 1.6299539170506914, + "grad_norm": 1.206554438869518, + "learning_rate": 1.816378427758093e-07, + "loss": 0.9132766723632812, + "step": 7074 + }, + { + "epoch": 1.6301843317972349, + "grad_norm": 1.4945592359199265, + "learning_rate": 1.8141894967630932e-07, + "loss": 0.8277286291122437, + "step": 7075 + }, + { + "epoch": 1.6304147465437788, + "grad_norm": 1.3670934774676884, + "learning_rate": 1.812001753927651e-07, + "loss": 0.7409358024597168, + "step": 7076 + }, + { + "epoch": 1.6306451612903226, + "grad_norm": 1.2664504423738472, + "learning_rate": 1.809815199569311e-07, + "loss": 0.8233339786529541, + "step": 7077 + }, + { + "epoch": 1.6308755760368663, + "grad_norm": 1.3727275296136565, + "learning_rate": 1.8076298340054563e-07, + "loss": 0.8704487085342407, + "step": 7078 + }, + { + "epoch": 1.6311059907834102, + "grad_norm": 1.503472652590263, + "learning_rate": 1.8054456575532862e-07, + "loss": 0.8845789432525635, + "step": 7079 + }, + { + "epoch": 1.631336405529954, + "grad_norm": 1.0523258046250148, + "learning_rate": 1.8032626705298272e-07, + "loss": 0.7241162061691284, + "step": 7080 + }, + { + "epoch": 1.6315668202764977, + "grad_norm": 1.193290512437584, + "learning_rate": 1.8010808732519433e-07, + "loss": 0.7065681219100952, + "step": 7081 + }, + { + "epoch": 1.6317972350230416, + "grad_norm": 1.281102564788521, + "learning_rate": 1.7989002660363162e-07, + "loss": 0.6492339372634888, + "step": 7082 + }, + { + "epoch": 1.6320276497695851, + "grad_norm": 0.9673694389198546, + "learning_rate": 1.79672084919946e-07, + "loss": 0.7089248895645142, + "step": 7083 + }, + { + "epoch": 1.632258064516129, + 
"grad_norm": 1.0367687290608978, + "learning_rate": 1.794542623057712e-07, + "loss": 0.7030316591262817, + "step": 7084 + }, + { + "epoch": 1.6324884792626728, + "grad_norm": 1.1008255373775855, + "learning_rate": 1.792365587927239e-07, + "loss": 0.8626528978347778, + "step": 7085 + }, + { + "epoch": 1.6327188940092165, + "grad_norm": 1.1079176271315754, + "learning_rate": 1.7901897441240333e-07, + "loss": 0.8468672037124634, + "step": 7086 + }, + { + "epoch": 1.6329493087557605, + "grad_norm": 1.4611904004596754, + "learning_rate": 1.7880150919639214e-07, + "loss": 0.8546739816665649, + "step": 7087 + }, + { + "epoch": 1.633179723502304, + "grad_norm": 1.1949871550520017, + "learning_rate": 1.7858416317625468e-07, + "loss": 0.9187895655632019, + "step": 7088 + }, + { + "epoch": 1.633410138248848, + "grad_norm": 1.077248232790752, + "learning_rate": 1.7836693638353827e-07, + "loss": 0.7496293783187866, + "step": 7089 + }, + { + "epoch": 1.6336405529953917, + "grad_norm": 1.0517765508552415, + "learning_rate": 1.7814982884977358e-07, + "loss": 0.682653546333313, + "step": 7090 + }, + { + "epoch": 1.6338709677419354, + "grad_norm": 1.5003665522833143, + "learning_rate": 1.7793284060647295e-07, + "loss": 0.8065551519393921, + "step": 7091 + }, + { + "epoch": 1.6341013824884794, + "grad_norm": 1.134711484772771, + "learning_rate": 1.7771597168513263e-07, + "loss": 0.6605588793754578, + "step": 7092 + }, + { + "epoch": 1.634331797235023, + "grad_norm": 1.0012250391371058, + "learning_rate": 1.7749922211723034e-07, + "loss": 0.7257254123687744, + "step": 7093 + }, + { + "epoch": 1.6345622119815668, + "grad_norm": 1.1831263140816395, + "learning_rate": 1.772825919342269e-07, + "loss": 0.7438890933990479, + "step": 7094 + }, + { + "epoch": 1.6347926267281108, + "grad_norm": 1.250595895627981, + "learning_rate": 1.770660811675664e-07, + "loss": 0.8546249866485596, + "step": 7095 + }, + { + "epoch": 1.6350230414746543, + "grad_norm": 1.1835928544530323, + "learning_rate": 
1.7684968984867466e-07, + "loss": 0.727516770362854, + "step": 7096 + }, + { + "epoch": 1.6352534562211982, + "grad_norm": 1.36586374940823, + "learning_rate": 1.766334180089606e-07, + "loss": 0.7578408718109131, + "step": 7097 + }, + { + "epoch": 1.635483870967742, + "grad_norm": 1.4255838450352876, + "learning_rate": 1.7641726567981606e-07, + "loss": 0.8253650665283203, + "step": 7098 + }, + { + "epoch": 1.6357142857142857, + "grad_norm": 1.3615057524495244, + "learning_rate": 1.7620123289261523e-07, + "loss": 0.8932347297668457, + "step": 7099 + }, + { + "epoch": 1.6359447004608296, + "grad_norm": 1.0770953977682685, + "learning_rate": 1.7598531967871465e-07, + "loss": 0.6661143898963928, + "step": 7100 + }, + { + "epoch": 1.6361751152073731, + "grad_norm": 1.2408264386151553, + "learning_rate": 1.7576952606945415e-07, + "loss": 0.8413572311401367, + "step": 7101 + }, + { + "epoch": 1.636405529953917, + "grad_norm": 1.2084626250429713, + "learning_rate": 1.7555385209615603e-07, + "loss": 0.713816225528717, + "step": 7102 + }, + { + "epoch": 1.6366359447004608, + "grad_norm": 1.67339389064804, + "learning_rate": 1.7533829779012466e-07, + "loss": 0.8588179349899292, + "step": 7103 + }, + { + "epoch": 1.6368663594470045, + "grad_norm": 1.3521357251955939, + "learning_rate": 1.7512286318264778e-07, + "loss": 0.8666437864303589, + "step": 7104 + }, + { + "epoch": 1.6370967741935485, + "grad_norm": 1.340257158830322, + "learning_rate": 1.7490754830499522e-07, + "loss": 0.9219843745231628, + "step": 7105 + }, + { + "epoch": 1.6373271889400922, + "grad_norm": 1.3285275552241094, + "learning_rate": 1.7469235318841956e-07, + "loss": 0.93767249584198, + "step": 7106 + }, + { + "epoch": 1.637557603686636, + "grad_norm": 1.2782247944953928, + "learning_rate": 1.7447727786415644e-07, + "loss": 0.7317457795143127, + "step": 7107 + }, + { + "epoch": 1.6377880184331797, + "grad_norm": 1.1023935137429937, + "learning_rate": 1.7426232236342365e-07, + "loss": 0.850578784942627, + 
"step": 7108 + }, + { + "epoch": 1.6380184331797234, + "grad_norm": 1.1932749051362488, + "learning_rate": 1.7404748671742143e-07, + "loss": 0.7580707669258118, + "step": 7109 + }, + { + "epoch": 1.6382488479262673, + "grad_norm": 1.4967576950530754, + "learning_rate": 1.738327709573333e-07, + "loss": 0.8393806219100952, + "step": 7110 + }, + { + "epoch": 1.638479262672811, + "grad_norm": 1.0170127852420416, + "learning_rate": 1.7361817511432474e-07, + "loss": 0.6641673445701599, + "step": 7111 + }, + { + "epoch": 1.6387096774193548, + "grad_norm": 1.2746608671167614, + "learning_rate": 1.734036992195438e-07, + "loss": 0.7570137977600098, + "step": 7112 + }, + { + "epoch": 1.6389400921658988, + "grad_norm": 1.1366436885649456, + "learning_rate": 1.7318934330412194e-07, + "loss": 0.78557288646698, + "step": 7113 + }, + { + "epoch": 1.6391705069124423, + "grad_norm": 1.3443988626089514, + "learning_rate": 1.729751073991721e-07, + "loss": 0.8309692740440369, + "step": 7114 + }, + { + "epoch": 1.6394009216589862, + "grad_norm": 1.0791152795033432, + "learning_rate": 1.727609915357908e-07, + "loss": 0.6409872770309448, + "step": 7115 + }, + { + "epoch": 1.63963133640553, + "grad_norm": 1.0106967037974632, + "learning_rate": 1.7254699574505648e-07, + "loss": 0.7916153073310852, + "step": 7116 + }, + { + "epoch": 1.6398617511520737, + "grad_norm": 1.5121844712494004, + "learning_rate": 1.7233312005803015e-07, + "loss": 0.7925357818603516, + "step": 7117 + }, + { + "epoch": 1.6400921658986176, + "grad_norm": 1.5493448906965575, + "learning_rate": 1.7211936450575594e-07, + "loss": 0.9124211668968201, + "step": 7118 + }, + { + "epoch": 1.6403225806451613, + "grad_norm": 1.2418161556418856, + "learning_rate": 1.7190572911925994e-07, + "loss": 0.8905198574066162, + "step": 7119 + }, + { + "epoch": 1.640552995391705, + "grad_norm": 1.0755844253909046, + "learning_rate": 1.716922139295509e-07, + "loss": 0.8139728307723999, + "step": 7120 + }, + { + "epoch": 1.6407834101382488, + 
"grad_norm": 1.3621014779170746, + "learning_rate": 1.7147881896762074e-07, + "loss": 0.7607166767120361, + "step": 7121 + }, + { + "epoch": 1.6410138248847925, + "grad_norm": 1.282778120557478, + "learning_rate": 1.7126554426444316e-07, + "loss": 0.806864857673645, + "step": 7122 + }, + { + "epoch": 1.6412442396313365, + "grad_norm": 1.352241351446694, + "learning_rate": 1.710523898509747e-07, + "loss": 0.697334885597229, + "step": 7123 + }, + { + "epoch": 1.6414746543778802, + "grad_norm": 1.4205201103890581, + "learning_rate": 1.7083935575815455e-07, + "loss": 0.7313966751098633, + "step": 7124 + }, + { + "epoch": 1.641705069124424, + "grad_norm": 1.3868798260826238, + "learning_rate": 1.7062644201690413e-07, + "loss": 0.8857930898666382, + "step": 7125 + }, + { + "epoch": 1.6419354838709679, + "grad_norm": 1.0686783154078314, + "learning_rate": 1.7041364865812758e-07, + "loss": 0.7451884746551514, + "step": 7126 + }, + { + "epoch": 1.6421658986175114, + "grad_norm": 1.2220777026134708, + "learning_rate": 1.7020097571271186e-07, + "loss": 0.7023841142654419, + "step": 7127 + }, + { + "epoch": 1.6423963133640553, + "grad_norm": 1.2608302557028366, + "learning_rate": 1.6998842321152607e-07, + "loss": 0.708385705947876, + "step": 7128 + }, + { + "epoch": 1.642626728110599, + "grad_norm": 1.3854146642080662, + "learning_rate": 1.697759911854215e-07, + "loss": 0.7885474562644958, + "step": 7129 + }, + { + "epoch": 1.6428571428571428, + "grad_norm": 1.161295661131579, + "learning_rate": 1.695636796652331e-07, + "loss": 0.7054568529129028, + "step": 7130 + }, + { + "epoch": 1.6430875576036867, + "grad_norm": 1.1652742930387396, + "learning_rate": 1.6935148868177718e-07, + "loss": 0.6899726986885071, + "step": 7131 + }, + { + "epoch": 1.6433179723502302, + "grad_norm": 1.4011600897250127, + "learning_rate": 1.6913941826585288e-07, + "loss": 0.8558614253997803, + "step": 7132 + }, + { + "epoch": 1.6435483870967742, + "grad_norm": 1.2947217762783314, + "learning_rate": 
1.6892746844824223e-07, + "loss": 0.7741858959197998, + "step": 7133 + }, + { + "epoch": 1.643778801843318, + "grad_norm": 1.130755528536183, + "learning_rate": 1.6871563925970943e-07, + "loss": 0.7332532405853271, + "step": 7134 + }, + { + "epoch": 1.6440092165898617, + "grad_norm": 1.4331915051670545, + "learning_rate": 1.6850393073100078e-07, + "loss": 0.8288085460662842, + "step": 7135 + }, + { + "epoch": 1.6442396313364056, + "grad_norm": 1.493040320153856, + "learning_rate": 1.682923428928461e-07, + "loss": 0.9470697641372681, + "step": 7136 + }, + { + "epoch": 1.6444700460829493, + "grad_norm": 1.1093535752232264, + "learning_rate": 1.6808087577595686e-07, + "loss": 0.7123041749000549, + "step": 7137 + }, + { + "epoch": 1.644700460829493, + "grad_norm": 1.3701909416221987, + "learning_rate": 1.6786952941102694e-07, + "loss": 0.8077690005302429, + "step": 7138 + }, + { + "epoch": 1.644930875576037, + "grad_norm": 1.3400770079054931, + "learning_rate": 1.6765830382873348e-07, + "loss": 0.767215371131897, + "step": 7139 + }, + { + "epoch": 1.6451612903225805, + "grad_norm": 1.3723903093182923, + "learning_rate": 1.6744719905973502e-07, + "loss": 0.7488540410995483, + "step": 7140 + }, + { + "epoch": 1.6453917050691245, + "grad_norm": 1.4546211260208752, + "learning_rate": 1.6723621513467378e-07, + "loss": 0.7841323018074036, + "step": 7141 + }, + { + "epoch": 1.6456221198156682, + "grad_norm": 1.2167195095267902, + "learning_rate": 1.6702535208417346e-07, + "loss": 0.65464186668396, + "step": 7142 + }, + { + "epoch": 1.645852534562212, + "grad_norm": 1.3347329400915569, + "learning_rate": 1.6681460993884056e-07, + "loss": 0.8845036029815674, + "step": 7143 + }, + { + "epoch": 1.6460829493087559, + "grad_norm": 1.3318983430245122, + "learning_rate": 1.6660398872926396e-07, + "loss": 0.6741687655448914, + "step": 7144 + }, + { + "epoch": 1.6463133640552994, + "grad_norm": 1.4438874912830426, + "learning_rate": 1.663934884860152e-07, + "loss": 0.8656717538833618, 
+ "step": 7145 + }, + { + "epoch": 1.6465437788018433, + "grad_norm": 1.3298318800949103, + "learning_rate": 1.6618310923964785e-07, + "loss": 0.7588434219360352, + "step": 7146 + }, + { + "epoch": 1.646774193548387, + "grad_norm": 1.3262924093620256, + "learning_rate": 1.6597285102069846e-07, + "loss": 0.7180176973342896, + "step": 7147 + }, + { + "epoch": 1.6470046082949308, + "grad_norm": 1.2551409816382322, + "learning_rate": 1.6576271385968576e-07, + "loss": 0.8253776431083679, + "step": 7148 + }, + { + "epoch": 1.6472350230414747, + "grad_norm": 1.2281736040805922, + "learning_rate": 1.6555269778711046e-07, + "loss": 0.7200941443443298, + "step": 7149 + }, + { + "epoch": 1.6474654377880185, + "grad_norm": 1.1059198918963296, + "learning_rate": 1.653428028334567e-07, + "loss": 0.7076164484024048, + "step": 7150 + }, + { + "epoch": 1.6476958525345622, + "grad_norm": 1.195055160265343, + "learning_rate": 1.6513302902919003e-07, + "loss": 0.8068090677261353, + "step": 7151 + }, + { + "epoch": 1.6479262672811061, + "grad_norm": 1.3947857709427287, + "learning_rate": 1.6492337640475884e-07, + "loss": 0.9712029099464417, + "step": 7152 + }, + { + "epoch": 1.6481566820276496, + "grad_norm": 1.406808701456467, + "learning_rate": 1.6471384499059438e-07, + "loss": 0.8359737992286682, + "step": 7153 + }, + { + "epoch": 1.6483870967741936, + "grad_norm": 1.0570634795327605, + "learning_rate": 1.645044348171094e-07, + "loss": 0.8066359758377075, + "step": 7154 + }, + { + "epoch": 1.6486175115207373, + "grad_norm": 1.3810484659709985, + "learning_rate": 1.642951459146995e-07, + "loss": 0.8717833757400513, + "step": 7155 + }, + { + "epoch": 1.648847926267281, + "grad_norm": 1.0992736543757442, + "learning_rate": 1.6408597831374305e-07, + "loss": 0.7335910201072693, + "step": 7156 + }, + { + "epoch": 1.649078341013825, + "grad_norm": 1.2397456033121492, + "learning_rate": 1.6387693204460028e-07, + "loss": 0.816049337387085, + "step": 7157 + }, + { + "epoch": 
1.6493087557603685, + "grad_norm": 1.4068842390673124, + "learning_rate": 1.6366800713761364e-07, + "loss": 0.8060640096664429, + "step": 7158 + }, + { + "epoch": 1.6495391705069125, + "grad_norm": 1.2074799471388065, + "learning_rate": 1.6345920362310894e-07, + "loss": 0.8477619886398315, + "step": 7159 + }, + { + "epoch": 1.6497695852534562, + "grad_norm": 1.332601091577715, + "learning_rate": 1.6325052153139329e-07, + "loss": 0.9793992638587952, + "step": 7160 + }, + { + "epoch": 1.65, + "grad_norm": 1.1909988829986036, + "learning_rate": 1.6304196089275658e-07, + "loss": 0.8020002245903015, + "step": 7161 + }, + { + "epoch": 1.6502304147465439, + "grad_norm": 1.3231428787162685, + "learning_rate": 1.6283352173747146e-07, + "loss": 0.8226429224014282, + "step": 7162 + }, + { + "epoch": 1.6504608294930876, + "grad_norm": 1.2483952861501775, + "learning_rate": 1.6262520409579227e-07, + "loss": 0.7029248476028442, + "step": 7163 + }, + { + "epoch": 1.6506912442396313, + "grad_norm": 1.0969129808942812, + "learning_rate": 1.6241700799795631e-07, + "loss": 0.7234015464782715, + "step": 7164 + }, + { + "epoch": 1.6509216589861753, + "grad_norm": 1.3383637969539028, + "learning_rate": 1.6220893347418285e-07, + "loss": 0.854112982749939, + "step": 7165 + }, + { + "epoch": 1.6511520737327188, + "grad_norm": 1.2277405230752314, + "learning_rate": 1.6200098055467325e-07, + "loss": 0.8098663091659546, + "step": 7166 + }, + { + "epoch": 1.6513824884792627, + "grad_norm": 1.286099874995443, + "learning_rate": 1.617931492696123e-07, + "loss": 0.9032876491546631, + "step": 7167 + }, + { + "epoch": 1.6516129032258065, + "grad_norm": 1.0239384348378415, + "learning_rate": 1.6158543964916606e-07, + "loss": 0.7048916816711426, + "step": 7168 + }, + { + "epoch": 1.6518433179723502, + "grad_norm": 1.2354879671689736, + "learning_rate": 1.6137785172348307e-07, + "loss": 0.879542350769043, + "step": 7169 + }, + { + "epoch": 1.6520737327188941, + "grad_norm": 1.1499858637392877, + 
"learning_rate": 1.611703855226949e-07, + "loss": 0.7851279377937317, + "step": 7170 + }, + { + "epoch": 1.6523041474654376, + "grad_norm": 1.3219595195357319, + "learning_rate": 1.6096304107691493e-07, + "loss": 0.779682457447052, + "step": 7171 + }, + { + "epoch": 1.6525345622119816, + "grad_norm": 1.2160096597693908, + "learning_rate": 1.6075581841623854e-07, + "loss": 0.7761027812957764, + "step": 7172 + }, + { + "epoch": 1.6527649769585253, + "grad_norm": 1.2474814185415584, + "learning_rate": 1.605487175707443e-07, + "loss": 0.726230263710022, + "step": 7173 + }, + { + "epoch": 1.652995391705069, + "grad_norm": 1.4211290590725025, + "learning_rate": 1.6034173857049238e-07, + "loss": 0.915956437587738, + "step": 7174 + }, + { + "epoch": 1.653225806451613, + "grad_norm": 1.2631109729400856, + "learning_rate": 1.6013488144552534e-07, + "loss": 0.8435969352722168, + "step": 7175 + }, + { + "epoch": 1.6534562211981567, + "grad_norm": 1.4370024530537882, + "learning_rate": 1.599281462258687e-07, + "loss": 0.7775791883468628, + "step": 7176 + }, + { + "epoch": 1.6536866359447004, + "grad_norm": 1.2504716465033257, + "learning_rate": 1.5972153294152945e-07, + "loss": 0.7578383684158325, + "step": 7177 + }, + { + "epoch": 1.6539170506912444, + "grad_norm": 1.25108951979748, + "learning_rate": 1.5951504162249706e-07, + "loss": 0.8378545045852661, + "step": 7178 + }, + { + "epoch": 1.654147465437788, + "grad_norm": 0.8833465476140244, + "learning_rate": 1.59308672298744e-07, + "loss": 0.7071488499641418, + "step": 7179 + }, + { + "epoch": 1.6543778801843319, + "grad_norm": 1.315489910714214, + "learning_rate": 1.591024250002243e-07, + "loss": 0.7424521446228027, + "step": 7180 + }, + { + "epoch": 1.6546082949308756, + "grad_norm": 1.2002526550771535, + "learning_rate": 1.5889629975687401e-07, + "loss": 0.6503180265426636, + "step": 7181 + }, + { + "epoch": 1.6548387096774193, + "grad_norm": 1.1861762089682637, + "learning_rate": 1.5869029659861265e-07, + "loss": 
0.7589888572692871, + "step": 7182 + }, + { + "epoch": 1.6550691244239633, + "grad_norm": 1.2877948406073703, + "learning_rate": 1.5848441555534109e-07, + "loss": 0.7609498500823975, + "step": 7183 + }, + { + "epoch": 1.6552995391705068, + "grad_norm": 1.1756552735153392, + "learning_rate": 1.582786566569425e-07, + "loss": 0.7813476324081421, + "step": 7184 + }, + { + "epoch": 1.6555299539170507, + "grad_norm": 1.1595327374780875, + "learning_rate": 1.5807301993328258e-07, + "loss": 0.7386292219161987, + "step": 7185 + }, + { + "epoch": 1.6557603686635944, + "grad_norm": 1.4106740697965885, + "learning_rate": 1.5786750541420922e-07, + "loss": 1.0402865409851074, + "step": 7186 + }, + { + "epoch": 1.6559907834101382, + "grad_norm": 1.071897744375966, + "learning_rate": 1.5766211312955246e-07, + "loss": 0.7375132441520691, + "step": 7187 + }, + { + "epoch": 1.6562211981566821, + "grad_norm": 1.3721197645813625, + "learning_rate": 1.574568431091251e-07, + "loss": 0.7903615236282349, + "step": 7188 + }, + { + "epoch": 1.6564516129032258, + "grad_norm": 1.1205445704505106, + "learning_rate": 1.5725169538272132e-07, + "loss": 0.6912896633148193, + "step": 7189 + }, + { + "epoch": 1.6566820276497696, + "grad_norm": 1.2659829320834666, + "learning_rate": 1.570466699801185e-07, + "loss": 0.7181826233863831, + "step": 7190 + }, + { + "epoch": 1.6569124423963135, + "grad_norm": 1.3941328099536103, + "learning_rate": 1.5684176693107566e-07, + "loss": 0.8328898549079895, + "step": 7191 + }, + { + "epoch": 1.657142857142857, + "grad_norm": 1.275566962551196, + "learning_rate": 1.5663698626533384e-07, + "loss": 0.7775120735168457, + "step": 7192 + }, + { + "epoch": 1.657373271889401, + "grad_norm": 1.3683527646177032, + "learning_rate": 1.564323280126173e-07, + "loss": 0.8412137031555176, + "step": 7193 + }, + { + "epoch": 1.6576036866359447, + "grad_norm": 1.4192183215515342, + "learning_rate": 1.562277922026316e-07, + "loss": 0.7046825885772705, + "step": 7194 + }, + { + 
"epoch": 1.6578341013824884, + "grad_norm": 1.3386632639806328, + "learning_rate": 1.5602337886506468e-07, + "loss": 0.7107498645782471, + "step": 7195 + }, + { + "epoch": 1.6580645161290324, + "grad_norm": 1.1946522893092928, + "learning_rate": 1.558190880295872e-07, + "loss": 0.640724778175354, + "step": 7196 + }, + { + "epoch": 1.658294930875576, + "grad_norm": 1.3093502483074915, + "learning_rate": 1.556149197258515e-07, + "loss": 0.7856858968734741, + "step": 7197 + }, + { + "epoch": 1.6585253456221198, + "grad_norm": 1.4971129714340625, + "learning_rate": 1.554108739834923e-07, + "loss": 0.7956376075744629, + "step": 7198 + }, + { + "epoch": 1.6587557603686636, + "grad_norm": 1.2753834260169075, + "learning_rate": 1.5520695083212675e-07, + "loss": 0.721325159072876, + "step": 7199 + }, + { + "epoch": 1.6589861751152073, + "grad_norm": 1.060032555829029, + "learning_rate": 1.550031503013539e-07, + "loss": 0.7043335437774658, + "step": 7200 + }, + { + "epoch": 1.6592165898617512, + "grad_norm": 1.2269468216437214, + "learning_rate": 1.5479947242075496e-07, + "loss": 0.7154408693313599, + "step": 7201 + }, + { + "epoch": 1.659447004608295, + "grad_norm": 1.0598234159957265, + "learning_rate": 1.5459591721989397e-07, + "loss": 0.7353748083114624, + "step": 7202 + }, + { + "epoch": 1.6596774193548387, + "grad_norm": 1.1815091781809732, + "learning_rate": 1.5439248472831644e-07, + "loss": 0.7404372692108154, + "step": 7203 + }, + { + "epoch": 1.6599078341013827, + "grad_norm": 1.7521749620198364, + "learning_rate": 1.541891749755503e-07, + "loss": 0.8678613305091858, + "step": 7204 + }, + { + "epoch": 1.6601382488479262, + "grad_norm": 1.2663476960491773, + "learning_rate": 1.5398598799110562e-07, + "loss": 0.7177796363830566, + "step": 7205 + }, + { + "epoch": 1.66036866359447, + "grad_norm": 1.3475911636796425, + "learning_rate": 1.537829238044749e-07, + "loss": 0.7610895037651062, + "step": 7206 + }, + { + "epoch": 1.6605990783410138, + "grad_norm": 
1.355013126121341, + "learning_rate": 1.5357998244513227e-07, + "loss": 0.7340127825737, + "step": 7207 + }, + { + "epoch": 1.6608294930875576, + "grad_norm": 1.2008253519594887, + "learning_rate": 1.5337716394253498e-07, + "loss": 0.7060200572013855, + "step": 7208 + }, + { + "epoch": 1.6610599078341015, + "grad_norm": 1.306554098336219, + "learning_rate": 1.5317446832612147e-07, + "loss": 0.8592087030410767, + "step": 7209 + }, + { + "epoch": 1.661290322580645, + "grad_norm": 1.1630740877062444, + "learning_rate": 1.5297189562531264e-07, + "loss": 0.8687897324562073, + "step": 7210 + }, + { + "epoch": 1.661520737327189, + "grad_norm": 1.346256802747815, + "learning_rate": 1.5276944586951202e-07, + "loss": 0.8158563375473022, + "step": 7211 + }, + { + "epoch": 1.6617511520737327, + "grad_norm": 1.2436624388230366, + "learning_rate": 1.5256711908810482e-07, + "loss": 0.7734059691429138, + "step": 7212 + }, + { + "epoch": 1.6619815668202764, + "grad_norm": 1.4006583359216147, + "learning_rate": 1.5236491531045815e-07, + "loss": 0.8302994966506958, + "step": 7213 + }, + { + "epoch": 1.6622119815668204, + "grad_norm": 1.3250021353738068, + "learning_rate": 1.5216283456592216e-07, + "loss": 0.8474830389022827, + "step": 7214 + }, + { + "epoch": 1.662442396313364, + "grad_norm": 1.130266104375724, + "learning_rate": 1.5196087688382808e-07, + "loss": 0.7903469800949097, + "step": 7215 + }, + { + "epoch": 1.6626728110599078, + "grad_norm": 1.1131412296095682, + "learning_rate": 1.5175904229349035e-07, + "loss": 0.7756912708282471, + "step": 7216 + }, + { + "epoch": 1.6629032258064518, + "grad_norm": 1.4164367883683733, + "learning_rate": 1.5155733082420463e-07, + "loss": 0.7495905756950378, + "step": 7217 + }, + { + "epoch": 1.6631336405529953, + "grad_norm": 1.3394708776746769, + "learning_rate": 1.5135574250524897e-07, + "loss": 0.8536649942398071, + "step": 7218 + }, + { + "epoch": 1.6633640552995392, + "grad_norm": 1.3243776315844114, + "learning_rate": 
1.5115427736588404e-07, + "loss": 0.7301580905914307, + "step": 7219 + }, + { + "epoch": 1.663594470046083, + "grad_norm": 1.324768351380299, + "learning_rate": 1.5095293543535203e-07, + "loss": 0.7131164073944092, + "step": 7220 + }, + { + "epoch": 1.6638248847926267, + "grad_norm": 1.0897989875613177, + "learning_rate": 1.5075171674287712e-07, + "loss": 0.708457350730896, + "step": 7221 + }, + { + "epoch": 1.6640552995391706, + "grad_norm": 1.402833248483696, + "learning_rate": 1.5055062131766662e-07, + "loss": 0.7509758472442627, + "step": 7222 + }, + { + "epoch": 1.6642857142857141, + "grad_norm": 1.1455053593625757, + "learning_rate": 1.503496491889089e-07, + "loss": 0.8401786088943481, + "step": 7223 + }, + { + "epoch": 1.664516129032258, + "grad_norm": 1.3755379329147759, + "learning_rate": 1.5014880038577482e-07, + "loss": 0.8578320741653442, + "step": 7224 + }, + { + "epoch": 1.6647465437788018, + "grad_norm": 1.0530962657504686, + "learning_rate": 1.4994807493741723e-07, + "loss": 0.6890276670455933, + "step": 7225 + }, + { + "epoch": 1.6649769585253456, + "grad_norm": 1.1705604667481366, + "learning_rate": 1.4974747287297128e-07, + "loss": 0.785246729850769, + "step": 7226 + }, + { + "epoch": 1.6652073732718895, + "grad_norm": 1.1145207566800768, + "learning_rate": 1.4954699422155382e-07, + "loss": 0.7826062440872192, + "step": 7227 + }, + { + "epoch": 1.6654377880184332, + "grad_norm": 1.392497287743248, + "learning_rate": 1.4934663901226452e-07, + "loss": 0.807513952255249, + "step": 7228 + }, + { + "epoch": 1.665668202764977, + "grad_norm": 1.0951466978132682, + "learning_rate": 1.4914640727418448e-07, + "loss": 0.8138872385025024, + "step": 7229 + }, + { + "epoch": 1.6658986175115207, + "grad_norm": 1.0721150835685114, + "learning_rate": 1.489462990363768e-07, + "loss": 0.8465121984481812, + "step": 7230 + }, + { + "epoch": 1.6661290322580644, + "grad_norm": 1.2125852838751665, + "learning_rate": 1.4874631432788743e-07, + "loss": 0.7649251222610474, 
+ "step": 7231 + }, + { + "epoch": 1.6663594470046084, + "grad_norm": 1.242983952838099, + "learning_rate": 1.485464531777436e-07, + "loss": 0.8297271132469177, + "step": 7232 + }, + { + "epoch": 1.666589861751152, + "grad_norm": 1.4592304164798606, + "learning_rate": 1.483467156149546e-07, + "loss": 0.7873194217681885, + "step": 7233 + }, + { + "epoch": 1.6668202764976958, + "grad_norm": 1.1529440121296932, + "learning_rate": 1.4814710166851274e-07, + "loss": 0.6924761533737183, + "step": 7234 + }, + { + "epoch": 1.6670506912442398, + "grad_norm": 0.9776015930659686, + "learning_rate": 1.4794761136739132e-07, + "loss": 0.6600887179374695, + "step": 7235 + }, + { + "epoch": 1.6672811059907833, + "grad_norm": 1.0700715817274216, + "learning_rate": 1.477482447405458e-07, + "loss": 0.6552041172981262, + "step": 7236 + }, + { + "epoch": 1.6675115207373272, + "grad_norm": 1.1844260959064823, + "learning_rate": 1.4754900181691465e-07, + "loss": 0.8609327077865601, + "step": 7237 + }, + { + "epoch": 1.667741935483871, + "grad_norm": 0.9877698580103615, + "learning_rate": 1.4734988262541726e-07, + "loss": 0.6970123052597046, + "step": 7238 + }, + { + "epoch": 1.6679723502304147, + "grad_norm": 1.1422057607025191, + "learning_rate": 1.4715088719495573e-07, + "loss": 0.7859683036804199, + "step": 7239 + }, + { + "epoch": 1.6682027649769586, + "grad_norm": 1.102405207717508, + "learning_rate": 1.4695201555441393e-07, + "loss": 0.7448029518127441, + "step": 7240 + }, + { + "epoch": 1.6684331797235024, + "grad_norm": 1.136418636365662, + "learning_rate": 1.4675326773265762e-07, + "loss": 0.7566728591918945, + "step": 7241 + }, + { + "epoch": 1.668663594470046, + "grad_norm": 1.183347797545015, + "learning_rate": 1.465546437585351e-07, + "loss": 0.7563366889953613, + "step": 7242 + }, + { + "epoch": 1.6688940092165898, + "grad_norm": 1.2270668729431573, + "learning_rate": 1.4635614366087623e-07, + "loss": 0.8580834865570068, + "step": 7243 + }, + { + "epoch": 1.6691244239631335, 
+ "grad_norm": 1.261588467565845, + "learning_rate": 1.4615776746849306e-07, + "loss": 0.6200178861618042, + "step": 7244 + }, + { + "epoch": 1.6693548387096775, + "grad_norm": 1.12353329539602, + "learning_rate": 1.4595951521017958e-07, + "loss": 0.8052491545677185, + "step": 7245 + }, + { + "epoch": 1.6695852534562212, + "grad_norm": 1.7485044689788691, + "learning_rate": 1.4576138691471186e-07, + "loss": 0.7383530735969543, + "step": 7246 + }, + { + "epoch": 1.669815668202765, + "grad_norm": 1.2061617795996018, + "learning_rate": 1.4556338261084776e-07, + "loss": 0.6735742092132568, + "step": 7247 + }, + { + "epoch": 1.670046082949309, + "grad_norm": 1.1671720957777614, + "learning_rate": 1.453655023273277e-07, + "loss": 0.7570016980171204, + "step": 7248 + }, + { + "epoch": 1.6702764976958524, + "grad_norm": 1.1212050061324152, + "learning_rate": 1.4516774609287364e-07, + "loss": 0.7271980047225952, + "step": 7249 + }, + { + "epoch": 1.6705069124423964, + "grad_norm": 1.3773952001351246, + "learning_rate": 1.449701139361894e-07, + "loss": 0.8567354083061218, + "step": 7250 + }, + { + "epoch": 1.67073732718894, + "grad_norm": 1.4372041287717652, + "learning_rate": 1.447726058859614e-07, + "loss": 0.8675428628921509, + "step": 7251 + }, + { + "epoch": 1.6709677419354838, + "grad_norm": 1.6475511282046704, + "learning_rate": 1.4457522197085748e-07, + "loss": 0.9131098389625549, + "step": 7252 + }, + { + "epoch": 1.6711981566820278, + "grad_norm": 0.9228526790942371, + "learning_rate": 1.4437796221952748e-07, + "loss": 0.7921037673950195, + "step": 7253 + }, + { + "epoch": 1.6714285714285713, + "grad_norm": 1.3314958050470875, + "learning_rate": 1.441808266606037e-07, + "loss": 0.7559863328933716, + "step": 7254 + }, + { + "epoch": 1.6716589861751152, + "grad_norm": 1.4253402064070324, + "learning_rate": 1.4398381532269998e-07, + "loss": 0.7433857917785645, + "step": 7255 + }, + { + "epoch": 1.671889400921659, + "grad_norm": 1.340982715064525, + "learning_rate": 
1.4378692823441207e-07, + "loss": 0.8171184062957764, + "step": 7256 + }, + { + "epoch": 1.6721198156682027, + "grad_norm": 1.4295893582001031, + "learning_rate": 1.4359016542431824e-07, + "loss": 0.7296291589736938, + "step": 7257 + }, + { + "epoch": 1.6723502304147466, + "grad_norm": 1.1566282275472088, + "learning_rate": 1.4339352692097828e-07, + "loss": 0.7397829294204712, + "step": 7258 + }, + { + "epoch": 1.6725806451612903, + "grad_norm": 1.1030928795639288, + "learning_rate": 1.431970127529335e-07, + "loss": 0.6724194884300232, + "step": 7259 + }, + { + "epoch": 1.672811059907834, + "grad_norm": 1.266832602935082, + "learning_rate": 1.430006229487084e-07, + "loss": 0.7711449861526489, + "step": 7260 + }, + { + "epoch": 1.673041474654378, + "grad_norm": 1.0334522746934713, + "learning_rate": 1.428043575368083e-07, + "loss": 0.7581815719604492, + "step": 7261 + }, + { + "epoch": 1.6732718894009215, + "grad_norm": 1.2775574658714877, + "learning_rate": 1.4260821654572063e-07, + "loss": 0.7092517614364624, + "step": 7262 + }, + { + "epoch": 1.6735023041474655, + "grad_norm": 1.116987885688497, + "learning_rate": 1.4241220000391562e-07, + "loss": 0.646745502948761, + "step": 7263 + }, + { + "epoch": 1.6737327188940092, + "grad_norm": 1.0897996116307995, + "learning_rate": 1.4221630793984453e-07, + "loss": 0.7364122867584229, + "step": 7264 + }, + { + "epoch": 1.673963133640553, + "grad_norm": 1.0366138580080708, + "learning_rate": 1.4202054038194068e-07, + "loss": 0.8186795711517334, + "step": 7265 + }, + { + "epoch": 1.6741935483870969, + "grad_norm": 1.178861697439358, + "learning_rate": 1.4182489735861957e-07, + "loss": 0.7172378301620483, + "step": 7266 + }, + { + "epoch": 1.6744239631336404, + "grad_norm": 1.6433299949580555, + "learning_rate": 1.416293788982783e-07, + "loss": 0.8780974745750427, + "step": 7267 + }, + { + "epoch": 1.6746543778801843, + "grad_norm": 1.303060213158533, + "learning_rate": 1.4143398502929672e-07, + "loss": 0.9034930467605591, + 
"step": 7268 + }, + { + "epoch": 1.674884792626728, + "grad_norm": 1.283952582595571, + "learning_rate": 1.4123871578003543e-07, + "loss": 0.7994415760040283, + "step": 7269 + }, + { + "epoch": 1.6751152073732718, + "grad_norm": 1.2332939563797212, + "learning_rate": 1.410435711788376e-07, + "loss": 0.8327854871749878, + "step": 7270 + }, + { + "epoch": 1.6753456221198157, + "grad_norm": 1.3516689374751454, + "learning_rate": 1.408485512540285e-07, + "loss": 0.7667550444602966, + "step": 7271 + }, + { + "epoch": 1.6755760368663595, + "grad_norm": 1.3721126007283877, + "learning_rate": 1.4065365603391478e-07, + "loss": 0.8073924779891968, + "step": 7272 + }, + { + "epoch": 1.6758064516129032, + "grad_norm": 1.2537292403097655, + "learning_rate": 1.4045888554678497e-07, + "loss": 0.7265589237213135, + "step": 7273 + }, + { + "epoch": 1.6760368663594472, + "grad_norm": 1.4008103355507637, + "learning_rate": 1.402642398209104e-07, + "loss": 0.6912035942077637, + "step": 7274 + }, + { + "epoch": 1.6762672811059907, + "grad_norm": 1.4159985968960598, + "learning_rate": 1.400697188845432e-07, + "loss": 0.917953372001648, + "step": 7275 + }, + { + "epoch": 1.6764976958525346, + "grad_norm": 1.1092123664048492, + "learning_rate": 1.3987532276591774e-07, + "loss": 0.6989340782165527, + "step": 7276 + }, + { + "epoch": 1.6767281105990783, + "grad_norm": 1.0530722269060104, + "learning_rate": 1.396810514932507e-07, + "loss": 0.6648346185684204, + "step": 7277 + }, + { + "epoch": 1.676958525345622, + "grad_norm": 1.152242717428616, + "learning_rate": 1.3948690509474014e-07, + "loss": 0.6462730169296265, + "step": 7278 + }, + { + "epoch": 1.677188940092166, + "grad_norm": 1.0559078213581141, + "learning_rate": 1.3929288359856584e-07, + "loss": 0.6084051132202148, + "step": 7279 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 1.2568155531692753, + "learning_rate": 1.3909898703289037e-07, + "loss": 0.8593035936355591, + "step": 7280 + }, + { + "epoch": 1.6776497695852535, + 
"grad_norm": 1.432799112874992, + "learning_rate": 1.389052154258572e-07, + "loss": 0.8064925670623779, + "step": 7281 + }, + { + "epoch": 1.6778801843317972, + "grad_norm": 1.3257643730794528, + "learning_rate": 1.3871156880559186e-07, + "loss": 0.7366064786911011, + "step": 7282 + }, + { + "epoch": 1.678110599078341, + "grad_norm": 1.4541745835743052, + "learning_rate": 1.3851804720020233e-07, + "loss": 0.8090124726295471, + "step": 7283 + }, + { + "epoch": 1.6783410138248849, + "grad_norm": 1.3768572400260246, + "learning_rate": 1.3832465063777787e-07, + "loss": 0.7326936721801758, + "step": 7284 + }, + { + "epoch": 1.6785714285714286, + "grad_norm": 1.1036181265329146, + "learning_rate": 1.3813137914638961e-07, + "loss": 0.7142004370689392, + "step": 7285 + }, + { + "epoch": 1.6788018433179723, + "grad_norm": 1.1850699819171153, + "learning_rate": 1.3793823275409066e-07, + "loss": 0.8358181715011597, + "step": 7286 + }, + { + "epoch": 1.6790322580645163, + "grad_norm": 1.341055264970921, + "learning_rate": 1.3774521148891583e-07, + "loss": 0.7337081432342529, + "step": 7287 + }, + { + "epoch": 1.6792626728110598, + "grad_norm": 1.079298746666331, + "learning_rate": 1.3755231537888222e-07, + "loss": 0.8029334545135498, + "step": 7288 + }, + { + "epoch": 1.6794930875576037, + "grad_norm": 1.1362422930327392, + "learning_rate": 1.373595444519884e-07, + "loss": 0.8132611513137817, + "step": 7289 + }, + { + "epoch": 1.6797235023041475, + "grad_norm": 1.2850987320352512, + "learning_rate": 1.3716689873621446e-07, + "loss": 0.7377278804779053, + "step": 7290 + }, + { + "epoch": 1.6799539170506912, + "grad_norm": 1.5545938019119256, + "learning_rate": 1.3697437825952307e-07, + "loss": 0.788368284702301, + "step": 7291 + }, + { + "epoch": 1.6801843317972351, + "grad_norm": 1.3811107908360538, + "learning_rate": 1.3678198304985822e-07, + "loss": 0.8288586139678955, + "step": 7292 + }, + { + "epoch": 1.6804147465437786, + "grad_norm": 1.2973962244733976, + 
"learning_rate": 1.3658971313514567e-07, + "loss": 0.8534054160118103, + "step": 7293 + }, + { + "epoch": 1.6806451612903226, + "grad_norm": 1.261356018830994, + "learning_rate": 1.363975685432933e-07, + "loss": 0.8730596303939819, + "step": 7294 + }, + { + "epoch": 1.6808755760368663, + "grad_norm": 1.2262296688166254, + "learning_rate": 1.3620554930219076e-07, + "loss": 0.6891343593597412, + "step": 7295 + }, + { + "epoch": 1.68110599078341, + "grad_norm": 1.4944659665191207, + "learning_rate": 1.360136554397089e-07, + "loss": 0.8575270175933838, + "step": 7296 + }, + { + "epoch": 1.681336405529954, + "grad_norm": 1.1221716147697696, + "learning_rate": 1.3582188698370134e-07, + "loss": 0.82694011926651, + "step": 7297 + }, + { + "epoch": 1.6815668202764977, + "grad_norm": 1.1921152491764102, + "learning_rate": 1.3563024396200296e-07, + "loss": 0.6468113660812378, + "step": 7298 + }, + { + "epoch": 1.6817972350230415, + "grad_norm": 1.1634380991195066, + "learning_rate": 1.3543872640243016e-07, + "loss": 0.6818577647209167, + "step": 7299 + }, + { + "epoch": 1.6820276497695854, + "grad_norm": 1.262155726089824, + "learning_rate": 1.352473343327819e-07, + "loss": 0.7630767822265625, + "step": 7300 + }, + { + "epoch": 1.682258064516129, + "grad_norm": 1.3348546512512276, + "learning_rate": 1.3505606778083832e-07, + "loss": 0.9019678831100464, + "step": 7301 + }, + { + "epoch": 1.6824884792626729, + "grad_norm": 1.1302876731614566, + "learning_rate": 1.3486492677436123e-07, + "loss": 0.821324348449707, + "step": 7302 + }, + { + "epoch": 1.6827188940092166, + "grad_norm": 1.1997119452659193, + "learning_rate": 1.3467391134109495e-07, + "loss": 0.796151876449585, + "step": 7303 + }, + { + "epoch": 1.6829493087557603, + "grad_norm": 1.298615109914031, + "learning_rate": 1.3448302150876488e-07, + "loss": 0.8020445108413696, + "step": 7304 + }, + { + "epoch": 1.6831797235023043, + "grad_norm": 0.9490183941784253, + "learning_rate": 1.3429225730507843e-07, + "loss": 
0.7215749025344849, + "step": 7305 + }, + { + "epoch": 1.6834101382488478, + "grad_norm": 1.2708231250445967, + "learning_rate": 1.3410161875772474e-07, + "loss": 0.920941174030304, + "step": 7306 + }, + { + "epoch": 1.6836405529953917, + "grad_norm": 1.4523260098562263, + "learning_rate": 1.3391110589437494e-07, + "loss": 0.8979494571685791, + "step": 7307 + }, + { + "epoch": 1.6838709677419355, + "grad_norm": 1.3126261706157987, + "learning_rate": 1.337207187426812e-07, + "loss": 0.9125145673751831, + "step": 7308 + }, + { + "epoch": 1.6841013824884792, + "grad_norm": 1.1179697975279568, + "learning_rate": 1.3353045733027858e-07, + "loss": 0.8205714225769043, + "step": 7309 + }, + { + "epoch": 1.6843317972350231, + "grad_norm": 1.0993805126125902, + "learning_rate": 1.3334032168478305e-07, + "loss": 0.6914113759994507, + "step": 7310 + }, + { + "epoch": 1.6845622119815669, + "grad_norm": 1.3165472089957067, + "learning_rate": 1.3315031183379233e-07, + "loss": 0.7355014085769653, + "step": 7311 + }, + { + "epoch": 1.6847926267281106, + "grad_norm": 1.3581792517836289, + "learning_rate": 1.3296042780488637e-07, + "loss": 0.7564182281494141, + "step": 7312 + }, + { + "epoch": 1.6850230414746545, + "grad_norm": 1.197316556809727, + "learning_rate": 1.3277066962562643e-07, + "loss": 0.8091372847557068, + "step": 7313 + }, + { + "epoch": 1.685253456221198, + "grad_norm": 1.131878643977171, + "learning_rate": 1.3258103732355586e-07, + "loss": 0.7457877993583679, + "step": 7314 + }, + { + "epoch": 1.685483870967742, + "grad_norm": 1.2462081986852567, + "learning_rate": 1.3239153092619948e-07, + "loss": 0.861819863319397, + "step": 7315 + }, + { + "epoch": 1.6857142857142857, + "grad_norm": 1.2291218741883772, + "learning_rate": 1.3220215046106353e-07, + "loss": 0.7698357105255127, + "step": 7316 + }, + { + "epoch": 1.6859447004608294, + "grad_norm": 1.2862793081172317, + "learning_rate": 1.320128959556369e-07, + "loss": 0.7889456152915955, + "step": 7317 + }, + { + 
"epoch": 1.6861751152073734, + "grad_norm": 1.0926817497008894, + "learning_rate": 1.3182376743738932e-07, + "loss": 0.6467938423156738, + "step": 7318 + }, + { + "epoch": 1.686405529953917, + "grad_norm": 0.962046315570081, + "learning_rate": 1.3163476493377245e-07, + "loss": 0.7202441692352295, + "step": 7319 + }, + { + "epoch": 1.6866359447004609, + "grad_norm": 1.2860571238613498, + "learning_rate": 1.3144588847222004e-07, + "loss": 0.7464008331298828, + "step": 7320 + }, + { + "epoch": 1.6868663594470046, + "grad_norm": 1.3323127704795366, + "learning_rate": 1.3125713808014704e-07, + "loss": 0.8924611806869507, + "step": 7321 + }, + { + "epoch": 1.6870967741935483, + "grad_norm": 1.5027995023789942, + "learning_rate": 1.3106851378495044e-07, + "loss": 0.6943146586418152, + "step": 7322 + }, + { + "epoch": 1.6873271889400923, + "grad_norm": 1.336362656918588, + "learning_rate": 1.308800156140085e-07, + "loss": 0.7335963249206543, + "step": 7323 + }, + { + "epoch": 1.687557603686636, + "grad_norm": 1.1540515039280186, + "learning_rate": 1.30691643594682e-07, + "loss": 0.6900516748428345, + "step": 7324 + }, + { + "epoch": 1.6877880184331797, + "grad_norm": 1.0161083273097216, + "learning_rate": 1.3050339775431262e-07, + "loss": 0.7230286598205566, + "step": 7325 + }, + { + "epoch": 1.6880184331797237, + "grad_norm": 1.3577939883495977, + "learning_rate": 1.3031527812022403e-07, + "loss": 0.8069840669631958, + "step": 7326 + }, + { + "epoch": 1.6882488479262672, + "grad_norm": 1.1850570268151976, + "learning_rate": 1.3012728471972134e-07, + "loss": 0.7598710060119629, + "step": 7327 + }, + { + "epoch": 1.6884792626728111, + "grad_norm": 1.1081098309526143, + "learning_rate": 1.2993941758009164e-07, + "loss": 0.6817609071731567, + "step": 7328 + }, + { + "epoch": 1.6887096774193548, + "grad_norm": 1.1578322948538884, + "learning_rate": 1.2975167672860387e-07, + "loss": 0.6958975791931152, + "step": 7329 + }, + { + "epoch": 1.6889400921658986, + "grad_norm": 
1.3026010781309694, + "learning_rate": 1.2956406219250814e-07, + "loss": 0.8270853757858276, + "step": 7330 + }, + { + "epoch": 1.6891705069124425, + "grad_norm": 1.2716142402347783, + "learning_rate": 1.2937657399903623e-07, + "loss": 0.8045610189437866, + "step": 7331 + }, + { + "epoch": 1.689400921658986, + "grad_norm": 1.3670021400758372, + "learning_rate": 1.2918921217540224e-07, + "loss": 0.6685627698898315, + "step": 7332 + }, + { + "epoch": 1.68963133640553, + "grad_norm": 1.481483528763015, + "learning_rate": 1.2900197674880142e-07, + "loss": 0.8157398700714111, + "step": 7333 + }, + { + "epoch": 1.6898617511520737, + "grad_norm": 1.1922253618562, + "learning_rate": 1.2881486774641025e-07, + "loss": 0.6142218112945557, + "step": 7334 + }, + { + "epoch": 1.6900921658986174, + "grad_norm": 1.2611165552955415, + "learning_rate": 1.2862788519538815e-07, + "loss": 0.7849327921867371, + "step": 7335 + }, + { + "epoch": 1.6903225806451614, + "grad_norm": 1.3074701765125263, + "learning_rate": 1.2844102912287457e-07, + "loss": 0.8035926818847656, + "step": 7336 + }, + { + "epoch": 1.6905529953917051, + "grad_norm": 1.26449405816571, + "learning_rate": 1.2825429955599209e-07, + "loss": 0.8456575870513916, + "step": 7337 + }, + { + "epoch": 1.6907834101382488, + "grad_norm": 1.0994096629111347, + "learning_rate": 1.2806769652184402e-07, + "loss": 0.7436026334762573, + "step": 7338 + }, + { + "epoch": 1.6910138248847926, + "grad_norm": 1.3946687886072922, + "learning_rate": 1.2788122004751522e-07, + "loss": 0.8315454721450806, + "step": 7339 + }, + { + "epoch": 1.6912442396313363, + "grad_norm": 1.1032652805797263, + "learning_rate": 1.2769487016007307e-07, + "loss": 0.7425665855407715, + "step": 7340 + }, + { + "epoch": 1.6914746543778802, + "grad_norm": 1.210532059455236, + "learning_rate": 1.2750864688656572e-07, + "loss": 0.7899731993675232, + "step": 7341 + }, + { + "epoch": 1.691705069124424, + "grad_norm": 1.2339006903630358, + "learning_rate": 
1.2732255025402327e-07, + "loss": 0.7637509703636169, + "step": 7342 + }, + { + "epoch": 1.6919354838709677, + "grad_norm": 1.2301886439270189, + "learning_rate": 1.2713658028945717e-07, + "loss": 0.793779730796814, + "step": 7343 + }, + { + "epoch": 1.6921658986175117, + "grad_norm": 1.2351914671209905, + "learning_rate": 1.2695073701986103e-07, + "loss": 0.7248083353042603, + "step": 7344 + }, + { + "epoch": 1.6923963133640552, + "grad_norm": 1.4318296651769333, + "learning_rate": 1.2676502047220973e-07, + "loss": 0.7506270408630371, + "step": 7345 + }, + { + "epoch": 1.692626728110599, + "grad_norm": 1.248314789497465, + "learning_rate": 1.2657943067345965e-07, + "loss": 0.7921839952468872, + "step": 7346 + }, + { + "epoch": 1.6928571428571428, + "grad_norm": 0.9630256947791611, + "learning_rate": 1.263939676505491e-07, + "loss": 0.7627893686294556, + "step": 7347 + }, + { + "epoch": 1.6930875576036866, + "grad_norm": 1.039168896728356, + "learning_rate": 1.262086314303973e-07, + "loss": 0.788955807685852, + "step": 7348 + }, + { + "epoch": 1.6933179723502305, + "grad_norm": 1.0370858136190912, + "learning_rate": 1.2602342203990612e-07, + "loss": 0.5527241826057434, + "step": 7349 + }, + { + "epoch": 1.6935483870967742, + "grad_norm": 1.344465363325951, + "learning_rate": 1.2583833950595825e-07, + "loss": 0.7324573397636414, + "step": 7350 + }, + { + "epoch": 1.693778801843318, + "grad_norm": 1.0731663336898336, + "learning_rate": 1.256533838554179e-07, + "loss": 0.6588207483291626, + "step": 7351 + }, + { + "epoch": 1.6940092165898617, + "grad_norm": 1.417078203000081, + "learning_rate": 1.2546855511513165e-07, + "loss": 0.7597184181213379, + "step": 7352 + }, + { + "epoch": 1.6942396313364054, + "grad_norm": 1.1748568881342167, + "learning_rate": 1.2528385331192692e-07, + "loss": 0.7487671375274658, + "step": 7353 + }, + { + "epoch": 1.6944700460829494, + "grad_norm": 1.0203340332958148, + "learning_rate": 1.250992784726126e-07, + "loss": 0.757739245891571, + 
"step": 7354 + }, + { + "epoch": 1.694700460829493, + "grad_norm": 1.314521719717035, + "learning_rate": 1.249148306239801e-07, + "loss": 0.616966724395752, + "step": 7355 + }, + { + "epoch": 1.6949308755760368, + "grad_norm": 1.506626916778979, + "learning_rate": 1.2473050979280142e-07, + "loss": 0.9415719509124756, + "step": 7356 + }, + { + "epoch": 1.6951612903225808, + "grad_norm": 1.0903568482188648, + "learning_rate": 1.2454631600583044e-07, + "loss": 0.7731447815895081, + "step": 7357 + }, + { + "epoch": 1.6953917050691243, + "grad_norm": 1.2821570786422227, + "learning_rate": 1.2436224928980276e-07, + "loss": 0.800236701965332, + "step": 7358 + }, + { + "epoch": 1.6956221198156682, + "grad_norm": 1.2900334463062004, + "learning_rate": 1.241783096714356e-07, + "loss": 0.8113845586776733, + "step": 7359 + }, + { + "epoch": 1.695852534562212, + "grad_norm": 1.2157051726485628, + "learning_rate": 1.2399449717742706e-07, + "loss": 0.748763382434845, + "step": 7360 + }, + { + "epoch": 1.6960829493087557, + "grad_norm": 1.3769466349570898, + "learning_rate": 1.2381081183445774e-07, + "loss": 0.8595450520515442, + "step": 7361 + }, + { + "epoch": 1.6963133640552996, + "grad_norm": 1.240341465296028, + "learning_rate": 1.2362725366918913e-07, + "loss": 0.7800960540771484, + "step": 7362 + }, + { + "epoch": 1.6965437788018434, + "grad_norm": 1.1951306648014712, + "learning_rate": 1.2344382270826438e-07, + "loss": 0.6549400687217712, + "step": 7363 + }, + { + "epoch": 1.696774193548387, + "grad_norm": 1.1182982438102955, + "learning_rate": 1.2326051897830858e-07, + "loss": 0.7839380502700806, + "step": 7364 + }, + { + "epoch": 1.6970046082949308, + "grad_norm": 1.2576690972053175, + "learning_rate": 1.230773425059277e-07, + "loss": 0.8436654806137085, + "step": 7365 + }, + { + "epoch": 1.6972350230414746, + "grad_norm": 0.8415515075804344, + "learning_rate": 1.2289429331770974e-07, + "loss": 0.6517987251281738, + "step": 7366 + }, + { + "epoch": 1.6974654377880185, + 
"grad_norm": 1.073572916121381, + "learning_rate": 1.2271137144022392e-07, + "loss": 0.7108355760574341, + "step": 7367 + }, + { + "epoch": 1.6976958525345622, + "grad_norm": 1.138464806776697, + "learning_rate": 1.2252857690002094e-07, + "loss": 0.7801471948623657, + "step": 7368 + }, + { + "epoch": 1.697926267281106, + "grad_norm": 0.9980466100193536, + "learning_rate": 1.2234590972363358e-07, + "loss": 0.8240209221839905, + "step": 7369 + }, + { + "epoch": 1.69815668202765, + "grad_norm": 1.5026485017018454, + "learning_rate": 1.2216336993757558e-07, + "loss": 0.8119853138923645, + "step": 7370 + }, + { + "epoch": 1.6983870967741934, + "grad_norm": 0.9448426506131885, + "learning_rate": 1.2198095756834216e-07, + "loss": 0.7685642838478088, + "step": 7371 + }, + { + "epoch": 1.6986175115207374, + "grad_norm": 1.1884615399125027, + "learning_rate": 1.217986726424106e-07, + "loss": 0.7820984125137329, + "step": 7372 + }, + { + "epoch": 1.698847926267281, + "grad_norm": 1.4933868054084445, + "learning_rate": 1.2161651518623916e-07, + "loss": 0.8051085472106934, + "step": 7373 + }, + { + "epoch": 1.6990783410138248, + "grad_norm": 1.16418962691877, + "learning_rate": 1.2143448522626742e-07, + "loss": 0.828999400138855, + "step": 7374 + }, + { + "epoch": 1.6993087557603688, + "grad_norm": 1.513005376638313, + "learning_rate": 1.2125258278891738e-07, + "loss": 0.8215579986572266, + "step": 7375 + }, + { + "epoch": 1.6995391705069123, + "grad_norm": 1.2614405602995598, + "learning_rate": 1.2107080790059156e-07, + "loss": 0.9362014532089233, + "step": 7376 + }, + { + "epoch": 1.6997695852534562, + "grad_norm": 1.014310262155135, + "learning_rate": 1.2088916058767428e-07, + "loss": 0.7789602279663086, + "step": 7377 + }, + { + "epoch": 1.7, + "grad_norm": 1.322797235291574, + "learning_rate": 1.2070764087653163e-07, + "loss": 0.8371152877807617, + "step": 7378 + }, + { + "epoch": 1.7002304147465437, + "grad_norm": 1.2225532720655308, + "learning_rate": 
1.2052624879351103e-07, + "loss": 0.64423668384552, + "step": 7379 + }, + { + "epoch": 1.7004608294930876, + "grad_norm": 1.3442813905677369, + "learning_rate": 1.203449843649409e-07, + "loss": 0.7635257244110107, + "step": 7380 + }, + { + "epoch": 1.7006912442396314, + "grad_norm": 1.15010903043395, + "learning_rate": 1.2016384761713194e-07, + "loss": 0.7859230041503906, + "step": 7381 + }, + { + "epoch": 1.700921658986175, + "grad_norm": 1.0218637195871514, + "learning_rate": 1.199828385763757e-07, + "loss": 0.7066336870193481, + "step": 7382 + }, + { + "epoch": 1.701152073732719, + "grad_norm": 1.1069799499148123, + "learning_rate": 1.198019572689455e-07, + "loss": 0.7190531492233276, + "step": 7383 + }, + { + "epoch": 1.7013824884792625, + "grad_norm": 1.520158585759741, + "learning_rate": 1.1962120372109586e-07, + "loss": 0.7389136552810669, + "step": 7384 + }, + { + "epoch": 1.7016129032258065, + "grad_norm": 1.5406735409523549, + "learning_rate": 1.1944057795906316e-07, + "loss": 0.774425745010376, + "step": 7385 + }, + { + "epoch": 1.7018433179723502, + "grad_norm": 1.0093305285556118, + "learning_rate": 1.1926008000906484e-07, + "loss": 0.7566725015640259, + "step": 7386 + }, + { + "epoch": 1.702073732718894, + "grad_norm": 1.153413777620863, + "learning_rate": 1.1907970989729987e-07, + "loss": 0.6891475915908813, + "step": 7387 + }, + { + "epoch": 1.702304147465438, + "grad_norm": 1.08541401133235, + "learning_rate": 1.1889946764994873e-07, + "loss": 0.6188378930091858, + "step": 7388 + }, + { + "epoch": 1.7025345622119814, + "grad_norm": 1.1534210847497282, + "learning_rate": 1.1871935329317362e-07, + "loss": 0.703027069568634, + "step": 7389 + }, + { + "epoch": 1.7027649769585254, + "grad_norm": 1.2738888238498793, + "learning_rate": 1.1853936685311772e-07, + "loss": 0.9253139495849609, + "step": 7390 + }, + { + "epoch": 1.702995391705069, + "grad_norm": 1.015934424294919, + "learning_rate": 1.1835950835590569e-07, + "loss": 0.6504430770874023, + 
"step": 7391 + }, + { + "epoch": 1.7032258064516128, + "grad_norm": 1.0145240040509695, + "learning_rate": 1.18179777827644e-07, + "loss": 0.6656354665756226, + "step": 7392 + }, + { + "epoch": 1.7034562211981568, + "grad_norm": 1.451290987899464, + "learning_rate": 1.1800017529442019e-07, + "loss": 0.8534063100814819, + "step": 7393 + }, + { + "epoch": 1.7036866359447005, + "grad_norm": 1.1896366783409809, + "learning_rate": 1.178207007823031e-07, + "loss": 0.8315893411636353, + "step": 7394 + }, + { + "epoch": 1.7039170506912442, + "grad_norm": 1.1636407894423468, + "learning_rate": 1.1764135431734367e-07, + "loss": 0.8161677718162537, + "step": 7395 + }, + { + "epoch": 1.7041474654377882, + "grad_norm": 1.418011015190517, + "learning_rate": 1.1746213592557352e-07, + "loss": 0.7942687273025513, + "step": 7396 + }, + { + "epoch": 1.7043778801843317, + "grad_norm": 0.9938387819486493, + "learning_rate": 1.1728304563300584e-07, + "loss": 0.8056384325027466, + "step": 7397 + }, + { + "epoch": 1.7046082949308756, + "grad_norm": 1.3626759695428086, + "learning_rate": 1.1710408346563583e-07, + "loss": 0.8535007238388062, + "step": 7398 + }, + { + "epoch": 1.7048387096774194, + "grad_norm": 1.1491077351100174, + "learning_rate": 1.1692524944943916e-07, + "loss": 0.7729576826095581, + "step": 7399 + }, + { + "epoch": 1.705069124423963, + "grad_norm": 1.2729586784281095, + "learning_rate": 1.1674654361037328e-07, + "loss": 0.7755489349365234, + "step": 7400 + }, + { + "epoch": 1.705299539170507, + "grad_norm": 1.7008944920024607, + "learning_rate": 1.1656796597437757e-07, + "loss": 0.8752193450927734, + "step": 7401 + }, + { + "epoch": 1.7055299539170505, + "grad_norm": 1.0505715773863387, + "learning_rate": 1.1638951656737217e-07, + "loss": 0.7135917544364929, + "step": 7402 + }, + { + "epoch": 1.7057603686635945, + "grad_norm": 1.1807276735663779, + "learning_rate": 1.1621119541525859e-07, + "loss": 0.7378124594688416, + "step": 7403 + }, + { + "epoch": 
1.7059907834101382, + "grad_norm": 1.1699041912496186, + "learning_rate": 1.1603300254391978e-07, + "loss": 0.637479305267334, + "step": 7404 + }, + { + "epoch": 1.706221198156682, + "grad_norm": 0.9107859734790176, + "learning_rate": 1.1585493797922075e-07, + "loss": 0.6162394881248474, + "step": 7405 + }, + { + "epoch": 1.706451612903226, + "grad_norm": 1.0832025296305532, + "learning_rate": 1.1567700174700701e-07, + "loss": 0.7836494445800781, + "step": 7406 + }, + { + "epoch": 1.7066820276497696, + "grad_norm": 1.3117851793296085, + "learning_rate": 1.154991938731057e-07, + "loss": 0.6297281980514526, + "step": 7407 + }, + { + "epoch": 1.7069124423963133, + "grad_norm": 0.9987358693502671, + "learning_rate": 1.1532151438332549e-07, + "loss": 0.7190115451812744, + "step": 7408 + }, + { + "epoch": 1.7071428571428573, + "grad_norm": 1.353324439932077, + "learning_rate": 1.151439633034561e-07, + "loss": 0.7578086853027344, + "step": 7409 + }, + { + "epoch": 1.7073732718894008, + "grad_norm": 0.986158496671175, + "learning_rate": 1.1496654065926925e-07, + "loss": 0.7347216010093689, + "step": 7410 + }, + { + "epoch": 1.7076036866359448, + "grad_norm": 1.2279759650694806, + "learning_rate": 1.1478924647651711e-07, + "loss": 0.7940168380737305, + "step": 7411 + }, + { + "epoch": 1.7078341013824885, + "grad_norm": 1.2336717780625897, + "learning_rate": 1.1461208078093431e-07, + "loss": 0.7625843286514282, + "step": 7412 + }, + { + "epoch": 1.7080645161290322, + "grad_norm": 1.5771280074431184, + "learning_rate": 1.1443504359823585e-07, + "loss": 0.7603492736816406, + "step": 7413 + }, + { + "epoch": 1.7082949308755762, + "grad_norm": 1.1263740749103024, + "learning_rate": 1.1425813495411817e-07, + "loss": 0.8746018409729004, + "step": 7414 + }, + { + "epoch": 1.7085253456221197, + "grad_norm": 1.2947959548271089, + "learning_rate": 1.1408135487425996e-07, + "loss": 0.72724449634552, + "step": 7415 + }, + { + "epoch": 1.7087557603686636, + "grad_norm": 
0.794129708213959, + "learning_rate": 1.1390470338432023e-07, + "loss": 0.6874721646308899, + "step": 7416 + }, + { + "epoch": 1.7089861751152073, + "grad_norm": 0.9673124457868691, + "learning_rate": 1.1372818050993959e-07, + "loss": 0.7129265666007996, + "step": 7417 + }, + { + "epoch": 1.709216589861751, + "grad_norm": 1.3811139782005308, + "learning_rate": 1.1355178627674045e-07, + "loss": 0.7505607008934021, + "step": 7418 + }, + { + "epoch": 1.709447004608295, + "grad_norm": 1.1149863565678992, + "learning_rate": 1.1337552071032608e-07, + "loss": 0.7497769594192505, + "step": 7419 + }, + { + "epoch": 1.7096774193548387, + "grad_norm": 1.342673457996757, + "learning_rate": 1.1319938383628092e-07, + "loss": 0.792352020740509, + "step": 7420 + }, + { + "epoch": 1.7099078341013825, + "grad_norm": 1.1720516000619245, + "learning_rate": 1.1302337568017139e-07, + "loss": 0.780627965927124, + "step": 7421 + }, + { + "epoch": 1.7101382488479264, + "grad_norm": 1.2702279678670012, + "learning_rate": 1.1284749626754464e-07, + "loss": 0.7024368047714233, + "step": 7422 + }, + { + "epoch": 1.71036866359447, + "grad_norm": 1.2880158142162281, + "learning_rate": 1.1267174562392945e-07, + "loss": 0.756782591342926, + "step": 7423 + }, + { + "epoch": 1.7105990783410139, + "grad_norm": 1.2881350167706749, + "learning_rate": 1.1249612377483552e-07, + "loss": 0.8585456609725952, + "step": 7424 + }, + { + "epoch": 1.7108294930875576, + "grad_norm": 1.2079330064248406, + "learning_rate": 1.1232063074575449e-07, + "loss": 0.8610610961914062, + "step": 7425 + }, + { + "epoch": 1.7110599078341013, + "grad_norm": 1.2629835504337044, + "learning_rate": 1.1214526656215872e-07, + "loss": 0.7493829131126404, + "step": 7426 + }, + { + "epoch": 1.7112903225806453, + "grad_norm": 1.1677189056932475, + "learning_rate": 1.1197003124950222e-07, + "loss": 0.7479410171508789, + "step": 7427 + }, + { + "epoch": 1.7115207373271888, + "grad_norm": 1.2024881147733253, + "learning_rate": 
1.1179492483322006e-07, + "loss": 0.8056051135063171, + "step": 7428 + }, + { + "epoch": 1.7117511520737327, + "grad_norm": 1.2393004464149642, + "learning_rate": 1.1161994733872848e-07, + "loss": 0.8448202610015869, + "step": 7429 + }, + { + "epoch": 1.7119815668202765, + "grad_norm": 1.3170634810384778, + "learning_rate": 1.1144509879142571e-07, + "loss": 0.7783033847808838, + "step": 7430 + }, + { + "epoch": 1.7122119815668202, + "grad_norm": 1.2589188548838177, + "learning_rate": 1.1127037921669058e-07, + "loss": 0.6591838598251343, + "step": 7431 + }, + { + "epoch": 1.7124423963133641, + "grad_norm": 1.4141951291447457, + "learning_rate": 1.1109578863988322e-07, + "loss": 0.8508287668228149, + "step": 7432 + }, + { + "epoch": 1.7126728110599079, + "grad_norm": 1.0110596601133535, + "learning_rate": 1.1092132708634549e-07, + "loss": 0.7981588840484619, + "step": 7433 + }, + { + "epoch": 1.7129032258064516, + "grad_norm": 1.1560054105611206, + "learning_rate": 1.1074699458140025e-07, + "loss": 0.7754761576652527, + "step": 7434 + }, + { + "epoch": 1.7131336405529956, + "grad_norm": 1.4234254723014017, + "learning_rate": 1.1057279115035124e-07, + "loss": 0.8487040996551514, + "step": 7435 + }, + { + "epoch": 1.713364055299539, + "grad_norm": 1.2105987237993454, + "learning_rate": 1.1039871681848433e-07, + "loss": 0.8175803422927856, + "step": 7436 + }, + { + "epoch": 1.713594470046083, + "grad_norm": 1.0010434545431337, + "learning_rate": 1.1022477161106591e-07, + "loss": 0.8361574411392212, + "step": 7437 + }, + { + "epoch": 1.7138248847926267, + "grad_norm": 1.1841110354603608, + "learning_rate": 1.1005095555334409e-07, + "loss": 0.6253053545951843, + "step": 7438 + }, + { + "epoch": 1.7140552995391705, + "grad_norm": 1.5361244402123166, + "learning_rate": 1.0987726867054792e-07, + "loss": 0.8035168647766113, + "step": 7439 + }, + { + "epoch": 1.7142857142857144, + "grad_norm": 1.0148513511065955, + "learning_rate": 1.0970371098788767e-07, + "loss": 
0.7352867722511292, + "step": 7440 + }, + { + "epoch": 1.714516129032258, + "grad_norm": 1.1469128257526675, + "learning_rate": 1.0953028253055541e-07, + "loss": 0.7540202140808105, + "step": 7441 + }, + { + "epoch": 1.7147465437788019, + "grad_norm": 1.2653522382652087, + "learning_rate": 1.0935698332372379e-07, + "loss": 0.7883191108703613, + "step": 7442 + }, + { + "epoch": 1.7149769585253456, + "grad_norm": 1.2745739855530656, + "learning_rate": 1.0918381339254701e-07, + "loss": 0.7581819295883179, + "step": 7443 + }, + { + "epoch": 1.7152073732718893, + "grad_norm": 1.1705192956080483, + "learning_rate": 1.090107727621603e-07, + "loss": 0.8066321611404419, + "step": 7444 + }, + { + "epoch": 1.7154377880184333, + "grad_norm": 1.1820593590096908, + "learning_rate": 1.0883786145768037e-07, + "loss": 0.7427937984466553, + "step": 7445 + }, + { + "epoch": 1.715668202764977, + "grad_norm": 1.3132499515834741, + "learning_rate": 1.0866507950420523e-07, + "loss": 0.7736409902572632, + "step": 7446 + }, + { + "epoch": 1.7158986175115207, + "grad_norm": 1.1930714060597967, + "learning_rate": 1.0849242692681382e-07, + "loss": 0.7253416776657104, + "step": 7447 + }, + { + "epoch": 1.7161290322580647, + "grad_norm": 0.9521960056037656, + "learning_rate": 1.0831990375056643e-07, + "loss": 0.7933270931243896, + "step": 7448 + }, + { + "epoch": 1.7163594470046082, + "grad_norm": 1.407227257578247, + "learning_rate": 1.0814751000050437e-07, + "loss": 0.7946739196777344, + "step": 7449 + }, + { + "epoch": 1.7165898617511521, + "grad_norm": 1.2776015375287177, + "learning_rate": 1.0797524570165073e-07, + "loss": 0.7798205614089966, + "step": 7450 + }, + { + "epoch": 1.7168202764976959, + "grad_norm": 1.2558469001082564, + "learning_rate": 1.078031108790094e-07, + "loss": 0.616565465927124, + "step": 7451 + }, + { + "epoch": 1.7170506912442396, + "grad_norm": 1.2221718815584264, + "learning_rate": 1.0763110555756516e-07, + "loss": 0.8406517505645752, + "step": 7452 + }, + { + 
"epoch": 1.7172811059907835, + "grad_norm": 1.3773523411720476, + "learning_rate": 1.0745922976228483e-07, + "loss": 0.8827311992645264, + "step": 7453 + }, + { + "epoch": 1.717511520737327, + "grad_norm": 1.2403910104019171, + "learning_rate": 1.0728748351811567e-07, + "loss": 0.585588812828064, + "step": 7454 + }, + { + "epoch": 1.717741935483871, + "grad_norm": 0.9381679846122704, + "learning_rate": 1.0711586684998631e-07, + "loss": 0.6305320858955383, + "step": 7455 + }, + { + "epoch": 1.7179723502304147, + "grad_norm": 1.0634674542520166, + "learning_rate": 1.0694437978280701e-07, + "loss": 0.7982319593429565, + "step": 7456 + }, + { + "epoch": 1.7182027649769585, + "grad_norm": 1.3468349324058282, + "learning_rate": 1.0677302234146879e-07, + "loss": 0.7792943716049194, + "step": 7457 + }, + { + "epoch": 1.7184331797235024, + "grad_norm": 1.308217346349807, + "learning_rate": 1.0660179455084372e-07, + "loss": 0.7019332051277161, + "step": 7458 + }, + { + "epoch": 1.7186635944700461, + "grad_norm": 1.2330257329830192, + "learning_rate": 1.0643069643578562e-07, + "loss": 0.8088894486427307, + "step": 7459 + }, + { + "epoch": 1.7188940092165899, + "grad_norm": 1.5573400915532798, + "learning_rate": 1.0625972802112882e-07, + "loss": 0.799231767654419, + "step": 7460 + }, + { + "epoch": 1.7191244239631336, + "grad_norm": 0.950308854182165, + "learning_rate": 1.0608888933168958e-07, + "loss": 0.7265694737434387, + "step": 7461 + }, + { + "epoch": 1.7193548387096773, + "grad_norm": 1.1717288459308963, + "learning_rate": 1.0591818039226464e-07, + "loss": 0.8566714525222778, + "step": 7462 + }, + { + "epoch": 1.7195852534562213, + "grad_norm": 1.2255123057406947, + "learning_rate": 1.0574760122763216e-07, + "loss": 0.811874508857727, + "step": 7463 + }, + { + "epoch": 1.719815668202765, + "grad_norm": 1.0493349652228454, + "learning_rate": 1.0557715186255156e-07, + "loss": 0.7990631461143494, + "step": 7464 + }, + { + "epoch": 1.7200460829493087, + "grad_norm": 
1.3183681626099089, + "learning_rate": 1.0540683232176307e-07, + "loss": 0.8108334541320801, + "step": 7465 + }, + { + "epoch": 1.7202764976958527, + "grad_norm": 1.8420274096120763, + "learning_rate": 1.0523664262998888e-07, + "loss": 0.8927996158599854, + "step": 7466 + }, + { + "epoch": 1.7205069124423962, + "grad_norm": 1.1733285346989661, + "learning_rate": 1.0506658281193138e-07, + "loss": 0.7277737855911255, + "step": 7467 + }, + { + "epoch": 1.7207373271889401, + "grad_norm": 1.0503912207473127, + "learning_rate": 1.0489665289227467e-07, + "loss": 0.7229233980178833, + "step": 7468 + }, + { + "epoch": 1.7209677419354839, + "grad_norm": 1.298634428768958, + "learning_rate": 1.0472685289568373e-07, + "loss": 0.7211846709251404, + "step": 7469 + }, + { + "epoch": 1.7211981566820276, + "grad_norm": 1.1862135261022106, + "learning_rate": 1.0455718284680504e-07, + "loss": 0.8239504098892212, + "step": 7470 + }, + { + "epoch": 1.7214285714285715, + "grad_norm": 1.2304377847970827, + "learning_rate": 1.0438764277026579e-07, + "loss": 0.7492972612380981, + "step": 7471 + }, + { + "epoch": 1.7216589861751153, + "grad_norm": 1.3060072891774943, + "learning_rate": 1.0421823269067442e-07, + "loss": 0.7658303380012512, + "step": 7472 + }, + { + "epoch": 1.721889400921659, + "grad_norm": 1.0618950256674606, + "learning_rate": 1.0404895263262092e-07, + "loss": 0.708244800567627, + "step": 7473 + }, + { + "epoch": 1.7221198156682027, + "grad_norm": 1.1946101503339825, + "learning_rate": 1.0387980262067575e-07, + "loss": 0.7575969696044922, + "step": 7474 + }, + { + "epoch": 1.7223502304147464, + "grad_norm": 1.3899740319803422, + "learning_rate": 1.0371078267939082e-07, + "loss": 0.7321910262107849, + "step": 7475 + }, + { + "epoch": 1.7225806451612904, + "grad_norm": 1.3828231848460977, + "learning_rate": 1.035418928332995e-07, + "loss": 0.7812562584877014, + "step": 7476 + }, + { + "epoch": 1.7228110599078341, + "grad_norm": 1.3136112254743646, + "learning_rate": 
1.0337313310691565e-07, + "loss": 0.7272104620933533, + "step": 7477 + }, + { + "epoch": 1.7230414746543778, + "grad_norm": 1.1508289944716614, + "learning_rate": 1.032045035247343e-07, + "loss": 0.7006442546844482, + "step": 7478 + }, + { + "epoch": 1.7232718894009218, + "grad_norm": 1.138231534813956, + "learning_rate": 1.0303600411123226e-07, + "loss": 0.7082154750823975, + "step": 7479 + }, + { + "epoch": 1.7235023041474653, + "grad_norm": 1.4157478972732351, + "learning_rate": 1.0286763489086681e-07, + "loss": 0.7204899191856384, + "step": 7480 + }, + { + "epoch": 1.7237327188940093, + "grad_norm": 1.1954797848768004, + "learning_rate": 1.026993958880763e-07, + "loss": 0.9119626879692078, + "step": 7481 + }, + { + "epoch": 1.723963133640553, + "grad_norm": 1.0923155592461768, + "learning_rate": 1.0253128712728088e-07, + "loss": 0.5961707830429077, + "step": 7482 + }, + { + "epoch": 1.7241935483870967, + "grad_norm": 1.1032837677908203, + "learning_rate": 1.023633086328809e-07, + "loss": 0.7469611167907715, + "step": 7483 + }, + { + "epoch": 1.7244239631336407, + "grad_norm": 1.2394445599695993, + "learning_rate": 1.0219546042925841e-07, + "loss": 0.8353795409202576, + "step": 7484 + }, + { + "epoch": 1.7246543778801844, + "grad_norm": 1.120589163159477, + "learning_rate": 1.0202774254077618e-07, + "loss": 0.6587873101234436, + "step": 7485 + }, + { + "epoch": 1.7248847926267281, + "grad_norm": 1.2182162589741892, + "learning_rate": 1.0186015499177847e-07, + "loss": 0.8595654964447021, + "step": 7486 + }, + { + "epoch": 1.7251152073732718, + "grad_norm": 1.0966229129393803, + "learning_rate": 1.0169269780659028e-07, + "loss": 0.7683298587799072, + "step": 7487 + }, + { + "epoch": 1.7253456221198156, + "grad_norm": 1.372358134101511, + "learning_rate": 1.0152537100951786e-07, + "loss": 0.888152003288269, + "step": 7488 + }, + { + "epoch": 1.7255760368663595, + "grad_norm": 1.1162191205168919, + "learning_rate": 1.013581746248482e-07, + "loss": 
0.7835309505462646, + "step": 7489 + }, + { + "epoch": 1.7258064516129032, + "grad_norm": 1.4079534093347241, + "learning_rate": 1.0119110867684999e-07, + "loss": 0.9744646549224854, + "step": 7490 + }, + { + "epoch": 1.726036866359447, + "grad_norm": 1.109483043922066, + "learning_rate": 1.0102417318977251e-07, + "loss": 0.6842091083526611, + "step": 7491 + }, + { + "epoch": 1.726267281105991, + "grad_norm": 1.2357910065520838, + "learning_rate": 1.0085736818784607e-07, + "loss": 0.7435774207115173, + "step": 7492 + }, + { + "epoch": 1.7264976958525344, + "grad_norm": 1.3316804792215136, + "learning_rate": 1.0069069369528249e-07, + "loss": 0.8430237770080566, + "step": 7493 + }, + { + "epoch": 1.7267281105990784, + "grad_norm": 1.1766330255379311, + "learning_rate": 1.0052414973627421e-07, + "loss": 0.8203141689300537, + "step": 7494 + }, + { + "epoch": 1.726958525345622, + "grad_norm": 1.291685708783942, + "learning_rate": 1.0035773633499456e-07, + "loss": 0.7491584420204163, + "step": 7495 + }, + { + "epoch": 1.7271889400921658, + "grad_norm": 0.9475128549493947, + "learning_rate": 1.0019145351559876e-07, + "loss": 0.6738899946212769, + "step": 7496 + }, + { + "epoch": 1.7274193548387098, + "grad_norm": 1.4107090522911332, + "learning_rate": 1.0002530130222231e-07, + "loss": 0.8628265857696533, + "step": 7497 + }, + { + "epoch": 1.7276497695852533, + "grad_norm": 1.5650622568616335, + "learning_rate": 9.985927971898178e-08, + "loss": 1.0158125162124634, + "step": 7498 + }, + { + "epoch": 1.7278801843317972, + "grad_norm": 1.2981782537446935, + "learning_rate": 9.969338878997535e-08, + "loss": 0.7269070148468018, + "step": 7499 + }, + { + "epoch": 1.728110599078341, + "grad_norm": 1.3106792244331589, + "learning_rate": 9.952762853928165e-08, + "loss": 0.8769187927246094, + "step": 7500 + }, + { + "epoch": 1.7283410138248847, + "grad_norm": 1.325563750244826, + "learning_rate": 9.936199899096042e-08, + "loss": 0.7841119170188904, + "step": 7501 + }, + { + "epoch": 
1.7285714285714286, + "grad_norm": 1.7907234255256992, + "learning_rate": 9.91965001690529e-08, + "loss": 0.9209425449371338, + "step": 7502 + }, + { + "epoch": 1.7288018433179724, + "grad_norm": 1.110414701934764, + "learning_rate": 9.903113209758096e-08, + "loss": 0.7795250415802002, + "step": 7503 + }, + { + "epoch": 1.729032258064516, + "grad_norm": 1.2158163264490913, + "learning_rate": 9.886589480054741e-08, + "loss": 0.7131094932556152, + "step": 7504 + }, + { + "epoch": 1.72926267281106, + "grad_norm": 1.167789931248441, + "learning_rate": 9.870078830193629e-08, + "loss": 0.8090137839317322, + "step": 7505 + }, + { + "epoch": 1.7294930875576036, + "grad_norm": 1.124104241227004, + "learning_rate": 9.853581262571231e-08, + "loss": 0.7797958850860596, + "step": 7506 + }, + { + "epoch": 1.7297235023041475, + "grad_norm": 1.3470491669984355, + "learning_rate": 9.83709677958221e-08, + "loss": 0.6927989721298218, + "step": 7507 + }, + { + "epoch": 1.7299539170506912, + "grad_norm": 1.152565458620573, + "learning_rate": 9.820625383619219e-08, + "loss": 0.8009092807769775, + "step": 7508 + }, + { + "epoch": 1.730184331797235, + "grad_norm": 1.0970285369996284, + "learning_rate": 9.804167077073056e-08, + "loss": 0.761864423751831, + "step": 7509 + }, + { + "epoch": 1.730414746543779, + "grad_norm": 1.5795757660336223, + "learning_rate": 9.787721862332654e-08, + "loss": 0.7459509372711182, + "step": 7510 + }, + { + "epoch": 1.7306451612903224, + "grad_norm": 1.0401744024243509, + "learning_rate": 9.771289741785005e-08, + "loss": 0.8216449022293091, + "step": 7511 + }, + { + "epoch": 1.7308755760368664, + "grad_norm": 1.3924364017238642, + "learning_rate": 9.754870717815177e-08, + "loss": 0.7860604524612427, + "step": 7512 + }, + { + "epoch": 1.73110599078341, + "grad_norm": 1.146706612325942, + "learning_rate": 9.738464792806422e-08, + "loss": 0.7727769613265991, + "step": 7513 + }, + { + "epoch": 1.7313364055299538, + "grad_norm": 1.2690787911964316, + 
"learning_rate": 9.722071969140011e-08, + "loss": 0.874458909034729, + "step": 7514 + }, + { + "epoch": 1.7315668202764978, + "grad_norm": 1.1530798069952481, + "learning_rate": 9.705692249195319e-08, + "loss": 0.840191125869751, + "step": 7515 + }, + { + "epoch": 1.7317972350230415, + "grad_norm": 1.1387350117516357, + "learning_rate": 9.689325635349877e-08, + "loss": 0.7169238924980164, + "step": 7516 + }, + { + "epoch": 1.7320276497695852, + "grad_norm": 1.2478630540284088, + "learning_rate": 9.672972129979273e-08, + "loss": 0.7554492950439453, + "step": 7517 + }, + { + "epoch": 1.7322580645161292, + "grad_norm": 1.2166706454141942, + "learning_rate": 9.656631735457154e-08, + "loss": 0.5734076499938965, + "step": 7518 + }, + { + "epoch": 1.7324884792626727, + "grad_norm": 1.5466370383298045, + "learning_rate": 9.640304454155369e-08, + "loss": 0.7867637872695923, + "step": 7519 + }, + { + "epoch": 1.7327188940092166, + "grad_norm": 1.2704443586099365, + "learning_rate": 9.623990288443773e-08, + "loss": 0.7330230474472046, + "step": 7520 + }, + { + "epoch": 1.7329493087557604, + "grad_norm": 1.1352922714992866, + "learning_rate": 9.607689240690319e-08, + "loss": 0.7880058288574219, + "step": 7521 + }, + { + "epoch": 1.733179723502304, + "grad_norm": 1.0605191939295662, + "learning_rate": 9.591401313261139e-08, + "loss": 0.796575665473938, + "step": 7522 + }, + { + "epoch": 1.733410138248848, + "grad_norm": 1.4376273040997398, + "learning_rate": 9.575126508520359e-08, + "loss": 0.8101698160171509, + "step": 7523 + }, + { + "epoch": 1.7336405529953915, + "grad_norm": 1.0868433692155355, + "learning_rate": 9.55886482883026e-08, + "loss": 0.7811597585678101, + "step": 7524 + }, + { + "epoch": 1.7338709677419355, + "grad_norm": 1.1754841201094306, + "learning_rate": 9.542616276551208e-08, + "loss": 0.7680011987686157, + "step": 7525 + }, + { + "epoch": 1.7341013824884792, + "grad_norm": 1.3670730603232781, + "learning_rate": 9.526380854041638e-08, + "loss": 
0.8018794059753418, + "step": 7526 + }, + { + "epoch": 1.734331797235023, + "grad_norm": 1.1232468645544793, + "learning_rate": 9.510158563658133e-08, + "loss": 0.7770500183105469, + "step": 7527 + }, + { + "epoch": 1.734562211981567, + "grad_norm": 1.1848169541071576, + "learning_rate": 9.493949407755309e-08, + "loss": 0.7622300982475281, + "step": 7528 + }, + { + "epoch": 1.7347926267281106, + "grad_norm": 1.5281654640943847, + "learning_rate": 9.477753388685928e-08, + "loss": 0.831570029258728, + "step": 7529 + }, + { + "epoch": 1.7350230414746544, + "grad_norm": 1.1599086861943149, + "learning_rate": 9.461570508800776e-08, + "loss": 0.7987254858016968, + "step": 7530 + }, + { + "epoch": 1.7352534562211983, + "grad_norm": 1.2752040500202788, + "learning_rate": 9.44540077044883e-08, + "loss": 0.8219848275184631, + "step": 7531 + }, + { + "epoch": 1.7354838709677418, + "grad_norm": 1.298736989691398, + "learning_rate": 9.429244175977092e-08, + "loss": 0.8273369073867798, + "step": 7532 + }, + { + "epoch": 1.7357142857142858, + "grad_norm": 1.2555474610105797, + "learning_rate": 9.413100727730628e-08, + "loss": 0.8241056203842163, + "step": 7533 + }, + { + "epoch": 1.7359447004608295, + "grad_norm": 1.4118150886368108, + "learning_rate": 9.396970428052697e-08, + "loss": 0.6880715489387512, + "step": 7534 + }, + { + "epoch": 1.7361751152073732, + "grad_norm": 1.092011806345561, + "learning_rate": 9.380853279284551e-08, + "loss": 0.7355446815490723, + "step": 7535 + }, + { + "epoch": 1.7364055299539172, + "grad_norm": 1.2700711725839655, + "learning_rate": 9.364749283765604e-08, + "loss": 0.8835841417312622, + "step": 7536 + }, + { + "epoch": 1.7366359447004607, + "grad_norm": 1.1984936737610834, + "learning_rate": 9.348658443833313e-08, + "loss": 0.80763840675354, + "step": 7537 + }, + { + "epoch": 1.7368663594470046, + "grad_norm": 1.2855970061631397, + "learning_rate": 9.332580761823227e-08, + "loss": 0.7473145723342896, + "step": 7538 + }, + { + "epoch": 
1.7370967741935484, + "grad_norm": 1.2970951445867331, + "learning_rate": 9.316516240069028e-08, + "loss": 0.6618188619613647, + "step": 7539 + }, + { + "epoch": 1.737327188940092, + "grad_norm": 1.3396426049949766, + "learning_rate": 9.300464880902447e-08, + "loss": 0.7432928085327148, + "step": 7540 + }, + { + "epoch": 1.737557603686636, + "grad_norm": 1.1659381023507147, + "learning_rate": 9.284426686653302e-08, + "loss": 0.7915963530540466, + "step": 7541 + }, + { + "epoch": 1.7377880184331798, + "grad_norm": 1.1552275821682043, + "learning_rate": 9.26840165964955e-08, + "loss": 0.6428440809249878, + "step": 7542 + }, + { + "epoch": 1.7380184331797235, + "grad_norm": 1.1399241166482426, + "learning_rate": 9.252389802217187e-08, + "loss": 0.7142912149429321, + "step": 7543 + }, + { + "epoch": 1.7382488479262674, + "grad_norm": 1.316337246157137, + "learning_rate": 9.236391116680309e-08, + "loss": 0.878044605255127, + "step": 7544 + }, + { + "epoch": 1.738479262672811, + "grad_norm": 1.089416476430598, + "learning_rate": 9.220405605361103e-08, + "loss": 0.6861810684204102, + "step": 7545 + }, + { + "epoch": 1.738709677419355, + "grad_norm": 1.3890455529154517, + "learning_rate": 9.204433270579825e-08, + "loss": 0.7638171911239624, + "step": 7546 + }, + { + "epoch": 1.7389400921658986, + "grad_norm": 1.1532660265349828, + "learning_rate": 9.188474114654876e-08, + "loss": 0.7149873971939087, + "step": 7547 + }, + { + "epoch": 1.7391705069124423, + "grad_norm": 1.1783502444227563, + "learning_rate": 9.172528139902703e-08, + "loss": 0.7249442338943481, + "step": 7548 + }, + { + "epoch": 1.7394009216589863, + "grad_norm": 1.178650320628679, + "learning_rate": 9.156595348637819e-08, + "loss": 0.6846513748168945, + "step": 7549 + }, + { + "epoch": 1.7396313364055298, + "grad_norm": 1.4706201914955974, + "learning_rate": 9.140675743172843e-08, + "loss": 0.9332281351089478, + "step": 7550 + }, + { + "epoch": 1.7398617511520738, + "grad_norm": 1.1835891939139382, + 
"learning_rate": 9.124769325818526e-08, + "loss": 0.6878118515014648, + "step": 7551 + }, + { + "epoch": 1.7400921658986175, + "grad_norm": 1.077038469987993, + "learning_rate": 9.108876098883633e-08, + "loss": 0.7695426344871521, + "step": 7552 + }, + { + "epoch": 1.7403225806451612, + "grad_norm": 1.3278288479360603, + "learning_rate": 9.09299606467503e-08, + "loss": 0.7983303666114807, + "step": 7553 + }, + { + "epoch": 1.7405529953917052, + "grad_norm": 1.4656214059917094, + "learning_rate": 9.077129225497726e-08, + "loss": 0.8158761262893677, + "step": 7554 + }, + { + "epoch": 1.7407834101382489, + "grad_norm": 1.1519947124673093, + "learning_rate": 9.061275583654748e-08, + "loss": 0.8064214587211609, + "step": 7555 + }, + { + "epoch": 1.7410138248847926, + "grad_norm": 1.2545881332280804, + "learning_rate": 9.045435141447211e-08, + "loss": 0.9058080911636353, + "step": 7556 + }, + { + "epoch": 1.7412442396313366, + "grad_norm": 1.213639501339424, + "learning_rate": 9.029607901174374e-08, + "loss": 0.7392270565032959, + "step": 7557 + }, + { + "epoch": 1.74147465437788, + "grad_norm": 1.0453486445607982, + "learning_rate": 9.013793865133501e-08, + "loss": 0.7114729881286621, + "step": 7558 + }, + { + "epoch": 1.741705069124424, + "grad_norm": 1.2302263811033798, + "learning_rate": 8.997993035620022e-08, + "loss": 0.8675493597984314, + "step": 7559 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.9934561818451934, + "learning_rate": 8.98220541492738e-08, + "loss": 0.8103020191192627, + "step": 7560 + }, + { + "epoch": 1.7421658986175115, + "grad_norm": 1.2538115734834285, + "learning_rate": 8.966431005347109e-08, + "loss": 0.7339279651641846, + "step": 7561 + }, + { + "epoch": 1.7423963133640554, + "grad_norm": 1.3510829475373114, + "learning_rate": 8.950669809168887e-08, + "loss": 0.6971707344055176, + "step": 7562 + }, + { + "epoch": 1.742626728110599, + "grad_norm": 1.105458403928542, + "learning_rate": 8.934921828680408e-08, + "loss": 
0.8633124232292175, + "step": 7563 + }, + { + "epoch": 1.7428571428571429, + "grad_norm": 1.3082830118219664, + "learning_rate": 8.919187066167466e-08, + "loss": 0.7704664468765259, + "step": 7564 + }, + { + "epoch": 1.7430875576036866, + "grad_norm": 1.1782653714880955, + "learning_rate": 8.903465523913955e-08, + "loss": 0.7063533067703247, + "step": 7565 + }, + { + "epoch": 1.7433179723502303, + "grad_norm": 1.1177210535700517, + "learning_rate": 8.887757204201817e-08, + "loss": 0.7094486951828003, + "step": 7566 + }, + { + "epoch": 1.7435483870967743, + "grad_norm": 1.4575572123890834, + "learning_rate": 8.872062109311096e-08, + "loss": 0.8743780255317688, + "step": 7567 + }, + { + "epoch": 1.743778801843318, + "grad_norm": 1.5827740898240907, + "learning_rate": 8.856380241519935e-08, + "loss": 0.7282687425613403, + "step": 7568 + }, + { + "epoch": 1.7440092165898617, + "grad_norm": 1.105316538989134, + "learning_rate": 8.840711603104523e-08, + "loss": 0.7507487535476685, + "step": 7569 + }, + { + "epoch": 1.7442396313364057, + "grad_norm": 1.2820028807325874, + "learning_rate": 8.82505619633912e-08, + "loss": 0.807691216468811, + "step": 7570 + }, + { + "epoch": 1.7444700460829492, + "grad_norm": 1.3537034886290398, + "learning_rate": 8.809414023496142e-08, + "loss": 0.8650702238082886, + "step": 7571 + }, + { + "epoch": 1.7447004608294931, + "grad_norm": 0.9602033366804331, + "learning_rate": 8.793785086845984e-08, + "loss": 0.6872273683547974, + "step": 7572 + }, + { + "epoch": 1.7449308755760369, + "grad_norm": 1.0979215212634434, + "learning_rate": 8.778169388657163e-08, + "loss": 0.7242698669433594, + "step": 7573 + }, + { + "epoch": 1.7451612903225806, + "grad_norm": 1.0962988735603825, + "learning_rate": 8.762566931196313e-08, + "loss": 0.741705060005188, + "step": 7574 + }, + { + "epoch": 1.7453917050691246, + "grad_norm": 1.06231801843056, + "learning_rate": 8.746977716728099e-08, + "loss": 0.7293061017990112, + "step": 7575 + }, + { + "epoch": 
1.745622119815668, + "grad_norm": 1.0145801945512316, + "learning_rate": 8.731401747515244e-08, + "loss": 0.8385475277900696, + "step": 7576 + }, + { + "epoch": 1.745852534562212, + "grad_norm": 1.4891647422185605, + "learning_rate": 8.715839025818617e-08, + "loss": 0.8484489917755127, + "step": 7577 + }, + { + "epoch": 1.7460829493087557, + "grad_norm": 1.1930293813449155, + "learning_rate": 8.7002895538971e-08, + "loss": 0.6511530876159668, + "step": 7578 + }, + { + "epoch": 1.7463133640552995, + "grad_norm": 1.4360732745608953, + "learning_rate": 8.684753334007688e-08, + "loss": 0.8274673223495483, + "step": 7579 + }, + { + "epoch": 1.7465437788018434, + "grad_norm": 1.081237944644138, + "learning_rate": 8.669230368405456e-08, + "loss": 0.7367755174636841, + "step": 7580 + }, + { + "epoch": 1.7467741935483871, + "grad_norm": 1.2748877435171337, + "learning_rate": 8.653720659343522e-08, + "loss": 0.80199134349823, + "step": 7581 + }, + { + "epoch": 1.7470046082949309, + "grad_norm": 1.1988639104811598, + "learning_rate": 8.638224209073097e-08, + "loss": 0.7782701253890991, + "step": 7582 + }, + { + "epoch": 1.7472350230414746, + "grad_norm": 1.3660035419508034, + "learning_rate": 8.622741019843504e-08, + "loss": 0.7613752484321594, + "step": 7583 + }, + { + "epoch": 1.7474654377880183, + "grad_norm": 1.3599194483251544, + "learning_rate": 8.60727109390208e-08, + "loss": 0.8213690519332886, + "step": 7584 + }, + { + "epoch": 1.7476958525345623, + "grad_norm": 1.1411507368613496, + "learning_rate": 8.59181443349426e-08, + "loss": 0.7064045667648315, + "step": 7585 + }, + { + "epoch": 1.747926267281106, + "grad_norm": 1.1189241999598565, + "learning_rate": 8.576371040863573e-08, + "loss": 0.6686617136001587, + "step": 7586 + }, + { + "epoch": 1.7481566820276497, + "grad_norm": 1.0194951619872286, + "learning_rate": 8.560940918251592e-08, + "loss": 0.7520097494125366, + "step": 7587 + }, + { + "epoch": 1.7483870967741937, + "grad_norm": 1.0822685191965165, + 
"learning_rate": 8.545524067897991e-08, + "loss": 0.8176038265228271, + "step": 7588 + }, + { + "epoch": 1.7486175115207372, + "grad_norm": 1.3408318725531652, + "learning_rate": 8.530120492040505e-08, + "loss": 0.6680614948272705, + "step": 7589 + }, + { + "epoch": 1.7488479262672811, + "grad_norm": 1.3621846138568519, + "learning_rate": 8.514730192914921e-08, + "loss": 0.7421592473983765, + "step": 7590 + }, + { + "epoch": 1.7490783410138249, + "grad_norm": 1.2822263575200588, + "learning_rate": 8.499353172755164e-08, + "loss": 0.8869342803955078, + "step": 7591 + }, + { + "epoch": 1.7493087557603686, + "grad_norm": 1.1206823186662898, + "learning_rate": 8.48398943379316e-08, + "loss": 0.6850584745407104, + "step": 7592 + }, + { + "epoch": 1.7495391705069125, + "grad_norm": 1.0932592535391596, + "learning_rate": 8.468638978258914e-08, + "loss": 0.7433363199234009, + "step": 7593 + }, + { + "epoch": 1.7497695852534563, + "grad_norm": 1.0269953798613225, + "learning_rate": 8.453301808380564e-08, + "loss": 0.7744357585906982, + "step": 7594 + }, + { + "epoch": 1.75, + "grad_norm": 1.382126107142446, + "learning_rate": 8.437977926384277e-08, + "loss": 0.8236217498779297, + "step": 7595 + }, + { + "epoch": 1.7502304147465437, + "grad_norm": 1.3329245666066865, + "learning_rate": 8.422667334494249e-08, + "loss": 0.8552603721618652, + "step": 7596 + }, + { + "epoch": 1.7504608294930875, + "grad_norm": 1.4100651978644374, + "learning_rate": 8.407370034932859e-08, + "loss": 0.7755998373031616, + "step": 7597 + }, + { + "epoch": 1.7506912442396314, + "grad_norm": 1.3033243035055457, + "learning_rate": 8.392086029920442e-08, + "loss": 0.8105130195617676, + "step": 7598 + }, + { + "epoch": 1.7509216589861751, + "grad_norm": 1.290928258750675, + "learning_rate": 8.376815321675457e-08, + "loss": 0.8787405490875244, + "step": 7599 + }, + { + "epoch": 1.7511520737327189, + "grad_norm": 1.1296910155342912, + "learning_rate": 8.361557912414441e-08, + "loss": 0.6107788681983948, + 
"step": 7600 + }, + { + "epoch": 1.7513824884792628, + "grad_norm": 0.9941949428855014, + "learning_rate": 8.34631380435199e-08, + "loss": 0.6825795769691467, + "step": 7601 + }, + { + "epoch": 1.7516129032258063, + "grad_norm": 1.5141115638242784, + "learning_rate": 8.331082999700734e-08, + "loss": 0.7069272994995117, + "step": 7602 + }, + { + "epoch": 1.7518433179723503, + "grad_norm": 1.5687921139560086, + "learning_rate": 8.315865500671449e-08, + "loss": 0.7784801721572876, + "step": 7603 + }, + { + "epoch": 1.752073732718894, + "grad_norm": 1.0771300382051838, + "learning_rate": 8.300661309472912e-08, + "loss": 0.7653795480728149, + "step": 7604 + }, + { + "epoch": 1.7523041474654377, + "grad_norm": 1.5582480598587298, + "learning_rate": 8.285470428311991e-08, + "loss": 0.7386122941970825, + "step": 7605 + }, + { + "epoch": 1.7525345622119817, + "grad_norm": 0.9515219540238303, + "learning_rate": 8.270292859393613e-08, + "loss": 0.7828700542449951, + "step": 7606 + }, + { + "epoch": 1.7527649769585254, + "grad_norm": 1.5500733851956912, + "learning_rate": 8.255128604920792e-08, + "loss": 0.8955565094947815, + "step": 7607 + }, + { + "epoch": 1.7529953917050691, + "grad_norm": 1.2505809950313513, + "learning_rate": 8.2399776670946e-08, + "loss": 0.9071576595306396, + "step": 7608 + }, + { + "epoch": 1.7532258064516129, + "grad_norm": 1.3402860152327503, + "learning_rate": 8.22484004811419e-08, + "loss": 0.752417802810669, + "step": 7609 + }, + { + "epoch": 1.7534562211981566, + "grad_norm": 1.367440429282924, + "learning_rate": 8.209715750176727e-08, + "loss": 0.8611370325088501, + "step": 7610 + }, + { + "epoch": 1.7536866359447005, + "grad_norm": 1.232351895452084, + "learning_rate": 8.19460477547752e-08, + "loss": 0.745223879814148, + "step": 7611 + }, + { + "epoch": 1.7539170506912443, + "grad_norm": 1.0415704016806513, + "learning_rate": 8.179507126209906e-08, + "loss": 0.7799668908119202, + "step": 7612 + }, + { + "epoch": 1.754147465437788, + 
"grad_norm": 1.3761849870920217, + "learning_rate": 8.164422804565263e-08, + "loss": 0.8177207708358765, + "step": 7613 + }, + { + "epoch": 1.754377880184332, + "grad_norm": 1.2017347256018391, + "learning_rate": 8.149351812733085e-08, + "loss": 0.7111436128616333, + "step": 7614 + }, + { + "epoch": 1.7546082949308754, + "grad_norm": 1.2253776843179969, + "learning_rate": 8.1342941529009e-08, + "loss": 0.6840728521347046, + "step": 7615 + }, + { + "epoch": 1.7548387096774194, + "grad_norm": 1.02983629791633, + "learning_rate": 8.119249827254281e-08, + "loss": 0.6115491986274719, + "step": 7616 + }, + { + "epoch": 1.7550691244239631, + "grad_norm": 1.3870391302655596, + "learning_rate": 8.104218837976939e-08, + "loss": 0.7149351239204407, + "step": 7617 + }, + { + "epoch": 1.7552995391705069, + "grad_norm": 1.2174150358988711, + "learning_rate": 8.089201187250571e-08, + "loss": 0.688147783279419, + "step": 7618 + }, + { + "epoch": 1.7555299539170508, + "grad_norm": 1.2630937737290178, + "learning_rate": 8.074196877254969e-08, + "loss": 0.8092058300971985, + "step": 7619 + }, + { + "epoch": 1.7557603686635943, + "grad_norm": 0.8375696110242734, + "learning_rate": 8.05920591016801e-08, + "loss": 0.7375935912132263, + "step": 7620 + }, + { + "epoch": 1.7559907834101383, + "grad_norm": 1.1868565460321117, + "learning_rate": 8.044228288165599e-08, + "loss": 0.6793934106826782, + "step": 7621 + }, + { + "epoch": 1.756221198156682, + "grad_norm": 1.2102446264436708, + "learning_rate": 8.0292640134217e-08, + "loss": 0.7395757436752319, + "step": 7622 + }, + { + "epoch": 1.7564516129032257, + "grad_norm": 0.9259939168277553, + "learning_rate": 8.014313088108394e-08, + "loss": 0.546409010887146, + "step": 7623 + }, + { + "epoch": 1.7566820276497697, + "grad_norm": 1.4575552468425101, + "learning_rate": 7.999375514395778e-08, + "loss": 0.7790534496307373, + "step": 7624 + }, + { + "epoch": 1.7569124423963134, + "grad_norm": 1.0896798964233478, + "learning_rate": 
7.984451294452e-08, + "loss": 0.7398231625556946, + "step": 7625 + }, + { + "epoch": 1.7571428571428571, + "grad_norm": 1.2623646343227142, + "learning_rate": 7.969540430443311e-08, + "loss": 0.7414441108703613, + "step": 7626 + }, + { + "epoch": 1.757373271889401, + "grad_norm": 1.1312110923091452, + "learning_rate": 7.954642924533994e-08, + "loss": 0.7548750638961792, + "step": 7627 + }, + { + "epoch": 1.7576036866359446, + "grad_norm": 0.957909042850816, + "learning_rate": 7.939758778886385e-08, + "loss": 0.7546773552894592, + "step": 7628 + }, + { + "epoch": 1.7578341013824885, + "grad_norm": 1.1252175485529645, + "learning_rate": 7.924887995660945e-08, + "loss": 0.7373867630958557, + "step": 7629 + }, + { + "epoch": 1.7580645161290323, + "grad_norm": 0.9815120449405607, + "learning_rate": 7.910030577016113e-08, + "loss": 0.7271026968955994, + "step": 7630 + }, + { + "epoch": 1.758294930875576, + "grad_norm": 1.3179911972781693, + "learning_rate": 7.89518652510841e-08, + "loss": 0.8723413944244385, + "step": 7631 + }, + { + "epoch": 1.75852534562212, + "grad_norm": 1.3060473211580457, + "learning_rate": 7.880355842092468e-08, + "loss": 0.8282548189163208, + "step": 7632 + }, + { + "epoch": 1.7587557603686634, + "grad_norm": 1.1089249458958528, + "learning_rate": 7.865538530120918e-08, + "loss": 0.7436991930007935, + "step": 7633 + }, + { + "epoch": 1.7589861751152074, + "grad_norm": 1.0884201833829175, + "learning_rate": 7.850734591344488e-08, + "loss": 0.7750650644302368, + "step": 7634 + }, + { + "epoch": 1.7592165898617511, + "grad_norm": 1.1544057740235625, + "learning_rate": 7.835944027911957e-08, + "loss": 0.6824958324432373, + "step": 7635 + }, + { + "epoch": 1.7594470046082948, + "grad_norm": 1.1607504467923393, + "learning_rate": 7.821166841970107e-08, + "loss": 0.8500322103500366, + "step": 7636 + }, + { + "epoch": 1.7596774193548388, + "grad_norm": 1.3527797330475602, + "learning_rate": 7.806403035663889e-08, + "loss": 0.7111128568649292, + "step": 
7637 + }, + { + "epoch": 1.7599078341013825, + "grad_norm": 1.1877365592337052, + "learning_rate": 7.791652611136212e-08, + "loss": 0.7320532202720642, + "step": 7638 + }, + { + "epoch": 1.7601382488479262, + "grad_norm": 1.2292449607917257, + "learning_rate": 7.776915570528076e-08, + "loss": 0.8439149856567383, + "step": 7639 + }, + { + "epoch": 1.7603686635944702, + "grad_norm": 1.0358127598823044, + "learning_rate": 7.762191915978578e-08, + "loss": 0.6489125490188599, + "step": 7640 + }, + { + "epoch": 1.7605990783410137, + "grad_norm": 1.0601387517448153, + "learning_rate": 7.74748164962482e-08, + "loss": 0.8118616342544556, + "step": 7641 + }, + { + "epoch": 1.7608294930875577, + "grad_norm": 1.0728808336951121, + "learning_rate": 7.732784773601953e-08, + "loss": 0.7528102397918701, + "step": 7642 + }, + { + "epoch": 1.7610599078341014, + "grad_norm": 1.0747138773482348, + "learning_rate": 7.718101290043244e-08, + "loss": 0.7856849431991577, + "step": 7643 + }, + { + "epoch": 1.761290322580645, + "grad_norm": 1.3272410706900097, + "learning_rate": 7.703431201079969e-08, + "loss": 0.8001973628997803, + "step": 7644 + }, + { + "epoch": 1.761520737327189, + "grad_norm": 1.2721768499077395, + "learning_rate": 7.688774508841478e-08, + "loss": 0.8332471251487732, + "step": 7645 + }, + { + "epoch": 1.7617511520737326, + "grad_norm": 1.2453092502270793, + "learning_rate": 7.67413121545516e-08, + "loss": 0.7534940242767334, + "step": 7646 + }, + { + "epoch": 1.7619815668202765, + "grad_norm": 1.1709183629535718, + "learning_rate": 7.65950132304647e-08, + "loss": 0.737503170967102, + "step": 7647 + }, + { + "epoch": 1.7622119815668202, + "grad_norm": 1.0899482570396566, + "learning_rate": 7.644884833738896e-08, + "loss": 0.7407201528549194, + "step": 7648 + }, + { + "epoch": 1.762442396313364, + "grad_norm": 1.0059187913201149, + "learning_rate": 7.630281749654055e-08, + "loss": 0.7589377164840698, + "step": 7649 + }, + { + "epoch": 1.762672811059908, + "grad_norm": 
1.2624576054967966, + "learning_rate": 7.615692072911528e-08, + "loss": 0.6586496829986572, + "step": 7650 + }, + { + "epoch": 1.7629032258064516, + "grad_norm": 1.1572434060771926, + "learning_rate": 7.601115805628977e-08, + "loss": 0.705591082572937, + "step": 7651 + }, + { + "epoch": 1.7631336405529954, + "grad_norm": 1.2006722437863475, + "learning_rate": 7.586552949922176e-08, + "loss": 0.7889619469642639, + "step": 7652 + }, + { + "epoch": 1.7633640552995393, + "grad_norm": 1.0348577197525213, + "learning_rate": 7.572003507904868e-08, + "loss": 0.6912282705307007, + "step": 7653 + }, + { + "epoch": 1.7635944700460828, + "grad_norm": 1.1101374555344716, + "learning_rate": 7.557467481688873e-08, + "loss": 0.7374964952468872, + "step": 7654 + }, + { + "epoch": 1.7638248847926268, + "grad_norm": 1.1479262514291408, + "learning_rate": 7.542944873384105e-08, + "loss": 0.7302298545837402, + "step": 7655 + }, + { + "epoch": 1.7640552995391705, + "grad_norm": 1.2653276061660264, + "learning_rate": 7.5284356850985e-08, + "loss": 0.8323671817779541, + "step": 7656 + }, + { + "epoch": 1.7642857142857142, + "grad_norm": 1.0548505840987745, + "learning_rate": 7.513939918938028e-08, + "loss": 0.6654655933380127, + "step": 7657 + }, + { + "epoch": 1.7645161290322582, + "grad_norm": 1.1231001283574193, + "learning_rate": 7.499457577006751e-08, + "loss": 0.6371186375617981, + "step": 7658 + }, + { + "epoch": 1.7647465437788017, + "grad_norm": 1.3299088323872645, + "learning_rate": 7.484988661406733e-08, + "loss": 0.7761695384979248, + "step": 7659 + }, + { + "epoch": 1.7649769585253456, + "grad_norm": 1.1268786347378037, + "learning_rate": 7.470533174238158e-08, + "loss": 0.779335618019104, + "step": 7660 + }, + { + "epoch": 1.7652073732718894, + "grad_norm": 1.26329747548588, + "learning_rate": 7.456091117599195e-08, + "loss": 0.7642731666564941, + "step": 7661 + }, + { + "epoch": 1.765437788018433, + "grad_norm": 1.417392503393573, + "learning_rate": 7.441662493586076e-08, + 
"loss": 0.7490801215171814, + "step": 7662 + }, + { + "epoch": 1.765668202764977, + "grad_norm": 1.6109060172749883, + "learning_rate": 7.427247304293139e-08, + "loss": 0.9480686187744141, + "step": 7663 + }, + { + "epoch": 1.7658986175115208, + "grad_norm": 1.243245001745715, + "learning_rate": 7.412845551812707e-08, + "loss": 0.6208070516586304, + "step": 7664 + }, + { + "epoch": 1.7661290322580645, + "grad_norm": 1.2606477635417679, + "learning_rate": 7.398457238235167e-08, + "loss": 0.7782050371170044, + "step": 7665 + }, + { + "epoch": 1.7663594470046085, + "grad_norm": 1.1494295384377444, + "learning_rate": 7.38408236564897e-08, + "loss": 0.6725378632545471, + "step": 7666 + }, + { + "epoch": 1.766589861751152, + "grad_norm": 1.4030647180836417, + "learning_rate": 7.369720936140611e-08, + "loss": 0.8247120380401611, + "step": 7667 + }, + { + "epoch": 1.766820276497696, + "grad_norm": 1.2966757041323174, + "learning_rate": 7.355372951794614e-08, + "loss": 0.7866288423538208, + "step": 7668 + }, + { + "epoch": 1.7670506912442396, + "grad_norm": 1.5029385474750363, + "learning_rate": 7.341038414693613e-08, + "loss": 0.8096400499343872, + "step": 7669 + }, + { + "epoch": 1.7672811059907834, + "grad_norm": 1.5152361583075085, + "learning_rate": 7.326717326918208e-08, + "loss": 0.7799873352050781, + "step": 7670 + }, + { + "epoch": 1.7675115207373273, + "grad_norm": 1.0568101452951337, + "learning_rate": 7.312409690547095e-08, + "loss": 0.809285044670105, + "step": 7671 + }, + { + "epoch": 1.7677419354838708, + "grad_norm": 1.351048640166805, + "learning_rate": 7.298115507657021e-08, + "loss": 0.874248743057251, + "step": 7672 + }, + { + "epoch": 1.7679723502304148, + "grad_norm": 1.1594085684678137, + "learning_rate": 7.283834780322761e-08, + "loss": 0.7418022155761719, + "step": 7673 + }, + { + "epoch": 1.7682027649769585, + "grad_norm": 1.2895302232300179, + "learning_rate": 7.269567510617126e-08, + "loss": 0.720660388469696, + "step": 7674 + }, + { + "epoch": 
1.7684331797235022, + "grad_norm": 1.241628438381412, + "learning_rate": 7.255313700611032e-08, + "loss": 0.7655429840087891, + "step": 7675 + }, + { + "epoch": 1.7686635944700462, + "grad_norm": 1.125747625986026, + "learning_rate": 7.241073352373361e-08, + "loss": 0.7303705215454102, + "step": 7676 + }, + { + "epoch": 1.76889400921659, + "grad_norm": 1.1695690935051566, + "learning_rate": 7.226846467971093e-08, + "loss": 0.7997909188270569, + "step": 7677 + }, + { + "epoch": 1.7691244239631336, + "grad_norm": 1.261135372954414, + "learning_rate": 7.212633049469264e-08, + "loss": 0.6546763181686401, + "step": 7678 + }, + { + "epoch": 1.7693548387096776, + "grad_norm": 0.9669222373383191, + "learning_rate": 7.1984330989309e-08, + "loss": 0.6374444961547852, + "step": 7679 + }, + { + "epoch": 1.769585253456221, + "grad_norm": 1.2966171484977755, + "learning_rate": 7.184246618417111e-08, + "loss": 0.7092937231063843, + "step": 7680 + }, + { + "epoch": 1.769815668202765, + "grad_norm": 1.3237517845156634, + "learning_rate": 7.17007360998706e-08, + "loss": 0.7702305316925049, + "step": 7681 + }, + { + "epoch": 1.7700460829493088, + "grad_norm": 0.978090031115468, + "learning_rate": 7.155914075697933e-08, + "loss": 0.7763724327087402, + "step": 7682 + }, + { + "epoch": 1.7702764976958525, + "grad_norm": 0.9935287090208255, + "learning_rate": 7.141768017604966e-08, + "loss": 0.6409577131271362, + "step": 7683 + }, + { + "epoch": 1.7705069124423964, + "grad_norm": 1.2265488041489598, + "learning_rate": 7.127635437761459e-08, + "loss": 0.7500795125961304, + "step": 7684 + }, + { + "epoch": 1.77073732718894, + "grad_norm": 1.405023681248552, + "learning_rate": 7.113516338218717e-08, + "loss": 0.7312004566192627, + "step": 7685 + }, + { + "epoch": 1.770967741935484, + "grad_norm": 0.910138776962328, + "learning_rate": 7.099410721026112e-08, + "loss": 0.823514997959137, + "step": 7686 + }, + { + "epoch": 1.7711981566820276, + "grad_norm": 1.4146285511420962, + 
"learning_rate": 7.085318588231048e-08, + "loss": 0.9504063129425049, + "step": 7687 + }, + { + "epoch": 1.7714285714285714, + "grad_norm": 0.8614868773221174, + "learning_rate": 7.071239941878981e-08, + "loss": 0.7850733399391174, + "step": 7688 + }, + { + "epoch": 1.7716589861751153, + "grad_norm": 1.356738665999072, + "learning_rate": 7.057174784013431e-08, + "loss": 0.9447094798088074, + "step": 7689 + }, + { + "epoch": 1.771889400921659, + "grad_norm": 1.134179637006652, + "learning_rate": 7.04312311667592e-08, + "loss": 0.6675062775611877, + "step": 7690 + }, + { + "epoch": 1.7721198156682028, + "grad_norm": 0.9395193655643466, + "learning_rate": 7.029084941906005e-08, + "loss": 0.6875232458114624, + "step": 7691 + }, + { + "epoch": 1.7723502304147467, + "grad_norm": 1.3573723926231736, + "learning_rate": 7.015060261741357e-08, + "loss": 0.7847919464111328, + "step": 7692 + }, + { + "epoch": 1.7725806451612902, + "grad_norm": 1.300014614678359, + "learning_rate": 7.001049078217613e-08, + "loss": 0.7924584150314331, + "step": 7693 + }, + { + "epoch": 1.7728110599078342, + "grad_norm": 1.4499718780004744, + "learning_rate": 6.987051393368471e-08, + "loss": 0.8802344799041748, + "step": 7694 + }, + { + "epoch": 1.773041474654378, + "grad_norm": 1.425988233405148, + "learning_rate": 6.973067209225692e-08, + "loss": 0.7038631439208984, + "step": 7695 + }, + { + "epoch": 1.7732718894009216, + "grad_norm": 1.1226859696380713, + "learning_rate": 6.959096527819064e-08, + "loss": 0.9016700387001038, + "step": 7696 + }, + { + "epoch": 1.7735023041474656, + "grad_norm": 1.1967072079572705, + "learning_rate": 6.945139351176387e-08, + "loss": 0.7678165435791016, + "step": 7697 + }, + { + "epoch": 1.773732718894009, + "grad_norm": 1.1001980127511188, + "learning_rate": 6.931195681323565e-08, + "loss": 0.6612143516540527, + "step": 7698 + }, + { + "epoch": 1.773963133640553, + "grad_norm": 1.3968871696274494, + "learning_rate": 6.917265520284476e-08, + "loss": 
0.840233325958252, + "step": 7699 + }, + { + "epoch": 1.7741935483870968, + "grad_norm": 1.3698339080168875, + "learning_rate": 6.90334887008106e-08, + "loss": 0.7913506031036377, + "step": 7700 + }, + { + "epoch": 1.7744239631336405, + "grad_norm": 1.3434994536689218, + "learning_rate": 6.889445732733323e-08, + "loss": 0.7523634433746338, + "step": 7701 + }, + { + "epoch": 1.7746543778801844, + "grad_norm": 1.1357027982798495, + "learning_rate": 6.875556110259273e-08, + "loss": 0.7009792327880859, + "step": 7702 + }, + { + "epoch": 1.7748847926267282, + "grad_norm": 0.9926018792518734, + "learning_rate": 6.861680004674963e-08, + "loss": 0.6533738970756531, + "step": 7703 + }, + { + "epoch": 1.7751152073732719, + "grad_norm": 1.0969556014291875, + "learning_rate": 6.847817417994517e-08, + "loss": 0.860493540763855, + "step": 7704 + }, + { + "epoch": 1.7753456221198156, + "grad_norm": 1.3425565367947665, + "learning_rate": 6.833968352230057e-08, + "loss": 0.810010552406311, + "step": 7705 + }, + { + "epoch": 1.7755760368663593, + "grad_norm": 1.2400741621258158, + "learning_rate": 6.820132809391743e-08, + "loss": 0.8443198204040527, + "step": 7706 + }, + { + "epoch": 1.7758064516129033, + "grad_norm": 1.1086679828690398, + "learning_rate": 6.806310791487813e-08, + "loss": 0.758772611618042, + "step": 7707 + }, + { + "epoch": 1.776036866359447, + "grad_norm": 1.2474164003496853, + "learning_rate": 6.792502300524472e-08, + "loss": 0.8438040614128113, + "step": 7708 + }, + { + "epoch": 1.7762672811059907, + "grad_norm": 1.154420265010753, + "learning_rate": 6.778707338506051e-08, + "loss": 0.7727431058883667, + "step": 7709 + }, + { + "epoch": 1.7764976958525347, + "grad_norm": 1.6420516256349273, + "learning_rate": 6.764925907434849e-08, + "loss": 0.8118282556533813, + "step": 7710 + }, + { + "epoch": 1.7767281105990782, + "grad_norm": 1.22888062854885, + "learning_rate": 6.75115800931122e-08, + "loss": 0.7667281627655029, + "step": 7711 + }, + { + "epoch": 
1.7769585253456222, + "grad_norm": 1.2558357954388057, + "learning_rate": 6.737403646133566e-08, + "loss": 0.7824913263320923, + "step": 7712 + }, + { + "epoch": 1.7771889400921659, + "grad_norm": 1.176254722115087, + "learning_rate": 6.723662819898312e-08, + "loss": 0.7318419218063354, + "step": 7713 + }, + { + "epoch": 1.7774193548387096, + "grad_norm": 1.2059569400095187, + "learning_rate": 6.709935532599897e-08, + "loss": 0.7060009241104126, + "step": 7714 + }, + { + "epoch": 1.7776497695852536, + "grad_norm": 1.3093811884607869, + "learning_rate": 6.69622178623086e-08, + "loss": 0.7367588877677917, + "step": 7715 + }, + { + "epoch": 1.7778801843317973, + "grad_norm": 1.3618967587860527, + "learning_rate": 6.682521582781708e-08, + "loss": 0.7340742349624634, + "step": 7716 + }, + { + "epoch": 1.778110599078341, + "grad_norm": 1.257394780772999, + "learning_rate": 6.668834924240995e-08, + "loss": 0.6655991077423096, + "step": 7717 + }, + { + "epoch": 1.7783410138248847, + "grad_norm": 1.3379718118337083, + "learning_rate": 6.655161812595367e-08, + "loss": 0.7562434673309326, + "step": 7718 + }, + { + "epoch": 1.7785714285714285, + "grad_norm": 1.2416548769934193, + "learning_rate": 6.641502249829423e-08, + "loss": 0.8078730702400208, + "step": 7719 + }, + { + "epoch": 1.7788018433179724, + "grad_norm": 1.1920319583326109, + "learning_rate": 6.627856237925811e-08, + "loss": 0.6285899877548218, + "step": 7720 + }, + { + "epoch": 1.7790322580645161, + "grad_norm": 1.1055337731409536, + "learning_rate": 6.61422377886528e-08, + "loss": 0.6633951663970947, + "step": 7721 + }, + { + "epoch": 1.7792626728110599, + "grad_norm": 1.0697990396462347, + "learning_rate": 6.600604874626548e-08, + "loss": 0.7273050546646118, + "step": 7722 + }, + { + "epoch": 1.7794930875576038, + "grad_norm": 1.2680575632659172, + "learning_rate": 6.586999527186354e-08, + "loss": 0.6665729284286499, + "step": 7723 + }, + { + "epoch": 1.7797235023041473, + "grad_norm": 1.275935674563519, + 
"learning_rate": 6.573407738519531e-08, + "loss": 0.7332675457000732, + "step": 7724 + }, + { + "epoch": 1.7799539170506913, + "grad_norm": 1.0778234517601935, + "learning_rate": 6.559829510598892e-08, + "loss": 0.7439071536064148, + "step": 7725 + }, + { + "epoch": 1.780184331797235, + "grad_norm": 1.3635129938987167, + "learning_rate": 6.546264845395299e-08, + "loss": 0.7104752063751221, + "step": 7726 + }, + { + "epoch": 1.7804147465437787, + "grad_norm": 1.2639306988819587, + "learning_rate": 6.53271374487765e-08, + "loss": 0.7792220115661621, + "step": 7727 + }, + { + "epoch": 1.7806451612903227, + "grad_norm": 1.0938522733418012, + "learning_rate": 6.519176211012867e-08, + "loss": 0.6379693746566772, + "step": 7728 + }, + { + "epoch": 1.7808755760368664, + "grad_norm": 1.3289044633653213, + "learning_rate": 6.505652245765881e-08, + "loss": 0.7737444639205933, + "step": 7729 + }, + { + "epoch": 1.7811059907834101, + "grad_norm": 1.1550683939038542, + "learning_rate": 6.49214185109973e-08, + "loss": 0.7681130170822144, + "step": 7730 + }, + { + "epoch": 1.7813364055299539, + "grad_norm": 1.4083081227680676, + "learning_rate": 6.478645028975372e-08, + "loss": 0.8718420267105103, + "step": 7731 + }, + { + "epoch": 1.7815668202764976, + "grad_norm": 1.1823677205039174, + "learning_rate": 6.465161781351914e-08, + "loss": 0.7557366490364075, + "step": 7732 + }, + { + "epoch": 1.7817972350230415, + "grad_norm": 1.1999869902911706, + "learning_rate": 6.45169211018638e-08, + "loss": 0.6794936656951904, + "step": 7733 + }, + { + "epoch": 1.7820276497695853, + "grad_norm": 1.2764239528790797, + "learning_rate": 6.438236017433895e-08, + "loss": 0.8390437364578247, + "step": 7734 + }, + { + "epoch": 1.782258064516129, + "grad_norm": 1.134383511808464, + "learning_rate": 6.424793505047599e-08, + "loss": 0.8024254441261292, + "step": 7735 + }, + { + "epoch": 1.782488479262673, + "grad_norm": 0.8536836629483899, + "learning_rate": 6.411364574978651e-08, + "loss": 
0.6382162570953369, + "step": 7736 + }, + { + "epoch": 1.7827188940092165, + "grad_norm": 1.1757601346145792, + "learning_rate": 6.397949229176225e-08, + "loss": 0.6832011938095093, + "step": 7737 + }, + { + "epoch": 1.7829493087557604, + "grad_norm": 1.653357486541517, + "learning_rate": 6.384547469587564e-08, + "loss": 0.9003958702087402, + "step": 7738 + }, + { + "epoch": 1.7831797235023041, + "grad_norm": 1.1523951728047304, + "learning_rate": 6.371159298157913e-08, + "loss": 0.7030328512191772, + "step": 7739 + }, + { + "epoch": 1.7834101382488479, + "grad_norm": 1.2390057793357907, + "learning_rate": 6.357784716830528e-08, + "loss": 0.8153259754180908, + "step": 7740 + }, + { + "epoch": 1.7836405529953918, + "grad_norm": 1.4244568607420958, + "learning_rate": 6.344423727546744e-08, + "loss": 0.8229082226753235, + "step": 7741 + }, + { + "epoch": 1.7838709677419353, + "grad_norm": 1.3055755817113595, + "learning_rate": 6.331076332245866e-08, + "loss": 0.7306294441223145, + "step": 7742 + }, + { + "epoch": 1.7841013824884793, + "grad_norm": 1.1085692686400792, + "learning_rate": 6.317742532865233e-08, + "loss": 0.7613078951835632, + "step": 7743 + }, + { + "epoch": 1.784331797235023, + "grad_norm": 1.4694829399841158, + "learning_rate": 6.304422331340275e-08, + "loss": 0.9164611101150513, + "step": 7744 + }, + { + "epoch": 1.7845622119815667, + "grad_norm": 1.4076564642652605, + "learning_rate": 6.29111572960439e-08, + "loss": 0.8770956993103027, + "step": 7745 + }, + { + "epoch": 1.7847926267281107, + "grad_norm": 1.3274833988945276, + "learning_rate": 6.277822729588989e-08, + "loss": 0.7482821941375732, + "step": 7746 + }, + { + "epoch": 1.7850230414746544, + "grad_norm": 1.3149565308569835, + "learning_rate": 6.264543333223549e-08, + "loss": 0.7850298881530762, + "step": 7747 + }, + { + "epoch": 1.7852534562211981, + "grad_norm": 1.0844733877563915, + "learning_rate": 6.251277542435552e-08, + "loss": 0.5781385898590088, + "step": 7748 + }, + { + "epoch": 
1.785483870967742, + "grad_norm": 1.2619844590894689, + "learning_rate": 6.238025359150501e-08, + "loss": 0.8217513561248779, + "step": 7749 + }, + { + "epoch": 1.7857142857142856, + "grad_norm": 1.2512912228822737, + "learning_rate": 6.224786785291969e-08, + "loss": 0.8500482439994812, + "step": 7750 + }, + { + "epoch": 1.7859447004608295, + "grad_norm": 1.458025138254964, + "learning_rate": 6.211561822781474e-08, + "loss": 0.8146470785140991, + "step": 7751 + }, + { + "epoch": 1.7861751152073733, + "grad_norm": 1.248354775738917, + "learning_rate": 6.198350473538616e-08, + "loss": 0.7351702451705933, + "step": 7752 + }, + { + "epoch": 1.786405529953917, + "grad_norm": 1.2620887228989164, + "learning_rate": 6.185152739481026e-08, + "loss": 0.7993056774139404, + "step": 7753 + }, + { + "epoch": 1.786635944700461, + "grad_norm": 1.4031526672609798, + "learning_rate": 6.171968622524315e-08, + "loss": 0.8570160865783691, + "step": 7754 + }, + { + "epoch": 1.7868663594470044, + "grad_norm": 1.1948359150749444, + "learning_rate": 6.158798124582143e-08, + "loss": 0.6200212836265564, + "step": 7755 + }, + { + "epoch": 1.7870967741935484, + "grad_norm": 1.2592084852014216, + "learning_rate": 6.145641247566202e-08, + "loss": 0.8196465373039246, + "step": 7756 + }, + { + "epoch": 1.7873271889400921, + "grad_norm": 0.9917037331823602, + "learning_rate": 6.132497993386165e-08, + "loss": 0.7038032412528992, + "step": 7757 + }, + { + "epoch": 1.7875576036866359, + "grad_norm": 1.2428262727857045, + "learning_rate": 6.119368363949806e-08, + "loss": 0.7222307324409485, + "step": 7758 + }, + { + "epoch": 1.7877880184331798, + "grad_norm": 1.2991738769607613, + "learning_rate": 6.106252361162834e-08, + "loss": 0.8457501530647278, + "step": 7759 + }, + { + "epoch": 1.7880184331797235, + "grad_norm": 1.5487287329891364, + "learning_rate": 6.093149986929025e-08, + "loss": 0.7543236017227173, + "step": 7760 + }, + { + "epoch": 1.7882488479262673, + "grad_norm": 1.294614145507911, + 
"learning_rate": 6.080061243150191e-08, + "loss": 0.5728875398635864, + "step": 7761 + }, + { + "epoch": 1.7884792626728112, + "grad_norm": 1.3902935059609232, + "learning_rate": 6.066986131726138e-08, + "loss": 0.6864895820617676, + "step": 7762 + }, + { + "epoch": 1.7887096774193547, + "grad_norm": 1.1640824452811938, + "learning_rate": 6.053924654554687e-08, + "loss": 0.8580472469329834, + "step": 7763 + }, + { + "epoch": 1.7889400921658987, + "grad_norm": 1.358237067906671, + "learning_rate": 6.040876813531714e-08, + "loss": 0.7670924663543701, + "step": 7764 + }, + { + "epoch": 1.7891705069124424, + "grad_norm": 1.2558108988688055, + "learning_rate": 6.027842610551082e-08, + "loss": 0.6558287739753723, + "step": 7765 + }, + { + "epoch": 1.7894009216589861, + "grad_norm": 1.2875975662335684, + "learning_rate": 6.014822047504697e-08, + "loss": 0.8186839818954468, + "step": 7766 + }, + { + "epoch": 1.78963133640553, + "grad_norm": 1.2720662525098447, + "learning_rate": 6.001815126282462e-08, + "loss": 0.7862167358398438, + "step": 7767 + }, + { + "epoch": 1.7898617511520736, + "grad_norm": 1.1119662378593531, + "learning_rate": 5.98882184877233e-08, + "loss": 0.8594048023223877, + "step": 7768 + }, + { + "epoch": 1.7900921658986175, + "grad_norm": 1.3277176558233812, + "learning_rate": 5.975842216860238e-08, + "loss": 0.804019033908844, + "step": 7769 + }, + { + "epoch": 1.7903225806451613, + "grad_norm": 1.1244948347974122, + "learning_rate": 5.962876232430192e-08, + "loss": 0.7404098510742188, + "step": 7770 + }, + { + "epoch": 1.790552995391705, + "grad_norm": 1.3595838567399194, + "learning_rate": 5.949923897364173e-08, + "loss": 0.7726024389266968, + "step": 7771 + }, + { + "epoch": 1.790783410138249, + "grad_norm": 1.5060671287860161, + "learning_rate": 5.936985213542178e-08, + "loss": 0.8225048184394836, + "step": 7772 + }, + { + "epoch": 1.7910138248847927, + "grad_norm": 1.4217281972238225, + "learning_rate": 5.924060182842272e-08, + "loss": 
0.8485706448554993, + "step": 7773 + }, + { + "epoch": 1.7912442396313364, + "grad_norm": 1.189460803975086, + "learning_rate": 5.9111488071404867e-08, + "loss": 0.6580322980880737, + "step": 7774 + }, + { + "epoch": 1.7914746543778803, + "grad_norm": 1.1783786831629417, + "learning_rate": 5.898251088310879e-08, + "loss": 0.7486656904220581, + "step": 7775 + }, + { + "epoch": 1.7917050691244238, + "grad_norm": 1.5948072851449393, + "learning_rate": 5.885367028225574e-08, + "loss": 0.9068334102630615, + "step": 7776 + }, + { + "epoch": 1.7919354838709678, + "grad_norm": 1.1107745619546634, + "learning_rate": 5.872496628754653e-08, + "loss": 0.7091449499130249, + "step": 7777 + }, + { + "epoch": 1.7921658986175115, + "grad_norm": 1.3473785107334575, + "learning_rate": 5.8596398917662107e-08, + "loss": 0.7248316407203674, + "step": 7778 + }, + { + "epoch": 1.7923963133640552, + "grad_norm": 1.2057819957098448, + "learning_rate": 5.8467968191264315e-08, + "loss": 0.7740335464477539, + "step": 7779 + }, + { + "epoch": 1.7926267281105992, + "grad_norm": 1.267573304949112, + "learning_rate": 5.833967412699448e-08, + "loss": 0.7810479402542114, + "step": 7780 + }, + { + "epoch": 1.7928571428571427, + "grad_norm": 1.004282792701847, + "learning_rate": 5.821151674347435e-08, + "loss": 0.7072443962097168, + "step": 7781 + }, + { + "epoch": 1.7930875576036867, + "grad_norm": 1.1829190770666373, + "learning_rate": 5.808349605930585e-08, + "loss": 0.8218289613723755, + "step": 7782 + }, + { + "epoch": 1.7933179723502304, + "grad_norm": 1.393265214120735, + "learning_rate": 5.795561209307087e-08, + "loss": 0.8928433656692505, + "step": 7783 + }, + { + "epoch": 1.793548387096774, + "grad_norm": 1.455083354855402, + "learning_rate": 5.7827864863331796e-08, + "loss": 0.765188455581665, + "step": 7784 + }, + { + "epoch": 1.793778801843318, + "grad_norm": 1.0118039506572176, + "learning_rate": 5.7700254388630795e-08, + "loss": 0.7149494886398315, + "step": 7785 + }, + { + "epoch": 
1.7940092165898618, + "grad_norm": 1.6638445812749356, + "learning_rate": 5.75727806874905e-08, + "loss": 0.8144164085388184, + "step": 7786 + }, + { + "epoch": 1.7942396313364055, + "grad_norm": 1.1101501647130416, + "learning_rate": 5.744544377841354e-08, + "loss": 0.7549517154693604, + "step": 7787 + }, + { + "epoch": 1.7944700460829495, + "grad_norm": 1.1805002478026116, + "learning_rate": 5.731824367988258e-08, + "loss": 0.7820652723312378, + "step": 7788 + }, + { + "epoch": 1.794700460829493, + "grad_norm": 1.2187125462499315, + "learning_rate": 5.719118041036042e-08, + "loss": 0.8253183364868164, + "step": 7789 + }, + { + "epoch": 1.794930875576037, + "grad_norm": 1.3044045265020685, + "learning_rate": 5.70642539882904e-08, + "loss": 0.8177148103713989, + "step": 7790 + }, + { + "epoch": 1.7951612903225806, + "grad_norm": 1.2453642288062106, + "learning_rate": 5.69374644320958e-08, + "loss": 0.722260594367981, + "step": 7791 + }, + { + "epoch": 1.7953917050691244, + "grad_norm": 1.3322495120015716, + "learning_rate": 5.6810811760179434e-08, + "loss": 0.8128643035888672, + "step": 7792 + }, + { + "epoch": 1.7956221198156683, + "grad_norm": 1.2461980802133077, + "learning_rate": 5.6684295990925394e-08, + "loss": 0.8267233371734619, + "step": 7793 + }, + { + "epoch": 1.7958525345622118, + "grad_norm": 1.1467604985666775, + "learning_rate": 5.655791714269697e-08, + "loss": 0.8385082483291626, + "step": 7794 + }, + { + "epoch": 1.7960829493087558, + "grad_norm": 1.2035138425735283, + "learning_rate": 5.643167523383785e-08, + "loss": 0.8705167770385742, + "step": 7795 + }, + { + "epoch": 1.7963133640552995, + "grad_norm": 1.263928906996047, + "learning_rate": 5.6305570282672024e-08, + "loss": 0.7628496885299683, + "step": 7796 + }, + { + "epoch": 1.7965437788018432, + "grad_norm": 1.2993701262886028, + "learning_rate": 5.61796023075034e-08, + "loss": 0.8246536254882812, + "step": 7797 + }, + { + "epoch": 1.7967741935483872, + "grad_norm": 1.2920173759654132, + 
"learning_rate": 5.6053771326615815e-08, + "loss": 0.7103257179260254, + "step": 7798 + }, + { + "epoch": 1.797004608294931, + "grad_norm": 1.318695367926756, + "learning_rate": 5.5928077358273984e-08, + "loss": 0.614989161491394, + "step": 7799 + }, + { + "epoch": 1.7972350230414746, + "grad_norm": 1.6404840895868877, + "learning_rate": 5.5802520420721866e-08, + "loss": 0.9876137971878052, + "step": 7800 + }, + { + "epoch": 1.7974654377880186, + "grad_norm": 1.2467848598458215, + "learning_rate": 5.5677100532183775e-08, + "loss": 0.7023773193359375, + "step": 7801 + }, + { + "epoch": 1.797695852534562, + "grad_norm": 1.1844278512776936, + "learning_rate": 5.555181771086459e-08, + "loss": 0.6680843830108643, + "step": 7802 + }, + { + "epoch": 1.797926267281106, + "grad_norm": 1.0826933828880965, + "learning_rate": 5.542667197494877e-08, + "loss": 0.7221776843070984, + "step": 7803 + }, + { + "epoch": 1.7981566820276498, + "grad_norm": 1.0071738664190577, + "learning_rate": 5.5301663342601e-08, + "loss": 0.7473262548446655, + "step": 7804 + }, + { + "epoch": 1.7983870967741935, + "grad_norm": 1.2499370802188474, + "learning_rate": 5.517679183196622e-08, + "loss": 0.8690468072891235, + "step": 7805 + }, + { + "epoch": 1.7986175115207375, + "grad_norm": 1.0933317196070476, + "learning_rate": 5.505205746116937e-08, + "loss": 0.8353981971740723, + "step": 7806 + }, + { + "epoch": 1.798847926267281, + "grad_norm": 1.177111485427447, + "learning_rate": 5.4927460248315405e-08, + "loss": 0.7691711187362671, + "step": 7807 + }, + { + "epoch": 1.799078341013825, + "grad_norm": 1.034283547212154, + "learning_rate": 5.480300021148953e-08, + "loss": 0.6732556819915771, + "step": 7808 + }, + { + "epoch": 1.7993087557603686, + "grad_norm": 1.1520777556370354, + "learning_rate": 5.467867736875664e-08, + "loss": 0.7273567914962769, + "step": 7809 + }, + { + "epoch": 1.7995391705069124, + "grad_norm": 1.201774068977123, + "learning_rate": 5.455449173816251e-08, + "loss": 
0.7951864004135132, + "step": 7810 + }, + { + "epoch": 1.7997695852534563, + "grad_norm": 1.4133736179333027, + "learning_rate": 5.4430443337732276e-08, + "loss": 0.7073169350624084, + "step": 7811 + }, + { + "epoch": 1.8, + "grad_norm": 1.0101637387022209, + "learning_rate": 5.430653218547132e-08, + "loss": 0.682072639465332, + "step": 7812 + }, + { + "epoch": 1.8002304147465438, + "grad_norm": 0.9949453624163476, + "learning_rate": 5.4182758299365364e-08, + "loss": 0.7512049674987793, + "step": 7813 + }, + { + "epoch": 1.8004608294930877, + "grad_norm": 1.2218170088515747, + "learning_rate": 5.405912169738003e-08, + "loss": 0.7470980882644653, + "step": 7814 + }, + { + "epoch": 1.8006912442396312, + "grad_norm": 1.1792295753175266, + "learning_rate": 5.3935622397460634e-08, + "loss": 0.792417049407959, + "step": 7815 + }, + { + "epoch": 1.8009216589861752, + "grad_norm": 1.4508025797803343, + "learning_rate": 5.3812260417533505e-08, + "loss": 0.8600934743881226, + "step": 7816 + }, + { + "epoch": 1.801152073732719, + "grad_norm": 1.2411035382017865, + "learning_rate": 5.36890357755041e-08, + "loss": 0.6931058168411255, + "step": 7817 + }, + { + "epoch": 1.8013824884792626, + "grad_norm": 1.1047587345616248, + "learning_rate": 5.3565948489258216e-08, + "loss": 0.7382420897483826, + "step": 7818 + }, + { + "epoch": 1.8016129032258066, + "grad_norm": 1.5724454012098283, + "learning_rate": 5.344299857666224e-08, + "loss": 0.6811971068382263, + "step": 7819 + }, + { + "epoch": 1.80184331797235, + "grad_norm": 1.3142032735909368, + "learning_rate": 5.332018605556188e-08, + "loss": 0.8551425933837891, + "step": 7820 + }, + { + "epoch": 1.802073732718894, + "grad_norm": 1.298840655183536, + "learning_rate": 5.319751094378322e-08, + "loss": 0.7907109260559082, + "step": 7821 + }, + { + "epoch": 1.8023041474654378, + "grad_norm": 1.462185741805911, + "learning_rate": 5.3074973259132464e-08, + "loss": 0.6995817422866821, + "step": 7822 + }, + { + "epoch": 
1.8025345622119815, + "grad_norm": 1.2098230160416081, + "learning_rate": 5.295257301939582e-08, + "loss": 0.9157558679580688, + "step": 7823 + }, + { + "epoch": 1.8027649769585254, + "grad_norm": 1.3503599705143554, + "learning_rate": 5.283031024233942e-08, + "loss": 0.8181086778640747, + "step": 7824 + }, + { + "epoch": 1.8029953917050692, + "grad_norm": 1.061101797749781, + "learning_rate": 5.270818494570961e-08, + "loss": 0.7170151472091675, + "step": 7825 + }, + { + "epoch": 1.803225806451613, + "grad_norm": 1.3415396727620215, + "learning_rate": 5.258619714723278e-08, + "loss": 0.7548947334289551, + "step": 7826 + }, + { + "epoch": 1.8034562211981566, + "grad_norm": 1.309211881034751, + "learning_rate": 5.2464346864615204e-08, + "loss": 0.7482869625091553, + "step": 7827 + }, + { + "epoch": 1.8036866359447004, + "grad_norm": 1.2839346666214595, + "learning_rate": 5.234263411554329e-08, + "loss": 0.6984925270080566, + "step": 7828 + }, + { + "epoch": 1.8039170506912443, + "grad_norm": 1.4972180990250632, + "learning_rate": 5.222105891768347e-08, + "loss": 0.910038948059082, + "step": 7829 + }, + { + "epoch": 1.804147465437788, + "grad_norm": 1.4071380742837927, + "learning_rate": 5.2099621288682174e-08, + "loss": 0.8936711549758911, + "step": 7830 + }, + { + "epoch": 1.8043778801843318, + "grad_norm": 1.2841490446822148, + "learning_rate": 5.197832124616608e-08, + "loss": 0.7376326322555542, + "step": 7831 + }, + { + "epoch": 1.8046082949308757, + "grad_norm": 1.6922079171273652, + "learning_rate": 5.1857158807741554e-08, + "loss": 0.8373547792434692, + "step": 7832 + }, + { + "epoch": 1.8048387096774192, + "grad_norm": 1.1938115721747944, + "learning_rate": 5.17361339909953e-08, + "loss": 0.7018512487411499, + "step": 7833 + }, + { + "epoch": 1.8050691244239632, + "grad_norm": 1.0051532014919082, + "learning_rate": 5.161524681349394e-08, + "loss": 0.6111225485801697, + "step": 7834 + }, + { + "epoch": 1.805299539170507, + "grad_norm": 1.1643316930206133, + 
"learning_rate": 5.149449729278388e-08, + "loss": 0.6961934566497803, + "step": 7835 + }, + { + "epoch": 1.8055299539170506, + "grad_norm": 1.1662486414151942, + "learning_rate": 5.137388544639198e-08, + "loss": 0.677324116230011, + "step": 7836 + }, + { + "epoch": 1.8057603686635946, + "grad_norm": 1.1241341054985654, + "learning_rate": 5.125341129182481e-08, + "loss": 0.7124897837638855, + "step": 7837 + }, + { + "epoch": 1.8059907834101383, + "grad_norm": 1.1858041195501718, + "learning_rate": 5.1133074846568815e-08, + "loss": 0.7474578619003296, + "step": 7838 + }, + { + "epoch": 1.806221198156682, + "grad_norm": 1.0832413753523613, + "learning_rate": 5.101287612809102e-08, + "loss": 0.699856162071228, + "step": 7839 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 1.2510053638983376, + "learning_rate": 5.089281515383803e-08, + "loss": 0.6548302173614502, + "step": 7840 + }, + { + "epoch": 1.8066820276497695, + "grad_norm": 1.4067864996197734, + "learning_rate": 5.077289194123624e-08, + "loss": 0.8376108407974243, + "step": 7841 + }, + { + "epoch": 1.8069124423963134, + "grad_norm": 1.4168917230935398, + "learning_rate": 5.065310650769283e-08, + "loss": 0.741931140422821, + "step": 7842 + }, + { + "epoch": 1.8071428571428572, + "grad_norm": 1.0130617353418785, + "learning_rate": 5.053345887059413e-08, + "loss": 0.7253270149230957, + "step": 7843 + }, + { + "epoch": 1.807373271889401, + "grad_norm": 1.452385981822963, + "learning_rate": 5.0413949047306894e-08, + "loss": 0.8248677849769592, + "step": 7844 + }, + { + "epoch": 1.8076036866359448, + "grad_norm": 1.2182337218961132, + "learning_rate": 5.0294577055177925e-08, + "loss": 0.7571253776550293, + "step": 7845 + }, + { + "epoch": 1.8078341013824883, + "grad_norm": 1.3374870147899762, + "learning_rate": 5.017534291153391e-08, + "loss": 0.8256274461746216, + "step": 7846 + }, + { + "epoch": 1.8080645161290323, + "grad_norm": 1.0202351482491858, + "learning_rate": 5.0056246633681356e-08, + "loss": 
0.8609060049057007, + "step": 7847 + }, + { + "epoch": 1.808294930875576, + "grad_norm": 1.0533455142790622, + "learning_rate": 4.9937288238907196e-08, + "loss": 0.7005047798156738, + "step": 7848 + }, + { + "epoch": 1.8085253456221198, + "grad_norm": 1.508707208071474, + "learning_rate": 4.981846774447784e-08, + "loss": 0.8640049695968628, + "step": 7849 + }, + { + "epoch": 1.8087557603686637, + "grad_norm": 1.2891784390675838, + "learning_rate": 4.969978516763984e-08, + "loss": 0.8385862112045288, + "step": 7850 + }, + { + "epoch": 1.8089861751152074, + "grad_norm": 1.4569260681358536, + "learning_rate": 4.9581240525620184e-08, + "loss": 0.845676064491272, + "step": 7851 + }, + { + "epoch": 1.8092165898617512, + "grad_norm": 1.1553749249891685, + "learning_rate": 4.9462833835625327e-08, + "loss": 0.7638444304466248, + "step": 7852 + }, + { + "epoch": 1.8094470046082949, + "grad_norm": 1.3732641737808478, + "learning_rate": 4.934456511484153e-08, + "loss": 0.813924252986908, + "step": 7853 + }, + { + "epoch": 1.8096774193548386, + "grad_norm": 1.1884602060780909, + "learning_rate": 4.9226434380435835e-08, + "loss": 0.8773660659790039, + "step": 7854 + }, + { + "epoch": 1.8099078341013826, + "grad_norm": 1.1706588526128812, + "learning_rate": 4.91084416495543e-08, + "loss": 0.6703497171401978, + "step": 7855 + }, + { + "epoch": 1.8101382488479263, + "grad_norm": 1.149648913375334, + "learning_rate": 4.8990586939323896e-08, + "loss": 0.760738730430603, + "step": 7856 + }, + { + "epoch": 1.81036866359447, + "grad_norm": 1.0947557024146697, + "learning_rate": 4.887287026685072e-08, + "loss": 0.6466494798660278, + "step": 7857 + }, + { + "epoch": 1.810599078341014, + "grad_norm": 1.2590537115606641, + "learning_rate": 4.8755291649221206e-08, + "loss": 0.7416050434112549, + "step": 7858 + }, + { + "epoch": 1.8108294930875575, + "grad_norm": 1.200153480710325, + "learning_rate": 4.863785110350205e-08, + "loss": 0.6841444373130798, + "step": 7859 + }, + { + "epoch": 
1.8110599078341014, + "grad_norm": 0.9841555488440165, + "learning_rate": 4.8520548646739265e-08, + "loss": 0.7818359732627869, + "step": 7860 + }, + { + "epoch": 1.8112903225806452, + "grad_norm": 1.287409595467329, + "learning_rate": 4.840338429595914e-08, + "loss": 0.8802354335784912, + "step": 7861 + }, + { + "epoch": 1.8115207373271889, + "grad_norm": 1.3745378964034318, + "learning_rate": 4.8286358068168055e-08, + "loss": 0.8105144500732422, + "step": 7862 + }, + { + "epoch": 1.8117511520737328, + "grad_norm": 1.2395900259386188, + "learning_rate": 4.816946998035232e-08, + "loss": 0.6613968014717102, + "step": 7863 + }, + { + "epoch": 1.8119815668202763, + "grad_norm": 1.191330407638407, + "learning_rate": 4.80527200494778e-08, + "loss": 0.7945050001144409, + "step": 7864 + }, + { + "epoch": 1.8122119815668203, + "grad_norm": 1.1914418584370128, + "learning_rate": 4.793610829249084e-08, + "loss": 0.7268643379211426, + "step": 7865 + }, + { + "epoch": 1.812442396313364, + "grad_norm": 1.0567405581917244, + "learning_rate": 4.781963472631745e-08, + "loss": 0.7416445016860962, + "step": 7866 + }, + { + "epoch": 1.8126728110599077, + "grad_norm": 1.246941647908604, + "learning_rate": 4.770329936786355e-08, + "loss": 0.8536533117294312, + "step": 7867 + }, + { + "epoch": 1.8129032258064517, + "grad_norm": 1.2429467538833636, + "learning_rate": 4.7587102234015074e-08, + "loss": 0.8258422017097473, + "step": 7868 + }, + { + "epoch": 1.8131336405529954, + "grad_norm": 1.189598318299626, + "learning_rate": 4.7471043341637874e-08, + "loss": 0.6976941823959351, + "step": 7869 + }, + { + "epoch": 1.8133640552995391, + "grad_norm": 1.222534835599988, + "learning_rate": 4.735512270757758e-08, + "loss": 0.8213087916374207, + "step": 7870 + }, + { + "epoch": 1.813594470046083, + "grad_norm": 1.1756801964544004, + "learning_rate": 4.723934034866028e-08, + "loss": 0.8012057542800903, + "step": 7871 + }, + { + "epoch": 1.8138248847926266, + "grad_norm": 1.0419940327131916, + 
"learning_rate": 4.7123696281691436e-08, + "loss": 0.7802866697311401, + "step": 7872 + }, + { + "epoch": 1.8140552995391706, + "grad_norm": 1.1630887083640626, + "learning_rate": 4.700819052345639e-08, + "loss": 0.8024426698684692, + "step": 7873 + }, + { + "epoch": 1.8142857142857143, + "grad_norm": 0.9709635675133196, + "learning_rate": 4.689282309072107e-08, + "loss": 0.6383114457130432, + "step": 7874 + }, + { + "epoch": 1.814516129032258, + "grad_norm": 1.2768186922012608, + "learning_rate": 4.677759400023085e-08, + "loss": 0.7226015329360962, + "step": 7875 + }, + { + "epoch": 1.814746543778802, + "grad_norm": 1.0424513670531574, + "learning_rate": 4.6662503268710684e-08, + "loss": 0.8390164971351624, + "step": 7876 + }, + { + "epoch": 1.8149769585253455, + "grad_norm": 1.0443665370850939, + "learning_rate": 4.654755091286633e-08, + "loss": 0.8120134472846985, + "step": 7877 + }, + { + "epoch": 1.8152073732718894, + "grad_norm": 1.305111160234168, + "learning_rate": 4.6432736949382656e-08, + "loss": 0.6554470062255859, + "step": 7878 + }, + { + "epoch": 1.8154377880184331, + "grad_norm": 1.1780234915455678, + "learning_rate": 4.631806139492478e-08, + "loss": 0.7268370985984802, + "step": 7879 + }, + { + "epoch": 1.8156682027649769, + "grad_norm": 1.4051894182356444, + "learning_rate": 4.620352426613794e-08, + "loss": 0.7991992831230164, + "step": 7880 + }, + { + "epoch": 1.8158986175115208, + "grad_norm": 1.1268859101296151, + "learning_rate": 4.608912557964673e-08, + "loss": 0.7695842981338501, + "step": 7881 + }, + { + "epoch": 1.8161290322580645, + "grad_norm": 1.9896156470888766, + "learning_rate": 4.59748653520563e-08, + "loss": 0.8633268475532532, + "step": 7882 + }, + { + "epoch": 1.8163594470046083, + "grad_norm": 1.1364981478494263, + "learning_rate": 4.586074359995118e-08, + "loss": 0.7018440961837769, + "step": 7883 + }, + { + "epoch": 1.8165898617511522, + "grad_norm": 1.1022691462384118, + "learning_rate": 4.574676033989589e-08, + "loss": 
0.7304259538650513, + "step": 7884 + }, + { + "epoch": 1.8168202764976957, + "grad_norm": 1.2520833867580832, + "learning_rate": 4.563291558843518e-08, + "loss": 0.7408654689788818, + "step": 7885 + }, + { + "epoch": 1.8170506912442397, + "grad_norm": 0.8583590816187824, + "learning_rate": 4.55192093620933e-08, + "loss": 0.6378169059753418, + "step": 7886 + }, + { + "epoch": 1.8172811059907834, + "grad_norm": 1.2929203847720665, + "learning_rate": 4.540564167737471e-08, + "loss": 0.8854331374168396, + "step": 7887 + }, + { + "epoch": 1.8175115207373271, + "grad_norm": 1.3325768500609418, + "learning_rate": 4.529221255076343e-08, + "loss": 0.6948372721672058, + "step": 7888 + }, + { + "epoch": 1.817741935483871, + "grad_norm": 1.0169430034347062, + "learning_rate": 4.517892199872364e-08, + "loss": 0.8199236392974854, + "step": 7889 + }, + { + "epoch": 1.8179723502304146, + "grad_norm": 1.2358305635738154, + "learning_rate": 4.506577003769918e-08, + "loss": 0.6967995762825012, + "step": 7890 + }, + { + "epoch": 1.8182027649769585, + "grad_norm": 1.5521492896589208, + "learning_rate": 4.495275668411425e-08, + "loss": 0.848435640335083, + "step": 7891 + }, + { + "epoch": 1.8184331797235023, + "grad_norm": 1.0482582355280439, + "learning_rate": 4.483988195437227e-08, + "loss": 0.7085731029510498, + "step": 7892 + }, + { + "epoch": 1.818663594470046, + "grad_norm": 1.540410469929121, + "learning_rate": 4.472714586485682e-08, + "loss": 0.7400653958320618, + "step": 7893 + }, + { + "epoch": 1.81889400921659, + "grad_norm": 1.3011192141788026, + "learning_rate": 4.461454843193169e-08, + "loss": 0.7636830806732178, + "step": 7894 + }, + { + "epoch": 1.8191244239631337, + "grad_norm": 0.9509851989309867, + "learning_rate": 4.4502089671940135e-08, + "loss": 0.6902754306793213, + "step": 7895 + }, + { + "epoch": 1.8193548387096774, + "grad_norm": 1.4497717090666749, + "learning_rate": 4.438976960120522e-08, + "loss": 0.8397349119186401, + "step": 7896 + }, + { + "epoch": 
1.8195852534562214, + "grad_norm": 1.1317263019718502, + "learning_rate": 4.4277588236030226e-08, + "loss": 0.7505836486816406, + "step": 7897 + }, + { + "epoch": 1.8198156682027649, + "grad_norm": 1.4213425196027163, + "learning_rate": 4.416554559269814e-08, + "loss": 0.9310287833213806, + "step": 7898 + }, + { + "epoch": 1.8200460829493088, + "grad_norm": 1.0910777164101302, + "learning_rate": 4.405364168747161e-08, + "loss": 0.724685549736023, + "step": 7899 + }, + { + "epoch": 1.8202764976958525, + "grad_norm": 0.99356469827684, + "learning_rate": 4.394187653659365e-08, + "loss": 0.6554735898971558, + "step": 7900 + }, + { + "epoch": 1.8205069124423963, + "grad_norm": 1.5629584518265682, + "learning_rate": 4.383025015628661e-08, + "loss": 0.7494597434997559, + "step": 7901 + }, + { + "epoch": 1.8207373271889402, + "grad_norm": 1.3596683636243805, + "learning_rate": 4.371876256275287e-08, + "loss": 0.817386269569397, + "step": 7902 + }, + { + "epoch": 1.8209677419354837, + "grad_norm": 1.2645292088995888, + "learning_rate": 4.3607413772174806e-08, + "loss": 0.8668064475059509, + "step": 7903 + }, + { + "epoch": 1.8211981566820277, + "grad_norm": 1.2001673372629817, + "learning_rate": 4.34962038007145e-08, + "loss": 0.7400633096694946, + "step": 7904 + }, + { + "epoch": 1.8214285714285714, + "grad_norm": 1.018878326746976, + "learning_rate": 4.3385132664514046e-08, + "loss": 0.7273544073104858, + "step": 7905 + }, + { + "epoch": 1.8216589861751151, + "grad_norm": 1.149057253315942, + "learning_rate": 4.3274200379695315e-08, + "loss": 0.7133193016052246, + "step": 7906 + }, + { + "epoch": 1.821889400921659, + "grad_norm": 1.2433089389356335, + "learning_rate": 4.316340696235976e-08, + "loss": 0.9390736222267151, + "step": 7907 + }, + { + "epoch": 1.8221198156682028, + "grad_norm": 1.1318410882734156, + "learning_rate": 4.3052752428588966e-08, + "loss": 0.7065613269805908, + "step": 7908 + }, + { + "epoch": 1.8223502304147465, + "grad_norm": 1.2803518971044316, + 
"learning_rate": 4.294223679444442e-08, + "loss": 0.813999354839325, + "step": 7909 + }, + { + "epoch": 1.8225806451612905, + "grad_norm": 1.616827704611462, + "learning_rate": 4.2831860075966955e-08, + "loss": 0.9234256148338318, + "step": 7910 + }, + { + "epoch": 1.822811059907834, + "grad_norm": 1.4124883659201861, + "learning_rate": 4.272162228917808e-08, + "loss": 0.8630207777023315, + "step": 7911 + }, + { + "epoch": 1.823041474654378, + "grad_norm": 1.382424983437882, + "learning_rate": 4.2611523450078456e-08, + "loss": 0.7827208042144775, + "step": 7912 + }, + { + "epoch": 1.8232718894009217, + "grad_norm": 1.3479238410287269, + "learning_rate": 4.250156357464873e-08, + "loss": 0.884107232093811, + "step": 7913 + }, + { + "epoch": 1.8235023041474654, + "grad_norm": 1.3064700630797408, + "learning_rate": 4.2391742678849484e-08, + "loss": 0.8615697026252747, + "step": 7914 + }, + { + "epoch": 1.8237327188940093, + "grad_norm": 1.4410161390206035, + "learning_rate": 4.2282060778621174e-08, + "loss": 0.8001279830932617, + "step": 7915 + }, + { + "epoch": 1.8239631336405528, + "grad_norm": 1.1016373373524035, + "learning_rate": 4.217251788988374e-08, + "loss": 0.7183214426040649, + "step": 7916 + }, + { + "epoch": 1.8241935483870968, + "grad_norm": 1.2680472029966925, + "learning_rate": 4.206311402853746e-08, + "loss": 0.7751119136810303, + "step": 7917 + }, + { + "epoch": 1.8244239631336405, + "grad_norm": 1.287058032235602, + "learning_rate": 4.195384921046208e-08, + "loss": 0.8073426485061646, + "step": 7918 + }, + { + "epoch": 1.8246543778801843, + "grad_norm": 1.053407718143569, + "learning_rate": 4.1844723451517017e-08, + "loss": 0.7918455600738525, + "step": 7919 + }, + { + "epoch": 1.8248847926267282, + "grad_norm": 1.1789390806182918, + "learning_rate": 4.1735736767542054e-08, + "loss": 0.8070017099380493, + "step": 7920 + }, + { + "epoch": 1.825115207373272, + "grad_norm": 1.1456133687492283, + "learning_rate": 4.1626889174356306e-08, + "loss": 
0.7202159762382507, + "step": 7921 + }, + { + "epoch": 1.8253456221198157, + "grad_norm": 1.304718816677761, + "learning_rate": 4.15181806877587e-08, + "loss": 0.8412283658981323, + "step": 7922 + }, + { + "epoch": 1.8255760368663596, + "grad_norm": 1.079962569087528, + "learning_rate": 4.140961132352849e-08, + "loss": 0.6230478286743164, + "step": 7923 + }, + { + "epoch": 1.8258064516129031, + "grad_norm": 1.184647211526077, + "learning_rate": 4.1301181097424196e-08, + "loss": 0.6475099921226501, + "step": 7924 + }, + { + "epoch": 1.826036866359447, + "grad_norm": 1.1526955390848261, + "learning_rate": 4.1192890025184223e-08, + "loss": 0.6277462244033813, + "step": 7925 + }, + { + "epoch": 1.8262672811059908, + "grad_norm": 1.048650750687635, + "learning_rate": 4.1084738122527e-08, + "loss": 0.784058690071106, + "step": 7926 + }, + { + "epoch": 1.8264976958525345, + "grad_norm": 1.2758998200943634, + "learning_rate": 4.097672540515063e-08, + "loss": 0.7214534282684326, + "step": 7927 + }, + { + "epoch": 1.8267281105990785, + "grad_norm": 1.3299220547069754, + "learning_rate": 4.086885188873302e-08, + "loss": 0.7504015564918518, + "step": 7928 + }, + { + "epoch": 1.826958525345622, + "grad_norm": 1.3115105618474625, + "learning_rate": 4.076111758893175e-08, + "loss": 0.8837840557098389, + "step": 7929 + }, + { + "epoch": 1.827188940092166, + "grad_norm": 0.9756920709009218, + "learning_rate": 4.065352252138443e-08, + "loss": 0.6903706789016724, + "step": 7930 + }, + { + "epoch": 1.8274193548387097, + "grad_norm": 1.0882078909648618, + "learning_rate": 4.054606670170824e-08, + "loss": 0.6120485067367554, + "step": 7931 + }, + { + "epoch": 1.8276497695852534, + "grad_norm": 1.3933670864132435, + "learning_rate": 4.043875014550047e-08, + "loss": 0.9566253423690796, + "step": 7932 + }, + { + "epoch": 1.8278801843317973, + "grad_norm": 1.143561158140067, + "learning_rate": 4.033157286833766e-08, + "loss": 0.7702776193618774, + "step": 7933 + }, + { + "epoch": 
1.828110599078341, + "grad_norm": 1.3861853644171394, + "learning_rate": 4.0224534885776706e-08, + "loss": 0.7326529026031494, + "step": 7934 + }, + { + "epoch": 1.8283410138248848, + "grad_norm": 1.199651876611857, + "learning_rate": 4.011763621335395e-08, + "loss": 0.8161343336105347, + "step": 7935 + }, + { + "epoch": 1.8285714285714287, + "grad_norm": 1.2385311136965618, + "learning_rate": 4.001087686658544e-08, + "loss": 0.7167537212371826, + "step": 7936 + }, + { + "epoch": 1.8288018433179722, + "grad_norm": 1.5866479195226006, + "learning_rate": 3.9904256860967433e-08, + "loss": 0.9195249080657959, + "step": 7937 + }, + { + "epoch": 1.8290322580645162, + "grad_norm": 1.4492337682663832, + "learning_rate": 3.979777621197544e-08, + "loss": 0.9483609199523926, + "step": 7938 + }, + { + "epoch": 1.82926267281106, + "grad_norm": 1.1520857488925356, + "learning_rate": 3.96914349350651e-08, + "loss": 0.6521364450454712, + "step": 7939 + }, + { + "epoch": 1.8294930875576036, + "grad_norm": 1.1394847291425385, + "learning_rate": 3.958523304567174e-08, + "loss": 0.714328408241272, + "step": 7940 + }, + { + "epoch": 1.8297235023041476, + "grad_norm": 1.2749952242619191, + "learning_rate": 3.9479170559210464e-08, + "loss": 0.705136775970459, + "step": 7941 + }, + { + "epoch": 1.829953917050691, + "grad_norm": 1.2310686937076982, + "learning_rate": 3.937324749107584e-08, + "loss": 0.9096843004226685, + "step": 7942 + }, + { + "epoch": 1.830184331797235, + "grad_norm": 1.1347026880501985, + "learning_rate": 3.9267463856642704e-08, + "loss": 0.7797929048538208, + "step": 7943 + }, + { + "epoch": 1.8304147465437788, + "grad_norm": 1.1418375010830168, + "learning_rate": 3.9161819671265414e-08, + "loss": 0.739689290523529, + "step": 7944 + }, + { + "epoch": 1.8306451612903225, + "grad_norm": 1.2414926332489717, + "learning_rate": 3.905631495027795e-08, + "loss": 0.7297589778900146, + "step": 7945 + }, + { + "epoch": 1.8308755760368665, + "grad_norm": 1.1411747974433366, + 
"learning_rate": 3.895094970899426e-08, + "loss": 0.6632317900657654, + "step": 7946 + }, + { + "epoch": 1.8311059907834102, + "grad_norm": 1.1035263718417188, + "learning_rate": 3.884572396270802e-08, + "loss": 0.8075754642486572, + "step": 7947 + }, + { + "epoch": 1.831336405529954, + "grad_norm": 1.1206981689667126, + "learning_rate": 3.874063772669256e-08, + "loss": 0.879385232925415, + "step": 7948 + }, + { + "epoch": 1.8315668202764976, + "grad_norm": 1.1296410172019098, + "learning_rate": 3.86356910162009e-08, + "loss": 0.7182341814041138, + "step": 7949 + }, + { + "epoch": 1.8317972350230414, + "grad_norm": 1.3256415462362086, + "learning_rate": 3.853088384646608e-08, + "loss": 0.8980770111083984, + "step": 7950 + }, + { + "epoch": 1.8320276497695853, + "grad_norm": 1.2399263879902838, + "learning_rate": 3.8426216232700483e-08, + "loss": 0.7798547744750977, + "step": 7951 + }, + { + "epoch": 1.832258064516129, + "grad_norm": 1.30590072600508, + "learning_rate": 3.832168819009685e-08, + "loss": 0.7545509934425354, + "step": 7952 + }, + { + "epoch": 1.8324884792626728, + "grad_norm": 1.4626138945450415, + "learning_rate": 3.821729973382681e-08, + "loss": 0.7394163608551025, + "step": 7953 + }, + { + "epoch": 1.8327188940092167, + "grad_norm": 1.095086275435991, + "learning_rate": 3.811305087904271e-08, + "loss": 0.7771584987640381, + "step": 7954 + }, + { + "epoch": 1.8329493087557602, + "grad_norm": 1.0772465088176202, + "learning_rate": 3.800894164087587e-08, + "loss": 0.6490596532821655, + "step": 7955 + }, + { + "epoch": 1.8331797235023042, + "grad_norm": 1.6261572682115344, + "learning_rate": 3.7904972034437546e-08, + "loss": 0.8465416431427002, + "step": 7956 + }, + { + "epoch": 1.833410138248848, + "grad_norm": 1.1256653812684285, + "learning_rate": 3.780114207481899e-08, + "loss": 0.6769351363182068, + "step": 7957 + }, + { + "epoch": 1.8336405529953916, + "grad_norm": 1.1157448396752008, + "learning_rate": 3.769745177709094e-08, + "loss": 
0.8187215328216553, + "step": 7958 + }, + { + "epoch": 1.8338709677419356, + "grad_norm": 0.9478307441179703, + "learning_rate": 3.759390115630356e-08, + "loss": 0.7524763345718384, + "step": 7959 + }, + { + "epoch": 1.8341013824884793, + "grad_norm": 1.3846707864730958, + "learning_rate": 3.749049022748762e-08, + "loss": 0.8019517064094543, + "step": 7960 + }, + { + "epoch": 1.834331797235023, + "grad_norm": 1.2301171101661803, + "learning_rate": 3.738721900565278e-08, + "loss": 0.7732158899307251, + "step": 7961 + }, + { + "epoch": 1.8345622119815668, + "grad_norm": 1.1624945144679932, + "learning_rate": 3.728408750578871e-08, + "loss": 0.7152917385101318, + "step": 7962 + }, + { + "epoch": 1.8347926267281105, + "grad_norm": 1.2249354034345745, + "learning_rate": 3.7181095742864876e-08, + "loss": 0.7117735147476196, + "step": 7963 + }, + { + "epoch": 1.8350230414746544, + "grad_norm": 1.1387667941982393, + "learning_rate": 3.7078243731830436e-08, + "loss": 0.7651360034942627, + "step": 7964 + }, + { + "epoch": 1.8352534562211982, + "grad_norm": 1.103224145154883, + "learning_rate": 3.697553148761412e-08, + "loss": 0.6686996817588806, + "step": 7965 + }, + { + "epoch": 1.835483870967742, + "grad_norm": 1.4148867918515446, + "learning_rate": 3.687295902512455e-08, + "loss": 0.8654145002365112, + "step": 7966 + }, + { + "epoch": 1.8357142857142859, + "grad_norm": 1.2014603088046913, + "learning_rate": 3.6770526359250046e-08, + "loss": 0.7883874177932739, + "step": 7967 + }, + { + "epoch": 1.8359447004608294, + "grad_norm": 1.3036366063511584, + "learning_rate": 3.666823350485848e-08, + "loss": 0.7270755767822266, + "step": 7968 + }, + { + "epoch": 1.8361751152073733, + "grad_norm": 1.2757403346821974, + "learning_rate": 3.656608047679744e-08, + "loss": 0.654710054397583, + "step": 7969 + }, + { + "epoch": 1.836405529953917, + "grad_norm": 1.3173622827867584, + "learning_rate": 3.6464067289894485e-08, + "loss": 0.688032329082489, + "step": 7970 + }, + { + "epoch": 
1.8366359447004608, + "grad_norm": 1.610615012564481, + "learning_rate": 3.6362193958956457e-08, + "loss": 0.901115894317627, + "step": 7971 + }, + { + "epoch": 1.8368663594470047, + "grad_norm": 1.116601972108686, + "learning_rate": 3.6260460498770404e-08, + "loss": 0.7335774302482605, + "step": 7972 + }, + { + "epoch": 1.8370967741935482, + "grad_norm": 1.386903572934919, + "learning_rate": 3.615886692410275e-08, + "loss": 0.8056570291519165, + "step": 7973 + }, + { + "epoch": 1.8373271889400922, + "grad_norm": 1.0398578754417405, + "learning_rate": 3.6057413249699356e-08, + "loss": 0.82081538438797, + "step": 7974 + }, + { + "epoch": 1.837557603686636, + "grad_norm": 1.2589683870881863, + "learning_rate": 3.595609949028655e-08, + "loss": 0.7741475105285645, + "step": 7975 + }, + { + "epoch": 1.8377880184331796, + "grad_norm": 1.4550225731476647, + "learning_rate": 3.5854925660569693e-08, + "loss": 0.9020792245864868, + "step": 7976 + }, + { + "epoch": 1.8380184331797236, + "grad_norm": 1.395018589671643, + "learning_rate": 3.57538917752338e-08, + "loss": 0.759677529335022, + "step": 7977 + }, + { + "epoch": 1.8382488479262673, + "grad_norm": 1.2528132061795532, + "learning_rate": 3.565299784894427e-08, + "loss": 0.6658498644828796, + "step": 7978 + }, + { + "epoch": 1.838479262672811, + "grad_norm": 1.156561409904186, + "learning_rate": 3.5552243896345254e-08, + "loss": 0.8359798192977905, + "step": 7979 + }, + { + "epoch": 1.838709677419355, + "grad_norm": 0.9586985661683237, + "learning_rate": 3.545162993206141e-08, + "loss": 0.656216025352478, + "step": 7980 + }, + { + "epoch": 1.8389400921658985, + "grad_norm": 1.1907827843907386, + "learning_rate": 3.53511559706966e-08, + "loss": 0.7783077359199524, + "step": 7981 + }, + { + "epoch": 1.8391705069124424, + "grad_norm": 1.315887741405374, + "learning_rate": 3.525082202683427e-08, + "loss": 0.7726818919181824, + "step": 7982 + }, + { + "epoch": 1.8394009216589862, + "grad_norm": 1.203190333477806, + 
"learning_rate": 3.5150628115038213e-08, + "loss": 0.6797339916229248, + "step": 7983 + }, + { + "epoch": 1.83963133640553, + "grad_norm": 1.6491537372199485, + "learning_rate": 3.505057424985114e-08, + "loss": 0.818444013595581, + "step": 7984 + }, + { + "epoch": 1.8398617511520738, + "grad_norm": 1.2385444618355612, + "learning_rate": 3.495066044579564e-08, + "loss": 0.716003954410553, + "step": 7985 + }, + { + "epoch": 1.8400921658986173, + "grad_norm": 1.1184726381698433, + "learning_rate": 3.485088671737435e-08, + "loss": 0.8214380741119385, + "step": 7986 + }, + { + "epoch": 1.8403225806451613, + "grad_norm": 1.2891166927609845, + "learning_rate": 3.475125307906923e-08, + "loss": 0.8004239797592163, + "step": 7987 + }, + { + "epoch": 1.840552995391705, + "grad_norm": 1.0064244623457703, + "learning_rate": 3.465175954534183e-08, + "loss": 0.724868655204773, + "step": 7988 + }, + { + "epoch": 1.8407834101382488, + "grad_norm": 1.2194713737299876, + "learning_rate": 3.455240613063359e-08, + "loss": 0.6774435043334961, + "step": 7989 + }, + { + "epoch": 1.8410138248847927, + "grad_norm": 1.2000954990034474, + "learning_rate": 3.445319284936543e-08, + "loss": 0.7618406414985657, + "step": 7990 + }, + { + "epoch": 1.8412442396313364, + "grad_norm": 1.2446761227229344, + "learning_rate": 3.4354119715938154e-08, + "loss": 0.8176794648170471, + "step": 7991 + }, + { + "epoch": 1.8414746543778802, + "grad_norm": 1.3311989323291133, + "learning_rate": 3.4255186744732045e-08, + "loss": 0.7540123462677002, + "step": 7992 + }, + { + "epoch": 1.841705069124424, + "grad_norm": 0.8317940065053944, + "learning_rate": 3.4156393950107164e-08, + "loss": 0.6888976097106934, + "step": 7993 + }, + { + "epoch": 1.8419354838709676, + "grad_norm": 0.9229557772464766, + "learning_rate": 3.405774134640294e-08, + "loss": 0.6719028949737549, + "step": 7994 + }, + { + "epoch": 1.8421658986175116, + "grad_norm": 1.2216480626353798, + "learning_rate": 3.3959228947938903e-08, + "loss": 
0.817806601524353, + "step": 7995 + }, + { + "epoch": 1.8423963133640553, + "grad_norm": 1.176727717908757, + "learning_rate": 3.3860856769013955e-08, + "loss": 0.6681252717971802, + "step": 7996 + }, + { + "epoch": 1.842626728110599, + "grad_norm": 1.261442308873967, + "learning_rate": 3.3762624823906574e-08, + "loss": 0.7965174317359924, + "step": 7997 + }, + { + "epoch": 1.842857142857143, + "grad_norm": 1.163849986057629, + "learning_rate": 3.366453312687512e-08, + "loss": 0.714171826839447, + "step": 7998 + }, + { + "epoch": 1.8430875576036865, + "grad_norm": 1.2077995913515678, + "learning_rate": 3.356658169215743e-08, + "loss": 0.7489287853240967, + "step": 7999 + }, + { + "epoch": 1.8433179723502304, + "grad_norm": 1.270011813451473, + "learning_rate": 3.34687705339709e-08, + "loss": 0.790866494178772, + "step": 8000 + }, + { + "epoch": 1.8435483870967742, + "grad_norm": 0.9665221846950844, + "learning_rate": 3.337109966651297e-08, + "loss": 0.8208349943161011, + "step": 8001 + }, + { + "epoch": 1.8437788018433179, + "grad_norm": 1.1715709525124653, + "learning_rate": 3.3273569103960174e-08, + "loss": 0.7974207401275635, + "step": 8002 + }, + { + "epoch": 1.8440092165898618, + "grad_norm": 1.1483232930238036, + "learning_rate": 3.317617886046908e-08, + "loss": 0.751643180847168, + "step": 8003 + }, + { + "epoch": 1.8442396313364056, + "grad_norm": 1.3210448516681466, + "learning_rate": 3.3078928950175724e-08, + "loss": 0.9231137037277222, + "step": 8004 + }, + { + "epoch": 1.8444700460829493, + "grad_norm": 1.1496984894908708, + "learning_rate": 3.2981819387195683e-08, + "loss": 0.7975907325744629, + "step": 8005 + }, + { + "epoch": 1.8447004608294932, + "grad_norm": 1.1807761173209448, + "learning_rate": 3.288485018562448e-08, + "loss": 0.7467124462127686, + "step": 8006 + }, + { + "epoch": 1.8449308755760367, + "grad_norm": 1.1558703241619663, + "learning_rate": 3.278802135953706e-08, + "loss": 0.7983080148696899, + "step": 8007 + }, + { + "epoch": 
1.8451612903225807, + "grad_norm": 1.2273424689042212, + "learning_rate": 3.269133292298787e-08, + "loss": 0.7991635799407959, + "step": 8008 + }, + { + "epoch": 1.8453917050691244, + "grad_norm": 1.3284825495150037, + "learning_rate": 3.259478489001111e-08, + "loss": 0.9309900403022766, + "step": 8009 + }, + { + "epoch": 1.8456221198156681, + "grad_norm": 1.4898197506974649, + "learning_rate": 3.249837727462068e-08, + "loss": 0.7667444944381714, + "step": 8010 + }, + { + "epoch": 1.845852534562212, + "grad_norm": 1.0693184262343387, + "learning_rate": 3.2402110090809955e-08, + "loss": 0.722775936126709, + "step": 8011 + }, + { + "epoch": 1.8460829493087556, + "grad_norm": 1.2061345728793884, + "learning_rate": 3.230598335255208e-08, + "loss": 0.7049660682678223, + "step": 8012 + }, + { + "epoch": 1.8463133640552996, + "grad_norm": 1.2538545243397632, + "learning_rate": 3.220999707379957e-08, + "loss": 0.7543717622756958, + "step": 8013 + }, + { + "epoch": 1.8465437788018433, + "grad_norm": 1.0254969440317054, + "learning_rate": 3.2114151268484825e-08, + "loss": 0.705594539642334, + "step": 8014 + }, + { + "epoch": 1.846774193548387, + "grad_norm": 1.3381301652737214, + "learning_rate": 3.201844595051972e-08, + "loss": 0.8663946390151978, + "step": 8015 + }, + { + "epoch": 1.847004608294931, + "grad_norm": 1.2931743474180666, + "learning_rate": 3.192288113379582e-08, + "loss": 0.6990827918052673, + "step": 8016 + }, + { + "epoch": 1.8472350230414747, + "grad_norm": 1.3047302382268444, + "learning_rate": 3.182745683218391e-08, + "loss": 0.8494592905044556, + "step": 8017 + }, + { + "epoch": 1.8474654377880184, + "grad_norm": 1.1964557388323078, + "learning_rate": 3.173217305953524e-08, + "loss": 0.7689815163612366, + "step": 8018 + }, + { + "epoch": 1.8476958525345624, + "grad_norm": 1.0869127948311592, + "learning_rate": 3.163702982967964e-08, + "loss": 0.7961923480033875, + "step": 8019 + }, + { + "epoch": 1.8479262672811059, + "grad_norm": 1.1859545141002084, + 
"learning_rate": 3.154202715642729e-08, + "loss": 0.7290681600570679, + "step": 8020 + }, + { + "epoch": 1.8481566820276498, + "grad_norm": 1.2696204436408378, + "learning_rate": 3.1447165053567594e-08, + "loss": 0.7486605048179626, + "step": 8021 + }, + { + "epoch": 1.8483870967741935, + "grad_norm": 1.2409295752272667, + "learning_rate": 3.135244353486977e-08, + "loss": 0.8263967633247375, + "step": 8022 + }, + { + "epoch": 1.8486175115207373, + "grad_norm": 1.3436046094044156, + "learning_rate": 3.1257862614082254e-08, + "loss": 0.7462657690048218, + "step": 8023 + }, + { + "epoch": 1.8488479262672812, + "grad_norm": 1.7105756282592546, + "learning_rate": 3.116342230493374e-08, + "loss": 0.9305819272994995, + "step": 8024 + }, + { + "epoch": 1.8490783410138247, + "grad_norm": 1.1597494849443377, + "learning_rate": 3.1069122621131925e-08, + "loss": 0.7202557325363159, + "step": 8025 + }, + { + "epoch": 1.8493087557603687, + "grad_norm": 1.0985806176068067, + "learning_rate": 3.097496357636409e-08, + "loss": 0.723913311958313, + "step": 8026 + }, + { + "epoch": 1.8495391705069124, + "grad_norm": 1.427360065972912, + "learning_rate": 3.088094518429751e-08, + "loss": 0.7067763805389404, + "step": 8027 + }, + { + "epoch": 1.8497695852534561, + "grad_norm": 1.3110685780585822, + "learning_rate": 3.078706745857884e-08, + "loss": 0.7853527665138245, + "step": 8028 + }, + { + "epoch": 1.85, + "grad_norm": 1.228901367807535, + "learning_rate": 3.0693330412834285e-08, + "loss": 0.7183133363723755, + "step": 8029 + }, + { + "epoch": 1.8502304147465438, + "grad_norm": 1.1077136741228983, + "learning_rate": 3.0599734060669626e-08, + "loss": 0.8041096925735474, + "step": 8030 + }, + { + "epoch": 1.8504608294930875, + "grad_norm": 1.0495776729925357, + "learning_rate": 3.050627841567022e-08, + "loss": 0.7259166240692139, + "step": 8031 + }, + { + "epoch": 1.8506912442396315, + "grad_norm": 1.5016516908972768, + "learning_rate": 3.041296349140099e-08, + "loss": 
0.8844292163848877, + "step": 8032 + }, + { + "epoch": 1.850921658986175, + "grad_norm": 1.2846098007302502, + "learning_rate": 3.031978930140666e-08, + "loss": 0.7566810846328735, + "step": 8033 + }, + { + "epoch": 1.851152073732719, + "grad_norm": 1.4566612706299762, + "learning_rate": 3.0226755859211085e-08, + "loss": 0.8365379571914673, + "step": 8034 + }, + { + "epoch": 1.8513824884792627, + "grad_norm": 1.03909937329538, + "learning_rate": 3.013386317831823e-08, + "loss": 0.6786175966262817, + "step": 8035 + }, + { + "epoch": 1.8516129032258064, + "grad_norm": 0.8445952555360507, + "learning_rate": 3.0041111272211206e-08, + "loss": 0.5450198650360107, + "step": 8036 + }, + { + "epoch": 1.8518433179723504, + "grad_norm": 1.3789732970427235, + "learning_rate": 2.994850015435269e-08, + "loss": 0.8792393207550049, + "step": 8037 + }, + { + "epoch": 1.8520737327188939, + "grad_norm": 1.1270074296152806, + "learning_rate": 2.985602983818525e-08, + "loss": 0.8463287353515625, + "step": 8038 + }, + { + "epoch": 1.8523041474654378, + "grad_norm": 1.2927452986312467, + "learning_rate": 2.9763700337130827e-08, + "loss": 0.77659010887146, + "step": 8039 + }, + { + "epoch": 1.8525345622119815, + "grad_norm": 0.8652026295993711, + "learning_rate": 2.9671511664590698e-08, + "loss": 0.6180428266525269, + "step": 8040 + }, + { + "epoch": 1.8527649769585253, + "grad_norm": 1.2049419514211082, + "learning_rate": 2.9579463833946273e-08, + "loss": 0.7886658906936646, + "step": 8041 + }, + { + "epoch": 1.8529953917050692, + "grad_norm": 1.35078980115234, + "learning_rate": 2.9487556858557972e-08, + "loss": 0.8371871709823608, + "step": 8042 + }, + { + "epoch": 1.853225806451613, + "grad_norm": 1.1555875449847217, + "learning_rate": 2.9395790751765904e-08, + "loss": 0.7082366347312927, + "step": 8043 + }, + { + "epoch": 1.8534562211981567, + "grad_norm": 1.2745414422252506, + "learning_rate": 2.930416552689008e-08, + "loss": 0.7866584062576294, + "step": 8044 + }, + { + "epoch": 
1.8536866359447006, + "grad_norm": 1.229235509048025, + "learning_rate": 2.9212681197229527e-08, + "loss": 0.8789514303207397, + "step": 8045 + }, + { + "epoch": 1.8539170506912441, + "grad_norm": 1.0208282620264577, + "learning_rate": 2.9121337776063072e-08, + "loss": 0.7041239738464355, + "step": 8046 + }, + { + "epoch": 1.854147465437788, + "grad_norm": 1.3204473756112607, + "learning_rate": 2.9030135276649215e-08, + "loss": 0.8290516138076782, + "step": 8047 + }, + { + "epoch": 1.8543778801843318, + "grad_norm": 1.2424965520320617, + "learning_rate": 2.8939073712225813e-08, + "loss": 0.8532444834709167, + "step": 8048 + }, + { + "epoch": 1.8546082949308755, + "grad_norm": 1.375111764710695, + "learning_rate": 2.8848153096010407e-08, + "loss": 0.8635869026184082, + "step": 8049 + }, + { + "epoch": 1.8548387096774195, + "grad_norm": 1.3481674122248803, + "learning_rate": 2.8757373441199885e-08, + "loss": 0.723747730255127, + "step": 8050 + }, + { + "epoch": 1.855069124423963, + "grad_norm": 1.3399875040651272, + "learning_rate": 2.8666734760970925e-08, + "loss": 0.893456220626831, + "step": 8051 + }, + { + "epoch": 1.855299539170507, + "grad_norm": 1.2732338285848108, + "learning_rate": 2.8576237068479335e-08, + "loss": 0.6871381998062134, + "step": 8052 + }, + { + "epoch": 1.8555299539170507, + "grad_norm": 1.0534516506243037, + "learning_rate": 2.848588037686106e-08, + "loss": 0.7820594906806946, + "step": 8053 + }, + { + "epoch": 1.8557603686635944, + "grad_norm": 1.0873243123362593, + "learning_rate": 2.839566469923105e-08, + "loss": 0.7783479690551758, + "step": 8054 + }, + { + "epoch": 1.8559907834101383, + "grad_norm": 1.25602911336094, + "learning_rate": 2.8305590048684268e-08, + "loss": 0.7612866163253784, + "step": 8055 + }, + { + "epoch": 1.856221198156682, + "grad_norm": 1.0752346215773687, + "learning_rate": 2.82156564382946e-08, + "loss": 0.7483590841293335, + "step": 8056 + }, + { + "epoch": 1.8564516129032258, + "grad_norm": 1.0547692532993052, + 
"learning_rate": 2.812586388111582e-08, + "loss": 0.7553579807281494, + "step": 8057 + }, + { + "epoch": 1.8566820276497698, + "grad_norm": 1.0828193353243305, + "learning_rate": 2.80362123901815e-08, + "loss": 0.8895602226257324, + "step": 8058 + }, + { + "epoch": 1.8569124423963133, + "grad_norm": 1.1481937931103232, + "learning_rate": 2.794670197850424e-08, + "loss": 0.7974053621292114, + "step": 8059 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 1.0112292806236838, + "learning_rate": 2.7857332659076193e-08, + "loss": 0.7730135917663574, + "step": 8060 + }, + { + "epoch": 1.857373271889401, + "grad_norm": 1.115608079627536, + "learning_rate": 2.7768104444869434e-08, + "loss": 0.7258738279342651, + "step": 8061 + }, + { + "epoch": 1.8576036866359447, + "grad_norm": 1.3030363105586589, + "learning_rate": 2.7679017348835264e-08, + "loss": 0.7068890333175659, + "step": 8062 + }, + { + "epoch": 1.8578341013824886, + "grad_norm": 1.3041822573340287, + "learning_rate": 2.7590071383904568e-08, + "loss": 0.8741557002067566, + "step": 8063 + }, + { + "epoch": 1.8580645161290321, + "grad_norm": 1.3236368529143523, + "learning_rate": 2.750126656298768e-08, + "loss": 0.8723797798156738, + "step": 8064 + }, + { + "epoch": 1.858294930875576, + "grad_norm": 1.2019235064586495, + "learning_rate": 2.7412602898974514e-08, + "loss": 0.8510957956314087, + "step": 8065 + }, + { + "epoch": 1.8585253456221198, + "grad_norm": 0.8996466342772348, + "learning_rate": 2.732408040473444e-08, + "loss": 0.6875216960906982, + "step": 8066 + }, + { + "epoch": 1.8587557603686635, + "grad_norm": 1.235948717542994, + "learning_rate": 2.7235699093116515e-08, + "loss": 0.8057721257209778, + "step": 8067 + }, + { + "epoch": 1.8589861751152075, + "grad_norm": 1.1066694710477807, + "learning_rate": 2.7147458976949145e-08, + "loss": 0.7547335624694824, + "step": 8068 + }, + { + "epoch": 1.8592165898617512, + "grad_norm": 1.2565080056809024, + "learning_rate": 2.7059360069040193e-08, + "loss": 
0.8301708102226257, + "step": 8069 + }, + { + "epoch": 1.859447004608295, + "grad_norm": 1.354839024861171, + "learning_rate": 2.69714023821771e-08, + "loss": 0.8313431143760681, + "step": 8070 + }, + { + "epoch": 1.8596774193548387, + "grad_norm": 1.2482736529337517, + "learning_rate": 2.6883585929126872e-08, + "loss": 0.6631792783737183, + "step": 8071 + }, + { + "epoch": 1.8599078341013824, + "grad_norm": 1.342165180678223, + "learning_rate": 2.679591072263576e-08, + "loss": 0.7643609046936035, + "step": 8072 + }, + { + "epoch": 1.8601382488479263, + "grad_norm": 1.5670037508761703, + "learning_rate": 2.670837677543003e-08, + "loss": 0.8543407917022705, + "step": 8073 + }, + { + "epoch": 1.86036866359447, + "grad_norm": 1.0908415634382522, + "learning_rate": 2.662098410021485e-08, + "loss": 0.8051489591598511, + "step": 8074 + }, + { + "epoch": 1.8605990783410138, + "grad_norm": 1.1493604797084143, + "learning_rate": 2.653373270967518e-08, + "loss": 0.7065767645835876, + "step": 8075 + }, + { + "epoch": 1.8608294930875577, + "grad_norm": 0.9852441728403762, + "learning_rate": 2.6446622616475566e-08, + "loss": 0.672603189945221, + "step": 8076 + }, + { + "epoch": 1.8610599078341012, + "grad_norm": 1.2739019796547877, + "learning_rate": 2.6359653833259776e-08, + "loss": 0.7201080918312073, + "step": 8077 + }, + { + "epoch": 1.8612903225806452, + "grad_norm": 1.156933357533599, + "learning_rate": 2.627282637265149e-08, + "loss": 0.7147494554519653, + "step": 8078 + }, + { + "epoch": 1.861520737327189, + "grad_norm": 1.3793116889121875, + "learning_rate": 2.6186140247253297e-08, + "loss": 0.7051082253456116, + "step": 8079 + }, + { + "epoch": 1.8617511520737327, + "grad_norm": 1.2253670327071573, + "learning_rate": 2.6099595469647683e-08, + "loss": 0.5786069631576538, + "step": 8080 + }, + { + "epoch": 1.8619815668202766, + "grad_norm": 1.2391603364729231, + "learning_rate": 2.6013192052396493e-08, + "loss": 0.8880232572555542, + "step": 8081 + }, + { + "epoch": 
1.8622119815668203, + "grad_norm": 1.3577487615179598, + "learning_rate": 2.5926930008041137e-08, + "loss": 0.9295729398727417, + "step": 8082 + }, + { + "epoch": 1.862442396313364, + "grad_norm": 1.1507407274303025, + "learning_rate": 2.5840809349102378e-08, + "loss": 0.6963248252868652, + "step": 8083 + }, + { + "epoch": 1.8626728110599078, + "grad_norm": 1.2547838683138512, + "learning_rate": 2.5754830088080548e-08, + "loss": 0.8788298964500427, + "step": 8084 + }, + { + "epoch": 1.8629032258064515, + "grad_norm": 1.3540782368440085, + "learning_rate": 2.5668992237455334e-08, + "loss": 0.7454242706298828, + "step": 8085 + }, + { + "epoch": 1.8631336405529955, + "grad_norm": 1.1950812039913048, + "learning_rate": 2.558329580968599e-08, + "loss": 0.7659780383110046, + "step": 8086 + }, + { + "epoch": 1.8633640552995392, + "grad_norm": 1.5016734977487585, + "learning_rate": 2.5497740817211456e-08, + "loss": 0.8799881935119629, + "step": 8087 + }, + { + "epoch": 1.863594470046083, + "grad_norm": 0.9825172132169212, + "learning_rate": 2.5412327272449684e-08, + "loss": 0.7319198846817017, + "step": 8088 + }, + { + "epoch": 1.8638248847926269, + "grad_norm": 1.0689400870779366, + "learning_rate": 2.532705518779854e-08, + "loss": 0.6450645923614502, + "step": 8089 + }, + { + "epoch": 1.8640552995391704, + "grad_norm": 1.1783740361717576, + "learning_rate": 2.52419245756349e-08, + "loss": 0.7213672399520874, + "step": 8090 + }, + { + "epoch": 1.8642857142857143, + "grad_norm": 1.3483335750734096, + "learning_rate": 2.515693544831554e-08, + "loss": 0.790163516998291, + "step": 8091 + }, + { + "epoch": 1.864516129032258, + "grad_norm": 1.2871905619529331, + "learning_rate": 2.507208781817638e-08, + "loss": 0.8324074745178223, + "step": 8092 + }, + { + "epoch": 1.8647465437788018, + "grad_norm": 1.4095960145667545, + "learning_rate": 2.4987381697533227e-08, + "loss": 0.879224419593811, + "step": 8093 + }, + { + "epoch": 1.8649769585253457, + "grad_norm": 1.4121148041878757, 
+ "learning_rate": 2.4902817098680807e-08, + "loss": 0.8668204545974731, + "step": 8094 + }, + { + "epoch": 1.8652073732718892, + "grad_norm": 1.1605042845973315, + "learning_rate": 2.481839403389341e-08, + "loss": 0.6737711429595947, + "step": 8095 + }, + { + "epoch": 1.8654377880184332, + "grad_norm": 1.3482506919608122, + "learning_rate": 2.4734112515425343e-08, + "loss": 0.8948237299919128, + "step": 8096 + }, + { + "epoch": 1.865668202764977, + "grad_norm": 1.2927456093148797, + "learning_rate": 2.4649972555509823e-08, + "loss": 0.6866592168807983, + "step": 8097 + }, + { + "epoch": 1.8658986175115206, + "grad_norm": 1.2040358944727056, + "learning_rate": 2.4565974166359416e-08, + "loss": 0.8852076530456543, + "step": 8098 + }, + { + "epoch": 1.8661290322580646, + "grad_norm": 1.1474664367024714, + "learning_rate": 2.44821173601667e-08, + "loss": 0.7402448654174805, + "step": 8099 + }, + { + "epoch": 1.8663594470046083, + "grad_norm": 1.299234544884085, + "learning_rate": 2.439840214910316e-08, + "loss": 0.8536320924758911, + "step": 8100 + }, + { + "epoch": 1.866589861751152, + "grad_norm": 1.1550631938568499, + "learning_rate": 2.4314828545319965e-08, + "loss": 0.6408628225326538, + "step": 8101 + }, + { + "epoch": 1.866820276497696, + "grad_norm": 1.188548223378954, + "learning_rate": 2.4231396560947858e-08, + "loss": 0.9578930735588074, + "step": 8102 + }, + { + "epoch": 1.8670506912442395, + "grad_norm": 1.8289817367376688, + "learning_rate": 2.4148106208096708e-08, + "loss": 0.7606109976768494, + "step": 8103 + }, + { + "epoch": 1.8672811059907835, + "grad_norm": 0.9826738512020193, + "learning_rate": 2.4064957498856177e-08, + "loss": 0.7446529865264893, + "step": 8104 + }, + { + "epoch": 1.8675115207373272, + "grad_norm": 1.0744366993530696, + "learning_rate": 2.398195044529505e-08, + "loss": 0.6086497902870178, + "step": 8105 + }, + { + "epoch": 1.867741935483871, + "grad_norm": 1.5561440229209103, + "learning_rate": 2.389908505946181e-08, + "loss": 
0.9348995685577393, + "step": 8106 + }, + { + "epoch": 1.8679723502304149, + "grad_norm": 1.1497120508700005, + "learning_rate": 2.381636135338405e-08, + "loss": 0.6817007660865784, + "step": 8107 + }, + { + "epoch": 1.8682027649769584, + "grad_norm": 1.0815805532535518, + "learning_rate": 2.373377933906917e-08, + "loss": 0.7228778600692749, + "step": 8108 + }, + { + "epoch": 1.8684331797235023, + "grad_norm": 1.2824972753864794, + "learning_rate": 2.3651339028503913e-08, + "loss": 0.6974154114723206, + "step": 8109 + }, + { + "epoch": 1.868663594470046, + "grad_norm": 1.2746687740486187, + "learning_rate": 2.3569040433654264e-08, + "loss": 0.8025680780410767, + "step": 8110 + }, + { + "epoch": 1.8688940092165898, + "grad_norm": 1.0439186994105132, + "learning_rate": 2.3486883566465777e-08, + "loss": 0.7570391893386841, + "step": 8111 + }, + { + "epoch": 1.8691244239631337, + "grad_norm": 1.1353343636911755, + "learning_rate": 2.3404868438863246e-08, + "loss": 0.7982438802719116, + "step": 8112 + }, + { + "epoch": 1.8693548387096774, + "grad_norm": 0.948053216671403, + "learning_rate": 2.3322995062751372e-08, + "loss": 0.6615588665008545, + "step": 8113 + }, + { + "epoch": 1.8695852534562212, + "grad_norm": 1.1794145616088556, + "learning_rate": 2.324126345001376e-08, + "loss": 0.7748852968215942, + "step": 8114 + }, + { + "epoch": 1.8698156682027651, + "grad_norm": 1.146675047414541, + "learning_rate": 2.3159673612513587e-08, + "loss": 0.7238468527793884, + "step": 8115 + }, + { + "epoch": 1.8700460829493086, + "grad_norm": 1.2843830020573481, + "learning_rate": 2.3078225562093822e-08, + "loss": 0.8146705627441406, + "step": 8116 + }, + { + "epoch": 1.8702764976958526, + "grad_norm": 1.0747488287412188, + "learning_rate": 2.2996919310576235e-08, + "loss": 0.8393594026565552, + "step": 8117 + }, + { + "epoch": 1.8705069124423963, + "grad_norm": 1.6346887094004536, + "learning_rate": 2.2915754869762384e-08, + "loss": 0.9619652032852173, + "step": 8118 + }, + { + 
"epoch": 1.87073732718894, + "grad_norm": 1.6641290836048537, + "learning_rate": 2.2834732251433286e-08, + "loss": 0.8301321268081665, + "step": 8119 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.2687107297135523, + "learning_rate": 2.2753851467349206e-08, + "loss": 0.8236079812049866, + "step": 8120 + }, + { + "epoch": 1.8711981566820275, + "grad_norm": 1.430457986003777, + "learning_rate": 2.267311252924975e-08, + "loss": 0.9007565379142761, + "step": 8121 + }, + { + "epoch": 1.8714285714285714, + "grad_norm": 1.1827948115854126, + "learning_rate": 2.2592515448854432e-08, + "loss": 0.7430707216262817, + "step": 8122 + }, + { + "epoch": 1.8716589861751152, + "grad_norm": 1.17432989990484, + "learning_rate": 2.2512060237861452e-08, + "loss": 0.7562465667724609, + "step": 8123 + }, + { + "epoch": 1.871889400921659, + "grad_norm": 1.1839994711227122, + "learning_rate": 2.24317469079488e-08, + "loss": 0.7736096978187561, + "step": 8124 + }, + { + "epoch": 1.8721198156682028, + "grad_norm": 1.1809968020267403, + "learning_rate": 2.2351575470774153e-08, + "loss": 0.7652724981307983, + "step": 8125 + }, + { + "epoch": 1.8723502304147466, + "grad_norm": 1.4664554269524215, + "learning_rate": 2.2271545937973978e-08, + "loss": 0.8034792542457581, + "step": 8126 + }, + { + "epoch": 1.8725806451612903, + "grad_norm": 1.2107856133228136, + "learning_rate": 2.219165832116454e-08, + "loss": 0.6158101558685303, + "step": 8127 + }, + { + "epoch": 1.8728110599078343, + "grad_norm": 1.1984460742665393, + "learning_rate": 2.2111912631941564e-08, + "loss": 0.6514682769775391, + "step": 8128 + }, + { + "epoch": 1.8730414746543778, + "grad_norm": 1.1090676234846621, + "learning_rate": 2.203230888187979e-08, + "loss": 0.833041787147522, + "step": 8129 + }, + { + "epoch": 1.8732718894009217, + "grad_norm": 1.3944148742352294, + "learning_rate": 2.1952847082533864e-08, + "loss": 0.8033208250999451, + "step": 8130 + }, + { + "epoch": 1.8735023041474654, + "grad_norm": 
1.2067904980609332, + "learning_rate": 2.187352724543734e-08, + "loss": 0.742051362991333, + "step": 8131 + }, + { + "epoch": 1.8737327188940092, + "grad_norm": 1.2058964422107643, + "learning_rate": 2.1794349382103337e-08, + "loss": 0.7411169409751892, + "step": 8132 + }, + { + "epoch": 1.8739631336405531, + "grad_norm": 1.3201479261882787, + "learning_rate": 2.171531350402467e-08, + "loss": 0.7517165541648865, + "step": 8133 + }, + { + "epoch": 1.8741935483870966, + "grad_norm": 1.2371172479380752, + "learning_rate": 2.1636419622673263e-08, + "loss": 0.8010021448135376, + "step": 8134 + }, + { + "epoch": 1.8744239631336406, + "grad_norm": 1.2501522956166489, + "learning_rate": 2.1557667749500187e-08, + "loss": 0.7265241742134094, + "step": 8135 + }, + { + "epoch": 1.8746543778801843, + "grad_norm": 1.191380870353666, + "learning_rate": 2.1479057895936403e-08, + "loss": 0.6809227466583252, + "step": 8136 + }, + { + "epoch": 1.874884792626728, + "grad_norm": 1.2737037893770147, + "learning_rate": 2.140059007339201e-08, + "loss": 0.8235769271850586, + "step": 8137 + }, + { + "epoch": 1.875115207373272, + "grad_norm": 1.1356268338575812, + "learning_rate": 2.132226429325634e-08, + "loss": 0.7556289434432983, + "step": 8138 + }, + { + "epoch": 1.8753456221198157, + "grad_norm": 1.257264783564694, + "learning_rate": 2.1244080566898638e-08, + "loss": 0.7765048742294312, + "step": 8139 + }, + { + "epoch": 1.8755760368663594, + "grad_norm": 1.1776465139256578, + "learning_rate": 2.1166038905666816e-08, + "loss": 0.7637666463851929, + "step": 8140 + }, + { + "epoch": 1.8758064516129034, + "grad_norm": 1.2471130614608452, + "learning_rate": 2.10881393208886e-08, + "loss": 0.8413453698158264, + "step": 8141 + }, + { + "epoch": 1.8760368663594469, + "grad_norm": 1.443351972543058, + "learning_rate": 2.101038182387105e-08, + "loss": 0.7937475442886353, + "step": 8142 + }, + { + "epoch": 1.8762672811059908, + "grad_norm": 1.1772607773578063, + "learning_rate": 
2.0932766425900585e-08, + "loss": 0.7654982805252075, + "step": 8143 + }, + { + "epoch": 1.8764976958525346, + "grad_norm": 1.53397176108589, + "learning_rate": 2.0855293138242968e-08, + "loss": 0.8950663805007935, + "step": 8144 + }, + { + "epoch": 1.8767281105990783, + "grad_norm": 1.250929142335872, + "learning_rate": 2.077796197214332e-08, + "loss": 0.6405420303344727, + "step": 8145 + }, + { + "epoch": 1.8769585253456222, + "grad_norm": 1.085136655013558, + "learning_rate": 2.0700772938826217e-08, + "loss": 0.7724314332008362, + "step": 8146 + }, + { + "epoch": 1.8771889400921657, + "grad_norm": 1.09160242748488, + "learning_rate": 2.0623726049495472e-08, + "loss": 0.7929061651229858, + "step": 8147 + }, + { + "epoch": 1.8774193548387097, + "grad_norm": 1.0975195498555617, + "learning_rate": 2.0546821315334363e-08, + "loss": 0.7207096815109253, + "step": 8148 + }, + { + "epoch": 1.8776497695852534, + "grad_norm": 1.347240880442127, + "learning_rate": 2.0470058747505513e-08, + "loss": 0.9234127402305603, + "step": 8149 + }, + { + "epoch": 1.8778801843317972, + "grad_norm": 1.2189429089634525, + "learning_rate": 2.0393438357150906e-08, + "loss": 0.9006322026252747, + "step": 8150 + }, + { + "epoch": 1.878110599078341, + "grad_norm": 0.9863507376975118, + "learning_rate": 2.0316960155391972e-08, + "loss": 0.6289799809455872, + "step": 8151 + }, + { + "epoch": 1.8783410138248848, + "grad_norm": 1.117182475586666, + "learning_rate": 2.0240624153329168e-08, + "loss": 0.8551793098449707, + "step": 8152 + }, + { + "epoch": 1.8785714285714286, + "grad_norm": 1.1253834649892556, + "learning_rate": 2.016443036204285e-08, + "loss": 0.8065170645713806, + "step": 8153 + }, + { + "epoch": 1.8788018433179725, + "grad_norm": 1.0124272640628642, + "learning_rate": 2.0088378792592286e-08, + "loss": 0.6361274719238281, + "step": 8154 + }, + { + "epoch": 1.879032258064516, + "grad_norm": 1.3966308966349001, + "learning_rate": 2.0012469456016312e-08, + "loss": 0.8539700508117676, + 
"step": 8155 + }, + { + "epoch": 1.87926267281106, + "grad_norm": 1.380681857214056, + "learning_rate": 1.9936702363333115e-08, + "loss": 0.7424989938735962, + "step": 8156 + }, + { + "epoch": 1.8794930875576037, + "grad_norm": 1.0795560964001287, + "learning_rate": 1.9861077525540116e-08, + "loss": 0.5831520557403564, + "step": 8157 + }, + { + "epoch": 1.8797235023041474, + "grad_norm": 1.3034651332513367, + "learning_rate": 1.9785594953614093e-08, + "loss": 0.8080646991729736, + "step": 8158 + }, + { + "epoch": 1.8799539170506914, + "grad_norm": 1.3028494466110516, + "learning_rate": 1.9710254658511392e-08, + "loss": 0.8008537292480469, + "step": 8159 + }, + { + "epoch": 1.8801843317972349, + "grad_norm": 0.7838996508063781, + "learning_rate": 1.9635056651167492e-08, + "loss": 0.7317294478416443, + "step": 8160 + }, + { + "epoch": 1.8804147465437788, + "grad_norm": 1.240068145392807, + "learning_rate": 1.956000094249721e-08, + "loss": 0.803238034248352, + "step": 8161 + }, + { + "epoch": 1.8806451612903226, + "grad_norm": 1.1592302203633778, + "learning_rate": 1.948508754339506e-08, + "loss": 0.7202219367027283, + "step": 8162 + }, + { + "epoch": 1.8808755760368663, + "grad_norm": 1.3406292816176746, + "learning_rate": 1.9410316464734233e-08, + "loss": 0.7691160440444946, + "step": 8163 + }, + { + "epoch": 1.8811059907834102, + "grad_norm": 1.0898220168427848, + "learning_rate": 1.933568771736782e-08, + "loss": 0.7092962265014648, + "step": 8164 + }, + { + "epoch": 1.881336405529954, + "grad_norm": 1.3165421464208054, + "learning_rate": 1.9261201312128274e-08, + "loss": 0.819804310798645, + "step": 8165 + }, + { + "epoch": 1.8815668202764977, + "grad_norm": 1.2278633726487793, + "learning_rate": 1.918685725982694e-08, + "loss": 0.9127538204193115, + "step": 8166 + }, + { + "epoch": 1.8817972350230416, + "grad_norm": 1.198181344272901, + "learning_rate": 1.9112655571254855e-08, + "loss": 0.8023328185081482, + "step": 8167 + }, + { + "epoch": 1.8820276497695851, + 
"grad_norm": 1.1150363141436184, + "learning_rate": 1.903859625718218e-08, + "loss": 0.723065972328186, + "step": 8168 + }, + { + "epoch": 1.882258064516129, + "grad_norm": 1.329775802249569, + "learning_rate": 1.896467932835877e-08, + "loss": 0.7838670611381531, + "step": 8169 + }, + { + "epoch": 1.8824884792626728, + "grad_norm": 1.0221481880663403, + "learning_rate": 1.8890904795513475e-08, + "loss": 0.6029871702194214, + "step": 8170 + }, + { + "epoch": 1.8827188940092165, + "grad_norm": 1.1179619592038208, + "learning_rate": 1.8817272669354512e-08, + "loss": 0.7622933387756348, + "step": 8171 + }, + { + "epoch": 1.8829493087557605, + "grad_norm": 1.3471730261003036, + "learning_rate": 1.8743782960569444e-08, + "loss": 0.7702913284301758, + "step": 8172 + }, + { + "epoch": 1.883179723502304, + "grad_norm": 1.1115192812221177, + "learning_rate": 1.867043567982518e-08, + "loss": 0.6385080814361572, + "step": 8173 + }, + { + "epoch": 1.883410138248848, + "grad_norm": 1.1957117872616694, + "learning_rate": 1.8597230837768208e-08, + "loss": 0.6886409521102905, + "step": 8174 + }, + { + "epoch": 1.8836405529953917, + "grad_norm": 1.2615274538141057, + "learning_rate": 1.8524168445023803e-08, + "loss": 0.7697125673294067, + "step": 8175 + }, + { + "epoch": 1.8838709677419354, + "grad_norm": 1.2703572064059772, + "learning_rate": 1.8451248512197148e-08, + "loss": 0.7942332029342651, + "step": 8176 + }, + { + "epoch": 1.8841013824884794, + "grad_norm": 1.2486681210000266, + "learning_rate": 1.8378471049872445e-08, + "loss": 0.7751410007476807, + "step": 8177 + }, + { + "epoch": 1.884331797235023, + "grad_norm": 1.4135289386452112, + "learning_rate": 1.8305836068613023e-08, + "loss": 0.8650992512702942, + "step": 8178 + }, + { + "epoch": 1.8845622119815668, + "grad_norm": 1.255590367160678, + "learning_rate": 1.8233343578962e-08, + "loss": 0.7084495425224304, + "step": 8179 + }, + { + "epoch": 1.8847926267281108, + "grad_norm": 1.2065933395861381, + "learning_rate": 
1.8160993591441408e-08, + "loss": 0.7428494691848755, + "step": 8180 + }, + { + "epoch": 1.8850230414746543, + "grad_norm": 1.2721568643853003, + "learning_rate": 1.8088786116552844e-08, + "loss": 0.7431809902191162, + "step": 8181 + }, + { + "epoch": 1.8852534562211982, + "grad_norm": 1.5234831289492186, + "learning_rate": 1.801672116477715e-08, + "loss": 0.8312518000602722, + "step": 8182 + }, + { + "epoch": 1.885483870967742, + "grad_norm": 1.412977003038852, + "learning_rate": 1.7944798746574285e-08, + "loss": 0.8574832081794739, + "step": 8183 + }, + { + "epoch": 1.8857142857142857, + "grad_norm": 1.209006694724365, + "learning_rate": 1.7873018872383793e-08, + "loss": 0.7716966867446899, + "step": 8184 + }, + { + "epoch": 1.8859447004608296, + "grad_norm": 1.1984291768693995, + "learning_rate": 1.780138155262456e-08, + "loss": 0.8536000847816467, + "step": 8185 + }, + { + "epoch": 1.8861751152073731, + "grad_norm": 1.4411910829910872, + "learning_rate": 1.7729886797694606e-08, + "loss": 0.6559889316558838, + "step": 8186 + }, + { + "epoch": 1.886405529953917, + "grad_norm": 1.4146541158068258, + "learning_rate": 1.7658534617971065e-08, + "loss": 0.7371512651443481, + "step": 8187 + }, + { + "epoch": 1.8866359447004608, + "grad_norm": 1.5920989952321163, + "learning_rate": 1.7587325023810773e-08, + "loss": 0.8092008829116821, + "step": 8188 + }, + { + "epoch": 1.8868663594470045, + "grad_norm": 1.1485577131831675, + "learning_rate": 1.751625802554979e-08, + "loss": 0.7793067693710327, + "step": 8189 + }, + { + "epoch": 1.8870967741935485, + "grad_norm": 1.3107398360408737, + "learning_rate": 1.7445333633503312e-08, + "loss": 0.8102752566337585, + "step": 8190 + }, + { + "epoch": 1.8873271889400922, + "grad_norm": 0.9411355693415201, + "learning_rate": 1.737455185796588e-08, + "loss": 0.7141490578651428, + "step": 8191 + }, + { + "epoch": 1.887557603686636, + "grad_norm": 1.3771499753857814, + "learning_rate": 1.7303912709211497e-08, + "loss": 
0.8010870218276978, + "step": 8192 + }, + { + "epoch": 1.8877880184331797, + "grad_norm": 1.0040229371574219, + "learning_rate": 1.723341619749319e-08, + "loss": 0.7945431470870972, + "step": 8193 + }, + { + "epoch": 1.8880184331797234, + "grad_norm": 1.5084700431378903, + "learning_rate": 1.7163062333043544e-08, + "loss": 0.765398383140564, + "step": 8194 + }, + { + "epoch": 1.8882488479262673, + "grad_norm": 1.141763186710756, + "learning_rate": 1.709285112607428e-08, + "loss": 0.8645910024642944, + "step": 8195 + }, + { + "epoch": 1.888479262672811, + "grad_norm": 1.4294051802947438, + "learning_rate": 1.7022782586776363e-08, + "loss": 0.7650351524353027, + "step": 8196 + }, + { + "epoch": 1.8887096774193548, + "grad_norm": 1.148441042244908, + "learning_rate": 1.695285672532043e-08, + "loss": 0.8059902191162109, + "step": 8197 + }, + { + "epoch": 1.8889400921658988, + "grad_norm": 1.3019488561633756, + "learning_rate": 1.688307355185592e-08, + "loss": 0.8389305472373962, + "step": 8198 + }, + { + "epoch": 1.8891705069124423, + "grad_norm": 1.3363862822981094, + "learning_rate": 1.681343307651173e-08, + "loss": 0.755578875541687, + "step": 8199 + }, + { + "epoch": 1.8894009216589862, + "grad_norm": 1.2754809499843205, + "learning_rate": 1.6743935309396218e-08, + "loss": 0.822825014591217, + "step": 8200 + }, + { + "epoch": 1.88963133640553, + "grad_norm": 1.2571266177044025, + "learning_rate": 1.667458026059676e-08, + "loss": 0.8229342699050903, + "step": 8201 + }, + { + "epoch": 1.8898617511520737, + "grad_norm": 1.3086181916191966, + "learning_rate": 1.6605367940180303e-08, + "loss": 0.7142254114151001, + "step": 8202 + }, + { + "epoch": 1.8900921658986176, + "grad_norm": 1.1722391698259569, + "learning_rate": 1.6536298358192812e-08, + "loss": 0.8904600739479065, + "step": 8203 + }, + { + "epoch": 1.8903225806451613, + "grad_norm": 1.151403763105922, + "learning_rate": 1.6467371524659603e-08, + "loss": 0.8758517503738403, + "step": 8204 + }, + { + "epoch": 
1.890552995391705, + "grad_norm": 1.3083947750625244, + "learning_rate": 1.6398587449585555e-08, + "loss": 0.7609111666679382, + "step": 8205 + }, + { + "epoch": 1.8907834101382488, + "grad_norm": 0.9406449994318669, + "learning_rate": 1.6329946142954353e-08, + "loss": 0.8177064657211304, + "step": 8206 + }, + { + "epoch": 1.8910138248847925, + "grad_norm": 1.1366142550146048, + "learning_rate": 1.626144761472925e-08, + "loss": 0.6342105865478516, + "step": 8207 + }, + { + "epoch": 1.8912442396313365, + "grad_norm": 0.8903675484312013, + "learning_rate": 1.6193091874852627e-08, + "loss": 0.6025499105453491, + "step": 8208 + }, + { + "epoch": 1.8914746543778802, + "grad_norm": 1.3017839387858507, + "learning_rate": 1.6124878933246543e-08, + "loss": 0.78373783826828, + "step": 8209 + }, + { + "epoch": 1.891705069124424, + "grad_norm": 1.336095893979754, + "learning_rate": 1.605680879981164e-08, + "loss": 0.8072086572647095, + "step": 8210 + }, + { + "epoch": 1.8919354838709679, + "grad_norm": 1.5597980072939257, + "learning_rate": 1.5988881484428453e-08, + "loss": 0.9057372212409973, + "step": 8211 + }, + { + "epoch": 1.8921658986175114, + "grad_norm": 1.2099616448625954, + "learning_rate": 1.592109699695643e-08, + "loss": 0.8235929012298584, + "step": 8212 + }, + { + "epoch": 1.8923963133640553, + "grad_norm": 1.2417707847492958, + "learning_rate": 1.5853455347234366e-08, + "loss": 0.6610825061798096, + "step": 8213 + }, + { + "epoch": 1.892626728110599, + "grad_norm": 1.4158986087253451, + "learning_rate": 1.5785956545080415e-08, + "loss": 0.7152366638183594, + "step": 8214 + }, + { + "epoch": 1.8928571428571428, + "grad_norm": 1.330885873092923, + "learning_rate": 1.5718600600292066e-08, + "loss": 0.7971903085708618, + "step": 8215 + }, + { + "epoch": 1.8930875576036867, + "grad_norm": 1.226467557812747, + "learning_rate": 1.565138752264572e-08, + "loss": 0.7639449238777161, + "step": 8216 + }, + { + "epoch": 1.8933179723502302, + "grad_norm": 1.0517976072639703, 
+ "learning_rate": 1.5584317321897356e-08, + "loss": 0.6396117806434631, + "step": 8217 + }, + { + "epoch": 1.8935483870967742, + "grad_norm": 1.328962567982178, + "learning_rate": 1.5517390007782183e-08, + "loss": 0.790566086769104, + "step": 8218 + }, + { + "epoch": 1.893778801843318, + "grad_norm": 1.6769404862380202, + "learning_rate": 1.5450605590014544e-08, + "loss": 0.7948310971260071, + "step": 8219 + }, + { + "epoch": 1.8940092165898617, + "grad_norm": 1.2378052027269906, + "learning_rate": 1.5383964078288124e-08, + "loss": 0.9425654411315918, + "step": 8220 + }, + { + "epoch": 1.8942396313364056, + "grad_norm": 1.2441112834124675, + "learning_rate": 1.531746548227586e-08, + "loss": 0.8001678586006165, + "step": 8221 + }, + { + "epoch": 1.8944700460829493, + "grad_norm": 0.9072642646135723, + "learning_rate": 1.5251109811629915e-08, + "loss": 0.6636781692504883, + "step": 8222 + }, + { + "epoch": 1.894700460829493, + "grad_norm": 1.0313464437335311, + "learning_rate": 1.5184897075981807e-08, + "loss": 0.7884416580200195, + "step": 8223 + }, + { + "epoch": 1.894930875576037, + "grad_norm": 1.0907885139753422, + "learning_rate": 1.511882728494218e-08, + "loss": 0.6888208389282227, + "step": 8224 + }, + { + "epoch": 1.8951612903225805, + "grad_norm": 1.3461823033287323, + "learning_rate": 1.5052900448100815e-08, + "loss": 0.7253614664077759, + "step": 8225 + }, + { + "epoch": 1.8953917050691245, + "grad_norm": 1.2272377599078015, + "learning_rate": 1.498711657502716e-08, + "loss": 0.7865983843803406, + "step": 8226 + }, + { + "epoch": 1.8956221198156682, + "grad_norm": 1.4908955714231082, + "learning_rate": 1.492147567526947e-08, + "loss": 0.8778063654899597, + "step": 8227 + }, + { + "epoch": 1.895852534562212, + "grad_norm": 1.2263224402103408, + "learning_rate": 1.4855977758355675e-08, + "loss": 0.7812581062316895, + "step": 8228 + }, + { + "epoch": 1.8960829493087559, + "grad_norm": 1.2890011409819144, + "learning_rate": 1.4790622833792287e-08, + "loss": 
0.7160226106643677, + "step": 8229 + }, + { + "epoch": 1.8963133640552994, + "grad_norm": 1.1613199880989007, + "learning_rate": 1.472541091106594e-08, + "loss": 0.8187412619590759, + "step": 8230 + }, + { + "epoch": 1.8965437788018433, + "grad_norm": 1.1653251647412382, + "learning_rate": 1.4660341999641834e-08, + "loss": 0.7517846822738647, + "step": 8231 + }, + { + "epoch": 1.896774193548387, + "grad_norm": 1.3673338656755198, + "learning_rate": 1.4595416108964753e-08, + "loss": 0.9230127334594727, + "step": 8232 + }, + { + "epoch": 1.8970046082949308, + "grad_norm": 1.228175308993719, + "learning_rate": 1.4530633248458269e-08, + "loss": 0.6803582906723022, + "step": 8233 + }, + { + "epoch": 1.8972350230414747, + "grad_norm": 1.2890219242119376, + "learning_rate": 1.4465993427525968e-08, + "loss": 0.8444511294364929, + "step": 8234 + }, + { + "epoch": 1.8974654377880185, + "grad_norm": 1.4479761110450609, + "learning_rate": 1.4401496655550016e-08, + "loss": 0.7622519731521606, + "step": 8235 + }, + { + "epoch": 1.8976958525345622, + "grad_norm": 1.20875065982799, + "learning_rate": 1.4337142941892033e-08, + "loss": 0.687129020690918, + "step": 8236 + }, + { + "epoch": 1.8979262672811061, + "grad_norm": 1.1827775538431895, + "learning_rate": 1.4272932295892992e-08, + "loss": 0.6421219110488892, + "step": 8237 + }, + { + "epoch": 1.8981566820276496, + "grad_norm": 1.2669401147896007, + "learning_rate": 1.4208864726872772e-08, + "loss": 0.7829388380050659, + "step": 8238 + }, + { + "epoch": 1.8983870967741936, + "grad_norm": 1.3482974956529734, + "learning_rate": 1.4144940244130821e-08, + "loss": 0.7754424810409546, + "step": 8239 + }, + { + "epoch": 1.8986175115207373, + "grad_norm": 1.1130898544931584, + "learning_rate": 1.4081158856945719e-08, + "loss": 0.6544859409332275, + "step": 8240 + }, + { + "epoch": 1.898847926267281, + "grad_norm": 1.0822240775455856, + "learning_rate": 1.4017520574575282e-08, + "loss": 0.8020427227020264, + "step": 8241 + }, + { + 
"epoch": 1.899078341013825, + "grad_norm": 1.1350657169907092, + "learning_rate": 1.3954025406256343e-08, + "loss": 0.7343212366104126, + "step": 8242 + }, + { + "epoch": 1.8993087557603685, + "grad_norm": 1.2792336145941459, + "learning_rate": 1.3890673361205418e-08, + "loss": 0.7643232345581055, + "step": 8243 + }, + { + "epoch": 1.8995391705069125, + "grad_norm": 1.212662168320899, + "learning_rate": 1.3827464448617709e-08, + "loss": 0.7806165814399719, + "step": 8244 + }, + { + "epoch": 1.8997695852534562, + "grad_norm": 1.6104194734157218, + "learning_rate": 1.3764398677667988e-08, + "loss": 0.8533280491828918, + "step": 8245 + }, + { + "epoch": 1.9, + "grad_norm": 1.1289941083869026, + "learning_rate": 1.3701476057510264e-08, + "loss": 0.773565411567688, + "step": 8246 + }, + { + "epoch": 1.9002304147465439, + "grad_norm": 1.1091300492504157, + "learning_rate": 1.3638696597277677e-08, + "loss": 0.7752503752708435, + "step": 8247 + }, + { + "epoch": 1.9004608294930876, + "grad_norm": 0.9880656776459645, + "learning_rate": 1.3576060306082383e-08, + "loss": 0.7466747760772705, + "step": 8248 + }, + { + "epoch": 1.9006912442396313, + "grad_norm": 1.2177337280417093, + "learning_rate": 1.3513567193016106e-08, + "loss": 0.8103033304214478, + "step": 8249 + }, + { + "epoch": 1.9009216589861753, + "grad_norm": 1.0248826665714235, + "learning_rate": 1.3451217267149595e-08, + "loss": 0.6501287817955017, + "step": 8250 + }, + { + "epoch": 1.9011520737327188, + "grad_norm": 1.210107770730306, + "learning_rate": 1.3389010537532941e-08, + "loss": 0.7329230308532715, + "step": 8251 + }, + { + "epoch": 1.9013824884792627, + "grad_norm": 1.3978474783131303, + "learning_rate": 1.3326947013195255e-08, + "loss": 0.8413917422294617, + "step": 8252 + }, + { + "epoch": 1.9016129032258065, + "grad_norm": 1.4081927433558092, + "learning_rate": 1.3265026703144999e-08, + "loss": 0.7283090353012085, + "step": 8253 + }, + { + "epoch": 1.9018433179723502, + "grad_norm": 
1.2553133709092965, + "learning_rate": 1.3203249616369872e-08, + "loss": 0.8378126621246338, + "step": 8254 + }, + { + "epoch": 1.9020737327188941, + "grad_norm": 1.099276496142028, + "learning_rate": 1.3141615761836811e-08, + "loss": 0.7675777673721313, + "step": 8255 + }, + { + "epoch": 1.9023041474654376, + "grad_norm": 1.6916159414604328, + "learning_rate": 1.308012514849155e-08, + "loss": 0.6448104381561279, + "step": 8256 + }, + { + "epoch": 1.9025345622119816, + "grad_norm": 1.3264486635424506, + "learning_rate": 1.3018777785259838e-08, + "loss": 0.8024395704269409, + "step": 8257 + }, + { + "epoch": 1.9027649769585253, + "grad_norm": 1.1900370575281645, + "learning_rate": 1.2957573681045887e-08, + "loss": 0.8159325122833252, + "step": 8258 + }, + { + "epoch": 1.902995391705069, + "grad_norm": 1.1100937535082447, + "learning_rate": 1.2896512844733365e-08, + "loss": 0.7916233539581299, + "step": 8259 + }, + { + "epoch": 1.903225806451613, + "grad_norm": 1.2408177778484295, + "learning_rate": 1.2835595285185296e-08, + "loss": 0.798140823841095, + "step": 8260 + }, + { + "epoch": 1.9034562211981567, + "grad_norm": 1.2142666252173266, + "learning_rate": 1.277482101124383e-08, + "loss": 0.7881651520729065, + "step": 8261 + }, + { + "epoch": 1.9036866359447004, + "grad_norm": 1.3615775077613546, + "learning_rate": 1.2714190031730021e-08, + "loss": 0.7023189663887024, + "step": 8262 + }, + { + "epoch": 1.9039170506912444, + "grad_norm": 1.2537620544817238, + "learning_rate": 1.2653702355444606e-08, + "loss": 0.8286309242248535, + "step": 8263 + }, + { + "epoch": 1.904147465437788, + "grad_norm": 1.4181409914325045, + "learning_rate": 1.259335799116723e-08, + "loss": 0.7626973986625671, + "step": 8264 + }, + { + "epoch": 1.9043778801843319, + "grad_norm": 1.7640804361655256, + "learning_rate": 1.2533156947656665e-08, + "loss": 1.0350267887115479, + "step": 8265 + }, + { + "epoch": 1.9046082949308756, + "grad_norm": 1.0808972871053977, + "learning_rate": 
1.2473099233651251e-08, + "loss": 0.6378228664398193, + "step": 8266 + }, + { + "epoch": 1.9048387096774193, + "grad_norm": 1.1012549826430145, + "learning_rate": 1.2413184857868241e-08, + "loss": 0.8265732526779175, + "step": 8267 + }, + { + "epoch": 1.9050691244239633, + "grad_norm": 1.102740322591124, + "learning_rate": 1.23534138290039e-08, + "loss": 0.8545348644256592, + "step": 8268 + }, + { + "epoch": 1.9052995391705068, + "grad_norm": 1.1667419775790697, + "learning_rate": 1.2293786155734176e-08, + "loss": 0.660080075263977, + "step": 8269 + }, + { + "epoch": 1.9055299539170507, + "grad_norm": 1.4258566183231558, + "learning_rate": 1.2234301846713813e-08, + "loss": 0.8409689664840698, + "step": 8270 + }, + { + "epoch": 1.9057603686635944, + "grad_norm": 1.3639053971310304, + "learning_rate": 1.2174960910576904e-08, + "loss": 0.8026434183120728, + "step": 8271 + }, + { + "epoch": 1.9059907834101382, + "grad_norm": 1.1477802786886386, + "learning_rate": 1.2115763355936671e-08, + "loss": 0.8315812945365906, + "step": 8272 + }, + { + "epoch": 1.9062211981566821, + "grad_norm": 1.1488868543504023, + "learning_rate": 1.2056709191385572e-08, + "loss": 0.7373194694519043, + "step": 8273 + }, + { + "epoch": 1.9064516129032258, + "grad_norm": 1.28219548502893, + "learning_rate": 1.1997798425495309e-08, + "loss": 0.7502317428588867, + "step": 8274 + }, + { + "epoch": 1.9066820276497696, + "grad_norm": 1.1940555150789485, + "learning_rate": 1.1939031066816707e-08, + "loss": 0.8208760023117065, + "step": 8275 + }, + { + "epoch": 1.9069124423963135, + "grad_norm": 1.2690336009694645, + "learning_rate": 1.188040712387961e-08, + "loss": 0.7584094405174255, + "step": 8276 + }, + { + "epoch": 1.907142857142857, + "grad_norm": 1.3136164329476003, + "learning_rate": 1.1821926605193433e-08, + "loss": 0.7776647210121155, + "step": 8277 + }, + { + "epoch": 1.907373271889401, + "grad_norm": 1.0778088332238458, + "learning_rate": 1.1763589519246387e-08, + "loss": 
0.7739659547805786, + "step": 8278 + }, + { + "epoch": 1.9076036866359447, + "grad_norm": 1.3752880267959628, + "learning_rate": 1.170539587450603e-08, + "loss": 0.7276068925857544, + "step": 8279 + }, + { + "epoch": 1.9078341013824884, + "grad_norm": 1.1782987713077362, + "learning_rate": 1.1647345679419163e-08, + "loss": 0.624208927154541, + "step": 8280 + }, + { + "epoch": 1.9080645161290324, + "grad_norm": 1.0744404873031923, + "learning_rate": 1.1589438942411712e-08, + "loss": 0.7865229845046997, + "step": 8281 + }, + { + "epoch": 1.908294930875576, + "grad_norm": 1.1655122856650737, + "learning_rate": 1.1531675671888619e-08, + "loss": 0.8290715217590332, + "step": 8282 + }, + { + "epoch": 1.9085253456221198, + "grad_norm": 1.4733922787626827, + "learning_rate": 1.1474055876234289e-08, + "loss": 0.8750064969062805, + "step": 8283 + }, + { + "epoch": 1.9087557603686636, + "grad_norm": 1.0358743027064434, + "learning_rate": 1.1416579563812146e-08, + "loss": 0.7946900129318237, + "step": 8284 + }, + { + "epoch": 1.9089861751152073, + "grad_norm": 1.1260650941834194, + "learning_rate": 1.1359246742964623e-08, + "loss": 0.6673855781555176, + "step": 8285 + }, + { + "epoch": 1.9092165898617512, + "grad_norm": 1.5734371068415847, + "learning_rate": 1.1302057422013734e-08, + "loss": 0.8423609137535095, + "step": 8286 + }, + { + "epoch": 1.909447004608295, + "grad_norm": 1.1774099615686673, + "learning_rate": 1.124501160926039e-08, + "loss": 0.7583299279212952, + "step": 8287 + }, + { + "epoch": 1.9096774193548387, + "grad_norm": 1.3632188021099019, + "learning_rate": 1.1188109312984639e-08, + "loss": 0.8489730358123779, + "step": 8288 + }, + { + "epoch": 1.9099078341013827, + "grad_norm": 1.268317857067217, + "learning_rate": 1.1131350541445871e-08, + "loss": 0.7460636496543884, + "step": 8289 + }, + { + "epoch": 1.9101382488479262, + "grad_norm": 1.1951667787690143, + "learning_rate": 1.1074735302882387e-08, + "loss": 0.7310905456542969, + "step": 8290 + }, + { + 
"epoch": 1.91036866359447, + "grad_norm": 1.1692661015812214, + "learning_rate": 1.1018263605511946e-08, + "loss": 0.8411405086517334, + "step": 8291 + }, + { + "epoch": 1.9105990783410138, + "grad_norm": 1.12451343736832, + "learning_rate": 1.0961935457531323e-08, + "loss": 0.7980802059173584, + "step": 8292 + }, + { + "epoch": 1.9108294930875576, + "grad_norm": 1.2914760603674136, + "learning_rate": 1.0905750867116426e-08, + "loss": 0.779492974281311, + "step": 8293 + }, + { + "epoch": 1.9110599078341015, + "grad_norm": 1.0940139924335759, + "learning_rate": 1.0849709842422283e-08, + "loss": 0.7893733978271484, + "step": 8294 + }, + { + "epoch": 1.911290322580645, + "grad_norm": 1.367510888792546, + "learning_rate": 1.07938123915835e-08, + "loss": 0.8281872272491455, + "step": 8295 + }, + { + "epoch": 1.911520737327189, + "grad_norm": 1.3626141199750628, + "learning_rate": 1.0738058522713144e-08, + "loss": 0.721331775188446, + "step": 8296 + }, + { + "epoch": 1.9117511520737327, + "grad_norm": 0.9302233955509024, + "learning_rate": 1.0682448243904073e-08, + "loss": 0.6043491363525391, + "step": 8297 + }, + { + "epoch": 1.9119815668202764, + "grad_norm": 1.002380139729753, + "learning_rate": 1.0626981563227943e-08, + "loss": 0.7737481594085693, + "step": 8298 + }, + { + "epoch": 1.9122119815668204, + "grad_norm": 1.20563258082351, + "learning_rate": 1.0571658488735536e-08, + "loss": 0.771499514579773, + "step": 8299 + }, + { + "epoch": 1.912442396313364, + "grad_norm": 1.1334287395884057, + "learning_rate": 1.0516479028457204e-08, + "loss": 0.6711971759796143, + "step": 8300 + }, + { + "epoch": 1.9126728110599078, + "grad_norm": 1.1514161835446617, + "learning_rate": 1.0461443190402097e-08, + "loss": 0.691685140132904, + "step": 8301 + }, + { + "epoch": 1.9129032258064518, + "grad_norm": 1.0627327279898275, + "learning_rate": 1.0406550982558382e-08, + "loss": 0.7339159250259399, + "step": 8302 + }, + { + "epoch": 1.9131336405529953, + "grad_norm": 
1.098827920572517, + "learning_rate": 1.0351802412893796e-08, + "loss": 0.7832008600234985, + "step": 8303 + }, + { + "epoch": 1.9133640552995392, + "grad_norm": 1.8976948304927823, + "learning_rate": 1.0297197489355092e-08, + "loss": 0.862671971321106, + "step": 8304 + }, + { + "epoch": 1.913594470046083, + "grad_norm": 1.2340137918284608, + "learning_rate": 1.0242736219867821e-08, + "loss": 0.6442357897758484, + "step": 8305 + }, + { + "epoch": 1.9138248847926267, + "grad_norm": 1.3262423414476558, + "learning_rate": 1.0188418612337102e-08, + "loss": 0.8777452707290649, + "step": 8306 + }, + { + "epoch": 1.9140552995391706, + "grad_norm": 1.2308393583128812, + "learning_rate": 1.0134244674647186e-08, + "loss": 0.7672470808029175, + "step": 8307 + }, + { + "epoch": 1.9142857142857141, + "grad_norm": 0.9277990008899878, + "learning_rate": 1.0080214414661226e-08, + "loss": 0.7338177561759949, + "step": 8308 + }, + { + "epoch": 1.914516129032258, + "grad_norm": 1.3815065909330264, + "learning_rate": 1.0026327840221727e-08, + "loss": 0.7546414136886597, + "step": 8309 + }, + { + "epoch": 1.9147465437788018, + "grad_norm": 1.0116807626508924, + "learning_rate": 9.972584959149988e-09, + "loss": 0.621455192565918, + "step": 8310 + }, + { + "epoch": 1.9149769585253456, + "grad_norm": 1.0385626369203964, + "learning_rate": 9.918985779247102e-09, + "loss": 0.7403131723403931, + "step": 8311 + }, + { + "epoch": 1.9152073732718895, + "grad_norm": 1.1027069898803628, + "learning_rate": 9.865530308292624e-09, + "loss": 0.7924279570579529, + "step": 8312 + }, + { + "epoch": 1.9154377880184332, + "grad_norm": 1.1362295208393791, + "learning_rate": 9.81221855404568e-09, + "loss": 0.8831228017807007, + "step": 8313 + }, + { + "epoch": 1.915668202764977, + "grad_norm": 1.1281945792188444, + "learning_rate": 9.759050524244417e-09, + "loss": 0.6786219477653503, + "step": 8314 + }, + { + "epoch": 1.9158986175115207, + "grad_norm": 1.2807157366480393, + "learning_rate": 
9.70602622660599e-09, + "loss": 0.7311046123504639, + "step": 8315 + }, + { + "epoch": 1.9161290322580644, + "grad_norm": 1.3847340573145779, + "learning_rate": 9.653145668826912e-09, + "loss": 0.8914301991462708, + "step": 8316 + }, + { + "epoch": 1.9163594470046084, + "grad_norm": 1.4027670914288322, + "learning_rate": 9.600408858582709e-09, + "loss": 0.8144292831420898, + "step": 8317 + }, + { + "epoch": 1.916589861751152, + "grad_norm": 1.1077379444431534, + "learning_rate": 9.547815803528036e-09, + "loss": 0.6670823097229004, + "step": 8318 + }, + { + "epoch": 1.9168202764976958, + "grad_norm": 1.2434106495167774, + "learning_rate": 9.495366511296676e-09, + "loss": 0.6801552772521973, + "step": 8319 + }, + { + "epoch": 1.9170506912442398, + "grad_norm": 1.0098918722618904, + "learning_rate": 9.44306098950165e-09, + "loss": 0.8144240379333496, + "step": 8320 + }, + { + "epoch": 1.9172811059907833, + "grad_norm": 1.0515221920732627, + "learning_rate": 9.390899245734995e-09, + "loss": 0.6352888345718384, + "step": 8321 + }, + { + "epoch": 1.9175115207373272, + "grad_norm": 1.2296941092807456, + "learning_rate": 9.33888128756788e-09, + "loss": 0.7513711452484131, + "step": 8322 + }, + { + "epoch": 1.917741935483871, + "grad_norm": 1.4377668264686976, + "learning_rate": 9.287007122550705e-09, + "loss": 0.7699171304702759, + "step": 8323 + }, + { + "epoch": 1.9179723502304147, + "grad_norm": 1.591632209718944, + "learning_rate": 9.235276758212895e-09, + "loss": 0.8321002721786499, + "step": 8324 + }, + { + "epoch": 1.9182027649769586, + "grad_norm": 1.0453744404830132, + "learning_rate": 9.183690202062999e-09, + "loss": 0.6815298795700073, + "step": 8325 + }, + { + "epoch": 1.9184331797235024, + "grad_norm": 1.0030633247337575, + "learning_rate": 9.132247461588915e-09, + "loss": 0.7135178446769714, + "step": 8326 + }, + { + "epoch": 1.918663594470046, + "grad_norm": 1.3123190228023687, + "learning_rate": 9.080948544257338e-09, + "loss": 0.8452005982398987, + "step": 
8327 + }, + { + "epoch": 1.9188940092165898, + "grad_norm": 1.1270879003396566, + "learning_rate": 9.029793457514312e-09, + "loss": 0.7449440956115723, + "step": 8328 + }, + { + "epoch": 1.9191244239631335, + "grad_norm": 1.2310904327231214, + "learning_rate": 8.978782208784897e-09, + "loss": 0.8172955513000488, + "step": 8329 + }, + { + "epoch": 1.9193548387096775, + "grad_norm": 1.0097624251077932, + "learning_rate": 8.92791480547317e-09, + "loss": 0.6682305335998535, + "step": 8330 + }, + { + "epoch": 1.9195852534562212, + "grad_norm": 1.1974701853493588, + "learning_rate": 8.877191254962779e-09, + "loss": 0.6874973773956299, + "step": 8331 + }, + { + "epoch": 1.919815668202765, + "grad_norm": 1.1728345166861331, + "learning_rate": 8.826611564615949e-09, + "loss": 0.8371694684028625, + "step": 8332 + }, + { + "epoch": 1.920046082949309, + "grad_norm": 1.1837626119929445, + "learning_rate": 8.77617574177425e-09, + "loss": 0.7147493362426758, + "step": 8333 + }, + { + "epoch": 1.9202764976958524, + "grad_norm": 1.2783488550083906, + "learning_rate": 8.725883793758382e-09, + "loss": 0.7444115877151489, + "step": 8334 + }, + { + "epoch": 1.9205069124423964, + "grad_norm": 1.3799268170287549, + "learning_rate": 8.675735727868283e-09, + "loss": 0.7772307395935059, + "step": 8335 + }, + { + "epoch": 1.92073732718894, + "grad_norm": 1.2730237375907167, + "learning_rate": 8.625731551382798e-09, + "loss": 0.702937126159668, + "step": 8336 + }, + { + "epoch": 1.9209677419354838, + "grad_norm": 1.316574939310684, + "learning_rate": 8.575871271559898e-09, + "loss": 0.7404709458351135, + "step": 8337 + }, + { + "epoch": 1.9211981566820278, + "grad_norm": 1.4216605594412726, + "learning_rate": 8.526154895636906e-09, + "loss": 0.7142058610916138, + "step": 8338 + }, + { + "epoch": 1.9214285714285713, + "grad_norm": 1.381037068322115, + "learning_rate": 8.476582430830048e-09, + "loss": 0.8950545191764832, + "step": 8339 + }, + { + "epoch": 1.9216589861751152, + "grad_norm": 
1.2364573338693037, + "learning_rate": 8.42715388433446e-09, + "loss": 0.6939054131507874, + "step": 8340 + }, + { + "epoch": 1.921889400921659, + "grad_norm": 1.3248307922164142, + "learning_rate": 8.377869263324954e-09, + "loss": 0.7916324138641357, + "step": 8341 + }, + { + "epoch": 1.9221198156682027, + "grad_norm": 1.3092539218499513, + "learning_rate": 8.328728574954924e-09, + "loss": 0.8059754371643066, + "step": 8342 + }, + { + "epoch": 1.9223502304147466, + "grad_norm": 1.1195879983393067, + "learning_rate": 8.279731826357105e-09, + "loss": 0.650648295879364, + "step": 8343 + }, + { + "epoch": 1.9225806451612903, + "grad_norm": 0.9135397053997126, + "learning_rate": 8.230879024643478e-09, + "loss": 0.6912552118301392, + "step": 8344 + }, + { + "epoch": 1.922811059907834, + "grad_norm": 0.8588678436998939, + "learning_rate": 8.182170176904702e-09, + "loss": 0.7430927753448486, + "step": 8345 + }, + { + "epoch": 1.923041474654378, + "grad_norm": 1.1000327691208154, + "learning_rate": 8.133605290210898e-09, + "loss": 0.7550772428512573, + "step": 8346 + }, + { + "epoch": 1.9232718894009215, + "grad_norm": 1.1138393113278757, + "learning_rate": 8.08518437161132e-09, + "loss": 0.7235819101333618, + "step": 8347 + }, + { + "epoch": 1.9235023041474655, + "grad_norm": 1.085631464611088, + "learning_rate": 8.036907428134121e-09, + "loss": 0.790582537651062, + "step": 8348 + }, + { + "epoch": 1.9237327188940092, + "grad_norm": 1.2928878399763604, + "learning_rate": 7.988774466786585e-09, + "loss": 0.7350871562957764, + "step": 8349 + }, + { + "epoch": 1.923963133640553, + "grad_norm": 1.3980478677422172, + "learning_rate": 7.940785494555124e-09, + "loss": 0.86177659034729, + "step": 8350 + }, + { + "epoch": 1.9241935483870969, + "grad_norm": 1.196963381013611, + "learning_rate": 7.892940518405499e-09, + "loss": 0.8039232492446899, + "step": 8351 + }, + { + "epoch": 1.9244239631336404, + "grad_norm": 1.231295549355971, + "learning_rate": 7.845239545282046e-09, + 
"loss": 0.7130967378616333, + "step": 8352 + }, + { + "epoch": 1.9246543778801843, + "grad_norm": 1.0830506625128473, + "learning_rate": 7.797682582108667e-09, + "loss": 0.7297911047935486, + "step": 8353 + }, + { + "epoch": 1.924884792626728, + "grad_norm": 1.2576048144274934, + "learning_rate": 7.750269635788065e-09, + "loss": 0.7302875518798828, + "step": 8354 + }, + { + "epoch": 1.9251152073732718, + "grad_norm": 1.1228331103171292, + "learning_rate": 7.703000713202401e-09, + "loss": 0.7976555824279785, + "step": 8355 + }, + { + "epoch": 1.9253456221198157, + "grad_norm": 1.1181213613597878, + "learning_rate": 7.65587582121252e-09, + "loss": 0.6747829914093018, + "step": 8356 + }, + { + "epoch": 1.9255760368663595, + "grad_norm": 1.3086474559444063, + "learning_rate": 7.608894966658509e-09, + "loss": 0.7217142581939697, + "step": 8357 + }, + { + "epoch": 1.9258064516129032, + "grad_norm": 1.3893709396765357, + "learning_rate": 7.562058156359685e-09, + "loss": 0.8635888695716858, + "step": 8358 + }, + { + "epoch": 1.9260368663594472, + "grad_norm": 1.3318330118319255, + "learning_rate": 7.515365397114282e-09, + "loss": 0.8435994386672974, + "step": 8359 + }, + { + "epoch": 1.9262672811059907, + "grad_norm": 1.4490671236886896, + "learning_rate": 7.468816695699653e-09, + "loss": 0.8632286787033081, + "step": 8360 + }, + { + "epoch": 1.9264976958525346, + "grad_norm": 1.501498499241499, + "learning_rate": 7.422412058872396e-09, + "loss": 0.7916556596755981, + "step": 8361 + }, + { + "epoch": 1.9267281105990783, + "grad_norm": 1.1808854932681303, + "learning_rate": 7.376151493368121e-09, + "loss": 0.8307663202285767, + "step": 8362 + }, + { + "epoch": 1.926958525345622, + "grad_norm": 1.4156996026964064, + "learning_rate": 7.330035005901236e-09, + "loss": 0.9020388126373291, + "step": 8363 + }, + { + "epoch": 1.927188940092166, + "grad_norm": 1.222606934693838, + "learning_rate": 7.28406260316572e-09, + "loss": 0.7926114797592163, + "step": 8364 + }, + { + "epoch": 
1.9274193548387095, + "grad_norm": 1.0417046174216056, + "learning_rate": 7.2382342918343446e-09, + "loss": 0.7609784603118896, + "step": 8365 + }, + { + "epoch": 1.9276497695852535, + "grad_norm": 1.3729827404737949, + "learning_rate": 7.192550078559012e-09, + "loss": 0.6010490655899048, + "step": 8366 + }, + { + "epoch": 1.9278801843317972, + "grad_norm": 1.495271329234438, + "learning_rate": 7.147009969970641e-09, + "loss": 0.8219606876373291, + "step": 8367 + }, + { + "epoch": 1.928110599078341, + "grad_norm": 1.207499145814505, + "learning_rate": 7.101613972679499e-09, + "loss": 0.8688151836395264, + "step": 8368 + }, + { + "epoch": 1.9283410138248849, + "grad_norm": 1.0608698410629562, + "learning_rate": 7.0563620932747595e-09, + "loss": 0.7654411792755127, + "step": 8369 + }, + { + "epoch": 1.9285714285714286, + "grad_norm": 1.0982841652537483, + "learning_rate": 7.01125433832439e-09, + "loss": 0.6878413558006287, + "step": 8370 + }, + { + "epoch": 1.9288018433179723, + "grad_norm": 1.0662803206592244, + "learning_rate": 6.966290714375933e-09, + "loss": 0.6703332662582397, + "step": 8371 + }, + { + "epoch": 1.9290322580645163, + "grad_norm": 1.1405585467491617, + "learning_rate": 6.921471227955833e-09, + "loss": 0.752200722694397, + "step": 8372 + }, + { + "epoch": 1.9292626728110598, + "grad_norm": 1.1122335677850106, + "learning_rate": 6.8767958855695526e-09, + "loss": 0.8107069730758667, + "step": 8373 + }, + { + "epoch": 1.9294930875576037, + "grad_norm": 1.4102834771954489, + "learning_rate": 6.832264693701573e-09, + "loss": 0.8816967010498047, + "step": 8374 + }, + { + "epoch": 1.9297235023041475, + "grad_norm": 1.2593635712728732, + "learning_rate": 6.78787765881561e-09, + "loss": 0.7889697551727295, + "step": 8375 + }, + { + "epoch": 1.9299539170506912, + "grad_norm": 1.2377942170623384, + "learning_rate": 6.743634787354291e-09, + "loss": 0.7218060493469238, + "step": 8376 + }, + { + "epoch": 1.9301843317972351, + "grad_norm": 1.2786458190631131, + 
"learning_rate": 6.699536085739588e-09, + "loss": 0.8061347007751465, + "step": 8377 + }, + { + "epoch": 1.9304147465437786, + "grad_norm": 1.0571211016932303, + "learning_rate": 6.655581560372159e-09, + "loss": 0.7320632934570312, + "step": 8378 + }, + { + "epoch": 1.9306451612903226, + "grad_norm": 1.2201688729332103, + "learning_rate": 6.611771217632123e-09, + "loss": 0.7039695978164673, + "step": 8379 + }, + { + "epoch": 1.9308755760368663, + "grad_norm": 1.0152325785443144, + "learning_rate": 6.568105063878393e-09, + "loss": 0.7056317925453186, + "step": 8380 + }, + { + "epoch": 1.93110599078341, + "grad_norm": 1.3442992098354511, + "learning_rate": 6.524583105449122e-09, + "loss": 0.9265607595443726, + "step": 8381 + }, + { + "epoch": 1.931336405529954, + "grad_norm": 0.9980232024455323, + "learning_rate": 6.481205348661367e-09, + "loss": 0.7249365448951721, + "step": 8382 + }, + { + "epoch": 1.9315668202764977, + "grad_norm": 1.0217670095742197, + "learning_rate": 6.4379717998114256e-09, + "loss": 0.8216372728347778, + "step": 8383 + }, + { + "epoch": 1.9317972350230415, + "grad_norm": 1.0731967820570871, + "learning_rate": 6.394882465174611e-09, + "loss": 0.6750606894493103, + "step": 8384 + }, + { + "epoch": 1.9320276497695854, + "grad_norm": 1.1382732221343326, + "learning_rate": 6.351937351005143e-09, + "loss": 0.8265045285224915, + "step": 8385 + }, + { + "epoch": 1.932258064516129, + "grad_norm": 1.2033626019579449, + "learning_rate": 6.309136463536591e-09, + "loss": 0.5992317795753479, + "step": 8386 + }, + { + "epoch": 1.9324884792626729, + "grad_norm": 1.026760102298627, + "learning_rate": 6.266479808981428e-09, + "loss": 0.6586567163467407, + "step": 8387 + }, + { + "epoch": 1.9327188940092166, + "grad_norm": 1.1335080912138158, + "learning_rate": 6.223967393531259e-09, + "loss": 0.7496415376663208, + "step": 8388 + }, + { + "epoch": 1.9329493087557603, + "grad_norm": 1.2743344602397095, + "learning_rate": 6.181599223356593e-09, + "loss": 
0.8637027740478516, + "step": 8389 + }, + { + "epoch": 1.9331797235023043, + "grad_norm": 1.3348493633535858, + "learning_rate": 6.139375304607064e-09, + "loss": 0.6925984621047974, + "step": 8390 + }, + { + "epoch": 1.9334101382488478, + "grad_norm": 1.3338549311969345, + "learning_rate": 6.0972956434115485e-09, + "loss": 0.8345432877540588, + "step": 8391 + }, + { + "epoch": 1.9336405529953917, + "grad_norm": 1.211546505819517, + "learning_rate": 6.055360245877938e-09, + "loss": 0.797752857208252, + "step": 8392 + }, + { + "epoch": 1.9338709677419355, + "grad_norm": 1.025513773253857, + "learning_rate": 6.013569118092809e-09, + "loss": 0.7460094690322876, + "step": 8393 + }, + { + "epoch": 1.9341013824884792, + "grad_norm": 1.0501792229397418, + "learning_rate": 5.97192226612242e-09, + "loss": 0.7695547342300415, + "step": 8394 + }, + { + "epoch": 1.9343317972350231, + "grad_norm": 1.3341559418127071, + "learning_rate": 5.9304196960113795e-09, + "loss": 0.8372104167938232, + "step": 8395 + }, + { + "epoch": 1.9345622119815669, + "grad_norm": 1.174939684239835, + "learning_rate": 5.889061413784091e-09, + "loss": 0.7647950053215027, + "step": 8396 + }, + { + "epoch": 1.9347926267281106, + "grad_norm": 1.0568987578487792, + "learning_rate": 5.84784742544353e-09, + "loss": 0.6958519220352173, + "step": 8397 + }, + { + "epoch": 1.9350230414746545, + "grad_norm": 1.1905008025272417, + "learning_rate": 5.806777736971691e-09, + "loss": 0.8488763570785522, + "step": 8398 + }, + { + "epoch": 1.935253456221198, + "grad_norm": 1.1975357379056275, + "learning_rate": 5.765852354330025e-09, + "loss": 0.6448318958282471, + "step": 8399 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 1.288117894635522, + "learning_rate": 5.725071283458671e-09, + "loss": 0.7449144124984741, + "step": 8400 + }, + { + "epoch": 1.9357142857142857, + "grad_norm": 1.2060473887345362, + "learning_rate": 5.684434530277005e-09, + "loss": 0.8339489102363586, + "step": 8401 + }, + { + "epoch": 
1.9359447004608294, + "grad_norm": 1.355663998015665, + "learning_rate": 5.643942100683308e-09, + "loss": 0.7758409380912781, + "step": 8402 + }, + { + "epoch": 1.9361751152073734, + "grad_norm": 1.2457476365021507, + "learning_rate": 5.60359400055499e-09, + "loss": 0.8604291081428528, + "step": 8403 + }, + { + "epoch": 1.936405529953917, + "grad_norm": 0.9800977546704353, + "learning_rate": 5.5633902357487e-09, + "loss": 0.7379741668701172, + "step": 8404 + }, + { + "epoch": 1.9366359447004609, + "grad_norm": 1.0501931597758303, + "learning_rate": 5.52333081209988e-09, + "loss": 0.6943101286888123, + "step": 8405 + }, + { + "epoch": 1.9368663594470046, + "grad_norm": 1.193280273833338, + "learning_rate": 5.483415735422992e-09, + "loss": 0.7397646903991699, + "step": 8406 + }, + { + "epoch": 1.9370967741935483, + "grad_norm": 1.1298510822998358, + "learning_rate": 5.443645011511844e-09, + "loss": 0.7566234469413757, + "step": 8407 + }, + { + "epoch": 1.9373271889400923, + "grad_norm": 1.322820355956732, + "learning_rate": 5.40401864613893e-09, + "loss": 0.6345827579498291, + "step": 8408 + }, + { + "epoch": 1.937557603686636, + "grad_norm": 1.6653451978671274, + "learning_rate": 5.3645366450560944e-09, + "loss": 0.7259831428527832, + "step": 8409 + }, + { + "epoch": 1.9377880184331797, + "grad_norm": 1.347964952979272, + "learning_rate": 5.325199013993975e-09, + "loss": 0.7897600531578064, + "step": 8410 + }, + { + "epoch": 1.9380184331797237, + "grad_norm": 1.3016062068490681, + "learning_rate": 5.286005758662448e-09, + "loss": 0.8421739339828491, + "step": 8411 + }, + { + "epoch": 1.9382488479262672, + "grad_norm": 1.3347958532899202, + "learning_rate": 5.2469568847504085e-09, + "loss": 0.7652501463890076, + "step": 8412 + }, + { + "epoch": 1.9384792626728111, + "grad_norm": 1.3105993577298032, + "learning_rate": 5.2080523979256556e-09, + "loss": 0.6397069096565247, + "step": 8413 + }, + { + "epoch": 1.9387096774193548, + "grad_norm": 1.2689574006754154, + 
"learning_rate": 5.169292303835116e-09, + "loss": 0.840052604675293, + "step": 8414 + }, + { + "epoch": 1.9389400921658986, + "grad_norm": 1.344062608291919, + "learning_rate": 5.130676608104845e-09, + "loss": 0.8453920483589172, + "step": 8415 + }, + { + "epoch": 1.9391705069124425, + "grad_norm": 1.3358429095342716, + "learning_rate": 5.092205316339915e-09, + "loss": 0.8301386833190918, + "step": 8416 + }, + { + "epoch": 1.939400921658986, + "grad_norm": 1.0570862677742232, + "learning_rate": 5.0538784341241924e-09, + "loss": 0.6682429313659668, + "step": 8417 + }, + { + "epoch": 1.93963133640553, + "grad_norm": 1.4370850274204425, + "learning_rate": 5.0156959670208945e-09, + "loss": 0.7881286144256592, + "step": 8418 + }, + { + "epoch": 1.9398617511520737, + "grad_norm": 1.1170749783406635, + "learning_rate": 4.9776579205721424e-09, + "loss": 0.7413277626037598, + "step": 8419 + }, + { + "epoch": 1.9400921658986174, + "grad_norm": 1.2672048797390025, + "learning_rate": 4.939764300299187e-09, + "loss": 0.6718757152557373, + "step": 8420 + }, + { + "epoch": 1.9403225806451614, + "grad_norm": 1.1707673461814823, + "learning_rate": 4.9020151117019625e-09, + "loss": 0.8595068454742432, + "step": 8421 + }, + { + "epoch": 1.9405529953917051, + "grad_norm": 1.0350774696905816, + "learning_rate": 4.864410360260085e-09, + "loss": 0.6985205411911011, + "step": 8422 + }, + { + "epoch": 1.9407834101382488, + "grad_norm": 1.222465370246094, + "learning_rate": 4.826950051431522e-09, + "loss": 0.7148889303207397, + "step": 8423 + }, + { + "epoch": 1.9410138248847926, + "grad_norm": 1.320040251210183, + "learning_rate": 4.789634190653813e-09, + "loss": 0.8109019994735718, + "step": 8424 + }, + { + "epoch": 1.9412442396313363, + "grad_norm": 1.4762486891336946, + "learning_rate": 4.752462783343292e-09, + "loss": 0.8268437385559082, + "step": 8425 + }, + { + "epoch": 1.9414746543778802, + "grad_norm": 0.9708535634361853, + "learning_rate": 4.715435834895088e-09, + "loss": 
0.7300432920455933, + "step": 8426 + }, + { + "epoch": 1.941705069124424, + "grad_norm": 1.3017508085468754, + "learning_rate": 4.6785533506839005e-09, + "loss": 0.848440408706665, + "step": 8427 + }, + { + "epoch": 1.9419354838709677, + "grad_norm": 1.0873655680994063, + "learning_rate": 4.6418153360630044e-09, + "loss": 0.7526305913925171, + "step": 8428 + }, + { + "epoch": 1.9421658986175117, + "grad_norm": 1.1186105868292944, + "learning_rate": 4.605221796365022e-09, + "loss": 0.6987402439117432, + "step": 8429 + }, + { + "epoch": 1.9423963133640552, + "grad_norm": 1.5889483697201847, + "learning_rate": 4.568772736901261e-09, + "loss": 0.7944519519805908, + "step": 8430 + }, + { + "epoch": 1.942626728110599, + "grad_norm": 1.0443704220390153, + "learning_rate": 4.532468162962378e-09, + "loss": 0.7206175327301025, + "step": 8431 + }, + { + "epoch": 1.9428571428571428, + "grad_norm": 1.332362884391146, + "learning_rate": 4.4963080798179345e-09, + "loss": 0.6892992854118347, + "step": 8432 + }, + { + "epoch": 1.9430875576036866, + "grad_norm": 1.0826330060160456, + "learning_rate": 4.460292492716511e-09, + "loss": 0.696158766746521, + "step": 8433 + }, + { + "epoch": 1.9433179723502305, + "grad_norm": 0.9789941295444919, + "learning_rate": 4.424421406885704e-09, + "loss": 0.8007163405418396, + "step": 8434 + }, + { + "epoch": 1.9435483870967742, + "grad_norm": 1.1286085842961833, + "learning_rate": 4.3886948275320135e-09, + "loss": 0.7969222068786621, + "step": 8435 + }, + { + "epoch": 1.943778801843318, + "grad_norm": 1.2183409512094359, + "learning_rate": 4.353112759841404e-09, + "loss": 0.7752852439880371, + "step": 8436 + }, + { + "epoch": 1.9440092165898617, + "grad_norm": 1.1860536416754315, + "learning_rate": 4.317675208978411e-09, + "loss": 0.7788258790969849, + "step": 8437 + }, + { + "epoch": 1.9442396313364054, + "grad_norm": 1.1863849018136006, + "learning_rate": 4.2823821800866964e-09, + "loss": 0.838456392288208, + "step": 8438 + }, + { + "epoch": 
1.9444700460829494, + "grad_norm": 1.0569456831140607, + "learning_rate": 4.2472336782890525e-09, + "loss": 0.7503675222396851, + "step": 8439 + }, + { + "epoch": 1.944700460829493, + "grad_norm": 0.9808278818485672, + "learning_rate": 4.212229708687287e-09, + "loss": 0.810901403427124, + "step": 8440 + }, + { + "epoch": 1.9449308755760368, + "grad_norm": 1.0050063922171069, + "learning_rate": 4.1773702763621135e-09, + "loss": 0.7551805973052979, + "step": 8441 + }, + { + "epoch": 1.9451612903225808, + "grad_norm": 1.2275039222333026, + "learning_rate": 4.142655386373373e-09, + "loss": 0.9387043714523315, + "step": 8442 + }, + { + "epoch": 1.9453917050691243, + "grad_norm": 1.034577232879954, + "learning_rate": 4.1080850437598124e-09, + "loss": 0.7508292198181152, + "step": 8443 + }, + { + "epoch": 1.9456221198156682, + "grad_norm": 0.9799945991508818, + "learning_rate": 4.073659253539308e-09, + "loss": 0.737107515335083, + "step": 8444 + }, + { + "epoch": 1.945852534562212, + "grad_norm": 1.477967097078984, + "learning_rate": 4.03937802070875e-09, + "loss": 0.86794114112854, + "step": 8445 + }, + { + "epoch": 1.9460829493087557, + "grad_norm": 0.9207750837260967, + "learning_rate": 4.005241350243937e-09, + "loss": 0.7629859447479248, + "step": 8446 + }, + { + "epoch": 1.9463133640552996, + "grad_norm": 1.4180879805115079, + "learning_rate": 3.971249247099906e-09, + "loss": 0.7455410957336426, + "step": 8447 + }, + { + "epoch": 1.9465437788018434, + "grad_norm": 1.1941620926103322, + "learning_rate": 3.937401716210376e-09, + "loss": 0.8322222828865051, + "step": 8448 + }, + { + "epoch": 1.946774193548387, + "grad_norm": 1.510433091637528, + "learning_rate": 3.903698762488528e-09, + "loss": 0.7961260676383972, + "step": 8449 + }, + { + "epoch": 1.9470046082949308, + "grad_norm": 1.2160569883363423, + "learning_rate": 3.870140390826005e-09, + "loss": 0.8144096732139587, + "step": 8450 + }, + { + "epoch": 1.9472350230414746, + "grad_norm": 1.2123613138822447, + 
"learning_rate": 3.8367266060939095e-09, + "loss": 0.7973348498344421, + "step": 8451 + }, + { + "epoch": 1.9474654377880185, + "grad_norm": 1.4038735969349747, + "learning_rate": 3.803457413142253e-09, + "loss": 0.8311715126037598, + "step": 8452 + }, + { + "epoch": 1.9476958525345622, + "grad_norm": 0.9815978065709688, + "learning_rate": 3.770332816799948e-09, + "loss": 0.7851812839508057, + "step": 8453 + }, + { + "epoch": 1.947926267281106, + "grad_norm": 1.3820548975058524, + "learning_rate": 3.737352821875039e-09, + "loss": 0.8721193075180054, + "step": 8454 + }, + { + "epoch": 1.94815668202765, + "grad_norm": 1.2337347998012935, + "learning_rate": 3.704517433154364e-09, + "loss": 0.8594118356704712, + "step": 8455 + }, + { + "epoch": 1.9483870967741934, + "grad_norm": 0.9620755666197012, + "learning_rate": 3.671826655404109e-09, + "loss": 0.6526527404785156, + "step": 8456 + }, + { + "epoch": 1.9486175115207374, + "grad_norm": 0.9198704876253201, + "learning_rate": 3.639280493369368e-09, + "loss": 0.7577145099639893, + "step": 8457 + }, + { + "epoch": 1.948847926267281, + "grad_norm": 1.4898349304718468, + "learning_rate": 3.6068789517739173e-09, + "loss": 0.9176833629608154, + "step": 8458 + }, + { + "epoch": 1.9490783410138248, + "grad_norm": 1.5070373914502264, + "learning_rate": 3.5746220353209956e-09, + "loss": 0.8947671055793762, + "step": 8459 + }, + { + "epoch": 1.9493087557603688, + "grad_norm": 1.2654885409411176, + "learning_rate": 3.542509748692524e-09, + "loss": 0.8791666030883789, + "step": 8460 + }, + { + "epoch": 1.9495391705069123, + "grad_norm": 0.9247331783476281, + "learning_rate": 3.5105420965496626e-09, + "loss": 0.7431247234344482, + "step": 8461 + }, + { + "epoch": 1.9497695852534562, + "grad_norm": 1.3437504272827105, + "learning_rate": 3.4787190835324775e-09, + "loss": 0.7998695373535156, + "step": 8462 + }, + { + "epoch": 1.95, + "grad_norm": 1.359553043789141, + "learning_rate": 3.447040714259941e-09, + "loss": 0.8120161294937134, 
+ "step": 8463 + }, + { + "epoch": 1.9502304147465437, + "grad_norm": 1.063781533705899, + "learning_rate": 3.415506993330153e-09, + "loss": 0.8062546849250793, + "step": 8464 + }, + { + "epoch": 1.9504608294930876, + "grad_norm": 1.3290963135655427, + "learning_rate": 3.384117925320229e-09, + "loss": 0.8100919723510742, + "step": 8465 + }, + { + "epoch": 1.9506912442396314, + "grad_norm": 1.410960677080016, + "learning_rate": 3.352873514786303e-09, + "loss": 0.7376535534858704, + "step": 8466 + }, + { + "epoch": 1.950921658986175, + "grad_norm": 1.1333962819853984, + "learning_rate": 3.321773766263303e-09, + "loss": 0.7534361481666565, + "step": 8467 + }, + { + "epoch": 1.951152073732719, + "grad_norm": 0.956942860373484, + "learning_rate": 3.290818684265506e-09, + "loss": 0.6914925575256348, + "step": 8468 + }, + { + "epoch": 1.9513824884792625, + "grad_norm": 1.40322423242457, + "learning_rate": 3.2600082732858746e-09, + "loss": 0.837024450302124, + "step": 8469 + }, + { + "epoch": 1.9516129032258065, + "grad_norm": 1.3077639635125993, + "learning_rate": 3.229342537796609e-09, + "loss": 0.7960337400436401, + "step": 8470 + }, + { + "epoch": 1.9518433179723502, + "grad_norm": 1.1044299774108808, + "learning_rate": 3.1988214822485928e-09, + "loss": 0.6611788868904114, + "step": 8471 + }, + { + "epoch": 1.952073732718894, + "grad_norm": 1.2652589643459276, + "learning_rate": 3.16844511107206e-09, + "loss": 0.8798158168792725, + "step": 8472 + }, + { + "epoch": 1.952304147465438, + "grad_norm": 1.3477135835069336, + "learning_rate": 3.1382134286761506e-09, + "loss": 0.790015459060669, + "step": 8473 + }, + { + "epoch": 1.9525345622119814, + "grad_norm": 1.062422263250462, + "learning_rate": 3.1081264394489103e-09, + "loss": 0.7676407098770142, + "step": 8474 + }, + { + "epoch": 1.9527649769585254, + "grad_norm": 1.1707572290080033, + "learning_rate": 3.07818414775729e-09, + "loss": 0.8213051557540894, + "step": 8475 + }, + { + "epoch": 1.952995391705069, + 
"grad_norm": 1.328203051872804, + "learning_rate": 3.048386557947591e-09, + "loss": 0.8909401893615723, + "step": 8476 + }, + { + "epoch": 1.9532258064516128, + "grad_norm": 1.2206551189591073, + "learning_rate": 3.0187336743446867e-09, + "loss": 0.838227391242981, + "step": 8477 + }, + { + "epoch": 1.9534562211981568, + "grad_norm": 1.1958685930192579, + "learning_rate": 2.9892255012528013e-09, + "loss": 0.7297696471214294, + "step": 8478 + }, + { + "epoch": 1.9536866359447005, + "grad_norm": 1.508389266534061, + "learning_rate": 2.9598620429550636e-09, + "loss": 1.0060585737228394, + "step": 8479 + }, + { + "epoch": 1.9539170506912442, + "grad_norm": 1.1858328009290373, + "learning_rate": 2.9306433037132873e-09, + "loss": 0.7812967300415039, + "step": 8480 + }, + { + "epoch": 1.9541474654377882, + "grad_norm": 1.196629989025656, + "learning_rate": 2.901569287768746e-09, + "loss": 0.7349315881729126, + "step": 8481 + }, + { + "epoch": 1.9543778801843317, + "grad_norm": 1.1580071941270487, + "learning_rate": 2.8726399993415085e-09, + "loss": 0.7083498239517212, + "step": 8482 + }, + { + "epoch": 1.9546082949308756, + "grad_norm": 1.3308451395414542, + "learning_rate": 2.8438554426304386e-09, + "loss": 0.7969732880592346, + "step": 8483 + }, + { + "epoch": 1.9548387096774194, + "grad_norm": 1.405840014033905, + "learning_rate": 2.815215621813749e-09, + "loss": 0.7701122164726257, + "step": 8484 + }, + { + "epoch": 1.955069124423963, + "grad_norm": 1.0487330945577633, + "learning_rate": 2.7867205410484485e-09, + "loss": 0.7323017120361328, + "step": 8485 + }, + { + "epoch": 1.955299539170507, + "grad_norm": 0.9842598310766136, + "learning_rate": 2.7583702044704504e-09, + "loss": 0.8357248306274414, + "step": 8486 + }, + { + "epoch": 1.9555299539170505, + "grad_norm": 1.4806137218761686, + "learning_rate": 2.7301646161947966e-09, + "loss": 0.8164674043655396, + "step": 8487 + }, + { + "epoch": 1.9557603686635945, + "grad_norm": 1.2641967325925645, + "learning_rate": 
2.7021037803156566e-09, + "loss": 0.7972782850265503, + "step": 8488 + }, + { + "epoch": 1.9559907834101382, + "grad_norm": 1.2417679147004388, + "learning_rate": 2.6741877009058835e-09, + "loss": 0.864342987537384, + "step": 8489 + }, + { + "epoch": 1.956221198156682, + "grad_norm": 1.1067561191492752, + "learning_rate": 2.646416382017458e-09, + "loss": 0.7428402900695801, + "step": 8490 + }, + { + "epoch": 1.956451612903226, + "grad_norm": 1.3211414352422526, + "learning_rate": 2.618789827681378e-09, + "loss": 0.7164437770843506, + "step": 8491 + }, + { + "epoch": 1.9566820276497696, + "grad_norm": 1.153189225005644, + "learning_rate": 2.5913080419075473e-09, + "loss": 0.6997767686843872, + "step": 8492 + }, + { + "epoch": 1.9569124423963133, + "grad_norm": 1.2481992001614755, + "learning_rate": 2.563971028684886e-09, + "loss": 0.6399234533309937, + "step": 8493 + }, + { + "epoch": 1.9571428571428573, + "grad_norm": 1.1639751659112805, + "learning_rate": 2.536778791981553e-09, + "loss": 0.7642914056777954, + "step": 8494 + }, + { + "epoch": 1.9573732718894008, + "grad_norm": 1.218382512158835, + "learning_rate": 2.5097313357442806e-09, + "loss": 0.8284746408462524, + "step": 8495 + }, + { + "epoch": 1.9576036866359448, + "grad_norm": 1.2221524988832009, + "learning_rate": 2.4828286638989282e-09, + "loss": 0.6680238246917725, + "step": 8496 + }, + { + "epoch": 1.9578341013824885, + "grad_norm": 1.2965002342798193, + "learning_rate": 2.4560707803504834e-09, + "loss": 0.7621040344238281, + "step": 8497 + }, + { + "epoch": 1.9580645161290322, + "grad_norm": 1.2947556724815892, + "learning_rate": 2.4294576889827278e-09, + "loss": 0.7326159477233887, + "step": 8498 + }, + { + "epoch": 1.9582949308755762, + "grad_norm": 1.0656455780738308, + "learning_rate": 2.4029893936586833e-09, + "loss": 0.6496877670288086, + "step": 8499 + }, + { + "epoch": 1.9585253456221197, + "grad_norm": 1.241192579535759, + "learning_rate": 2.376665898220054e-09, + "loss": 0.665170431137085, + 
"step": 8500 + }, + { + "epoch": 1.9587557603686636, + "grad_norm": 1.2593646350179877, + "learning_rate": 2.3504872064876724e-09, + "loss": 0.7238261699676514, + "step": 8501 + }, + { + "epoch": 1.9589861751152073, + "grad_norm": 1.5709730629781664, + "learning_rate": 2.3244533222613882e-09, + "loss": 0.6696983575820923, + "step": 8502 + }, + { + "epoch": 1.959216589861751, + "grad_norm": 1.6011689537620306, + "learning_rate": 2.2985642493199563e-09, + "loss": 0.8414099216461182, + "step": 8503 + }, + { + "epoch": 1.959447004608295, + "grad_norm": 1.1680069988943498, + "learning_rate": 2.2728199914210377e-09, + "loss": 0.7390140295028687, + "step": 8504 + }, + { + "epoch": 1.9596774193548387, + "grad_norm": 1.1922421298842674, + "learning_rate": 2.247220552301532e-09, + "loss": 0.7910370826721191, + "step": 8505 + }, + { + "epoch": 1.9599078341013825, + "grad_norm": 1.2059164746419144, + "learning_rate": 2.2217659356771334e-09, + "loss": 0.8111266493797302, + "step": 8506 + }, + { + "epoch": 1.9601382488479264, + "grad_norm": 1.382769681983927, + "learning_rate": 2.1964561452425535e-09, + "loss": 0.8748809099197388, + "step": 8507 + }, + { + "epoch": 1.96036866359447, + "grad_norm": 1.4348415171969837, + "learning_rate": 2.1712911846714088e-09, + "loss": 0.726898193359375, + "step": 8508 + }, + { + "epoch": 1.9605990783410139, + "grad_norm": 0.903624770648156, + "learning_rate": 2.1462710576163335e-09, + "loss": 0.5221005082130432, + "step": 8509 + }, + { + "epoch": 1.9608294930875576, + "grad_norm": 0.9979524654583228, + "learning_rate": 2.1213957677090887e-09, + "loss": 0.7336875200271606, + "step": 8510 + }, + { + "epoch": 1.9610599078341013, + "grad_norm": 1.4570574984679434, + "learning_rate": 2.096665318560231e-09, + "loss": 0.9653327465057373, + "step": 8511 + }, + { + "epoch": 1.9612903225806453, + "grad_norm": 1.3910033326033395, + "learning_rate": 2.0720797137594448e-09, + "loss": 0.8309473991394043, + "step": 8512 + }, + { + "epoch": 1.9615207373271888, 
+ "grad_norm": 1.250491052702372, + "learning_rate": 2.047638956874986e-09, + "loss": 0.7829124331474304, + "step": 8513 + }, + { + "epoch": 1.9617511520737327, + "grad_norm": 1.6063542888921636, + "learning_rate": 2.0233430514547955e-09, + "loss": 0.8399544358253479, + "step": 8514 + }, + { + "epoch": 1.9619815668202765, + "grad_norm": 1.2304488854915971, + "learning_rate": 1.999192001025163e-09, + "loss": 0.7827579975128174, + "step": 8515 + }, + { + "epoch": 1.9622119815668202, + "grad_norm": 1.7023781342726942, + "learning_rate": 1.9751858090916174e-09, + "loss": 0.8617441654205322, + "step": 8516 + }, + { + "epoch": 1.9624423963133641, + "grad_norm": 1.124873706648068, + "learning_rate": 1.951324479138594e-09, + "loss": 0.758098840713501, + "step": 8517 + }, + { + "epoch": 1.9626728110599079, + "grad_norm": 1.407820551284048, + "learning_rate": 1.927608014629656e-09, + "loss": 0.738059937953949, + "step": 8518 + }, + { + "epoch": 1.9629032258064516, + "grad_norm": 1.2924313700222672, + "learning_rate": 1.9040364190070492e-09, + "loss": 0.6286636590957642, + "step": 8519 + }, + { + "epoch": 1.9631336405529956, + "grad_norm": 1.4040969276884698, + "learning_rate": 1.88060969569237e-09, + "loss": 0.764518141746521, + "step": 8520 + }, + { + "epoch": 1.963364055299539, + "grad_norm": 0.9848782890607348, + "learning_rate": 1.8573278480857878e-09, + "loss": 0.775516152381897, + "step": 8521 + }, + { + "epoch": 1.963594470046083, + "grad_norm": 1.2592904992793421, + "learning_rate": 1.8341908795665994e-09, + "loss": 0.8513185977935791, + "step": 8522 + }, + { + "epoch": 1.9638248847926267, + "grad_norm": 1.4423039825526616, + "learning_rate": 1.8111987934933404e-09, + "loss": 0.7300710082054138, + "step": 8523 + }, + { + "epoch": 1.9640552995391705, + "grad_norm": 1.1896167974085796, + "learning_rate": 1.788351593203119e-09, + "loss": 0.7346746921539307, + "step": 8524 + }, + { + "epoch": 1.9642857142857144, + "grad_norm": 1.3610028359172472, + "learning_rate": 
1.7656492820121715e-09, + "loss": 0.8231781721115112, + "step": 8525 + }, + { + "epoch": 1.964516129032258, + "grad_norm": 1.2672154264769777, + "learning_rate": 1.743091863215751e-09, + "loss": 0.6972112655639648, + "step": 8526 + }, + { + "epoch": 1.9647465437788019, + "grad_norm": 1.013160541626117, + "learning_rate": 1.720679340088016e-09, + "loss": 0.6512203812599182, + "step": 8527 + }, + { + "epoch": 1.9649769585253456, + "grad_norm": 1.220658103943082, + "learning_rate": 1.698411715882253e-09, + "loss": 0.6755591630935669, + "step": 8528 + }, + { + "epoch": 1.9652073732718893, + "grad_norm": 1.115552383506669, + "learning_rate": 1.6762889938303215e-09, + "loss": 0.6858727335929871, + "step": 8529 + }, + { + "epoch": 1.9654377880184333, + "grad_norm": 1.1810577023934496, + "learning_rate": 1.6543111771434303e-09, + "loss": 0.7820768356323242, + "step": 8530 + }, + { + "epoch": 1.965668202764977, + "grad_norm": 1.512690235242737, + "learning_rate": 1.6324782690116944e-09, + "loss": 0.7841604948043823, + "step": 8531 + }, + { + "epoch": 1.9658986175115207, + "grad_norm": 1.4015300039500524, + "learning_rate": 1.6107902726040234e-09, + "loss": 0.8665674328804016, + "step": 8532 + }, + { + "epoch": 1.9661290322580647, + "grad_norm": 1.1307460450405855, + "learning_rate": 1.5892471910684547e-09, + "loss": 0.6764376163482666, + "step": 8533 + }, + { + "epoch": 1.9663594470046082, + "grad_norm": 1.4229790787582275, + "learning_rate": 1.5678490275319312e-09, + "loss": 0.8453094959259033, + "step": 8534 + }, + { + "epoch": 1.9665898617511521, + "grad_norm": 1.0573142140796512, + "learning_rate": 1.546595785100413e-09, + "loss": 0.7798272371292114, + "step": 8535 + }, + { + "epoch": 1.9668202764976959, + "grad_norm": 1.1791892730982974, + "learning_rate": 1.5254874668586548e-09, + "loss": 0.7426424026489258, + "step": 8536 + }, + { + "epoch": 1.9670506912442396, + "grad_norm": 1.1309739514060748, + "learning_rate": 1.5045240758706501e-09, + "loss": 0.8443984985351562, 
+ "step": 8537 + }, + { + "epoch": 1.9672811059907835, + "grad_norm": 1.1053257066980806, + "learning_rate": 1.4837056151790762e-09, + "loss": 0.8439072370529175, + "step": 8538 + }, + { + "epoch": 1.967511520737327, + "grad_norm": 1.4135182916864908, + "learning_rate": 1.463032087805849e-09, + "loss": 0.8307704925537109, + "step": 8539 + }, + { + "epoch": 1.967741935483871, + "grad_norm": 1.1593054366438007, + "learning_rate": 1.442503496751568e-09, + "loss": 0.678236722946167, + "step": 8540 + }, + { + "epoch": 1.9679723502304147, + "grad_norm": 1.3372006359269073, + "learning_rate": 1.4221198449960724e-09, + "loss": 0.7072663307189941, + "step": 8541 + }, + { + "epoch": 1.9682027649769585, + "grad_norm": 1.194618240695654, + "learning_rate": 1.4018811354977732e-09, + "loss": 0.7825980186462402, + "step": 8542 + }, + { + "epoch": 1.9684331797235024, + "grad_norm": 1.8366711172437336, + "learning_rate": 1.3817873711945426e-09, + "loss": 0.786361813545227, + "step": 8543 + }, + { + "epoch": 1.9686635944700461, + "grad_norm": 1.6047169504491765, + "learning_rate": 1.3618385550029365e-09, + "loss": 1.00287926197052, + "step": 8544 + }, + { + "epoch": 1.9688940092165899, + "grad_norm": 1.336810745652672, + "learning_rate": 1.3420346898183054e-09, + "loss": 0.7320775389671326, + "step": 8545 + }, + { + "epoch": 1.9691244239631336, + "grad_norm": 1.0018804515064612, + "learning_rate": 1.322375778515461e-09, + "loss": 0.7127507925033569, + "step": 8546 + }, + { + "epoch": 1.9693548387096773, + "grad_norm": 1.4124185296399752, + "learning_rate": 1.3028618239475652e-09, + "loss": 0.818395733833313, + "step": 8547 + }, + { + "epoch": 1.9695852534562213, + "grad_norm": 1.2063998497880193, + "learning_rate": 1.2834928289472413e-09, + "loss": 0.6384972929954529, + "step": 8548 + }, + { + "epoch": 1.969815668202765, + "grad_norm": 1.240783999344712, + "learning_rate": 1.2642687963256849e-09, + "loss": 0.7358517646789551, + "step": 8549 + }, + { + "epoch": 1.9700460829493087, + 
"grad_norm": 1.1083546443376424, + "learning_rate": 1.2451897288734414e-09, + "loss": 0.7311068773269653, + "step": 8550 + }, + { + "epoch": 1.9702764976958527, + "grad_norm": 1.5415338816809878, + "learning_rate": 1.2262556293597403e-09, + "loss": 0.8390932083129883, + "step": 8551 + }, + { + "epoch": 1.9705069124423962, + "grad_norm": 1.2045586519715463, + "learning_rate": 1.2074665005328277e-09, + "loss": 0.8114689588546753, + "step": 8552 + }, + { + "epoch": 1.9707373271889401, + "grad_norm": 1.4445688810441233, + "learning_rate": 1.1888223451199665e-09, + "loss": 1.0044716596603394, + "step": 8553 + }, + { + "epoch": 1.9709677419354839, + "grad_norm": 1.2243432992298795, + "learning_rate": 1.170323165827214e-09, + "loss": 0.7566370368003845, + "step": 8554 + }, + { + "epoch": 1.9711981566820276, + "grad_norm": 1.2230365473762954, + "learning_rate": 1.1519689653397557e-09, + "loss": 0.7543225288391113, + "step": 8555 + }, + { + "epoch": 1.9714285714285715, + "grad_norm": 1.306226883529119, + "learning_rate": 1.1337597463217941e-09, + "loss": 0.8291902542114258, + "step": 8556 + }, + { + "epoch": 1.9716589861751153, + "grad_norm": 1.1360827313333892, + "learning_rate": 1.1156955114162147e-09, + "loss": 0.7363135814666748, + "step": 8557 + }, + { + "epoch": 1.971889400921659, + "grad_norm": 1.102255040931488, + "learning_rate": 1.0977762632451427e-09, + "loss": 0.7180813550949097, + "step": 8558 + }, + { + "epoch": 1.9721198156682027, + "grad_norm": 1.1849465839861355, + "learning_rate": 1.0800020044093861e-09, + "loss": 0.7220569849014282, + "step": 8559 + }, + { + "epoch": 1.9723502304147464, + "grad_norm": 1.2915012101962247, + "learning_rate": 1.0623727374889925e-09, + "loss": 0.8839110136032104, + "step": 8560 + }, + { + "epoch": 1.9725806451612904, + "grad_norm": 1.2553727673767463, + "learning_rate": 1.0448884650426926e-09, + "loss": 0.7210807800292969, + "step": 8561 + }, + { + "epoch": 1.9728110599078341, + "grad_norm": 1.3474393893445982, + 
"learning_rate": 1.0275491896084565e-09, + "loss": 0.6993537545204163, + "step": 8562 + }, + { + "epoch": 1.9730414746543778, + "grad_norm": 1.0591927963671788, + "learning_rate": 1.0103549137030486e-09, + "loss": 0.6951562166213989, + "step": 8563 + }, + { + "epoch": 1.9732718894009218, + "grad_norm": 1.0760064093903359, + "learning_rate": 9.933056398220285e-10, + "loss": 0.855778694152832, + "step": 8564 + }, + { + "epoch": 1.9735023041474653, + "grad_norm": 1.3238204379730676, + "learning_rate": 9.76401370440194e-10, + "loss": 0.8461301326751709, + "step": 8565 + }, + { + "epoch": 1.9737327188940093, + "grad_norm": 1.0765880280550415, + "learning_rate": 9.596421080112493e-10, + "loss": 0.6144053936004639, + "step": 8566 + }, + { + "epoch": 1.973963133640553, + "grad_norm": 1.226899728476588, + "learning_rate": 9.430278549675818e-10, + "loss": 0.6623581647872925, + "step": 8567 + }, + { + "epoch": 1.9741935483870967, + "grad_norm": 1.6396403159587711, + "learning_rate": 9.265586137209292e-10, + "loss": 0.9540686011314392, + "step": 8568 + }, + { + "epoch": 1.9744239631336407, + "grad_norm": 1.1121119945854705, + "learning_rate": 9.102343866616014e-10, + "loss": 0.7231987714767456, + "step": 8569 + }, + { + "epoch": 1.9746543778801844, + "grad_norm": 1.3771440446346792, + "learning_rate": 8.940551761592585e-10, + "loss": 0.7759320735931396, + "step": 8570 + }, + { + "epoch": 1.9748847926267281, + "grad_norm": 1.3995632478363096, + "learning_rate": 8.780209845621334e-10, + "loss": 0.8277846574783325, + "step": 8571 + }, + { + "epoch": 1.9751152073732718, + "grad_norm": 1.1615989785178322, + "learning_rate": 8.621318141974754e-10, + "loss": 0.7913431525230408, + "step": 8572 + }, + { + "epoch": 1.9753456221198156, + "grad_norm": 1.1262387789302248, + "learning_rate": 8.46387667371773e-10, + "loss": 0.7011829614639282, + "step": 8573 + }, + { + "epoch": 1.9755760368663595, + "grad_norm": 1.358126505769676, + "learning_rate": 8.30788546370198e-10, + "loss": 
0.8762087821960449, + "step": 8574 + }, + { + "epoch": 1.9758064516129032, + "grad_norm": 1.5337262034773564, + "learning_rate": 8.153344534569396e-10, + "loss": 0.7944581508636475, + "step": 8575 + }, + { + "epoch": 1.976036866359447, + "grad_norm": 0.9763562202292912, + "learning_rate": 8.00025390875203e-10, + "loss": 0.7086907625198364, + "step": 8576 + }, + { + "epoch": 1.976267281105991, + "grad_norm": 1.3716397771498143, + "learning_rate": 7.848613608468779e-10, + "loss": 0.7263821959495544, + "step": 8577 + }, + { + "epoch": 1.9764976958525344, + "grad_norm": 1.0912146553836337, + "learning_rate": 7.698423655732034e-10, + "loss": 0.714054524898529, + "step": 8578 + }, + { + "epoch": 1.9767281105990784, + "grad_norm": 1.0671768990247028, + "learning_rate": 7.549684072341023e-10, + "loss": 0.817487359046936, + "step": 8579 + }, + { + "epoch": 1.976958525345622, + "grad_norm": 1.3039849886057633, + "learning_rate": 7.402394879885143e-10, + "loss": 0.7933021783828735, + "step": 8580 + }, + { + "epoch": 1.9771889400921658, + "grad_norm": 1.1473238275849764, + "learning_rate": 7.25655609974396e-10, + "loss": 0.8699008822441101, + "step": 8581 + }, + { + "epoch": 1.9774193548387098, + "grad_norm": 1.2250569758639698, + "learning_rate": 7.112167753083876e-10, + "loss": 0.804245114326477, + "step": 8582 + }, + { + "epoch": 1.9776497695852533, + "grad_norm": 1.322132271674899, + "learning_rate": 6.969229860863679e-10, + "loss": 0.8334434628486633, + "step": 8583 + }, + { + "epoch": 1.9778801843317972, + "grad_norm": 1.1368298808414594, + "learning_rate": 6.827742443831219e-10, + "loss": 0.7549147605895996, + "step": 8584 + }, + { + "epoch": 1.978110599078341, + "grad_norm": 0.9036184179111577, + "learning_rate": 6.687705522522291e-10, + "loss": 0.69701087474823, + "step": 8585 + }, + { + "epoch": 1.9783410138248847, + "grad_norm": 1.259028975685209, + "learning_rate": 6.549119117263969e-10, + "loss": 0.727588415145874, + "step": 8586 + }, + { + "epoch": 
1.9785714285714286, + "grad_norm": 1.0967653076646233, + "learning_rate": 6.411983248171271e-10, + "loss": 0.7309392094612122, + "step": 8587 + }, + { + "epoch": 1.9788018433179724, + "grad_norm": 1.6515699626026994, + "learning_rate": 6.276297935149388e-10, + "loss": 0.8299658298492432, + "step": 8588 + }, + { + "epoch": 1.979032258064516, + "grad_norm": 1.362481943616663, + "learning_rate": 6.142063197892566e-10, + "loss": 0.9731055498123169, + "step": 8589 + }, + { + "epoch": 1.97926267281106, + "grad_norm": 1.1190783849934713, + "learning_rate": 6.009279055885219e-10, + "loss": 0.6292351484298706, + "step": 8590 + }, + { + "epoch": 1.9794930875576036, + "grad_norm": 1.2981523800262795, + "learning_rate": 5.877945528400818e-10, + "loss": 0.7881810665130615, + "step": 8591 + }, + { + "epoch": 1.9797235023041475, + "grad_norm": 1.2430793849512602, + "learning_rate": 5.748062634501894e-10, + "loss": 0.7910494804382324, + "step": 8592 + }, + { + "epoch": 1.9799539170506912, + "grad_norm": 1.3789958651744842, + "learning_rate": 5.619630393042252e-10, + "loss": 0.8255902528762817, + "step": 8593 + }, + { + "epoch": 1.980184331797235, + "grad_norm": 1.0908110861505123, + "learning_rate": 5.492648822660318e-10, + "loss": 0.788017749786377, + "step": 8594 + }, + { + "epoch": 1.980414746543779, + "grad_norm": 1.2052887418241187, + "learning_rate": 5.367117941791343e-10, + "loss": 0.8717716932296753, + "step": 8595 + }, + { + "epoch": 1.9806451612903224, + "grad_norm": 1.3810911920135494, + "learning_rate": 5.243037768652981e-10, + "loss": 0.7220178246498108, + "step": 8596 + }, + { + "epoch": 1.9808755760368664, + "grad_norm": 1.2221356933031184, + "learning_rate": 5.120408321256376e-10, + "loss": 0.7536830902099609, + "step": 8597 + }, + { + "epoch": 1.98110599078341, + "grad_norm": 1.13011497917934, + "learning_rate": 4.999229617401735e-10, + "loss": 0.7480939626693726, + "step": 8598 + }, + { + "epoch": 1.9813364055299538, + "grad_norm": 1.1029404069670388, + 
"learning_rate": 4.879501674676101e-10, + "loss": 0.7168867588043213, + "step": 8599 + }, + { + "epoch": 1.9815668202764978, + "grad_norm": 1.1019009005346911, + "learning_rate": 4.761224510460016e-10, + "loss": 0.8352792263031006, + "step": 8600 + }, + { + "epoch": 1.9817972350230415, + "grad_norm": 1.2827894099174693, + "learning_rate": 4.644398141919748e-10, + "loss": 0.6987372636795044, + "step": 8601 + }, + { + "epoch": 1.9820276497695852, + "grad_norm": 1.2120343684069002, + "learning_rate": 4.5290225860128426e-10, + "loss": 0.6844612956047058, + "step": 8602 + }, + { + "epoch": 1.9822580645161292, + "grad_norm": 1.4290404101727392, + "learning_rate": 4.4150978594859055e-10, + "loss": 0.7659348249435425, + "step": 8603 + }, + { + "epoch": 1.9824884792626727, + "grad_norm": 1.1012416889537506, + "learning_rate": 4.3026239788757077e-10, + "loss": 0.8163154125213623, + "step": 8604 + }, + { + "epoch": 1.9827188940092166, + "grad_norm": 1.3238497684740367, + "learning_rate": 4.191600960505859e-10, + "loss": 0.8688125610351562, + "step": 8605 + }, + { + "epoch": 1.9829493087557604, + "grad_norm": 1.13771312339099, + "learning_rate": 4.082028820493466e-10, + "loss": 0.8250670433044434, + "step": 8606 + }, + { + "epoch": 1.983179723502304, + "grad_norm": 1.1783821953258633, + "learning_rate": 3.973907574741364e-10, + "loss": 0.9378982782363892, + "step": 8607 + }, + { + "epoch": 1.983410138248848, + "grad_norm": 1.1593506126073094, + "learning_rate": 3.867237238943666e-10, + "loss": 0.8764913082122803, + "step": 8608 + }, + { + "epoch": 1.9836405529953915, + "grad_norm": 1.271012232850208, + "learning_rate": 3.762017828583541e-10, + "loss": 0.7690116763114929, + "step": 8609 + }, + { + "epoch": 1.9838709677419355, + "grad_norm": 1.2648955747200947, + "learning_rate": 3.6582493589332187e-10, + "loss": 0.6977133750915527, + "step": 8610 + }, + { + "epoch": 1.9841013824884792, + "grad_norm": 1.0674977135329127, + "learning_rate": 3.5559318450539835e-10, + "loss": 
0.7362618446350098, + "step": 8611 + }, + { + "epoch": 1.984331797235023, + "grad_norm": 1.4639153789709758, + "learning_rate": 3.455065301798399e-10, + "loss": 0.7065306305885315, + "step": 8612 + }, + { + "epoch": 1.984562211981567, + "grad_norm": 1.2869199371326872, + "learning_rate": 3.355649743805866e-10, + "loss": 0.812393307685852, + "step": 8613 + }, + { + "epoch": 1.9847926267281106, + "grad_norm": 1.1854338312494677, + "learning_rate": 3.2576851855070644e-10, + "loss": 0.6947695016860962, + "step": 8614 + }, + { + "epoch": 1.9850230414746544, + "grad_norm": 1.2703269278379015, + "learning_rate": 3.161171641121729e-10, + "loss": 0.6745340824127197, + "step": 8615 + }, + { + "epoch": 1.9852534562211983, + "grad_norm": 1.2827625316731694, + "learning_rate": 3.0661091246575454e-10, + "loss": 0.7426450848579407, + "step": 8616 + }, + { + "epoch": 1.9854838709677418, + "grad_norm": 1.1091238879569632, + "learning_rate": 2.9724976499134745e-10, + "loss": 0.7769409418106079, + "step": 8617 + }, + { + "epoch": 1.9857142857142858, + "grad_norm": 1.3932431178326243, + "learning_rate": 2.8803372304775365e-10, + "loss": 0.9591978192329407, + "step": 8618 + }, + { + "epoch": 1.9859447004608295, + "grad_norm": 1.0484296609281079, + "learning_rate": 2.789627879725698e-10, + "loss": 0.7504953742027283, + "step": 8619 + }, + { + "epoch": 1.9861751152073732, + "grad_norm": 1.4071650291408113, + "learning_rate": 2.700369610825204e-10, + "loss": 0.8990021347999573, + "step": 8620 + }, + { + "epoch": 1.9864055299539172, + "grad_norm": 1.3278656398693938, + "learning_rate": 2.612562436731247e-10, + "loss": 0.786778450012207, + "step": 8621 + }, + { + "epoch": 1.9866359447004607, + "grad_norm": 1.3996268794778322, + "learning_rate": 2.526206370189188e-10, + "loss": 0.7387717366218567, + "step": 8622 + }, + { + "epoch": 1.9868663594470046, + "grad_norm": 1.1375614144189101, + "learning_rate": 2.4413014237323336e-10, + "loss": 0.7672144174575806, + "step": 8623 + }, + { + "epoch": 
1.9870967741935484, + "grad_norm": 1.0342597373081839, + "learning_rate": 2.357847609686381e-10, + "loss": 0.6191907525062561, + "step": 8624 + }, + { + "epoch": 1.987327188940092, + "grad_norm": 1.1782057870810292, + "learning_rate": 2.2758449401638624e-10, + "loss": 0.7257785201072693, + "step": 8625 + }, + { + "epoch": 1.987557603686636, + "grad_norm": 1.0162904765762713, + "learning_rate": 2.195293427066369e-10, + "loss": 0.6997271776199341, + "step": 8626 + }, + { + "epoch": 1.9877880184331798, + "grad_norm": 1.0539587989000714, + "learning_rate": 2.1161930820878804e-10, + "loss": 0.7813891768455505, + "step": 8627 + }, + { + "epoch": 1.9880184331797235, + "grad_norm": 1.4503426709948117, + "learning_rate": 2.0385439167069917e-10, + "loss": 0.8003429174423218, + "step": 8628 + }, + { + "epoch": 1.9882488479262674, + "grad_norm": 1.0912355930233222, + "learning_rate": 1.962345942196908e-10, + "loss": 0.8020645380020142, + "step": 8629 + }, + { + "epoch": 1.988479262672811, + "grad_norm": 1.6862187526303312, + "learning_rate": 1.8875991696165604e-10, + "loss": 0.9189429879188538, + "step": 8630 + }, + { + "epoch": 1.988709677419355, + "grad_norm": 1.2154025001234743, + "learning_rate": 1.8143036098150487e-10, + "loss": 0.7399884462356567, + "step": 8631 + }, + { + "epoch": 1.9889400921658986, + "grad_norm": 1.346746600672021, + "learning_rate": 1.7424592734316402e-10, + "loss": 0.7725361585617065, + "step": 8632 + }, + { + "epoch": 1.9891705069124423, + "grad_norm": 1.2077720906172131, + "learning_rate": 1.6720661708946593e-10, + "loss": 0.7887094020843506, + "step": 8633 + }, + { + "epoch": 1.9894009216589863, + "grad_norm": 1.1656177751476533, + "learning_rate": 1.6031243124203786e-10, + "loss": 0.8007388114929199, + "step": 8634 + }, + { + "epoch": 1.9896313364055298, + "grad_norm": 1.1324445653667632, + "learning_rate": 1.5356337080174587e-10, + "loss": 0.6478462219238281, + "step": 8635 + }, + { + "epoch": 1.9898617511520738, + "grad_norm": 
1.0566661119746916, + "learning_rate": 1.469594367480287e-10, + "loss": 0.8274422287940979, + "step": 8636 + }, + { + "epoch": 1.9900921658986175, + "grad_norm": 1.2865497814691733, + "learning_rate": 1.4050063003956391e-10, + "loss": 0.7919641733169556, + "step": 8637 + }, + { + "epoch": 1.9903225806451612, + "grad_norm": 1.1652783170900007, + "learning_rate": 1.3418695161382388e-10, + "loss": 0.7973719239234924, + "step": 8638 + }, + { + "epoch": 1.9905529953917052, + "grad_norm": 1.2951125509591672, + "learning_rate": 1.280184023870756e-10, + "loss": 0.8002075552940369, + "step": 8639 + }, + { + "epoch": 1.9907834101382489, + "grad_norm": 1.2388157581694845, + "learning_rate": 1.2199498325482506e-10, + "loss": 0.748448371887207, + "step": 8640 + }, + { + "epoch": 1.9910138248847926, + "grad_norm": 1.1203681158314, + "learning_rate": 1.1611669509137278e-10, + "loss": 0.7333977222442627, + "step": 8641 + }, + { + "epoch": 1.9912442396313366, + "grad_norm": 1.2174560450430658, + "learning_rate": 1.1038353874992524e-10, + "loss": 0.7760608196258545, + "step": 8642 + }, + { + "epoch": 1.99147465437788, + "grad_norm": 1.0029836343944154, + "learning_rate": 1.0479551506259455e-10, + "loss": 0.6129526495933533, + "step": 8643 + }, + { + "epoch": 1.991705069124424, + "grad_norm": 1.2201657055802861, + "learning_rate": 9.935262484062068e-11, + "loss": 0.68567955493927, + "step": 8644 + }, + { + "epoch": 1.9919354838709677, + "grad_norm": 1.1159731481420905, + "learning_rate": 9.405486887381631e-11, + "loss": 0.9042092561721802, + "step": 8645 + }, + { + "epoch": 1.9921658986175115, + "grad_norm": 1.1343470197220147, + "learning_rate": 8.890224793123291e-11, + "loss": 0.7143117189407349, + "step": 8646 + }, + { + "epoch": 1.9923963133640554, + "grad_norm": 1.1380121853465122, + "learning_rate": 8.389476276071672e-11, + "loss": 0.7486213445663452, + "step": 8647 + }, + { + "epoch": 1.992626728110599, + "grad_norm": 1.0074777840055806, + "learning_rate": 
7.903241408924177e-11, + "loss": 0.8554232716560364, + "step": 8648 + }, + { + "epoch": 1.9928571428571429, + "grad_norm": 1.2464259863760472, + "learning_rate": 7.431520262246582e-11, + "loss": 0.6604819297790527, + "step": 8649 + }, + { + "epoch": 1.9930875576036866, + "grad_norm": 1.450236790683031, + "learning_rate": 6.974312904517443e-11, + "loss": 0.8032737970352173, + "step": 8650 + }, + { + "epoch": 1.9933179723502303, + "grad_norm": 1.417412341607897, + "learning_rate": 6.531619402083687e-11, + "loss": 0.7712494730949402, + "step": 8651 + }, + { + "epoch": 1.9935483870967743, + "grad_norm": 1.1501531132473004, + "learning_rate": 6.103439819216127e-11, + "loss": 0.7894617915153503, + "step": 8652 + }, + { + "epoch": 1.993778801843318, + "grad_norm": 1.585587469758744, + "learning_rate": 5.689774218065046e-11, + "loss": 0.8386135697364807, + "step": 8653 + }, + { + "epoch": 1.9940092165898617, + "grad_norm": 1.151150154599754, + "learning_rate": 5.290622658660204e-11, + "loss": 0.744853138923645, + "step": 8654 + }, + { + "epoch": 1.9942396313364057, + "grad_norm": 1.2019290228056547, + "learning_rate": 4.90598519894414e-11, + "loss": 0.7604823112487793, + "step": 8655 + }, + { + "epoch": 1.9944700460829492, + "grad_norm": 1.3088329290176663, + "learning_rate": 4.53586189474997e-11, + "loss": 0.7552424669265747, + "step": 8656 + }, + { + "epoch": 1.9947004608294931, + "grad_norm": 1.1999629002739178, + "learning_rate": 4.180252799801387e-11, + "loss": 0.9652698636054993, + "step": 8657 + }, + { + "epoch": 1.9949308755760369, + "grad_norm": 3.0754205014147553, + "learning_rate": 3.839157965712658e-11, + "loss": 0.9589856266975403, + "step": 8658 + }, + { + "epoch": 1.9951612903225806, + "grad_norm": 1.0149584356506736, + "learning_rate": 3.512577441988629e-11, + "loss": 0.6802269220352173, + "step": 8659 + }, + { + "epoch": 1.9953917050691246, + "grad_norm": 1.3402861462863225, + "learning_rate": 3.200511276035822e-11, + "loss": 0.8262367248535156, + "step": 
8660 + }, + { + "epoch": 1.995622119815668, + "grad_norm": 1.166077707630556, + "learning_rate": 2.9029595131513372e-11, + "loss": 0.8353632688522339, + "step": 8661 + }, + { + "epoch": 1.995852534562212, + "grad_norm": 1.3494376018654042, + "learning_rate": 2.61992219652285e-11, + "loss": 0.8807231187820435, + "step": 8662 + }, + { + "epoch": 1.9960829493087557, + "grad_norm": 1.1520030195581032, + "learning_rate": 2.3513993672397148e-11, + "loss": 0.8394359350204468, + "step": 8663 + }, + { + "epoch": 1.9963133640552995, + "grad_norm": 0.9751046818624397, + "learning_rate": 2.0973910642707592e-11, + "loss": 0.8343399167060852, + "step": 8664 + }, + { + "epoch": 1.9965437788018434, + "grad_norm": 0.9230235584546375, + "learning_rate": 1.857897324475388e-11, + "loss": 0.7168834209442139, + "step": 8665 + }, + { + "epoch": 1.9967741935483871, + "grad_norm": 1.1469086275708407, + "learning_rate": 1.6329181826257866e-11, + "loss": 0.7825703620910645, + "step": 8666 + }, + { + "epoch": 1.9970046082949309, + "grad_norm": 1.2013625464128237, + "learning_rate": 1.4224536713847157e-11, + "loss": 0.6497002840042114, + "step": 8667 + }, + { + "epoch": 1.9972350230414746, + "grad_norm": 1.204271254016415, + "learning_rate": 1.2265038212944112e-11, + "loss": 0.8188776969909668, + "step": 8668 + }, + { + "epoch": 1.9974654377880183, + "grad_norm": 2.0423246677180056, + "learning_rate": 1.0450686607987869e-11, + "loss": 0.898658812046051, + "step": 8669 + }, + { + "epoch": 1.9976958525345623, + "grad_norm": 1.5435875726791675, + "learning_rate": 8.781482162212306e-12, + "loss": 0.8580871820449829, + "step": 8670 + }, + { + "epoch": 1.997926267281106, + "grad_norm": 1.3046658328904006, + "learning_rate": 7.25742511797911e-12, + "loss": 0.7657710313796997, + "step": 8671 + }, + { + "epoch": 1.9981566820276497, + "grad_norm": 1.4371880227275262, + "learning_rate": 5.87851569655573e-12, + "loss": 0.7881382703781128, + "step": 8672 + }, + { + "epoch": 1.9983870967741937, + 
"grad_norm": 1.3805751034431293, + "learning_rate": 4.644754098004356e-12, + "loss": 0.8711144924163818, + "step": 8673 + }, + { + "epoch": 1.9986175115207372, + "grad_norm": 1.1130398802574797, + "learning_rate": 3.5561405015149814e-12, + "loss": 0.6993192434310913, + "step": 8674 + }, + { + "epoch": 1.9988479262672811, + "grad_norm": 0.9709196628106886, + "learning_rate": 2.6126750650723452e-12, + "loss": 0.7348669767379761, + "step": 8675 + }, + { + "epoch": 1.9990783410138249, + "grad_norm": 1.0686467754804958, + "learning_rate": 1.8143579254559227e-12, + "loss": 0.7356513142585754, + "step": 8676 + }, + { + "epoch": 1.9993087557603686, + "grad_norm": 1.6789468220081696, + "learning_rate": 1.1611891986840206e-12, + "loss": 0.7969627380371094, + "step": 8677 + }, + { + "epoch": 1.9995391705069125, + "grad_norm": 1.3374302292197147, + "learning_rate": 6.531689795696848e-13, + "loss": 0.7247132062911987, + "step": 8678 + }, + { + "epoch": 1.9997695852534563, + "grad_norm": 1.3757463334176048, + "learning_rate": 2.902973418317245e-13, + "loss": 0.6177656650543213, + "step": 8679 + }, + { + "epoch": 2.0, + "grad_norm": 1.3149906641620008, + "learning_rate": 7.25743380947108e-14, + "loss": 0.8378380537033081, + "step": 8680 + }, + { + "epoch": 2.0, + "step": 8680, + "total_flos": 7249753014763520.0, + "train_loss": 0.8227015781100444, + "train_runtime": 44224.0669, + "train_samples_per_second": 0.785, + "train_steps_per_second": 0.196 + } + ], + "logging_steps": 1, + "max_steps": 8680, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 7249753014763520.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 
0000000000000000000000000000000000000000..e7eb191dd44f853b2edd49aafea231852c267845 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f95b396ac9a3c4ab0d50e403be4c8c0fd191fd2a0aac0b5d95c7c3b72c8501b +size 6968 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..8717102b381b71f7d3f079de5e31dd6b252afb97 Binary files /dev/null and b/training_loss.png differ